dor-services 4.25.1 → 5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/bin/dor-indexer +20 -19
  3. data/bin/dor-indexerd +3 -2
  4. data/config/certs/robots-dor-dev.crt +29 -0
  5. data/config/certs/robots-dor-dev.key +27 -0
  6. data/config/config_defaults.yml +0 -6
  7. data/config/dev_console_env.rb +65 -0
  8. data/config/environments/development.rb +84 -0
  9. data/config/environments/development.rb.old +84 -0
  10. data/config/environments/test.rb +84 -0
  11. data/lib/dor-services.rb +8 -18
  12. data/lib/dor/config.rb +18 -24
  13. data/lib/dor/datastreams/administrative_metadata_ds.rb +8 -7
  14. data/lib/dor/datastreams/content_metadata_ds.rb +200 -278
  15. data/lib/dor/datastreams/datastream_spec_solrizer.rb +1 -1
  16. data/lib/dor/datastreams/default_object_rights_ds.rb +10 -8
  17. data/lib/dor/datastreams/desc_metadata_ds.rb +30 -34
  18. data/lib/dor/datastreams/embargo_metadata_ds.rb +17 -13
  19. data/lib/dor/datastreams/events_ds.rb +12 -12
  20. data/lib/dor/datastreams/geo_metadata_ds.rb +3 -244
  21. data/lib/dor/datastreams/identity_metadata_ds.rb +34 -30
  22. data/lib/dor/datastreams/role_metadata_ds.rb +6 -6
  23. data/lib/dor/datastreams/simple_dublin_core_ds.rb +12 -9
  24. data/lib/dor/datastreams/version_metadata_ds.rb +14 -33
  25. data/lib/dor/datastreams/workflow_definition_ds.rb +18 -18
  26. data/lib/dor/datastreams/workflow_ds.rb +74 -65
  27. data/lib/dor/migrations/identifiable/assert_adminPolicy.rb +1 -1
  28. data/lib/dor/migrations/identifiable/fix_model_assertions.rb +1 -1
  29. data/lib/dor/migrations/identifiable/record_remediation.rb +2 -2
  30. data/lib/dor/migrations/identifiable/uriify_augmented_contentlocation_refs.rb +1 -1
  31. data/lib/dor/migrations/identifiable/uriify_contentlocation_refs.rb +1 -1
  32. data/lib/dor/migrations/processable/unify_workflows.rb +4 -4
  33. data/lib/dor/migrations/versionable/add_missing_version_md.rb +1 -1
  34. data/lib/dor/models/admin_policy_object.rb +1 -1
  35. data/lib/dor/models/assembleable.rb +3 -4
  36. data/lib/dor/models/collection.rb +0 -2
  37. data/lib/dor/models/contentable.rb +34 -35
  38. data/lib/dor/models/describable.rb +80 -122
  39. data/lib/dor/models/editable.rb +57 -73
  40. data/lib/dor/models/embargoable.rb +13 -15
  41. data/lib/dor/models/eventable.rb +3 -3
  42. data/lib/dor/models/geoable.rb +8 -9
  43. data/lib/dor/models/governable.rb +36 -54
  44. data/lib/dor/models/identifiable.rb +119 -115
  45. data/lib/dor/models/item.rb +4 -4
  46. data/lib/dor/models/itemizable.rb +9 -9
  47. data/lib/dor/models/presentable.rb +133 -0
  48. data/lib/dor/models/preservable.rb +4 -4
  49. data/lib/dor/models/processable.rb +29 -28
  50. data/lib/dor/models/publishable.rb +36 -30
  51. data/lib/dor/models/releasable.rb +310 -0
  52. data/lib/dor/models/shelvable.rb +14 -14
  53. data/lib/dor/models/upgradable.rb +13 -13
  54. data/lib/dor/models/versionable.rb +4 -7
  55. data/lib/dor/models/workflow_object.rb +16 -36
  56. data/lib/dor/services/cleanup_reset_service.rb +28 -34
  57. data/lib/dor/services/cleanup_service.rb +4 -4
  58. data/lib/dor/services/digital_stacks_service.rb +10 -10
  59. data/lib/dor/services/merge_service.rb +1 -1
  60. data/lib/dor/services/metadata_handlers/mdtoolkit_handler.rb +2 -2
  61. data/lib/dor/services/metadata_service.rb +20 -20
  62. data/lib/dor/services/registration_service.rb +26 -27
  63. data/lib/dor/services/reset_workspace_service.rb +15 -15
  64. data/lib/dor/services/sdr_ingest_service.rb +4 -4
  65. data/lib/dor/services/search_service.rb +4 -9
  66. data/lib/dor/services/suri_service.rb +5 -5
  67. data/lib/dor/services/technical_metadata_service.rb +3 -2
  68. data/lib/dor/utils/ng_tidy.rb +9 -9
  69. data/lib/dor/utils/predicate_patch.rb +1 -1
  70. data/lib/dor/utils/solr_doc_helper.rb +13 -5
  71. data/lib/dor/version.rb +1 -1
  72. data/lib/dor/workflow/document.rb +28 -30
  73. data/lib/dor/workflow/graph.rb +36 -36
  74. data/lib/dor/workflow/process.rb +12 -12
  75. data/lib/tasks/dor.rake +1 -1
  76. data/lib/tasks/rdoc.rake +3 -3
  77. metadata +67 -76
  78. data/lib/dor/datastreams/geo2mods.xsl +0 -867
  79. data/lib/dor/models/discoverable.rb +0 -64
  80. data/lib/dor/models/releaseable.rb +0 -357
  81. data/lib/dor/services/indexing_service.rb +0 -64
  82. data/lib/dor/utils/sdr_client.rb +0 -23
  83. data/lib/dor/utils/utc_date_field_mapper.rb +0 -7
@@ -1,64 +0,0 @@
1
- module Dor
2
- module Discoverable
3
- extend ActiveSupport::Concern
4
- #index gryphondor fields
5
- require 'stanford-mods'
6
- def to_solr(solr_doc = Hash.new, *args)
7
- super solr_doc, *args
8
-
9
- if descMetadata && !descMetadata.new?
10
- stanford_mods_record=Stanford::Mods::Record.new
11
- stanford_mods_record.from_str(descMetadata.ng_xml.to_s)
12
- doc_hash = {
13
- :sw_format_facet => stanford_mods_record.format,
14
- # title fields
15
- :sw_title_245a_search_facet_facet => stanford_mods_record.sw_short_title,
16
- :sw_title_245_search_facet_facet => stanford_mods_record.sw_full_title,
17
- :sw_title_variant_search_facet_facet => stanford_mods_record.sw_addl_titles,
18
- :sw_title_sort_facet => stanford_mods_record.sw_sort_title,
19
- :sw_title_245a_display_facet => stanford_mods_record.sw_short_title,
20
- :sw_title_display_facet => stanford_mods_record.sw_full_title,
21
- :sw_title_full_display_facet => stanford_mods_record.sw_full_title,
22
-
23
- # author fields
24
- :sw_author_1xx_search_facet_facet => stanford_mods_record.sw_main_author,
25
- :sw_author_7xx_search_facet_facet => stanford_mods_record.sw_addl_authors,
26
- :sw_author_person_facet_facet => stanford_mods_record.sw_person_authors,
27
- :sw_author_other_facet_facet => stanford_mods_record.sw_impersonal_authors,
28
- #:sw_author_sort_facet => stanford_mods_record.sw_sort_author,
29
- :sw_author_corp_display_facet => stanford_mods_record.sw_corporate_authors,
30
- :sw_author_meeting_display_facet => stanford_mods_record.sw_meeting_authors,
31
- :sw_author_person_display_facet => stanford_mods_record.sw_person_authors,
32
- :sw_author_person_full_display_facet => stanford_mods_record.sw_person_authors,
33
-
34
- # subject search fields
35
- :sw_topic_search_facet_facet => stanford_mods_record.topic_search,
36
- :sw_geographic_search_facet_facet => stanford_mods_record.geographic_search,
37
- :sw_subject_other_search_facet_facet => stanford_mods_record.subject_other_search,
38
- :sw_subject_other_subvy_search_facet_facet => stanford_mods_record.subject_other_subvy_search,
39
- :sw_subject_all_search_facet_facet => stanford_mods_record.subject_all_search,
40
- :sw_topic_facet_facet => stanford_mods_record.topic_facet,
41
- :sw_geographic_facet_facet => stanford_mods_record.geographic_facet,
42
- :sw_era_facet_facet => stanford_mods_record.era_facet,
43
-
44
- :sw_language_facet => stanford_mods_record.sw_language_facet,
45
- #:sw_physical => stanford_mods_record.term_values([:sw_physical_description, :sw_extent]),
46
- #:sw_summary_search_facet_facet => stanford_mods_record.term_values(:sw_abstract),
47
- #:sw_toc_search_facet_facet => stanford_mods_record.term_values(:sw_tableOfContents),
48
- #:sw_url_suppl => stanford_mods_record.term_values([:sw_related_item, :sw_location, :sw_url]),
49
-
50
- #publish date fields
51
- :sw_pub_search_facet_facet => stanford_mods_record.place,
52
- :sw_pub_date_sort_facet => stanford_mods_record.pub_date_sort,
53
- :sw_pub_date_group_facet_facet => stanford_mods_record.pub_date_groups(stanford_mods_record.pub_date),
54
- :sw_pub_date_facet =>stanford_mods_record.pub_date_facet,
55
- :sw_pub_date_display_facet => stanford_mods_record.pub_date_display,
56
- :sw_all_search_facet_facet => stanford_mods_record.text
57
-
58
- }
59
- solr_doc.merge!(doc_hash) if doc_hash
60
- end
61
- solr_doc
62
- end
63
- end
64
- end
@@ -1,357 +0,0 @@
1
- require 'open-uri'
2
- require 'retries'
3
-
4
- module Dor
5
- module Releaseable
6
- extend ActiveSupport::Concern
7
- include Itemizable
8
-
9
- # Add release tags to an item and initialize the item release workflow
10
- # @param release_tags [Hash or Array] Either a hash of a single release tag or an array of such hashes. Each tag should be in the form of {:tag=>'Fitch : Batch2',:what=>'self',:to=>'Searchworks',:who=>'petucket', :release=>true/false}
11
- # @raise [ArgumentError] Raised if the tags are improperly supplied
12
- def add_release_nodes_and_start_releaseWF(release_tags)
13
- release_tags = [release_tags] unless release_tags.is_a?(Array)
14
-
15
- # Add in each tag
16
- release_tags.each do |r_tag|
17
- add_release_node(r_tag[:release],r_tag)
18
- end
19
-
20
- # Save item to dor so the robots work with the latest data
21
- save
22
- initialize_workflow('releaseWF')
23
- end
24
-
25
- # Generate XML structure for inclusion to Purl
26
- # @return [String] The XML release node as a string, with releaseData as the root element
27
- def generate_release_xml
28
- builder = Nokogiri::XML::Builder.new do |xml|
29
- xml.releaseData {
30
- released_for.each do |project, released_value|
31
- xml.release(released_value['release'], :to => project)
32
- end
33
- }
34
- end
35
- builder.to_xml
36
- end
37
-
38
- # Determine which projects an item is released for
39
- # @param [Boolean] skip_live_purl set true to skip requesting from purl backend
40
- # @return [Hash{String => Hash}] all namespaces, keys are Project name Strings, values are Hashes of the form {'release' => Boolean}
41
- def released_for(skip_live_purl = false)
42
- released_hash = {}
43
- # Get release tags on the item itself
44
- release_tags_on_this_item = release_nodes
45
-
46
- # Get any self tags on this item
47
- self_release_tags = get_self_release_tags(release_tags_on_this_item)
48
-
49
- # Get the most recent self tag for all targets and save their result since most recent self always trumps any other non self tags
50
- latest_self_tags = get_newest_release_tag(self_release_tags)
51
- latest_self_tags.keys.each do |target|
52
- released_hash[target] = clean_release_tag_for_purl(latest_self_tags[target])
53
- end
54
-
55
- # With Self Tags Resolved We Now need to deal with tags on all sets this object is part of
56
- potential_applicable_release_tags = {} # This will be where we store all tags that apply, regardless of their timestamp
57
-
58
- # Get all release tags on the item and strip out the what = self ones, we've already processed all the self tags on this item
59
- potential_applicable_release_tags = get_tags_for_what_value(get_release_tags_for_item_and_all_governing_sets, 'collection')
60
- administrative_tags = tags # Get them once here and pass them down
61
-
62
- # We now have the keys for all potential releases, we need to check the tags: the most recent timestamp with an explicit true or false wins.
63
- # In a nil case, the lack of an explicit false tag we do nothing.
64
- (potential_applicable_release_tags.keys - released_hash.keys).each do |key| # don't bother checking if already added to the release hash, they were added due to a self tag so that has won
65
- latest_applicable_tag_for_key = latest_applicable_release_tag_in_array(potential_applicable_release_tags[key], administrative_tags)
66
- next if latest_applicable_tag_for_key.nil? # We have a valid tag, record it
67
- released_hash[key] = clean_release_tag_for_purl(latest_applicable_tag_for_key)
68
- end
69
-
70
- # See what the application is currently released for on Purl. If something is released in purl but not listed here, it needs to be added as a false
71
- add_tags_from_purl(released_hash) unless skip_live_purl
72
-
73
- released_hash
74
- end
75
-
76
- #Take a hash of tags as obtained via Dor::Item.release_tags and returns all self tags
77
- #
78
- #@param tags [Hash] a hash of tags obtained via Dor::Item.release_tags or matching format
79
- #
80
- #@return [Hash] a hash of self tags for each to value
81
- def get_self_release_tags(tags)
82
- get_tags_for_what_value(tags, 'self')
83
- end
84
-
85
- #Take an item and get all of its release tags and all tags on collections it is a member of
86
- #
87
- #
88
- #@return [Hash] a hash of all tags
89
- def get_release_tags_for_item_and_all_governing_sets
90
- return_tags = release_nodes || {}
91
- collections.each do |collection|
92
- return_tags = combine_two_release_tag_hashes(return_tags, Dor::Item.find(collection.id).get_release_tags_for_item_and_all_governing_sets) # recurvise so parents of parents are found
93
- end
94
- return_tags
95
- end
96
-
97
- #Take two hashes of tags and combine them, will not overwrite but will enforce uniqueness of the tags
98
- #
99
- #@param hash_one [Hash] a hash of tags obtained via Dor::Item.release_tags or matching format
100
- #@param hash_two [Hash] a hash of tags obtained via Dor::Item.release_tags or matching format
101
- #
102
- #@return [Hash] the combined hash with uniqueness enforced
103
- def combine_two_release_tag_hashes(hash_one, hash_two)
104
- hash_two.keys.each do |key|
105
- hash_one[key] = hash_two[key] if hash_one[key].nil?
106
- hash_one[key] = (hash_one[key] + hash_two[key]).uniq unless hash_one[key].nil?
107
- end
108
- hash_one
109
- end
110
-
111
- #Take a hash of tags and return all tags with the matching what target
112
- #
113
- #@param tags [Hash] a hash of tags obtained via Dor::Item.release_tags or matching format
114
- #@param what_target [String] the target for the 'what' key, self or collection
115
- #
116
- #@return [Hash] a hash of the tags matching what_target for each to value
117
- def get_tags_for_what_value(tags, what_target)
118
- return_hash = {}
119
- tags.keys.each do |key|
120
- self_tags = tags[key].select {|tag| tag['what'] == what_target.downcase}
121
- return_hash[key] = self_tags if self_tags.size > 0
122
- end
123
- return_hash
124
- end
125
-
126
- # Take a hash of tags as obtained via Dor::Item.release_tags and returns the newest tag for each namespace
127
- # @param tags [Hash] a hash of tags obtained via Dor::Item.release_tags or matching format
128
- # @return [Hash] a hash of latest tags for each to value
129
- def get_newest_release_tag(tags)
130
- Hash[tags.map {|key, val| [key, newest_release_tag_in_an_array(val)]}]
131
- end
132
-
133
- # Take a tag and return only the attributes we want to put into purl
134
- # @param tag [Hash] a tag
135
- # @return [Hash] a hash of the attributes we want for purl
136
- def clean_release_tag_for_purl(tag)
137
- {'release' => tag['release']}
138
- end
139
-
140
- # Takes an array of release tags and returns the most recent one
141
- # @param array_of_tags [Array] an array of hashes, with the hashes being release tags
142
- # @return [Hash] the most recent tag
143
- def newest_release_tag_in_an_array(array_of_tags)
144
- latest_tag_in_array = array_of_tags[0] || {}
145
- array_of_tags.each do |tag|
146
- latest_tag_in_array = tag if tag['when'] > latest_tag_in_array['when']
147
- end
148
- latest_tag_in_array
149
- end
150
-
151
- # Takes a tag and returns true or false if it applies to the specific item
152
- # @param release_tag [Hash] the tag in a hashed form
153
- # @param admin_tags [Array] the administrative tags on an item, if not supplied it will attempt to retrieve them
154
- # @return [Boolean] true or false if it applies (not true or false if it is released, that is the release_tag data)
155
- def does_release_tag_apply(release_tag, admin_tags = false)
156
- # Is the tag global or restricted
157
- return true if release_tag['tag'].nil? # no specific tag specificied means this tag is global to all members of the collection
158
- admin_tags = tags unless admin_tags # We use false instead of [], since an item can have no admin_tags at which point we'd be passing this var as [] and would not attempt to retrieve it
159
- admin_tags.include?(release_tag['tag'])
160
- end
161
-
162
- #Takes an array of release tags and returns the most recent one that applies to this item
163
- #
164
- #@param release_tags [Array] an array of release tags in hashed form
165
- #@param admin_tags [Array] the administrative tags on an item
166
- #
167
- #@return [Hash] the tag
168
- def latest_applicable_release_tag_in_array(release_tags, admin_tags)
169
- newest_tag = newest_release_tag_in_an_array(release_tags)
170
- return newest_tag if does_release_tag_apply(newest_tag, admin_tags)
171
-
172
- # The latest tag wasn't applicable, slice it off and try again
173
- # This could be optimized by reordering on the timestamp and just running down it instead of constantly resorting, at least if we end up getting numerous release tags on an item
174
- release_tags.slice!(release_tags.index(newest_tag))
175
-
176
- return latest_applicable_release_tag_in_array(release_tags, admin_tags) if release_tags.size > 0 # Try again after dropping the inapplicable
177
- nil # We're out of tags, no applicable ones
178
- end
179
-
180
- #helper method to get the release tags as a hash keyed by their 'to' value
181
- #
182
- #@return [Hash{String => Array<Hash>}] all release tags and their attributes, grouped by their 'to' value
183
- def release_tags
184
- release_tags = identityMetadata.ng_xml.xpath('//release')
185
- return_hash = {}
186
- release_tags.each do |release_tag|
187
- hashed_node = release_tag_node_to_hash(release_tag)
188
- if !return_hash[hashed_node[:to]].nil?
189
- return_hash[hashed_node[:to]] << hashed_node[:attrs]
190
- else
191
- return_hash[hashed_node[:to]] = [hashed_node[:attrs]]
192
- end
193
- end
194
- return_hash
195
- end
196
-
197
- # Method to convert one release element into a hash
198
- # @param rtag [Nokogiri::XML::Element] the release tag element
199
- # @return [Hash] in the form of {:to => String, :attrs => Hash}
200
- def release_tag_node_to_hash(rtag)
201
- to = 'to'
202
- release = 'release'
203
- when_word = 'when' # TODO: Make to and when_word load from some config file instead of hardcoded here
204
- attrs = rtag.attributes
205
- return_hash = { :to => attrs[to].value }
206
- attrs.tap { |a| a.delete(to) }
207
- attrs[release] = rtag.text.downcase == 'true' #save release as a boolean
208
- return_hash[:attrs] = attrs
209
-
210
- # convert all the attrs beside :to to strings, they are currently Nokogiri::XML::Attr
211
- (return_hash[:attrs].keys - [to]).each do |a|
212
- return_hash[:attrs][a] = return_hash[:attrs][a].to_s if a != release
213
- end
214
-
215
- return_hash[:attrs][when_word] = Time.parse(return_hash[:attrs][when_word]) #convert when to a datetime
216
- return_hash
217
- end
218
-
219
- # Add a release node for the item
220
- # Will use the current time as the timestamp if you do not supply one; you can supply a timestamp for correcting history, etc. if desired
221
- # @param release [Boolean] True or false for the release node
222
- # @param attrs [hash] A hash of any attributes to be placed onto the tag
223
- # Timestamp will be calculated by the function, if no displayType is passed in, it will default to file
224
- # @return [Nokogiri::XML::Element] the tag added if successful
225
- # @raise [ArgumentError] Raised if attributes are improperly supplied
226
- # @example
227
- # item.add_release_node(true, {:tag=>'Fitch : Batch2',:what=>'self',:to=>'Searchworks',:who=>'petucket', :displayType=>'filmstrip'})
228
- def add_release_node(release, attrs = {})
229
- identity_metadata_ds = identityMetadata
230
- attrs[:when] = Time.now.utc.iso8601 if attrs[:when].nil? #add the timestamp
231
- attrs[:displayType] = 'file' if attrs[:displayType].nil? #default to file is no display type is passed
232
- valid_release_attributes(release, attrs)
233
-
234
- # Remove the old displayType and then add the one for this tag
235
- remove_displayTypes
236
- identity_metadata_ds.add_value(:displayType, attrs[:displayType], {})
237
- identity_metadata_ds.add_value(:release, release.to_s, attrs)
238
- end
239
-
240
- # Determine if the supplied tag is a valid release node that meets all requirements
241
- # @raise [ArgumentError] Raises an error for the first fault found in the release tag
242
- # @return [Boolean] Returns true if no errors found
243
- # @param attrs [hash] A hash of attributes for the tag, must contain :when (an ISO 8601 timestamp), :who (to identify who or what added the tag), :to, :what ('self' or 'collection') and :displayType
244
- def valid_release_attributes(tag, attrs = {})
245
- raise ArgumentError, ":when is not iso8601" if attrs[:when].match('\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z').nil?
246
- [:who, :to, :what].each do |check_attr|
247
- raise ArgumentError, "#{check_attr} not supplied as a String" if attrs[check_attr].class != String
248
- end
249
-
250
- what_correct = false
251
- ['self', 'collection'].each do |allowed_what_value|
252
- what_correct = true if attrs[:what] == allowed_what_value
253
- end
254
- raise ArgumentError, ':what must be self or collection' unless what_correct
255
- raise ArgumentError, 'the value set for this tag is not a boolean' if !!tag != tag
256
- raise ArgumentError, ':displayType must be passed in as a String' unless attrs[:displayType].class == String
257
-
258
- validate_tag_format(attrs[:tag]) unless attrs[:tag].nil? # Will Raise exception if invalid tag
259
- true
260
- end
261
-
262
- # Helper method to get the release nodes as a hash keyed by their 'to' value
263
- # @return [Hash{String => Array<Hash>}] all release tags and their attributes, grouped by their 'to' value
264
- def release_nodes
265
- release_tags = identityMetadata.ng_xml.xpath('//release')
266
- return_hash = {}
267
- release_tags.each do |release_tag|
268
- hashed_node = release_tag_node_to_hash(release_tag)
269
- if !return_hash[hashed_node[:to]].nil?
270
- return_hash[hashed_node[:to]] << hashed_node[:attrs]
271
- else
272
- return_hash[hashed_node[:to]] = [hashed_node[:attrs]]
273
- end
274
- end
275
- return_hash
276
- end
277
-
278
- # Get a list of all release nodes found in a purl document, fetches purl xml for a druid
279
- # @param druid [String]
280
- # @raise [OpenURI::HTTPError]
281
- # @return [Nokogiri::HTML::Document] the parsed xml for the druid or an empty document if no purl is found
282
- def get_xml_from_purl
283
- url = form_purl_url
284
- handler = Proc.new do |exception, attempt_number, total_delay|
285
- # We assume a 404 means the document has never been published before and thus has no purl
286
- Dor.logger.warn "[Attempt #{attempt_number}] GET #{url} -- #{exception.class}: #{exception.message}; #{total_delay} seconds elapsed."
287
- raise exception unless exception.is_a? OpenURI::HTTPError
288
- return Nokogiri::HTML::Document.new if exception.io.status[0] == '404'
289
- end
290
-
291
- with_retries(:max_retries => 3, :base_sleep_seconds => 3, :max_sleep_seconds => 5, :handler => handler) do |attempt|
292
- # If you change the method used for opening the webpage, you can change the :rescue param to handle the new method's errors
293
- Dor.logger.info "[Attempt #{attempt}] GET #{url}"
294
- return Nokogiri::HTML(OpenURI.open_uri(url))
295
- end
296
- end
297
-
298
- #Since purl does not use the druid: prefix but much of dor does, use this function to strip the druid: if needed
299
- #
300
- #@return [String] the druid sans the druid: or if there was no druid: prefix, the entire string you passed
301
- def remove_druid_prefix
302
- druid_prefix = "druid:"
303
- return id.split(druid_prefix)[1] if id.split(druid_prefix).size > 1
304
- druid
305
- end
306
-
307
- # Take the druid and create the entire purl url that will be usable for the open method in open-uri, returns https
308
- # @return [String] the full url
309
- def form_purl_url
310
- 'https://' + Dor::Config.stacks.document_cache_host + "/#{remove_druid_prefix}.xml"
311
- end
312
-
313
- # Pull all release nodes from the public xml obtained via the purl query
314
- # @param doc [Nokogiri::HTML::Document] The parsed purl document of the object you want
315
- # @return [Array] An array containing all the release tags
316
- def get_release_tags_from_purl_xml(doc)
317
- nodes = doc.xpath('//html/body/publicobject/releasedata').children
318
- # We only want the nodes with a name that isn't text
319
- nodes.reject {|n| n.name.nil? || n.name.downcase == 'text'}.map {|n| n.attr('to')}.uniq
320
- end
321
-
322
- # Pull all release nodes from the public xml obtained via the purl query
323
- # @return [Array] An array containing all the release tags
324
- def get_release_tags_from_purl
325
- xml = get_xml_from_purl
326
- get_release_tags_from_purl_xml(xml)
327
- end
328
-
329
- # This function calls purl and gets a list of all release tags currently in purl. It then compares to the list you have generated.
330
- # Any tag that is on purl, but not in the newly generated list is added to the new list with a value of false.
331
- # @param new_tags [Hash{String => Boolean}] all new tags in the form of !{"Project" => Boolean}
332
- # @return [Hash], a hash in the same form as new_tags, with all missing tags not in new_tags, but in current_tag_names, added in with a Boolean value of false
333
- def add_tags_from_purl(new_tags)
334
- tags_currently_in_purl = get_release_tags_from_purl
335
- missing_tags = tags_currently_in_purl.map(&:downcase) - new_tags.keys.map(&:downcase)
336
- missing_tags.each do |missing_tag|
337
- new_tags[missing_tag.capitalize] = {"release"=>false}
338
- end
339
- new_tags
340
- end
341
-
342
- def to_solr(solr_doc = {}, *args)
343
- super(solr_doc, *args)
344
-
345
- # TODO: sort of worried about the performance impact in bulk reindex
346
- # situations, since released_for recurses all parent collections. jmartin 2015-07-14
347
- released_for(true).each { |key, val|
348
- add_solr_value(solr_doc, 'released_to', key, :symbol, []) if val
349
- }
350
-
351
- # TODO: need to solrize whether item is released to purl? does released_for return that?
352
- # logic is: "True when there is a published lifecycle and Access Rights is anything but Dark"
353
-
354
- solr_doc
355
- end
356
- end
357
- end
@@ -1,64 +0,0 @@
1
- module Dor
2
- class IndexingService
3
- ##
4
- # Returns a Logger instance for recording info about indexing attempts
5
- # @yield attempt to execute 'entry_id_block' and use the result as an extra identifier for the log
6
- # entry. a placeholder will be used otherwise. 'request.uuid' might be useful in a Rails app.
7
- def self.generate_index_logger(&entry_id_block)
8
- index_logger = Logger.new(Config.indexing_svc.log, Config.indexing_svc.log_rotation_interval)
9
- index_logger.formatter = proc do |severity, datetime, progname, msg|
10
- date_format_str = Config.indexing_svc.log_date_format_str
11
- entry_id = begin entry_id_block.call rescue '---' end
12
- "[#{entry_id}] [#{datetime.utc.strftime(date_format_str)}] #{msg}\n"
13
- end
14
- index_logger
15
- end
16
-
17
- # memoize the loggers we create in a hash, init with a nil default logger
18
- @@loggers = { default: nil }
19
-
20
- def self.default_index_logger
21
- @@loggers[:default] ||= generate_index_logger
22
- end
23
-
24
- # takes a Dor object and indexes it to solr. doesn't commit automatically.
25
- def self.reindex_object(obj)
26
- solr_doc = obj.to_solr
27
- Dor::SearchService.solr.add(solr_doc)
28
- solr_doc
29
- end
30
-
31
- # retrieves a single Dor object by pid, indexes the object to solr, does some logging
32
- # (will use a default logger if one is not provided). doesn't commit automatically.
33
- #
34
- # WARNING/TODO: the tests indicate that the "rescue Exception" block at the end will
35
- # get skipped, and the thrown exception (e.g. SystemStackError) will not be logged. since
36
- # that's the only consequence, and the exception bubbles up as we would want anyway, it
37
- # doesn't seem worth blocking refactoring. see https://github.com/sul-dlss/dor-services/issues/156
38
- # extra logging in this case would be nice, but centralized indexing that's otherwise
39
- # fully functional is nicer.
40
- def self.reindex_pid(pid, index_logger = nil, should_raise_errors = true)
41
- index_logger ||= default_index_logger
42
- obj = Dor.load_instance pid
43
- solr_doc = reindex_object obj
44
- index_logger.info "updated index for #{pid}"
45
- solr_doc
46
- rescue StandardError => se
47
- if se.is_a? ActiveFedora::ObjectNotFoundError
48
- index_logger.warn "failed to update index for #{pid}, object not found in Fedora"
49
- else
50
- index_logger.warn "failed to update index for #{pid}, unexpected StandardError, see main app log: #{se.backtrace}"
51
- end
52
- raise se if should_raise_errors
53
- rescue Exception => ex
54
- index_logger.error "failed to update index for #{pid}, unexpected Exception, see main app log: #{ex.backtrace}"
55
- raise ex # don't swallow anything worse than StandardError
56
- end
57
-
58
- # given a list of pids, retrieve those objects from fedora, index each to solr, optionally commit
59
- def self.reindex_pid_list(pid_list, should_commit = false)
60
- pid_list.each { |pid| reindex_pid pid, nil, false } # use the default logger, don't let individual errors nuke the rest of the batch
61
- ActiveFedora.solr.conn.commit if should_commit
62
- end
63
- end
64
- end