dor-services 4.25.1 → 5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/dor-indexer +20 -19
- data/bin/dor-indexerd +3 -2
- data/config/certs/robots-dor-dev.crt +29 -0
- data/config/certs/robots-dor-dev.key +27 -0
- data/config/config_defaults.yml +0 -6
- data/config/dev_console_env.rb +65 -0
- data/config/environments/development.rb +84 -0
- data/config/environments/development.rb.old +84 -0
- data/config/environments/test.rb +84 -0
- data/lib/dor-services.rb +8 -18
- data/lib/dor/config.rb +18 -24
- data/lib/dor/datastreams/administrative_metadata_ds.rb +8 -7
- data/lib/dor/datastreams/content_metadata_ds.rb +200 -278
- data/lib/dor/datastreams/datastream_spec_solrizer.rb +1 -1
- data/lib/dor/datastreams/default_object_rights_ds.rb +10 -8
- data/lib/dor/datastreams/desc_metadata_ds.rb +30 -34
- data/lib/dor/datastreams/embargo_metadata_ds.rb +17 -13
- data/lib/dor/datastreams/events_ds.rb +12 -12
- data/lib/dor/datastreams/geo_metadata_ds.rb +3 -244
- data/lib/dor/datastreams/identity_metadata_ds.rb +34 -30
- data/lib/dor/datastreams/role_metadata_ds.rb +6 -6
- data/lib/dor/datastreams/simple_dublin_core_ds.rb +12 -9
- data/lib/dor/datastreams/version_metadata_ds.rb +14 -33
- data/lib/dor/datastreams/workflow_definition_ds.rb +18 -18
- data/lib/dor/datastreams/workflow_ds.rb +74 -65
- data/lib/dor/migrations/identifiable/assert_adminPolicy.rb +1 -1
- data/lib/dor/migrations/identifiable/fix_model_assertions.rb +1 -1
- data/lib/dor/migrations/identifiable/record_remediation.rb +2 -2
- data/lib/dor/migrations/identifiable/uriify_augmented_contentlocation_refs.rb +1 -1
- data/lib/dor/migrations/identifiable/uriify_contentlocation_refs.rb +1 -1
- data/lib/dor/migrations/processable/unify_workflows.rb +4 -4
- data/lib/dor/migrations/versionable/add_missing_version_md.rb +1 -1
- data/lib/dor/models/admin_policy_object.rb +1 -1
- data/lib/dor/models/assembleable.rb +3 -4
- data/lib/dor/models/collection.rb +0 -2
- data/lib/dor/models/contentable.rb +34 -35
- data/lib/dor/models/describable.rb +80 -122
- data/lib/dor/models/editable.rb +57 -73
- data/lib/dor/models/embargoable.rb +13 -15
- data/lib/dor/models/eventable.rb +3 -3
- data/lib/dor/models/geoable.rb +8 -9
- data/lib/dor/models/governable.rb +36 -54
- data/lib/dor/models/identifiable.rb +119 -115
- data/lib/dor/models/item.rb +4 -4
- data/lib/dor/models/itemizable.rb +9 -9
- data/lib/dor/models/presentable.rb +133 -0
- data/lib/dor/models/preservable.rb +4 -4
- data/lib/dor/models/processable.rb +29 -28
- data/lib/dor/models/publishable.rb +36 -30
- data/lib/dor/models/releasable.rb +310 -0
- data/lib/dor/models/shelvable.rb +14 -14
- data/lib/dor/models/upgradable.rb +13 -13
- data/lib/dor/models/versionable.rb +4 -7
- data/lib/dor/models/workflow_object.rb +16 -36
- data/lib/dor/services/cleanup_reset_service.rb +28 -34
- data/lib/dor/services/cleanup_service.rb +4 -4
- data/lib/dor/services/digital_stacks_service.rb +10 -10
- data/lib/dor/services/merge_service.rb +1 -1
- data/lib/dor/services/metadata_handlers/mdtoolkit_handler.rb +2 -2
- data/lib/dor/services/metadata_service.rb +20 -20
- data/lib/dor/services/registration_service.rb +26 -27
- data/lib/dor/services/reset_workspace_service.rb +15 -15
- data/lib/dor/services/sdr_ingest_service.rb +4 -4
- data/lib/dor/services/search_service.rb +4 -9
- data/lib/dor/services/suri_service.rb +5 -5
- data/lib/dor/services/technical_metadata_service.rb +3 -2
- data/lib/dor/utils/ng_tidy.rb +9 -9
- data/lib/dor/utils/predicate_patch.rb +1 -1
- data/lib/dor/utils/solr_doc_helper.rb +13 -5
- data/lib/dor/version.rb +1 -1
- data/lib/dor/workflow/document.rb +28 -30
- data/lib/dor/workflow/graph.rb +36 -36
- data/lib/dor/workflow/process.rb +12 -12
- data/lib/tasks/dor.rake +1 -1
- data/lib/tasks/rdoc.rake +3 -3
- metadata +67 -76
- data/lib/dor/datastreams/geo2mods.xsl +0 -867
- data/lib/dor/models/discoverable.rb +0 -64
- data/lib/dor/models/releaseable.rb +0 -357
- data/lib/dor/services/indexing_service.rb +0 -64
- data/lib/dor/utils/sdr_client.rb +0 -23
- data/lib/dor/utils/utc_date_field_mapper.rb +0 -7
@@ -1,64 +0,0 @@
|
|
1
|
-
module Dor
  # Mixin that adds SearchWorks-style ("gryphondor") fields, derived from the
  # object's descMetadata MODS record, to the object's Solr document.
  module Discoverable
    extend ActiveSupport::Concern
    require 'stanford-mods'

    # Extends the Solr document produced by `super` with fields read from a
    # Stanford::Mods::Record built out of the descMetadata XML.
    #
    # Nothing is added when there is no descMetadata datastream or when it
    # reports itself as new.
    #
    # @param solr_doc [Hash] document to populate; it is modified in place
    # @param args extra arguments forwarded untouched to `super`
    # @return [Hash] the same solr_doc that was passed in
    def to_solr(solr_doc = Hash.new, *args)
      super solr_doc, *args
      return solr_doc unless descMetadata && !descMetadata.new?

      mods = Stanford::Mods::Record.new
      mods.from_str(descMetadata.ng_xml.to_s)

      solr_doc.merge!(
        :sw_format_facet => mods.format,

        # title fields
        :sw_title_245a_search_facet_facet => mods.sw_short_title,
        :sw_title_245_search_facet_facet => mods.sw_full_title,
        :sw_title_variant_search_facet_facet => mods.sw_addl_titles,
        :sw_title_sort_facet => mods.sw_sort_title,
        :sw_title_245a_display_facet => mods.sw_short_title,
        :sw_title_display_facet => mods.sw_full_title,
        :sw_title_full_display_facet => mods.sw_full_title,

        # author fields
        :sw_author_1xx_search_facet_facet => mods.sw_main_author,
        :sw_author_7xx_search_facet_facet => mods.sw_addl_authors,
        :sw_author_person_facet_facet => mods.sw_person_authors,
        :sw_author_other_facet_facet => mods.sw_impersonal_authors,
        # :sw_author_sort_facet => mods.sw_sort_author,
        :sw_author_corp_display_facet => mods.sw_corporate_authors,
        :sw_author_meeting_display_facet => mods.sw_meeting_authors,
        :sw_author_person_display_facet => mods.sw_person_authors,
        :sw_author_person_full_display_facet => mods.sw_person_authors,

        # subject search fields
        :sw_topic_search_facet_facet => mods.topic_search,
        :sw_geographic_search_facet_facet => mods.geographic_search,
        :sw_subject_other_search_facet_facet => mods.subject_other_search,
        :sw_subject_other_subvy_search_facet_facet => mods.subject_other_subvy_search,
        :sw_subject_all_search_facet_facet => mods.subject_all_search,
        :sw_topic_facet_facet => mods.topic_facet,
        :sw_geographic_facet_facet => mods.geographic_facet,
        :sw_era_facet_facet => mods.era_facet,

        :sw_language_facet => mods.sw_language_facet,
        # :sw_physical => mods.term_values([:sw_physical_description, :sw_extent]),
        # :sw_summary_search_facet_facet => mods.term_values(:sw_abstract),
        # :sw_toc_search_facet_facet => mods.term_values(:sw_tableOfContents),
        # :sw_url_suppl => mods.term_values([:sw_related_item, :sw_location, :sw_url]),

        # publish date fields
        :sw_pub_search_facet_facet => mods.place,
        :sw_pub_date_sort_facet => mods.pub_date_sort,
        :sw_pub_date_group_facet_facet => mods.pub_date_groups(mods.pub_date),
        :sw_pub_date_facet => mods.pub_date_facet,
        :sw_pub_date_display_facet => mods.pub_date_display,
        :sw_all_search_facet_facet => mods.text
      )
      solr_doc
    end
  end
end
|
@@ -1,357 +0,0 @@
|
|
1
|
-
require 'open-uri'
|
2
|
-
require 'retries'
|
3
|
-
|
4
|
-
module Dor
|
5
|
-
module Releaseable
|
6
|
-
extend ActiveSupport::Concern
|
7
|
-
include Itemizable
|
8
|
-
|
9
|
-
# Adds one or more release tags to the item, saves it, then starts releaseWF.
#
# @param release_tags [Hash, Array<Hash>] a single tag hash or an array of them, e.g.
#   {:tag=>'Fitch : Batch2', :what=>'self', :to=>'Searchworks', :who=>'petucket', :release=>true}
#   Each hash is handed to add_release_node both as the source of :release and as the attribute hash.
# @raise [ArgumentError] propagated from add_release_node when a tag is improperly supplied
def add_release_nodes_and_start_releaseWF(release_tags)
  # A lone Hash must be wrapped by hand; Array() would explode it into key/value pairs.
  tag_list = release_tags.is_a?(Array) ? release_tags : [release_tags]
  tag_list.each { |r_tag| add_release_node(r_tag[:release], r_tag) }

  # Persist before kicking off the workflow so the robots see the latest data.
  save
  initialize_workflow('releaseWF')
end
|
24
|
-
|
25
|
-
# Builds the release XML that is included in the item's purl document.
#
# The root element is <releaseData>; it holds one <release to="PROJECT"> child per
# entry of released_for, whose text is that entry's 'release' value.
#
# @return [String] the serialized XML document
def generate_release_xml
  Nokogiri::XML::Builder.new { |xml|
    xml.releaseData do
      released_for.each do |project, released_value|
        xml.release(released_value['release'], :to => project)
      end
    end
  }.to_xml
end
|
37
|
-
|
38
|
-
# Works out, per release target, whether this item is released.
#
# Resolution order:
# 1. The newest what=self tag on the item itself decides for its target.
# 2. For targets not decided in step 1, the newest applicable what=collection tag
#    (from this item and all governing collections) decides. Targets with no
#    applicable tag are left out.
# 3. Unless skip_live_purl is true, targets currently present in purl but absent
#    from the result are added with a false release value (see add_tags_from_purl).
#
# @param skip_live_purl [Boolean] true to skip the request to the purl backend
# @return [Hash{String => Hash}] target name => {'release' => value}
def released_for(skip_live_purl = false)
  released_hash = {}

  # The most recent self tag always trumps any non-self tag for the same target.
  newest_self_tags = get_newest_release_tag(get_self_release_tags(release_nodes))
  newest_self_tags.each do |target, tag|
    released_hash[target] = clean_release_tag_for_purl(tag)
  end

  # Collection-level tags gathered from this item and every governing set.
  collection_tags = get_tags_for_what_value(get_release_tags_for_item_and_all_governing_sets, 'collection')
  administrative_tags = tags # fetched once here and passed down

  # Targets already settled by a self tag are skipped.
  (collection_tags.keys - released_hash.keys).each do |target|
    winner = latest_applicable_release_tag_in_array(collection_tags[target], administrative_tags)
    released_hash[target] = clean_release_tag_for_purl(winner) unless winner.nil?
  end

  add_tags_from_purl(released_hash) unless skip_live_purl

  released_hash
end
|
75
|
-
|
76
|
-
# Filters a release-tag hash down to the tags whose 'what' is 'self'.
#
# @param tags [Hash{String => Array<Hash>}] target name => release tags, in the shape produced by release_nodes
# @return [Hash{String => Array<Hash>}] only the targets that have at least one self tag
def get_self_release_tags(tags)
  get_tags_for_what_value(tags, 'self')
end
|
84
|
-
|
85
|
-
# Collects the release tags of this item together with those of every collection
# it belongs to. Each collection is loaded through Dor::Item.find and asked the
# same question, so ancestors of ancestors are included.
#
# @return [Hash{String => Array<Hash>}] target name => combined, de-duplicated release tags
def get_release_tags_for_item_and_all_governing_sets
  own_tags = release_nodes || {}
  collections.reduce(own_tags) do |merged, collection|
    combine_two_release_tag_hashes(merged, Dor::Item.find(collection.id).get_release_tags_for_item_and_all_governing_sets)
  end
end
|
96
|
-
|
97
|
-
# Merges hash_two into hash_one, key by key, de-duplicating the resulting tag arrays.
# Existing entries are extended rather than overwritten. hash_one is modified in place.
#
# @param hash_one [Hash{String => Array}] receiving hash (mutated and returned)
# @param hash_two [Hash{String => Array}] hash whose tags are folded in
# @return [Hash{String => Array}] hash_one, with uniqueness enforced on every touched key
def combine_two_release_tag_hashes(hash_one, hash_two)
  hash_two.each do |key, incoming|
    base = hash_one[key].nil? ? incoming : hash_one[key]
    # When both sides are nil the key is simply recorded as nil.
    hash_one[key] = base.nil? ? base : (base + incoming).uniq
  end
  hash_one
end
|
110
|
-
|
111
|
-
# Selects, for every target, the tags whose 'what' attribute equals the requested
# value (compared against the lower-cased what_target).
#
# @param tags [Hash{String => Array<Hash>}] target name => release tags
# @param what_target [String] the wanted 'what' value, e.g. 'self' or 'collection'
# @return [Hash{String => Array<Hash>}] only targets with at least one matching tag
def get_tags_for_what_value(tags, what_target)
  tags.each_with_object({}) do |(target, target_tags), matches|
    selected = target_tags.select { |tag| tag['what'] == what_target.downcase }
    matches[target] = selected unless selected.empty?
  end
end
|
125
|
-
|
126
|
-
# Reduces each target's list of release tags to its newest tag.
#
# @param tags [Hash{String => Array<Hash>}] target name => release tags
# @return [Hash{String => Hash}] target name => newest tag for that target
def get_newest_release_tag(tags)
  tags.each_with_object({}) do |(target, target_tags), newest|
    newest[target] = newest_release_tag_in_an_array(target_tags)
  end
end
|
132
|
-
|
133
|
-
# Strips a release tag down to the single attribute that is published to purl.
#
# @param tag [Hash] a release tag
# @return [Hash] {'release' => tag['release']}
def clean_release_tag_for_purl(tag)
  released = tag['release']
  { 'release' => released }
end
|
139
|
-
|
140
|
-
# Picks the tag with the greatest 'when' value out of an array of release tags.
# On a tie the earliest such tag in the array wins; an empty array yields {}.
#
# @param array_of_tags [Array<Hash>] release tags, each carrying a comparable 'when'
# @return [Hash] the most recent tag, or {} when the array is empty
def newest_release_tag_in_an_array(array_of_tags)
  array_of_tags.reduce(array_of_tags[0] || {}) do |latest, tag|
    tag['when'] > latest['when'] ? tag : latest
  end
end
|
150
|
-
|
151
|
-
# Says whether a release tag applies to this item (not whether the item is released;
# that is the tag's own 'release' value).
#
# A tag with no 'tag' attribute applies globally. Otherwise it applies only when its
# 'tag' value is among the item's administrative tags.
#
# @param release_tag [Hash] the release tag in hashed form
# @param admin_tags [Array, false] the item's administrative tags; when false/nil they are
#   fetched via `tags`. false (rather than []) is the default so that an item that truly
#   has no administrative tags can pass [] without triggering a lookup.
# @return [Boolean] true if the tag applies
def does_release_tag_apply(release_tag, admin_tags = false)
  required_tag = release_tag['tag']
  return true if required_tag.nil?

  (admin_tags || tags).include?(required_tag)
end
|
161
|
-
|
162
|
-
# Finds the most recent release tag that applies to this item.
#
# Candidates are tried newest first; a candidate that does not apply (see
# does_release_tag_apply) is discarded and the next newest is tried.
# The search runs on a copy, so the caller's array is left untouched.
#
# @param release_tags [Array<Hash>] release tags in hashed form
# @param admin_tags [Array] the administrative tags on the item
# @return [Hash, nil] the newest applicable tag, or nil when none of the tags applies
def latest_applicable_release_tag_in_array(release_tags, admin_tags)
  remaining = release_tags.dup

  loop do
    newest_tag = newest_release_tag_in_an_array(remaining)
    return newest_tag if does_release_tag_apply(newest_tag, admin_tags)

    # The newest tag was not applicable: drop it and look at the next newest.
    remaining.delete_at(remaining.index(newest_tag))
    return nil if remaining.empty? # out of tags, none applicable
  end
end
|
179
|
-
|
180
|
-
# Returns the release tags recorded in identityMetadata, grouped by release target.
#
# This is the same data as release_nodes and simply delegates to it, so the two
# accessors cannot drift apart.
#
# @return [Hash{String => Array<Hash>}] target name ('to' attribute) => attribute hashes
#   of every <release> node for that target
def release_tags
  release_nodes
end
|
196
|
-
|
197
|
-
# Converts one <release> element into a hash.
#
# The 'to' attribute is pulled out as the target; all remaining attributes are
# stringified; 'release' is set to a boolean taken from the element text
# (true only when the text, lower-cased, is 'true'); 'when' is parsed with Time.parse.
#
# @param rtag [Nokogiri::XML::Element] the release tag element
# @return [Hash] {:to => String, :attrs => Hash}
def release_tag_node_to_hash(rtag)
  # TODO: load the attribute names ('to', 'when') from configuration instead of hardcoding them
  attrs = rtag.attributes
  target = attrs.delete('to').value

  attrs['release'] = rtag.text.downcase == 'true' # stored as a boolean

  # Everything except the boolean is still an attribute object at this point; stringify it.
  attrs.keys.each do |name|
    attrs[name] = attrs[name].to_s unless name == 'release'
  end

  attrs['when'] = Time.parse(attrs['when'])
  { :to => target, :attrs => attrs }
end
|
218
|
-
|
219
|
-
# Adds a release node (and the matching displayType) to the item's identityMetadata.
#
# Missing attributes are defaulted directly on the attrs hash that was passed in:
# :when becomes the current UTC time in ISO 8601 form (supply your own to correct
# history), and :displayType becomes 'file'. Any existing displayType is removed
# before the new one is written.
#
# @param release [Boolean] true or false for the release node
# @param attrs [Hash] attributes to place on the tag (:who, :to, :what required; :tag, :when, :displayType optional)
# @return the result of the final identityMetadata add_value call
# @raise [ArgumentError] if the attributes are improperly supplied (see valid_release_attributes)
# @example
#  item.add_release_node(true, {:tag=>'Fitch : Batch2', :what=>'self', :to=>'Searchworks', :who=>'petucket', :displayType=>'filmstrip'})
def add_release_node(release, attrs = {})
  datastream = identityMetadata
  attrs[:when] = Time.now.utc.iso8601 unless !attrs[:when].nil?
  attrs[:displayType] = 'file' unless !attrs[:displayType].nil?
  valid_release_attributes(release, attrs)

  # Replace the old displayType with the one for this tag.
  remove_displayTypes
  datastream.add_value(:displayType, attrs[:displayType], {})
  datastream.add_value(:release, release.to_s, attrs)
end
|
239
|
-
|
240
|
-
# Checks that a release value and its attributes form a valid release node.
#
# Requirements:
# * :when is a String of exactly the form YYYY-MM-DDTHH:MM:SSZ
# * :who, :to and :what are Strings
# * :what is 'self' or 'collection'
# * tag is literally true or false
# * :displayType is a String
# * :tag, when present, passes validate_tag_format
#
# @param tag [Boolean] the release value
# @param attrs [Hash] attributes for the tag
# @return [Boolean] true if no errors were found
# @raise [ArgumentError] describing the first fault found in the release tag
def valid_release_attributes(tag, attrs = {})
  timestamp = attrs[:when]
  # Anchored so that text surrounding a timestamp is rejected; a missing or
  # non-String :when is reported as ArgumentError like every other fault.
  unless timestamp.is_a?(String) && timestamp =~ /\A\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\z/
    raise ArgumentError, ':when is not iso8601'
  end

  [:who, :to, :what].each do |check_attr|
    raise ArgumentError, "#{check_attr} not supplied as a String" unless attrs[check_attr].is_a?(String)
  end

  raise ArgumentError, ':what must be self or collection' unless %w(self collection).include?(attrs[:what])
  raise ArgumentError, 'the value set for this tag is not a boolean' unless tag.equal?(true) || tag.equal?(false)
  raise ArgumentError, ':displayType must be passed in as a String' unless attrs[:displayType].is_a?(String)

  validate_tag_format(attrs[:tag]) unless attrs[:tag].nil? # raises if the tag is invalid
  true
end
|
261
|
-
|
262
|
-
# Reads every <release> node from identityMetadata and groups them by target.
#
# @return [Hash{String => Array<Hash>}] target name ('to' attribute) => attribute hashes
#   (as built by release_tag_node_to_hash) in document order
def release_nodes
  identityMetadata.ng_xml.xpath('//release').each_with_object({}) do |node, by_target|
    parsed = release_tag_node_to_hash(node)
    (by_target[parsed[:to]] ||= []) << parsed[:attrs]
  end
end
|
277
|
-
|
278
|
-
# Fetches and parses this item's purl document (URL from form_purl_url), retrying on failure.
#
# Every failed attempt is logged. A failure that is not an OpenURI::HTTPError is re-raised
# from the handler. A 404 is taken to mean the item has never been published, and
# an empty document is returned. Other HTTP errors are left to with_retries.
#
# @return [Nokogiri::HTML::Document] the parsed purl document, or an empty document on a 404
# @raise [OpenURI::HTTPError]
def get_xml_from_purl
  url = form_purl_url

  # NOTE: this must stay a Proc (not a lambda): the `return` below is meant to
  # return from get_xml_from_purl itself.
  handler = Proc.new do |exception, attempt_number, total_delay|
    Dor.logger.warn "[Attempt #{attempt_number}] GET #{url} -- #{exception.class}: #{exception.message}; #{total_delay} seconds elapsed."
    raise exception unless OpenURI::HTTPError === exception
    return Nokogiri::HTML::Document.new if exception.io.status[0] == '404'
  end

  # If the method used for opening the page changes, the :rescue param of with_retries
  # can be used to handle the new method's errors.
  with_retries(:max_retries => 3, :base_sleep_seconds => 3, :max_sleep_seconds => 5, :handler => handler) do |attempt|
    Dor.logger.info "[Attempt #{attempt}] GET #{url}"
    return Nokogiri::HTML(OpenURI.open_uri(url))
  end
end
|
297
|
-
|
298
|
-
# Purl does not use the "druid:" prefix but much of dor does; this returns the
# object's id without it.
#
# @return [String] the part of id after "druid:", or the id unchanged when it
#   carries no such prefix
def remove_druid_prefix
  parts = id.split('druid:')
  # The fallback returns id itself; the earlier fallback referred to an
  # undefined `druid`.
  parts.size > 1 ? parts[1] : id
end
|
306
|
-
|
307
|
-
# Builds the full https URL of this item's purl XML, suitable for open-uri.
# The host comes from Dor::Config.stacks.document_cache_host and the path is the
# druid without its "druid:" prefix, followed by ".xml".
#
# @return [String] the full url
def form_purl_url
  host = Dor::Config.stacks.document_cache_host
  'https://' + host + "/#{remove_druid_prefix}.xml"
end
|
312
|
-
|
313
|
-
# Extracts the release targets named in a parsed purl document.
#
# Looks at the children of //html/body/publicobject/releasedata, ignores nodes
# with no name or named 'text', and collects their 'to' attributes.
#
# @param doc [Nokogiri::HTML::Document] the parsed purl document
# @return [Array] the distinct 'to' values, in document order
def get_release_tags_from_purl_xml(doc)
  children = doc.xpath('//html/body/publicobject/releasedata').children
  # Keep element nodes only: anything unnamed or named 'text' is dropped.
  elements = children.select { |node| !(node.name.nil? || node.name.downcase == 'text') }
  elements.map { |node| node.attr('to') }.uniq
end
|
321
|
-
|
322
|
-
# Fetches this item's purl document and extracts the release targets listed in it.
#
# @return [Array] the distinct 'to' values found in the live purl document
def get_release_tags_from_purl
  get_release_tags_from_purl_xml(get_xml_from_purl)
end
|
328
|
-
|
329
|
-
# Compares the release targets currently in purl with the freshly computed ones and
# marks every target that is in purl but missing from new_tags as not released.
#
# Names are compared case-insensitively; a missing target is added under its
# capitalized lower-case name. new_tags is modified in place.
#
# @param new_tags [Hash{String => Hash}] target name => {'release' => value}
# @return [Hash] new_tags, with {"release"=>false} added for each missing target
def add_tags_from_purl(new_tags)
  purl_targets = get_release_tags_from_purl.map(&:downcase)
  known_targets = new_tags.keys.map(&:downcase)

  (purl_targets - known_targets).each do |target|
    new_tags[target.capitalize] = {"release"=>false}
  end
  new_tags
end
|
341
|
-
|
342
|
-
# Adds a 'released_to' value to the Solr document for every target this item is
# released to. The live purl lookup is skipped (released_for(true)).
#
# @param solr_doc [Hash] document to populate; it is modified in place
# @param args extra arguments forwarded untouched to `super`
# @return [Hash] the same solr_doc that was passed in
def to_solr(solr_doc = {}, *args)
  super(solr_doc, *args)

  # TODO: sort of worried about the performance impact in bulk reindex
  # situations, since released_for recurses all parent collections. jmartin 2015-07-14
  released_for(true).each do |target, release_info|
    # release_info is a {'release' => value} hash and is itself always truthy,
    # so the flag inside it has to be tested.
    add_solr_value(solr_doc, 'released_to', target, :symbol, []) if release_info['release']
  end

  # TODO: need to solrize whether item is released to purl? does released_for return that?
  # logic is: "True when there is a published lifecycle and Access Rights is anything but Dark"

  solr_doc
end
|
356
|
-
end
|
357
|
-
end
|
@@ -1,64 +0,0 @@
|
|
1
|
-
module Dor
  # Class-level helpers that index Dor objects into Solr and log each attempt.
  class IndexingService
    # Memoized loggers, keyed by name; the default logger starts out nil and is
    # built on first use by default_index_logger.
    @@loggers = { default: nil }

    ##
    # Builds a Logger for recording info about indexing attempts. Log destination,
    # rotation interval and date format are read from Config.indexing_svc.
    # @yield called for every log entry; its result is used as an extra identifier for
    #   the entry ('request.uuid' might be useful in a Rails app). When no block is
    #   given, or the block raises, the placeholder '---' is used instead.
    # @return [Logger]
    def self.generate_index_logger(&entry_id_block)
      Logger.new(Config.indexing_svc.log, Config.indexing_svc.log_rotation_interval).tap do |index_logger|
        index_logger.formatter = proc do |_severity, datetime, _progname, msg|
          date_format_str = Config.indexing_svc.log_date_format_str
          entry_id = begin
            entry_id_block.call
          rescue
            '---'
          end
          "[#{entry_id}] [#{datetime.utc.strftime(date_format_str)}] #{msg}\n"
        end
      end
    end

    # @return [Logger] the shared default index logger, created on first call
    def self.default_index_logger
      @@loggers[:default] ||= generate_index_logger
    end

    # Takes a Dor object and indexes it to solr. Doesn't commit automatically.
    # @param obj an object responding to to_solr
    # @return the solr document that was added
    def self.reindex_object(obj)
      obj.to_solr.tap { |solr_doc| Dor::SearchService.solr.add(solr_doc) }
    end

    # Retrieves a single Dor object by pid, indexes it to solr and logs the outcome
    # (using the default logger if none is provided). Doesn't commit automatically.
    #
    # WARNING/TODO: the tests indicate that the "rescue Exception" block at the end will
    # get skipped, and the thrown exception (e.g. SystemStackError) will not be logged. Since
    # that's the only consequence, and the exception bubbles up as we would want anyway, it
    # doesn't seem worth blocking refactoring. See https://github.com/sul-dlss/dor-services/issues/156
    # Extra logging in this case would be nice, but centralized indexing that's otherwise
    # fully functional is nicer.
    #
    # @param pid [String] identifier of the object to load
    # @param index_logger [Logger, nil] logger to use; nil means the default index logger
    # @param should_raise_errors [Boolean] when false, a StandardError is logged and nil is returned
    # @return the solr document on success, nil when a StandardError was suppressed
    def self.reindex_pid(pid, index_logger = nil, should_raise_errors = true)
      index_logger ||= default_index_logger
      solr_doc = reindex_object(Dor.load_instance(pid))
      index_logger.info "updated index for #{pid}"
      solr_doc
    rescue StandardError => se
      failure_message =
        if se.is_a? ActiveFedora::ObjectNotFoundError
          "failed to update index for #{pid}, object not found in Fedora"
        else
          "failed to update index for #{pid}, unexpected StandardError, see main app log: #{se.backtrace}"
        end
      index_logger.warn failure_message
      raise se if should_raise_errors
    rescue Exception => ex
      index_logger.error "failed to update index for #{pid}, unexpected Exception, see main app log: #{ex.backtrace}"
      raise ex # don't swallow anything worse than StandardError
    end

    # Given a list of pids, retrieves those objects, indexes each to solr and
    # optionally commits. Uses the default logger; a StandardError on one pid does
    # not stop the rest of the batch.
    # @param pid_list [Array<String>]
    # @param should_commit [Boolean] true to commit to solr after the whole list
    def self.reindex_pid_list(pid_list, should_commit = false)
      pid_list.each do |pid|
        reindex_pid pid, nil, false
      end
      ActiveFedora.solr.conn.commit if should_commit
    end
  end
end
|