dor_indexing 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +2 -0
  3. data/.rubocop.yml +355 -0
  4. data/Gemfile +16 -0
  5. data/Gemfile.lock +218 -0
  6. data/README.md +33 -0
  7. data/Rakefile +11 -0
  8. data/dor_indexing.gemspec +40 -0
  9. data/lib/dor_indexing/builders/all_search_text_builder.rb +58 -0
  10. data/lib/dor_indexing/builders/author_builder.rb +31 -0
  11. data/lib/dor_indexing/builders/collection_rights_description_builder.rb +29 -0
  12. data/lib/dor_indexing/builders/document_builder.rb +106 -0
  13. data/lib/dor_indexing/builders/event_date_builder.rb +71 -0
  14. data/lib/dor_indexing/builders/event_place_builder.rb +73 -0
  15. data/lib/dor_indexing/builders/geographic_builder.rb +82 -0
  16. data/lib/dor_indexing/builders/name_builder.rb +70 -0
  17. data/lib/dor_indexing/builders/orcid_builder.rb +62 -0
  18. data/lib/dor_indexing/builders/publisher_name_builder.rb +53 -0
  19. data/lib/dor_indexing/builders/temporal_builder.rb +56 -0
  20. data/lib/dor_indexing/builders/topic_builder.rb +96 -0
  21. data/lib/dor_indexing/cocina_repository.rb +24 -0
  22. data/lib/dor_indexing/indexers/administrative_tag_indexer.rb +69 -0
  23. data/lib/dor_indexing/indexers/collection_title_indexer.rb +27 -0
  24. data/lib/dor_indexing/indexers/composite_indexer.rb +36 -0
  25. data/lib/dor_indexing/indexers/content_metadata_indexer.rb +69 -0
  26. data/lib/dor_indexing/indexers/data_indexer.rb +66 -0
  27. data/lib/dor_indexing/indexers/default_object_rights_indexer.rb +36 -0
  28. data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +226 -0
  29. data/lib/dor_indexing/indexers/embargo_metadata_indexer.rb +32 -0
  30. data/lib/dor_indexing/indexers/identifiable_indexer.rb +92 -0
  31. data/lib/dor_indexing/indexers/identity_metadata_indexer.rb +85 -0
  32. data/lib/dor_indexing/indexers/process_indexer.rb +63 -0
  33. data/lib/dor_indexing/indexers/releasable_indexer.rb +62 -0
  34. data/lib/dor_indexing/indexers/rights_metadata_indexer.rb +59 -0
  35. data/lib/dor_indexing/indexers/role_metadata_indexer.rb +31 -0
  36. data/lib/dor_indexing/indexers/workflow_indexer.rb +51 -0
  37. data/lib/dor_indexing/indexers/workflows_indexer.rb +40 -0
  38. data/lib/dor_indexing/marc_country.rb +359 -0
  39. data/lib/dor_indexing/selectors/event_selector.rb +112 -0
  40. data/lib/dor_indexing/selectors/pub_year_selector.rb +119 -0
  41. data/lib/dor_indexing/version.rb +5 -0
  42. data/lib/dor_indexing/workflow_fields.rb +63 -0
  43. data/lib/dor_indexing/workflow_solr_document.rb +93 -0
  44. data/lib/dor_indexing.rb +19 -0
  45. metadata +173 -0
@@ -0,0 +1,359 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  # Helper for MARC country codes.
  #
  # Maps the two- and three-letter codes of the MARC country vocabulary to
  # their display labels, and resolves either a bare code or a full
  # id.loc.gov vocabulary URI to that label.
  # rubocop:disable Metrics/ClassLength
  class MarcCountry
    # URI prefix of the MARC country vocabulary; a country URI is this prefix
    # followed by the country code.
    MARC_COUNTRY_URI = 'http://id.loc.gov/vocabulary/countries/'

    # Identifier for the MARC country vocabulary.
    # NOTE(review): presumably compared against a source code by callers
    # outside this file -- confirm.
    MARC_COUNTRY_CODE = 'marccountry'

    # MARC country code => display label.
    COUNTRY_CODES = { # rubocop:disable Metrics/CollectionLiteralLength
      'aa' => 'Albania',
      'abc' => 'Alberta',
      'aca' => 'Australian Capital Territory',
      'ae' => 'Algeria',
      'af' => 'Afghanistan',
      'ag' => 'Argentina',
      'ai' => 'Armenia (Republic)',
      'aj' => 'Azerbaijan',
      'aku' => 'Alaska',
      'alu' => 'Alabama',
      'am' => 'Anguilla',
      'an' => 'Andorra',
      'ao' => 'Angola',
      'aq' => 'Antigua and Barbuda',
      'aru' => 'Arkansas',
      'as' => 'American Samoa',
      'at' => 'Australia',
      'au' => 'Austria',
      'aw' => 'Aruba',
      'ay' => 'Antarctica',
      'azu' => 'Arizona',
      'ba' => 'Bahrain',
      'bb' => 'Barbados',
      'bcc' => 'British Columbia',
      'bd' => 'Burundi',
      'be' => 'Belgium',
      'bf' => 'Bahamas',
      'bg' => 'Bangladesh',
      'bh' => 'Belize',
      'bi' => 'British Indian Ocean Territory',
      'bl' => 'Brazil',
      'bm' => 'Bermuda Islands',
      'bn' => 'Bosnia and Herzegovina',
      'bo' => 'Bolivia',
      'bp' => 'Solomon Islands',
      'br' => 'Burma',
      'bs' => 'Botswana',
      'bt' => 'Bhutan',
      'bu' => 'Bulgaria',
      'bv' => 'Bouvet Island',
      'bw' => 'Belarus',
      'bx' => 'Brunei',
      'ca' => 'Caribbean Netherlands',
      'cau' => 'California',
      'cb' => 'Cambodia',
      'cc' => 'China',
      'cd' => 'Chad',
      'ce' => 'Sri Lanka',
      'cf' => 'Congo (Brazzaville)',
      'cg' => 'Congo (Democratic Republic)',
      'ch' => 'China (Republic : 1949- )',
      'ci' => 'Croatia',
      'cj' => 'Cayman Islands',
      'ck' => 'Colombia',
      'cl' => 'Chile',
      'cm' => 'Cameroon',
      'co' => 'Curaçao',
      'cou' => 'Colorado',
      'cq' => 'Comoros',
      'cr' => 'Costa Rica',
      'ctu' => 'Connecticut',
      'cu' => 'Cuba',
      'cv' => 'Cabo Verde',
      'cw' => 'Cook Islands',
      'cx' => 'Central African Republic',
      'cy' => 'Cyprus',
      'dcu' => 'District of Columbia',
      'deu' => 'Delaware',
      'dk' => 'Denmark',
      'dm' => 'Benin',
      'dq' => 'Dominica',
      'dr' => 'Dominican Republic',
      'ea' => 'Eritrea',
      'ec' => 'Ecuador',
      'eg' => 'Equatorial Guinea',
      'em' => 'Timor-Leste',
      'enk' => 'England',
      'er' => 'Estonia',
      'es' => 'El Salvador',
      'et' => 'Ethiopia',
      'fa' => 'Faroe Islands',
      'fg' => 'French Guiana',
      'fi' => 'Finland',
      'fj' => 'Fiji',
      'fk' => 'Falkland Islands',
      'flu' => 'Florida',
      'fm' => 'Micronesia (Federated States)',
      'fp' => 'French Polynesia',
      'fr' => 'France',
      'fs' => 'Terres australes et antarctiques françaises',
      'ft' => 'Djibouti',
      'gau' => 'Georgia',
      'gb' => 'Kiribati',
      'gd' => 'Grenada',
      'gg' => 'Guernsey',
      'gh' => 'Ghana',
      'gi' => 'Gibraltar',
      'gl' => 'Greenland',
      'gm' => 'Gambia',
      'go' => 'Gabon',
      'gp' => 'Guadeloupe',
      'gr' => 'Greece',
      'gs' => 'Georgia (Republic)',
      'gt' => 'Guatemala',
      'gu' => 'Guam',
      'gv' => 'Guinea',
      'gw' => 'Germany',
      'gy' => 'Guyana',
      'gz' => 'Gaza Strip',
      'hiu' => 'Hawaii',
      'hm' => 'Heard and McDonald Islands',
      'ho' => 'Honduras',
      'ht' => 'Haiti',
      'hu' => 'Hungary',
      'iau' => 'Iowa',
      'ic' => 'Iceland',
      'idu' => 'Idaho',
      'ie' => 'Ireland',
      'ii' => 'India',
      'ilu' => 'Illinois',
      'im' => 'Isle of Man',
      'inu' => 'Indiana',
      'io' => 'Indonesia',
      'iq' => 'Iraq',
      'ir' => 'Iran',
      'is' => 'Israel',
      'it' => 'Italy',
      'iv' => "Côte d'Ivoire",
      'iy' => 'Iraq-Saudi Arabia Neutral Zone',
      'ja' => 'Japan',
      'je' => 'Jersey',
      'ji' => 'Johnston Atoll',
      'jm' => 'Jamaica',
      'jo' => 'Jordan',
      'ke' => 'Kenya',
      'kg' => 'Kyrgyzstan',
      'kn' => 'Korea (North)',
      'ko' => 'Korea (South)',
      'ksu' => 'Kansas',
      'ku' => 'Kuwait',
      'kv' => 'Kosovo',
      'kyu' => 'Kentucky',
      'kz' => 'Kazakhstan',
      'lau' => 'Louisiana',
      'lb' => 'Liberia',
      'le' => 'Lebanon',
      'lh' => 'Liechtenstein',
      'li' => 'Lithuania',
      'lo' => 'Lesotho',
      'ls' => 'Laos',
      'lu' => 'Luxembourg',
      'lv' => 'Latvia',
      'ly' => 'Libya',
      'mau' => 'Massachusetts',
      'mbc' => 'Manitoba',
      'mc' => 'Monaco',
      'mdu' => 'Maryland',
      'meu' => 'Maine',
      'mf' => 'Mauritius',
      'mg' => 'Madagascar',
      'miu' => 'Michigan',
      'mj' => 'Montserrat',
      'mk' => 'Oman',
      'ml' => 'Mali',
      'mm' => 'Malta',
      'mnu' => 'Minnesota',
      'mo' => 'Montenegro',
      'mou' => 'Missouri',
      'mp' => 'Mongolia',
      'mq' => 'Martinique',
      'mr' => 'Morocco',
      'msu' => 'Mississippi',
      'mtu' => 'Montana',
      'mu' => 'Mauritania',
      'mv' => 'Moldova',
      'mw' => 'Malawi',
      'mx' => 'Mexico',
      'my' => 'Malaysia',
      'mz' => 'Mozambique',
      'nbu' => 'Nebraska',
      'ncu' => 'North Carolina',
      'ndu' => 'North Dakota',
      'ne' => 'Netherlands',
      'nfc' => 'Newfoundland and Labrador',
      'ng' => 'Niger',
      'nhu' => 'New Hampshire',
      'nik' => 'Northern Ireland',
      'nju' => 'New Jersey',
      'nkc' => 'New Brunswick',
      'nl' => 'New Caledonia',
      'nmu' => 'New Mexico',
      'nn' => 'Vanuatu',
      'no' => 'Norway',
      'np' => 'Nepal',
      'nq' => 'Nicaragua',
      'nr' => 'Nigeria',
      'nsc' => 'Nova Scotia',
      'ntc' => 'Northwest Territories',
      'nu' => 'Nauru',
      'nuc' => 'Nunavut',
      'nvu' => 'Nevada',
      'nw' => 'Northern Mariana Islands',
      'nx' => 'Norfolk Island',
      'nyu' => 'New York (State)',
      'nz' => 'New Zealand',
      'ohu' => 'Ohio',
      'oku' => 'Oklahoma',
      'onc' => 'Ontario',
      'oru' => 'Oregon',
      'ot' => 'Mayotte',
      'pau' => 'Pennsylvania',
      'pc' => 'Pitcairn Island',
      'pe' => 'Peru',
      'pf' => 'Paracel Islands',
      'pg' => 'Guinea-Bissau',
      'ph' => 'Philippines',
      'pic' => 'Prince Edward Island',
      'pk' => 'Pakistan',
      'pl' => 'Poland',
      'pn' => 'Panama',
      'po' => 'Portugal',
      'pp' => 'Papua New Guinea',
      'pr' => 'Puerto Rico',
      'pw' => 'Palau',
      'py' => 'Paraguay',
      'qa' => 'Qatar',
      'qea' => 'Queensland',
      'quc' => 'Québec (Province)',
      'rb' => 'Serbia',
      're' => 'Réunion',
      'rh' => 'Zimbabwe',
      'riu' => 'Rhode Island',
      'rm' => 'Romania',
      'ru' => 'Russia (Federation)',
      'rw' => 'Rwanda',
      'sa' => 'South Africa',
      'sc' => 'Saint-Barthélemy',
      'scu' => 'South Carolina',
      'sd' => 'South Sudan',
      'sdu' => 'South Dakota',
      'se' => 'Seychelles',
      'sf' => 'Sao Tome and Principe',
      'sg' => 'Senegal',
      'sh' => 'Spanish North Africa',
      'si' => 'Singapore',
      'sj' => 'Sudan',
      'sl' => 'Sierra Leone',
      'sm' => 'San Marino',
      'sn' => 'Sint Maarten',
      'snc' => 'Saskatchewan',
      'so' => 'Somalia',
      'sp' => 'Spain',
      'sq' => 'Eswatini',
      'sr' => 'Surinam',
      'ss' => 'Western Sahara',
      'st' => 'Saint-Martin',
      'stk' => 'Scotland',
      'su' => 'Saudi Arabia',
      'sw' => 'Sweden',
      'sx' => 'Namibia',
      'sy' => 'Syria',
      'sz' => 'Switzerland',
      'ta' => 'Tajikistan',
      'tc' => 'Turks and Caicos Islands',
      'tg' => 'Togo',
      'th' => 'Thailand',
      'ti' => 'Tunisia',
      'tk' => 'Turkmenistan',
      'tl' => 'Tokelau',
      'tma' => 'Tasmania',
      'tnu' => 'Tennessee',
      'to' => 'Tonga',
      'tr' => 'Trinidad and Tobago',
      'ts' => 'United Arab Emirates',
      'tu' => 'Turkey',
      'tv' => 'Tuvalu',
      'txu' => 'Texas',
      'tz' => 'Tanzania',
      'ua' => 'Egypt',
      'uc' => 'United States Misc. Caribbean Islands',
      'ug' => 'Uganda',
      'un' => 'Ukraine',
      'up' => 'United States Misc. Pacific Islands',
      'utu' => 'Utah',
      'uv' => 'Burkina Faso',
      'uy' => 'Uruguay',
      'uz' => 'Uzbekistan',
      'vau' => 'Virginia',
      'vb' => 'British Virgin Islands',
      'vc' => 'Vatican City',
      've' => 'Venezuela',
      'vi' => 'Virgin Islands of the United States',
      'vm' => 'Vietnam',
      'vp' => 'Various places',
      'vra' => 'Victoria',
      'vtu' => 'Vermont',
      'wau' => 'Washington (State)',
      'wea' => 'Western Australia',
      'wf' => 'Wallis and Futuna',
      'wiu' => 'Wisconsin',
      'wj' => 'West Bank of the Jordan River',
      'wk' => 'Wake Island',
      'wlk' => 'Wales',
      'ws' => 'Samoa',
      'wvu' => 'West Virginia',
      'wyu' => 'Wyoming',
      'xa' => 'Christmas Island (Indian Ocean)',
      'xb' => 'Cocos (Keeling) Islands',
      'xc' => 'Maldives',
      'xd' => 'Saint Kitts-Nevis',
      'xe' => 'Marshall Islands',
      'xf' => 'Midway Islands',
      'xga' => 'Coral Sea Islands Territory',
      'xh' => 'Niue',
      'xj' => 'Saint Helena',
      'xk' => 'Saint Lucia',
      'xl' => 'Saint Pierre and Miquelon',
      'xm' => 'Saint Vincent and the Grenadines',
      'xn' => 'North Macedonia',
      'xna' => 'New South Wales',
      'xo' => 'Slovakia',
      'xoa' => 'Northern Territory',
      'xp' => 'Spratly Island',
      'xr' => 'Czech Republic',
      'xra' => 'South Australia',
      'xs' => 'South Georgia and the South Sandwich Islands',
      'xv' => 'Slovenia',
      # NOTE(review): the double quotes embedded in this label look like a
      # CSV-quoting artifact; left unchanged because already-indexed values
      # may depend on it -- confirm before removing.
      'xx' => '"No place, unknown, or undetermined"',
      'xxc' => 'Canada',
      'xxk' => 'United Kingdom',
      'xxu' => 'United States',
      'ye' => 'Yemen',
      'ykc' => 'Yukon Territory',
      'za' => 'Zambia'
    }.freeze

    # @param [String] code a MARC country code, e.g. 'cau'
    # @return [String, nil] the label for the code, or nil when the code is unknown
    def self.from_code(code)
      COUNTRY_CODES[code]
    end

    # @param [String, nil] uri a MARC country URI, e.g.
    #   'http://id.loc.gov/vocabulary/countries/cau'
    # @return [String, nil] the label for the code at the end of the URI; nil when
    #   uri is nil, does not start with MARC_COUNTRY_URI, or names an unknown code
    def self.from_uri(uri)
      return unless uri&.start_with?(MARC_COUNTRY_URI)

      from_code(uri.delete_prefix(MARC_COUNTRY_URI))
    end
  end
  # rubocop:enable Metrics/ClassLength
end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Selectors
    # Selects the best event to use for indexing.
    #
    # Four strategies are tried in order, from most to least specific, and the
    # first event found by any of them is returned (see .select).
    class EventSelector
      # Strategies, in order:
      # 1. an event having a date of the desired type that is marked primary
      # 2. an event of the desired type having a date of the desired type
      # 3. an event of the desired type having a date with no type at all
      # 4. any event having a date of the desired type
      #
      # @param [Array<Cocina::Models::Event>] events
      # @param [String] desired_date_type a string to match the date.type in a Cocina::Models::Event
      # @return [Cocina::Models::Event, nil] event best matching selected
      def self.select(events, desired_date_type)
        date_type_matches_and_primary(events, desired_date_type) ||
          date_and_event_type_match(events, desired_date_type) ||
          event_type_matches_but_no_date_type(events, desired_date_type) ||
          event_has_date_type(events, desired_date_type)
      end

      # @param [Cocina::Models::DescriptiveValue] date a date object from an event
      # @return [Boolean] true if the date itself, or any of its structuredValue or
      #   parallelValue entries, has status 'primary'
      def self.date_status_primary(date)
        return true if date.status == 'primary'

        nested_values = Array(date.structuredValue) + Array(date.parallelValue)
        nested_values.any? { |value| value.status == 'primary' }
      end

      # @return [Cocina::Models::Event, nil] event with date of type desired_date_type and of status primary
      def self.date_type_matches_and_primary(events, desired_date_type)
        events.find do |event|
          event_dates(event).any? do |date|
            desired_date_type == date_type(date) && date_status_primary(date)
          end
        end
      end
      private_class_method :date_type_matches_and_primary

      # @return [Cocina::Models::Event, nil] event with date of type desired_date_type and the event has matching type
      def self.date_and_event_type_match(events, desired_date_type)
        events.find do |event|
          event_type_matches(event, desired_date_type) && date_of_type?(event, desired_date_type)
        end
      end
      private_class_method :date_and_event_type_match

      # @return [Cocina::Models::Event, nil] event with type of desired_date_type and a date field without a type
      def self.event_type_matches_but_no_date_type(events, desired_date_type)
        events.find do |event|
          event_type_matches(event, desired_date_type) &&
            event_dates(event).any? { |date| date_type(date).nil? }
        end
      end
      private_class_method :event_type_matches_but_no_date_type

      # @return [Cocina::Models::Event, nil] event with date of type desired_date_type
      def self.event_has_date_type(events, desired_date_type)
        events.find { |event| date_of_type?(event, desired_date_type) }
      end
      private_class_method :event_has_date_type

      # @return [Boolean] true if event type matches or parallelEvent type matches the param
      def self.event_type_matches(event, desired_type)
        return true if event.type == desired_type

        parallel_events = event.parallelEvent || []
        parallel_events.any? { |parallel_event| parallel_event.type == desired_type }
      end
      private_class_method :event_type_matches

      # Gathers every date of the event together with the dates of its parallel events.
      #
      # @return [Array<Cocina::Models::DescriptiveValue>] flattened, with nils removed
      def self.event_dates(event)
        (Array(event.date) + Array(event.parallelEvent&.map(&:date))).flatten.compact
      end
      private_class_method :event_dates

      # @return [Boolean] true if any date of the event (or of its parallel events)
      #   has type desired_date_type
      def self.date_of_type?(event, desired_date_type)
        event_dates(event).any? { |date| desired_date_type == date_type(date) }
      end
      private_class_method :date_of_type?

      # The date's own type wins; otherwise the first type present among its
      # structuredValue entries, then among its parallelValue entries.
      #
      # @param [Cocina::Models::DescriptiveValue, nil] date a date object from an event
      # @return [String, nil] type from date object; nil when date is nil or no type is present
      def self.date_type(date)
        return if date.nil?
        return date.type if date.type.present?

        nested_values = Array(date.structuredValue) + Array(date.parallelValue)
        nested_values.find { |value| value&.type.present? }&.type
      end
      private_class_method :date_type
    end
  end
end
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Selectors
    # Finds the pub date to index from events
    class PubYearSelector
      # @param [Array<Cocina::Models::Event>] events
      # @return [String, nil] the year value for Solr; nil when no date could be found
      def self.build(events)
        new(events).build
      end

      # @param [Array<Cocina::Models::Event>] events
      def initialize(events)
        @events = events
      end

      # @return [String, nil] the earliest year parsed from the selected date, as a
      #   string; nil when find_date comes back blank
      def build
        date = find_date
        ParseDate.earliest_year(date).to_s if date.present?
      end

      # Tries each source of a date in order of preference and returns the first hit:
      # a date flagged primary, then the production, publication, capture,
      # copyright and creation event dates, then the first date of any event,
      # then structured dates, and finally the dates of parallel events.
      #
      # @return [String, nil] the raw date value
      # rubocop:disable Metrics/PerceivedComplexity
      # rubocop:disable Metrics/CyclomaticComplexity
      def find_date
        primary_date(events) ||
          event_date(production_event, 'production') ||
          event_date(publication_event, 'publication') ||
          event_date(capture_event, 'capture') ||
          event_date(copyright_event, 'copyright') ||
          creation_date ||
          first_date ||
          structured_dates(events) ||
          find_date_in_parallel_events
      end
      # rubocop:enable Metrics/PerceivedComplexity
      # rubocop:enable Metrics/CyclomaticComplexity

      private

      attr_reader :events

      # The builder is referenced by its full name: a bare `EventDateBuilder` is
      # not resolvable from within DorIndexing::Selectors, since the class is
      # defined under DorIndexing::Builders.
      def event_date(event, date_type)
        DorIndexing::Builders::EventDateBuilder.build(event, date_type)
      end

      def find_date_in_parallel_events
        parallel_events = events.flat_map(&:parallelEvent).compact
        primary_date(parallel_events) ||
          structured_dates(parallel_events)
      end

      # @return [String, nil] value of the first date among eligible_events whose
      #   status is 'primary'
      def primary_date(eligible_events)
        dates = eligible_events.flat_map(&:date).compact
        return if dates.blank?

        dates.find { |date| date.status == 'primary' }&.value
      end

      def first_date
        dates = events.flat_map(&:date).compact
        return if dates.blank?

        date_value(dates.first)
      end

      # The date's own value wins. Otherwise its parallel values are consulted:
      # the one flagged primary, else the structured values of the first
      # parallel value, else the first parallel value's own value.
      # rubocop:disable Metrics/AbcSize
      def date_value(date)
        return date.value if date.value
        return if date.parallelValue.blank?

        primary = date.parallelValue.find { |val| val.status == 'primary' }
        return primary.value if primary

        structured_values = date.parallelValue.first.structuredValue
        return find_within_structured_values(structured_values) if structured_values.present?

        date.parallelValue.first.value
      end
      # rubocop:enable Metrics/AbcSize

      # Only the first date of eligible_events is inspected.
      def structured_dates(eligible_events)
        dates = eligible_events.flat_map(&:date).compact
        return if dates.blank?

        structured_values = dates.first.structuredValue
        return if structured_values.blank?

        find_within_structured_values(structured_values)
      end

      # @return [String, nil] value of the structured value flagged primary, else
      #   the value of the first structured value
      def find_within_structured_values(structured_values)
        primary = structured_values.find { |date| date.status == 'primary' }
        return primary.value if primary

        structured_values.first.value
      end

      def creation_date
        @creation_date ||= event_date(creation_event, 'creation')
      end

      def publication_event
        @publication_event ||= DorIndexing::Selectors::EventSelector.select(events, 'publication')
      end

      def creation_event
        @creation_event ||= DorIndexing::Selectors::EventSelector.select(events, 'creation')
      end

      def capture_event
        @capture_event ||= DorIndexing::Selectors::EventSelector.select(events, 'capture')
      end

      def copyright_event
        @copyright_event ||= DorIndexing::Selectors::EventSelector.select(events, 'copyright')
      end

      def production_event
        @production_event ||= DorIndexing::Selectors::EventSelector.select(events, 'production')
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  # Version string of the dor_indexing gem.
  VERSION = '1.0.0'
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  # Model for workflow fields: builds the partial Solr document holding an
  # object's workflow status and milestone dates.
  class WorkflowFields
    # Convenience entry point: builds an instance and returns its result.
    #
    # @return [Hash] see #result
    def self.for(druid:, version:, workflow_client:)
      new(druid:, version:, workflow_client:).result
    end

    attr_reader :druid, :version, :workflow_client

    # @param druid identifier handed to workflow_client.status
    # @param version version handed to workflow_client.status
    # @param workflow_client object responding to status(druid:, version:)
    def initialize(druid:, version:, workflow_client:)
      @druid = druid
      @version = version
      @workflow_client = workflow_client
    end

    # @return [Hash] the partial solr document for processable concerns
    def result
      solr_doc = {}
      add_sortable_milestones(solr_doc)
      add_status(solr_doc)
      solr_doc
    end

    private

    # Status object fetched from the workflow client (memoized).
    def status_service
      @status_service ||= workflow_client.status(druid:, version:)
    end

    def add_status(solr_doc)
      # This is the status on the Argo show page (e.g. "v4 In accessioning (described, published, deposited)")
      solr_doc['status_ssi'] = status_service.display

      # This is used for Argo's "Processing Status" facet; only set when a status code is known
      if status_service.info[:status_code]
        solr_doc['processing_status_text_ssi'] = status_service.display_simplified
      end
    end

    # @return [Hash] milestone name => xmlschema-formatted UTC timestamps, in the
    #   order the milestones were reported
    def sortable_milestones
      by_name = status_service.milestones.group_by { |entry| entry[:milestone] }
      by_name.transform_values do |entries|
        entries.map { |entry| entry[:at].utc.xmlschema }
      end
    end

    def add_sortable_milestones(solr_doc)
      sortable_milestones.each do |milestone, unordered_dates|
        sorted_dates = unordered_dates.sort

        # multi-valued field (e.g. published_dttsim): every distinct date, in ascending order
        multi_field = "#{milestone}_dttsim"
        solr_doc[multi_field] = Array(solr_doc[multi_field]) | sorted_dates

        # fields for OAI harvester to sort on: _dttsi is trie date +stored +indexed (single valued, i.e. sortable)
        # TODO: we really only need accessioned_earliest and registered_earliest
        solr_doc["#{milestone}_earliest_dttsi"] = sorted_dates.first
        solr_doc["#{milestone}_latest_dttsi"] = sorted_dates.last
      end
    end
  end
end