dor_indexing 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +2 -0
  3. data/.rubocop.yml +355 -0
  4. data/Gemfile +16 -0
  5. data/Gemfile.lock +218 -0
  6. data/README.md +33 -0
  7. data/Rakefile +11 -0
  8. data/dor_indexing.gemspec +40 -0
  9. data/lib/dor_indexing/builders/all_search_text_builder.rb +58 -0
  10. data/lib/dor_indexing/builders/author_builder.rb +31 -0
  11. data/lib/dor_indexing/builders/collection_rights_description_builder.rb +29 -0
  12. data/lib/dor_indexing/builders/document_builder.rb +106 -0
  13. data/lib/dor_indexing/builders/event_date_builder.rb +71 -0
  14. data/lib/dor_indexing/builders/event_place_builder.rb +73 -0
  15. data/lib/dor_indexing/builders/geographic_builder.rb +82 -0
  16. data/lib/dor_indexing/builders/name_builder.rb +70 -0
  17. data/lib/dor_indexing/builders/orcid_builder.rb +62 -0
  18. data/lib/dor_indexing/builders/publisher_name_builder.rb +53 -0
  19. data/lib/dor_indexing/builders/temporal_builder.rb +56 -0
  20. data/lib/dor_indexing/builders/topic_builder.rb +96 -0
  21. data/lib/dor_indexing/cocina_repository.rb +24 -0
  22. data/lib/dor_indexing/indexers/administrative_tag_indexer.rb +69 -0
  23. data/lib/dor_indexing/indexers/collection_title_indexer.rb +27 -0
  24. data/lib/dor_indexing/indexers/composite_indexer.rb +36 -0
  25. data/lib/dor_indexing/indexers/content_metadata_indexer.rb +69 -0
  26. data/lib/dor_indexing/indexers/data_indexer.rb +66 -0
  27. data/lib/dor_indexing/indexers/default_object_rights_indexer.rb +36 -0
  28. data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +226 -0
  29. data/lib/dor_indexing/indexers/embargo_metadata_indexer.rb +32 -0
  30. data/lib/dor_indexing/indexers/identifiable_indexer.rb +92 -0
  31. data/lib/dor_indexing/indexers/identity_metadata_indexer.rb +85 -0
  32. data/lib/dor_indexing/indexers/process_indexer.rb +63 -0
  33. data/lib/dor_indexing/indexers/releasable_indexer.rb +62 -0
  34. data/lib/dor_indexing/indexers/rights_metadata_indexer.rb +59 -0
  35. data/lib/dor_indexing/indexers/role_metadata_indexer.rb +31 -0
  36. data/lib/dor_indexing/indexers/workflow_indexer.rb +51 -0
  37. data/lib/dor_indexing/indexers/workflows_indexer.rb +40 -0
  38. data/lib/dor_indexing/marc_country.rb +359 -0
  39. data/lib/dor_indexing/selectors/event_selector.rb +112 -0
  40. data/lib/dor_indexing/selectors/pub_year_selector.rb +119 -0
  41. data/lib/dor_indexing/version.rb +5 -0
  42. data/lib/dor_indexing/workflow_fields.rb +63 -0
  43. data/lib/dor_indexing/workflow_solr_document.rb +93 -0
  44. data/lib/dor_indexing.rb +19 -0
  45. metadata +173 -0
@@ -0,0 +1,359 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  # Lookup table and helpers for MARC country codes.
  #
  # Maps the two- and three-letter codes of the MARC country vocabulary to a
  # display name. Lookups accept either a bare code (`from_code`) or a URI
  # under MARC_COUNTRY_URI (`from_uri`).
  # rubocop:disable Metrics/ClassLength
  class MarcCountry
    # URI prefix under which every MARC country code is published.
    MARC_COUNTRY_URI = 'http://id.loc.gov/vocabulary/countries/'

    # Name of the MARC country vocabulary.
    # NOTE(review): not referenced inside this class; presumably compared against a
    # source code by callers - confirm.
    MARC_COUNTRY_CODE = 'marccountry'

    # Code => display name. Keys are lowercase; lookups are exact (case-sensitive).
    COUNTRY_CODES = { # rubocop:disable Metrics/CollectionLiteralLength
      'aa' => 'Albania',
      'abc' => 'Alberta',
      'aca' => 'Australian Capital Territory',
      'ae' => 'Algeria',
      'af' => 'Afghanistan',
      'ag' => 'Argentina',
      'ai' => 'Armenia (Republic)',
      'aj' => 'Azerbaijan',
      'aku' => 'Alaska',
      'alu' => 'Alabama',
      'am' => 'Anguilla',
      'an' => 'Andorra',
      'ao' => 'Angola',
      'aq' => 'Antigua and Barbuda',
      'aru' => 'Arkansas',
      'as' => 'American Samoa',
      'at' => 'Australia',
      'au' => 'Austria',
      'aw' => 'Aruba',
      'ay' => 'Antarctica',
      'azu' => 'Arizona',
      'ba' => 'Bahrain',
      'bb' => 'Barbados',
      'bcc' => 'British Columbia',
      'bd' => 'Burundi',
      'be' => 'Belgium',
      'bf' => 'Bahamas',
      'bg' => 'Bangladesh',
      'bh' => 'Belize',
      'bi' => 'British Indian Ocean Territory',
      'bl' => 'Brazil',
      'bm' => 'Bermuda Islands',
      'bn' => 'Bosnia and Herzegovina',
      'bo' => 'Bolivia',
      'bp' => 'Solomon Islands',
      'br' => 'Burma',
      'bs' => 'Botswana',
      'bt' => 'Bhutan',
      'bu' => 'Bulgaria',
      'bv' => 'Bouvet Island',
      'bw' => 'Belarus',
      'bx' => 'Brunei',
      'ca' => 'Caribbean Netherlands',
      'cau' => 'California',
      'cb' => 'Cambodia',
      'cc' => 'China',
      'cd' => 'Chad',
      'ce' => 'Sri Lanka',
      'cf' => 'Congo (Brazzaville)',
      'cg' => 'Congo (Democratic Republic)',
      'ch' => 'China (Republic : 1949- )',
      'ci' => 'Croatia',
      'cj' => 'Cayman Islands',
      'ck' => 'Colombia',
      'cl' => 'Chile',
      'cm' => 'Cameroon',
      'co' => 'Curaçao',
      'cou' => 'Colorado',
      'cq' => 'Comoros',
      'cr' => 'Costa Rica',
      'ctu' => 'Connecticut',
      'cu' => 'Cuba',
      'cv' => 'Cabo Verde',
      'cw' => 'Cook Islands',
      'cx' => 'Central African Republic',
      'cy' => 'Cyprus',
      'dcu' => 'District of Columbia',
      'deu' => 'Delaware',
      'dk' => 'Denmark',
      'dm' => 'Benin',
      'dq' => 'Dominica',
      'dr' => 'Dominican Republic',
      'ea' => 'Eritrea',
      'ec' => 'Ecuador',
      'eg' => 'Equatorial Guinea',
      'em' => 'Timor-Leste',
      'enk' => 'England',
      'er' => 'Estonia',
      'es' => 'El Salvador',
      'et' => 'Ethiopia',
      'fa' => 'Faroe Islands',
      'fg' => 'French Guiana',
      'fi' => 'Finland',
      'fj' => 'Fiji',
      'fk' => 'Falkland Islands',
      'flu' => 'Florida',
      'fm' => 'Micronesia (Federated States)',
      'fp' => 'French Polynesia',
      'fr' => 'France',
      'fs' => 'Terres australes et antarctiques françaises',
      'ft' => 'Djibouti',
      'gau' => 'Georgia',
      'gb' => 'Kiribati',
      'gd' => 'Grenada',
      'gg' => 'Guernsey',
      'gh' => 'Ghana',
      'gi' => 'Gibraltar',
      'gl' => 'Greenland',
      'gm' => 'Gambia',
      'go' => 'Gabon',
      'gp' => 'Guadeloupe',
      'gr' => 'Greece',
      'gs' => 'Georgia (Republic)',
      'gt' => 'Guatemala',
      'gu' => 'Guam',
      'gv' => 'Guinea',
      'gw' => 'Germany',
      'gy' => 'Guyana',
      'gz' => 'Gaza Strip',
      'hiu' => 'Hawaii',
      'hm' => 'Heard and McDonald Islands',
      'ho' => 'Honduras',
      'ht' => 'Haiti',
      'hu' => 'Hungary',
      'iau' => 'Iowa',
      'ic' => 'Iceland',
      'idu' => 'Idaho',
      'ie' => 'Ireland',
      'ii' => 'India',
      'ilu' => 'Illinois',
      'im' => 'Isle of Man',
      'inu' => 'Indiana',
      'io' => 'Indonesia',
      'iq' => 'Iraq',
      'ir' => 'Iran',
      'is' => 'Israel',
      'it' => 'Italy',
      'iv' => "Côte d'Ivoire",
      'iy' => 'Iraq-Saudi Arabia Neutral Zone',
      'ja' => 'Japan',
      'je' => 'Jersey',
      'ji' => 'Johnston Atoll',
      'jm' => 'Jamaica',
      'jo' => 'Jordan',
      'ke' => 'Kenya',
      'kg' => 'Kyrgyzstan',
      'kn' => 'Korea (North)',
      'ko' => 'Korea (South)',
      'ksu' => 'Kansas',
      'ku' => 'Kuwait',
      'kv' => 'Kosovo',
      'kyu' => 'Kentucky',
      'kz' => 'Kazakhstan',
      'lau' => 'Louisiana',
      'lb' => 'Liberia',
      'le' => 'Lebanon',
      'lh' => 'Liechtenstein',
      'li' => 'Lithuania',
      'lo' => 'Lesotho',
      'ls' => 'Laos',
      'lu' => 'Luxembourg',
      'lv' => 'Latvia',
      'ly' => 'Libya',
      'mau' => 'Massachusetts',
      'mbc' => 'Manitoba',
      'mc' => 'Monaco',
      'mdu' => 'Maryland',
      'meu' => 'Maine',
      'mf' => 'Mauritius',
      'mg' => 'Madagascar',
      'miu' => 'Michigan',
      'mj' => 'Montserrat',
      'mk' => 'Oman',
      'ml' => 'Mali',
      'mm' => 'Malta',
      'mnu' => 'Minnesota',
      'mo' => 'Montenegro',
      'mou' => 'Missouri',
      'mp' => 'Mongolia',
      'mq' => 'Martinique',
      'mr' => 'Morocco',
      'msu' => 'Mississippi',
      'mtu' => 'Montana',
      'mu' => 'Mauritania',
      'mv' => 'Moldova',
      'mw' => 'Malawi',
      'mx' => 'Mexico',
      'my' => 'Malaysia',
      'mz' => 'Mozambique',
      'nbu' => 'Nebraska',
      'ncu' => 'North Carolina',
      'ndu' => 'North Dakota',
      'ne' => 'Netherlands',
      'nfc' => 'Newfoundland and Labrador',
      'ng' => 'Niger',
      'nhu' => 'New Hampshire',
      'nik' => 'Northern Ireland',
      'nju' => 'New Jersey',
      'nkc' => 'New Brunswick',
      'nl' => 'New Caledonia',
      'nmu' => 'New Mexico',
      'nn' => 'Vanuatu',
      'no' => 'Norway',
      'np' => 'Nepal',
      'nq' => 'Nicaragua',
      'nr' => 'Nigeria',
      'nsc' => 'Nova Scotia',
      'ntc' => 'Northwest Territories',
      'nu' => 'Nauru',
      'nuc' => 'Nunavut',
      'nvu' => 'Nevada',
      'nw' => 'Northern Mariana Islands',
      'nx' => 'Norfolk Island',
      'nyu' => 'New York (State)',
      'nz' => 'New Zealand',
      'ohu' => 'Ohio',
      'oku' => 'Oklahoma',
      'onc' => 'Ontario',
      'oru' => 'Oregon',
      'ot' => 'Mayotte',
      'pau' => 'Pennsylvania',
      'pc' => 'Pitcairn Island',
      'pe' => 'Peru',
      'pf' => 'Paracel Islands',
      'pg' => 'Guinea-Bissau',
      'ph' => 'Philippines',
      'pic' => 'Prince Edward Island',
      'pk' => 'Pakistan',
      'pl' => 'Poland',
      'pn' => 'Panama',
      'po' => 'Portugal',
      'pp' => 'Papua New Guinea',
      'pr' => 'Puerto Rico',
      'pw' => 'Palau',
      'py' => 'Paraguay',
      'qa' => 'Qatar',
      'qea' => 'Queensland',
      'quc' => 'Québec (Province)',
      'rb' => 'Serbia',
      're' => 'Réunion',
      'rh' => 'Zimbabwe',
      'riu' => 'Rhode Island',
      'rm' => 'Romania',
      'ru' => 'Russia (Federation)',
      'rw' => 'Rwanda',
      'sa' => 'South Africa',
      'sc' => 'Saint-Barthélemy',
      'scu' => 'South Carolina',
      'sd' => 'South Sudan',
      'sdu' => 'South Dakota',
      'se' => 'Seychelles',
      'sf' => 'Sao Tome and Principe',
      'sg' => 'Senegal',
      'sh' => 'Spanish North Africa',
      'si' => 'Singapore',
      'sj' => 'Sudan',
      'sl' => 'Sierra Leone',
      'sm' => 'San Marino',
      'sn' => 'Sint Maarten',
      'snc' => 'Saskatchewan',
      'so' => 'Somalia',
      'sp' => 'Spain',
      'sq' => 'Eswatini',
      'sr' => 'Surinam',
      'ss' => 'Western Sahara',
      'st' => 'Saint-Martin',
      'stk' => 'Scotland',
      'su' => 'Saudi Arabia',
      'sw' => 'Sweden',
      'sx' => 'Namibia',
      'sy' => 'Syria',
      'sz' => 'Switzerland',
      'ta' => 'Tajikistan',
      'tc' => 'Turks and Caicos Islands',
      'tg' => 'Togo',
      'th' => 'Thailand',
      'ti' => 'Tunisia',
      'tk' => 'Turkmenistan',
      'tl' => 'Tokelau',
      'tma' => 'Tasmania',
      'tnu' => 'Tennessee',
      'to' => 'Tonga',
      'tr' => 'Trinidad and Tobago',
      'ts' => 'United Arab Emirates',
      'tu' => 'Turkey',
      'tv' => 'Tuvalu',
      'txu' => 'Texas',
      'tz' => 'Tanzania',
      'ua' => 'Egypt',
      'uc' => 'United States Misc. Caribbean Islands',
      'ug' => 'Uganda',
      'un' => 'Ukraine',
      'up' => 'United States Misc. Pacific Islands',
      'utu' => 'Utah',
      'uv' => 'Burkina Faso',
      'uy' => 'Uruguay',
      'uz' => 'Uzbekistan',
      'vau' => 'Virginia',
      'vb' => 'British Virgin Islands',
      'vc' => 'Vatican City',
      've' => 'Venezuela',
      'vi' => 'Virgin Islands of the United States',
      'vm' => 'Vietnam',
      'vp' => 'Various places',
      'vra' => 'Victoria',
      'vtu' => 'Vermont',
      'wau' => 'Washington (State)',
      'wea' => 'Western Australia',
      'wf' => 'Wallis and Futuna',
      'wiu' => 'Wisconsin',
      'wj' => 'West Bank of the Jordan River',
      'wk' => 'Wake Island',
      'wlk' => 'Wales',
      'ws' => 'Samoa',
      'wvu' => 'West Virginia',
      'wyu' => 'Wyoming',
      'xa' => 'Christmas Island (Indian Ocean)',
      'xb' => 'Cocos (Keeling) Islands',
      'xc' => 'Maldives',
      'xd' => 'Saint Kitts-Nevis',
      'xe' => 'Marshall Islands',
      'xf' => 'Midway Islands',
      'xga' => 'Coral Sea Islands Territory',
      'xh' => 'Niue',
      'xj' => 'Saint Helena',
      'xk' => 'Saint Lucia',
      'xl' => 'Saint Pierre and Miquelon',
      'xm' => 'Saint Vincent and the Grenadines',
      'xn' => 'North Macedonia',
      'xna' => 'New South Wales',
      'xo' => 'Slovakia',
      'xoa' => 'Northern Territory',
      'xp' => 'Spratly Island',
      'xr' => 'Czech Republic',
      'xra' => 'South Australia',
      'xs' => 'South Georgia and the South Sandwich Islands',
      'xv' => 'Slovenia',
      # NOTE(review): the value below carries literal double quotes; this looks like a
      # CSV-import artifact but is left untouched because consumers may match on it - confirm.
      'xx' => '"No place, unknown, or undetermined"',
      'xxc' => 'Canada',
      'xxk' => 'United Kingdom',
      'xxu' => 'United States',
      'ye' => 'Yemen',
      'ykc' => 'Yukon Territory',
      'za' => 'Zambia'
    }.freeze

    # @param [String, nil] code a MARC country code, e.g. 'enk'
    # @return [String, nil] the display name, or nil when the code is not in COUNTRY_CODES
    def self.from_code(code)
      COUNTRY_CODES[code]
    end

    # @param [String, nil] uri a URI expected to start with MARC_COUNTRY_URI
    # @return [String, nil] the display name for the code that follows the prefix;
    #   nil when uri is nil, does not start with the prefix, or the code is unknown
    def self.from_uri(uri)
      # Use the constant for both the guard and the strip so the two cannot drift apart.
      return unless uri&.start_with?(MARC_COUNTRY_URI)

      COUNTRY_CODES[uri.delete_prefix(MARC_COUNTRY_URI)]
    end
  end
  # rubocop:enable Metrics/ClassLength
end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Selectors
    # Chooses, from a list of events, the one best suited to supply a date of a given type
    class EventSelector
      # Applies the matching rules below from strictest to loosest and returns the
      # first event any rule produces.
      # @param [Array<Cocina::Models::Event>] events
      # @param [String] desired_date_type compared against both date types and event types
      # @return [Cocina::Models::Event, nil] the best matching event, or nil when no rule matches
      def self.select(events, desired_date_type)
        %i[
          date_type_matches_and_primary
          date_and_event_type_match
          event_type_matches_but_no_date_type
          event_has_date_type
        ].each do |rule|
          chosen = send(rule, events, desired_date_type)
          return chosen if chosen
        end
        nil
      end

      # @param [Cocina::Models::DescriptiveValue] date a date object from an event
      # @return [Object, nil] truthy when the date itself, one of its structuredValue
      #   entries, or one of its parallelValue entries has status 'primary'
      #   (true for the date itself, otherwise the matching nested value)
      def self.date_status_primary(date)
        primary = ->(value) { value.status == 'primary' }
        in_structured = Array(date.structuredValue).find(&primary)
        in_parallel = Array(date.parallelValue).find(&primary)

        primary.call(date) || in_structured || in_parallel
      end

      # @return [Array] every non-nil date of the event and of its parallel events, flattened
      def self.dates_for(event)
        own_dates = Array(event.date)
        parallel_dates = Array(event.parallelEvent&.map(&:date))
        (own_dates + parallel_dates).flatten.compact
      end
      private_class_method :dates_for

      # @return [Cocina::Models::Event, nil] first event having a date whose type is
      #   desired_date_type and whose status is primary
      def self.date_type_matches_and_primary(events, desired_date_type)
        events.find do |event|
          dates_for(event).find do |date|
            desired_date_type == date_type(date) && date_status_primary(date)
          end
        end
      end
      private_class_method :date_type_matches_and_primary

      # @return [Cocina::Models::Event, nil] first event whose type matches and which
      #   has a date whose type is desired_date_type
      def self.date_and_event_type_match(events, desired_date_type)
        events.find do |event|
          event_type_matches(event, desired_date_type) &&
            dates_for(event).find { |date| desired_date_type == date_type(date) }
        end
      end
      private_class_method :date_and_event_type_match

      # @return [Cocina::Models::Event, nil] first event whose type matches and which
      #   has a date carrying no type at all
      def self.event_type_matches_but_no_date_type(events, desired_date_type)
        events.find do |event|
          event_type_matches(event, desired_date_type) &&
            dates_for(event).find { |date| date_type(date).nil? }
        end
      end
      private_class_method :event_type_matches_but_no_date_type

      # @return [Cocina::Models::Event, nil] first event having a date whose type is
      #   desired_date_type, regardless of the event's own type
      def self.event_has_date_type(events, desired_date_type)
        events.find do |event|
          dates_for(event).find { |date| desired_date_type == date_type(date) }
        end
      end
      private_class_method :event_has_date_type

      # @return [Boolean] true if the event's type, or the type of any of its
      #   parallel events, equals desired_type
      def self.event_type_matches(event, desired_type)
        parallel_match = event.parallelEvent&.find { |parallel_event| parallel_event.type == desired_type }
        event.type == desired_type || parallel_match.present?
      end
      private_class_method :event_type_matches

      # Looks for a type on the date itself first, then on its structuredValue
      # entries, then on its parallelValue entries.
      # @param [Cocina::Models::DescriptiveValue] date a date object from an event
      # @return [String, nil] the first non-blank type found, or nil
      def self.date_type(date)
        return date.type if date&.type.present?

        typed = ->(value) { value&.type.present? }
        carrier = Array(date.structuredValue).find(&typed) || Array(date.parallelValue).find(&typed)
        carrier&.type
      end
      private_class_method :date_type
    end
  end
end
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Selectors
    # Finds the publication date to index from a list of events and reduces it to a year
    class PubYearSelector
      # @param [Array<Cocina::Models::Events>] events
      # @return [String, nil] the year value for Solr; nil when no date could be found
      def self.build(events)
        new(events).build
      end

      # @param [Array<Cocina::Models::Events>] events
      def initialize(events)
        @events = events
      end

      # @return [String, nil] stringified result of ParseDate.earliest_year for the
      #   selected date; nil when no date was selected
      def build
        date = find_date
        ParseDate.earliest_year(date).to_s if date.present?
      end

      # Walks the candidate sources in priority order and returns the first date value found:
      # a date marked primary, then production / publication / capture / copyright /
      # creation event dates, then the first date of any event, then structured values,
      # and finally dates from parallel events.
      # @return [String, nil] NOTE(review): the exact return type of the builder calls is
      #   determined by DorIndexing::Builders::EventDateBuilder, which is not visible here.
      # rubocop:disable Metrics/PerceivedComplexity
      # rubocop:disable Metrics/CyclomaticComplexity
      def find_date
        primary_date(events) ||
          event_date(production_event, 'production') ||
          event_date(publication_event, 'publication') ||
          event_date(capture_event, 'capture') ||
          event_date(copyright_event, 'copyright') ||
          creation_date ||
          first_date ||
          structured_dates(events) ||
          find_date_in_parallel_events
      end
      # rubocop:enable Metrics/PerceivedComplexity
      # rubocop:enable Metrics/CyclomaticComplexity

      private

      attr_reader :events

      # Single point of contact with the date builder. The constant is fully qualified
      # because a bare `EventDateBuilder` is not resolvable from inside
      # DorIndexing::Selectors (the class lives under DorIndexing::Builders).
      def event_date(event, type)
        DorIndexing::Builders::EventDateBuilder.build(event, type)
      end

      # Last-resort lookup: a primary date, then structured values, of the parallel events.
      def find_date_in_parallel_events
        parallel_events = events.flat_map(&:parallelEvent).compact
        primary_date(parallel_events) ||
          structured_dates(parallel_events)
      end

      # @return [String, nil] value of the first date whose own status is 'primary'
      def primary_date(eligible_events)
        dates = eligible_events.flat_map(&:date).compact
        return if dates.blank?

        dates.find { |date| date.status == 'primary' }&.value
      end

      # @return [String, nil] value derived from the very first date across all events
      def first_date
        dates = events.flat_map(&:date).compact
        return if dates.blank?

        date_value(dates.first)
      end

      # Resolves a date to a value: its own value, else the primary parallelValue,
      # else the first parallelValue (via its structured values when it has any).
      # rubocop:disable Metrics/AbcSize
      def date_value(date)
        return date.value if date.value
        return if date.parallelValue.blank?

        primary = date.parallelValue.find { |val| val.status == 'primary' }
        return primary.value if primary

        structured_values = date.parallelValue.first.structuredValue
        return find_within_structured_values(structured_values) if structured_values.present?

        date.parallelValue.first.value
      end
      # rubocop:enable Metrics/AbcSize

      # Only the first date of the eligible events is inspected for structured values.
      def structured_dates(eligible_events)
        dates = eligible_events.flat_map(&:date).compact
        return if dates.blank?

        structured_values = dates.first.structuredValue
        return if structured_values.blank?

        find_within_structured_values(structured_values)
      end

      # @return [String, nil] value of the primary structured value, else of the first one
      def find_within_structured_values(structured_values)
        primary = structured_values.find { |date| date.status == 'primary' }
        return primary.value if primary

        structured_values.first.value
      end

      def creation_date
        @creation_date ||= event_date(creation_event, 'creation')
      end

      def publication_event
        @publication_event ||= DorIndexing::Selectors::EventSelector.select(events, 'publication')
      end

      def creation_event
        @creation_event ||= DorIndexing::Selectors::EventSelector.select(events, 'creation')
      end

      def capture_event
        @capture_event ||= DorIndexing::Selectors::EventSelector.select(events, 'capture')
      end

      def copyright_event
        @copyright_event ||= DorIndexing::Selectors::EventSelector.select(events, 'copyright')
      end

      def production_event
        @production_event ||= DorIndexing::Selectors::EventSelector.select(events, 'production')
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  # Version string of the dor_indexing gem
  VERSION = '1.0.0'
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  # Builds the workflow-derived part of a Solr document: milestone dates and status
  class WorkflowFields
    attr_reader :druid, :version, :workflow_client

    # Convenience entry point: build the fields without holding on to an instance.
    # @return [Hash] see #result
    def self.for(druid:, version:, workflow_client:)
      new(druid:, version:, workflow_client:).result
    end

    # @param druid identifier handed to workflow_client.status
    # @param version version handed to workflow_client.status
    # @param workflow_client object responding to `status(druid:, version:)`
    def initialize(druid:, version:, workflow_client:)
      @druid = druid
      @version = version
      @workflow_client = workflow_client
    end

    # @return [Hash] the partial solr document for processable concerns
    def result
      solr_doc = {}
      add_sortable_milestones(solr_doc)
      add_status(solr_doc)
      solr_doc
    end

    private

    # Status object for this druid/version, fetched from the workflow client once and reused.
    def status_service
      @status_service ||= workflow_client.status(druid:, version:)
    end

    def add_status(solr_doc)
      # The status shown on the Argo show page (e.g. "v4 In accessioning (described, published, deposited)")
      solr_doc['status_ssi'] = status_service.display

      # Feeds Argo's "Processing Status" facet; only written when a status code is known
      if status_service.info[:status_code]
        solr_doc['processing_status_text_ssi'] = status_service.display_simplified
      end
    end

    # @return [Hash{Object => Array<String>}] milestone name => UTC xmlschema timestamps,
    #   in the order the milestones were reported
    def sortable_milestones
      status_service.milestones
                    .group_by { |entry| entry[:milestone] }
                    .transform_values { |entries| entries.map { |entry| entry[:at].utc.xmlschema } }
    end

    def add_sortable_milestones(solr_doc)
      sortable_milestones.each do |milestone, unordered_dates|
        sorted = unordered_dates.sort
        multi_valued_field = "#{milestone}_dttsim"

        # <milestone>_dttsim: every distinct timestamp for the milestone, oldest first
        sorted.each do |timestamp|
          collected = (solr_doc[multi_valued_field] ||= [])
          collected << timestamp unless collected.include?(timestamp)
        end

        # Fields for the OAI harvester to sort on: _dttsi is trie date +stored +indexed
        # (single valued, i.e. sortable)
        # TODO: we really only need accessioned_earliest and registered_earliest
        solr_doc["#{milestone}_earliest_dttsi"] = sorted.first
        solr_doc["#{milestone}_latest_dttsi"] = sorted.last
      end
    end
  end
end