dor_indexing 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/.rubocop.yml +355 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +218 -0
- data/README.md +33 -0
- data/Rakefile +11 -0
- data/dor_indexing.gemspec +40 -0
- data/lib/dor_indexing/builders/all_search_text_builder.rb +58 -0
- data/lib/dor_indexing/builders/author_builder.rb +31 -0
- data/lib/dor_indexing/builders/collection_rights_description_builder.rb +29 -0
- data/lib/dor_indexing/builders/document_builder.rb +106 -0
- data/lib/dor_indexing/builders/event_date_builder.rb +71 -0
- data/lib/dor_indexing/builders/event_place_builder.rb +73 -0
- data/lib/dor_indexing/builders/geographic_builder.rb +82 -0
- data/lib/dor_indexing/builders/name_builder.rb +70 -0
- data/lib/dor_indexing/builders/orcid_builder.rb +62 -0
- data/lib/dor_indexing/builders/publisher_name_builder.rb +53 -0
- data/lib/dor_indexing/builders/temporal_builder.rb +56 -0
- data/lib/dor_indexing/builders/topic_builder.rb +96 -0
- data/lib/dor_indexing/cocina_repository.rb +24 -0
- data/lib/dor_indexing/indexers/administrative_tag_indexer.rb +69 -0
- data/lib/dor_indexing/indexers/collection_title_indexer.rb +27 -0
- data/lib/dor_indexing/indexers/composite_indexer.rb +36 -0
- data/lib/dor_indexing/indexers/content_metadata_indexer.rb +69 -0
- data/lib/dor_indexing/indexers/data_indexer.rb +66 -0
- data/lib/dor_indexing/indexers/default_object_rights_indexer.rb +36 -0
- data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +226 -0
- data/lib/dor_indexing/indexers/embargo_metadata_indexer.rb +32 -0
- data/lib/dor_indexing/indexers/identifiable_indexer.rb +92 -0
- data/lib/dor_indexing/indexers/identity_metadata_indexer.rb +85 -0
- data/lib/dor_indexing/indexers/process_indexer.rb +63 -0
- data/lib/dor_indexing/indexers/releasable_indexer.rb +62 -0
- data/lib/dor_indexing/indexers/rights_metadata_indexer.rb +59 -0
- data/lib/dor_indexing/indexers/role_metadata_indexer.rb +31 -0
- data/lib/dor_indexing/indexers/workflow_indexer.rb +51 -0
- data/lib/dor_indexing/indexers/workflows_indexer.rb +40 -0
- data/lib/dor_indexing/marc_country.rb +359 -0
- data/lib/dor_indexing/selectors/event_selector.rb +112 -0
- data/lib/dor_indexing/selectors/pub_year_selector.rb +119 -0
- data/lib/dor_indexing/version.rb +5 -0
- data/lib/dor_indexing/workflow_fields.rb +63 -0
- data/lib/dor_indexing/workflow_solr_document.rb +93 -0
- data/lib/dor_indexing.rb +19 -0
- metadata +173 -0
@@ -0,0 +1,359 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  # Helper for MARC country codes.
  #
  # Maps MARC country codes (e.g. 'cau') to their display labels and resolves
  # id.loc.gov country URIs to the same labels.
  # rubocop:disable Metrics/ClassLength
  class MarcCountry
    # Prefix shared by every MARC country URI; the code is whatever follows it.
    MARC_COUNTRY_URI = 'http://id.loc.gov/vocabulary/countries/'

    MARC_COUNTRY_CODE = 'marccountry'

    COUNTRY_CODES = { # rubocop:disable Metrics/CollectionLiteralLength
      'aa' => 'Albania',
      'abc' => 'Alberta',
      'aca' => 'Australian Capital Territory',
      'ae' => 'Algeria',
      'af' => 'Afghanistan',
      'ag' => 'Argentina',
      'ai' => 'Armenia (Republic)',
      'aj' => 'Azerbaijan',
      'aku' => 'Alaska',
      'alu' => 'Alabama',
      'am' => 'Anguilla',
      'an' => 'Andorra',
      'ao' => 'Angola',
      'aq' => 'Antigua and Barbuda',
      'aru' => 'Arkansas',
      'as' => 'American Samoa',
      'at' => 'Australia',
      'au' => 'Austria',
      'aw' => 'Aruba',
      'ay' => 'Antarctica',
      'azu' => 'Arizona',
      'ba' => 'Bahrain',
      'bb' => 'Barbados',
      'bcc' => 'British Columbia',
      'bd' => 'Burundi',
      'be' => 'Belgium',
      'bf' => 'Bahamas',
      'bg' => 'Bangladesh',
      'bh' => 'Belize',
      'bi' => 'British Indian Ocean Territory',
      'bl' => 'Brazil',
      'bm' => 'Bermuda Islands',
      'bn' => 'Bosnia and Herzegovina',
      'bo' => 'Bolivia',
      'bp' => 'Solomon Islands',
      'br' => 'Burma',
      'bs' => 'Botswana',
      'bt' => 'Bhutan',
      'bu' => 'Bulgaria',
      'bv' => 'Bouvet Island',
      'bw' => 'Belarus',
      'bx' => 'Brunei',
      'ca' => 'Caribbean Netherlands',
      'cau' => 'California',
      'cb' => 'Cambodia',
      'cc' => 'China',
      'cd' => 'Chad',
      'ce' => 'Sri Lanka',
      'cf' => 'Congo (Brazzaville)',
      'cg' => 'Congo (Democratic Republic)',
      'ch' => 'China (Republic : 1949- )',
      'ci' => 'Croatia',
      'cj' => 'Cayman Islands',
      'ck' => 'Colombia',
      'cl' => 'Chile',
      'cm' => 'Cameroon',
      'co' => 'Curaçao',
      'cou' => 'Colorado',
      'cq' => 'Comoros',
      'cr' => 'Costa Rica',
      'ctu' => 'Connecticut',
      'cu' => 'Cuba',
      'cv' => 'Cabo Verde',
      'cw' => 'Cook Islands',
      'cx' => 'Central African Republic',
      'cy' => 'Cyprus',
      'dcu' => 'District of Columbia',
      'deu' => 'Delaware',
      'dk' => 'Denmark',
      'dm' => 'Benin',
      'dq' => 'Dominica',
      'dr' => 'Dominican Republic',
      'ea' => 'Eritrea',
      'ec' => 'Ecuador',
      'eg' => 'Equatorial Guinea',
      'em' => 'Timor-Leste',
      'enk' => 'England',
      'er' => 'Estonia',
      'es' => 'El Salvador',
      'et' => 'Ethiopia',
      'fa' => 'Faroe Islands',
      'fg' => 'French Guiana',
      'fi' => 'Finland',
      'fj' => 'Fiji',
      'fk' => 'Falkland Islands',
      'flu' => 'Florida',
      'fm' => 'Micronesia (Federated States)',
      'fp' => 'French Polynesia',
      'fr' => 'France',
      'fs' => 'Terres australes et antarctiques françaises',
      'ft' => 'Djibouti',
      'gau' => 'Georgia',
      'gb' => 'Kiribati',
      'gd' => 'Grenada',
      'gg' => 'Guernsey',
      'gh' => 'Ghana',
      'gi' => 'Gibraltar',
      'gl' => 'Greenland',
      'gm' => 'Gambia',
      'go' => 'Gabon',
      'gp' => 'Guadeloupe',
      'gr' => 'Greece',
      'gs' => 'Georgia (Republic)',
      'gt' => 'Guatemala',
      'gu' => 'Guam',
      'gv' => 'Guinea',
      'gw' => 'Germany',
      'gy' => 'Guyana',
      'gz' => 'Gaza Strip',
      'hiu' => 'Hawaii',
      'hm' => 'Heard and McDonald Islands',
      'ho' => 'Honduras',
      'ht' => 'Haiti',
      'hu' => 'Hungary',
      'iau' => 'Iowa',
      'ic' => 'Iceland',
      'idu' => 'Idaho',
      'ie' => 'Ireland',
      'ii' => 'India',
      'ilu' => 'Illinois',
      'im' => 'Isle of Man',
      'inu' => 'Indiana',
      'io' => 'Indonesia',
      'iq' => 'Iraq',
      'ir' => 'Iran',
      'is' => 'Israel',
      'it' => 'Italy',
      'iv' => "Côte d'Ivoire",
      'iy' => 'Iraq-Saudi Arabia Neutral Zone',
      'ja' => 'Japan',
      'je' => 'Jersey',
      'ji' => 'Johnston Atoll',
      'jm' => 'Jamaica',
      'jo' => 'Jordan',
      'ke' => 'Kenya',
      'kg' => 'Kyrgyzstan',
      'kn' => 'Korea (North)',
      'ko' => 'Korea (South)',
      'ksu' => 'Kansas',
      'ku' => 'Kuwait',
      'kv' => 'Kosovo',
      'kyu' => 'Kentucky',
      'kz' => 'Kazakhstan',
      'lau' => 'Louisiana',
      'lb' => 'Liberia',
      'le' => 'Lebanon',
      'lh' => 'Liechtenstein',
      'li' => 'Lithuania',
      'lo' => 'Lesotho',
      'ls' => 'Laos',
      'lu' => 'Luxembourg',
      'lv' => 'Latvia',
      'ly' => 'Libya',
      'mau' => 'Massachusetts',
      'mbc' => 'Manitoba',
      'mc' => 'Monaco',
      'mdu' => 'Maryland',
      'meu' => 'Maine',
      'mf' => 'Mauritius',
      'mg' => 'Madagascar',
      'miu' => 'Michigan',
      'mj' => 'Montserrat',
      'mk' => 'Oman',
      'ml' => 'Mali',
      'mm' => 'Malta',
      'mnu' => 'Minnesota',
      'mo' => 'Montenegro',
      'mou' => 'Missouri',
      'mp' => 'Mongolia',
      'mq' => 'Martinique',
      'mr' => 'Morocco',
      'msu' => 'Mississippi',
      'mtu' => 'Montana',
      'mu' => 'Mauritania',
      'mv' => 'Moldova',
      'mw' => 'Malawi',
      'mx' => 'Mexico',
      'my' => 'Malaysia',
      'mz' => 'Mozambique',
      'nbu' => 'Nebraska',
      'ncu' => 'North Carolina',
      'ndu' => 'North Dakota',
      'ne' => 'Netherlands',
      'nfc' => 'Newfoundland and Labrador',
      'ng' => 'Niger',
      'nhu' => 'New Hampshire',
      'nik' => 'Northern Ireland',
      'nju' => 'New Jersey',
      'nkc' => 'New Brunswick',
      'nl' => 'New Caledonia',
      'nmu' => 'New Mexico',
      'nn' => 'Vanuatu',
      'no' => 'Norway',
      'np' => 'Nepal',
      'nq' => 'Nicaragua',
      'nr' => 'Nigeria',
      'nsc' => 'Nova Scotia',
      'ntc' => 'Northwest Territories',
      'nu' => 'Nauru',
      'nuc' => 'Nunavut',
      'nvu' => 'Nevada',
      'nw' => 'Northern Mariana Islands',
      'nx' => 'Norfolk Island',
      'nyu' => 'New York (State)',
      'nz' => 'New Zealand',
      'ohu' => 'Ohio',
      'oku' => 'Oklahoma',
      'onc' => 'Ontario',
      'oru' => 'Oregon',
      'ot' => 'Mayotte',
      'pau' => 'Pennsylvania',
      'pc' => 'Pitcairn Island',
      'pe' => 'Peru',
      'pf' => 'Paracel Islands',
      'pg' => 'Guinea-Bissau',
      'ph' => 'Philippines',
      'pic' => 'Prince Edward Island',
      'pk' => 'Pakistan',
      'pl' => 'Poland',
      'pn' => 'Panama',
      'po' => 'Portugal',
      'pp' => 'Papua New Guinea',
      'pr' => 'Puerto Rico',
      'pw' => 'Palau',
      'py' => 'Paraguay',
      'qa' => 'Qatar',
      'qea' => 'Queensland',
      'quc' => 'Québec (Province)',
      'rb' => 'Serbia',
      're' => 'Réunion',
      'rh' => 'Zimbabwe',
      'riu' => 'Rhode Island',
      'rm' => 'Romania',
      'ru' => 'Russia (Federation)',
      'rw' => 'Rwanda',
      'sa' => 'South Africa',
      'sc' => 'Saint-Barthélemy',
      'scu' => 'South Carolina',
      'sd' => 'South Sudan',
      'sdu' => 'South Dakota',
      'se' => 'Seychelles',
      'sf' => 'Sao Tome and Principe',
      'sg' => 'Senegal',
      'sh' => 'Spanish North Africa',
      'si' => 'Singapore',
      'sj' => 'Sudan',
      'sl' => 'Sierra Leone',
      'sm' => 'San Marino',
      'sn' => 'Sint Maarten',
      'snc' => 'Saskatchewan',
      'so' => 'Somalia',
      'sp' => 'Spain',
      'sq' => 'Eswatini',
      'sr' => 'Surinam',
      'ss' => 'Western Sahara',
      'st' => 'Saint-Martin',
      'stk' => 'Scotland',
      'su' => 'Saudi Arabia',
      'sw' => 'Sweden',
      'sx' => 'Namibia',
      'sy' => 'Syria',
      'sz' => 'Switzerland',
      'ta' => 'Tajikistan',
      'tc' => 'Turks and Caicos Islands',
      'tg' => 'Togo',
      'th' => 'Thailand',
      'ti' => 'Tunisia',
      'tk' => 'Turkmenistan',
      'tl' => 'Tokelau',
      'tma' => 'Tasmania',
      'tnu' => 'Tennessee',
      'to' => 'Tonga',
      'tr' => 'Trinidad and Tobago',
      'ts' => 'United Arab Emirates',
      'tu' => 'Turkey',
      'tv' => 'Tuvalu',
      'txu' => 'Texas',
      'tz' => 'Tanzania',
      'ua' => 'Egypt',
      'uc' => 'United States Misc. Caribbean Islands',
      'ug' => 'Uganda',
      'un' => 'Ukraine',
      'up' => 'United States Misc. Pacific Islands',
      'utu' => 'Utah',
      'uv' => 'Burkina Faso',
      'uy' => 'Uruguay',
      'uz' => 'Uzbekistan',
      'vau' => 'Virginia',
      'vb' => 'British Virgin Islands',
      'vc' => 'Vatican City',
      've' => 'Venezuela',
      'vi' => 'Virgin Islands of the United States',
      'vm' => 'Vietnam',
      'vp' => 'Various places',
      'vra' => 'Victoria',
      'vtu' => 'Vermont',
      'wau' => 'Washington (State)',
      'wea' => 'Western Australia',
      'wf' => 'Wallis and Futuna',
      'wiu' => 'Wisconsin',
      'wj' => 'West Bank of the Jordan River',
      'wk' => 'Wake Island',
      'wlk' => 'Wales',
      'ws' => 'Samoa',
      'wvu' => 'West Virginia',
      'wyu' => 'Wyoming',
      'xa' => 'Christmas Island (Indian Ocean)',
      'xb' => 'Cocos (Keeling) Islands',
      'xc' => 'Maldives',
      'xd' => 'Saint Kitts-Nevis',
      'xe' => 'Marshall Islands',
      'xf' => 'Midway Islands',
      'xga' => 'Coral Sea Islands Territory',
      'xh' => 'Niue',
      'xj' => 'Saint Helena',
      'xk' => 'Saint Lucia',
      'xl' => 'Saint Pierre and Miquelon',
      'xm' => 'Saint Vincent and the Grenadines',
      'xn' => 'North Macedonia',
      'xna' => 'New South Wales',
      'xo' => 'Slovakia',
      'xoa' => 'Northern Territory',
      'xp' => 'Spratly Island',
      'xr' => 'Czech Republic',
      'xra' => 'South Australia',
      'xs' => 'South Georgia and the South Sandwich Islands',
      'xv' => 'Slovenia',
      # NOTE(review): the double quotes embedded in the next label look like a
      # data-export artifact; left untouched here -- confirm before changing.
      'xx' => '"No place, unknown, or undetermined"',
      'xxc' => 'Canada',
      'xxk' => 'United Kingdom',
      'xxu' => 'United States',
      'ye' => 'Yemen',
      'ykc' => 'Yukon Territory',
      'za' => 'Zambia'
    }.freeze

    # @param [String] code a MARC country code, e.g. 'cau'
    # @return [String, nil] the label for the code, or nil when the code is not in COUNTRY_CODES
    def self.from_code(code)
      COUNTRY_CODES[code]
    end

    # @param [String, nil] uri a URI expected to start with MARC_COUNTRY_URI
    # @return [String, nil] the label for the code that follows the prefix; nil when uri is nil,
    #   does not start with MARC_COUNTRY_URI, or carries a code that is not in COUNTRY_CODES
    def self.from_uri(uri)
      return unless uri&.start_with?(MARC_COUNTRY_URI)

      COUNTRY_CODES[uri.delete_prefix(MARC_COUNTRY_URI)]
    end
  end
  # rubocop:enable Metrics/ClassLength
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Selectors
    # Selects the best event to use for indexing.
    #
    # Every strategy below looks at an event's own dates together with the
    # dates of its parallelEvents, and reads a date's type/status from the date
    # itself or from its structuredValue / parallelValue members.
    class EventSelector
      # Tries the strategies in order and returns the first event any of them finds:
      # 1. a date of the desired type that is marked primary
      # 2. a date of the desired type on an event whose type also matches
      # 3. an untyped date on an event whose type matches
      # 4. any date of the desired type
      #
      # @param [Array<Cocina::Models::Event>] events
      # @param [String] desired_date_type a string to match the date.type in a Cocina::Models::Event
      # @return [Cocina::Models::Event, nil] event best matching selected
      def self.select(events, desired_date_type)
        date_type_matches_and_primary(events, desired_date_type) ||
          date_and_event_type_match(events, desired_date_type) ||
          event_type_matches_but_no_date_type(events, desired_date_type) ||
          event_has_date_type(events, desired_date_type)
      end

      # @param [Cocina::Models::DescriptiveValue] date a date object from an event
      # @return [Boolean] true if the date, one of its structuredValues, or one of its
      #   parallelValues has status 'primary'; false otherwise
      def self.date_status_primary(date)
        candidates = [date, *Array(date.structuredValue), *Array(date.parallelValue)]
        candidates.any? { |value| value.status == 'primary' }
      end

      # @return [Cocina::Models::Event, nil] event with date of type desired_date_type and of status primary
      def self.date_type_matches_and_primary(events, desired_date_type)
        events.find do |event|
          dates_for(event).any? do |date|
            desired_date_type == date_type(date) && date_status_primary(date)
          end
        end
      end
      private_class_method :date_type_matches_and_primary

      # @return [Cocina::Models::Event, nil] event with date of type desired_date_type and the event has matching type
      def self.date_and_event_type_match(events, desired_date_type)
        events.find do |event|
          next unless event_type_matches(event, desired_date_type)

          dates_for(event).any? { |date| desired_date_type == date_type(date) }
        end
      end
      private_class_method :date_and_event_type_match

      # @return [Cocina::Models::Event, nil] event with type of desired_date_type and a date field without a type
      def self.event_type_matches_but_no_date_type(events, desired_date_type)
        events.find do |event|
          next unless event_type_matches(event, desired_date_type)

          dates_for(event).any? { |date| date_type(date).nil? }
        end
      end
      private_class_method :event_type_matches_but_no_date_type

      # @return [Cocina::Models::Event, nil] event with date of type desired_date_type
      def self.event_has_date_type(events, desired_date_type)
        events.find do |event|
          dates_for(event).any? { |date| desired_date_type == date_type(date) }
        end
      end
      private_class_method :event_has_date_type

      # @return [Boolean] true if event type matches or parallelEvent type matches the param
      def self.event_type_matches(event, desired_type)
        return true if event.type == desired_type

        Array(event.parallelEvent).any? { |parallel_event| parallel_event.type == desired_type }
      end
      private_class_method :event_type_matches

      # Collects the event's own dates plus the dates of each of its parallelEvents.
      #
      # @param [Cocina::Models::Event] event
      # @return [Array<Cocina::Models::DescriptiveValue>] flattened list with nils removed
      def self.dates_for(event)
        event_dates = Array(event.date) + Array(event.parallelEvent&.map(&:date))
        event_dates.flatten.compact
      end
      private_class_method :dates_for

      # Looks for a non-blank type on the date itself, then on its structuredValues,
      # then on its parallelValues, and returns the first one found.
      #
      # @param [Cocina::Models::DescriptiveValue, nil] date a date object from an event
      # @return [String, nil] type from date object; nil when date is nil or nothing carries a type
      def self.date_type(date)
        return if date.nil?

        [date, *Array(date.structuredValue), *Array(date.parallelValue)].each do |value|
          type = value&.type
          return type if type.present?
        end

        # Explicit nil: falling off the end of `each` would return the array instead.
        nil
      end
      private_class_method :date_type
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Selectors
    # Finds the pub date to index from events
    class PubYearSelector
      # @param [Array<Cocina::Models::Event>] events
      # @return [String, nil] the year value for Solr, or nil when no date is found
      def self.build(events)
        new(events).build
      end

      # @param [Array<Cocina::Models::Event>] events
      def initialize(events)
        @events = events
      end

      # @return [String, nil] earliest year parsed from the selected date, or nil when no date is found
      def build
        date = find_date
        # NOTE(review): if ParseDate.earliest_year returns nil, this yields "" rather than nil -- confirm intended.
        ParseDate.earliest_year(date).to_s if date.present?
      end

      # Picks the date to index, trying sources in this order: a primary date on any
      # event, then production, publication, capture, copyright and creation event
      # dates, then the first date found, then structured values of the first date,
      # and finally the dates of parallel events.
      #
      # @return [String, nil] the raw date value
      # rubocop:disable Metrics/PerceivedComplexity
      # rubocop:disable Metrics/CyclomaticComplexity
      def find_date
        primary_date(events) ||
          event_date(production_event, 'production') ||
          event_date(publication_event, 'publication') ||
          event_date(capture_event, 'capture') ||
          event_date(copyright_event, 'copyright') ||
          creation_date ||
          first_date ||
          structured_dates(events) ||
          find_date_in_parallel_events
      end
      # rubocop:enable Metrics/PerceivedComplexity
      # rubocop:enable Metrics/CyclomaticComplexity

      private

      attr_reader :events

      # Single, fully qualified entry point to the date builder. A bare
      # `EventDateBuilder` cannot be resolved from this lexical scope
      # (DorIndexing::Builders is not part of the nesting).
      def event_date(event, type)
        DorIndexing::Builders::EventDateBuilder.build(event, type)
      end

      # @return [Array] every date of the given events, with nils removed
      def dates_of(eligible_events)
        eligible_events.flat_map(&:date).compact
      end

      def find_date_in_parallel_events
        parallel_events = events.flat_map(&:parallelEvent).compact
        primary_date(parallel_events) ||
          structured_dates(parallel_events)
      end

      # @return [String, nil] value of the first date whose own status is 'primary'
      def primary_date(eligible_events)
        dates = dates_of(eligible_events)
        return if dates.blank?

        dates.find { |date| date.status == 'primary' }&.value
      end

      def first_date
        dates = dates_of(events)
        return if dates.blank?

        date_value(dates.first)
      end

      # Value of a date: its own value, else its primary parallelValue, else
      # whatever the first parallelValue offers (structured values before plain value).
      # rubocop:disable Metrics/AbcSize
      def date_value(date)
        return date.value if date.value
        return if date.parallelValue.blank?

        primary = date.parallelValue.find { |val| val.status == 'primary' }
        return primary.value if primary

        structured_values = date.parallelValue.first.structuredValue
        return find_within_structured_values(structured_values) if structured_values.present?

        date.parallelValue.first.value
      end
      # rubocop:enable Metrics/AbcSize

      # Only the first date of the given events is inspected.
      def structured_dates(eligible_events)
        dates = dates_of(eligible_events)
        return if dates.blank?

        structured_values = dates.first.structuredValue
        return if structured_values.blank?

        find_within_structured_values(structured_values)
      end

      # @return [String, nil] value of the primary structured value, else of the first one
      def find_within_structured_values(structured_values)
        primary = structured_values.find { |date| date.status == 'primary' }
        return primary.value if primary

        structured_values.first.value
      end

      def creation_date
        @creation_date ||= event_date(creation_event, 'creation')
      end

      def publication_event
        @publication_event ||= DorIndexing::Selectors::EventSelector.select(events, 'publication')
      end

      def creation_event
        @creation_event ||= DorIndexing::Selectors::EventSelector.select(events, 'creation')
      end

      def capture_event
        @capture_event ||= DorIndexing::Selectors::EventSelector.select(events, 'capture')
      end

      def copyright_event
        @copyright_event ||= DorIndexing::Selectors::EventSelector.select(events, 'copyright')
      end

      def production_event
        @production_event ||= DorIndexing::Selectors::EventSelector.select(events, 'production')
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  # Model for workflow fields
  #
  # Builds the milestone date fields and the status fields of a Solr document
  # from the status object returned by workflow_client.status.
  class WorkflowFields
    # @param [String] druid
    # @param [String, Integer] version
    # @param [#status] workflow_client object answering status(druid:, version:)
    # @return [Hash] the partial solr document
    def self.for(druid:, version:, workflow_client:)
      new(druid:, version:, workflow_client:).result
    end

    attr_reader :druid, :version, :workflow_client

    def initialize(druid:, version:, workflow_client:)
      @druid = druid
      @version = version
      @workflow_client = workflow_client
    end

    # @return [Hash] the partial solr document for processable concerns
    def result
      {}.tap do |solr_doc|
        add_sortable_milestones(solr_doc)
        add_status(solr_doc)
      end
    end

    private

    # Memoized so the workflow client is asked only once per instance.
    def status_service
      @status_service ||= workflow_client.status(druid:, version:)
    end

    def add_status(solr_doc)
      # This is the status on the Argo show page (e.g. "v4 In accessioning (described, published, deposited)")
      solr_doc['status_ssi'] = status_service.display
      return unless status_service.info[:status_code]

      # This is used for Argo's "Processing Status" facet
      solr_doc['processing_status_text_ssi'] = status_service.display_simplified
    end

    # @return [Hash{Object => Array<String>}] milestone name => UTC xmlschema timestamps, in the order reported
    def sortable_milestones
      status_service.milestones
                    .group_by { |milestone| milestone[:milestone] }
                    .transform_values { |entries| entries.map { |entry| entry[:at].utc.xmlschema } }
    end

    def add_sortable_milestones(solr_doc)
      sortable_milestones.each do |milestone, unordered_dates|
        dates = unordered_dates.sort
        # multi-valued field holding each distinct date once, e.g. published_dttsim
        solr_doc["#{milestone}_dttsim"] = dates.uniq
        # fields for OAI harvester to sort on: _dttsi is trie date +stored +indexed (single valued, i.e. sortable)
        # TODO: we really only need accessioned_earliest and registered_earliest
        solr_doc["#{milestone}_earliest_dttsi"] = dates.first
        solr_doc["#{milestone}_latest_dttsi"] = dates.last
      end
    end
  end
end
|