dor_indexing 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/.rubocop.yml +355 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +218 -0
- data/README.md +33 -0
- data/Rakefile +11 -0
- data/dor_indexing.gemspec +40 -0
- data/lib/dor_indexing/builders/all_search_text_builder.rb +58 -0
- data/lib/dor_indexing/builders/author_builder.rb +31 -0
- data/lib/dor_indexing/builders/collection_rights_description_builder.rb +29 -0
- data/lib/dor_indexing/builders/document_builder.rb +106 -0
- data/lib/dor_indexing/builders/event_date_builder.rb +71 -0
- data/lib/dor_indexing/builders/event_place_builder.rb +73 -0
- data/lib/dor_indexing/builders/geographic_builder.rb +82 -0
- data/lib/dor_indexing/builders/name_builder.rb +70 -0
- data/lib/dor_indexing/builders/orcid_builder.rb +62 -0
- data/lib/dor_indexing/builders/publisher_name_builder.rb +53 -0
- data/lib/dor_indexing/builders/temporal_builder.rb +56 -0
- data/lib/dor_indexing/builders/topic_builder.rb +96 -0
- data/lib/dor_indexing/cocina_repository.rb +24 -0
- data/lib/dor_indexing/indexers/administrative_tag_indexer.rb +69 -0
- data/lib/dor_indexing/indexers/collection_title_indexer.rb +27 -0
- data/lib/dor_indexing/indexers/composite_indexer.rb +36 -0
- data/lib/dor_indexing/indexers/content_metadata_indexer.rb +69 -0
- data/lib/dor_indexing/indexers/data_indexer.rb +66 -0
- data/lib/dor_indexing/indexers/default_object_rights_indexer.rb +36 -0
- data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +226 -0
- data/lib/dor_indexing/indexers/embargo_metadata_indexer.rb +32 -0
- data/lib/dor_indexing/indexers/identifiable_indexer.rb +92 -0
- data/lib/dor_indexing/indexers/identity_metadata_indexer.rb +85 -0
- data/lib/dor_indexing/indexers/process_indexer.rb +63 -0
- data/lib/dor_indexing/indexers/releasable_indexer.rb +62 -0
- data/lib/dor_indexing/indexers/rights_metadata_indexer.rb +59 -0
- data/lib/dor_indexing/indexers/role_metadata_indexer.rb +31 -0
- data/lib/dor_indexing/indexers/workflow_indexer.rb +51 -0
- data/lib/dor_indexing/indexers/workflows_indexer.rb +40 -0
- data/lib/dor_indexing/marc_country.rb +359 -0
- data/lib/dor_indexing/selectors/event_selector.rb +112 -0
- data/lib/dor_indexing/selectors/pub_year_selector.rb +119 -0
- data/lib/dor_indexing/version.rb +5 -0
- data/lib/dor_indexing/workflow_fields.rb +63 -0
- data/lib/dor_indexing/workflow_solr_document.rb +93 -0
- data/lib/dor_indexing.rb +19 -0
- metadata +173 -0
@@ -0,0 +1,359 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  # Helper for MARC country codes.
  #
  # Maps MARC country codes (and id.loc.gov country URIs built from those codes)
  # to the place names held in COUNTRY_CODES.
  # rubocop:disable Metrics/ClassLength
  class MarcCountry
    # URI prefix for a country; the MARC code is whatever follows this prefix.
    MARC_COUNTRY_URI = 'http://id.loc.gov/vocabulary/countries/'

    MARC_COUNTRY_CODE = 'marccountry'

    # MARC country code => place name
    COUNTRY_CODES = { # rubocop:disable Metrics/CollectionLiteralLength
      'aa' => 'Albania',
      'abc' => 'Alberta',
      'aca' => 'Australian Capital Territory',
      'ae' => 'Algeria',
      'af' => 'Afghanistan',
      'ag' => 'Argentina',
      'ai' => 'Armenia (Republic)',
      'aj' => 'Azerbaijan',
      'aku' => 'Alaska',
      'alu' => 'Alabama',
      'am' => 'Anguilla',
      'an' => 'Andorra',
      'ao' => 'Angola',
      'aq' => 'Antigua and Barbuda',
      'aru' => 'Arkansas',
      'as' => 'American Samoa',
      'at' => 'Australia',
      'au' => 'Austria',
      'aw' => 'Aruba',
      'ay' => 'Antarctica',
      'azu' => 'Arizona',
      'ba' => 'Bahrain',
      'bb' => 'Barbados',
      'bcc' => 'British Columbia',
      'bd' => 'Burundi',
      'be' => 'Belgium',
      'bf' => 'Bahamas',
      'bg' => 'Bangladesh',
      'bh' => 'Belize',
      'bi' => 'British Indian Ocean Territory',
      'bl' => 'Brazil',
      'bm' => 'Bermuda Islands',
      'bn' => 'Bosnia and Herzegovina',
      'bo' => 'Bolivia',
      'bp' => 'Solomon Islands',
      'br' => 'Burma',
      'bs' => 'Botswana',
      'bt' => 'Bhutan',
      'bu' => 'Bulgaria',
      'bv' => 'Bouvet Island',
      'bw' => 'Belarus',
      'bx' => 'Brunei',
      'ca' => 'Caribbean Netherlands',
      'cau' => 'California',
      'cb' => 'Cambodia',
      'cc' => 'China',
      'cd' => 'Chad',
      'ce' => 'Sri Lanka',
      'cf' => 'Congo (Brazzaville)',
      'cg' => 'Congo (Democratic Republic)',
      'ch' => 'China (Republic : 1949- )',
      'ci' => 'Croatia',
      'cj' => 'Cayman Islands',
      'ck' => 'Colombia',
      'cl' => 'Chile',
      'cm' => 'Cameroon',
      'co' => 'Curaçao',
      'cou' => 'Colorado',
      'cq' => 'Comoros',
      'cr' => 'Costa Rica',
      'ctu' => 'Connecticut',
      'cu' => 'Cuba',
      'cv' => 'Cabo Verde',
      'cw' => 'Cook Islands',
      'cx' => 'Central African Republic',
      'cy' => 'Cyprus',
      'dcu' => 'District of Columbia',
      'deu' => 'Delaware',
      'dk' => 'Denmark',
      'dm' => 'Benin',
      'dq' => 'Dominica',
      'dr' => 'Dominican Republic',
      'ea' => 'Eritrea',
      'ec' => 'Ecuador',
      'eg' => 'Equatorial Guinea',
      'em' => 'Timor-Leste',
      'enk' => 'England',
      'er' => 'Estonia',
      'es' => 'El Salvador',
      'et' => 'Ethiopia',
      'fa' => 'Faroe Islands',
      'fg' => 'French Guiana',
      'fi' => 'Finland',
      'fj' => 'Fiji',
      'fk' => 'Falkland Islands',
      'flu' => 'Florida',
      'fm' => 'Micronesia (Federated States)',
      'fp' => 'French Polynesia',
      'fr' => 'France',
      'fs' => 'Terres australes et antarctiques françaises',
      'ft' => 'Djibouti',
      'gau' => 'Georgia',
      'gb' => 'Kiribati',
      'gd' => 'Grenada',
      'gg' => 'Guernsey',
      'gh' => 'Ghana',
      'gi' => 'Gibraltar',
      'gl' => 'Greenland',
      'gm' => 'Gambia',
      'go' => 'Gabon',
      'gp' => 'Guadeloupe',
      'gr' => 'Greece',
      'gs' => 'Georgia (Republic)',
      'gt' => 'Guatemala',
      'gu' => 'Guam',
      'gv' => 'Guinea',
      'gw' => 'Germany',
      'gy' => 'Guyana',
      'gz' => 'Gaza Strip',
      'hiu' => 'Hawaii',
      'hm' => 'Heard and McDonald Islands',
      'ho' => 'Honduras',
      'ht' => 'Haiti',
      'hu' => 'Hungary',
      'iau' => 'Iowa',
      'ic' => 'Iceland',
      'idu' => 'Idaho',
      'ie' => 'Ireland',
      'ii' => 'India',
      'ilu' => 'Illinois',
      'im' => 'Isle of Man',
      'inu' => 'Indiana',
      'io' => 'Indonesia',
      'iq' => 'Iraq',
      'ir' => 'Iran',
      'is' => 'Israel',
      'it' => 'Italy',
      'iv' => "Côte d'Ivoire",
      'iy' => 'Iraq-Saudi Arabia Neutral Zone',
      'ja' => 'Japan',
      'je' => 'Jersey',
      'ji' => 'Johnston Atoll',
      'jm' => 'Jamaica',
      'jo' => 'Jordan',
      'ke' => 'Kenya',
      'kg' => 'Kyrgyzstan',
      'kn' => 'Korea (North)',
      'ko' => 'Korea (South)',
      'ksu' => 'Kansas',
      'ku' => 'Kuwait',
      'kv' => 'Kosovo',
      'kyu' => 'Kentucky',
      'kz' => 'Kazakhstan',
      'lau' => 'Louisiana',
      'lb' => 'Liberia',
      'le' => 'Lebanon',
      'lh' => 'Liechtenstein',
      'li' => 'Lithuania',
      'lo' => 'Lesotho',
      'ls' => 'Laos',
      'lu' => 'Luxembourg',
      'lv' => 'Latvia',
      'ly' => 'Libya',
      'mau' => 'Massachusetts',
      'mbc' => 'Manitoba',
      'mc' => 'Monaco',
      'mdu' => 'Maryland',
      'meu' => 'Maine',
      'mf' => 'Mauritius',
      'mg' => 'Madagascar',
      'miu' => 'Michigan',
      'mj' => 'Montserrat',
      'mk' => 'Oman',
      'ml' => 'Mali',
      'mm' => 'Malta',
      'mnu' => 'Minnesota',
      'mo' => 'Montenegro',
      'mou' => 'Missouri',
      'mp' => 'Mongolia',
      'mq' => 'Martinique',
      'mr' => 'Morocco',
      'msu' => 'Mississippi',
      'mtu' => 'Montana',
      'mu' => 'Mauritania',
      'mv' => 'Moldova',
      'mw' => 'Malawi',
      'mx' => 'Mexico',
      'my' => 'Malaysia',
      'mz' => 'Mozambique',
      'nbu' => 'Nebraska',
      'ncu' => 'North Carolina',
      'ndu' => 'North Dakota',
      'ne' => 'Netherlands',
      'nfc' => 'Newfoundland and Labrador',
      'ng' => 'Niger',
      'nhu' => 'New Hampshire',
      'nik' => 'Northern Ireland',
      'nju' => 'New Jersey',
      'nkc' => 'New Brunswick',
      'nl' => 'New Caledonia',
      'nmu' => 'New Mexico',
      'nn' => 'Vanuatu',
      'no' => 'Norway',
      'np' => 'Nepal',
      'nq' => 'Nicaragua',
      'nr' => 'Nigeria',
      'nsc' => 'Nova Scotia',
      'ntc' => 'Northwest Territories',
      'nu' => 'Nauru',
      'nuc' => 'Nunavut',
      'nvu' => 'Nevada',
      'nw' => 'Northern Mariana Islands',
      'nx' => 'Norfolk Island',
      'nyu' => 'New York (State)',
      'nz' => 'New Zealand',
      'ohu' => 'Ohio',
      'oku' => 'Oklahoma',
      'onc' => 'Ontario',
      'oru' => 'Oregon',
      'ot' => 'Mayotte',
      'pau' => 'Pennsylvania',
      'pc' => 'Pitcairn Island',
      'pe' => 'Peru',
      'pf' => 'Paracel Islands',
      'pg' => 'Guinea-Bissau',
      'ph' => 'Philippines',
      'pic' => 'Prince Edward Island',
      'pk' => 'Pakistan',
      'pl' => 'Poland',
      'pn' => 'Panama',
      'po' => 'Portugal',
      'pp' => 'Papua New Guinea',
      'pr' => 'Puerto Rico',
      'pw' => 'Palau',
      'py' => 'Paraguay',
      'qa' => 'Qatar',
      'qea' => 'Queensland',
      'quc' => 'Québec (Province)',
      'rb' => 'Serbia',
      're' => 'Réunion',
      'rh' => 'Zimbabwe',
      'riu' => 'Rhode Island',
      'rm' => 'Romania',
      'ru' => 'Russia (Federation)',
      'rw' => 'Rwanda',
      'sa' => 'South Africa',
      'sc' => 'Saint-Barthélemy',
      'scu' => 'South Carolina',
      'sd' => 'South Sudan',
      'sdu' => 'South Dakota',
      'se' => 'Seychelles',
      'sf' => 'Sao Tome and Principe',
      'sg' => 'Senegal',
      'sh' => 'Spanish North Africa',
      'si' => 'Singapore',
      'sj' => 'Sudan',
      'sl' => 'Sierra Leone',
      'sm' => 'San Marino',
      'sn' => 'Sint Maarten',
      'snc' => 'Saskatchewan',
      'so' => 'Somalia',
      'sp' => 'Spain',
      'sq' => 'Eswatini',
      'sr' => 'Surinam',
      'ss' => 'Western Sahara',
      'st' => 'Saint-Martin',
      'stk' => 'Scotland',
      'su' => 'Saudi Arabia',
      'sw' => 'Sweden',
      'sx' => 'Namibia',
      'sy' => 'Syria',
      'sz' => 'Switzerland',
      'ta' => 'Tajikistan',
      'tc' => 'Turks and Caicos Islands',
      'tg' => 'Togo',
      'th' => 'Thailand',
      'ti' => 'Tunisia',
      'tk' => 'Turkmenistan',
      'tl' => 'Tokelau',
      'tma' => 'Tasmania',
      'tnu' => 'Tennessee',
      'to' => 'Tonga',
      'tr' => 'Trinidad and Tobago',
      'ts' => 'United Arab Emirates',
      'tu' => 'Turkey',
      'tv' => 'Tuvalu',
      'txu' => 'Texas',
      'tz' => 'Tanzania',
      'ua' => 'Egypt',
      'uc' => 'United States Misc. Caribbean Islands',
      'ug' => 'Uganda',
      'un' => 'Ukraine',
      'up' => 'United States Misc. Pacific Islands',
      'utu' => 'Utah',
      'uv' => 'Burkina Faso',
      'uy' => 'Uruguay',
      'uz' => 'Uzbekistan',
      'vau' => 'Virginia',
      'vb' => 'British Virgin Islands',
      'vc' => 'Vatican City',
      've' => 'Venezuela',
      'vi' => 'Virgin Islands of the United States',
      'vm' => 'Vietnam',
      'vp' => 'Various places',
      'vra' => 'Victoria',
      'vtu' => 'Vermont',
      'wau' => 'Washington (State)',
      'wea' => 'Western Australia',
      'wf' => 'Wallis and Futuna',
      'wiu' => 'Wisconsin',
      'wj' => 'West Bank of the Jordan River',
      'wk' => 'Wake Island',
      'wlk' => 'Wales',
      'ws' => 'Samoa',
      'wvu' => 'West Virginia',
      'wyu' => 'Wyoming',
      'xa' => 'Christmas Island (Indian Ocean)',
      'xb' => 'Cocos (Keeling) Islands',
      'xc' => 'Maldives',
      'xd' => 'Saint Kitts-Nevis',
      'xe' => 'Marshall Islands',
      'xf' => 'Midway Islands',
      'xga' => 'Coral Sea Islands Territory',
      'xh' => 'Niue',
      'xj' => 'Saint Helena',
      'xk' => 'Saint Lucia',
      'xl' => 'Saint Pierre and Miquelon',
      'xm' => 'Saint Vincent and the Grenadines',
      'xn' => 'North Macedonia',
      'xna' => 'New South Wales',
      'xo' => 'Slovakia',
      'xoa' => 'Northern Territory',
      'xp' => 'Spratly Island',
      'xr' => 'Czech Republic',
      'xra' => 'South Australia',
      'xs' => 'South Georgia and the South Sandwich Islands',
      'xv' => 'Slovenia',
      # NOTE(review): the double quotes embedded in this value look like a data-import
      # artifact; left untouched because consumers may already depend on it -- confirm.
      'xx' => '"No place, unknown, or undetermined"',
      'xxc' => 'Canada',
      'xxk' => 'United Kingdom',
      'xxu' => 'United States',
      'ye' => 'Yemen',
      'ykc' => 'Yukon Territory',
      'za' => 'Zambia'
    }.freeze

    # @param [String] code a MARC country code, e.g. 'aa'
    # @return [String, nil] the place name for the code, or nil when the code is not in COUNTRY_CODES
    def self.from_code(code)
      COUNTRY_CODES[code]
    end

    # @param [String, nil] uri a country URI of the form "#{MARC_COUNTRY_URI}<code>"
    # @return [String, nil] the place name for the code at the end of the URI; nil when
    #   uri is nil, does not start with MARC_COUNTRY_URI, or carries an unknown code
    def self.from_uri(uri)
      # Use the constant for both the check and the strip so they cannot drift apart.
      return unless uri&.start_with?(MARC_COUNTRY_URI)

      COUNTRY_CODES[uri.delete_prefix(MARC_COUNTRY_URI)]
    end
  end
  # rubocop:enable Metrics/ClassLength
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Selectors
    # Chooses, from a list of events, the one to use when indexing a given kind of date.
    class EventSelector
      class << self
        # Tries each matching strategy in priority order and returns the first hit.
        #
        # @param [Array<Cocina::Models::Event>] events
        # @param [String] desired_date_type a string to match the date.type in a Cocina::Models::Event
        # @return [Cocina::Models::Event, nil] the best matching event, or nil when nothing matches
        def select(events, desired_date_type)
          strategies = %i[
            date_type_matches_and_primary
            date_and_event_type_match
            event_type_matches_but_no_date_type
            event_has_date_type
          ]
          strategies.each do |strategy|
            chosen = send(strategy, events, desired_date_type)
            return chosen if chosen
          end
          nil
        end

        # @param [Cocina::Models::DescriptiveValue] date a date object from an event
        # @return [Object, nil] truthy when the date itself, one of its structuredValue entries,
        #   or one of its parallelValue entries has status 'primary'
        def date_status_primary(date)
          primary = ->(value) { value.status == 'primary' }
          structured_match = Array(date.structuredValue).find(&primary)
          parallel_match = Array(date.parallelValue).find(&primary)
          return true if primary.call(date)

          structured_match || parallel_match
        end

        private

        # @return [Array] every non-nil date on the event and on its parallel events, flattened
        def event_dates(event)
          parallel_dates = event.parallelEvent&.map(&:date)
          (Array(event.date) + Array(parallel_dates)).flatten.compact
        end

        # @return [Cocina::Models::Event, nil] event with date of type desired_date_type and of status primary
        def date_type_matches_and_primary(events, desired_date_type)
          events.find do |event|
            event_dates(event).any? do |date|
              desired_date_type == date_type(date) && date_status_primary(date)
            end
          end
        end

        # @return [Cocina::Models::Event, nil] event with date of type desired_date_type and the event has matching type
        def date_and_event_type_match(events, desired_date_type)
          events.find do |event|
            event_type_matches(event, desired_date_type) &&
              event_dates(event).any? { |date| desired_date_type == date_type(date) }
          end
        end

        # @return [Cocina::Models::Event, nil] event with type of desired_date_type and a date field without a type
        def event_type_matches_but_no_date_type(events, desired_date_type)
          events.find do |event|
            event_type_matches(event, desired_date_type) &&
              event_dates(event).any? { |date| date_type(date).nil? }
          end
        end

        # @return [Cocina::Models::Event, nil] event with date of type desired_date_type
        def event_has_date_type(events, desired_date_type)
          events.find do |event|
            event_dates(event).any? { |date| desired_date_type == date_type(date) }
          end
        end

        # @return [Boolean] true if event type matches or parallelEvent type matches the param
        def event_type_matches(event, desired_type)
          return true if event.type == desired_type

          # Parenthesised so that a nil parallelEvent still yields false rather than nil.
          (event.parallelEvent&.find { |candidate| candidate.type == desired_type }).present?
        end

        # Looks for a type on the date itself, then on its structuredValue entries,
        # then on its parallelValue entries, returning the first one that is present.
        #
        # @param [Cocina::Models::DescriptiveValue] date a date object from an event
        # @return [String, nil] type from date object
        def date_type(date)
          return date.type if date&.type.present?

          %i[structuredValue parallelValue].each do |attribute|
            Array(date.public_send(attribute)).each do |nested|
              return nested.type if nested&.type.present?
            end
          end
          nil
        end
      end
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Selectors
    # Finds the pub date to index from events
    class PubYearSelector
      # @param [Array<Cocina::Models::Events>] events
      # @return [String, nil] the year value for Solr, or nil when no date could be found
      def self.build(events)
        new(events).build
      end

      # @param [Array<Cocina::Models::Events>] events
      def initialize(events)
        @events = events
      end

      # @return [String, nil] the earliest year parsed from the selected date, as a string
      def build
        date = find_date
        ParseDate.earliest_year(date).to_s if date.present?
      end

      # Walks through the candidate sources in priority order and returns the first date found.
      #
      # The builder is referenced by its fully qualified name (as #creation_date does):
      # a bare EventDateBuilder is not resolvable from inside DorIndexing::Selectors.
      #
      # @return [String, nil] the raw date value to parse
      # rubocop:disable Metrics/PerceivedComplexity
      # rubocop:disable Metrics/CyclomaticComplexity
      def find_date
        primary_date(events) ||
          DorIndexing::Builders::EventDateBuilder.build(production_event, 'production') ||
          DorIndexing::Builders::EventDateBuilder.build(publication_event, 'publication') ||
          DorIndexing::Builders::EventDateBuilder.build(capture_event, 'capture') ||
          DorIndexing::Builders::EventDateBuilder.build(copyright_event, 'copyright') ||
          creation_date ||
          first_date ||
          structured_dates(events) ||
          find_date_in_parallel_events
      end
      # rubocop:enable Metrics/PerceivedComplexity
      # rubocop:enable Metrics/CyclomaticComplexity

      private

      attr_reader :events

      # Last resort: repeat the primary / structured lookups on the events' parallel events.
      def find_date_in_parallel_events
        parallel_events = events.flat_map(&:parallelEvent).compact
        primary_date(parallel_events) ||
          structured_dates(parallel_events)
      end

      # @return [String, nil] value of the first date (across eligible_events) whose status is 'primary'
      def primary_date(eligible_events)
        dates = eligible_events.flat_map(&:date).compact
        return if dates.blank?

        dates.find { |date| date.status == 'primary' }&.value
      end

      # @return [String, nil] value derived from the very first date found on the events
      def first_date
        dates = events.flat_map(&:date).compact
        return if dates.blank?

        date_value(dates.first)
      end

      # Resolves a date to a plain value: its own value, else the primary parallelValue,
      # else a value from the first parallelValue (structured values take precedence).
      # rubocop:disable Metrics/AbcSize
      def date_value(date)
        return date.value if date.value
        return if date.parallelValue.blank?

        primary = date.parallelValue.find { |val| val.status == 'primary' }
        return primary.value if primary

        structured_values = date.parallelValue.first.structuredValue
        return find_within_structured_values(structured_values) if structured_values.present?

        date.parallelValue.first.value
      end
      # rubocop:enable Metrics/AbcSize

      # @return [String, nil] value picked from the structuredValue of the first date on eligible_events
      def structured_dates(eligible_events)
        dates = eligible_events.flat_map(&:date).compact
        return if dates.blank?

        structured_values = dates.first.structuredValue
        return if structured_values.blank?

        find_within_structured_values(structured_values)
      end

      # @return [String, nil] value of the primary structured value, else of the first one
      def find_within_structured_values(structured_values)
        primary = structured_values.find { |date| date.status == 'primary' }
        return primary.value if primary

        structured_values.first.value
      end

      def creation_date
        @creation_date ||= DorIndexing::Builders::EventDateBuilder.build(creation_event, 'creation')
      end

      def publication_event
        @publication_event ||= DorIndexing::Selectors::EventSelector.select(events, 'publication')
      end

      def creation_event
        @creation_event ||= DorIndexing::Selectors::EventSelector.select(events, 'creation')
      end

      def capture_event
        @capture_event ||= DorIndexing::Selectors::EventSelector.select(events, 'capture')
      end

      def copyright_event
        @copyright_event ||= DorIndexing::Selectors::EventSelector.select(events, 'copyright')
      end

      def production_event
        @production_event ||= DorIndexing::Selectors::EventSelector.select(events, 'production')
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  # Builds the workflow-derived fields of a Solr document
  class WorkflowFields
    attr_reader :druid, :version, :workflow_client

    # Convenience entry point: construct and return the fields in one call.
    # @return [Hash] the partial solr document for processable concerns
    def self.for(druid:, version:, workflow_client:)
      new(druid:, version:, workflow_client:).result
    end

    # @param druid the object identifier handed to workflow_client.status
    # @param version the object version handed to workflow_client.status
    # @param workflow_client an object responding to status(druid:, version:)
    def initialize(druid:, version:, workflow_client:)
      @druid = druid
      @version = version
      @workflow_client = workflow_client
    end

    # @return [Hash] the partial solr document for processable concerns
    def result
      solr_doc = {}
      add_sortable_milestones(solr_doc)
      add_status(solr_doc)
      solr_doc
    end

    private

    # The status object is fetched from the workflow client once and memoized.
    def status_service
      @status_service ||= workflow_client.status(druid:, version:)
    end

    def add_status(solr_doc)
      # This is the status on the Argo show page (e.g. "v4 In accessioning (described, published, deposited)")
      solr_doc['status_ssi'] = status_service.display

      # This is used for Argo's "Processing Status" facet; only set when a status code is known
      if status_service.info[:status_code]
        solr_doc['processing_status_text_ssi'] = status_service.display_simplified
      end
    end

    # @return [Hash{Object => Array<String>}] milestone name => UTC xmlschema timestamps, in reported order
    def sortable_milestones
      status_service.milestones
                    .group_by { |entry| entry[:milestone] }
                    .transform_values { |entries| entries.map { |entry| entry[:at].utc.xmlschema } }
    end

    def add_sortable_milestones(solr_doc)
      sortable_milestones.each do |name, timestamps|
        ordered = timestamps.sort

        # create the published_dttsim field and the like, skipping timestamps already recorded
        recorded = (solr_doc["#{name}_dttsim"] ||= [])
        recorded.concat(ordered.uniq - recorded)

        # fields for OAI harvester to sort on: _dttsi is trie date +stored +indexed (single valued, i.e. sortable)
        # TODO: we really only need accessioned_earliest and registered_earliest
        solr_doc["#{name}_earliest_dttsi"] = ordered.first
        solr_doc["#{name}_latest_dttsi"] = ordered.last
      end
    end
  end
end
|