stanford-mods 2.1.0 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/lib/marc_countries.rb +387 -0
- data/lib/stanford-mods.rb +1 -0
- data/lib/stanford-mods/date_parsing.rb +7 -1
- data/lib/stanford-mods/imprint.rb +418 -0
- data/lib/stanford-mods/origin_info.rb +6 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/date_parsing_spec.rb +8 -4
- data/spec/fixtures/searchworks_imprint_data.rb +1300 -0
- data/spec/fixtures/searchworks_pub_date_data.rb +52 -4
- data/spec/imprint_spec.rb +251 -0
- data/spec/sw_publication_spec.rb +11 -0
- metadata +9 -4
- data/config/mappings_hash.rb +0 -78
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: abcbc3357a121647ed103d8341b2cdb48d0c600d
|
4
|
+
data.tar.gz: 789c2fa194b7c837c7cdf10898eeab3b4b3636b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14231328428d5a250701cdac1f16977ce67ddc6db89fa465635bd7ad3171391c7b11921146020c8992f2af4f9647abbaa0f4f11954ee6a30fb039dd4e1426c32
|
7
|
+
data.tar.gz: 253c2201ff7f6ffc206226523ae6bcac56eb48cb386bd93789e2c1babf03f5808adaddcdf9881222b83a76e89567c551ac9c72568e29e784977c9eb117df9dea
|
data/.travis.yml
CHANGED
@@ -0,0 +1,387 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Map 2- or 3-letter MARC Country/Location Code to User Friendly Name
# includes discontinued codes, because they're probably in our data
# from https://www.loc.gov/marc/countries/countries_code.html 01/20/2016
#
# Hyphenated names (e.g. 'Timor-Leste', 'Guinea-Bissau') carry their hyphens;
# the table is frozen because it is shared, read-only lookup data.
MARC_COUNTRIES =
  {
    'aa' => 'Albania',
    'abc' => 'Alberta',
    'ac' => 'Ashmore and Cartier Islands',
    'aca' => 'Australian Capital Territory',
    'ae' => 'Algeria',
    'af' => 'Afghanistan',
    'ag' => 'Argentina',
    'ai' => 'Armenia (Republic)',
    'air' => 'Armenian S.S.R.',
    'aj' => 'Azerbaijan',
    'ajr' => 'Azerbaijan S.S.R.',
    'aku' => 'Alaska',
    'alu' => 'Alabama',
    'am' => 'Anguilla',
    'an' => 'Andorra',
    'ao' => 'Angola',
    'aq' => 'Antigua and Barbuda',
    'aru' => 'Arkansas',
    'as' => 'American Samoa',
    'at' => 'Australia',
    'au' => 'Austria',
    'aw' => 'Aruba',
    'ay' => 'Antarctica',
    'azu' => 'Arizona',
    'ba' => 'Bahrain',
    'bb' => 'Barbados',
    'bcc' => 'British Columbia',
    'bd' => 'Burundi',
    'be' => 'Belgium',
    'bf' => 'Bahamas',
    'bg' => 'Bangladesh',
    'bh' => 'Belize',
    'bi' => 'British Indian Ocean Territory',
    'bl' => 'Brazil',
    'bm' => 'Bermuda Islands',
    'bn' => 'Bosnia and Herzegovina',
    'bo' => 'Bolivia',
    'bp' => 'Solomon Islands',
    'br' => 'Burma',
    'bs' => 'Botswana',
    'bt' => 'Bhutan',
    'bu' => 'Bulgaria',
    'bv' => 'Bouvet Island',
    'bw' => 'Belarus',
    'bwr' => 'Byelorussian S.S.R.',
    'bx' => 'Brunei',
    'ca' => 'Caribbean Netherlands',
    'cau' => 'California',
    'cb' => 'Cambodia',
    'cc' => 'China',
    'cd' => 'Chad',
    'ce' => 'Sri Lanka',
    'cf' => 'Congo (Brazzaville)',
    'cg' => 'Congo (Democratic Republic)',
    'ch' => 'China (Republic : 1949- )',
    'ci' => 'Croatia',
    'cj' => 'Cayman Islands',
    'ck' => 'Colombia',
    'cl' => 'Chile',
    'cm' => 'Cameroon',
    'cn' => 'Canada',
    'co' => 'Curaçao',
    'cou' => 'Colorado',
    'cp' => 'Canton and Enderbury Islands',
    'cq' => 'Comoros',
    'cr' => 'Costa Rica',
    'cs' => 'Czechoslovakia',
    'ctu' => 'Connecticut',
    'cu' => 'Cuba',
    'cv' => 'Cabo Verde',
    'cw' => 'Cook Islands',
    'cx' => 'Central African Republic',
    'cy' => 'Cyprus',
    'cz' => 'Canal Zone',
    'dcu' => 'District of Columbia',
    'deu' => 'Delaware',
    'dk' => 'Denmark',
    'dm' => 'Benin',
    'dq' => 'Dominica',
    'dr' => 'Dominican Republic',
    'ea' => 'Eritrea',
    'ec' => 'Ecuador',
    'eg' => 'Equatorial Guinea',
    'em' => 'Timor-Leste',
    'enk' => 'England',
    'er' => 'Estonia',
    'err' => 'Estonia',
    'es' => 'El Salvador',
    'et' => 'Ethiopia',
    'fa' => 'Faroe Islands',
    'fg' => 'French Guiana',
    'fi' => 'Finland',
    'fj' => 'Fiji',
    'fk' => 'Falkland Islands',
    'flu' => 'Florida',
    'fm' => 'Micronesia (Federated States)',
    'fp' => 'French Polynesia',
    'fr' => 'France',
    'fs' => 'Terres australes et antarctiques françaises',
    'ft' => 'Djibouti',
    'gau' => 'Georgia',
    'gb' => 'Kiribati',
    'gd' => 'Grenada',
    'ge' => 'Germany (East)',
    'gh' => 'Ghana',
    'gi' => 'Gibraltar',
    'gl' => 'Greenland',
    'gm' => 'Gambia',
    'gn' => 'Gilbert and Ellice Islands',
    'go' => 'Gabon',
    'gp' => 'Guadeloupe',
    'gr' => 'Greece',
    'gs' => 'Georgia (Republic)',
    'gsr' => 'Georgian S.S.R.',
    'gt' => 'Guatemala',
    'gu' => 'Guam',
    'gv' => 'Guinea',
    'gw' => 'Germany',
    'gy' => 'Guyana',
    'gz' => 'Gaza Strip',
    'hiu' => 'Hawaii',
    'hk' => 'Hong Kong',
    'hm' => 'Heard and McDonald Islands',
    'ho' => 'Honduras',
    'ht' => 'Haiti',
    'hu' => 'Hungary',
    'iau' => 'Iowa',
    'ic' => 'Iceland',
    'idu' => 'Idaho',
    'ie' => 'Ireland',
    'ii' => 'India',
    'ilu' => 'Illinois',
    'inu' => 'Indiana',
    'io' => 'Indonesia',
    'iq' => 'Iraq',
    'ir' => 'Iran',
    'is' => 'Israel',
    'it' => 'Italy',
    'iu' => 'Israel-Syria Demilitarized Zones',
    'iv' => "Côte d'Ivoire",
    'iw' => 'Israel-Jordan Demilitarized Zones',
    'iy' => 'Iraq-Saudi Arabia Neutral Zone',
    'ja' => 'Japan',
    'ji' => 'Johnston Atoll',
    'jm' => 'Jamaica',
    'jn' => 'Jan Mayen',
    'jo' => 'Jordan',
    'ke' => 'Kenya',
    'kg' => 'Kyrgyzstan',
    'kgr' => 'Kirghiz S.S.R.',
    'kn' => 'Korea (North)',
    'ko' => 'Korea (South)',
    'ksu' => 'Kansas',
    'ku' => 'Kuwait',
    'kv' => 'Kosovo',
    'kyu' => 'Kentucky',
    'kz' => 'Kazakhstan',
    'kzr' => 'Kazakh S.S.R.',
    'lau' => 'Louisiana',
    'lb' => 'Liberia',
    'le' => 'Lebanon',
    'lh' => 'Liechtenstein',
    'li' => 'Lithuania',
    'lir' => 'Lithuania',
    'ln' => 'Central and Southern Line Islands',
    'lo' => 'Lesotho',
    'ls' => 'Laos',
    'lu' => 'Luxembourg',
    'lv' => 'Latvia',
    'lvr' => 'Latvia',
    'ly' => 'Libya',
    'mau' => 'Massachusetts',
    'mbc' => 'Manitoba',
    'mc' => 'Monaco',
    'mdu' => 'Maryland',
    'meu' => 'Maine',
    'mf' => 'Mauritius',
    'mg' => 'Madagascar',
    'mh' => 'Macao',
    'miu' => 'Michigan',
    'mj' => 'Montserrat',
    'mk' => 'Oman',
    'ml' => 'Mali',
    'mm' => 'Malta',
    'mnu' => 'Minnesota',
    'mo' => 'Montenegro',
    'mou' => 'Missouri',
    'mp' => 'Mongolia',
    'mq' => 'Martinique',
    'mr' => 'Morocco',
    'msu' => 'Mississippi',
    'mtu' => 'Montana',
    'mu' => 'Mauritania',
    'mv' => 'Moldova',
    'mvr' => 'Moldavian S.S.R.',
    'mw' => 'Malawi',
    'mx' => 'Mexico',
    'my' => 'Malaysia',
    'mz' => 'Mozambique',
    'na' => 'Netherlands Antilles',
    'nbu' => 'Nebraska',
    'ncu' => 'North Carolina',
    'ndu' => 'North Dakota',
    'ne' => 'Netherlands',
    'nfc' => 'Newfoundland and Labrador',
    'ng' => 'Niger',
    'nhu' => 'New Hampshire',
    'nik' => 'Northern Ireland',
    'nju' => 'New Jersey',
    'nkc' => 'New Brunswick',
    'nl' => 'New Caledonia',
    'nm' => 'Northern Mariana Islands',
    'nmu' => 'New Mexico',
    'nn' => 'Vanuatu',
    'no' => 'Norway',
    'np' => 'Nepal',
    'nq' => 'Nicaragua',
    'nr' => 'Nigeria',
    'nsc' => 'Nova Scotia',
    'ntc' => 'Northwest Territories',
    'nu' => 'Nauru',
    'nuc' => 'Nunavut',
    'nvu' => 'Nevada',
    'nw' => 'Northern Mariana Islands',
    'nx' => 'Norfolk Island',
    'nyu' => 'New York (State)',
    'nz' => 'New Zealand',
    'ohu' => 'Ohio',
    'oku' => 'Oklahoma',
    'onc' => 'Ontario',
    'oru' => 'Oregon',
    'ot' => 'Mayotte',
    'pau' => 'Pennsylvania',
    'pc' => 'Pitcairn Island',
    'pe' => 'Peru',
    'pf' => 'Paracel Islands',
    'pg' => 'Guinea-Bissau',
    'ph' => 'Philippines',
    'pic' => 'Prince Edward Island',
    'pk' => 'Pakistan',
    'pl' => 'Poland',
    'pn' => 'Panama',
    'po' => 'Portugal',
    'pp' => 'Papua New Guinea',
    'pr' => 'Puerto Rico',
    'pt' => 'Portuguese Timor',
    'pw' => 'Palau',
    'py' => 'Paraguay',
    'qa' => 'Qatar',
    'qea' => 'Queensland',
    'quc' => 'Québec (Province)',
    'rb' => 'Serbia',
    're' => 'Réunion',
    'rh' => 'Zimbabwe',
    'riu' => 'Rhode Island',
    'rm' => 'Romania',
    'ru' => 'Russia (Federation)',
    'rur' => 'Russian S.F.S.R.',
    'rw' => 'Rwanda',
    'ry' => 'Ryukyu Islands, Southern',
    'sa' => 'South Africa',
    'sb' => 'Svalbard',
    'sc' => 'Saint-Barthélemy',
    'scu' => 'South Carolina',
    'sd' => 'South Sudan',
    'sdu' => 'South Dakota',
    'se' => 'Seychelles',
    'sf' => 'Sao Tome and Principe',
    'sg' => 'Senegal',
    'sh' => 'Spanish North Africa',
    'si' => 'Singapore',
    'sj' => 'Sudan',
    'sk' => 'Sikkim',
    'sl' => 'Sierra Leone',
    'sm' => 'San Marino',
    'sn' => 'Sint Maarten',
    'snc' => 'Saskatchewan',
    'so' => 'Somalia',
    'sp' => 'Spain',
    'sq' => 'Swaziland',
    'sr' => 'Surinam',
    'ss' => 'Western Sahara',
    'st' => 'Saint-Martin',
    'stk' => 'Scotland',
    'su' => 'Saudi Arabia',
    'sv' => 'Swan Islands',
    'sw' => 'Sweden',
    'sx' => 'Namibia',
    'sy' => 'Syria',
    'sz' => 'Switzerland',
    'ta' => 'Tajikistan',
    'tar' => 'Tajik S.S.R.',
    'tc' => 'Turks and Caicos Islands',
    'tg' => 'Togo',
    'th' => 'Thailand',
    'ti' => 'Tunisia',
    'tk' => 'Turkmenistan',
    'tkr' => 'Turkmen S.S.R.',
    'tl' => 'Tokelau',
    'tma' => 'Tasmania',
    'tnu' => 'Tennessee',
    'to' => 'Tonga',
    'tr' => 'Trinidad and Tobago',
    'ts' => 'United Arab Emirates',
    'tt' => 'Trust Territory of the Pacific Islands',
    'tu' => 'Turkey',
    'tv' => 'Tuvalu',
    'txu' => 'Texas',
    'tz' => 'Tanzania',
    'ua' => 'Egypt',
    'uc' => 'United States Misc. Caribbean Islands',
    'ug' => 'Uganda',
    'ui' => 'United Kingdom Misc. Islands',
    'uik' => 'United Kingdom Misc. Islands',
    'uk' => 'United Kingdom',
    'un' => 'Ukraine',
    'unr' => 'Ukraine',
    'up' => 'United States Misc. Pacific Islands',
    'ur' => 'Soviet Union',
    'us' => 'United States',
    'utu' => 'Utah',
    'uv' => 'Burkina Faso',
    'uy' => 'Uruguay',
    'uz' => 'Uzbekistan',
    'uzr' => 'Uzbek S.S.R.',
    'vau' => 'Virginia',
    'vb' => 'British Virgin Islands',
    'vc' => 'Vatican City',
    've' => 'Venezuela',
    'vi' => 'Virgin Islands of the United States',
    'vm' => 'Vietnam',
    'vn' => 'Vietnam, North',
    'vp' => 'Various places',
    'vra' => 'Victoria',
    'vs' => 'Vietnam, South',
    'vtu' => 'Vermont',
    'wau' => 'Washington (State)',
    'wb' => 'West Berlin',
    'wea' => 'Western Australia',
    'wf' => 'Wallis and Futuna',
    'wiu' => 'Wisconsin',
    'wj' => 'West Bank of the Jordan River',
    'wk' => 'Wake Island',
    'wlk' => 'Wales',
    'ws' => 'Samoa',
    'wvu' => 'West Virginia',
    'wyu' => 'Wyoming',
    'xa' => 'Christmas Island (Indian Ocean)',
    'xb' => 'Cocos (Keeling) Islands',
    'xc' => 'Maldives',
    'xd' => 'Saint Kitts-Nevis',
    'xe' => 'Marshall Islands',
    'xf' => 'Midway Islands',
    'xga' => 'Coral Sea Islands Territory',
    'xh' => 'Niue',
    'xi' => 'Saint Kitts-Nevis-Anguilla',
    'xj' => 'Saint Helena',
    'xk' => 'Saint Lucia',
    'xl' => 'Saint Pierre and Miquelon',
    'xm' => 'Saint Vincent and the Grenadines',
    'xn' => 'Macedonia',
    'xna' => 'New South Wales',
    'xo' => 'Slovakia',
    'xoa' => 'Northern Territory',
    'xp' => 'Spratly Island',
    'xr' => 'Czech Republic',
    'xra' => 'South Australia',
    'xs' => 'South Georgia and the South Sandwich Islands',
    'xv' => 'Slovenia',
    # 'xx' => 'No place, unknown, or undetermined',
    'xxc' => 'Canada',
    'xxk' => 'United Kingdom',
    'xxr' => 'Soviet Union',
    'xxu' => 'United States',
    'ye' => 'Yemen',
    'ykc' => 'Yukon Territory',
    'ys' => "Yemen (People's Democratic Republic)",
    'yu' => 'Serbia and Montenegro',
    'za' => 'Zambia'
  }.freeze
|
data/lib/stanford-mods.rb
CHANGED
@@ -2,6 +2,7 @@ require 'mods'
|
|
2
2
|
require 'stanford-mods/date_parsing'
|
3
3
|
require 'stanford-mods/coordinate'
|
4
4
|
require 'stanford-mods/geo_spatial'
|
5
|
+
require 'stanford-mods/imprint'
|
5
6
|
require 'stanford-mods/name'
|
6
7
|
require 'stanford-mods/origin_info'
|
7
8
|
require 'stanford-mods/physical_location'
|
@@ -281,10 +281,16 @@ module Stanford
|
|
281
281
|
|
282
282
|
# get display value for date String containing yyy, yy, y, -y, -yy, -yyy
|
283
283
|
# negative number strings will be changed to B.C. strings
|
284
|
+
# note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
|
285
|
+
# "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
|
286
|
+
# There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
|
287
|
+
# See also https://consul.stanford.edu/display/chimera/MODS+display+rules for EDTF
|
284
288
|
# Display string for an early numeric year string (y, yy, yyy, optionally negative).
#
# Zero and negative numbers are rendered as B.C.: because there is no year zero,
# numeric 0 is shown as "1 B.C.", -1 as "2 B.C.", and so on. Positive numbers are
# rendered as A.D. with leading zeros removed.
#
# @return [String, nil] the display string; nil when orig_date_str does not match EARLY_NUMERIC
def display_str_for_early_numeric
  return unless orig_date_str.match(EARLY_NUMERIC)
  year = orig_date_str.to_i
  # compare numerically so zero-padded forms of zero ('00', '000', '-0') are handled like '0'
  return "#{1 - year} B.C." if year <= 0
  "#{year} A.D."
end
|
@@ -0,0 +1,418 @@
|
|
1
|
+
module Stanford
|
2
|
+
module Mods
|
3
|
+
##
|
4
|
+
# Get the imprint information from originInfo elements (and sub elements) to create display strings
|
5
|
+
#
|
6
|
+
# This code is adapted from the mods_display gem. In a perfect world, this
|
7
|
+
# code would make use of the date_parsing class instead of reimplementing pieces of it;
|
8
|
+
# however, the date_parsing class only does years, and this does finer tuned dates and also
|
9
|
+
# reformats them according to the encoding.
|
10
|
+
class Imprint
|
11
|
+
# Keeps a reference to the originInfo nodes that the imprint strings are built from.
#
# @param originInfo_ng_nodeset [Nokogiri::XML::NodeSet] originInfo nodes
def initialize(originInfo_ng_nodeset)
  @originInfo_ng_nodeset = originInfo_ng_nodeset
end
|
15
|
+
|
16
|
+
require 'marc_countries'
|
17
|
+
|
18
|
+
# Builds one imprint statement per originInfo element, in the shape
# "edition - place : publisher, dates"; parts that are missing are left out,
# and elements that yield an empty statement are skipped.
#
# @return [Array<String>] each String is an imprint statement from a single originInfo element
def imprint_statements
  @originInfo_ng_nodeset.each_with_object([]) do |origin_info_node, statements|
    edition_part = edition_vals_str(origin_info_node)
    place_part = place_vals_str(origin_info_node)
    publisher_part = publisher_vals_str(origin_info_node)
    date_part = date_str(origin_info_node)

    # assembled inside-out: place/publisher first, then the edition, then the dates
    statement = compact_and_join_with_delimiter([place_part, publisher_part], ' : ')
    statement = compact_and_join_with_delimiter([edition_part, statement], ' - ')
    statement = compact_and_join_with_delimiter([statement, date_part], ', ')

    statements << statement unless statement.empty?
  end
end
|
35
|
+
|
36
|
+
# Joins all imprint statements into a single display string.
#
# The statements are built once and reused, and emptiness is checked with
# Array#empty? so no ActiveSupport core extension is needed.
#
# @return [String, nil] the statements joined with '; ', or nil when there are none
def display_str
  statements = imprint_statements
  statements.join('; ') unless statements.empty?
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
# Joins the non-blank values with the delimiter, without doubling up punctuation.
#
# nil and whitespace-only values are dropped first. When the delimiter itself ends
# in punctuation (e.g. ' : ' or ', '), a value that already ends in terminating
# punctuation is followed by a single space instead of the delimiter.
# The given strings are never modified, so frozen strings are accepted.
#
# @param values [Array<String, nil>] the pieces to join
# @param delimiter [String] the separator to put between pieces
# @return [String] the joined string ('' when nothing is left to join)
def compact_and_join_with_delimiter(values, delimiter)
  compact_values = values.compact.reject { |v| v.strip.empty? }
  return compact_values.join(delimiter) if compact_values.length == 1 ||
                                           !ends_in_terminating_punctuation?(delimiter)
  last_index = compact_values.length - 1
  compact_values.each_with_index.map do |value, i|
    # the last value gets a space too; the final strip removes it again
    use_space = (i == last_index) || ends_in_terminating_punctuation?(value)
    value + (use_space ? ' ' : delimiter)
  end.join.strip
end
|
55
|
+
|
56
|
+
# @param value [String]
# @return [Boolean] true when the value, ignoring surrounding whitespace,
#   ends with a period, comma, colon or semicolon
def ends_in_terminating_punctuation?(value)
  last_char = value.strip[-1]
  ['.', ',', ':', ';'].include?(last_char)
end
|
59
|
+
|
60
|
+
# @param origin_info_node an originInfo node that responds to #edition
# @return [String] the text of every non-blank edition element, joined with a space
def edition_vals_str(origin_info_node)
  edition_texts = origin_info_node.edition.map(&:text)
  edition_texts.reject { |edition_text| edition_text.strip.empty? }.join(' ').strip
end
|
65
|
+
|
66
|
+
# @param origin_info_node an originInfo node that responds to #publisher
# @return [String, nil] the non-blank publisher values joined with ' : ';
#   nil when the publisher elements hold no text at all
def publisher_vals_str(origin_info_node)
  publisher_nodes = origin_info_node.publisher
  return nil if publisher_nodes.text.strip.empty?
  publisher_names = publisher_nodes.map(&:text).reject { |name| name.strip.empty? }
  compact_and_join_with_delimiter(publisher_names, ' : ')
end
|
73
|
+
|
74
|
+
# PLACE processing methods ------
|
75
|
+
|
76
|
+
# @param origin_info_node an originInfo node that responds to #place
# @return [String, nil] the non-blank place terms (as selected by place_terms)
#   joined with ' : '; nil when the place elements hold no text at all
def place_vals_str(origin_info_node)
  return nil if origin_info_node.place.text.strip.empty?
  place_names = place_terms(origin_info_node).map(&:text).reject { |name| name.strip.empty? }
  compact_and_join_with_delimiter(place_names, ' : ')
end
|
83
|
+
|
84
|
+
# @param element an originInfo node that responds to #place
# @return [Boolean] true when at least one placeTerm either has no type attribute
#   or has type 'text'
def unencoded_place_terms?(element)
  element.place.placeTerm.any? do |term|
    type_attr = term.attributes['type']
    !type_attr.respond_to?(:value) || type_attr.value == 'text'
  end
end
|
90
|
+
|
91
|
+
# Picks the placeTerm elements to display for an originInfo element.
#
# When any placeTerm is textual (no type attribute, or type 'text') only the textual
# terms are returned. Otherwise each placeTerm with type 'code' and authority
# 'marccountry' whose code is a key of MARC_COUNTRIES is returned as a copy whose
# content is the country name; every other term is dropped.
#
# @param origin_info_element an originInfo node
# @return [Array] the selected placeTerm elements; [] when the element has no place/placeTerm
def place_terms(origin_info_element)
  has_place_terms = origin_info_element.respond_to?(:place) &&
                    origin_info_element.place.respond_to?(:placeTerm)
  return [] unless has_place_terms

  if unencoded_place_terms?(origin_info_element)
    textual_terms = origin_info_element.place.placeTerm.select do |term|
      type_attr = term.attributes['type']
      !type_attr.respond_to?(:value) || type_attr.value == 'text'
    end
    textual_terms.compact
  else
    origin_info_element.place.placeTerm.each_with_object([]) do |term, decoded_terms|
      type_attr = term.attributes['type']
      next unless type_attr.respond_to?(:value) && type_attr.value == 'code'
      authority_attr = term.attributes['authority']
      next unless authority_attr.respond_to?(:value) && authority_attr.value == 'marccountry'
      next unless MARC_COUNTRIES.include?(term.text.strip)
      # work on a copy so the term handed in keeps its code
      decoded_term = term.clone
      decoded_term.content = MARC_COUNTRIES[decoded_term.text.strip]
      decoded_terms << decoded_term
    end
  end
end
|
112
|
+
|
113
|
+
# DATE processing methods ------
|
114
|
+
|
115
|
+
# @param origin_info_node an originInfo node
# @return [String, nil] the display values of all date fields, stripped and joined
#   with a space; nil when there are no date values
def date_str(origin_info_node)
  date_vals = origin_info_date_vals(origin_info_node)
  date_vals.empty? ? nil : date_vals.map(&:strip).join(' ')
end
|
120
|
+
|
121
|
+
# Collects the display values of every date field (see date_field_keys) that the
# node responds to, in field order.
#
# Emptiness is checked with plain Ruby (nil / #empty?) rather than ActiveSupport's
# Object#present?, which this file does not require.
#
# @param origin_info_node an originInfo node
# @return [Array<String>] date display values; [] when the node has no dates
def origin_info_date_vals(origin_info_node)
  date_field_keys.each_with_object([]) do |date_field, date_vals|
    next unless origin_info_node.respond_to?(date_field)
    date_elements = origin_info_node.send(date_field)
    next if date_elements.nil? || date_elements.empty?
    date_vals.concat(Array(date_elements_display_vals(date_elements)).flatten)
  end
end
|
128
|
+
|
129
|
+
# Runs the date elements of one date field through the display pipeline, in order:
# drop bad dates, reformat encoded dates, B.C./A.D. labels, decade/century wording,
# range joining, de-duplication and finally the qualifier decoration.
#
# @param ng_date_elements the date elements of a single date field
# @return whatever apply_date_qualifier_decoration returns for the processed elements
def date_elements_display_vals(ng_date_elements)
  usable = ignore_bad_dates(ng_date_elements)
  reformatted = process_encoded_dates(usable)
  era_labelled = process_bc_ad_dates(reformatted)
  worded = process_decade_century_dates(era_labelled)
  ranged = join_date_ranges(worded)
  unique = dedup_dates(ranged)
  apply_date_qualifier_decoration(unique)
end
|
142
|
+
|
143
|
+
# @return [Array<Symbol>] the originInfo date fields used for the imprint, in display order
def date_field_keys
  %i[dateIssued dateCreated dateCaptured copyrightDate]
end
|
146
|
+
|
147
|
+
# Drops date elements whose stripped text is one of the placeholder values
# '9999', '0000-00-00' or 'uuuu'.
#
# @param ng_date_elements date elements
# @return [Array] the remaining date elements
def ignore_bad_dates(ng_date_elements)
  ng_date_elements.reject do |ng_date_element|
    ['9999', '0000-00-00', 'uuuu'].include?(ng_date_element.text.strip)
  end
end
|
153
|
+
|
154
|
+
# Reformats w3cdtf- and iso8601-encoded date elements for display; elements with
# any other encoding (or none) are passed through as they are.
#
# @param ng_date_elements date elements
# @return [Array] date elements, reformatted where an encoding handler applies
def process_encoded_dates(ng_date_elements)
  ng_date_elements.map do |ng_date_element|
    next process_w3cdtf_date(ng_date_element) if date_is_w3cdtf?(ng_date_element)
    next process_iso8601_date(ng_date_element) if date_is_iso8601?(ng_date_element)
    ng_date_element
  end
end
|
165
|
+
|
166
|
+
# note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
|
167
|
+
# "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
|
168
|
+
# There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
|
169
|
+
# See also https://consul.stanford.edu/display/chimera/MODS+display+rules for EDTF
|
170
|
+
# Rewrites early years as "N B.C." / "N A.D." display strings.
#
# - an edtf-encoded '0' becomes "1 B.C."
# - an edtf-encoded negative year -N becomes "(N + 1) B.C."
# - a value accepted by date_is_ad? gets its leading zeros removed and " A.D." appended
#
# A relabelled element is returned as a copy, like the sibling process_* methods do,
# so the nodes handed in keep their original content. Elements that need no
# relabelling are returned as they are.
#
# @param ng_date_elements date elements
# @return [Array] date elements, relabelled where applicable
def process_bc_ad_dates(ng_date_elements)
  ng_date_elements.map do |ng_date_element|
    date_text = ng_date_element.text.strip
    relabelled =
      if date_is_edtf?(ng_date_element) && date_text == '0'
        '1 B.C.'
      elsif date_is_bc_edtf?(ng_date_element)
        "#{date_text.gsub(/^-0*/, '').to_i + 1} B.C."
      elsif date_is_ad?(ng_date_element)
        "#{date_text.gsub(/^0*/, '')} A.D."
      end
    next ng_date_element unless relabelled

    relabelled_element = ng_date_element.clone
    relabelled_element.content = relabelled
    relabelled_element
  end
end
|
184
|
+
|
185
|
+
# Rewords decade-style and century-style date elements for display; the decade
# check is made first, and any other element is passed through as it is.
#
# @param ng_date_elements date elements
# @return [Array] date elements, reworded where applicable
def process_decade_century_dates(ng_date_elements)
  ng_date_elements.map do |ng_date_element|
    next process_decade_date(ng_date_element) if date_is_decade?(ng_date_element)
    next process_century_date(ng_date_element) if date_is_century?(ng_date_element)
    ng_date_element
  end
end
|
196
|
+
|
197
|
+
# Collapses start/end date elements into a single range element.
#
# - closed range (a 'start' and an 'end' point are present): every element whose
#   normalized text equals the start's becomes "start - end", elements equal to the
#   end are dropped, all others are kept
# - open range (a 'start' but no 'end'): the start element becomes "start-"
# - otherwise the elements are returned as given
#
# In both range cases the returned elements are copies.
#
# @param ng_date_elements date elements
# @return [Array] date elements with ranges joined
def join_date_ranges(ng_date_elements)
  with_point = lambda do |point|
    ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == point }
  end

  if dates_are_range?(ng_date_elements)
    start_date = with_point.call('start')
    end_date = with_point.call('end')
    ng_date_elements.each_with_object([]) do |date_element, joined|
      date_copy = date_element.clone # copy so we don't alter the element we were given
      normalized_text = normalize_date(date_copy.text)
      if normalized_text == normalize_date(start_date.text)
        date_copy.content = [start_date.text, end_date.text].join(' - ')
        joined << date_copy
      elsif normalized_text != normalize_date(end_date.text)
        joined << date_copy
      end
    end
  elsif dates_are_open_range?(ng_date_elements)
    start_date = with_point.call('start')
    ng_date_elements.map do |date_element|
      date_copy = date_element.clone # copy so we don't alter the element we were given
      date_copy.content = "#{start_date.text}-" if date_copy.text == start_date.text
      date_copy
    end
  else
    ng_date_elements
  end
end
|
221
|
+
|
222
|
+
# Reduces the elements to a single one when any two of them have the same
# normalized text. The survivor is, in order of preference: the first element with
# a qualifier attribute, the first element without an encoding attribute, or the
# first element. Without duplicates the elements are returned as given.
#
# @param ng_date_elements date elements
# @return [Array] the given elements, or a one-element array
def dedup_dates(ng_date_elements)
  normalized_texts = ng_date_elements.map { |d| normalize_date(d.text) }
  return ng_date_elements if normalized_texts == normalized_texts.uniq

  preferred = ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) } ||
              ng_date_elements.find { |d| !d.attributes['encoding'] } ||
              ng_date_elements.first
  [preferred]
end
|
236
|
+
|
237
|
+
# Turns the date elements into display strings, decorated by qualifier:
# approximate => "[ca. X]", questionable => "[X?]", inferred => "[X]".
# Each element is copied before it is decorated.
#
# @param ng_date_elements date elements
# @return [Array<String>] the (decorated) text of each element
def apply_date_qualifier_decoration(ng_date_elements)
  ng_date_elements.map do |ng_date_element|
    date = ng_date_element.clone
    if date_is_approximate?(date)
      date.content = "[ca. #{date.text}]"
    elsif date_is_questionable?(date)
      date.content = "[#{date.text}?]"
    elsif date_is_inferred?(date)
      date.content = "[#{date.text}]"
    end
    date.text
  end
end
|
251
|
+
|
252
|
+
# @param ng_date_element a date element
# @return truthy when the element's qualifier attribute has the value 'approximate'
def date_is_approximate?(ng_date_element)
  qualifier = ng_date_element.attributes['qualifier']
  qualifier && qualifier.respond_to?(:value) && qualifier.value == 'approximate'
end
|
257
|
+
|
258
|
+
# @param ng_date_element a date element
# @return truthy when the element's qualifier attribute has the value 'questionable'
def date_is_questionable?(ng_date_element)
  qualifier = ng_date_element.attributes['qualifier']
  qualifier && qualifier.respond_to?(:value) && qualifier.value == 'questionable'
end
|
263
|
+
|
264
|
+
# @param ng_date_element a date element
# @return truthy when the element's qualifier attribute has the value 'inferred'
def date_is_inferred?(ng_date_element)
  qualifier = ng_date_element.attributes['qualifier']
  qualifier && qualifier.respond_to?(:value) && qualifier.value == 'inferred'
end
|
269
|
+
|
270
|
+
# @param ng_date_elements date elements
# @return [Boolean] true when some element has point 'start' and none has point 'end'
def dates_are_open_range?(ng_date_elements)
  any_with_point = lambda do |wanted_point|
    ng_date_elements.any? do |element|
      point = element.attributes['point']
      point && point.respond_to?(:value) && point.value == wanted_point
    end
  end
  any_with_point.call('start') && !any_with_point.call('end')
end
|
281
|
+
|
282
|
+
# @param ng_date_elements date elements
# @return [Boolean] true when both a 'start' and an 'end' point are present
def dates_are_range?(ng_date_elements)
  points = ng_date_elements.map do |date|
    point = date.attributes['point']
    point.value if point.respond_to?(:value)
  end
  %w[start end].all? { |wanted_point| points.include?(wanted_point) }
end
|
291
|
+
|
292
|
+
# Reformats a w3cdtf date element for display.
#
# YYYY-MM-DD is rendered with full_date_format and YYYY-MM with short_date_format.
# The pattern check and the parsing both use the whitespace-stripped text, so padded
# values are reformatted too. Any other value, or a value that is not a valid date,
# keeps its content.
#
# @param ng_date_element a date element
# @return a copy of the element, with reformatted content where applicable
def process_w3cdtf_date(ng_date_element)
  ng_date_element = ng_date_element.clone
  date_text = ng_date_element.text.strip
  ng_date_element.content = begin
    case date_text
    when /\A\d{4}-\d{2}-\d{2}\z/
      Date.parse(date_text).strftime(full_date_format)
    when /\A\d{4}-\d{2}\z/
      # give Date.parse a complete date; only month and year are displayed
      Date.parse("#{date_text}-01").strftime(short_date_format)
    else
      ng_date_element.content
    end
  rescue ArgumentError # Date.parse rejects impossible dates such as 2013-13-45
    ng_date_element.content
  end
  ng_date_element
end
|
307
|
+
|
308
|
+
# Reformats an iso8601 date element for display.
#
# A value made of eight or more digits (YYYYMMDD...) is rendered with
# full_date_format. The pattern check and the parsing both use the
# whitespace-stripped text. Any other value, or a value that is not a valid date,
# keeps its content.
#
# @param ng_date_element a date element
# @return a copy of the element, with reformatted content where applicable
def process_iso8601_date(ng_date_element)
  ng_date_element = ng_date_element.clone
  date_text = ng_date_element.text.strip
  ng_date_element.content = begin
    if date_text =~ /\A\d{8,}\z/
      Date.parse(date_text).strftime(full_date_format)
    else
      ng_date_element.content
    end
  rescue ArgumentError # Date.parse rejects impossible dates such as 20131345
    ng_date_element.content
  end
  ng_date_element
end
|
321
|
+
|
322
|
+
# three digits followed by u, -, ? or x, not directly preceded by another digit;
# captures: text before, the decade string, text after
DECADE_4CHAR_REGEXP = /(^|.*\D)(\d{3}[u\-?x])(.*)/

# Strings like 195x, 195u, 195- and 195? become '1950s'; the text around the
# decade string is kept.
#
# @param ng_date_element a date element
# @return a copy of the element, with the decade reworded where the pattern matches
def process_decade_date(ng_date_element)
  my_ng_date_element = ng_date_element.clone
  my_ng_date_element.content = begin
    # note: not calling DateParsing.display_str_for_decade directly because non-year text is lost
    decade_matches = ng_date_element.text.strip.match(DECADE_4CHAR_REGEXP)
    if decade_matches
      before, decade_str, after = decade_matches.captures
      zeroth_year = DateParsing.new(decade_str.tr('u\-?x', '0')).sortable_year_for_yyyy
      new_decade_str = zeroth_year ? "#{zeroth_year}s" : nil
      "#{before}#{new_decade_str}#{after}"
    else
      my_ng_date_element.content
    end
  rescue
    my_ng_date_element.content
  end
  my_ng_date_element
end
|
345
|
+
|
346
|
+
# one or two digits followed by two of u or -, not directly preceded by another digit;
# captures: text before, the century string, its digits, text after
CENTURY_4CHAR_REGEXP = /(^|.*\D)((\d{1,2})[u\-]{2})(.*)/

# Strings like 18uu, 18-- become '19th century'; the text around the century
# string is kept.
#
# @param ng_date_element a date element
# @return a copy of the element, with the century reworded where the pattern matches
def process_century_date(ng_date_element)
  my_ng_date_element = ng_date_element.clone
  my_ng_date_element.content = begin
    # note: not calling DateParsing.display_str_for_century directly because non-year text is lost
    century_matches = ng_date_element.text.strip.match(CENTURY_4CHAR_REGEXP)
    if century_matches
      # loaded only when needed; provides Integer#ordinalize
      require 'active_support/core_ext/integer/inflections'
      before, _century_str, century_digits, after = century_matches.captures
      "#{before}#{(century_digits.to_i + 1).ordinalize} century#{after}"
    else
      my_ng_date_element.content
    end
  rescue
    my_ng_date_element.content
  end
  my_ng_date_element
end
|
367
|
+
|
368
|
+
# @param ng_element an element
# @param encoding [String] the encoding name, in lower case
# @return truthy when the element's encoding attribute equals the given encoding,
#   ignoring the case of the attribute value
def field_is_encoded?(ng_element, encoding)
  encoding_attr = ng_element.attributes['encoding']
  encoding_attr && encoding_attr.respond_to?(:value) && encoding_attr.value.downcase == encoding
end
|
373
|
+
|
374
|
+
# @param ng_date_element a date element
# @return truthy when the stripped text starts with '-' and the element is edtf-encoded
def date_is_bc_edtf?(ng_date_element)
  return false unless ng_date_element.text.strip.start_with?('-')
  date_is_edtf?(ng_date_element)
end
|
377
|
+
|
378
|
+
# Decides whether a date value should get an "A.D." label: with surrounding
# whitespace and leading zeros removed, the value must be non-blank and shorter
# than four characters.
#
# Blankness is checked with plain Ruby rather than ActiveSupport's Object#present?,
# which this file does not require. No check for an existing 'A.D.' label is made
# because a value shorter than four characters cannot contain one.
#
# @param ng_date_element a date element
# @return [Boolean]
def date_is_ad?(ng_date_element)
  str = ng_date_element.text.strip.gsub(/^0*/, '')
  !str.strip.empty? && str.length < 4
end
|
382
|
+
|
383
|
+
# @param ng_date_element a date element
# @return truthy when the element's encoding attribute is 'edtf' (see field_is_encoded?)
def date_is_edtf?(ng_date_element)
  field_is_encoded?(ng_date_element, 'edtf')
end
|
386
|
+
|
387
|
+
# @param ng_date_element a date element
# @return truthy when the element's encoding attribute is 'w3cdtf' (see field_is_encoded?)
def date_is_w3cdtf?(ng_date_element)
  field_is_encoded?(ng_date_element, 'w3cdtf')
end
|
390
|
+
|
391
|
+
# @param ng_date_element a date element
# @return truthy when the element's encoding attribute is 'iso8601' (see field_is_encoded?)
def date_is_iso8601?(ng_date_element)
  field_is_encoded?(ng_date_element, 'iso8601')
end
|
394
|
+
|
395
|
+
# @param ng_date_element a date element
# @return [MatchData, nil] truthy if the decade string needs tweaking for display,
#   i.e. the stripped text matches DECADE_4CHAR_REGEXP
def date_is_decade?(ng_date_element)
  DECADE_4CHAR_REGEXP.match(ng_date_element.text.strip)
end
|
399
|
+
|
400
|
+
# @param ng_date_element a date element
# @return [MatchData, nil] truthy if the century string needs tweaking for display,
#   i.e. the stripped text matches CENTURY_4CHAR_REGEXP
def date_is_century?(ng_date_element)
  CENTURY_4CHAR_REGEXP.match(ng_date_element.text.strip)
end
|
404
|
+
|
405
|
+
# The strftime format used for complete dates. The format given on the first call
# is remembered; later calls return it regardless of their argument.
#
# @param full_date_format [String] strftime format to use when none is remembered yet
# @return [String] the remembered format
def full_date_format(full_date_format = '%B %-d, %Y')
  @full_date_format = full_date_format unless @full_date_format
  @full_date_format
end
|
408
|
+
|
409
|
+
# The strftime format used for year-month dates. The format given on the first call
# is remembered; later calls return it regardless of their argument.
#
# @param short_date_format [String] strftime format to use when none is remembered yet
# @return [String] the remembered format
def short_date_format(short_date_format = '%B %Y')
  @short_date_format = short_date_format unless @short_date_format
  @short_date_format
end
|
412
|
+
|
413
|
+
# Reduces a date string to a form suitable for comparing dates with each other.
#
# Removes: a leading "ca." (with any opening brackets before it), a "c" marker that
# directly precedes a digit and does not follow a letter (as in "c1950"), square
# brackets and question marks. Other letters are left alone, so words such as
# "century" or "December" stay intact.
#
# @param date_str [String]
# @return [String] the normalized date string
def normalize_date(date_str)
  date_str.strip.gsub(/\A\[*ca\.\s*|(?<![[:alpha:]])c(?=\d)|[\[\]?]/, '')
end
|
416
|
+
end
|
417
|
+
end
|
418
|
+
end
|