stanford-mods 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/lib/marc_countries.rb +387 -0
- data/lib/stanford-mods.rb +1 -0
- data/lib/stanford-mods/date_parsing.rb +7 -1
- data/lib/stanford-mods/imprint.rb +418 -0
- data/lib/stanford-mods/origin_info.rb +6 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/date_parsing_spec.rb +8 -4
- data/spec/fixtures/searchworks_imprint_data.rb +1300 -0
- data/spec/fixtures/searchworks_pub_date_data.rb +52 -4
- data/spec/imprint_spec.rb +251 -0
- data/spec/sw_publication_spec.rb +11 -0
- metadata +9 -4
- data/config/mappings_hash.rb +0 -78
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: abcbc3357a121647ed103d8341b2cdb48d0c600d
|
|
4
|
+
data.tar.gz: 789c2fa194b7c837c7cdf10898eeab3b4b3636b9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 14231328428d5a250701cdac1f16977ce67ddc6db89fa465635bd7ad3171391c7b11921146020c8992f2af4f9647abbaa0f4f11954ee6a30fb039dd4e1426c32
|
|
7
|
+
data.tar.gz: 253c2201ff7f6ffc206226523ae6bcac56eb48cb386bd93789e2c1babf03f5808adaddcdf9881222b83a76e89567c551ac9c72568e29e784977c9eb117df9dea
|
data/.travis.yml
CHANGED
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
# Map 2- or 3-letter MARC Country/Location Code to User Friendly Name
|
|
4
|
+
# includes discontinued codes, because they're probably in our data
|
|
5
|
+
# from https://www.loc.gov/marc/countries/countries_code.html 01/20/2016
|
|
6
|
+
# Lookup table from MARC country/location code (2 or 3 lowercase letters) to a display name.
# Discontinued codes are kept on purpose because they still occur in existing records.
# Hyphenated names ('Timor-Leste', 'Guinea-Bissau', ...) are spelled as in the Library of
# Congress code list. The table is frozen: it is reference data and must not be mutated.
MARC_COUNTRIES =
  {
    'aa' => 'Albania',
    'abc' => 'Alberta',
    'ac' => 'Ashmore and Cartier Islands',
    'aca' => 'Australian Capital Territory',
    'ae' => 'Algeria',
    'af' => 'Afghanistan',
    'ag' => 'Argentina',
    'ai' => 'Armenia (Republic)',
    'air' => 'Armenian S.S.R.',
    'aj' => 'Azerbaijan',
    'ajr' => 'Azerbaijan S.S.R.',
    'aku' => 'Alaska',
    'alu' => 'Alabama',
    'am' => 'Anguilla',
    'an' => 'Andorra',
    'ao' => 'Angola',
    'aq' => 'Antigua and Barbuda',
    'aru' => 'Arkansas',
    'as' => 'American Samoa',
    'at' => 'Australia',
    'au' => 'Austria',
    'aw' => 'Aruba',
    'ay' => 'Antarctica',
    'azu' => 'Arizona',
    'ba' => 'Bahrain',
    'bb' => 'Barbados',
    'bcc' => 'British Columbia',
    'bd' => 'Burundi',
    'be' => 'Belgium',
    'bf' => 'Bahamas',
    'bg' => 'Bangladesh',
    'bh' => 'Belize',
    'bi' => 'British Indian Ocean Territory',
    'bl' => 'Brazil',
    'bm' => 'Bermuda Islands',
    'bn' => 'Bosnia and Herzegovina',
    'bo' => 'Bolivia',
    'bp' => 'Solomon Islands',
    'br' => 'Burma',
    'bs' => 'Botswana',
    'bt' => 'Bhutan',
    'bu' => 'Bulgaria',
    'bv' => 'Bouvet Island',
    'bw' => 'Belarus',
    'bwr' => 'Byelorussian S.S.R.',
    'bx' => 'Brunei',
    'ca' => 'Caribbean Netherlands',
    'cau' => 'California',
    'cb' => 'Cambodia',
    'cc' => 'China',
    'cd' => 'Chad',
    'ce' => 'Sri Lanka',
    'cf' => 'Congo (Brazzaville)',
    'cg' => 'Congo (Democratic Republic)',
    'ch' => 'China (Republic : 1949- )',
    'ci' => 'Croatia',
    'cj' => 'Cayman Islands',
    'ck' => 'Colombia',
    'cl' => 'Chile',
    'cm' => 'Cameroon',
    'cn' => 'Canada',
    'co' => 'Curaçao',
    'cou' => 'Colorado',
    'cp' => 'Canton and Enderbury Islands',
    'cq' => 'Comoros',
    'cr' => 'Costa Rica',
    'cs' => 'Czechoslovakia',
    'ctu' => 'Connecticut',
    'cu' => 'Cuba',
    'cv' => 'Cabo Verde',
    'cw' => 'Cook Islands',
    'cx' => 'Central African Republic',
    'cy' => 'Cyprus',
    'cz' => 'Canal Zone',
    'dcu' => 'District of Columbia',
    'deu' => 'Delaware',
    'dk' => 'Denmark',
    'dm' => 'Benin',
    'dq' => 'Dominica',
    'dr' => 'Dominican Republic',
    'ea' => 'Eritrea',
    'ec' => 'Ecuador',
    'eg' => 'Equatorial Guinea',
    'em' => 'Timor-Leste',
    'enk' => 'England',
    'er' => 'Estonia',
    'err' => 'Estonia',
    'es' => 'El Salvador',
    'et' => 'Ethiopia',
    'fa' => 'Faroe Islands',
    'fg' => 'French Guiana',
    'fi' => 'Finland',
    'fj' => 'Fiji',
    'fk' => 'Falkland Islands',
    'flu' => 'Florida',
    'fm' => 'Micronesia (Federated States)',
    'fp' => 'French Polynesia',
    'fr' => 'France',
    'fs' => 'Terres australes et antarctiques françaises',
    'ft' => 'Djibouti',
    'gau' => 'Georgia',
    'gb' => 'Kiribati',
    'gd' => 'Grenada',
    'ge' => 'Germany (East)',
    'gh' => 'Ghana',
    'gi' => 'Gibraltar',
    'gl' => 'Greenland',
    'gm' => 'Gambia',
    'gn' => 'Gilbert and Ellice Islands',
    'go' => 'Gabon',
    'gp' => 'Guadeloupe',
    'gr' => 'Greece',
    'gs' => 'Georgia (Republic)',
    'gsr' => 'Georgian S.S.R.',
    'gt' => 'Guatemala',
    'gu' => 'Guam',
    'gv' => 'Guinea',
    'gw' => 'Germany',
    'gy' => 'Guyana',
    'gz' => 'Gaza Strip',
    'hiu' => 'Hawaii',
    'hk' => 'Hong Kong',
    'hm' => 'Heard and McDonald Islands',
    'ho' => 'Honduras',
    'ht' => 'Haiti',
    'hu' => 'Hungary',
    'iau' => 'Iowa',
    'ic' => 'Iceland',
    'idu' => 'Idaho',
    'ie' => 'Ireland',
    'ii' => 'India',
    'ilu' => 'Illinois',
    'inu' => 'Indiana',
    'io' => 'Indonesia',
    'iq' => 'Iraq',
    'ir' => 'Iran',
    'is' => 'Israel',
    'it' => 'Italy',
    'iu' => 'Israel-Syria Demilitarized Zones',
    'iv' => "Côte d'Ivoire",
    'iw' => 'Israel-Jordan Demilitarized Zones',
    'iy' => 'Iraq-Saudi Arabia Neutral Zone',
    'ja' => 'Japan',
    'ji' => 'Johnston Atoll',
    'jm' => 'Jamaica',
    'jn' => 'Jan Mayen',
    'jo' => 'Jordan',
    'ke' => 'Kenya',
    'kg' => 'Kyrgyzstan',
    'kgr' => 'Kirghiz S.S.R.',
    'kn' => 'Korea (North)',
    'ko' => 'Korea (South)',
    'ksu' => 'Kansas',
    'ku' => 'Kuwait',
    'kv' => 'Kosovo',
    'kyu' => 'Kentucky',
    'kz' => 'Kazakhstan',
    'kzr' => 'Kazakh S.S.R.',
    'lau' => 'Louisiana',
    'lb' => 'Liberia',
    'le' => 'Lebanon',
    'lh' => 'Liechtenstein',
    'li' => 'Lithuania',
    'lir' => 'Lithuania',
    'ln' => 'Central and Southern Line Islands',
    'lo' => 'Lesotho',
    'ls' => 'Laos',
    'lu' => 'Luxembourg',
    'lv' => 'Latvia',
    'lvr' => 'Latvia',
    'ly' => 'Libya',
    'mau' => 'Massachusetts',
    'mbc' => 'Manitoba',
    'mc' => 'Monaco',
    'mdu' => 'Maryland',
    'meu' => 'Maine',
    'mf' => 'Mauritius',
    'mg' => 'Madagascar',
    'mh' => 'Macao',
    'miu' => 'Michigan',
    'mj' => 'Montserrat',
    'mk' => 'Oman',
    'ml' => 'Mali',
    'mm' => 'Malta',
    'mnu' => 'Minnesota',
    'mo' => 'Montenegro',
    'mou' => 'Missouri',
    'mp' => 'Mongolia',
    'mq' => 'Martinique',
    'mr' => 'Morocco',
    'msu' => 'Mississippi',
    'mtu' => 'Montana',
    'mu' => 'Mauritania',
    'mv' => 'Moldova',
    'mvr' => 'Moldavian S.S.R.',
    'mw' => 'Malawi',
    'mx' => 'Mexico',
    'my' => 'Malaysia',
    'mz' => 'Mozambique',
    'na' => 'Netherlands Antilles',
    'nbu' => 'Nebraska',
    'ncu' => 'North Carolina',
    'ndu' => 'North Dakota',
    'ne' => 'Netherlands',
    'nfc' => 'Newfoundland and Labrador',
    'ng' => 'Niger',
    'nhu' => 'New Hampshire',
    'nik' => 'Northern Ireland',
    'nju' => 'New Jersey',
    'nkc' => 'New Brunswick',
    'nl' => 'New Caledonia',
    'nm' => 'Northern Mariana Islands',
    'nmu' => 'New Mexico',
    'nn' => 'Vanuatu',
    'no' => 'Norway',
    'np' => 'Nepal',
    'nq' => 'Nicaragua',
    'nr' => 'Nigeria',
    'nsc' => 'Nova Scotia',
    'ntc' => 'Northwest Territories',
    'nu' => 'Nauru',
    'nuc' => 'Nunavut',
    'nvu' => 'Nevada',
    'nw' => 'Northern Mariana Islands',
    'nx' => 'Norfolk Island',
    'nyu' => 'New York (State)',
    'nz' => 'New Zealand',
    'ohu' => 'Ohio',
    'oku' => 'Oklahoma',
    'onc' => 'Ontario',
    'oru' => 'Oregon',
    'ot' => 'Mayotte',
    'pau' => 'Pennsylvania',
    'pc' => 'Pitcairn Island',
    'pe' => 'Peru',
    'pf' => 'Paracel Islands',
    'pg' => 'Guinea-Bissau',
    'ph' => 'Philippines',
    'pic' => 'Prince Edward Island',
    'pk' => 'Pakistan',
    'pl' => 'Poland',
    'pn' => 'Panama',
    'po' => 'Portugal',
    'pp' => 'Papua New Guinea',
    'pr' => 'Puerto Rico',
    'pt' => 'Portuguese Timor',
    'pw' => 'Palau',
    'py' => 'Paraguay',
    'qa' => 'Qatar',
    'qea' => 'Queensland',
    'quc' => 'Québec (Province)',
    'rb' => 'Serbia',
    're' => 'Réunion',
    'rh' => 'Zimbabwe',
    'riu' => 'Rhode Island',
    'rm' => 'Romania',
    'ru' => 'Russia (Federation)',
    'rur' => 'Russian S.F.S.R.',
    'rw' => 'Rwanda',
    'ry' => 'Ryukyu Islands, Southern',
    'sa' => 'South Africa',
    'sb' => 'Svalbard',
    'sc' => 'Saint-Barthélemy',
    'scu' => 'South Carolina',
    'sd' => 'South Sudan',
    'sdu' => 'South Dakota',
    'se' => 'Seychelles',
    'sf' => 'Sao Tome and Principe',
    'sg' => 'Senegal',
    'sh' => 'Spanish North Africa',
    'si' => 'Singapore',
    'sj' => 'Sudan',
    'sk' => 'Sikkim',
    'sl' => 'Sierra Leone',
    'sm' => 'San Marino',
    'sn' => 'Sint Maarten',
    'snc' => 'Saskatchewan',
    'so' => 'Somalia',
    'sp' => 'Spain',
    'sq' => 'Swaziland',
    'sr' => 'Surinam',
    'ss' => 'Western Sahara',
    'st' => 'Saint-Martin',
    'stk' => 'Scotland',
    'su' => 'Saudi Arabia',
    'sv' => 'Swan Islands',
    'sw' => 'Sweden',
    'sx' => 'Namibia',
    'sy' => 'Syria',
    'sz' => 'Switzerland',
    'ta' => 'Tajikistan',
    'tar' => 'Tajik S.S.R.',
    'tc' => 'Turks and Caicos Islands',
    'tg' => 'Togo',
    'th' => 'Thailand',
    'ti' => 'Tunisia',
    'tk' => 'Turkmenistan',
    'tkr' => 'Turkmen S.S.R.',
    'tl' => 'Tokelau',
    'tma' => 'Tasmania',
    'tnu' => 'Tennessee',
    'to' => 'Tonga',
    'tr' => 'Trinidad and Tobago',
    'ts' => 'United Arab Emirates',
    'tt' => 'Trust Territory of the Pacific Islands',
    'tu' => 'Turkey',
    'tv' => 'Tuvalu',
    'txu' => 'Texas',
    'tz' => 'Tanzania',
    'ua' => 'Egypt',
    'uc' => 'United States Misc. Caribbean Islands',
    'ug' => 'Uganda',
    'ui' => 'United Kingdom Misc. Islands',
    'uik' => 'United Kingdom Misc. Islands',
    'uk' => 'United Kingdom',
    'un' => 'Ukraine',
    'unr' => 'Ukraine',
    'up' => 'United States Misc. Pacific Islands',
    'ur' => 'Soviet Union',
    'us' => 'United States',
    'utu' => 'Utah',
    'uv' => 'Burkina Faso',
    'uy' => 'Uruguay',
    'uz' => 'Uzbekistan',
    'uzr' => 'Uzbek S.S.R.',
    'vau' => 'Virginia',
    'vb' => 'British Virgin Islands',
    'vc' => 'Vatican City',
    've' => 'Venezuela',
    'vi' => 'Virgin Islands of the United States',
    'vm' => 'Vietnam',
    'vn' => 'Vietnam, North',
    'vp' => 'Various places',
    'vra' => 'Victoria',
    'vs' => 'Vietnam, South',
    'vtu' => 'Vermont',
    'wau' => 'Washington (State)',
    'wb' => 'West Berlin',
    'wea' => 'Western Australia',
    'wf' => 'Wallis and Futuna',
    'wiu' => 'Wisconsin',
    'wj' => 'West Bank of the Jordan River',
    'wk' => 'Wake Island',
    'wlk' => 'Wales',
    'ws' => 'Samoa',
    'wvu' => 'West Virginia',
    'wyu' => 'Wyoming',
    'xa' => 'Christmas Island (Indian Ocean)',
    'xb' => 'Cocos (Keeling) Islands',
    'xc' => 'Maldives',
    'xd' => 'Saint Kitts-Nevis',
    'xe' => 'Marshall Islands',
    'xf' => 'Midway Islands',
    'xga' => 'Coral Sea Islands Territory',
    'xh' => 'Niue',
    'xi' => 'Saint Kitts-Nevis-Anguilla',
    'xj' => 'Saint Helena',
    'xk' => 'Saint Lucia',
    'xl' => 'Saint Pierre and Miquelon',
    'xm' => 'Saint Vincent and the Grenadines',
    'xn' => 'Macedonia',
    'xna' => 'New South Wales',
    'xo' => 'Slovakia',
    'xoa' => 'Northern Territory',
    'xp' => 'Spratly Island',
    'xr' => 'Czech Republic',
    'xra' => 'South Australia',
    'xs' => 'South Georgia and the South Sandwich Islands',
    'xv' => 'Slovenia',
    # deliberately left out so that "unknown place" codes produce no display value
    #'xx' => 'No place, unknown, or undetermined',
    'xxc' => 'Canada',
    'xxk' => 'United Kingdom',
    'xxr' => 'Soviet Union',
    'xxu' => 'United States',
    'ye' => 'Yemen',
    'ykc' => 'Yukon Territory',
    'ys' => "Yemen (People's Democratic Republic)",
    'yu' => 'Serbia and Montenegro',
    'za' => 'Zambia'
  }.freeze
|
data/lib/stanford-mods.rb
CHANGED
|
@@ -2,6 +2,7 @@ require 'mods'
|
|
|
2
2
|
require 'stanford-mods/date_parsing'
|
|
3
3
|
require 'stanford-mods/coordinate'
|
|
4
4
|
require 'stanford-mods/geo_spatial'
|
|
5
|
+
require 'stanford-mods/imprint'
|
|
5
6
|
require 'stanford-mods/name'
|
|
6
7
|
require 'stanford-mods/origin_info'
|
|
7
8
|
require 'stanford-mods/physical_location'
|
|
@@ -281,10 +281,16 @@ module Stanford
|
|
|
281
281
|
|
|
282
282
|
# get display value for date String containing yyy, yy, y, -y, -yy, -yyy
|
|
283
283
|
# negative number strings will be changed to B.C. strings
|
|
284
|
+
# note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
|
|
285
|
+
# "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
|
|
286
|
+
# There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
|
|
287
|
+
# See also https://consul.stanford.edu/display/chimera/MODS+display+rules for edtf
|
|
284
288
|
# Build the display value for an early year given as a 1-3 digit number, optionally negative.
# Reads the date via orig_date_str and only acts when it matches EARLY_NUMERIC.
#
# There is no year 0 in the B.C./A.D. scheme, so 0 displays as 1 B.C. and each negative
# year is shifted by one (-1 => 2 B.C.). Zero is detected numerically so that padded
# spellings such as '00' or '000' are not rendered as the non-existent "0 A.D.".
#
# @return [String, nil] e.g. '6 B.C.' or '66 A.D.'; nil when the date is not an early numeric
def display_str_for_early_numeric
  return unless orig_date_str.match(EARLY_NUMERIC)
  year = orig_date_str.to_i
  # return 1 B.C. when the date is 0 (however it is spelled) since there is no 0 year
  return '1 B.C.' if year.zero?
  # negative number becomes B.C.
  return "#{year.abs + 1} B.C." if orig_date_str.start_with?('-')
  # to_i has already removed leading 0s from early dates
  "#{year} A.D."
end
|
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
module Stanford
|
|
2
|
+
module Mods
|
|
3
|
+
##
|
|
4
|
+
# Get the imprint information from originInfo elements (and sub elements) to create display strings
|
|
5
|
+
#
|
|
6
|
+
# This code is adapted from the mods_display gem. In a perfect world, this
|
|
7
|
+
# code would make use of the date_parsing class instead of reimplementing pieces of it;
|
|
8
|
+
# however, the date_parsing class only does years, and this does finer tuned dates and also
|
|
9
|
+
# reformats them according to the encoding.
|
|
10
|
+
class Imprint
|
|
11
|
+
# @param [Nokogiri::XML::NodeSet] originInfo_ng_nodeset of originInfo nodes
|
|
12
|
+
def initialize(originInfo_ng_nodeset)
|
|
13
|
+
@originInfo_ng_nodeset = originInfo_ng_nodeset
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
require 'marc_countries'
|
|
17
|
+
|
|
18
|
+
# @return [Array<String>] each String is an imprint statement from a single originInfo element
|
|
19
|
+
def imprint_statements
|
|
20
|
+
results = []
|
|
21
|
+
@originInfo_ng_nodeset.each do |origin_info_node|
|
|
22
|
+
edition = edition_vals_str(origin_info_node)
|
|
23
|
+
place = place_vals_str(origin_info_node)
|
|
24
|
+
publisher = publisher_vals_str(origin_info_node)
|
|
25
|
+
dates = date_str(origin_info_node)
|
|
26
|
+
|
|
27
|
+
place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
|
|
28
|
+
edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
|
|
29
|
+
ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
|
|
30
|
+
|
|
31
|
+
results << ed_place_pub_dates unless ed_place_pub_dates.empty?
|
|
32
|
+
end
|
|
33
|
+
results
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# All imprint statements joined into one display string.
# The statements are computed once (they used to be built twice per call) and the
# emptiness test uses plain Array#empty?, so no ActiveSupport extension is needed here.
#
# @return [String, nil] statements separated by '; ', or nil when there are none
def display_str
  statements = imprint_statements
  statements.join('; ') unless statements.empty?
end
|
|
39
|
+
|
|
40
|
+
private
|
|
41
|
+
|
|
42
|
+
# Join the non-blank values with the delimiter without doubling punctuation.
#
# nil and whitespace-only values are dropped. When the delimiter is itself punctuation
# (e.g. ' : ' or ', '), a value that already ends in '.', ',', ':' or ';' is followed by a
# single space instead of the delimiter. The input strings are never modified; the previous
# implementation appended to them in place, which changed the caller's objects and raised
# on frozen strings.
#
# @param values [Array<String, nil>] pieces to join
# @param delimiter [String] separator, including any surrounding spaces
# @return [String] the joined string ('' when nothing is left to join)
def compact_and_join_with_delimiter(values, delimiter)
  compact_values = values.compact.reject { |v| v.strip.empty? }
  return compact_values.join(delimiter) if compact_values.length == 1 ||
                                           !ends_in_terminating_punctuation?(delimiter)
  last_index = compact_values.length - 1
  compact_values.each_with_index.map do |value, i|
    # the last item gets a space too; it is removed by the final strip
    separator = i == last_index || ends_in_terminating_punctuation?(value) ? ' ' : delimiter
    "#{value}#{separator}"
  end.join.strip
end

# @param value [String]
# @return [Boolean] true if value, ignoring surrounding whitespace, ends in . , : or ;
def ends_in_terminating_punctuation?(value)
  value.strip.end_with?('.', ',', ':', ';')
end
|
|
59
|
+
|
|
60
|
+
def edition_vals_str(origin_info_node)
|
|
61
|
+
origin_info_node.edition.reject do |e|
|
|
62
|
+
e.text.strip.empty?
|
|
63
|
+
end.map(&:text).join(' ').strip
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def publisher_vals_str(origin_info_node)
|
|
67
|
+
return if origin_info_node.publisher.text.strip.empty?
|
|
68
|
+
publishers = origin_info_node.publisher.reject do |p|
|
|
69
|
+
p.text.strip.empty?
|
|
70
|
+
end.map(&:text)
|
|
71
|
+
compact_and_join_with_delimiter(publishers, ' : ')
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# PLACE processing methods ------
|
|
75
|
+
|
|
76
|
+
def place_vals_str(origin_info_node)
|
|
77
|
+
return if origin_info_node.place.text.strip.empty?
|
|
78
|
+
places = place_terms(origin_info_node).reject do |p|
|
|
79
|
+
p.text.strip.empty?
|
|
80
|
+
end.map(&:text)
|
|
81
|
+
compact_and_join_with_delimiter(places, ' : ')
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# @param element an originInfo node responding to place.placeTerm
# @return [Boolean] true if at least one placeTerm is plain text (no type, or type="text")
def unencoded_place_terms?(element)
  element.place.placeTerm.any? { |term| unencoded_place_term?(term) }
end

# Select the placeTerm elements to display for one originInfo element.
#
# Plain-text terms win: when any exist, only those are returned. Otherwise each
# type="code" authority="marccountry" term whose code is in MARC_COUNTRIES is returned as
# a copy whose content is the country name; any other coded term is dropped.
#
# @param origin_info_element an originInfo node (anything lacking place.placeTerm yields [])
# @return [Array] placeTerm elements; decoded ones are clones, the originals stay untouched
def place_terms(origin_info_element)
  return [] unless origin_info_element.respond_to?(:place) &&
                   origin_info_element.place.respond_to?(:placeTerm)
  terms = origin_info_element.place.placeTerm
  text_terms = terms.select { |term| unencoded_place_term?(term) }
  return text_terms unless text_terms.empty?
  terms.select { |term| marc_country_code_term?(term) }.map do |term|
    decoded = term.clone # clone so the source document is not rewritten
    decoded.content = MARC_COUNTRIES[term.text.strip]
    decoded
  end
end

# @return [Boolean] true if the placeTerm has no type attribute or type="text"
def unencoded_place_term?(term)
  type = term.attributes['type']
  !type.respond_to?(:value) || type.value == 'text'
end

# @return [Boolean] true if the placeTerm is a marccountry code present in MARC_COUNTRIES
def marc_country_code_term?(term)
  type = term.attributes['type']
  authority = term.attributes['authority']
  type.respond_to?(:value) && type.value == 'code' &&
    authority.respond_to?(:value) && authority.value == 'marccountry' &&
    MARC_COUNTRIES.include?(term.text.strip)
end
|
|
112
|
+
|
|
113
|
+
# DATE processing methods ------
|
|
114
|
+
|
|
115
|
+
def date_str(origin_info_node)
|
|
116
|
+
date_vals = origin_info_date_vals(origin_info_node)
|
|
117
|
+
return if date_vals.empty?
|
|
118
|
+
date_vals.map(&:strip).join(' ')
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def origin_info_date_vals(origin_info_node)
|
|
122
|
+
date_field_keys.map do |date_field|
|
|
123
|
+
next unless origin_info_node.respond_to?(date_field)
|
|
124
|
+
date_elements = origin_info_node.send(date_field)
|
|
125
|
+
date_elements_display_vals(date_elements) if date_elements.present?
|
|
126
|
+
end.compact.flatten
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def date_elements_display_vals(ng_date_elements)
|
|
130
|
+
apply_date_qualifier_decoration(
|
|
131
|
+
dedup_dates(
|
|
132
|
+
join_date_ranges(
|
|
133
|
+
process_decade_century_dates(
|
|
134
|
+
process_bc_ad_dates(
|
|
135
|
+
process_encoded_dates(ignore_bad_dates(ng_date_elements))
|
|
136
|
+
)
|
|
137
|
+
)
|
|
138
|
+
)
|
|
139
|
+
)
|
|
140
|
+
)
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def date_field_keys
|
|
144
|
+
[:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
def ignore_bad_dates(ng_date_elements)
|
|
148
|
+
ng_date_elements.select do |ng_date_element|
|
|
149
|
+
val = ng_date_element.text.strip
|
|
150
|
+
val != '9999' && val != '0000-00-00' && val != 'uuuu'
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
def process_encoded_dates(ng_date_elements)
|
|
155
|
+
ng_date_elements.map do |ng_date_element|
|
|
156
|
+
if date_is_w3cdtf?(ng_date_element)
|
|
157
|
+
process_w3cdtf_date(ng_date_element)
|
|
158
|
+
elsif date_is_iso8601?(ng_date_element)
|
|
159
|
+
process_iso8601_date(ng_date_element)
|
|
160
|
+
else
|
|
161
|
+
ng_date_element
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
# note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
|
|
167
|
+
# "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
|
|
168
|
+
# There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
|
|
169
|
+
# See also https://consul.stanford.edu/display/chimera/MODS+display+rules for edtf
|
|
170
|
+
def process_bc_ad_dates(ng_date_elements)
|
|
171
|
+
ng_date_elements.map do |ng_date_element|
|
|
172
|
+
case
|
|
173
|
+
when date_is_edtf?(ng_date_element) && ng_date_element.text.strip == '0'
|
|
174
|
+
ng_date_element.content = "1 B.C."
|
|
175
|
+
when date_is_bc_edtf?(ng_date_element)
|
|
176
|
+
year = ng_date_element.text.strip.gsub(/^-0*/, '').to_i + 1
|
|
177
|
+
ng_date_element.content = "#{year} B.C."
|
|
178
|
+
when date_is_ad?(ng_date_element)
|
|
179
|
+
ng_date_element.content = "#{ng_date_element.text.strip.gsub(/^0*/, '')} A.D."
|
|
180
|
+
end
|
|
181
|
+
ng_date_element
|
|
182
|
+
end
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
def process_decade_century_dates(ng_date_elements)
|
|
186
|
+
ng_date_elements.map do |ng_date_element|
|
|
187
|
+
if date_is_decade?(ng_date_element)
|
|
188
|
+
process_decade_date(ng_date_element)
|
|
189
|
+
elsif date_is_century?(ng_date_element)
|
|
190
|
+
process_century_date(ng_date_element)
|
|
191
|
+
else
|
|
192
|
+
ng_date_element
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
def join_date_ranges(ng_date_elements)
|
|
198
|
+
if dates_are_range?(ng_date_elements)
|
|
199
|
+
start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
|
|
200
|
+
end_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'end' }
|
|
201
|
+
ng_date_elements.map do |date|
|
|
202
|
+
date = date.clone # clone the date object so we don't append the same one
|
|
203
|
+
if normalize_date(date.text) == normalize_date(start_date.text)
|
|
204
|
+
date.content = [start_date.text, end_date.text].join(' - ')
|
|
205
|
+
date
|
|
206
|
+
elsif normalize_date(date.text) != normalize_date(end_date.text)
|
|
207
|
+
date
|
|
208
|
+
end
|
|
209
|
+
end.compact
|
|
210
|
+
elsif dates_are_open_range?(ng_date_elements)
|
|
211
|
+
start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
|
|
212
|
+
ng_date_elements.map do |date|
|
|
213
|
+
date = date.clone # clone the date object so we don't append the same one
|
|
214
|
+
date.content = "#{start_date.text}-" if date.text == start_date.text
|
|
215
|
+
date
|
|
216
|
+
end
|
|
217
|
+
else
|
|
218
|
+
ng_date_elements
|
|
219
|
+
end
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
# Collapse a set of date elements that contains duplicates down to one representative.
#
# Dates are compared after normalize_date. If no two dates are equal the input is returned
# unchanged. Otherwise a single element is kept, chosen in this order: the first element
# with a qualifier attribute, else the first element without an encoding attribute, else
# the first element.
#
# @param ng_date_elements [Array] date elements responding to text and attributes
# @return [Array] the input, or a one-element Array holding the preferred element
def dedup_dates(ng_date_elements)
  normalized = ng_date_elements.map { |d| normalize_date(d.text) }
  return ng_date_elements if normalized == normalized.uniq
  preferred = ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) } ||
              ng_date_elements.find { |d| !d.attributes['encoding'] } ||
              ng_date_elements.first
  [preferred]
end
|
|
236
|
+
|
|
237
|
+
def apply_date_qualifier_decoration(ng_date_elements)
|
|
238
|
+
return_fields = ng_date_elements.map do |date|
|
|
239
|
+
date = date.clone
|
|
240
|
+
if date_is_approximate?(date)
|
|
241
|
+
date.content = "[ca. #{date.text}]"
|
|
242
|
+
elsif date_is_questionable?(date)
|
|
243
|
+
date.content = "[#{date.text}?]"
|
|
244
|
+
elsif date_is_inferred?(date)
|
|
245
|
+
date.content = "[#{date.text}]"
|
|
246
|
+
end
|
|
247
|
+
date
|
|
248
|
+
end
|
|
249
|
+
return_fields.map(&:text)
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
# @return [Boolean, nil] truthy if the date element has qualifier="approximate"
def date_is_approximate?(ng_date_element)
  date_has_qualifier?(ng_date_element, 'approximate')
end

# @return [Boolean, nil] truthy if the date element has qualifier="questionable"
def date_is_questionable?(ng_date_element)
  date_has_qualifier?(ng_date_element, 'questionable')
end

# @return [Boolean, nil] truthy if the date element has qualifier="inferred"
def date_is_inferred?(ng_date_element)
  date_has_qualifier?(ng_date_element, 'inferred')
end

# Shared check behind the three qualifier predicates above.
#
# @param ng_date_element a date element responding to attributes
# @param qualifier [String] expected value of the qualifier attribute (compared exactly)
# @return [Boolean, nil] nil when the attribute is absent, otherwise whether it matches
def date_has_qualifier?(ng_date_element, qualifier)
  attribute = ng_date_element.attributes['qualifier']
  attribute && attribute.respond_to?(:value) && attribute.value == qualifier
end
|
|
269
|
+
|
|
270
|
+
# @param ng_date_elements [Array] date elements responding to attributes
# @return [Boolean] true if some element has point="start" and none has point="end"
def dates_are_open_range?(ng_date_elements)
  points = date_point_values(ng_date_elements)
  points.include?('start') && !points.include?('end')
end

# @param ng_date_elements [Array] date elements responding to attributes
# @return [Boolean] true if the elements include both a point="start" and a point="end"
def dates_are_range?(ng_date_elements)
  points = date_point_values(ng_date_elements)
  points.include?('start') && points.include?('end')
end

# Single place where the point attribute is read, so both range predicates agree on it.
#
# @return [Array<String, nil>] the point attribute value per element (nil when absent)
def date_point_values(ng_date_elements)
  ng_date_elements.map do |date|
    point = date.attributes['point']
    point.value if point.respond_to?(:value)
  end
end
|
|
291
|
+
|
|
292
|
+
# Reformat a w3cdtf encoded date element for display.
#
# 'YYYY-MM-DD' becomes full_date_format (e.g. 'February 5, 2013') and 'YYYY-MM' becomes
# short_date_format (e.g. 'February 2013'). Anything else, including a value that looks
# right but is not a real calendar date, keeps its original content.
# The stripped text is used both for recognising the shape and for parsing (the raw text
# used to be parsed, so whitespace around a 'YYYY-MM' value produced a malformed string),
# and the patterns are anchored to the whole string rather than to a line.
#
# @param ng_date_element a date element responding to clone, text, content and content=
# @return a modified copy of the element; the argument itself is left untouched
def process_w3cdtf_date(ng_date_element)
  ng_date_element = ng_date_element.clone
  date_text = ng_date_element.text.strip
  ng_date_element.content =
    begin
      case date_text
      when /\A\d{4}-\d{2}-\d{2}\z/
        Date.parse(date_text).strftime(full_date_format)
      when /\A\d{4}-\d{2}\z/
        Date.parse("#{date_text}-01").strftime(short_date_format)
      else
        ng_date_element.content
      end
    rescue ArgumentError, RangeError
      # not a real date (e.g. 2013-02-31): best effort, show the value as given
      ng_date_element.content
    end
  ng_date_element
end
|
|
307
|
+
|
|
308
|
+
# Reformat an iso8601 encoded date element for display.
#
# A value of eight or more digits (YYYYMMDD, optionally followed by time digits) becomes
# full_date_format (e.g. 'February 5, 2013'). Anything else, including digits that do not
# form a real date, keeps its original content.
# The stripped text is used both for recognising the shape and for parsing, and the
# pattern is anchored to the whole string rather than to a line.
#
# @param ng_date_element a date element responding to clone, text, content and content=
# @return a modified copy of the element; the argument itself is left untouched
def process_iso8601_date(ng_date_element)
  ng_date_element = ng_date_element.clone
  date_text = ng_date_element.text.strip
  ng_date_element.content =
    begin
      if date_text =~ /\A\d{8,}\z/
        Date.parse(date_text).strftime(full_date_format)
      else
        ng_date_element.content
      end
    rescue ArgumentError, RangeError
      # digits that are not a real date (e.g. 20131399): show the value as given
      ng_date_element.content
    end
  ng_date_element
end
|
|
321
|
+
|
|
322
|
+
DECADE_4CHAR_REGEXP = Regexp.new('(^|.*\D)(\d{3}[u\-?x])(.*)')
|
|
323
|
+
|
|
324
|
+
# strings like 195x, 195u, 195- and 195? become '1950s' in the ng_date_element content
|
|
325
|
+
def process_decade_date(ng_date_element)
|
|
326
|
+
my_ng_date_element = ng_date_element.clone
|
|
327
|
+
my_ng_date_element.content = begin
|
|
328
|
+
orig_date_str = ng_date_element.text.strip
|
|
329
|
+
# note: not calling DateParsing.display_str_for_decade directly because non-year text is lost
|
|
330
|
+
decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
|
|
331
|
+
if decade_matches
|
|
332
|
+
decade_str = decade_matches[2]
|
|
333
|
+
changed_to_zero = decade_str.to_s.tr('u\-?x', '0') if decade_str
|
|
334
|
+
zeroth_year = DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
|
|
335
|
+
new_decade_str = "#{zeroth_year}s" if zeroth_year
|
|
336
|
+
my_ng_date_element.content = "#{decade_matches[1]}#{new_decade_str}#{decade_matches[3]}"
|
|
337
|
+
else
|
|
338
|
+
my_ng_date_element.content
|
|
339
|
+
end
|
|
340
|
+
rescue
|
|
341
|
+
my_ng_date_element.content
|
|
342
|
+
end
|
|
343
|
+
my_ng_date_element
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
CENTURY_4CHAR_REGEXP = Regexp.new('(^|.*\D)((\d{1,2})[u\-]{2})(.*)')
|
|
347
|
+
|
|
348
|
+
# strings like 18uu, 18-- become '19th century' in the ng_date_element content
|
|
349
|
+
def process_century_date(ng_date_element)
|
|
350
|
+
my_ng_date_element = ng_date_element.clone
|
|
351
|
+
my_ng_date_element.content = begin
|
|
352
|
+
orig_date_str = ng_date_element.text.strip
|
|
353
|
+
# note: not calling DateParsing.display_str_for_century directly because non-year text is lost
|
|
354
|
+
century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP) if orig_date_str
|
|
355
|
+
if century_matches
|
|
356
|
+
require 'active_support/core_ext/integer/inflections'
|
|
357
|
+
new_century_str = "#{(century_matches[3].to_i + 1).ordinalize} century"
|
|
358
|
+
my_ng_date_element.content = "#{century_matches[1]}#{new_century_str}#{century_matches[4]}"
|
|
359
|
+
else
|
|
360
|
+
my_ng_date_element.content
|
|
361
|
+
end
|
|
362
|
+
rescue
|
|
363
|
+
my_ng_date_element.content
|
|
364
|
+
end
|
|
365
|
+
my_ng_date_element
|
|
366
|
+
end
|
|
367
|
+
|
|
368
|
+
def field_is_encoded?(ng_element, encoding)
|
|
369
|
+
ng_element.attributes['encoding'] &&
|
|
370
|
+
ng_element.attributes['encoding'].respond_to?(:value) &&
|
|
371
|
+
ng_element.attributes['encoding'].value.downcase == encoding
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
def date_is_bc_edtf?(ng_date_element)
|
|
375
|
+
ng_date_element.text.strip.start_with?('-') && date_is_edtf?(ng_date_element)
|
|
376
|
+
end
|
|
377
|
+
|
|
378
|
+
def date_is_ad?(ng_date_element)
|
|
379
|
+
str = ng_date_element.text.strip.gsub(/^0*/, '')
|
|
380
|
+
str.present? && str.length < 4 && !str.match('A.D.')
|
|
381
|
+
end
|
|
382
|
+
|
|
383
|
+
def date_is_edtf?(ng_date_element)
|
|
384
|
+
field_is_encoded?(ng_date_element, 'edtf')
|
|
385
|
+
end
|
|
386
|
+
|
|
387
|
+
def date_is_w3cdtf?(ng_date_element)
|
|
388
|
+
field_is_encoded?(ng_date_element, 'w3cdtf')
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
def date_is_iso8601?(ng_date_element)
|
|
392
|
+
field_is_encoded?(ng_date_element, 'iso8601')
|
|
393
|
+
end
|
|
394
|
+
|
|
395
|
+
# @return true if decade string needs tweaking for display
|
|
396
|
+
def date_is_decade?(ng_date_element)
|
|
397
|
+
ng_date_element.text.strip.match(DECADE_4CHAR_REGEXP)
|
|
398
|
+
end
|
|
399
|
+
|
|
400
|
+
# @return true if century string needs tweaking for display
|
|
401
|
+
def date_is_century?(ng_date_element)
|
|
402
|
+
ng_date_element.text.strip.match(CENTURY_4CHAR_REGEXP)
|
|
403
|
+
end
|
|
404
|
+
|
|
405
|
+
def full_date_format(full_date_format = '%B %-d, %Y')
|
|
406
|
+
@full_date_format ||= full_date_format
|
|
407
|
+
end
|
|
408
|
+
|
|
409
|
+
def short_date_format(short_date_format = '%B %Y')
|
|
410
|
+
@short_date_format ||= short_date_format
|
|
411
|
+
end
|
|
412
|
+
|
|
413
|
+
def normalize_date(date_str)
|
|
414
|
+
date_str.strip.gsub(/^\[*ca\.\s*|c|\[|\]|\?/, '')
|
|
415
|
+
end
|
|
416
|
+
end
|
|
417
|
+
end
|
|
418
|
+
end
|