stanford-mods 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 691c6e601bb934cb7a1e396d901a6f8205c54b35
4
- data.tar.gz: 5efa051bf96155491d58b1bc45c335c138f6b171
3
+ metadata.gz: abcbc3357a121647ed103d8341b2cdb48d0c600d
4
+ data.tar.gz: 789c2fa194b7c837c7cdf10898eeab3b4b3636b9
5
5
  SHA512:
6
- metadata.gz: d6eb960553c5e11b42ebe3a3ddc86ab41a60ce37d10511380914c95e4bcbbf8ba1b71547313e8be6af97d4850ca4310032b34de920e6075ba33de0e2601bc1ff
7
- data.tar.gz: 307a7186e583b23980bb321f614bdc2c8b92dc51895a1cbc959dfba5f7838a5822e71726332b2ba0330b3edd7d0b2d992dc3f9d1a542259004fffcd33fb441eb
6
+ metadata.gz: 14231328428d5a250701cdac1f16977ce67ddc6db89fa465635bd7ad3171391c7b11921146020c8992f2af4f9647abbaa0f4f11954ee6a30fb039dd4e1426c32
7
+ data.tar.gz: 253c2201ff7f6ffc206226523ae6bcac56eb48cb386bd93789e2c1babf03f5808adaddcdf9881222b83a76e89567c551ac9c72568e29e784977c9eb117df9dea
data/.travis.yml CHANGED
@@ -5,7 +5,7 @@ rvm:
5
5
  - 2.2.0
6
6
  - 2.1.5
7
7
  - 2.0.0
8
- - 1.9.3 # argo, FRDA
8
+ - 1.9.3 # FRDA
9
9
  # we used to use jruby for merged DOR + MARC records, but no more ...
10
10
  - jruby-head
11
11
  # we also test against ruby-head, which may be unstable.
@@ -0,0 +1,387 @@
1
+ # encoding: utf-8
2
+
3
# Maps two- and three-character MARC Country/Location codes to user friendly names.
# Includes discontinued codes, because they're probably in our data.
# From https://www.loc.gov/marc/countries/countries_code.html 01/20/2016
#
# Hyphenated names (e.g. 'Timor-Leste', 'Guinea-Bissau') are spelled with a
# plain ASCII hyphen. The table is frozen so that a lookup site cannot
# accidentally add, remove or replace entries in this shared constant.
MARC_COUNTRIES = {
  'aa' => 'Albania',
  'abc' => 'Alberta',
  'ac' => 'Ashmore and Cartier Islands',
  'aca' => 'Australian Capital Territory',
  'ae' => 'Algeria',
  'af' => 'Afghanistan',
  'ag' => 'Argentina',
  'ai' => 'Armenia (Republic)',
  'air' => 'Armenian S.S.R.',
  'aj' => 'Azerbaijan',
  'ajr' => 'Azerbaijan S.S.R.',
  'aku' => 'Alaska',
  'alu' => 'Alabama',
  'am' => 'Anguilla',
  'an' => 'Andorra',
  'ao' => 'Angola',
  'aq' => 'Antigua and Barbuda',
  'aru' => 'Arkansas',
  'as' => 'American Samoa',
  'at' => 'Australia',
  'au' => 'Austria',
  'aw' => 'Aruba',
  'ay' => 'Antarctica',
  'azu' => 'Arizona',
  'ba' => 'Bahrain',
  'bb' => 'Barbados',
  'bcc' => 'British Columbia',
  'bd' => 'Burundi',
  'be' => 'Belgium',
  'bf' => 'Bahamas',
  'bg' => 'Bangladesh',
  'bh' => 'Belize',
  'bi' => 'British Indian Ocean Territory',
  'bl' => 'Brazil',
  'bm' => 'Bermuda Islands',
  'bn' => 'Bosnia and Herzegovina',
  'bo' => 'Bolivia',
  'bp' => 'Solomon Islands',
  'br' => 'Burma',
  'bs' => 'Botswana',
  'bt' => 'Bhutan',
  'bu' => 'Bulgaria',
  'bv' => 'Bouvet Island',
  'bw' => 'Belarus',
  'bwr' => 'Byelorussian S.S.R.',
  'bx' => 'Brunei',
  'ca' => 'Caribbean Netherlands',
  'cau' => 'California',
  'cb' => 'Cambodia',
  'cc' => 'China',
  'cd' => 'Chad',
  'ce' => 'Sri Lanka',
  'cf' => 'Congo (Brazzaville)',
  'cg' => 'Congo (Democratic Republic)',
  'ch' => 'China (Republic : 1949- )',
  'ci' => 'Croatia',
  'cj' => 'Cayman Islands',
  'ck' => 'Colombia',
  'cl' => 'Chile',
  'cm' => 'Cameroon',
  'cn' => 'Canada',
  'co' => 'Curaçao',
  'cou' => 'Colorado',
  'cp' => 'Canton and Enderbury Islands',
  'cq' => 'Comoros',
  'cr' => 'Costa Rica',
  'cs' => 'Czechoslovakia',
  'ctu' => 'Connecticut',
  'cu' => 'Cuba',
  'cv' => 'Cabo Verde',
  'cw' => 'Cook Islands',
  'cx' => 'Central African Republic',
  'cy' => 'Cyprus',
  'cz' => 'Canal Zone',
  'dcu' => 'District of Columbia',
  'deu' => 'Delaware',
  'dk' => 'Denmark',
  'dm' => 'Benin',
  'dq' => 'Dominica',
  'dr' => 'Dominican Republic',
  'ea' => 'Eritrea',
  'ec' => 'Ecuador',
  'eg' => 'Equatorial Guinea',
  'em' => 'Timor-Leste',
  'enk' => 'England',
  'er' => 'Estonia',
  'err' => 'Estonia',
  'es' => 'El Salvador',
  'et' => 'Ethiopia',
  'fa' => 'Faroe Islands',
  'fg' => 'French Guiana',
  'fi' => 'Finland',
  'fj' => 'Fiji',
  'fk' => 'Falkland Islands',
  'flu' => 'Florida',
  'fm' => 'Micronesia (Federated States)',
  'fp' => 'French Polynesia',
  'fr' => 'France',
  'fs' => 'Terres australes et antarctiques françaises',
  'ft' => 'Djibouti',
  'gau' => 'Georgia',
  'gb' => 'Kiribati',
  'gd' => 'Grenada',
  'ge' => 'Germany (East)',
  'gh' => 'Ghana',
  'gi' => 'Gibraltar',
  'gl' => 'Greenland',
  'gm' => 'Gambia',
  'gn' => 'Gilbert and Ellice Islands',
  'go' => 'Gabon',
  'gp' => 'Guadeloupe',
  'gr' => 'Greece',
  'gs' => 'Georgia (Republic)',
  'gsr' => 'Georgian S.S.R.',
  'gt' => 'Guatemala',
  'gu' => 'Guam',
  'gv' => 'Guinea',
  'gw' => 'Germany',
  'gy' => 'Guyana',
  'gz' => 'Gaza Strip',
  'hiu' => 'Hawaii',
  'hk' => 'Hong Kong',
  'hm' => 'Heard and McDonald Islands',
  'ho' => 'Honduras',
  'ht' => 'Haiti',
  'hu' => 'Hungary',
  'iau' => 'Iowa',
  'ic' => 'Iceland',
  'idu' => 'Idaho',
  'ie' => 'Ireland',
  'ii' => 'India',
  'ilu' => 'Illinois',
  'inu' => 'Indiana',
  'io' => 'Indonesia',
  'iq' => 'Iraq',
  'ir' => 'Iran',
  'is' => 'Israel',
  'it' => 'Italy',
  'iu' => 'Israel-Syria Demilitarized Zones',
  'iv' => "Côte d'Ivoire",
  'iw' => 'Israel-Jordan Demilitarized Zones',
  'iy' => 'Iraq-Saudi Arabia Neutral Zone',
  'ja' => 'Japan',
  'ji' => 'Johnston Atoll',
  'jm' => 'Jamaica',
  'jn' => 'Jan Mayen',
  'jo' => 'Jordan',
  'ke' => 'Kenya',
  'kg' => 'Kyrgyzstan',
  'kgr' => 'Kirghiz S.S.R.',
  'kn' => 'Korea (North)',
  'ko' => 'Korea (South)',
  'ksu' => 'Kansas',
  'ku' => 'Kuwait',
  'kv' => 'Kosovo',
  'kyu' => 'Kentucky',
  'kz' => 'Kazakhstan',
  'kzr' => 'Kazakh S.S.R.',
  'lau' => 'Louisiana',
  'lb' => 'Liberia',
  'le' => 'Lebanon',
  'lh' => 'Liechtenstein',
  'li' => 'Lithuania',
  'lir' => 'Lithuania',
  'ln' => 'Central and Southern Line Islands',
  'lo' => 'Lesotho',
  'ls' => 'Laos',
  'lu' => 'Luxembourg',
  'lv' => 'Latvia',
  'lvr' => 'Latvia',
  'ly' => 'Libya',
  'mau' => 'Massachusetts',
  'mbc' => 'Manitoba',
  'mc' => 'Monaco',
  'mdu' => 'Maryland',
  'meu' => 'Maine',
  'mf' => 'Mauritius',
  'mg' => 'Madagascar',
  'mh' => 'Macao',
  'miu' => 'Michigan',
  'mj' => 'Montserrat',
  'mk' => 'Oman',
  'ml' => 'Mali',
  'mm' => 'Malta',
  'mnu' => 'Minnesota',
  'mo' => 'Montenegro',
  'mou' => 'Missouri',
  'mp' => 'Mongolia',
  'mq' => 'Martinique',
  'mr' => 'Morocco',
  'msu' => 'Mississippi',
  'mtu' => 'Montana',
  'mu' => 'Mauritania',
  'mv' => 'Moldova',
  'mvr' => 'Moldavian S.S.R.',
  'mw' => 'Malawi',
  'mx' => 'Mexico',
  'my' => 'Malaysia',
  'mz' => 'Mozambique',
  'na' => 'Netherlands Antilles',
  'nbu' => 'Nebraska',
  'ncu' => 'North Carolina',
  'ndu' => 'North Dakota',
  'ne' => 'Netherlands',
  'nfc' => 'Newfoundland and Labrador',
  'ng' => 'Niger',
  'nhu' => 'New Hampshire',
  'nik' => 'Northern Ireland',
  'nju' => 'New Jersey',
  'nkc' => 'New Brunswick',
  'nl' => 'New Caledonia',
  'nm' => 'Northern Mariana Islands',
  'nmu' => 'New Mexico',
  'nn' => 'Vanuatu',
  'no' => 'Norway',
  'np' => 'Nepal',
  'nq' => 'Nicaragua',
  'nr' => 'Nigeria',
  'nsc' => 'Nova Scotia',
  'ntc' => 'Northwest Territories',
  'nu' => 'Nauru',
  'nuc' => 'Nunavut',
  'nvu' => 'Nevada',
  'nw' => 'Northern Mariana Islands',
  'nx' => 'Norfolk Island',
  'nyu' => 'New York (State)',
  'nz' => 'New Zealand',
  'ohu' => 'Ohio',
  'oku' => 'Oklahoma',
  'onc' => 'Ontario',
  'oru' => 'Oregon',
  'ot' => 'Mayotte',
  'pau' => 'Pennsylvania',
  'pc' => 'Pitcairn Island',
  'pe' => 'Peru',
  'pf' => 'Paracel Islands',
  'pg' => 'Guinea-Bissau',
  'ph' => 'Philippines',
  'pic' => 'Prince Edward Island',
  'pk' => 'Pakistan',
  'pl' => 'Poland',
  'pn' => 'Panama',
  'po' => 'Portugal',
  'pp' => 'Papua New Guinea',
  'pr' => 'Puerto Rico',
  'pt' => 'Portuguese Timor',
  'pw' => 'Palau',
  'py' => 'Paraguay',
  'qa' => 'Qatar',
  'qea' => 'Queensland',
  'quc' => 'Québec (Province)',
  'rb' => 'Serbia',
  're' => 'Réunion',
  'rh' => 'Zimbabwe',
  'riu' => 'Rhode Island',
  'rm' => 'Romania',
  'ru' => 'Russia (Federation)',
  'rur' => 'Russian S.F.S.R.',
  'rw' => 'Rwanda',
  'ry' => 'Ryukyu Islands, Southern',
  'sa' => 'South Africa',
  'sb' => 'Svalbard',
  'sc' => 'Saint-Barthélemy',
  'scu' => 'South Carolina',
  'sd' => 'South Sudan',
  'sdu' => 'South Dakota',
  'se' => 'Seychelles',
  'sf' => 'Sao Tome and Principe',
  'sg' => 'Senegal',
  'sh' => 'Spanish North Africa',
  'si' => 'Singapore',
  'sj' => 'Sudan',
  'sk' => 'Sikkim',
  'sl' => 'Sierra Leone',
  'sm' => 'San Marino',
  'sn' => 'Sint Maarten',
  'snc' => 'Saskatchewan',
  'so' => 'Somalia',
  'sp' => 'Spain',
  'sq' => 'Swaziland',
  'sr' => 'Surinam',
  'ss' => 'Western Sahara',
  'st' => 'Saint-Martin',
  'stk' => 'Scotland',
  'su' => 'Saudi Arabia',
  'sv' => 'Swan Islands',
  'sw' => 'Sweden',
  'sx' => 'Namibia',
  'sy' => 'Syria',
  'sz' => 'Switzerland',
  'ta' => 'Tajikistan',
  'tar' => 'Tajik S.S.R.',
  'tc' => 'Turks and Caicos Islands',
  'tg' => 'Togo',
  'th' => 'Thailand',
  'ti' => 'Tunisia',
  'tk' => 'Turkmenistan',
  'tkr' => 'Turkmen S.S.R.',
  'tl' => 'Tokelau',
  'tma' => 'Tasmania',
  'tnu' => 'Tennessee',
  'to' => 'Tonga',
  'tr' => 'Trinidad and Tobago',
  'ts' => 'United Arab Emirates',
  'tt' => 'Trust Territory of the Pacific Islands',
  'tu' => 'Turkey',
  'tv' => 'Tuvalu',
  'txu' => 'Texas',
  'tz' => 'Tanzania',
  'ua' => 'Egypt',
  'uc' => 'United States Misc. Caribbean Islands',
  'ug' => 'Uganda',
  'ui' => 'United Kingdom Misc. Islands',
  'uik' => 'United Kingdom Misc. Islands',
  'uk' => 'United Kingdom',
  'un' => 'Ukraine',
  'unr' => 'Ukraine',
  'up' => 'United States Misc. Pacific Islands',
  'ur' => 'Soviet Union',
  'us' => 'United States',
  'utu' => 'Utah',
  'uv' => 'Burkina Faso',
  'uy' => 'Uruguay',
  'uz' => 'Uzbekistan',
  'uzr' => 'Uzbek S.S.R.',
  'vau' => 'Virginia',
  'vb' => 'British Virgin Islands',
  'vc' => 'Vatican City',
  've' => 'Venezuela',
  'vi' => 'Virgin Islands of the United States',
  'vm' => 'Vietnam',
  'vn' => 'Vietnam, North',
  'vp' => 'Various places',
  'vra' => 'Victoria',
  'vs' => 'Vietnam, South',
  'vtu' => 'Vermont',
  'wau' => 'Washington (State)',
  'wb' => 'West Berlin',
  'wea' => 'Western Australia',
  'wf' => 'Wallis and Futuna',
  'wiu' => 'Wisconsin',
  'wj' => 'West Bank of the Jordan River',
  'wk' => 'Wake Island',
  'wlk' => 'Wales',
  'ws' => 'Samoa',
  'wvu' => 'West Virginia',
  'wyu' => 'Wyoming',
  'xa' => 'Christmas Island (Indian Ocean)',
  'xb' => 'Cocos (Keeling) Islands',
  'xc' => 'Maldives',
  'xd' => 'Saint Kitts-Nevis',
  'xe' => 'Marshall Islands',
  'xf' => 'Midway Islands',
  'xga' => 'Coral Sea Islands Territory',
  'xh' => 'Niue',
  'xi' => 'Saint Kitts-Nevis-Anguilla',
  'xj' => 'Saint Helena',
  'xk' => 'Saint Lucia',
  'xl' => 'Saint Pierre and Miquelon',
  'xm' => 'Saint Vincent and the Grenadines',
  'xn' => 'Macedonia',
  'xna' => 'New South Wales',
  'xo' => 'Slovakia',
  'xoa' => 'Northern Territory',
  'xp' => 'Spratly Island',
  'xr' => 'Czech Republic',
  'xra' => 'South Australia',
  'xs' => 'South Georgia and the South Sandwich Islands',
  'xv' => 'Slovenia',
  # 'xx' => 'No place, unknown, or undetermined', # deliberately left unmapped
  'xxc' => 'Canada',
  'xxk' => 'United Kingdom',
  'xxr' => 'Soviet Union',
  'xxu' => 'United States',
  'ye' => 'Yemen',
  'ykc' => 'Yukon Territory',
  'ys' => "Yemen (People's Democratic Republic)",
  'yu' => 'Serbia and Montenegro',
  'za' => 'Zambia'
}.freeze
data/lib/stanford-mods.rb CHANGED
@@ -2,6 +2,7 @@ require 'mods'
2
2
  require 'stanford-mods/date_parsing'
3
3
  require 'stanford-mods/coordinate'
4
4
  require 'stanford-mods/geo_spatial'
5
+ require 'stanford-mods/imprint'
5
6
  require 'stanford-mods/name'
6
7
  require 'stanford-mods/origin_info'
7
8
  require 'stanford-mods/physical_location'
@@ -281,10 +281,16 @@ module Stanford
281
281
 
282
282
  # get display value for date String containing yyy, yy, y, -y, -yy, -yyy
283
283
  # negative number strings will be changed to B.C. strings
284
+ # note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
285
+ # "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
286
+ # There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
287
+ # See also https://consul.stanford.edu/display/chimera/MODS+display+rules for etdf
284
288
  def display_str_for_early_numeric
285
289
  return unless orig_date_str.match(EARLY_NUMERIC)
290
+ # return 1 B.C. when the date is 0 since there is no 0 year
291
+ return '1 B.C.' if orig_date_str == '0'
286
292
  # negative number becomes B.C.
287
- return "#{orig_date_str[1..-1]} B.C." if orig_date_str.match(/^\-/)
293
+ return "#{orig_date_str[1..-1].to_i + 1} B.C." if orig_date_str.match(/^\-/)
288
294
  # remove leading 0s from early dates
289
295
  "#{orig_date_str.to_i} A.D."
290
296
  end
@@ -0,0 +1,418 @@
1
+ module Stanford
2
+ module Mods
3
+ ##
4
+ # Get the imprint information from originInfo elements (and sub elements) to create display strings
5
+ #
6
+ # This code is adapted from the mods_display gem. In a perfect world, this
7
+ # code would make use of the date_parsing class instead of reimplementing pieces of it;
8
+ # however, the date_parsing class only does years, and this does finer tuned dates and also
9
+ # reformats them according to the encoding.
10
+ class Imprint
11
# Keeps hold of the originInfo nodes that every imprint statement is built from.
# @param [Nokogiri::XML::NodeSet] originInfo_ng_nodeset of originInfo nodes
def initialize(originInfo_ng_nodeset)
  @originInfo_ng_nodeset = originInfo_ng_nodeset
end
15
+
16
+ require 'marc_countries'
17
+
18
# Builds one statement per originInfo node, shaped as
# "edition - place : publisher, dates"; parts that are missing are left out.
# @return Array<String> each String is an imprint statement from a single originInfo element;
#   nodes that produce an empty statement are skipped
def imprint_statements
  statements = @originInfo_ng_nodeset.map do |node|
    edition = edition_vals_str(node)
    place = place_vals_str(node)
    publisher = publisher_vals_str(node)
    dates = date_str(node)

    imprint = compact_and_join_with_delimiter([place, publisher], ' : ')
    imprint = compact_and_join_with_delimiter([edition, imprint], ' - ')
    compact_and_join_with_delimiter([imprint, dates], ', ')
  end
  statements.reject(&:empty?)
end
35
+
36
# All imprint statements joined into a single display String.
# The statements are built only once per call, and emptiness is checked with
# plain Array#empty? so no ActiveSupport extension is needed here.
# @return [String, nil] statements separated by '; ', or nil when there are none
def display_str
  statements = imprint_statements
  statements.join('; ') unless statements.empty?
end
39
+
40
+ private
41
+
42
# Joins the non-blank values with the delimiter, without doubling punctuation.
#
# When the delimiter itself ends in punctuation (e.g. ' : ' or ', '), a value
# that already ends in terminating punctuation is followed by a single space
# instead of the delimiter. The input Strings are never modified, so frozen
# Strings are accepted and callers keep their original values.
#
# @param [Array<String, nil>] values nil and whitespace-only entries are dropped
# @param [String] delimiter
# @return [String] the joined value ('' when nothing is left to join)
def compact_and_join_with_delimiter(values, delimiter)
  compact_values = values.compact.reject { |v| v.strip.empty? }
  if compact_values.length == 1 || !ends_in_terminating_punctuation?(delimiter)
    return compact_values.join(delimiter)
  end

  last_index = compact_values.length - 1
  compact_values.each_with_index.map do |value, i|
    # the last value gets a space too; the final strip removes it again
    separator = (i == last_index || ends_in_terminating_punctuation?(value)) ? ' ' : delimiter
    value + separator
  end.join.strip
end

# @param [String] value
# @return [Boolean] true if value, ignoring surrounding whitespace, ends with . , : or ;
def ends_in_terminating_punctuation?(value)
  value.strip.end_with?('.', ',', ':', ';')
end
59
+
60
# @param origin_info_node an originInfo node that responds to #edition
# @return [String] the text of the non-blank edition elements, space separated
#   ('' when there are none)
def edition_vals_str(origin_info_node)
  edition_texts = origin_info_node.edition.map(&:text)
  edition_texts.reject { |text| text.strip.empty? }.join(' ').strip
end
65
+
66
# @param origin_info_node an originInfo node that responds to #publisher
# @return [String, nil] the non-blank publisher values joined with ' : ',
#   or nil when the publisher elements hold no text at all
def publisher_vals_str(origin_info_node)
  publisher_nodes = origin_info_node.publisher
  return if publisher_nodes.text.strip.empty?
  names = publisher_nodes.map(&:text).reject { |name| name.strip.empty? }
  compact_and_join_with_delimiter(names, ' : ')
end
73
+
74
+ # PLACE processing methods ------
75
+
76
# @param origin_info_node an originInfo node that responds to #place
# @return [String, nil] the non-blank place term values joined with ' : ',
#   or nil when the place elements hold no text at all
def place_vals_str(origin_info_node)
  return if origin_info_node.place.text.strip.empty?
  names = place_terms(origin_info_node).map(&:text).reject { |name| name.strip.empty? }
  compact_and_join_with_delimiter(names, ' : ')
end
83
+
84
# @param element an originInfo node that responds to #place
# @return [Boolean] true if at least one placeTerm either has no type
#   attribute or has type 'text'
def unencoded_place_terms?(element)
  element.place.placeTerm.any? do |term|
    type = term.attributes['type']
    !type.respond_to?(:value) || type.value == 'text'
  end
end
90
+
91
# Chooses the placeTerm elements to display for an originInfo node.
#
# If any unencoded (untyped or type 'text') term exists, only those terms are
# returned. Otherwise each marccountry code found in MARC_COUNTRIES is
# returned as a clone whose content is the country name; every other term is
# dropped.
#
# @param origin_info_element an originInfo node
# @return [Array] placeTerm nodes; [] when the node has no place/placeTerm accessors
def place_terms(origin_info_element)
  return [] unless origin_info_element.respond_to?(:place) &&
                   origin_info_element.place.respond_to?(:placeTerm)

  if unencoded_place_terms?(origin_info_element)
    origin_info_element.place.placeTerm.select do |term|
      type = term.attributes['type']
      !type.respond_to?(:value) || type.value == 'text'
    end
  else
    translated = origin_info_element.place.placeTerm.map do |term|
      type = term.attributes['type']
      authority = term.attributes['authority']
      next unless type.respond_to?(:value) &&
                  type.value == 'code' &&
                  authority.respond_to?(:value) &&
                  authority.value == 'marccountry' &&
                  MARC_COUNTRIES.include?(term.text.strip)
      # work on a clone so the term being read keeps its code
      named = term.clone
      named.content = MARC_COUNTRIES[named.text.strip]
      named
    end
    translated.compact
  end
end
112
+
113
+ # DATE processing methods ------
114
+
115
# @param origin_info_node an originInfo node
# @return [String, nil] the display values of all date fields, each stripped
#   and joined with a space; nil when there are no date values
def date_str(origin_info_node)
  display_vals = origin_info_date_vals(origin_info_node)
  display_vals.empty? ? nil : display_vals.map(&:strip).join(' ')
end
120
+
121
# Gathers the display values of every date field (see date_field_keys) that
# the node responds to and that has elements present.
# @param origin_info_node an originInfo node
# @return [Array<String>] flat list of display values, in date_field_keys order
def origin_info_date_vals(origin_info_node)
  collected = []
  date_field_keys.each do |field|
    next unless origin_info_node.respond_to?(field)
    elements = origin_info_node.send(field)
    next unless elements.present?
    collected << date_elements_display_vals(elements)
  end
  collected.compact.flatten
end
128
+
129
# Runs the date elements of one field through the display pipeline, in order:
# drop placeholder dates, format encoded dates, label B.C./A.D. years, spell
# out decades and centuries, join ranges, de-duplicate, then decorate by
# qualifier.
# @param ng_date_elements the date elements of a single date field
# @return [Array<String>] display values
def date_elements_display_vals(ng_date_elements)
  usable = ignore_bad_dates(ng_date_elements)
  decoded = process_encoded_dates(usable)
  with_eras = process_bc_ad_dates(decoded)
  spelled_out = process_decade_century_dates(with_eras)
  ranged = join_date_ranges(spelled_out)
  unique = dedup_dates(ranged)
  apply_date_qualifier_decoration(unique)
end
142
+
143
# @return [Array<Symbol>] the originInfo date fields used for the imprint,
#   in the order their values are displayed
def date_field_keys
  %w(dateIssued dateCreated dateCaptured copyrightDate).map(&:to_sym)
end
146
+
147
# Drops date elements whose stripped text is one of the placeholder values
# '9999', '0000-00-00' or 'uuuu'.
# @param ng_date_elements date elements
# @return [Array] the remaining date elements, in their original order
def ignore_bad_dates(ng_date_elements)
  placeholders = ['9999', '0000-00-00', 'uuuu']
  ng_date_elements.reject { |element| placeholders.include?(element.text.strip) }
end
153
+
154
# Reformats dates encoded as w3cdtf or iso8601; w3cdtf is checked first and
# all other elements are passed through untouched.
# @param ng_date_elements date elements
# @return [Array] date elements, one per input element
def process_encoded_dates(ng_date_elements)
  ng_date_elements.map do |element|
    next process_w3cdtf_date(element) if date_is_w3cdtf?(element)
    next process_iso8601_date(element) if date_is_iso8601?(element)
    element
  end
end
165
+
166
# Labels early years with their era: an edtf '0' becomes "1 B.C.", a negative
# edtf year -n becomes "(n + 1) B.C.", and a year for which date_is_ad? holds
# gets " A.D." appended (leading zeros removed).
#
# note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
# "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
# There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
# See also https://consul.stanford.edu/display/chimera/MODS+display+rules for edtf
#
# Elements that need a new display value are cloned first, so the elements
# passed in (and the document they belong to) keep their original text.
#
# @param ng_date_elements date elements
# @return [Array] date elements; unchanged ones are returned as-is, changed ones as clones
def process_bc_ad_dates(ng_date_elements)
  ng_date_elements.map do |ng_date_element|
    text = ng_date_element.text.strip
    era_str =
      if date_is_edtf?(ng_date_element) && text == '0'
        '1 B.C.'
      elsif date_is_bc_edtf?(ng_date_element)
        "#{text.gsub(/^-0*/, '').to_i + 1} B.C."
      elsif date_is_ad?(ng_date_element)
        "#{text.gsub(/^0*/, '')} A.D."
      end
    next ng_date_element unless era_str

    labelled = ng_date_element.clone
    labelled.content = era_str
    labelled
  end
end
184
+
185
# Spells out decade and century patterns; the decade check runs first and all
# other elements are passed through untouched.
# @param ng_date_elements date elements
# @return [Array] date elements, one per input element
def process_decade_century_dates(ng_date_elements)
  ng_date_elements.map do |element|
    next process_decade_date(element) if date_is_decade?(element)
    next process_century_date(element) if date_is_century?(element)
    element
  end
end
196
+
197
# Collapses point="start"/point="end" dates into one display value.
#
# Closed range: the element matching the start date (after normalize_date)
# becomes "start - end", the element matching the end date is dropped, and
# anything else is kept. Open range (start but no end): the start element
# becomes "start-". Without a start point the elements are returned as given.
#
# @param ng_date_elements date elements
# @return [Array] date elements (clones whenever a range was detected)
def join_date_ranges(ng_date_elements)
  find_point = lambda do |wanted|
    ng_date_elements.find do |d|
      d.attributes['point'] && d.attributes['point'].value == wanted
    end
  end

  if dates_are_range?(ng_date_elements)
    range_start = find_point.call('start')
    range_end = find_point.call('end')
    joined = ng_date_elements.map do |element|
      copy = element.clone # clone the date object so we don't append the same one
      normalized = normalize_date(copy.text)
      if normalized == normalize_date(range_start.text)
        copy.content = [range_start.text, range_end.text].join(' - ')
        copy
      elsif normalized != normalize_date(range_end.text)
        copy
      end
    end
    joined.compact
  elsif dates_are_open_range?(ng_date_elements)
    range_start = find_point.call('start')
    ng_date_elements.map do |element|
      copy = element.clone # clone the date object so we don't append the same one
      copy.content = "#{range_start.text}-" if copy.text == range_start.text
      copy
    end
  else
    ng_date_elements
  end
end
221
+
222
# When two or more elements normalize to the same text, keeps a single
# element: the first with a qualifier attribute, else the first without an
# encoding attribute, else the first element. Without duplicates the
# elements are returned as given.
# @param ng_date_elements date elements
# @return [Array] date elements
def dedup_dates(ng_date_elements)
  normalized = ng_date_elements.map { |d| normalize_date(d.text) }
  return ng_date_elements if normalized == normalized.uniq

  preferred = ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) } ||
              ng_date_elements.find { |d| !d.attributes['encoding'] } ||
              ng_date_elements.first
  [preferred]
end
236
+
237
# Turns date elements into display Strings, decorated by qualifier:
# approximate => "[ca. 1950]", questionable => "[1950?]", inferred => "[1950]";
# dates without one of those qualifiers are shown as they are.
# Decoration is applied to clones, so the elements passed in are left alone.
# @param ng_date_elements date elements
# @return [Array<String>] display values
def apply_date_qualifier_decoration(ng_date_elements)
  ng_date_elements.map do |element|
    decorated = element.clone
    if date_is_approximate?(decorated)
      decorated.content = "[ca. #{decorated.text}]"
    elsif date_is_questionable?(decorated)
      decorated.content = "[#{decorated.text}?]"
    elsif date_is_inferred?(decorated)
      decorated.content = "[#{decorated.text}]"
    end
    decorated.text
  end
end
251
+
252
# @return true if the element's qualifier attribute is 'approximate'
#   (nil when the element has no qualifier attribute)
def date_is_approximate?(ng_date_element)
  qualifier = ng_date_element.attributes['qualifier']
  qualifier && qualifier.respond_to?(:value) && qualifier.value == 'approximate'
end

# @return true if the element's qualifier attribute is 'questionable'
#   (nil when the element has no qualifier attribute)
def date_is_questionable?(ng_date_element)
  qualifier = ng_date_element.attributes['qualifier']
  qualifier && qualifier.respond_to?(:value) && qualifier.value == 'questionable'
end

# @return true if the element's qualifier attribute is 'inferred'
#   (nil when the element has no qualifier attribute)
def date_is_inferred?(ng_date_element)
  qualifier = ng_date_element.attributes['qualifier']
  qualifier && qualifier.respond_to?(:value) && qualifier.value == 'inferred'
end
269
+
270
# @param ng_date_elements date elements
# @return [Boolean] true if some element has point="start" and none has point="end"
def dates_are_open_range?(ng_date_elements)
  points = ng_date_elements.map do |element|
    point = element.attributes['point']
    point.value if point && point.respond_to?(:value)
  end
  points.include?('start') && !points.include?('end')
end

# @param ng_date_elements date elements
# @return [Boolean] true if the elements include both a point="start" and a point="end"
def dates_are_range?(ng_date_elements)
  has_point = lambda do |wanted|
    ng_date_elements.any? do |date|
      point = date.attributes['point']
      point.respond_to?(:value) && point.value == wanted
    end
  end
  has_point.call('start') && has_point.call('end')
end
291
+
292
# Reformats a w3cdtf date for display: yyyy-mm-dd is formatted with
# full_date_format and yyyy-mm with short_date_format. Any other value, or a
# value that cannot be parsed as a date, keeps its content.
#
# The value is stripped once and that same stripped value is both matched
# (against the whole string) and parsed, so surrounding whitespace cannot
# corrupt the "yyyy-mm" + "-01" date that is built for month precision.
#
# @param ng_date_element a date element
# @return a clone of the element holding the display value
def process_w3cdtf_date(ng_date_element)
  ng_date_element = ng_date_element.clone
  value = ng_date_element.text.strip
  ng_date_element.content =
    begin
      case value
      when /\A\d{4}-\d{2}-\d{2}\z/
        Date.parse(value).strftime(full_date_format)
      when /\A\d{4}-\d{2}\z/
        Date.parse("#{value}-01").strftime(short_date_format)
      else
        ng_date_element.content
      end
    rescue StandardError
      # best effort: an unparseable date (e.g. February 30) is shown as it was given
      ng_date_element.content
    end
  ng_date_element
end
307
+
308
# Reformats an iso8601 date for display: a value of eight or more digits is
# parsed and formatted with full_date_format. Any other value, or a value
# that cannot be parsed, keeps its content.
# @param ng_date_element a date element
# @return a clone of the element holding the display value
def process_iso8601_date(ng_date_element)
  copy = ng_date_element.clone
  copy.content =
    begin
      raw = copy.text
      raw.strip =~ /^\d{8,}$/ ? Date.parse(raw).strftime(full_date_format) : copy.content
    rescue
      copy.content
    end
  copy
end
321
+
322
# Captures: text before the decade, the decade itself (three digits followed
# by one of u - ? x), and the text after it.
DECADE_4CHAR_REGEXP = Regexp.new('(^|.*\D)(\d{3}[u\-?x])(.*)')

# strings like 195x, 195u, 195- and 195? become '1950s' in the ng_date_element content;
# text surrounding the decade is kept
# @param ng_date_element a date element
# @return a clone of the element holding the display value
def process_decade_date(ng_date_element)
  copy = ng_date_element.clone
  copy.content =
    begin
      # note: not calling DateParsing.display_str_for_decade directly because non-year text is lost
      matches = ng_date_element.text.strip.match(DECADE_4CHAR_REGEXP)
      if matches
        zeroed = matches[2].to_s.tr('u\-?x', '0')
        zeroth_year = DateParsing.new(zeroed).sortable_year_for_yyyy
        decade = zeroth_year ? "#{zeroth_year}s" : nil
        "#{matches[1]}#{decade}#{matches[3]}"
      else
        copy.content
      end
    rescue
      copy.content
    end
  copy
end
345
+
346
# Captures: text before the century, the century pattern (one or two digits
# followed by two of u or -), those leading digits alone, and the text after.
CENTURY_4CHAR_REGEXP = Regexp.new('(^|.*\D)((\d{1,2})[u\-]{2})(.*)')

# strings like 18uu, 18-- become '19th century' in the ng_date_element content;
# text surrounding the century is kept
# @param ng_date_element a date element
# @return a clone of the element holding the display value
def process_century_date(ng_date_element)
  copy = ng_date_element.clone
  copy.content =
    begin
      # note: not calling DateParsing.display_str_for_century directly because non-year text is lost
      matches = ng_date_element.text.strip.match(CENTURY_4CHAR_REGEXP)
      if matches
        # Integer#ordinalize comes from ActiveSupport; loaded only when needed
        require 'active_support/core_ext/integer/inflections'
        century = "#{(matches[3].to_i + 1).ordinalize} century"
        "#{matches[1]}#{century}#{matches[4]}"
      else
        copy.content
      end
    rescue
      copy.content
    end
  copy
end
367
+
368
# @param ng_element an element with an attributes Hash
# @param [String] encoding lower case encoding name, e.g. 'edtf'
# @return true if the element's encoding attribute equals encoding, ignoring
#   case (nil when the element has no encoding attribute)
def field_is_encoded?(ng_element, encoding)
  ng_element.attributes['encoding'] &&
    ng_element.attributes['encoding'].respond_to?(:value) &&
    ng_element.attributes['encoding'].value.downcase == encoding
end

# @return true if the element is edtf encoded and its text starts with '-'
def date_is_bc_edtf?(ng_date_element)
  ng_date_element.text.strip.start_with?('-') && date_is_edtf?(ng_date_element)
end

# An A.D. year needing an era label is, once leading zeros are removed, one
# to three digits and nothing else. Short non-numeric values such as 'n.d'
# or 'uuu' are not years and must not be labelled "A.D.".
# @return [Boolean] true if the element's text is a year between 1 and 999
def date_is_ad?(ng_date_element)
  year = ng_date_element.text.strip.gsub(/^0*/, '')
  !(year =~ /\A\d{1,3}\z/).nil?
end

# @return true if the element's encoding attribute is edtf
def date_is_edtf?(ng_date_element)
  field_is_encoded?(ng_date_element, 'edtf')
end

# @return true if the element's encoding attribute is w3cdtf
def date_is_w3cdtf?(ng_date_element)
  field_is_encoded?(ng_date_element, 'w3cdtf')
end

# @return true if the element's encoding attribute is iso8601
def date_is_iso8601?(ng_date_element)
  field_is_encoded?(ng_date_element, 'iso8601')
end
394
+
395
# @return a MatchData (truthy) if the decade string needs tweaking for
#   display, i.e. the stripped text matches DECADE_4CHAR_REGEXP; nil otherwise
def date_is_decade?(ng_date_element)
  DECADE_4CHAR_REGEXP.match(ng_date_element.text.strip)
end

# @return a MatchData (truthy) if the century string needs tweaking for
#   display, i.e. the stripped text matches CENTURY_4CHAR_REGEXP; nil otherwise
def date_is_century?(ng_date_element)
  CENTURY_4CHAR_REGEXP.match(ng_date_element.text.strip)
end
404
+
405
# strftime format for dates with day precision. The first value seen (the
# argument or its default) is remembered and returned on every later call.
# @return [String]
def full_date_format(full_date_format = '%B %-d, %Y')
  @full_date_format = full_date_format unless @full_date_format
  @full_date_format
end

# strftime format for dates with month precision. The first value seen (the
# argument or its default) is remembered and returned on every later call.
# @return [String]
def short_date_format(short_date_format = '%B %Y')
  @short_date_format = short_date_format unless @short_date_format
  @short_date_format
end
412
+
413
# Reduces a date display string to a form suitable for comparing dates:
# surrounding whitespace, a leading "ca." (optionally after opening
# brackets), square brackets and question marks are removed. A "c" is only
# removed when it directly precedes a digit (as in "c1920"), so letters
# inside words such as "December" are left intact.
# @param [String] date_str
# @return [String] the normalized date string
def normalize_date(date_str)
  date_str.strip.gsub(/^\[*ca\.\s*|c(?=\d)|\[|\]|\?/, '')
end
416
+ end
417
+ end
418
+ end