stanford-mods 2.1.0 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 691c6e601bb934cb7a1e396d901a6f8205c54b35
4
- data.tar.gz: 5efa051bf96155491d58b1bc45c335c138f6b171
3
+ metadata.gz: abcbc3357a121647ed103d8341b2cdb48d0c600d
4
+ data.tar.gz: 789c2fa194b7c837c7cdf10898eeab3b4b3636b9
5
5
  SHA512:
6
- metadata.gz: d6eb960553c5e11b42ebe3a3ddc86ab41a60ce37d10511380914c95e4bcbbf8ba1b71547313e8be6af97d4850ca4310032b34de920e6075ba33de0e2601bc1ff
7
- data.tar.gz: 307a7186e583b23980bb321f614bdc2c8b92dc51895a1cbc959dfba5f7838a5822e71726332b2ba0330b3edd7d0b2d992dc3f9d1a542259004fffcd33fb441eb
6
+ metadata.gz: 14231328428d5a250701cdac1f16977ce67ddc6db89fa465635bd7ad3171391c7b11921146020c8992f2af4f9647abbaa0f4f11954ee6a30fb039dd4e1426c32
7
+ data.tar.gz: 253c2201ff7f6ffc206226523ae6bcac56eb48cb386bd93789e2c1babf03f5808adaddcdf9881222b83a76e89567c551ac9c72568e29e784977c9eb117df9dea
data/.travis.yml CHANGED
@@ -5,7 +5,7 @@ rvm:
5
5
  - 2.2.0
6
6
  - 2.1.5
7
7
  - 2.0.0
8
- - 1.9.3 # argo, FRDA
8
+ - 1.9.3 # FRDA
9
9
  # we used to use jruby for merged DOR + MARC records, but no more ...
10
10
  - jruby-head
11
11
  # we also test against ruby-head, which may be unstable.
@@ -0,0 +1,387 @@
1
+ # encoding: utf-8
2
+
3
# Map 2 or 3 letter MARC Country/Location Code to User Friendly Name
# includes discontinued codes, because they're probably in our data
# from https://www.loc.gov/marc/countries/countries_code.html 01/20/2016
#
# Keys are lowercase code Strings, values are display-name Strings.
# Hyphenated names (e.g. 'Timor-Leste', 'Guinea-Bissau') are spelled with their
# hyphens, as in the Library of Congress list. The Hash is frozen because it is a
# shared lookup table that nothing should modify at runtime.
MARC_COUNTRIES =
  {
    'aa' => 'Albania',
    'abc' => 'Alberta',
    'ac' => 'Ashmore and Cartier Islands',
    'aca' => 'Australian Capital Territory',
    'ae' => 'Algeria',
    'af' => 'Afghanistan',
    'ag' => 'Argentina',
    'ai' => 'Armenia (Republic)',
    'air' => 'Armenian S.S.R.',
    'aj' => 'Azerbaijan',
    'ajr' => 'Azerbaijan S.S.R.',
    'aku' => 'Alaska',
    'alu' => 'Alabama',
    'am' => 'Anguilla',
    'an' => 'Andorra',
    'ao' => 'Angola',
    'aq' => 'Antigua and Barbuda',
    'aru' => 'Arkansas',
    'as' => 'American Samoa',
    'at' => 'Australia',
    'au' => 'Austria',
    'aw' => 'Aruba',
    'ay' => 'Antarctica',
    'azu' => 'Arizona',
    'ba' => 'Bahrain',
    'bb' => 'Barbados',
    'bcc' => 'British Columbia',
    'bd' => 'Burundi',
    'be' => 'Belgium',
    'bf' => 'Bahamas',
    'bg' => 'Bangladesh',
    'bh' => 'Belize',
    'bi' => 'British Indian Ocean Territory',
    'bl' => 'Brazil',
    'bm' => 'Bermuda Islands',
    'bn' => 'Bosnia and Herzegovina',
    'bo' => 'Bolivia',
    'bp' => 'Solomon Islands',
    'br' => 'Burma',
    'bs' => 'Botswana',
    'bt' => 'Bhutan',
    'bu' => 'Bulgaria',
    'bv' => 'Bouvet Island',
    'bw' => 'Belarus',
    'bwr' => 'Byelorussian S.S.R.',
    'bx' => 'Brunei',
    'ca' => 'Caribbean Netherlands',
    'cau' => 'California',
    'cb' => 'Cambodia',
    'cc' => 'China',
    'cd' => 'Chad',
    'ce' => 'Sri Lanka',
    'cf' => 'Congo (Brazzaville)',
    'cg' => 'Congo (Democratic Republic)',
    'ch' => 'China (Republic : 1949- )',
    'ci' => 'Croatia',
    'cj' => 'Cayman Islands',
    'ck' => 'Colombia',
    'cl' => 'Chile',
    'cm' => 'Cameroon',
    'cn' => 'Canada',
    'co' => 'Curaçao',
    'cou' => 'Colorado',
    'cp' => 'Canton and Enderbury Islands',
    'cq' => 'Comoros',
    'cr' => 'Costa Rica',
    'cs' => 'Czechoslovakia',
    'ctu' => 'Connecticut',
    'cu' => 'Cuba',
    'cv' => 'Cabo Verde',
    'cw' => 'Cook Islands',
    'cx' => 'Central African Republic',
    'cy' => 'Cyprus',
    'cz' => 'Canal Zone',
    'dcu' => 'District of Columbia',
    'deu' => 'Delaware',
    'dk' => 'Denmark',
    'dm' => 'Benin',
    'dq' => 'Dominica',
    'dr' => 'Dominican Republic',
    'ea' => 'Eritrea',
    'ec' => 'Ecuador',
    'eg' => 'Equatorial Guinea',
    'em' => 'Timor-Leste',
    'enk' => 'England',
    'er' => 'Estonia',
    'err' => 'Estonia',
    'es' => 'El Salvador',
    'et' => 'Ethiopia',
    'fa' => 'Faroe Islands',
    'fg' => 'French Guiana',
    'fi' => 'Finland',
    'fj' => 'Fiji',
    'fk' => 'Falkland Islands',
    'flu' => 'Florida',
    'fm' => 'Micronesia (Federated States)',
    'fp' => 'French Polynesia',
    'fr' => 'France',
    'fs' => 'Terres australes et antarctiques françaises',
    'ft' => 'Djibouti',
    'gau' => 'Georgia',
    'gb' => 'Kiribati',
    'gd' => 'Grenada',
    'ge' => 'Germany (East)',
    'gh' => 'Ghana',
    'gi' => 'Gibraltar',
    'gl' => 'Greenland',
    'gm' => 'Gambia',
    'gn' => 'Gilbert and Ellice Islands',
    'go' => 'Gabon',
    'gp' => 'Guadeloupe',
    'gr' => 'Greece',
    'gs' => 'Georgia (Republic)',
    'gsr' => 'Georgian S.S.R.',
    'gt' => 'Guatemala',
    'gu' => 'Guam',
    'gv' => 'Guinea',
    'gw' => 'Germany',
    'gy' => 'Guyana',
    'gz' => 'Gaza Strip',
    'hiu' => 'Hawaii',
    'hk' => 'Hong Kong',
    'hm' => 'Heard and McDonald Islands',
    'ho' => 'Honduras',
    'ht' => 'Haiti',
    'hu' => 'Hungary',
    'iau' => 'Iowa',
    'ic' => 'Iceland',
    'idu' => 'Idaho',
    'ie' => 'Ireland',
    'ii' => 'India',
    'ilu' => 'Illinois',
    'inu' => 'Indiana',
    'io' => 'Indonesia',
    'iq' => 'Iraq',
    'ir' => 'Iran',
    'is' => 'Israel',
    'it' => 'Italy',
    'iu' => 'Israel-Syria Demilitarized Zones',
    'iv' => "Côte d'Ivoire",
    'iw' => 'Israel-Jordan Demilitarized Zones',
    'iy' => 'Iraq-Saudi Arabia Neutral Zone',
    'ja' => 'Japan',
    'ji' => 'Johnston Atoll',
    'jm' => 'Jamaica',
    'jn' => 'Jan Mayen',
    'jo' => 'Jordan',
    'ke' => 'Kenya',
    'kg' => 'Kyrgyzstan',
    'kgr' => 'Kirghiz S.S.R.',
    'kn' => 'Korea (North)',
    'ko' => 'Korea (South)',
    'ksu' => 'Kansas',
    'ku' => 'Kuwait',
    'kv' => 'Kosovo',
    'kyu' => 'Kentucky',
    'kz' => 'Kazakhstan',
    'kzr' => 'Kazakh S.S.R.',
    'lau' => 'Louisiana',
    'lb' => 'Liberia',
    'le' => 'Lebanon',
    'lh' => 'Liechtenstein',
    'li' => 'Lithuania',
    'lir' => 'Lithuania',
    'ln' => 'Central and Southern Line Islands',
    'lo' => 'Lesotho',
    'ls' => 'Laos',
    'lu' => 'Luxembourg',
    'lv' => 'Latvia',
    'lvr' => 'Latvia',
    'ly' => 'Libya',
    'mau' => 'Massachusetts',
    'mbc' => 'Manitoba',
    'mc' => 'Monaco',
    'mdu' => 'Maryland',
    'meu' => 'Maine',
    'mf' => 'Mauritius',
    'mg' => 'Madagascar',
    'mh' => 'Macao',
    'miu' => 'Michigan',
    'mj' => 'Montserrat',
    'mk' => 'Oman',
    'ml' => 'Mali',
    'mm' => 'Malta',
    'mnu' => 'Minnesota',
    'mo' => 'Montenegro',
    'mou' => 'Missouri',
    'mp' => 'Mongolia',
    'mq' => 'Martinique',
    'mr' => 'Morocco',
    'msu' => 'Mississippi',
    'mtu' => 'Montana',
    'mu' => 'Mauritania',
    'mv' => 'Moldova',
    'mvr' => 'Moldavian S.S.R.',
    'mw' => 'Malawi',
    'mx' => 'Mexico',
    'my' => 'Malaysia',
    'mz' => 'Mozambique',
    'na' => 'Netherlands Antilles',
    'nbu' => 'Nebraska',
    'ncu' => 'North Carolina',
    'ndu' => 'North Dakota',
    'ne' => 'Netherlands',
    'nfc' => 'Newfoundland and Labrador',
    'ng' => 'Niger',
    'nhu' => 'New Hampshire',
    'nik' => 'Northern Ireland',
    'nju' => 'New Jersey',
    'nkc' => 'New Brunswick',
    'nl' => 'New Caledonia',
    'nm' => 'Northern Mariana Islands',
    'nmu' => 'New Mexico',
    'nn' => 'Vanuatu',
    'no' => 'Norway',
    'np' => 'Nepal',
    'nq' => 'Nicaragua',
    'nr' => 'Nigeria',
    'nsc' => 'Nova Scotia',
    'ntc' => 'Northwest Territories',
    'nu' => 'Nauru',
    'nuc' => 'Nunavut',
    'nvu' => 'Nevada',
    'nw' => 'Northern Mariana Islands',
    'nx' => 'Norfolk Island',
    'nyu' => 'New York (State)',
    'nz' => 'New Zealand',
    'ohu' => 'Ohio',
    'oku' => 'Oklahoma',
    'onc' => 'Ontario',
    'oru' => 'Oregon',
    'ot' => 'Mayotte',
    'pau' => 'Pennsylvania',
    'pc' => 'Pitcairn Island',
    'pe' => 'Peru',
    'pf' => 'Paracel Islands',
    'pg' => 'Guinea-Bissau',
    'ph' => 'Philippines',
    'pic' => 'Prince Edward Island',
    'pk' => 'Pakistan',
    'pl' => 'Poland',
    'pn' => 'Panama',
    'po' => 'Portugal',
    'pp' => 'Papua New Guinea',
    'pr' => 'Puerto Rico',
    'pt' => 'Portuguese Timor',
    'pw' => 'Palau',
    'py' => 'Paraguay',
    'qa' => 'Qatar',
    'qea' => 'Queensland',
    'quc' => 'Québec (Province)',
    'rb' => 'Serbia',
    're' => 'Réunion',
    'rh' => 'Zimbabwe',
    'riu' => 'Rhode Island',
    'rm' => 'Romania',
    'ru' => 'Russia (Federation)',
    'rur' => 'Russian S.F.S.R.',
    'rw' => 'Rwanda',
    'ry' => 'Ryukyu Islands, Southern',
    'sa' => 'South Africa',
    'sb' => 'Svalbard',
    'sc' => 'Saint-Barthélemy',
    'scu' => 'South Carolina',
    'sd' => 'South Sudan',
    'sdu' => 'South Dakota',
    'se' => 'Seychelles',
    'sf' => 'Sao Tome and Principe',
    'sg' => 'Senegal',
    'sh' => 'Spanish North Africa',
    'si' => 'Singapore',
    'sj' => 'Sudan',
    'sk' => 'Sikkim',
    'sl' => 'Sierra Leone',
    'sm' => 'San Marino',
    'sn' => 'Sint Maarten',
    'snc' => 'Saskatchewan',
    'so' => 'Somalia',
    'sp' => 'Spain',
    'sq' => 'Swaziland',
    'sr' => 'Surinam',
    'ss' => 'Western Sahara',
    'st' => 'Saint-Martin',
    'stk' => 'Scotland',
    'su' => 'Saudi Arabia',
    'sv' => 'Swan Islands',
    'sw' => 'Sweden',
    'sx' => 'Namibia',
    'sy' => 'Syria',
    'sz' => 'Switzerland',
    'ta' => 'Tajikistan',
    'tar' => 'Tajik S.S.R.',
    'tc' => 'Turks and Caicos Islands',
    'tg' => 'Togo',
    'th' => 'Thailand',
    'ti' => 'Tunisia',
    'tk' => 'Turkmenistan',
    'tkr' => 'Turkmen S.S.R.',
    'tl' => 'Tokelau',
    'tma' => 'Tasmania',
    'tnu' => 'Tennessee',
    'to' => 'Tonga',
    'tr' => 'Trinidad and Tobago',
    'ts' => 'United Arab Emirates',
    'tt' => 'Trust Territory of the Pacific Islands',
    'tu' => 'Turkey',
    'tv' => 'Tuvalu',
    'txu' => 'Texas',
    'tz' => 'Tanzania',
    'ua' => 'Egypt',
    'uc' => 'United States Misc. Caribbean Islands',
    'ug' => 'Uganda',
    'ui' => 'United Kingdom Misc. Islands',
    'uik' => 'United Kingdom Misc. Islands',
    'uk' => 'United Kingdom',
    'un' => 'Ukraine',
    'unr' => 'Ukraine',
    'up' => 'United States Misc. Pacific Islands',
    'ur' => 'Soviet Union',
    'us' => 'United States',
    'utu' => 'Utah',
    'uv' => 'Burkina Faso',
    'uy' => 'Uruguay',
    'uz' => 'Uzbekistan',
    'uzr' => 'Uzbek S.S.R.',
    'vau' => 'Virginia',
    'vb' => 'British Virgin Islands',
    'vc' => 'Vatican City',
    've' => 'Venezuela',
    'vi' => 'Virgin Islands of the United States',
    'vm' => 'Vietnam',
    'vn' => 'Vietnam, North',
    'vp' => 'Various places',
    'vra' => 'Victoria',
    'vs' => 'Vietnam, South',
    'vtu' => 'Vermont',
    'wau' => 'Washington (State)',
    'wb' => 'West Berlin',
    'wea' => 'Western Australia',
    'wf' => 'Wallis and Futuna',
    'wiu' => 'Wisconsin',
    'wj' => 'West Bank of the Jordan River',
    'wk' => 'Wake Island',
    'wlk' => 'Wales',
    'ws' => 'Samoa',
    'wvu' => 'West Virginia',
    'wyu' => 'Wyoming',
    'xa' => 'Christmas Island (Indian Ocean)',
    'xb' => 'Cocos (Keeling) Islands',
    'xc' => 'Maldives',
    'xd' => 'Saint Kitts-Nevis',
    'xe' => 'Marshall Islands',
    'xf' => 'Midway Islands',
    'xga' => 'Coral Sea Islands Territory',
    'xh' => 'Niue',
    'xi' => 'Saint Kitts-Nevis-Anguilla',
    'xj' => 'Saint Helena',
    'xk' => 'Saint Lucia',
    'xl' => 'Saint Pierre and Miquelon',
    'xm' => 'Saint Vincent and the Grenadines',
    'xn' => 'Macedonia',
    'xna' => 'New South Wales',
    'xo' => 'Slovakia',
    'xoa' => 'Northern Territory',
    'xp' => 'Spratly Island',
    'xr' => 'Czech Republic',
    'xra' => 'South Australia',
    'xs' => 'South Georgia and the South Sandwich Islands',
    'xv' => 'Slovenia',
    # 'xx' is deliberately left unmapped so it never produces a display value
    #'xx' => 'No place, unknown, or undetermined',
    'xxc' => 'Canada',
    'xxk' => 'United Kingdom',
    'xxr' => 'Soviet Union',
    'xxu' => 'United States',
    'ye' => 'Yemen',
    'ykc' => 'Yukon Territory',
    'ys' => "Yemen (People's Democratic Republic)",
    'yu' => 'Serbia and Montenegro',
    'za' => 'Zambia'
  }.freeze
data/lib/stanford-mods.rb CHANGED
@@ -2,6 +2,7 @@ require 'mods'
2
2
  require 'stanford-mods/date_parsing'
3
3
  require 'stanford-mods/coordinate'
4
4
  require 'stanford-mods/geo_spatial'
5
+ require 'stanford-mods/imprint'
5
6
  require 'stanford-mods/name'
6
7
  require 'stanford-mods/origin_info'
7
8
  require 'stanford-mods/physical_location'
@@ -281,10 +281,16 @@ module Stanford
281
281
 
282
282
  # get display value for date String containing yyy, yy, y, -y, -yy, -yyy
283
283
  # negative number strings will be changed to B.C. strings
284
+ # note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
285
+ # "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
286
+ # There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
287
+ # See also https://consul.stanford.edu/display/chimera/MODS+display+rules for EDTF
284
288
  def display_str_for_early_numeric
285
289
  return unless orig_date_str.match(EARLY_NUMERIC)
290
+ # return 1 B.C. when the date is 0 since there is no 0 year
291
+ return '1 B.C.' if orig_date_str == '0'
286
292
  # negative number becomes B.C.
287
- return "#{orig_date_str[1..-1]} B.C." if orig_date_str.match(/^\-/)
293
+ return "#{orig_date_str[1..-1].to_i + 1} B.C." if orig_date_str.match(/^\-/)
288
294
  # remove leading 0s from early dates
289
295
  "#{orig_date_str.to_i} A.D."
290
296
  end
@@ -0,0 +1,418 @@
1
+ module Stanford
2
+ module Mods
3
+ ##
4
+ # Get the imprint information from originInfo elements (and sub elements) to create display strings
5
+ #
6
+ # This code is adapted from the mods_display gem. In a perfect world, this
7
+ # code would make use of the date_parsing class instead of reimplementing pieces of it;
8
+ # however, the date_parsing class only does years, and this does finer tuned dates and also
9
+ # reformats them according to the encoding.
10
+ class Imprint
11
+ # @param [Nokogiri::XML::NodeSet] originInfo_ng_nodeset of originInfo nodes
12
+ def initialize(originInfo_ng_nodeset)
13
+ @originInfo_ng_nodeset = originInfo_ng_nodeset
14
+ end
15
+
16
+ require 'marc_countries'
17
+
18
+ # @return Array<String> each String is an imprint statement from a single originInfo element
19
+ def imprint_statements
20
+ results = []
21
+ @originInfo_ng_nodeset.each do |origin_info_node|
22
+ edition = edition_vals_str(origin_info_node)
23
+ place = place_vals_str(origin_info_node)
24
+ publisher = publisher_vals_str(origin_info_node)
25
+ dates = date_str(origin_info_node)
26
+
27
+ place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
28
+ edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
29
+ ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
30
+
31
+ results << ed_place_pub_dates unless ed_place_pub_dates.empty?
32
+ end
33
+ results
34
+ end
35
+
36
# Build one display string covering every originInfo element.
#
# @return [String, nil] the imprint statements joined with '; ', or nil when
#   there are no imprint statements
def display_str
  # imprint_statements walks and re-processes every originInfo node on each
  # call, so compute it once and reuse the result.
  statements = imprint_statements
  statements.join('; ') unless statements.empty?
end
39
+
40
+ private
41
+
42
# Join the non-blank values with the delimiter, without doubling punctuation.
#
# nil values and values that are blank after stripping are dropped. When only
# one value remains, or the delimiter does not itself end in terminating
# punctuation, this is a plain join. Otherwise a value that already ends in
# terminating punctuation is followed by a single space instead of the
# delimiter, so "London :" joined with " : " does not become "London : : ...".
#
# The caller's strings are never modified.
#
# @param values [Array<String, nil>] the pieces to join
# @param delimiter [String] the separator, e.g. ' : ' or ', '
# @return [String] the joined string, stripped of surrounding whitespace
def compact_and_join_with_delimiter(values, delimiter)
  compact_values = values.compact.reject { |v| v.strip.empty? }
  return compact_values.join(delimiter) if compact_values.length == 1 ||
                                           !ends_in_terminating_punctuation?(delimiter)
  last_index = compact_values.length - 1
  compact_values.each_with_index.map do |value, i|
    # build a new string rather than appending to the caller's value
    suffix = i == last_index || ends_in_terminating_punctuation?(value) ? ' ' : delimiter
    "#{value}#{suffix}"
  end.join.strip
end
55
+
56
+ def ends_in_terminating_punctuation?(value)
57
+ value.strip.end_with?('.', ',', ':', ';')
58
+ end
59
+
60
+ def edition_vals_str(origin_info_node)
61
+ origin_info_node.edition.reject do |e|
62
+ e.text.strip.empty?
63
+ end.map(&:text).join(' ').strip
64
+ end
65
+
66
+ def publisher_vals_str(origin_info_node)
67
+ return if origin_info_node.publisher.text.strip.empty?
68
+ publishers = origin_info_node.publisher.reject do |p|
69
+ p.text.strip.empty?
70
+ end.map(&:text)
71
+ compact_and_join_with_delimiter(publishers, ' : ')
72
+ end
73
+
74
+ # PLACE processing methods ------
75
+
76
+ def place_vals_str(origin_info_node)
77
+ return if origin_info_node.place.text.strip.empty?
78
+ places = place_terms(origin_info_node).reject do |p|
79
+ p.text.strip.empty?
80
+ end.map(&:text)
81
+ compact_and_join_with_delimiter(places, ' : ')
82
+ end
83
+
84
+ def unencoded_place_terms?(element)
85
+ element.place.placeTerm.any? do |term|
86
+ !term.attributes['type'].respond_to?(:value) ||
87
+ term.attributes['type'].value == 'text'
88
+ end
89
+ end
90
+
91
+ def place_terms(origin_info_element)
92
+ return [] unless origin_info_element.respond_to?(:place) &&
93
+ origin_info_element.place.respond_to?(:placeTerm)
94
+ if unencoded_place_terms?(origin_info_element)
95
+ origin_info_element.place.placeTerm.select do |term|
96
+ !term.attributes['type'].respond_to?(:value) ||
97
+ term.attributes['type'].value == 'text'
98
+ end.compact
99
+ else
100
+ origin_info_element.place.placeTerm.map do |term|
101
+ next unless term.attributes['type'].respond_to?(:value) &&
102
+ term.attributes['type'].value == 'code' &&
103
+ term.attributes['authority'].respond_to?(:value) &&
104
+ term.attributes['authority'].value == 'marccountry' &&
105
+ MARC_COUNTRIES.include?(term.text.strip)
106
+ term = term.clone
107
+ term.content = MARC_COUNTRIES[term.text.strip]
108
+ term
109
+ end.compact
110
+ end
111
+ end
112
+
113
+ # DATE processing methods ------
114
+
115
+ def date_str(origin_info_node)
116
+ date_vals = origin_info_date_vals(origin_info_node)
117
+ return if date_vals.empty?
118
+ date_vals.map(&:strip).join(' ')
119
+ end
120
+
121
+ def origin_info_date_vals(origin_info_node)
122
+ date_field_keys.map do |date_field|
123
+ next unless origin_info_node.respond_to?(date_field)
124
+ date_elements = origin_info_node.send(date_field)
125
+ date_elements_display_vals(date_elements) if date_elements.present?
126
+ end.compact.flatten
127
+ end
128
+
129
+ def date_elements_display_vals(ng_date_elements)
130
+ apply_date_qualifier_decoration(
131
+ dedup_dates(
132
+ join_date_ranges(
133
+ process_decade_century_dates(
134
+ process_bc_ad_dates(
135
+ process_encoded_dates(ignore_bad_dates(ng_date_elements))
136
+ )
137
+ )
138
+ )
139
+ )
140
+ )
141
+ end
142
+
143
+ def date_field_keys
144
+ [:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
145
+ end
146
+
147
+ def ignore_bad_dates(ng_date_elements)
148
+ ng_date_elements.select do |ng_date_element|
149
+ val = ng_date_element.text.strip
150
+ val != '9999' && val != '0000-00-00' && val != 'uuuu'
151
+ end
152
+ end
153
+
154
+ def process_encoded_dates(ng_date_elements)
155
+ ng_date_elements.map do |ng_date_element|
156
+ if date_is_w3cdtf?(ng_date_element)
157
+ process_w3cdtf_date(ng_date_element)
158
+ elsif date_is_iso8601?(ng_date_element)
159
+ process_iso8601_date(ng_date_element)
160
+ else
161
+ ng_date_element
162
+ end
163
+ end
164
+ end
165
+
166
+ # note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
167
+ # "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
168
+ # There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
169
+ # See also https://consul.stanford.edu/display/chimera/MODS+display+rules for EDTF
170
+ def process_bc_ad_dates(ng_date_elements)
171
+ ng_date_elements.map do |ng_date_element|
172
+ case
173
+ when date_is_edtf?(ng_date_element) && ng_date_element.text.strip == '0'
174
+ ng_date_element.content = "1 B.C."
175
+ when date_is_bc_edtf?(ng_date_element)
176
+ year = ng_date_element.text.strip.gsub(/^-0*/, '').to_i + 1
177
+ ng_date_element.content = "#{year} B.C."
178
+ when date_is_ad?(ng_date_element)
179
+ ng_date_element.content = "#{ng_date_element.text.strip.gsub(/^0*/, '')} A.D."
180
+ end
181
+ ng_date_element
182
+ end
183
+ end
184
+
185
+ def process_decade_century_dates(ng_date_elements)
186
+ ng_date_elements.map do |ng_date_element|
187
+ if date_is_decade?(ng_date_element)
188
+ process_decade_date(ng_date_element)
189
+ elsif date_is_century?(ng_date_element)
190
+ process_century_date(ng_date_element)
191
+ else
192
+ ng_date_element
193
+ end
194
+ end
195
+ end
196
+
197
+ def join_date_ranges(ng_date_elements)
198
+ if dates_are_range?(ng_date_elements)
199
+ start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
200
+ end_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'end' }
201
+ ng_date_elements.map do |date|
202
+ date = date.clone # clone the date object so we don't append the same one
203
+ if normalize_date(date.text) == normalize_date(start_date.text)
204
+ date.content = [start_date.text, end_date.text].join(' - ')
205
+ date
206
+ elsif normalize_date(date.text) != normalize_date(end_date.text)
207
+ date
208
+ end
209
+ end.compact
210
+ elsif dates_are_open_range?(ng_date_elements)
211
+ start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
212
+ ng_date_elements.map do |date|
213
+ date = date.clone # clone the date object so we don't append the same one
214
+ date.content = "#{start_date.text}-" if date.text == start_date.text
215
+ date
216
+ end
217
+ else
218
+ ng_date_elements
219
+ end
220
+ end
221
+
222
+ def dedup_dates(ng_date_elements)
223
+ date_text = ng_date_elements.map { |d| normalize_date(d.text) }
224
+ if date_text != date_text.uniq
225
+ if ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }
226
+ [ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }]
227
+ elsif ng_date_elements.find { |d| !d.attributes['encoding'] }
228
+ [ng_date_elements.find { |d| !d.attributes['encoding'] }]
229
+ else
230
+ [ng_date_elements.first]
231
+ end
232
+ else
233
+ ng_date_elements
234
+ end
235
+ end
236
+
237
+ def apply_date_qualifier_decoration(ng_date_elements)
238
+ return_fields = ng_date_elements.map do |date|
239
+ date = date.clone
240
+ if date_is_approximate?(date)
241
+ date.content = "[ca. #{date.text}]"
242
+ elsif date_is_questionable?(date)
243
+ date.content = "[#{date.text}?]"
244
+ elsif date_is_inferred?(date)
245
+ date.content = "[#{date.text}]"
246
+ end
247
+ date
248
+ end
249
+ return_fields.map(&:text)
250
+ end
251
+
252
+ def date_is_approximate?(ng_date_element)
253
+ ng_date_element.attributes['qualifier'] &&
254
+ ng_date_element.attributes['qualifier'].respond_to?(:value) &&
255
+ ng_date_element.attributes['qualifier'].value == 'approximate'
256
+ end
257
+
258
+ def date_is_questionable?(ng_date_element)
259
+ ng_date_element.attributes['qualifier'] &&
260
+ ng_date_element.attributes['qualifier'].respond_to?(:value) &&
261
+ ng_date_element.attributes['qualifier'].value == 'questionable'
262
+ end
263
+
264
+ def date_is_inferred?(ng_date_element)
265
+ ng_date_element.attributes['qualifier'] &&
266
+ ng_date_element.attributes['qualifier'].respond_to?(:value) &&
267
+ ng_date_element.attributes['qualifier'].value == 'inferred'
268
+ end
269
+
270
+ def dates_are_open_range?(ng_date_elements)
271
+ ng_date_elements.any? do |element|
272
+ element.attributes['point'] &&
273
+ element.attributes['point'].respond_to?(:value) &&
274
+ element.attributes['point'].value == 'start'
275
+ end && !ng_date_elements.any? do |element|
276
+ element.attributes['point'] &&
277
+ element.attributes['point'].respond_to?(:value) &&
278
+ element.attributes['point'].value == 'end'
279
+ end
280
+ end
281
+
282
+ def dates_are_range?(ng_date_elements)
283
+ attributes = ng_date_elements.map do |date|
284
+ if date.attributes['point'].respond_to?(:value)
285
+ date.attributes['point'].value
286
+ end
287
+ end
288
+ attributes.include?('start') &&
289
+ attributes.include?('end')
290
+ end
291
+
292
# Reformat a w3cdtf-encoded date element for display.
#
# Works on a clone, so the element passed in is left unchanged.
# * yyyy-mm-dd is rewritten using full_date_format
# * yyyy-mm is rewritten using short_date_format
# * anything else, or a value that cannot be parsed as a date, keeps its content
#
# @param ng_date_element the date element; it must respond to clone, text,
#   content and content=
# @return a clone of the element carrying the display text
def process_w3cdtf_date(ng_date_element)
  ng_date_element = ng_date_element.clone
  # match and parse the same stripped value, so surrounding whitespace in the
  # XML cannot corrupt the string handed to Date.parse
  date_text = ng_date_element.text.strip
  ng_date_element.content = begin
    if date_text =~ /^\d{4}-\d{2}-\d{2}$/
      Date.parse(date_text).strftime(full_date_format)
    elsif date_text =~ /^\d{4}-\d{2}$/
      # Date.parse needs a day; use the first of the month
      Date.parse("#{date_text}-01").strftime(short_date_format)
    else
      ng_date_element.content
    end
  rescue StandardError
    # best effort: an unparseable date is displayed as it was supplied
    ng_date_element.content
  end
  ng_date_element
end
307
+
308
# Reformat an iso8601-encoded date element for display.
#
# Works on a clone, so the element passed in is left unchanged. A value of
# eight or more digits is rewritten using full_date_format; anything else,
# or a value that cannot be parsed as a date, keeps its content.
#
# @param ng_date_element the date element; it must respond to clone, text,
#   content and content=
# @return a clone of the element carrying the display text
def process_iso8601_date(ng_date_element)
  ng_date_element = ng_date_element.clone
  # match and parse the same stripped value
  date_text = ng_date_element.text.strip
  ng_date_element.content = begin
    if date_text =~ /^\d{8,}$/
      Date.parse(date_text).strftime(full_date_format)
    else
      ng_date_element.content
    end
  rescue StandardError
    # best effort: an unparseable date is displayed as it was supplied
    ng_date_element.content
  end
  ng_date_element
end
321
+
322
+ DECADE_4CHAR_REGEXP = Regexp.new('(^|.*\D)(\d{3}[u\-?x])(.*)')
323
+
324
+ # strings like 195x, 195u, 195- and 195? become '1950s' in the ng_date_element content
325
+ def process_decade_date(ng_date_element)
326
+ my_ng_date_element = ng_date_element.clone
327
+ my_ng_date_element.content = begin
328
+ orig_date_str = ng_date_element.text.strip
329
+ # note: not calling DateParsing.display_str_for_decade directly because non-year text is lost
330
+ decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
331
+ if decade_matches
332
+ decade_str = decade_matches[2]
333
+ changed_to_zero = decade_str.to_s.tr('u\-?x', '0') if decade_str
334
+ zeroth_year = DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
335
+ new_decade_str = "#{zeroth_year}s" if zeroth_year
336
+ my_ng_date_element.content = "#{decade_matches[1]}#{new_decade_str}#{decade_matches[3]}"
337
+ else
338
+ my_ng_date_element.content
339
+ end
340
+ rescue
341
+ my_ng_date_element.content
342
+ end
343
+ my_ng_date_element
344
+ end
345
+
346
+ CENTURY_4CHAR_REGEXP = Regexp.new('(^|.*\D)((\d{1,2})[u\-]{2})(.*)')
347
+
348
+ # strings like 18uu, 18-- become '19th century' in the ng_date_element content
349
+ def process_century_date(ng_date_element)
350
+ my_ng_date_element = ng_date_element.clone
351
+ my_ng_date_element.content = begin
352
+ orig_date_str = ng_date_element.text.strip
353
+ # note: not calling DateParsing.display_str_for_century directly because non-year text is lost
354
+ century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP) if orig_date_str
355
+ if century_matches
356
+ require 'active_support/core_ext/integer/inflections'
357
+ new_century_str = "#{(century_matches[3].to_i + 1).ordinalize} century"
358
+ my_ng_date_element.content = "#{century_matches[1]}#{new_century_str}#{century_matches[4]}"
359
+ else
360
+ my_ng_date_element.content
361
+ end
362
+ rescue
363
+ my_ng_date_element.content
364
+ end
365
+ my_ng_date_element
366
+ end
367
+
368
+ def field_is_encoded?(ng_element, encoding)
369
+ ng_element.attributes['encoding'] &&
370
+ ng_element.attributes['encoding'].respond_to?(:value) &&
371
+ ng_element.attributes['encoding'].value.downcase == encoding
372
+ end
373
+
374
+ def date_is_bc_edtf?(ng_date_element)
375
+ ng_date_element.text.strip.start_with?('-') && date_is_edtf?(ng_date_element)
376
+ end
377
+
378
+ def date_is_ad?(ng_date_element)
379
+ str = ng_date_element.text.strip.gsub(/^0*/, '')
380
+ str.present? && str.length < 4 && !str.match('A.D.')
381
+ end
382
+
383
+ def date_is_edtf?(ng_date_element)
384
+ field_is_encoded?(ng_date_element, 'edtf')
385
+ end
386
+
387
+ def date_is_w3cdtf?(ng_date_element)
388
+ field_is_encoded?(ng_date_element, 'w3cdtf')
389
+ end
390
+
391
+ def date_is_iso8601?(ng_date_element)
392
+ field_is_encoded?(ng_date_element, 'iso8601')
393
+ end
394
+
395
+ # @return true if decade string needs tweaking for display
396
+ def date_is_decade?(ng_date_element)
397
+ ng_date_element.text.strip.match(DECADE_4CHAR_REGEXP)
398
+ end
399
+
400
+ # @return true if century string needs tweaking for display
401
+ def date_is_century?(ng_date_element)
402
+ ng_date_element.text.strip.match(CENTURY_4CHAR_REGEXP)
403
+ end
404
+
405
+ def full_date_format(full_date_format = '%B %-d, %Y')
406
+ @full_date_format ||= full_date_format
407
+ end
408
+
409
+ def short_date_format(short_date_format = '%B %Y')
410
+ @short_date_format ||= short_date_format
411
+ end
412
+
413
+ def normalize_date(date_str)
414
+ date_str.strip.gsub(/^\[*ca\.\s*|c|\[|\]|\?/, '')
415
+ end
416
+ end
417
+ end
418
+ end