stanford-mods 1.3.3 → 1.3.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,208 @@
1
+ # encoding: UTF-8
2
+ require 'logger'
3
+ require 'mods'
4
+
5
+ # SearchWorks specific wranglings of MODS *subject* metadata as a mixin to the Stanford::Mods::Record object
6
+ module Stanford
7
+ module Mods
8
+ class Record < ::Mods::Record
9
+
10
# Gathers geographic subject values from:
#   subject/geographic
#   subject/hierarchicalGeographic (non-empty child element texts joined with sep)
#   subject/geographicCode (translated value, added only if not already collected)
# @param [String] sep - the separator string for joining hierarchicalGeographic sub elements
# @return [Array<String>] values for geographic_search Solr field for this document or [] if none
def sw_geographic_search(sep = ' ')
  geo_vals = term_values([:subject, :geographic]) || []

  # each hierarchicalGeographic node contributes at most one value, built from its child elements
  @mods_ng_xml.subject.hierarchicalGeographic.each do |hier_node|
    pieces = hier_node.element_children.map(&:text).reject(&:empty?)
    geo_vals << pieces.join(sep) unless pieces.empty?
  end

  translated = @mods_ng_xml.subject.geographicCode.translated_value
  return geo_vals unless translated

  # appended one at a time so a value repeated among the translated codes is added only once
  translated.each do |code_val|
    geo_vals << code_val unless geo_vals.include?(code_val)
  end
  geo_vals
end
37
+
38
# Builds one value per subject/name element from its namePart sub elements.
# Non-empty namePart texts are concatenated in the order they appear
#   (e.g. "Shakespeare, William, 1564-1616")
# @param [String] sep - the separator string for joining namePart sub elements
# @return [Array<String>] values for names inside subject elements or [] if none
def sw_subject_names(sep = ', ')
  names_with_parts = @mods_ng_xml.subject.name_el.select { |name_node| name_node.namePart }
  names_with_parts.each_with_object([]) do |name_node, names|
    name_parts = name_node.namePart.map(&:text).reject(&:empty?)
    names << name_parts.join(sep).strip unless name_parts.empty?
  end
end
51
+
52
# Builds one value per subject/titleInfo element by joining the non-empty texts
#   of its child elements
# @param [String] sep - the separator string for joining titleInfo sub elements
# @return [Array<String>] values for titles inside subject elements or [] if none
def sw_subject_titles(sep = ' ')
  @mods_ng_xml.subject.titleInfo.each_with_object([]) do |title_info, titles|
    pieces = title_info.element_children.map(&:text).reject(&:empty?)
    titles << pieces.join(sep).strip unless pieces.empty?
  end
end
64
+
65
# Values are the contents of mods/genre followed by mods/subject/topic.
# A non-nil result is cached in @topic_search and reused on later calls.
# @return [Array<String>] values for the topic_search Solr field for this document or nil if none
def topic_search
  return @topic_search if @topic_search

  topics = term_values(:genre) || []
  topics.concat(subject_topics) if subject_topics
  @topic_search = topics.empty? ? nil : topics
end
76
+
77
# Values are the contents of (in this order):
#   subject/topic
#   subject/name
#   subject/title
#   subject/occupation
# Each value has one trailing comma, semicolon or backslash removed and is then stripped
# of surrounding whitespace.
# @return [Array<String>] values for the topic_facet Solr field for this document or nil if none
def topic_facet
  facet_vals = []
  facet_vals.concat(subject_topics) if subject_topics
  facet_vals.concat(subject_names) if subject_names
  facet_vals.concat(subject_titles) if subject_titles
  facet_vals.concat(subject_occupations) if subject_occupations
  return nil if facet_vals.empty?

  facet_vals.map { |raw| raw.sub(/[\\,;]$/, '').strip }
end
95
+
96
# geographic_search values, each with one trailing comma, semicolon or backslash removed
# and then stripped of surrounding whitespace
# @return [Array<String>] values for the geographic_facet Solr field for this document or nil if none
def geographic_facet
  return nil unless geographic_search

  geographic_search.map do |geo_val|
    geo_val.sub(/[\\,;]$/, '').strip
  end
end
101
+
102
# subject/temporal values, each with one trailing comma, semicolon or backslash removed
# and then stripped of surrounding whitespace
# @return [Array<String>] values for the era_facet Solr field for this document or nil if none
def era_facet
  return nil unless subject_temporal

  subject_temporal.map do |era_val|
    era_val.sub(/[\\,;]$/, '').strip
  end
end
107
+
108
# Values are the contents of:
#   subject/geographic
#   subject/hierarchicalGeographic
#   subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
# When there are more geographicCode values than translated values, an info message is logged
# for each geographicCode element whose authority is neither 'marcgac' nor 'marccountry'.
# A non-nil result is cached in @geographic_search and reused on later calls.
# @return [Array<String>] values for the geographic_search Solr field for this document or nil if none
def geographic_search
  @geographic_search ||= begin
    geo_vals = sw_geographic_search

    # TODO: this should go into stanford-mods ... but then we have to set that gem up with a Logger
    # print a message for any unrecognized encodings
    # sw_geographic_search treats translated_value as possibly nil, so fall back to an
    # empty list here instead of calling #size on nil
    translated = subject.geographicCode.translated_value || []
    codes = term_values([:subject, :geographicCode])
    if codes && codes.size > translated.size
      subject.geographicCode.each do |code_node|
        next if code_node.authority == 'marcgac' || code_node.authority == 'marccountry'
        sw_logger.info("#{druid} has subject geographicCode element with untranslated encoding (#{code_node.authority}): #{code_node.to_xml}")
      end
    end

    # FIXME: stanford-mods should be returning [], not nil ...
    geo_vals if geo_vals && !geo_vals.empty?
  end
end
134
+
135
# Values are the contents of (in this order):
#   subject/occupation - no subelements
#   subject/name
#   subject/titleInfo
# A non-nil result is cached in @subject_other_search and reused on later calls.
# @return [Array<String>] values for the subject_other_search Solr field for this document or nil if none
def subject_other_search
  return @subject_other_search if @subject_other_search

  others = []
  others.concat(subject_occupations) if subject_occupations
  others.concat(subject_names) if subject_names
  others.concat(subject_titles) if subject_titles
  @subject_other_search = others.empty? ? nil : others
end
148
+
149
# Values are the contents of (in this order):
#   subject/temporal
#   subject/genre
# Also logs an info message for every subject/temporal element with a non-empty encoding.
# A non-nil result is cached in @subject_other_subvy_search and reused on later calls.
# @return [Array<String>] values for the subject_other_subvy_search Solr field for this document or nil if none
def subject_other_subvy_search
  return @subject_other_subvy_search if @subject_other_subvy_search

  subvy_vals = []
  subvy_vals.concat(subject_temporal) if subject_temporal
  genre_vals = term_values([:subject, :genre])
  subvy_vals.concat(genre_vals) if genre_vals

  # print a message for any temporal encodings
  self.subject.temporal.each do |temporal_node|
    next if temporal_node.encoding.empty?
    sw_logger.info("#{druid} has subject temporal element with untranslated encoding: #{temporal_node.to_xml}")
  end

  @subject_other_subvy_search = subvy_vals.empty? ? nil : subvy_vals
end
167
+
168
# Concatenates, in order, the values of topic_search, geographic_search,
# subject_other_search and subject_other_subvy_search
#   (all subject subelements except subject/cartographic plus genre top level element)
# @return [Array<String>] values for the subject_all_search Solr field for this document or nil if none
def subject_all_search
  sources = [:topic_search, :geographic_search, :subject_other_search, :subject_other_subvy_search]
  all_vals = sources.each_with_object([]) do |source, collected|
    # each source returns nil when it has no values, so skip those
    collected.concat(send(source)) if send(source)
  end
  all_vals.empty? ? nil : all_vals
end
178
+
179
+ protected #----------------------------------------------------------
180
+
181
# subject/name/namePart values from sw_subject_names, cached in @subject_names
# (to avoid parsing the mods for the same thing multiple times)
def subject_names
  return @subject_names if @subject_names

  @subject_names = sw_subject_names
end
185
+
186
# subject/occupation values, cached in @subject_occupations once non-nil
# (to avoid parsing the mods for the same thing multiple times)
def subject_occupations
  return @subject_occupations if @subject_occupations

  @subject_occupations = term_values([:subject, :occupation])
end
190
+
191
# subject/temporal values, cached in @subject_temporal once non-nil
# (to avoid parsing the mods for the same thing multiple times)
def subject_temporal
  return @subject_temporal if @subject_temporal

  @subject_temporal = term_values([:subject, :temporal])
end
195
+
196
# subject/titleInfo values from sw_subject_titles, cached in @subject_titles
# (to avoid parsing the mods for the same thing multiple times)
def subject_titles
  return @subject_titles if @subject_titles

  @subject_titles = sw_subject_titles
end
200
+
201
# subject/topic values, cached in @subject_topics once non-nil
# (to avoid parsing the mods for the same thing multiple times)
def subject_topics
  return @subject_topics if @subject_topics

  @subject_topics = term_values([:subject, :topic])
end
205
+
206
+ end
207
+ end
208
+ end
@@ -1,6 +1,6 @@
1
1
  module Stanford
2
2
  module Mods
3
3
  # this is the Ruby Gem version
4
- VERSION = "1.3.3"
4
+ VERSION = "1.3.4"
5
5
  end
6
6
  end
@@ -0,0 +1,746 @@
1
+ # encoding: utf-8
2
+ describe "date parsing methods" do
3
+
4
+ unparseable = [ # here to remind us of what they might look like in our data
5
+ nil,
6
+ '',
7
+ '[]',
8
+ '?',
9
+ 'uuuu',
10
+ 'Aug',
11
+ 'publiée le 26 germinal an VI',
12
+ "l'an IVe",
13
+ 'Feb',
14
+ "L'AN 2 DE LA // LIBERTÉ",
15
+ 'Paris',
16
+ "publié en frimaire l'an 3.e de la République française",
17
+ 'an 6',
18
+ 'an sept',
19
+ 's.n.]',
20
+ 'M. D. LXI',
21
+ '[An 4]',
22
+ '[s.d.]',
23
+ 'Undated'
24
+ ]
25
+ # example string as key, expected parsed value as value
26
+ invalid_but_can_get_year = {
27
+ '1966-14-14' => '1966', # 14 isn't a valid month ...
28
+ '1966\4\11' => '1966', # slashes wrong way
29
+ '2/31/1950' => '1950', # no 31 of Feb
30
+ '1869-00-00' => '1869',
31
+ '1862-01-00' => '1862',
32
+ '1985-05-00' => '1985'
33
+ }
34
+ # example string as key, expected parsed value as value
35
+ single_year = {
36
+ '0700' => '0700',
37
+ '0999' => '0999',
38
+ '1000' => '1000',
39
+ '1798' => '1798',
40
+ '1583.' => '1583',
41
+ '1885-' => '1885',
42
+ '1644.]' => '1644',
43
+ '1644]' => '1644',
44
+ '1584].' => '1584',
45
+ '1729?]' => '1729',
46
+ '1500 CE' => '1500',
47
+ '1877?' => '1877',
48
+ '1797 goda' => '1797',
49
+ "1616: Con licenza de'svperiori" => '1616',
50
+
51
+ '[1789]' => '1789',
52
+ '[1968?-' => '1968',
53
+ '[1860?]' => '1860',
54
+ '[1789 ?]' => '1789',
55
+ '[[1790]]' => '1790',
56
+ '[1579].' => '1579',
57
+ '[Ca 1790]' => '1790',
58
+ '[c1926]' => '1926',
59
+ '[ca 1790]' => '1790',
60
+ '[ca. 1790]' => '1790',
61
+ '[ca. 1850?]' => '1850',
62
+ '[ca.1600]' => '1600',
63
+ '[after 1726]' => '1726',
64
+ '[an II, i.e. 1794]' => '1794',
65
+ '[approximately 1600]' => '1600',
66
+ '[approximately 1558].' => '1558',
67
+ '[approximately 1717?]' => '1717',
68
+ '[not after 1652]' => '1652',
69
+ '[not before 1543].' => '1543',
70
+
71
+ "A' 1640" => '1640',
72
+ 'A1566' => '1566',
73
+ 'Ans. 1656' => '1656',
74
+ 'Antonio Laffreri 1570' => '1570',
75
+ 'An 6. 1798' => '1798',
76
+ 'An 6 1798' => '1798',
77
+ 'a. 1652' => '1652',
78
+ 'ad decennium 1592' => '1592',
79
+ 'after 1622' => '1622',
80
+ 'an 10 (1802)' => '1802',
81
+ 'an 14, 1805' => '1805',
82
+ 'anno 1801' => '1801',
83
+ 'anno 1603.' => '1603',
84
+ 'approximately 1580.' => '1580',
85
+ 'approximately 1700?' => '1700',
86
+ 'approximately 1544]' => '1544',
87
+ 'anno 1599 (v. 1).' => '1599',
88
+ 'anno MDCXXXV [1635].' => '1635',
89
+ 'anno dom. 1600 (v. 3).' => '1600',
90
+ 'anno j65i [1651]' => '1651',
91
+ 'Ca. 1580 CE' => '1580',
92
+ 'c1887' => '1887',
93
+ 'ca 1796]' => '1796',
94
+ 'ca. 1558' => '1558',
95
+ 'ca. 1560?]' => '1560',
96
+ 'ca. 1700]' => '1700',
97
+ 'circa 1860' => '1860',
98
+ 'copyright 1855' => '1855',
99
+ 'en 1788' => '1788',
100
+ 'im jahr 1681' => '1681',
101
+ "l'an 1.er de la Rep. 1792" => '1792',
102
+ "l'anno1570" => '1570',
103
+ 'MDLXXXVIII [1588]]' => '1588',
104
+ 'MDLXI [1561]' => '1561',
105
+ 'MDCCLII. [1752-' => '1752',
106
+ 'No. 15 1792' => '1792',
107
+ 's.a. [1712]' => '1712',
108
+ 'publié le 24 floréal [1796]' => '1796',
109
+ "Fructidor l'an 3.e [i.e. 1795]" => '1795',
110
+ }
111
+ # example string as key, expected parsed value as value
112
+ specific_month = {
113
+ '1975-05' => '1975', # vs 1918-27
114
+ '1996 Jun' => '1996',
115
+ 'February 1798' => '1798',
116
+ 'March, 1794' => '1794',
117
+ '[ ?] 10 1793' => '1793',
118
+ 'agosto 1799' => '1799',
119
+ 'Jan.y. thes.et 1798' => '1798',
120
+ '[[décembre 1783]]' => '1783',
121
+ 'im Mai 1793' => '1793',
122
+ 'in Febr. 1795' => '1795',
123
+ "juin année 1797" => '1797'
124
+ }
125
+ # example string as key, expected parsed value as value
126
+ specific_day = {
127
+ '1/1/1961' => '1961',
128
+ '10/1/1987' => '1987',
129
+ '5-1-1959' => '1959',
130
+
131
+ # year first
132
+ '1888-02-18' => '1888',
133
+ '1966-2-5' => '1966',
134
+
135
+ # text; starts with day
136
+ '1 July 1799' => '1799',
137
+ '1 Feb. 1782' => '1782',
138
+ '15 Jan.y 1797' => '1797',
139
+ '12.th May 1794' => '1794',
140
+ '12th May 1794' => '1794',
141
+ '12th Dec.r 1794' => '1794',
142
+ '14th Feb.y 1794' => '1794',
143
+ '18 Febr. 1790' => '1790',
144
+ '23 Nov.r 1797' => '1797',
145
+
146
+ # text; starts with year
147
+ '1793 March 1st' => '1793',
148
+ '1892, Jan. 1' => '1892',
149
+ '1991 May 14' => '1991',
150
+ '1997 Sep 6' => '1997',
151
+
152
+ # text starts with words
153
+ 'Boston, November 25, 1851' => '1851',
154
+ 'd. 16 Feb. 1793' => '1793',
155
+ 'published the 30 of June 1799' => '1799',
156
+ 'Published the 1 of June 1799' => '1799',
157
+ 'Pub.d Nov.r 1st 1798' => '1798',
158
+ 'Published July 5th, 1784' => '1784',
159
+
160
+ # text starts with month
161
+ 'April 01 1797' => '1797',
162
+ 'April 1 1796' => '1796',
163
+ 'April 1. 1796' => '1796',
164
+ 'April 16, 1632' => '1632',
165
+ 'April 11th 1792' => '1792',
166
+ '[April 1 1795]' => '1795',
167
+
168
+ 'Aug. 1st 1797' => '1797',
169
+ 'Aug 30th 1794' => '1794',
170
+ 'Aug. 16 1790' => '1790',
171
+ 'Aug. 20, 1883' => '1883',
172
+ 'Aug. 3rd, 1886' => '1886',
173
+ 'Aug.st 4 1795' => '1795',
174
+ 'Aug.t 16 1794' => '1794',
175
+ 'Augt. 29, 1804' => '1804',
176
+ 'August 1 1794' => '1794',
177
+
178
+ 'Dec. 1 1792' => '1792',
179
+ 'Dec.r 1 1792' => '1792',
180
+ 'Dec.r 8th 1798' => '1798',
181
+ 'Decb.r 1, 1789' => '1789',
182
+ 'December 16 1795' => '1795',
183
+
184
+ 'Feb 12 1800' => '1800',
185
+ 'Feb. 10 1798' => '1798',
186
+ 'Feb. 25, 1744]' => '1744',
187
+ 'Feb.ry 12 1793' => '1793',
188
+ 'Feb.ry 7th 1796' => '1796',
189
+ 'Feb.y 1 1794' => '1794',
190
+ 'Feb.y 13th 1798' => '1798',
191
+ 'Feb.y 23rd 1799' => '1799',
192
+ '[Feb.y 18 1793]' => '1793',
193
+
194
+ 'Jan. 1 1789' => '1789',
195
+ 'Jan. 1. 1795' => '1795',
196
+ 'Jan.y 15. 1795' => '1795',
197
+ 'Jan.y 12st 1793' => '1793',
198
+ 'Jan.y 18th 1790' => '1790',
199
+
200
+ 'July 1 1796' => '1796',
201
+ 'July 1. 1793' => '1793',
202
+ 'July 13, 1787' => '1787',
203
+ 'July 15th 1797' => '1797',
204
+
205
+ 'June 1 1793' => '1793',
206
+ 'June 1. 1800' => '1800',
207
+ 'June1st.1805' => '1805',
208
+ 'June 22, 1804' => '1804',
209
+ 'July 23d 1792' => '1792',
210
+ 'June 30th 1799' => '1799',
211
+ '[June 2 1793]' => '1793',
212
+
213
+ 'May 9, 1795' => '1795',
214
+ 'May 12 1792' => '1792',
215
+ 'May 21st 1798' => '1798',
216
+ 'May 15th 1798' => '1798',
217
+
218
+ 'Mar. 1. 1792' => '1792',
219
+ 'March 1 1795' => '1795',
220
+ 'March 1.t 1797' => '1797',
221
+ 'March 1, 1793' => '1793',
222
+ 'March 1st 1797' => '1797',
223
+ 'March 6th 1798' => '1798',
224
+ '[March 16 1798]' => '1798',
225
+
226
+ 'Nov. 1. 1796' => '1796',
227
+ 'Nov. 14th 1792' => '1792',
228
+ 'Nov. 20 1789' => '1789',
229
+ 'Nov.r 9, 1793' => '1793',
230
+ 'Novem. 13th 1797' => '1797',
231
+ 'Novembr 22nd 1794' => '1794',
232
+
233
+ 'Oct 12 1792' => '1792',
234
+ 'Oct 18th 1794' => '1794',
235
+ 'Oct. 29 1796' => '1796',
236
+ 'Oct. 11th 1794' => '1794',
237
+ 'Oct.er 1st 1786' => '1786',
238
+ 'Oct.r 25 1796' => '1796',
239
+ 'Oct.r 25th 1794' => '1794',
240
+ 'Octo.r 15 1795' => '1795',
241
+
242
+ 'Sep.r 1, 1795' => '1795',
243
+ 'Sep.tr 15.th 1796' => '1796',
244
+ 'Sept.r 5th 1793' => '1793'
245
+ }
246
+ specific_day_ruby_parse_fail = {
247
+ # note ruby Date.parse only handles american or euro date order, not both ??
248
+ '1/30/1979' => '1979',
249
+ '10/20/1976' => '1976',
250
+ '5-18-2014' => '2014',
251
+ # year first
252
+ '1980-23-02' => '1980',
253
+ '1792 20 Dec' => '1792',
254
+ # text
255
+ 'le 22 juin 1794' => '1794',
256
+ 'mis au jour le 26 juillet 1791' => '1791',
257
+ 'April 12 sd 1794' => '1794',
258
+ 'Dec. 10 & 11, 1855' => '1855',
259
+ 'January 22th [1800]' => '1800',
260
+ 'June the 12, 1794' => '1794',
261
+ 'Mai 1st 1789' => '1789',
262
+ 'March 22 d. 1794' => '1794',
263
+ 'N. 7 1796' => '1796',
264
+ 'N[ovember] 21st 1786' => '1786',
265
+ 'Oct. the 2.d 1793' => '1793',
266
+ }
267
+ # example string as key, expected parsed value as value
268
+ specific_day_2_digit_year = {
269
+ '1/2/79' => '1979',
270
+ '2/12/15' => '2015',
271
+ '6/11/99' => '1999',
272
+ '10/1/90' => '1990',
273
+ '10/21/08' => '2008',
274
+ '5-1-59' => '1959',
275
+ '5-1-21' => '1921',
276
+ '5-1-14' => '2014'
277
+ }
278
+ # example string as key, expected parsed value as value
279
+ multiple_years = {
280
+ '1783-1788' => ['1783', '1784', '1785', '1786', '1787', '1788'],
281
+ '1862-1868]' => ['1862', '1863', '1864', '1865', '1866', '1867', '1868'],
282
+ '1640-1645?]' => ['1640', '1641', '1642', '1643', '1644', '1645'],
283
+ '1578, 1584]' => ['1578', '1584'],
284
+ '1860, [1862]' => ['1860', '1862'],
285
+ '1901, c1900' => ['1901', '1900'], # pub date is one without the c,
286
+ '1627 [i.e. 1646]' => ['1627', '1646'],
287
+ '1698/1715' => ['1698', '1715'],
288
+ '1965,1968' => ['1965', '1968'], # revs
289
+ '1965|1968' => ['1965', '1968'], # revs
290
+ '1789 ou 1790]' => ['1789', '1790'],
291
+ '1689 [i.e. 1688-89]' => ['1689', '1688'],
292
+ '1598 or 1599' => ['1598', '1599'],
293
+ '1890 [c1884]' => ['1890', '1884'], # pub date is one without the c
294
+ '1873,c1868' => ['1873', '1868'], # # pub date is one without the c
295
+ '1872-1877 [t.5, 1874]' => ['1872', '1873', '1874', '1875', '1876', '1877'],
296
+ '1809 [ca. 1810]' => ['1809', '1810'],
297
+ '1726 or 1738]' => ['1726', '1738'],
298
+
299
+ '[1789-1791]' => ['1789', '1790', '1791'],
300
+ '[1627-1628].' => ['1627', '1628'],
301
+ '[1789-1791' => ['1789', '1790', '1791'],
302
+ '[1793 ou 1794]' => ['1793', '1794'],
303
+ '[entre 1789 et 1791]' => ['1789', '1790', '1791'],
304
+ '[Entre 1789 et 1791]' => ['1789', '1790', '1791'],
305
+ '[entre 1789-1791]' => ['1789', '1790', '1791'],
306
+ '[entre 1789 et 1791 ?]' => ['1789', '1790', '1791'],
307
+ '[between 1882 and 1887]' => ['1882', '1883', '1884', '1885', '1886', '1887'],
308
+ '[ca 1789-1791]' => ['1789', '1790', '1791'],
309
+ '[ca 1790 et 1792]' => ['1790', '1791', '1792'],
310
+ '[ca. 1550-1552]' => ['1550', '1551', '1552'],
311
+
312
+ 'Anno 1789-1790' => ['1789', '1790'],
313
+ "L'an VII de la République [1798 or 1799]" => ['1798', '1799'],
314
+ 'MDCXIII [1613] (v. 1); MDLXXXIII [1583] (v. 2); and MDCVI [1606] (v. 3).' => ['1613', '1583', '1606'],
315
+ 'entre 1793 et 1795' => ['1793', '1794', '1795'],
316
+ 'entre 1793 et 1795]' => ['1793', '1794', '1795'],
317
+ 'approximately 1600-1602.' => ['1600', '1601', '1602'],
318
+ 'approximately 1650-1652]' => ['1650', '1651', '1652'],
319
+ 'approximately 1643-1644.]' => ['1643', '1644'],
320
+ 'ca. 1740-1745]' => ['1740', '1741', '1742', '1743', '1744', '1745'],
321
+ 'circa 1851-1852' => ['1851', '1852'],
322
+ 's.a. [ca. 1660, erschienen: 1782]' => ['1660', '1782'],
323
+ 'view of approximately 1848, published about 1865' => ['1848', '1865']
324
+ }
325
+ # example string as key, expected parsed value as value
326
+ multiple_years_4_digits_once = {
327
+ '1918-20' => ['1918', '1919', '1920'], # vs. 1961-04
328
+ '1965-8' => ['1965', '1966', '1967', '1968'], # revs
329
+ '[1846-51]' => ['1846', '1847', '1848', '1849', '1850', '1851']
330
+ }
331
+ # example string as key, expected parsed value as value
332
+ decade_only_4_digits = {
333
+ 'early 1890s' => ['1890', '1891', '1892', '1893', '1894', '1895', '1896', '1897', '1898', '1899'],
334
+ '1950s' => ['1950', '1951', '1952', '1953', '1954', '1955', '1956', '1957', '1958', '1959'],
335
+ "1950's" => ['1950', '1951', '1952', '1953', '1954', '1955', '1956', '1957', '1958', '1959']
336
+ }
337
+ decade_only = {
338
+ '156u' => ['1560', '1561', '1562', '1563', '1564', '1565', '1566', '1567', '1568', '1569'],
339
+ '167-?]' => ['1670', '1671', '1672', '1673', '1674', '1675', '1676', '1677', '1678', '1679'],
340
+ '[171-?]' => ['1710', '1711', '1712', '1713', '1714', '1715', '1716', '1717', '1718', '1719'],
341
+ '[189-]' => ['1890', '1891', '1892', '1893', '1894', '1895', '1896', '1897', '1898', '1899'],
342
+ 'ca.170-?]' => ['1700', '1701', '1702', '1703', '1704', '1705', '1706', '1707', '1708', '1709'],
343
+ '200-?]' => ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009'],
344
+ '186?' => ['1860', '1861', '1862', '1863', '1864', '1865', '1866', '1867', '1868', '1869'],
345
+ '195x' => ['1950', '1951', '1952', '1953', '1954', '1955', '1956', '1957', '1958', '1959']
346
+ }
347
+ century_only = {
348
+ '18th century CE' => '18th century',
349
+ '17uu' => '18th century',
350
+ '17--?]' => '18th century',
351
+ '17--]' => '18th century',
352
+ '[17--]' => '18th century',
353
+ '[17--?]' => '18th century'
354
+ }
355
+ brackets_in_middle_of_year = {
356
+ '169[5]' => '1695',
357
+ 'October 3, [18]91' => '1891'
358
+ }
359
+ # we have data like this for our Roman coins collection
360
+ early_numeric_dates = {
361
+ # note that values must lexically sort to create a chronological sort. (-999 before -914)
362
+ '-999' => '-001',
363
+ '-914' => '-086',
364
+ '-18' => '-982',
365
+ '-1' => '-999',
366
+ '0' => '0000',
367
+ '5' => '0005',
368
+ '33' => '0033',
369
+ '945' => '0945'
370
+ }
371
+ bc_dates = {
372
+ # note that values must lexically sort to create a chronological sort (800 B.C. before 750 B.C.)
373
+ '801 B.C.' => '-199',
374
+ '800 B.C.' => '-200',
375
+ '750 B.C.' => '-250',
376
+ '700 B.C.' => '-300',
377
+ '699 B.C.' => '-301',
378
+ '75 B.C.' => '-925',
379
+ '8 B.C.' => '-992'
380
+ }
381
+
382
# the class-level method is expected to invoke the instance method of the same name
context '*facet_string_from_date_str' do
  it 'calls instance method facet_string_from_date_str' do
    parser_class = Stanford::Mods::DateParsing
    expect_any_instance_of(parser_class).to receive(:facet_string_from_date_str)
    parser_class.facet_string_from_date_str('1666')
  end
end
388
# the class-level method is expected to invoke the instance method of the same name
context '*sortable_year_string_from_date_str' do
  it 'calls instance method sortable_year_string_from_date_str' do
    parser_class = Stanford::Mods::DateParsing
    expect_any_instance_of(parser_class).to receive(:sortable_year_string_from_date_str)
    parser_class.sortable_year_string_from_date_str('1666')
  end
end
394
+
395
context '#facet_string_from_date_str' do
  # shorthand for the call under test
  facet_for = ->(date_str) { Stanford::Mods::DateParsing.new(date_str).facet_string_from_date_str }

  single_value_examples = [
    single_year,
    specific_month,
    specific_day,
    specific_day_2_digit_year,
    specific_day_ruby_parse_fail,
    century_only,
    brackets_in_middle_of_year,
    invalid_but_can_get_year
  ].reduce({}) { |merged, examples| merged.merge(examples) }

  single_value_examples.each do |date_str, year|
    # all-digit expectations are compared without leading zeros (e.g. '0700' becomes '700')
    facet_str = year.match(/^\d+$/) ? year.to_i.to_s : year
    it "#{facet_str} for single value #{date_str}" do
      expect(facet_for.call(date_str)).to eq facet_str
    end
  end

  multi_value_examples = [
    multiple_years,
    multiple_years_4_digits_once,
    decade_only,
    decade_only_4_digits
  ].reduce({}) { |merged, examples| merged.merge(examples) }

  # for multi-year strings the first expected year is the facet value
  multi_value_examples.each do |date_str, years|
    first_year = years.first
    it "#{first_year} for multi-value #{date_str}" do
      expect(facet_for.call(date_str)).to eq first_year
    end
  end

  early_numeric_dates.each do |date_str, sortable|
    facet_str =
      if date_str.start_with?('-')
        # negative years are expected as "<digits> B.C."
        date_str[1..-1] + " B.C."
      elsif sortable.match(/^\d+$/)
        sortable.to_i.to_s
      else
        sortable
      end
    it "#{facet_str} for #{date_str}" do
      expect(facet_for.call(date_str)).to eq facet_str
    end
  end

  # B.C. strings are expected to come back unchanged
  bc_dates.each_key do |date_str|
    it "#{date_str} for #{date_str}" do
      expect(facet_for.call(date_str)).to eq date_str
    end
  end
  it '1600 B.C. for 1600 B.C.' do
    expect(facet_for.call('1600 B.C.')).to eq '1600 B.C.'
  end

  bad_dates = ['9999', '2035', '0000-00-00']
  bad_dates.each do |date_str|
    it "nil for #{date_str}" do
      expect(facet_for.call(date_str)).to eq nil
    end
  end
end
452
+
453
context '#sortable_year_string_from_date_str' do
  # shorthand for the call under test
  sortable_for = ->(date_str) { Stanford::Mods::DateParsing.new(date_str).sortable_year_string_from_date_str }

  single_value_examples = [
    single_year,
    specific_month,
    specific_day,
    specific_day_2_digit_year,
    specific_day_ruby_parse_fail,
    early_numeric_dates,
    bc_dates,
    brackets_in_middle_of_year,
    invalid_but_can_get_year
  ].reduce({}) { |merged, examples| merged.merge(examples) }

  single_value_examples.each do |date_str, year|
    it "#{year} for single value #{date_str}" do
      expect(sortable_for.call(date_str)).to eq year
    end
  end

  multi_value_examples = [
    multiple_years,
    multiple_years_4_digits_once,
    decade_only,
    decade_only_4_digits
  ].reduce({}) { |merged, examples| merged.merge(examples) }

  # for multi-year strings the first expected year is the sortable value
  multi_value_examples.each do |date_str, years|
    first_year = years.first
    it "#{first_year} for multi-value #{date_str}" do
      expect(sortable_for.call(date_str)).to eq first_year
    end
  end

  # every century_only example is a "17xx" form, expected to sort as 1700
  century_only.each_key do |date_str|
    it "1700 from #{date_str}" do
      expect(sortable_for.call(date_str)).to eq '1700'
    end
  end
  it '0700 for 7--' do
    expect(sortable_for.call('7--')).to eq '0700'
  end

  it 'nil for 1600 B.C.' do
    skip "code broken for dddd B.C. but no existing data for this yet"
    expect(sortable_for.call('1600 B.C.')).to eq nil
  end

  bad_dates = ['9999', '2035', '0000-00-00']
  bad_dates.each do |date_str|
    it "nil for #{date_str}" do
      expect(sortable_for.call(date_str)).to eq nil
    end
  end
end
501
+
502
context '*year_str_valid?' do
  this_year = Date.today.year
  # example string as key, expected result as value
  validity_by_year_str = {
    '-1000' => false,
    '-999' => true,
    '-35' => true,
    '-3' => true,
    '0000' => true,
    '0' => true,
    '5' => true,
    '33' => true,
    '150' => true,
    (this_year + 1).to_s => true, # current year + 1
    (this_year + 2).to_s => false, # current year + 2
    '9999' => false,
    '165x' => false,
    '198-' => false,
    'random text' => false,
    nil => false
  }

  validity_by_year_str.each do |year_str, valid|
    it "#{valid} for #{year_str}" do
      expect(Stanford::Mods::DateParsing.year_str_valid?(year_str)).to eq valid
    end
  end
end
526
+
527
# Exercises #sortable_year_for_yyyy: strings with a 4 digit year yield that year (the first
# expected year for multi-year strings); the other listed forms are expected to yield nil.
context '#sortable_year_for_yyyy' do
  single_year
    .merge(specific_month)
    .merge(specific_day)
    .merge(invalid_but_can_get_year)
    .merge(specific_day_ruby_parse_fail).each do |example, expected|
    it "#{expected} for #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_yyyy).to eq expected
    end
  end

  multiple_years
    .merge(multiple_years_4_digits_once)
    .merge(decade_only_4_digits).each do |example, expected|
    it "#{expected.first} for #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_yyyy).to eq expected.first
    end
  end

  # indicate some of the strings this method cannot handle (so must be parsed with other instance methods)
  # built with + rather than push so the shared unparseable fixture is not modified
  # for the contexts that read it later
  not_handled = unparseable +
                brackets_in_middle_of_year.keys +
                specific_day_2_digit_year.keys +
                decade_only.keys +
                century_only.keys
  not_handled.each do |example|
    it "nil for #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_yyyy).to eq nil
    end
  end
end
557
+
558
context '#sortable_year_for_yy' do
  # shorthand for the call under test
  year_for_yy = ->(date_str) { Stanford::Mods::DateParsing.new(date_str).sortable_year_for_yy }

  specific_day_2_digit_year.each do |date_str, year|
    it "#{year} for #{date_str}" do
      expect(year_for_yy.call(date_str)).to eq year
    end
  end
  it '2000 for 12/25/00' do
    expect(year_for_yy.call('12/25/00')).to eq '2000'
  end

  # indicate some of the strings this method cannot handle (so must be parsed with other instance methods)
  year_first_forms = [
    '92/1/31', # yy/mm/dd: doesn't work. :-(
    '92-31-1'  # yy-dd-mm: doesn't work. :-(
  ]
  (year_first_forms + decade_only.keys).each do |date_str|
    it "nil for #{date_str}" do
      expect(year_for_yy.call(date_str)).to eq nil
    end
  end
end
578
+
579
context '#sortable_year_for_decade' do
  # shorthand for the call under test
  year_for_decade = ->(date_str) { Stanford::Mods::DateParsing.new(date_str).sortable_year_for_decade }

  # the first year of the decade is the expected sortable value
  decade_only.each do |date_str, years|
    first_year = years.first
    it "#{first_year} for #{date_str}" do
      expect(year_for_decade.call(date_str)).to eq first_year
    end
  end

  # example string as key, expected result as value
  recent_decades = {
    '199u' => '1990',
    '200-' => '2000',
    '201?' => '2010',
    '202x' => '2020'
  }
  recent_decades.each do |date_str, year|
    it "#{year} for #{date_str}" do
      expect(year_for_decade.call(date_str)).to eq year
    end
  end

  # some of the strings this method cannot handle (so must be parsed with other instance methods)
  (decade_only_4_digits.keys + specific_day_2_digit_year.keys).each do |date_str|
    it "nil for #{date_str}" do
      expect(year_for_decade.call(date_str)).to eq nil
    end
  end
end
604
+
605
context '#sortable_year_for_century' do
  # shorthand for the call under test
  year_for_century = ->(date_str) { Stanford::Mods::DateParsing.new(date_str).sortable_year_for_century }

  # every century_only example is expected to sort as 1700
  century_only.each_key do |date_str|
    it "1700 from #{date_str}" do
      expect(year_for_century.call(date_str)).to eq '1700'
    end
  end
  it '0700 for 7--' do
    expect(year_for_century.call('7--')).to eq '0700'
  end
  it 'nil for 7th century B.C. (to be handled in different method)' do
    expect(year_for_century.call('7th century B.C.')).to eq nil
  end
end
618
+
619
context '#facet_string_for_century' do
  # shorthand for the call under test
  century_facet_for = ->(date_str) { Stanford::Mods::DateParsing.new(date_str).facet_string_for_century }

  century_only.each do |date_str, century|
    it "#{century} for #{date_str}" do
      expect(century_facet_for.call(date_str)).to eq century
    end
  end

  # example string as key, expected result as value
  other_centuries = {
    '16--' => '17th century',
    '7--' => '8th century',
    # check suffixes
    '20--' => '21st century',
    '1--' => '2nd century',
    '2--' => '3rd century'
  }
  other_centuries.each do |date_str, century|
    it "#{century} for #{date_str}" do
      expect(century_facet_for.call(date_str)).to eq century
    end
  end

  it 'nil for 7th century B.C. (to be handled in different method)' do
    expect(century_facet_for.call('7th century B.C.')).to eq nil
  end
end
642
+
643
# each early numeric string is expected to map to the sortable value in early_numeric_dates
context '#sortable_year_for_early_numeric' do
  early_numeric_dates.each do |date_str, sortable|
    it "#{sortable} for #{date_str}" do
      parser = Stanford::Mods::DateParsing.new(date_str)
      expect(parser.sortable_year_for_early_numeric).to eq sortable
    end
  end
end
650
+
651
context '#facet_string_for_early_numeric' do
  early_numeric_dates.each do |date_str, sortable|
    facet_str =
      if date_str.start_with?('-')
        # negative years are expected as "<digits> B.C."
        date_str[1..-1] + " B.C."
      elsif sortable.match(/^\d+$/)
        # all-digit expectations are compared without leading zeros
        sortable.to_i.to_s
      else
        sortable
      end
    it "#{facet_str} for #{date_str}" do
      expect(Stanford::Mods::DateParsing.new(date_str).facet_string_for_early_numeric).to eq facet_str
    end
  end
end
666
+
667
# each B.C. string is expected to map to the sortable value in bc_dates
context '#sortable_year_for_bc' do
  bc_dates.each do |date_str, sortable|
    it "#{sortable} for #{date_str}" do
      parser = Stanford::Mods::DateParsing.new(date_str)
      expect(parser.sortable_year_for_bc).to eq sortable
    end
  end
end
674
+
675
# B.C. strings are expected to come back unchanged as the facet value
context '#facet_string_for_bc' do
  bc_facet_for = ->(date_str) { Stanford::Mods::DateParsing.new(date_str).facet_string_for_bc }

  bc_dates.each_key do |date_str|
    it "#{date_str} for #{date_str}" do
      expect(bc_facet_for.call(date_str)).to eq date_str
    end
  end
  it '1600 B.C. for 1600 B.C.' do
    expect(bc_facet_for.call('1600 B.C.')).to eq '1600 B.C.'
  end
end
685
+
686
context '#year_via_ruby_parsing' do
  specific_day.each do |date_str, year|
    it "#{year} for #{date_str}" do
      expect(Stanford::Mods::DateParsing.new(date_str).year_via_ruby_parsing).to eq year
    end
  end

  # some of the strings this method cannot handle (and must be parsed with other instance methods)
  not_handled = multiple_years.keys +
                multiple_years_4_digits_once.keys +
                decade_only_4_digits.keys +
                century_only.keys +
                invalid_but_can_get_year.keys
  not_handled.each do |date_str|
    it "nil for #{date_str}" do
      expect(Stanford::Mods::DateParsing.new(date_str).year_via_ruby_parsing).to eq nil
    end
  end

  # data works via #sortable_year_for_yyyy (and don't all work here):
  #   single_year
  #   specific_month
  #   specific_day_ruby_parse_fail

  # data fails *sortable_year_for_yyyy AND for *year_via_ruby_parsing:
  #   multiple_years
  #   century_only

  # data fails *sortable_year_for_yyyy
  #   and partially works for *year_via_ruby_parsing:
  # NOTE(review): this whole body is wrapped in a group-level `skip`; presumably the
  #   nested examples are never defined or run -- confirm against RSpec's skip semantics
  skip 'parsed incorrectly' do
    # assigns incorrect values to 13 out of 92 (rest with no val assigned)
    unparseable.each do |date_str|
      it "nil for unparseable: #{date_str}" do
        expect(Stanford::Mods::DateParsing.new(date_str).year_via_ruby_parsing).to eq nil
      end
    end

    # assigns incorrect values to 2 out of 2
    brackets_in_middle_of_year.each_key do |date_str|
      it "nil for brackets_in_middle_of_year: #{date_str}" do
        expect(Stanford::Mods::DateParsing.new(date_str).year_via_ruby_parsing).to eq nil
      end
    end

    # assigns incorrect values to 3 out of 8 (5 with no val assigned)
    specific_day_2_digit_year.each_key do |date_str|
      it "nil for specific_day_2_digit_year: #{date_str}" do
        expect(Stanford::Mods::DateParsing.new(date_str).year_via_ruby_parsing).to eq nil
      end
    end

    # assigns incorrect values to 8 out of 8
    decade_only.each_key do |date_str|
      it "nil for decade_only: #{date_str}" do
        expect(Stanford::Mods::DateParsing.new(date_str).year_via_ruby_parsing).to eq nil
      end
    end
  end
end
745
+
746
+ end