stanford-mods 1.3.3 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,208 @@
1
+ # encoding: UTF-8
2
+ require 'logger'
3
+ require 'mods'
4
+
5
+ # SearchWorks specific wranglings of MODS *subject* metadata as a mixin to the Stanford::Mods::Record object
6
+ module Stanford
7
+ module Mods
8
+ class Record < ::Mods::Record
9
+
10
+ # Collects geographic subject values, in this order, from:
11
+ #   subject/geographic
12
+ #   subject/hierarchicalGeographic (one value per node: its non-empty child element texts joined with sep)
13
+ #   subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
14
+ # @param [String] sep the separator string for joining hierarchicalGeographic sub elements (defaults to a single space)
15
+ # @return [Array<String>] values for geographic_search Solr field for this document or [] if none
16
+ def sw_geographic_search(sep = ' ')
17
+ result = term_values([:subject, :geographic]) || [] # fall back to [] when term_values returns nil
18
+
19
+ # hierarchicalGeographic has sub elements: each node contributes one joined value, or nothing if all children are empty
20
+ @mods_ng_xml.subject.hierarchicalGeographic.each { |hg_node|
21
+ hg_vals = []
22
+ hg_node.element_children.each { |e|
23
+ hg_vals << e.text unless e.text.empty?
24
+ }
25
+ result << hg_vals.join(sep) unless hg_vals.empty?
26
+ }
27
+
28
+ trans_code_vals = @mods_ng_xml.subject.geographicCode.translated_value
29
+ if trans_code_vals
30
+ trans_code_vals.each { |val|
31
+ result << val if !result.include?(val) # skip translations that duplicate a value already collected
32
+ }
33
+ end
34
+
35
+ result
36
+ end
37
+
38
+ # Builds one value per subject/name element from the contents of:
39
+ #   subject/name/namePart
40
+ # Non-empty namePart values are concatenated with sep in the order they appear (e.g. "Shakespeare, William, 1564-1616")
41
+ # @param [String] sep the separator string for joining namePart sub elements (defaults to ', ')
42
+ # @return [Array<String>] values for names inside subject elements or [] if none
43
+ def sw_subject_names(sep = ', ')
44
+ result = []
45
+ @mods_ng_xml.subject.name_el.select { |n_el| n_el.namePart }.each { |name_el_w_np|
46
+ parts = name_el_w_np.namePart.map { |npn| npn.text unless npn.text.empty? }.compact
47
+ result << parts.join(sep).strip unless parts.empty? # name elements without any non-empty namePart add nothing
48
+ }
49
+ result
50
+ end
51
+
52
+ # Builds one value per subject/titleInfo element from the non-empty contents of:
53
+ #   subject/titleInfo/(subelements), joined with sep in document order
54
+ # @param [String] sep the separator string for joining titleInfo sub elements (defaults to a single space)
55
+ # @return [Array<String>] values for titles inside subject elements or [] if none
56
+ def sw_subject_titles(sep = ' ')
57
+ result = []
58
+ @mods_ng_xml.subject.titleInfo.each { |ti_el|
59
+ parts = ti_el.element_children.map { |el| el.text unless el.text.empty? }.compact
60
+ result << parts.join(sep).strip unless parts.empty? # titleInfo elements with only empty children add nothing
61
+ }
62
+ result
63
+ end
64
+
65
+ # Values are the contents of (genre values first, then topics):
66
+ #   mods/genre
67
+ #   mods/subject/topic
68
+ # @return [Array<String>] values for the topic_search Solr field for this document or nil if none
69
+ def topic_search
70
+ @topic_search ||= begin # memoized; a nil result is not cached, so it is recomputed on the next call
71
+ vals = self.term_values(:genre) || []
72
+ vals.concat(subject_topics) if subject_topics
73
+ vals.empty? ? nil : vals
74
+ end
75
+ end
76
+
77
+ # Values are the contents of (in this order):
78
+ #   subject/topic
79
+ #   subject/name/namePart (one joined value per name element, via subject_names)
80
+ #   subject/titleInfo (one joined value per titleInfo element, via subject_titles)
81
+ #   subject/occupation
82
+ # each with one trailing comma, semicolon, or backslash removed and surrounding whitespace stripped
83
+ # @return [Array<String>] values for the topic_facet Solr field for this document or nil if none
84
+ def topic_facet
85
+ vals = subject_topics ? Array.new(subject_topics) : [] # copy, so the memoized subject_topics array is not mutated below
86
+ vals.concat(subject_names) if subject_names
87
+ vals.concat(subject_titles) if subject_titles
88
+ vals.concat(subject_occupations) if subject_occupations
89
+ vals.map! { |val|
90
+ v = val.sub(/[\\,;]$/, '') # drops a single trailing \ , or ; (no effect if whitespace follows it)
91
+ v.strip
92
+ }
93
+ vals.empty? ? nil : vals
94
+ end
95
+
96
+ # geographic_search values, each with one trailing comma, semicolon, or backslash removed and surrounding whitespace stripped
97
+ # @return [Array<String>] values for the geographic_facet Solr field for this document or nil if none
98
+ def geographic_facet
99
+ geographic_search.map { |val| val.sub(/[\\,;]$/, '').strip } unless !geographic_search # nil when geographic_search is nil
100
+ end
101
+
102
+ # subject/temporal values, each with one trailing comma, semicolon, or backslash removed and surrounding whitespace stripped
103
+ # @return [Array<String>] values for the era_facet Solr field for this document or nil if none
104
+ def era_facet
105
+ subject_temporal.map { |val| val.sub(/[\\,;]$/, '').strip } unless !subject_temporal # nil when subject_temporal is nil
106
+ end
107
+
108
+ # Values are the contents of (as collected by sw_geographic_search):
109
+ #   subject/geographic
110
+ #   subject/hierarchicalGeographic
111
+ #   subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
112
+ # @return [Array<String>] values for the geographic_search Solr field for this document or nil if none
113
+ def geographic_search
114
+ @geographic_search ||= begin
115
+ result = self.sw_geographic_search
116
+
117
+ # TODO: this should go into stanford-mods ... but then we have to set that gem up with a Logger
118
+ # NOTE(review): the TODO above looks stale, since this method is defined on Stanford::Mods::Record; log any unrecognized encodings
119
+ xvals = self.subject.geographicCode.translated_value
120
+ codes = self.term_values([:subject, :geographicCode])
121
+ if codes && codes.size > xvals.size # more codes than translations: at least one code was not translated
122
+ self.subject.geographicCode.each { |n|
123
+ if n.authority != 'marcgac' && n.authority != 'marccountry'
124
+ sw_logger.info("#{druid} has subject geographicCode element with untranslated encoding (#{n.authority}): #{n.to_xml}") # sw_logger and druid are not defined in this file; presumably supplied elsewhere (TODO confirm)
125
+ end
126
+ }
127
+ end
128
+
129
+ # FIXME: stanford-mods should be returning [], not nil ...
130
+ return nil if !result || result.empty? # early return leaves the method before ||= assigns, so this case is not memoized
131
+ result
132
+ end
133
+ end
134
+
135
+ # Values are the contents of (collected in the order occupation, name, titleInfo):
136
+ #   subject/name (namePart values joined per name element)
137
+ #   subject/occupation - no subelements
138
+ #   subject/titleInfo (sub elements joined per titleInfo element)
139
+ # @return [Array<String>] values for the subject_other_search Solr field for this document or nil if none
140
+ def subject_other_search
141
+ @subject_other_search ||= begin # memoized; a nil result is recomputed on the next call
142
+ vals = subject_occupations ? Array.new(subject_occupations) : [] # copy, so the memoized array is not mutated by concat
143
+ vals.concat(subject_names) if subject_names
144
+ vals.concat(subject_titles) if subject_titles
145
+ vals.empty? ? nil : vals
146
+ end
147
+ end
148
+
149
+ # Values are the contents of (temporal values first, then genre):
150
+ #   subject/temporal
151
+ #   subject/genre
152
+ # @return [Array<String>] values for the subject_other_subvy_search Solr field for this document or nil if none
153
+ def subject_other_subvy_search
154
+ @subject_other_subvy_search ||= begin # memoized; a nil result is recomputed (and re-logged) on the next call
155
+ vals = subject_temporal ? Array.new(subject_temporal) : [] # copy, so the memoized array is not mutated by concat
156
+ gvals = self.term_values([:subject, :genre])
157
+ vals.concat(gvals) if gvals
158
+
159
+ # log (info level) every subject/temporal element whose encoding value is not empty
160
+ self.subject.temporal.each { |n|
161
+ sw_logger.info("#{druid} has subject temporal element with untranslated encoding: #{n.to_xml}") if !n.encoding.empty?
162
+ }
163
+
164
+ vals.empty? ? nil : vals
165
+ end
166
+ end
167
+
168
+ # Values are the contents of:
169
+ #   all subject subelements except subject/cartographic plus genre top level element, i.e. topic_search + geographic_search + subject_other_search + subject_other_subvy_search (concatenated, duplicates kept)
170
+ # @return [Array<String>] values for the subject_all_search Solr field for this document or nil if none
171
+ def subject_all_search
172
+ vals = topic_search ? Array.new(topic_search) : [] # copy, so the memoized topic_search array is not mutated by concat
173
+ vals.concat(geographic_search) if geographic_search
174
+ vals.concat(subject_other_search) if subject_other_search
175
+ vals.concat(subject_other_subvy_search) if subject_other_subvy_search
176
+ vals.empty? ? nil : vals
177
+ end
178
+
179
+ protected #----------------------------------------------------------
180
+
181
+ # memoizing accessor for subject/name/namePart values from sw_subject_names (to avoid parsing the mods for the same thing multiple times)
182
+ def subject_names
183
+ @subject_names ||= self.sw_subject_names
184
+ end
185
+
186
+ # memoizing accessor for subject/occupation values from term_values (to avoid parsing the mods for the same thing multiple times)
187
+ def subject_occupations
188
+ @subject_occupations ||= self.term_values([:subject, :occupation]) # callers nil-check the result before using it
189
+ end
190
+
191
+ # memoizing accessor for subject/temporal values from term_values (to avoid parsing the mods for the same thing multiple times)
192
+ def subject_temporal
193
+ @subject_temporal ||= self.term_values([:subject, :temporal]) # callers nil-check the result before using it
194
+ end
195
+
196
+ # memoizing accessor for subject/titleInfo values from sw_subject_titles (to avoid parsing the mods for the same thing multiple times)
197
+ def subject_titles
198
+ @subject_titles ||= self.sw_subject_titles
199
+ end
200
+
201
+ # memoizing accessor for subject/topic values from term_values (to avoid parsing the mods for the same thing multiple times)
202
+ def subject_topics
203
+ @subject_topics ||= self.term_values([:subject, :topic]) # callers nil-check the result before using it
204
+ end
205
+
206
+ end
207
+ end
208
+ end
@@ -1,6 +1,6 @@
1
1
  module Stanford
2
2
  module Mods
3
3
  # this is the Ruby Gem version
4
- VERSION = "1.3.3"
4
+ VERSION = "1.3.4"
5
5
  end
6
6
  end
@@ -0,0 +1,746 @@
1
+ # encoding: utf-8
2
+ describe "date parsing methods" do
3
+
4
+ unparseable = [ # here to remind us of what they might look like in our data
5
+ nil,
6
+ '',
7
+ '[]',
8
+ '?',
9
+ 'uuuu',
10
+ 'Aug',
11
+ 'publiée le 26 germinal an VI',
12
+ "l'an IVe",
13
+ 'Feb',
14
+ "L'AN 2 DE LA // LIBERTÉ",
15
+ 'Paris',
16
+ "publié en frimaire l'an 3.e de la République française",
17
+ 'an 6',
18
+ 'an sept',
19
+ 's.n.]',
20
+ 'M. D. LXI',
21
+ '[An 4]',
22
+ '[s.d.]',
23
+ 'Undated'
24
+ ]
25
+ # example string as key, expected parsed value as value
26
+ invalid_but_can_get_year = {
27
+ '1966-14-14' => '1966', # 14 isn't a valid month ...
28
+ '1966\4\11' => '1966', # slashes wrong way
29
+ '2/31/1950' => '1950', # no 31 of Feb
30
+ '1869-00-00' => '1869',
31
+ '1862-01-00' => '1862',
32
+ '1985-05-00' => '1985'
33
+ }
34
+ # example string as key, expected parsed value as value
35
+ single_year = {
36
+ '0700' => '0700',
37
+ '0999' => '0999',
38
+ '1000' => '1000',
39
+ '1798' => '1798',
40
+ '1583.' => '1583',
41
+ '1885-' => '1885',
42
+ '1644.]' => '1644',
43
+ '1644]' => '1644',
44
+ '1584].' => '1584',
45
+ '1729?]' => '1729',
46
+ '1500 CE' => '1500',
47
+ '1877?' => '1877',
48
+ '1797 goda' => '1797',
49
+ "1616: Con licenza de'svperiori" => '1616',
50
+
51
+ '[1789]' => '1789',
52
+ '[1968?-' => '1968',
53
+ '[1860?]' => '1860',
54
+ '[1789 ?]' => '1789',
55
+ '[[1790]]' => '1790',
56
+ '[1579].' => '1579',
57
+ '[Ca 1790]' => '1790',
58
+ '[c1926]' => '1926',
59
+ '[ca 1790]' => '1790',
60
+ '[ca. 1790]' => '1790',
61
+ '[ca. 1850?]' => '1850',
62
+ '[ca.1600]' => '1600',
63
+ '[after 1726]' => '1726',
64
+ '[an II, i.e. 1794]' => '1794',
65
+ '[approximately 1600]' => '1600',
66
+ '[approximately 1558].' => '1558',
67
+ '[approximately 1717?]' => '1717',
68
+ '[not after 1652]' => '1652',
69
+ '[not before 1543].' => '1543',
70
+
71
+ "A' 1640" => '1640',
72
+ 'A1566' => '1566',
73
+ 'Ans. 1656' => '1656',
74
+ 'Antonio Laffreri 1570' => '1570',
75
+ 'An 6. 1798' => '1798',
76
+ 'An 6 1798' => '1798',
77
+ 'a. 1652' => '1652',
78
+ 'ad decennium 1592' => '1592',
79
+ 'after 1622' => '1622',
80
+ 'an 10 (1802)' => '1802',
81
+ 'an 14, 1805' => '1805',
82
+ 'anno 1801' => '1801',
83
+ 'anno 1603.' => '1603',
84
+ 'approximately 1580.' => '1580',
85
+ 'approximately 1700?' => '1700',
86
+ 'approximately 1544]' => '1544',
87
+ 'anno 1599 (v. 1).' => '1599',
88
+ 'anno MDCXXXV [1635].' => '1635',
89
+ 'anno dom. 1600 (v. 3).' => '1600',
90
+ 'anno j65i [1651]' => '1651',
91
+ 'Ca. 1580 CE' => '1580',
92
+ 'c1887' => '1887',
93
+ 'ca 1796]' => '1796',
94
+ 'ca. 1558' => '1558',
95
+ 'ca. 1560?]' => '1560',
96
+ 'ca. 1700]' => '1700',
97
+ 'circa 1860' => '1860',
98
+ 'copyright 1855' => '1855',
99
+ 'en 1788' => '1788',
100
+ 'im jahr 1681' => '1681',
101
+ "l'an 1.er de la Rep. 1792" => '1792',
102
+ "l'anno1570" => '1570',
103
+ 'MDLXXXVIII [1588]]' => '1588',
104
+ 'MDLXI [1561]' => '1561',
105
+ 'MDCCLII. [1752-' => '1752',
106
+ 'No. 15 1792' => '1792',
107
+ 's.a. [1712]' => '1712',
108
+ 'publié le 24 floréal [1796]' => '1796',
109
+ "Fructidor l'an 3.e [i.e. 1795]" => '1795',
110
+ }
111
+ # example string as key, expected parsed value as value
112
+ specific_month = {
113
+ '1975-05' => '1975', # vs 1918-27
114
+ '1996 Jun' => '1996',
115
+ 'February 1798' => '1798',
116
+ 'March, 1794' => '1794',
117
+ '[ ?] 10 1793' => '1793',
118
+ 'agosto 1799' => '1799',
119
+ 'Jan.y. thes.et 1798' => '1798',
120
+ '[[décembre 1783]]' => '1783',
121
+ 'im Mai 1793' => '1793',
122
+ 'in Febr. 1795' => '1795',
123
+ "juin année 1797" => '1797'
124
+ }
125
+ # example string as key, expected parsed value as value
126
+ specific_day = {
127
+ '1/1/1961' => '1961',
128
+ '10/1/1987' => '1987',
129
+ '5-1-1959' => '1959',
130
+
131
+ # year first
132
+ '1888-02-18' => '1888',
133
+ '1966-2-5' => '1966',
134
+
135
+ # text; starts with day
136
+ '1 July 1799' => '1799',
137
+ '1 Feb. 1782' => '1782',
138
+ '15 Jan.y 1797' => '1797',
139
+ '12.th May 1794' => '1794',
140
+ '12th May 1794' => '1794',
141
+ '12th Dec.r 1794' => '1794',
142
+ '14th Feb.y 1794' => '1794',
143
+ '18 Febr. 1790' => '1790',
144
+ '23 Nov.r 1797' => '1797',
145
+
146
+ # text; starts with year
147
+ '1793 March 1st' => '1793',
148
+ '1892, Jan. 1' => '1892',
149
+ '1991 May 14' => '1991',
150
+ '1997 Sep 6' => '1997',
151
+
152
+ # text starts with words
153
+ 'Boston, November 25, 1851' => '1851',
154
+ 'd. 16 Feb. 1793' => '1793',
155
+ 'published the 30 of June 1799' => '1799',
156
+ 'Published the 1 of June 1799' => '1799',
157
+ 'Pub.d Nov.r 1st 1798' => '1798',
158
+ 'Published July 5th, 1784' => '1784',
159
+
160
+ # text starts with month
161
+ 'April 01 1797' => '1797',
162
+ 'April 1 1796' => '1796',
163
+ 'April 1. 1796' => '1796',
164
+ 'April 16, 1632' => '1632',
165
+ 'April 11th 1792' => '1792',
166
+ '[April 1 1795]' => '1795',
167
+
168
+ 'Aug. 1st 1797' => '1797',
169
+ 'Aug 30th 1794' => '1794',
170
+ 'Aug. 16 1790' => '1790',
171
+ 'Aug. 20, 1883' => '1883',
172
+ 'Aug. 3rd, 1886' => '1886',
173
+ 'Aug.st 4 1795' => '1795',
174
+ 'Aug.t 16 1794' => '1794',
175
+ 'Augt. 29, 1804' => '1804',
176
+ 'August 1 1794' => '1794',
177
+
178
+ 'Dec. 1 1792' => '1792',
179
+ 'Dec.r 1 1792' => '1792',
180
+ 'Dec.r 8th 1798' => '1798',
181
+ 'Decb.r 1, 1789' => '1789',
182
+ 'December 16 1795' => '1795',
183
+
184
+ 'Feb 12 1800' => '1800',
185
+ 'Feb. 10 1798' => '1798',
186
+ 'Feb. 25, 1744]' => '1744',
187
+ 'Feb.ry 12 1793' => '1793',
188
+ 'Feb.ry 7th 1796' => '1796',
189
+ 'Feb.y 1 1794' => '1794',
190
+ 'Feb.y 13th 1798' => '1798',
191
+ 'Feb.y 23rd 1799' => '1799',
192
+ '[Feb.y 18 1793]' => '1793',
193
+
194
+ 'Jan. 1 1789' => '1789',
195
+ 'Jan. 1. 1795' => '1795',
196
+ 'Jan.y 15. 1795' => '1795',
197
+ 'Jan.y 12st 1793' => '1793',
198
+ 'Jan.y 18th 1790' => '1790',
199
+
200
+ 'July 1 1796' => '1796',
201
+ 'July 1. 1793' => '1793',
202
+ 'July 13, 1787' => '1787',
203
+ 'July 15th 1797' => '1797',
204
+
205
+ 'June 1 1793' => '1793',
206
+ 'June 1. 1800' => '1800',
207
+ 'June1st.1805' => '1805',
208
+ 'June 22, 1804' => '1804',
209
+ 'July 23d 1792' => '1792',
210
+ 'June 30th 1799' => '1799',
211
+ '[June 2 1793]' => '1793',
212
+
213
+ 'May 9, 1795' => '1795',
214
+ 'May 12 1792' => '1792',
215
+ 'May 21st 1798' => '1798',
216
+ 'May 15th 1798' => '1798',
217
+
218
+ 'Mar. 1. 1792' => '1792',
219
+ 'March 1 1795' => '1795',
220
+ 'March 1.t 1797' => '1797',
221
+ 'March 1, 1793' => '1793',
222
+ 'March 1st 1797' => '1797',
223
+ 'March 6th 1798' => '1798',
224
+ '[March 16 1798]' => '1798',
225
+
226
+ 'Nov. 1. 1796' => '1796',
227
+ 'Nov. 14th 1792' => '1792',
228
+ 'Nov. 20 1789' => '1789',
229
+ 'Nov.r 9, 1793' => '1793',
230
+ 'Novem. 13th 1797' => '1797',
231
+ 'Novembr 22nd 1794' => '1794',
232
+
233
+ 'Oct 12 1792' => '1792',
234
+ 'Oct 18th 1794' => '1794',
235
+ 'Oct. 29 1796' => '1796',
236
+ 'Oct. 11th 1794' => '1794',
237
+ 'Oct.er 1st 1786' => '1786',
238
+ 'Oct.r 25 1796' => '1796',
239
+ 'Oct.r 25th 1794' => '1794',
240
+ 'Octo.r 15 1795' => '1795',
241
+
242
+ 'Sep.r 1, 1795' => '1795',
243
+ 'Sep.tr 15.th 1796' => '1796',
244
+ 'Sept.r 5th 1793' => '1793'
245
+ }
246
+ specific_day_ruby_parse_fail = {
247
+ # note: Ruby's Date.parse only handles American or European date order, not both (??)
248
+ '1/30/1979' => '1979',
249
+ '10/20/1976' => '1976',
250
+ '5-18-2014' => '2014',
251
+ # year first
252
+ '1980-23-02' => '1980',
253
+ '1792 20 Dec' => '1792',
254
+ # text
255
+ 'le 22 juin 1794' => '1794',
256
+ 'mis au jour le 26 juillet 1791' => '1791',
257
+ 'April 12 sd 1794' => '1794',
258
+ 'Dec. 10 & 11, 1855' => '1855',
259
+ 'January 22th [1800]' => '1800',
260
+ 'June the 12, 1794' => '1794',
261
+ 'Mai 1st 1789' => '1789',
262
+ 'March 22 d. 1794' => '1794',
263
+ 'N. 7 1796' => '1796',
264
+ 'N[ovember] 21st 1786' => '1786',
265
+ 'Oct. the 2.d 1793' => '1793',
266
+ }
267
+ # example string as key, expected parsed value as value
268
+ specific_day_2_digit_year = {
269
+ '1/2/79' => '1979',
270
+ '2/12/15' => '2015',
271
+ '6/11/99' => '1999',
272
+ '10/1/90' => '1990',
273
+ '10/21/08' => '2008',
274
+ '5-1-59' => '1959',
275
+ '5-1-21' => '1921',
276
+ '5-1-14' => '2014'
277
+ }
278
+ # example string as key, expected parsed years (array of 4-digit strings) as value
279
+ multiple_years = {
280
+ '1783-1788' => ['1783', '1784', '1785', '1786', '1787', '1788'],
281
+ '1862-1868]' => ['1862', '1863', '1864', '1865', '1866', '1867', '1868'],
282
+ '1640-1645?]' => ['1640', '1641', '1642', '1643', '1644', '1645'],
283
+ '1578, 1584]' => ['1578', '1584'],
284
+ '1860, [1862]' => ['1860', '1862'],
285
+ '1901, c1900' => ['1901', '1900'], # pub date is the one without the c
286
+ '1627 [i.e. 1646]' => ['1627', '1646'],
287
+ '1698/1715' => ['1698', '1715'],
288
+ '1965,1968' => ['1965', '1968'], # revs
289
+ '1965|1968' => ['1965', '1968'], # revs
290
+ '1789 ou 1790]' => ['1789', '1790'],
291
+ '1689 [i.e. 1688-89]' => ['1689', '1688'],
292
+ '1598 or 1599' => ['1598', '1599'],
293
+ '1890 [c1884]' => ['1890', '1884'], # pub date is the one without the c
294
+ '1873,c1868' => ['1873', '1868'], # pub date is the one without the c
295
+ '1872-1877 [t.5, 1874]' => ['1872', '1873', '1874', '1875', '1876', '1877'],
296
+ '1809 [ca. 1810]' => ['1809', '1810'],
297
+ '1726 or 1738]' => ['1726', '1738'],
298
+
299
+ '[1789-1791]' => ['1789', '1790', '1791'],
300
+ '[1627-1628].' => ['1627', '1628'],
301
+ '[1789-1791' => ['1789', '1790', '1791'],
302
+ '[1793 ou 1794]' => ['1793', '1794'],
303
+ '[entre 1789 et 1791]' => ['1789', '1790', '1791'],
304
+ '[Entre 1789 et 1791]' => ['1789', '1790', '1791'],
305
+ '[entre 1789-1791]' => ['1789', '1790', '1791'],
306
+ '[entre 1789 et 1791 ?]' => ['1789', '1790', '1791'],
307
+ '[between 1882 and 1887]' => ['1882', '1883', '1884', '1885', '1886', '1887'],
308
+ '[ca 1789-1791]' => ['1789', '1790', '1791'],
309
+ '[ca 1790 et 1792]' => ['1790', '1791', '1792'],
310
+ '[ca. 1550-1552]' => ['1550', '1551', '1552'],
311
+
312
+ 'Anno 1789-1790' => ['1789', '1790'],
313
+ "L'an VII de la République [1798 or 1799]" => ['1798', '1799'],
314
+ 'MDCXIII [1613] (v. 1); MDLXXXIII [1583] (v. 2); and MDCVI [1606] (v. 3).' => ['1613', '1583', '1606'],
315
+ 'entre 1793 et 1795' => ['1793', '1794', '1795'],
316
+ 'entre 1793 et 1795]' => ['1793', '1794', '1795'],
317
+ 'approximately 1600-1602.' => ['1600', '1601', '1602'],
318
+ 'approximately 1650-1652]' => ['1650', '1651', '1652'],
319
+ 'approximately 1643-1644.]' => ['1643', '1644'],
320
+ 'ca. 1740-1745]' => ['1740', '1741', '1742', '1743', '1744', '1745'],
321
+ 'circa 1851-1852' => ['1851', '1852'],
322
+ 's.a. [ca. 1660, erschienen: 1782]' => ['1660', '1782'],
323
+ 'view of approximately 1848, published about 1865' => ['1848', '1865']
324
+ }
325
+ # example string as key, expected parsed value as value
326
+ multiple_years_4_digits_once = {
327
+ '1918-20' => ['1918', '1919', '1920'], # vs. 1961-04
328
+ '1965-8' => ['1965', '1966', '1967', '1968'], # revs
329
+ '[1846-51]' => ['1846', '1847', '1848', '1849', '1850', '1851']
330
+ }
331
+ # example string as key, expected parsed value as value
332
+ decade_only_4_digits = {
333
+ 'early 1890s' => ['1890', '1891', '1892', '1893', '1894', '1895', '1896', '1897', '1898', '1899'],
334
+ '1950s' => ['1950', '1951', '1952', '1953', '1954', '1955', '1956', '1957', '1958', '1959'],
335
+ "1950's" => ['1950', '1951', '1952', '1953', '1954', '1955', '1956', '1957', '1958', '1959']
336
+ }
337
+ decade_only = {
338
+ '156u' => ['1560', '1561', '1562', '1563', '1564', '1565', '1566', '1567', '1568', '1569'],
339
+ '167-?]' => ['1670', '1671', '1672', '1673', '1674', '1675', '1676', '1677', '1678', '1679'],
340
+ '[171-?]' => ['1710', '1711', '1712', '1713', '1714', '1715', '1716', '1717', '1718', '1719'],
341
+ '[189-]' => ['1890', '1891', '1892', '1893', '1894', '1895', '1896', '1897', '1898', '1899'],
342
+ 'ca.170-?]' => ['1700', '1701', '1702', '1703', '1704', '1705', '1706', '1707', '1708', '1709'],
343
+ '200-?]' => ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009'],
344
+ '186?' => ['1860', '1861', '1862', '1863', '1864', '1865', '1866', '1867', '1868', '1869'],
345
+ '195x' => ['1950', '1951', '1952', '1953', '1954', '1955', '1956', '1957', '1958', '1959']
346
+ }
347
+ century_only = {
348
+ '18th century CE' => '18th century',
349
+ '17uu' => '18th century',
350
+ '17--?]' => '18th century',
351
+ '17--]' => '18th century',
352
+ '[17--]' => '18th century',
353
+ '[17--?]' => '18th century'
354
+ }
355
+ brackets_in_middle_of_year = {
356
+ '169[5]' => '1695',
357
+ 'October 3, [18]91' => '1891'
358
+ }
359
+ # we have data like this for our Roman coins collection
360
+ early_numeric_dates = {
361
+ # note that values must lexically sort to create a chronological sort. (-999 before -914)
362
+ '-999' => '-001',
363
+ '-914' => '-086',
364
+ '-18' => '-982',
365
+ '-1' => '-999',
366
+ '0' => '0000',
367
+ '5' => '0005',
368
+ '33' => '0033',
369
+ '945' => '0945'
370
+ }
371
+ bc_dates = {
372
+ # note that values must lexically sort to create a chronological sort (800 B.C. before 750 B.C.)
373
+ '801 B.C.' => '-199',
374
+ '800 B.C.' => '-200',
375
+ '750 B.C.' => '-250',
376
+ '700 B.C.' => '-300',
377
+ '699 B.C.' => '-301',
378
+ '75 B.C.' => '-925',
379
+ '8 B.C.' => '-992'
380
+ }
381
+
382
+ context '*facet_string_from_date_str' do # class-level method: only checks that it reaches the instance method of the same name
383
+ it 'calls instance method facet_string_from_date_str' do
384
+ expect_any_instance_of(Stanford::Mods::DateParsing).to receive(:facet_string_from_date_str)
385
+ Stanford::Mods::DateParsing.facet_string_from_date_str('1666')
386
+ end
387
+ end
388
+ context '*sortable_year_string_from_date_str' do # class-level method: only checks that it reaches the instance method of the same name
389
+ it 'calls instance method sortable_year_string_from_date_str' do
390
+ expect_any_instance_of(Stanford::Mods::DateParsing).to receive(:sortable_year_string_from_date_str)
391
+ Stanford::Mods::DateParsing.sortable_year_string_from_date_str('1666')
392
+ end
393
+ end
394
+
395
+ context '#facet_string_from_date_str' do # one generated example per fixture entry
396
+ single_year
397
+ .merge(specific_month)
398
+ .merge(specific_day)
399
+ .merge(specific_day_2_digit_year)
400
+ .merge(specific_day_ruby_parse_fail)
401
+ .merge(century_only)
402
+ .merge(brackets_in_middle_of_year)
403
+ .merge(invalid_but_can_get_year).each do |example, expected|
404
+ expected = expected.to_i.to_s if expected.match(/^\d+$/) # facet strings have no zero padding ('0700' => '700'); '18th century' etc. pass through
405
+ it "#{expected} for single value #{example}" do
406
+ expect(Stanford::Mods::DateParsing.new(example).facet_string_from_date_str).to eq expected
407
+ end
408
+ end
409
+
410
+ multiple_years
411
+ .merge(multiple_years_4_digits_once)
412
+ .merge(decade_only)
413
+ .merge(decade_only_4_digits).each do |example, expected| # expected is an array of years; only its first year is asserted
414
+ it "#{expected.first} for multi-value #{example}" do
415
+ expect(Stanford::Mods::DateParsing.new(example).facet_string_from_date_str).to eq expected.first
416
+ end
417
+ end
418
+
419
+ early_numeric_dates.each do |example, expected|
420
+ if example.start_with?('-')
421
+ exp = example[1..-1] + " B.C." # negative numeric input is expected to facet as '<digits> B.C.'
422
+ it "#{exp} for #{example}" do
423
+ expect(Stanford::Mods::DateParsing.new(example).facet_string_from_date_str).to eq exp
424
+ end
425
+ else
426
+ expected = expected.to_i.to_s if expected.match(/^\d+$/) # strip the zero padding used by the sortable form
427
+ it "#{expected} for #{example}" do
428
+ expect(Stanford::Mods::DateParsing.new(example).facet_string_from_date_str).to eq expected
429
+ end
430
+ end
431
+ end
432
+
433
+ bc_dates.keys.each do |example| # B.C. strings are expected to facet as themselves
434
+ it "#{example} for #{example}" do
435
+ expect(Stanford::Mods::DateParsing.new(example).facet_string_from_date_str).to eq example
436
+ end
437
+ end
438
+ it '1600 B.C. for 1600 B.C.' do
439
+ expect(Stanford::Mods::DateParsing.new('1600 B.C.').facet_string_from_date_str).to eq '1600 B.C.'
440
+ end
441
+
442
+ [ # bad dates
443
+ '9999',
444
+ '2035', # NOTE(review): the *year_str_valid? examples accept years up to next year, so this entry presumably stops being "bad" once 2035 is within that window
445
+ '0000-00-00'
446
+ ].each do |example|
447
+ it "nil for #{example}" do
448
+ expect(Stanford::Mods::DateParsing.new(example).facet_string_from_date_str).to eq nil
449
+ end
450
+ end
451
+ end
452
+
453
+ context '#sortable_year_string_from_date_str' do # one generated example per fixture entry
454
+ single_year
455
+ .merge(specific_month)
456
+ .merge(specific_day)
457
+ .merge(specific_day_2_digit_year)
458
+ .merge(specific_day_ruby_parse_fail)
459
+ .merge(early_numeric_dates)
460
+ .merge(bc_dates)
461
+ .merge(brackets_in_middle_of_year)
462
+ .merge(invalid_but_can_get_year).each do |example, expected| # fixture values are already in sortable form, so they are used as-is
463
+ it "#{expected} for single value #{example}" do
464
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_string_from_date_str).to eq expected
465
+ end
466
+ end
467
+
468
+ multiple_years
469
+ .merge(multiple_years_4_digits_once)
470
+ .merge(decade_only)
471
+ .merge(decade_only_4_digits).each do |example, expected| # expected is an array of years; only its first year is asserted
472
+ it "#{expected.first} for multi-value #{example}" do
473
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_string_from_date_str).to eq expected.first
474
+ end
475
+ end
476
+
477
+ century_only.keys.each do |example| # every century_only key is a 17xx form, hence the fixed '1700'
478
+ it "1700 from #{example}" do
479
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_string_from_date_str).to eq '1700'
480
+ end
481
+ end
482
+ it '0700 for 7--' do
483
+ expect(Stanford::Mods::DateParsing.new('7--').sortable_year_string_from_date_str).to eq '0700'
484
+ end
485
+
486
+ it 'nil for 1600 B.C.' do
487
+ skip "code broken for dddd B.C. but no existing data for this yet"
488
+ expect(Stanford::Mods::DateParsing.new('1600 B.C.').sortable_year_string_from_date_str).to eq nil
489
+ end
490
+
491
+ [ # bad dates
492
+ '9999',
493
+ '2035', # NOTE(review): the *year_str_valid? examples accept years up to next year, so this entry presumably stops being "bad" once 2035 is within that window
494
+ '0000-00-00'
495
+ ].each do |example|
496
+ it "nil for #{example}" do
497
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_string_from_date_str).to eq nil
498
+ end
499
+ end
500
+ end
501
+
502
+ context '*year_str_valid?' do # class-level check; the examples accept -999 through next year and reject non-numeric strings and nil
503
+ { # example string as key, expected result as value
504
+ '-1000' => false,
505
+ '-999' => true,
506
+ '-35' => true,
507
+ '-3' => true,
508
+ '0000' => true,
509
+ '0' => true,
510
+ '5' => true,
511
+ '33' => true,
512
+ '150' => true,
513
+ (Date.today.year + 1).to_s => true, # current year + 1
514
+ (Date.today.year + 2).to_s => false, # current year + 2
515
+ '9999' => false,
516
+ '165x' => false,
517
+ '198-' => false,
518
+ 'random text' => false,
519
+ nil => false
520
+ }.each do |example, expected|
521
+ it "#{expected} for #{example}" do
522
+ expect(Stanford::Mods::DateParsing.year_str_valid?(example)).to eq expected
523
+ end
524
+ end
525
+ end
526
+
527
+ context '#sortable_year_for_yyyy' do # inputs that contain an explicit 4-digit year
528
+ single_year
529
+ .merge(specific_month)
530
+ .merge(specific_day)
531
+ .merge(invalid_but_can_get_year)
532
+ .merge(specific_day_ruby_parse_fail).each do |example, expected|
533
+ it "#{expected} for #{example}" do
534
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_yyyy).to eq expected
535
+ end
536
+ end
537
+
538
+ multiple_years
539
+ .merge(multiple_years_4_digits_once)
540
+ .merge(decade_only_4_digits).each do |example, expected| # expected is an array of years; only its first year is asserted
541
+ it "#{expected.first} for #{example}" do
542
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_yyyy).to eq expected.first
543
+ end
544
+ end
545
+
546
+ # indicate some of the strings this method cannot handle (so must be parsed with other instance methods)
547
+ unparseable
548
+ .push(*brackets_in_middle_of_year.keys)
549
+ .push(*specific_day_2_digit_year.keys)
550
+ .push(*decade_only.keys)
551
+ .push(*century_only.keys).each do |example| # NOTE(review): push mutates the shared unparseable array in place, so it keeps these extra keys afterwards
552
+ it "nil for #{example}" do
553
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_yyyy).to eq nil
554
+ end
555
+ end
556
+ end
557
+
558
+ context '#sortable_year_for_yy' do # dates ending in a 2-digit year are expected to expand to 4 digits
559
+ specific_day_2_digit_year.each do |example, expected|
560
+ it "#{expected} for #{example}" do
561
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_yy).to eq expected
562
+ end
563
+ end
564
+ it '2000 for 12/25/00' do
565
+ expect(Stanford::Mods::DateParsing.new('12/25/00').sortable_year_for_yy).to eq '2000'
566
+ end
567
+
568
+ # indicate some of the strings this method cannot handle (so must be parsed with other instance methods)
569
+ [
570
+ '92/1/31', # yy/mm/dd: doesn't work. :-(
571
+ '92-31-1', # yy-dd-mm: doesn't work. :-(
572
+ ].push(*decade_only.keys).each do |example| # pushes onto the array literal above, so no shared fixture is modified
573
+ it "nil for #{example}" do
574
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_yy).to eq nil
575
+ end
576
+ end
577
+ end
578
+
579
+ context '#sortable_year_for_decade' do # 3 digits plus a placeholder (u, -, ?, x) are expected to give the decade's first year
580
+ decade_only.each do |example, expected| # expected is the decade's ten years; only the first is asserted
581
+ it "#{expected.first} for #{example}" do
582
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_decade).to eq expected.first
583
+ end
584
+ end
585
+ { # example string as key, expected result as value
586
+ '199u' => '1990',
587
+ '200-' => '2000',
588
+ '201?' => '2010',
589
+ '202x' => '2020'
590
+ }.each do |example, expected|
591
+ it "#{expected} for #{example}" do
592
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_decade).to eq expected
593
+ end
594
+ end
595
+
596
+ # some of the strings this method cannot handle (so must be parsed with other instance methods)
597
+ decade_only_4_digits.keys
598
+ .push(*specific_day_2_digit_year.keys).each do |example| # keys returns a new array, so the fixture hash is not modified
599
+ it "nil for #{example}" do
600
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_decade).to eq nil
601
+ end
602
+ end
603
+ end
604
+
605
+ context '#sortable_year_for_century' do # the sortable year for a century form is expected to be its first year
606
+ century_only.keys.each do |example| # every century_only key is a 17xx form, hence the fixed '1700'
607
+ it "1700 from #{example}" do
608
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_century).to eq '1700'
609
+ end
610
+ end
611
+ it '0700 for 7--' do
612
+ expect(Stanford::Mods::DateParsing.new('7--').sortable_year_for_century).to eq '0700'
613
+ end
614
+ it 'nil for 7th century B.C. (to be handled in different method)' do
615
+ expect(Stanford::Mods::DateParsing.new('7th century B.C.').sortable_year_for_century).to eq nil
616
+ end
617
+ end
618
+
619
+ context '#facet_string_for_century' do # the century label is one more than the leading digits (e.g. '16--' => '17th century')
620
+ century_only.each do |example, expected|
621
+ it "#{expected} for #{example}" do
622
+ expect(Stanford::Mods::DateParsing.new(example).facet_string_for_century).to eq expected
623
+ end
624
+ end
625
+ { # example string as key, expected result as value
626
+ '16--' => '17th century',
627
+ '7--' => '8th century',
628
+ # check ordinal suffixes (st, nd, rd)
629
+ '20--' => '21st century',
630
+ '1--' => '2nd century',
631
+ '2--' => '3rd century'
632
+ }.each do |example, expected|
633
+ it "#{expected} for #{example}" do
634
+ expect(Stanford::Mods::DateParsing.new(example).facet_string_for_century).to eq expected
635
+ end
636
+ end
637
+
638
+ it 'nil for 7th century B.C. (to be handled in different method)' do
639
+ expect(Stanford::Mods::DateParsing.new('7th century B.C.').facet_string_for_century).to eq nil
640
+ end
641
+ end
642
+
643
+ context '#sortable_year_for_early_numeric' do # expected sortable strings come straight from the early_numeric_dates fixture
644
+ early_numeric_dates.each do |example, expected|
645
+ it "#{expected} for #{example}" do
646
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_early_numeric).to eq expected
647
+ end
648
+ end
649
+ end
650
+
651
+ context '#facet_string_for_early_numeric' do # facet form: '<digits> B.C.' for negative input, unpadded digits otherwise
652
+ early_numeric_dates.each do |example, expected|
653
+ expected = expected.to_i.to_s if expected.match(/^\d+$/) # strip the zero padding used by the sortable form
654
+ if example.start_with?('-')
655
+ exp = example[1..-1] + " B.C." # drop the minus sign and append ' B.C.'
656
+ it "#{exp} for #{example}" do
657
+ expect(Stanford::Mods::DateParsing.new(example).facet_string_for_early_numeric).to eq exp
658
+ end
659
+ else
660
+ it "#{expected} for #{example}" do
661
+ expect(Stanford::Mods::DateParsing.new(example).facet_string_for_early_numeric).to eq expected
662
+ end
663
+ end
664
+ end
665
+ end
666
+
667
+ context '#sortable_year_for_bc' do # expected negative sortable strings come straight from the bc_dates fixture
668
+ bc_dates.each do |example, expected|
669
+ it "#{expected} for #{example}" do
670
+ expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_bc).to eq expected
671
+ end
672
+ end
673
+ end
674
+
675
+ context '#facet_string_for_bc' do # a B.C. input string is expected to facet as itself
676
+ bc_dates.keys.each do |example|
677
+ it "#{example} for #{example}" do
678
+ expect(Stanford::Mods::DateParsing.new(example).facet_string_for_bc).to eq example
679
+ end
680
+ end
681
+ it '1600 B.C. for 1600 B.C.' do
682
+ expect(Stanford::Mods::DateParsing.new('1600 B.C.').facet_string_for_bc).to eq '1600 B.C.'
683
+ end
684
+ end
685
+
686
+ context '#year_via_ruby_parsing' do
687
+ specific_day.each do |example, expected|
688
+ it "#{expected} for #{example}" do
689
+ expect(Stanford::Mods::DateParsing.new(example).year_via_ruby_parsing).to eq expected
690
+ end
691
+ end
692
+
693
+ # some of the strings this method cannot handle (and must be parsed with other instance methods)
694
+ multiple_years.keys
695
+ .push(*multiple_years_4_digits_once.keys)
696
+ .push(*decade_only_4_digits.keys)
697
+ .push(*century_only.keys)
698
+ .push(*invalid_but_can_get_year.keys).each do |example| # keys returns a new array, so the fixture hashes are not modified
699
+ it "nil for #{example}" do
700
+ expect(Stanford::Mods::DateParsing.new(example).year_via_ruby_parsing).to eq nil
701
+ end
702
+ end
703
+
704
+ # data that works via #sortable_year_for_yyyy (not all of it works here):
705
+ # single_year
706
+ # specific_month
707
+ # specific_day_ruby_parse_fail
708
+
709
+ # data that fails for #sortable_year_for_yyyy AND for #year_via_ruby_parsing:
710
+ # multiple_years
711
+ # century_only
712
+
713
+ # data that fails for #sortable_year_for_yyyy
714
+ # and only partially works for #year_via_ruby_parsing:
715
+ skip 'parsed incorrectly' do # NOTE(review): group-level skip defines one skipped example, so the nested `it` calls below presumably never run (kept as documentation)
716
+ # assigns incorrect values to 13 out of 92 (rest with no val assigned)
717
+ unparseable.each do |example|
718
+ it "nil for unparseable: #{example}" do
719
+ expect(Stanford::Mods::DateParsing.new(example).year_via_ruby_parsing).to eq nil
720
+ end
721
+ end
722
+
723
+ # assigns incorrect values to 2 out of 2
724
+ brackets_in_middle_of_year.keys.each do |example|
725
+ it "nil for brackets_in_middle_of_year: #{example}" do
726
+ expect(Stanford::Mods::DateParsing.new(example).year_via_ruby_parsing).to eq nil
727
+ end
728
+ end
729
+
730
+ # assigns incorrect values to 3 out of 8 (5 with no val assigned)
731
+ specific_day_2_digit_year.keys.each do |example|
732
+ it "nil for specific_day_2_digit_year: #{example}" do
733
+ expect(Stanford::Mods::DateParsing.new(example).year_via_ruby_parsing).to eq nil
734
+ end
735
+ end
736
+
737
+ # assigns incorrect values to 8 out of 8
738
+ decade_only.keys.each do |example|
739
+ it "nil for decade_only: #{example}" do
740
+ expect(Stanford::Mods::DateParsing.new(example).year_via_ruby_parsing).to eq nil
741
+ end
742
+ end
743
+ end
744
+ end
745
+
746
+ end