stanford-mods 1.3.3 → 1.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rspec +1 -0
- data/.rubocop.yml +4 -0
- data/Gemfile +1 -0
- data/lib/stanford-mods.rb +5 -5
- data/lib/stanford-mods/date_parsing.rb +245 -0
- data/lib/stanford-mods/origin_info.rb +411 -0
- data/lib/stanford-mods/searchworks.rb +23 -474
- data/lib/stanford-mods/searchworks_subjects.rb +208 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/date_parsing_spec.rb +746 -0
- data/spec/fixtures/spotlight_pub_date_data.rb +316 -0
- data/spec/origin_info_spec.rb +449 -0
- data/spec/searchworks_pub_dates_spec.rb +166 -163
- data/spec/spec_helper.rb +16 -5
- data/stanford-mods.gemspec +2 -0
- metadata +25 -2
@@ -0,0 +1,208 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'logger'
|
3
|
+
require 'mods'
|
4
|
+
|
5
|
+
# SearchWorks specific wranglings of MODS *subject* metadata as a mixin to the Stanford::Mods::Record object
|
6
|
+
module Stanford
|
7
|
+
module Mods
|
8
|
+
class Record < ::Mods::Record
|
9
|
+
|
10
|
+
# Collects the geographic subject values used for searching.
# Values are gathered, in this order, from:
#   subject/geographic
#   subject/hierarchicalGeographic (non-empty child element texts joined with sep)
#   subject/geographicCode (a translated value is added only if it isn't already present from other mods geo fields)
# @param [String] sep - the separator string for joining hierarchicalGeographic sub elements
# @return [Array<String>] values for geographic_search Solr field for this document or [] if none
def sw_geographic_search(sep = ' ')
  geo_values = term_values([:subject, :geographic]) || []

  # each hierarchicalGeographic node contributes one value built from its child elements
  @mods_ng_xml.subject.hierarchicalGeographic.each do |hier_node|
    pieces = hier_node.element_children.map(&:text).reject(&:empty?)
    geo_values << pieces.join(sep) unless pieces.empty?
  end

  # translated_value is treated as possibly nil, hence the empty-list fallback
  translated = @mods_ng_xml.subject.geographicCode.translated_value
  (translated || []).each do |value|
    geo_values << value unless geo_values.include?(value)
  end

  geo_values
end
|
37
|
+
|
38
|
+
# Builds one string per subject/name element from its namePart children.
# The non-empty namePart texts are concatenated in document order
# (e.g. "Shakespeare, William, 1564-1616") and the joined string is stripped.
# @param [String] sep - the separator string for joining namePart sub elements
# @return [Array<String>] values for names inside subject elements or [] if none
def sw_subject_names(sep = ', ')
  names_with_parts = @mods_ng_xml.subject.name_el.select { |node| node.namePart }
  names_with_parts.each_with_object([]) do |name_node, names|
    texts = name_node.namePart.map(&:text).reject(&:empty?)
    names << texts.join(sep).strip unless texts.empty?
  end
end
|
51
|
+
|
52
|
+
# Builds one string per subject/titleInfo element from its child elements.
# The non-empty child texts are joined with sep and the joined string is stripped;
# titleInfo elements without any non-empty child text contribute nothing.
# @param [String] sep - the separator string for joining titleInfo sub elements
# @return [Array<String>] values for titles inside subject elements or [] if none
def sw_subject_titles(sep = ' ')
  joined_titles = @mods_ng_xml.subject.titleInfo.map do |title_node|
    texts = title_node.element_children.map(&:text).reject(&:empty?)
    # nil marks a titleInfo with nothing usable; it is dropped by compact below
    texts.empty? ? nil : texts.join(sep).strip
  end
  joined_titles.compact
end
|
64
|
+
|
65
|
+
# Values for topic searching, taken from:
#   mods/genre
#   mods/subject/topic
# A non-nil result is memoized in @topic_search.
# @return [Array<String>] values for the topic_search Solr field for this document or nil if none
def topic_search
  @topic_search ||= begin
    topics = term_values(:genre) || []
    from_subjects = subject_topics
    topics.concat(from_subjects) if from_subjects
    topics unless topics.empty?
  end
end
|
76
|
+
|
77
|
+
# Values for topic faceting, taken (in this order) from:
#   subject/topic
#   subject/name
#   subject/title
#   subject/occupation
# with one trailing comma, semicolon, or backslash (and any whitespace around it) removed.
# The pattern is anchored with \z so only the very end of the value is cleaned, even when the
# value spans several lines, and whitespace after the punctuation no longer hides it.
# @return [Array<String>] values for the topic_facet Solr field for this document or nil if none
def topic_facet
  # copy subject_topics so the memoized array is not mutated by concat/map!
  vals = subject_topics ? Array.new(subject_topics) : []
  vals.concat(subject_names) if subject_names
  vals.concat(subject_titles) if subject_titles
  vals.concat(subject_occupations) if subject_occupations
  vals.map! { |val| val.sub(/\s*[\\,;]\s*\z/, '').strip }
  vals.empty? ? nil : vals
end
|
95
|
+
|
96
|
+
# geographic_search values with one trailing comma, semicolon, or backslash
# (and any whitespace around it) removed.
# The pattern is anchored with \z so only the very end of each value is cleaned, even for
# multi-line values, and whitespace after the punctuation no longer hides it.
# @return [Array<String>] values for the geographic_facet Solr field for this document or nil if none
def geographic_facet
  values = geographic_search
  return nil unless values

  values.map { |val| val.sub(/\s*[\\,;]\s*\z/, '').strip }
end
|
101
|
+
|
102
|
+
# subject/temporal values with one trailing comma, semicolon, or backslash
# (and any whitespace around it) removed.
# The pattern is anchored with \z so only the very end of each value is cleaned, even for
# multi-line values, and whitespace after the punctuation no longer hides it.
# @return [Array<String>] values for the era_facet Solr field for this document or nil if none
def era_facet
  values = subject_temporal
  return nil unless values

  values.map { |val| val.sub(/\s*[\\,;]\s*\z/, '').strip }
end
|
107
|
+
|
108
|
+
# Values for geographic searching, taken from:
#   subject/geographic
#   subject/hierarchicalGeographic
#   subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
# As a side effect, logs an info message for each geographicCode element whose authority is
# neither 'marcgac' nor 'marccountry' when there are more codes than translated values.
# A non-nil result is memoized in @geographic_search.
# @return [Array<String>] values for the geographic_search Solr field for this document or nil if none
def geographic_search
  @geographic_search ||= begin
    result = sw_geographic_search

    # TODO: this should go into stanford-mods ... but then we have to set that gem up with a Logger
    # print a message for any unrecognized encodings
    geo_codes = subject.geographicCode
    # sw_geographic_search guards translated_value against nil; do the same here so that
    # the logging-only comparison below cannot raise NoMethodError
    translated = geo_codes.translated_value || []
    codes = term_values([:subject, :geographicCode])
    if codes && codes.size > translated.size
      geo_codes.each do |node|
        if node.authority != 'marcgac' && node.authority != 'marccountry'
          sw_logger.info("#{druid} has subject geographicCode element with untranslated encoding (#{node.authority}): #{node.to_xml}")
        end
      end
    end

    # FIXME: stanford-mods should be returning [], not nil ...
    result if result && !result.empty?
  end
end
|
134
|
+
|
135
|
+
# Values for the "other subject" search field, concatenated in this order:
#   subject/occupation - no subelements
#   subject/name
#   subject/titleInfo
# A non-nil result is memoized in @subject_other_search.
# @return [Array<String>] values for the subject_other_search Solr field for this document or nil if none
def subject_other_search
  @subject_other_search ||= begin
    # compact drops the sources that returned nil; flatten(1) builds a fresh array,
    # so the memoized source arrays are never mutated
    combined = [subject_occupations, subject_names, subject_titles].compact.flatten(1)
    combined unless combined.empty?
  end
end
|
148
|
+
|
149
|
+
# Values for the subject_other_subvy_search field, concatenated in this order:
#   subject/temporal
#   subject/genre
# As a side effect, logs an info message for every subject/temporal element
# whose encoding is not empty.
# A non-nil result is memoized in @subject_other_subvy_search.
# @return [Array<String>] values for the subject_other_subvy_search Solr field for this document or nil if none
def subject_other_subvy_search
  @subject_other_subvy_search ||= begin
    temporal = subject_temporal
    genres = term_values([:subject, :genre])
    # Array#+ returns a new array, leaving the memoized temporal values untouched
    combined = (temporal || []) + (genres || [])

    # print a message for any temporal encodings
    subject.temporal.each do |node|
      next if node.encoding.empty?
      sw_logger.info("#{druid} has subject temporal element with untranslated encoding: #{node.to_xml}")
    end

    combined unless combined.empty?
  end
end
|
167
|
+
|
168
|
+
# Values for searching across all subject data: the concatenation, in this order, of
# topic_search, geographic_search, subject_other_search and subject_other_subvy_search
# (i.e. all subject subelements except subject/cartographic plus genre top level element).
# @return [Array<String>] values for the subject_all_search Solr field for this document or nil if none
def subject_all_search
  sources = [topic_search, geographic_search, subject_other_search, subject_other_subvy_search]
  # compact drops the sources that returned nil; flatten(1) copies into a fresh array
  everything = sources.compact.flatten(1)
  everything.empty? ? nil : everything
end
|
178
|
+
|
179
|
+
protected #----------------------------------------------------------
|
180
|
+
|
181
|
+
# Memoized subject/name/namePart values, so the mods are not parsed for the same thing multiple times.
# @return [Array<String>] the result of sw_subject_names
def subject_names
  return @subject_names if @subject_names

  @subject_names = sw_subject_names
end
|
185
|
+
|
186
|
+
# Memoized subject/occupation values, so the mods are not parsed for the same thing multiple times.
# A nil/false lookup result is not cached and is looked up again on the next call.
# @return the result of term_values([:subject, :occupation])
def subject_occupations
  return @subject_occupations if @subject_occupations

  @subject_occupations = term_values([:subject, :occupation])
end
|
190
|
+
|
191
|
+
# Memoized subject/temporal values, so the mods are not parsed for the same thing multiple times.
# A nil/false lookup result is not cached and is looked up again on the next call.
# @return the result of term_values([:subject, :temporal])
def subject_temporal
  return @subject_temporal if @subject_temporal

  @subject_temporal = term_values([:subject, :temporal])
end
|
195
|
+
|
196
|
+
# Memoized subject/titleInfo values, so the mods are not parsed for the same thing multiple times.
# @return [Array<String>] the result of sw_subject_titles
def subject_titles
  return @subject_titles if @subject_titles

  @subject_titles = sw_subject_titles
end
|
200
|
+
|
201
|
+
# Memoized subject/topic values, so the mods are not parsed for the same thing multiple times.
# A nil/false lookup result is not cached and is looked up again on the next call.
# @return the result of term_values([:subject, :topic])
def subject_topics
  return @subject_topics if @subject_topics

  @subject_topics = term_values([:subject, :topic])
end
|
205
|
+
|
206
|
+
end
|
207
|
+
end
|
208
|
+
end
|
@@ -0,0 +1,746 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
describe "date parsing methods" do
|
3
|
+
|
4
|
+
unparseable = [ # here to remind us of what they might look like in our data
|
5
|
+
nil,
|
6
|
+
'',
|
7
|
+
'[]',
|
8
|
+
'?',
|
9
|
+
'uuuu',
|
10
|
+
'Aug',
|
11
|
+
'publiée le 26 germinal an VI',
|
12
|
+
"l'an IVe",
|
13
|
+
'Feb',
|
14
|
+
"L'AN 2 DE LA // LIBERTÉ",
|
15
|
+
'Paris',
|
16
|
+
"publié en frimaire l'an 3.e de la République française",
|
17
|
+
'an 6',
|
18
|
+
'an sept',
|
19
|
+
's.n.]',
|
20
|
+
'M. D. LXI',
|
21
|
+
'[An 4]',
|
22
|
+
'[s.d.]',
|
23
|
+
'Undated'
|
24
|
+
]
|
25
|
+
# example string as key, expected parsed value as value
|
26
|
+
invalid_but_can_get_year = {
|
27
|
+
'1966-14-14' => '1966', # 14 isn't a valid month ...
|
28
|
+
'1966\4\11' => '1966', # slashes wrong way
|
29
|
+
'2/31/1950' => '1950', # no 31 of Feb
|
30
|
+
'1869-00-00' => '1869',
|
31
|
+
'1862-01-00' => '1862',
|
32
|
+
'1985-05-00' => '1985'
|
33
|
+
}
|
34
|
+
# example string as key, expected parsed value as value
|
35
|
+
single_year = {
|
36
|
+
'0700' => '0700',
|
37
|
+
'0999' => '0999',
|
38
|
+
'1000' => '1000',
|
39
|
+
'1798' => '1798',
|
40
|
+
'1583.' => '1583',
|
41
|
+
'1885-' => '1885',
|
42
|
+
'1644.]' => '1644',
|
43
|
+
'1644]' => '1644',
|
44
|
+
'1584].' => '1584',
|
45
|
+
'1729?]' => '1729',
|
46
|
+
'1500 CE' => '1500',
|
47
|
+
'1877?' => '1877',
|
48
|
+
'1797 goda' => '1797',
|
49
|
+
"1616: Con licenza de'svperiori" => '1616',
|
50
|
+
|
51
|
+
'[1789]' => '1789',
|
52
|
+
'[1968?-' => '1968',
|
53
|
+
'[1860?]' => '1860',
|
54
|
+
'[1789 ?]' => '1789',
|
55
|
+
'[[1790]]' => '1790',
|
56
|
+
'[1579].' => '1579',
|
57
|
+
'[Ca 1790]' => '1790',
|
58
|
+
'[c1926]' => '1926',
|
59
|
+
'[ca 1790]' => '1790',
|
60
|
+
'[ca. 1790]' => '1790',
|
61
|
+
'[ca. 1850?]' => '1850',
|
62
|
+
'[ca.1600]' => '1600',
|
63
|
+
'[after 1726]' => '1726',
|
64
|
+
'[an II, i.e. 1794]' => '1794',
|
65
|
+
'[approximately 1600]' => '1600',
|
66
|
+
'[approximately 1558].' => '1558',
|
67
|
+
'[approximately 1717?]' => '1717',
|
68
|
+
'[not after 1652]' => '1652',
|
69
|
+
'[not before 1543].' => '1543',
|
70
|
+
|
71
|
+
"A' 1640" => '1640',
|
72
|
+
'A1566' => '1566',
|
73
|
+
'Ans. 1656' => '1656',
|
74
|
+
'Antonio Laffreri 1570' => '1570',
|
75
|
+
'An 6. 1798' => '1798',
|
76
|
+
'An 6 1798' => '1798',
|
77
|
+
'a. 1652' => '1652',
|
78
|
+
'ad decennium 1592' => '1592',
|
79
|
+
'after 1622' => '1622',
|
80
|
+
'an 10 (1802)' => '1802',
|
81
|
+
'an 14, 1805' => '1805',
|
82
|
+
'anno 1801' => '1801',
|
83
|
+
'anno 1603.' => '1603',
|
84
|
+
'approximately 1580.' => '1580',
|
85
|
+
'approximately 1700?' => '1700',
|
86
|
+
'approximately 1544]' => '1544',
|
87
|
+
'anno 1599 (v. 1).' => '1599',
|
88
|
+
'anno MDCXXXV [1635].' => '1635',
|
89
|
+
'anno dom. 1600 (v. 3).' => '1600',
|
90
|
+
'anno j65i [1651]' => '1651',
|
91
|
+
'Ca. 1580 CE' => '1580',
|
92
|
+
'c1887' => '1887',
|
93
|
+
'ca 1796]' => '1796',
|
94
|
+
'ca. 1558' => '1558',
|
95
|
+
'ca. 1560?]' => '1560',
|
96
|
+
'ca. 1700]' => '1700',
|
97
|
+
'circa 1860' => '1860',
|
98
|
+
'copyright 1855' => '1855',
|
99
|
+
'en 1788' => '1788',
|
100
|
+
'im jahr 1681' => '1681',
|
101
|
+
"l'an 1.er de la Rep. 1792" => '1792',
|
102
|
+
"l'anno1570" => '1570',
|
103
|
+
'MDLXXXVIII [1588]]' => '1588',
|
104
|
+
'MDLXI [1561]' => '1561',
|
105
|
+
'MDCCLII. [1752-' => '1752',
|
106
|
+
'No. 15 1792' => '1792',
|
107
|
+
's.a. [1712]' => '1712',
|
108
|
+
'publié le 24 floréal [1796]' => '1796',
|
109
|
+
"Fructidor l'an 3.e [i.e. 1795]" => '1795',
|
110
|
+
}
|
111
|
+
# example string as key, expected parsed value as value
|
112
|
+
specific_month = {
|
113
|
+
'1975-05' => '1975', # vs 1918-27
|
114
|
+
'1996 Jun' => '1996',
|
115
|
+
'February 1798' => '1798',
|
116
|
+
'March, 1794' => '1794',
|
117
|
+
'[ ?] 10 1793' => '1793',
|
118
|
+
'agosto 1799' => '1799',
|
119
|
+
'Jan.y. thes.et 1798' => '1798',
|
120
|
+
'[[décembre 1783]]' => '1783',
|
121
|
+
'im Mai 1793' => '1793',
|
122
|
+
'in Febr. 1795' => '1795',
|
123
|
+
"juin année 1797" => '1797'
|
124
|
+
}
|
125
|
+
# example string as key, expected parsed value as value
|
126
|
+
specific_day = {
|
127
|
+
'1/1/1961' => '1961',
|
128
|
+
'10/1/1987' => '1987',
|
129
|
+
'5-1-1959' => '1959',
|
130
|
+
|
131
|
+
# year first
|
132
|
+
'1888-02-18' => '1888',
|
133
|
+
'1966-2-5' => '1966',
|
134
|
+
|
135
|
+
# text; starts with day
|
136
|
+
'1 July 1799' => '1799',
|
137
|
+
'1 Feb. 1782' => '1782',
|
138
|
+
'15 Jan.y 1797' => '1797',
|
139
|
+
'12.th May 1794' => '1794',
|
140
|
+
'12th May 1794' => '1794',
|
141
|
+
'12th Dec.r 1794' => '1794',
|
142
|
+
'14th Feb.y 1794' => '1794',
|
143
|
+
'18 Febr. 1790' => '1790',
|
144
|
+
'23 Nov.r 1797' => '1797',
|
145
|
+
|
146
|
+
# text; starts with year
|
147
|
+
'1793 March 1st' => '1793',
|
148
|
+
'1892, Jan. 1' => '1892',
|
149
|
+
'1991 May 14' => '1991',
|
150
|
+
'1997 Sep 6' => '1997',
|
151
|
+
|
152
|
+
# text starts with words
|
153
|
+
'Boston, November 25, 1851' => '1851',
|
154
|
+
'd. 16 Feb. 1793' => '1793',
|
155
|
+
'published the 30 of June 1799' => '1799',
|
156
|
+
'Published the 1 of June 1799' => '1799',
|
157
|
+
'Pub.d Nov.r 1st 1798' => '1798',
|
158
|
+
'Published July 5th, 1784' => '1784',
|
159
|
+
|
160
|
+
# text starts with month
|
161
|
+
'April 01 1797' => '1797',
|
162
|
+
'April 1 1796' => '1796',
|
163
|
+
'April 1. 1796' => '1796',
|
164
|
+
'April 16, 1632' => '1632',
|
165
|
+
'April 11th 1792' => '1792',
|
166
|
+
'[April 1 1795]' => '1795',
|
167
|
+
|
168
|
+
'Aug. 1st 1797' => '1797',
|
169
|
+
'Aug 30th 1794' => '1794',
|
170
|
+
'Aug. 16 1790' => '1790',
|
171
|
+
'Aug. 20, 1883' => '1883',
|
172
|
+
'Aug. 3rd, 1886' => '1886',
|
173
|
+
'Aug.st 4 1795' => '1795',
|
174
|
+
'Aug.t 16 1794' => '1794',
|
175
|
+
'Augt. 29, 1804' => '1804',
|
176
|
+
'August 1 1794' => '1794',
|
177
|
+
|
178
|
+
'Dec. 1 1792' => '1792',
|
179
|
+
'Dec.r 1 1792' => '1792',
|
180
|
+
'Dec.r 8th 1798' => '1798',
|
181
|
+
'Decb.r 1, 1789' => '1789',
|
182
|
+
'December 16 1795' => '1795',
|
183
|
+
|
184
|
+
'Feb 12 1800' => '1800',
|
185
|
+
'Feb. 10 1798' => '1798',
|
186
|
+
'Feb. 25, 1744]' => '1744',
|
187
|
+
'Feb.ry 12 1793' => '1793',
|
188
|
+
'Feb.ry 7th 1796' => '1796',
|
189
|
+
'Feb.y 1 1794' => '1794',
|
190
|
+
'Feb.y 13th 1798' => '1798',
|
191
|
+
'Feb.y 23rd 1799' => '1799',
|
192
|
+
'[Feb.y 18 1793]' => '1793',
|
193
|
+
|
194
|
+
'Jan. 1 1789' => '1789',
|
195
|
+
'Jan. 1. 1795' => '1795',
|
196
|
+
'Jan.y 15. 1795' => '1795',
|
197
|
+
'Jan.y 12st 1793' => '1793',
|
198
|
+
'Jan.y 18th 1790' => '1790',
|
199
|
+
|
200
|
+
'July 1 1796' => '1796',
|
201
|
+
'July 1. 1793' => '1793',
|
202
|
+
'July 13, 1787' => '1787',
|
203
|
+
'July 15th 1797' => '1797',
|
204
|
+
|
205
|
+
'June 1 1793' => '1793',
|
206
|
+
'June 1. 1800' => '1800',
|
207
|
+
'June1st.1805' => '1805',
|
208
|
+
'June 22, 1804' => '1804',
|
209
|
+
'July 23d 1792' => '1792',
|
210
|
+
'June 30th 1799' => '1799',
|
211
|
+
'[June 2 1793]' => '1793',
|
212
|
+
|
213
|
+
'May 9, 1795' => '1795',
|
214
|
+
'May 12 1792' => '1792',
|
215
|
+
'May 21st 1798' => '1798',
|
216
|
+
'May 15th 1798' => '1798',
|
217
|
+
|
218
|
+
'Mar. 1. 1792' => '1792',
|
219
|
+
'March 1 1795' => '1795',
|
220
|
+
'March 1.t 1797' => '1797',
|
221
|
+
'March 1, 1793' => '1793',
|
222
|
+
'March 1st 1797' => '1797',
|
223
|
+
'March 6th 1798' => '1798',
|
224
|
+
'[March 16 1798]' => '1798',
|
225
|
+
|
226
|
+
'Nov. 1. 1796' => '1796',
|
227
|
+
'Nov. 14th 1792' => '1792',
|
228
|
+
'Nov. 20 1789' => '1789',
|
229
|
+
'Nov.r 9, 1793' => '1793',
|
230
|
+
'Novem. 13th 1797' => '1797',
|
231
|
+
'Novembr 22nd 1794' => '1794',
|
232
|
+
|
233
|
+
'Oct 12 1792' => '1792',
|
234
|
+
'Oct 18th 1794' => '1794',
|
235
|
+
'Oct. 29 1796' => '1796',
|
236
|
+
'Oct. 11th 1794' => '1794',
|
237
|
+
'Oct.er 1st 1786' => '1786',
|
238
|
+
'Oct.r 25 1796' => '1796',
|
239
|
+
'Oct.r 25th 1794' => '1794',
|
240
|
+
'Octo.r 15 1795' => '1795',
|
241
|
+
|
242
|
+
'Sep.r 1, 1795' => '1795',
|
243
|
+
'Sep.tr 15.th 1796' => '1796',
|
244
|
+
'Sept.r 5th 1793' => '1793'
|
245
|
+
}
|
246
|
+
specific_day_ruby_parse_fail = {
|
247
|
+
# note ruby Date.parse only handles american or euro date order, not both ??
|
248
|
+
'1/30/1979' => '1979',
|
249
|
+
'10/20/1976' => '1976',
|
250
|
+
'5-18-2014' => '2014',
|
251
|
+
# year first
|
252
|
+
'1980-23-02' => '1980',
|
253
|
+
'1792 20 Dec' => '1792',
|
254
|
+
# text
|
255
|
+
'le 22 juin 1794' => '1794',
|
256
|
+
'mis au jour le 26 juillet 1791' => '1791',
|
257
|
+
'April 12 sd 1794' => '1794',
|
258
|
+
'Dec. 10 & 11, 1855' => '1855',
|
259
|
+
'January 22th [1800]' => '1800',
|
260
|
+
'June the 12, 1794' => '1794',
|
261
|
+
'Mai 1st 1789' => '1789',
|
262
|
+
'March 22 d. 1794' => '1794',
|
263
|
+
'N. 7 1796' => '1796',
|
264
|
+
'N[ovember] 21st 1786' => '1786',
|
265
|
+
'Oct. the 2.d 1793' => '1793',
|
266
|
+
}
|
267
|
+
# example string as key, expected parsed value as value
|
268
|
+
specific_day_2_digit_year = {
|
269
|
+
'1/2/79' => '1979',
|
270
|
+
'2/12/15' => '2015',
|
271
|
+
'6/11/99' => '1999',
|
272
|
+
'10/1/90' => '1990',
|
273
|
+
'10/21/08' => '2008',
|
274
|
+
'5-1-59' => '1959',
|
275
|
+
'5-1-21' => '1921',
|
276
|
+
'5-1-14' => '2014'
|
277
|
+
}
|
278
|
+
# example string as key, expected parsed value as value
|
279
|
+
multiple_years = {
|
280
|
+
'1783-1788' => ['1783', '1784', '1785', '1786', '1787', '1788'],
|
281
|
+
'1862-1868]' => ['1862', '1863', '1864', '1865', '1866', '1867', '1868'],
|
282
|
+
'1640-1645?]' => ['1640', '1641', '1642', '1643', '1644', '1645'],
|
283
|
+
'1578, 1584]' => ['1578', '1584'],
|
284
|
+
'1860, [1862]' => ['1860', '1862'],
|
285
|
+
'1901, c1900' => ['1901', '1900'], # pub date is one without the c,
|
286
|
+
'1627 [i.e. 1646]' => ['1627', '1646'],
|
287
|
+
'1698/1715' => ['1698', '1715'],
|
288
|
+
'1965,1968' => ['1965', '1968'], # revs
|
289
|
+
'1965|1968' => ['1965', '1968'], # revs
|
290
|
+
'1789 ou 1790]' => ['1789', '1790'],
|
291
|
+
'1689 [i.e. 1688-89]' => ['1689', '1688'],
|
292
|
+
'1598 or 1599' => ['1598', '1599'],
|
293
|
+
'1890 [c1884]' => ['1890', '1884'], # pub date is one without the c
|
294
|
+
'1873,c1868' => ['1873', '1868'], # # pub date is one without the c
|
295
|
+
'1872-1877 [t.5, 1874]' => ['1872', '1873', '1874', '1875', '1876', '1877'],
|
296
|
+
'1809 [ca. 1810]' => ['1809', '1810'],
|
297
|
+
'1726 or 1738]' => ['1726', '1738'],
|
298
|
+
|
299
|
+
'[1789-1791]' => ['1789', '1790', '1791'],
|
300
|
+
'[1627-1628].' => ['1627', '1628'],
|
301
|
+
'[1789-1791' => ['1789', '1790', '1791'],
|
302
|
+
'[1793 ou 1794]' => ['1793', '1794'],
|
303
|
+
'[entre 1789 et 1791]' => ['1789', '1790', '1791'],
|
304
|
+
'[Entre 1789 et 1791]' => ['1789', '1790', '1791'],
|
305
|
+
'[entre 1789-1791]' => ['1789', '1790', '1791'],
|
306
|
+
'[entre 1789 et 1791 ?]' => ['1789', '1790', '1791'],
|
307
|
+
'[between 1882 and 1887]' => ['1882', '1883', '1884', '1885', '1886', '1887'],
|
308
|
+
'[ca 1789-1791]' => ['1789', '1790', '1791'],
|
309
|
+
'[ca 1790 et 1792]' => ['1790', '1791', '1792'],
|
310
|
+
'[ca. 1550-1552]' => ['1550', '1551', '1552'],
|
311
|
+
|
312
|
+
'Anno 1789-1790' => ['1789', '1790'],
|
313
|
+
"L'an VII de la République [1798 or 1799]" => ['1798', '1799'],
|
314
|
+
'MDCXIII [1613] (v. 1); MDLXXXIII [1583] (v. 2); and MDCVI [1606] (v. 3).' => ['1613', '1583', '1606'],
|
315
|
+
'entre 1793 et 1795' => ['1793', '1794', '1795'],
|
316
|
+
'entre 1793 et 1795]' => ['1793', '1794', '1795'],
|
317
|
+
'approximately 1600-1602.' => ['1600', '1601', '1602'],
|
318
|
+
'approximately 1650-1652]' => ['1650', '1651', '1652'],
|
319
|
+
'approximately 1643-1644.]' => ['1643', '1644'],
|
320
|
+
'ca. 1740-1745]' => ['1740', '1741', '1742', '1743', '1744', '1745'],
|
321
|
+
'circa 1851-1852' => ['1851', '1852'],
|
322
|
+
's.a. [ca. 1660, erschienen: 1782]' => ['1660', '1782'],
|
323
|
+
'view of approximately 1848, published about 1865' => ['1848', '1865']
|
324
|
+
}
|
325
|
+
# example string as key, expected parsed value as value
|
326
|
+
multiple_years_4_digits_once = {
|
327
|
+
'1918-20' => ['1918', '1919', '1920'], # vs. 1961-04
|
328
|
+
'1965-8' => ['1965', '1966', '1967', '1968'], # revs
|
329
|
+
'[1846-51]' => ['1846', '1847', '1848', '1849', '1850', '1851']
|
330
|
+
}
|
331
|
+
# example string as key, expected parsed value as value
|
332
|
+
decade_only_4_digits = {
|
333
|
+
'early 1890s' => ['1890', '1891', '1892', '1893', '1894', '1895', '1896', '1897', '1898', '1899'],
|
334
|
+
'1950s' => ['1950', '1951', '1952', '1953', '1954', '1955', '1956', '1957', '1958', '1959'],
|
335
|
+
"1950's" => ['1950', '1951', '1952', '1953', '1954', '1955', '1956', '1957', '1958', '1959']
|
336
|
+
}
|
337
|
+
decade_only = {
|
338
|
+
'156u' => ['1560', '1561', '1562', '1563', '1564', '1565', '1566', '1567', '1568', '1569'],
|
339
|
+
'167-?]' => ['1670', '1671', '1672', '1673', '1674', '1675', '1676', '1677', '1678', '1679'],
|
340
|
+
'[171-?]' => ['1710', '1711', '1712', '1713', '1714', '1715', '1716', '1717', '1718', '1719'],
|
341
|
+
'[189-]' => ['1890', '1891', '1892', '1893', '1894', '1895', '1896', '1897', '1898', '1899'],
|
342
|
+
'ca.170-?]' => ['1700', '1701', '1702', '1703', '1704', '1705', '1706', '1707', '1708', '1709'],
|
343
|
+
'200-?]' => ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009'],
|
344
|
+
'186?' => ['1860', '1861', '1862', '1863', '1864', '1865', '1866', '1867', '1868', '1869'],
|
345
|
+
'195x' => ['1950', '1951', '1952', '1953', '1954', '1955', '1956', '1957', '1958', '1959']
|
346
|
+
}
|
347
|
+
century_only = {
|
348
|
+
'18th century CE' => '18th century',
|
349
|
+
'17uu' => '18th century',
|
350
|
+
'17--?]' => '18th century',
|
351
|
+
'17--]' => '18th century',
|
352
|
+
'[17--]' => '18th century',
|
353
|
+
'[17--?]' => '18th century'
|
354
|
+
}
|
355
|
+
brackets_in_middle_of_year = {
|
356
|
+
'169[5]' => '1695',
|
357
|
+
'October 3, [18]91' => '1891'
|
358
|
+
}
|
359
|
+
# we have data like this for our Roman coins collection
|
360
|
+
early_numeric_dates = {
|
361
|
+
# note that values must lexically sort to create a chronological sort. (-999 before -914)
|
362
|
+
'-999' => '-001',
|
363
|
+
'-914' => '-086',
|
364
|
+
'-18' => '-982',
|
365
|
+
'-1' => '-999',
|
366
|
+
'0' => '0000',
|
367
|
+
'5' => '0005',
|
368
|
+
'33' => '0033',
|
369
|
+
'945' => '0945'
|
370
|
+
}
|
371
|
+
bc_dates = {
|
372
|
+
# note that values must lexically sort to create a chronological sort (800 B.C. before 750 B.C.)
|
373
|
+
'801 B.C.' => '-199',
|
374
|
+
'800 B.C.' => '-200',
|
375
|
+
'750 B.C.' => '-250',
|
376
|
+
'700 B.C.' => '-300',
|
377
|
+
'699 B.C.' => '-301',
|
378
|
+
'75 B.C.' => '-925',
|
379
|
+
'8 B.C.' => '-992'
|
380
|
+
}
|
381
|
+
|
382
|
+
context '*facet_string_from_date_str' do
  # the class-level method is expected to hand off to the instance method of the same name
  it 'calls instance method facet_string_from_date_str' do
    parser_class = Stanford::Mods::DateParsing
    expect_any_instance_of(parser_class).to receive(:facet_string_from_date_str)
    parser_class.facet_string_from_date_str('1666')
  end
end
|
388
|
+
context '*sortable_year_string_from_date_str' do
  # the class-level method is expected to hand off to the instance method of the same name
  it 'calls instance method sortable_year_string_from_date_str' do
    parser_class = Stanford::Mods::DateParsing
    expect_any_instance_of(parser_class).to receive(:sortable_year_string_from_date_str)
    parser_class.sortable_year_string_from_date_str('1666')
  end
end
|
394
|
+
|
395
|
+
context '#facet_string_from_date_str' do
  # fixtures whose expected value is a single string; all-digit expectations are
  # normalized through to_i.to_s (e.g. '0700' becomes '700') before comparing
  single_year
    .merge(specific_month)
    .merge(specific_day)
    .merge(specific_day_2_digit_year)
    .merge(specific_day_ruby_parse_fail)
    .merge(century_only)
    .merge(brackets_in_middle_of_year)
    .merge(invalid_but_can_get_year).each do |example, expected|
    expected = expected.to_i.to_s if expected.match(/^\d+$/)
    it "#{expected} for single value #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).facet_string_from_date_str).to eq expected
    end
  end

  # fixtures whose expected value is a list of years: the first one is expected
  multiple_years
    .merge(multiple_years_4_digits_once)
    .merge(decade_only)
    .merge(decade_only_4_digits).each do |example, expected|
    it "#{expected.first} for multi-value #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).facet_string_from_date_str).to eq expected.first
    end
  end

  early_numeric_dates.each do |example, expected|
    if example.start_with?('-')
      # negative numbers are expected to be displayed as "<number> B.C."
      exp = example[1..-1] + " B.C."
      it "#{exp} for #{example}" do
        expect(Stanford::Mods::DateParsing.new(example).facet_string_from_date_str).to eq exp
      end
    else
      expected = expected.to_i.to_s if expected.match(/^\d+$/)
      it "#{expected} for #{example}" do
        expect(Stanford::Mods::DateParsing.new(example).facet_string_from_date_str).to eq expected
      end
    end
  end

  # B.C. strings are expected to come back unchanged
  bc_dates.keys.each do |example|
    it "#{example} for #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).facet_string_from_date_str).to eq example
    end
  end
  it '1600 B.C. for 1600 B.C.' do
    expect(Stanford::Mods::DateParsing.new('1600 B.C.').facet_string_from_date_str).to eq '1600 B.C.'
  end

  [ # bad dates
    '9999',
    # computed rather than hard-coded: the year_str_valid? examples in this file accept
    # current year + 1, so a literal future year would eventually become valid
    (Date.today.year + 2).to_s,
    '0000-00-00'
  ].each do |example|
    it "nil for #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).facet_string_from_date_str).to eq nil
    end
  end
end
|
452
|
+
|
453
|
+
context '#sortable_year_string_from_date_str' do
  # fixtures whose expected value is a single sortable year string
  single_year
    .merge(specific_month)
    .merge(specific_day)
    .merge(specific_day_2_digit_year)
    .merge(specific_day_ruby_parse_fail)
    .merge(early_numeric_dates)
    .merge(bc_dates)
    .merge(brackets_in_middle_of_year)
    .merge(invalid_but_can_get_year).each do |example, expected|
    it "#{expected} for single value #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).sortable_year_string_from_date_str).to eq expected
    end
  end

  # fixtures whose expected value is a list of years: the first one is expected
  multiple_years
    .merge(multiple_years_4_digits_once)
    .merge(decade_only)
    .merge(decade_only_4_digits).each do |example, expected|
    it "#{expected.first} for multi-value #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).sortable_year_string_from_date_str).to eq expected.first
    end
  end

  # every century_only fixture key is expected to sort as '1700'
  century_only.keys.each do |example|
    it "1700 from #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).sortable_year_string_from_date_str).to eq '1700'
    end
  end
  it '0700 for 7--' do
    expect(Stanford::Mods::DateParsing.new('7--').sortable_year_string_from_date_str).to eq '0700'
  end

  it 'nil for 1600 B.C.' do
    skip "code broken for dddd B.C. but no existing data for this yet"
    expect(Stanford::Mods::DateParsing.new('1600 B.C.').sortable_year_string_from_date_str).to eq nil
  end

  [ # bad dates
    '9999',
    # computed rather than hard-coded: the year_str_valid? examples in this file accept
    # current year + 1, so a literal future year would eventually become valid
    (Date.today.year + 2).to_s,
    '0000-00-00'
  ].each do |example|
    it "nil for #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).sortable_year_string_from_date_str).to eq nil
    end
  end
end
|
501
|
+
|
502
|
+
context '*year_str_valid?' do
  this_year = Date.today.year
  # candidate year string => whether year_str_valid? is expected to accept it
  validity_by_year_str = {
    '-1000' => false,
    '-999' => true,
    '-35' => true,
    '-3' => true,
    '0000' => true,
    '0' => true,
    '5' => true,
    '33' => true,
    '150' => true,
    (this_year + 1).to_s => true, # current year + 1
    (this_year + 2).to_s => false, # current year + 2
    '9999' => false,
    '165x' => false,
    '198-' => false,
    'random text' => false,
    nil => false
  }
  validity_by_year_str.each_pair do |year_str, validity|
    it "#{validity} for #{year_str}" do
      expect(Stanford::Mods::DateParsing.year_str_valid?(year_str)).to eq validity
    end
  end
end
|
526
|
+
|
527
|
+
context '#sortable_year_for_yyyy' do
  # fixtures whose expected value is a single year string
  single_year
    .merge(specific_month)
    .merge(specific_day)
    .merge(invalid_but_can_get_year)
    .merge(specific_day_ruby_parse_fail).each do |example, expected|
    it "#{expected} for #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_yyyy).to eq expected
    end
  end

  # fixtures whose expected value is a list of years: the first one is expected
  multiple_years
    .merge(multiple_years_4_digits_once)
    .merge(decade_only_4_digits).each do |example, expected|
    it "#{expected.first} for #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_yyyy).to eq expected.first
    end
  end

  # indicate some of the strings this method cannot handle (so must be parsed with other instance methods)
  # Array#+ builds a new list: the shared unparseable fixture is left unmodified for other examples
  (unparseable +
    brackets_in_middle_of_year.keys +
    specific_day_2_digit_year.keys +
    decade_only.keys +
    century_only.keys).each do |example|
    it "nil for #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).sortable_year_for_yyyy).to eq nil
    end
  end
end
|
557
|
+
|
558
|
+
context '#sortable_year_for_yy' do
  # one example per 2-digit-year fixture entry: key is the raw string, value the expected year
  specific_day_2_digit_year.each_pair do |raw_date, year|
    it "#{year} for #{raw_date}" do
      parsed = Stanford::Mods::DateParsing.new(raw_date)
      expect(parsed.sortable_year_for_yy).to eq(year)
    end
  end

  it '2000 for 12/25/00' do
    parsed = Stanford::Mods::DateParsing.new('12/25/00')
    expect(parsed.sortable_year_for_yy).to eq('2000')
  end

  # indicate some of the strings this method cannot handle (so must be parsed with other instance methods)
  #   '92/1/31' - yy/mm/dd: doesn't work. :-(
  #   '92-31-1' - yy-dd-mm: doesn't work. :-(
  (['92/1/31', '92-31-1'] + decade_only.keys).each do |raw_date|
    it "nil for #{raw_date}" do
      parsed = Stanford::Mods::DateParsing.new(raw_date)
      expect(parsed.sortable_year_for_yy).to eq(nil)
    end
  end
end
|
578
|
+
|
579
|
+
context '#sortable_year_for_decade' do
  # fixture values respond to #first; the first entry is the expected sortable year
  decade_only.each_pair do |raw_date, years|
    it "#{years.first} for #{raw_date}" do
      parsed = Stanford::Mods::DateParsing.new(raw_date)
      expect(parsed.sortable_year_for_decade).to eq(years.first)
    end
  end

  # [example string, expected result] pairs, one per placeholder character for the final digit
  [
    ['199u', '1990'],
    ['200-', '2000'],
    ['201?', '2010'],
    ['202x', '2020']
  ].each do |raw_date, year|
    it "#{year} for #{raw_date}" do
      parsed = Stanford::Mods::DateParsing.new(raw_date)
      expect(parsed.sortable_year_for_decade).to eq(year)
    end
  end

  # some of the strings this method cannot handle (so must be parsed with other instance methods)
  (decade_only_4_digits.keys + specific_day_2_digit_year.keys).each do |raw_date|
    it "nil for #{raw_date}" do
      parsed = Stanford::Mods::DateParsing.new(raw_date)
      expect(parsed.sortable_year_for_decade).to eq(nil)
    end
  end
end
|
604
|
+
|
605
|
+
context '#sortable_year_for_century' do
  # every key of the century_only fixture is expected to yield '1700'
  century_only.each_key do |raw_date|
    it "1700 from #{raw_date}" do
      parsed = Stanford::Mods::DateParsing.new(raw_date)
      expect(parsed.sortable_year_for_century).to eq('1700')
    end
  end

  it '0700 for 7--' do
    parsed = Stanford::Mods::DateParsing.new('7--')
    expect(parsed.sortable_year_for_century).to eq('0700')
  end

  it 'nil for 7th century B.C. (to be handled in different method)' do
    parsed = Stanford::Mods::DateParsing.new('7th century B.C.')
    expect(parsed.sortable_year_for_century).to eq(nil)
  end
end
|
618
|
+
|
619
|
+
context '#facet_string_for_century' do
  # fixture: example string => expected facet string
  century_only.each_pair do |raw_date, facet|
    it "#{facet} for #{raw_date}" do
      parsed = Stanford::Mods::DateParsing.new(raw_date)
      expect(parsed.facet_string_for_century).to eq(facet)
    end
  end

  # [example string, expected result] pairs
  [
    ['16--', '17th century'],
    ['7--', '8th century'],
    # check suffixes
    ['20--', '21st century'],
    ['1--', '2nd century'],
    ['2--', '3rd century']
  ].each do |raw_date, facet|
    it "#{facet} for #{raw_date}" do
      parsed = Stanford::Mods::DateParsing.new(raw_date)
      expect(parsed.facet_string_for_century).to eq(facet)
    end
  end

  it 'nil for 7th century B.C. (to be handled in different method)' do
    parsed = Stanford::Mods::DateParsing.new('7th century B.C.')
    expect(parsed.facet_string_for_century).to eq(nil)
  end
end
|
642
|
+
|
643
|
+
context '#sortable_year_for_early_numeric' do
  # fixture: example string => expected sortable value
  early_numeric_dates.each_pair do |raw_date, sortable|
    it "#{sortable} for #{raw_date}" do
      parsed = Stanford::Mods::DateParsing.new(raw_date)
      expect(parsed.sortable_year_for_early_numeric).to eq(sortable)
    end
  end
end
|
650
|
+
|
651
|
+
context '#facet_string_for_early_numeric' do
  early_numeric_dates.each_pair do |raw_date, sortable|
    # all-digit expected values are round-tripped through Integer, which drops leading zeros
    unpadded = sortable.match(/^\d+$/) ? sortable.to_i.to_s : sortable
    # examples with a leading '-' expect the remaining text followed by " B.C."
    facet = raw_date.start_with?('-') ? raw_date[1..-1] + " B.C." : unpadded

    it "#{facet} for #{raw_date}" do
      parsed = Stanford::Mods::DateParsing.new(raw_date)
      expect(parsed.facet_string_for_early_numeric).to eq(facet)
    end
  end
end
|
666
|
+
|
667
|
+
context '#sortable_year_for_bc' do
  # fixture: example string => expected sortable value
  bc_dates.each_pair do |raw_date, sortable|
    it "#{sortable} for #{raw_date}" do
      parsed = Stanford::Mods::DateParsing.new(raw_date)
      expect(parsed.sortable_year_for_bc).to eq(sortable)
    end
  end
end
|
674
|
+
|
675
|
+
context '#facet_string_for_bc' do
  # each fixture key is expected to come back unchanged as the facet string
  bc_dates.each_key do |raw_date|
    it "#{raw_date} for #{raw_date}" do
      parsed = Stanford::Mods::DateParsing.new(raw_date)
      expect(parsed.facet_string_for_bc).to eq(raw_date)
    end
  end

  it '1600 B.C. for 1600 B.C.' do
    parsed = Stanford::Mods::DateParsing.new('1600 B.C.')
    expect(parsed.facet_string_for_bc).to eq('1600 B.C.')
  end
end
|
685
|
+
|
686
|
+
context '#year_via_ruby_parsing' do
  # Examples for DateParsing#year_via_ruby_parsing, generated from fixture
  # collections that are defined outside this context.
  specific_day.each do |example, expected|
    it "#{expected} for #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).year_via_ruby_parsing).to eq expected
    end
  end

  # some of the strings this method cannot handle (and must be parsed with other instance methods)
  multiple_years.keys
    .push(*multiple_years_4_digits_once.keys)
    .push(*decade_only_4_digits.keys)
    .push(*century_only.keys)
    .push(*invalid_but_can_get_year.keys).each do |example|
    it "nil for #{example}" do
      expect(Stanford::Mods::DateParsing.new(example).year_via_ruby_parsing).to eq nil
    end
  end

  # data that works via #sortable_year_for_yyyy (but does not all work here):
  #   single_year
  #   specific_month
  #   specific_day_ruby_parse_fail

  # data that fails for #sortable_year_for_yyyy AND for #year_via_ruby_parsing:
  #   multiple_years
  #   century_only

  # data that fails for #sortable_year_for_yyyy
  #   and only partially works for #year_via_ruby_parsing.
  # A group-level `skip 'desc' do ... end` defines ONE skipped example and never
  # evaluates its block, so the nested examples were never registered. Using a
  # context with `skip:` metadata registers each example and reports it as pending.
  context 'parsed incorrectly', skip: 'parsed incorrectly' do
    # assigns incorrect values to 13 out of 92 (rest with no val assigned)
    unparseable.each do |example|
      it "nil for unparseable: #{example}" do
        expect(Stanford::Mods::DateParsing.new(example).year_via_ruby_parsing).to eq nil
      end
    end

    # assigns incorrect values to 2 out of 2
    brackets_in_middle_of_year.keys.each do |example|
      it "nil for brackets_in_middle_of_year: #{example}" do
        expect(Stanford::Mods::DateParsing.new(example).year_via_ruby_parsing).to eq nil
      end
    end

    # assigns incorrect values to 3 out of 8 (5 with no val assigned)
    specific_day_2_digit_year.keys.each do |example|
      it "nil for specific_day_2_digit_year: #{example}" do
        expect(Stanford::Mods::DateParsing.new(example).year_via_ruby_parsing).to eq nil
      end
    end

    # assigns incorrect values to 8 out of 8
    decade_only.keys.each do |example|
      it "nil for decade_only: #{example}" do
        expect(Stanford::Mods::DateParsing.new(example).year_via_ruby_parsing).to eq nil
      end
    end
  end
end
|
745
|
+
|
746
|
+
end
|