modsulator 1.0.5 → 1.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +0 -10
- data/lib/modsulator.rb +4 -4
- data/lib/modsulator/modsulator_template.xlsx +0 -0
- data/lib/modsulator/modsulator_template.xml +194 -181
- data/spec/fixtures/filled_template_20160711.xml +58 -53
- data/spec/fixtures/point_coord_test.xlsx +0 -0
- data/spec/fixtures/point_coord_test.xml +23 -0
- data/spec/integration_tests/integration_spec.rb +2 -1
- metadata +21 -7
- data/lib/modsulator/normalizer.rb +0 -225
- data/spec/features/normalizer_unit_spec.rb +0 -150
@@ -1,29 +1,29 @@
|
|
1
1
|
<?xml version="1.0"?>
|
2
|
-
<xmlDocs xmlns="http://library.stanford.edu/xmlDocs" datetime="
|
2
|
+
<xmlDocs xmlns="http://library.stanford.edu/xmlDocs" datetime="2018-04-18 02:42:25PM" sourceFile="filled_template_20160711.xlsx">
|
3
3
|
<xmlDoc id="descMetadata" objectId="123">
|
4
4
|
<mods xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.loc.gov/mods/v3" version="3.5" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-5.xsd">
|
5
|
-
<titleInfo authority="ti1:authority" authorityURI="ti1:authorityURI" valueURI="ti1:valueURI" nameTitleGroup="ti1:nameTitleGroup">
|
5
|
+
<titleInfo authority="ti1:authority" authorityURI="ti1:authorityURI" valueURI="ti1:valueURI" nameTitleGroup="ti1:nameTitleGroup" lang="ti1:language" script="ti1:script" transliteration="ti1:transliteration" altRepGroup="ti1:altRepGroup">
|
6
6
|
<nonSort>ti1:nonSort</nonSort>
|
7
7
|
<title>ti1:title</title>
|
8
8
|
<subTitle>ti1:subTitle</subTitle>
|
9
9
|
<partNumber>ti1:partNumber</partNumber>
|
10
10
|
<partName>ti1:partName</partName>
|
11
11
|
</titleInfo>
|
12
|
-
<titleInfo type="abbreviated" displayLabel="ti2:displayLabel" authority="ti2:authority" authorityURI="ti2:authorityURI" valueURI="ti2:valueURI" nameTitleGroup="ti2:nameTitleGroup">
|
12
|
+
<titleInfo type="abbreviated" displayLabel="ti2:displayLabel" authority="ti2:authority" authorityURI="ti2:authorityURI" valueURI="ti2:valueURI" nameTitleGroup="ti2:nameTitleGroup" lang="ti2:language" script="ti2:script" transliteration="ti2:transliteration" altRepGroup="ti2:altRepGroup">
|
13
13
|
<nonSort>ti2:nonSort</nonSort>
|
14
14
|
<title>ti2:title</title>
|
15
15
|
<subTitle>ti2:subTitle</subTitle>
|
16
16
|
<partNumber>ti2:partNumber</partNumber>
|
17
17
|
<partName>ti2:partName</partName>
|
18
18
|
</titleInfo>
|
19
|
-
<titleInfo type="abbreviated" displayLabel="ti3:displayLabel" authority="ti3:authority" authorityURI="ti3:authorityURI" valueURI="ti3:valueURI" nameTitleGroup="ti3:nameTitleGroup">
|
19
|
+
<titleInfo type="abbreviated" displayLabel="ti3:displayLabel" authority="ti3:authority" authorityURI="ti3:authorityURI" valueURI="ti3:valueURI" nameTitleGroup="ti3:nameTitleGroup" lang="ti3:language" script="ti3:script" transliteration="ti3:transliteration" altRepGroup="ti3:altRepGroup">
|
20
20
|
<nonSort>ti3:nonSort</nonSort>
|
21
21
|
<title>ti3:title</title>
|
22
22
|
<subTitle>ti3:subTitle</subTitle>
|
23
23
|
<partNumber>ti3:partNumber</partNumber>
|
24
24
|
<partName>ti3:partName</partName>
|
25
25
|
</titleInfo>
|
26
|
-
<name type="personal" usage="primary" authority="na1:authority" authorityURI="na1:authorityURI" valueURI="na1:valueURI" nameTitleGroup="na1:nameTitleGroup">
|
26
|
+
<name type="personal" usage="primary" authority="na1:authority" authorityURI="na1:authorityURI" valueURI="na1:valueURI" nameTitleGroup="na1:nameTitleGroup" lang="na1:language" script="na1:script" transliteration="na1:transliteration" altRepGroup="na1:altRepGroup">
|
27
27
|
<namePart>na1:namePart</namePart>
|
28
28
|
<role>
|
29
29
|
<roleTerm type="code" authority="ro1:authority" authorityURI="ro1:authorityURI" valueURI="ro1:valueURI">ro1:roleCode</roleTerm>
|
@@ -38,7 +38,7 @@
|
|
38
38
|
<roleTerm type="text" authority="ro1:authority3" authorityURI="ro1:authorityURI3" valueURI="ro1:valueURI3">ro1:roleText3</roleTerm>
|
39
39
|
</role>
|
40
40
|
</name>
|
41
|
-
<name type="personal" authority="na2:authority" authorityURI="na2:authorityURI" valueURI="na2:valueURI" nameTitleGroup="na2:nameTitleGroup">
|
41
|
+
<name type="personal" authority="na2:authority" authorityURI="na2:authorityURI" valueURI="na2:valueURI" nameTitleGroup="na2:nameTitleGroup" lang="na2:language" script="na2:script" transliteration="na2:transliteration" altRepGroup="na2:altRepGroup">
|
42
42
|
<namePart>na2:namePart</namePart>
|
43
43
|
<role>
|
44
44
|
<roleTerm type="code" authority="ro2:authority" authorityURI="ro2:authorityURI" valueURI="ro2:valueURI">ro2:roleCode</roleTerm>
|
@@ -53,7 +53,7 @@
|
|
53
53
|
<roleTerm type="text" authority="ro2:authority3" authorityURI="ro2:authorityURI3" valueURI="ro2:valueURI3">ro2:roleText3</roleTerm>
|
54
54
|
</role>
|
55
55
|
</name>
|
56
|
-
<name type="personal" authority="na3:authority" authorityURI="na3:authorityURI" valueURI="na3:valueURI" nameTitleGroup="na3:nameTitleGroup">
|
56
|
+
<name type="personal" authority="na3:authority" authorityURI="na3:authorityURI" valueURI="na3:valueURI" nameTitleGroup="na3:nameTitleGroup" lang="na3:language" script="na3:script" transliteration="na3:transliteration" altRepGroup="na3:altRepGroup">
|
57
57
|
<namePart>na3:namePart</namePart>
|
58
58
|
<role>
|
59
59
|
<roleTerm type="code" authority="ro3:authority" authorityURI="ro3:authorityURI" valueURI="ro3:valueURI">ro3:roleCode</roleTerm>
|
@@ -68,7 +68,7 @@
|
|
68
68
|
<roleTerm type="text" authority="ro3:authority3" authorityURI="ro3:authorityURI3" valueURI="ro3:valueURI3">ro3:roleText3</roleTerm>
|
69
69
|
</role>
|
70
70
|
</name>
|
71
|
-
<name type="personal" authority="na4:authority" authorityURI="na4:authorityURI" valueURI="na4:valueURI" nameTitleGroup="na4:nameTitleGroup">
|
71
|
+
<name type="personal" authority="na4:authority" authorityURI="na4:authorityURI" valueURI="na4:valueURI" nameTitleGroup="na4:nameTitleGroup" lang="na4:language" script="na4:script" transliteration="na4:transliteration" altRepGroup="na4:altRepGroup">
|
72
72
|
<namePart>na4:namePart</namePart>
|
73
73
|
<role>
|
74
74
|
<roleTerm type="code" authority="ro4:authority" authorityURI="ro4:authorityURI" valueURI="ro4:valueURI">ro4:roleCode</roleTerm>
|
@@ -83,7 +83,7 @@
|
|
83
83
|
<roleTerm type="text" authority="ro4:authority3" authorityURI="ro4:authorityURI3" valueURI="ro4:valueURI3">ro4:roleText3</roleTerm>
|
84
84
|
</role>
|
85
85
|
</name>
|
86
|
-
<name type="personal" authority="na5:authority" authorityURI="na5:authorityURI" valueURI="na5:valueURI" nameTitleGroup="na5:nameTitleGroup">
|
86
|
+
<name type="personal" authority="na5:authority" authorityURI="na5:authorityURI" valueURI="na5:valueURI" nameTitleGroup="na5:nameTitleGroup" lang="na5:language" script="na5:script" transliteration="na5:transliteration" altRepGroup="na5:altRepGroup">
|
87
87
|
<namePart>na5:namePart</namePart>
|
88
88
|
<role>
|
89
89
|
<roleTerm type="code" authority="ro5:authority" authorityURI="ro5:authorityURI" valueURI="ro5:valueURI">ro5:roleCode</roleTerm>
|
@@ -104,12 +104,12 @@
|
|
104
104
|
<genre type="ge1:type" authority="ge1:authority" authorityURI="ge1:authorityURI" valueURI="ge1:valueURI">ge1:genre</genre>
|
105
105
|
<genre type="ge2:type" authority="ge2:authority" authorityURI="ge2:authorityURI" valueURI="ge2:valueURI">ge2:genre</genre>
|
106
106
|
<genre type="ge3:type" authority="ge3:authority" authorityURI="ge3:authorityURI" valueURI="ge3:valueURI">ge3:genre</genre>
|
107
|
-
<originInfo displayLabel="or:displayLabel" eventType="or:eventType">
|
107
|
+
<originInfo displayLabel="or:displayLabel" eventType="or:eventType" altRepGroup="or:altRepGroup">
|
108
108
|
<place>
|
109
109
|
<placeTerm type="code" authority="marcgac" authorityURI="pl:authorityURI" valueURI="pl:valueURI">pl:placeCode</placeTerm>
|
110
110
|
<placeTerm type="text" authority="marcgac" authorityURI="pl:authorityURI" valueURI="pl:valueURI">pl:placeText</placeTerm>
|
111
111
|
</place>
|
112
|
-
<publisher>or:publisher</publisher>
|
112
|
+
<publisher lang="or:publisherLanguage" script="or:publisherScript" transliteration="or:publisherTransliteration">or:publisher</publisher>
|
113
113
|
<dateCreated keyDate="yes" encoding="w3cdtf" qualifier="approximate" point="start">dt:dateCreated</dateCreated>
|
114
114
|
<dateCreated encoding="w3cdtf" qualifier="approximate" point="start">dt:dateCreated2</dateCreated>
|
115
115
|
<dateIssued keyDate="yes" encoding="w3cdtf" qualifier="approximate" point="start">dt:dateIssued</dateIssued>
|
@@ -118,15 +118,17 @@
|
|
118
118
|
<dateCaptured encoding="w3cdtf" qualifier="approximate" point="start">dt:dateCaptured2</dateCaptured>
|
119
119
|
<copyrightDate keyDate="yes" encoding="w3cdtf" qualifier="approximate" point="start">dt:copyrightDate</copyrightDate>
|
120
120
|
<copyrightDate encoding="w3cdtf" qualifier="approximate" point="start">dt:copyrightDate2</copyrightDate>
|
121
|
+
<dateOther keyDate="yes" type="dt:dateOtherType" encoding="w3cdtf" qualifier="approximate" point="start" script="dt:dateOtherScript" transliteration="dt:dateOtherTransliteration">dt:dateOther</dateOther>
|
122
|
+
<dateOther type="dt:dateOtherType" encoding="w3cdtf" qualifier="approximate" point="start" script="dt:dateOtherScript" transliteration="dt:dateOtherTransliteration">dt:dateOther2</dateOther>
|
121
123
|
<edition>or:edition</edition>
|
122
124
|
<issuance>continuing</issuance>
|
123
125
|
</originInfo>
|
124
|
-
<originInfo displayLabel="or2:displayLabel" eventType="or2:eventType">
|
126
|
+
<originInfo displayLabel="or2:displayLabel" eventType="or2:eventType" altRepGroup="or2:altRepGroup">
|
125
127
|
<place>
|
126
128
|
<placeTerm type="code" authority="marcgac" authorityURI="or2:pl:authorityURI" valueURI="or2:pl:valueURI">or2:pl:placeCode</placeTerm>
|
127
129
|
<placeTerm type="text" authority="marcgac" authorityURI="or2:pl:authorityURI" valueURI="or2:pl:valueURI">or2:pl:placeText</placeTerm>
|
128
130
|
</place>
|
129
|
-
<publisher>or2:publisher</publisher>
|
131
|
+
<publisher lang="or2:publisherLanguage" script="or2:publisherScript" transliteration="or2:publisherTransliteration">or2:publisher</publisher>
|
130
132
|
<dateCreated keyDate="yes" encoding="w3cdtf" qualifier="approximate" point="start">or2:dt:dateCreated</dateCreated>
|
131
133
|
<dateCreated encoding="w3cdtf" qualifier="approximate" point="start">or2:dt:dateCreated2</dateCreated>
|
132
134
|
<dateIssued keyDate="yes" encoding="w3cdtf" qualifier="approximate" point="start">or2:dt:dateIssued</dateIssued>
|
@@ -135,6 +137,9 @@
|
|
135
137
|
<dateCaptured encoding="w3cdtf" qualifier="approximate" point="start">or2:dt:dateCaptured2</dateCaptured>
|
136
138
|
<copyrightDate keyDate="yes" encoding="w3cdtf" qualifier="approximate" point="start">or2:dt:copyrightDate</copyrightDate>
|
137
139
|
<copyrightDate encoding="w3cdtf" qualifier="approximate" point="start">or2:dt:copyrightDate2</copyrightDate>
|
140
|
+
<dateOther keyDate="yes" type="or2:dt:dateOtherType" encoding="w3cdtf" qualifier="approximate" point="start" script="or2:dt:dateOtherScript" transliteration="or2:dt:dateOtherTransliteration">or2:dt:dateOther</dateOther>
|
141
|
+
<dateOther type="or2:dt:dateOtherType" encoding="w3cdtf" qualifier="approximate" point="start" script="or2:dt:dateOtherScript" transliteration="or2:dt:dateOtherTransliteration">or2:dt:dateOther2</dateOther>
|
142
|
+
<edition>or2:edition</edition>
|
138
143
|
<issuance>continuing</issuance>
|
139
144
|
</originInfo>
|
140
145
|
<language>
|
@@ -159,42 +164,42 @@
|
|
159
164
|
<internetMediaType>ph1:internetMediaType3</internetMediaType>
|
160
165
|
<internetMediaType>ph1:internetMediaType4</internetMediaType>
|
161
166
|
<internetMediaType>ph1:internetMediaType5</internetMediaType>
|
162
|
-
<note displayLabel="ph1:displayLabel1">ph1:note1</note>
|
163
|
-
<note displayLabel="ph1:displayLabel2">ph1:note2</note>
|
164
|
-
<note displayLabel="ph1:displayLabel3">ph1:note3</note>
|
165
|
-
<note displayLabel="ph1:displayLabel4">ph1:note4</note>
|
166
|
-
<note displayLabel="ph1:displayLabel5">ph1:note5</note>
|
167
|
-
<note displayLabel="ph1:displayLabel6">ph1:note6</note>
|
168
|
-
<note displayLabel="ph1:displayLabel7">ph1:note7</note>
|
169
|
-
<note displayLabel="ph1:displayLabel8">ph1:note8</note>
|
170
|
-
<note displayLabel="ph1:displayLabel9">ph1:note9</note>
|
167
|
+
<note displayLabel="ph1:displayLabel1" lang="ph1:language1" script="ph1:script1" transliteration="ph1:transliteration1">ph1:note1</note>
|
168
|
+
<note displayLabel="ph1:displayLabel2" lang="ph1:language2" script="ph1:script2" transliteration="ph1:transliteration2">ph1:note2</note>
|
169
|
+
<note displayLabel="ph1:displayLabel3" lang="ph1:language3" script="ph1:script3" transliteration="ph1:transliteration3">ph1:note3</note>
|
170
|
+
<note displayLabel="ph1:displayLabel4" lang="ph1:language4" script="ph1:script4" transliteration="ph1:transliteration4">ph1:note4</note>
|
171
|
+
<note displayLabel="ph1:displayLabel5" lang="ph1:language5" script="ph1:script5" transliteration="ph1:transliteration5">ph1:note5</note>
|
172
|
+
<note displayLabel="ph1:displayLabel6" lang="ph1:language6" script="ph1:script6" transliteration="ph1:transliteration6">ph1:note6</note>
|
173
|
+
<note displayLabel="ph1:displayLabel7" lang="ph1:language7" script="ph1:script7" transliteration="ph1:transliteration7">ph1:note7</note>
|
174
|
+
<note displayLabel="ph1:displayLabel8" lang="ph1:language8" script="ph1:script8" transliteration="ph1:transliteration8">ph1:note8</note>
|
175
|
+
<note displayLabel="ph1:displayLabel9" lang="ph1:language9" script="ph1:script9" transliteration="ph1:transliteration9">ph1:note9</note>
|
171
176
|
</physicalDescription>
|
172
177
|
<physicalDescription>
|
173
178
|
<form authority="ph2:formAuthority" authorityURI="ph2:formAuthorityURI" valueURI="ph2:formValueURI">ph2:form</form>
|
174
179
|
<extent>ph2:extent</extent>
|
175
180
|
<reformattingQuality>access</reformattingQuality>
|
176
181
|
<digitalOrigin>born digital</digitalOrigin>
|
177
|
-
<note displayLabel="ph2:displayLabel1">ph2:note1</note>
|
182
|
+
<note displayLabel="ph2:displayLabel1" lang="ph2:language1" script="ph2:script1" transliteration="ph2:transliteration1">ph2:note1</note>
|
178
183
|
</physicalDescription>
|
179
184
|
<physicalDescription>
|
180
185
|
<form authority="ph3:formAuthority" authorityURI="ph3:formAuthorityURI" valueURI="ph3:formValueURI">ph3:form</form>
|
181
186
|
<extent>ph3:extent</extent>
|
182
187
|
<reformattingQuality>access</reformattingQuality>
|
183
188
|
<digitalOrigin>born digital</digitalOrigin>
|
184
|
-
<note displayLabel="ph3:displayLabel1">ph3:note1</note>
|
189
|
+
<note displayLabel="ph3:displayLabel1" lang="ph3:language1" script="ph3:script1" transliteration="ph3:transliteration1">ph3:note1</note>
|
185
190
|
</physicalDescription>
|
186
|
-
<abstract displayLabel="ab:displayLabel">ab:abstract</abstract>
|
187
|
-
<tableOfContents displayLabel="tc:displayLabel">tc:tableOfContents</tableOfContents>
|
188
|
-
<note type="no1:type" displayLabel="no1:displayLabel">no1:note</note>
|
189
|
-
<note type="no2:type" displayLabel="no2:displayLabel">no2:note</note>
|
190
|
-
<note type="no3:type" displayLabel="no3:displayLabel">no3:note</note>
|
191
|
-
<note type="no4:type" displayLabel="no4:displayLabel">no4:note</note>
|
192
|
-
<note type="no5:type" displayLabel="no5:displayLabel">no5:note</note>
|
193
|
-
<subject authority="sn1:authority" authorityURI="sn1:authorityURI" valueURI="sn1:valueURI">
|
194
|
-
<name type="personal" authority="sn1:p1:nm:authority" authorityURI="sn1:p1:nm:authorityURI" valueURI="sn1:p1:nm:valueURI">
|
191
|
+
<abstract displayLabel="ab:displayLabel" lang="ab:language" script="ab:script" transliteration="ab:transliteration">ab:abstract</abstract>
|
192
|
+
<tableOfContents displayLabel="tc:displayLabel" lang="tc:language" script="tc:script" transliteration="tc:transliteration">tc:tableOfContents</tableOfContents>
|
193
|
+
<note type="no1:type" displayLabel="no1:displayLabel" lang="no1:language" script="no1:script" transliteration="no1:transliteration" altRepGroup="no1:altRepGroup">no1:note</note>
|
194
|
+
<note type="no2:type" displayLabel="no2:displayLabel" lang="no2:language" script="no2:script" transliteration="no2:transliteration" altRepGroup="no2:altRepGroup">no2:note</note>
|
195
|
+
<note type="no3:type" displayLabel="no3:displayLabel" lang="no3:language" script="no3:script" transliteration="no3:transliteration" altRepGroup="no3:altRepGroup">no3:note</note>
|
196
|
+
<note type="no4:type" displayLabel="no4:displayLabel" lang="no4:language" script="no4:script" transliteration="no4:transliteration" altRepGroup="no4:altRepGroup">no4:note</note>
|
197
|
+
<note type="no5:type" displayLabel="no5:displayLabel" lang="no5:language" script="no5:script" transliteration="no5:transliteration" altRepGroup="no5:altRepGroup">no5:note</note>
|
198
|
+
<subject authority="sn1:authority" authorityURI="sn1:authorityURI" valueURI="sn1:valueURI" altRepGroup="sn1:altRepGroup">
|
199
|
+
<name type="personal" authority="sn1:p1:nm:authority" authorityURI="sn1:p1:nm:authorityURI" valueURI="sn1:p1:nm:valueURI" lang="sn1:p1:nm:language" script="sn1:p1:nm:script" transliteration="sn1:p1:nm:transliteration">
|
195
200
|
<namePart>sn1:p1:name</namePart>
|
196
201
|
</name>
|
197
|
-
<titleInfo type="abbreviated" authority="sn1:p1:ti:authority" authorityURI="sn1:p1:ti:authorityURI" valueURI="sn1:p1:ti:valueURI">
|
202
|
+
<titleInfo type="abbreviated" authority="sn1:p1:ti:authority" authorityURI="sn1:p1:ti:authorityURI" valueURI="sn1:p1:ti:valueURI" lang="sn1:p1:ti:language" script="sn1:p1:ti:script" transliteration="sn1:p1:ti:transliteration">
|
198
203
|
<title>sn1:p1:title</title>
|
199
204
|
<subTitle>sn1:p1:subTitle</subTitle>
|
200
205
|
<partNumber>sn1:p1:partNumber</partNumber>
|
@@ -205,11 +210,11 @@
|
|
205
210
|
<topic authority="sn1:p4:authority" authorityURI="sn1:p4:authorityURI" valueURI="sn1:p4:valueURI">sn1:p4:value</topic>
|
206
211
|
<topic authority="sn1:p5:authority" authorityURI="sn1:p5:authorityURI" valueURI="sn1:p5:valueURI">sn1:p5:value</topic>
|
207
212
|
</subject>
|
208
|
-
<subject authority="sn2:authority" authorityURI="sn2:authorityURI" valueURI="sn2:valueURI">
|
209
|
-
<name type="personal" authority="sn2:p1:nm:authority" authorityURI="sn2:p1:nm:authorityURI" valueURI="sn2:p1:nm:valueURI">
|
213
|
+
<subject authority="sn2:authority" authorityURI="sn2:authorityURI" valueURI="sn2:valueURI" altRepGroup="sn2:altRepGroup">
|
214
|
+
<name type="personal" authority="sn2:p1:nm:authority" authorityURI="sn2:p1:nm:authorityURI" valueURI="sn2:p1:nm:valueURI" lang="sn2:p1:nm:language" script="sn2:p1:nm:script" transliteration="sn2:p1:nm:transliteration">
|
210
215
|
<namePart>sn2:p1:name</namePart>
|
211
216
|
</name>
|
212
|
-
<titleInfo type="abbreviated" authority="sn2:p1:ti:authority" authorityURI="sn2:p1:ti:authorityURI" valueURI="sn2:p1:ti:valueURI">
|
217
|
+
<titleInfo type="abbreviated" authority="sn2:p1:ti:authority" authorityURI="sn2:p1:ti:authorityURI" valueURI="sn2:p1:ti:valueURI" lang="sn2:p1:ti:language" script="sn2:p1:ti:script" transliteration="sn2:p1:ti:transliteration">
|
213
218
|
<title>sn2:p1:title</title>
|
214
219
|
<subTitle>sn2:p1:subTitle</subTitle>
|
215
220
|
<partNumber>sn2:p1:partNumber</partNumber>
|
@@ -220,11 +225,11 @@
|
|
220
225
|
<topic authority="sn2:p4:authority" authorityURI="sn2:p4:authorityURI" valueURI="sn2:p4:valueURI">sn2:p4:value</topic>
|
221
226
|
<topic authority="sn2:p5:authority" authorityURI="sn2:p5:authorityURI" valueURI="sn2:p5:valueURI">sn2:p5:value</topic>
|
222
227
|
</subject>
|
223
|
-
<subject authority="sn3:authority" authorityURI="sn3:authorityURI" valueURI="sn3:valueURI">
|
224
|
-
<name type="personal" authority="sn3:p1:nm:authority" authorityURI="sn3:p1:nm:authorityURI" valueURI="sn3:p1:nm:valueURI">
|
228
|
+
<subject authority="sn3:authority" authorityURI="sn3:authorityURI" valueURI="sn3:valueURI" altRepGroup="sn3:altRepGroup">
|
229
|
+
<name type="personal" authority="sn3:p1:nm:authority" authorityURI="sn3:p1:nm:authorityURI" valueURI="sn3:p1:nm:valueURI" lang="sn3:p1:nm:language" script="sn3:p1:nm:script" transliteration="sn3:p1:nm:transliteration">
|
225
230
|
<namePart>sn3:p1:name</namePart>
|
226
231
|
</name>
|
227
|
-
<titleInfo type="abbreviated" authority="sn3:p1:ti:authority" authorityURI="sn3:p1:ti:authorityURI" valueURI="sn3:p1:ti:valueURI">
|
232
|
+
<titleInfo type="abbreviated" authority="sn3:p1:ti:authority" authorityURI="sn3:p1:ti:authorityURI" valueURI="sn3:p1:ti:valueURI" lang="sn3:p1:ti:language" script="sn3:p1:ti:script" transliteration="sn3:p1:ti:transliteration">
|
228
233
|
<title>sn3:p1:title</title>
|
229
234
|
<subTitle>sn3:p1:subTitle</subTitle>
|
230
235
|
<partNumber>sn3:p1:partNumber</partNumber>
|
@@ -235,11 +240,11 @@
|
|
235
240
|
<topic authority="sn3:p4:authority" authorityURI="sn3:p4:authorityURI" valueURI="sn3:p4:valueURI">sn3:p4:value</topic>
|
236
241
|
<topic authority="sn3:p5:authority" authorityURI="sn3:p5:authorityURI" valueURI="sn3:p5:valueURI">sn3:p5:value</topic>
|
237
242
|
</subject>
|
238
|
-
<subject authority="sn4:authority" authorityURI="sn4:authorityURI" valueURI="sn4:valueURI">
|
239
|
-
<name type="personal" authority="sn4:p1:nm:authority" authorityURI="sn4:p1:nm:authorityURI" valueURI="sn4:p1:nm:valueURI">
|
243
|
+
<subject authority="sn4:authority" authorityURI="sn4:authorityURI" valueURI="sn4:valueURI" altRepGroup="sn4:altRepGroup">
|
244
|
+
<name type="personal" authority="sn4:p1:nm:authority" authorityURI="sn4:p1:nm:authorityURI" valueURI="sn4:p1:nm:valueURI" lang="sn4:p1:nm:language" script="sn4:p1:nm:script" transliteration="sn4:p1:nm:transliteration">
|
240
245
|
<namePart>sn4:p1:name</namePart>
|
241
246
|
</name>
|
242
|
-
<titleInfo type="abbreviated" authority="sn4:p1:ti:authority" authorityURI="sn4:p1:ti:authorityURI" valueURI="sn4:p1:ti:valueURI">
|
247
|
+
<titleInfo type="abbreviated" authority="sn4:p1:ti:authority" authorityURI="sn4:p1:ti:authorityURI" valueURI="sn4:p1:ti:valueURI" lang="sn4:p1:ti:language" script="sn4:p1:ti:script" transliteration="sn4:p1:ti:transliteration">
|
243
248
|
<title>sn4:p1:title</title>
|
244
249
|
<subTitle>sn4:p1:subTitle</subTitle>
|
245
250
|
<partNumber>sn4:p1:partNumber</partNumber>
|
@@ -250,11 +255,11 @@
|
|
250
255
|
<topic authority="sn4:p4:authority" authorityURI="sn4:p4:authorityURI" valueURI="sn4:p4:valueURI">sn4:p4:value</topic>
|
251
256
|
<topic authority="sn4:p5:authority" authorityURI="sn4:p5:authorityURI" valueURI="sn4:p5:valueURI">sn4:p5:value</topic>
|
252
257
|
</subject>
|
253
|
-
<subject authority="sn5:authority" authorityURI="sn5:authorityURI" valueURI="sn5:valueURI">
|
254
|
-
<name type="personal" authority="sn5:p1:nm:authority" authorityURI="sn5:p1:nm:authorityURI" valueURI="sn5:p1:nm:valueURI">
|
258
|
+
<subject authority="sn5:authority" authorityURI="sn5:authorityURI" valueURI="sn5:valueURI" altRepGroup="sn5:altRepGroup">
|
259
|
+
<name type="personal" authority="sn5:p1:nm:authority" authorityURI="sn5:p1:nm:authorityURI" valueURI="sn5:p1:nm:valueURI" lang="sn5:p1:nm:language" script="sn5:p1:nm:script" transliteration="sn5:p1:nm:transliteration">
|
255
260
|
<namePart>sn5:p1:name</namePart>
|
256
261
|
</name>
|
257
|
-
<titleInfo type="abbreviated" authority="sn5:p1:ti:authority" authorityURI="sn5:p1:ti:authorityURI" valueURI="sn5:p1:ti:valueURI">
|
262
|
+
<titleInfo type="abbreviated" authority="sn5:p1:ti:authority" authorityURI="sn5:p1:ti:authorityURI" valueURI="sn5:p1:ti:valueURI" lang="sn5:p1:ti:language" script="sn5:p1:ti:script" transliteration="sn5:p1:ti:transliteration">
|
258
263
|
<title>sn5:p1:title</title>
|
259
264
|
<subTitle>sn5:p1:subTitle</subTitle>
|
260
265
|
<partNumber>sn5:p1:partNumber</partNumber>
|
@@ -265,35 +270,35 @@
|
|
265
270
|
<topic authority="sn5:p4:authority" authorityURI="sn5:p4:authorityURI" valueURI="sn5:p4:valueURI">sn5:p4:value</topic>
|
266
271
|
<topic authority="sn5:p5:authority" authorityURI="sn5:p5:authorityURI" valueURI="sn5:p5:valueURI">sn5:p5:value</topic>
|
267
272
|
</subject>
|
268
|
-
<subject authority="su1:authority" authorityURI="su1:authorityURI" valueURI="su1:valueURI">
|
273
|
+
<subject authority="su1:authority" authorityURI="su1:authorityURI" valueURI="su1:valueURI" lang="su1:language" script="su1:script" transliteration="su1:transliteration" altRepGroup="su1:altRepGroup">
|
269
274
|
<topic authority="su1:p1:authority" authorityURI="su1:p1:authorityURI" valueURI="su1:p1:valueURI">su1:p1:value</topic>
|
270
275
|
<topic authority="su1:p2:authority" authorityURI="su1:p2:authorityURI" valueURI="su1:p2:valueURI">su1:p2:value</topic>
|
271
276
|
<topic authority="su1:p3:authority" authorityURI="su1:p3:authorityURI" valueURI="su1:p3:valueURI">su1:p3:value</topic>
|
272
277
|
<topic authority="su1:p4:authority" authorityURI="su1:p4:authorityURI" valueURI="su1:p4:valueURI">su1:p4:value</topic>
|
273
278
|
<topic authority="su1:p5:authority" authorityURI="su1:p5:authorityURI" valueURI="su1:p5:valueURI">su1:p5:value</topic>
|
274
279
|
</subject>
|
275
|
-
<subject authority="su2:authority" authorityURI="su2:authorityURI" valueURI="su2:valueURI">
|
280
|
+
<subject authority="su2:authority" authorityURI="su2:authorityURI" valueURI="su2:valueURI" lang="su2:language" script="su2:script" transliteration="su2:transliteration" altRepGroup="su2:altRepGroup">
|
276
281
|
<topic authority="su2:p1:authority" authorityURI="su2:p1:authorityURI" valueURI="su2:p1:valueURI">su2:p1:value</topic>
|
277
282
|
<topic authority="su2:p2:authority" authorityURI="su2:p2:authorityURI" valueURI="su2:p2:valueURI">su2:p2:value</topic>
|
278
283
|
<topic authority="su2:p3:authority" authorityURI="su2:p3:authorityURI" valueURI="su2:p3:valueURI">su2:p3:value</topic>
|
279
284
|
<topic authority="su2:p4:authority" authorityURI="su2:p4:authorityURI" valueURI="su2:p4:valueURI">su2:p4:value</topic>
|
280
285
|
<topic authority="su2:p5:authority" authorityURI="su2:p5:authorityURI" valueURI="su2:p5:valueURI">su2:p5:value</topic>
|
281
286
|
</subject>
|
282
|
-
<subject authority="su3:authority" authorityURI="su3:authorityURI" valueURI="su3:valueURI">
|
287
|
+
<subject authority="su3:authority" authorityURI="su3:authorityURI" valueURI="su3:valueURI" lang="su3:language" script="su3:script" transliteration="su3:transliteration" altRepGroup="su3:altRepGroup">
|
283
288
|
<topic authority="su3:p1:authority" authorityURI="su3:p1:authorityURI" valueURI="su3:p1:valueURI">su3:p1:value</topic>
|
284
289
|
<topic authority="su3:p2:authority" authorityURI="su3:p2:authorityURI" valueURI="su3:p2:valueURI">su3:p2:value</topic>
|
285
290
|
<topic authority="su3:p3:authority" authorityURI="su3:p3:authorityURI" valueURI="su3:p3:valueURI">su3:p3:value</topic>
|
286
291
|
<topic authority="su3:p4:authority" authorityURI="su3:p4:authorityURI" valueURI="su3:p4:valueURI">su3:p4:value</topic>
|
287
292
|
<topic authority="su3:p5:authority" authorityURI="su3:p5:authorityURI" valueURI="su3:p5:valueURI">su3:p5:value</topic>
|
288
293
|
</subject>
|
289
|
-
<subject authority="su4:authority" authorityURI="su4:authorityURI" valueURI="su4:valueURI">
|
294
|
+
<subject authority="su4:authority" authorityURI="su4:authorityURI" valueURI="su4:valueURI" lang="su4:language" script="su4:script" transliteration="su4:transliteration" altRepGroup="su4:altRepGroup">
|
290
295
|
<topic authority="su4:p1:authority" authorityURI="su4:p1:authorityURI" valueURI="su4:p1:valueURI">su4:p1:value</topic>
|
291
296
|
<topic authority="su4:p2:authority" authorityURI="su4:p2:authorityURI" valueURI="su4:p2:valueURI">su4:p2:value</topic>
|
292
297
|
<topic authority="su4:p3:authority" authorityURI="su4:p3:authorityURI" valueURI="su4:p3:valueURI">su4:p3:value</topic>
|
293
298
|
<topic authority="su4:p4:authority" authorityURI="su4:p4:authorityURI" valueURI="su4:p4:valueURI">su4:p4:value</topic>
|
294
299
|
<topic authority="su4:p5:authority" authorityURI="su4:p5:authorityURI" valueURI="su4:p5:valueURI">su4:p5:value</topic>
|
295
300
|
</subject>
|
296
|
-
<subject authority="su5:authority" authorityURI="su5:authorityURI" valueURI="su5:valueURI">
|
301
|
+
<subject authority="su5:authority" authorityURI="su5:authorityURI" valueURI="su5:valueURI" lang="su5:language" script="su5:script" transliteration="su5:transliteration" altRepGroup="su5:altRepGroup">
|
297
302
|
<topic authority="su5:p1:authority" authorityURI="su5:p1:authorityURI" valueURI="su5:p1:valueURI">su5:p1:value</topic>
|
298
303
|
<topic authority="su5:p2:authority" authorityURI="su5:p2:authorityURI" valueURI="su5:p2:valueURI">su5:p2:value</topic>
|
299
304
|
<topic authority="su5:p3:authority" authorityURI="su5:p3:authorityURI" valueURI="su5:p3:valueURI">su5:p3:value</topic>
|
@@ -313,7 +318,7 @@
|
|
313
318
|
<identifier type="id4:type" displayLabel="id4:displayLabel">id4:identifier</identifier>
|
314
319
|
<identifier type="id5:type" displayLabel="id5:displayLabel">id5:identifier</identifier>
|
315
320
|
<location>
|
316
|
-
<physicalLocation type="repository" authority="lo:authority" valueURI="lo:valueURI">lo:repository</physicalLocation>
|
321
|
+
<physicalLocation type="repository" authority="lo:authority" valueURI="lo:valueURI" lang="lo:language" script="lo:script" transliteration="lo:transliteration">lo:repository</physicalLocation>
|
317
322
|
<physicalLocation type="discovery">lo:physicalLocation</physicalLocation>
|
318
323
|
<shelfLocator>lo:callNumber</shelfLocator>
|
319
324
|
<url usage="primary display">lo:purl</url>
|
@@ -418,7 +423,7 @@
|
|
418
423
|
</location>
|
419
424
|
</relatedItem>
|
420
425
|
<extension displayLabel="geo">
|
421
|
-
<rdf:RDF xmlns:gml="http://www.opengis.net/gml/3.2/" xmlns:dc="http://purl.org/dc/elements/1.1/">
|
426
|
+
<rdf:RDF xmlns:gml="http://www.opengis.net/gml/3.2/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:gmd="http://www.isotc211.org/2005/gmd">
|
422
427
|
<rdf:Description rdf:about="ext:purl">
|
423
428
|
<dc:format>ext:dc:format</dc:format>
|
424
429
|
<dc:type>ext:dc:type</dc:type>
|
Binary file
|
@@ -0,0 +1,23 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<xmlDocs xmlns="http://library.stanford.edu/xmlDocs" datetime="2018-04-18 02:31:53PM" sourceFile="point_coord_test.xlsx">
|
3
|
+
<xmlDoc id="descMetadata" objectId="aa22aaa2222">
|
4
|
+
<mods xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.loc.gov/mods/v3" version="3.5" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-5.xsd">
|
5
|
+
<titleInfo>
|
6
|
+
<title>data with long lat</title>
|
7
|
+
</titleInfo>
|
8
|
+
<extension displayLabel="geo">
|
9
|
+
<rdf:RDF xmlns:gml="http://www.opengis.net/gml/3.2/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:gmd="http://www.isotc211.org/2005/gmd">
|
10
|
+
<rdf:Description>
|
11
|
+
<dc:format>image/jpeg</dc:format>
|
12
|
+
<dc:type>Image</dc:type>
|
13
|
+
<gmd:centerPoint>
|
14
|
+
<gml:Point gml:id="ID">
|
15
|
+
<gml:pos>111 222</gml:pos>
|
16
|
+
</gml:Point>
|
17
|
+
</gmd:centerPoint>
|
18
|
+
</rdf:Description>
|
19
|
+
</rdf:RDF>
|
20
|
+
</extension>
|
21
|
+
</mods>
|
22
|
+
</xmlDoc>
|
23
|
+
</xmlDocs>
|
@@ -21,7 +21,8 @@ RSpec.describe Modsulator do
|
|
21
21
|
'SC1049_metadata.xlsx' => 'SC1049_metadata.xml',
|
22
22
|
'edition_physLoc_intmediatype.xlsx' => 'edition_physLoc_intmediatype.xml',
|
23
23
|
'filled_template_20160711.xlsx' => 'filled_template_20160711.xml',
|
24
|
-
'location_url.xlsx' => 'location_url.xml'
|
24
|
+
'location_url.xlsx' => 'location_url.xml',
|
25
|
+
'point_coord_test.xlsx' => 'point_coord_test.xml'
|
25
26
|
}.each do |testfile, results_file|
|
26
27
|
it "converts #{testfile} correctly to valid XML" do
|
27
28
|
generated_xml_string = Modsulator.new(File.join(FIXTURES_DIR, testfile), testfile).convert_rows()
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: modsulator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.5
|
4
|
+
version: 1.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tommy Ingulfsen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: roo
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: stanford-mods-normalizer
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.1'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.1'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: rake
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -167,10 +181,8 @@ files:
|
|
167
181
|
- lib/modsulator/modsulator_sheet.rb
|
168
182
|
- lib/modsulator/modsulator_template.xlsx
|
169
183
|
- lib/modsulator/modsulator_template.xml
|
170
|
-
- lib/modsulator/normalizer.rb
|
171
184
|
- lib/modsulator/validator.rb
|
172
185
|
- spec/features/modsulator_sheet_unit_spec.rb
|
173
|
-
- spec/features/normalizer_unit_spec.rb
|
174
186
|
- spec/features/process_template_spec.rb
|
175
187
|
- spec/features/validator_unit_spec.rb
|
176
188
|
- spec/fixtures/Fitch_Chavez.xlsx
|
@@ -202,6 +214,8 @@ files:
|
|
202
214
|
- spec/fixtures/location_url.xml
|
203
215
|
- spec/fixtures/manifest_v0174.csv
|
204
216
|
- spec/fixtures/manifest_v0174.xml
|
217
|
+
- spec/fixtures/point_coord_test.xlsx
|
218
|
+
- spec/fixtures/point_coord_test.xml
|
205
219
|
- spec/fixtures/roman_coins_mods.xlsx
|
206
220
|
- spec/fixtures/roman_coins_mods.xml
|
207
221
|
- spec/fixtures/test_002.csv
|
@@ -229,13 +243,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
229
243
|
version: '0'
|
230
244
|
requirements: []
|
231
245
|
rubyforge_project:
|
232
|
-
rubygems_version: 2.
|
246
|
+
rubygems_version: 2.7.3
|
233
247
|
signing_key:
|
234
248
|
specification_version: 4
|
235
249
|
summary: Produces (Stanford) MODS XML from spreadsheets.
|
236
250
|
test_files:
|
237
251
|
- spec/features/modsulator_sheet_unit_spec.rb
|
238
|
-
- spec/features/normalizer_unit_spec.rb
|
239
252
|
- spec/features/process_template_spec.rb
|
240
253
|
- spec/features/validator_unit_spec.rb
|
241
254
|
- spec/fixtures/ars0056_manifest.csv
|
@@ -263,6 +276,8 @@ test_files:
|
|
263
276
|
- spec/fixtures/manifest_v0174.xml
|
264
277
|
- spec/fixtures/Matter_manifest.csv
|
265
278
|
- spec/fixtures/Matter_manifest.xml
|
279
|
+
- spec/fixtures/point_coord_test.xlsx
|
280
|
+
- spec/fixtures/point_coord_test.xml
|
266
281
|
- spec/fixtures/PosadaSpreadsheet.xlsx
|
267
282
|
- spec/fixtures/PosadaSpreadsheet.xml
|
268
283
|
- spec/fixtures/roman_coins_mods.xlsx
|
@@ -274,4 +289,3 @@ test_files:
|
|
274
289
|
- spec/integration_tests/integration_spec.rb
|
275
290
|
- spec/lib/modsulator_spec.rb
|
276
291
|
- spec/spec_helper.rb
|
277
|
-
has_rdoc:
|
@@ -1,225 +0,0 @@
|
|
1
|
-
# File "normalizer.rb" - defines a class for normalizing MODS XML according to the Stanford guidelines.
|
2
|
-
|
3
|
-
require 'nokogiri'
|
4
|
-
|
5
|
-
# This class provides methods to normalize MODS XML according to the Stanford guidelines.
|
6
|
-
# @see https://consul.stanford.edu/display/chimera/MODS+validation+and+normalization Requirements (Stanford Consul page - requires login)
|
7
|
-
class Normalizer
|
8
|
-
# Linefeed character entity reference
|
9
|
-
LINEFEED = '&#10;'
|
10
|
-
|
11
|
-
# Select all single <dateCreated> and <dateIssued> fields
|
12
|
-
LONE_DATE_XPATH = '//mods:originInfo/mods:dateCreated[1][not(following-sibling::*[1][self::mods:dateCreated])] | //mods:originInfo/mods:dateIssued[1][not(following-sibling::*[1][self::mods:dateIssued])]'
|
13
|
-
|
14
|
-
# Select all <dateCreated> and <dateIssued> fields
|
15
|
-
DATE_CREATED_ISSUED_XPATH = '//mods:dateCreated | //mods:dateIssued'
|
16
|
-
|
17
|
-
# The official MODS namespace, courtesy of the Library of Congress
|
18
|
-
MODS_NAMESPACE = 'http://www.loc.gov/mods/v3'
|
19
|
-
|
20
|
-
# Selects <abstract>, <tableOfContents> and <note> when no namespace is present
|
21
|
-
LINEFEED_XPATH = '//abstract | //tableOfContents | //note'
|
22
|
-
|
23
|
-
# Selects <abstract>, <tableOfContents> and <note> when a namespace is present
|
24
|
-
LINEFEED_XPATH_NAMESPACED = '//ns:abstract | //ns:tableOfContents | //ns:note'
|
25
|
-
|
26
|
-
|
27
|
-
# Checks if a node has attributes that we make exceptions for. There are two such exceptions.
|
28
|
-
#
|
29
|
-
# * A "collection" attribute with the value "yes" <em>on a typeOfResource tag</em>.
|
30
|
-
# * A "manuscript" attribute with the value "yes" <em>on a typeOfResource tag</em>.
|
31
|
-
#
|
32
|
-
# Nodes that fall under any of these exceptions should not be deleted, even if they have no content.
|
33
|
-
#
|
34
|
-
# @param [Nokogiri::XML::Element] node An XML node.
|
35
|
-
# @return [Boolean] true if the node contains any of the exceptional attributes, false otherwise.
|
36
|
-
def exceptional?(node)
|
37
|
-
return false unless node != nil
|
38
|
-
|
39
|
-
tag = node.name
|
40
|
-
attributes = node.attributes
|
41
|
-
|
42
|
-
return false if(attributes.empty?)
|
43
|
-
|
44
|
-
attributes.each do |key, value|
|
45
|
-
if(tag == 'typeOfResource') # Note that according to the MODS schema, any other value than 'yes' for these attributes is invalid
|
46
|
-
if((key == 'collection' && value.to_s.downcase == 'yes') ||
|
47
|
-
(key == 'manuscript' && value.to_s.downcase == 'yes'))
|
48
|
-
return true
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
return false
|
53
|
-
end
|
54
|
-
|
55
|
-
|
56
|
-
# Recursive helper method for {Normalizer#clean_linefeeds} to do string substitution.
|
57
|
-
#
|
58
|
-
# @param [Nokogiri::XML::Element] node An XML node
|
59
|
-
# @return [String] A string composed of the entire contents of the given node, with substitutions made as described for {#clean_linefeeds}.
|
60
|
-
def substitute_linefeeds(node)
|
61
|
-
new_text = String.new
|
62
|
-
|
63
|
-
# If we substitute in '&#10;' by itself, Nokogiri interprets that and then prints '&amp;#10;' when printing the document later. This
|
64
|
-
# is an ugly way to add linefeed characters in a way that we at least get well-formatted output in the end.
|
65
|
-
if(node.text?)
|
66
|
-
new_text = node.content.gsub(/\r\n/, Nokogiri::HTML(LINEFEED).text).gsub(/\n/, Nokogiri::HTML(LINEFEED).text).gsub(/\r/, Nokogiri::HTML(LINEFEED).text).gsub('\\n', Nokogiri::HTML(LINEFEED).text)
|
67
|
-
else
|
68
|
-
if(node.node_name == 'br')
|
69
|
-
new_text += Nokogiri::HTML(LINEFEED).text
|
70
|
-
elsif(node.node_name == 'p')
|
71
|
-
new_text += Nokogiri::HTML(LINEFEED).text + Nokogiri::HTML(LINEFEED).text
|
72
|
-
end
|
73
|
-
|
74
|
-
node.children.each do |c|
|
75
|
-
new_text += substitute_linefeeds(c)
|
76
|
-
end
|
77
|
-
end
|
78
|
-
return new_text
|
79
|
-
end
|
80
|
-
|
81
|
-
|
82
|
-
# Given the root of an XML document, replaces linefeed characters inside <tableOfContents>, <abstract> and <note> XML node by &#10;
|
83
|
-
# \n, \r, <br> and <br/> are all replaced by a single &#10;
|
84
|
-
# <p> is replaced by two &#10;
|
85
|
-
# </p> is removed
|
86
|
-
# \r\n is replaced by &#10;
|
87
|
-
# Any tags not listed above are removed. MODS 3.5 does not allow for anything other than text inside these three nodes.
|
88
|
-
#
|
89
|
-
# @param [Nokogiri::XML::NodeSet] node_list All <tableOfContents>, <abstract> and <note> elements.
|
90
|
-
# @return [Void] This method doesn't return anything, but introduces UTF-8 linefeed characters in place, as described above.
|
91
|
-
def clean_linefeeds(node_list)
|
92
|
-
node_list.each do |current_node|
|
93
|
-
new_text = substitute_linefeeds(current_node)
|
94
|
-
current_node.children.remove
|
95
|
-
current_node.content = new_text
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
|
100
|
-
# Cleans up the text of a node:
|
101
|
-
#
|
102
|
-
# * Removes extra whitespace at the beginning and end.
|
103
|
-
# * Removes any consecutive whitespace within the string.
|
104
|
-
#
|
105
|
-
# @param [String] s The text of an XML node.
|
106
|
-
# @return [String] The cleaned string, as described. Returns nil if the input is nil, or if the input is an empty string.
|
107
|
-
def clean_text(s)
|
108
|
-
return nil unless s != nil && s != ''
|
109
|
-
return s.gsub!(/\s+/, ' ').strip!
|
110
|
-
end
|
111
|
-
|
112
|
-
|
113
|
-
# Removes empty attributes from a given node.
|
114
|
-
#
|
115
|
-
# @param [Nokogiri::XML::Element] node An XML node.
|
116
|
-
# @return [Void] This method doesn't return anything, but modifies the XML tree starting at the given node.
|
117
|
-
def remove_empty_attributes(node)
|
118
|
-
children = node.children
|
119
|
-
attributes = node.attributes
|
120
|
-
|
121
|
-
attributes.each do |key, value|
|
122
|
-
node.remove_attribute(key) if(value.to_s.strip.empty?)
|
123
|
-
end
|
124
|
-
|
125
|
-
children.each do |c|
|
126
|
-
remove_empty_attributes(c)
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
|
131
|
-
# Removes empty nodes from an XML tree. See {#exceptional?} for nodes that are kept even if empty.
|
132
|
-
#
|
133
|
-
# @param [Nokogiri::XML::Element] node An XML node.
|
134
|
-
# @return [Void] This method doesn't return anything, but modifies the XML tree starting at the given node.
|
135
|
-
def remove_empty_nodes(node)
|
136
|
-
children = node.children
|
137
|
-
|
138
|
-
if(node.text?)
|
139
|
-
if(node.to_s.strip.empty?)
|
140
|
-
node.remove
|
141
|
-
else
|
142
|
-
return
|
143
|
-
end
|
144
|
-
elsif(children.length > 0)
|
145
|
-
children.each do |c|
|
146
|
-
remove_empty_nodes(c)
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
if(!exceptional?(node) && (node.children.length == 0))
|
151
|
-
node.remove
|
152
|
-
end
|
153
|
-
end
|
154
|
-
|
155
|
-
|
156
|
-
# Removes leading and trailing spaces from a node.
|
157
|
-
#
|
158
|
-
# @param [Nokogiri::XML::Element] node An XML node.
|
159
|
-
# @return [Void] This method doesn't return anything, but modifies the entire XML tree starting at the
|
160
|
-
# the given node, removing leading and trailing spaces from all text. If the input is nil,
|
161
|
-
# an exception will be raised.
|
162
|
-
def trim_text(node)
|
163
|
-
children = node.children
|
164
|
-
|
165
|
-
if(node.text?)
|
166
|
-
node.parent.content = node.text.strip
|
167
|
-
else
|
168
|
-
children.each do |c|
|
169
|
-
trim_text(c)
|
170
|
-
end
|
171
|
-
end
|
172
|
-
end
|
173
|
-
|
174
|
-
|
175
|
-
# Sometimes there are spurious decimal digits within the date fields. This method removes any trailing decimal points within
|
176
|
-
# <dateCreated> and <dateIssued>.
|
177
|
-
#
|
178
|
-
# @param [Nokogiri::XML::NodeSet] nodes A set of all affected <dateCreated> and <dateIssued> elements.
|
179
|
-
# @return [Void] The given document is modified in place.
|
180
|
-
def clean_date_values(nodes)
|
181
|
-
nodes.each do |current_node|
|
182
|
-
current_node.content = current_node.content.sub(/(.*)\.\d+$/, '\1')
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
# Normalizes the given MODS XML document according to the Stanford guidelines.
|
187
|
-
#
|
188
|
-
# @param [Nokogiri::XML::Element] root The root of a MODS XML document.
|
189
|
-
# @return [Void] The given document is modified in place.
|
190
|
-
def normalize_mods_document(root)
|
191
|
-
node_list = []
|
192
|
-
if(root.namespace.nil?)
|
193
|
-
node_list = root.xpath(LINEFEED_XPATH)
|
194
|
-
else
|
195
|
-
node_list = root.xpath(LINEFEED_XPATH_NAMESPACED, 'ns' => root.namespace.href)
|
196
|
-
end
|
197
|
-
clean_linefeeds(node_list) # Do this before deleting <br> and <p> with remove_empty_nodes()
|
198
|
-
|
199
|
-
remove_empty_attributes(root)
|
200
|
-
remove_empty_nodes(root)
|
201
|
-
trim_text(root)
|
202
|
-
clean_date_values(root.xpath(DATE_CREATED_ISSUED_XPATH, 'mods' => MODS_NAMESPACE))
|
203
|
-
end
|
204
|
-
|
205
|
-
# Normalizes the given MODS XML document according to the Stanford guidelines.
|
206
|
-
#
|
207
|
-
# @deprecated Use normalize_mods_document instead.
|
208
|
-
# @param [Nokogiri::XML::Element] root The root of a MODS XML document.
|
209
|
-
# @return [Void] The given document is modified in place.
|
210
|
-
def normalize_document(root)
|
211
|
-
normalize_mods_document(root)
|
212
|
-
end
|
213
|
-
|
214
|
-
|
215
|
-
# Normalizes the given XML document string according to the Stanford guidelines.
|
216
|
-
#
|
217
|
-
# @param [String] xml_string An XML document
|
218
|
-
# @return [String] The XML string, with normalizations applied.
|
219
|
-
def normalize_xml_string(xml_string)
|
220
|
-
doc = Nokogiri::XML(xml_string)
|
221
|
-
normalize_document(doc.root)
|
222
|
-
doc.to_s
|
223
|
-
end
|
224
|
-
end
|
225
|
-
|