modsulator 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +0 -10
- data/lib/modsulator.rb +4 -4
- data/lib/modsulator/modsulator_template.xlsx +0 -0
- data/lib/modsulator/modsulator_template.xml +194 -181
- data/spec/fixtures/filled_template_20160711.xml +58 -53
- data/spec/fixtures/point_coord_test.xlsx +0 -0
- data/spec/fixtures/point_coord_test.xml +23 -0
- data/spec/integration_tests/integration_spec.rb +2 -1
- metadata +21 -7
- data/lib/modsulator/normalizer.rb +0 -225
- data/spec/features/normalizer_unit_spec.rb +0 -150
@@ -1,29 +1,29 @@
|
|
1
1
|
<?xml version="1.0"?>
|
2
|
-
<xmlDocs xmlns="http://library.stanford.edu/xmlDocs" datetime="
|
2
|
+
<xmlDocs xmlns="http://library.stanford.edu/xmlDocs" datetime="2018-04-18 02:42:25PM" sourceFile="filled_template_20160711.xlsx">
|
3
3
|
<xmlDoc id="descMetadata" objectId="123">
|
4
4
|
<mods xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.loc.gov/mods/v3" version="3.5" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-5.xsd">
|
5
|
-
<titleInfo authority="ti1:authority" authorityURI="ti1:authorityURI" valueURI="ti1:valueURI" nameTitleGroup="ti1:nameTitleGroup">
|
5
|
+
<titleInfo authority="ti1:authority" authorityURI="ti1:authorityURI" valueURI="ti1:valueURI" nameTitleGroup="ti1:nameTitleGroup" lang="ti1:language" script="ti1:script" transliteration="ti1:transliteration" altRepGroup="ti1:altRepGroup">
|
6
6
|
<nonSort>ti1:nonSort</nonSort>
|
7
7
|
<title>ti1:title</title>
|
8
8
|
<subTitle>ti1:subTitle</subTitle>
|
9
9
|
<partNumber>ti1:partNumber</partNumber>
|
10
10
|
<partName>ti1:partName</partName>
|
11
11
|
</titleInfo>
|
12
|
-
<titleInfo type="abbreviated" displayLabel="ti2:displayLabel" authority="ti2:authority" authorityURI="ti2:authorityURI" valueURI="ti2:valueURI" nameTitleGroup="ti2:nameTitleGroup">
|
12
|
+
<titleInfo type="abbreviated" displayLabel="ti2:displayLabel" authority="ti2:authority" authorityURI="ti2:authorityURI" valueURI="ti2:valueURI" nameTitleGroup="ti2:nameTitleGroup" lang="ti2:language" script="ti2:script" transliteration="ti2:transliteration" altRepGroup="ti2:altRepGroup">
|
13
13
|
<nonSort>ti2:nonSort</nonSort>
|
14
14
|
<title>ti2:title</title>
|
15
15
|
<subTitle>ti2:subTitle</subTitle>
|
16
16
|
<partNumber>ti2:partNumber</partNumber>
|
17
17
|
<partName>ti2:partName</partName>
|
18
18
|
</titleInfo>
|
19
|
-
<titleInfo type="abbreviated" displayLabel="ti3:displayLabel" authority="ti3:authority" authorityURI="ti3:authorityURI" valueURI="ti3:valueURI" nameTitleGroup="ti3:nameTitleGroup">
|
19
|
+
<titleInfo type="abbreviated" displayLabel="ti3:displayLabel" authority="ti3:authority" authorityURI="ti3:authorityURI" valueURI="ti3:valueURI" nameTitleGroup="ti3:nameTitleGroup" lang="ti3:language" script="ti3:script" transliteration="ti3:transliteration" altRepGroup="ti3:altRepGroup">
|
20
20
|
<nonSort>ti3:nonSort</nonSort>
|
21
21
|
<title>ti3:title</title>
|
22
22
|
<subTitle>ti3:subTitle</subTitle>
|
23
23
|
<partNumber>ti3:partNumber</partNumber>
|
24
24
|
<partName>ti3:partName</partName>
|
25
25
|
</titleInfo>
|
26
|
-
<name type="personal" usage="primary" authority="na1:authority" authorityURI="na1:authorityURI" valueURI="na1:valueURI" nameTitleGroup="na1:nameTitleGroup">
|
26
|
+
<name type="personal" usage="primary" authority="na1:authority" authorityURI="na1:authorityURI" valueURI="na1:valueURI" nameTitleGroup="na1:nameTitleGroup" lang="na1:language" script="na1:script" transliteration="na1:transliteration" altRepGroup="na1:altRepGroup">
|
27
27
|
<namePart>na1:namePart</namePart>
|
28
28
|
<role>
|
29
29
|
<roleTerm type="code" authority="ro1:authority" authorityURI="ro1:authorityURI" valueURI="ro1:valueURI">ro1:roleCode</roleTerm>
|
@@ -38,7 +38,7 @@
|
|
38
38
|
<roleTerm type="text" authority="ro1:authority3" authorityURI="ro1:authorityURI3" valueURI="ro1:valueURI3">ro1:roleText3</roleTerm>
|
39
39
|
</role>
|
40
40
|
</name>
|
41
|
-
<name type="personal" authority="na2:authority" authorityURI="na2:authorityURI" valueURI="na2:valueURI" nameTitleGroup="na2:nameTitleGroup">
|
41
|
+
<name type="personal" authority="na2:authority" authorityURI="na2:authorityURI" valueURI="na2:valueURI" nameTitleGroup="na2:nameTitleGroup" lang="na2:language" script="na2:script" transliteration="na2:transliteration" altRepGroup="na2:altRepGroup">
|
42
42
|
<namePart>na2:namePart</namePart>
|
43
43
|
<role>
|
44
44
|
<roleTerm type="code" authority="ro2:authority" authorityURI="ro2:authorityURI" valueURI="ro2:valueURI">ro2:roleCode</roleTerm>
|
@@ -53,7 +53,7 @@
|
|
53
53
|
<roleTerm type="text" authority="ro2:authority3" authorityURI="ro2:authorityURI3" valueURI="ro2:valueURI3">ro2:roleText3</roleTerm>
|
54
54
|
</role>
|
55
55
|
</name>
|
56
|
-
<name type="personal" authority="na3:authority" authorityURI="na3:authorityURI" valueURI="na3:valueURI" nameTitleGroup="na3:nameTitleGroup">
|
56
|
+
<name type="personal" authority="na3:authority" authorityURI="na3:authorityURI" valueURI="na3:valueURI" nameTitleGroup="na3:nameTitleGroup" lang="na3:language" script="na3:script" transliteration="na3:transliteration" altRepGroup="na3:altRepGroup">
|
57
57
|
<namePart>na3:namePart</namePart>
|
58
58
|
<role>
|
59
59
|
<roleTerm type="code" authority="ro3:authority" authorityURI="ro3:authorityURI" valueURI="ro3:valueURI">ro3:roleCode</roleTerm>
|
@@ -68,7 +68,7 @@
|
|
68
68
|
<roleTerm type="text" authority="ro3:authority3" authorityURI="ro3:authorityURI3" valueURI="ro3:valueURI3">ro3:roleText3</roleTerm>
|
69
69
|
</role>
|
70
70
|
</name>
|
71
|
-
<name type="personal" authority="na4:authority" authorityURI="na4:authorityURI" valueURI="na4:valueURI" nameTitleGroup="na4:nameTitleGroup">
|
71
|
+
<name type="personal" authority="na4:authority" authorityURI="na4:authorityURI" valueURI="na4:valueURI" nameTitleGroup="na4:nameTitleGroup" lang="na4:language" script="na4:script" transliteration="na4:transliteration" altRepGroup="na4:altRepGroup">
|
72
72
|
<namePart>na4:namePart</namePart>
|
73
73
|
<role>
|
74
74
|
<roleTerm type="code" authority="ro4:authority" authorityURI="ro4:authorityURI" valueURI="ro4:valueURI">ro4:roleCode</roleTerm>
|
@@ -83,7 +83,7 @@
|
|
83
83
|
<roleTerm type="text" authority="ro4:authority3" authorityURI="ro4:authorityURI3" valueURI="ro4:valueURI3">ro4:roleText3</roleTerm>
|
84
84
|
</role>
|
85
85
|
</name>
|
86
|
-
<name type="personal" authority="na5:authority" authorityURI="na5:authorityURI" valueURI="na5:valueURI" nameTitleGroup="na5:nameTitleGroup">
|
86
|
+
<name type="personal" authority="na5:authority" authorityURI="na5:authorityURI" valueURI="na5:valueURI" nameTitleGroup="na5:nameTitleGroup" lang="na5:language" script="na5:script" transliteration="na5:transliteration" altRepGroup="na5:altRepGroup">
|
87
87
|
<namePart>na5:namePart</namePart>
|
88
88
|
<role>
|
89
89
|
<roleTerm type="code" authority="ro5:authority" authorityURI="ro5:authorityURI" valueURI="ro5:valueURI">ro5:roleCode</roleTerm>
|
@@ -104,12 +104,12 @@
|
|
104
104
|
<genre type="ge1:type" authority="ge1:authority" authorityURI="ge1:authorityURI" valueURI="ge1:valueURI">ge1:genre</genre>
|
105
105
|
<genre type="ge2:type" authority="ge2:authority" authorityURI="ge2:authorityURI" valueURI="ge2:valueURI">ge2:genre</genre>
|
106
106
|
<genre type="ge3:type" authority="ge3:authority" authorityURI="ge3:authorityURI" valueURI="ge3:valueURI">ge3:genre</genre>
|
107
|
-
<originInfo displayLabel="or:displayLabel" eventType="or:eventType">
|
107
|
+
<originInfo displayLabel="or:displayLabel" eventType="or:eventType" altRepGroup="or:altRepGroup">
|
108
108
|
<place>
|
109
109
|
<placeTerm type="code" authority="marcgac" authorityURI="pl:authorityURI" valueURI="pl:valueURI">pl:placeCode</placeTerm>
|
110
110
|
<placeTerm type="text" authority="marcgac" authorityURI="pl:authorityURI" valueURI="pl:valueURI">pl:placeText</placeTerm>
|
111
111
|
</place>
|
112
|
-
<publisher>or:publisher</publisher>
|
112
|
+
<publisher lang="or:publisherLanguage" script="or:publisherScript" transliteration="or:publisherTransliteration">or:publisher</publisher>
|
113
113
|
<dateCreated keyDate="yes" encoding="w3cdtf" qualifier="approximate" point="start">dt:dateCreated</dateCreated>
|
114
114
|
<dateCreated encoding="w3cdtf" qualifier="approximate" point="start">dt:dateCreated2</dateCreated>
|
115
115
|
<dateIssued keyDate="yes" encoding="w3cdtf" qualifier="approximate" point="start">dt:dateIssued</dateIssued>
|
@@ -118,15 +118,17 @@
|
|
118
118
|
<dateCaptured encoding="w3cdtf" qualifier="approximate" point="start">dt:dateCaptured2</dateCaptured>
|
119
119
|
<copyrightDate keyDate="yes" encoding="w3cdtf" qualifier="approximate" point="start">dt:copyrightDate</copyrightDate>
|
120
120
|
<copyrightDate encoding="w3cdtf" qualifier="approximate" point="start">dt:copyrightDate2</copyrightDate>
|
121
|
+
<dateOther keyDate="yes" type="dt:dateOtherType" encoding="w3cdtf" qualifier="approximate" point="start" script="dt:dateOtherScript" transliteration="dt:dateOtherTransliteration">dt:dateOther</dateOther>
|
122
|
+
<dateOther type="dt:dateOtherType" encoding="w3cdtf" qualifier="approximate" point="start" script="dt:dateOtherScript" transliteration="dt:dateOtherTransliteration">dt:dateOther2</dateOther>
|
121
123
|
<edition>or:edition</edition>
|
122
124
|
<issuance>continuing</issuance>
|
123
125
|
</originInfo>
|
124
|
-
<originInfo displayLabel="or2:displayLabel" eventType="or2:eventType">
|
126
|
+
<originInfo displayLabel="or2:displayLabel" eventType="or2:eventType" altRepGroup="or2:altRepGroup">
|
125
127
|
<place>
|
126
128
|
<placeTerm type="code" authority="marcgac" authorityURI="or2:pl:authorityURI" valueURI="or2:pl:valueURI">or2:pl:placeCode</placeTerm>
|
127
129
|
<placeTerm type="text" authority="marcgac" authorityURI="or2:pl:authorityURI" valueURI="or2:pl:valueURI">or2:pl:placeText</placeTerm>
|
128
130
|
</place>
|
129
|
-
<publisher>or2:publisher</publisher>
|
131
|
+
<publisher lang="or2:publisherLanguage" script="or2:publisherScript" transliteration="or2:publisherTransliteration">or2:publisher</publisher>
|
130
132
|
<dateCreated keyDate="yes" encoding="w3cdtf" qualifier="approximate" point="start">or2:dt:dateCreated</dateCreated>
|
131
133
|
<dateCreated encoding="w3cdtf" qualifier="approximate" point="start">or2:dt:dateCreated2</dateCreated>
|
132
134
|
<dateIssued keyDate="yes" encoding="w3cdtf" qualifier="approximate" point="start">or2:dt:dateIssued</dateIssued>
|
@@ -135,6 +137,9 @@
|
|
135
137
|
<dateCaptured encoding="w3cdtf" qualifier="approximate" point="start">or2:dt:dateCaptured2</dateCaptured>
|
136
138
|
<copyrightDate keyDate="yes" encoding="w3cdtf" qualifier="approximate" point="start">or2:dt:copyrightDate</copyrightDate>
|
137
139
|
<copyrightDate encoding="w3cdtf" qualifier="approximate" point="start">or2:dt:copyrightDate2</copyrightDate>
|
140
|
+
<dateOther keyDate="yes" type="or2:dt:dateOtherType" encoding="w3cdtf" qualifier="approximate" point="start" script="or2:dt:dateOtherScript" transliteration="or2:dt:dateOtherTransliteration">or2:dt:dateOther</dateOther>
|
141
|
+
<dateOther type="or2:dt:dateOtherType" encoding="w3cdtf" qualifier="approximate" point="start" script="or2:dt:dateOtherScript" transliteration="or2:dt:dateOtherTransliteration">or2:dt:dateOther2</dateOther>
|
142
|
+
<edition>or2:edition</edition>
|
138
143
|
<issuance>continuing</issuance>
|
139
144
|
</originInfo>
|
140
145
|
<language>
|
@@ -159,42 +164,42 @@
|
|
159
164
|
<internetMediaType>ph1:internetMediaType3</internetMediaType>
|
160
165
|
<internetMediaType>ph1:internetMediaType4</internetMediaType>
|
161
166
|
<internetMediaType>ph1:internetMediaType5</internetMediaType>
|
162
|
-
<note displayLabel="ph1:displayLabel1">ph1:note1</note>
|
163
|
-
<note displayLabel="ph1:displayLabel2">ph1:note2</note>
|
164
|
-
<note displayLabel="ph1:displayLabel3">ph1:note3</note>
|
165
|
-
<note displayLabel="ph1:displayLabel4">ph1:note4</note>
|
166
|
-
<note displayLabel="ph1:displayLabel5">ph1:note5</note>
|
167
|
-
<note displayLabel="ph1:displayLabel6">ph1:note6</note>
|
168
|
-
<note displayLabel="ph1:displayLabel7">ph1:note7</note>
|
169
|
-
<note displayLabel="ph1:displayLabel8">ph1:note8</note>
|
170
|
-
<note displayLabel="ph1:displayLabel9">ph1:note9</note>
|
167
|
+
<note displayLabel="ph1:displayLabel1" lang="ph1:language1" script="ph1:script1" transliteration="ph1:transliteration1">ph1:note1</note>
|
168
|
+
<note displayLabel="ph1:displayLabel2" lang="ph1:language2" script="ph1:script2" transliteration="ph1:transliteration2">ph1:note2</note>
|
169
|
+
<note displayLabel="ph1:displayLabel3" lang="ph1:language3" script="ph1:script3" transliteration="ph1:transliteration3">ph1:note3</note>
|
170
|
+
<note displayLabel="ph1:displayLabel4" lang="ph1:language4" script="ph1:script4" transliteration="ph1:transliteration4">ph1:note4</note>
|
171
|
+
<note displayLabel="ph1:displayLabel5" lang="ph1:language5" script="ph1:script5" transliteration="ph1:transliteration5">ph1:note5</note>
|
172
|
+
<note displayLabel="ph1:displayLabel6" lang="ph1:language6" script="ph1:script6" transliteration="ph1:transliteration6">ph1:note6</note>
|
173
|
+
<note displayLabel="ph1:displayLabel7" lang="ph1:language7" script="ph1:script7" transliteration="ph1:transliteration7">ph1:note7</note>
|
174
|
+
<note displayLabel="ph1:displayLabel8" lang="ph1:language8" script="ph1:script8" transliteration="ph1:transliteration8">ph1:note8</note>
|
175
|
+
<note displayLabel="ph1:displayLabel9" lang="ph1:language9" script="ph1:script9" transliteration="ph1:transliteration9">ph1:note9</note>
|
171
176
|
</physicalDescription>
|
172
177
|
<physicalDescription>
|
173
178
|
<form authority="ph2:formAuthority" authorityURI="ph2:formAuthorityURI" valueURI="ph2:formValueURI">ph2:form</form>
|
174
179
|
<extent>ph2:extent</extent>
|
175
180
|
<reformattingQuality>access</reformattingQuality>
|
176
181
|
<digitalOrigin>born digital</digitalOrigin>
|
177
|
-
<note displayLabel="ph2:displayLabel1">ph2:note1</note>
|
182
|
+
<note displayLabel="ph2:displayLabel1" lang="ph2:language1" script="ph2:script1" transliteration="ph2:transliteration1">ph2:note1</note>
|
178
183
|
</physicalDescription>
|
179
184
|
<physicalDescription>
|
180
185
|
<form authority="ph3:formAuthority" authorityURI="ph3:formAuthorityURI" valueURI="ph3:formValueURI">ph3:form</form>
|
181
186
|
<extent>ph3:extent</extent>
|
182
187
|
<reformattingQuality>access</reformattingQuality>
|
183
188
|
<digitalOrigin>born digital</digitalOrigin>
|
184
|
-
<note displayLabel="ph3:displayLabel1">ph3:note1</note>
|
189
|
+
<note displayLabel="ph3:displayLabel1" lang="ph3:language1" script="ph3:script1" transliteration="ph3:transliteration1">ph3:note1</note>
|
185
190
|
</physicalDescription>
|
186
|
-
<abstract displayLabel="ab:displayLabel">ab:abstract</abstract>
|
187
|
-
<tableOfContents displayLabel="tc:displayLabel">tc:tableOfContents</tableOfContents>
|
188
|
-
<note type="no1:type" displayLabel="no1:displayLabel">no1:note</note>
|
189
|
-
<note type="no2:type" displayLabel="no2:displayLabel">no2:note</note>
|
190
|
-
<note type="no3:type" displayLabel="no3:displayLabel">no3:note</note>
|
191
|
-
<note type="no4:type" displayLabel="no4:displayLabel">no4:note</note>
|
192
|
-
<note type="no5:type" displayLabel="no5:displayLabel">no5:note</note>
|
193
|
-
<subject authority="sn1:authority" authorityURI="sn1:authorityURI" valueURI="sn1:valueURI">
|
194
|
-
<name type="personal" authority="sn1:p1:nm:authority" authorityURI="sn1:p1:nm:authorityURI" valueURI="sn1:p1:nm:valueURI">
|
191
|
+
<abstract displayLabel="ab:displayLabel" lang="ab:language" script="ab:script" transliteration="ab:transliteration">ab:abstract</abstract>
|
192
|
+
<tableOfContents displayLabel="tc:displayLabel" lang="tc:language" script="tc:script" transliteration="tc:transliteration">tc:tableOfContents</tableOfContents>
|
193
|
+
<note type="no1:type" displayLabel="no1:displayLabel" lang="no1:language" script="no1:script" transliteration="no1:transliteration" altRepGroup="no1:altRepGroup">no1:note</note>
|
194
|
+
<note type="no2:type" displayLabel="no2:displayLabel" lang="no2:language" script="no2:script" transliteration="no2:transliteration" altRepGroup="no2:altRepGroup">no2:note</note>
|
195
|
+
<note type="no3:type" displayLabel="no3:displayLabel" lang="no3:language" script="no3:script" transliteration="no3:transliteration" altRepGroup="no3:altRepGroup">no3:note</note>
|
196
|
+
<note type="no4:type" displayLabel="no4:displayLabel" lang="no4:language" script="no4:script" transliteration="no4:transliteration" altRepGroup="no4:altRepGroup">no4:note</note>
|
197
|
+
<note type="no5:type" displayLabel="no5:displayLabel" lang="no5:language" script="no5:script" transliteration="no5:transliteration" altRepGroup="no5:altRepGroup">no5:note</note>
|
198
|
+
<subject authority="sn1:authority" authorityURI="sn1:authorityURI" valueURI="sn1:valueURI" altRepGroup="sn1:altRepGroup">
|
199
|
+
<name type="personal" authority="sn1:p1:nm:authority" authorityURI="sn1:p1:nm:authorityURI" valueURI="sn1:p1:nm:valueURI" lang="sn1:p1:nm:language" script="sn1:p1:nm:script" transliteration="sn1:p1:nm:transliteration">
|
195
200
|
<namePart>sn1:p1:name</namePart>
|
196
201
|
</name>
|
197
|
-
<titleInfo type="abbreviated" authority="sn1:p1:ti:authority" authorityURI="sn1:p1:ti:authorityURI" valueURI="sn1:p1:ti:valueURI">
|
202
|
+
<titleInfo type="abbreviated" authority="sn1:p1:ti:authority" authorityURI="sn1:p1:ti:authorityURI" valueURI="sn1:p1:ti:valueURI" lang="sn1:p1:ti:language" script="sn1:p1:ti:script" transliteration="sn1:p1:ti:transliteration">
|
198
203
|
<title>sn1:p1:title</title>
|
199
204
|
<subTitle>sn1:p1:subTitle</subTitle>
|
200
205
|
<partNumber>sn1:p1:partNumber</partNumber>
|
@@ -205,11 +210,11 @@
|
|
205
210
|
<topic authority="sn1:p4:authority" authorityURI="sn1:p4:authorityURI" valueURI="sn1:p4:valueURI">sn1:p4:value</topic>
|
206
211
|
<topic authority="sn1:p5:authority" authorityURI="sn1:p5:authorityURI" valueURI="sn1:p5:valueURI">sn1:p5:value</topic>
|
207
212
|
</subject>
|
208
|
-
<subject authority="sn2:authority" authorityURI="sn2:authorityURI" valueURI="sn2:valueURI">
|
209
|
-
<name type="personal" authority="sn2:p1:nm:authority" authorityURI="sn2:p1:nm:authorityURI" valueURI="sn2:p1:nm:valueURI">
|
213
|
+
<subject authority="sn2:authority" authorityURI="sn2:authorityURI" valueURI="sn2:valueURI" altRepGroup="sn2:altRepGroup">
|
214
|
+
<name type="personal" authority="sn2:p1:nm:authority" authorityURI="sn2:p1:nm:authorityURI" valueURI="sn2:p1:nm:valueURI" lang="sn2:p1:nm:language" script="sn2:p1:nm:script" transliteration="sn2:p1:nm:transliteration">
|
210
215
|
<namePart>sn2:p1:name</namePart>
|
211
216
|
</name>
|
212
|
-
<titleInfo type="abbreviated" authority="sn2:p1:ti:authority" authorityURI="sn2:p1:ti:authorityURI" valueURI="sn2:p1:ti:valueURI">
|
217
|
+
<titleInfo type="abbreviated" authority="sn2:p1:ti:authority" authorityURI="sn2:p1:ti:authorityURI" valueURI="sn2:p1:ti:valueURI" lang="sn2:p1:ti:language" script="sn2:p1:ti:script" transliteration="sn2:p1:ti:transliteration">
|
213
218
|
<title>sn2:p1:title</title>
|
214
219
|
<subTitle>sn2:p1:subTitle</subTitle>
|
215
220
|
<partNumber>sn2:p1:partNumber</partNumber>
|
@@ -220,11 +225,11 @@
|
|
220
225
|
<topic authority="sn2:p4:authority" authorityURI="sn2:p4:authorityURI" valueURI="sn2:p4:valueURI">sn2:p4:value</topic>
|
221
226
|
<topic authority="sn2:p5:authority" authorityURI="sn2:p5:authorityURI" valueURI="sn2:p5:valueURI">sn2:p5:value</topic>
|
222
227
|
</subject>
|
223
|
-
<subject authority="sn3:authority" authorityURI="sn3:authorityURI" valueURI="sn3:valueURI">
|
224
|
-
<name type="personal" authority="sn3:p1:nm:authority" authorityURI="sn3:p1:nm:authorityURI" valueURI="sn3:p1:nm:valueURI">
|
228
|
+
<subject authority="sn3:authority" authorityURI="sn3:authorityURI" valueURI="sn3:valueURI" altRepGroup="sn3:altRepGroup">
|
229
|
+
<name type="personal" authority="sn3:p1:nm:authority" authorityURI="sn3:p1:nm:authorityURI" valueURI="sn3:p1:nm:valueURI" lang="sn3:p1:nm:language" script="sn3:p1:nm:script" transliteration="sn3:p1:nm:transliteration">
|
225
230
|
<namePart>sn3:p1:name</namePart>
|
226
231
|
</name>
|
227
|
-
<titleInfo type="abbreviated" authority="sn3:p1:ti:authority" authorityURI="sn3:p1:ti:authorityURI" valueURI="sn3:p1:ti:valueURI">
|
232
|
+
<titleInfo type="abbreviated" authority="sn3:p1:ti:authority" authorityURI="sn3:p1:ti:authorityURI" valueURI="sn3:p1:ti:valueURI" lang="sn3:p1:ti:language" script="sn3:p1:ti:script" transliteration="sn3:p1:ti:transliteration">
|
228
233
|
<title>sn3:p1:title</title>
|
229
234
|
<subTitle>sn3:p1:subTitle</subTitle>
|
230
235
|
<partNumber>sn3:p1:partNumber</partNumber>
|
@@ -235,11 +240,11 @@
|
|
235
240
|
<topic authority="sn3:p4:authority" authorityURI="sn3:p4:authorityURI" valueURI="sn3:p4:valueURI">sn3:p4:value</topic>
|
236
241
|
<topic authority="sn3:p5:authority" authorityURI="sn3:p5:authorityURI" valueURI="sn3:p5:valueURI">sn3:p5:value</topic>
|
237
242
|
</subject>
|
238
|
-
<subject authority="sn4:authority" authorityURI="sn4:authorityURI" valueURI="sn4:valueURI">
|
239
|
-
<name type="personal" authority="sn4:p1:nm:authority" authorityURI="sn4:p1:nm:authorityURI" valueURI="sn4:p1:nm:valueURI">
|
243
|
+
<subject authority="sn4:authority" authorityURI="sn4:authorityURI" valueURI="sn4:valueURI" altRepGroup="sn4:altRepGroup">
|
244
|
+
<name type="personal" authority="sn4:p1:nm:authority" authorityURI="sn4:p1:nm:authorityURI" valueURI="sn4:p1:nm:valueURI" lang="sn4:p1:nm:language" script="sn4:p1:nm:script" transliteration="sn4:p1:nm:transliteration">
|
240
245
|
<namePart>sn4:p1:name</namePart>
|
241
246
|
</name>
|
242
|
-
<titleInfo type="abbreviated" authority="sn4:p1:ti:authority" authorityURI="sn4:p1:ti:authorityURI" valueURI="sn4:p1:ti:valueURI">
|
247
|
+
<titleInfo type="abbreviated" authority="sn4:p1:ti:authority" authorityURI="sn4:p1:ti:authorityURI" valueURI="sn4:p1:ti:valueURI" lang="sn4:p1:ti:language" script="sn4:p1:ti:script" transliteration="sn4:p1:ti:transliteration">
|
243
248
|
<title>sn4:p1:title</title>
|
244
249
|
<subTitle>sn4:p1:subTitle</subTitle>
|
245
250
|
<partNumber>sn4:p1:partNumber</partNumber>
|
@@ -250,11 +255,11 @@
|
|
250
255
|
<topic authority="sn4:p4:authority" authorityURI="sn4:p4:authorityURI" valueURI="sn4:p4:valueURI">sn4:p4:value</topic>
|
251
256
|
<topic authority="sn4:p5:authority" authorityURI="sn4:p5:authorityURI" valueURI="sn4:p5:valueURI">sn4:p5:value</topic>
|
252
257
|
</subject>
|
253
|
-
<subject authority="sn5:authority" authorityURI="sn5:authorityURI" valueURI="sn5:valueURI">
|
254
|
-
<name type="personal" authority="sn5:p1:nm:authority" authorityURI="sn5:p1:nm:authorityURI" valueURI="sn5:p1:nm:valueURI">
|
258
|
+
<subject authority="sn5:authority" authorityURI="sn5:authorityURI" valueURI="sn5:valueURI" altRepGroup="sn5:altRepGroup">
|
259
|
+
<name type="personal" authority="sn5:p1:nm:authority" authorityURI="sn5:p1:nm:authorityURI" valueURI="sn5:p1:nm:valueURI" lang="sn5:p1:nm:language" script="sn5:p1:nm:script" transliteration="sn5:p1:nm:transliteration">
|
255
260
|
<namePart>sn5:p1:name</namePart>
|
256
261
|
</name>
|
257
|
-
<titleInfo type="abbreviated" authority="sn5:p1:ti:authority" authorityURI="sn5:p1:ti:authorityURI" valueURI="sn5:p1:ti:valueURI">
|
262
|
+
<titleInfo type="abbreviated" authority="sn5:p1:ti:authority" authorityURI="sn5:p1:ti:authorityURI" valueURI="sn5:p1:ti:valueURI" lang="sn5:p1:ti:language" script="sn5:p1:ti:script" transliteration="sn5:p1:ti:transliteration">
|
258
263
|
<title>sn5:p1:title</title>
|
259
264
|
<subTitle>sn5:p1:subTitle</subTitle>
|
260
265
|
<partNumber>sn5:p1:partNumber</partNumber>
|
@@ -265,35 +270,35 @@
|
|
265
270
|
<topic authority="sn5:p4:authority" authorityURI="sn5:p4:authorityURI" valueURI="sn5:p4:valueURI">sn5:p4:value</topic>
|
266
271
|
<topic authority="sn5:p5:authority" authorityURI="sn5:p5:authorityURI" valueURI="sn5:p5:valueURI">sn5:p5:value</topic>
|
267
272
|
</subject>
|
268
|
-
<subject authority="su1:authority" authorityURI="su1:authorityURI" valueURI="su1:valueURI">
|
273
|
+
<subject authority="su1:authority" authorityURI="su1:authorityURI" valueURI="su1:valueURI" lang="su1:language" script="su1:script" transliteration="su1:transliteration" altRepGroup="su1:altRepGroup">
|
269
274
|
<topic authority="su1:p1:authority" authorityURI="su1:p1:authorityURI" valueURI="su1:p1:valueURI">su1:p1:value</topic>
|
270
275
|
<topic authority="su1:p2:authority" authorityURI="su1:p2:authorityURI" valueURI="su1:p2:valueURI">su1:p2:value</topic>
|
271
276
|
<topic authority="su1:p3:authority" authorityURI="su1:p3:authorityURI" valueURI="su1:p3:valueURI">su1:p3:value</topic>
|
272
277
|
<topic authority="su1:p4:authority" authorityURI="su1:p4:authorityURI" valueURI="su1:p4:valueURI">su1:p4:value</topic>
|
273
278
|
<topic authority="su1:p5:authority" authorityURI="su1:p5:authorityURI" valueURI="su1:p5:valueURI">su1:p5:value</topic>
|
274
279
|
</subject>
|
275
|
-
<subject authority="su2:authority" authorityURI="su2:authorityURI" valueURI="su2:valueURI">
|
280
|
+
<subject authority="su2:authority" authorityURI="su2:authorityURI" valueURI="su2:valueURI" lang="su2:language" script="su2:script" transliteration="su2:transliteration" altRepGroup="su2:altRepGroup">
|
276
281
|
<topic authority="su2:p1:authority" authorityURI="su2:p1:authorityURI" valueURI="su2:p1:valueURI">su2:p1:value</topic>
|
277
282
|
<topic authority="su2:p2:authority" authorityURI="su2:p2:authorityURI" valueURI="su2:p2:valueURI">su2:p2:value</topic>
|
278
283
|
<topic authority="su2:p3:authority" authorityURI="su2:p3:authorityURI" valueURI="su2:p3:valueURI">su2:p3:value</topic>
|
279
284
|
<topic authority="su2:p4:authority" authorityURI="su2:p4:authorityURI" valueURI="su2:p4:valueURI">su2:p4:value</topic>
|
280
285
|
<topic authority="su2:p5:authority" authorityURI="su2:p5:authorityURI" valueURI="su2:p5:valueURI">su2:p5:value</topic>
|
281
286
|
</subject>
|
282
|
-
<subject authority="su3:authority" authorityURI="su3:authorityURI" valueURI="su3:valueURI">
|
287
|
+
<subject authority="su3:authority" authorityURI="su3:authorityURI" valueURI="su3:valueURI" lang="su3:language" script="su3:script" transliteration="su3:transliteration" altRepGroup="su3:altRepGroup">
|
283
288
|
<topic authority="su3:p1:authority" authorityURI="su3:p1:authorityURI" valueURI="su3:p1:valueURI">su3:p1:value</topic>
|
284
289
|
<topic authority="su3:p2:authority" authorityURI="su3:p2:authorityURI" valueURI="su3:p2:valueURI">su3:p2:value</topic>
|
285
290
|
<topic authority="su3:p3:authority" authorityURI="su3:p3:authorityURI" valueURI="su3:p3:valueURI">su3:p3:value</topic>
|
286
291
|
<topic authority="su3:p4:authority" authorityURI="su3:p4:authorityURI" valueURI="su3:p4:valueURI">su3:p4:value</topic>
|
287
292
|
<topic authority="su3:p5:authority" authorityURI="su3:p5:authorityURI" valueURI="su3:p5:valueURI">su3:p5:value</topic>
|
288
293
|
</subject>
|
289
|
-
<subject authority="su4:authority" authorityURI="su4:authorityURI" valueURI="su4:valueURI">
|
294
|
+
<subject authority="su4:authority" authorityURI="su4:authorityURI" valueURI="su4:valueURI" lang="su4:language" script="su4:script" transliteration="su4:transliteration" altRepGroup="su4:altRepGroup">
|
290
295
|
<topic authority="su4:p1:authority" authorityURI="su4:p1:authorityURI" valueURI="su4:p1:valueURI">su4:p1:value</topic>
|
291
296
|
<topic authority="su4:p2:authority" authorityURI="su4:p2:authorityURI" valueURI="su4:p2:valueURI">su4:p2:value</topic>
|
292
297
|
<topic authority="su4:p3:authority" authorityURI="su4:p3:authorityURI" valueURI="su4:p3:valueURI">su4:p3:value</topic>
|
293
298
|
<topic authority="su4:p4:authority" authorityURI="su4:p4:authorityURI" valueURI="su4:p4:valueURI">su4:p4:value</topic>
|
294
299
|
<topic authority="su4:p5:authority" authorityURI="su4:p5:authorityURI" valueURI="su4:p5:valueURI">su4:p5:value</topic>
|
295
300
|
</subject>
|
296
|
-
<subject authority="su5:authority" authorityURI="su5:authorityURI" valueURI="su5:valueURI">
|
301
|
+
<subject authority="su5:authority" authorityURI="su5:authorityURI" valueURI="su5:valueURI" lang="su5:language" script="su5:script" transliteration="su5:transliteration" altRepGroup="su5:altRepGroup">
|
297
302
|
<topic authority="su5:p1:authority" authorityURI="su5:p1:authorityURI" valueURI="su5:p1:valueURI">su5:p1:value</topic>
|
298
303
|
<topic authority="su5:p2:authority" authorityURI="su5:p2:authorityURI" valueURI="su5:p2:valueURI">su5:p2:value</topic>
|
299
304
|
<topic authority="su5:p3:authority" authorityURI="su5:p3:authorityURI" valueURI="su5:p3:valueURI">su5:p3:value</topic>
|
@@ -313,7 +318,7 @@
|
|
313
318
|
<identifier type="id4:type" displayLabel="id4:displayLabel">id4:identifier</identifier>
|
314
319
|
<identifier type="id5:type" displayLabel="id5:displayLabel">id5:identifier</identifier>
|
315
320
|
<location>
|
316
|
-
<physicalLocation type="repository" authority="lo:authority" valueURI="lo:valueURI">lo:repository</physicalLocation>
|
321
|
+
<physicalLocation type="repository" authority="lo:authority" valueURI="lo:valueURI" lang="lo:language" script="lo:script" transliteration="lo:transliteration">lo:repository</physicalLocation>
|
317
322
|
<physicalLocation type="discovery">lo:physicalLocation</physicalLocation>
|
318
323
|
<shelfLocator>lo:callNumber</shelfLocator>
|
319
324
|
<url usage="primary display">lo:purl</url>
|
@@ -418,7 +423,7 @@
|
|
418
423
|
</location>
|
419
424
|
</relatedItem>
|
420
425
|
<extension displayLabel="geo">
|
421
|
-
<rdf:RDF xmlns:gml="http://www.opengis.net/gml/3.2/" xmlns:dc="http://purl.org/dc/elements/1.1/">
|
426
|
+
<rdf:RDF xmlns:gml="http://www.opengis.net/gml/3.2/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:gmd="http://www.isotc211.org/2005/gmd">
|
422
427
|
<rdf:Description rdf:about="ext:purl">
|
423
428
|
<dc:format>ext:dc:format</dc:format>
|
424
429
|
<dc:type>ext:dc:type</dc:type>
|
Binary file
|
@@ -0,0 +1,23 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<xmlDocs xmlns="http://library.stanford.edu/xmlDocs" datetime="2018-04-18 02:31:53PM" sourceFile="point_coord_test.xlsx">
|
3
|
+
<xmlDoc id="descMetadata" objectId="aa22aaa2222">
|
4
|
+
<mods xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.loc.gov/mods/v3" version="3.5" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-5.xsd">
|
5
|
+
<titleInfo>
|
6
|
+
<title>data with long lat</title>
|
7
|
+
</titleInfo>
|
8
|
+
<extension displayLabel="geo">
|
9
|
+
<rdf:RDF xmlns:gml="http://www.opengis.net/gml/3.2/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:gmd="http://www.isotc211.org/2005/gmd">
|
10
|
+
<rdf:Description>
|
11
|
+
<dc:format>image/jpeg</dc:format>
|
12
|
+
<dc:type>Image</dc:type>
|
13
|
+
<gmd:centerPoint>
|
14
|
+
<gml:Point gml:id="ID">
|
15
|
+
<gml:pos>111 222</gml:pos>
|
16
|
+
</gml:Point>
|
17
|
+
</gmd:centerPoint>
|
18
|
+
</rdf:Description>
|
19
|
+
</rdf:RDF>
|
20
|
+
</extension>
|
21
|
+
</mods>
|
22
|
+
</xmlDoc>
|
23
|
+
</xmlDocs>
|
@@ -21,7 +21,8 @@ RSpec.describe Modsulator do
|
|
21
21
|
'SC1049_metadata.xlsx' => 'SC1049_metadata.xml',
|
22
22
|
'edition_physLoc_intmediatype.xlsx' => 'edition_physLoc_intmediatype.xml',
|
23
23
|
'filled_template_20160711.xlsx' => 'filled_template_20160711.xml',
|
24
|
-
'location_url.xlsx' => 'location_url.xml'
|
24
|
+
'location_url.xlsx' => 'location_url.xml',
|
25
|
+
'point_coord_test.xlsx' => 'point_coord_test.xml'
|
25
26
|
}.each do |testfile, results_file|
|
26
27
|
it "converts #{testfile} correctly to valid XML" do
|
27
28
|
generated_xml_string = Modsulator.new(File.join(FIXTURES_DIR, testfile), testfile).convert_rows()
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: modsulator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.5
|
4
|
+
version: 1.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tommy Ingulfsen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: roo
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: stanford-mods-normalizer
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.1'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.1'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: rake
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -167,10 +181,8 @@ files:
|
|
167
181
|
- lib/modsulator/modsulator_sheet.rb
|
168
182
|
- lib/modsulator/modsulator_template.xlsx
|
169
183
|
- lib/modsulator/modsulator_template.xml
|
170
|
-
- lib/modsulator/normalizer.rb
|
171
184
|
- lib/modsulator/validator.rb
|
172
185
|
- spec/features/modsulator_sheet_unit_spec.rb
|
173
|
-
- spec/features/normalizer_unit_spec.rb
|
174
186
|
- spec/features/process_template_spec.rb
|
175
187
|
- spec/features/validator_unit_spec.rb
|
176
188
|
- spec/fixtures/Fitch_Chavez.xlsx
|
@@ -202,6 +214,8 @@ files:
|
|
202
214
|
- spec/fixtures/location_url.xml
|
203
215
|
- spec/fixtures/manifest_v0174.csv
|
204
216
|
- spec/fixtures/manifest_v0174.xml
|
217
|
+
- spec/fixtures/point_coord_test.xlsx
|
218
|
+
- spec/fixtures/point_coord_test.xml
|
205
219
|
- spec/fixtures/roman_coins_mods.xlsx
|
206
220
|
- spec/fixtures/roman_coins_mods.xml
|
207
221
|
- spec/fixtures/test_002.csv
|
@@ -229,13 +243,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
229
243
|
version: '0'
|
230
244
|
requirements: []
|
231
245
|
rubyforge_project:
|
232
|
-
rubygems_version: 2.
|
246
|
+
rubygems_version: 2.7.3
|
233
247
|
signing_key:
|
234
248
|
specification_version: 4
|
235
249
|
summary: Produces (Stanford) MODS XML from spreadsheets.
|
236
250
|
test_files:
|
237
251
|
- spec/features/modsulator_sheet_unit_spec.rb
|
238
|
-
- spec/features/normalizer_unit_spec.rb
|
239
252
|
- spec/features/process_template_spec.rb
|
240
253
|
- spec/features/validator_unit_spec.rb
|
241
254
|
- spec/fixtures/ars0056_manifest.csv
|
@@ -263,6 +276,8 @@ test_files:
|
|
263
276
|
- spec/fixtures/manifest_v0174.xml
|
264
277
|
- spec/fixtures/Matter_manifest.csv
|
265
278
|
- spec/fixtures/Matter_manifest.xml
|
279
|
+
- spec/fixtures/point_coord_test.xlsx
|
280
|
+
- spec/fixtures/point_coord_test.xml
|
266
281
|
- spec/fixtures/PosadaSpreadsheet.xlsx
|
267
282
|
- spec/fixtures/PosadaSpreadsheet.xml
|
268
283
|
- spec/fixtures/roman_coins_mods.xlsx
|
@@ -274,4 +289,3 @@ test_files:
|
|
274
289
|
- spec/integration_tests/integration_spec.rb
|
275
290
|
- spec/lib/modsulator_spec.rb
|
276
291
|
- spec/spec_helper.rb
|
277
|
-
has_rdoc:
|
@@ -1,225 +0,0 @@
|
|
1
|
-
# File "normalizer.rb" - defines a class for normalizing MODS XML according to the Stanford guidelines.
|
2
|
-
|
3
|
-
require 'nokogiri'
|
4
|
-
|
5
|
-
# This class provides methods to normalize MODS XML according to the Stanford guidelines.
|
6
|
-
# @see https://consul.stanford.edu/display/chimera/MODS+validation+and+normalization Requirements (Stanford Consul page - requires login)
|
7
|
-
class Normalizer
|
8
|
-
# Linefeed character entity reference
|
9
|
-
LINEFEED = ' '
|
10
|
-
|
11
|
-
# Select all single <dateCreated> and <dateIssued> fields
|
12
|
-
LONE_DATE_XPATH = '//mods:originInfo/mods:dateCreated[1][not(following-sibling::*[1][self::mods:dateCreated])] | //mods:originInfo/mods:dateIssued[1][not(following-sibling::*[1][self::mods:dateIssued])]'
|
13
|
-
|
14
|
-
# Select all <dateCreated> and <dateIssued> fields
|
15
|
-
DATE_CREATED_ISSUED_XPATH = '//mods:dateCreated | //mods:dateIssued'
|
16
|
-
|
17
|
-
# The official MODS namespace, courtesy of the Library of Congress
|
18
|
-
MODS_NAMESPACE = 'http://www.loc.gov/mods/v3'
|
19
|
-
|
20
|
-
# Selects <abstract>, <tableOfContents> and <note> when no namespace is present
|
21
|
-
LINEFEED_XPATH = '//abstract | //tableOfContents | //note'
|
22
|
-
|
23
|
-
# Selects <abstract>, <tableOfContents> and <note> when a namespace is present
|
24
|
-
LINEFEED_XPATH_NAMESPACED = '//ns:abstract | //ns:tableOfContents | //ns:note'
|
25
|
-
|
26
|
-
|
27
|
-
# Checks if a node has attributes that we make exceptions for. There are two such exceptions.
|
28
|
-
#
|
29
|
-
# * A "collection" attribute with the value "yes" <em>on a typeOfResource tag</em>.
|
30
|
-
# * A "manuscript" attribute with the value "yes" <em>on a typeOfResource tag</em>.
|
31
|
-
#
|
32
|
-
# Nodes that fall under any of these exceptions should not be deleted, even if they have no content.
|
33
|
-
#
|
34
|
-
# @param [Nokogiri::XML::Element] node An XML node.
|
35
|
-
# @return [Boolean] true if the node contains any of the exceptional attributes, false otherwise.
|
36
|
-
def exceptional?(node)
|
37
|
-
return false unless node != nil
|
38
|
-
|
39
|
-
tag = node.name
|
40
|
-
attributes = node.attributes
|
41
|
-
|
42
|
-
return false if(attributes.empty?)
|
43
|
-
|
44
|
-
attributes.each do |key, value|
|
45
|
-
if(tag == 'typeOfResource') # Note that according to the MODS schema, any other value than 'yes' for these attributes is invalid
|
46
|
-
if((key == 'collection' && value.to_s.downcase == 'yes') ||
|
47
|
-
(key == 'manuscript' && value.to_s.downcase == 'yes'))
|
48
|
-
return true
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
return false
|
53
|
-
end
|
54
|
-
|
55
|
-
|
56
|
-
# Recursive helper method for {Normalizer#clean_linefeeds} to do string substitution.
|
57
|
-
#
|
58
|
-
# @param [Nokogiri::XML::Element] node An XML node
|
59
|
-
# @return [String] A string composed of the entire contents of the given node, with substitutions made as described for {#clean_linefeeds}.
|
60
|
-
def substitute_linefeeds(node)
|
61
|
-
new_text = String.new
|
62
|
-
|
63
|
-
# If we substitute in '&#10;' by itself, Nokogiri interprets that and then prints '&amp;#10;' when printing the document later. This
|
64
|
-
# is an ugly way to add linefeed characters in a way that we at least get well-formatted output in the end.
|
65
|
-
if(node.text?)
|
66
|
-
new_text = node.content.gsub(/\r\n/, Nokogiri::HTML(LINEFEED).text).gsub(/\n/, Nokogiri::HTML(LINEFEED).text).gsub(/\r/, Nokogiri::HTML(LINEFEED).text).gsub('\\n', Nokogiri::HTML(LINEFEED).text)
|
67
|
-
else
|
68
|
-
if(node.node_name == 'br')
|
69
|
-
new_text += Nokogiri::HTML(LINEFEED).text
|
70
|
-
elsif(node.node_name == 'p')
|
71
|
-
new_text += Nokogiri::HTML(LINEFEED).text + Nokogiri::HTML(LINEFEED).text
|
72
|
-
end
|
73
|
-
|
74
|
-
node.children.each do |c|
|
75
|
-
new_text += substitute_linefeeds(c)
|
76
|
-
end
|
77
|
-
end
|
78
|
-
return new_text
|
79
|
-
end
|
80
|
-
|
81
|
-
|
82
|
-
# Given the root of an XML document, replaces linefeed characters inside <tableOfContents>, <abstract> and <note> XML node by &#10;
|
83
|
-
# \n, \r, <br> and <br/> are all replaced by a single &#10;
|
84
|
-
# <p> is replaced by two &#10;
|
85
|
-
# </p> is removed
|
86
|
-
# \r\n is replaced by &#10;
|
87
|
-
# Any tags not listed above are removed. MODS 3.5 does not allow for anything other than text inside these three nodes.
|
88
|
-
#
|
89
|
-
# @param [Nokogiri::XML::NodeSet] node_list All <tableOfContents>, <abstract> and <note> elements.
|
90
|
-
# @return [Void] This method doesn't return anything, but introduces UTF-8 linefeed characters in place, as described above.
|
91
|
-
def clean_linefeeds(node_list)
|
92
|
-
node_list.each do |current_node|
|
93
|
-
new_text = substitute_linefeeds(current_node)
|
94
|
-
current_node.children.remove
|
95
|
-
current_node.content = new_text
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
|
100
|
-
# Cleans up the text of a node:
|
101
|
-
#
|
102
|
-
# * Removes extra whitespace at the beginning and end.
|
103
|
-
# * Removes any consecutive whitespace within the string.
|
104
|
-
#
|
105
|
-
# @param [String] s The text of an XML node.
|
106
|
-
# @return [String] The cleaned string, as described. Returns nil if the input is nil, or if the input is an empty string.
|
107
|
-
def clean_text(s)
|
108
|
-
return nil unless s != nil && s != ''
|
109
|
-
return s.gsub!(/\s+/, ' ').strip!
|
110
|
-
end
|
111
|
-
|
112
|
-
|
113
|
-
# Removes empty attributes from a given node.
|
114
|
-
#
|
115
|
-
# @param [Nokogiri::XML::Element] node An XML node.
|
116
|
-
# @return [Void] This method doesn't return anything, but modifies the XML tree starting at the given node.
|
117
|
-
def remove_empty_attributes(node)
|
118
|
-
children = node.children
|
119
|
-
attributes = node.attributes
|
120
|
-
|
121
|
-
attributes.each do |key, value|
|
122
|
-
node.remove_attribute(key) if(value.to_s.strip.empty?)
|
123
|
-
end
|
124
|
-
|
125
|
-
children.each do |c|
|
126
|
-
remove_empty_attributes(c)
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
|
131
|
-
# Removes empty nodes from an XML tree. See {#exceptional?} for nodes that are kept even if empty.
|
132
|
-
#
|
133
|
-
# @param [Nokogiri::XML::Element] node An XML node.
|
134
|
-
# @return [Void] This method doesn't return anything, but modifies the XML tree starting at the given node.
|
135
|
-
def remove_empty_nodes(node)
|
136
|
-
children = node.children
|
137
|
-
|
138
|
-
if(node.text?)
|
139
|
-
if(node.to_s.strip.empty?)
|
140
|
-
node.remove
|
141
|
-
else
|
142
|
-
return
|
143
|
-
end
|
144
|
-
elsif(children.length > 0)
|
145
|
-
children.each do |c|
|
146
|
-
remove_empty_nodes(c)
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
if(!exceptional?(node) && (node.children.length == 0))
|
151
|
-
node.remove
|
152
|
-
end
|
153
|
-
end
|
154
|
-
|
155
|
-
|
156
|
-
# Removes leading and trailing spaces from a node.
|
157
|
-
#
|
158
|
-
# @param [Nokogiri::XML::Element] node An XML node.
|
159
|
-
# @return [Void] This method doesn't return anything, but modifies the entire XML tree starting at the
|
160
|
-
# given node, removing leading and trailing spaces from all text. If the input is nil,
|
161
|
-
# an exception will be raised.
|
162
|
-
def trim_text(node)
|
163
|
-
children = node.children
|
164
|
-
|
165
|
-
if(node.text?)
|
166
|
-
node.parent.content = node.text.strip
|
167
|
-
else
|
168
|
-
children.each do |c|
|
169
|
-
trim_text(c)
|
170
|
-
end
|
171
|
-
end
|
172
|
-
end
|
173
|
-
|
174
|
-
|
175
|
-
# Sometimes there are spurious decimal digits within the date fields. This method removes any trailing decimal points within
|
176
|
-
# <dateCreated> and <dateIssued>.
|
177
|
-
#
|
178
|
-
# @param [Nokogiri::XML::NodeSet] nodes A set of all affected <dateCreated> and <dateIssued> elements.
|
179
|
-
# @return [Void] The given document is modified in place.
|
180
|
-
def clean_date_values(nodes)
|
181
|
-
nodes.each do |current_node|
|
182
|
-
current_node.content = current_node.content.sub(/(.*)\.\d+$/, '\1')
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
# Normalizes the given MODS XML document according to the Stanford guidelines.
|
187
|
-
#
|
188
|
-
# @param [Nokogiri::XML::Element] root The root of a MODS XML document.
|
189
|
-
# @return [Void] The given document is modified in place.
|
190
|
-
def normalize_mods_document(root)
|
191
|
-
node_list = []
|
192
|
-
if(root.namespace.nil?)
|
193
|
-
node_list = root.xpath(LINEFEED_XPATH)
|
194
|
-
else
|
195
|
-
node_list = root.xpath(LINEFEED_XPATH_NAMESPACED, 'ns' => root.namespace.href)
|
196
|
-
end
|
197
|
-
clean_linefeeds(node_list) # Do this before deleting <br> and <p> with remove_empty_nodes()
|
198
|
-
|
199
|
-
remove_empty_attributes(root)
|
200
|
-
remove_empty_nodes(root)
|
201
|
-
trim_text(root)
|
202
|
-
clean_date_values(root.xpath(DATE_CREATED_ISSUED_XPATH, 'mods' => MODS_NAMESPACE))
|
203
|
-
end
|
204
|
-
|
205
|
-
# Normalizes the given MODS XML document according to the Stanford guidelines.
|
206
|
-
#
|
207
|
-
# @deprecated Use normalize_mods_document instead.
|
208
|
-
# @param [Nokogiri::XML::Element] root The root of a MODS XML document.
|
209
|
-
# @return [Void] The given document is modified in place.
|
210
|
-
def normalize_document(root)
|
211
|
-
normalize_mods_document(root)
|
212
|
-
end
|
213
|
-
|
214
|
-
|
215
|
-
# Normalizes the given XML document string according to the Stanford guidelines.
|
216
|
-
#
|
217
|
-
# @param [String] xml_string An XML document
|
218
|
-
# @return [String] The XML string, with normalizations applied.
|
219
|
-
def normalize_xml_string(xml_string)
|
220
|
-
doc = Nokogiri::XML(xml_string)
|
221
|
-
normalize_document(doc.root)
|
222
|
-
doc.to_s
|
223
|
-
end
|
224
|
-
end
|
225
|
-
|