geoblacklight-schema 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +3 -0
  3. data/LICENSE +14 -0
  4. data/README.md +44 -0
  5. data/bin/fgdc2mods.rb +5 -0
  6. data/bin/mods2geoblacklight.rb +5 -0
  7. data/bin/xsltproc-saxon +14 -0
  8. data/conf/protwords.txt +21 -0
  9. data/conf/schema.xml +158 -0
  10. data/conf/solrconfig.xml +160 -0
  11. data/conf/stopwords_en.txt +34 -0
  12. data/conf/synonyms.txt +29 -0
  13. data/examples/Gemfile +4 -0
  14. data/examples/generate-example-doc.rb +42 -0
  15. data/examples/selected.json +5787 -0
  16. data/examples/upload-to-solr.rb +50 -0
  17. data/geoblacklight-schema.gemspec +23 -0
  18. data/lib/geoblacklight/gazetteer.csv +1011 -0
  19. data/lib/geoblacklight/gazetteer.rb +104 -0
  20. data/lib/xslt/arcgis_to_iso19110.xsl +364 -0
  21. data/lib/xslt/fgdc2mods.xsl +1007 -0
  22. data/lib/xslt/iso2mods.xsl +939 -0
  23. data/lib/xslt/mods2geoblacklight.xsl +268 -0
  24. data/lib/xslt/mods2ogp.xsl +195 -0
  25. data/tools/fgdc2html/Gemfile +2 -0
  26. data/tools/fgdc2html/fgdc2html.css +71 -0
  27. data/tools/fgdc2html/fgdc2html.js +6 -0
  28. data/tools/fgdc2html/fgdc2html.xsl +1034 -0
  29. data/tools/fgdc2html/render.rb +30 -0
  30. data/tools/iso2html/Gemfile +2 -0
  31. data/tools/iso2html/iso-html.xsl +1745 -0
  32. data/tools/iso2html/render.rb +24 -0
  33. data/tools/iso2html/utils/convert-enumerations.xsl +97 -0
  34. data/tools/iso2html/utils/convert-latlong.xsl +73 -0
  35. data/tools/iso2html/utils/decode-uri/base.css +408 -0
  36. data/tools/iso2html/utils/decode-uri/index.html +29 -0
  37. data/tools/iso2html/utils/elements-fgdc.xml +824 -0
  38. data/tools/iso2html/utils/elements-iso.xml +636 -0
  39. data/tools/iso2html/utils/printFormatted.xsl +267 -0
  40. data/tools/iso2html/utils/printTextLines.xsl +192 -0
  41. data/tools/iso2html/utils/replace-newlines.xsl +97 -0
  42. data/tools/iso2html/utils/replace-string.xsl +80 -0
  43. data/tools/iso2html/utils/strip-digits.xsl +60 -0
  44. data/tools/iso2html/utils/url-decode.xsl +87 -0
  45. data/tools/iso2html/utils/wrap-text.xsl +174 -0
  46. data/tools/ogp/0_download.rb +96 -0
  47. data/tools/ogp/1_validate.rb +225 -0
  48. data/tools/ogp/2_transform.rb +438 -0
  49. data/tools/ogp/3_stanford.rb +35 -0
  50. data/tools/ogp/4_select.rb +189 -0
  51. data/tools/ogp/5_ingest.rb +55 -0
  52. data/tools/ogp/Gemfile +2 -0
  53. data/tools/solr/Gemfile +3 -0
  54. data/tools/solr/purge.rb +33 -0
  55. data/tools/solr/upload.rb +35 -0
  56. data/vendor/.keep +0 -0
  57. metadata +131 -0
@@ -0,0 +1,104 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'csv'
4
+
5
+ # Gazetteer data look like this:
6
+ # "l_kw","geonames_kw","geonames_id","lc_kw","lc_id"
7
+ # "Ahmadābād District (India)","Ahmadābād",1279234,"Ahmadābād (India : District)","n78019943"
8
module GeoBlacklightSchema
  # Keyword gazetteer loaded from the gazetteer.csv file that sits next to
  # this source file. Each CSV row maps a local keyword to a GeoNames
  # placename/id and a Library of Congress keyword/id.
  #
  # The registry is a Hash keyed by the (stripped) keyword. A value is either
  # a Hash with the keys :geonames_placename, :geonames_id, :loc_keyword and
  # :loc_id, or nil for a keyword that is listed but has neither a GeoNames
  # placename nor an LC keyword (see #blank?).
  class Gazetteer
    include Enumerable

    CSV_FN = File.join(File.dirname(__FILE__), 'gazetteer.csv')

    # Reads CSV_FN and builds the registry. Columns are read by position:
    # keyword, geonames placename, geonames id, LC keyword, LC id.
    def initialize
      @registry = {}
      CSV.foreach(CSV_FN, :encoding => 'UTF-8', :headers => true) do |row|
        # Normalize every column to a stripped String (nil becomes "").
        keyword, placename, geonames_id, loc_keyword, loc_id =
          (0..4).map { |i| row[i].to_s.strip }

        # Fall back to the GeoNames placename when the keyword column is empty.
        key = keyword.empty? ? placename : keyword
        next if key.empty? # nothing usable to index this row by

        if placename.empty? && loc_keyword.empty?
          # Known keyword without any authority data.
          @registry[key] = nil
        else
          @registry[key] = {
            :geonames_placename => _presence(placename),
            # Kept as an Integer; a missing id is stored as 0.
            :geonames_id => geonames_id.to_i,
            :loc_keyword => _presence(loc_keyword),
            :loc_id => _presence(loc_id)
          }
        end
      end
    end

    # Yields every keyword in sorted order.
    # @return [Enumerator] when called without a block
    def each
      return enum_for(:each) unless block_given?
      @registry.keys.sort.each { |k| yield k }
    end

    # @return [String] geonames name
    def find_placename(k)
      _get(k, :geonames_placename)
    end

    # @return [Integer] geonames id
    def find_id(k)
      _get(k, :geonames_id)
    end

    # @return [String] library of congress name
    def find_loc_keyword(k)
      _get(k, :loc_keyword)
    end

    # @return [String] library of congress valueURI, or nil when the stored
    #   id does not match one of the recognized forms
    def find_loc_uri(k)
      # \A and \z anchor the whole string; ^ and $ would also match around
      # embedded newlines.
      case _get(k, :loc_id)
      when /\Alcsh:(\d+)\z/, /\Ash(\d+)\z/
        "http://id.loc.gov/authorities/subjects/sh#{Regexp.last_match(1)}"
      when /\Alcnaf:(\d+)\z/, /\An(\d+)\z/
        "http://id.loc.gov/authorities/names/n#{Regexp.last_match(1)}"
      when /\Ano(\d+)\z/
        "http://id.loc.gov/authorities/names/no#{Regexp.last_match(1)}"
      end
    end

    # @return [String] authority name ('lcsh' or 'lcnaf'), or nil
    def find_loc_authority(k)
      case _get(k, :loc_id)
      when /\A(lcsh|lcnaf):/
        Regexp.last_match(1)
      when /\Ash\d+\z/
        'lcsh'
      when /\A(n|no)\d+\z/
        'lcnaf'
      else
        # default to lcsh if an LC keyword is present
        'lcsh' unless find_loc_keyword(k).nil?
      end
    end

    # @see http://www.geonames.org/ontology/documentation.html
    # @return [String] geonames uri (includes trailing / as specified), or
    #   nil when no id is recorded for the keyword
    def find_placename_uri(k)
      id = _get(k, :geonames_id)
      # A missing id is stored as 0 (String#to_i), so treat it like nil.
      return nil if id.nil? || id.to_i <= 0
      "http://sws.geonames.org/#{id}/"
    end

    # @param id [Integer] geonames id
    # @return [String] The keyword, or nil when no entry has that id
    def find_keyword_by_id(id)
      @registry.each do |k, v|
        # Blank keywords are stored with a nil value; skip them.
        return k if v && v[:geonames_id] == id
      end
      nil
    end

    # @return [Boolean] true when the keyword is listed but carries no
    #   authority data
    def blank?(k)
      key = k.to_s.strip
      @registry.include?(key) && @registry[key].nil?
    end

    private

    # Looks up field +i+ of the entry for keyword +k+.
    # @return [Object] the field value, or nil for unknown/blank keywords
    # @raise [ArgumentError] if +i+ is not a Symbol
    def _get(k, i)
      raise ArgumentError, "field must be a Symbol, got #{i.inspect}" unless i.is_a? Symbol
      entry = @registry[k.to_s.strip]
      entry.nil? ? nil : entry[i]
    end

    # @return [String, nil] +str+ itself, or nil when it is empty
    def _presence(str)
      str.empty? ? nil : str
    end

  end
end
104
+
@@ -0,0 +1,364 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!-- ArcGIS to ISO19110 feature cataloging methodology transformation
3
+ This file transforms ArcGIS formatted metadata into ISO19110 xml. Metadata expresses entity and attribute information and is linked to the 19139 record using the 'uuid' attribute.
4
+ created 2013-07 by Kim Durante, Stanford University Libraries. -->
5
+
6
+ <xsl:stylesheet version="1.0"
7
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
8
+ xmlns:gco="http://www.isotc211.org/2005/gco"
9
+ xmlns:gfc="http://www.isotc211.org/2005/gfc"
10
+ xmlns:gmd="http://www.isotc211.org/2005/gmd"
11
+ xmlns:gml="http://www.opengis.net/gml/3.2"
12
+ xmlns:gmx="http://www.isotc211.org/2005/gmx"
13
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
14
+ <xsl:output method="xml" encoding="utf-8" indent="yes"/>
15
+
16
<!-- Document root: emits the gfc:FC_FeatureCatalogue wrapper, stamps it with
     the catalogue citation id as its uuid, then hands the children of the
     root to the remaining templates to fill in the catalogue body. -->
<xsl:template match="/">
  <gfc:FC_FeatureCatalogue
      xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xmlns:gco="http://www.isotc211.org/2005/gco"
      xmlns:gfc="http://www.isotc211.org/2005/gfc"
      xmlns:gmd="http://www.isotc211.org/2005/gmd"
      xmlns:gmx="http://www.isotc211.org/2005/gmx"
      xmlns:gml="http://www.opengis.net/gml/3.2"
      xmlns:xlink="http://www.w3.org/1999/xlink"
      xmlns="http://www.isotc211.org/2005/gfc"
      xsi:schemaLocation="http://www.isotc211.org/2005/gfc http://www.isotc211.org/2005/gfc/gfc.xsd"
      uuid="{metadata/contInfo/FetCatDesc/catCitation/citId}">
    <xsl:apply-templates select="node()"/>
  </gfc:FC_FeatureCatalogue>
</xsl:template>
29
+
30
<!-- Builds the body of the feature catalogue from an ArcGIS <metadata>
     element: catalogue name, scope (theme keywords joined with "; "),
     version info, language, character set, producer, and one
     gfc:featureType per eainfo/detailed entry with its attributes and
     listed values. -->
<xsl:template match="metadata">
  <xsl:variable name="catCitation" select="contInfo/FetCatDesc/catCitation"/>

  <gmx:name>
    <gco:CharacterString>
      <xsl:value-of select="$catCitation/resTitle"/>
    </gco:CharacterString>
  </gmx:name>
  <gmx:scope>
    <gco:CharacterString>
      <xsl:for-each select="dataIdInfo/themeKeys/keyword">
        <xsl:value-of select="."/>
        <xsl:if test="position()!=last()">
          <xsl:text>; </xsl:text>
        </xsl:if>
      </xsl:for-each>
    </gco:CharacterString>
  </gmx:scope>
  <gmx:versionNumber gco:nilReason="unknown"/>
  <gmx:versionDate>
    <!-- Prefer the catalogue citation's publication date and fall back to
         the dataset citation's. Each branch outputs the year of the date it
         tested. When neither exists, nilReason goes on the property element
         itself, as for the other nil properties in this stylesheet. -->
    <xsl:choose>
      <xsl:when test="$catCitation/date/pubDate">
        <gco:Date>
          <xsl:value-of select="substring($catCitation/date/pubDate,1,4)"/>
        </gco:Date>
      </xsl:when>
      <xsl:when test="dataIdInfo/idCitation/date/pubDate">
        <gco:Date>
          <xsl:value-of select="substring(dataIdInfo/idCitation/date/pubDate,1,4)"/>
        </gco:Date>
      </xsl:when>
      <xsl:otherwise>
        <xsl:attribute name="gco:nilReason">unknown</xsl:attribute>
      </xsl:otherwise>
    </xsl:choose>
  </gmx:versionDate>
  <gmx:language>
    <gco:CharacterString>
      <xsl:value-of select="'eng; US'"/>
    </gco:CharacterString>
  </gmx:language>
  <gmx:characterSet>
    <gmd:MD_CharacterSetCode
        codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_CharacterSetCode"
        codeListValue="utf8"
        codeSpace="ISOTC211/19115"/>
  </gmx:characterSet>

  <!-- Producer: the organisation name wins over the individual name; no
       producer element is written when neither is present. -->
  <xsl:choose>
    <xsl:when test="$catCitation/citRespParty/rpOrgName">
      <gfc:producer>
        <gmd:CI_ResponsibleParty>
          <gmd:organisationName>
            <gco:CharacterString>
              <xsl:value-of select="$catCitation/citRespParty/rpOrgName"/>
            </gco:CharacterString>
          </gmd:organisationName>
          <xsl:call-template name="fc-role-code">
            <xsl:with-param name="value" select="'originator'"/>
            <xsl:with-param name="space" select="'006'"/>
          </xsl:call-template>
        </gmd:CI_ResponsibleParty>
      </gfc:producer>
    </xsl:when>
    <xsl:when test="$catCitation/citRespParty/rpIndName">
      <gfc:producer>
        <gmd:CI_ResponsibleParty>
          <gmd:individualName>
            <gco:CharacterString>
              <xsl:value-of select="$catCitation/citRespParty/rpIndName"/>
            </gco:CharacterString>
          </gmd:individualName>
          <xsl:call-template name="fc-role-code">
            <xsl:with-param name="value" select="'originator'"/>
            <xsl:with-param name="space" select="'006'"/>
          </xsl:call-template>
        </gmd:CI_ResponsibleParty>
      </gfc:producer>
    </xsl:when>
  </xsl:choose>

  <xsl:for-each select="eainfo/detailed">
    <gfc:featureType>
      <gfc:FC_FeatureType>
        <xsl:for-each select="enttyp">
          <gfc:typeName>
            <xsl:for-each select="enttypl">
              <gco:LocalName>
                <xsl:value-of select="."/>
              </gco:LocalName>
            </xsl:for-each>
          </gfc:typeName>
          <gfc:definition>
            <xsl:for-each select="enttypd">
              <gco:CharacterString>
                <xsl:value-of select="."/>
              </gco:CharacterString>
            </xsl:for-each>
          </gfc:definition>
        </xsl:for-each>
        <gfc:isAbstract>
          <gco:Boolean>false</gco:Boolean>
        </gfc:isAbstract>
        <!-- Back-reference to the catalogue; same id as the root uuid. -->
        <gfc:featureCatalogue uuidref="{//contInfo/FetCatDesc/catCitation/citId}"/>

        <xsl:for-each select="attr">
          <gfc:carrierOfCharacteristics>
            <gfc:FC_FeatureAttribute>
              <xsl:for-each select="attrlabl">
                <gfc:memberName>
                  <gco:LocalName>
                    <xsl:value-of select="."/>
                  </gco:LocalName>
                </gfc:memberName>
              </xsl:for-each>

              <xsl:for-each select="attrdef">
                <gfc:definition>
                  <gco:CharacterString>
                    <xsl:value-of select="."/>
                  </gco:CharacterString>
                </gfc:definition>
              </xsl:for-each>

              <gfc:cardinality gco:nilReason="unknown"/>

              <xsl:for-each select="attrdefs">
                <gfc:definitionReference>
                  <gfc:FC_DefinitionReference>
                    <xsl:call-template name="fc-definition-source"/>
                  </gfc:FC_DefinitionReference>
                </gfc:definitionReference>
              </xsl:for-each>

              <xsl:for-each select="attrtype">
                <gfc:valueType>
                  <gco:TypeName>
                    <gco:aName>
                      <gco:CharacterString>
                        <xsl:value-of select="."/>
                      </gco:CharacterString>
                    </gco:aName>
                  </gco:TypeName>
                </gfc:valueType>
              </xsl:for-each>

              <!-- Enumerated domain values. NOTE(review): no rule here maps
                   range domains; confirm whether that is intentional. -->
              <xsl:for-each select="attrdomv/edom">
                <gfc:listedValue>
                  <gfc:FC_ListedValue>
                    <xsl:for-each select="edomv">
                      <gfc:label>
                        <gco:CharacterString>
                          <xsl:value-of select="."/>
                        </gco:CharacterString>
                      </gfc:label>
                    </xsl:for-each>
                    <xsl:for-each select="edomvd">
                      <gfc:definition>
                        <gco:CharacterString>
                          <xsl:value-of select="."/>
                        </gco:CharacterString>
                      </gfc:definition>
                    </xsl:for-each>
                    <xsl:for-each select="edomvds">
                      <gfc:definitionReference>
                        <gfc:FC_DefinitionReference>
                          <xsl:call-template name="fc-definition-source"/>
                        </gfc:FC_DefinitionReference>
                      </gfc:definitionReference>
                    </xsl:for-each>
                  </gfc:FC_ListedValue>
                </gfc:listedValue>
              </xsl:for-each>

              <!-- Codeset domains. The parent element is attrdomv, as for
                   edom above; the earlier spelling "attudomv" is still
                   accepted so any input that relied on it keeps working. -->
              <xsl:for-each select="attrdomv/codesetd | attudomv/codesetd">
                <gfc:listedValue>
                  <gfc:FC_ListedValue>
                    <xsl:for-each select="codesetn">
                      <gfc:label>
                        <gco:CharacterString>
                          <xsl:value-of select="."/>
                        </gco:CharacterString>
                      </gfc:label>
                    </xsl:for-each>
                    <gfc:definitionReference>
                      <gfc:FC_DefinitionReference>
                        <xsl:for-each select="codesets">
                          <xsl:call-template name="fc-definition-source"/>
                        </xsl:for-each>
                      </gfc:FC_DefinitionReference>
                    </gfc:definitionReference>
                  </gfc:FC_ListedValue>
                </gfc:listedValue>
              </xsl:for-each>
            </gfc:FC_FeatureAttribute>
          </gfc:carrierOfCharacteristics>
        </xsl:for-each>
      </gfc:FC_FeatureType>
    </gfc:featureType>
  </xsl:for-each>
</xsl:template>

<!-- Helper: writes a gmd:role holding a CI_RoleCode with the given
     codeListValue ($value) and codeSpace ($space). -->
<xsl:template name="fc-role-code">
  <xsl:param name="value"/>
  <xsl:param name="space"/>
  <gmd:role>
    <gmd:CI_RoleCode
        codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode"
        codeListValue="{$value}"
        codeSpace="{$space}"/>
  </gmd:role>
</xsl:template>

<!-- Helper: writes a gfc:definitionSource whose citation title and
     responsible organisation are both the string value of the current node;
     the citation date is marked unknown. -->
<xsl:template name="fc-definition-source">
  <gfc:definitionSource>
    <gfc:FC_DefinitionSource>
      <gfc:source>
        <gmd:CI_Citation>
          <gmd:title>
            <gco:CharacterString>
              <xsl:value-of select="."/>
            </gco:CharacterString>
          </gmd:title>
          <gmd:date gco:nilReason="unknown"/>
          <gmd:citedResponsibleParty>
            <gmd:CI_ResponsibleParty>
              <gmd:organisationName>
                <gco:CharacterString>
                  <xsl:value-of select="."/>
                </gco:CharacterString>
              </gmd:organisationName>
              <xsl:call-template name="fc-role-code">
                <xsl:with-param name="value" select="'resourceProvider'"/>
                <xsl:with-param name="space" select="'001'"/>
              </xsl:call-template>
            </gmd:CI_ResponsibleParty>
          </gmd:citedResponsibleParty>
        </gmd:CI_Citation>
      </gfc:source>
    </gfc:FC_DefinitionSource>
  </gfc:definitionSource>
</xsl:template>
364
+ </xsl:stylesheet>