geo_combine 0.2.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +4 -3
- data/Gemfile +2 -1
- data/README.md +107 -26
- data/geo_combine.gemspec +4 -2
- data/lib/geo_combine.rb +8 -1
- data/lib/geo_combine/bounding_box.rb +71 -0
- data/lib/geo_combine/ckan_metadata.rb +112 -0
- data/lib/geo_combine/exceptions.rb +2 -0
- data/lib/geo_combine/formatting.rb +6 -1
- data/lib/geo_combine/geo_blacklight_harvester.rb +204 -0
- data/lib/geo_combine/geoblacklight.rb +62 -13
- data/lib/geo_combine/ogp.rb +229 -0
- data/lib/geo_combine/railtie.rb +7 -0
- data/lib/geo_combine/version.rb +1 -1
- data/lib/tasks/geo_combine.rake +54 -20
- data/lib/xslt/fgdc2html.xsl +105 -157
- data/lib/xslt/iso2html.xsl +1107 -1070
- data/spec/features/iso2html_spec.rb +7 -1
- data/spec/fixtures/docs/ckan.json +456 -0
- data/spec/fixtures/docs/geoblacklight_pre_v1.json +37 -0
- data/spec/fixtures/docs/ogp_harvard_line.json +28 -0
- data/spec/fixtures/docs/ogp_harvard_raster.json +28 -0
- data/spec/fixtures/docs/ogp_tufts_vector.json +31 -0
- data/spec/fixtures/json_docs.rb +20 -0
- data/spec/lib/geo_combine/bounding_box_spec.rb +59 -0
- data/spec/lib/geo_combine/ckan_metadata_spec.rb +114 -0
- data/spec/lib/geo_combine/formatting_spec.rb +6 -0
- data/spec/lib/geo_combine/geo_blacklight_harvester_spec.rb +190 -0
- data/spec/lib/geo_combine/geoblacklight_spec.rb +38 -7
- data/spec/lib/geo_combine/ogp_spec.rb +163 -0
- data/spec/spec_helper.rb +1 -0
- metadata +65 -15
data/lib/geo_combine/version.rb
CHANGED
data/lib/tasks/geo_combine.rake
CHANGED
@@ -1,39 +1,73 @@
|
|
1
|
-
require 'find'
|
2
1
|
require 'net/http'
|
3
2
|
require 'json'
|
4
3
|
require 'rsolr'
|
4
|
+
require 'find'
|
5
|
+
require 'geo_combine/geo_blacklight_harvester'
|
5
6
|
|
6
7
|
namespace :geocombine do
|
8
|
+
commit_within = (ENV['SOLR_COMMIT_WITHIN'] || 5000).to_i
|
7
9
|
ogm_path = ENV['OGM_PATH'] || 'tmp/opengeometadata'
|
8
10
|
solr_url = ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
11
|
+
whitelist = %w[
|
12
|
+
https://github.com/OpenGeoMetadata/big-ten.git
|
13
|
+
]
|
14
|
+
|
15
|
+
desc 'Clone OpenGeoMetadata repositories'
|
16
|
+
task :clone, [:repo] do |_t, args|
|
17
|
+
if args.repo
|
18
|
+
ogm_repos = ["https://github.com/OpenGeoMetadata/#{args.repo}.git"]
|
19
|
+
else
|
20
|
+
ogm_api_uri = URI('https://api.github.com/orgs/opengeometadata/repos')
|
21
|
+
ogm_repos = JSON.parse(Net::HTTP.get(ogm_api_uri)).map do |repo|
|
22
|
+
repo['clone_url'] if repo['size'] > 0
|
23
|
+
end.compact
|
24
|
+
ogm_repos.select! { |repo| whitelist.include?(repo) || repo =~ /(edu|org|uk)\..*\.git$/ }
|
25
|
+
end
|
13
26
|
ogm_repos.each do |repo|
|
14
|
-
|
15
|
-
system "mkdir -p #{ogm_path} && cd #{ogm_path} && git clone --depth 1 #{repo}"
|
16
|
-
end
|
27
|
+
system "echo #{repo} && mkdir -p #{ogm_path} && cd #{ogm_path} && git clone --depth 1 #{repo}"
|
17
28
|
end
|
18
29
|
end
|
30
|
+
|
19
31
|
desc '"git pull" OpenGeoMetadata repositories'
|
20
|
-
task :pull do
|
21
|
-
|
32
|
+
task :pull, [:repo] do |_t, args|
|
33
|
+
paths = if args.repo
|
34
|
+
[File.join(ogm_path, args.repo)]
|
35
|
+
else
|
36
|
+
Dir.glob("#{ogm_path}/*")
|
37
|
+
end
|
38
|
+
paths.each do |path|
|
39
|
+
next unless File.directory?(path)
|
40
|
+
system "echo #{path} && cd #{path} && git pull origin"
|
41
|
+
end
|
22
42
|
end
|
23
|
-
|
43
|
+
|
44
|
+
desc 'Index all of the GeoBlacklight JSON documents'
|
24
45
|
task :index do
|
25
|
-
|
46
|
+
puts "Indexing #{ogm_path} into #{solr_url}"
|
47
|
+
solr = RSolr.connect url: solr_url, adapter: :net_http_persistent
|
26
48
|
Find.find(ogm_path) do |path|
|
27
|
-
next unless path
|
49
|
+
next unless File.basename(path) == 'geoblacklight.json'
|
28
50
|
doc = JSON.parse(File.read(path))
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
51
|
+
[doc].flatten.each do |record|
|
52
|
+
begin
|
53
|
+
puts "Indexing #{record['layer_slug_s']}: #{path}" if $DEBUG
|
54
|
+
solr.update params: { commitWithin: commit_within, overwrite: true },
|
55
|
+
data: [record].to_json,
|
56
|
+
headers: { 'Content-Type' => 'application/json' }
|
57
|
+
rescue RSolr::Error::Http => error
|
58
|
+
puts error
|
59
|
+
end
|
36
60
|
end
|
37
61
|
end
|
62
|
+
solr.commit
|
63
|
+
end
|
64
|
+
|
65
|
+
namespace :geoblacklight_harvester do
|
66
|
+
desc 'Harvest documents from a configured GeoBlacklight instance'
|
67
|
+
task :index, [:site] => [:environment] do |_t, args|
|
68
|
+
raise ArgumentError, 'A site argument is required' unless args.site
|
69
|
+
|
70
|
+
GeoCombine::GeoBlacklightHarvester.new(args.site.to_sym).index
|
71
|
+
end
|
38
72
|
end
|
39
73
|
end
|
data/lib/xslt/fgdc2html.xsl
CHANGED
@@ -1,12 +1,15 @@
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8"?>
|
2
2
|
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
|
3
|
-
|
4
|
-
|
3
|
+
<xsl:output method="html" encoding="utf-8" indent="yes" />
|
4
|
+
<!--
|
5
|
+
fgdc2html.xsl - Transformation from CSDGM/FGDC into HTML
|
5
6
|
Created by Kim Durante, Stanford University Libraries
|
6
7
|
|
7
|
-
|
8
|
+
Modified by Keith Jenkins, Cornell University Library, 2018-01-25
|
9
|
+
to render attributes in a more readable form.
|
10
|
+
-->
|
8
11
|
<xsl:template match="/">
|
9
|
-
<xsl:text disable-output-escaping='yes'><!DOCTYPE html
|
12
|
+
<xsl:text disable-output-escaping='yes'>&lt;!DOCTYPE html&gt;</xsl:text>
|
10
13
|
<html>
|
11
14
|
<head>
|
12
15
|
<title>
|
@@ -64,6 +67,7 @@
|
|
64
67
|
</body>
|
65
68
|
</html>
|
66
69
|
</xsl:template>
|
70
|
+
|
67
71
|
<!-- Identification -->
|
68
72
|
<xsl:template match="idinfo">
|
69
73
|
<div id="fgdc-identification-info">
|
@@ -335,6 +339,7 @@
|
|
335
339
|
</dl>
|
336
340
|
</div>
|
337
341
|
</xsl:template>
|
342
|
+
|
338
343
|
<!-- Data Quality -->
|
339
344
|
<xsl:template match="dataqual">
|
340
345
|
<div id="fgdc-data-quality-info">
|
@@ -524,6 +529,7 @@
|
|
524
529
|
</dl>
|
525
530
|
</div>
|
526
531
|
</xsl:template>
|
532
|
+
|
527
533
|
<!-- Spatial Data Organization -->
|
528
534
|
<xsl:template match="spdoinfo">
|
529
535
|
<div id="fgdc-spatialdataorganization-info">
|
@@ -637,6 +643,7 @@
|
|
637
643
|
</dl>
|
638
644
|
</div>
|
639
645
|
</xsl:template>
|
646
|
+
|
640
647
|
<!-- Spatial Reference -->
|
641
648
|
<xsl:template match="spref">
|
642
649
|
<div id="fgdc-spatialreference-info">
|
@@ -1112,6 +1119,7 @@
|
|
1112
1119
|
</dl>
|
1113
1120
|
</div>
|
1114
1121
|
</xsl:template>
|
1122
|
+
|
1115
1123
|
<!-- Entity and Attribute -->
|
1116
1124
|
<xsl:template match="eainfo">
|
1117
1125
|
<div id="fgdc-entityattribute-info">
|
@@ -1145,7 +1153,7 @@
|
|
1145
1153
|
</dl>
|
1146
1154
|
</dd>
|
1147
1155
|
</xsl:for-each>
|
1148
|
-
<xsl:
|
1156
|
+
<xsl:call-template name='attributes' />
|
1149
1157
|
</xsl:for-each>
|
1150
1158
|
<xsl:for-each select="overview">
|
1151
1159
|
<xsl:for-each select="eaover">
|
@@ -1166,6 +1174,94 @@
|
|
1166
1174
|
</dl>
|
1167
1175
|
</div>
|
1168
1176
|
</xsl:template>
|
1177
|
+
|
1178
|
+
<xsl:template name='attributes'>
|
1179
|
+
<xsl:if test='attr'>
|
1180
|
+
<dt>Attributes</dt>
|
1181
|
+
<dd>
|
1182
|
+
<dl>
|
1183
|
+
<xsl:for-each select="attr">
|
1184
|
+
<dt><xsl:value-of select="attrlabl" /></dt>
|
1185
|
+
<dd>
|
1186
|
+
<xsl:value-of select="attrdef" />
|
1187
|
+
<xsl:apply-templates select="attrdomv" />
|
1188
|
+
<dl>
|
1189
|
+
<xsl:for-each select="begdatea">
|
1190
|
+
<dt>Beginning Date of Attribute Values</dt>
|
1191
|
+
<dd>
|
1192
|
+
<xsl:value-of select="." />
|
1193
|
+
</dd>
|
1194
|
+
</xsl:for-each>
|
1195
|
+
<xsl:for-each select="enddatea">
|
1196
|
+
<dt>Ending Date of Attribute Values</dt>
|
1197
|
+
<dd>
|
1198
|
+
<xsl:value-of select="." />
|
1199
|
+
</dd>
|
1200
|
+
</xsl:for-each>
|
1201
|
+
<xsl:for-each select="attrvai">
|
1202
|
+
<xsl:for-each select="attrva">
|
1203
|
+
<dt>Attribute Value Accuracy</dt>
|
1204
|
+
<dd>
|
1205
|
+
<xsl:value-of select="." />
|
1206
|
+
</dd>
|
1207
|
+
</xsl:for-each>
|
1208
|
+
<xsl:for-each select="attrvae">
|
1209
|
+
<dt>Attribute Value Accuracy Explanation</dt>
|
1210
|
+
<dd>
|
1211
|
+
<xsl:value-of select="." />
|
1212
|
+
</dd>
|
1213
|
+
</xsl:for-each>
|
1214
|
+
</xsl:for-each>
|
1215
|
+
<xsl:for-each select="attrmfrq">
|
1216
|
+
<dt>Attribute Measurement Frequency</dt>
|
1217
|
+
<dd>
|
1218
|
+
<xsl:value-of select="." />
|
1219
|
+
</dd>
|
1220
|
+
</xsl:for-each>
|
1221
|
+
</dl>
|
1222
|
+
</dd>
|
1223
|
+
</xsl:for-each>
|
1224
|
+
</dl>
|
1225
|
+
</dd>
|
1226
|
+
</xsl:if>
|
1227
|
+
</xsl:template>
|
1228
|
+
|
1229
|
+
<xsl:template match="attrdomv[codesetd]">
|
1230
|
+
<xsl:text> (</xsl:text>
|
1231
|
+
<xsl:value-of select="codesetd/codesetn" />
|
1232
|
+
<xsl:apply-templates select="codesetd/codesets/text()" />
|
1233
|
+
<xsl:text>)</xsl:text>
|
1234
|
+
</xsl:template>
|
1235
|
+
|
1236
|
+
<xsl:template match="attrdomv[edom]">
|
1237
|
+
<br />
|
1238
|
+
<button onclick="this.nextElementSibling.style.display = (this.nextElementSibling.style.display==='none') ? '' : 'none';">show/hide coded values</button>
|
1239
|
+
<dl style="display:none">
|
1240
|
+
<xsl:for-each select="edom">
|
1241
|
+
<dt><xsl:value-of select="edomv" /></dt>
|
1242
|
+
<dd><xsl:value-of select="edomvd" /></dd>
|
1243
|
+
</xsl:for-each>
|
1244
|
+
</dl>
|
1245
|
+
</xsl:template>
|
1246
|
+
|
1247
|
+
<xsl:template match="attrdomv[rdom]">
|
1248
|
+
<xsl:text> (</xsl:text>
|
1249
|
+
<xsl:value-of select="rdom/rdommin" />
|
1250
|
+
<xsl:text> to </xsl:text>
|
1251
|
+
<xsl:value-of select="rdom/rdommax" />
|
1252
|
+
<xsl:if test="rdom/attrunit">
|
1253
|
+
<xsl:text> </xsl:text>
|
1254
|
+
<xsl:value-of select="rdom/attrunit" />
|
1255
|
+
</xsl:if>
|
1256
|
+
<xsl:text>)</xsl:text>
|
1257
|
+
</xsl:template>
|
1258
|
+
|
1259
|
+
<xsl:template match="attrdomv[udom]">
|
1260
|
+
<xsl:text> (</xsl:text>
|
1261
|
+
<xsl:value-of select="udom" />
|
1262
|
+
<xsl:text>)</xsl:text>
|
1263
|
+
</xsl:template>
|
1264
|
+
|
1169
1265
|
<!-- Distribution -->
|
1170
1266
|
<xsl:template match="distinfo">
|
1171
1267
|
<div id="fgdc-distribution-info">
|
@@ -1202,6 +1298,7 @@
|
|
1202
1298
|
</dl>
|
1203
1299
|
</div>
|
1204
1300
|
</xsl:template>
|
1301
|
+
|
1205
1302
|
<!-- Metadata -->
|
1206
1303
|
<xsl:template match="metainfo">
|
1207
1304
|
<div id="fgdc-metadata-reference-info">
|
@@ -1265,6 +1362,7 @@
|
|
1265
1362
|
</dl>
|
1266
1363
|
</div>
|
1267
1364
|
</xsl:template>
|
1365
|
+
|
1268
1366
|
<!-- Citation -->
|
1269
1367
|
<xsl:template match="citeinfo">
|
1270
1368
|
<dl>
|
@@ -1362,6 +1460,7 @@
|
|
1362
1460
|
</xsl:for-each>
|
1363
1461
|
</dl>
|
1364
1462
|
</xsl:template>
|
1463
|
+
|
1365
1464
|
<!-- Contact -->
|
1366
1465
|
<xsl:template match="cntinfo">
|
1367
1466
|
<dt>Contact Information</dt>
|
@@ -1478,6 +1577,7 @@
|
|
1478
1577
|
</dl>
|
1479
1578
|
</dd>
|
1480
1579
|
</xsl:template>
|
1580
|
+
|
1481
1581
|
<!-- Time Period Info -->
|
1482
1582
|
<xsl:template match="timeinfo">
|
1483
1583
|
<dt>Time Period Information</dt>
|
@@ -1771,156 +1871,4 @@
|
|
1771
1871
|
<xsl:value-of select="." />
|
1772
1872
|
</dd>
|
1773
1873
|
</xsl:template>
|
1774
|
-
<xsl:template match="attr">
|
1775
|
-
<dt>Attribute</dt>
|
1776
|
-
<dd>
|
1777
|
-
<dl>
|
1778
|
-
<xsl:for-each select="attrlabl">
|
1779
|
-
<dt>Attribute Label</dt>
|
1780
|
-
<dd>
|
1781
|
-
<xsl:value-of select="." />
|
1782
|
-
</dd>
|
1783
|
-
</xsl:for-each>
|
1784
|
-
<xsl:for-each select="attrdef">
|
1785
|
-
<dt>Attribute Definition</dt>
|
1786
|
-
<dd>
|
1787
|
-
<xsl:value-of select="." />
|
1788
|
-
</dd>
|
1789
|
-
</xsl:for-each>
|
1790
|
-
<xsl:for-each select="attrdefs">
|
1791
|
-
<dt>Attribute Definition Source</dt>
|
1792
|
-
<dd>
|
1793
|
-
<xsl:value-of select="." />
|
1794
|
-
</dd>
|
1795
|
-
</xsl:for-each>
|
1796
|
-
<xsl:for-each select="attrdomv">
|
1797
|
-
<dt>Attribute Domain Values</dt>
|
1798
|
-
<dd>
|
1799
|
-
<dl>
|
1800
|
-
<xsl:for-each select="edom">
|
1801
|
-
<dt>Enumerated Domain</dt>
|
1802
|
-
<dd>
|
1803
|
-
<dl>
|
1804
|
-
<xsl:for-each select="edomv">
|
1805
|
-
<dt>Enumerated Domain Value</dt>
|
1806
|
-
<dd>
|
1807
|
-
<xsl:value-of select="." />
|
1808
|
-
</dd>
|
1809
|
-
</xsl:for-each>
|
1810
|
-
<xsl:for-each select="edomvd">
|
1811
|
-
<dt>Enumerated Domain Value Definition</dt>
|
1812
|
-
<dd>
|
1813
|
-
<xsl:value-of select="." />
|
1814
|
-
</dd>
|
1815
|
-
</xsl:for-each>
|
1816
|
-
<xsl:for-each select="edomvds">
|
1817
|
-
<dt>Enumerated Domain Value Definition Source</dt>
|
1818
|
-
<dd>
|
1819
|
-
<xsl:value-of select="." />
|
1820
|
-
</dd>
|
1821
|
-
</xsl:for-each>
|
1822
|
-
<xsl:apply-templates select="attr" />
|
1823
|
-
</dl>
|
1824
|
-
</dd>
|
1825
|
-
</xsl:for-each>
|
1826
|
-
<xsl:for-each select="rdom">
|
1827
|
-
<dt>Range Domain</dt>
|
1828
|
-
<dd>
|
1829
|
-
<dl>
|
1830
|
-
<xsl:for-each select="rdommin">
|
1831
|
-
<dt>Range Domain Minimum</dt>
|
1832
|
-
<dd>
|
1833
|
-
<xsl:value-of select="." />
|
1834
|
-
</dd>
|
1835
|
-
</xsl:for-each>
|
1836
|
-
<xsl:for-each select="rdommax">
|
1837
|
-
<dt>Range Domain Maximum</dt>
|
1838
|
-
<dd>
|
1839
|
-
<xsl:value-of select="." />
|
1840
|
-
</dd>
|
1841
|
-
</xsl:for-each>
|
1842
|
-
<xsl:for-each select="attrunit">
|
1843
|
-
<dt>Attribute Units of Measure</dt>
|
1844
|
-
<dd>
|
1845
|
-
<xsl:value-of select="." />
|
1846
|
-
</dd>
|
1847
|
-
</xsl:for-each>
|
1848
|
-
<xsl:for-each select="attrmres">
|
1849
|
-
<dt>Attribute Measurement Resolution</dt>
|
1850
|
-
<dd>
|
1851
|
-
<xsl:value-of select="." />
|
1852
|
-
</dd>
|
1853
|
-
</xsl:for-each>
|
1854
|
-
<xsl:apply-templates select="attr" />
|
1855
|
-
</dl>
|
1856
|
-
</dd>
|
1857
|
-
</xsl:for-each>
|
1858
|
-
<xsl:for-each select="codesetd">
|
1859
|
-
<dt>Codeset Domain</dt>
|
1860
|
-
<dd>
|
1861
|
-
<dl>
|
1862
|
-
<xsl:for-each select="codesetn">
|
1863
|
-
<dt>Codeset Name</dt>
|
1864
|
-
<dd>
|
1865
|
-
<xsl:value-of select="." />
|
1866
|
-
</dd>
|
1867
|
-
</xsl:for-each>
|
1868
|
-
<xsl:for-each select="codesets">
|
1869
|
-
<dt>Codeset Source</dt>
|
1870
|
-
<dd>
|
1871
|
-
<xsl:value-of select="." />
|
1872
|
-
</dd>
|
1873
|
-
</xsl:for-each>
|
1874
|
-
</dl>
|
1875
|
-
</dd>
|
1876
|
-
</xsl:for-each>
|
1877
|
-
<xsl:for-each select="udom">
|
1878
|
-
<dt>Unrepresentable Domain</dt>
|
1879
|
-
<dd>
|
1880
|
-
<xsl:value-of select="." />
|
1881
|
-
</dd>
|
1882
|
-
</xsl:for-each>
|
1883
|
-
</dl>
|
1884
|
-
</dd>
|
1885
|
-
</xsl:for-each>
|
1886
|
-
<xsl:for-each select="begdatea">
|
1887
|
-
<dt>Beginning Date of Attribute Values</dt>
|
1888
|
-
<dd>
|
1889
|
-
<xsl:value-of select="." />
|
1890
|
-
</dd>
|
1891
|
-
</xsl:for-each>
|
1892
|
-
<xsl:for-each select="enddatea">
|
1893
|
-
<dt>Ending Date of Attribute Values</dt>
|
1894
|
-
<dd>
|
1895
|
-
<xsl:value-of select="." />
|
1896
|
-
</dd>
|
1897
|
-
</xsl:for-each>
|
1898
|
-
<xsl:for-each select="attrvai">
|
1899
|
-
<dt>Attribute Value Accuracy Information</dt>
|
1900
|
-
<dd>
|
1901
|
-
<dl>
|
1902
|
-
<xsl:for-each select="attrva">
|
1903
|
-
<dt>Attribute Value Accuracy</dt>
|
1904
|
-
<dd>
|
1905
|
-
<xsl:value-of select="." />
|
1906
|
-
</dd>
|
1907
|
-
</xsl:for-each>
|
1908
|
-
<xsl:for-each select="attrvae">
|
1909
|
-
<dt>Attribute Value Accuracy Explanation</dt>
|
1910
|
-
<dd>
|
1911
|
-
<xsl:value-of select="." />
|
1912
|
-
</dd>
|
1913
|
-
</xsl:for-each>
|
1914
|
-
</dl>
|
1915
|
-
</dd>
|
1916
|
-
</xsl:for-each>
|
1917
|
-
<xsl:for-each select="attrmfrq">
|
1918
|
-
<dt>Attribute Measurement Frequency</dt>
|
1919
|
-
<dd>
|
1920
|
-
<xsl:value-of select="." />
|
1921
|
-
</dd>
|
1922
|
-
</xsl:for-each>
|
1923
|
-
</dl>
|
1924
|
-
</dd>
|
1925
|
-
</xsl:template>
|
1926
1874
|
</xsl:stylesheet>
|