geohydra 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -1
- data/.travis.yml +1 -2
- data/Gemfile +1 -8
- data/Gemfile.lock +87 -102
- data/README.md +2 -2
- data/VERSION +1 -1
- data/bin/accession.rb +99 -89
- data/bin/assemble.rb +288 -247
- data/bin/assemble_data.rb +54 -51
- data/bin/assemble_placenames.rb +85 -85
- data/bin/build_stage_options.rb +24 -18
- data/bin/derive_wgs84.rb +65 -66
- data/bin/extract_thumbnail.rb +38 -37
- data/bin/geo2mods.rb +78 -0
- data/bin/geohydra +14 -5
- data/bin/ingest_arcgis.rb +80 -60
- data/bin/iso2geo.rb +64 -0
- data/bin/loader_postgis.rb +121 -227
- data/bin/run_task.rb +23 -0
- data/bin/sync_geoserver_metadata.rb +132 -127
- data/bin/xsltproc-saxon +6 -0
- data/geohydra.gemspec +6 -4
- data/lib/geohydra.rb +5 -0
- data/lib/geohydra/accession.rb +24 -13
- data/lib/geohydra/{arcgis_to_iso19139_fc.xsl → arcgis_to_iso19110.xsl} +0 -0
- data/lib/geohydra/gazetteer.csv +842 -36
- data/lib/geohydra/gazetteer.rb +48 -24
- data/lib/geohydra/mods2geoblacklight.xsl +248 -0
- data/lib/geohydra/mods2ogp.xsl +5 -8
- data/lib/geohydra/transform.rb +8 -2
- data/lib/geohydra/utils.rb +6 -0
- data/lib/geohydra/workflow/gisAssemblyWF.rb +109 -0
- data/lib/geohydra/workflow/gisAssemblyWF.xml +85 -0
- data/lib/geohydra/workflow/gisDeliveryWF.rb +33 -0
- data/lib/geohydra/workflow/gisDeliveryWF.xml +36 -0
- data/lib/geohydra/workflow/gisDiscoveryWF.rb +55 -0
- data/lib/geohydra/workflow/gisDiscoveryWF.xml +28 -0
- data/lib/geohydra/workflow/task.rb +82 -0
- data/ogp/README.md +350 -0
- data/ogp/download.rb +92 -0
- data/ogp/fgdc2mods.sh +9 -0
- data/ogp/fgdc2mods.xsl +884 -0
- data/ogp/ingest.rb +48 -0
- data/ogp/select.rb +20 -0
- data/ogp/transform.rb +354 -0
- data/ogp/validate.rb +182 -0
- data/{bin → scripts}/ingest_tufts.rb +0 -0
- data/scripts/iso2html/doit.sh +15 -0
- data/scripts/iso2html/main.css +66 -0
- data/scripts/iso2html/pacioos-iso-html.xsl +1749 -0
- data/scripts/iso2html/utils/replace-newlines.xsl +97 -0
- data/scripts/iso2html/utils/replace-string.xsl +80 -0
- data/scripts/iso2html/utils/strip-digits.xsl +60 -0
- data/{bin → scripts}/loader.rb +0 -0
- data/scripts/rename_shapefiles.rb +5 -0
- data/scripts/render_gazetteer.rb +36 -0
- data/{bin → scripts}/seed.rb +0 -0
- data/{bin → scripts}/solr_indexer.rb +0 -0
- data/scripts/status.csv +253 -0
- data/scripts/status.rb +32 -0
- data/{bin → scripts}/validate_data.rb +1 -1
- data/solr/kurma-app-dev/conf/lang/stopwords_en.txt +34 -0
- data/solr/kurma-app-dev/conf/protwords.txt +21 -0
- data/solr/kurma-app-dev/conf/schema.xml +156 -0
- data/solr/kurma-app-dev/conf/solrconfig.xml +161 -0
- data/solr/kurma-app-dev/conf/synonyms.txt +29 -0
- data/solr/kurma-app-dev/purge.sh +8 -0
- data/solr/kurma-app-test/conf/lang/stopwords_en.txt +34 -0
- data/solr/kurma-app-test/conf/protwords.txt +21 -0
- data/solr/kurma-app-test/conf/schema.xml +158 -0
- data/solr/kurma-app-test/conf/solrconfig.xml +161 -0
- data/solr/kurma-app-test/conf/synonyms.txt +29 -0
- data/solr/kurma-app-test/deploy.sh +15 -0
- data/solr/kurma-app-test/purge.sh +8 -0
- data/solr/ogp-dev/purge.sh +1 -2
- data/spec/fixtures/bw938nk9584/temp/{TRIPURA-iso19139-fc.xml → TRIPURA-iso19110.xml} +0 -0
- data/spec/fixtures/cc142xj8436/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/cg716wc7949/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/cm007pv9601/temp/{MEGHALAYA-iso19139-fc.xml → MEGHALAYA-iso19110.xml} +0 -0
- data/spec/fixtures/cp055nb0189/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/cs838pw3418/temp/{OIL_GAS_FIELDS-iso19139-fc.xml → OIL_GAS_FIELDS-iso19110.xml} +0 -0
- data/spec/fixtures/dd308sy5843/temp/{ORISSA-iso19139-fc.xml → ORISSA-iso19110.xml} +0 -0
- data/spec/fixtures/dd452vk1873/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/dg850pt1796/temp/{STATE1951-iso19139-fc.xml → STATE1951-iso19110.xml} +0 -0
- data/spec/fixtures/dn744tf5427/temp/{DISTRICT1991-iso19139-fc.xml → DISTRICT1991-iso19110.xml} +0 -0
- data/spec/fixtures/dq603nz8402/temp/{STATE2001-iso19139-fc.xml → STATE2001-iso19110.xml} +0 -0
- data/spec/fixtures/dv609zt4699/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/dz222hw0585/temp/{PUNJAB-iso19139-fc.xml → PUNJAB-iso19110.xml} +0 -0
- data/spec/fixtures/fd673qb9705/temp/{STATE1971-iso19139-fc.xml → STATE1971-iso19110.xml} +0 -0
- data/spec/fixtures/fg451wp8917/temp/{SIKKIM-iso19139-fc.xml → SIKKIM-iso19110.xml} +0 -0
- data/spec/fixtures/fh247yz0156/temp/{RAJASTHAN-iso19139-fc.xml → RAJASTHAN-iso19110.xml} +0 -0
- data/spec/fixtures/fs487vd1465/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/fs591bn3317/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/fw920bc5473/temp/{PLSS_TWN-iso19139-fc.xml → PLSS_TWN-iso19110.xml} +0 -0
- data/spec/fixtures/gj831wj3625/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/gp075nv3265/temp/{PONDICHERRY-iso19139-fc.xml → PONDICHERRY-iso19110.xml} +0 -0
- data/spec/fixtures/gv800hj8141/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/gw520gz6339/temp/{DADRA_NAGAR_HAVELI_PT-iso19139-fc.xml → DADRA_NAGAR_HAVELI_PT-iso19110.xml} +0 -0
- data/spec/fixtures/gy054hz1045/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/gz352mw6982/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/hb489vm9892/temp/{DISTRICT1981-iso19139-fc.xml → DISTRICT1981-iso19110.xml} +0 -0
- data/spec/fixtures/hw125dq0418/temp/{DELHI-iso19139-fc.xml → DELHI-iso19110.xml} +0 -0
- data/spec/fixtures/hw892mn4587/temp/{KERALA-iso19139-fc.xml → KERALA-iso19110.xml} +0 -0
- data/spec/fixtures/jb371hz3868/temp/{INCOME-iso19139-fc.xml → INCOME-iso19110.xml} +0 -0
- data/spec/fixtures/jc017yk9928/temp/{KARNATAKA-iso19139-fc.xml → KARNATAKA-iso19110.xml} +0 -0
- data/spec/fixtures/jf841ys4828/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/jh802mp2160/temp/{DELHI_PT-iso19139-fc.xml → DELHI_PT-iso19110.xml} +0 -0
- data/spec/fixtures/jj806fc3801/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/jq835yn7161/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/jr455pt6676/temp/{TAMILNADU-iso19139-fc.xml → TAMILNADU-iso19110.xml} +0 -0
- data/spec/fixtures/js637zp2537/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/jv502wg9611/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/jw462ck6560/temp/{JAMMU_KASHMIR-iso19139-fc.xml → JAMMU_KASHMIR-iso19110.xml} +0 -0
- data/spec/fixtures/kj800fb6273/temp/{STATE2011-iso19139-fc.xml → STATE2011-iso19110.xml} +0 -0
- data/spec/fixtures/km504zq3948/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/ks297fy1411/temp/{OFFSH_BLOCKS-iso19139-fc.xml → OFFSH_BLOCKS-iso19110.xml} +0 -0
- data/spec/fixtures/md358hy5049/temp/{MIZORAM-iso19139-fc.xml → MIZORAM-iso19110.xml} +0 -0
- data/spec/fixtures/mg745bq0193/temp/{MADHYA_PRADESH-iso19139-fc.xml → MADHYA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/mh187yx3536/temp/{WEST_BENGAL-iso19139-fc.xml → WEST_BENGAL-iso19110.xml} +0 -0
- data/spec/fixtures/mk488yn6694/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/my216kp3008/temp/{DELHI-iso19139-fc.xml → DELHI-iso19110.xml} +0 -0
- data/spec/fixtures/my504nz9827/temp/{JAMMU_KASHMIR-iso19139-fc.xml → JAMMU_KASHMIR-iso19110.xml} +0 -0
- data/spec/fixtures/ng819jm8700/temp/{MANIPUR-iso19139-fc.xml → MANIPUR-iso19110.xml} +0 -0
- data/spec/fixtures/np020jq2139/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/ns377mt1608/temp/{STATE1991-iso19139-fc.xml → STATE1991-iso19110.xml} +0 -0
- data/spec/fixtures/nw926np8508/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/ny358rm8559/temp/{TRIPURA-iso19139-fc.xml → TRIPURA-iso19110.xml} +0 -0
- data/spec/fixtures/nz176rm8192/temp/{DISTRICT2011-iso19139-fc.xml → DISTRICT2011-iso19110.xml} +0 -0
- data/spec/fixtures/nz252rq2252/temp/{UTTAR_PRADESH-iso19139-fc.xml → UTTAR_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/pd902kb3348/temp/{MADHYA_PRADESH-iso19139-fc.xml → MADHYA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/pz792fz1776/temp/{MAHARASHTRA-iso19139-fc.xml → MAHARASHTRA-iso19110.xml} +0 -0
- data/spec/fixtures/qb767ss4042/temp/{UTTAR_PRADESH-iso19139-fc.xml → UTTAR_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/qc091qw0570/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/qc652vr7204/temp/{ANDHRA_PRADESH_PT-iso19139-fc.xml → ANDHRA_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/qk786js7484/temp/{DISTRICT1961-iso19139-fc.xml → DISTRICT1961-iso19110.xml} +0 -0
- data/spec/fixtures/qn676pg6767/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/qr255jh4074/temp/{LOKSABHA_14-iso19139-fc.xml → LOKSABHA_14-iso19110.xml} +0 -0
- data/spec/fixtures/qr374kj4827/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/qy162js1748/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/rd446vf2633/temp/{NAGALAND-iso19139-fc.xml → NAGALAND-iso19110.xml} +0 -0
- data/spec/fixtures/rf389hf2983/temp/{CHHATTISGARH_PT-iso19139-fc.xml → CHHATTISGARH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/rf859ff4582/temp/{JHARKHAND-iso19139-fc.xml → JHARKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/rh343ds8931/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/rn815xk8157/temp/{SIKKIM-iso19139-fc.xml → SIKKIM-iso19110.xml} +0 -0
- data/spec/fixtures/rq653sz4470/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/rt625ws6022/temp/{GULF_FAIRWAYS-iso19139-fc.xml → GULF_FAIRWAYS-iso19110.xml} +0 -0
- data/spec/fixtures/sc330vf4259/temp/{JHARKHAND-iso19139-fc.xml → JHARKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/sq479mx3086/temp/{OFFSH_PLATF-iso19139-fc.xml → OFFSH_PLATF-iso19110.xml} +0 -0
- data/spec/fixtures/sr686bm4098/temp/{DAMAN_DIU_PT-iso19139-fc.xml → DAMAN_DIU_PT-iso19110.xml} +0 -0
- data/spec/fixtures/sv303sh5583/temp/{ARUNACHAL_PRADESH-iso19139-fc.xml → ARUNACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/sy319nh8520/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/td363vx2792/temp/{HIMACHAL_PRADESH_PT-iso19139-fc.xml → HIMACHAL_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/tf374bd2484/temp/{DISTRICT1951-iso19139-fc.xml → DISTRICT1951-iso19110.xml} +0 -0
- data/spec/fixtures/tj797mj7877/temp/{LOKSABHA_15-iso19139-fc.xml → LOKSABHA_15-iso19110.xml} +0 -0
- data/spec/fixtures/tv060wq5179/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/tv536bn1915/temp/{ARUNACHAL_PRADESH-iso19139-fc.xml → ARUNACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/tz359cc2977/temp/{MANIPUR-iso19139-fc.xml → MANIPUR-iso19110.xml} +0 -0
- data/spec/fixtures/vb525my6511/temp/{UTTARAKHAND-iso19139-fc.xml → UTTARAKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/vh802fs4240/temp/{PONDICHERRY-iso19139-fc.xml → PONDICHERRY-iso19110.xml} +0 -0
- data/spec/fixtures/vk120xn2474/temp/{PLSS_SEC-iso19139-fc.xml → PLSS_SEC-iso19110.xml} +0 -0
- data/spec/fixtures/vn439bc7316/temp/{KERALA-iso19139-fc.xml → KERALA-iso19110.xml} +0 -0
- data/spec/fixtures/vq745jk0695/temp/{MEGHALAYA-iso19139-fc.xml → MEGHALAYA-iso19110.xml} +0 -0
- data/spec/fixtures/vr593vj7147/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/vw911qb5271/temp/{DISTRICT2001-iso19139-fc.xml → DISTRICT2001-iso19110.xml} +0 -0
- data/spec/fixtures/wg680pz0365/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/wg761xn1926/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/wh870qw1934/temp/{PUNJAB-iso19139-fc.xml → PUNJAB-iso19110.xml} +0 -0
- data/spec/fixtures/wk775mm4673/temp/{MAHARASHTRA-iso19139-fc.xml → MAHARASHTRA-iso19110.xml} +0 -0
- data/spec/fixtures/ws171yz2165/temp/{ARUNACHAL_PRADESH_PT-iso19139-fc.xml → ARUNACHAL_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/wt473hz7153/temp/{CHANDIGARH_PT-iso19139-fc.xml → CHANDIGARH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/ww217dj0457/temp/{CO2_PIPE-iso19139-fc.xml → CO2_PIPE-iso19110.xml} +0 -0
- data/spec/fixtures/wy875pk9849/temp/{STATE1961-iso19139-fc.xml → STATE1961-iso19110.xml} +0 -0
- data/spec/fixtures/xb018tk2042/temp/{STATE1981-iso19139-fc.xml → STATE1981-iso19110.xml} +0 -0
- data/spec/fixtures/xg539vw8586/temp/{ORISSA-iso19139-fc.xml → ORISSA-iso19110.xml} +0 -0
- data/spec/fixtures/xv475kp4644/temp/{ASSAM_PT-iso19139-fc.xml → ASSAM_PT-iso19110.xml} +0 -0
- data/spec/fixtures/xy096gc2959/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/xz518gz3362/temp/{UTTARAKHAND-iso19139-fc.xml → UTTARAKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/yh986wy4737/temp/{NAGALAND-iso19139-fc.xml → NAGALAND-iso19110.xml} +0 -0
- data/spec/fixtures/yn187fq4474/temp/{KARNATAKA-iso19139-fc.xml → KARNATAKA-iso19110.xml} +0 -0
- data/spec/fixtures/yn236mw3250/temp/{TAMILNADU-iso19139-fc.xml → TAMILNADU-iso19110.xml} +0 -0
- data/spec/fixtures/yz596nz0112/temp/{WEST_BENGAL-iso19139-fc.xml → WEST_BENGAL-iso19110.xml} +0 -0
- data/spec/fixtures/zk596gy7380/temp/{DISTRICT1971-iso19139-fc.xml → DISTRICT1971-iso19110.xml} +0 -0
- data/spec/fixtures/zn452hh7431/temp/{RAJASTHAN-iso19139-fc.xml → RAJASTHAN-iso19110.xml} +0 -0
- data/spec/fixtures/zt093fw6519/temp/{MIZORAM-iso19139-fc.xml → MIZORAM-iso19110.xml} +0 -0
- data/spec/fixtures/zv925hd6723/temp/{OGWELLS-iso19139-fc.xml → OGWELLS-iso19110.xml} +0 -0
- data/spec/fixtures/zy658cr1728/temp/{ANDAMAAN_NICOBAR_PT-iso19139-fc.xml → ANDAMAAN_NICOBAR_PT-iso19110.xml} +0 -0
- data/spec/fixtures/zz943vx1492/temp/{BASINS-iso19139-fc.xml → BASINS-iso19110.xml} +0 -0
- data/spec/unit/gazetteer_spec.rb +100 -35
- data/spec/unit/task_spec.rb +68 -0
- data/spec/unit/transform_spec.rb +1 -1
- data/spec/unit/utils_spec.rb +17 -3
- data/workflow.rb +35 -0
- metadata +323 -316
data/ogp/ingest.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'rsolr'
|
5
|
+
|
6
|
+
# Loads JSON documents into a Solr core through RSolr.
#
# The constructor connects, yields the instance to the caller's block and
# then always runs #close, so typical usage is:
#
#   IngestOgp.new('core', 'http://host/solr') { |ogp| ogp.ingest('x.json') }
class IngestOgp
  # @param [String] collection name appended to +url+ to form the Solr endpoint
  # @param [String] url base URL of the Solr server
  # @raise [ArgumentError] if +collection+ is not a String
  # @yield [IngestOgp] the connected instance
  def initialize(collection, url)
    raise ArgumentError, 'Collection not defined' unless collection.is_a? String
    @solr = RSolr.connect(:url => (url + '/' + collection))
    begin
      yield self
    ensure
      # Issue the final commit and drop the connection even if the block raised
      close
    end
  end

  # Adds every non-empty Hash found in the JSON array stored in +fn+,
  # committing after each batch of 100 documents and once more at the end.
  # @param [String] fn path of a JSON file holding an array of documents
  def ingest(fn)
    puts "Ingesting #{fn}"
    n = 0
    JSON.parse(File.read(fn)).each do |doc|
      # Skip placeholders such as the trailing {} and anything that is not a record
      next unless doc.is_a?(Hash) && !doc.empty?
      # These two fields are stripped before the document is (re-)added
      doc.delete('_version_')
      doc.delete('timestamp')
      putc "."
      @solr.add doc
      n += 1
      next unless (n % 100).zero?
      @solr.commit
      puts "\ncommit 100 records, #{n} total\n"
    end
    puts "\n#{n} records\n"
    @solr.commit
  end

  # Commits any pending documents and forgets the connection.
  def close
    @solr.commit
    #@solr.optimize
    @solr = nil
  end
end
|
41
|
+
|
42
|
+
|
43
|
+
# __MAIN__
|
44
|
+
# Second argument overrides the default local Solr endpoint
solr_url = ARGV[1] || 'http://localhost:18080/solr'
IngestOgp.new(ARGV[0], solr_url) do |ogp|
  # Ingest every transformed*.json file found in the current directory
  Dir.glob("transformed*.json") { |fn| ogp.ingest(fn) }
end
|
data/ogp/select.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Usage: select.rb
|
4
|
+
|
5
|
+
|
6
|
+
require 'awesome_print'
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
|
10
|
+
# __MAIN__
|
11
|
+
# Keep roughly 1% of the records found in transformed*.json, chosen at random
selected = []
Dir.glob('transformed*.json') do |fn|
  records = JSON::parse(File.read(fn))
  records.each { |record| selected << record if rand < 0.01 }
end
# Show the sample, then persist it as pretty-printed JSON
ap({:selected => selected})
File.open('selected.json', 'wb') { |out| out << JSON.pretty_generate(selected) }
|
data/ogp/transform.rb
ADDED
@@ -0,0 +1,354 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Usage: transform_ogp output.json
|
4
|
+
#
|
5
|
+
# Reads valid*.json in current directory
|
6
|
+
#
|
7
|
+
|
8
|
+
require 'awesome_print'
|
9
|
+
require 'json'
|
10
|
+
require 'uri'
|
11
|
+
require 'date'
|
12
|
+
require 'nokogiri'
|
13
|
+
|
14
|
+
# Transforms an OGP schema into GeoBlacklight. Requires input of a JSON array
|
15
|
+
# of OGP hashes.
|
16
|
+
class TransformOgp
|
17
|
+
|
18
|
+
# Opens the output file, writes the opening bracket of the JSON array and
# yields this instance; #close always runs afterwards.
# @param [String] fn path of the JSON file to write
# @yield [TransformOgp] this instance, ready to transform records
def initialize(fn)
  @output = File.open(fn, 'wb')
  @output.write "[\n"
  @fgdcdir = 'fgdc'
  begin
    yield self
  ensure
    # Terminate the array and release the file handle even when the block raises
    self.close
  end
end
|
25
|
+
|
26
|
+
# @param [String|Array] s the URI to clean up
|
27
|
+
# @return [String] a normalized URI
|
28
|
+
def clean_uri(s)
  # nil and empty values (String or Array) normalize to the empty string
  return '' if s.nil? || s.empty?

  # For a list only the first entry is considered
  candidate = s.is_a?(Array) ? s.first : s
  URI(candidate).to_s
end
|
34
|
+
|
35
|
+
# @param [String] fn filename of JSON array of OGP hash objects
|
36
|
+
# @return [Hash] stats about :accepted vs. :rejected records
|
37
|
+
def transform_file(fn)
  stats = { :accepted => 0, :rejected => 0 }
  puts "Parsing #{fn}"
  # Block form closes the file handle as soon as its content has been read
  json = JSON::parse(File.open(fn, 'rb') { |f| f.read })
  json.each do |doc| # contains JSON Solr query results
    next if doc.empty?
    begin
      transform(doc)
      stats[:accepted] += 1
    rescue ArgumentError => e
      # A rejected record is reported and counted; the rest of the file continues
      puts e
      stats[:rejected] += 1
    end
  end
  stats
end
|
54
|
+
|
55
|
+
# Transforms a single OGP record into a GeoBlacklight record
|
56
|
+
# @param [Hash] layer an OGP hash for a given layer
|
57
|
+
# The record is appended to the output stream followed by a "," separator line.
# @param [Boolean] skip_fgdc when false, the layer's FgdcText is also written
#   as an XML file named after the slug inside the FGDC directory
# @raise [ArgumentError] if Location is missing or is not a JSON object, or
#   if ContentDate is not an RFC 3339 date/time
def transform(layer, skip_fgdc = true)
  id = layer['LayerId'].to_s.strip
  puts "Tranforming #{id}"

  # For URN style @see http://www.ietf.org/rfc/rfc2141.txt
  # For ARK @see https://wiki.ucop.edu/display/Curation/ARK
  prefix = case layer['Institution']
           when 'Stanford' then 'http://purl.stanford.edu/'
           when 'Tufts'    then 'urn:geodata.tufts.edu:'
           when 'MassGIS'  then 'urn:massgis.state.ma.us:'
           when 'Berkeley' then 'http://ark.cdlib.org/ark:/'
           when 'MIT'      then 'urn:arrowsmith.mit.edu:'
           when 'Harvard'  then 'urn:hul.harvard.edu:'
           else ''
           end
  # URI.encode was removed in Ruby 3.0; the default parser's #escape is the
  # routine it used to delegate to
  uuid = prefix + URI::DEFAULT_PARSER.escape(id)

  # Parse out the Location to get the WMS/WFS/WCS URLs
  raise ArgumentError, "ERROR: #{id} no location" if layer['Location'].nil? or layer['Location'].empty?
  begin
    location = JSON::parse(layer['Location'])
  rescue JSON::ParserError
    # Reject the record instead of letting a parser error abort the whole run
    raise ArgumentError, "ERROR: #{id} has malformed location"
  end
  raise ArgumentError, "ERROR: #{id} has malformed location" unless location.is_a? Hash

  # Parse out the bounding box
  s = layer['MinY'].to_f
  w = layer['MinX'].to_f
  n = layer['MaxY'].to_f
  e = layer['MaxX'].to_f

  # Parse out the ContentDate date/time; a missing value becomes '' so that it
  # is rejected with ArgumentError like any other unparsable date
  dt = DateTime.rfc3339(layer['ContentDate'].to_s)
  pub_dt = DateTime.rfc3339('2000-01-01T00:00:00Z') # XXX fake data, get from MODS

  access = layer['Access']
  collection = nil

  # Parse out the PURL and other metadata for Stanford
  if layer['Institution'] == 'Stanford'
    purl = location['purl']
    purl = purl.first if purl.is_a? Array
    purl = uuid if purl.nil? and uuid =~ /^http/
  else
    purl = nil
    # Because OGP does not delimit keywords, we use a heuristic here: a field
    # of four or more words without ';' or ',' is treated as one keyword per word
    %w{PlaceKeywords ThemeKeywords}.each do |k|
      keywords = layer[k]
      next unless keywords.is_a?(String) # missing keywords are simply left alone
      next if keywords =~ /[;,]/ || keywords.split.size < 4
      layer[k] = keywords.split.join(';')
    end
  end

  slug = to_slug(id, layer)

  layer_geom_type = layer['DataType'].to_s.downcase
  layer_geom_type = 'raster' if layer_geom_type == 'paper map'

  # @see https://github.com/OSGeo/Cat-Interop
  %w{wcs wfs wms}.each do |k|
    location[k] = location[k].first if location[k].is_a? Array
  end
  refs = {}
  refs['http://www.opengis.net/def/serviceType/ogc/wcs'] = "#{location['wcs']}" if location['wcs']
  refs['http://www.opengis.net/def/serviceType/ogc/wfs'] = "#{location['wfs']}" if location['wfs']
  refs['http://www.opengis.net/def/serviceType/ogc/wms'] = "#{location['wms']}" if location['wms']
  if purl
    refs["http://schema.org/thumbnailUrl"] = "http://stacks.stanford.edu/file/druid:#{id}/preview.jpg"
    refs["http://schema.org/url"] = "#{clean_uri(purl)}"
    refs["http://schema.org/DownloadAction"] = "http://stacks.stanford.edu/file/druid:#{id}/data.zip"
    refs["http://www.isotc211.org/schemas/2005/gmd/"] = "#{purl}.iso19139"
    refs["http://www.loc.gov/mods/v3"] = "#{purl}.mods"
  end

  # Make the conversion from OGP to GeoBlacklight
  #
  # @see http://dublincore.org/documents/dcmi-terms/
  # @see http://wiki.dublincore.org/index.php/User_Guide/Creating_Metadata
  # @see http://www.ietf.org/rfc/rfc5013.txt
  new_layer = {
    :uuid => uuid,

    # Dublin Core elements
    :dc_creator_sm => string2array(layer['Originator']),
    :dc_description_s => layer['Abstract'],
    :dc_format_s => (
      (layer_geom_type == 'raster') ?
        'GeoTIFF' : # 'image/tiff' :
        'Shapefile' # 'application/x-esri-shapefile'
    ), # XXX: fake data
    :dc_identifier_s => uuid,
    :dc_language_s => 'English', # 'en', # XXX: fake data
    :dc_publisher_s => layer['Publisher'],
    :dc_rights_s => access,
    :dc_subject_sm => string2array(layer['ThemeKeywords']),
    :dc_title_s => layer['LayerDisplayName'],
    :dc_type_s => 'Dataset', # or 'Image' for non-georectified,
                             # or 'PhysicalObject' for non-digitized maps
    # Dublin Core terms
    :dct_isPartOf_sm => (collection.nil? ? nil : [collection]),
    :dct_references_s => refs.to_json.to_s,
    :dct_spatial_sm => string2array(layer['PlaceKeywords']),
    :dct_temporal_sm => [dt.year.to_s],
    :dct_issued_s => pub_dt.year.to_s,
    :dct_provenance_s => layer['Institution'],

    #
    # xmlns:georss="http://www.georss.org/georss"
    # A bounding box is a rectangular region, often used to define the extents
    # of a map or a rough area of interest. A box contains two
    # space-separated latitude-longitude pairs, with each pair separated by
    # whitespace. The first pair is the lower corner, the second is the upper
    # corner.
    :georss_box_s => "#{s} #{w} #{n} #{e}",
    :georss_polygon_s => "#{n} #{w} #{n} #{e} #{s} #{e} #{s} #{w} #{n} #{w}",

    # Layer-specific schema
    :layer_slug_s => slug,
    :layer_id_s => layer['WorkspaceName'] + ':' + layer['Name'],
    # :layer_srs_s => 'EPSG:4326', # XXX: fake data
    :layer_geom_type_s => layer_geom_type.capitalize,
    :layer_modified_dt => Time.now.utc.strftime('%FT%TZ'),

    # derived fields used only by solr, for which copyField is insufficient
    :solr_bbox => "#{w} #{s} #{e} #{n}", # minX minY maxX maxY
    :solr_ne_pt => "#{n},#{e}",
    :solr_sw_pt => "#{s},#{w}",
    :solr_geom => "ENVELOPE(#{w}, #{e}, #{n}, #{s})",
    :solr_year_i => dt.year,
    :solr_issued_dt => pub_dt.strftime('%FT%TZ'), # Solr requires 1995-12-31T23:59:59Z
    :solr_wms_url => location['wms'],
    :solr_wfs_url => location['wfs'],
    :solr_wcs_url => location['wcs']

    # :layer_year_i => dt.year#, # XXX: migrate to copyField
    # :ogp_area_f => layer['Area'],
    # :ogp_center_x_f => layer['CenterX'],
    # :ogp_center_y_f => layer['CenterY'],
    # :ogp_georeferenced_b => (layer['GeoReferenced'].to_s.downcase == 'true'),
    # :ogp_halfheight_f => layer['HalfHeight'],
    # :ogp_halfwidth_f => layer['HalfWidth'],
    # :ogp_layer_id_s => layer['LayerId'],
    # :ogp_name_s => layer['Name'],
    # :ogp_location_s => layer['Location'],
    # :ogp_workspace_s => layer['WorkspaceName']
  }

  # Remove any fields that are blank
  new_layer.delete_if do |_key, v|
    v.nil? || (v.respond_to?(:empty?) && v.empty?)
  end

  # Write the JSON record for the GeoBlacklight layer
  @output.write JSON::pretty_generate(new_layer)
  @output.write "\n,\n"

  unless skip_fgdc or layer['FgdcText'].nil? or layer['FgdcText'].empty?
    xml = Nokogiri::XML(layer['FgdcText'])
    # Block form guarantees the XML file is flushed and closed
    File.open(File.join(@fgdcdir || 'fgdc', slug + '.xml'), 'wb') do |f|
      xml.write_xml_to(f, :encoding => 'UTF-8', :indent => 2)
    end
  end
end
|
222
|
+
|
223
|
+
# Finishes the JSON array and closes the output stream. The empty hash is
# written because every record emitted before it is followed by a comma.
def close
  @output << "\n {} \n]\n"
  @output.close
end
|
227
|
+
|
228
|
+
# @param [String] s has semi-colon/comma/gt delimited array
|
229
|
+
# @return [Array] results as array
|
230
|
+
# Entries are trimmed, blank entries are dropped and duplicates are removed;
# anything that is not a String (including nil) and the empty String give nil.
def string2array(s)
  return nil unless s.is_a?(String)

  if s =~ /[;,>]/
    # Trim before de-duplicating so that " a" and "a" count as the same entry
    s.split(/\s*[;,>]\s*/).collect { |i| i.strip }.reject { |i| i.empty? }.uniq
  elsif s.size > 0
    [s.strip]
  else
    nil
  end
end
|
239
|
+
|
240
|
+
# Builds a lowercase slug of the form "<institution>-<name>" that contains
# only a-z, 0-9 and "-", and that has not been returned before by this
# instance.
# @param [String] id the layer identifier (not used to build the slug)
# @param [Hash] layer an OGP hash; reads Name, LayerDisplayName and Institution
# @return [String] the slug; a random six-digit suffix is appended when the
#   plain slug was already handed out
def to_slug(id, layer)
  # Slugs issued so far, kept per instance rather than in a class variable
  @slugs ||= {}

  # strip out schema and usernames
  name = layer['Name'].to_s
  %w{SDE_DATA. SDE. SDE2. GISPORTAL.GISOWNER01. GISDATA. MORIS.}.each do |schema|
    name = name.sub(schema, '')
  end
  unless name.size > 1
    # use the first word of the title when the name is empty
    name = layer['LayerDisplayName'].to_s.split.first.to_s
  end
  slug = "#{layer['Institution']}-#{name}"

  # slugs should only have a-z, A-Z, 0-9, and -
  slug.gsub!(/[^a-zA-Z0-9\-]/, '-')
  slug.gsub!(/[\-]+/, '-')

  # only lowercase
  slug.downcase!

  # ensure slugs are unique for this pass; keep drawing suffixes until the
  # candidate has not been used, since a random suffix can itself collide
  base = slug
  while @slugs.include?(slug)
    slug = base + '-' + sprintf("%06d", Random.rand(999999))
  end
  @slugs[slug] = true

  slug
end
|
265
|
+
|
266
|
+
# Ensure that the WMS/WFS/WCS location values are as expected
|
267
|
+
# @param [String] id layer identifier, used only in error messages
# @param [String] location JSON object mapping protocol names to a URL or a
#   list of URLs
# @return [String] the location re-serialized as JSON, with every present
#   wms/wcs/wfs value normalized to an Array
# @raise [ArgumentError] if the JSON is invalid, WMS or both WCS and WFS are
#   missing, a value is neither String nor Array, or a URL is not HTTP(S)
def validate_location(id, location)
  begin
    x = JSON::parse(location)
  rescue JSON::ParserError
    raise ArgumentError, "ERROR: #{id}: Invalid JSON: #{location}"
  end
  raise ArgumentError, "ERROR: #{id}: Invalid JSON: #{location}" unless x.is_a?(Hash)

  if x['wms'].nil? or (x['wcs'].nil? and x['wfs'].nil?)
    raise ArgumentError, "ERROR: #{id}: Missing WMS or WCS/WFS: #{x}"
  end

  %w{wms wcs wfs}.each do |protocol|
    next if x[protocol].nil?

    # A single URL is normalized to a one-element list
    x[protocol] = [x[protocol]] if x[protocol].is_a? String
    unless x[protocol].is_a? Array
      raise ArgumentError, "ERROR: #{id}: Unknown #{protocol} value: #{x}"
    end

    x[protocol].each do |url|
      begin
        raise URI::InvalidURIError, 'not a string' unless url.is_a?(String)
        uri = URI.parse(url)
      rescue URI::InvalidURIError
        raise ArgumentError, "ERROR: #{id}: Invalid #{protocol}: #{x}"
      end
      # URI::HTTPS is a subclass of URI::HTTP, so this accepts both schemes
      raise ArgumentError, "ERROR: #{id}: Invalid URL: #{uri}" unless uri.kind_of?(URI::HTTP)
    end
  end

  x.to_json
end
|
301
|
+
|
302
|
+
# @param [Numeric] lon candidate longitude
# @return [Boolean] true when the value lies within -180..180 inclusive
def lon?(lon)
  lon.between?(-180, 180)
end
|
305
|
+
|
306
|
+
# @param [Numeric] lat candidate latitude
# @return [Boolean] true when the value lies within -90..90 inclusive
def lat?(lat)
  lat.between?(-90, 90)
end
|
309
|
+
end
|
310
|
+
|
311
|
+
|
312
|
+
# __MAIN__
|
313
|
+
#
|
314
|
+
# Output defaults to transformed.json when no file name is given
TransformOgp.new(ARGV[0] || 'transformed.json') do |ogp|
  totals = { :accepted => 0, :rejected => 0 }
  # Transform every valid*.json in the current directory, summing the counts
  Dir.glob('valid*.json') do |fn|
    file_stats = ogp.transform_file(fn)
    [:accepted, :rejected].each { |outcome| totals[outcome] += file_stats[outcome] }
  end
  ap({:statistics => totals})
end
|
323
|
+
|
324
|
+
# example input data
|
325
|
+
__END__
|
326
|
+
[
|
327
|
+
{
|
328
|
+
"Abstract": "The boundaries of each supervisorial district in Sonoma County based on 2000 census. Redrawn in 2001 using Autobound.",
|
329
|
+
"Access": "Public",
|
330
|
+
"Area": 0.9463444815860053,
|
331
|
+
"Availability": "Online",
|
332
|
+
"CenterX": -122.942159,
|
333
|
+
"CenterY": 38.4580755,
|
334
|
+
"ContentDate": "2000-01-01T01:01:01Z",
|
335
|
+
"DataType": "Polygon",
|
336
|
+
"FgdcText": "...",
|
337
|
+
"GeoReferenced": true,
|
338
|
+
"HalfHeight": 0.39885650000000084,
|
339
|
+
"HalfWidth": 0.593161000000002,
|
340
|
+
"Institution": "Berkeley",
|
341
|
+
"LayerDisplayName": "SCGISDB2_BASE_ADM_SUPERVISOR",
|
342
|
+
"LayerId": "28722/bk0012h5s52",
|
343
|
+
"Location": "{\"wms\":[\"http://gis.lib.berkeley.edu:8080/geoserver/wms\"],\"tilecache\":[\"http://gis.lib.berkeley.edu:8080/geoserver/gwc/service/wms\"],\"download\":\"\",\"wfs\":[\"http://gis.lib.berkeley.edu:8080/geoserver/wfs\"]}",
|
344
|
+
"MaxX": -122.348998,
|
345
|
+
"MaxY": 38.856932,
|
346
|
+
"MinX": -123.53532,
|
347
|
+
"MinY": 38.059219,
|
348
|
+
"Name": "ADM_SUPERVISOR",
|
349
|
+
"PlaceKeywords": "Sonoma County County of Sonoma Sonoma California Bay Area",
|
350
|
+
"Publisher": "UC Berkeley Libraries",
|
351
|
+
"ThemeKeywords": "Supervisorial districts 1st District 2nd District 3rd District 4th District 5th District",
|
352
|
+
"WorkspaceName": "UCB"
|
353
|
+
}
|
354
|
+
]
|
data/ogp/validate.rb
ADDED
@@ -0,0 +1,182 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Usage: validate_ogp [output.json]
|
4
|
+
#
|
5
|
+
# Requires data/*.json as input and output to valid.json
|
6
|
+
#
|
7
|
+
require 'awesome_print'
|
8
|
+
require 'json'
|
9
|
+
require 'uri'
|
10
|
+
require 'date'
|
11
|
+
|
12
|
+
# Validates OGP layer records taken from JSON Solr query results and writes
# the accepted ones to a single JSON output file.
#
# Every data problem found in a record is reported as an ArgumentError so that
# #validate_file can count the record as rejected and carry on with the next
# one instead of aborting the whole run.
class ValidateOgp
  # Keys that must be present and non-empty in every layer.
  REQUIRED_KEYS = %w[LayerId Name Institution Access MinX MinY MaxX MaxY LayerDisplayName Location].freeze
  # Accepted values for the corresponding layer keys.
  INSTITUTIONS = %w[Berkeley Harvard MIT MassGIS Stanford Tufts].freeze
  DATA_TYPES = ['Line', 'Paper Map', 'Point', 'Polygon', 'Raster', 'LibraryRecord'].freeze
  ACCESS_LEVELS = %w[Public Restricted].freeze
  # Service keys inspected inside the Location JSON.
  PROTOCOLS = %w[wms wcs wfs].freeze

  # Opens the output file, writes the opening bracket of the JSON array and
  # yields the validator to the caller's block.
  #
  # @param fn [String] path of the output file (opened in 'wb' mode)
  # @yieldparam [ValidateOgp] self
  def initialize(fn)
    @wms_servers = {}
    @output = File.open(fn, 'wb')
    @output.write "[\n"
    begin
      yield self
    ensure
      # Terminate the array and release the file handle even if the block raises.
      close
    end
  end

  # Validates every document found under response/docs in a Solr result file.
  #
  # @param fn [String] path of a JSON file holding Solr query results
  # @return [Hash] counts under the :accepted and :rejected keys
  def validate_file(fn)
    stats = { :accepted => 0, :rejected => 0 }
    puts "Validating #{fn}"
    json = JSON.parse(File.read(fn))
    json['response']['docs'].each do |doc| # contains JSON Solr query results
      begin
        validate(doc)
        stats[:accepted] += 1
      rescue ArgumentError => e
        puts e
        stats[:rejected] += 1
      end
    end
    stats
  end

  # Checks a single layer and, when it passes, appends it to the output file
  # as pretty-printed JSON followed by a "\n,\n" separator.
  #
  # The layer is modified in place: Availability is normalised to 'Online'
  # and Location is replaced by its re-serialised, normalised JSON.
  #
  # @param layer [Hash] one Solr document
  # @raise [ArgumentError] when the layer fails any check
  def validate(layer)
    id = layer['LayerId']

    REQUIRED_KEYS.each do |k|
      raise ArgumentError, "ERROR: #{id} missing #{k}" if layer[k].nil? || layer[k].to_s.empty?
    end

    %w[MinX MaxX].each do |lon|
      raise ArgumentError, "ERROR: #{id}: Invalid longitude value: #{layer[lon]}" unless lon?(layer[lon])
    end

    %w[MinY MaxY].each do |lat|
      raise ArgumentError, "ERROR: #{id} Invalid latitude value: #{layer[lat]}" unless lat?(layer[lat])
    end

    require_member(id, layer, 'Institution', INSTITUTIONS)
    require_member(id, layer, 'DataType', DATA_TYPES)
    require_member(id, layer, 'Access', ACCESS_LEVELS)

    k = 'Availability'
    # Availability is not a required key, so it may be nil here; anything
    # other than a case-insensitive 'online' is rejected.
    unless layer[k].is_a?(String) && layer[k].downcase == 'online'
      raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
    end
    layer[k] = 'Online' # cleanup

    k = 'Location'
    layer[k] = validate_location(id, layer[k])
    if layer[k].nil? || layer[k].empty?
      raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
    end

    k = 'GeoReferenced'
    # Only a warning: the flag is deliberately left as found.
    if !layer[k].nil? && layer[k] != true
      puts "WARNING: #{id} has boundingbox but claims it is not georeferenced"
    end

    k = 'Area'
    # Area is not a required key, so guard against nil and non-numeric values.
    unless layer[k].is_a?(Numeric) && layer[k] > 0
      raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
    end

    k = 'ContentDate'
    unless layer[k].is_a?(String) && !layer[k].empty?
      raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
    end
    begin
      dt = Date.rfc3339(layer[k])
    rescue ArgumentError
      # Re-raise with the layer id so the rejection can be traced.
      raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
    end
    if dt.year < 1500 || dt.year > 2100
      raise ArgumentError, "ERROR: #{id} has suspect #{k}: #{layer[k]}"
    end

    @output.write JSON.pretty_generate(layer)
    @output.write "\n,\n"
  end

  # Finishes the JSON array, closes the output file and prints the WMS
  # servers seen so far.
  def close
    # Every record is followed by a comma, so an empty object is written
    # before the closing bracket to keep the array well-formed.
    @output.write "\n {} \n]\n"
    @output.close
    ap({:wms_servers => @wms_servers})
  end

  private

  # Raises unless layer[key] is one of the allowed values.
  def require_member(id, layer, key, allowed)
    return if allowed.include?(layer[key])
    raise ArgumentError, "ERROR: #{id} has unsupported #{key}: #{layer[key]}"
  end

  # Parses and normalises the Location JSON of a layer.
  #
  # A 'wms' entry plus at least one of 'wcs'/'wfs' must be present. Each of
  # the three entries, when present, is coerced to an array and every URL in
  # it must be an HTTP(S) URL. The first WMS URL is recorded in @wms_servers.
  #
  # @param id [String] layer id, used in error messages only
  # @param location [String] JSON text of the Location field
  # @return [String] the normalised location serialised back to JSON
  # @raise [ArgumentError] when the location is malformed
  def validate_location(id, location)
    begin
      x = JSON.parse(location)
    rescue JSON::ParserError, TypeError
      raise ArgumentError, "ERROR: #{id}: Invalid JSON: #{location}"
    end

    if !x.is_a?(Hash) || x['wms'].nil? || (x['wcs'].nil? && x['wfs'].nil?)
      raise ArgumentError, "ERROR: #{id}: Missing WMS or WCS/WFS: #{x}"
    end

    PROTOCOLS.each do |protocol|
      next if x[protocol].nil?

      x[protocol] = [x[protocol]] if x[protocol].is_a?(String)
      unless x[protocol].is_a?(Array)
        raise ArgumentError, "ERROR: #{id}: Unknown #{protocol} value: #{x}"
      end

      x[protocol].each { |url| validate_url(id, protocol, url, x) }
    end

    @wms_servers[x['wms'].first] = true

    x.to_json
  end

  # Raises unless url parses as an HTTP or HTTPS URL.
  def validate_url(id, protocol, url, location)
    unless url.is_a?(String)
      raise ArgumentError, "ERROR: #{id}: Invalid #{protocol}: #{location}"
    end
    begin
      uri = URI.parse(url)
    rescue URI::InvalidURIError
      raise ArgumentError, "ERROR: #{id}: Invalid #{protocol}: #{location}"
    end
    # URI::HTTPS is a subclass of URI::HTTP, so this accepts both schemes.
    raise ArgumentError, "ERROR: #{id}: Invalid URL: #{uri}" unless uri.is_a?(URI::HTTP)
  end

  # True when lon is a number within [-180, 180].
  def lon?(lon)
    lon.is_a?(Numeric) && lon >= -180 && lon <= 180
  end

  # True when lat is a number within [-90, 90].
  def lat?(lat)
    lat.is_a?(Numeric) && lat >= -90 && lat <= 90
  end
end
|
171
|
+
|
172
|
+
|
173
|
+
# __MAIN__
|
174
|
+
# Validate every Solr result file under data/ and print the combined
# accepted/rejected counts. The output file defaults to valid.json when no
# argument is given.
output_fn = ARGV[0] || 'valid.json'
ValidateOgp.new(output_fn) do |ogp|
  totals = { :accepted => 0, :rejected => 0 }
  Dir.glob('data/*.json').each do |fn|
    result = ogp.validate_file(fn)
    [:accepted, :rejected].each { |key| totals[key] += result[key] }
  end
  ap({:statistics => totals})
end
|