geohydra 0.3.1 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -1
- data/.travis.yml +1 -2
- data/Gemfile +1 -8
- data/Gemfile.lock +87 -102
- data/README.md +2 -2
- data/VERSION +1 -1
- data/bin/accession.rb +99 -89
- data/bin/assemble.rb +288 -247
- data/bin/assemble_data.rb +54 -51
- data/bin/assemble_placenames.rb +85 -85
- data/bin/build_stage_options.rb +24 -18
- data/bin/derive_wgs84.rb +65 -66
- data/bin/extract_thumbnail.rb +38 -37
- data/bin/geo2mods.rb +78 -0
- data/bin/geohydra +14 -5
- data/bin/ingest_arcgis.rb +80 -60
- data/bin/iso2geo.rb +64 -0
- data/bin/loader_postgis.rb +121 -227
- data/bin/run_task.rb +23 -0
- data/bin/sync_geoserver_metadata.rb +132 -127
- data/bin/xsltproc-saxon +6 -0
- data/geohydra.gemspec +6 -4
- data/lib/geohydra.rb +5 -0
- data/lib/geohydra/accession.rb +24 -13
- data/lib/geohydra/{arcgis_to_iso19139_fc.xsl → arcgis_to_iso19110.xsl} +0 -0
- data/lib/geohydra/gazetteer.csv +842 -36
- data/lib/geohydra/gazetteer.rb +48 -24
- data/lib/geohydra/mods2geoblacklight.xsl +248 -0
- data/lib/geohydra/mods2ogp.xsl +5 -8
- data/lib/geohydra/transform.rb +8 -2
- data/lib/geohydra/utils.rb +6 -0
- data/lib/geohydra/workflow/gisAssemblyWF.rb +109 -0
- data/lib/geohydra/workflow/gisAssemblyWF.xml +85 -0
- data/lib/geohydra/workflow/gisDeliveryWF.rb +33 -0
- data/lib/geohydra/workflow/gisDeliveryWF.xml +36 -0
- data/lib/geohydra/workflow/gisDiscoveryWF.rb +55 -0
- data/lib/geohydra/workflow/gisDiscoveryWF.xml +28 -0
- data/lib/geohydra/workflow/task.rb +82 -0
- data/ogp/README.md +350 -0
- data/ogp/download.rb +92 -0
- data/ogp/fgdc2mods.sh +9 -0
- data/ogp/fgdc2mods.xsl +884 -0
- data/ogp/ingest.rb +48 -0
- data/ogp/select.rb +20 -0
- data/ogp/transform.rb +354 -0
- data/ogp/validate.rb +182 -0
- data/{bin → scripts}/ingest_tufts.rb +0 -0
- data/scripts/iso2html/doit.sh +15 -0
- data/scripts/iso2html/main.css +66 -0
- data/scripts/iso2html/pacioos-iso-html.xsl +1749 -0
- data/scripts/iso2html/utils/replace-newlines.xsl +97 -0
- data/scripts/iso2html/utils/replace-string.xsl +80 -0
- data/scripts/iso2html/utils/strip-digits.xsl +60 -0
- data/{bin → scripts}/loader.rb +0 -0
- data/scripts/rename_shapefiles.rb +5 -0
- data/scripts/render_gazetteer.rb +36 -0
- data/{bin → scripts}/seed.rb +0 -0
- data/{bin → scripts}/solr_indexer.rb +0 -0
- data/scripts/status.csv +253 -0
- data/scripts/status.rb +32 -0
- data/{bin → scripts}/validate_data.rb +1 -1
- data/solr/kurma-app-dev/conf/lang/stopwords_en.txt +34 -0
- data/solr/kurma-app-dev/conf/protwords.txt +21 -0
- data/solr/kurma-app-dev/conf/schema.xml +156 -0
- data/solr/kurma-app-dev/conf/solrconfig.xml +161 -0
- data/solr/kurma-app-dev/conf/synonyms.txt +29 -0
- data/solr/kurma-app-dev/purge.sh +8 -0
- data/solr/kurma-app-test/conf/lang/stopwords_en.txt +34 -0
- data/solr/kurma-app-test/conf/protwords.txt +21 -0
- data/solr/kurma-app-test/conf/schema.xml +158 -0
- data/solr/kurma-app-test/conf/solrconfig.xml +161 -0
- data/solr/kurma-app-test/conf/synonyms.txt +29 -0
- data/solr/kurma-app-test/deploy.sh +15 -0
- data/solr/kurma-app-test/purge.sh +8 -0
- data/solr/ogp-dev/purge.sh +1 -2
- data/spec/fixtures/bw938nk9584/temp/{TRIPURA-iso19139-fc.xml → TRIPURA-iso19110.xml} +0 -0
- data/spec/fixtures/cc142xj8436/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/cg716wc7949/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/cm007pv9601/temp/{MEGHALAYA-iso19139-fc.xml → MEGHALAYA-iso19110.xml} +0 -0
- data/spec/fixtures/cp055nb0189/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/cs838pw3418/temp/{OIL_GAS_FIELDS-iso19139-fc.xml → OIL_GAS_FIELDS-iso19110.xml} +0 -0
- data/spec/fixtures/dd308sy5843/temp/{ORISSA-iso19139-fc.xml → ORISSA-iso19110.xml} +0 -0
- data/spec/fixtures/dd452vk1873/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/dg850pt1796/temp/{STATE1951-iso19139-fc.xml → STATE1951-iso19110.xml} +0 -0
- data/spec/fixtures/dn744tf5427/temp/{DISTRICT1991-iso19139-fc.xml → DISTRICT1991-iso19110.xml} +0 -0
- data/spec/fixtures/dq603nz8402/temp/{STATE2001-iso19139-fc.xml → STATE2001-iso19110.xml} +0 -0
- data/spec/fixtures/dv609zt4699/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/dz222hw0585/temp/{PUNJAB-iso19139-fc.xml → PUNJAB-iso19110.xml} +0 -0
- data/spec/fixtures/fd673qb9705/temp/{STATE1971-iso19139-fc.xml → STATE1971-iso19110.xml} +0 -0
- data/spec/fixtures/fg451wp8917/temp/{SIKKIM-iso19139-fc.xml → SIKKIM-iso19110.xml} +0 -0
- data/spec/fixtures/fh247yz0156/temp/{RAJASTHAN-iso19139-fc.xml → RAJASTHAN-iso19110.xml} +0 -0
- data/spec/fixtures/fs487vd1465/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/fs591bn3317/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/fw920bc5473/temp/{PLSS_TWN-iso19139-fc.xml → PLSS_TWN-iso19110.xml} +0 -0
- data/spec/fixtures/gj831wj3625/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/gp075nv3265/temp/{PONDICHERRY-iso19139-fc.xml → PONDICHERRY-iso19110.xml} +0 -0
- data/spec/fixtures/gv800hj8141/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/gw520gz6339/temp/{DADRA_NAGAR_HAVELI_PT-iso19139-fc.xml → DADRA_NAGAR_HAVELI_PT-iso19110.xml} +0 -0
- data/spec/fixtures/gy054hz1045/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/gz352mw6982/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/hb489vm9892/temp/{DISTRICT1981-iso19139-fc.xml → DISTRICT1981-iso19110.xml} +0 -0
- data/spec/fixtures/hw125dq0418/temp/{DELHI-iso19139-fc.xml → DELHI-iso19110.xml} +0 -0
- data/spec/fixtures/hw892mn4587/temp/{KERALA-iso19139-fc.xml → KERALA-iso19110.xml} +0 -0
- data/spec/fixtures/jb371hz3868/temp/{INCOME-iso19139-fc.xml → INCOME-iso19110.xml} +0 -0
- data/spec/fixtures/jc017yk9928/temp/{KARNATAKA-iso19139-fc.xml → KARNATAKA-iso19110.xml} +0 -0
- data/spec/fixtures/jf841ys4828/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/jh802mp2160/temp/{DELHI_PT-iso19139-fc.xml → DELHI_PT-iso19110.xml} +0 -0
- data/spec/fixtures/jj806fc3801/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/jq835yn7161/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/jr455pt6676/temp/{TAMILNADU-iso19139-fc.xml → TAMILNADU-iso19110.xml} +0 -0
- data/spec/fixtures/js637zp2537/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/jv502wg9611/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/jw462ck6560/temp/{JAMMU_KASHMIR-iso19139-fc.xml → JAMMU_KASHMIR-iso19110.xml} +0 -0
- data/spec/fixtures/kj800fb6273/temp/{STATE2011-iso19139-fc.xml → STATE2011-iso19110.xml} +0 -0
- data/spec/fixtures/km504zq3948/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/ks297fy1411/temp/{OFFSH_BLOCKS-iso19139-fc.xml → OFFSH_BLOCKS-iso19110.xml} +0 -0
- data/spec/fixtures/md358hy5049/temp/{MIZORAM-iso19139-fc.xml → MIZORAM-iso19110.xml} +0 -0
- data/spec/fixtures/mg745bq0193/temp/{MADHYA_PRADESH-iso19139-fc.xml → MADHYA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/mh187yx3536/temp/{WEST_BENGAL-iso19139-fc.xml → WEST_BENGAL-iso19110.xml} +0 -0
- data/spec/fixtures/mk488yn6694/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/my216kp3008/temp/{DELHI-iso19139-fc.xml → DELHI-iso19110.xml} +0 -0
- data/spec/fixtures/my504nz9827/temp/{JAMMU_KASHMIR-iso19139-fc.xml → JAMMU_KASHMIR-iso19110.xml} +0 -0
- data/spec/fixtures/ng819jm8700/temp/{MANIPUR-iso19139-fc.xml → MANIPUR-iso19110.xml} +0 -0
- data/spec/fixtures/np020jq2139/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/ns377mt1608/temp/{STATE1991-iso19139-fc.xml → STATE1991-iso19110.xml} +0 -0
- data/spec/fixtures/nw926np8508/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/ny358rm8559/temp/{TRIPURA-iso19139-fc.xml → TRIPURA-iso19110.xml} +0 -0
- data/spec/fixtures/nz176rm8192/temp/{DISTRICT2011-iso19139-fc.xml → DISTRICT2011-iso19110.xml} +0 -0
- data/spec/fixtures/nz252rq2252/temp/{UTTAR_PRADESH-iso19139-fc.xml → UTTAR_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/pd902kb3348/temp/{MADHYA_PRADESH-iso19139-fc.xml → MADHYA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/pz792fz1776/temp/{MAHARASHTRA-iso19139-fc.xml → MAHARASHTRA-iso19110.xml} +0 -0
- data/spec/fixtures/qb767ss4042/temp/{UTTAR_PRADESH-iso19139-fc.xml → UTTAR_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/qc091qw0570/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/qc652vr7204/temp/{ANDHRA_PRADESH_PT-iso19139-fc.xml → ANDHRA_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/qk786js7484/temp/{DISTRICT1961-iso19139-fc.xml → DISTRICT1961-iso19110.xml} +0 -0
- data/spec/fixtures/qn676pg6767/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/qr255jh4074/temp/{LOKSABHA_14-iso19139-fc.xml → LOKSABHA_14-iso19110.xml} +0 -0
- data/spec/fixtures/qr374kj4827/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/qy162js1748/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/rd446vf2633/temp/{NAGALAND-iso19139-fc.xml → NAGALAND-iso19110.xml} +0 -0
- data/spec/fixtures/rf389hf2983/temp/{CHHATTISGARH_PT-iso19139-fc.xml → CHHATTISGARH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/rf859ff4582/temp/{JHARKHAND-iso19139-fc.xml → JHARKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/rh343ds8931/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/rn815xk8157/temp/{SIKKIM-iso19139-fc.xml → SIKKIM-iso19110.xml} +0 -0
- data/spec/fixtures/rq653sz4470/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/rt625ws6022/temp/{GULF_FAIRWAYS-iso19139-fc.xml → GULF_FAIRWAYS-iso19110.xml} +0 -0
- data/spec/fixtures/sc330vf4259/temp/{JHARKHAND-iso19139-fc.xml → JHARKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/sq479mx3086/temp/{OFFSH_PLATF-iso19139-fc.xml → OFFSH_PLATF-iso19110.xml} +0 -0
- data/spec/fixtures/sr686bm4098/temp/{DAMAN_DIU_PT-iso19139-fc.xml → DAMAN_DIU_PT-iso19110.xml} +0 -0
- data/spec/fixtures/sv303sh5583/temp/{ARUNACHAL_PRADESH-iso19139-fc.xml → ARUNACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/sy319nh8520/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/td363vx2792/temp/{HIMACHAL_PRADESH_PT-iso19139-fc.xml → HIMACHAL_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/tf374bd2484/temp/{DISTRICT1951-iso19139-fc.xml → DISTRICT1951-iso19110.xml} +0 -0
- data/spec/fixtures/tj797mj7877/temp/{LOKSABHA_15-iso19139-fc.xml → LOKSABHA_15-iso19110.xml} +0 -0
- data/spec/fixtures/tv060wq5179/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/tv536bn1915/temp/{ARUNACHAL_PRADESH-iso19139-fc.xml → ARUNACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/tz359cc2977/temp/{MANIPUR-iso19139-fc.xml → MANIPUR-iso19110.xml} +0 -0
- data/spec/fixtures/vb525my6511/temp/{UTTARAKHAND-iso19139-fc.xml → UTTARAKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/vh802fs4240/temp/{PONDICHERRY-iso19139-fc.xml → PONDICHERRY-iso19110.xml} +0 -0
- data/spec/fixtures/vk120xn2474/temp/{PLSS_SEC-iso19139-fc.xml → PLSS_SEC-iso19110.xml} +0 -0
- data/spec/fixtures/vn439bc7316/temp/{KERALA-iso19139-fc.xml → KERALA-iso19110.xml} +0 -0
- data/spec/fixtures/vq745jk0695/temp/{MEGHALAYA-iso19139-fc.xml → MEGHALAYA-iso19110.xml} +0 -0
- data/spec/fixtures/vr593vj7147/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/vw911qb5271/temp/{DISTRICT2001-iso19139-fc.xml → DISTRICT2001-iso19110.xml} +0 -0
- data/spec/fixtures/wg680pz0365/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/wg761xn1926/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/wh870qw1934/temp/{PUNJAB-iso19139-fc.xml → PUNJAB-iso19110.xml} +0 -0
- data/spec/fixtures/wk775mm4673/temp/{MAHARASHTRA-iso19139-fc.xml → MAHARASHTRA-iso19110.xml} +0 -0
- data/spec/fixtures/ws171yz2165/temp/{ARUNACHAL_PRADESH_PT-iso19139-fc.xml → ARUNACHAL_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/wt473hz7153/temp/{CHANDIGARH_PT-iso19139-fc.xml → CHANDIGARH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/ww217dj0457/temp/{CO2_PIPE-iso19139-fc.xml → CO2_PIPE-iso19110.xml} +0 -0
- data/spec/fixtures/wy875pk9849/temp/{STATE1961-iso19139-fc.xml → STATE1961-iso19110.xml} +0 -0
- data/spec/fixtures/xb018tk2042/temp/{STATE1981-iso19139-fc.xml → STATE1981-iso19110.xml} +0 -0
- data/spec/fixtures/xg539vw8586/temp/{ORISSA-iso19139-fc.xml → ORISSA-iso19110.xml} +0 -0
- data/spec/fixtures/xv475kp4644/temp/{ASSAM_PT-iso19139-fc.xml → ASSAM_PT-iso19110.xml} +0 -0
- data/spec/fixtures/xy096gc2959/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/xz518gz3362/temp/{UTTARAKHAND-iso19139-fc.xml → UTTARAKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/yh986wy4737/temp/{NAGALAND-iso19139-fc.xml → NAGALAND-iso19110.xml} +0 -0
- data/spec/fixtures/yn187fq4474/temp/{KARNATAKA-iso19139-fc.xml → KARNATAKA-iso19110.xml} +0 -0
- data/spec/fixtures/yn236mw3250/temp/{TAMILNADU-iso19139-fc.xml → TAMILNADU-iso19110.xml} +0 -0
- data/spec/fixtures/yz596nz0112/temp/{WEST_BENGAL-iso19139-fc.xml → WEST_BENGAL-iso19110.xml} +0 -0
- data/spec/fixtures/zk596gy7380/temp/{DISTRICT1971-iso19139-fc.xml → DISTRICT1971-iso19110.xml} +0 -0
- data/spec/fixtures/zn452hh7431/temp/{RAJASTHAN-iso19139-fc.xml → RAJASTHAN-iso19110.xml} +0 -0
- data/spec/fixtures/zt093fw6519/temp/{MIZORAM-iso19139-fc.xml → MIZORAM-iso19110.xml} +0 -0
- data/spec/fixtures/zv925hd6723/temp/{OGWELLS-iso19139-fc.xml → OGWELLS-iso19110.xml} +0 -0
- data/spec/fixtures/zy658cr1728/temp/{ANDAMAAN_NICOBAR_PT-iso19139-fc.xml → ANDAMAAN_NICOBAR_PT-iso19110.xml} +0 -0
- data/spec/fixtures/zz943vx1492/temp/{BASINS-iso19139-fc.xml → BASINS-iso19110.xml} +0 -0
- data/spec/unit/gazetteer_spec.rb +100 -35
- data/spec/unit/task_spec.rb +68 -0
- data/spec/unit/transform_spec.rb +1 -1
- data/spec/unit/utils_spec.rb +17 -3
- data/workflow.rb +35 -0
- metadata +323 -316
data/ogp/ingest.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'rsolr'
|
5
|
+
|
6
|
+
# Loads JSON files of Solr documents into a Solr collection via RSolr.
#
# Typical use is the block form, which commits and releases the connection
# when the block finishes (even if it raises):
#
#   IngestOgp.new('my-core', 'http://localhost:8983/solr') do |ogp|
#     ogp.ingest('transformed.json')
#   end
class IngestOgp
  # Number of documents added between intermediate commits.
  COMMIT_EVERY = 100

  # @param [String] collection name of the Solr collection (appended to url)
  # @param [String] url base URL of the Solr server
  # @yield [IngestOgp] self; #close is called once the block returns or raises
  # @raise [ArgumentError] if collection is not a String
  def initialize(collection, url)
    raise ArgumentError, 'Collection not defined' unless collection.is_a? String
    @solr = RSolr.connect(:url => (url + '/' + collection))
    # Without a block the caller owns the connection and must call #close.
    return unless block_given?
    begin
      yield self
    ensure
      close
    end
  end

  # Adds every non-empty Hash found in the JSON array stored in +fn+ to Solr.
  # The Solr-managed `_version_` and `timestamp` fields are stripped first.
  # Commits after every COMMIT_EVERY documents and once more at the end.
  #
  # @param [String] fn filename of a JSON array of Solr documents
  def ingest(fn)
    puts "Ingesting #{fn}"
    json = JSON::parse(File.read(fn))
    n = 0
    json.each do |doc|
      # placeholder/terminator entries (e.g. a trailing {}) are skipped
      next unless doc.is_a?(Hash) && !doc.empty?
      doc.delete('_version_')
      doc.delete('timestamp')
      putc "."
      @solr.add doc
      n += 1
      if n % COMMIT_EVERY == 0
        @solr.commit
        puts "\ncommit 100 records, #{n} total\n"
      end
    end
    puts "\n#{n} records\n"
    @solr.commit
  end

  # Issues a final commit and drops the connection. Safe to call repeatedly.
  def close
    return if @solr.nil?
    @solr.commit
    #@solr.optimize
    @solr = nil
  end

end
|
41
|
+
|
42
|
+
|
43
|
+
# __MAIN__
# ARGV[0] is the Solr collection; ARGV[1] optionally overrides the Solr URL.
solr_url = ARGV[1] || 'http://localhost:18080/solr'
IngestOgp.new(ARGV[0], solr_url) do |ogp|
  # every transformed*.json file in the current directory is ingested
  Dir.glob("transformed*.json").each { |fn| ogp.ingest(fn) }
end
|
data/ogp/select.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Usage: select.rb
|
4
|
+
|
5
|
+
|
6
|
+
require 'awesome_print'
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
|
10
|
+
# __MAIN__
# Randomly samples roughly 1% of the records found in transformed*.json
# (current directory), prints the sample and saves it to selected.json.
selected = []
Dir.glob('transformed*.json').each do |fn|
  records = JSON::parse(File.read(fn))
  records.each do |record|
    # keep each record with probability 0.01
    selected << record if rand < 0.01
  end
end
ap({:selected => selected})
File.open('selected.json', 'wb') do |f|
  f << JSON.pretty_generate(selected)
end
|
data/ogp/transform.rb
ADDED
@@ -0,0 +1,354 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Usage: transform_ogp output.json
|
4
|
+
#
|
5
|
+
# Reads valid*.json in current directory
|
6
|
+
#
|
7
|
+
|
8
|
+
require 'awesome_print'
|
9
|
+
require 'json'
|
10
|
+
require 'uri'
|
11
|
+
require 'date'
|
12
|
+
require 'nokogiri'
|
13
|
+
|
14
|
+
# Transforms an OGP schema into GeoBlacklight. Requires input of a JSON array
# of OGP hashes.
#
# Output is written incrementally as a JSON array: each record is followed by
# a comma, and #close appends an empty `{}` terminator so the array stays
# valid JSON. Readers are expected to skip that empty trailing hash.
class TransformOgp

  # Schema and user-name prefixes removed (in this order) from layer names
  # when building slugs.
  SLUG_NAME_PREFIXES = %w{SDE_DATA. SDE. SDE2. GISPORTAL.GISOWNER01. GISDATA. MORIS.}.freeze

  # @param [String] fn filename for the generated GeoBlacklight JSON
  # @yield [TransformOgp] self; #close is called once the block returns or raises
  def initialize(fn)
    @output = File.open(fn, 'wb')
    @output.write "[\n"
    @fgdcdir = 'fgdc'
    # Without a block the caller is responsible for calling #close.
    return unless block_given?
    begin
      yield self
    ensure
      close
    end
  end

  # @param [String|Array] s the URI to clean up
  # @return [String] a normalized URI
  def clean_uri(s)
    return '' if s.nil? || s.empty?
    URI(s.is_a?(Array) ? s.first : s).to_s
  end

  # @param [String] fn filename of JSON array of OGP hash objects
  # @return [Hash] stats about :accepted vs. :rejected records
  def transform_file(fn)
    stats = { :accepted => 0, :rejected => 0 }
    puts "Parsing #{fn}"
    # block form closes the input file as soon as it has been read
    json = JSON::parse(File.open(fn, 'rb') { |f| f.read })
    json.each do |doc| # contains JSON Solr query results
      next if doc.empty?
      begin
        transform(doc)
        stats[:accepted] += 1
      rescue ArgumentError => e
        # a rejected record is reported and counted; the batch continues
        puts e
        stats[:rejected] += 1
      end
    end
    stats
  end

  # Transforms a single OGP record into a GeoBlacklight record and appends
  # it to the output file. Note that for non-Stanford layers the
  # PlaceKeywords/ThemeKeywords values of +layer+ are modified in place.
  #
  # @param [Hash] layer an OGP hash for a given layer
  # @param [Boolean] skip_fgdc when false, the layer's FgdcText is also
  #   written as XML to fgdc/<slug>.xml (requires Nokogiri)
  # @raise [ArgumentError] if the Location or ContentDate is missing/malformed
  def transform(layer, skip_fgdc = true)
    id = layer['LayerId'].to_s.strip
    puts "Tranforming #{id}"

    # For URN style @see http://www.ietf.org/rfc/rfc2141.txt
    # For ARK @see https://wiki.ucop.edu/display/Curation/ARK
    prefix = case layer['Institution']
    when 'Stanford'
      'http://purl.stanford.edu/'
    when 'Tufts'
      'urn:geodata.tufts.edu:'
    when 'MassGIS'
      'urn:massgis.state.ma.us:'
    when 'Berkeley'
      'http://ark.cdlib.org/ark:/'
    when 'MIT'
      'urn:arrowsmith.mit.edu:'
    when 'Harvard'
      'urn:hul.harvard.edu:'
    else
      ''
    end
    uuid = prefix + escape_uri_component(id)

    # Parse out the Location to get the WMS/WFS/WCS URLs
    raise ArgumentError, "ERROR: #{id} no location" if layer['Location'].nil? || layer['Location'].empty?
    begin
      location = JSON::parse(layer['Location'])
    rescue JSON::ParserError
      # JSON::ParserError is not an ArgumentError; convert it so that the
      # record is rejected instead of aborting the whole batch
      raise ArgumentError, "ERROR: #{id} has malformed location"
    end
    raise ArgumentError, "ERROR: #{id} has malformed location" unless location.is_a? Hash

    # Parse out the bounding box
    s = layer['MinY'].to_f
    w = layer['MinX'].to_f
    n = layer['MaxY'].to_f
    e = layer['MaxX'].to_f

    # Parse out the ContentDate date/time
    unless layer['ContentDate'].is_a? String
      raise ArgumentError, "ERROR: #{id} has no ContentDate"
    end
    dt = DateTime.rfc3339(layer['ContentDate'])
    pub_dt = DateTime.rfc3339('2000-01-01T00:00:00Z') # XXX fake data, get from MODS

    access = layer['Access']
    collection = nil

    # Parse out the PURL and other metadata for Stanford
    if layer['Institution'] == 'Stanford'
      purl = location['purl']
      purl = purl.first if purl.is_a? Array
      purl = uuid if purl.nil? && uuid =~ /^http/
    else
      purl = nil
      # Because OGP does not delimit keywords, we use a heuristic here:
      # undelimited values of four or more words become one keyword per word
      %w{PlaceKeywords ThemeKeywords}.each do |k|
        next unless layer[k].is_a? String
        unless layer[k] =~ /[;,]/ || layer[k].split.size < 4
          layer[k] = layer[k].split.join(';')
        end
      end
    end

    slug = to_slug(id, layer)

    layer_geom_type = layer['DataType'].to_s.downcase
    layer_geom_type = 'raster' if layer_geom_type == 'paper map'

    # @see https://github.com/OSGeo/Cat-Interop
    %w{wcs wfs wms}.each do |k|
      location[k] = location[k].first if location[k].is_a? Array
    end
    refs = {}
    refs['http://www.opengis.net/def/serviceType/ogc/wcs'] = "#{location['wcs']}" if location['wcs']
    refs['http://www.opengis.net/def/serviceType/ogc/wfs'] = "#{location['wfs']}" if location['wfs']
    refs['http://www.opengis.net/def/serviceType/ogc/wms'] = "#{location['wms']}" if location['wms']
    if purl
      refs["http://schema.org/thumbnailUrl"] = "http://stacks.stanford.edu/file/druid:#{id}/preview.jpg"
      refs["http://schema.org/url"] = "#{clean_uri(purl)}"
      refs["http://schema.org/DownloadAction"] = "http://stacks.stanford.edu/file/druid:#{id}/data.zip"
      refs["http://www.isotc211.org/schemas/2005/gmd/"] = "#{purl}.iso19139"
      refs["http://www.loc.gov/mods/v3"] = "#{purl}.mods"
    end

    # Make the conversion from OGP to GeoBlacklight
    #
    # @see http://dublincore.org/documents/dcmi-terms/
    # @see http://wiki.dublincore.org/index.php/User_Guide/Creating_Metadata
    # @see http://www.ietf.org/rfc/rfc5013.txt
    new_layer = {
      :uuid => uuid,

      # Dublin Core elements
      :dc_creator_sm => string2array(layer['Originator']),
      :dc_description_s => layer['Abstract'],
      :dc_format_s => (
        (layer_geom_type == 'raster') ?
          'GeoTIFF' : # 'image/tiff' :
          'Shapefile' # 'application/x-esri-shapefile'
      ), # XXX: fake data
      :dc_identifier_s => uuid,
      :dc_language_s => 'English', # 'en', # XXX: fake data
      :dc_publisher_s => layer['Publisher'],
      :dc_rights_s => access,
      :dc_subject_sm => string2array(layer['ThemeKeywords']),
      :dc_title_s => layer['LayerDisplayName'],
      :dc_type_s => 'Dataset', # or 'Image' for non-georectified,
                               # or 'PhysicalObject' for non-digitized maps
      # Dublin Core terms
      :dct_isPartOf_sm => collection.nil? ? nil : [collection],
      :dct_references_s => refs.to_json.to_s,
      :dct_spatial_sm => string2array(layer['PlaceKeywords']),
      :dct_temporal_sm => [dt.year.to_s],
      :dct_issued_s => pub_dt.year.to_s,
      :dct_provenance_s => layer['Institution'],

      #
      # xmlns:georss="http://www.georss.org/georss"
      # A bounding box is a rectangular region, often used to define the
      # extents of a map or a rough area of interest. A box contains two
      # space-separated latitude-longitude pairs, with each pair separated by
      # whitespace. The first pair is the lower corner, the second is the
      # upper corner.
      :georss_box_s => "#{s} #{w} #{n} #{e}",
      :georss_polygon_s => "#{n} #{w} #{n} #{e} #{s} #{e} #{s} #{w} #{n} #{w}",

      # Layer-specific schema
      :layer_slug_s => slug,
      :layer_id_s => layer['WorkspaceName'] + ':' + layer['Name'],
      # :layer_srs_s => 'EPSG:4326', # XXX: fake data
      :layer_geom_type_s => layer_geom_type.capitalize,
      :layer_modified_dt => Time.now.utc.strftime('%FT%TZ'),

      # derived fields used only by solr, for which copyField is insufficient
      :solr_bbox => "#{w} #{s} #{e} #{n}", # minX minY maxX maxY
      :solr_ne_pt => "#{n},#{e}",
      :solr_sw_pt => "#{s},#{w}",
      :solr_geom => "ENVELOPE(#{w}, #{e}, #{n}, #{s})",
      :solr_year_i => dt.year,
      :solr_issued_dt => pub_dt.strftime('%FT%TZ'), # Solr requires 1995-12-31T23:59:59Z
      :solr_wms_url => location['wms'],
      :solr_wfs_url => location['wfs'],
      :solr_wcs_url => location['wcs']

      # :layer_year_i => dt.year#, # XXX: migrate to copyField
      # :ogp_area_f => layer['Area'],
      # :ogp_center_x_f => layer['CenterX'],
      # :ogp_center_y_f => layer['CenterY'],
      # :ogp_georeferenced_b => (layer['GeoReferenced'].to_s.downcase == 'true'),
      # :ogp_halfheight_f => layer['HalfHeight'],
      # :ogp_halfwidth_f => layer['HalfWidth'],
      # :ogp_layer_id_s => layer['LayerId'],
      # :ogp_name_s => layer['Name'],
      # :ogp_location_s => layer['Location'],
      # :ogp_workspace_s => layer['WorkspaceName']
    }

    # Remove any fields that are blank
    new_layer.delete_if do |_, v|
      v.nil? || (v.respond_to?(:empty?) && v.empty?)
    end

    # Write the JSON record for the GeoBlacklight layer
    @output.write JSON::pretty_generate(new_layer)
    @output.write "\n,\n"

    unless skip_fgdc || layer['FgdcText'].nil? || layer['FgdcText'].empty?
      xml = Nokogiri::XML(layer['FgdcText'])
      # block form makes sure the XML file is flushed and closed
      File.open(File.join(@fgdcdir, slug + '.xml'), 'wb') do |f|
        xml.write_xml_to(f, :encoding => 'UTF-8', :indent => 2)
      end
    end
  end

  # Terminates the JSON array (with an empty hash after the last trailing
  # comma) and closes the output file.
  def close
    @output.write "\n {} \n]\n"
    @output.close
  end

  # @param [String] s has semi-colon/comma/gt delimited array
  # @return [Array] unique, stripped, non-blank results as array, or nil
  #   when there are none
  def string2array(s)
    return nil unless s.is_a? String
    # strip before uniq so that "a; a " does not yield a duplicate
    items = s.split(/\s*[;,>]\s*/).map { |i| i.strip }.reject { |i| i.empty? }.uniq
    items.empty? ? nil : items
  end

  # slugs handed out so far, shared by every instance in this process
  @@slugs = {}

  # Builds a lowercase "institution-name" slug made of a-z, 0-9 and "-",
  # unique among the slugs generated so far in this process.
  #
  # @param [String] id the layer id (currently unused)
  # @param [Hash] layer an OGP hash for a given layer
  # @return [String] the slug
  def to_slug(id, layer)
    # strip out schema and usernames
    name = layer['Name'].to_s
    SLUG_NAME_PREFIXES.each { |prefix| name = name.sub(prefix, '') }
    unless name.size > 1
      # use first word of title if name is empty
      name = layer['LayerDisplayName'].to_s.split.first.to_s
    end
    slug = "#{layer['Institution']}-#{name}"

    # slugs should only have a-z, A-Z, 0-9, and -
    slug = slug.gsub(/[^a-zA-Z0-9\-]/, '-').gsub(/[\-]+/, '-')

    # only lowercase
    slug = slug.downcase

    # ensure slugs are unique for this pass; keep drawing suffixes until the
    # candidate really is unused
    candidate = slug
    while @@slugs.include?(candidate)
      candidate = slug + '-' + sprintf("%06d", Random.rand(999999))
    end
    @@slugs[candidate] = true

    candidate
  end

  # Ensure that the WMS/WFS/WCS location values are as expected
  #
  # @param [String] id layer id, used in error messages only
  # @param [String] location JSON hash of service name to URL(s)
  # @return [String] location as JSON with wms/wcs/wfs values as arrays
  # @raise [ArgumentError] if the JSON is invalid, WMS or both of WCS/WFS
  #   are missing, or any URL is not HTTP(S)
  def validate_location(id, location)
    begin
      x = JSON::parse(location)
    rescue JSON::ParserError, TypeError
      raise ArgumentError, "ERROR: #{id}: Invalid JSON: #{location}"
    end
    raise ArgumentError, "ERROR: #{id}: Invalid JSON: #{location}" unless x.is_a? Hash

    if x['wms'].nil? || (x['wcs'].nil? && x['wfs'].nil?)
      raise ArgumentError, "ERROR: #{id}: Missing WMS or WCS/WFS: #{x}"
    end

    %w{wms wcs wfs}.each do |protocol|
      next if x[protocol].nil?
      # normalize a single URL into a one-element array
      x[protocol] = [x[protocol]] if x[protocol].is_a? String

      unless x[protocol].is_a? Array
        raise ArgumentError, "ERROR: #{id}: Unknown #{protocol} value: #{x}"
      end

      x[protocol].each do |url|
        begin
          uri = URI.parse(url)
        rescue StandardError
          # unparseable or non-String URL
          raise ArgumentError, "ERROR: #{id}: Invalid #{protocol}: #{x}"
        end
        unless uri.kind_of?(URI::HTTP) || uri.kind_of?(URI::HTTPS)
          raise ArgumentError, "ERROR: #{id}: Invalid URL: #{uri}"
        end
      end
    end

    x.to_json
  end

  # @return [Boolean] whether lon is a valid longitude in degrees
  def lon? lon
    lon >= -180 && lon <= 180
  end

  # @return [Boolean] whether lat is a valid latitude in degrees
  def lat? lat
    lat >= -90 && lat <= 90
  end

  private

  # Percent-escapes characters that are not allowed in a URI, leaving
  # reserved characters such as "/" untouched. Stand-in for URI.encode,
  # which was removed in Ruby 3.0.
  def escape_uri_component(str)
    parser = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
    parser.escape(str)
  end
end
|
310
|
+
|
311
|
+
|
312
|
+
# __MAIN__
#
# ARGV[0] optionally names the output file; every valid*.json file in the
# current directory is transformed and the totals are printed at the end.
output_fn = ARGV[0] || 'transformed.json'
TransformOgp.new(output_fn) do |ogp|
  totals = { :accepted => 0, :rejected => 0 }
  Dir.glob('valid*.json').each do |fn|
    result = ogp.transform_file(fn)
    [:accepted, :rejected].each { |key| totals[key] += result[key] }
  end
  ap({:statistics => totals})
end
|
323
|
+
|
324
|
+
# example input data
|
325
|
+
__END__
|
326
|
+
[
|
327
|
+
{
|
328
|
+
"Abstract": "The boundaries of each supervisorial district in Sonoma County based on 2000 census. Redrawn in 2001 using Autobound.",
|
329
|
+
"Access": "Public",
|
330
|
+
"Area": 0.9463444815860053,
|
331
|
+
"Availability": "Online",
|
332
|
+
"CenterX": -122.942159,
|
333
|
+
"CenterY": 38.4580755,
|
334
|
+
"ContentDate": "2000-01-01T01:01:01Z",
|
335
|
+
"DataType": "Polygon",
|
336
|
+
"FgdcText": "...",
|
337
|
+
"GeoReferenced": true,
|
338
|
+
"HalfHeight": 0.39885650000000084,
|
339
|
+
"HalfWidth": 0.593161000000002,
|
340
|
+
"Institution": "Berkeley",
|
341
|
+
"LayerDisplayName": "SCGISDB2_BASE_ADM_SUPERVISOR",
|
342
|
+
"LayerId": "28722/bk0012h5s52",
|
343
|
+
"Location": "{\"wms\":[\"http://gis.lib.berkeley.edu:8080/geoserver/wms\"],\"tilecache\":[\"http://gis.lib.berkeley.edu:8080/geoserver/gwc/service/wms\"],\"download\":\"\",\"wfs\":[\"http://gis.lib.berkeley.edu:8080/geoserver/wfs\"]}",
|
344
|
+
"MaxX": -122.348998,
|
345
|
+
"MaxY": 38.856932,
|
346
|
+
"MinX": -123.53532,
|
347
|
+
"MinY": 38.059219,
|
348
|
+
"Name": "ADM_SUPERVISOR",
|
349
|
+
"PlaceKeywords": "Sonoma County County of Sonoma Sonoma California Bay Area",
|
350
|
+
"Publisher": "UC Berkeley Libraries",
|
351
|
+
"ThemeKeywords": "Supervisorial districts 1st District 2nd District 3rd District 4th District 5th District",
|
352
|
+
"WorkspaceName": "UCB"
|
353
|
+
}
|
354
|
+
]
|
data/ogp/validate.rb
ADDED
@@ -0,0 +1,182 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Usage: validate_ogp [output.json]
|
4
|
+
#
|
5
|
+
# Requires data/*.json as input and output to valid.json
|
6
|
+
#
|
7
|
+
require 'awesome_print'
|
8
|
+
require 'json'
|
9
|
+
require 'uri'
|
10
|
+
require 'date'
|
11
|
+
|
12
|
+
# Validates OGP Solr records and writes the accepted ones to a JSON file.
#
# Output is written incrementally as a JSON array: each record is followed by
# a comma, and #close appends an empty `{}` terminator so the array stays
# valid JSON. Readers are expected to skip that empty trailing hash.
class ValidateOgp
  REQUIRED_FIELDS = %w{LayerId Name Institution Access MinX MinY MaxX MaxY LayerDisplayName Location}.freeze
  INSTITUTIONS = %w{Berkeley Harvard MIT MassGIS Stanford Tufts}.freeze
  DATA_TYPES = ['Line', 'Paper Map', 'Point', 'Polygon', 'Raster', 'LibraryRecord'].freeze
  ACCESS_LEVELS = %w{Public Restricted}.freeze

  # @param [String] fn filename for the JSON array of accepted records
  # @yield [ValidateOgp] self; #close is called once the block returns or raises
  def initialize(fn)
    @wms_servers = {}
    @output = File.open(fn, 'wb')
    @output.write "[\n"
    # Without a block the caller is responsible for calling #close.
    return unless block_given?
    begin
      yield self
    ensure
      close
    end
  end

  # Validates every document under response/docs of the Solr query result
  # stored in +fn+.
  #
  # @param [String] fn filename of a JSON Solr query response
  # @return [Hash] stats about :accepted vs. :rejected records
  def validate_file(fn)
    stats = { :accepted => 0, :rejected => 0 }
    puts "Validating #{fn}"
    json = JSON::parse(File.read(fn))
    json['response']['docs'].each do |doc| # contains JSON Solr query results
      begin
        validate(doc)
        stats[:accepted] += 1
      rescue ArgumentError => e
        # a rejected record is reported and counted; the run continues
        puts e
        stats[:rejected] += 1
      end
    end
    stats
  end

  # Checks a single OGP record and, when acceptable, appends it to the
  # output. The record is cleaned up in place: Availability is normalized to
  # 'Online' and Location is rewritten with its URLs as arrays.
  #
  # @param [Hash] layer an OGP hash for a given layer
  # @raise [ArgumentError] describing the first problem found
  def validate(layer)
    id = layer['LayerId']

    REQUIRED_FIELDS.each do |k|
      if layer[k].nil? || layer[k].to_s.empty?
        raise ArgumentError, "ERROR: #{id} missing #{k}"
      end
    end

    %w{MinX MaxX}.each do |lon|
      unless layer[lon].is_a?(Numeric) && lon?(layer[lon])
        raise ArgumentError, "ERROR: #{id}: Invalid longitude value: #{layer[lon]}"
      end
    end

    %w{MinY MaxY}.each do |lat|
      unless layer[lat].is_a?(Numeric) && lat?(layer[lat])
        raise ArgumentError, "ERROR: #{id} Invalid latitude value: #{layer[lat]}"
      end
    end

    k = 'Institution'
    unless INSTITUTIONS.include?(layer[k])
      raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
    end

    k = 'DataType'
    unless DATA_TYPES.include?(layer[k])
      raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
    end

    k = 'Access'
    unless ACCESS_LEVELS.include?(layer[k])
      raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
    end

    k = 'Availability'
    # to_s: a missing Availability must be rejected, not crash the run
    layer[k] = 'Online' if layer[k].to_s.downcase == 'online' # cleanup
    unless layer[k] == 'Online'
      raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
    end

    k = 'Location'
    layer[k] = validate_location(id, layer[k])
    if layer[k].nil? || layer[k].empty?
      raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
    end

    k = 'GeoReferenced'
    unless layer[k].nil? || layer[k] == true
      puts "WARNING: #{id} has boundingbox but claims it is not georeferenced"
      #layer[k] = true
    end

    k = 'Area'
    # a missing or non-numeric Area is rejected like a non-positive one
    unless layer[k].is_a?(Numeric) && layer[k] > 0
      raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
    end

    k = 'ContentDate'
    unless layer[k].is_a?(String) && !layer[k].empty?
      raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
    end
    dt = Date.rfc3339(layer[k]) # raises ArgumentError on a malformed date
    if dt.year < 1500 || dt.year > 2100
      raise ArgumentError, "ERROR: #{id} has suspect #{k}: #{layer[k]}"
    end

    # k = 'FgdcText'
    # unless layer[k].nil? or layer[k].empty?
    #   layer[k] = ''
    # end

    @output.write JSON::pretty_generate(layer)
    @output.write "\n,\n"
  end

  # Terminates the JSON array (with an empty hash after the last trailing
  # comma), closes the output file and prints the WMS servers encountered.
  def close
    @output.write "\n {} \n]\n"
    @output.close
    ap({:wms_servers => @wms_servers})
  end

  private

  # Ensure that the WMS/WFS/WCS location values are as expected, and record
  # the first WMS server of the location.
  #
  # @param [String] id layer id, used in error messages only
  # @param [String] location JSON hash of service name to URL(s)
  # @return [String] location as JSON with wms/wcs/wfs values as arrays
  # @raise [ArgumentError] if the JSON is invalid, WMS or both of WCS/WFS
  #   are missing, or any URL is not HTTP(S)
  def validate_location(id, location)
    begin
      x = JSON::parse(location)
    rescue JSON::ParserError, TypeError
      raise ArgumentError, "ERROR: #{id}: Invalid JSON: #{location}"
    end
    raise ArgumentError, "ERROR: #{id}: Invalid JSON: #{location}" unless x.is_a? Hash

    if x['wms'].nil? || (x['wcs'].nil? && x['wfs'].nil?)
      raise ArgumentError, "ERROR: #{id}: Missing WMS or WCS/WFS: #{x}"
    end

    %w{wms wcs wfs}.each do |protocol|
      next if x[protocol].nil?
      # normalize a single URL into a one-element array
      x[protocol] = [x[protocol]] if x[protocol].is_a? String

      unless x[protocol].is_a? Array
        raise ArgumentError, "ERROR: #{id}: Unknown #{protocol} value: #{x}"
      end

      x[protocol].each do |url|
        begin
          uri = URI.parse(url)
        rescue StandardError
          # unparseable or non-String URL
          raise ArgumentError, "ERROR: #{id}: Invalid #{protocol}: #{x}"
        end
        unless uri.kind_of?(URI::HTTP) || uri.kind_of?(URI::HTTPS)
          raise ArgumentError, "ERROR: #{id}: Invalid URL: #{uri}"
        end
      end
    end

    @wms_servers[x['wms'].first] = true

    x.to_json
  end

  # @return [Boolean] whether lon is a valid longitude in degrees
  def lon? lon
    lon >= -180 && lon <= 180
  end

  # @return [Boolean] whether lat is a valid latitude in degrees
  def lat? lat
    lat >= -90 && lat <= 90
  end
end
|
171
|
+
|
172
|
+
|
173
|
+
# __MAIN__
# ARGV[0] optionally names the output file; every data/*.json file is
# validated and the totals are printed at the end.
output_fn = ARGV[0] || 'valid.json'
ValidateOgp.new(output_fn) do |ogp|
  totals = { :accepted => 0, :rejected => 0 }
  Dir.glob('data/*.json').each do |fn|
    result = ogp.validate_file(fn)
    [:accepted, :rejected].each { |key| totals[key] += result[key] }
  end
  ap({:statistics => totals})
end
|