geohydra 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -1
- data/.travis.yml +1 -2
- data/Gemfile +1 -8
- data/Gemfile.lock +87 -102
- data/README.md +2 -2
- data/VERSION +1 -1
- data/bin/accession.rb +99 -89
- data/bin/assemble.rb +288 -247
- data/bin/assemble_data.rb +54 -51
- data/bin/assemble_placenames.rb +85 -85
- data/bin/build_stage_options.rb +24 -18
- data/bin/derive_wgs84.rb +65 -66
- data/bin/extract_thumbnail.rb +38 -37
- data/bin/geo2mods.rb +78 -0
- data/bin/geohydra +14 -5
- data/bin/ingest_arcgis.rb +80 -60
- data/bin/iso2geo.rb +64 -0
- data/bin/loader_postgis.rb +121 -227
- data/bin/run_task.rb +23 -0
- data/bin/sync_geoserver_metadata.rb +132 -127
- data/bin/xsltproc-saxon +6 -0
- data/geohydra.gemspec +6 -4
- data/lib/geohydra.rb +5 -0
- data/lib/geohydra/accession.rb +24 -13
- data/lib/geohydra/{arcgis_to_iso19139_fc.xsl → arcgis_to_iso19110.xsl} +0 -0
- data/lib/geohydra/gazetteer.csv +842 -36
- data/lib/geohydra/gazetteer.rb +48 -24
- data/lib/geohydra/mods2geoblacklight.xsl +248 -0
- data/lib/geohydra/mods2ogp.xsl +5 -8
- data/lib/geohydra/transform.rb +8 -2
- data/lib/geohydra/utils.rb +6 -0
- data/lib/geohydra/workflow/gisAssemblyWF.rb +109 -0
- data/lib/geohydra/workflow/gisAssemblyWF.xml +85 -0
- data/lib/geohydra/workflow/gisDeliveryWF.rb +33 -0
- data/lib/geohydra/workflow/gisDeliveryWF.xml +36 -0
- data/lib/geohydra/workflow/gisDiscoveryWF.rb +55 -0
- data/lib/geohydra/workflow/gisDiscoveryWF.xml +28 -0
- data/lib/geohydra/workflow/task.rb +82 -0
- data/ogp/README.md +350 -0
- data/ogp/download.rb +92 -0
- data/ogp/fgdc2mods.sh +9 -0
- data/ogp/fgdc2mods.xsl +884 -0
- data/ogp/ingest.rb +48 -0
- data/ogp/select.rb +20 -0
- data/ogp/transform.rb +354 -0
- data/ogp/validate.rb +182 -0
- data/{bin → scripts}/ingest_tufts.rb +0 -0
- data/scripts/iso2html/doit.sh +15 -0
- data/scripts/iso2html/main.css +66 -0
- data/scripts/iso2html/pacioos-iso-html.xsl +1749 -0
- data/scripts/iso2html/utils/replace-newlines.xsl +97 -0
- data/scripts/iso2html/utils/replace-string.xsl +80 -0
- data/scripts/iso2html/utils/strip-digits.xsl +60 -0
- data/{bin → scripts}/loader.rb +0 -0
- data/scripts/rename_shapefiles.rb +5 -0
- data/scripts/render_gazetteer.rb +36 -0
- data/{bin → scripts}/seed.rb +0 -0
- data/{bin → scripts}/solr_indexer.rb +0 -0
- data/scripts/status.csv +253 -0
- data/scripts/status.rb +32 -0
- data/{bin → scripts}/validate_data.rb +1 -1
- data/solr/kurma-app-dev/conf/lang/stopwords_en.txt +34 -0
- data/solr/kurma-app-dev/conf/protwords.txt +21 -0
- data/solr/kurma-app-dev/conf/schema.xml +156 -0
- data/solr/kurma-app-dev/conf/solrconfig.xml +161 -0
- data/solr/kurma-app-dev/conf/synonyms.txt +29 -0
- data/solr/kurma-app-dev/purge.sh +8 -0
- data/solr/kurma-app-test/conf/lang/stopwords_en.txt +34 -0
- data/solr/kurma-app-test/conf/protwords.txt +21 -0
- data/solr/kurma-app-test/conf/schema.xml +158 -0
- data/solr/kurma-app-test/conf/solrconfig.xml +161 -0
- data/solr/kurma-app-test/conf/synonyms.txt +29 -0
- data/solr/kurma-app-test/deploy.sh +15 -0
- data/solr/kurma-app-test/purge.sh +8 -0
- data/solr/ogp-dev/purge.sh +1 -2
- data/spec/fixtures/bw938nk9584/temp/{TRIPURA-iso19139-fc.xml → TRIPURA-iso19110.xml} +0 -0
- data/spec/fixtures/cc142xj8436/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/cg716wc7949/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/cm007pv9601/temp/{MEGHALAYA-iso19139-fc.xml → MEGHALAYA-iso19110.xml} +0 -0
- data/spec/fixtures/cp055nb0189/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/cs838pw3418/temp/{OIL_GAS_FIELDS-iso19139-fc.xml → OIL_GAS_FIELDS-iso19110.xml} +0 -0
- data/spec/fixtures/dd308sy5843/temp/{ORISSA-iso19139-fc.xml → ORISSA-iso19110.xml} +0 -0
- data/spec/fixtures/dd452vk1873/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/dg850pt1796/temp/{STATE1951-iso19139-fc.xml → STATE1951-iso19110.xml} +0 -0
- data/spec/fixtures/dn744tf5427/temp/{DISTRICT1991-iso19139-fc.xml → DISTRICT1991-iso19110.xml} +0 -0
- data/spec/fixtures/dq603nz8402/temp/{STATE2001-iso19139-fc.xml → STATE2001-iso19110.xml} +0 -0
- data/spec/fixtures/dv609zt4699/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/dz222hw0585/temp/{PUNJAB-iso19139-fc.xml → PUNJAB-iso19110.xml} +0 -0
- data/spec/fixtures/fd673qb9705/temp/{STATE1971-iso19139-fc.xml → STATE1971-iso19110.xml} +0 -0
- data/spec/fixtures/fg451wp8917/temp/{SIKKIM-iso19139-fc.xml → SIKKIM-iso19110.xml} +0 -0
- data/spec/fixtures/fh247yz0156/temp/{RAJASTHAN-iso19139-fc.xml → RAJASTHAN-iso19110.xml} +0 -0
- data/spec/fixtures/fs487vd1465/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/fs591bn3317/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/fw920bc5473/temp/{PLSS_TWN-iso19139-fc.xml → PLSS_TWN-iso19110.xml} +0 -0
- data/spec/fixtures/gj831wj3625/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/gp075nv3265/temp/{PONDICHERRY-iso19139-fc.xml → PONDICHERRY-iso19110.xml} +0 -0
- data/spec/fixtures/gv800hj8141/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/gw520gz6339/temp/{DADRA_NAGAR_HAVELI_PT-iso19139-fc.xml → DADRA_NAGAR_HAVELI_PT-iso19110.xml} +0 -0
- data/spec/fixtures/gy054hz1045/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/gz352mw6982/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/hb489vm9892/temp/{DISTRICT1981-iso19139-fc.xml → DISTRICT1981-iso19110.xml} +0 -0
- data/spec/fixtures/hw125dq0418/temp/{DELHI-iso19139-fc.xml → DELHI-iso19110.xml} +0 -0
- data/spec/fixtures/hw892mn4587/temp/{KERALA-iso19139-fc.xml → KERALA-iso19110.xml} +0 -0
- data/spec/fixtures/jb371hz3868/temp/{INCOME-iso19139-fc.xml → INCOME-iso19110.xml} +0 -0
- data/spec/fixtures/jc017yk9928/temp/{KARNATAKA-iso19139-fc.xml → KARNATAKA-iso19110.xml} +0 -0
- data/spec/fixtures/jf841ys4828/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/jh802mp2160/temp/{DELHI_PT-iso19139-fc.xml → DELHI_PT-iso19110.xml} +0 -0
- data/spec/fixtures/jj806fc3801/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/jq835yn7161/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/jr455pt6676/temp/{TAMILNADU-iso19139-fc.xml → TAMILNADU-iso19110.xml} +0 -0
- data/spec/fixtures/js637zp2537/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/jv502wg9611/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/jw462ck6560/temp/{JAMMU_KASHMIR-iso19139-fc.xml → JAMMU_KASHMIR-iso19110.xml} +0 -0
- data/spec/fixtures/kj800fb6273/temp/{STATE2011-iso19139-fc.xml → STATE2011-iso19110.xml} +0 -0
- data/spec/fixtures/km504zq3948/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/ks297fy1411/temp/{OFFSH_BLOCKS-iso19139-fc.xml → OFFSH_BLOCKS-iso19110.xml} +0 -0
- data/spec/fixtures/md358hy5049/temp/{MIZORAM-iso19139-fc.xml → MIZORAM-iso19110.xml} +0 -0
- data/spec/fixtures/mg745bq0193/temp/{MADHYA_PRADESH-iso19139-fc.xml → MADHYA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/mh187yx3536/temp/{WEST_BENGAL-iso19139-fc.xml → WEST_BENGAL-iso19110.xml} +0 -0
- data/spec/fixtures/mk488yn6694/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/my216kp3008/temp/{DELHI-iso19139-fc.xml → DELHI-iso19110.xml} +0 -0
- data/spec/fixtures/my504nz9827/temp/{JAMMU_KASHMIR-iso19139-fc.xml → JAMMU_KASHMIR-iso19110.xml} +0 -0
- data/spec/fixtures/ng819jm8700/temp/{MANIPUR-iso19139-fc.xml → MANIPUR-iso19110.xml} +0 -0
- data/spec/fixtures/np020jq2139/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/ns377mt1608/temp/{STATE1991-iso19139-fc.xml → STATE1991-iso19110.xml} +0 -0
- data/spec/fixtures/nw926np8508/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/ny358rm8559/temp/{TRIPURA-iso19139-fc.xml → TRIPURA-iso19110.xml} +0 -0
- data/spec/fixtures/nz176rm8192/temp/{DISTRICT2011-iso19139-fc.xml → DISTRICT2011-iso19110.xml} +0 -0
- data/spec/fixtures/nz252rq2252/temp/{UTTAR_PRADESH-iso19139-fc.xml → UTTAR_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/pd902kb3348/temp/{MADHYA_PRADESH-iso19139-fc.xml → MADHYA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/pz792fz1776/temp/{MAHARASHTRA-iso19139-fc.xml → MAHARASHTRA-iso19110.xml} +0 -0
- data/spec/fixtures/qb767ss4042/temp/{UTTAR_PRADESH-iso19139-fc.xml → UTTAR_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/qc091qw0570/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/qc652vr7204/temp/{ANDHRA_PRADESH_PT-iso19139-fc.xml → ANDHRA_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/qk786js7484/temp/{DISTRICT1961-iso19139-fc.xml → DISTRICT1961-iso19110.xml} +0 -0
- data/spec/fixtures/qn676pg6767/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/qr255jh4074/temp/{LOKSABHA_14-iso19139-fc.xml → LOKSABHA_14-iso19110.xml} +0 -0
- data/spec/fixtures/qr374kj4827/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/qy162js1748/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/rd446vf2633/temp/{NAGALAND-iso19139-fc.xml → NAGALAND-iso19110.xml} +0 -0
- data/spec/fixtures/rf389hf2983/temp/{CHHATTISGARH_PT-iso19139-fc.xml → CHHATTISGARH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/rf859ff4582/temp/{JHARKHAND-iso19139-fc.xml → JHARKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/rh343ds8931/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/rn815xk8157/temp/{SIKKIM-iso19139-fc.xml → SIKKIM-iso19110.xml} +0 -0
- data/spec/fixtures/rq653sz4470/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/rt625ws6022/temp/{GULF_FAIRWAYS-iso19139-fc.xml → GULF_FAIRWAYS-iso19110.xml} +0 -0
- data/spec/fixtures/sc330vf4259/temp/{JHARKHAND-iso19139-fc.xml → JHARKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/sq479mx3086/temp/{OFFSH_PLATF-iso19139-fc.xml → OFFSH_PLATF-iso19110.xml} +0 -0
- data/spec/fixtures/sr686bm4098/temp/{DAMAN_DIU_PT-iso19139-fc.xml → DAMAN_DIU_PT-iso19110.xml} +0 -0
- data/spec/fixtures/sv303sh5583/temp/{ARUNACHAL_PRADESH-iso19139-fc.xml → ARUNACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/sy319nh8520/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/td363vx2792/temp/{HIMACHAL_PRADESH_PT-iso19139-fc.xml → HIMACHAL_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/tf374bd2484/temp/{DISTRICT1951-iso19139-fc.xml → DISTRICT1951-iso19110.xml} +0 -0
- data/spec/fixtures/tj797mj7877/temp/{LOKSABHA_15-iso19139-fc.xml → LOKSABHA_15-iso19110.xml} +0 -0
- data/spec/fixtures/tv060wq5179/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/tv536bn1915/temp/{ARUNACHAL_PRADESH-iso19139-fc.xml → ARUNACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/tz359cc2977/temp/{MANIPUR-iso19139-fc.xml → MANIPUR-iso19110.xml} +0 -0
- data/spec/fixtures/vb525my6511/temp/{UTTARAKHAND-iso19139-fc.xml → UTTARAKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/vh802fs4240/temp/{PONDICHERRY-iso19139-fc.xml → PONDICHERRY-iso19110.xml} +0 -0
- data/spec/fixtures/vk120xn2474/temp/{PLSS_SEC-iso19139-fc.xml → PLSS_SEC-iso19110.xml} +0 -0
- data/spec/fixtures/vn439bc7316/temp/{KERALA-iso19139-fc.xml → KERALA-iso19110.xml} +0 -0
- data/spec/fixtures/vq745jk0695/temp/{MEGHALAYA-iso19139-fc.xml → MEGHALAYA-iso19110.xml} +0 -0
- data/spec/fixtures/vr593vj7147/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/vw911qb5271/temp/{DISTRICT2001-iso19139-fc.xml → DISTRICT2001-iso19110.xml} +0 -0
- data/spec/fixtures/wg680pz0365/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/wg761xn1926/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/wh870qw1934/temp/{PUNJAB-iso19139-fc.xml → PUNJAB-iso19110.xml} +0 -0
- data/spec/fixtures/wk775mm4673/temp/{MAHARASHTRA-iso19139-fc.xml → MAHARASHTRA-iso19110.xml} +0 -0
- data/spec/fixtures/ws171yz2165/temp/{ARUNACHAL_PRADESH_PT-iso19139-fc.xml → ARUNACHAL_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/wt473hz7153/temp/{CHANDIGARH_PT-iso19139-fc.xml → CHANDIGARH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/ww217dj0457/temp/{CO2_PIPE-iso19139-fc.xml → CO2_PIPE-iso19110.xml} +0 -0
- data/spec/fixtures/wy875pk9849/temp/{STATE1961-iso19139-fc.xml → STATE1961-iso19110.xml} +0 -0
- data/spec/fixtures/xb018tk2042/temp/{STATE1981-iso19139-fc.xml → STATE1981-iso19110.xml} +0 -0
- data/spec/fixtures/xg539vw8586/temp/{ORISSA-iso19139-fc.xml → ORISSA-iso19110.xml} +0 -0
- data/spec/fixtures/xv475kp4644/temp/{ASSAM_PT-iso19139-fc.xml → ASSAM_PT-iso19110.xml} +0 -0
- data/spec/fixtures/xy096gc2959/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/xz518gz3362/temp/{UTTARAKHAND-iso19139-fc.xml → UTTARAKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/yh986wy4737/temp/{NAGALAND-iso19139-fc.xml → NAGALAND-iso19110.xml} +0 -0
- data/spec/fixtures/yn187fq4474/temp/{KARNATAKA-iso19139-fc.xml → KARNATAKA-iso19110.xml} +0 -0
- data/spec/fixtures/yn236mw3250/temp/{TAMILNADU-iso19139-fc.xml → TAMILNADU-iso19110.xml} +0 -0
- data/spec/fixtures/yz596nz0112/temp/{WEST_BENGAL-iso19139-fc.xml → WEST_BENGAL-iso19110.xml} +0 -0
- data/spec/fixtures/zk596gy7380/temp/{DISTRICT1971-iso19139-fc.xml → DISTRICT1971-iso19110.xml} +0 -0
- data/spec/fixtures/zn452hh7431/temp/{RAJASTHAN-iso19139-fc.xml → RAJASTHAN-iso19110.xml} +0 -0
- data/spec/fixtures/zt093fw6519/temp/{MIZORAM-iso19139-fc.xml → MIZORAM-iso19110.xml} +0 -0
- data/spec/fixtures/zv925hd6723/temp/{OGWELLS-iso19139-fc.xml → OGWELLS-iso19110.xml} +0 -0
- data/spec/fixtures/zy658cr1728/temp/{ANDAMAAN_NICOBAR_PT-iso19139-fc.xml → ANDAMAAN_NICOBAR_PT-iso19110.xml} +0 -0
- data/spec/fixtures/zz943vx1492/temp/{BASINS-iso19139-fc.xml → BASINS-iso19110.xml} +0 -0
- data/spec/unit/gazetteer_spec.rb +100 -35
- data/spec/unit/task_spec.rb +68 -0
- data/spec/unit/transform_spec.rb +1 -1
- data/spec/unit/utils_spec.rb +17 -3
- data/workflow.rb +35 -0
- metadata +323 -316
    
        data/ogp/ingest.rb
    ADDED
    
    | @@ -0,0 +1,48 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'json'
         | 
| 4 | 
            +
            require 'rsolr'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Loads JSON arrays of Solr documents into a Solr collection via RSolr.
            #
            # Typical use is the block form, which commits and releases the
            # connection when the block returns:
            #
            #   IngestOgp.new('collection', 'http://host/solr') { |ogp| ogp.ingest('x.json') }
            #
            # Without a block the instance is returned open and the caller is
            # responsible for calling #close.
            class IngestOgp
              # Number of added documents between intermediate commits.
              COMMIT_INTERVAL = 100

              # @param [String] collection name of the Solr collection (appended to url)
              # @param [String] url base Solr URL; a trailing slash is tolerated
              # @yield [IngestOgp] self, after which the connection is closed
              # @raise [ArgumentError] if collection is not a String
              def initialize(collection, url)
                raise ArgumentError, 'Collection not defined' unless collection.is_a? String
                # Trim a trailing slash so we never build ".../solr//collection".
                @solr = RSolr.connect(:url => "#{url.chomp('/')}/#{collection}")
                return unless block_given?
                yield self
                close
              end

              # Adds every non-empty Hash found in the JSON array stored in +fn+,
              # committing every COMMIT_INTERVAL documents and once more at the end.
              #
              # @param [String] fn path to a JSON file containing an array of documents
              # @return the result of the final Solr commit
              def ingest(fn)
                puts "Ingesting #{fn}"
                n = 0
                JSON.parse(File.read(fn)).each do |doc|
                  # Skip anything that is not a populated document.
                  next unless doc.is_a?(Hash) && !doc.empty?
                  # These fields are removed before the document is re-added.
                  doc.delete('_version_')
                  doc.delete('timestamp')
                  putc '.'
                  @solr.add doc
                  n += 1
                  next unless (n % COMMIT_INTERVAL).zero?
                  @solr.commit
                  puts "\ncommit #{COMMIT_INTERVAL} records, #{n} total\n"
                end
                puts "\n#{n} records\n"
                @solr.commit
              end

              # Commits pending work and drops the connection. Safe to call twice.
              def close
                return if @solr.nil?
                @solr.commit
                #@solr.optimize
                @solr = nil
              end
            end
         | 
| 41 | 
            +
             | 
| 42 | 
            +
             | 
| 43 | 
            +
            # __MAIN__
         | 
| 44 | 
            +
            # ARGV[0] is the collection name; ARGV[1] optionally overrides the Solr base URL.
            solr_url = ARGV[1] || 'http://localhost:18080/solr'
            IngestOgp.new(ARGV[0], solr_url) do |ogp|
              # Ingest every transformed*.json file found in the current directory.
              Dir.glob("transformed*.json") { |fn| ogp.ingest(fn) }
            end
         | 
    
        data/ogp/select.rb
    ADDED
    
    | @@ -0,0 +1,20 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
            #
         | 
| 3 | 
            +
            # Usage: select.rb
         | 
| 4 | 
            +
             | 
| 5 | 
            +
             | 
| 6 | 
            +
            require 'awesome_print'
         | 
| 7 | 
            +
            require 'json'
         | 
| 8 | 
            +
             | 
| 9 | 
            +
             | 
| 10 | 
            +
            # __MAIN__
         | 
| 11 | 
            +
            # Keep each record from the transformed*.json files with probability 0.01,
            # print the sample, and write it to selected.json.
            selected = []
            Dir.glob('transformed*.json') do |fn|
              records = JSON::parse(File.read(fn))
              records.each { |record| selected << record if rand < 0.01 }
            end
            ap({:selected => selected})
            File.open('selected.json', 'wb') { |out| out << JSON.pretty_generate(selected) }
         | 
    
        data/ogp/transform.rb
    ADDED
    
    | @@ -0,0 +1,354 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
            #
         | 
| 3 | 
            +
            # Usage: transform_ogp output.json
         | 
| 4 | 
            +
            #
         | 
| 5 | 
            +
            #  Reads valid*.json in current directory
         | 
| 6 | 
            +
            #
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            require 'awesome_print'
         | 
| 9 | 
            +
            require 'json'
         | 
| 10 | 
            +
            require 'uri'
         | 
| 11 | 
            +
            require 'date'
         | 
| 12 | 
            +
            require 'nokogiri'
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            # Transforms an OGP schema into GeoBlacklight. Requires input of a JSON array
         | 
| 15 | 
            +
            # of OGP hashes.
         | 
| 16 | 
            +
            class TransformOgp
         | 
| 17 | 
            +
             | 
| 18 | 
            +
              def initialize(fn)
                # Opens +fn+ for binary writing, emits the opening bracket of a JSON
                # array, and hands self to the caller's block.
                @output = File.open(fn, 'wb')
                @output.write "[\n"
                @fgdcdir = 'fgdc'
                begin
                  yield self
                ensure
                  # Always run close (defined elsewhere in this class) so the output
                  # handle is not left open when the block raises.
                  self.close
                end
              end
         | 
| 25 | 
            +
              
         | 
| 26 | 
            +
              # @param [String|Array] s the URI to clean up
         | 
| 27 | 
            +
              # @return [String] a normalized URI
         | 
| 28 | 
            +
              def clean_uri(s)
                # Accept either a single URI or an Array of them (only the first is used).
                candidate = s.is_a?(Array) ? s.first : s
                # Surrounding whitespace would make URI() raise; nil/blank means "no URI".
                candidate = candidate.to_s.strip
                return '' if candidate.empty?
                URI(candidate).to_s
              end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
              # @param [String] fn filename of JSON array of OGP hash objects
         | 
| 36 | 
            +
              # @return [Hash] stats about :accepted vs. :rejected records
         | 
| 37 | 
            +
              def transform_file(fn)
                # Parses the JSON array in +fn+ and runs #transform on each non-empty
                # entry. An ArgumentError from #transform is printed and counted as a
                # rejection; any other exception propagates to the caller.
                stats = { :accepted => 0, :rejected => 0 }
                puts "Parsing #{fn}"
                # binread closes the file itself, so no handle is left open.
                json = JSON::parse(File.binread(fn))
                json.each do |doc| # contains JSON Solr query results
                  next if doc.empty?
                  begin
                    transform(doc)
                    stats[:accepted] += 1
                  rescue ArgumentError => e
                    puts e
                    stats[:rejected] += 1
                  end
                end
                stats
              end
         | 
| 54 | 
            +
             | 
| 55 | 
            +
              # Transforms a single OGP record into a GeoBlacklight record
         | 
| 56 | 
            +
              # @param [Hash] layer an OGP hash for a given layer
         | 
| 57 | 
            +
              def transform(layer, skip_fgdc = true)
         | 
| 58 | 
            +
                id = layer['LayerId'].to_s.strip
         | 
| 59 | 
            +
                puts "Tranforming #{id}"
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                # For URN style @see http://www.ietf.org/rfc/rfc2141.txt
         | 
| 62 | 
            +
                # For ARK @see https://wiki.ucop.edu/display/Curation/ARK
         | 
| 63 | 
            +
                prefix = case layer['Institution']
         | 
| 64 | 
            +
                when 'Stanford'
         | 
| 65 | 
            +
                  'http://purl.stanford.edu/'
         | 
| 66 | 
            +
                when 'Tufts'
         | 
| 67 | 
            +
                  'urn:geodata.tufts.edu:'
         | 
| 68 | 
            +
                when 'MassGIS'
         | 
| 69 | 
            +
                  'urn:massgis.state.ma.us:'
         | 
| 70 | 
            +
                when 'Berkeley'
         | 
| 71 | 
            +
                  'http://ark.cdlib.org/ark:/'
         | 
| 72 | 
            +
                when 'MIT'
         | 
| 73 | 
            +
                  'urn:arrowsmith.mit.edu:'
         | 
| 74 | 
            +
                when 'Harvard'
         | 
| 75 | 
            +
                  'urn:hul.harvard.edu:'
         | 
| 76 | 
            +
                else
         | 
| 77 | 
            +
                  ''
         | 
| 78 | 
            +
                end
         | 
| 79 | 
            +
                uuid = prefix + URI.encode(id)
         | 
| 80 | 
            +
                
         | 
| 81 | 
            +
                # Parse out the Location to get the WMS/WFS/WCS URLs
         | 
| 82 | 
            +
                raise ArgumentError, "ERROR: #{id} no location" if layer['Location'].nil? or layer['Location'].empty?
         | 
| 83 | 
            +
                location = JSON::parse(layer['Location'])
         | 
| 84 | 
            +
                raise ArgumentError, "ERROR: #{id} has malformed location" unless location.is_a? Hash
         | 
| 85 | 
            +
                
         | 
| 86 | 
            +
                # Parse out the bounding box
         | 
| 87 | 
            +
                s = layer['MinY'].to_f
         | 
| 88 | 
            +
                w = layer['MinX'].to_f
         | 
| 89 | 
            +
                n = layer['MaxY'].to_f
         | 
| 90 | 
            +
                e = layer['MaxX'].to_f
         | 
| 91 | 
            +
                
         | 
| 92 | 
            +
                # Parse out the ContentDate date/time
         | 
| 93 | 
            +
                dt = DateTime.rfc3339(layer['ContentDate'])
         | 
| 94 | 
            +
                pub_dt = DateTime.rfc3339('2000-01-01T00:00:00Z') # XXX fake data, get from MODS
         | 
| 95 | 
            +
                
         | 
| 96 | 
            +
                access = layer['Access']
         | 
| 97 | 
            +
                collection = nil
         | 
| 98 | 
            +
                
         | 
| 99 | 
            +
                # Parse out the PURL and other metadata for Stanford
         | 
| 100 | 
            +
                if layer['Institution'] == 'Stanford'
         | 
| 101 | 
            +
                  purl = location['purl']
         | 
| 102 | 
            +
                  if purl.is_a? Array
         | 
| 103 | 
            +
                    purl = purl.first
         | 
| 104 | 
            +
                  end
         | 
| 105 | 
            +
                  if purl.nil? and uuid =~ /^http/
         | 
| 106 | 
            +
                    purl = uuid
         | 
| 107 | 
            +
                  end
         | 
| 108 | 
            +
                else
         | 
| 109 | 
            +
                  purl = nil
         | 
| 110 | 
            +
                  # Because OGP does not delimit keywords, we use a heuristic here
         | 
| 111 | 
            +
                  %w{PlaceKeywords ThemeKeywords}.each do |k|
         | 
| 112 | 
            +
                    unless layer[k] =~ /[;,]/ or layer[k].split.size < 4
         | 
| 113 | 
            +
                      layer[k] = layer[k].split.join(';')
         | 
| 114 | 
            +
                    end
         | 
| 115 | 
            +
                  end
         | 
| 116 | 
            +
                end
         | 
| 117 | 
            +
                
         | 
| 118 | 
            +
                slug = to_slug(id, layer)
         | 
| 119 | 
            +
                
         | 
| 120 | 
            +
                layer_geom_type = layer['DataType'].to_s.downcase
         | 
| 121 | 
            +
                layer_geom_type = 'raster' if layer_geom_type == 'paper map'
         | 
| 122 | 
            +
                
         | 
| 123 | 
            +
                # @see https://github.com/OSGeo/Cat-Interop
         | 
| 124 | 
            +
                %w{wcs wfs wms}.each do |k|
         | 
| 125 | 
            +
                  location[k] = location[k].first if location[k].is_a? Array
         | 
| 126 | 
            +
                end
         | 
| 127 | 
            +
                refs = {}
         | 
| 128 | 
            +
                refs['http://www.opengis.net/def/serviceType/ogc/wcs'] = "#{location['wcs']}" if location['wcs']
         | 
| 129 | 
            +
                refs['http://www.opengis.net/def/serviceType/ogc/wfs'] = "#{location['wfs']}" if location['wfs']
         | 
| 130 | 
            +
                refs['http://www.opengis.net/def/serviceType/ogc/wms'] = "#{location['wms']}" if location['wms']
         | 
| 131 | 
            +
                if purl
         | 
| 132 | 
            +
                  refs["http://schema.org/thumbnailUrl"] = "http://stacks.stanford.edu/file/druid:#{id}/preview.jpg"
         | 
| 133 | 
            +
                  refs["http://schema.org/url"] = "#{clean_uri(purl)}"
         | 
| 134 | 
            +
                  refs["http://schema.org/DownloadAction"] = "http://stacks.stanford.edu/file/druid:#{id}/data.zip"
         | 
| 135 | 
            +
                  refs["http://www.isotc211.org/schemas/2005/gmd/"] = "#{purl}.iso19139"
         | 
| 136 | 
            +
                  refs["http://www.loc.gov/mods/v3"] = "#{purl}.mods"
         | 
| 137 | 
            +
                end
         | 
| 138 | 
            +
                
         | 
| 139 | 
            +
                # Make the conversion from OGP to GeoBlacklight
         | 
| 140 | 
            +
                #
         | 
| 141 | 
            +
                # @see http://dublincore.org/documents/dcmi-terms/
         | 
| 142 | 
            +
                # @see http://wiki.dublincore.org/index.php/User_Guide/Creating_Metadata
         | 
| 143 | 
            +
                # @see http://www.ietf.org/rfc/rfc5013.txt
         | 
| 144 | 
            +
                new_layer = {
         | 
| 145 | 
            +
                  :uuid               => uuid,
         | 
| 146 | 
            +
                  
         | 
| 147 | 
            +
                  # Dublin Core elements
         | 
| 148 | 
            +
                  :dc_creator_sm      => string2array(layer['Originator']),
         | 
| 149 | 
            +
                  :dc_description_s   => layer['Abstract'],
         | 
| 150 | 
            +
                  :dc_format_s        => (
         | 
| 151 | 
            +
                    (layer_geom_type == 'raster') ? 
         | 
| 152 | 
            +
                    'GeoTIFF' : # 'image/tiff' : 
         | 
| 153 | 
            +
                    'Shapefile' # 'application/x-esri-shapefile'
         | 
| 154 | 
            +
                  ), # XXX: fake data
         | 
| 155 | 
            +
                  :dc_identifier_s    => uuid,
         | 
| 156 | 
            +
                  :dc_language_s      => 'English', # 'en', # XXX: fake data
         | 
| 157 | 
            +
                  :dc_publisher_s     => layer['Publisher'],
         | 
| 158 | 
            +
                  :dc_rights_s        => access,
         | 
| 159 | 
            +
                  :dc_subject_sm      => string2array(layer['ThemeKeywords']),
         | 
| 160 | 
            +
                  :dc_title_s         => layer['LayerDisplayName'],
         | 
| 161 | 
            +
                  :dc_type_s          => 'Dataset',  # or 'Image' for non-georectified, 
         | 
| 162 | 
            +
                                                     # or 'PhysicalObject' for non-digitized maps
         | 
| 163 | 
            +
                  # Dublin Core terms
         | 
| 164 | 
            +
                  :dct_isPartOf_sm    => collection.nil?? nil : [collection],
         | 
| 165 | 
            +
                  :dct_references_s   => refs.to_json.to_s,
         | 
| 166 | 
            +
                  :dct_spatial_sm     => string2array(layer['PlaceKeywords']),
         | 
| 167 | 
            +
                  :dct_temporal_sm    => [dt.year.to_s],
         | 
| 168 | 
            +
                  :dct_issued_s       => pub_dt.year.to_s,
         | 
| 169 | 
            +
                  :dct_provenance_s   => layer['Institution'],
         | 
| 170 | 
            +
             | 
| 171 | 
            +
                 #
         | 
| 172 | 
            +
                 # xmlns:georss="http://www.georss.org/georss"
         | 
| 173 | 
            +
                 # A bounding box is a rectangular region, often used to define the extents of a map or a rough area of interest. A box contains two space-separated latitude-longitude pairs, with each pair separated by whitespace. The first pair is the lower corner, the second is the upper corner.
         | 
| 174 | 
            +
                  :georss_box_s       => "#{s} #{w} #{n} #{e}",
         | 
| 175 | 
            +
                  :georss_polygon_s   => "#{n} #{w} #{n} #{e} #{s} #{e} #{s} #{w} #{n} #{w}",
         | 
| 176 | 
            +
                 
         | 
| 177 | 
            +
                  # Layer-specific schema
         | 
| 178 | 
            +
                  :layer_slug_s       => slug,
         | 
| 179 | 
            +
                  :layer_id_s         => layer['WorkspaceName'] + ':' + layer['Name'],
         | 
| 180 | 
            +
                  # :layer_srs_s        => 'EPSG:4326', # XXX: fake data
         | 
| 181 | 
            +
                  :layer_geom_type_s  => layer_geom_type.capitalize,
         | 
| 182 | 
            +
                  :layer_modified_dt  => Time.now.utc.strftime('%FT%TZ'),
         | 
| 183 | 
            +
                  
         | 
| 184 | 
            +
                  # derived fields used only by solr, for which copyField is insufficient
         | 
| 185 | 
            +
                  :solr_bbox  => "#{w} #{s} #{e} #{n}", # minX minY maxX maxY
         | 
| 186 | 
            +
                  :solr_ne_pt => "#{n},#{e}",
         | 
| 187 | 
            +
                  :solr_sw_pt => "#{s},#{w}",
         | 
| 188 | 
            +
                  :solr_geom  => "ENVELOPE(#{w}, #{e}, #{n}, #{s})",
         | 
| 189 | 
            +
                  :solr_year_i => dt.year,
         | 
| 190 | 
            +
                  :solr_issued_dt => pub_dt.strftime('%FT%TZ'), # Solr requires 1995-12-31T23:59:59Z
         | 
| 191 | 
            +
                  :solr_wms_url => location['wms'],
         | 
| 192 | 
            +
                  :solr_wfs_url => location['wfs'],
         | 
| 193 | 
            +
                  :solr_wcs_url => location['wcs']
         | 
| 194 | 
            +
                  
         | 
| 195 | 
            +
                  # :layer_year_i       => dt.year#, # XXX: migrate to copyField
         | 
| 196 | 
            +
                  # :ogp_area_f         => layer['Area'],
         | 
| 197 | 
            +
                  # :ogp_center_x_f     => layer['CenterX'],
         | 
| 198 | 
            +
                  # :ogp_center_y_f     => layer['CenterY'],
         | 
| 199 | 
            +
                  # :ogp_georeferenced_b   => (layer['GeoReferenced'].to_s.downcase == 'true'),
         | 
| 200 | 
            +
                  # :ogp_halfheight_f   => layer['HalfHeight'],
         | 
| 201 | 
            +
                  # :ogp_halfwidth_f    => layer['HalfWidth'],
         | 
| 202 | 
            +
                  # :ogp_layer_id_s     => layer['LayerId'],
         | 
| 203 | 
            +
                  # :ogp_name_s         => layer['Name'],
         | 
| 204 | 
            +
                  # :ogp_location_s     => layer['Location'],
         | 
| 205 | 
            +
                  # :ogp_workspace_s    => layer['WorkspaceName']
         | 
| 206 | 
            +
                }
         | 
| 207 | 
            +
                
         | 
| 208 | 
            +
                # Remove any fields that are blank
         | 
| 209 | 
            +
                new_layer.each do |k, v| 
         | 
| 210 | 
            +
                  new_layer.delete(k) if v.nil? or (v.respond_to?(:empty?) and v.empty?)
         | 
| 211 | 
            +
                end
         | 
| 212 | 
            +
                
         | 
| 213 | 
            +
                # Write the JSON record for the GeoBlacklight layer
         | 
| 214 | 
            +
                @output.write JSON::pretty_generate(new_layer)
         | 
| 215 | 
            +
                @output.write "\n,\n"
         | 
| 216 | 
            +
                
         | 
| 217 | 
            +
                unless skip_fgdc or layer['FgdcText'].nil? or layer['FgdcText'].empty?
         | 
| 218 | 
            +
                  xml = Nokogiri::XML(layer['FgdcText'])
         | 
| 219 | 
            +
                  xml.write_xml_to(File.open('fgdc' + '/' + slug + '.xml', 'wb'), :encoding => 'UTF-8', :indent => 2)
         | 
| 220 | 
            +
                end
         | 
| 221 | 
            +
              end
         | 
| 222 | 
            +
             | 
| 223 | 
            +
              # Terminates the JSON array and closes the output file.
              #
              # Every record written earlier is followed by a comma, so an empty
              # object is appended before the closing bracket to keep the array
              # syntactically valid.
              def close
                @output.write("\n {} \n]\n")
                @output.close
              end
         | 
| 227 | 
            +
                
         | 
| 228 | 
            +
              # @param [String] s has semi-colon/comma/gt delimited array
         | 
| 229 | 
            +
              # @return [Array] results as array
         | 
| 230 | 
            +
              def string2array(s)
                # Splits a semi-colon/comma/gt delimited string into distinct,
                # trimmed, non-empty values. Returns nil when +s+ is not a String
                # or when no value remains, so callers can treat the field as blank.
                return nil unless s.is_a?(String)

                # Strip *before* de-duplicating so that "a; a " collapses to ["a"],
                # and drop the empty tokens produced by leading/doubled delimiters.
                values = s.split(/[;,>]/).map { |i| i.strip }.reject { |i| i.empty? }.uniq
                values.empty? ? nil : values
              end
         | 
| 239 | 
            +
              
         | 
| 240 | 
            +
              @@slugs = {}
         | 
| 241 | 
            +
              # Builds a lowercase slug of the form "<institution>-<name>" that is
              # unique among all slugs handed out so far (tracked in @@slugs).
              #
              # @param [String] id layer identifier (not used by this method)
              # @param [Hash] layer record; reads 'Name', 'LayerDisplayName' and 'Institution'
              # @return [String] slug containing only a-z, 0-9 and '-'
              def to_slug(id, layer)
                # strip out schema and usernames (first occurrence of each, in this order)
                prefixes = ['SDE_DATA.', 'SDE.', 'SDE2.', 'GISPORTAL.GISOWNER01.', 'GISDATA.', 'MORIS.']
                name = prefixes.inject(layer['Name']) { |n, prefix| n.sub(prefix, '') }

                # use first word of title if the name is empty (or a single character)
                name = layer['LayerDisplayName'].split.first unless name.size > 1

                # interpolation tolerates a nil name (blank display name) instead of raising
                slug = "#{layer['Institution']}-#{name}"

                # slugs should only have a-z, 0-9, and - (runs of '-' are collapsed)
                slug = slug.gsub(/[^a-zA-Z0-9\-]/, '-').gsub(/[\-]+/, '-').downcase

                # ensure slugs are unique for this pass: keep drawing random suffixes
                # until the candidate has not been handed out before
                unique = slug
                while @@slugs.include?(unique)
                  unique = slug + '-' + sprintf("%06d", Random.rand(999999))
                end
                @@slugs[unique] = true

                unique
              end
         | 
| 265 | 
            +
             | 
| 266 | 
            +
              # Ensure that the WMS/WFS/WCS location values are as expected
         | 
| 267 | 
            +
              # Parses the JSON-encoded location of a layer and normalizes its
              # WMS/WCS/WFS entries to arrays of HTTP(S) URLs.
              #
              # @param [String] id layer identifier, used in error messages
              # @param [String] location JSON object with 'wms' and 'wcs' and/or 'wfs' keys
              # @return [String] the normalized location serialized as JSON
              # @raise [ArgumentError] if the JSON is invalid, a required service is
              #   missing, or any URL is malformed or not HTTP(S)
              def validate_location(id, location)
                require 'uri' # idempotent; guarantees URI is loaded for the checks below

                begin
                  x = JSON::parse(location)
                rescue JSON::ParserError, TypeError
                  raise ArgumentError, "ERROR: #{id}: Invalid JSON: #{location}"
                end
                raise ArgumentError, "ERROR: #{id}: Invalid JSON: #{location}" unless x.is_a?(Hash)

                if x['wms'].nil? || (x['wcs'].nil? && x['wfs'].nil?)
                  raise ArgumentError, "ERROR: #{id}: Missing WMS or WCS/WFS: #{x}"
                end

                %w{wms wcs wfs}.each do |protocol|
                  next if x[protocol].nil?

                  # a single URL may be given as a bare string; normalize to an array
                  x[protocol] = [x[protocol]] if x[protocol].is_a?(String)
                  unless x[protocol].is_a?(Array)
                    raise ArgumentError, "ERROR: #{id}: Unknown #{protocol} value: #{x}"
                  end

                  x[protocol].each do |url|
                    begin
                      uri = URI.parse(url)
                    rescue StandardError
                      # unparsable or non-string entry
                      raise ArgumentError, "ERROR: #{id}: Invalid #{protocol}: #{x}"
                    end
                    # URI::HTTPS is a subclass of URI::HTTP, so this accepts both schemes
                    raise ArgumentError, "ERROR: #{id}: Invalid URL: #{uri}" unless uri.is_a?(URI::HTTP)
                  end
                end

                x.to_json
              end
         | 
| 301 | 
            +
              
         | 
| 302 | 
            +
              # True when +lon+ lies within the longitude range -180..180 (inclusive).
              def lon?(lon)
                lon.between?(-180, 180)
              end
         | 
| 305 | 
            +
              
         | 
| 306 | 
            +
              # True when +lat+ lies within the latitude range -90..90 (inclusive).
              def lat?(lat)
                lat.between?(-90, 90)
              end
         | 
| 309 | 
            +
            end
         | 
| 310 | 
            +
             | 
| 311 | 
            +
             | 
| 312 | 
            +
            # __MAIN__
         | 
| 313 | 
            +
            #
         | 
| 314 | 
            +
            # Transform every valid*.json file in the working directory, writing to
            # the file named by the first argument (default: transformed.json), and
            # print the accumulated accepted/rejected counts.
            output_fn = ARGV[0] || 'transformed.json'
            TransformOgp.new(output_fn) do |ogp|
              totals = { :accepted => 0, :rejected => 0 }
              Dir.glob('valid*.json').each do |fn|
                counts = ogp.transform_file(fn)
                [:accepted, :rejected].each { |key| totals[key] += counts[key] }
              end
              ap({:statistics => totals})
            end
         | 
| 323 | 
            +
             | 
| 324 | 
            +
            # example input data
         | 
| 325 | 
            +
            __END__
         | 
| 326 | 
            +
            [
         | 
| 327 | 
            +
            {
         | 
| 328 | 
            +
              "Abstract": "The boundaries of each supervisorial district in Sonoma County based on 2000 census. Redrawn in 2001 using Autobound.",
         | 
| 329 | 
            +
              "Access": "Public",
         | 
| 330 | 
            +
              "Area": 0.9463444815860053,
         | 
| 331 | 
            +
              "Availability": "Online",
         | 
| 332 | 
            +
              "CenterX": -122.942159,
         | 
| 333 | 
            +
              "CenterY": 38.4580755,
         | 
| 334 | 
            +
              "ContentDate": "2000-01-01T01:01:01Z",
         | 
| 335 | 
            +
              "DataType": "Polygon",
         | 
| 336 | 
            +
              "FgdcText": "...",
         | 
| 337 | 
            +
              "GeoReferenced": true,
         | 
| 338 | 
            +
              "HalfHeight": 0.39885650000000084,
         | 
| 339 | 
            +
              "HalfWidth": 0.593161000000002,
         | 
| 340 | 
            +
              "Institution": "Berkeley",
         | 
| 341 | 
            +
              "LayerDisplayName": "SCGISDB2_BASE_ADM_SUPERVISOR",
         | 
| 342 | 
            +
              "LayerId": "28722/bk0012h5s52",
         | 
| 343 | 
            +
              "Location": "{\"wms\":[\"http://gis.lib.berkeley.edu:8080/geoserver/wms\"],\"tilecache\":[\"http://gis.lib.berkeley.edu:8080/geoserver/gwc/service/wms\"],\"download\":\"\",\"wfs\":[\"http://gis.lib.berkeley.edu:8080/geoserver/wfs\"]}",
         | 
| 344 | 
            +
              "MaxX": -122.348998,
         | 
| 345 | 
            +
              "MaxY": 38.856932,
         | 
| 346 | 
            +
              "MinX": -123.53532,
         | 
| 347 | 
            +
              "MinY": 38.059219,
         | 
| 348 | 
            +
              "Name": "ADM_SUPERVISOR",
         | 
| 349 | 
            +
              "PlaceKeywords": "Sonoma County County of Sonoma Sonoma California Bay Area",
         | 
| 350 | 
            +
              "Publisher": "UC Berkeley Libraries",
         | 
| 351 | 
            +
              "ThemeKeywords": "Supervisorial districts 1st District 2nd District 3rd District 4th District 5th District",
         | 
| 352 | 
            +
              "WorkspaceName": "UCB"
         | 
| 353 | 
            +
            }
         | 
| 354 | 
            +
            ]
         | 
    
        data/ogp/validate.rb
    ADDED
    
    | @@ -0,0 +1,182 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
            #
         | 
| 3 | 
            +
            # Usage: validate_ogp [output.json]
         | 
| 4 | 
            +
            #
         | 
| 5 | 
            +
            #  Reads data/*.json as input and writes the valid records to output.json (default: valid.json)
         | 
| 6 | 
            +
            #
         | 
| 7 | 
            +
            require 'awesome_print'
         | 
| 8 | 
            +
            require 'json'
         | 
| 9 | 
            +
            require 'uri'
         | 
| 10 | 
            +
            require 'date'
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            # Validates layer records taken from JSON Solr query results and writes
            # the accepted records to a JSON output file.
            #
            # The output is a JSON array in which every accepted record is followed
            # by a comma; #close appends an empty object ({}) before the closing
            # bracket so that the array stays syntactically valid.
            class ValidateOgp
              REQUIRED_FIELDS = %w{LayerId Name Institution Access MinX MinY MaxX MaxY LayerDisplayName Location}.freeze
              INSTITUTIONS    = %w{Berkeley Harvard MIT MassGIS Stanford Tufts}.freeze
              DATA_TYPES      = %w{Line Paper\ Map Point Polygon Raster LibraryRecord}.freeze
              ACCESS_LEVELS   = %w{Public Restricted}.freeze
              PROTOCOLS       = %w{wms wcs wfs}.freeze

              # Opens +fn+ for writing, yields the validator to the caller's block and
              # finalizes the output afterwards, even if the block raises.
              #
              # @param [String] fn path of the JSON file to write
              # @yield [ValidateOgp] the validator itself
              def initialize(fn)
                @wms_servers = {}
                @output = File.open(fn, 'wb')
                @output.write "[\n"
                begin
                  yield self
                ensure
                  close
                end
              end

              # Validates every document in a Solr JSON response file.
              #
              # @param [String] fn path to a file holding { 'response' => { 'docs' => [...] } }
              # @return [Hash] counts under :accepted and :rejected
              def validate_file(fn)
                stats = { :accepted => 0, :rejected => 0 }
                puts "Validating #{fn}"
                json = JSON::parse(File.read(fn))
                json['response']['docs'].each do |doc| # contains JSON Solr query results
                  begin
                    validate(doc)
                    stats[:accepted] += 1
                  rescue ArgumentError => e
                    puts e
                    stats[:rejected] += 1
                  end
                end
                stats
              end

              # Validates a single layer record and, if acceptable, appends it to the
              # output. The record is modified in place: 'Availability' is normalized
              # to 'Online' and 'Location' is replaced by its normalized JSON.
              #
              # @param [Hash] layer the record to check
              # @raise [ArgumentError] describing the first problem found
              def validate(layer)
                id = layer['LayerId']

                REQUIRED_FIELDS.each do |k|
                  raise ArgumentError, "ERROR: #{id} missing #{k}" if layer[k].nil? || layer[k].to_s.empty?
                end

                %w{MinX MaxX}.each do |k|
                  raise ArgumentError, "ERROR: #{id}: Invalid longitude value: #{layer[k]}" unless lon?(layer[k])
                end

                %w{MinY MaxY}.each do |k|
                  raise ArgumentError, "ERROR: #{id} Invalid latitude value: #{layer[k]}" unless lat?(layer[k])
                end

                raise_unsupported(id, 'Institution', layer) unless INSTITUTIONS.include?(layer['Institution'])
                raise_unsupported(id, 'DataType', layer) unless DATA_TYPES.include?(layer['DataType'])
                raise_unsupported(id, 'Access', layer) unless ACCESS_LEVELS.include?(layer['Access'])

                k = 'Availability'
                # cleanup of case variants; to_s keeps a missing value from crashing the run
                layer[k] = 'Online' if layer[k].to_s.downcase == 'online'
                raise_unsupported(id, k, layer) unless layer[k] == 'Online'

                # validate_location raises on any problem, so no further check is needed
                layer['Location'] = validate_location(id, layer['Location'])

                k = 'GeoReferenced'
                unless layer[k].nil? || layer[k] == true
                  puts "WARNING: #{id} has boundingbox but claims it is not georeferenced"
                  #layer[k] = true
                end

                k = 'Area'
                # a missing or non-numeric area is rejected rather than raising NoMethodError
                raise_unsupported(id, k, layer) unless layer[k].is_a?(Numeric) && layer[k] > 0

                k = 'ContentDate'
                raise_unsupported(id, k, layer) if layer[k].nil? || layer[k].to_s.empty?
                begin
                  dt = Date.rfc3339(layer[k].to_s)
                rescue ArgumentError
                  # Date.rfc3339 signals an unparsable date with an ArgumentError (subclass)
                  raise_unsupported(id, k, layer)
                end
                if dt.year < 1500 || dt.year > 2100
                  raise ArgumentError, "ERROR: #{id} has suspect #{k}: #{layer[k]}"
                end

                # k = 'FgdcText'
                # unless layer[k].nil? or layer[k].empty?
                #   layer[k] = ''
                # end

                @output.write JSON::pretty_generate(layer)
                @output.write "\n,\n"
              end

              # Terminates the JSON array, closes the output file and prints the WMS
              # servers seen during validation.
              def close
                @output.write "\n {} \n]\n"
                @output.close
                ap({:wms_servers => @wms_servers})
              end

              private

              # Raises the standard "unsupported value" error for field +k+ of +layer+.
              def raise_unsupported(id, k, layer)
                raise ArgumentError, "ERROR: #{id} has unsupported #{k}: #{layer[k]}"
              end

              # Parses the JSON-encoded location, normalizes its WMS/WCS/WFS entries to
              # arrays of HTTP(S) URLs and records the first WMS server in @wms_servers.
              #
              # @param [String] id layer identifier, used in error messages
              # @param [String] location JSON object with 'wms' and 'wcs' and/or 'wfs' keys
              # @return [String] the normalized location serialized as JSON
              # @raise [ArgumentError] if the JSON is invalid, a required service is
              #   missing, or any URL is malformed or not HTTP(S)
              def validate_location(id, location)
                begin
                  x = JSON::parse(location)
                rescue JSON::ParserError, TypeError
                  raise ArgumentError, "ERROR: #{id}: Invalid JSON: #{location}"
                end
                raise ArgumentError, "ERROR: #{id}: Invalid JSON: #{location}" unless x.is_a?(Hash)

                if x['wms'].nil? || (x['wcs'].nil? && x['wfs'].nil?)
                  raise ArgumentError, "ERROR: #{id}: Missing WMS or WCS/WFS: #{x}"
                end

                PROTOCOLS.each do |protocol|
                  next if x[protocol].nil?

                  # a single URL may be given as a bare string; normalize to an array
                  x[protocol] = [x[protocol]] if x[protocol].is_a?(String)
                  unless x[protocol].is_a?(Array)
                    raise ArgumentError, "ERROR: #{id}: Unknown #{protocol} value: #{x}"
                  end

                  x[protocol].each do |url|
                    begin
                      uri = URI.parse(url)
                    rescue StandardError
                      # unparsable or non-string entry
                      raise ArgumentError, "ERROR: #{id}: Invalid #{protocol}: #{x}"
                    end
                    # URI::HTTPS is a subclass of URI::HTTP, so this accepts both schemes
                    raise ArgumentError, "ERROR: #{id}: Invalid URL: #{uri}" unless uri.is_a?(URI::HTTP)
                  end
                end

                @wms_servers[x['wms'].first] = true

                x.to_json
              end

              # True when +lon+ is a number within -180..180 (inclusive).
              def lon?(lon)
                lon.is_a?(Numeric) && lon.between?(-180, 180)
              end

              # True when +lat+ is a number within -90..90 (inclusive).
              def lat?(lat)
                lat.is_a?(Numeric) && lat.between?(-90, 90)
              end
            end
         | 
| 171 | 
            +
             | 
| 172 | 
            +
             | 
| 173 | 
            +
            # __MAIN__
         | 
| 174 | 
            +
            # Validate every data/*.json file, writing accepted records to the file
            # named by the first argument (default: valid.json), and print the
            # accumulated accepted/rejected counts.
            output_fn = ARGV[0] || 'valid.json'
            ValidateOgp.new(output_fn) do |ogp|
              totals = { :accepted => 0, :rejected => 0 }
              Dir.glob('data/*.json').each do |fn|
                counts = ogp.validate_file(fn)
                [:accepted, :rejected].each { |key| totals[key] += counts[key] }
              end
              ap({:statistics => totals})
            end
         |