geohydra 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -1
  3. data/.travis.yml +1 -2
  4. data/Gemfile +1 -8
  5. data/Gemfile.lock +87 -102
  6. data/README.md +2 -2
  7. data/VERSION +1 -1
  8. data/bin/accession.rb +99 -89
  9. data/bin/assemble.rb +288 -247
  10. data/bin/assemble_data.rb +54 -51
  11. data/bin/assemble_placenames.rb +85 -85
  12. data/bin/build_stage_options.rb +24 -18
  13. data/bin/derive_wgs84.rb +65 -66
  14. data/bin/extract_thumbnail.rb +38 -37
  15. data/bin/geo2mods.rb +78 -0
  16. data/bin/geohydra +14 -5
  17. data/bin/ingest_arcgis.rb +80 -60
  18. data/bin/iso2geo.rb +64 -0
  19. data/bin/loader_postgis.rb +121 -227
  20. data/bin/run_task.rb +23 -0
  21. data/bin/sync_geoserver_metadata.rb +132 -127
  22. data/bin/xsltproc-saxon +6 -0
  23. data/geohydra.gemspec +6 -4
  24. data/lib/geohydra.rb +5 -0
  25. data/lib/geohydra/accession.rb +24 -13
  26. data/lib/geohydra/{arcgis_to_iso19139_fc.xsl → arcgis_to_iso19110.xsl} +0 -0
  27. data/lib/geohydra/gazetteer.csv +842 -36
  28. data/lib/geohydra/gazetteer.rb +48 -24
  29. data/lib/geohydra/mods2geoblacklight.xsl +248 -0
  30. data/lib/geohydra/mods2ogp.xsl +5 -8
  31. data/lib/geohydra/transform.rb +8 -2
  32. data/lib/geohydra/utils.rb +6 -0
  33. data/lib/geohydra/workflow/gisAssemblyWF.rb +109 -0
  34. data/lib/geohydra/workflow/gisAssemblyWF.xml +85 -0
  35. data/lib/geohydra/workflow/gisDeliveryWF.rb +33 -0
  36. data/lib/geohydra/workflow/gisDeliveryWF.xml +36 -0
  37. data/lib/geohydra/workflow/gisDiscoveryWF.rb +55 -0
  38. data/lib/geohydra/workflow/gisDiscoveryWF.xml +28 -0
  39. data/lib/geohydra/workflow/task.rb +82 -0
  40. data/ogp/README.md +350 -0
  41. data/ogp/download.rb +92 -0
  42. data/ogp/fgdc2mods.sh +9 -0
  43. data/ogp/fgdc2mods.xsl +884 -0
  44. data/ogp/ingest.rb +48 -0
  45. data/ogp/select.rb +20 -0
  46. data/ogp/transform.rb +354 -0
  47. data/ogp/validate.rb +182 -0
  48. data/{bin → scripts}/ingest_tufts.rb +0 -0
  49. data/scripts/iso2html/doit.sh +15 -0
  50. data/scripts/iso2html/main.css +66 -0
  51. data/scripts/iso2html/pacioos-iso-html.xsl +1749 -0
  52. data/scripts/iso2html/utils/replace-newlines.xsl +97 -0
  53. data/scripts/iso2html/utils/replace-string.xsl +80 -0
  54. data/scripts/iso2html/utils/strip-digits.xsl +60 -0
  55. data/{bin → scripts}/loader.rb +0 -0
  56. data/scripts/rename_shapefiles.rb +5 -0
  57. data/scripts/render_gazetteer.rb +36 -0
  58. data/{bin → scripts}/seed.rb +0 -0
  59. data/{bin → scripts}/solr_indexer.rb +0 -0
  60. data/scripts/status.csv +253 -0
  61. data/scripts/status.rb +32 -0
  62. data/{bin → scripts}/validate_data.rb +1 -1
  63. data/solr/kurma-app-dev/conf/lang/stopwords_en.txt +34 -0
  64. data/solr/kurma-app-dev/conf/protwords.txt +21 -0
  65. data/solr/kurma-app-dev/conf/schema.xml +156 -0
  66. data/solr/kurma-app-dev/conf/solrconfig.xml +161 -0
  67. data/solr/kurma-app-dev/conf/synonyms.txt +29 -0
  68. data/solr/kurma-app-dev/purge.sh +8 -0
  69. data/solr/kurma-app-test/conf/lang/stopwords_en.txt +34 -0
  70. data/solr/kurma-app-test/conf/protwords.txt +21 -0
  71. data/solr/kurma-app-test/conf/schema.xml +158 -0
  72. data/solr/kurma-app-test/conf/solrconfig.xml +161 -0
  73. data/solr/kurma-app-test/conf/synonyms.txt +29 -0
  74. data/solr/kurma-app-test/deploy.sh +15 -0
  75. data/solr/kurma-app-test/purge.sh +8 -0
  76. data/solr/ogp-dev/purge.sh +1 -2
  77. data/spec/fixtures/bw938nk9584/temp/{TRIPURA-iso19139-fc.xml → TRIPURA-iso19110.xml} +0 -0
  78. data/spec/fixtures/cc142xj8436/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
  79. data/spec/fixtures/cg716wc7949/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
  80. data/spec/fixtures/cm007pv9601/temp/{MEGHALAYA-iso19139-fc.xml → MEGHALAYA-iso19110.xml} +0 -0
  81. data/spec/fixtures/cp055nb0189/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
  82. data/spec/fixtures/cs838pw3418/temp/{OIL_GAS_FIELDS-iso19139-fc.xml → OIL_GAS_FIELDS-iso19110.xml} +0 -0
  83. data/spec/fixtures/dd308sy5843/temp/{ORISSA-iso19139-fc.xml → ORISSA-iso19110.xml} +0 -0
  84. data/spec/fixtures/dd452vk1873/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
  85. data/spec/fixtures/dg850pt1796/temp/{STATE1951-iso19139-fc.xml → STATE1951-iso19110.xml} +0 -0
  86. data/spec/fixtures/dn744tf5427/temp/{DISTRICT1991-iso19139-fc.xml → DISTRICT1991-iso19110.xml} +0 -0
  87. data/spec/fixtures/dq603nz8402/temp/{STATE2001-iso19139-fc.xml → STATE2001-iso19110.xml} +0 -0
  88. data/spec/fixtures/dv609zt4699/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
  89. data/spec/fixtures/dz222hw0585/temp/{PUNJAB-iso19139-fc.xml → PUNJAB-iso19110.xml} +0 -0
  90. data/spec/fixtures/fd673qb9705/temp/{STATE1971-iso19139-fc.xml → STATE1971-iso19110.xml} +0 -0
  91. data/spec/fixtures/fg451wp8917/temp/{SIKKIM-iso19139-fc.xml → SIKKIM-iso19110.xml} +0 -0
  92. data/spec/fixtures/fh247yz0156/temp/{RAJASTHAN-iso19139-fc.xml → RAJASTHAN-iso19110.xml} +0 -0
  93. data/spec/fixtures/fs487vd1465/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
  94. data/spec/fixtures/fs591bn3317/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
  95. data/spec/fixtures/fw920bc5473/temp/{PLSS_TWN-iso19139-fc.xml → PLSS_TWN-iso19110.xml} +0 -0
  96. data/spec/fixtures/gj831wj3625/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
  97. data/spec/fixtures/gp075nv3265/temp/{PONDICHERRY-iso19139-fc.xml → PONDICHERRY-iso19110.xml} +0 -0
  98. data/spec/fixtures/gv800hj8141/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
  99. data/spec/fixtures/gw520gz6339/temp/{DADRA_NAGAR_HAVELI_PT-iso19139-fc.xml → DADRA_NAGAR_HAVELI_PT-iso19110.xml} +0 -0
  100. data/spec/fixtures/gy054hz1045/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
  101. data/spec/fixtures/gz352mw6982/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
  102. data/spec/fixtures/hb489vm9892/temp/{DISTRICT1981-iso19139-fc.xml → DISTRICT1981-iso19110.xml} +0 -0
  103. data/spec/fixtures/hw125dq0418/temp/{DELHI-iso19139-fc.xml → DELHI-iso19110.xml} +0 -0
  104. data/spec/fixtures/hw892mn4587/temp/{KERALA-iso19139-fc.xml → KERALA-iso19110.xml} +0 -0
  105. data/spec/fixtures/jb371hz3868/temp/{INCOME-iso19139-fc.xml → INCOME-iso19110.xml} +0 -0
  106. data/spec/fixtures/jc017yk9928/temp/{KARNATAKA-iso19139-fc.xml → KARNATAKA-iso19110.xml} +0 -0
  107. data/spec/fixtures/jf841ys4828/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
  108. data/spec/fixtures/jh802mp2160/temp/{DELHI_PT-iso19139-fc.xml → DELHI_PT-iso19110.xml} +0 -0
  109. data/spec/fixtures/jj806fc3801/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
  110. data/spec/fixtures/jq835yn7161/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
  111. data/spec/fixtures/jr455pt6676/temp/{TAMILNADU-iso19139-fc.xml → TAMILNADU-iso19110.xml} +0 -0
  112. data/spec/fixtures/js637zp2537/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
  113. data/spec/fixtures/jv502wg9611/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
  114. data/spec/fixtures/jw462ck6560/temp/{JAMMU_KASHMIR-iso19139-fc.xml → JAMMU_KASHMIR-iso19110.xml} +0 -0
  115. data/spec/fixtures/kj800fb6273/temp/{STATE2011-iso19139-fc.xml → STATE2011-iso19110.xml} +0 -0
  116. data/spec/fixtures/km504zq3948/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
  117. data/spec/fixtures/ks297fy1411/temp/{OFFSH_BLOCKS-iso19139-fc.xml → OFFSH_BLOCKS-iso19110.xml} +0 -0
  118. data/spec/fixtures/md358hy5049/temp/{MIZORAM-iso19139-fc.xml → MIZORAM-iso19110.xml} +0 -0
  119. data/spec/fixtures/mg745bq0193/temp/{MADHYA_PRADESH-iso19139-fc.xml → MADHYA_PRADESH-iso19110.xml} +0 -0
  120. data/spec/fixtures/mh187yx3536/temp/{WEST_BENGAL-iso19139-fc.xml → WEST_BENGAL-iso19110.xml} +0 -0
  121. data/spec/fixtures/mk488yn6694/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
  122. data/spec/fixtures/my216kp3008/temp/{DELHI-iso19139-fc.xml → DELHI-iso19110.xml} +0 -0
  123. data/spec/fixtures/my504nz9827/temp/{JAMMU_KASHMIR-iso19139-fc.xml → JAMMU_KASHMIR-iso19110.xml} +0 -0
  124. data/spec/fixtures/ng819jm8700/temp/{MANIPUR-iso19139-fc.xml → MANIPUR-iso19110.xml} +0 -0
  125. data/spec/fixtures/np020jq2139/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
  126. data/spec/fixtures/ns377mt1608/temp/{STATE1991-iso19139-fc.xml → STATE1991-iso19110.xml} +0 -0
  127. data/spec/fixtures/nw926np8508/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
  128. data/spec/fixtures/ny358rm8559/temp/{TRIPURA-iso19139-fc.xml → TRIPURA-iso19110.xml} +0 -0
  129. data/spec/fixtures/nz176rm8192/temp/{DISTRICT2011-iso19139-fc.xml → DISTRICT2011-iso19110.xml} +0 -0
  130. data/spec/fixtures/nz252rq2252/temp/{UTTAR_PRADESH-iso19139-fc.xml → UTTAR_PRADESH-iso19110.xml} +0 -0
  131. data/spec/fixtures/pd902kb3348/temp/{MADHYA_PRADESH-iso19139-fc.xml → MADHYA_PRADESH-iso19110.xml} +0 -0
  132. data/spec/fixtures/pz792fz1776/temp/{MAHARASHTRA-iso19139-fc.xml → MAHARASHTRA-iso19110.xml} +0 -0
  133. data/spec/fixtures/qb767ss4042/temp/{UTTAR_PRADESH-iso19139-fc.xml → UTTAR_PRADESH-iso19110.xml} +0 -0
  134. data/spec/fixtures/qc091qw0570/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
  135. data/spec/fixtures/qc652vr7204/temp/{ANDHRA_PRADESH_PT-iso19139-fc.xml → ANDHRA_PRADESH_PT-iso19110.xml} +0 -0
  136. data/spec/fixtures/qk786js7484/temp/{DISTRICT1961-iso19139-fc.xml → DISTRICT1961-iso19110.xml} +0 -0
  137. data/spec/fixtures/qn676pg6767/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
  138. data/spec/fixtures/qr255jh4074/temp/{LOKSABHA_14-iso19139-fc.xml → LOKSABHA_14-iso19110.xml} +0 -0
  139. data/spec/fixtures/qr374kj4827/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
  140. data/spec/fixtures/qy162js1748/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
  141. data/spec/fixtures/rd446vf2633/temp/{NAGALAND-iso19139-fc.xml → NAGALAND-iso19110.xml} +0 -0
  142. data/spec/fixtures/rf389hf2983/temp/{CHHATTISGARH_PT-iso19139-fc.xml → CHHATTISGARH_PT-iso19110.xml} +0 -0
  143. data/spec/fixtures/rf859ff4582/temp/{JHARKHAND-iso19139-fc.xml → JHARKHAND-iso19110.xml} +0 -0
  144. data/spec/fixtures/rh343ds8931/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
  145. data/spec/fixtures/rn815xk8157/temp/{SIKKIM-iso19139-fc.xml → SIKKIM-iso19110.xml} +0 -0
  146. data/spec/fixtures/rq653sz4470/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
  147. data/spec/fixtures/rt625ws6022/temp/{GULF_FAIRWAYS-iso19139-fc.xml → GULF_FAIRWAYS-iso19110.xml} +0 -0
  148. data/spec/fixtures/sc330vf4259/temp/{JHARKHAND-iso19139-fc.xml → JHARKHAND-iso19110.xml} +0 -0
  149. data/spec/fixtures/sq479mx3086/temp/{OFFSH_PLATF-iso19139-fc.xml → OFFSH_PLATF-iso19110.xml} +0 -0
  150. data/spec/fixtures/sr686bm4098/temp/{DAMAN_DIU_PT-iso19139-fc.xml → DAMAN_DIU_PT-iso19110.xml} +0 -0
  151. data/spec/fixtures/sv303sh5583/temp/{ARUNACHAL_PRADESH-iso19139-fc.xml → ARUNACHAL_PRADESH-iso19110.xml} +0 -0
  152. data/spec/fixtures/sy319nh8520/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
  153. data/spec/fixtures/td363vx2792/temp/{HIMACHAL_PRADESH_PT-iso19139-fc.xml → HIMACHAL_PRADESH_PT-iso19110.xml} +0 -0
  154. data/spec/fixtures/tf374bd2484/temp/{DISTRICT1951-iso19139-fc.xml → DISTRICT1951-iso19110.xml} +0 -0
  155. data/spec/fixtures/tj797mj7877/temp/{LOKSABHA_15-iso19139-fc.xml → LOKSABHA_15-iso19110.xml} +0 -0
  156. data/spec/fixtures/tv060wq5179/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
  157. data/spec/fixtures/tv536bn1915/temp/{ARUNACHAL_PRADESH-iso19139-fc.xml → ARUNACHAL_PRADESH-iso19110.xml} +0 -0
  158. data/spec/fixtures/tz359cc2977/temp/{MANIPUR-iso19139-fc.xml → MANIPUR-iso19110.xml} +0 -0
  159. data/spec/fixtures/vb525my6511/temp/{UTTARAKHAND-iso19139-fc.xml → UTTARAKHAND-iso19110.xml} +0 -0
  160. data/spec/fixtures/vh802fs4240/temp/{PONDICHERRY-iso19139-fc.xml → PONDICHERRY-iso19110.xml} +0 -0
  161. data/spec/fixtures/vk120xn2474/temp/{PLSS_SEC-iso19139-fc.xml → PLSS_SEC-iso19110.xml} +0 -0
  162. data/spec/fixtures/vn439bc7316/temp/{KERALA-iso19139-fc.xml → KERALA-iso19110.xml} +0 -0
  163. data/spec/fixtures/vq745jk0695/temp/{MEGHALAYA-iso19139-fc.xml → MEGHALAYA-iso19110.xml} +0 -0
  164. data/spec/fixtures/vr593vj7147/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
  165. data/spec/fixtures/vw911qb5271/temp/{DISTRICT2001-iso19139-fc.xml → DISTRICT2001-iso19110.xml} +0 -0
  166. data/spec/fixtures/wg680pz0365/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
  167. data/spec/fixtures/wg761xn1926/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
  168. data/spec/fixtures/wh870qw1934/temp/{PUNJAB-iso19139-fc.xml → PUNJAB-iso19110.xml} +0 -0
  169. data/spec/fixtures/wk775mm4673/temp/{MAHARASHTRA-iso19139-fc.xml → MAHARASHTRA-iso19110.xml} +0 -0
  170. data/spec/fixtures/ws171yz2165/temp/{ARUNACHAL_PRADESH_PT-iso19139-fc.xml → ARUNACHAL_PRADESH_PT-iso19110.xml} +0 -0
  171. data/spec/fixtures/wt473hz7153/temp/{CHANDIGARH_PT-iso19139-fc.xml → CHANDIGARH_PT-iso19110.xml} +0 -0
  172. data/spec/fixtures/ww217dj0457/temp/{CO2_PIPE-iso19139-fc.xml → CO2_PIPE-iso19110.xml} +0 -0
  173. data/spec/fixtures/wy875pk9849/temp/{STATE1961-iso19139-fc.xml → STATE1961-iso19110.xml} +0 -0
  174. data/spec/fixtures/xb018tk2042/temp/{STATE1981-iso19139-fc.xml → STATE1981-iso19110.xml} +0 -0
  175. data/spec/fixtures/xg539vw8586/temp/{ORISSA-iso19139-fc.xml → ORISSA-iso19110.xml} +0 -0
  176. data/spec/fixtures/xv475kp4644/temp/{ASSAM_PT-iso19139-fc.xml → ASSAM_PT-iso19110.xml} +0 -0
  177. data/spec/fixtures/xy096gc2959/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
  178. data/spec/fixtures/xz518gz3362/temp/{UTTARAKHAND-iso19139-fc.xml → UTTARAKHAND-iso19110.xml} +0 -0
  179. data/spec/fixtures/yh986wy4737/temp/{NAGALAND-iso19139-fc.xml → NAGALAND-iso19110.xml} +0 -0
  180. data/spec/fixtures/yn187fq4474/temp/{KARNATAKA-iso19139-fc.xml → KARNATAKA-iso19110.xml} +0 -0
  181. data/spec/fixtures/yn236mw3250/temp/{TAMILNADU-iso19139-fc.xml → TAMILNADU-iso19110.xml} +0 -0
  182. data/spec/fixtures/yz596nz0112/temp/{WEST_BENGAL-iso19139-fc.xml → WEST_BENGAL-iso19110.xml} +0 -0
  183. data/spec/fixtures/zk596gy7380/temp/{DISTRICT1971-iso19139-fc.xml → DISTRICT1971-iso19110.xml} +0 -0
  184. data/spec/fixtures/zn452hh7431/temp/{RAJASTHAN-iso19139-fc.xml → RAJASTHAN-iso19110.xml} +0 -0
  185. data/spec/fixtures/zt093fw6519/temp/{MIZORAM-iso19139-fc.xml → MIZORAM-iso19110.xml} +0 -0
  186. data/spec/fixtures/zv925hd6723/temp/{OGWELLS-iso19139-fc.xml → OGWELLS-iso19110.xml} +0 -0
  187. data/spec/fixtures/zy658cr1728/temp/{ANDAMAAN_NICOBAR_PT-iso19139-fc.xml → ANDAMAAN_NICOBAR_PT-iso19110.xml} +0 -0
  188. data/spec/fixtures/zz943vx1492/temp/{BASINS-iso19139-fc.xml → BASINS-iso19110.xml} +0 -0
  189. data/spec/unit/gazetteer_spec.rb +100 -35
  190. data/spec/unit/task_spec.rb +68 -0
  191. data/spec/unit/transform_spec.rb +1 -1
  192. data/spec/unit/utils_spec.rb +17 -3
  193. data/workflow.rb +35 -0
  194. metadata +323 -316
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'csv'
4
+ # require 'json'
5
+ require 'net/http'
6
+ # require 'awesome_print'
7
+
8
+ STDOUT.sync = true
9
+
10
+ CSV.foreach('status.csv') do |url|
11
+ # ap({:url => url.first})
12
+ uri = URI(url.first)
13
+ druid = 'unknown'
14
+ druid = $1 if uri.to_s =~ /druid%3A([a-z0-9]+)/
15
+ # ap({:uri => uri})
16
+ uri.host = 'localhost'
17
+ uri.port = 8080
18
+ # ap({:uri => uri})
19
+ begin
20
+ start = Time.now
21
+ res = Net::HTTP.get_response(uri)
22
+ sz = res.body.size
23
+ open("images/#{druid}.png", 'wb') do |f|
24
+ f.write(res.body)
25
+ end
26
+ finish = Time.now
27
+ puts [finish, druid, res.code, res['content-type'], sz, res['geowebcache-cache-result'], finish - start].join(', ')
28
+ rescue => e
29
+ puts e.class, e
30
+ end
31
+ sleep(0.5)
32
+ end
@@ -10,7 +10,7 @@ def validate(path, flags)
10
10
  puts "Processing #{shp}" if flags[:debug]
11
11
  basefn = File.basename(shp, '.shp')
12
12
  unless GeoHydra::Utils.shapefile?(shp)
13
- puts "Error <#{shp}>. Trying to repair..."
13
+ puts "SyntaxError: Not a shapefile <#{shp}>. Trying to repair..."
14
14
  Dir.glob("#{File.dirname(shp)}/#{basefn.gsub(' ', "\\ ")}.*") do |fn|
15
15
  newfn = File.join(File.dirname(fn), File.basename(fn).gsub(/[^a-zA-Z0-9_]/, '_'))
16
16
  FileUtils.mv fn, newfn
@@ -0,0 +1,34 @@
1
+ # Standard english stop words taken from Lucene's StopAnalyzer
2
+ a
3
+ an
4
+ and
5
+ are
6
+ as
7
+ at
8
+ be
9
+ but
10
+ by
11
+ for
12
+ if
13
+ in
14
+ into
15
+ is
16
+ it
17
+ no
18
+ not
19
+ of
20
+ on
21
+ or
22
+ such
23
+ that
24
+ the
25
+ their
26
+ then
27
+ there
28
+ these
29
+ they
30
+ this
31
+ to
32
+ was
33
+ will
34
+ with
@@ -0,0 +1,21 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ #-----------------------------------------------------------------------
14
+ # Use a protected word file to protect against the stemmer reducing two
15
+ # unrelated words to the same base word.
16
+
17
+ # Some non-words that normally won't be encountered,
18
+ # just to test that they won't be stemmed.
19
+ dontstems
20
+ zwhacky
21
+
@@ -0,0 +1,156 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <schema name="kurma-app-dev" version="1.5">
3
+ <uniqueKey>uuid</uniqueKey>
4
+ <fields>
5
+ <field name="_version_" type="long" stored="true" indexed="true"/>
6
+ <field name="timestamp" type="date" stored="true" indexed="true" default="NOW"/>
7
+ <field name="uuid" type="string" stored="true" indexed="true" required="true"/>
8
+
9
+ <!-- core generated fields -->
10
+ <field name="text" type="text_en" stored="false" indexed="true" multiValued="true"
11
+ termVectors="true" termPositions="true" termOffsets="true" />
12
+
13
+ <!-- dynamic field with simple types by suffix -->
14
+ <dynamicField name="*_b" type="boolean" stored="true" indexed="true"/>
15
+ <dynamicField name="*_d" type="double" stored="true" indexed="true"/>
16
+ <dynamicField name="*_dt" type="date" stored="true" indexed="true"/>
17
+ <dynamicField name="*_f" type="float" stored="true" indexed="true"/>
18
+ <dynamicField name="*_i" type="int" stored="true" indexed="true"/>
19
+ <dynamicField name="*_l" type="long" stored="true" indexed="true"/>
20
+ <dynamicField name="*_s" type="string" stored="true" indexed="true"/>
21
+ <dynamicField name="*_ss" type="string" stored="true" indexed="false"/>
22
+ <dynamicField name="*_si" type="string" stored="false" indexed="true"/>
23
+ <dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true" />
24
+ <dynamicField name="*_sm" type="string" stored="true" indexed="true" multiValued="true" />
25
+ <dynamicField name="*_url" type="string" stored="true" indexed="false"/>
26
+ <dynamicField name="*_blob" type="binary" stored="true" indexed="false"/>
27
+
28
+ <!-- dynamic Text fields by suffix without storage -->
29
+ <dynamicField name="*_t" type="text_en" stored="false" indexed="true"
30
+ termVectors="true" termPositions="true" termOffsets="true"/>
31
+ <dynamicField name="*_tm" type="text_en" stored="false" indexed="true" multiValued="true"
32
+ termVectors="true" termPositions="true" termOffsets="true"/>
33
+ <dynamicField name="*_ti" type="text_en" stored="false" indexed="true"
34
+ termVectors="true" termPositions="true" termOffsets="true"/>
35
+ <dynamicField name="*_tmi" type="text_en" stored="false" indexed="true" multiValued="true"
36
+ termVectors="true" termPositions="true" termOffsets="true"/>
37
+ <dynamicField name="*_sort" type="text_sort" stored="false" indexed="true" multiValued="false"/>
38
+
39
+ <!-- Spatial field types:
40
+
41
+ Solr3:
42
+ <field name="my_pt">83.1,-117.312</field>
43
+ as (y,x)
44
+
45
+ Solr4:
46
+
47
+ <field name="my_bbox">-117.312 83.1 -115.39 84.31</field>
48
+ as (W S E N)
49
+
50
+ <field name="my_geom">POLYGON((1 8, 1 9, 2 9, 2 8, 1 8))</field>
51
+ as WKT for point, linestring, polygon
52
+
53
+ -->
54
+ <dynamicField name="*_pt" type="location" stored="true" indexed="true"/>
55
+ <dynamicField name="*_bbox" type="location_rpt" stored="true" indexed="true"/>
56
+ <dynamicField name="*_geom" type="location_jts" stored="true" indexed="true"/>
57
+ <dynamicField name="*_wkt" type="location_jts" stored="true" indexed="true"/>
58
+ </fields>
59
+
60
+ <types>
61
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
62
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
63
+
64
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
65
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
66
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
67
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
68
+
69
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z.
70
+ The trailing "Z" designates UTC time and is mandatory.
71
+ A Trie based date field for faster date range queries and date faceting. -->
72
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
73
+
74
+ <!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
75
+ <fieldtype name="binary" class="solr.BinaryField"/>
76
+
77
+ <!-- A text field with defaults appropriate for English: it
78
+ tokenizes with StandardTokenizer, removes English stop words
79
+ (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
80
+ finally applies Porter's stemming. The query time analyzer
81
+ also applies synonyms from synonyms.txt. -->
82
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
83
+ <analyzer type="index">
84
+ <tokenizer class="solr.StandardTokenizerFactory"/>
85
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
86
+ <filter class="solr.LowerCaseFilterFactory"/>
87
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
88
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
89
+ <filter class="solr.PorterStemFilterFactory"/>
90
+ </analyzer>
91
+ <analyzer type="query">
92
+ <tokenizer class="solr.StandardTokenizerFactory"/>
93
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
94
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
95
+ <filter class="solr.LowerCaseFilterFactory"/>
96
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
97
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
98
+ <filter class="solr.PorterStemFilterFactory"/>
99
+ </analyzer>
100
+ </fieldType>
101
+
102
+ <!-- for alpha sorting as a single token -->
103
+ <fieldType name="text_sort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
104
+ <analyzer>
105
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
106
+ <filter class="solr.LowerCaseFilterFactory" />
107
+ <filter class="solr.TrimFilterFactory" />
108
+ <filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z0-9 ])" replacement="" replace="all"/>
109
+ </analyzer>
110
+ </fieldType>
111
+
112
+ <!-- Spatial field types -->
113
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_d"/>
114
+
115
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
116
+ distErrPct="0.025"
117
+ maxDistErr="0.000009"
118
+ units="degrees"
119
+ />
120
+
121
+ <!-- JTS-enabled spatial predicates; requires JTS installation -->
122
+ <fieldType name="location_jts" class="solr.SpatialRecursivePrefixTreeFieldType"
123
+ spatialContextFactory="com.spatial4j.core.context.jts.JtsSpatialContextFactory"
124
+ distErrPct="0.025"
125
+ maxDistErr="0.000009"
126
+ units="degrees"
127
+ />
128
+ </types>
129
+
130
+ <!-- for scoring formula -->
131
+ <copyField source="dct_spatial_sm" dest="dct_spatial_tmi" maxChars="10000"/>
132
+ <copyField source="dct_temporal_sm" dest="dct_temporal_tmi" maxChars="10000"/>
133
+ <copyField source="dc_creator_sm" dest="dc_creator_tmi" maxChars="1000"/>
134
+ <copyField source="dc_description_s" dest="dc_description_ti" maxChars="10000"/>
135
+ <copyField source="dc_format_s" dest="dc_format_ti" maxChars="100"/>
136
+ <copyField source="dc_identifier_s" dest="dc_identifier_ti" maxChars="100"/>
137
+ <copyField source="dc_publisher_s" dest="dc_publisher_ti" maxChars="1000"/>
138
+ <copyField source="dc_rights_s" dest="dc_rights_ti" maxChars="100"/>
139
+ <copyField source="dct_provenance_s" dest="dct_provenance_ti" maxChars="1000"/>
140
+ <copyField source="dc_subject_sm" dest="dc_subject_tmi" maxChars="10000"/>
141
+ <copyField source="dc_title_s" dest="dc_title_ti" maxChars="1000"/>
142
+ <copyField source="layer_collection_s" dest="layer_collection_ti" maxChars="1000"/>
143
+ <copyField source="layer_geom_type_s" dest="layer_geom_type_ti" maxChars="100"/>
144
+ <copyField source="layer_slug_s" dest="layer_slug_ti" maxChars="100"/>
145
+
146
+ <!-- core text search -->
147
+ <copyField source="*_ti" dest="text" />
148
+ <copyField source="*_tmi" dest="text" />
149
+
150
+ <!-- for sorting text fields -->
151
+ <copyField source="dc_title_s" dest="dc_title_sort"/>
152
+ <copyField source="dc_publisher_s" dest="dc_publisher_sort"/>
153
+ <copyField source="layer_collection_s" dest="layer_collection_sort"/>
154
+
155
+ </schema>
156
+
@@ -0,0 +1,161 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+ <!--
19
+ For more details about configurations options that may appear in
20
+ this file, see http://wiki.apache.org/solr/SolrConfigXml.
21
+ -->
22
+ <config>
23
+ <luceneMatchVersion>LUCENE_CURRENT</luceneMatchVersion>
24
+ <dataDir>${solr.data.dir:}</dataDir>
25
+ <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
26
+ <codecFactory class="solr.SchemaCodecFactory"/>
27
+ <schemaFactory class="ClassicIndexSchemaFactory"/>
28
+ <indexConfig>
29
+ <lockType>${solr.lock.type:native}</lockType>
30
+ </indexConfig>
31
+
32
+ <!-- The default high-performance update handler -->
33
+ <updateHandler class="solr.DirectUpdateHandler2">
34
+ <updateLog>
35
+ <str name="dir">${solr.ulog.dir:}</str>
36
+ </updateLog>
37
+ <autoCommit>
38
+ <maxTime>15000</maxTime>
39
+ <openSearcher>false</openSearcher>
40
+ </autoCommit>
41
+ </updateHandler>
42
+
43
+ <!-- realtime get handler, guaranteed to return the latest stored fields
44
+ of any document, without the need to commit or open a new searcher. The current
45
+ implementation relies on the updateLog feature being enabled. -->
46
+ <requestHandler name="/get" class="solr.RealTimeGetHandler">
47
+ <lst name="defaults">
48
+ <str name="omitHeader">true</str>
49
+ </lst>
50
+ </requestHandler>
51
+
52
+ <requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />
53
+
54
+ <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55
+ Query section - these settings control query time things like caches
56
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
57
+ <query>
58
+ <maxBooleanClauses>1024</maxBooleanClauses>
59
+ <filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0"/>
60
+ <queryResultCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>
61
+ <documentCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>
62
+ <enableLazyFieldLoading>true</enableLazyFieldLoading>
63
+ <queryResultWindowSize>20</queryResultWindowSize>
64
+ <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
65
+ <listener event="newSearcher" class="solr.QuerySenderListener">
66
+ <arr name="queries">
67
+ <lst><str name="q">stanford</str></lst>
68
+ <lst><str name="q">polygon</str></lst>
69
+ </arr>
70
+ </listener>
71
+ <listener event="firstSearcher" class="solr.QuerySenderListener">
72
+ <arr name="queries">
73
+ <lst>
74
+ <str name="q">static firstSearcher warming in solrconfig.xml</str>
75
+ </lst>
76
+ </arr>
77
+ </listener>
78
+ <useColdSearcher>false</useColdSearcher>
79
+ <maxWarmingSearchers>2</maxWarmingSearchers>
80
+ </query>
81
+
82
+ <requestDispatcher handleSelect="false">
83
+ <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048000" formdataUploadLimitInKB="2048"/>
84
+ <httpCaching never304="true"/>
85
+ </requestDispatcher>
86
+
87
+ <requestHandler name="/search" class="solr.SearchHandler"/>
88
+
89
+ <requestHandler name="/select" class="solr.SearchHandler">
90
+ <lst name="defaults">
91
+ <int name="start">0</int>
92
+ <int name="rows">10</int>
93
+ <str name="wt">json</str>
94
+ <int name="indent">2</int>
95
+ <str name="defType">edismax</str>
96
+ <str name="echoParams">all</str>
97
+ <str name="fl">*,score</str>
98
+ <str name="sort">score desc, dc_title_sort asc</str>
99
+ <str name="q.alt">*:*</str>
100
+ <str name="qf">
101
+ text^1
102
+ dc_description_ti^2
103
+ dc_creator_ti^3
104
+ dc_publisher_ti^3
105
+ layer_collection_ti^4
106
+ dc_subject_tmi^5
107
+ dct_spatial_tmi^5
108
+ dct_temporal_tmi^5
109
+ dc_title_ti^6
110
+ dc_rights_ti^7
111
+ dct_provenance_ti^8
112
+ layer_geom_type_ti^9
113
+ layer_slug_ti^10
114
+ dc_identifier_ti^10
115
+ </str>
116
+ <bool name="facet">true</bool>
117
+ <int name="facet.mincount">1</int>
118
+ <int name="facet.limit">10</int>
119
+ <str name="facet.field">dct_spatial_sm</str>
120
+ <str name="facet.field">dc_creator_s</str>
121
+ <str name="facet.field">dc_format_s</str>
122
+ <str name="facet.field">dc_language_s</str>
123
+ <str name="facet.field">dc_publisher_s</str>
124
+ <str name="facet.field">dc_rights_s</str>
125
+ <str name="facet.field">dct_provenance_s</str>
126
+ <str name="facet.field">dc_subject_sm</str>
127
+ <str name="facet.field">layer_collection_s</str>
128
+ <str name="facet.field">layer_geom_type_s</str>
129
+ <str name="facet.field">layer_srs_s</str>
130
+ <str name="facet.field">layer_year_i</str>
131
+ </lst>
132
+ </requestHandler>
133
+
134
+ <requestHandler name="/update" class="solr.UpdateRequestHandler"/>
135
+ <requestHandler name="/admin/" class="solr.admin.AdminHandlers"/>
136
+
137
+ <!-- ping/healthcheck -->
138
+ <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
139
+ <lst name="invariants">
140
+ <str name="q">solrpingquery</str>
141
+ </lst>
142
+ <lst name="defaults">
143
+ <str name="echoParams">all</str>
144
+ </lst>
145
+ <!-- An optional feature of the PingRequestHandler is to configure the
146
+ handler with a "healthcheckFile" which can be used to enable/disable
147
+ the PingRequestHandler.
148
+ relative paths are resolved against the data dir
149
+ -->
150
+ <str name="healthcheckFile">server-enabled.txt</str>
151
+ </requestHandler>
152
+
153
+ <requestHandler name="/analysis/field"
154
+ startup="lazy"
155
+ class="solr.FieldAnalysisRequestHandler" />
156
+
157
+ <!-- Legacy config for the admin interface -->
158
+ <admin>
159
+ <defaultQuery>*:*</defaultQuery>
160
+ </admin>
161
+ </config>
@@ -0,0 +1,29 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ #-----------------------------------------------------------------------
14
+ #some test synonym mappings unlikely to appear in real input text
15
+ aaafoo => aaabar
16
+ bbbfoo => bbbfoo bbbbar
17
+ cccfoo => cccbar cccbaz
18
+ fooaaa,baraaa,bazaaa
19
+
20
+ # Some synonym groups specific to this example
21
+ GB,gib,gigabyte,gigabytes
22
+ MB,mib,megabyte,megabytes
23
+ Television, Televisions, TV, TVs
24
+ #notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
25
+ #after us won't split it into two words.
26
+
27
+ # Synonym mappings can be used for spelling correction too
28
+ pixima => pixma
29
+