geohydra 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -1
- data/.travis.yml +1 -2
- data/Gemfile +1 -8
- data/Gemfile.lock +87 -102
- data/README.md +2 -2
- data/VERSION +1 -1
- data/bin/accession.rb +99 -89
- data/bin/assemble.rb +288 -247
- data/bin/assemble_data.rb +54 -51
- data/bin/assemble_placenames.rb +85 -85
- data/bin/build_stage_options.rb +24 -18
- data/bin/derive_wgs84.rb +65 -66
- data/bin/extract_thumbnail.rb +38 -37
- data/bin/geo2mods.rb +78 -0
- data/bin/geohydra +14 -5
- data/bin/ingest_arcgis.rb +80 -60
- data/bin/iso2geo.rb +64 -0
- data/bin/loader_postgis.rb +121 -227
- data/bin/run_task.rb +23 -0
- data/bin/sync_geoserver_metadata.rb +132 -127
- data/bin/xsltproc-saxon +6 -0
- data/geohydra.gemspec +6 -4
- data/lib/geohydra.rb +5 -0
- data/lib/geohydra/accession.rb +24 -13
- data/lib/geohydra/{arcgis_to_iso19139_fc.xsl → arcgis_to_iso19110.xsl} +0 -0
- data/lib/geohydra/gazetteer.csv +842 -36
- data/lib/geohydra/gazetteer.rb +48 -24
- data/lib/geohydra/mods2geoblacklight.xsl +248 -0
- data/lib/geohydra/mods2ogp.xsl +5 -8
- data/lib/geohydra/transform.rb +8 -2
- data/lib/geohydra/utils.rb +6 -0
- data/lib/geohydra/workflow/gisAssemblyWF.rb +109 -0
- data/lib/geohydra/workflow/gisAssemblyWF.xml +85 -0
- data/lib/geohydra/workflow/gisDeliveryWF.rb +33 -0
- data/lib/geohydra/workflow/gisDeliveryWF.xml +36 -0
- data/lib/geohydra/workflow/gisDiscoveryWF.rb +55 -0
- data/lib/geohydra/workflow/gisDiscoveryWF.xml +28 -0
- data/lib/geohydra/workflow/task.rb +82 -0
- data/ogp/README.md +350 -0
- data/ogp/download.rb +92 -0
- data/ogp/fgdc2mods.sh +9 -0
- data/ogp/fgdc2mods.xsl +884 -0
- data/ogp/ingest.rb +48 -0
- data/ogp/select.rb +20 -0
- data/ogp/transform.rb +354 -0
- data/ogp/validate.rb +182 -0
- data/{bin → scripts}/ingest_tufts.rb +0 -0
- data/scripts/iso2html/doit.sh +15 -0
- data/scripts/iso2html/main.css +66 -0
- data/scripts/iso2html/pacioos-iso-html.xsl +1749 -0
- data/scripts/iso2html/utils/replace-newlines.xsl +97 -0
- data/scripts/iso2html/utils/replace-string.xsl +80 -0
- data/scripts/iso2html/utils/strip-digits.xsl +60 -0
- data/{bin → scripts}/loader.rb +0 -0
- data/scripts/rename_shapefiles.rb +5 -0
- data/scripts/render_gazetteer.rb +36 -0
- data/{bin → scripts}/seed.rb +0 -0
- data/{bin → scripts}/solr_indexer.rb +0 -0
- data/scripts/status.csv +253 -0
- data/scripts/status.rb +32 -0
- data/{bin → scripts}/validate_data.rb +1 -1
- data/solr/kurma-app-dev/conf/lang/stopwords_en.txt +34 -0
- data/solr/kurma-app-dev/conf/protwords.txt +21 -0
- data/solr/kurma-app-dev/conf/schema.xml +156 -0
- data/solr/kurma-app-dev/conf/solrconfig.xml +161 -0
- data/solr/kurma-app-dev/conf/synonyms.txt +29 -0
- data/solr/kurma-app-dev/purge.sh +8 -0
- data/solr/kurma-app-test/conf/lang/stopwords_en.txt +34 -0
- data/solr/kurma-app-test/conf/protwords.txt +21 -0
- data/solr/kurma-app-test/conf/schema.xml +158 -0
- data/solr/kurma-app-test/conf/solrconfig.xml +161 -0
- data/solr/kurma-app-test/conf/synonyms.txt +29 -0
- data/solr/kurma-app-test/deploy.sh +15 -0
- data/solr/kurma-app-test/purge.sh +8 -0
- data/solr/ogp-dev/purge.sh +1 -2
- data/spec/fixtures/bw938nk9584/temp/{TRIPURA-iso19139-fc.xml → TRIPURA-iso19110.xml} +0 -0
- data/spec/fixtures/cc142xj8436/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/cg716wc7949/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/cm007pv9601/temp/{MEGHALAYA-iso19139-fc.xml → MEGHALAYA-iso19110.xml} +0 -0
- data/spec/fixtures/cp055nb0189/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/cs838pw3418/temp/{OIL_GAS_FIELDS-iso19139-fc.xml → OIL_GAS_FIELDS-iso19110.xml} +0 -0
- data/spec/fixtures/dd308sy5843/temp/{ORISSA-iso19139-fc.xml → ORISSA-iso19110.xml} +0 -0
- data/spec/fixtures/dd452vk1873/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/dg850pt1796/temp/{STATE1951-iso19139-fc.xml → STATE1951-iso19110.xml} +0 -0
- data/spec/fixtures/dn744tf5427/temp/{DISTRICT1991-iso19139-fc.xml → DISTRICT1991-iso19110.xml} +0 -0
- data/spec/fixtures/dq603nz8402/temp/{STATE2001-iso19139-fc.xml → STATE2001-iso19110.xml} +0 -0
- data/spec/fixtures/dv609zt4699/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/dz222hw0585/temp/{PUNJAB-iso19139-fc.xml → PUNJAB-iso19110.xml} +0 -0
- data/spec/fixtures/fd673qb9705/temp/{STATE1971-iso19139-fc.xml → STATE1971-iso19110.xml} +0 -0
- data/spec/fixtures/fg451wp8917/temp/{SIKKIM-iso19139-fc.xml → SIKKIM-iso19110.xml} +0 -0
- data/spec/fixtures/fh247yz0156/temp/{RAJASTHAN-iso19139-fc.xml → RAJASTHAN-iso19110.xml} +0 -0
- data/spec/fixtures/fs487vd1465/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/fs591bn3317/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/fw920bc5473/temp/{PLSS_TWN-iso19139-fc.xml → PLSS_TWN-iso19110.xml} +0 -0
- data/spec/fixtures/gj831wj3625/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/gp075nv3265/temp/{PONDICHERRY-iso19139-fc.xml → PONDICHERRY-iso19110.xml} +0 -0
- data/spec/fixtures/gv800hj8141/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/gw520gz6339/temp/{DADRA_NAGAR_HAVELI_PT-iso19139-fc.xml → DADRA_NAGAR_HAVELI_PT-iso19110.xml} +0 -0
- data/spec/fixtures/gy054hz1045/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/gz352mw6982/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/hb489vm9892/temp/{DISTRICT1981-iso19139-fc.xml → DISTRICT1981-iso19110.xml} +0 -0
- data/spec/fixtures/hw125dq0418/temp/{DELHI-iso19139-fc.xml → DELHI-iso19110.xml} +0 -0
- data/spec/fixtures/hw892mn4587/temp/{KERALA-iso19139-fc.xml → KERALA-iso19110.xml} +0 -0
- data/spec/fixtures/jb371hz3868/temp/{INCOME-iso19139-fc.xml → INCOME-iso19110.xml} +0 -0
- data/spec/fixtures/jc017yk9928/temp/{KARNATAKA-iso19139-fc.xml → KARNATAKA-iso19110.xml} +0 -0
- data/spec/fixtures/jf841ys4828/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/jh802mp2160/temp/{DELHI_PT-iso19139-fc.xml → DELHI_PT-iso19110.xml} +0 -0
- data/spec/fixtures/jj806fc3801/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/jq835yn7161/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/jr455pt6676/temp/{TAMILNADU-iso19139-fc.xml → TAMILNADU-iso19110.xml} +0 -0
- data/spec/fixtures/js637zp2537/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/jv502wg9611/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/jw462ck6560/temp/{JAMMU_KASHMIR-iso19139-fc.xml → JAMMU_KASHMIR-iso19110.xml} +0 -0
- data/spec/fixtures/kj800fb6273/temp/{STATE2011-iso19139-fc.xml → STATE2011-iso19110.xml} +0 -0
- data/spec/fixtures/km504zq3948/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/ks297fy1411/temp/{OFFSH_BLOCKS-iso19139-fc.xml → OFFSH_BLOCKS-iso19110.xml} +0 -0
- data/spec/fixtures/md358hy5049/temp/{MIZORAM-iso19139-fc.xml → MIZORAM-iso19110.xml} +0 -0
- data/spec/fixtures/mg745bq0193/temp/{MADHYA_PRADESH-iso19139-fc.xml → MADHYA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/mh187yx3536/temp/{WEST_BENGAL-iso19139-fc.xml → WEST_BENGAL-iso19110.xml} +0 -0
- data/spec/fixtures/mk488yn6694/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/my216kp3008/temp/{DELHI-iso19139-fc.xml → DELHI-iso19110.xml} +0 -0
- data/spec/fixtures/my504nz9827/temp/{JAMMU_KASHMIR-iso19139-fc.xml → JAMMU_KASHMIR-iso19110.xml} +0 -0
- data/spec/fixtures/ng819jm8700/temp/{MANIPUR-iso19139-fc.xml → MANIPUR-iso19110.xml} +0 -0
- data/spec/fixtures/np020jq2139/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/ns377mt1608/temp/{STATE1991-iso19139-fc.xml → STATE1991-iso19110.xml} +0 -0
- data/spec/fixtures/nw926np8508/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/ny358rm8559/temp/{TRIPURA-iso19139-fc.xml → TRIPURA-iso19110.xml} +0 -0
- data/spec/fixtures/nz176rm8192/temp/{DISTRICT2011-iso19139-fc.xml → DISTRICT2011-iso19110.xml} +0 -0
- data/spec/fixtures/nz252rq2252/temp/{UTTAR_PRADESH-iso19139-fc.xml → UTTAR_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/pd902kb3348/temp/{MADHYA_PRADESH-iso19139-fc.xml → MADHYA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/pz792fz1776/temp/{MAHARASHTRA-iso19139-fc.xml → MAHARASHTRA-iso19110.xml} +0 -0
- data/spec/fixtures/qb767ss4042/temp/{UTTAR_PRADESH-iso19139-fc.xml → UTTAR_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/qc091qw0570/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/qc652vr7204/temp/{ANDHRA_PRADESH_PT-iso19139-fc.xml → ANDHRA_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/qk786js7484/temp/{DISTRICT1961-iso19139-fc.xml → DISTRICT1961-iso19110.xml} +0 -0
- data/spec/fixtures/qn676pg6767/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/qr255jh4074/temp/{LOKSABHA_14-iso19139-fc.xml → LOKSABHA_14-iso19110.xml} +0 -0
- data/spec/fixtures/qr374kj4827/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/qy162js1748/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/rd446vf2633/temp/{NAGALAND-iso19139-fc.xml → NAGALAND-iso19110.xml} +0 -0
- data/spec/fixtures/rf389hf2983/temp/{CHHATTISGARH_PT-iso19139-fc.xml → CHHATTISGARH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/rf859ff4582/temp/{JHARKHAND-iso19139-fc.xml → JHARKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/rh343ds8931/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/rn815xk8157/temp/{SIKKIM-iso19139-fc.xml → SIKKIM-iso19110.xml} +0 -0
- data/spec/fixtures/rq653sz4470/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/rt625ws6022/temp/{GULF_FAIRWAYS-iso19139-fc.xml → GULF_FAIRWAYS-iso19110.xml} +0 -0
- data/spec/fixtures/sc330vf4259/temp/{JHARKHAND-iso19139-fc.xml → JHARKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/sq479mx3086/temp/{OFFSH_PLATF-iso19139-fc.xml → OFFSH_PLATF-iso19110.xml} +0 -0
- data/spec/fixtures/sr686bm4098/temp/{DAMAN_DIU_PT-iso19139-fc.xml → DAMAN_DIU_PT-iso19110.xml} +0 -0
- data/spec/fixtures/sv303sh5583/temp/{ARUNACHAL_PRADESH-iso19139-fc.xml → ARUNACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/sy319nh8520/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/td363vx2792/temp/{HIMACHAL_PRADESH_PT-iso19139-fc.xml → HIMACHAL_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/tf374bd2484/temp/{DISTRICT1951-iso19139-fc.xml → DISTRICT1951-iso19110.xml} +0 -0
- data/spec/fixtures/tj797mj7877/temp/{LOKSABHA_15-iso19139-fc.xml → LOKSABHA_15-iso19110.xml} +0 -0
- data/spec/fixtures/tv060wq5179/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/tv536bn1915/temp/{ARUNACHAL_PRADESH-iso19139-fc.xml → ARUNACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/tz359cc2977/temp/{MANIPUR-iso19139-fc.xml → MANIPUR-iso19110.xml} +0 -0
- data/spec/fixtures/vb525my6511/temp/{UTTARAKHAND-iso19139-fc.xml → UTTARAKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/vh802fs4240/temp/{PONDICHERRY-iso19139-fc.xml → PONDICHERRY-iso19110.xml} +0 -0
- data/spec/fixtures/vk120xn2474/temp/{PLSS_SEC-iso19139-fc.xml → PLSS_SEC-iso19110.xml} +0 -0
- data/spec/fixtures/vn439bc7316/temp/{KERALA-iso19139-fc.xml → KERALA-iso19110.xml} +0 -0
- data/spec/fixtures/vq745jk0695/temp/{MEGHALAYA-iso19139-fc.xml → MEGHALAYA-iso19110.xml} +0 -0
- data/spec/fixtures/vr593vj7147/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/vw911qb5271/temp/{DISTRICT2001-iso19139-fc.xml → DISTRICT2001-iso19110.xml} +0 -0
- data/spec/fixtures/wg680pz0365/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/wg761xn1926/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/wh870qw1934/temp/{PUNJAB-iso19139-fc.xml → PUNJAB-iso19110.xml} +0 -0
- data/spec/fixtures/wk775mm4673/temp/{MAHARASHTRA-iso19139-fc.xml → MAHARASHTRA-iso19110.xml} +0 -0
- data/spec/fixtures/ws171yz2165/temp/{ARUNACHAL_PRADESH_PT-iso19139-fc.xml → ARUNACHAL_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/wt473hz7153/temp/{CHANDIGARH_PT-iso19139-fc.xml → CHANDIGARH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/ww217dj0457/temp/{CO2_PIPE-iso19139-fc.xml → CO2_PIPE-iso19110.xml} +0 -0
- data/spec/fixtures/wy875pk9849/temp/{STATE1961-iso19139-fc.xml → STATE1961-iso19110.xml} +0 -0
- data/spec/fixtures/xb018tk2042/temp/{STATE1981-iso19139-fc.xml → STATE1981-iso19110.xml} +0 -0
- data/spec/fixtures/xg539vw8586/temp/{ORISSA-iso19139-fc.xml → ORISSA-iso19110.xml} +0 -0
- data/spec/fixtures/xv475kp4644/temp/{ASSAM_PT-iso19139-fc.xml → ASSAM_PT-iso19110.xml} +0 -0
- data/spec/fixtures/xy096gc2959/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/xz518gz3362/temp/{UTTARAKHAND-iso19139-fc.xml → UTTARAKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/yh986wy4737/temp/{NAGALAND-iso19139-fc.xml → NAGALAND-iso19110.xml} +0 -0
- data/spec/fixtures/yn187fq4474/temp/{KARNATAKA-iso19139-fc.xml → KARNATAKA-iso19110.xml} +0 -0
- data/spec/fixtures/yn236mw3250/temp/{TAMILNADU-iso19139-fc.xml → TAMILNADU-iso19110.xml} +0 -0
- data/spec/fixtures/yz596nz0112/temp/{WEST_BENGAL-iso19139-fc.xml → WEST_BENGAL-iso19110.xml} +0 -0
- data/spec/fixtures/zk596gy7380/temp/{DISTRICT1971-iso19139-fc.xml → DISTRICT1971-iso19110.xml} +0 -0
- data/spec/fixtures/zn452hh7431/temp/{RAJASTHAN-iso19139-fc.xml → RAJASTHAN-iso19110.xml} +0 -0
- data/spec/fixtures/zt093fw6519/temp/{MIZORAM-iso19139-fc.xml → MIZORAM-iso19110.xml} +0 -0
- data/spec/fixtures/zv925hd6723/temp/{OGWELLS-iso19139-fc.xml → OGWELLS-iso19110.xml} +0 -0
- data/spec/fixtures/zy658cr1728/temp/{ANDAMAAN_NICOBAR_PT-iso19139-fc.xml → ANDAMAAN_NICOBAR_PT-iso19110.xml} +0 -0
- data/spec/fixtures/zz943vx1492/temp/{BASINS-iso19139-fc.xml → BASINS-iso19110.xml} +0 -0
- data/spec/unit/gazetteer_spec.rb +100 -35
- data/spec/unit/task_spec.rb +68 -0
- data/spec/unit/transform_spec.rb +1 -1
- data/spec/unit/utils_spec.rb +17 -3
- data/workflow.rb +35 -0
- metadata +323 -316
data/scripts/status.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
# require 'json'
|
5
|
+
require 'net/http'
|
6
|
+
# require 'awesome_print'
|
7
|
+
|
8
|
+
STDOUT.sync = true
|
9
|
+
|
10
|
+
CSV.foreach('status.csv') do |url|
|
11
|
+
# ap({:url => url.first})
|
12
|
+
uri = URI(url.first)
|
13
|
+
druid = 'unknown'
|
14
|
+
druid = $1 if uri.to_s =~ /druid%3A([a-z0-9]+)/
|
15
|
+
# ap({:uri => uri})
|
16
|
+
uri.host = 'localhost'
|
17
|
+
uri.port = 8080
|
18
|
+
# ap({:uri => uri})
|
19
|
+
begin
|
20
|
+
start = Time.now
|
21
|
+
res = Net::HTTP.get_response(uri)
|
22
|
+
sz = res.body.size
|
23
|
+
open("images/#{druid}.png", 'wb') do |f|
|
24
|
+
f.write(res.body)
|
25
|
+
end
|
26
|
+
finish = Time.now
|
27
|
+
puts [finish, druid, res.code, res['content-type'], sz, res['geowebcache-cache-result'], finish - start].join(', ')
|
28
|
+
rescue => e
|
29
|
+
puts e.class, e
|
30
|
+
end
|
31
|
+
sleep(0.5)
|
32
|
+
end
|
@@ -10,7 +10,7 @@ def validate(path, flags)
|
|
10
10
|
puts "Processing #{shp}" if flags[:debug]
|
11
11
|
basefn = File.basename(shp, '.shp')
|
12
12
|
unless GeoHydra::Utils.shapefile?(shp)
|
13
|
-
puts "
|
13
|
+
puts "SyntaxError: Not a shapefile <#{shp}>. Trying to repair..."
|
14
14
|
Dir.glob("#{File.dirname(shp)}/#{basefn.gsub(' ', "\\ ")}.*") do |fn|
|
15
15
|
newfn = File.join(File.dirname(fn), File.basename(fn).gsub(/[^a-zA-Z0-9_]/, '_'))
|
16
16
|
FileUtils.mv fn, newfn
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Standard english stop words taken from Lucene's StopAnalyzer
|
2
|
+
a
|
3
|
+
an
|
4
|
+
and
|
5
|
+
are
|
6
|
+
as
|
7
|
+
at
|
8
|
+
be
|
9
|
+
but
|
10
|
+
by
|
11
|
+
for
|
12
|
+
if
|
13
|
+
in
|
14
|
+
into
|
15
|
+
is
|
16
|
+
it
|
17
|
+
no
|
18
|
+
not
|
19
|
+
of
|
20
|
+
on
|
21
|
+
or
|
22
|
+
such
|
23
|
+
that
|
24
|
+
the
|
25
|
+
their
|
26
|
+
then
|
27
|
+
there
|
28
|
+
these
|
29
|
+
they
|
30
|
+
this
|
31
|
+
to
|
32
|
+
was
|
33
|
+
will
|
34
|
+
with
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
3
|
+
# the License. You may obtain a copy of the License at
|
4
|
+
#
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the License for the specific language governing permissions and
|
11
|
+
# limitations under the License.
|
12
|
+
|
13
|
+
#-----------------------------------------------------------------------
|
14
|
+
# Use a protected word file to protect against the stemmer reducing two
|
15
|
+
# unrelated words to the same base word.
|
16
|
+
|
17
|
+
# Some non-words that normally won't be encountered,
|
18
|
+
# just to test that they won't be stemmed.
|
19
|
+
dontstems
|
20
|
+
zwhacky
|
21
|
+
|
@@ -0,0 +1,156 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<schema name="kurma-app-dev" version="1.5">
|
3
|
+
<uniqueKey>uuid</uniqueKey>
|
4
|
+
<fields>
|
5
|
+
<field name="_version_" type="long" stored="true" indexed="true"/>
|
6
|
+
<field name="timestamp" type="date" stored="true" indexed="true" default="NOW"/>
|
7
|
+
<field name="uuid" type="string" stored="true" indexed="true" required="true"/>
|
8
|
+
|
9
|
+
<!-- core generated fields -->
|
10
|
+
<field name="text" type="text_en" stored="false" indexed="true" multiValued="true"
|
11
|
+
termVectors="true" termPositions="true" termOffsets="true" />
|
12
|
+
|
13
|
+
<!-- dynamic field with simple types by suffix -->
|
14
|
+
<dynamicField name="*_b" type="boolean" stored="true" indexed="true"/>
|
15
|
+
<dynamicField name="*_d" type="double" stored="true" indexed="true"/>
|
16
|
+
<dynamicField name="*_dt" type="date" stored="true" indexed="true"/>
|
17
|
+
<dynamicField name="*_f" type="float" stored="true" indexed="true"/>
|
18
|
+
<dynamicField name="*_i" type="int" stored="true" indexed="true"/>
|
19
|
+
<dynamicField name="*_l" type="long" stored="true" indexed="true"/>
|
20
|
+
<dynamicField name="*_s" type="string" stored="true" indexed="true"/>
|
21
|
+
<dynamicField name="*_ss" type="string" stored="true" indexed="false"/>
|
22
|
+
<dynamicField name="*_si" type="string" stored="false" indexed="true"/>
|
23
|
+
<dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true" />
|
24
|
+
<dynamicField name="*_sm" type="string" stored="true" indexed="true" multiValued="true" />
|
25
|
+
<dynamicField name="*_url" type="string" stored="true" indexed="false"/>
|
26
|
+
<dynamicField name="*_blob" type="binary" stored="true" indexed="false"/>
|
27
|
+
|
28
|
+
<!-- dynamic Text fields by suffix without storage -->
|
29
|
+
<dynamicField name="*_t" type="text_en" stored="false" indexed="true"
|
30
|
+
termVectors="true" termPositions="true" termOffsets="true"/>
|
31
|
+
<dynamicField name="*_tm" type="text_en" stored="false" indexed="true" multiValued="true"
|
32
|
+
termVectors="true" termPositions="true" termOffsets="true"/>
|
33
|
+
<dynamicField name="*_ti" type="text_en" stored="false" indexed="true"
|
34
|
+
termVectors="true" termPositions="true" termOffsets="true"/>
|
35
|
+
<dynamicField name="*_tmi" type="text_en" stored="false" indexed="true" multiValued="true"
|
36
|
+
termVectors="true" termPositions="true" termOffsets="true"/>
|
37
|
+
<dynamicField name="*_sort" type="text_sort" stored="false" indexed="true" multiValued="false"/>
|
38
|
+
|
39
|
+
<!-- Spatial field types:
|
40
|
+
|
41
|
+
Solr3:
|
42
|
+
<field name="my_pt">83.1,-117.312</field>
|
43
|
+
as (y,x)
|
44
|
+
|
45
|
+
Solr4:
|
46
|
+
|
47
|
+
<field name="my_bbox">-117.312 83.1 -115.39 84.31</field>
|
48
|
+
as (W S E N)
|
49
|
+
|
50
|
+
<field name="my_geom">POLYGON((1 8, 1 9, 2 9, 2 8, 1 8))</field>
|
51
|
+
as WKT for point, linestring, polygon
|
52
|
+
|
53
|
+
-->
|
54
|
+
<dynamicField name="*_pt" type="location" stored="true" indexed="true"/>
|
55
|
+
<dynamicField name="*_bbox" type="location_rpt" stored="true" indexed="true"/>
|
56
|
+
<dynamicField name="*_geom" type="location_jts" stored="true" indexed="true"/>
|
57
|
+
<dynamicField name="*_wkt" type="location_jts" stored="true" indexed="true"/>
|
58
|
+
</fields>
|
59
|
+
|
60
|
+
<types>
|
61
|
+
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
|
62
|
+
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
|
63
|
+
|
64
|
+
<fieldType name="int" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
|
65
|
+
<fieldType name="float" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
|
66
|
+
<fieldType name="long" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
|
67
|
+
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
|
68
|
+
|
69
|
+
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z.
|
70
|
+
The trailing "Z" designates UTC time and is mandatory.
|
71
|
+
A Trie based date field for faster date range queries and date faceting. -->
|
72
|
+
<fieldType name="date" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
|
73
|
+
|
74
|
+
<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
|
75
|
+
<fieldtype name="binary" class="solr.BinaryField"/>
|
76
|
+
|
77
|
+
<!-- A text field with defaults appropriate for English: it
|
78
|
+
tokenizes with StandardTokenizer, removes English stop words
|
79
|
+
(lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
|
80
|
+
finally applies Porter's stemming. The query time analyzer
|
81
|
+
also applies synonyms from synonyms.txt. -->
|
82
|
+
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
|
83
|
+
<analyzer type="index">
|
84
|
+
<tokenizer class="solr.StandardTokenizerFactory"/>
|
85
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
86
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
87
|
+
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
88
|
+
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
89
|
+
<filter class="solr.PorterStemFilterFactory"/>
|
90
|
+
</analyzer>
|
91
|
+
<analyzer type="query">
|
92
|
+
<tokenizer class="solr.StandardTokenizerFactory"/>
|
93
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
94
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
95
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
96
|
+
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
97
|
+
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
98
|
+
<filter class="solr.PorterStemFilterFactory"/>
|
99
|
+
</analyzer>
|
100
|
+
</fieldType>
|
101
|
+
|
102
|
+
<!-- for alpha sorting as a single token -->
|
103
|
+
<fieldType name="text_sort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
104
|
+
<analyzer>
|
105
|
+
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
106
|
+
<filter class="solr.LowerCaseFilterFactory" />
|
107
|
+
<filter class="solr.TrimFilterFactory" />
|
108
|
+
<filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z0-9 ])" replacement="" replace="all"/>
|
109
|
+
</analyzer>
|
110
|
+
</fieldType>
|
111
|
+
|
112
|
+
<!-- Spatial field types -->
|
113
|
+
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_d"/>
|
114
|
+
|
115
|
+
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
|
116
|
+
distErrPct="0.025"
|
117
|
+
maxDistErr="0.000009"
|
118
|
+
units="degrees"
|
119
|
+
/>
|
120
|
+
|
121
|
+
<!-- JTS-enabled spatial predicates; requires JTS installation -->
|
122
|
+
<fieldType name="location_jts" class="solr.SpatialRecursivePrefixTreeFieldType"
|
123
|
+
spatialContextFactory="com.spatial4j.core.context.jts.JtsSpatialContextFactory"
|
124
|
+
distErrPct="0.025"
|
125
|
+
maxDistErr="0.000009"
|
126
|
+
units="degrees"
|
127
|
+
/>
|
128
|
+
</types>
|
129
|
+
|
130
|
+
<!-- for scoring formula -->
|
131
|
+
<copyField source="dct_spatial_sm" dest="dct_spatial_tmi" maxChars="10000"/>
|
132
|
+
<copyField source="dct_temporal_sm" dest="dct_temporal_tmi" maxChars="10000"/>
|
133
|
+
<copyField source="dc_creator_sm" dest="dc_creator_tmi" maxChars="1000"/>
|
134
|
+
<copyField source="dc_description_s" dest="dc_description_ti" maxChars="10000"/>
|
135
|
+
<copyField source="dc_format_s" dest="dc_format_ti" maxChars="100"/>
|
136
|
+
<copyField source="dc_identifier_s" dest="dc_identifier_ti" maxChars="100"/>
|
137
|
+
<copyField source="dc_publisher_s" dest="dc_publisher_ti" maxChars="1000"/>
|
138
|
+
<copyField source="dc_rights_s" dest="dc_rights_ti" maxChars="100"/>
|
139
|
+
<copyField source="dct_provenance_s" dest="dct_provenance_ti" maxChars="1000"/>
|
140
|
+
<copyField source="dc_subject_sm" dest="dc_subject_tmi" maxChars="10000"/>
|
141
|
+
<copyField source="dc_title_s" dest="dc_title_ti" maxChars="1000"/>
|
142
|
+
<copyField source="layer_collection_s" dest="layer_collection_ti" maxChars="1000"/>
|
143
|
+
<copyField source="layer_geom_type_s" dest="layer_geom_type_ti" maxChars="100"/>
|
144
|
+
<copyField source="layer_slug_s" dest="layer_slug_ti" maxChars="100"/>
|
145
|
+
|
146
|
+
<!-- core text search -->
|
147
|
+
<copyField source="*_ti" dest="text" />
|
148
|
+
<copyField source="*_tmi" dest="text" />
|
149
|
+
|
150
|
+
<!-- for sorting text fields -->
|
151
|
+
<copyField source="dc_title_s" dest="dc_title_sort"/>
|
152
|
+
<copyField source="dc_publisher_s" dest="dc_publisher_sort"/>
|
153
|
+
<copyField source="layer_collection_s" dest="layer_collection_sort"/>
|
154
|
+
|
155
|
+
</schema>
|
156
|
+
|
@@ -0,0 +1,161 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!--
|
3
|
+
Licensed to the Apache Software Foundation (ASF) under one or more
|
4
|
+
contributor license agreements. See the NOTICE file distributed with
|
5
|
+
this work for additional information regarding copyright ownership.
|
6
|
+
The ASF licenses this file to You under the Apache License, Version 2.0
|
7
|
+
(the "License"); you may not use this file except in compliance with
|
8
|
+
the License. You may obtain a copy of the License at
|
9
|
+
|
10
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
Unless required by applicable law or agreed to in writing, software
|
13
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
See the License for the specific language governing permissions and
|
16
|
+
limitations under the License.
|
17
|
+
-->
|
18
|
+
<!--
|
19
|
+
For more details about configurations options that may appear in
|
20
|
+
this file, see http://wiki.apache.org/solr/SolrConfigXml.
|
21
|
+
-->
|
22
|
+
<config>
|
23
|
+
<luceneMatchVersion>LUCENE_CURRENT</luceneMatchVersion>
|
24
|
+
<dataDir>${solr.data.dir:}</dataDir>
|
25
|
+
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
|
26
|
+
<codecFactory class="solr.SchemaCodecFactory"/>
|
27
|
+
<schemaFactory class="ClassicIndexSchemaFactory"/>
|
28
|
+
<indexConfig>
|
29
|
+
<lockType>${solr.lock.type:native}</lockType>
|
30
|
+
</indexConfig>
|
31
|
+
|
32
|
+
<!-- The default high-performance update handler -->
|
33
|
+
<updateHandler class="solr.DirectUpdateHandler2">
|
34
|
+
<updateLog>
|
35
|
+
<str name="dir">${solr.ulog.dir:}</str>
|
36
|
+
</updateLog>
|
37
|
+
<autoCommit>
|
38
|
+
<maxTime>15000</maxTime>
|
39
|
+
<openSearcher>false</openSearcher>
|
40
|
+
</autoCommit>
|
41
|
+
</updateHandler>
|
42
|
+
|
43
|
+
<!-- realtime get handler, guaranteed to return the latest stored fields
|
44
|
+
of any document, without the need to commit or open a new searcher. The current
|
45
|
+
implementation relies on the updateLog feature being enabled. -->
|
46
|
+
<requestHandler name="/get" class="solr.RealTimeGetHandler">
|
47
|
+
<lst name="defaults">
|
48
|
+
<str name="omitHeader">true</str>
|
49
|
+
</lst>
|
50
|
+
</requestHandler>
|
51
|
+
|
52
|
+
<requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />
|
53
|
+
|
54
|
+
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
55
|
+
Query section - these settings control query time things like caches
|
56
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
|
57
|
+
<query>
|
58
|
+
<maxBooleanClauses>1024</maxBooleanClauses>
|
59
|
+
<filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0"/>
|
60
|
+
<queryResultCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>
|
61
|
+
<documentCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>
|
62
|
+
<enableLazyFieldLoading>true</enableLazyFieldLoading>
|
63
|
+
<queryResultWindowSize>20</queryResultWindowSize>
|
64
|
+
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
|
65
|
+
<listener event="newSearcher" class="solr.QuerySenderListener">
|
66
|
+
<arr name="queries">
|
67
|
+
<lst><str name="q">stanford</str></lst>
|
68
|
+
<lst><str name="q">polygon</str></lst>
|
69
|
+
</arr>
|
70
|
+
</listener>
|
71
|
+
<listener event="firstSearcher" class="solr.QuerySenderListener">
|
72
|
+
<arr name="queries">
|
73
|
+
<lst>
|
74
|
+
<str name="q">static firstSearcher warming in solrconfig.xml</str>
|
75
|
+
</lst>
|
76
|
+
</arr>
|
77
|
+
</listener>
|
78
|
+
<useColdSearcher>false</useColdSearcher>
|
79
|
+
<maxWarmingSearchers>2</maxWarmingSearchers>
|
80
|
+
</query>
|
81
|
+
|
82
|
+
<requestDispatcher handleSelect="false">
|
83
|
+
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048000" formdataUploadLimitInKB="2048"/>
|
84
|
+
<httpCaching never304="true"/>
|
85
|
+
</requestDispatcher>
|
86
|
+
|
87
|
+
<requestHandler name="/search" class="solr.SearchHandler"/>
|
88
|
+
|
89
|
+
<requestHandler name="/select" class="solr.SearchHandler">
|
90
|
+
<lst name="defaults">
|
91
|
+
<int name="start">0</int>
|
92
|
+
<int name="rows">10</int>
|
93
|
+
<str name="wt">json</str>
|
94
|
+
<int name="indent">2</int>
|
95
|
+
<str name="defType">edismax</str>
|
96
|
+
<str name="echoParams">all</str>
|
97
|
+
<str name="fl">*,score</str>
|
98
|
+
<str name="sort">score desc, dc_title_sort asc</str>
|
99
|
+
<str name="q.alt">*:*</str>
|
100
|
+
<str name="qf">
|
101
|
+
text^1
|
102
|
+
dc_description_ti^2
|
103
|
+
dc_creator_ti^3
|
104
|
+
dc_publisher_ti^3
|
105
|
+
layer_collection_ti^4
|
106
|
+
dc_subject_tmi^5
|
107
|
+
dct_spatial_tmi^5
|
108
|
+
dct_temporal_tmi^5
|
109
|
+
dc_title_ti^6
|
110
|
+
dc_rights_ti^7
|
111
|
+
dct_provenance_ti^8
|
112
|
+
layer_geom_type_ti^9
|
113
|
+
layer_slug_ti^10
|
114
|
+
dc_identifier_ti^10
|
115
|
+
</str>
|
116
|
+
<bool name="facet">true</bool>
|
117
|
+
<int name="facet.mincount">1</int>
|
118
|
+
<int name="facet.limit">10</int>
|
119
|
+
<str name="facet.field">dct_spatial_sm</str>
|
120
|
+
<str name="facet.field">dc_creator_s</str>
|
121
|
+
<str name="facet.field">dc_format_s</str>
|
122
|
+
<str name="facet.field">dc_language_s</str>
|
123
|
+
<str name="facet.field">dc_publisher_s</str>
|
124
|
+
<str name="facet.field">dc_rights_s</str>
|
125
|
+
<str name="facet.field">dct_provenance_s</str>
|
126
|
+
<str name="facet.field">dc_subject_sm</str>
|
127
|
+
<str name="facet.field">layer_collection_s</str>
|
128
|
+
<str name="facet.field">layer_geom_type_s</str>
|
129
|
+
<str name="facet.field">layer_srs_s</str>
|
130
|
+
<str name="facet.field">layer_year_i</str>
|
131
|
+
</lst>
|
132
|
+
</requestHandler>
|
133
|
+
|
134
|
+
<requestHandler name="/update" class="solr.UpdateRequestHandler"/>
|
135
|
+
<requestHandler name="/admin/" class="solr.admin.AdminHandlers"/>
|
136
|
+
|
137
|
+
<!-- ping/healthcheck -->
|
138
|
+
<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
|
139
|
+
<lst name="invariants">
|
140
|
+
<str name="q">solrpingquery</str>
|
141
|
+
</lst>
|
142
|
+
<lst name="defaults">
|
143
|
+
<str name="echoParams">all</str>
|
144
|
+
</lst>
|
145
|
+
<!-- An optional feature of the PingRequestHandler is to configure the
|
146
|
+
handler with a "healthcheckFile" which can be used to enable/disable
|
147
|
+
the PingRequestHandler.
|
148
|
+
relative paths are resolved against the data dir
|
149
|
+
-->
|
150
|
+
<str name="healthcheckFile">server-enabled.txt</str>
|
151
|
+
</requestHandler>
|
152
|
+
|
153
|
+
<requestHandler name="/analysis/field"
|
154
|
+
startup="lazy"
|
155
|
+
class="solr.FieldAnalysisRequestHandler" />
|
156
|
+
|
157
|
+
<!-- Legacy config for the admin interface -->
|
158
|
+
<admin>
|
159
|
+
<defaultQuery>*:*</defaultQuery>
|
160
|
+
</admin>
|
161
|
+
</config>
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
3
|
+
# the License. You may obtain a copy of the License at
|
4
|
+
#
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the License for the specific language governing permissions and
|
11
|
+
# limitations under the License.
|
12
|
+
|
13
|
+
#-----------------------------------------------------------------------
|
14
|
+
#some test synonym mappings unlikely to appear in real input text
|
15
|
+
aaafoo => aaabar
|
16
|
+
bbbfoo => bbbfoo bbbbar
|
17
|
+
cccfoo => cccbar cccbaz
|
18
|
+
fooaaa,baraaa,bazaaa
|
19
|
+
|
20
|
+
# Some synonym groups specific to this example
|
21
|
+
GB,gib,gigabyte,gigabytes
|
22
|
+
MB,mib,megabyte,megabytes
|
23
|
+
Television, Televisions, TV, TVs
|
24
|
+
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
25
|
+
#after us won't split it into two words.
|
26
|
+
|
27
|
+
# Synonym mappings can be used for spelling correction too
|
28
|
+
pixima => pixma
|
29
|
+
|