geohydra 0.3.1 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -1
- data/.travis.yml +1 -2
- data/Gemfile +1 -8
- data/Gemfile.lock +87 -102
- data/README.md +2 -2
- data/VERSION +1 -1
- data/bin/accession.rb +99 -89
- data/bin/assemble.rb +288 -247
- data/bin/assemble_data.rb +54 -51
- data/bin/assemble_placenames.rb +85 -85
- data/bin/build_stage_options.rb +24 -18
- data/bin/derive_wgs84.rb +65 -66
- data/bin/extract_thumbnail.rb +38 -37
- data/bin/geo2mods.rb +78 -0
- data/bin/geohydra +14 -5
- data/bin/ingest_arcgis.rb +80 -60
- data/bin/iso2geo.rb +64 -0
- data/bin/loader_postgis.rb +121 -227
- data/bin/run_task.rb +23 -0
- data/bin/sync_geoserver_metadata.rb +132 -127
- data/bin/xsltproc-saxon +6 -0
- data/geohydra.gemspec +6 -4
- data/lib/geohydra.rb +5 -0
- data/lib/geohydra/accession.rb +24 -13
- data/lib/geohydra/{arcgis_to_iso19139_fc.xsl → arcgis_to_iso19110.xsl} +0 -0
- data/lib/geohydra/gazetteer.csv +842 -36
- data/lib/geohydra/gazetteer.rb +48 -24
- data/lib/geohydra/mods2geoblacklight.xsl +248 -0
- data/lib/geohydra/mods2ogp.xsl +5 -8
- data/lib/geohydra/transform.rb +8 -2
- data/lib/geohydra/utils.rb +6 -0
- data/lib/geohydra/workflow/gisAssemblyWF.rb +109 -0
- data/lib/geohydra/workflow/gisAssemblyWF.xml +85 -0
- data/lib/geohydra/workflow/gisDeliveryWF.rb +33 -0
- data/lib/geohydra/workflow/gisDeliveryWF.xml +36 -0
- data/lib/geohydra/workflow/gisDiscoveryWF.rb +55 -0
- data/lib/geohydra/workflow/gisDiscoveryWF.xml +28 -0
- data/lib/geohydra/workflow/task.rb +82 -0
- data/ogp/README.md +350 -0
- data/ogp/download.rb +92 -0
- data/ogp/fgdc2mods.sh +9 -0
- data/ogp/fgdc2mods.xsl +884 -0
- data/ogp/ingest.rb +48 -0
- data/ogp/select.rb +20 -0
- data/ogp/transform.rb +354 -0
- data/ogp/validate.rb +182 -0
- data/{bin → scripts}/ingest_tufts.rb +0 -0
- data/scripts/iso2html/doit.sh +15 -0
- data/scripts/iso2html/main.css +66 -0
- data/scripts/iso2html/pacioos-iso-html.xsl +1749 -0
- data/scripts/iso2html/utils/replace-newlines.xsl +97 -0
- data/scripts/iso2html/utils/replace-string.xsl +80 -0
- data/scripts/iso2html/utils/strip-digits.xsl +60 -0
- data/{bin → scripts}/loader.rb +0 -0
- data/scripts/rename_shapefiles.rb +5 -0
- data/scripts/render_gazetteer.rb +36 -0
- data/{bin → scripts}/seed.rb +0 -0
- data/{bin → scripts}/solr_indexer.rb +0 -0
- data/scripts/status.csv +253 -0
- data/scripts/status.rb +32 -0
- data/{bin → scripts}/validate_data.rb +1 -1
- data/solr/kurma-app-dev/conf/lang/stopwords_en.txt +34 -0
- data/solr/kurma-app-dev/conf/protwords.txt +21 -0
- data/solr/kurma-app-dev/conf/schema.xml +156 -0
- data/solr/kurma-app-dev/conf/solrconfig.xml +161 -0
- data/solr/kurma-app-dev/conf/synonyms.txt +29 -0
- data/solr/kurma-app-dev/purge.sh +8 -0
- data/solr/kurma-app-test/conf/lang/stopwords_en.txt +34 -0
- data/solr/kurma-app-test/conf/protwords.txt +21 -0
- data/solr/kurma-app-test/conf/schema.xml +158 -0
- data/solr/kurma-app-test/conf/solrconfig.xml +161 -0
- data/solr/kurma-app-test/conf/synonyms.txt +29 -0
- data/solr/kurma-app-test/deploy.sh +15 -0
- data/solr/kurma-app-test/purge.sh +8 -0
- data/solr/ogp-dev/purge.sh +1 -2
- data/spec/fixtures/bw938nk9584/temp/{TRIPURA-iso19139-fc.xml → TRIPURA-iso19110.xml} +0 -0
- data/spec/fixtures/cc142xj8436/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/cg716wc7949/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/cm007pv9601/temp/{MEGHALAYA-iso19139-fc.xml → MEGHALAYA-iso19110.xml} +0 -0
- data/spec/fixtures/cp055nb0189/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/cs838pw3418/temp/{OIL_GAS_FIELDS-iso19139-fc.xml → OIL_GAS_FIELDS-iso19110.xml} +0 -0
- data/spec/fixtures/dd308sy5843/temp/{ORISSA-iso19139-fc.xml → ORISSA-iso19110.xml} +0 -0
- data/spec/fixtures/dd452vk1873/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/dg850pt1796/temp/{STATE1951-iso19139-fc.xml → STATE1951-iso19110.xml} +0 -0
- data/spec/fixtures/dn744tf5427/temp/{DISTRICT1991-iso19139-fc.xml → DISTRICT1991-iso19110.xml} +0 -0
- data/spec/fixtures/dq603nz8402/temp/{STATE2001-iso19139-fc.xml → STATE2001-iso19110.xml} +0 -0
- data/spec/fixtures/dv609zt4699/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/dz222hw0585/temp/{PUNJAB-iso19139-fc.xml → PUNJAB-iso19110.xml} +0 -0
- data/spec/fixtures/fd673qb9705/temp/{STATE1971-iso19139-fc.xml → STATE1971-iso19110.xml} +0 -0
- data/spec/fixtures/fg451wp8917/temp/{SIKKIM-iso19139-fc.xml → SIKKIM-iso19110.xml} +0 -0
- data/spec/fixtures/fh247yz0156/temp/{RAJASTHAN-iso19139-fc.xml → RAJASTHAN-iso19110.xml} +0 -0
- data/spec/fixtures/fs487vd1465/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/fs591bn3317/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/fw920bc5473/temp/{PLSS_TWN-iso19139-fc.xml → PLSS_TWN-iso19110.xml} +0 -0
- data/spec/fixtures/gj831wj3625/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/gp075nv3265/temp/{PONDICHERRY-iso19139-fc.xml → PONDICHERRY-iso19110.xml} +0 -0
- data/spec/fixtures/gv800hj8141/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/gw520gz6339/temp/{DADRA_NAGAR_HAVELI_PT-iso19139-fc.xml → DADRA_NAGAR_HAVELI_PT-iso19110.xml} +0 -0
- data/spec/fixtures/gy054hz1045/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/gz352mw6982/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/hb489vm9892/temp/{DISTRICT1981-iso19139-fc.xml → DISTRICT1981-iso19110.xml} +0 -0
- data/spec/fixtures/hw125dq0418/temp/{DELHI-iso19139-fc.xml → DELHI-iso19110.xml} +0 -0
- data/spec/fixtures/hw892mn4587/temp/{KERALA-iso19139-fc.xml → KERALA-iso19110.xml} +0 -0
- data/spec/fixtures/jb371hz3868/temp/{INCOME-iso19139-fc.xml → INCOME-iso19110.xml} +0 -0
- data/spec/fixtures/jc017yk9928/temp/{KARNATAKA-iso19139-fc.xml → KARNATAKA-iso19110.xml} +0 -0
- data/spec/fixtures/jf841ys4828/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/jh802mp2160/temp/{DELHI_PT-iso19139-fc.xml → DELHI_PT-iso19110.xml} +0 -0
- data/spec/fixtures/jj806fc3801/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/jq835yn7161/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/jr455pt6676/temp/{TAMILNADU-iso19139-fc.xml → TAMILNADU-iso19110.xml} +0 -0
- data/spec/fixtures/js637zp2537/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/jv502wg9611/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/jw462ck6560/temp/{JAMMU_KASHMIR-iso19139-fc.xml → JAMMU_KASHMIR-iso19110.xml} +0 -0
- data/spec/fixtures/kj800fb6273/temp/{STATE2011-iso19139-fc.xml → STATE2011-iso19110.xml} +0 -0
- data/spec/fixtures/km504zq3948/temp/{HIMACHAL_PRADESH-iso19139-fc.xml → HIMACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/ks297fy1411/temp/{OFFSH_BLOCKS-iso19139-fc.xml → OFFSH_BLOCKS-iso19110.xml} +0 -0
- data/spec/fixtures/md358hy5049/temp/{MIZORAM-iso19139-fc.xml → MIZORAM-iso19110.xml} +0 -0
- data/spec/fixtures/mg745bq0193/temp/{MADHYA_PRADESH-iso19139-fc.xml → MADHYA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/mh187yx3536/temp/{WEST_BENGAL-iso19139-fc.xml → WEST_BENGAL-iso19110.xml} +0 -0
- data/spec/fixtures/mk488yn6694/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/my216kp3008/temp/{DELHI-iso19139-fc.xml → DELHI-iso19110.xml} +0 -0
- data/spec/fixtures/my504nz9827/temp/{JAMMU_KASHMIR-iso19139-fc.xml → JAMMU_KASHMIR-iso19110.xml} +0 -0
- data/spec/fixtures/ng819jm8700/temp/{MANIPUR-iso19139-fc.xml → MANIPUR-iso19110.xml} +0 -0
- data/spec/fixtures/np020jq2139/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/ns377mt1608/temp/{STATE1991-iso19139-fc.xml → STATE1991-iso19110.xml} +0 -0
- data/spec/fixtures/nw926np8508/temp/{metadata.iso19139-fc.xml → metadata.iso19110.xml} +0 -0
- data/spec/fixtures/ny358rm8559/temp/{TRIPURA-iso19139-fc.xml → TRIPURA-iso19110.xml} +0 -0
- data/spec/fixtures/nz176rm8192/temp/{DISTRICT2011-iso19139-fc.xml → DISTRICT2011-iso19110.xml} +0 -0
- data/spec/fixtures/nz252rq2252/temp/{UTTAR_PRADESH-iso19139-fc.xml → UTTAR_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/pd902kb3348/temp/{MADHYA_PRADESH-iso19139-fc.xml → MADHYA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/pz792fz1776/temp/{MAHARASHTRA-iso19139-fc.xml → MAHARASHTRA-iso19110.xml} +0 -0
- data/spec/fixtures/qb767ss4042/temp/{UTTAR_PRADESH-iso19139-fc.xml → UTTAR_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/qc091qw0570/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/qc652vr7204/temp/{ANDHRA_PRADESH_PT-iso19139-fc.xml → ANDHRA_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/qk786js7484/temp/{DISTRICT1961-iso19139-fc.xml → DISTRICT1961-iso19110.xml} +0 -0
- data/spec/fixtures/qn676pg6767/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/qr255jh4074/temp/{LOKSABHA_14-iso19139-fc.xml → LOKSABHA_14-iso19110.xml} +0 -0
- data/spec/fixtures/qr374kj4827/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/qy162js1748/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/rd446vf2633/temp/{NAGALAND-iso19139-fc.xml → NAGALAND-iso19110.xml} +0 -0
- data/spec/fixtures/rf389hf2983/temp/{CHHATTISGARH_PT-iso19139-fc.xml → CHHATTISGARH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/rf859ff4582/temp/{JHARKHAND-iso19139-fc.xml → JHARKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/rh343ds8931/temp/{BIHAR-iso19139-fc.xml → BIHAR-iso19110.xml} +0 -0
- data/spec/fixtures/rn815xk8157/temp/{SIKKIM-iso19139-fc.xml → SIKKIM-iso19110.xml} +0 -0
- data/spec/fixtures/rq653sz4470/temp/{CHHATTISGARH-iso19139-fc.xml → CHHATTISGARH-iso19110.xml} +0 -0
- data/spec/fixtures/rt625ws6022/temp/{GULF_FAIRWAYS-iso19139-fc.xml → GULF_FAIRWAYS-iso19110.xml} +0 -0
- data/spec/fixtures/sc330vf4259/temp/{JHARKHAND-iso19139-fc.xml → JHARKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/sq479mx3086/temp/{OFFSH_PLATF-iso19139-fc.xml → OFFSH_PLATF-iso19110.xml} +0 -0
- data/spec/fixtures/sr686bm4098/temp/{DAMAN_DIU_PT-iso19139-fc.xml → DAMAN_DIU_PT-iso19110.xml} +0 -0
- data/spec/fixtures/sv303sh5583/temp/{ARUNACHAL_PRADESH-iso19139-fc.xml → ARUNACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/sy319nh8520/temp/{GUJARAT-iso19139-fc.xml → GUJARAT-iso19110.xml} +0 -0
- data/spec/fixtures/td363vx2792/temp/{HIMACHAL_PRADESH_PT-iso19139-fc.xml → HIMACHAL_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/tf374bd2484/temp/{DISTRICT1951-iso19139-fc.xml → DISTRICT1951-iso19110.xml} +0 -0
- data/spec/fixtures/tj797mj7877/temp/{LOKSABHA_15-iso19139-fc.xml → LOKSABHA_15-iso19110.xml} +0 -0
- data/spec/fixtures/tv060wq5179/temp/{ASSAM-iso19139-fc.xml → ASSAM-iso19110.xml} +0 -0
- data/spec/fixtures/tv536bn1915/temp/{ARUNACHAL_PRADESH-iso19139-fc.xml → ARUNACHAL_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/tz359cc2977/temp/{MANIPUR-iso19139-fc.xml → MANIPUR-iso19110.xml} +0 -0
- data/spec/fixtures/vb525my6511/temp/{UTTARAKHAND-iso19139-fc.xml → UTTARAKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/vh802fs4240/temp/{PONDICHERRY-iso19139-fc.xml → PONDICHERRY-iso19110.xml} +0 -0
- data/spec/fixtures/vk120xn2474/temp/{PLSS_SEC-iso19139-fc.xml → PLSS_SEC-iso19110.xml} +0 -0
- data/spec/fixtures/vn439bc7316/temp/{KERALA-iso19139-fc.xml → KERALA-iso19110.xml} +0 -0
- data/spec/fixtures/vq745jk0695/temp/{MEGHALAYA-iso19139-fc.xml → MEGHALAYA-iso19110.xml} +0 -0
- data/spec/fixtures/vr593vj7147/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/vw911qb5271/temp/{DISTRICT2001-iso19139-fc.xml → DISTRICT2001-iso19110.xml} +0 -0
- data/spec/fixtures/wg680pz0365/temp/{ANDHRA_PRADESH-iso19139-fc.xml → ANDHRA_PRADESH-iso19110.xml} +0 -0
- data/spec/fixtures/wg761xn1926/temp/{HARYANA-iso19139-fc.xml → HARYANA-iso19110.xml} +0 -0
- data/spec/fixtures/wh870qw1934/temp/{PUNJAB-iso19139-fc.xml → PUNJAB-iso19110.xml} +0 -0
- data/spec/fixtures/wk775mm4673/temp/{MAHARASHTRA-iso19139-fc.xml → MAHARASHTRA-iso19110.xml} +0 -0
- data/spec/fixtures/ws171yz2165/temp/{ARUNACHAL_PRADESH_PT-iso19139-fc.xml → ARUNACHAL_PRADESH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/wt473hz7153/temp/{CHANDIGARH_PT-iso19139-fc.xml → CHANDIGARH_PT-iso19110.xml} +0 -0
- data/spec/fixtures/ww217dj0457/temp/{CO2_PIPE-iso19139-fc.xml → CO2_PIPE-iso19110.xml} +0 -0
- data/spec/fixtures/wy875pk9849/temp/{STATE1961-iso19139-fc.xml → STATE1961-iso19110.xml} +0 -0
- data/spec/fixtures/xb018tk2042/temp/{STATE1981-iso19139-fc.xml → STATE1981-iso19110.xml} +0 -0
- data/spec/fixtures/xg539vw8586/temp/{ORISSA-iso19139-fc.xml → ORISSA-iso19110.xml} +0 -0
- data/spec/fixtures/xv475kp4644/temp/{ASSAM_PT-iso19139-fc.xml → ASSAM_PT-iso19110.xml} +0 -0
- data/spec/fixtures/xy096gc2959/temp/{GOA-iso19139-fc.xml → GOA-iso19110.xml} +0 -0
- data/spec/fixtures/xz518gz3362/temp/{UTTARAKHAND-iso19139-fc.xml → UTTARAKHAND-iso19110.xml} +0 -0
- data/spec/fixtures/yh986wy4737/temp/{NAGALAND-iso19139-fc.xml → NAGALAND-iso19110.xml} +0 -0
- data/spec/fixtures/yn187fq4474/temp/{KARNATAKA-iso19139-fc.xml → KARNATAKA-iso19110.xml} +0 -0
- data/spec/fixtures/yn236mw3250/temp/{TAMILNADU-iso19139-fc.xml → TAMILNADU-iso19110.xml} +0 -0
- data/spec/fixtures/yz596nz0112/temp/{WEST_BENGAL-iso19139-fc.xml → WEST_BENGAL-iso19110.xml} +0 -0
- data/spec/fixtures/zk596gy7380/temp/{DISTRICT1971-iso19139-fc.xml → DISTRICT1971-iso19110.xml} +0 -0
- data/spec/fixtures/zn452hh7431/temp/{RAJASTHAN-iso19139-fc.xml → RAJASTHAN-iso19110.xml} +0 -0
- data/spec/fixtures/zt093fw6519/temp/{MIZORAM-iso19139-fc.xml → MIZORAM-iso19110.xml} +0 -0
- data/spec/fixtures/zv925hd6723/temp/{OGWELLS-iso19139-fc.xml → OGWELLS-iso19110.xml} +0 -0
- data/spec/fixtures/zy658cr1728/temp/{ANDAMAAN_NICOBAR_PT-iso19139-fc.xml → ANDAMAAN_NICOBAR_PT-iso19110.xml} +0 -0
- data/spec/fixtures/zz943vx1492/temp/{BASINS-iso19139-fc.xml → BASINS-iso19110.xml} +0 -0
- data/spec/unit/gazetteer_spec.rb +100 -35
- data/spec/unit/task_spec.rb +68 -0
- data/spec/unit/transform_spec.rb +1 -1
- data/spec/unit/utils_spec.rb +17 -3
- data/workflow.rb +35 -0
- metadata +323 -316
data/scripts/status.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# Replays every URL listed in the first column of status.csv against
# localhost:8080, stores each response body as images/<druid>.png, and prints
# one comma-separated status line per request:
#
#   finish time, druid, HTTP status, Content-Type, body size,
#   geowebcache-cache-result header, elapsed seconds
#
# Paths are relative to the current working directory.

require 'csv'
# require 'json'
require 'net/http'
# require 'awesome_print'

STDOUT.sync = true

# Every response is written below images/; create it once so a missing
# directory does not turn each request into an ENOENT failure.
Dir.mkdir('images') unless Dir.exist?('images')

CSV.foreach('status.csv') do |row|
  # Blank lines are yielded as empty rows; there is nothing to request.
  url = row.first.to_s.strip
  next if url.empty?

  begin
    # Parse inside the rescue so one malformed URL is reported like any other
    # failure instead of aborting the whole run.
    uri = URI(url)

    # The identifier is the token following the URL-encoded "druid:" prefix.
    druid = uri.to_s[/druid%3A([a-z0-9]+)/, 1] || 'unknown'

    # Keep path and query, but send the request to the local server.
    uri.host = 'localhost'
    uri.port = 8080

    start = Time.now
    res = Net::HTTP.get_response(uri)
    # Responses without a body yield nil; treat that as an empty payload.
    body = res.body.to_s
    sz = body.size
    File.open("images/#{druid}.png", 'wb') { |f| f.write(body) }
    finish = Time.now

    puts [finish, druid, res.code, res['content-type'], sz,
          res['geowebcache-cache-result'], finish - start].join(', ')
  rescue => e
    puts e.class, e
  end
  sleep(0.5)
end
|
@@ -10,7 +10,7 @@ def validate(path, flags)
|
|
10
10
|
puts "Processing #{shp}" if flags[:debug]
|
11
11
|
basefn = File.basename(shp, '.shp')
|
12
12
|
unless GeoHydra::Utils.shapefile?(shp)
|
13
|
-
puts "
|
13
|
+
puts "SyntaxError: Not a shapefile <#{shp}>. Trying to repair..."
|
14
14
|
Dir.glob("#{File.dirname(shp)}/#{basefn.gsub(' ', "\\ ")}.*") do |fn|
|
15
15
|
newfn = File.join(File.dirname(fn), File.basename(fn).gsub(/[^a-zA-Z0-9_]/, '_'))
|
16
16
|
FileUtils.mv fn, newfn
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Standard english stop words taken from Lucene's StopAnalyzer
|
2
|
+
a
|
3
|
+
an
|
4
|
+
and
|
5
|
+
are
|
6
|
+
as
|
7
|
+
at
|
8
|
+
be
|
9
|
+
but
|
10
|
+
by
|
11
|
+
for
|
12
|
+
if
|
13
|
+
in
|
14
|
+
into
|
15
|
+
is
|
16
|
+
it
|
17
|
+
no
|
18
|
+
not
|
19
|
+
of
|
20
|
+
on
|
21
|
+
or
|
22
|
+
such
|
23
|
+
that
|
24
|
+
the
|
25
|
+
their
|
26
|
+
then
|
27
|
+
there
|
28
|
+
these
|
29
|
+
they
|
30
|
+
this
|
31
|
+
to
|
32
|
+
was
|
33
|
+
will
|
34
|
+
with
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
3
|
+
# the License. You may obtain a copy of the License at
|
4
|
+
#
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the License for the specific language governing permissions and
|
11
|
+
# limitations under the License.
|
12
|
+
|
13
|
+
#-----------------------------------------------------------------------
|
14
|
+
# Use a protected word file to protect against the stemmer reducing two
|
15
|
+
# unrelated words to the same base word.
|
16
|
+
|
17
|
+
# Some non-words that normally won't be encountered,
|
18
|
+
# just to test that they won't be stemmed.
|
19
|
+
dontstems
|
20
|
+
zwhacky
|
21
|
+
|
@@ -0,0 +1,156 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<schema name="kurma-app-dev" version="1.5">
|
3
|
+
<uniqueKey>uuid</uniqueKey>
|
4
|
+
<fields>
|
5
|
+
<field name="_version_" type="long" stored="true" indexed="true"/>
|
6
|
+
<field name="timestamp" type="date" stored="true" indexed="true" default="NOW"/>
|
7
|
+
<field name="uuid" type="string" stored="true" indexed="true" required="true"/>
|
8
|
+
|
9
|
+
<!-- core generated fields -->
|
10
|
+
<field name="text" type="text_en" stored="false" indexed="true" multiValued="true"
|
11
|
+
termVectors="true" termPositions="true" termOffsets="true" />
|
12
|
+
|
13
|
+
<!-- dynamic field with simple types by suffix -->
|
14
|
+
<dynamicField name="*_b" type="boolean" stored="true" indexed="true"/>
|
15
|
+
<dynamicField name="*_d" type="double" stored="true" indexed="true"/>
|
16
|
+
<dynamicField name="*_dt" type="date" stored="true" indexed="true"/>
|
17
|
+
<dynamicField name="*_f" type="float" stored="true" indexed="true"/>
|
18
|
+
<dynamicField name="*_i" type="int" stored="true" indexed="true"/>
|
19
|
+
<dynamicField name="*_l" type="long" stored="true" indexed="true"/>
|
20
|
+
<dynamicField name="*_s" type="string" stored="true" indexed="true"/>
|
21
|
+
<dynamicField name="*_ss" type="string" stored="true" indexed="false"/>
|
22
|
+
<dynamicField name="*_si" type="string" stored="false" indexed="true"/>
|
23
|
+
<dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true" />
|
24
|
+
<dynamicField name="*_sm" type="string" stored="true" indexed="true" multiValued="true" />
|
25
|
+
<dynamicField name="*_url" type="string" stored="true" indexed="false"/>
|
26
|
+
<dynamicField name="*_blob" type="binary" stored="true" indexed="false"/>
|
27
|
+
|
28
|
+
<!-- dynamic Text fields by suffix without storage -->
|
29
|
+
<dynamicField name="*_t" type="text_en" stored="false" indexed="true"
|
30
|
+
termVectors="true" termPositions="true" termOffsets="true"/>
|
31
|
+
<dynamicField name="*_tm" type="text_en" stored="false" indexed="true" multiValued="true"
|
32
|
+
termVectors="true" termPositions="true" termOffsets="true"/>
|
33
|
+
<dynamicField name="*_ti" type="text_en" stored="false" indexed="true"
|
34
|
+
termVectors="true" termPositions="true" termOffsets="true"/>
|
35
|
+
<dynamicField name="*_tmi" type="text_en" stored="false" indexed="true" multiValued="true"
|
36
|
+
termVectors="true" termPositions="true" termOffsets="true"/>
|
37
|
+
<dynamicField name="*_sort" type="text_sort" stored="false" indexed="true" multiValued="false"/>
|
38
|
+
|
39
|
+
<!-- Spatial field types:
|
40
|
+
|
41
|
+
Solr3:
|
42
|
+
<field name="my_pt">83.1,-117.312</field>
|
43
|
+
as (y,x)
|
44
|
+
|
45
|
+
Solr4:
|
46
|
+
|
47
|
+
<field name="my_bbox">-117.312 83.1 -115.39 84.31</field>
|
48
|
+
as (W S E N)
|
49
|
+
|
50
|
+
<field name="my_geom">POLYGON((1 8, 1 9, 2 9, 2 8, 1 8))</field>
|
51
|
+
as WKT for point, linestring, polygon
|
52
|
+
|
53
|
+
-->
|
54
|
+
<dynamicField name="*_pt" type="location" stored="true" indexed="true"/>
|
55
|
+
<dynamicField name="*_bbox" type="location_rpt" stored="true" indexed="true"/>
|
56
|
+
<dynamicField name="*_geom" type="location_jts" stored="true" indexed="true"/>
|
57
|
+
<dynamicField name="*_wkt" type="location_jts" stored="true" indexed="true"/>
|
58
|
+
</fields>
|
59
|
+
|
60
|
+
<types>
|
61
|
+
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
|
62
|
+
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
|
63
|
+
|
64
|
+
<fieldType name="int" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
|
65
|
+
<fieldType name="float" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
|
66
|
+
<fieldType name="long" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
|
67
|
+
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
|
68
|
+
|
69
|
+
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z.
|
70
|
+
The trailing "Z" designates UTC time and is mandatory.
|
71
|
+
A Trie based date field for faster date range queries and date faceting. -->
|
72
|
+
<fieldType name="date" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
|
73
|
+
|
74
|
+
<!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings. -->
|
75
|
+
<fieldType name="binary" class="solr.BinaryField"/>
|
76
|
+
|
77
|
+
<!-- A text field with defaults appropriate for English: it
|
78
|
+
tokenizes with StandardTokenizer, removes English stop words
|
79
|
+
(lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
|
80
|
+
finally applies Porter's stemming. The query time analyzer
|
81
|
+
also applies synonyms from synonyms.txt. -->
|
82
|
+
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
|
83
|
+
<analyzer type="index">
|
84
|
+
<tokenizer class="solr.StandardTokenizerFactory"/>
|
85
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
86
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
87
|
+
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
88
|
+
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
89
|
+
<filter class="solr.PorterStemFilterFactory"/>
|
90
|
+
</analyzer>
|
91
|
+
<analyzer type="query">
|
92
|
+
<tokenizer class="solr.StandardTokenizerFactory"/>
|
93
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
94
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
95
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
96
|
+
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
97
|
+
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
98
|
+
<filter class="solr.PorterStemFilterFactory"/>
|
99
|
+
</analyzer>
|
100
|
+
</fieldType>
|
101
|
+
|
102
|
+
<!-- for alpha sorting as a single token -->
|
103
|
+
<fieldType name="text_sort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
104
|
+
<analyzer>
|
105
|
+
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
106
|
+
<filter class="solr.LowerCaseFilterFactory" />
|
107
|
+
<filter class="solr.TrimFilterFactory" />
|
108
|
+
<filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z0-9 ])" replacement="" replace="all"/>
|
109
|
+
</analyzer>
|
110
|
+
</fieldType>
|
111
|
+
|
112
|
+
<!-- Spatial field types -->
|
113
|
+
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_d"/>
|
114
|
+
|
115
|
+
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
|
116
|
+
distErrPct="0.025"
|
117
|
+
maxDistErr="0.000009"
|
118
|
+
units="degrees"
|
119
|
+
/>
|
120
|
+
|
121
|
+
<!-- JTS-enabled spatial predicates; requires JTS installation -->
|
122
|
+
<fieldType name="location_jts" class="solr.SpatialRecursivePrefixTreeFieldType"
|
123
|
+
spatialContextFactory="com.spatial4j.core.context.jts.JtsSpatialContextFactory"
|
124
|
+
distErrPct="0.025"
|
125
|
+
maxDistErr="0.000009"
|
126
|
+
units="degrees"
|
127
|
+
/>
|
128
|
+
</types>
|
129
|
+
|
130
|
+
<!-- for scoring formula -->
|
131
|
+
<copyField source="dct_spatial_sm" dest="dct_spatial_tmi" maxChars="10000"/>
|
132
|
+
<copyField source="dct_temporal_sm" dest="dct_temporal_tmi" maxChars="10000"/>
|
133
|
+
<copyField source="dc_creator_sm" dest="dc_creator_tmi" maxChars="1000"/>
|
134
|
+
<copyField source="dc_description_s" dest="dc_description_ti" maxChars="10000"/>
|
135
|
+
<copyField source="dc_format_s" dest="dc_format_ti" maxChars="100"/>
|
136
|
+
<copyField source="dc_identifier_s" dest="dc_identifier_ti" maxChars="100"/>
|
137
|
+
<copyField source="dc_publisher_s" dest="dc_publisher_ti" maxChars="1000"/>
|
138
|
+
<copyField source="dc_rights_s" dest="dc_rights_ti" maxChars="100"/>
|
139
|
+
<copyField source="dct_provenance_s" dest="dct_provenance_ti" maxChars="1000"/>
|
140
|
+
<copyField source="dc_subject_sm" dest="dc_subject_tmi" maxChars="10000"/>
|
141
|
+
<copyField source="dc_title_s" dest="dc_title_ti" maxChars="1000"/>
|
142
|
+
<copyField source="layer_collection_s" dest="layer_collection_ti" maxChars="1000"/>
|
143
|
+
<copyField source="layer_geom_type_s" dest="layer_geom_type_ti" maxChars="100"/>
|
144
|
+
<copyField source="layer_slug_s" dest="layer_slug_ti" maxChars="100"/>
|
145
|
+
|
146
|
+
<!-- core text search -->
|
147
|
+
<copyField source="*_ti" dest="text" />
|
148
|
+
<copyField source="*_tmi" dest="text" />
|
149
|
+
|
150
|
+
<!-- for sorting text fields -->
|
151
|
+
<copyField source="dc_title_s" dest="dc_title_sort"/>
|
152
|
+
<copyField source="dc_publisher_s" dest="dc_publisher_sort"/>
|
153
|
+
<copyField source="layer_collection_s" dest="layer_collection_sort"/>
|
154
|
+
|
155
|
+
</schema>
|
156
|
+
|
@@ -0,0 +1,161 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!--
|
3
|
+
Licensed to the Apache Software Foundation (ASF) under one or more
|
4
|
+
contributor license agreements. See the NOTICE file distributed with
|
5
|
+
this work for additional information regarding copyright ownership.
|
6
|
+
The ASF licenses this file to You under the Apache License, Version 2.0
|
7
|
+
(the "License"); you may not use this file except in compliance with
|
8
|
+
the License. You may obtain a copy of the License at
|
9
|
+
|
10
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
Unless required by applicable law or agreed to in writing, software
|
13
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
See the License for the specific language governing permissions and
|
16
|
+
limitations under the License.
|
17
|
+
-->
|
18
|
+
<!--
|
19
|
+
For more details about configurations options that may appear in
|
20
|
+
this file, see http://wiki.apache.org/solr/SolrConfigXml.
|
21
|
+
-->
|
22
|
+
<config>
|
23
|
+
<luceneMatchVersion>LUCENE_CURRENT</luceneMatchVersion>
|
24
|
+
<dataDir>${solr.data.dir:}</dataDir>
|
25
|
+
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
|
26
|
+
<codecFactory class="solr.SchemaCodecFactory"/>
|
27
|
+
<schemaFactory class="ClassicIndexSchemaFactory"/>
|
28
|
+
<indexConfig>
|
29
|
+
<lockType>${solr.lock.type:native}</lockType>
|
30
|
+
</indexConfig>
|
31
|
+
|
32
|
+
<!-- The default high-performance update handler -->
|
33
|
+
<updateHandler class="solr.DirectUpdateHandler2">
|
34
|
+
<updateLog>
|
35
|
+
<str name="dir">${solr.ulog.dir:}</str>
|
36
|
+
</updateLog>
|
37
|
+
<autoCommit>
|
38
|
+
<maxTime>15000</maxTime>
|
39
|
+
<openSearcher>false</openSearcher>
|
40
|
+
</autoCommit>
|
41
|
+
</updateHandler>
|
42
|
+
|
43
|
+
<!-- realtime get handler, guaranteed to return the latest stored fields
|
44
|
+
of any document, without the need to commit or open a new searcher. The current
|
45
|
+
implementation relies on the updateLog feature being enabled. -->
|
46
|
+
<requestHandler name="/get" class="solr.RealTimeGetHandler">
|
47
|
+
<lst name="defaults">
|
48
|
+
<str name="omitHeader">true</str>
|
49
|
+
</lst>
|
50
|
+
</requestHandler>
|
51
|
+
|
52
|
+
<requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />
|
53
|
+
|
54
|
+
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
55
|
+
Query section - these settings control query time things like caches
|
56
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
|
57
|
+
<query>
|
58
|
+
<maxBooleanClauses>1024</maxBooleanClauses>
|
59
|
+
<filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0"/>
|
60
|
+
<queryResultCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>
|
61
|
+
<documentCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>
|
62
|
+
<enableLazyFieldLoading>true</enableLazyFieldLoading>
|
63
|
+
<queryResultWindowSize>20</queryResultWindowSize>
|
64
|
+
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
|
65
|
+
<listener event="newSearcher" class="solr.QuerySenderListener">
|
66
|
+
<arr name="queries">
|
67
|
+
<lst><str name="q">stanford</str></lst>
|
68
|
+
<lst><str name="q">polygon</str></lst>
|
69
|
+
</arr>
|
70
|
+
</listener>
|
71
|
+
<listener event="firstSearcher" class="solr.QuerySenderListener">
|
72
|
+
<arr name="queries">
|
73
|
+
<lst>
|
74
|
+
<str name="q">static firstSearcher warming in solrconfig.xml</str>
|
75
|
+
</lst>
|
76
|
+
</arr>
|
77
|
+
</listener>
|
78
|
+
<useColdSearcher>false</useColdSearcher>
|
79
|
+
<maxWarmingSearchers>2</maxWarmingSearchers>
|
80
|
+
</query>
|
81
|
+
|
82
|
+
<requestDispatcher handleSelect="false">
|
83
|
+
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048000" formdataUploadLimitInKB="2048"/>
|
84
|
+
<httpCaching never304="true"/>
|
85
|
+
</requestDispatcher>
|
86
|
+
|
87
|
+
<requestHandler name="/search" class="solr.SearchHandler"/>
|
88
|
+
|
89
|
+
<requestHandler name="/select" class="solr.SearchHandler">
|
90
|
+
<lst name="defaults">
|
91
|
+
<int name="start">0</int>
|
92
|
+
<int name="rows">10</int>
|
93
|
+
<str name="wt">json</str>
|
94
|
+
<int name="indent">2</int>
|
95
|
+
<str name="defType">edismax</str>
|
96
|
+
<str name="echoParams">all</str>
|
97
|
+
<str name="fl">*,score</str>
|
98
|
+
<str name="sort">score desc, dc_title_sort asc</str>
|
99
|
+
<str name="q.alt">*:*</str>
|
100
|
+
<str name="qf">
|
101
|
+
text^1
|
102
|
+
dc_description_ti^2
|
103
|
+
dc_creator_tmi^3
|
104
|
+
dc_publisher_ti^3
|
105
|
+
layer_collection_ti^4
|
106
|
+
dc_subject_tmi^5
|
107
|
+
dct_spatial_tmi^5
|
108
|
+
dct_temporal_tmi^5
|
109
|
+
dc_title_ti^6
|
110
|
+
dc_rights_ti^7
|
111
|
+
dct_provenance_ti^8
|
112
|
+
layer_geom_type_ti^9
|
113
|
+
layer_slug_ti^10
|
114
|
+
dc_identifier_ti^10
|
115
|
+
</str>
|
116
|
+
<bool name="facet">true</bool>
|
117
|
+
<int name="facet.mincount">1</int>
|
118
|
+
<int name="facet.limit">10</int>
|
119
|
+
<str name="facet.field">dct_spatial_sm</str>
|
120
|
+
<str name="facet.field">dc_creator_sm</str>
|
121
|
+
<str name="facet.field">dc_format_s</str>
|
122
|
+
<str name="facet.field">dc_language_s</str>
|
123
|
+
<str name="facet.field">dc_publisher_s</str>
|
124
|
+
<str name="facet.field">dc_rights_s</str>
|
125
|
+
<str name="facet.field">dct_provenance_s</str>
|
126
|
+
<str name="facet.field">dc_subject_sm</str>
|
127
|
+
<str name="facet.field">layer_collection_s</str>
|
128
|
+
<str name="facet.field">layer_geom_type_s</str>
|
129
|
+
<str name="facet.field">layer_srs_s</str>
|
130
|
+
<str name="facet.field">layer_year_i</str>
|
131
|
+
</lst>
|
132
|
+
</requestHandler>
|
133
|
+
|
134
|
+
<requestHandler name="/update" class="solr.UpdateRequestHandler"/>
|
135
|
+
<requestHandler name="/admin/" class="solr.admin.AdminHandlers"/>
|
136
|
+
|
137
|
+
<!-- ping/healthcheck -->
|
138
|
+
<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
|
139
|
+
<lst name="invariants">
|
140
|
+
<str name="q">solrpingquery</str>
|
141
|
+
</lst>
|
142
|
+
<lst name="defaults">
|
143
|
+
<str name="echoParams">all</str>
|
144
|
+
</lst>
|
145
|
+
<!-- An optional feature of the PingRequestHandler is to configure the
|
146
|
+
handler with a "healthcheckFile" which can be used to enable/disable
|
147
|
+
the PingRequestHandler.
|
148
|
+
relative paths are resolved against the data dir
|
149
|
+
-->
|
150
|
+
<str name="healthcheckFile">server-enabled.txt</str>
|
151
|
+
</requestHandler>
|
152
|
+
|
153
|
+
<requestHandler name="/analysis/field"
|
154
|
+
startup="lazy"
|
155
|
+
class="solr.FieldAnalysisRequestHandler" />
|
156
|
+
|
157
|
+
<!-- Legacy config for the admin interface -->
|
158
|
+
<admin>
|
159
|
+
<defaultQuery>*:*</defaultQuery>
|
160
|
+
</admin>
|
161
|
+
</config>
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
3
|
+
# the License. You may obtain a copy of the License at
|
4
|
+
#
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the License for the specific language governing permissions and
|
11
|
+
# limitations under the License.
|
12
|
+
|
13
|
+
#-----------------------------------------------------------------------
|
14
|
+
#some test synonym mappings unlikely to appear in real input text
|
15
|
+
aaafoo => aaabar
|
16
|
+
bbbfoo => bbbfoo bbbbar
|
17
|
+
cccfoo => cccbar cccbaz
|
18
|
+
fooaaa,baraaa,bazaaa
|
19
|
+
|
20
|
+
# Some synonym groups specific to this example
|
21
|
+
GB,gib,gigabyte,gigabytes
|
22
|
+
MB,mib,megabyte,megabytes
|
23
|
+
Television, Televisions, TV, TVs
|
24
|
+
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
25
|
+
#after us won't split it into two words.
|
26
|
+
|
27
|
+
# Synonym mappings can be used for spelling correction too
|
28
|
+
pixima => pixma
|
29
|
+
|