search_solr_tools 3.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +88 -0
- data/COPYING +674 -0
- data/README.md +203 -0
- data/bin/search_solr_tools +87 -0
- data/lib/search_solr_tools.rb +8 -0
- data/lib/search_solr_tools/config/environments.rb +12 -0
- data/lib/search_solr_tools/config/environments.yaml +73 -0
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +43 -0
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +61 -0
- data/lib/search_solr_tools/harvesters/base.rb +183 -0
- data/lib/search_solr_tools/harvesters/bcodmo.rb +55 -0
- data/lib/search_solr_tools/harvesters/cisl.rb +63 -0
- data/lib/search_solr_tools/harvesters/echo.rb +50 -0
- data/lib/search_solr_tools/harvesters/eol.rb +53 -0
- data/lib/search_solr_tools/harvesters/ices.rb +55 -0
- data/lib/search_solr_tools/harvesters/nmi.rb +32 -0
- data/lib/search_solr_tools/harvesters/nodc.rb +72 -0
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +33 -0
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +60 -0
- data/lib/search_solr_tools/harvesters/oai.rb +59 -0
- data/lib/search_solr_tools/harvesters/pdc.rb +38 -0
- data/lib/search_solr_tools/harvesters/rda.rb +33 -0
- data/lib/search_solr_tools/harvesters/tdar.rb +57 -0
- data/lib/search_solr_tools/harvesters/usgs.rb +74 -0
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +37 -0
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +30 -0
- data/lib/search_solr_tools/helpers/facet_configuration.rb +19 -0
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +30 -0
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +96 -0
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +198 -0
- data/lib/search_solr_tools/helpers/query_builder.rb +13 -0
- data/lib/search_solr_tools/helpers/selectors.rb +20 -0
- data/lib/search_solr_tools/helpers/solr_format.rb +260 -0
- data/lib/search_solr_tools/helpers/tdar_format.rb +70 -0
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +77 -0
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +40 -0
- data/lib/search_solr_tools/helpers/usgs_format.rb +50 -0
- data/lib/search_solr_tools/selectors/cisl.rb +112 -0
- data/lib/search_solr_tools/selectors/echo_iso.rb +111 -0
- data/lib/search_solr_tools/selectors/ices_iso.rb +107 -0
- data/lib/search_solr_tools/selectors/nmi.rb +106 -0
- data/lib/search_solr_tools/selectors/nodc_iso.rb +107 -0
- data/lib/search_solr_tools/selectors/pdc_iso.rb +108 -0
- data/lib/search_solr_tools/selectors/rda.rb +106 -0
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +89 -0
- data/lib/search_solr_tools/selectors/usgs_iso.rb +105 -0
- data/lib/search_solr_tools/translators/bcodmo_json.rb +69 -0
- data/lib/search_solr_tools/translators/eol_to_solr.rb +78 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +190 -0
- data/lib/search_solr_tools/version.rb +3 -0
- data/search_solr_tools.gemspec +45 -0
- metadata +345 -0
data/README.md
ADDED
@@ -0,0 +1,203 @@
|
|
1
|
+
[![Build Status](https://travis-ci.org/nsidc/search-solr-tools.svg?branch=master)](https://travis-ci.org/nsidc/search-solr-tools)
|
2
|
+
|
3
|
+
# NSIDC Search Solr Tools
|
4
|
+
|
5
|
+
This is a gem that contains:
|
6
|
+
|
7
|
+
* Ruby translators to transform various metadata feeds into solr documents
|
8
|
+
* A command-line utility to access/utilize the gem's translators to harvest
|
9
|
+
metadata into a working solr instance.
|
10
|
+
|
11
|
+
## Using the project
|
12
|
+
|
13
|
+
### Standard Installation
|
14
|
+
|
15
|
+
The gem is available through [RubyGems](https://rubygems.org/). To install the
|
16
|
+
gem, ensure all requirements below are met and run (providing the appropriate
|
17
|
+
version):
|
18
|
+
|
19
|
+
`sudo gem install search_solr_tools -v $VERSION`
|
20
|
+
|
21
|
+
### Custom Deployment
|
22
|
+
|
23
|
+
Clone the repository, and install all requirements as noted below.
|
24
|
+
|
25
|
+
#### Configuration
|
26
|
+
|
27
|
+
Once you have the code and requirements, edit the configuration file in
|
28
|
+
`lib/search_solr_tools/config/environments.yaml` to match your environment. The
|
29
|
+
configuration values are set by environment for each harvester (or specified in
|
30
|
+
the `common` settings list), with the environment overriding `common` if a
|
31
|
+
different setting is specified for a given environment.
|
32
|
+
|
33
|
+
Each harvester has its own configuration settings. Most are the target endpoint;
|
34
|
+
EOL, however, has a list of THREDDS project endpoints and NSIDC has its own
|
35
|
+
oai/metadata endpoint settings.
|
36
|
+
|
37
|
+
Most users should not need to change the harvester configuration unless they
|
38
|
+
establish a local test node, or if a provider changes available endpoints;
|
39
|
+
however, the `host` option for each environment must specify the configured SOLR
|
40
|
+
instance you intend to use these tools with.
|
41
|
+
|
42
|
+
#### Build and Install Gem
|
43
|
+
|
44
|
+
Then run:
|
45
|
+
|
46
|
+
`bundle exec gem build ./search_solr_tools.gemspec`
|
47
|
+
|
48
|
+
Once you have the gem built in the project directory, install the utility:
|
49
|
+
|
50
|
+
`gem install --local ./search_solr_tools-version.gem`
|
51
|
+
|
52
|
+
## Working on the Project
|
53
|
+
|
54
|
+
1. Create your feature branch (`git checkout -b my-new-feature`)
|
55
|
+
2. Stage your changes (`git add`)
|
56
|
+
3. Commit your Rubocop compliant and test-passing changes with a
|
57
|
+
[good commit message](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html)
|
58
|
+
(`git commit`)
|
59
|
+
4. Push to the branch (`git push -u origin my-new-feature`)
|
60
|
+
5. Create a new Pull Request
|
61
|
+
|
62
|
+
### Requirements
|
63
|
+
|
64
|
+
* Ruby > 2.0.0
|
65
|
+
* [Bundler](http://bundler.io/)
|
66
|
+
* Requirements for nokogiri:
|
67
|
+
* [libxml2/libxml2-dev](http://xmlsoft.org/)
|
68
|
+
* [zlibc](http://www.zlibc.linux.lu/)
|
69
|
+
* [zlib1g/zlib1g-dev](http://zlib.net/)
|
70
|
+
* Dependency build requirements:
|
71
|
+
* For Ubuntu/Debian, install the build-essential package.
|
72
|
+
* On the latest Fedora release installing the following will get you all of the requirements:
|
73
|
+
|
74
|
+
`yum groupinstall 'Development Tools'`
|
75
|
+
|
76
|
+
`yum install gcc-c++`
|
77
|
+
|
78
|
+
*Please note*: If you are having difficulty installing Nokogiri please review the
|
79
|
+
Nokogiri [installation tutorial](http://www.nokogiri.org/tutorials/installing_nokogiri.html)
|
80
|
+
|
81
|
+
* All gems installed (preferably using bundler: `bundle install`)
|
82
|
+
* A running, configured SOLR instance to accept data harvests.
|
83
|
+
|
84
|
+
### RuboCop
|
85
|
+
|
86
|
+
The style checker [RuboCop](https://github.com/bbatsov/rubocop) can be run with
|
87
|
+
`rubocop` or `bundle exec rake guard:rubocop`. The rake task will also watch for
|
88
|
+
ruby files (.rb, .rake, Gemfile, Guardfile, Rakefile) to be changed, and run
|
89
|
+
RuboCop on the changed files.
|
90
|
+
|
91
|
+
`bundle exec rake guard` will automatically run the unit tests and RuboCop in
|
92
|
+
one terminal window.
|
93
|
+
|
94
|
+
RuboCop can be configured by modifying `.rubocop.yml`.
|
95
|
+
|
96
|
+
Pushing with failing tests or RuboCop violations will cause the Jenkins build to
|
97
|
+
break. Jenkins jobs to build and deploy this project are named
|
98
|
+
"NSIDC_Search_SOLR_()…" and can be viewed under the
|
99
|
+
[NSIDC Search tab](https://scm.nsidc.org/jenkins/view/NSIDC%20Search/).
|
100
|
+
|
101
|
+
### Testing
|
102
|
+
|
103
|
+
Unit tests can be run with `rspec`, `bundle exec rake spec:unit`, or `bundle
|
104
|
+
exec rake guard:specs`. Running the rake guard task will also automatically run
|
105
|
+
the tests whenever the appropriate files are changed.
|
106
|
+
|
107
|
+
Please be sure to run them in the `bundle exec` context if you're utilizing bundler.
|
108
|
+
|
109
|
+
### Creating Releases (NSIDC devs only)
|
110
|
+
|
111
|
+
Requirements:
|
112
|
+
|
113
|
+
* Ruby > 2.0.0
|
114
|
+
* [Bundler](http://bundler.io/)
|
115
|
+
* [Gem Release](https://github.com/svenfuchs/gem-release)
|
116
|
+
* [Rake](https://github.com/ruby/rake)
|
117
|
+
* a [RubyGems](https://rubygems.org) account that has
|
118
|
+
[ownership](http://guides.rubygems.org/publishing/) of the gem
|
119
|
+
* RuboCop and the unit tests should all pass (`rake`)
|
120
|
+
|
121
|
+
**gem release** is used by rake tasks in this project to handle version changes,
|
122
|
+
tagging, and publishing to RubyGems.
|
123
|
+
|
124
|
+
| Command | Description |
|
125
|
+
|---------------------------|-------------|
|
126
|
+
| `rake release:pre[false]` | Increase the current prerelease version number, push changes |
|
127
|
+
| `rake release:pre[true]` | Increase the current prerelease version number, publish release\* |
|
128
|
+
| `rake release:none` | Drop the prerelease version, publish release, then `pre[false]` |
|
129
|
+
| `rake release:minor` | Increase the minor version number, publish release, then `pre[false]` |
|
130
|
+
| `rake release:major` | Increase the major version number, publish release, then `pre[false]` |
|
131
|
+
|
132
|
+
\*"publish release" means each of the following occurs:
|
133
|
+
|
134
|
+
* a new tag is created
|
135
|
+
* the changes are pushed
|
136
|
+
* the tagged version is built and published to RubyGems
|
137
|
+
|
138
|
+
### SOLR:
|
139
|
+
|
140
|
+
To harvest data utilizing the gem, you will need a local configured instance of
|
141
|
+
Solr 4.3, which can be downloaded from
|
142
|
+
[Apache's archive](https://archive.apache.org/dist/lucene/solr/4.3.0/).
|
143
|
+
|
144
|
+
#### NSIDC
|
145
|
+
|
146
|
+
At NSIDC the development VM can be provisioned with the
|
147
|
+
[solr puppet module](https://bitbucket.org/nsidc/puppet-solr/) to install and
|
148
|
+
configure Solr.
|
149
|
+
|
150
|
+
#### Non-NSIDC
|
151
|
+
|
152
|
+
Outside of NSIDC, setup solr using the instructions found in the
|
153
|
+
[search-solr](https://github.com/nsidc/search-solr) project.
|
154
|
+
|
155
|
+
### Harvesting Data
|
156
|
+
|
157
|
+
The harvester requires additional metadata from services that may not yet be
|
158
|
+
publicly available, which are referenced in
|
159
|
+
`lib/search_solr_tools/config/environments.yaml`.
|
160
|
+
|
161
|
+
To utilize the gem, build and install the **search_solr_tools** gem. This will
|
162
|
+
add an executable `search_solr_tools` to the path (source is in
|
163
|
+
`bin/search_solr_tools`). The executable is self-documenting; for a brief
|
164
|
+
overview of what's available, simply run `search_solr_tools`.
|
165
|
+
|
166
|
+
Harvesting of data can be done using the `harvest` task, giving it a list of
|
167
|
+
harvesters and an environment. Deletion is possible via the `delete_all` and/or
|
168
|
+
`delete_by_data_center` tasks. `list_harvesters` will list the valid harvest
|
169
|
+
targets.
|
170
|
+
|
171
|
+
In addition to feed URLs, `environments.yaml` also defines various environments
|
172
|
+
which can be modified, or additional environments can be added by just adding a
|
173
|
+
new YAML stanza with the right keys; this new environment can then be used with
|
174
|
+
the `--environment` flag when running `search_solr_tools harvest`.
|
175
|
+
|
176
|
+
## Organization Info
|
177
|
+
|
178
|
+
### How to contact NSIDC
|
179
|
+
|
180
|
+
User Services and general information:
|
181
|
+
Support: [http://support.nsidc.org](http://support.nsidc.org)
|
182
|
+
Email: nsidc@nsidc.org
|
183
|
+
|
184
|
+
Phone: +1 303.492.6199
|
185
|
+
Fax: +1 303.492.2468
|
186
|
+
|
187
|
+
Mailing address:
|
188
|
+
National Snow and Ice Data Center
|
189
|
+
CIRES, 449 UCB
|
190
|
+
University of Colorado
|
191
|
+
Boulder, CO 80309-0449 USA
|
192
|
+
|
193
|
+
### License
|
194
|
+
|
195
|
+
Every file in this repository is covered by the GNU GPL Version 3; a copy of the
|
196
|
+
license is included in the file COPYING.
|
197
|
+
|
198
|
+
### Citation Information
|
199
|
+
|
200
|
+
Andy Grauch, Brendan Billingsley, Chris Chalstrom, Danielle Harper, Ian
|
201
|
+
Truslove, Jonathan Kovarik, Luis Lopez, Miao Liu, Michael Brandt, Stuart Reed
|
202
|
+
(2013): Arctic Data Explorer SOLR Search software tools. The National Snow and
|
203
|
+
Ice Data Center. Software. http://ezid.cdlib.org/id/doi:10.7265/N5JQ0XZM
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'search_solr_tools'
|
3
|
+
require 'thor'
|
4
|
+
|
5
|
+
# Command-line interface for harvesting metadata feeds into a Solr instance.
# Thor makes the executable self-documenting: run it with no arguments for a
# usage summary of the tasks below.
class SolrHarvestCLI < Thor
  desc 'harvest', 'Harvest from one of the ADE harvesters'
  option :data_center, type: :array, required: true
  option :environment, required: true
  option :die_on_failure, type: :boolean

  # Harvest each requested data center in turn.  A failure for one target is
  # reported and the remaining targets are still attempted, unless
  # --die_on_failure was given, in which case the error is re-raised.
  def harvest(die_on_failure = options[:die_on_failure] || false)
    options[:data_center].each do |target|
      puts target
      begin
        harvest_class = get_harvester_class(target)
        harvester = harvest_class.new options[:environment], die_on_failure
        harvester.harvest_and_delete
      rescue => e
        puts "harvest failed for #{target}: #{e.message}"
        raise e if die_on_failure
      end
    end
  end

  desc 'list_harvesters', 'List all harvesters'
  def list_harvesters
    puts harvester_map.keys
  end

  desc 'delete_all', 'Delete all documents from the index'
  option :environment, required: true
  def delete_all
    delete_all_documents_from_solr
  end

  desc 'delete_all_auto_suggest', 'Delete all documents from the auto_suggest index'
  option :environment, required: true
  def delete_all_auto_suggest
    # NOTE(review): this historically issued the exact same requests as
    # delete_all (same /solr/update endpoint, no auto_suggest-specific core
    # in the URL) -- confirm whether the auto_suggest collection should be
    # addressed explicitly here.
    delete_all_documents_from_solr
  end

  desc 'delete_by_data_center', 'Force deletion of documents for a specific data center with timestamps before the passed timestamp in format iso8601 (2014-07-14T21:49:21Z)'
  option :timestamp, required: true
  option :environment, required: true
  option :data_center, required: true
  def delete_by_data_center
    harvester = get_harvester_class(options[:data_center]).new options[:environment]
    harvester.delete_old_documents(options[:timestamp],
                                   "data_centers:\"#{SearchSolrTools::Helpers::SolrFormat::DATA_CENTER_NAMES[options[:data_center].upcase.to_sym][:long_name]}\"",
                                   SearchSolrTools::SolrEnvironments[harvester.environment][:collection_name],
                                   true
                                  )
  end

  no_tasks do
    # Maps the CLI data-center names to their harvester classes.
    def harvester_map
      {
        'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
        'cisl' => SearchSolrTools::Harvesters::Cisl,
        'echo' => SearchSolrTools::Harvesters::Echo,
        'eol' => SearchSolrTools::Harvesters::Eol,
        'ices' => SearchSolrTools::Harvesters::Ices,
        'nmi' => SearchSolrTools::Harvesters::Nmi,
        'nodc' => SearchSolrTools::Harvesters::Nodc,
        'rda' => SearchSolrTools::Harvesters::Rda,
        'usgs' => SearchSolrTools::Harvesters::Usgs,
        'tdar' => SearchSolrTools::Harvesters::Tdar,
        'pdc' => SearchSolrTools::Harvesters::Pdc,
        'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
        'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest,
        'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
      }
    end

    # Shared implementation for the delete_all* tasks (previously duplicated
    # verbatim in both): issue a delete-all query followed by a commit
    # against the Solr instance configured for --environment.
    def delete_all_documents_from_solr
      env = SearchSolrTools::SolrEnvironments[options[:environment]]
      solr_update_url = "http://#{env[:host]}:#{env[:port]}/solr/update"
      `curl '#{solr_update_url}' -H 'Content-Type: text/xml; charset=utf-8' --data '<delete><query>*:*</query></delete>'`
      `curl '#{solr_update_url}' -H 'Content-Type: text/xml; charset=utf-8' --data '<commit/>'`
    end

    # Looks up the harvester class for +data_center_name+; fails with a
    # RuntimeError naming the invalid target when it is not in harvester_map.
    def get_harvester_class(data_center_name)
      name = data_center_name.downcase.to_s
      fail("Invalid data center #{name}") unless harvester_map.key?(name)

      harvester_map[name]
    end
  end
end
SolrHarvestCLI.start(ARGV)
|
@@ -0,0 +1,8 @@
|
|
1
|
+
require 'require_all'
|
2
|
+
require_relative './search_solr_tools/config/environments'
|
3
|
+
require_relative './search_solr_tools/version'
|
4
|
+
|
5
|
+
require_rel './search_solr_tools/helpers'
|
6
|
+
require_rel './search_solr_tools/selectors'
|
7
|
+
require_rel './search_solr_tools/harvesters'
|
8
|
+
require_rel './search_solr_tools/translators'
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module SearchSolrTools
  # Configuration to work with solr locally, or on integration/qa/staging/prod.
  # Settings are loaded once from environments.yaml; each environment stanza
  # overrides/extends the values in the :common stanza.
  module SolrEnvironments
    YAML_ENVS = YAML.load_file(File.expand_path('../environments.yaml', __FILE__))

    # Returns the merged settings Hash for +env+ (Symbol or String).
    #
    # Raises ArgumentError naming the valid environments when +env+ has no
    # stanza in environments.yaml.  (Previously an unknown key yielded nil
    # and `merge(nil)` raised an opaque TypeError -- note the shipped YAML
    # has no :development stanza, so the no-argument call hit exactly that.)
    def self.[](env = :development)
      env_settings = YAML_ENVS.fetch(env.to_sym) do
        raise ArgumentError,
              "Unknown environment '#{env}'; valid environments: #{(YAML_ENVS.keys - [:common]).join(', ')}"
      end
      YAML_ENVS[:common].merge(env_settings)
    end
  end
end
@@ -0,0 +1,73 @@
|
|
1
|
+
# Solr harvester configuration.  The :common stanza applies to every
# environment; each environment stanza below overrides/extends it (see
# config/environments.rb, which merges the two).  All keys must be symbols
# (leading ':') because the merged hash is read with symbol keys (env[:host]).
:common:
  :auto_suggest_collection_name: auto_suggest
  :collection_name: nsidc_oai
  :collection_path: solr
  :port: 8983
  :bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
  :cisl_url: https://www.aoncadis.org/oai/repository
  :echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
  :ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
  :nmi_url: http://access.met.no/metamod/oai
  :nodc_url: http://data.nodc.noaa.gov/geoportal/csw
  :pdc_url: http://www.polardata.ca/oai/provider
  :rda_url: http://rda.ucar.edu/cgi-bin/oai
  :tdar_url: http://core.tdar.org/search/rss
  :usgs_url: https://www.sciencebase.gov/catalog/item/527cf4ede4b0850ea05182ee/csw
  # EOL is a list of THREDDS project catalog endpoints rather than a single URL.
  :eol:
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SHEBA.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SBI.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.PacMARS.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BASE.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ATLAS.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARC_MIP.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.AMTS.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BOREAS.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BeringSea.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARCSS.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BEST.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BSIERP.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BARROW.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.DBO.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ITEX.thredds.xml

:local:
  :host: localhost
  :nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso

:dev:
  # BUG FIX: this key was previously 'host:' (a string key) instead of
  # ':host:', so the merged settings exposed no :host for the dev
  # environment; fixed to match every other stanza.
  :host: dev.search-solr.apps.int.nsidc.org
  :nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso

:integration:
  :host: integration.search-solr.apps.int.nsidc.org
  :nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso

:qa:
  :host: qa.search-solr.apps.int.nsidc.org
  :nsidc_dataset_metadata_url: http://qa.nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://qa.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://brash.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso

:staging:
  :host: staging.search-solr.apps.int.nsidc.org
  :nsidc_dataset_metadata_url: http://staging.nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://staging.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://freeze.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso

:blue:
  :host: blue.search-solr.apps.int.nsidc.org
  :nsidc_dataset_metadata_url: http://nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://frozen.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso

:production:
  :host: search-solr.apps.int.nsidc.org
  :nsidc_dataset_metadata_url: http://nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://frozen.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module SearchSolrTools
  module Harvesters
    # Builds auto-suggest documents for the Arctic Data Explorer (ADE) by
    # faceting over the existing ADE documents in the main Solr collection.
    class AdeAutoSuggest < AutoSuggest
      def harvest_and_delete
        puts 'Building auto-suggest indexes for ADE'
        super(method(:harvest), "source:\"ADE\"", @env_settings[:auto_suggest_collection_name])
      end

      # Facet query restricted to ADE documents within the Arctic bounding
      # box (latitude >= 45N); rows=0 because only facet counts are needed.
      def harvest
        url = "#{solr_url}/#{@env_settings[:collection_name]}/select?q=*%3A*&fq=source%3AADE&fq=spatial:[45.0,-180.0+TO+90.0,180.0]&rows=0&wt=json&indent=true&facet=true&facet.mincount=1&facet.sort=count&facet.limit=-1"
        super url, fields
      end

      # Facet fields to harvest, with per-field suggestion weight and the
      # creator method that turns each facet value into suggestion documents.
      def fields
        { 'full_keywords_and_parameters' => { weight: 2, source: 'ADE', creator: method(:keyword_creator) },
          'full_authors' => { weight: 1, source: 'ADE', creator: method(:author_creator) }
        }
      end

      # Splits a facet value on +split_regex+ and creates one suggestion
      # document per non-empty part (whitespace stripped, trailing '/'
      # removed).
      def split_creator(value, count, field_weight, source, split_regex)
        add_docs = []
        value.downcase.split(split_regex).each do |v|
          v = v.strip.chomp('/')
          add_docs.concat(ade_length_limit_creator(v, count, field_weight, source)) unless v.nil? || v.empty?
        end
        add_docs
      end

      def keyword_creator(value, count, field_weight, source)
        # BUG FIX: the pattern was %r{/ [\/ \>]+ /}; inside %r{} the slashes
        # and spaces are literal pattern characters, so keyword paths were
        # effectively never split.  Split on runs of '/', '>' or spaces as
        # evidently intended.
        split_creator value, count, field_weight, source, %r{[\/ \>]+}
      end

      def author_creator(value, count, field_weight, source)
        # BUG FIX: the pattern was %r{/;/} (matching the literal text "/;/"),
        # so semicolon-separated author lists were never split.
        split_creator value, count, field_weight, source, %r{;}
      end

      # Suggestions longer than 80 characters are dropped entirely.
      def ade_length_limit_creator(value, count, field_weight, source)
        return [] if value.length > 80
        standard_add_creator value, count, field_weight, source
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'rest-client'
|
3
|
+
|
4
|
+
module SearchSolrTools
  module Harvesters
    # Use the nsidc_oai core to populate the auto_suggest core
    class AutoSuggest < Base
      # super sets @environment; the merged settings for it are cached here
      # for use by the harvest/insert helpers below.
      def initialize(env = 'development', die_on_failure = false)
        super env, die_on_failure
        @env_settings = SolrEnvironments[@environment]
      end

      private

      # Full pipeline: query facet counts, build suggestion documents from
      # them, then push the documents into the auto_suggest collection.
      def harvest(url, fields)
        faceted = fetch_auto_suggest_facet_data(url, fields)
        add_documents_to_solr(generate_add_hashes(faceted, fields))
      end

      # Builds a single suggestion document; the weight grows
      # logarithmically with the facet count (floored at 0.4 for counts <= 1).
      def standard_add_creator(value, count, field_weight, source)
        weight = field_weight * (count <= 1 ? 0.4 : Math.log(count))
        [{ 'id' => "#{source}:#{value}", 'text_suggest' => value, 'source' => source, 'weight' => weight }]
      end

      # Appends one facet.field parameter per configured field, fetches the
      # query, and returns the parsed JSON response.
      def fetch_auto_suggest_facet_data(url, fields)
        facet_url = fields.keys.reduce(url) { |acc, name| acc + "&facet.field=#{name}" }
        JSON.parse(RestClient.get(facet_url))
      end

      # Facet results arrive as a flat [value, count, value, count, ...]
      # array; each pair is handed to the field's :creator callback, and the
      # documents it returns are collected.
      def generate_add_hashes(facet_response, fields)
        docs = []
        facet_response['facet_counts']['facet_fields'].each do |facet_name, facet_values|
          field_config = fields[facet_name]
          facet_values.each_slice(2) do |value, count|
            docs.concat(field_config[:creator].call(value, count, field_config[:weight], field_config[:source]))
          end
        end
        docs
      end

      # Attempts a single bulk insert; if that fails, wraps each document in
      # an individual 'add' command and retries them one by one.
      def add_documents_to_solr(add_docs)
        collection = @env_settings[:auto_suggest_collection_name]
        if insert_solr_doc add_docs, Base::JSON_CONTENT_TYPE, collection
          puts "Added #{add_docs.size} auto suggest documents in one commit"
        else
          puts "Failed adding #{add_docs.size} documents in single commit, retrying one by one"
          retry_docs = add_docs.map { |doc| { 'add' => { 'doc' => doc } } }
          insert_solr_docs retry_docs, Base::JSON_CONTENT_TYPE, collection
        end
      end
    end
  end
end
|