search_solr_tools 3.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +88 -0
- data/COPYING +674 -0
- data/README.md +203 -0
- data/bin/search_solr_tools +87 -0
- data/lib/search_solr_tools.rb +8 -0
- data/lib/search_solr_tools/config/environments.rb +12 -0
- data/lib/search_solr_tools/config/environments.yaml +73 -0
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +43 -0
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +61 -0
- data/lib/search_solr_tools/harvesters/base.rb +183 -0
- data/lib/search_solr_tools/harvesters/bcodmo.rb +55 -0
- data/lib/search_solr_tools/harvesters/cisl.rb +63 -0
- data/lib/search_solr_tools/harvesters/echo.rb +50 -0
- data/lib/search_solr_tools/harvesters/eol.rb +53 -0
- data/lib/search_solr_tools/harvesters/ices.rb +55 -0
- data/lib/search_solr_tools/harvesters/nmi.rb +32 -0
- data/lib/search_solr_tools/harvesters/nodc.rb +72 -0
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +33 -0
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +60 -0
- data/lib/search_solr_tools/harvesters/oai.rb +59 -0
- data/lib/search_solr_tools/harvesters/pdc.rb +38 -0
- data/lib/search_solr_tools/harvesters/rda.rb +33 -0
- data/lib/search_solr_tools/harvesters/tdar.rb +57 -0
- data/lib/search_solr_tools/harvesters/usgs.rb +74 -0
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +37 -0
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +30 -0
- data/lib/search_solr_tools/helpers/facet_configuration.rb +19 -0
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +30 -0
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +96 -0
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +198 -0
- data/lib/search_solr_tools/helpers/query_builder.rb +13 -0
- data/lib/search_solr_tools/helpers/selectors.rb +20 -0
- data/lib/search_solr_tools/helpers/solr_format.rb +260 -0
- data/lib/search_solr_tools/helpers/tdar_format.rb +70 -0
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +77 -0
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +40 -0
- data/lib/search_solr_tools/helpers/usgs_format.rb +50 -0
- data/lib/search_solr_tools/selectors/cisl.rb +112 -0
- data/lib/search_solr_tools/selectors/echo_iso.rb +111 -0
- data/lib/search_solr_tools/selectors/ices_iso.rb +107 -0
- data/lib/search_solr_tools/selectors/nmi.rb +106 -0
- data/lib/search_solr_tools/selectors/nodc_iso.rb +107 -0
- data/lib/search_solr_tools/selectors/pdc_iso.rb +108 -0
- data/lib/search_solr_tools/selectors/rda.rb +106 -0
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +89 -0
- data/lib/search_solr_tools/selectors/usgs_iso.rb +105 -0
- data/lib/search_solr_tools/translators/bcodmo_json.rb +69 -0
- data/lib/search_solr_tools/translators/eol_to_solr.rb +78 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +190 -0
- data/lib/search_solr_tools/version.rb +3 -0
- data/search_solr_tools.gemspec +45 -0
- metadata +345 -0
data/README.md
ADDED
@@ -0,0 +1,203 @@
[![Build Status](https://travis-ci.org/nsidc/search-solr-tools.svg)](https://travis-ci.org/nsidc/search-solr-tools)

# NSIDC Search Solr Tools

This is a gem that contains:

* Ruby translators to transform various metadata feeds into solr documents
* A command-line utility to access/utilize the gem's translators to harvest
  metadata into a working solr instance.

## Using the project

### Standard Installation

The gem is available through [RubyGems](https://rubygems.org/). To install the
gem, ensure all requirements below are met and run (providing the appropriate
version):

`sudo gem install search_solr_tools -v $VERSION`

### Custom Deployment

Clone the repository, and install all requirements as noted below.
#### Configuration

Once you have the code and requirements, edit the configuration file in
`lib/search_solr_tools/config/environments.yaml` to match your environment. The
configuration values are set per environment for each harvester (or specified in
the `common` settings list), with the environment overriding `common` if a
different setting is specified for a given environment.

Each harvester has its own configuration settings. Most are the target endpoint;
EOL, however, has a list of THREDDS project endpoints, and NSIDC has its own
oai/metadata endpoint settings.

Most users should not need to change the harvester configuration unless they
establish a local test node, or a provider changes its available endpoints;
however, the `host` option for each environment must specify the configured SOLR
instance you intend to use these tools with.
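For example, in these abridged stanzas from the bundled `environments.yaml`,
resolving the `local` environment yields `:port: 8983` from `:common` plus
`:host: localhost` from `:local`:

```yaml
:common:
  :port: 8983
  :cisl_url: https://www.aoncadis.org/oai/repository

:local:
  :host: localhost
```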
#### Build and Install Gem

Then run:

`bundle exec gem build ./search_solr_tools.gemspec`

Once you have the gem built in the project directory, install the utility:

`gem install --local ./search_solr_tools-$VERSION.gem`
## Working on the Project

1. Create your feature branch (`git checkout -b my-new-feature`)
2. Stage your changes (`git add`)
3. Commit your RuboCop-compliant and test-passing changes with a
   [good commit message](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html)
   (`git commit`)
4. Push to the branch (`git push -u origin my-new-feature`)
5. Create a new Pull Request
### Requirements

* Ruby > 2.0.0
* [Bundler](http://bundler.io/)
* Requirements for nokogiri:
  * [libxml2/libxml2-dev](http://xmlsoft.org/)
  * [zlibc](http://www.zlibc.linux.lu/)
  * [zlib1g/zlib1g-dev](http://zlib.net/)
  * Dependency build requirements:
    * For Ubuntu/Debian, install the build-essential package.
    * On the latest Fedora release, installing the following will get you all of the requirements:

      `yum groupinstall 'Development Tools'`

      `yum install gcc-c++`

  *Please note*: If you are having difficulty installing Nokogiri, please review the
  Nokogiri [installation tutorial](http://www.nokogiri.org/tutorials/installing_nokogiri.html)
* All gems installed (preferably using bundler: `bundle install`)
* A running, configured SOLR instance to accept data harvests.
### RuboCop

The style checker [RuboCop](https://github.com/bbatsov/rubocop) can be run with
`rubocop` or `bundle exec rake guard:rubocop`. The rake task will also watch for
ruby files (.rb, .rake, Gemfile, Guardfile, Rakefile) to be changed, and run
RuboCop on the changed files.

`bundle exec rake guard` will automatically run the unit tests and RuboCop in
one terminal window.

RuboCop can be configured by modifying `.rubocop.yml`.

Pushing with failing tests or RuboCop violations will cause the Jenkins build to
break. Jenkins jobs to build and deploy this project are named
"NSIDC_Search_SOLR_()…" and can be viewed under the
[NSIDC Search tab](https://scm.nsidc.org/jenkins/view/NSIDC%20Search/).
### Testing

Unit tests can be run with `rspec`, `bundle exec rake spec:unit`, or `bundle
exec rake guard:specs`. Running the rake guard task will also automatically run
the tests whenever the appropriate files are changed.

Please be sure to run them in the `bundle exec` context if you're utilizing bundler.
### Creating Releases (NSIDC devs only)

Requirements:

* Ruby > 2.0.0
* [Bundler](http://bundler.io/)
* [Gem Release](https://github.com/svenfuchs/gem-release)
* [Rake](https://github.com/ruby/rake)
* a [RubyGems](https://rubygems.org) account that has
  [ownership](http://guides.rubygems.org/publishing/) of the gem
* RuboCop and the unit tests should all pass (`rake`)

**gem release** is used by rake tasks in this project to handle version changes,
tagging, and publishing to RubyGems.

| Command                   | Description |
|---------------------------|-------------|
| `rake release:pre[false]` | Increase the current prerelease version number, push changes |
| `rake release:pre[true]`  | Increase the current prerelease version number, publish release\* |
| `rake release:none`       | Drop the prerelease version, publish release, then `pre[false]` |
| `rake release:minor`      | Increase the minor version number, publish release, then `pre[false]` |
| `rake release:major`      | Increase the major version number, publish release, then `pre[false]` |

\*"publish release" means each of the following occurs:

* a new tag is created
* the changes are pushed
* the tagged version is built and published to RubyGems
### SOLR

To harvest data utilizing the gem, you will need a local configured instance of
Solr 4.3, which can be downloaded from
[Apache's archive](https://archive.apache.org/dist/lucene/solr/4.3.0/).

#### NSIDC

At NSIDC the development VM can be provisioned with the
[solr puppet module](https://bitbucket.org/nsidc/puppet-solr/) to install and
configure Solr.

#### Non-NSIDC

Outside of NSIDC, set up Solr using the instructions found in the
[search-solr](https://github.com/nsidc/search-solr) project.
### Harvesting Data

The harvester requires additional metadata from services that may not yet be
publicly available, which are referenced in
`lib/search_solr_tools/config/environments.yaml`.

To utilize the gem, build and install the **search_solr_tools** gem. This will
add an executable `search_solr_tools` to the path (source is in
`bin/search_solr_tools`). The executable is self-documenting; for a brief
overview of what's available, simply run `search_solr_tools`.

Harvesting of data can be done using the `harvest` task, giving it a list of
harvesters and an environment. Deletion is possible via the `delete_all` and/or
`delete_by_data_center` tasks. `list_harvesters` will list the valid harvest
targets (example invocations appear at the end of this section).

In addition to feed URLs, `environments.yaml` also defines various environments
which can be modified, or additional environments can be added by just adding a
new YAML stanza with the right keys; this new environment can then be used with
the `--environment` flag when running `search_solr_tools harvest`.
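For example (a sketch: the flag spellings assume Thor's dashed form of the
`data_center` and `environment` options defined in `bin/search_solr_tools`, and
the `local` environment comes from the bundled `environments.yaml`):

```
search_solr_tools list_harvesters
search_solr_tools harvest --data-center nsidc cisl --environment local
search_solr_tools delete_by_data_center --data-center cisl --timestamp 2014-07-14T21:49:21Z --environment local
```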
## Organization Info

### How to contact NSIDC

User Services and general information:
Support: [http://support.nsidc.org](http://support.nsidc.org)
Email: nsidc@nsidc.org

Phone: +1 303.492.6199
Fax: +1 303.492.2468

Mailing address:
National Snow and Ice Data Center
CIRES, 449 UCB
University of Colorado
Boulder, CO 80309-0449 USA

### License

Every file in this repository is covered by the GNU GPL Version 3; a copy of the
license is included in the file COPYING.

### Citation Information

Andy Grauch, Brendan Billingsley, Chris Chalstrom, Danielle Harper, Ian
Truslove, Jonathan Kovarik, Luis Lopez, Miao Liu, Michael Brandt, Stuart Reed
(2013): Arctic Data Explorer SOLR Search software tools. The National Snow and
Ice Data Center. Software. http://ezid.cdlib.org/id/doi:10.7265/N5JQ0XZM
data/bin/search_solr_tools
ADDED
@@ -0,0 +1,87 @@
#!/usr/bin/env ruby
require 'search_solr_tools'
require 'thor'

class SolrHarvestCLI < Thor
  desc 'harvest', 'Harvest from one of the ADE harvesters'
  option :data_center, type: :array, required: true
  option :environment, required: true
  option :die_on_failure, type: :boolean

  def harvest(die_on_failure = options[:die_on_failure] || false)
    options[:data_center].each do |target|
      puts target
      begin
        harvest_class = get_harvester_class(target)
        harvester = harvest_class.new options[:environment], die_on_failure
        harvester.harvest_and_delete
      rescue => e
        puts "harvest failed for #{target}: #{e.message}"
        raise e if die_on_failure
      end
    end
  end

  desc 'list_harvesters', 'List all harvesters'
  def list_harvesters
    puts harvester_map.keys
  end

  desc 'delete_all', 'Delete all documents from the index'
  option :environment, required: true
  def delete_all
    env = SearchSolrTools::SolrEnvironments[options[:environment]]
    `curl 'http://#{env[:host]}:#{env[:port]}/solr/update' -H 'Content-Type: text/xml; charset=utf-8' --data '<delete><query>*:*</query></delete>'`
    `curl 'http://#{env[:host]}:#{env[:port]}/solr/update' -H 'Content-Type: text/xml; charset=utf-8' --data '<commit/>'`
  end

  desc 'delete_all_auto_suggest', 'Delete all documents from the auto_suggest index'
  option :environment, required: true
  def delete_all_auto_suggest
    env = SearchSolrTools::SolrEnvironments[options[:environment]]
    `curl 'http://#{env[:host]}:#{env[:port]}/solr/update' -H 'Content-Type: text/xml; charset=utf-8' --data '<delete><query>*:*</query></delete>'`
    `curl 'http://#{env[:host]}:#{env[:port]}/solr/update' -H 'Content-Type: text/xml; charset=utf-8' --data '<commit/>'`
  end

  desc 'delete_by_data_center', 'Force deletion of documents for a specific data center with timestamps before the passed timestamp in format iso8601 (2014-07-14T21:49:21Z)'
  option :timestamp, required: true
  option :environment, required: true
  option :data_center, required: true
  def delete_by_data_center
    harvester = get_harvester_class(options[:data_center]).new options[:environment]
    harvester.delete_old_documents(options[:timestamp],
                                   "data_centers:\"#{SearchSolrTools::Helpers::SolrFormat::DATA_CENTER_NAMES[options[:data_center].upcase.to_sym][:long_name]}\"",
                                   SearchSolrTools::SolrEnvironments[harvester.environment][:collection_name],
                                   true)
  end

  no_tasks do
    def harvester_map
      {
        'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
        'cisl' => SearchSolrTools::Harvesters::Cisl,
        'echo' => SearchSolrTools::Harvesters::Echo,
        'eol' => SearchSolrTools::Harvesters::Eol,
        'ices' => SearchSolrTools::Harvesters::Ices,
        'nmi' => SearchSolrTools::Harvesters::Nmi,
        'nodc' => SearchSolrTools::Harvesters::Nodc,
        'rda' => SearchSolrTools::Harvesters::Rda,
        'usgs' => SearchSolrTools::Harvesters::Usgs,
        'tdar' => SearchSolrTools::Harvesters::Tdar,
        'pdc' => SearchSolrTools::Harvesters::Pdc,
        'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
        'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest,
        'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
      }
    end

    def get_harvester_class(data_center_name)
      name = data_center_name.downcase.to_s
      fail("Invalid data center #{name}") unless harvester_map.key?(name)

      harvester_map[name]
    end
  end
end
SolrHarvestCLI.start(ARGV)
data/lib/search_solr_tools.rb
ADDED
@@ -0,0 +1,8 @@
require 'require_all'
require_relative './search_solr_tools/config/environments'
require_relative './search_solr_tools/version'

require_rel './search_solr_tools/helpers'
require_rel './search_solr_tools/selectors'
require_rel './search_solr_tools/harvesters'
require_rel './search_solr_tools/translators'
data/lib/search_solr_tools/config/environments.rb
ADDED
@@ -0,0 +1,12 @@
require 'yaml'

module SearchSolrTools
  # configuration to work with solr locally, or on integration/qa/staging/prod
  module SolrEnvironments
    YAML_ENVS = YAML.load_file(File.expand_path('../environments.yaml', __FILE__))

    def self.[](env = :development)
      YAML_ENVS[:common].merge(YAML_ENVS[env.to_sym])
    end
  end
end
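Since `self.[]` performs a shallow `Hash#merge`, an environment stanza that
redefines a nested key (such as `:eol`) replaces the entire nested value rather
than deep-merging it. A minimal usage sketch, with values taken from the
`environments.yaml` that follows:

```ruby
require 'search_solr_tools'

env = SearchSolrTools::SolrEnvironments[:qa]
env[:port] # => 8983, inherited from :common
env[:host] # => "qa.search-solr.apps.int.nsidc.org", set by the :qa stanza
```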
data/lib/search_solr_tools/config/environments.yaml
ADDED
@@ -0,0 +1,73 @@
:common:
  :auto_suggest_collection_name: auto_suggest
  :collection_name: nsidc_oai
  :collection_path: solr
  :port: 8983
  :bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
  :cisl_url: https://www.aoncadis.org/oai/repository
  :echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
  :ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
  :nmi_url: http://access.met.no/metamod/oai
  :nodc_url: http://data.nodc.noaa.gov/geoportal/csw
  :pdc_url: http://www.polardata.ca/oai/provider
  :rda_url: http://rda.ucar.edu/cgi-bin/oai
  :tdar_url: http://core.tdar.org/search/rss
  :usgs_url: https://www.sciencebase.gov/catalog/item/527cf4ede4b0850ea05182ee/csw
  :eol:
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SHEBA.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SBI.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.PacMARS.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BASE.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ATLAS.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARC_MIP.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.AMTS.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BOREAS.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BeringSea.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARCSS.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BEST.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BSIERP.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BARROW.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.DBO.thredds.xml
    - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ITEX.thredds.xml

:local:
  :host: localhost
  :nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso

:dev:
  :host: dev.search-solr.apps.int.nsidc.org
  :nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso

:integration:
  :host: integration.search-solr.apps.int.nsidc.org
  :nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso

:qa:
  :host: qa.search-solr.apps.int.nsidc.org
  :nsidc_dataset_metadata_url: http://qa.nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://qa.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://brash.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso

:staging:
  :host: staging.search-solr.apps.int.nsidc.org
  :nsidc_dataset_metadata_url: http://staging.nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://staging.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://freeze.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso

:blue:
  :host: blue.search-solr.apps.int.nsidc.org
  :nsidc_dataset_metadata_url: http://nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://frozen.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso

:production:
  :host: search-solr.apps.int.nsidc.org
  :nsidc_dataset_metadata_url: http://nsidc.org/api/dataset/metadata/
  :nsidc_oai_identifiers_url: http://nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
  :oai_url: http://frozen.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb
ADDED
@@ -0,0 +1,43 @@
module SearchSolrTools
  module Harvesters
    class AdeAutoSuggest < AutoSuggest
      def harvest_and_delete
        puts 'Building auto-suggest indexes for ADE'
        super(method(:harvest), "source:\"ADE\"", @env_settings[:auto_suggest_collection_name])
      end

      def harvest
        url = "#{solr_url}/#{@env_settings[:collection_name]}/select?q=*%3A*&fq=source%3AADE&fq=spatial:[45.0,-180.0+TO+90.0,180.0]&rows=0&wt=json&indent=true&facet=true&facet.mincount=1&facet.sort=count&facet.limit=-1"
        super url, fields
      end

      def fields
        { 'full_keywords_and_parameters' => { weight: 2, source: 'ADE', creator: method(:keyword_creator) },
          'full_authors' => { weight: 1, source: 'ADE', creator: method(:author_creator) } }
      end

      def split_creator(value, count, field_weight, source, split_regex)
        add_docs = []
        value.downcase.split(split_regex).each do |v|
          v = v.strip.chomp('/')
          add_docs.concat(ade_length_limit_creator(v, count, field_weight, source)) unless v.nil? || v.empty?
        end
        add_docs
      end

      def keyword_creator(value, count, field_weight, source)
        split_creator value, count, field_weight, source, %r{[\/\>]+}
      end

      def author_creator(value, count, field_weight, source)
        split_creator value, count, field_weight, source, /;/
      end

      def ade_length_limit_creator(value, count, field_weight, source)
        return [] if value.length > 80
        standard_add_creator value, count, field_weight, source
      end
    end
  end
end
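For illustration, a sketch of what `split_creator` does to a hierarchical
keyword facet value, assuming the split pattern reconstructed above (split on
`/` and `>`, downcase, trim, and drop empties):

```ruby
'Oceans > Sea Ice / Ice Extent'.downcase.split(%r{[\/\>]+}).map { |v| v.strip.chomp('/') }
# => ["oceans", "sea ice", "ice extent"]
```

Each surviving fragment then becomes a suggestion document via
`ade_length_limit_creator`, which silently drops values longer than 80
characters.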
data/lib/search_solr_tools/harvesters/auto_suggest.rb
ADDED
@@ -0,0 +1,61 @@
require 'json'
require 'rest-client'

module SearchSolrTools
  module Harvesters
    # Use the nsidc_oai core to populate the auto_suggest core
    class AutoSuggest < Base
      def initialize(env = 'development', die_on_failure = false)
        super env, die_on_failure
        @env_settings = SolrEnvironments[@environment] # super sets @environment.
      end

      private

      def harvest(url, fields)
        facet_response = fetch_auto_suggest_facet_data(url, fields)
        add_docs = generate_add_hashes(facet_response, fields)
        add_documents_to_solr(add_docs)
      end

      def standard_add_creator(value, count, field_weight, source)
        count_weight = count <= 1 ? 0.4 : Math.log(count)
        weight = field_weight * count_weight
        [{ 'id' => "#{source}:#{value}", 'text_suggest' => value, 'source' => source, 'weight' => weight }]
      end

      def fetch_auto_suggest_facet_data(url, fields)
        fields.each do |name, _config|
          url += "&facet.field=#{name}"
        end

        serialized_facet_response = RestClient.get url
        JSON.parse(serialized_facet_response)
      end

      def generate_add_hashes(facet_response, fields)
        add_docs = []
        facet_response['facet_counts']['facet_fields'].each do |facet_name, facet_values|
          facet_values.each_slice(2) do |facet_value|
            new_docs = fields[facet_name][:creator].call(facet_value[0], facet_value[1], fields[facet_name][:weight], fields[facet_name][:source])
            add_docs.concat(new_docs)
          end
        end
        add_docs
      end

      def add_documents_to_solr(add_docs)
        if insert_solr_doc add_docs, Base::JSON_CONTENT_TYPE, @env_settings[:auto_suggest_collection_name]
          puts "Added #{add_docs.size} auto suggest documents in one commit"
        else
          puts "Failed adding #{add_docs.size} documents in single commit, retrying one by one"
          new_add_docs = []
          add_docs.each do |doc|
            new_add_docs << { 'add' => { 'doc' => doc } }
          end
          insert_solr_docs new_add_docs, Base::JSON_CONTENT_TYPE, @env_settings[:auto_suggest_collection_name]
        end
      end
    end
  end
end
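The `each_slice(2)` in `generate_add_hashes` relies on Solr's flat facet list
format, where values and counts are interleaved (`[value1, count1, value2,
count2, ...]`). A minimal sketch of how facet pairs become suggestion documents,
using made-up facet data and the weight formula from `standard_add_creator`:

```ruby
facet_values = ['sea ice', 120, 'permafrost', 1]

facet_values.each_slice(2) do |value, count|
  # ln(120) ≈ 4.79; counts of 1 or less get the 0.4 floor instead
  count_weight = count <= 1 ? 0.4 : Math.log(count)
  doc = { 'id' => "ADE:#{value}", 'text_suggest' => value, 'source' => 'ADE',
          'weight' => 2 * count_weight } # field_weight 2, as for 'full_keywords_and_parameters'
  p doc
end
```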