search_solr_tools 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +88 -0
  3. data/COPYING +674 -0
  4. data/README.md +203 -0
  5. data/bin/search_solr_tools +87 -0
  6. data/lib/search_solr_tools.rb +8 -0
  7. data/lib/search_solr_tools/config/environments.rb +12 -0
  8. data/lib/search_solr_tools/config/environments.yaml +73 -0
  9. data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +43 -0
  10. data/lib/search_solr_tools/harvesters/auto_suggest.rb +61 -0
  11. data/lib/search_solr_tools/harvesters/base.rb +183 -0
  12. data/lib/search_solr_tools/harvesters/bcodmo.rb +55 -0
  13. data/lib/search_solr_tools/harvesters/cisl.rb +63 -0
  14. data/lib/search_solr_tools/harvesters/echo.rb +50 -0
  15. data/lib/search_solr_tools/harvesters/eol.rb +53 -0
  16. data/lib/search_solr_tools/harvesters/ices.rb +55 -0
  17. data/lib/search_solr_tools/harvesters/nmi.rb +32 -0
  18. data/lib/search_solr_tools/harvesters/nodc.rb +72 -0
  19. data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +33 -0
  20. data/lib/search_solr_tools/harvesters/nsidc_json.rb +60 -0
  21. data/lib/search_solr_tools/harvesters/oai.rb +59 -0
  22. data/lib/search_solr_tools/harvesters/pdc.rb +38 -0
  23. data/lib/search_solr_tools/harvesters/rda.rb +33 -0
  24. data/lib/search_solr_tools/harvesters/tdar.rb +57 -0
  25. data/lib/search_solr_tools/harvesters/usgs.rb +74 -0
  26. data/lib/search_solr_tools/helpers/bounding_box_util.rb +37 -0
  27. data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +30 -0
  28. data/lib/search_solr_tools/helpers/facet_configuration.rb +19 -0
  29. data/lib/search_solr_tools/helpers/iso_namespaces.rb +30 -0
  30. data/lib/search_solr_tools/helpers/iso_to_solr.rb +96 -0
  31. data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +198 -0
  32. data/lib/search_solr_tools/helpers/query_builder.rb +13 -0
  33. data/lib/search_solr_tools/helpers/selectors.rb +20 -0
  34. data/lib/search_solr_tools/helpers/solr_format.rb +260 -0
  35. data/lib/search_solr_tools/helpers/tdar_format.rb +70 -0
  36. data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +77 -0
  37. data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +40 -0
  38. data/lib/search_solr_tools/helpers/usgs_format.rb +50 -0
  39. data/lib/search_solr_tools/selectors/cisl.rb +112 -0
  40. data/lib/search_solr_tools/selectors/echo_iso.rb +111 -0
  41. data/lib/search_solr_tools/selectors/ices_iso.rb +107 -0
  42. data/lib/search_solr_tools/selectors/nmi.rb +106 -0
  43. data/lib/search_solr_tools/selectors/nodc_iso.rb +107 -0
  44. data/lib/search_solr_tools/selectors/pdc_iso.rb +108 -0
  45. data/lib/search_solr_tools/selectors/rda.rb +106 -0
  46. data/lib/search_solr_tools/selectors/tdar_opensearch.rb +89 -0
  47. data/lib/search_solr_tools/selectors/usgs_iso.rb +105 -0
  48. data/lib/search_solr_tools/translators/bcodmo_json.rb +69 -0
  49. data/lib/search_solr_tools/translators/eol_to_solr.rb +78 -0
  50. data/lib/search_solr_tools/translators/nsidc_json.rb +190 -0
  51. data/lib/search_solr_tools/version.rb +3 -0
  52. data/search_solr_tools.gemspec +45 -0
  53. metadata +345 -0
data/README.md ADDED
@@ -0,0 +1,203 @@
1
+ [![Build Status](https://travis-ci.org/nsidc/search-solr-tools.svg?branch=master)](https://travis-ci.org/nsidc/search-solr-tools)
2
+
3
+ # NSIDC Search Solr Tools
4
+
5
+ This is a gem that contains:
6
+
7
+ * Ruby translators to transform various metadata feeds into solr documents
8
+ * A command-line utility to access/utilize the gem's translators to harvest
9
+ metadata into a working solr instance.
10
+
11
+ ## Using the project
12
+
13
+ ### Standard Installation
14
+
15
+ The gem is available through [RubyGems](https://rubygems.org/). To install the
16
+ gem, ensure all requirements below are met and run (providing the appropriate
17
+ version):
18
+
19
+ `sudo gem install search_solr_tools -v $VERSION`
20
+
21
+ ### Custom Deployment
22
+
23
+ Clone the repository, and install all requirements as noted below.
24
+
25
+ #### Configuration
26
+
27
+ Once you have the code and requirements, edit the configuration file in
28
+ `lib/search_solr_tools/config/environments.yaml` to match your environment. The
29
+ configuration values are set by environment for each harvester (or specified in
30
+ the `common` settings list), with the environment overriding `common` if a
31
+ different setting is specified for a given environment.
32
+
33
+ Each harvester has its own configuration settings. Most are the target endpoint;
34
+ EOL, however, has a list of THREDDS project endpoints and NSIDC has its own
35
+ oai/metadata endpoint settings.
36
+
37
+ Most users should not need to change the harvester configuration unless they
38
+ establish a local test node, or if a provider changes available endpoints;
39
+ however, the `host` option for each environment must specify the configured SOLR
40
+ instance you intend to use these tools with.
41
+
42
+ #### Build and Install Gem
43
+
44
+ Then run:
45
+
46
+ `bundle exec gem build ./search_solr_tools.gemspec`
47
+
48
+ Once you have the gem built in the project directory, install the utility:
49
+
50
+ `gem install --local ./search_solr_tools-version.gem`
51
+
52
+ ## Working on the Project
53
+
54
+ 1. Create your feature branch (`git checkout -b my-new-feature`)
55
+ 2. Stage your changes (`git add`)
56
+ 3. Commit your Rubocop compliant and test-passing changes with a
57
+ [good commit message](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html)
58
+ (`git commit`)
59
+ 4. Push to the branch (`git push -u origin my-new-feature`)
60
+ 5. Create a new Pull Request
61
+
62
+ ### Requirements
63
+
64
+ * Ruby > 2.0.0
65
+ * [Bundler](http://bundler.io/)
66
+ * Requirements for nokogiri:
67
+ * [libxml2/libxml2-dev](http://xmlsoft.org/)
68
+ * [zlibc](http://www.zlibc.linux.lu/)
69
+ * [zlib1g/zlib1g-dev](http://zlib.net/)
70
+ * Dependency build requirements:
71
+ * For Ubuntu/Debian, install the build-essential package.
72
+ * On the latest Fedora release installing the following will get you all of the requirements:
73
+
74
+ `yum groupinstall 'Development Tools'`
75
+
76
+ `yum install gcc-c++`
77
+
78
+ *Please note*: If you are having difficulty installing Nokogiri please review the
79
+ Nokogiri [installation tutorial](http://www.nokogiri.org/tutorials/installing_nokogiri.html)
80
+
81
+ * All gems installed (preferably using bundler: `bundle install`)
82
+ * A running, configured SOLR instance to accept data harvests.
83
+
84
+ ### RuboCop
85
+
86
+ The style checker [RuboCop](https://github.com/bbatsov/rubocop) can be run with
87
+ `rubocop` or `bundle exec rake guard:rubocop`. The rake task will also watch for
88
+ ruby files (.rb, .rake, Gemfile, Guardfile, Rakefile) to be changed, and run
89
+ RuboCop on the changed files.
90
+
91
+ `bundle exec rake guard` will automatically run the unit tests and RuboCop in
92
+ one terminal window.
93
+
94
+ RuboCop can be configured by modifying `.rubocop.yml`.
95
+
96
+ Pushing with failing tests or RuboCop violations will cause the Jenkins build to
97
+ break. Jenkins jobs to build and deploy this project are named
98
+ "NSIDC_Search_SOLR_()…" and can be viewed under the
99
+ [NSIDC Search tab](https://scm.nsidc.org/jenkins/view/NSIDC%20Search/).
100
+
101
+ ### Testing
102
+
103
+ Unit tests can be run with `rspec`, `bundle exec rake spec:unit`, or `bundle
104
+ exec rake guard:specs`. Running the rake guard task will also automatically run
105
+ the tests whenever the appropriate files are changed.
106
+
107
+ Please be sure to run them in the `bundle exec` context if you're utilizing bundler.
108
+
109
+ ### Creating Releases (NSIDC devs only)
110
+
111
+ Requirements:
112
+
113
+ * Ruby > 2.0.0
114
+ * [Bundler](http://bundler.io/)
115
+ * [Gem Release](https://github.com/svenfuchs/gem-release)
116
+ * [Rake](https://github.com/ruby/rake)
117
+ * a [RubyGems](https://rubygems.org) account that has
118
+ [ownership](http://guides.rubygems.org/publishing/) of the gem
119
+ * RuboCop and the unit tests should all pass (`rake`)
120
+
121
+ **gem release** is used by rake tasks in this project to handle version changes,
122
+ tagging, and publishing to RubyGems.
123
+
124
+ | Command | Description |
125
+ |---------------------------|-------------|
126
+ | `rake release:pre[false]` | Increase the current prerelease version number, push changes |
127
+ | `rake release:pre[true]` | Increase the current prerelease version number, publish release\* |
128
+ | `rake release:none` | Drop the prerelease version, publish release, then `pre[false]` |
129
+ | `rake release:minor` | Increase the minor version number, publish release, then `pre[false]` |
130
+ | `rake release:major` | Increase the major version number, publish release, then `pre[false]` |
131
+
132
+ \*"publish release" means each of the following occurs:
133
+
134
+ * a new tag is created
135
+ * the changes are pushed
136
+ * the tagged version is built and published to RubyGems
137
+
138
+ ### SOLR:
139
+
140
+ To harvest data utilizing the gem, you will need a local configured instance of
141
+ Solr 4.3, which can be downloaded from
142
+ [Apache's archive](https://archive.apache.org/dist/lucene/solr/4.3.0/).
143
+
144
+ #### NSIDC
145
+
146
+ At NSIDC the development VM can be provisioned with the
147
+ [solr puppet module](https://bitbucket.org/nsidc/puppet-solr/) to install and
148
+ configure Solr.
149
+
150
+ #### Non-NSIDC
151
+
152
+ Outside of NSIDC, setup solr using the instructions found in the
153
+ [search-solr](https://github.com/nsidc/search-solr) project.
154
+
155
+ ### Harvesting Data
156
+
157
+ The harvester requires additional metadata from services that may not yet be
158
+ publicly available, which are referenced in
159
+ `lib/search_solr_tools/config/environments.yaml`.
160
+
161
+ To utilize the gem, build and install the **search_solr_tools** gem. This will
162
+ add an executable `search_solr_tools` to the path (source is in
163
+ `bin/search_solr_tools`). The executable is self-documenting; for a brief
164
+ overview of what's available, simply run `search_solr_tools`.
165
+
166
+ Harvesting of data can be done using the `harvest` task, giving it a list of
167
+ harvesters and an environment. Deletion is possible via the `delete_all` and/or
168
+ `delete_by_data_center` tasks. `list_harvesters` will list the valid harvest
169
+ targets.
170
+
171
+ In addition to feed URLs, `environments.yaml` also defines various environments
172
+ which can be modified, or additional environments can be added by just adding a
173
+ new YAML stanza with the right keys; this new environment can then be used with
174
+ the `--environment` flag when running `search_solr_tools harvest`.
175
+
176
+ ## Organization Info
177
+
178
+ ### How to contact NSIDC
179
+
180
+ User Services and general information:
181
+ Support: [http://support.nsidc.org](http://support.nsidc.org)
182
+ Email: nsidc@nsidc.org
183
+
184
+ Phone: +1 303.492.6199
185
+ Fax: +1 303.492.2468
186
+
187
+ Mailing address:
188
+ National Snow and Ice Data Center
189
+ CIRES, 449 UCB
190
+ University of Colorado
191
+ Boulder, CO 80309-0449 USA
192
+
193
+ ### License
194
+
195
+ Every file in this repository is covered by the GNU GPL Version 3; a copy of the
196
+ license is included in the file COPYING.
197
+
198
+ ### Citation Information
199
+
200
+ Andy Grauch, Brendan Billingsley, Chris Chalstrom, Danielle Harper, Ian
201
+ Truslove, Jonathan Kovarik, Luis Lopez, Miao Liu, Michael Brandt, Stuart Reed
202
+ (2013): Arctic Data Explorer SOLR Search software tools. The National Snow and
203
+ Ice Data Center. Software. http://ezid.cdlib.org/id/doi:10.7265/N5JQ0XZM
@@ -0,0 +1,87 @@
1
#!/usr/bin/env ruby
require 'search_solr_tools'
require 'thor'

# Command-line interface for harvesting metadata feeds into Solr and for
# deleting documents from the Solr indexes.
class SolrHarvestCLI < Thor
  desc 'harvest', 'Harvest from one of the ADE harvesters'
  option :data_center, type: :array, required: true
  option :environment, required: true
  option :die_on_failure, type: :boolean

  # Harvests each requested data center in turn. Failures are reported per
  # target; when die_on_failure is set the error is re-raised so the process
  # exits non-zero instead of continuing with the remaining targets.
  def harvest(die_on_failure = options[:die_on_failure] || false)
    options[:data_center].each do |target|
      puts target
      begin
        harvest_class = get_harvester_class(target)
        harvester = harvest_class.new options[:environment], die_on_failure
        harvester.harvest_and_delete
      rescue => e
        puts "harvest failed for #{target}: #{e.message}"
        raise e if die_on_failure
      end
    end
  end

  desc 'list_harvesters', 'List all harvesters'
  def list_harvesters
    puts harvester_map.keys
  end

  desc 'delete_all', 'Delete all documents from the index'
  option :environment, required: true
  def delete_all
    env = SearchSolrTools::SolrEnvironments[options[:environment]]
    delete_all_documents_from_url "http://#{env[:host]}:#{env[:port]}/solr/update"
  end

  desc 'delete_all_auto_suggest', 'Delete all documents from the auto_suggest index'
  option :environment, required: true
  def delete_all_auto_suggest
    env = SearchSolrTools::SolrEnvironments[options[:environment]]
    # BUG FIX: this task previously issued the exact same request as
    # delete_all (against the default "/solr/update" endpoint), wiping the
    # main index and leaving the auto_suggest collection untouched. Target
    # the configured auto_suggest collection instead.
    delete_all_documents_from_url "http://#{env[:host]}:#{env[:port]}/solr/#{env[:auto_suggest_collection_name]}/update"
  end

  desc 'delete_by_data_center', 'Force deletion of documents for a specific data center with timestamps before the passed timestamp in format iso8601 (2014-07-14T21:49:21Z)'
  option :timestamp, required: true
  option :environment, required: true
  option :data_center, required: true
  def delete_by_data_center
    harvester = get_harvester_class(options[:data_center]).new options[:environment]
    harvester.delete_old_documents(options[:timestamp],
                                   "data_centers:\"#{SearchSolrTools::Helpers::SolrFormat::DATA_CENTER_NAMES[options[:data_center].upcase.to_sym][:long_name]}\"",
                                   SearchSolrTools::SolrEnvironments[harvester.environment][:collection_name],
                                   true
                                  )
  end

  no_tasks do
    # Maps the CLI data-center names to their harvester classes.
    def harvester_map
      {
        'bco_dmo' => SearchSolrTools::Harvesters::BcoDmo,
        'cisl' => SearchSolrTools::Harvesters::Cisl,
        'echo' => SearchSolrTools::Harvesters::Echo,
        'eol' => SearchSolrTools::Harvesters::Eol,
        'ices' => SearchSolrTools::Harvesters::Ices,
        'nmi' => SearchSolrTools::Harvesters::Nmi,
        'nodc' => SearchSolrTools::Harvesters::Nodc,
        'rda' => SearchSolrTools::Harvesters::Rda,
        'usgs' => SearchSolrTools::Harvesters::Usgs,
        'tdar' => SearchSolrTools::Harvesters::Tdar,
        'pdc' => SearchSolrTools::Harvesters::Pdc,
        'nsidc' => SearchSolrTools::Harvesters::NsidcJson,
        'nsidc_auto_suggest' => SearchSolrTools::Harvesters::NsidcAutoSuggest,
        'ade_auto_suggest' => SearchSolrTools::Harvesters::AdeAutoSuggest
      }
    end

    # Looks up the harvester class for data_center_name (case-insensitive);
    # fails with a clear message when the name is unknown.
    def get_harvester_class(data_center_name)
      name = data_center_name.downcase.to_s
      fail("Invalid data center #{name}") unless harvester_map.key?(name)

      harvester_map[name]
    end

    # Issues a delete-all query followed by a commit against the given Solr
    # update endpoint (shared by delete_all and delete_all_auto_suggest).
    def delete_all_documents_from_url(url)
      `curl '#{url}' -H 'Content-Type: text/xml; charset=utf-8' --data '<delete><query>*:*</query></delete>'`
      `curl '#{url}' -H 'Content-Type: text/xml; charset=utf-8' --data '<commit/>'`
    end
  end
end
SolrHarvestCLI.start(ARGV)
@@ -0,0 +1,8 @@
1
+ require 'require_all'
2
+ require_relative './search_solr_tools/config/environments'
3
+ require_relative './search_solr_tools/version'
4
+
5
+ require_rel './search_solr_tools/helpers'
6
+ require_rel './search_solr_tools/selectors'
7
+ require_rel './search_solr_tools/harvesters'
8
+ require_rel './search_solr_tools/translators'
@@ -0,0 +1,12 @@
1
require 'yaml'

module SearchSolrTools
  # Configuration to work with solr locally, or on integration/qa/staging/prod.
  # Settings are read once from environments.yaml when this file is loaded.
  module SolrEnvironments
    YAML_ENVS = YAML.load_file(File.expand_path('../environments.yaml', __FILE__))

    # Returns the merged settings hash for +env+: the :common settings with
    # the environment-specific values layered on top.
    #
    # Raises KeyError naming the unknown environment and listing the valid
    # ones, rather than the opaque NoMethodError-on-nil the previous
    # implementation produced for a typo'd environment name.
    def self.[](env = :development)
      env_settings = YAML_ENVS.fetch(env.to_sym) do
        raise KeyError, "Unknown environment '#{env}'; valid environments: #{(YAML_ENVS.keys - [:common]).join(', ')}"
      end
      YAML_ENVS[:common].merge(env_settings)
    end
  end
end
@@ -0,0 +1,73 @@
1
+ :common:
2
+ :auto_suggest_collection_name: auto_suggest
3
+ :collection_name: nsidc_oai
4
+ :collection_path: solr
5
+ :port: 8983
6
+ :bcodmo_url: http://www.bco-dmo.org/nsidc/arctic-deployments.json
7
+ :cisl_url: https://www.aoncadis.org/oai/repository
8
+ :echo_url: https://api.echo.nasa.gov/catalog-rest/echo_catalog/datasets.echo10
9
+ :ices_url: http://geo.ices.dk/geonetwork/srv/en/csw
10
+ :nmi_url: http://access.met.no/metamod/oai
11
+ :nodc_url: http://data.nodc.noaa.gov/geoportal/csw
12
+ :pdc_url: http://www.polardata.ca/oai/provider
13
+ :rda_url: http://rda.ucar.edu/cgi-bin/oai
14
+ :tdar_url: http://core.tdar.org/search/rss
15
+ :usgs_url: https://www.sciencebase.gov/catalog/item/527cf4ede4b0850ea05182ee/csw
16
+ :eol:
17
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SHEBA.thredds.xml
18
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.SBI.thredds.xml
19
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.PacMARS.thredds.xml
20
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BASE.thredds.xml
21
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ATLAS.thredds.xml
22
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARC_MIP.thredds.xml
23
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.AMTS.thredds.xml
24
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BOREAS.thredds.xml
25
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BeringSea.thredds.xml
26
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ARCSS.thredds.xml
27
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BEST.thredds.xml
28
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BSIERP.thredds.xml
29
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.BARROW.thredds.xml
30
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.DBO.thredds.xml
31
+ - http://data.eol.ucar.edu/jedi/catalog/ucar.ncar.eol.project.ITEX.thredds.xml
32
+
33
+ :local:
34
+ :host: localhost
35
+ :nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
36
+ :nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
37
+ :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
38
+
39
+ :dev:
40
+ :host: dev.search-solr.apps.int.nsidc.org
41
+ :nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
42
+ :nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
43
+ :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
44
+
45
+ :integration:
46
+ :host: integration.search-solr.apps.int.nsidc.org
47
+ :nsidc_dataset_metadata_url: http://integration.nsidc.org/api/dataset/metadata/
48
+ :nsidc_oai_identifiers_url: http://integration.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
49
+ :oai_url: http://liquid.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
50
+
51
+ :qa:
52
+ :host: qa.search-solr.apps.int.nsidc.org
53
+ :nsidc_dataset_metadata_url: http://qa.nsidc.org/api/dataset/metadata/
54
+ :nsidc_oai_identifiers_url: http://qa.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
55
+ :oai_url: http://brash.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
56
+
57
+ :staging:
58
+ :host: staging.search-solr.apps.int.nsidc.org
59
+ :nsidc_dataset_metadata_url: http://staging.nsidc.org/api/dataset/metadata/
60
+ :nsidc_oai_identifiers_url: http://staging.nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
61
+ :oai_url: http://freeze.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
62
+
63
+ :blue:
64
+ :host: blue.search-solr.apps.int.nsidc.org
65
+ :nsidc_dataset_metadata_url: http://nsidc.org/api/dataset/metadata/
66
+ :nsidc_oai_identifiers_url: http://nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
67
+ :oai_url: http://frozen.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
68
+
69
+ :production:
70
+ :host: search-solr.apps.int.nsidc.org
71
+ :nsidc_dataset_metadata_url: http://nsidc.org/api/dataset/metadata/
72
+ :nsidc_oai_identifiers_url: http://nsidc.org/api/dataset/metadata/oai?verb=ListIdentifiers&metadata_prefix=iso
73
+ :oai_url: http://frozen.colorado.edu:11580/api/dataset/2/oai?verb=ListRecords&metadata_prefix=iso
@@ -0,0 +1,43 @@
1
module SearchSolrTools
  module Harvesters
    # Builds auto-suggest index entries for the Arctic Data Explorer (ADE)
    # from facet counts queried out of the main collection.
    class AdeAutoSuggest < AutoSuggest
      def harvest_and_delete
        puts 'Building auto-suggest indexes for ADE'
        super(method(:harvest), "source:\"ADE\"", @env_settings[:auto_suggest_collection_name])
      end

      # Facet query restricted to ADE documents within the arctic bounding
      # box; rows=0 because only the facet counts are consumed.
      def harvest
        url = "#{solr_url}/#{@env_settings[:collection_name]}/select?q=*%3A*&fq=source%3AADE&fq=spatial:[45.0,-180.0+TO+90.0,180.0]&rows=0&wt=json&indent=true&facet=true&facet.mincount=1&facet.sort=count&facet.limit=-1"
        super url, fields
      end

      # Facet fields to harvest, with the weight and creator callback each
      # one uses to build suggestion documents.
      def fields
        { 'full_keywords_and_parameters' => { weight: 2, source: 'ADE', creator: method(:keyword_creator) },
          'full_authors' => { weight: 1, source: 'ADE', creator: method(:author_creator) }
        }
      end

      # Splits +value+ on +split_regex+ and creates one suggestion doc per
      # non-empty part (each part is downcased, stripped, and has any
      # trailing '/' removed).
      def split_creator(value, count, field_weight, source, split_regex)
        add_docs = []
        value.downcase.split(split_regex).each do |v|
          v = v.strip.chomp('/')
          add_docs.concat(ade_length_limit_creator(v, count, field_weight, source)) unless v.nil? || v.empty?
        end
        add_docs
      end

      def keyword_creator(value, count, field_weight, source)
        # Split GCMD-style keyword strings on space-delimited '/' or '>'
        # separators. BUG FIX: the pattern was written as %r{/ [\/ \>]+ /},
        # which embeds the literal '/' delimiters inside %r{} and so could
        # never match a real keyword separator.
        split_creator value, count, field_weight, source, / [\/ >]+ /
      end

      def author_creator(value, count, field_weight, source)
        # Split author lists on semicolons. BUG FIX: was %r{/;/}, which only
        # matches the literal three-character text "/;/", so author strings
        # like "Smith, J.; Doe, A." were never split.
        split_creator value, count, field_weight, source, /;/
      end

      # Suggestions longer than 80 characters are dropped entirely rather
      # than truncated.
      def ade_length_limit_creator(value, count, field_weight, source)
        return [] if value.length > 80
        standard_add_creator value, count, field_weight, source
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
require 'json'
require 'rest-client'

module SearchSolrTools
  module Harvesters
    # Use the nsidc_oai core to populate the auto_suggest core
    class AutoSuggest < Base
      def initialize(env = 'development', die_on_failure = false)
        super env, die_on_failure
        @env_settings = SolrEnvironments[@environment] # super sets @environment.
      end

      private

      # Fetch facet counts for the configured fields, convert them into
      # suggestion documents, and push those documents into Solr.
      def harvest(url, fields)
        facet_response = fetch_auto_suggest_facet_data(url, fields)
        add_documents_to_solr(generate_add_hashes(facet_response, fields))
      end

      # Build a single suggestion document. The weight scales with log(count),
      # with a small floor factor for counts of 0 or 1.
      def standard_add_creator(value, count, field_weight, source)
        scale = count <= 1 ? 0.4 : Math.log(count)
        [{
          'id' => "#{source}:#{value}",
          'text_suggest' => value,
          'source' => source,
          'weight' => field_weight * scale
        }]
      end

      # GET the facet query, appending one facet.field parameter per
      # configured field, and parse the JSON response.
      def fetch_auto_suggest_facet_data(url, fields)
        facet_url = fields.keys.inject(url) { |acc, name| acc + "&facet.field=#{name}" }
        JSON.parse(RestClient.get(facet_url))
      end

      # Solr returns each facet as a flat [value, count, value, count, ...]
      # array; pair them up and hand each pair to the field's creator
      # callback, collecting all resulting suggestion documents.
      def generate_add_hashes(facet_response, fields)
        docs = []
        facet_response['facet_counts']['facet_fields'].each do |facet_name, facet_values|
          field = fields[facet_name]
          facet_values.each_slice(2) do |value, count|
            docs.concat(field[:creator].call(value, count, field[:weight], field[:source]))
          end
        end
        docs
      end

      # Try one bulk insert first; if that fails, wrap each document in its
      # own 'add' command and insert them one at a time.
      def add_documents_to_solr(add_docs)
        if insert_solr_doc add_docs, Base::JSON_CONTENT_TYPE, @env_settings[:auto_suggest_collection_name]
          puts "Added #{add_docs.size} auto suggest documents in one commit"
        else
          puts "Failed adding #{add_docs.size} documents in single commit, retrying one by one"
          retry_docs = add_docs.map { |doc| { 'add' => { 'doc' => doc } } }
          insert_solr_docs retry_docs, Base::JSON_CONTENT_TYPE, @env_settings[:auto_suggest_collection_name]
        end
      end
    end
  end
end