search_solr_tools 3.7.0 → 3.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -7
- data/bin/search_solr_tools +2 -3
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +3 -2
- data/lib/search_solr_tools/harvesters/base.rb +1 -1
- data/lib/search_solr_tools/harvesters/bcodmo.rb +1 -1
- data/lib/search_solr_tools/harvesters/eol.rb +1 -1
- data/lib/search_solr_tools/harvesters/gtnp.rb +1 -1
- data/lib/search_solr_tools/harvesters/ices.rb +1 -2
- data/lib/search_solr_tools/harvesters/ncdc_paleo.rb +1 -2
- data/lib/search_solr_tools/harvesters/nodc.rb +1 -2
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +3 -2
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +1 -1
- data/lib/search_solr_tools/harvesters/oai.rb +3 -3
- data/lib/search_solr_tools/harvesters/usgs.rb +1 -2
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +1 -1
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +1 -1
- data/lib/search_solr_tools/helpers/solr_format.rb +1 -1
- data/lib/search_solr_tools/translators/eol_to_solr.rb +2 -2
- data/lib/search_solr_tools/translators/nsidc_json.rb +1 -1
- data/lib/search_solr_tools/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 187e12f711b38e3274387d3575728d8d5250bf48
|
4
|
+
data.tar.gz: db79a42d420a46430dcf32e4bf442bf790d0d18b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a69bcf39d6c09009d399cd2d029f8691ed6d00b3760a217f7ed7c5324c4171d0332b6ed2982c12caa9f663bc489a4edd5ce5ce0a8717a8dde2df78cdf2215c5c
|
7
|
+
data.tar.gz: 613a293d8808099b554551932c37b8c3a709cdbb94488c61cd4f65bd5d0737ba7cd30537e217beccb032d3792195759496e785f4f1ffeb6560e1f1a0560ca7e0
|
data/README.md
CHANGED
@@ -93,11 +93,6 @@ one terminal window.
|
|
93
93
|
|
94
94
|
RuboCop can be configured by modifying `.rubocop.yml`.
|
95
95
|
|
96
|
-
Pushing with failing tests or RuboCop violations will cause the Jenkins build to
|
97
|
-
break. Jenkins jobs to build and deploy this project are named
|
98
|
-
"NSIDC_Search_SOLR_()…" and can be viewed under the
|
99
|
-
[NSIDC Search tab](https://scm.nsidc.org/jenkins/view/NSIDC%20Search/).
|
100
|
-
|
101
96
|
### Testing
|
102
97
|
|
103
98
|
Unit tests can be run with `rspec`, `bundle exec rake spec:unit`, or `bundle
|
@@ -114,7 +109,7 @@ Requirements:
|
|
114
109
|
* [Bundler](http://bundler.io/)
|
115
110
|
* [Gem Release](https://github.com/svenfuchs/gem-release)
|
116
111
|
* [Rake](https://github.com/ruby/rake)
|
117
|
-
*
|
112
|
+
* A [RubyGems](https://rubygems.org) account that has
|
118
113
|
[ownership](http://guides.rubygems.org/publishing/) of the gem
|
119
114
|
* RuboCop and the unit tests should all pass (`rake`)
|
120
115
|
|
@@ -125,7 +120,7 @@ tagging, and publishing to RubyGems.
|
|
125
120
|
|---------------------------|-------------|
|
126
121
|
| `rake release:pre[false]` | Increase the current prerelease version number, push changes |
|
127
122
|
| `rake release:pre[true]` | Increase the current prerelease version number, publish release\* |
|
128
|
-
| `rake release:none` | Drop the prerelease version, publish release, then `pre[false]` |
|
123
|
+
| `rake release:none` | Drop the prerelease version, publish release, then `pre[false]` (does a patch release) |
|
129
124
|
| `rake release:minor` | Increase the minor version number, publish release, then `pre[false]` |
|
130
125
|
| `rake release:major` | Increase the major version number, publish release, then `pre[false]` |
|
131
126
|
|
data/bin/search_solr_tools
CHANGED
@@ -58,8 +58,7 @@ class SolrHarvestCLI < Thor
|
|
58
58
|
harvester.delete_old_documents(options[:timestamp],
|
59
59
|
"data_centers:\"#{SearchSolrTools::Helpers::SolrFormat::DATA_CENTER_NAMES[options[:data_center].upcase.to_sym][:long_name]}\"",
|
60
60
|
SearchSolrTools::SolrEnvironments[harvester.environment][:collection_name],
|
61
|
-
true
|
62
|
-
)
|
61
|
+
true)
|
63
62
|
end
|
64
63
|
|
65
64
|
no_tasks do
|
@@ -90,7 +89,7 @@ class SolrHarvestCLI < Thor
|
|
90
89
|
|
91
90
|
def get_harvester_class(data_center_name)
|
92
91
|
name = data_center_name.downcase.to_s
|
93
|
-
|
92
|
+
raise("Invalid data center #{name}") unless harvester_map.key?(name)
|
94
93
|
|
95
94
|
harvester_map[name]
|
96
95
|
end
|
@@ -3,7 +3,7 @@ module SearchSolrTools
|
|
3
3
|
class AdeAutoSuggest < AutoSuggest
|
4
4
|
def harvest_and_delete
|
5
5
|
puts 'Building auto-suggest indexes for ADE'
|
6
|
-
super(method(:harvest),
|
6
|
+
super(method(:harvest), 'source:"ADE"', @env_settings[:auto_suggest_collection_name])
|
7
7
|
end
|
8
8
|
|
9
9
|
def harvest
|
@@ -12,7 +12,8 @@ module SearchSolrTools
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def fields
|
15
|
-
{
|
15
|
+
{
|
16
|
+
'full_keywords_and_parameters' => { weight: 2, source: 'ADE', creator: method(:keyword_creator) },
|
16
17
|
'full_authors' => { weight: 1, source: 'ADE', creator: method(:author_creator) }
|
17
18
|
}
|
18
19
|
end
|
@@ -81,7 +81,7 @@ module SearchSolrTools
|
|
81
81
|
end
|
82
82
|
puts "#{success} document#{success == 1 ? '' : 's'} successfully added to Solr."
|
83
83
|
puts "#{failure} document#{failure == 1 ? '' : 's'} not added to Solr."
|
84
|
-
|
84
|
+
raise 'Some documents failed to be inserted into Solr' if failure > 0
|
85
85
|
end
|
86
86
|
|
87
87
|
def insert_solr_doc(doc, content_type = XML_CONTENT_TYPE, core = SolrEnvironments[@environment][:collection_name])
|
@@ -18,7 +18,7 @@ module SearchSolrTools
|
|
18
18
|
def harvest_bcodmo_into_solr
|
19
19
|
result = translate_bcodmo
|
20
20
|
insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
|
21
|
-
|
21
|
+
raise 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
|
22
22
|
end
|
23
23
|
|
24
24
|
def translate_bcodmo
|
@@ -22,7 +22,7 @@ module SearchSolrTools
|
|
22
22
|
doc = open_xml_document(dataset)
|
23
23
|
if doc.xpath('//xmlns:metadata').size > 1
|
24
24
|
# THREDDS allows for a dataset of datasests, EOL should not utilize this
|
25
|
-
|
25
|
+
raise "Complex dataset encountered at #{doc.xpath('//xmlns:catalog').to_html}"
|
26
26
|
end
|
27
27
|
metadata_doc = open_xml_document(doc.xpath('//xmlns:metadata')[0]['xlink:href'])
|
28
28
|
{ 'add' => { 'doc' => @translator.translate(doc, metadata_doc) } }
|
@@ -28,7 +28,7 @@ module SearchSolrTools
|
|
28
28
|
def harvest_gtnp_into_solr
|
29
29
|
result = translate_gtnp
|
30
30
|
insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
|
31
|
-
|
31
|
+
raise 'Failed to harvest some records from the provider' if result[:failure_ids].length > 0
|
32
32
|
end
|
33
33
|
|
34
34
|
def translate_gtnp
|
@@ -48,8 +48,7 @@ module SearchSolrTools
|
|
48
48
|
'maxRecords' => maxRecords,
|
49
49
|
'startPosition' => startPosition,
|
50
50
|
'constraintLanguage' => 'CQL_TEXT',
|
51
|
-
'outputSchema' => 'http://www.isotc211.org/2005/gmd'
|
52
|
-
)
|
51
|
+
'outputSchema' => 'http://www.isotc211.org/2005/gmd')
|
53
52
|
end
|
54
53
|
end
|
55
54
|
end
|
@@ -48,8 +48,7 @@ module SearchSolrTools
|
|
48
48
|
'maxRecords' => maxRecords,
|
49
49
|
'startPosition' => startPosition,
|
50
50
|
'constraint' => bbox_constraint,
|
51
|
-
'outputSchema' => 'http://www.isotc211.org/2005/gmd'
|
52
|
-
)
|
51
|
+
'outputSchema' => 'http://www.isotc211.org/2005/gmd')
|
53
52
|
end
|
54
53
|
|
55
54
|
def bbox_constraint
|
@@ -3,7 +3,7 @@ module SearchSolrTools
|
|
3
3
|
class NsidcAutoSuggest < AutoSuggest
|
4
4
|
def harvest_and_delete
|
5
5
|
puts 'Building auto-suggest indexes for NSIDC'
|
6
|
-
super(method(:harvest),
|
6
|
+
super(method(:harvest), 'source:"NSIDC"', @env_settings[:auto_suggest_collection_name])
|
7
7
|
end
|
8
8
|
|
9
9
|
def harvest
|
@@ -12,7 +12,8 @@ module SearchSolrTools
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def fields
|
15
|
-
{
|
15
|
+
{
|
16
|
+
'authoritative_id' => { weight: 1, source: 'NSIDC', creator: method(:standard_add_creator) },
|
16
17
|
'full_title' => { weight: 2, source: 'NSIDC', creator: method(:standard_add_creator) },
|
17
18
|
'copy_parameters' => { weight: 5, source: 'NSIDC', creator: method(:standard_add_creator) },
|
18
19
|
'full_platforms' => { weight: 2, source: 'NSIDC', creator: method(:short_full_split_add_creator) },
|
@@ -23,7 +23,7 @@ module SearchSolrTools
|
|
23
23
|
def harvest_nsidc_json_into_solr
|
24
24
|
result = docs_with_translated_entries_from_nsidc
|
25
25
|
insert_solr_docs result[:add_docs], Base::JSON_CONTENT_TYPE
|
26
|
-
|
26
|
+
raise 'Failed to harvest and insert some authoritative IDs' if result[:failure_ids].length > 0
|
27
27
|
end
|
28
28
|
|
29
29
|
def nsidc_json_url
|
@@ -34,11 +34,11 @@ module SearchSolrTools
|
|
34
34
|
end
|
35
35
|
|
36
36
|
def results
|
37
|
-
|
37
|
+
raise NotImplementedError
|
38
38
|
end
|
39
39
|
|
40
40
|
def metadata_url
|
41
|
-
|
41
|
+
raise NotImplementedError
|
42
42
|
end
|
43
43
|
|
44
44
|
def translated_docs(entries)
|
@@ -48,7 +48,7 @@ module SearchSolrTools
|
|
48
48
|
private
|
49
49
|
|
50
50
|
def request_params
|
51
|
-
|
51
|
+
raise NotImplementedError
|
52
52
|
end
|
53
53
|
|
54
54
|
def request_string
|
@@ -49,8 +49,7 @@ module SearchSolrTools
|
|
49
49
|
'startPosition' => startPosition,
|
50
50
|
'TypeNames' => '',
|
51
51
|
'constraint' => bbox_constraint,
|
52
|
-
'outputSchema' => 'http://www.isotc211.org/2005/gmd'
|
53
|
-
)
|
52
|
+
'outputSchema' => 'http://www.isotc211.org/2005/gmd')
|
54
53
|
end
|
55
54
|
|
56
55
|
def bbox_constraint
|
@@ -83,7 +83,7 @@ module SearchSolrTools
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def strip_invalid_utf8_bytes(text)
|
86
|
-
if text.respond_to?(:encode) &&
|
86
|
+
if text.respond_to?(:encode) && !text.valid_encoding?
|
87
87
|
text.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
|
88
88
|
end
|
89
89
|
|
@@ -134,7 +134,7 @@ module SearchSolrTools
|
|
134
134
|
j = send(find_index_method, resolution['max_resolution'])
|
135
135
|
return resolution_values[i..j]
|
136
136
|
end
|
137
|
-
|
137
|
+
raise "Invalid resolution #{resolution['type']}"
|
138
138
|
end
|
139
139
|
|
140
140
|
def self.resolution_not_specified?(resolution)
|
@@ -62,9 +62,9 @@ module SearchSolrTools
|
|
62
62
|
def parse_geospatial_coverages(doc)
|
63
63
|
node = doc.xpath('//xmlns:geospatialCoverage')
|
64
64
|
south = node.xpath('./xmlns:northsouth/xmlns:start').text.to_f
|
65
|
-
north = south +
|
65
|
+
north = south + node.xpath('./xmlns:northsouth/xmlns:size').text.to_f
|
66
66
|
west = node.xpath('./xmlns:eastwest/xmlns:start').text.to_f
|
67
|
-
east = west +
|
67
|
+
east = west + node.xpath('./xmlns:eastwest/xmlns:size').text.to_f
|
68
68
|
# EOL uses out-of-range east-west values to represent bounding boxes
|
69
69
|
# that cross the date line. For any box with a value out of range,
|
70
70
|
# adjust the east/west value to lie within the -180 to 180 range.
|
@@ -43,7 +43,7 @@ module SearchSolrTools
|
|
43
43
|
'last_revision_date' => (Helpers::SolrFormat.date_str json_doc['lastRevisionDate']),
|
44
44
|
'dataset_url' => json_doc['datasetUrl'],
|
45
45
|
'distribution_formats' => json_doc['distributionFormats'],
|
46
|
-
'facet_format' =>
|
46
|
+
'facet_format' => json_doc['distributionFormats'].empty? ? [Helpers::SolrFormat::NOT_SPECIFIED] : translate_format_to_facet_format(json_doc['distributionFormats']),
|
47
47
|
'source' => %w(NSIDC ADE),
|
48
48
|
'popularity' => json_doc['popularity'],
|
49
49
|
'data_access_urls' => translate_data_access_urls(json_doc['dataAccessLinks']),
|