search_solr_tools 3.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +88 -0
- data/COPYING +674 -0
- data/README.md +203 -0
- data/bin/search_solr_tools +87 -0
- data/lib/search_solr_tools.rb +8 -0
- data/lib/search_solr_tools/config/environments.rb +12 -0
- data/lib/search_solr_tools/config/environments.yaml +73 -0
- data/lib/search_solr_tools/harvesters/ade_auto_suggest.rb +43 -0
- data/lib/search_solr_tools/harvesters/auto_suggest.rb +61 -0
- data/lib/search_solr_tools/harvesters/base.rb +183 -0
- data/lib/search_solr_tools/harvesters/bcodmo.rb +55 -0
- data/lib/search_solr_tools/harvesters/cisl.rb +63 -0
- data/lib/search_solr_tools/harvesters/echo.rb +50 -0
- data/lib/search_solr_tools/harvesters/eol.rb +53 -0
- data/lib/search_solr_tools/harvesters/ices.rb +55 -0
- data/lib/search_solr_tools/harvesters/nmi.rb +32 -0
- data/lib/search_solr_tools/harvesters/nodc.rb +72 -0
- data/lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb +33 -0
- data/lib/search_solr_tools/harvesters/nsidc_json.rb +60 -0
- data/lib/search_solr_tools/harvesters/oai.rb +59 -0
- data/lib/search_solr_tools/harvesters/pdc.rb +38 -0
- data/lib/search_solr_tools/harvesters/rda.rb +33 -0
- data/lib/search_solr_tools/harvesters/tdar.rb +57 -0
- data/lib/search_solr_tools/harvesters/usgs.rb +74 -0
- data/lib/search_solr_tools/helpers/bounding_box_util.rb +37 -0
- data/lib/search_solr_tools/helpers/csw_iso_query_builder.rb +30 -0
- data/lib/search_solr_tools/helpers/facet_configuration.rb +19 -0
- data/lib/search_solr_tools/helpers/iso_namespaces.rb +30 -0
- data/lib/search_solr_tools/helpers/iso_to_solr.rb +96 -0
- data/lib/search_solr_tools/helpers/iso_to_solr_format.rb +198 -0
- data/lib/search_solr_tools/helpers/query_builder.rb +13 -0
- data/lib/search_solr_tools/helpers/selectors.rb +20 -0
- data/lib/search_solr_tools/helpers/solr_format.rb +260 -0
- data/lib/search_solr_tools/helpers/tdar_format.rb +70 -0
- data/lib/search_solr_tools/helpers/translate_spatial_coverage.rb +77 -0
- data/lib/search_solr_tools/helpers/translate_temporal_coverage.rb +40 -0
- data/lib/search_solr_tools/helpers/usgs_format.rb +50 -0
- data/lib/search_solr_tools/selectors/cisl.rb +112 -0
- data/lib/search_solr_tools/selectors/echo_iso.rb +111 -0
- data/lib/search_solr_tools/selectors/ices_iso.rb +107 -0
- data/lib/search_solr_tools/selectors/nmi.rb +106 -0
- data/lib/search_solr_tools/selectors/nodc_iso.rb +107 -0
- data/lib/search_solr_tools/selectors/pdc_iso.rb +108 -0
- data/lib/search_solr_tools/selectors/rda.rb +106 -0
- data/lib/search_solr_tools/selectors/tdar_opensearch.rb +89 -0
- data/lib/search_solr_tools/selectors/usgs_iso.rb +105 -0
- data/lib/search_solr_tools/translators/bcodmo_json.rb +69 -0
- data/lib/search_solr_tools/translators/eol_to_solr.rb +78 -0
- data/lib/search_solr_tools/translators/nsidc_json.rb +190 -0
- data/lib/search_solr_tools/version.rb +3 -0
- data/search_solr_tools.gemspec +45 -0
- metadata +345 -0
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'rest-client'
|
3
|
+
require 'rgeo/geo_json'
|
4
|
+
require 'rgeo/wkrep/wkt_parser'
|
5
|
+
|
6
|
+
require 'search_solr_tools'
|
7
|
+
|
8
|
+
module SearchSolrTools
|
9
|
+
module Translators
|
10
|
+
# Translates Bcodmo json to solr json format
|
11
|
+
class BcodmoJsonToSolr
|
12
|
+
# rubocop:disable MethodLength
|
13
|
+
# rubocop:disable AbcSize
|
14
|
+
# Builds the Solr add-document hash for one BCO-DMO dataset.
#
# json_doc    - dataset hash, read with string keys such as 'dataset_name'.
# json_record - record hash supplying 'id', 'startDate' and 'endDate'.
# geometry    - hash whose 'geometry' entry is handed to translate_geometry.
#
# Returns a Hash keyed by Solr field name.
def translate(json_doc, json_record, geometry)
  # The value stored under 'people' is used as a URL and fetched over HTTP.
  originators = json_doc.key?('people') ? JSON.parse(RestClient.get(json_doc['people'])) : []
  spatial_values = translate_geometry(geometry)
  coverage = { 'start' => json_record['startDate'].to_s, 'end' => json_record['endDate'].to_s }
  temporal_values = Helpers::TranslateTemporalCoverage.translate_coverages([coverage])
  data_center = Helpers::SolrFormat::DATA_CENTER_NAMES[:BCODMO]
  description = json_doc['dataset_description']
  version_date = json_doc['dataset_deployment_version_date']
  {
    'title' => json_doc['dataset_name'],
    'authoritative_id' => json_record['id'] + json_doc['dataset_nid'],
    'dataset_version' => translate_dataset_version(json_doc['dataset_version']),
    'data_centers' => data_center[:long_name],
    'facet_data_center' => "#{data_center[:long_name]} | #{data_center[:short_name]}",
    # Fall back to the brief description when the full one is blank.
    'summary' => description.to_s.empty? ? json_doc['dataset_brief_description'] : description,
    'temporal_coverages' => temporal_values['temporal_coverages'],
    'temporal_duration' => temporal_values['temporal_duration'],
    'temporal' => temporal_values['temporal'],
    'facet_temporal_duration' => temporal_values['facet_temporal_duration'],
    'last_revision_date' => version_date.to_s.empty? ? nil : Time.parse(version_date).strftime('%Y-%m-%dT%H:%M:%SZ'),
    'dataset_url' => json_doc['dataset_url'],
    'source' => 'ADE',
    'facet_spatial_coverage' => spatial_values[:global_facet],
    'facet_spatial_scope' => spatial_values[:spatial_scope_facet],
    'spatial_coverages' => spatial_values[:spatial_display],
    'spatial_area' => spatial_values[:spatial_area],
    'spatial' => spatial_values[:spatial_index],
    'data_access_urls' => json_doc.fetch('dataset_deployment_url', []),
    'authors' => parse_people(originators)
  }
end
|
41
|
+
# rubocop:enable MethodLength
|
42
|
+
|
43
|
+
# Reduces a dataset version to its digit characters.
#
# dataset_version - any object; it is converted with to_s first.
#
# Returns a String containing only the digits, or nil when there are none.
def translate_dataset_version(dataset_version)
  digits = dataset_version.to_s.delete('^0-9')
  digits unless digits.empty?
end
|
47
|
+
|
48
|
+
# Extracts the 'person_name' entry from each person record.
#
# people_json - collection of hashes.
#
# Returns an Array of names, or nil when the collection is empty.
def parse_people(people_json)
  return if people_json.empty?
  people_json.map { |person| person['person_name'] }
end
|
51
|
+
|
52
|
+
# Parses a provider WKT geometry and derives the spatial Solr values from it.
#
# wkt_geom - hash whose 'geometry' entry is a WKT string, optionally prefixed
#            with the CRS84 URI.
#
# The caller's string is left untouched: the clean-up uses non-destructive
# sub, so repeated calls and frozen strings are safe.
#
# Returns a Hash with :spatial_display, :spatial_index, :spatial_area,
# :global_facet and :spatial_scope_facet.
def translate_geometry(wkt_geom)
  wkt = wkt_geom['geometry'].sub('<http://www.opengis.net/def/crs/OGC/1.3/CRS84> ', '')
  # Consider all linestring and polygon geometries to be multipoint for this provider
  wkt = wkt.sub('LINESTRING', 'MULTIPOINT').sub('POLYGON', 'MULTIPOINT')
  geometry = RGeo::WKRep::WKTParser.new(nil, {}).parse(wkt)
  {
    spatial_display: Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str([geometry]),
    spatial_index: Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str([geometry]),
    spatial_area: Helpers::TranslateSpatialCoverage.geojson_to_spatial_area([geometry]),
    global_facet: Helpers::TranslateSpatialCoverage.geojson_to_global_facet([geometry]),
    spatial_scope_facet: Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet([geometry])
  }
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module SearchSolrTools
|
2
|
+
module Translators
|
3
|
+
# Translates an EOL THREDDS dataset link set into a SOLR json ingest record
|
4
|
+
class EolToSolr
|
5
|
+
# rubocop:disable Metrics/AbcSize
|
6
|
+
# rubocop:disable Metrics/MethodLength
|
7
|
+
|
8
|
+
# Builds the Solr add-document hash for one EOL dataset.
#
# title_metadata   - XML document queried for the dataset element's 'name'
#                    and 'ID' attributes.
# dataset_metadata - XML document queried for dates, summary, keywords,
#                    contributors and coverage.
#
# Returns a Hash keyed by Solr field name.
def translate(title_metadata, dataset_metadata)
  temporal_values = Helpers::TranslateTemporalCoverage.translate_coverages(get_time_coverages(dataset_metadata))
  rev_date = dataset_metadata.xpath('//xmlns:date[@type="metadataCreated"]').text
  bounds = parse_geospatial_coverages(dataset_metadata)
  data_center = Helpers::SolrFormat::DATA_CENTER_NAMES[:EOL]
  {
    'title' => title_metadata.xpath('//xmlns:dataset').first['name'],
    'authoritative_id' => title_metadata.xpath('//xmlns:dataset').first['ID'],
    'data_centers' => data_center[:long_name],
    'facet_data_center' => "#{data_center[:long_name]} | #{data_center[:short_name]}",
    'summary' => dataset_metadata.xpath('//xmlns:documentation[@type="summary"]').text,
    'temporal_coverages' => temporal_values['temporal_coverages'],
    'temporal_duration' => temporal_values['temporal_duration'],
    'temporal' => temporal_values['temporal'],
    'facet_temporal_duration' => temporal_values['facet_temporal_duration'],
    # With no metadataCreated date, the current time stands in.
    'last_revision_date' => Helpers::SolrFormat.date_str(rev_date.empty? ? DateTime.now : rev_date),
    'source' => 'ADE',
    'keywords' => dataset_metadata.xpath('//xmlns:keyword').map(&:text),
    'authors' => dataset_metadata.xpath('//xmlns:contributor[@role="author"]').map { |node| parse_eol_authors(node.text) }.join(', '),
    'dataset_url' => eol_dataset_url(dataset_metadata),
    'facet_spatial_coverage' => Helpers::BoundingBoxUtil.box_global?(bounds),
    'facet_spatial_scope' => Helpers::SolrFormat.get_spatial_scope_facet_with_bounding_box(bounds),
    # Display order is S W N E; index order is W S E N.
    'spatial_coverages' => bounds.values_at(:south, :west, :north, :east).join(' '),
    'spatial_area' => spatial_coverage_to_spatial_area(bounds),
    'spatial' => bounds.values_at(:west, :south, :east, :north).join(' ')
  }
end
|
34
|
+
|
35
|
+
# Picks the dataset landing-page URL out of the documentation links.
#
# node - XML node answering #xpath (and #to_html for the warning).
#
# Returns the first xlink:href that points at
# http://data.eol.ucar.edu/codiac/dss/id=..., or nil when no link matches or
# the XPath query raises a syntax error (a warning is printed in that case).
def eol_dataset_url(node)
  hrefs = node.xpath('//xmlns:documentation[@xlink:href]').map { |doc| doc['xlink:href'] }
  # find yields nil when nothing matches; iterating with each and returning
  # from inside the block would hand the whole node set back as the "URL".
  hrefs.find { |href| href =~ %r{http://data\.eol\.ucar\.edu/codiac/dss/id=\S*} }
rescue Nokogiri::XML::XPath::SyntaxError
  puts "Warning - no documentation URL found in the following node: #{node.to_html}"
end
|
42
|
+
|
43
|
+
# Strips an obfuscated e-mail suffix ("..., user AT host dot tld") from an
# author entry.
#
# author - String author entry.
#
# Returns the text before the last comma when the entry contains both ' AT '
# and ' dot '; otherwise returns the entry unchanged. Entries with no comma,
# or with nothing before the comma, are also returned unchanged.
def parse_eol_authors(author)
  return author unless author.include?(' AT ') && author.include?(' dot ')
  comma = author.rindex(',')
  # rindex is nil when there is no comma to split on.
  return author if comma.nil? || comma.zero?
  author[0...comma]
end
|
49
|
+
|
50
|
+
# Collects the start/end text of every timeCoverage element in the document.
#
# doc - XML document answering #xpath.
#
# Returns an Array of { 'start' => String, 'end' => String } hashes.
def get_time_coverages(doc)
  doc.xpath('//xmlns:timeCoverage').map do |coverage|
    range_start = coverage.xpath('./xmlns:start').text
    range_end = coverage.xpath('./xmlns:end').text
    { 'start' => range_start, 'end' => range_end }
  end
end
|
56
|
+
|
57
|
+
# Computes the north-south extent of a bounding box.
#
# coverage - hash with :north and :south numeric entries.
#
# Returns north minus south, or nil when either bound is missing. The plain
# difference is used because subtracting absolute values collapses boxes that
# straddle the equator and goes negative for boxes south of it.
def spatial_coverage_to_spatial_area(coverage)
  north = coverage[:north]
  south = coverage[:south]
  return if north.nil? || south.nil?
  north - south
end
|
61
|
+
|
62
|
+
# Reads the geospatialCoverage element into a bounding box.
#
# doc - XML document answering #xpath.
#
# Each axis is given as a start plus a size; all values are parsed with to_f.
#
# Returns a Hash with :east, :west, :north and :south Floats.
def parse_geospatial_coverages(doc)
  coverage = doc.xpath('//xmlns:geospatialCoverage')
  number_at = ->(path) { coverage.xpath(path).text.to_f }

  south = number_at.call('./xmlns:northsouth/xmlns:start')
  north = south + number_at.call('./xmlns:northsouth/xmlns:size')
  west = number_at.call('./xmlns:eastwest/xmlns:start')
  east = west + number_at.call('./xmlns:eastwest/xmlns:size')

  # EOL uses out-of-range east-west values to represent bounding boxes
  # that cross the date line. For any box with a value out of range,
  # adjust the east/west value to lie within the -180 to 180 range.
  east = east > 180 ? east - 360 : east
  west = west < -180 ? west + 360 : west

  { east: east, west: west, north: north, south: south }
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
# rubocop:disable Metrics/ClassLength
|
2
|
+
require 'rgeo/geo_json'
|
3
|
+
|
4
|
+
require 'search_solr_tools'
|
5
|
+
|
6
|
+
module SearchSolrTools
|
7
|
+
module Translators
|
8
|
+
# Translates NSIDC JSON format to Solr JSON add format
|
9
|
+
class NsidcJsonToSolr
|
10
|
+
PARAMETER_PARTS = %w(category topic term variableLevel1 variableLevel2 variableLevel3 detailedVariable)
|
11
|
+
|
12
|
+
# rubocop:disable Metrics/MethodLength
|
13
|
+
# rubocop:disable Metrics/AbcSize
|
14
|
+
# Builds the Solr add-document hash for one NSIDC dataset.
#
# json_doc - NSIDC dataset hash, read with string keys such as
#            'authoritativeId', 'parameters' and 'spatialCoverages'.
#
# The 'title', 'summary', 'keywords' and 'brokered' entries are copied as-is;
# every other field is derived. A missing 'distributionFormats' list is
# treated like an empty one, so 'facet_format' falls back to NOT_SPECIFIED
# instead of raising.
#
# Returns the Hash of Solr fields.
def translate(json_doc)
  copy_keys = %w(title summary keywords brokered)
  temporal_coverage_values = Helpers::TranslateTemporalCoverage.translate_coverages json_doc['temporalCoverages']
  spatial_coverages = convert_spatial_coverages(json_doc['spatialCoverages'])
  data_center = Helpers::SolrFormat::DATA_CENTER_NAMES[:NSIDC]
  parameters = json_doc['parameters']
  instruments = json_doc['instruments']
  formats = json_doc['distributionFormats']

  solr_add_hash = json_doc.select { |k, _v| copy_keys.include?(k) }
  solr_add_hash.merge!(
    'authoritative_id' => json_doc['authoritativeId'],
    'dataset_version' => json_doc['majorVersion']['version'],
    'data_centers' => data_center[:long_name],
    'facet_data_center' => "#{data_center[:long_name]} | #{data_center[:short_name]}",
    'authors' => translate_personnel_and_creators_to_authors(json_doc['personnel'], generate_data_citation_creators(json_doc['dataCitation'])),
    'topics' => translate_iso_topic_categories(json_doc['isoTopicCategories']),
    'parameters' => translate_parameters(parameters),
    'full_parameters' => translate_json_string(parameters, PARAMETER_PARTS),
    'facet_parameter' => translate_parameters_to_facet_parameters(parameters),
    'platforms' => translate_json_string(json_doc['platforms']),
    'sensors' => translate_json_string(instruments),
    'facet_sensor' => translate_sensor_to_facet_sensor(instruments),
    'published_date' => Helpers::SolrFormat.date_str(json_doc['releaseDate']),
    'spatial_coverages' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_display_str(spatial_coverages),
    'spatial' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_index_str(spatial_coverages),
    'spatial_area' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_area(spatial_coverages),
    'facet_spatial_coverage' => Helpers::TranslateSpatialCoverage.geojson_to_global_facet(spatial_coverages),
    'facet_spatial_scope' => Helpers::TranslateSpatialCoverage.geojson_to_spatial_scope_facet(spatial_coverages),
    'temporal_coverages' => temporal_coverage_values['temporal_coverages'],
    'temporal_duration' => temporal_coverage_values['temporal_duration'],
    'temporal' => temporal_coverage_values['temporal'],
    'facet_temporal_duration' => temporal_coverage_values['facet_temporal_duration'],
    'last_revision_date' => Helpers::SolrFormat.date_str(json_doc['lastRevisionDate']),
    'dataset_url' => json_doc['datasetUrl'],
    'distribution_formats' => formats,
    # to_a turns a missing (nil) list into [] so the emptiness check cannot raise.
    'facet_format' => formats.to_a.empty? ? [Helpers::SolrFormat::NOT_SPECIFIED] : translate_format_to_facet_format(formats),
    'source' => %w(NSIDC ADE),
    'popularity' => json_doc['popularity'],
    'facet_sponsored_program' => translate_short_long_names_to_facet_value(json_doc['internalDataCenters']),
    'facet_temporal_resolution' => translate_temporal_resolution_facet_values(parameters),
    'facet_spatial_resolution' => translate_spatial_resolution_facet_values(parameters)
  )
end
|
54
|
+
# rubocop:enable Metrics/MethodLength
|
55
|
+
# rubocop:enable Metrics/AbcSize
|
56
|
+
|
57
|
+
# Decodes the 'geom4326' GeoJSON of every spatial coverage entry.
#
# nsidc_geom - collection of hashes, each with a 'geom4326' entry.
#
# Returns an Array with one decoded geometry per entry, in input order.
def convert_spatial_coverages(nsidc_geom)
  nsidc_geom.map { |coverage| RGeo::GeoJSON.decode(coverage['geom4326']) }
end
|
64
|
+
|
65
|
+
# Produces the sensor facet value for every instrument.
#
# json - collection of hashes with 'shortName' and 'longName', or nil.
#
# When binning leaves the short name unchanged the facet is
# "longName | shortName"; otherwise it is " | <bin>".
#
# Returns an Array of Strings ([] for nil input).
def translate_sensor_to_facet_sensor(json)
  return [] if json.nil?
  json.map do |instrument|
    short_name = instrument['shortName']
    binned = Helpers::SolrFormat.facet_binning('sensor', short_name.to_s)
    binned.eql?(short_name) ? "#{instrument['longName']} | #{short_name}" : " | #{binned}"
  end
end
|
78
|
+
|
79
|
+
# Bins the 'temporalResolution' of every parameter into facet values.
#
# parameters_json - collection of parameter hashes.
#
# Returns a flat Array of the distinct, non-blank binned values.
def translate_temporal_resolution_facet_values(parameters_json)
  binned = parameters_json.map do |parameter|
    Helpers::SolrFormat.resolution_value(parameter['temporalResolution'], :find_index_for_single_temporal_resolution_value, Helpers::SolrFormat::TEMPORAL_RESOLUTION_FACET_VALUES)
  end
  binned.reject { |value| value.to_s.empty? }.flatten.uniq
end
|
87
|
+
|
88
|
+
# Bins the 'spatialYResolution' of every parameter into facet values.
#
# parameters_json - collection of parameter hashes.
#
# Returns a flat Array of the distinct, non-blank binned values.
def translate_spatial_resolution_facet_values(parameters_json)
  binned = parameters_json.map do |parameter|
    Helpers::SolrFormat.resolution_value(parameter['spatialYResolution'], :find_index_for_single_spatial_resolution_value, Helpers::SolrFormat::SPATIAL_RESOLUTION_FACET_VALUES)
  end
  binned.reject { |value| value.to_s.empty? }.flatten.uniq
end
|
96
|
+
|
97
|
+
# Extracts the 'name' of every ISO topic category.
#
# iso_topic_categories_json - collection of hashes, or nil.
#
# Returns an Array of names, or nil for nil input.
def translate_iso_topic_categories(iso_topic_categories_json)
  return if iso_topic_categories_json.nil?
  iso_topic_categories_json.map { |category| category['name'] }
end
|
100
|
+
|
101
|
+
# Formats each entry as a "longName | shortName" facet value.
#
# json - collection of hashes with 'longName' and 'shortName', or nil.
#
# Returns an Array of Strings ([] for nil input).
def translate_short_long_names_to_facet_value(json)
  return [] if json.nil?
  # Interpolating nil yields '', so a missing name needs no special casing.
  json.map { |entry| "#{entry['longName']} | #{entry['shortName']}" }
end
|
112
|
+
|
113
|
+
# Merges personnel and citation creators into a list of author names.
#
# personnel_json - collection of hashes with 'firstName', 'middleName' and
#                  'lastName', or nil.
# creator_json   - same shape as personnel_json, or nil.
#
# Blank name parts are skipped; blank names and 'NSIDC User Services' are
# dropped.
#
# Returns an Array of distinct "first middle last" Strings.
def translate_personnel_and_creators_to_authors(personnel_json, creator_json)
  people = personnel_json.to_a | creator_json.to_a
  names = people.map do |person|
    %w(firstName middleName lastName).map { |part| person[part].to_s }.reject(&:empty?).join(' ').strip
  end
  names.reject { |name| name.empty? || name == 'NSIDC User Services' }.uniq
end
|
131
|
+
|
132
|
+
# Flattens the PARAMETER_PARTS values of every parameter into one list.
#
# parameters_json - collection of parameter hashes.
#
# Returns an Array of the non-blank part values, in input order.
def translate_parameters(parameters_json)
  parameters_json.flat_map { |parameter| generate_part_array(parameter, PARAMETER_PARTS) }
end
|
139
|
+
|
140
|
+
# Bins each full parameter string into its facet value.
#
# parameters_json - collection of parameter hashes.
#
# Returns an Array with one binned value per distinct parameter string
# ([] when no strings were produced).
def translate_parameters_to_facet_parameters(parameters_json)
  parameter_strings = translate_json_string(parameters_json, PARAMETER_PARTS)
  return [] if parameter_strings.nil?
  parameter_strings.map { |parameter| Helpers::SolrFormat.parameter_binning(parameter) }
end
|
149
|
+
|
150
|
+
# Bins each distribution format into its facet value.
#
# format_json - collection of format values, or nil.
#
# Returns an Array with one binned value per format ([] for nil input).
def translate_format_to_facet_format(format_json)
  return [] if format_json.nil?
  format_json.map { |distribution_format| Helpers::SolrFormat.facet_binning('format', distribution_format) }
end
|
160
|
+
|
161
|
+
# Joins the non-blank values of each item into a "a > b > c" string.
#
# json         - collection of hashes, or nil.
# limit_values - optional list of keys; when given and non-empty, only those
#                keys contribute to the string.
#
# A nil collection yields [] rather than raising, so an absent list is
# handled like an empty one.
#
# Returns an Array of the distinct, non-empty joined Strings.
def translate_json_string(json, limit_values = nil)
  return [] if json.nil?
  joined = json.map { |item| generate_part_array(item, limit_values).join(' > ') }
  joined.reject(&:empty?).uniq
end
|
171
|
+
|
172
|
+
# Reads the creators list out of a data citation.
#
# data_citation - hash with a 'creators' entry, or nil.
#
# Returns the 'creators' value, or [] when there is no citation.
def generate_data_citation_creators(data_citation)
  return [] if data_citation.nil?
  data_citation['creators']
end
|
176
|
+
|
177
|
+
# Collects the non-blank values of a hash, optionally restricted to a key list.
#
# json         - hash of parts.
# limit_values - optional list of keys; nil or empty means "use every key".
#
# Values keep the hash's own order; a value is skipped when its to_s is empty.
#
# Returns an Array of values.
def generate_part_array(json, limit_values = nil)
  unrestricted = limit_values.nil? || limit_values.empty?
  selected = unrestricted ? json : json.select { |key, _value| limit_values.include?(key) }
  selected.each_with_object([]) do |(_key, value), parts|
    parts << value unless value.to_s.empty?
  end
end
|
187
|
+
end
|
188
|
+
# rubocop:enable Metrics/ClassLength
|
189
|
+
end
|
190
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# Gem specification for search_solr_tools.
$LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
require 'search_solr_tools/version'

# no need for tests or dev files like .rubocop.yml to be packaged with the gem
# (kept as one space-separated string because it is interpolated into the
# `git ls-files` command line below)
gem_files = %(CHANGELOG.md COPYING README.md bin/ lib/ search_solr_tools.gemspec)

Gem::Specification.new do |spec|
  spec.name        = 'search_solr_tools'
  spec.version     = SearchSolrTools::VERSION
  spec.authors     = ['Chris Chalstrom', 'Michael Brandt', 'Jonathan Kovarik', 'Luis Lopez', 'Stuart Reed']
  spec.email       = ['cchalstr@nsidc.org', 'mbrandt@colorado.edu', 'kovarik@nsidc.org', 'luis.lopezespinosa@colorado.edu', 'stuart.reed@colorado.edu']
  spec.summary     = 'Tools to harvest and manage various scientific dataset feeds in a Solr instance.'
  spec.description = <<-EOF
    Ruby translators to transform various metadata feeds into solr documents and
    a command-line utility to access/utilize the gem's translators to harvest
    metadata into a working solr instance.
  EOF
  spec.homepage    = 'https://github.com/nsidc/search-solr-tools'
  # SPDX identifier, so RubyGems and license tooling recognise the license.
  spec.license     = 'GPL-3.0'

  spec.files         = `git ls-files -z #{gem_files}`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.required_ruby_version = '~> 2.0'

  spec.add_runtime_dependency 'iso8601', '~> 0.8'
  spec.add_runtime_dependency 'multi_json', '~> 1.11'
  spec.add_runtime_dependency 'nokogiri', '~> 1.6'
  spec.add_runtime_dependency 'require_all', '~> 1.3'
  spec.add_runtime_dependency 'rest-client', '~> 1.6'
  spec.add_runtime_dependency 'rgeo', '~> 0.3'
  spec.add_runtime_dependency 'rgeo-geojson', '~> 0.3'
  spec.add_runtime_dependency 'rsolr', '~> 1.0'
  spec.add_runtime_dependency 'thor', '~> 0.18'

  spec.add_development_dependency 'gem-release', '~> 0.7'
  spec.add_development_dependency 'guard', '~> 2.12'
  spec.add_development_dependency 'guard-rspec', '~> 4.6'
  spec.add_development_dependency 'guard-rubocop', '~> 1.2'
  spec.add_development_dependency 'rake', '~> 10.4'
  spec.add_development_dependency 'rspec', '~> 3.2'
  spec.add_development_dependency 'rubocop', '~> 0.32'
  spec.add_development_dependency 'webmock', '~> 1.13'
end
|
metadata
ADDED
@@ -0,0 +1,345 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: search_solr_tools
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 3.1.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Chris Chalstrom
|
8
|
+
- Michael Brandt
|
9
|
+
- Jonathan Kovarik
|
10
|
+
- Luis Lopez
|
11
|
+
- Stuart Reed
|
12
|
+
autorequire:
|
13
|
+
bindir: bin
|
14
|
+
cert_chain: []
|
15
|
+
date: 2015-06-30 00:00:00.000000000 Z
|
16
|
+
dependencies:
|
17
|
+
- !ruby/object:Gem::Dependency
|
18
|
+
name: iso8601
|
19
|
+
requirement: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - "~>"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: '0.8'
|
24
|
+
type: :runtime
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
requirements:
|
28
|
+
- - "~>"
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: '0.8'
|
31
|
+
- !ruby/object:Gem::Dependency
|
32
|
+
name: multi_json
|
33
|
+
requirement: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - "~>"
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '1.11'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - "~>"
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '1.11'
|
45
|
+
- !ruby/object:Gem::Dependency
|
46
|
+
name: nokogiri
|
47
|
+
requirement: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - "~>"
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '1.6'
|
52
|
+
type: :runtime
|
53
|
+
prerelease: false
|
54
|
+
version_requirements: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - "~>"
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '1.6'
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: require_all
|
61
|
+
requirement: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - "~>"
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '1.3'
|
66
|
+
type: :runtime
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - "~>"
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '1.3'
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: rest-client
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '1.6'
|
80
|
+
type: :runtime
|
81
|
+
prerelease: false
|
82
|
+
version_requirements: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - "~>"
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: '1.6'
|
87
|
+
- !ruby/object:Gem::Dependency
|
88
|
+
name: rgeo
|
89
|
+
requirement: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - "~>"
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0.3'
|
94
|
+
type: :runtime
|
95
|
+
prerelease: false
|
96
|
+
version_requirements: !ruby/object:Gem::Requirement
|
97
|
+
requirements:
|
98
|
+
- - "~>"
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '0.3'
|
101
|
+
- !ruby/object:Gem::Dependency
|
102
|
+
name: rgeo-geojson
|
103
|
+
requirement: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - "~>"
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0.3'
|
108
|
+
type: :runtime
|
109
|
+
prerelease: false
|
110
|
+
version_requirements: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - "~>"
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '0.3'
|
115
|
+
- !ruby/object:Gem::Dependency
|
116
|
+
name: rsolr
|
117
|
+
requirement: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - "~>"
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '1.0'
|
122
|
+
type: :runtime
|
123
|
+
prerelease: false
|
124
|
+
version_requirements: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - "~>"
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: '1.0'
|
129
|
+
- !ruby/object:Gem::Dependency
|
130
|
+
name: thor
|
131
|
+
requirement: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - "~>"
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: '0.18'
|
136
|
+
type: :runtime
|
137
|
+
prerelease: false
|
138
|
+
version_requirements: !ruby/object:Gem::Requirement
|
139
|
+
requirements:
|
140
|
+
- - "~>"
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
version: '0.18'
|
143
|
+
- !ruby/object:Gem::Dependency
|
144
|
+
name: gem-release
|
145
|
+
requirement: !ruby/object:Gem::Requirement
|
146
|
+
requirements:
|
147
|
+
- - "~>"
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: '0.7'
|
150
|
+
type: :development
|
151
|
+
prerelease: false
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
requirements:
|
154
|
+
- - "~>"
|
155
|
+
- !ruby/object:Gem::Version
|
156
|
+
version: '0.7'
|
157
|
+
- !ruby/object:Gem::Dependency
|
158
|
+
name: guard
|
159
|
+
requirement: !ruby/object:Gem::Requirement
|
160
|
+
requirements:
|
161
|
+
- - "~>"
|
162
|
+
- !ruby/object:Gem::Version
|
163
|
+
version: '2.12'
|
164
|
+
type: :development
|
165
|
+
prerelease: false
|
166
|
+
version_requirements: !ruby/object:Gem::Requirement
|
167
|
+
requirements:
|
168
|
+
- - "~>"
|
169
|
+
- !ruby/object:Gem::Version
|
170
|
+
version: '2.12'
|
171
|
+
- !ruby/object:Gem::Dependency
|
172
|
+
name: guard-rspec
|
173
|
+
requirement: !ruby/object:Gem::Requirement
|
174
|
+
requirements:
|
175
|
+
- - "~>"
|
176
|
+
- !ruby/object:Gem::Version
|
177
|
+
version: '4.6'
|
178
|
+
type: :development
|
179
|
+
prerelease: false
|
180
|
+
version_requirements: !ruby/object:Gem::Requirement
|
181
|
+
requirements:
|
182
|
+
- - "~>"
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
version: '4.6'
|
185
|
+
- !ruby/object:Gem::Dependency
|
186
|
+
name: guard-rubocop
|
187
|
+
requirement: !ruby/object:Gem::Requirement
|
188
|
+
requirements:
|
189
|
+
- - "~>"
|
190
|
+
- !ruby/object:Gem::Version
|
191
|
+
version: '1.2'
|
192
|
+
type: :development
|
193
|
+
prerelease: false
|
194
|
+
version_requirements: !ruby/object:Gem::Requirement
|
195
|
+
requirements:
|
196
|
+
- - "~>"
|
197
|
+
- !ruby/object:Gem::Version
|
198
|
+
version: '1.2'
|
199
|
+
- !ruby/object:Gem::Dependency
|
200
|
+
name: rake
|
201
|
+
requirement: !ruby/object:Gem::Requirement
|
202
|
+
requirements:
|
203
|
+
- - "~>"
|
204
|
+
- !ruby/object:Gem::Version
|
205
|
+
version: '10.4'
|
206
|
+
type: :development
|
207
|
+
prerelease: false
|
208
|
+
version_requirements: !ruby/object:Gem::Requirement
|
209
|
+
requirements:
|
210
|
+
- - "~>"
|
211
|
+
- !ruby/object:Gem::Version
|
212
|
+
version: '10.4'
|
213
|
+
- !ruby/object:Gem::Dependency
|
214
|
+
name: rspec
|
215
|
+
requirement: !ruby/object:Gem::Requirement
|
216
|
+
requirements:
|
217
|
+
- - "~>"
|
218
|
+
- !ruby/object:Gem::Version
|
219
|
+
version: '3.2'
|
220
|
+
type: :development
|
221
|
+
prerelease: false
|
222
|
+
version_requirements: !ruby/object:Gem::Requirement
|
223
|
+
requirements:
|
224
|
+
- - "~>"
|
225
|
+
- !ruby/object:Gem::Version
|
226
|
+
version: '3.2'
|
227
|
+
- !ruby/object:Gem::Dependency
|
228
|
+
name: rubocop
|
229
|
+
requirement: !ruby/object:Gem::Requirement
|
230
|
+
requirements:
|
231
|
+
- - "~>"
|
232
|
+
- !ruby/object:Gem::Version
|
233
|
+
version: '0.32'
|
234
|
+
type: :development
|
235
|
+
prerelease: false
|
236
|
+
version_requirements: !ruby/object:Gem::Requirement
|
237
|
+
requirements:
|
238
|
+
- - "~>"
|
239
|
+
- !ruby/object:Gem::Version
|
240
|
+
version: '0.32'
|
241
|
+
- !ruby/object:Gem::Dependency
|
242
|
+
name: webmock
|
243
|
+
requirement: !ruby/object:Gem::Requirement
|
244
|
+
requirements:
|
245
|
+
- - "~>"
|
246
|
+
- !ruby/object:Gem::Version
|
247
|
+
version: '1.13'
|
248
|
+
type: :development
|
249
|
+
prerelease: false
|
250
|
+
version_requirements: !ruby/object:Gem::Requirement
|
251
|
+
requirements:
|
252
|
+
- - "~>"
|
253
|
+
- !ruby/object:Gem::Version
|
254
|
+
version: '1.13'
|
255
|
+
description: |2
|
256
|
+
Ruby translators to transform various metadata feeds into solr documents and
|
257
|
+
a command-line utility to access/utilize the gem's translators to harvest
|
258
|
+
metadata into a working solr instance.
|
259
|
+
email:
|
260
|
+
- cchalstr@nsidc.org
|
261
|
+
- mbrandt@colorado.edu
|
262
|
+
- kovarik@nsidc.org
|
263
|
+
- luis.lopezespinosa@colorado.edu
|
264
|
+
- stuart.reed@colorado.edu
|
265
|
+
executables:
|
266
|
+
- search_solr_tools
|
267
|
+
extensions: []
|
268
|
+
extra_rdoc_files: []
|
269
|
+
files:
|
270
|
+
- CHANGELOG.md
|
271
|
+
- COPYING
|
272
|
+
- README.md
|
273
|
+
- bin/search_solr_tools
|
274
|
+
- lib/search_solr_tools.rb
|
275
|
+
- lib/search_solr_tools/config/environments.rb
|
276
|
+
- lib/search_solr_tools/config/environments.yaml
|
277
|
+
- lib/search_solr_tools/harvesters/ade_auto_suggest.rb
|
278
|
+
- lib/search_solr_tools/harvesters/auto_suggest.rb
|
279
|
+
- lib/search_solr_tools/harvesters/base.rb
|
280
|
+
- lib/search_solr_tools/harvesters/bcodmo.rb
|
281
|
+
- lib/search_solr_tools/harvesters/cisl.rb
|
282
|
+
- lib/search_solr_tools/harvesters/echo.rb
|
283
|
+
- lib/search_solr_tools/harvesters/eol.rb
|
284
|
+
- lib/search_solr_tools/harvesters/ices.rb
|
285
|
+
- lib/search_solr_tools/harvesters/nmi.rb
|
286
|
+
- lib/search_solr_tools/harvesters/nodc.rb
|
287
|
+
- lib/search_solr_tools/harvesters/nsidc_auto_suggest.rb
|
288
|
+
- lib/search_solr_tools/harvesters/nsidc_json.rb
|
289
|
+
- lib/search_solr_tools/harvesters/oai.rb
|
290
|
+
- lib/search_solr_tools/harvesters/pdc.rb
|
291
|
+
- lib/search_solr_tools/harvesters/rda.rb
|
292
|
+
- lib/search_solr_tools/harvesters/tdar.rb
|
293
|
+
- lib/search_solr_tools/harvesters/usgs.rb
|
294
|
+
- lib/search_solr_tools/helpers/bounding_box_util.rb
|
295
|
+
- lib/search_solr_tools/helpers/csw_iso_query_builder.rb
|
296
|
+
- lib/search_solr_tools/helpers/facet_configuration.rb
|
297
|
+
- lib/search_solr_tools/helpers/iso_namespaces.rb
|
298
|
+
- lib/search_solr_tools/helpers/iso_to_solr.rb
|
299
|
+
- lib/search_solr_tools/helpers/iso_to_solr_format.rb
|
300
|
+
- lib/search_solr_tools/helpers/query_builder.rb
|
301
|
+
- lib/search_solr_tools/helpers/selectors.rb
|
302
|
+
- lib/search_solr_tools/helpers/solr_format.rb
|
303
|
+
- lib/search_solr_tools/helpers/tdar_format.rb
|
304
|
+
- lib/search_solr_tools/helpers/translate_spatial_coverage.rb
|
305
|
+
- lib/search_solr_tools/helpers/translate_temporal_coverage.rb
|
306
|
+
- lib/search_solr_tools/helpers/usgs_format.rb
|
307
|
+
- lib/search_solr_tools/selectors/cisl.rb
|
308
|
+
- lib/search_solr_tools/selectors/echo_iso.rb
|
309
|
+
- lib/search_solr_tools/selectors/ices_iso.rb
|
310
|
+
- lib/search_solr_tools/selectors/nmi.rb
|
311
|
+
- lib/search_solr_tools/selectors/nodc_iso.rb
|
312
|
+
- lib/search_solr_tools/selectors/pdc_iso.rb
|
313
|
+
- lib/search_solr_tools/selectors/rda.rb
|
314
|
+
- lib/search_solr_tools/selectors/tdar_opensearch.rb
|
315
|
+
- lib/search_solr_tools/selectors/usgs_iso.rb
|
316
|
+
- lib/search_solr_tools/translators/bcodmo_json.rb
|
317
|
+
- lib/search_solr_tools/translators/eol_to_solr.rb
|
318
|
+
- lib/search_solr_tools/translators/nsidc_json.rb
|
319
|
+
- lib/search_solr_tools/version.rb
|
320
|
+
- search_solr_tools.gemspec
|
321
|
+
homepage: https://github.com/nsidc/search-solr-tools
|
322
|
+
licenses:
|
323
|
+
- GNU GPL Version 3
|
324
|
+
metadata: {}
|
325
|
+
post_install_message:
|
326
|
+
rdoc_options: []
|
327
|
+
require_paths:
|
328
|
+
- lib
|
329
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
330
|
+
requirements:
|
331
|
+
- - "~>"
|
332
|
+
- !ruby/object:Gem::Version
|
333
|
+
version: '2.0'
|
334
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
335
|
+
requirements:
|
336
|
+
- - ">="
|
337
|
+
- !ruby/object:Gem::Version
|
338
|
+
version: '0'
|
339
|
+
requirements: []
|
340
|
+
rubyforge_project:
|
341
|
+
rubygems_version: 2.4.8
|
342
|
+
signing_key:
|
343
|
+
specification_version: 4
|
344
|
+
summary: Tools to harvest and manage various scientific dataset feeds in a Solr instance.
|
345
|
+
test_files: []
|