geo_combine 0.7.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +7 -16
- data/.gitignore +1 -0
- data/.rubocop.yml +5 -1
- data/.rubocop_todo.yml +34 -36
- data/README.md +47 -22
- data/geo_combine.gemspec +2 -0
- data/lib/geo_combine/ckan_metadata.rb +5 -4
- data/lib/geo_combine/formatting.rb +1 -1
- data/lib/geo_combine/geo_blacklight_harvester.rb +17 -12
- data/lib/geo_combine/geoblacklight.rb +1 -1
- data/lib/geo_combine/harvester.rb +132 -0
- data/lib/geo_combine/indexer.rb +126 -0
- data/lib/geo_combine/logger.rb +16 -0
- data/lib/geo_combine/migrators/v1_aardvark_migrator.rb +118 -0
- data/lib/geo_combine/ogp.rb +1 -1
- data/lib/geo_combine/railtie.rb +1 -0
- data/lib/geo_combine/version.rb +1 -1
- data/lib/geo_combine.rb +3 -0
- data/lib/tasks/geo_combine.rake +10 -65
- data/spec/fixtures/docs/full_geoblacklight.json +8 -1
- data/spec/fixtures/docs/full_geoblacklight_aardvark.json +51 -0
- data/spec/fixtures/indexing/aardvark.json +57 -0
- data/spec/fixtures/json_docs.rb +6 -0
- data/spec/lib/geo_combine/bounding_box_spec.rb +1 -1
- data/spec/lib/geo_combine/geo_blacklight_harvester_spec.rb +5 -4
- data/spec/lib/geo_combine/geoblacklight_spec.rb +3 -3
- data/spec/lib/geo_combine/harvester_spec.rb +133 -0
- data/spec/lib/geo_combine/indexer_spec.rb +134 -0
- data/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb +46 -0
- data/spec/lib/geo_combine_spec.rb +20 -17
- data/spec/spec_helper.rb +1 -2
- metadata +46 -9
- data/bin/geocombine +0 -6
- data/lib/geo_combine/cli.rb +0 -27
- data/spec/lib/tasks/geo_combine_spec.rb +0 -45
@@ -5,8 +5,9 @@ require 'spec_helper'
|
|
5
5
|
require 'rsolr'
|
6
6
|
|
7
7
|
RSpec.describe GeoCombine::GeoBlacklightHarvester do
|
8
|
-
subject(:harvester) { described_class.new(site_key) }
|
8
|
+
subject(:harvester) { described_class.new(site_key, logger:) }
|
9
9
|
|
10
|
+
let(:logger) { instance_double(Logger, warn: nil, info: nil, error: nil, debug: nil) }
|
10
11
|
let(:site_key) { :INSTITUTION }
|
11
12
|
let(:stub_json_response) { '{}' }
|
12
13
|
let(:stub_solr_connection) { double('RSolr::Connection') }
|
@@ -40,7 +41,7 @@ RSpec.describe GeoCombine::GeoBlacklightHarvester do
|
|
40
41
|
|
41
42
|
let(:docs) { [{ layer_slug_s: 'abc-123' }, { layer_slug_s: 'abc-321' }] }
|
42
43
|
let(:stub_json_response) do
|
43
|
-
{ response: { docs: docs, pages: { current_page: 1, total_pages: 1 } } }.to_json
|
44
|
+
{ response: { docs:, pages: { current_page: 1, total_pages: 1 } } }.to_json
|
44
45
|
end
|
45
46
|
|
46
47
|
it 'adds documents returned to solr' do
|
@@ -142,7 +143,7 @@ RSpec.describe GeoCombine::GeoBlacklightHarvester do
|
|
142
143
|
).and_return(stub_second_response.to_json)
|
143
144
|
base_url = 'https://example.com?f%5Bdct_provenance_s%5D%5B%5D=INSTITUTION&format=json&per_page=100'
|
144
145
|
docs = described_class::LegacyBlacklightResponse.new(response: stub_first_response,
|
145
|
-
base_url: base_url).documents
|
146
|
+
base_url:).documents
|
146
147
|
|
147
148
|
expect(docs.to_a).to eq([first_docs, second_docs])
|
148
149
|
end
|
@@ -182,7 +183,7 @@ RSpec.describe GeoCombine::GeoBlacklightHarvester do
|
|
182
183
|
|
183
184
|
base_url = 'https://example.com?f%5Bdct_provenance_s%5D%5B%5D=INSTITUTION&format=json&per_page=100'
|
184
185
|
docs = described_class::ModernBlacklightResponse.new(response: first_results_response,
|
185
|
-
base_url: base_url).documents
|
186
|
+
base_url:).documents
|
186
187
|
|
187
188
|
expect(docs.to_a).to eq([
|
188
189
|
[{ 'layer_slug_s' => 'abc-123' }, { 'layer_slug_s' => 'abc-321' }],
|
@@ -151,8 +151,8 @@ RSpec.describe GeoCombine::Geoblacklight do
|
|
151
151
|
let(:unparseable_json) do
|
152
152
|
<<-JSON
|
153
153
|
{
|
154
|
-
|
155
|
-
|
154
|
+
"http://schema.org/url":"http://example.com/abc123",,
|
155
|
+
"http://schema.org/downloadUrl":"http://example.com/abc123/data.zip"
|
156
156
|
}
|
157
157
|
JSON
|
158
158
|
end
|
@@ -184,7 +184,7 @@ RSpec.describe GeoCombine::Geoblacklight do
|
|
184
184
|
|
185
185
|
describe 'spatial_validate!' do
|
186
186
|
context 'when valid' do
|
187
|
-
it { full_geobl.spatial_validate! }
|
187
|
+
it { expect { full_geobl.spatial_validate! }.not_to raise_error }
|
188
188
|
end
|
189
189
|
|
190
190
|
context 'when invalid' do
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'git'
|
4
|
+
require 'geo_combine/harvester'
|
5
|
+
require 'spec_helper'
|
6
|
+
|
7
|
+
RSpec.describe GeoCombine::Harvester do
|
8
|
+
subject(:harvester) { described_class.new(ogm_path: 'spec/fixtures/indexing', logger:) }
|
9
|
+
|
10
|
+
let(:logger) { instance_double(Logger, warn: nil, info: nil, error: nil, debug: nil) }
|
11
|
+
let(:repo_name) { 'my-institution' }
|
12
|
+
let(:repo_path) { File.join(harvester.ogm_path, repo_name) }
|
13
|
+
let(:repo_url) { "https://github.com/OpenGeoMetadata/#{repo_name}.git" }
|
14
|
+
let(:stub_repo) { instance_double(Git::Base) }
|
15
|
+
let(:stub_gh_api) do
|
16
|
+
[
|
17
|
+
{ name: repo_name, size: 100 },
|
18
|
+
{ name: 'another-institution', size: 100 },
|
19
|
+
{ name: 'outdated-institution', size: 100, archived: true }, # archived
|
20
|
+
{ name: 'aardvark', size: 300 }, # on denylist
|
21
|
+
{ name: 'empty', size: 0 } # no data
|
22
|
+
]
|
23
|
+
end
|
24
|
+
|
25
|
+
before do
|
26
|
+
# stub github API requests
|
27
|
+
# use the whole org response, or just a portion for particular repos
|
28
|
+
allow(Net::HTTP).to receive(:get) do |uri|
|
29
|
+
if uri == described_class.ogm_api_uri
|
30
|
+
stub_gh_api.to_json
|
31
|
+
else
|
32
|
+
repo_name = uri.path.split('/').last.gsub('.git', '')
|
33
|
+
stub_gh_api.find { |repo| repo[:name] == repo_name }.to_json
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# stub git commands
|
38
|
+
allow(Git).to receive_messages(open: stub_repo, clone: stub_repo)
|
39
|
+
allow(stub_repo).to receive(:pull).and_return(stub_repo)
|
40
|
+
end
|
41
|
+
|
42
|
+
describe '#docs_to_index' do
|
43
|
+
it 'yields each JSON record with its path, skipping layers.json' do
|
44
|
+
expect { |b| harvester.docs_to_index(&b) }.to yield_successive_args(
|
45
|
+
[JSON.parse(File.read('spec/fixtures/indexing/basic_geoblacklight.json')), 'spec/fixtures/indexing/basic_geoblacklight.json'],
|
46
|
+
[JSON.parse(File.read('spec/fixtures/indexing/geoblacklight.json')), 'spec/fixtures/indexing/geoblacklight.json']
|
47
|
+
)
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'skips records with a different schema version' do
|
51
|
+
harvester = described_class.new(ogm_path: 'spec/fixtures/indexing/', schema_version: 'Aardvark', logger:)
|
52
|
+
expect { |b| harvester.docs_to_index(&b) }.to yield_successive_args(
|
53
|
+
[JSON.parse(File.read('spec/fixtures/indexing/aardvark.json')), 'spec/fixtures/indexing/aardvark.json']
|
54
|
+
)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
describe '#pull' do
|
59
|
+
it 'can pull a single repository' do
|
60
|
+
harvester.pull(repo_name)
|
61
|
+
expect(Git).to have_received(:open).with(repo_path)
|
62
|
+
expect(stub_repo).to have_received(:pull)
|
63
|
+
end
|
64
|
+
|
65
|
+
it 'clones a repo before pulling if it does not exist' do
|
66
|
+
harvester.pull(repo_name)
|
67
|
+
expect(Git).to have_received(:clone)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
describe '#pull_all' do
|
72
|
+
it 'can pull all repositories' do
|
73
|
+
harvester.pull_all
|
74
|
+
expect(Git).to have_received(:open).exactly(2).times
|
75
|
+
expect(stub_repo).to have_received(:pull).exactly(2).times
|
76
|
+
end
|
77
|
+
|
78
|
+
it 'returns the names of repositories pulled' do
|
79
|
+
expect(harvester.pull_all).to eq(%w[my-institution another-institution])
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'skips repositories in the denylist' do
|
83
|
+
harvester.pull_all
|
84
|
+
expect(Git).not_to have_received(:open).with('https://github.com/OpenGeoMetadata/aardvark.git')
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'skips archived repositories' do
|
88
|
+
harvester.pull_all
|
89
|
+
expect(Git).not_to have_received(:open).with('https://github.com/OpenGeoMetadata/outdated-institution.git')
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
describe '#clone' do
|
94
|
+
it 'can clone a single repository' do
|
95
|
+
harvester.clone(repo_name)
|
96
|
+
expect(Git).to have_received(:clone).with(
|
97
|
+
repo_url,
|
98
|
+
nil, {
|
99
|
+
depth: 1, # shallow clone
|
100
|
+
path: harvester.ogm_path
|
101
|
+
}
|
102
|
+
)
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'skips repositories that already exist' do
|
106
|
+
allow(File).to receive(:directory?).with(repo_path).and_return(true)
|
107
|
+
harvester.clone(repo_name)
|
108
|
+
expect(Git).not_to have_received(:clone)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
describe '#clone_all' do
|
113
|
+
it 'can clone all repositories' do
|
114
|
+
harvester.clone_all
|
115
|
+
expect(Git).to have_received(:clone).exactly(2).times
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'skips repositories in the denylist' do
|
119
|
+
harvester.clone_all
|
120
|
+
expect(Git).not_to have_received(:clone).with('https://github.com/OpenGeoMetadata/aardvark.git')
|
121
|
+
end
|
122
|
+
|
123
|
+
it 'returns the names of repositories cloned' do
|
124
|
+
expect(harvester.clone_all).to eq(%w[my-institution another-institution])
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
describe '#ogm_api_uri' do
|
129
|
+
it 'includes a per_page param' do
|
130
|
+
expect(described_class.send('ogm_api_uri').to_s).to include('per_page')
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'geo_combine/indexer'
|
4
|
+
require 'spec_helper'
|
5
|
+
|
6
|
+
# Mock an available Blacklight installation
|
7
|
+
class FakeBlacklight
|
8
|
+
def self.default_index
|
9
|
+
Repository
|
10
|
+
end
|
11
|
+
|
12
|
+
class Repository
|
13
|
+
def self.connection; end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
RSpec.describe GeoCombine::Indexer do
|
18
|
+
subject(:indexer) { described_class.new(solr:, logger:) }
|
19
|
+
|
20
|
+
let(:logger) { instance_double(Logger, warn: nil, info: nil, error: nil, debug: nil) }
|
21
|
+
let(:solr) { instance_double(RSolr::Client, options: { url: 'TEST' }) }
|
22
|
+
let(:docs) do
|
23
|
+
[
|
24
|
+
[{ 'id' => '1' }, 'path/to/record1.json'], # v1.0 schema
|
25
|
+
[{ 'dc_identifier_s' => '2' }, 'path/to/record2.json'] # aardvark schema
|
26
|
+
]
|
27
|
+
end
|
28
|
+
|
29
|
+
before do
|
30
|
+
allow(solr).to receive(:update)
|
31
|
+
allow(solr).to receive(:commit)
|
32
|
+
end
|
33
|
+
|
34
|
+
describe '#initialize' do
|
35
|
+
before do
|
36
|
+
allow(RSolr).to receive(:connect).and_return(solr)
|
37
|
+
end
|
38
|
+
|
39
|
+
context 'when solr url is set in the environment' do
|
40
|
+
before do
|
41
|
+
stub_const('ENV', 'SOLR_URL' => 'http://localhost:8983/solr/geoblacklight')
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'connects to the solr instance' do
|
45
|
+
described_class.new(logger:)
|
46
|
+
expect(RSolr).to have_received(:connect).with(
|
47
|
+
be_a(Faraday::Connection),
|
48
|
+
url: 'http://localhost:8983/solr/geoblacklight'
|
49
|
+
)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
context 'when there is a configured Blacklight connection' do
|
54
|
+
before do
|
55
|
+
stub_const('Blacklight', FakeBlacklight)
|
56
|
+
allow(FakeBlacklight::Repository).to receive(:connection).and_return(
|
57
|
+
instance_double(RSolr::Client, base_uri: URI('http://localhost:8983/solr/geoblacklight'))
|
58
|
+
)
|
59
|
+
end
|
60
|
+
|
61
|
+
it 'connects to the solr instance' do
|
62
|
+
described_class.new(logger:)
|
63
|
+
expect(RSolr).to have_received(:connect).with(
|
64
|
+
be_a(Faraday::Connection),
|
65
|
+
url: 'http://localhost:8983/solr/geoblacklight'
|
66
|
+
)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
context 'when solr url is not set' do
|
71
|
+
before do
|
72
|
+
stub_const('ENV', {})
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'falls back to the Blacklight default' do
|
76
|
+
described_class.new(logger:)
|
77
|
+
expect(RSolr).to have_received(:connect).with(
|
78
|
+
be_a(Faraday::Connection),
|
79
|
+
url: 'http://localhost:8983/solr/blacklight-core'
|
80
|
+
)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
describe '#index' do
|
86
|
+
let(:solr_error_msg) { { error: { msg: 'error message' } }.to_json }
|
87
|
+
let(:solr_response) { { status: '400', body: solr_error_msg } }
|
88
|
+
let(:error) { RSolr::Error::Http.new({ uri: URI('') }, solr_response) }
|
89
|
+
|
90
|
+
it 'sends records in batches to solr' do
|
91
|
+
indexer.index(docs)
|
92
|
+
expect(solr).to have_received(:update).with(
|
93
|
+
data: "{ add: { doc: {\"id\":\"1\"} },\nadd: { doc: {\"dc_identifier_s\":\"2\"} } }",
|
94
|
+
headers: { 'Content-Type' => 'application/json' },
|
95
|
+
params: { overwrite: true }
|
96
|
+
)
|
97
|
+
end
|
98
|
+
|
99
|
+
it 'commits changes to solr after indexing' do
|
100
|
+
indexer.index(docs)
|
101
|
+
expect(solr).to have_received(:commit).once
|
102
|
+
end
|
103
|
+
|
104
|
+
it 'returns the count of records successfully indexed' do
|
105
|
+
expect(indexer.index(docs)).to eq 2
|
106
|
+
end
|
107
|
+
|
108
|
+
context 'when an error occurs during batch indexing' do
|
109
|
+
before do
|
110
|
+
allow(solr).to receive(:update).and_raise(error)
|
111
|
+
allow(solr).to receive(:add)
|
112
|
+
end
|
113
|
+
|
114
|
+
it 'attempts to index records individually' do
|
115
|
+
total = indexer.index(docs)
|
116
|
+
expect(solr).to have_received(:add).twice
|
117
|
+
expect(total).to eq 2
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
context 'when an error occurs during individual indexing' do
|
122
|
+
before do
|
123
|
+
allow(solr).to receive(:update).and_raise(error)
|
124
|
+
allow(solr).to receive(:add).with(docs[0][0], anything).and_raise(error)
|
125
|
+
allow(solr).to receive(:add).with(docs[1][0], anything)
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'continues indexing' do
|
129
|
+
total = indexer.index(docs)
|
130
|
+
expect(total).to eq 1
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe GeoCombine::Migrators::V1AardvarkMigrator do
|
6
|
+
include JsonDocs
|
7
|
+
|
8
|
+
describe '#run' do
|
9
|
+
it 'migrates fields to new names and types' do
|
10
|
+
input_hash = JSON.parse(full_geoblacklight)
|
11
|
+
expected_output = JSON.parse(full_geoblacklight_aardvark)
|
12
|
+
expect(described_class.new(v1_hash: input_hash).run).to eq(expected_output)
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'removes deprecated fields' do
|
16
|
+
input_hash = JSON.parse(full_geoblacklight)
|
17
|
+
output = described_class.new(v1_hash: input_hash).run
|
18
|
+
expect(output.keys).not_to include(described_class::SCHEMA_FIELD_MAP.keys)
|
19
|
+
expect(output.keys).not_to include('dc_type_s')
|
20
|
+
expect(output.keys).not_to include('layer_geom_type_s')
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'leaves custom fields unchanged' do
|
24
|
+
input_hash = JSON.parse(full_geoblacklight)
|
25
|
+
input_hash['custom_field'] = 'custom_value'
|
26
|
+
output = described_class.new(v1_hash: input_hash).run
|
27
|
+
expect(output['custom_field']).to eq('custom_value')
|
28
|
+
end
|
29
|
+
|
30
|
+
context 'when the given record is already in aardvark schema' do
|
31
|
+
it 'returns the record unchanged' do
|
32
|
+
input_hash = JSON.parse(full_geoblacklight_aardvark)
|
33
|
+
expect(described_class.new(v1_hash: input_hash).run).to eq(input_hash)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
context 'when the user supplies a mapping for collection names to ids' do
|
38
|
+
it 'converts the collection names to ids' do
|
39
|
+
input_hash = JSON.parse(full_geoblacklight)
|
40
|
+
collection_id_map = { 'Uganda GIS Maps and Data, 2000-2010' => 'stanford-rb371kw9607' }
|
41
|
+
output = described_class.new(v1_hash: input_hash, collection_id_map:).run
|
42
|
+
expect(output['dct_isPartOf_sm']).to eq(['stanford-rb371kw9607'])
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -2,25 +2,28 @@
|
|
2
2
|
|
3
3
|
require 'spec_helper'
|
4
4
|
|
5
|
-
RSpec.describe GeoCombine
|
5
|
+
RSpec.describe GeoCombine do
|
6
6
|
include XmlDocs
|
7
|
-
describe '#initialize' do
|
8
|
-
it 'reads metadata from file if File is readable' do
|
9
|
-
expect(File).to receive(:readable?).and_return(true)
|
10
|
-
expect(File).to receive(:read).and_return(simple_xml)
|
11
|
-
metadata_object = described_class.new('./tmp/fake/file/location')
|
12
|
-
expect(metadata_object).to be_an described_class
|
13
|
-
expect(metadata_object.metadata).to be_an Nokogiri::XML::Document
|
14
|
-
expect(metadata_object.metadata.css('Author').count).to eq 2
|
15
|
-
end
|
16
7
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
8
|
+
describe GeoCombine::Metadata do
|
9
|
+
describe '#initialize' do
|
10
|
+
it 'reads metadata from file if File is readable' do
|
11
|
+
expect(File).to receive(:readable?).and_return(true)
|
12
|
+
expect(File).to receive(:read).and_return(simple_xml)
|
13
|
+
metadata_object = described_class.new('./tmp/fake/file/location')
|
14
|
+
expect(metadata_object).to be_an described_class
|
15
|
+
expect(metadata_object.metadata).to be_an Nokogiri::XML::Document
|
16
|
+
expect(metadata_object.metadata.css('Author').count).to eq 2
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'reads metadata from parameter if File is not readable' do
|
20
|
+
metadata_object = described_class.new(simple_xml)
|
21
|
+
expect(metadata_object).to be_an described_class
|
22
|
+
expect(metadata_object.metadata).to be_an Nokogiri::XML::Document
|
23
|
+
expect(metadata_object.metadata.css('Author').count).to eq 2
|
24
|
+
end
|
22
25
|
end
|
26
|
+
# GeoCombine subclasses should individually test `to_geoblacklight` and
|
27
|
+
# `to_html` methods
|
23
28
|
end
|
24
|
-
# GeoCombine subclasses should individually test `to_geoblacklight` and
|
25
|
-
# `to_html` methods
|
26
29
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -7,7 +7,6 @@ SimpleCov.start 'rails' do
|
|
7
7
|
add_filter 'lib/tasks/geo_combine.rake'
|
8
8
|
add_filter 'lib/geo_combine/version.rb'
|
9
9
|
add_filter 'lib/geo_combine/railtie.rb'
|
10
|
-
add_filter 'lib/geo_combine/cli.rb'
|
11
10
|
minimum_coverage 95 # When updating this value, update the README badge value
|
12
11
|
end
|
13
12
|
|
@@ -23,7 +22,7 @@ require 'webmock/rspec'
|
|
23
22
|
WebMock.allow_net_connect!
|
24
23
|
|
25
24
|
# include the spec support files
|
26
|
-
Dir['./spec/support/**/*.rb'].
|
25
|
+
Dir['./spec/support/**/*.rb'].each { |f| require f }
|
27
26
|
|
28
27
|
RSpec.configure do |config|
|
29
28
|
config.include Helpers
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: geo_combine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jack Reed
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -108,6 +108,34 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '2.0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: git
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: faraday-retry
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '2.2'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '2.2'
|
111
139
|
- !ruby/object:Gem::Dependency
|
112
140
|
name: bundler
|
113
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -237,8 +265,7 @@ dependencies:
|
|
237
265
|
description: A Ruby toolkit for managing geospatial metadata
|
238
266
|
email:
|
239
267
|
- pjreed@stanford.edu
|
240
|
-
executables:
|
241
|
-
- geocombine
|
268
|
+
executables: []
|
242
269
|
extensions: []
|
243
270
|
extra_rdoc_files: []
|
244
271
|
files:
|
@@ -251,12 +278,10 @@ files:
|
|
251
278
|
- LICENSE.txt
|
252
279
|
- README.md
|
253
280
|
- Rakefile
|
254
|
-
- bin/geocombine
|
255
281
|
- geo_combine.gemspec
|
256
282
|
- lib/geo_combine.rb
|
257
283
|
- lib/geo_combine/bounding_box.rb
|
258
284
|
- lib/geo_combine/ckan_metadata.rb
|
259
|
-
- lib/geo_combine/cli.rb
|
260
285
|
- lib/geo_combine/esri_open_data.rb
|
261
286
|
- lib/geo_combine/exceptions.rb
|
262
287
|
- lib/geo_combine/fgdc.rb
|
@@ -265,7 +290,11 @@ files:
|
|
265
290
|
- lib/geo_combine/geo_blacklight_harvester.rb
|
266
291
|
- lib/geo_combine/geoblacklight.rb
|
267
292
|
- lib/geo_combine/geometry_types.rb
|
293
|
+
- lib/geo_combine/harvester.rb
|
294
|
+
- lib/geo_combine/indexer.rb
|
268
295
|
- lib/geo_combine/iso19139.rb
|
296
|
+
- lib/geo_combine/logger.rb
|
297
|
+
- lib/geo_combine/migrators/v1_aardvark_migrator.rb
|
269
298
|
- lib/geo_combine/ogp.rb
|
270
299
|
- lib/geo_combine/railtie.rb
|
271
300
|
- lib/geo_combine/subjects.rb
|
@@ -294,6 +323,7 @@ files:
|
|
294
323
|
- spec/fixtures/docs/ckan.json
|
295
324
|
- spec/fixtures/docs/esri_open_data.json
|
296
325
|
- spec/fixtures/docs/full_geoblacklight.json
|
326
|
+
- spec/fixtures/docs/full_geoblacklight_aardvark.json
|
297
327
|
- spec/fixtures/docs/geoblacklight_pre_v1.json
|
298
328
|
- spec/fixtures/docs/ogp_harvard_line.json
|
299
329
|
- spec/fixtures/docs/ogp_harvard_raster.json
|
@@ -304,6 +334,7 @@ files:
|
|
304
334
|
- spec/fixtures/docs/simple_xslt.xsl
|
305
335
|
- spec/fixtures/docs/stanford_iso.xml
|
306
336
|
- spec/fixtures/docs/tufts_fgdc.xml
|
337
|
+
- spec/fixtures/indexing/aardvark.json
|
307
338
|
- spec/fixtures/indexing/basic_geoblacklight.json
|
308
339
|
- spec/fixtures/indexing/geoblacklight.json
|
309
340
|
- spec/fixtures/indexing/layers.json
|
@@ -318,10 +349,12 @@ files:
|
|
318
349
|
- spec/lib/geo_combine/formatting_spec.rb
|
319
350
|
- spec/lib/geo_combine/geo_blacklight_harvester_spec.rb
|
320
351
|
- spec/lib/geo_combine/geoblacklight_spec.rb
|
352
|
+
- spec/lib/geo_combine/harvester_spec.rb
|
353
|
+
- spec/lib/geo_combine/indexer_spec.rb
|
321
354
|
- spec/lib/geo_combine/iso19139_spec.rb
|
355
|
+
- spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb
|
322
356
|
- spec/lib/geo_combine/ogp_spec.rb
|
323
357
|
- spec/lib/geo_combine_spec.rb
|
324
|
-
- spec/lib/tasks/geo_combine_spec.rb
|
325
358
|
- spec/spec_helper.rb
|
326
359
|
- spec/support/fixtures.rb
|
327
360
|
homepage: ''
|
@@ -343,7 +376,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
343
376
|
- !ruby/object:Gem::Version
|
344
377
|
version: '0'
|
345
378
|
requirements: []
|
346
|
-
rubygems_version: 3.
|
379
|
+
rubygems_version: 3.3.7
|
347
380
|
signing_key:
|
348
381
|
specification_version: 4
|
349
382
|
summary: A Ruby toolkit for managing geospatial metadata
|
@@ -354,6 +387,7 @@ test_files:
|
|
354
387
|
- spec/fixtures/docs/ckan.json
|
355
388
|
- spec/fixtures/docs/esri_open_data.json
|
356
389
|
- spec/fixtures/docs/full_geoblacklight.json
|
390
|
+
- spec/fixtures/docs/full_geoblacklight_aardvark.json
|
357
391
|
- spec/fixtures/docs/geoblacklight_pre_v1.json
|
358
392
|
- spec/fixtures/docs/ogp_harvard_line.json
|
359
393
|
- spec/fixtures/docs/ogp_harvard_raster.json
|
@@ -364,6 +398,7 @@ test_files:
|
|
364
398
|
- spec/fixtures/docs/simple_xslt.xsl
|
365
399
|
- spec/fixtures/docs/stanford_iso.xml
|
366
400
|
- spec/fixtures/docs/tufts_fgdc.xml
|
401
|
+
- spec/fixtures/indexing/aardvark.json
|
367
402
|
- spec/fixtures/indexing/basic_geoblacklight.json
|
368
403
|
- spec/fixtures/indexing/geoblacklight.json
|
369
404
|
- spec/fixtures/indexing/layers.json
|
@@ -378,9 +413,11 @@ test_files:
|
|
378
413
|
- spec/lib/geo_combine/formatting_spec.rb
|
379
414
|
- spec/lib/geo_combine/geo_blacklight_harvester_spec.rb
|
380
415
|
- spec/lib/geo_combine/geoblacklight_spec.rb
|
416
|
+
- spec/lib/geo_combine/harvester_spec.rb
|
417
|
+
- spec/lib/geo_combine/indexer_spec.rb
|
381
418
|
- spec/lib/geo_combine/iso19139_spec.rb
|
419
|
+
- spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb
|
382
420
|
- spec/lib/geo_combine/ogp_spec.rb
|
383
421
|
- spec/lib/geo_combine_spec.rb
|
384
|
-
- spec/lib/tasks/geo_combine_spec.rb
|
385
422
|
- spec/spec_helper.rb
|
386
423
|
- spec/support/fixtures.rb
|
data/bin/geocombine
DELETED
data/lib/geo_combine/cli.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'thor'
|
4
|
-
require 'rake'
|
5
|
-
|
6
|
-
root = Gem::Specification.find_by_name('geo_combine').gem_dir
|
7
|
-
tasks = File.join(root, 'lib/tasks/*.rake')
|
8
|
-
Dir.glob(tasks).each { |r| load r }
|
9
|
-
|
10
|
-
module GeoCombine
|
11
|
-
class CLI < Thor
|
12
|
-
desc 'clone', 'Clone all OpenGeoMetadata repositories'
|
13
|
-
def clone
|
14
|
-
Rake::Task['geocombine:clone'].invoke
|
15
|
-
end
|
16
|
-
|
17
|
-
desc 'pull', '"git pull" OpenGeoMetadata repositories'
|
18
|
-
def pull
|
19
|
-
Rake::Task['geocombine:pull'].invoke
|
20
|
-
end
|
21
|
-
|
22
|
-
desc 'index', 'Index all of the GeoBlacklight documents'
|
23
|
-
def index
|
24
|
-
Rake::Task['geocombine:index'].invoke
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
@@ -1,45 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'spec_helper'
|
4
|
-
require 'rake'
|
5
|
-
|
6
|
-
describe 'geo_combine.rake' do
|
7
|
-
before(:all) do
|
8
|
-
load File.expand_path('../../../lib/tasks/geo_combine.rake', __dir__)
|
9
|
-
Rake::Task.define_task(:environment)
|
10
|
-
end
|
11
|
-
|
12
|
-
before do
|
13
|
-
allow(ENV).to receive(:[]).and_call_original
|
14
|
-
allow(ENV).to receive(:[]).with('OGM_PATH').and_return(File.join(fixture_dir, 'indexing'))
|
15
|
-
end
|
16
|
-
|
17
|
-
describe 'geocombine:clone' do
|
18
|
-
before do
|
19
|
-
WebMock.disable_net_connect!
|
20
|
-
end
|
21
|
-
|
22
|
-
after do
|
23
|
-
WebMock.allow_net_connect!
|
24
|
-
end
|
25
|
-
|
26
|
-
it 'does not clone repos on deny list' do
|
27
|
-
stub_request(:get, 'https://api.github.com/orgs/opengeometadata/repos').to_return(status: 200, body: read_fixture('docs/repos.json'))
|
28
|
-
allow(Kernel).to receive(:system)
|
29
|
-
Rake::Task['geocombine:clone'].invoke
|
30
|
-
expect(Kernel).to have_received(:system).exactly(21).times
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
describe 'geocombine:index' do
|
35
|
-
it 'only indexes .json files but not layers.json' do
|
36
|
-
rsolr_mock = instance_double(RSolr::Client)
|
37
|
-
allow(rsolr_mock).to receive(:update)
|
38
|
-
allow(rsolr_mock).to receive(:commit)
|
39
|
-
allow(RSolr).to receive(:connect).and_return(rsolr_mock)
|
40
|
-
Rake::Task['geocombine:index'].invoke
|
41
|
-
# We expect 2 files to index
|
42
|
-
expect(rsolr_mock).to have_received(:update).exactly(2).times
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|