geo_combine 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +7 -16
- data/.gitignore +1 -0
- data/.rubocop.yml +5 -1
- data/.rubocop_todo.yml +34 -36
- data/README.md +47 -22
- data/geo_combine.gemspec +2 -0
- data/lib/geo_combine/ckan_metadata.rb +5 -4
- data/lib/geo_combine/formatting.rb +1 -1
- data/lib/geo_combine/geo_blacklight_harvester.rb +17 -12
- data/lib/geo_combine/geoblacklight.rb +1 -1
- data/lib/geo_combine/harvester.rb +132 -0
- data/lib/geo_combine/indexer.rb +126 -0
- data/lib/geo_combine/logger.rb +16 -0
- data/lib/geo_combine/migrators/v1_aardvark_migrator.rb +118 -0
- data/lib/geo_combine/ogp.rb +1 -1
- data/lib/geo_combine/railtie.rb +1 -0
- data/lib/geo_combine/version.rb +1 -1
- data/lib/geo_combine.rb +3 -0
- data/lib/tasks/geo_combine.rake +10 -65
- data/spec/fixtures/docs/full_geoblacklight.json +8 -1
- data/spec/fixtures/docs/full_geoblacklight_aardvark.json +51 -0
- data/spec/fixtures/indexing/aardvark.json +57 -0
- data/spec/fixtures/json_docs.rb +6 -0
- data/spec/lib/geo_combine/bounding_box_spec.rb +1 -1
- data/spec/lib/geo_combine/geo_blacklight_harvester_spec.rb +5 -4
- data/spec/lib/geo_combine/geoblacklight_spec.rb +3 -3
- data/spec/lib/geo_combine/harvester_spec.rb +133 -0
- data/spec/lib/geo_combine/indexer_spec.rb +134 -0
- data/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb +46 -0
- data/spec/lib/geo_combine_spec.rb +20 -17
- data/spec/spec_helper.rb +1 -2
- metadata +46 -9
- data/bin/geocombine +0 -6
- data/lib/geo_combine/cli.rb +0 -27
- data/spec/lib/tasks/geo_combine_spec.rb +0 -45
@@ -5,8 +5,9 @@ require 'spec_helper'
|
|
5
5
|
require 'rsolr'
|
6
6
|
|
7
7
|
RSpec.describe GeoCombine::GeoBlacklightHarvester do
|
8
|
-
subject(:harvester) { described_class.new(site_key) }
|
8
|
+
subject(:harvester) { described_class.new(site_key, logger:) }
|
9
9
|
|
10
|
+
let(:logger) { instance_double(Logger, warn: nil, info: nil, error: nil, debug: nil) }
|
10
11
|
let(:site_key) { :INSTITUTION }
|
11
12
|
let(:stub_json_response) { '{}' }
|
12
13
|
let(:stub_solr_connection) { double('RSolr::Connection') }
|
@@ -40,7 +41,7 @@ RSpec.describe GeoCombine::GeoBlacklightHarvester do
|
|
40
41
|
|
41
42
|
let(:docs) { [{ layer_slug_s: 'abc-123' }, { layer_slug_s: 'abc-321' }] }
|
42
43
|
let(:stub_json_response) do
|
43
|
-
{ response: { docs
|
44
|
+
{ response: { docs:, pages: { current_page: 1, total_pages: 1 } } }.to_json
|
44
45
|
end
|
45
46
|
|
46
47
|
it 'adds documents returned to solr' do
|
@@ -142,7 +143,7 @@ RSpec.describe GeoCombine::GeoBlacklightHarvester do
|
|
142
143
|
).and_return(stub_second_response.to_json)
|
143
144
|
base_url = 'https://example.com?f%5Bdct_provenance_s%5D%5B%5D=INSTITUTION&format=json&per_page=100'
|
144
145
|
docs = described_class::LegacyBlacklightResponse.new(response: stub_first_response,
|
145
|
-
base_url:
|
146
|
+
base_url:).documents
|
146
147
|
|
147
148
|
expect(docs.to_a).to eq([first_docs, second_docs])
|
148
149
|
end
|
@@ -182,7 +183,7 @@ RSpec.describe GeoCombine::GeoBlacklightHarvester do
|
|
182
183
|
|
183
184
|
base_url = 'https://example.com?f%5Bdct_provenance_s%5D%5B%5D=INSTITUTION&format=json&per_page=100'
|
184
185
|
docs = described_class::ModernBlacklightResponse.new(response: first_results_response,
|
185
|
-
base_url:
|
186
|
+
base_url:).documents
|
186
187
|
|
187
188
|
expect(docs.to_a).to eq([
|
188
189
|
[{ 'layer_slug_s' => 'abc-123' }, { 'layer_slug_s' => 'abc-321' }],
|
@@ -151,8 +151,8 @@ RSpec.describe GeoCombine::Geoblacklight do
|
|
151
151
|
let(:unparseable_json) do
|
152
152
|
<<-JSON
|
153
153
|
{
|
154
|
-
|
155
|
-
|
154
|
+
"http://schema.org/url":"http://example.com/abc123",,
|
155
|
+
"http://schema.org/downloadUrl":"http://example.com/abc123/data.zip"
|
156
156
|
}
|
157
157
|
JSON
|
158
158
|
end
|
@@ -184,7 +184,7 @@ RSpec.describe GeoCombine::Geoblacklight do
|
|
184
184
|
|
185
185
|
describe 'spatial_validate!' do
|
186
186
|
context 'when valid' do
|
187
|
-
it { full_geobl.spatial_validate! }
|
187
|
+
it { expect { full_geobl.spatial_validate! }.not_to raise_error }
|
188
188
|
end
|
189
189
|
|
190
190
|
context 'when invalid' do
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'git'
|
4
|
+
require 'geo_combine/harvester'
|
5
|
+
require 'spec_helper'
|
6
|
+
|
7
|
+
RSpec.describe GeoCombine::Harvester do
|
8
|
+
subject(:harvester) { described_class.new(ogm_path: 'spec/fixtures/indexing', logger:) }
|
9
|
+
|
10
|
+
let(:logger) { instance_double(Logger, warn: nil, info: nil, error: nil, debug: nil) }
|
11
|
+
let(:repo_name) { 'my-institution' }
|
12
|
+
let(:repo_path) { File.join(harvester.ogm_path, repo_name) }
|
13
|
+
let(:repo_url) { "https://github.com/OpenGeoMetadata/#{repo_name}.git" }
|
14
|
+
let(:stub_repo) { instance_double(Git::Base) }
|
15
|
+
let(:stub_gh_api) do
|
16
|
+
[
|
17
|
+
{ name: repo_name, size: 100 },
|
18
|
+
{ name: 'another-institution', size: 100 },
|
19
|
+
{ name: 'outdated-institution', size: 100, archived: true }, # archived
|
20
|
+
{ name: 'aardvark', size: 300 }, # on denylist
|
21
|
+
{ name: 'empty', size: 0 } # no data
|
22
|
+
]
|
23
|
+
end
|
24
|
+
|
25
|
+
before do
|
26
|
+
# stub github API requests
|
27
|
+
# use the whole org response, or just a portion for particular repos
|
28
|
+
allow(Net::HTTP).to receive(:get) do |uri|
|
29
|
+
if uri == described_class.ogm_api_uri
|
30
|
+
stub_gh_api.to_json
|
31
|
+
else
|
32
|
+
repo_name = uri.path.split('/').last.gsub('.git', '')
|
33
|
+
stub_gh_api.find { |repo| repo[:name] == repo_name }.to_json
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# stub git commands
|
38
|
+
allow(Git).to receive_messages(open: stub_repo, clone: stub_repo)
|
39
|
+
allow(stub_repo).to receive(:pull).and_return(stub_repo)
|
40
|
+
end
|
41
|
+
|
42
|
+
describe '#docs_to_index' do
|
43
|
+
it 'yields each JSON record with its path, skipping layers.JSON' do
|
44
|
+
expect { |b| harvester.docs_to_index(&b) }.to yield_successive_args(
|
45
|
+
[JSON.parse(File.read('spec/fixtures/indexing/basic_geoblacklight.json')), 'spec/fixtures/indexing/basic_geoblacklight.json'],
|
46
|
+
[JSON.parse(File.read('spec/fixtures/indexing/geoblacklight.json')), 'spec/fixtures/indexing/geoblacklight.json']
|
47
|
+
)
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'skips records with a different schema version' do
|
51
|
+
harvester = described_class.new(ogm_path: 'spec/fixtures/indexing/', schema_version: 'Aardvark', logger:)
|
52
|
+
expect { |b| harvester.docs_to_index(&b) }.to yield_successive_args(
|
53
|
+
[JSON.parse(File.read('spec/fixtures/indexing/aardvark.json')), 'spec/fixtures/indexing/aardvark.json']
|
54
|
+
)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
describe '#pull' do
|
59
|
+
it 'can pull a single repository' do
|
60
|
+
harvester.pull(repo_name)
|
61
|
+
expect(Git).to have_received(:open).with(repo_path)
|
62
|
+
expect(stub_repo).to have_received(:pull)
|
63
|
+
end
|
64
|
+
|
65
|
+
it 'clones a repo before pulling if it does not exist' do
|
66
|
+
harvester.pull(repo_name)
|
67
|
+
expect(Git).to have_received(:clone)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
describe '#pull_all' do
|
72
|
+
it 'can pull all repositories' do
|
73
|
+
harvester.pull_all
|
74
|
+
expect(Git).to have_received(:open).exactly(2).times
|
75
|
+
expect(stub_repo).to have_received(:pull).exactly(2).times
|
76
|
+
end
|
77
|
+
|
78
|
+
it 'returns the names of repositories pulled' do
|
79
|
+
expect(harvester.pull_all).to eq(%w[my-institution another-institution])
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'skips repositories in the denylist' do
|
83
|
+
harvester.pull_all
|
84
|
+
expect(Git).not_to have_received(:open).with('https://github.com/OpenGeoMetadata/aardvark.git')
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'skips archived repositories' do
|
88
|
+
harvester.pull_all
|
89
|
+
expect(Git).not_to have_received(:open).with('https://github.com/OpenGeoMetadata/outdated-institution.git')
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
describe '#clone' do
|
94
|
+
it 'can clone a single repository' do
|
95
|
+
harvester.clone(repo_name)
|
96
|
+
expect(Git).to have_received(:clone).with(
|
97
|
+
repo_url,
|
98
|
+
nil, {
|
99
|
+
depth: 1, # shallow clone
|
100
|
+
path: harvester.ogm_path
|
101
|
+
}
|
102
|
+
)
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'skips repositories that already exist' do
|
106
|
+
allow(File).to receive(:directory?).with(repo_path).and_return(true)
|
107
|
+
harvester.clone(repo_name)
|
108
|
+
expect(Git).not_to have_received(:clone)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
describe '#clone_all' do
|
113
|
+
it 'can clone all repositories' do
|
114
|
+
harvester.clone_all
|
115
|
+
expect(Git).to have_received(:clone).exactly(2).times
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'skips repositories in the denylist' do
|
119
|
+
harvester.clone_all
|
120
|
+
expect(Git).not_to have_received(:clone).with('https://github.com/OpenGeoMetadata/aardvark.git')
|
121
|
+
end
|
122
|
+
|
123
|
+
it 'returns the names of repositories cloned' do
|
124
|
+
expect(harvester.clone_all).to eq(%w[my-institution another-institution])
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
describe '#ogm_api_uri' do
|
129
|
+
it 'includes a per_page param' do
|
130
|
+
expect(described_class.send('ogm_api_uri').to_s).to include('per_page')
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'geo_combine/indexer'
|
4
|
+
require 'spec_helper'
|
5
|
+
|
6
|
+
# Mock an available Blacklight installation
|
7
|
+
class FakeBlacklight
|
8
|
+
def self.default_index
|
9
|
+
Repository
|
10
|
+
end
|
11
|
+
|
12
|
+
class Repository
|
13
|
+
def self.connection; end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
RSpec.describe GeoCombine::Indexer do
|
18
|
+
subject(:indexer) { described_class.new(solr:, logger:) }
|
19
|
+
|
20
|
+
let(:logger) { instance_double(Logger, warn: nil, info: nil, error: nil, debug: nil) }
|
21
|
+
let(:solr) { instance_double(RSolr::Client, options: { url: 'TEST' }) }
|
22
|
+
let(:docs) do
|
23
|
+
[
|
24
|
+
[{ 'id' => '1' }, 'path/to/record1.json'], # v1.0 schema
|
25
|
+
[{ 'dc_identifier_s' => '2' }, 'path/to/record2.json'] # aardvark schema
|
26
|
+
]
|
27
|
+
end
|
28
|
+
|
29
|
+
before do
|
30
|
+
allow(solr).to receive(:update)
|
31
|
+
allow(solr).to receive(:commit)
|
32
|
+
end
|
33
|
+
|
34
|
+
describe '#initialize' do
|
35
|
+
before do
|
36
|
+
allow(RSolr).to receive(:connect).and_return(solr)
|
37
|
+
end
|
38
|
+
|
39
|
+
context 'when solr url is set in the environment' do
|
40
|
+
before do
|
41
|
+
stub_const('ENV', 'SOLR_URL' => 'http://localhost:8983/solr/geoblacklight')
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'connects to the solr instance' do
|
45
|
+
described_class.new(logger:)
|
46
|
+
expect(RSolr).to have_received(:connect).with(
|
47
|
+
be_a(Faraday::Connection),
|
48
|
+
url: 'http://localhost:8983/solr/geoblacklight'
|
49
|
+
)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
context 'when there is a configured Blacklight connection' do
|
54
|
+
before do
|
55
|
+
stub_const('Blacklight', FakeBlacklight)
|
56
|
+
allow(FakeBlacklight::Repository).to receive(:connection).and_return(
|
57
|
+
instance_double(RSolr::Client, base_uri: URI('http://localhost:8983/solr/geoblacklight'))
|
58
|
+
)
|
59
|
+
end
|
60
|
+
|
61
|
+
it 'connects to the solr instance' do
|
62
|
+
described_class.new(logger:)
|
63
|
+
expect(RSolr).to have_received(:connect).with(
|
64
|
+
be_a(Faraday::Connection),
|
65
|
+
url: 'http://localhost:8983/solr/geoblacklight'
|
66
|
+
)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
context 'when solr url is not set' do
|
71
|
+
before do
|
72
|
+
stub_const('ENV', {})
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'falls back to the Blacklight default' do
|
76
|
+
described_class.new(logger:)
|
77
|
+
expect(RSolr).to have_received(:connect).with(
|
78
|
+
be_a(Faraday::Connection),
|
79
|
+
url: 'http://localhost:8983/solr/blacklight-core'
|
80
|
+
)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
describe '#index' do
|
86
|
+
let(:solr_error_msg) { { error: { msg: 'error message' } }.to_json }
|
87
|
+
let(:solr_response) { { status: '400', body: solr_error_msg } }
|
88
|
+
let(:error) { RSolr::Error::Http.new({ uri: URI('') }, solr_response) }
|
89
|
+
|
90
|
+
it 'sends records in batches to solr' do
|
91
|
+
indexer.index(docs)
|
92
|
+
expect(solr).to have_received(:update).with(
|
93
|
+
data: "{ add: { doc: {\"id\":\"1\"} },\nadd: { doc: {\"dc_identifier_s\":\"2\"} } }",
|
94
|
+
headers: { 'Content-Type' => 'application/json' },
|
95
|
+
params: { overwrite: true }
|
96
|
+
)
|
97
|
+
end
|
98
|
+
|
99
|
+
it 'commits changes to solr after indexing' do
|
100
|
+
indexer.index(docs)
|
101
|
+
expect(solr).to have_received(:commit).once
|
102
|
+
end
|
103
|
+
|
104
|
+
it 'returns the count of records successfully indexed' do
|
105
|
+
expect(indexer.index(docs)).to eq 2
|
106
|
+
end
|
107
|
+
|
108
|
+
context 'when an error occurs during batch indexing' do
|
109
|
+
before do
|
110
|
+
allow(solr).to receive(:update).and_raise(error)
|
111
|
+
allow(solr).to receive(:add)
|
112
|
+
end
|
113
|
+
|
114
|
+
it 'attempts to index records individually' do
|
115
|
+
total = indexer.index(docs)
|
116
|
+
expect(solr).to have_received(:add).twice
|
117
|
+
expect(total).to eq 2
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
context 'when an error occurs during individual indexing' do
|
122
|
+
before do
|
123
|
+
allow(solr).to receive(:update).and_raise(error)
|
124
|
+
allow(solr).to receive(:add).with(docs[0][0], anything).and_raise(error)
|
125
|
+
allow(solr).to receive(:add).with(docs[1][0], anything)
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'continues indexing' do
|
129
|
+
total = indexer.index(docs)
|
130
|
+
expect(total).to eq 1
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe GeoCombine::Migrators::V1AardvarkMigrator do
|
6
|
+
include JsonDocs
|
7
|
+
|
8
|
+
describe '#run' do
|
9
|
+
it 'migrates fields to new names and types' do
|
10
|
+
input_hash = JSON.parse(full_geoblacklight)
|
11
|
+
expected_output = JSON.parse(full_geoblacklight_aardvark)
|
12
|
+
expect(described_class.new(v1_hash: input_hash).run).to eq(expected_output)
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'removes deprecated fields' do
|
16
|
+
input_hash = JSON.parse(full_geoblacklight)
|
17
|
+
output = described_class.new(v1_hash: input_hash).run
|
18
|
+
expect(output.keys).not_to include(described_class::SCHEMA_FIELD_MAP.keys)
|
19
|
+
expect(output.keys).not_to include('dc_type_s')
|
20
|
+
expect(output.keys).not_to include('layer_geom_type_s')
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'leaves custom fields unchanged' do
|
24
|
+
input_hash = JSON.parse(full_geoblacklight)
|
25
|
+
input_hash['custom_field'] = 'custom_value'
|
26
|
+
output = described_class.new(v1_hash: input_hash).run
|
27
|
+
expect(output['custom_field']).to eq('custom_value')
|
28
|
+
end
|
29
|
+
|
30
|
+
context 'when the given record is already in aardvark schema' do
|
31
|
+
it 'returns the record unchanged' do
|
32
|
+
input_hash = JSON.parse(full_geoblacklight_aardvark)
|
33
|
+
expect(described_class.new(v1_hash: input_hash).run).to eq(input_hash)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
context 'when the user supplies a mapping for collection names to ids' do
|
38
|
+
it 'converts the collection names to ids' do
|
39
|
+
input_hash = JSON.parse(full_geoblacklight)
|
40
|
+
collection_id_map = { 'Uganda GIS Maps and Data, 2000-2010' => 'stanford-rb371kw9607' }
|
41
|
+
output = described_class.new(v1_hash: input_hash, collection_id_map:).run
|
42
|
+
expect(output['dct_isPartOf_sm']).to eq(['stanford-rb371kw9607'])
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -2,25 +2,28 @@
|
|
2
2
|
|
3
3
|
require 'spec_helper'
|
4
4
|
|
5
|
-
RSpec.describe GeoCombine
|
5
|
+
RSpec.describe GeoCombine do
|
6
6
|
include XmlDocs
|
7
|
-
describe '#initialize' do
|
8
|
-
it 'reads metadata from file if File is readable' do
|
9
|
-
expect(File).to receive(:readable?).and_return(true)
|
10
|
-
expect(File).to receive(:read).and_return(simple_xml)
|
11
|
-
metadata_object = described_class.new('./tmp/fake/file/location')
|
12
|
-
expect(metadata_object).to be_an described_class
|
13
|
-
expect(metadata_object.metadata).to be_an Nokogiri::XML::Document
|
14
|
-
expect(metadata_object.metadata.css('Author').count).to eq 2
|
15
|
-
end
|
16
7
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
8
|
+
describe GeoCombine::Metadata do
|
9
|
+
describe '#initialize' do
|
10
|
+
it 'reads metadata from file if File is readable' do
|
11
|
+
expect(File).to receive(:readable?).and_return(true)
|
12
|
+
expect(File).to receive(:read).and_return(simple_xml)
|
13
|
+
metadata_object = described_class.new('./tmp/fake/file/location')
|
14
|
+
expect(metadata_object).to be_an described_class
|
15
|
+
expect(metadata_object.metadata).to be_an Nokogiri::XML::Document
|
16
|
+
expect(metadata_object.metadata.css('Author').count).to eq 2
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'reads metadata from parameter if File is not readable' do
|
20
|
+
metadata_object = described_class.new(simple_xml)
|
21
|
+
expect(metadata_object).to be_an described_class
|
22
|
+
expect(metadata_object.metadata).to be_an Nokogiri::XML::Document
|
23
|
+
expect(metadata_object.metadata.css('Author').count).to eq 2
|
24
|
+
end
|
22
25
|
end
|
26
|
+
# GeoCombine subclasses should individually test `to_geoblacklight` and
|
27
|
+
# `to_html` methods
|
23
28
|
end
|
24
|
-
# GeoCombine subclasses should individually test `to_geoblacklight` and
|
25
|
-
# `to_html` methods
|
26
29
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -7,7 +7,6 @@ SimpleCov.start 'rails' do
|
|
7
7
|
add_filter 'lib/tasks/geo_combine.rake'
|
8
8
|
add_filter 'lib/geo_combine/version.rb'
|
9
9
|
add_filter 'lib/geo_combine/railtie.rb'
|
10
|
-
add_filter 'lib/geo_combine/cli.rb'
|
11
10
|
minimum_coverage 95 # When updating this value, update the README badge value
|
12
11
|
end
|
13
12
|
|
@@ -23,7 +22,7 @@ require 'webmock/rspec'
|
|
23
22
|
WebMock.allow_net_connect!
|
24
23
|
|
25
24
|
# include the spec support files
|
26
|
-
Dir['./spec/support/**/*.rb'].
|
25
|
+
Dir['./spec/support/**/*.rb'].each { |f| require f }
|
27
26
|
|
28
27
|
RSpec.configure do |config|
|
29
28
|
config.include Helpers
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: geo_combine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jack Reed
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -108,6 +108,34 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '2.0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: git
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: faraday-retry
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '2.2'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '2.2'
|
111
139
|
- !ruby/object:Gem::Dependency
|
112
140
|
name: bundler
|
113
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -237,8 +265,7 @@ dependencies:
|
|
237
265
|
description: A Ruby toolkit for managing geospatial metadata
|
238
266
|
email:
|
239
267
|
- pjreed@stanford.edu
|
240
|
-
executables:
|
241
|
-
- geocombine
|
268
|
+
executables: []
|
242
269
|
extensions: []
|
243
270
|
extra_rdoc_files: []
|
244
271
|
files:
|
@@ -251,12 +278,10 @@ files:
|
|
251
278
|
- LICENSE.txt
|
252
279
|
- README.md
|
253
280
|
- Rakefile
|
254
|
-
- bin/geocombine
|
255
281
|
- geo_combine.gemspec
|
256
282
|
- lib/geo_combine.rb
|
257
283
|
- lib/geo_combine/bounding_box.rb
|
258
284
|
- lib/geo_combine/ckan_metadata.rb
|
259
|
-
- lib/geo_combine/cli.rb
|
260
285
|
- lib/geo_combine/esri_open_data.rb
|
261
286
|
- lib/geo_combine/exceptions.rb
|
262
287
|
- lib/geo_combine/fgdc.rb
|
@@ -265,7 +290,11 @@ files:
|
|
265
290
|
- lib/geo_combine/geo_blacklight_harvester.rb
|
266
291
|
- lib/geo_combine/geoblacklight.rb
|
267
292
|
- lib/geo_combine/geometry_types.rb
|
293
|
+
- lib/geo_combine/harvester.rb
|
294
|
+
- lib/geo_combine/indexer.rb
|
268
295
|
- lib/geo_combine/iso19139.rb
|
296
|
+
- lib/geo_combine/logger.rb
|
297
|
+
- lib/geo_combine/migrators/v1_aardvark_migrator.rb
|
269
298
|
- lib/geo_combine/ogp.rb
|
270
299
|
- lib/geo_combine/railtie.rb
|
271
300
|
- lib/geo_combine/subjects.rb
|
@@ -294,6 +323,7 @@ files:
|
|
294
323
|
- spec/fixtures/docs/ckan.json
|
295
324
|
- spec/fixtures/docs/esri_open_data.json
|
296
325
|
- spec/fixtures/docs/full_geoblacklight.json
|
326
|
+
- spec/fixtures/docs/full_geoblacklight_aardvark.json
|
297
327
|
- spec/fixtures/docs/geoblacklight_pre_v1.json
|
298
328
|
- spec/fixtures/docs/ogp_harvard_line.json
|
299
329
|
- spec/fixtures/docs/ogp_harvard_raster.json
|
@@ -304,6 +334,7 @@ files:
|
|
304
334
|
- spec/fixtures/docs/simple_xslt.xsl
|
305
335
|
- spec/fixtures/docs/stanford_iso.xml
|
306
336
|
- spec/fixtures/docs/tufts_fgdc.xml
|
337
|
+
- spec/fixtures/indexing/aardvark.json
|
307
338
|
- spec/fixtures/indexing/basic_geoblacklight.json
|
308
339
|
- spec/fixtures/indexing/geoblacklight.json
|
309
340
|
- spec/fixtures/indexing/layers.json
|
@@ -318,10 +349,12 @@ files:
|
|
318
349
|
- spec/lib/geo_combine/formatting_spec.rb
|
319
350
|
- spec/lib/geo_combine/geo_blacklight_harvester_spec.rb
|
320
351
|
- spec/lib/geo_combine/geoblacklight_spec.rb
|
352
|
+
- spec/lib/geo_combine/harvester_spec.rb
|
353
|
+
- spec/lib/geo_combine/indexer_spec.rb
|
321
354
|
- spec/lib/geo_combine/iso19139_spec.rb
|
355
|
+
- spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb
|
322
356
|
- spec/lib/geo_combine/ogp_spec.rb
|
323
357
|
- spec/lib/geo_combine_spec.rb
|
324
|
-
- spec/lib/tasks/geo_combine_spec.rb
|
325
358
|
- spec/spec_helper.rb
|
326
359
|
- spec/support/fixtures.rb
|
327
360
|
homepage: ''
|
@@ -343,7 +376,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
343
376
|
- !ruby/object:Gem::Version
|
344
377
|
version: '0'
|
345
378
|
requirements: []
|
346
|
-
rubygems_version: 3.
|
379
|
+
rubygems_version: 3.3.7
|
347
380
|
signing_key:
|
348
381
|
specification_version: 4
|
349
382
|
summary: A Ruby toolkit for managing geospatial metadata
|
@@ -354,6 +387,7 @@ test_files:
|
|
354
387
|
- spec/fixtures/docs/ckan.json
|
355
388
|
- spec/fixtures/docs/esri_open_data.json
|
356
389
|
- spec/fixtures/docs/full_geoblacklight.json
|
390
|
+
- spec/fixtures/docs/full_geoblacklight_aardvark.json
|
357
391
|
- spec/fixtures/docs/geoblacklight_pre_v1.json
|
358
392
|
- spec/fixtures/docs/ogp_harvard_line.json
|
359
393
|
- spec/fixtures/docs/ogp_harvard_raster.json
|
@@ -364,6 +398,7 @@ test_files:
|
|
364
398
|
- spec/fixtures/docs/simple_xslt.xsl
|
365
399
|
- spec/fixtures/docs/stanford_iso.xml
|
366
400
|
- spec/fixtures/docs/tufts_fgdc.xml
|
401
|
+
- spec/fixtures/indexing/aardvark.json
|
367
402
|
- spec/fixtures/indexing/basic_geoblacklight.json
|
368
403
|
- spec/fixtures/indexing/geoblacklight.json
|
369
404
|
- spec/fixtures/indexing/layers.json
|
@@ -378,9 +413,11 @@ test_files:
|
|
378
413
|
- spec/lib/geo_combine/formatting_spec.rb
|
379
414
|
- spec/lib/geo_combine/geo_blacklight_harvester_spec.rb
|
380
415
|
- spec/lib/geo_combine/geoblacklight_spec.rb
|
416
|
+
- spec/lib/geo_combine/harvester_spec.rb
|
417
|
+
- spec/lib/geo_combine/indexer_spec.rb
|
381
418
|
- spec/lib/geo_combine/iso19139_spec.rb
|
419
|
+
- spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb
|
382
420
|
- spec/lib/geo_combine/ogp_spec.rb
|
383
421
|
- spec/lib/geo_combine_spec.rb
|
384
|
-
- spec/lib/tasks/geo_combine_spec.rb
|
385
422
|
- spec/spec_helper.rb
|
386
423
|
- spec/support/fixtures.rb
|
data/bin/geocombine
DELETED
data/lib/geo_combine/cli.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'thor'
|
4
|
-
require 'rake'
|
5
|
-
|
6
|
-
root = Gem::Specification.find_by_name('geo_combine').gem_dir
|
7
|
-
tasks = File.join(root, 'lib/tasks/*.rake')
|
8
|
-
Dir.glob(tasks).each { |r| load r }
|
9
|
-
|
10
|
-
module GeoCombine
|
11
|
-
class CLI < Thor
|
12
|
-
desc 'clone', 'Clone all OpenGeoMetadata repositories'
|
13
|
-
def clone
|
14
|
-
Rake::Task['geocombine:clone'].invoke
|
15
|
-
end
|
16
|
-
|
17
|
-
desc 'pull', '"git pull" OpenGeoMetadata repositories'
|
18
|
-
def pull
|
19
|
-
Rake::Task['geocombine:pull'].invoke
|
20
|
-
end
|
21
|
-
|
22
|
-
desc 'index', 'Index all of the GeoBlacklight documents'
|
23
|
-
def index
|
24
|
-
Rake::Task['geocombine:index'].invoke
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
@@ -1,45 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'spec_helper'
|
4
|
-
require 'rake'
|
5
|
-
|
6
|
-
describe 'geo_combine.rake' do
|
7
|
-
before(:all) do
|
8
|
-
load File.expand_path('../../../lib/tasks/geo_combine.rake', __dir__)
|
9
|
-
Rake::Task.define_task(:environment)
|
10
|
-
end
|
11
|
-
|
12
|
-
before do
|
13
|
-
allow(ENV).to receive(:[]).and_call_original
|
14
|
-
allow(ENV).to receive(:[]).with('OGM_PATH').and_return(File.join(fixture_dir, 'indexing'))
|
15
|
-
end
|
16
|
-
|
17
|
-
describe 'geocombine:clone' do
|
18
|
-
before do
|
19
|
-
WebMock.disable_net_connect!
|
20
|
-
end
|
21
|
-
|
22
|
-
after do
|
23
|
-
WebMock.allow_net_connect!
|
24
|
-
end
|
25
|
-
|
26
|
-
it 'does not clone repos on deny list' do
|
27
|
-
stub_request(:get, 'https://api.github.com/orgs/opengeometadata/repos').to_return(status: 200, body: read_fixture('docs/repos.json'))
|
28
|
-
allow(Kernel).to receive(:system)
|
29
|
-
Rake::Task['geocombine:clone'].invoke
|
30
|
-
expect(Kernel).to have_received(:system).exactly(21).times
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
describe 'geocombine:index' do
|
35
|
-
it 'only indexes .json files but not layers.json' do
|
36
|
-
rsolr_mock = instance_double(RSolr::Client)
|
37
|
-
allow(rsolr_mock).to receive(:update)
|
38
|
-
allow(rsolr_mock).to receive(:commit)
|
39
|
-
allow(RSolr).to receive(:connect).and_return(rsolr_mock)
|
40
|
-
Rake::Task['geocombine:index'].invoke
|
41
|
-
# We expect 2 files to index
|
42
|
-
expect(rsolr_mock).to have_received(:update).exactly(2).times
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|