geo_combine 0.7.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +7 -16
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +5 -1
  5. data/.rubocop_todo.yml +34 -36
  6. data/README.md +47 -22
  7. data/geo_combine.gemspec +2 -0
  8. data/lib/geo_combine/ckan_metadata.rb +5 -4
  9. data/lib/geo_combine/formatting.rb +1 -1
  10. data/lib/geo_combine/geo_blacklight_harvester.rb +17 -12
  11. data/lib/geo_combine/geoblacklight.rb +1 -1
  12. data/lib/geo_combine/harvester.rb +132 -0
  13. data/lib/geo_combine/indexer.rb +126 -0
  14. data/lib/geo_combine/logger.rb +16 -0
  15. data/lib/geo_combine/migrators/v1_aardvark_migrator.rb +118 -0
  16. data/lib/geo_combine/ogp.rb +1 -1
  17. data/lib/geo_combine/railtie.rb +1 -0
  18. data/lib/geo_combine/version.rb +1 -1
  19. data/lib/geo_combine.rb +3 -0
  20. data/lib/tasks/geo_combine.rake +10 -65
  21. data/spec/fixtures/docs/full_geoblacklight.json +8 -1
  22. data/spec/fixtures/docs/full_geoblacklight_aardvark.json +51 -0
  23. data/spec/fixtures/indexing/aardvark.json +57 -0
  24. data/spec/fixtures/json_docs.rb +6 -0
  25. data/spec/lib/geo_combine/bounding_box_spec.rb +1 -1
  26. data/spec/lib/geo_combine/geo_blacklight_harvester_spec.rb +5 -4
  27. data/spec/lib/geo_combine/geoblacklight_spec.rb +3 -3
  28. data/spec/lib/geo_combine/harvester_spec.rb +133 -0
  29. data/spec/lib/geo_combine/indexer_spec.rb +134 -0
  30. data/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb +46 -0
  31. data/spec/lib/geo_combine_spec.rb +20 -17
  32. data/spec/spec_helper.rb +1 -2
  33. metadata +46 -9
  34. data/bin/geocombine +0 -6
  35. data/lib/geo_combine/cli.rb +0 -27
  36. data/spec/lib/tasks/geo_combine_spec.rb +0 -45
@@ -5,8 +5,9 @@ require 'spec_helper'
5
5
  require 'rsolr'
6
6
 
7
7
  RSpec.describe GeoCombine::GeoBlacklightHarvester do
8
- subject(:harvester) { described_class.new(site_key) }
8
+ subject(:harvester) { described_class.new(site_key, logger:) }
9
9
 
10
+ let(:logger) { instance_double(Logger, warn: nil, info: nil, error: nil, debug: nil) }
10
11
  let(:site_key) { :INSTITUTION }
11
12
  let(:stub_json_response) { '{}' }
12
13
  let(:stub_solr_connection) { double('RSolr::Connection') }
@@ -40,7 +41,7 @@ RSpec.describe GeoCombine::GeoBlacklightHarvester do
40
41
 
41
42
  let(:docs) { [{ layer_slug_s: 'abc-123' }, { layer_slug_s: 'abc-321' }] }
42
43
  let(:stub_json_response) do
43
- { response: { docs: docs, pages: { current_page: 1, total_pages: 1 } } }.to_json
44
+ { response: { docs:, pages: { current_page: 1, total_pages: 1 } } }.to_json
44
45
  end
45
46
 
46
47
  it 'adds documents returned to solr' do
@@ -142,7 +143,7 @@ RSpec.describe GeoCombine::GeoBlacklightHarvester do
142
143
  ).and_return(stub_second_response.to_json)
143
144
  base_url = 'https://example.com?f%5Bdct_provenance_s%5D%5B%5D=INSTITUTION&format=json&per_page=100'
144
145
  docs = described_class::LegacyBlacklightResponse.new(response: stub_first_response,
145
- base_url: base_url).documents
146
+ base_url:).documents
146
147
 
147
148
  expect(docs.to_a).to eq([first_docs, second_docs])
148
149
  end
@@ -182,7 +183,7 @@ RSpec.describe GeoCombine::GeoBlacklightHarvester do
182
183
 
183
184
  base_url = 'https://example.com?f%5Bdct_provenance_s%5D%5B%5D=INSTITUTION&format=json&per_page=100'
184
185
  docs = described_class::ModernBlacklightResponse.new(response: first_results_response,
185
- base_url: base_url).documents
186
+ base_url:).documents
186
187
 
187
188
  expect(docs.to_a).to eq([
188
189
  [{ 'layer_slug_s' => 'abc-123' }, { 'layer_slug_s' => 'abc-321' }],
@@ -151,8 +151,8 @@ RSpec.describe GeoCombine::Geoblacklight do
151
151
  let(:unparseable_json) do
152
152
  <<-JSON
153
153
  {
154
- \"http://schema.org/url\":\"http://example.com/abc123\",,
155
- \"http://schema.org/downloadUrl\":\"http://example.com/abc123/data.zip\"
154
+ "http://schema.org/url":"http://example.com/abc123",,
155
+ "http://schema.org/downloadUrl":"http://example.com/abc123/data.zip"
156
156
  }
157
157
  JSON
158
158
  end
@@ -184,7 +184,7 @@ RSpec.describe GeoCombine::Geoblacklight do
184
184
 
185
185
  describe 'spatial_validate!' do
186
186
  context 'when valid' do
187
- it { full_geobl.spatial_validate! }
187
+ it { expect { full_geobl.spatial_validate! }.not_to raise_error }
188
188
  end
189
189
 
190
190
  context 'when invalid' do
@@ -0,0 +1,133 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'git'
4
+ require 'geo_combine/harvester'
5
+ require 'spec_helper'
6
+
7
+ RSpec.describe GeoCombine::Harvester do
8
+ subject(:harvester) { described_class.new(ogm_path: 'spec/fixtures/indexing', logger:) }
9
+
10
+ let(:logger) { instance_double(Logger, warn: nil, info: nil, error: nil, debug: nil) }
11
+ let(:repo_name) { 'my-institution' }
12
+ let(:repo_path) { File.join(harvester.ogm_path, repo_name) }
13
+ let(:repo_url) { "https://github.com/OpenGeoMetadata/#{repo_name}.git" }
14
+ let(:stub_repo) { instance_double(Git::Base) }
15
+ let(:stub_gh_api) do
16
+ [
17
+ { name: repo_name, size: 100 },
18
+ { name: 'another-institution', size: 100 },
19
+ { name: 'outdated-institution', size: 100, archived: true }, # archived
20
+ { name: 'aardvark', size: 300 }, # on denylist
21
+ { name: 'empty', size: 0 } # no data
22
+ ]
23
+ end
24
+
25
+ before do
26
+ # stub github API requests
27
+ # use the whole org response, or just a portion for particular repos
28
+ allow(Net::HTTP).to receive(:get) do |uri|
29
+ if uri == described_class.ogm_api_uri
30
+ stub_gh_api.to_json
31
+ else
32
+ repo_name = uri.path.split('/').last.gsub('.git', '')
33
+ stub_gh_api.find { |repo| repo[:name] == repo_name }.to_json
34
+ end
35
+ end
36
+
37
+ # stub git commands
38
+ allow(Git).to receive_messages(open: stub_repo, clone: stub_repo)
39
+ allow(stub_repo).to receive(:pull).and_return(stub_repo)
40
+ end
41
+
42
+ describe '#docs_to_index' do
43
+ it 'yields each JSON record with its path, skipping layers.JSON' do
44
+ expect { |b| harvester.docs_to_index(&b) }.to yield_successive_args(
45
+ [JSON.parse(File.read('spec/fixtures/indexing/basic_geoblacklight.json')), 'spec/fixtures/indexing/basic_geoblacklight.json'],
46
+ [JSON.parse(File.read('spec/fixtures/indexing/geoblacklight.json')), 'spec/fixtures/indexing/geoblacklight.json']
47
+ )
48
+ end
49
+
50
+ it 'skips records with a different schema version' do
51
+ harvester = described_class.new(ogm_path: 'spec/fixtures/indexing/', schema_version: 'Aardvark', logger:)
52
+ expect { |b| harvester.docs_to_index(&b) }.to yield_successive_args(
53
+ [JSON.parse(File.read('spec/fixtures/indexing/aardvark.json')), 'spec/fixtures/indexing/aardvark.json']
54
+ )
55
+ end
56
+ end
57
+
58
+ describe '#pull' do
59
+ it 'can pull a single repository' do
60
+ harvester.pull(repo_name)
61
+ expect(Git).to have_received(:open).with(repo_path)
62
+ expect(stub_repo).to have_received(:pull)
63
+ end
64
+
65
+ it 'clones a repo before pulling if it does not exist' do
66
+ harvester.pull(repo_name)
67
+ expect(Git).to have_received(:clone)
68
+ end
69
+ end
70
+
71
+ describe '#pull_all' do
72
+ it 'can pull all repositories' do
73
+ harvester.pull_all
74
+ expect(Git).to have_received(:open).exactly(2).times
75
+ expect(stub_repo).to have_received(:pull).exactly(2).times
76
+ end
77
+
78
+ it 'returns the names of repositories pulled' do
79
+ expect(harvester.pull_all).to eq(%w[my-institution another-institution])
80
+ end
81
+
82
+ it 'skips repositories in the denylist' do
83
+ harvester.pull_all
84
+ expect(Git).not_to have_received(:open).with('https://github.com/OpenGeoMetadata/aardvark.git')
85
+ end
86
+
87
+ it 'skips archived repositories' do
88
+ harvester.pull_all
89
+ expect(Git).not_to have_received(:open).with('https://github.com/OpenGeoMetadata/outdated-institution.git')
90
+ end
91
+ end
92
+
93
+ describe '#clone' do
94
+ it 'can clone a single repository' do
95
+ harvester.clone(repo_name)
96
+ expect(Git).to have_received(:clone).with(
97
+ repo_url,
98
+ nil, {
99
+ depth: 1, # shallow clone
100
+ path: harvester.ogm_path
101
+ }
102
+ )
103
+ end
104
+
105
+ it 'skips repositories that already exist' do
106
+ allow(File).to receive(:directory?).with(repo_path).and_return(true)
107
+ harvester.clone(repo_name)
108
+ expect(Git).not_to have_received(:clone)
109
+ end
110
+ end
111
+
112
+ describe '#clone_all' do
113
+ it 'can clone all repositories' do
114
+ harvester.clone_all
115
+ expect(Git).to have_received(:clone).exactly(2).times
116
+ end
117
+
118
+ it 'skips repositories in the denylist' do
119
+ harvester.clone_all
120
+ expect(Git).not_to have_received(:clone).with('https://github.com/OpenGeoMetadata/aardvark.git')
121
+ end
122
+
123
+ it 'returns the names of repositories cloned' do
124
+ expect(harvester.clone_all).to eq(%w[my-institution another-institution])
125
+ end
126
+ end
127
+
128
+ describe '#ogm_api_uri' do
129
+ it 'includes a per_page param' do
130
+ expect(described_class.send('ogm_api_uri').to_s).to include('per_page')
131
+ end
132
+ end
133
+ end
@@ -0,0 +1,134 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'geo_combine/indexer'
4
+ require 'spec_helper'
5
+
6
+ # Mock an available Blacklight installation
7
+ class FakeBlacklight
8
+ def self.default_index
9
+ Repository
10
+ end
11
+
12
+ class Repository
13
+ def self.connection; end
14
+ end
15
+ end
16
+
17
+ RSpec.describe GeoCombine::Indexer do
18
+ subject(:indexer) { described_class.new(solr:, logger:) }
19
+
20
+ let(:logger) { instance_double(Logger, warn: nil, info: nil, error: nil, debug: nil) }
21
+ let(:solr) { instance_double(RSolr::Client, options: { url: 'TEST' }) }
22
+ let(:docs) do
23
+ [
24
+ [{ 'id' => '1' }, 'path/to/record1.json'], # v1.0 schema
25
+ [{ 'dc_identifier_s' => '2' }, 'path/to/record2.json'] # aardvark schema
26
+ ]
27
+ end
28
+
29
+ before do
30
+ allow(solr).to receive(:update)
31
+ allow(solr).to receive(:commit)
32
+ end
33
+
34
+ describe '#initialize' do
35
+ before do
36
+ allow(RSolr).to receive(:connect).and_return(solr)
37
+ end
38
+
39
+ context 'when solr url is set in the environment' do
40
+ before do
41
+ stub_const('ENV', 'SOLR_URL' => 'http://localhost:8983/solr/geoblacklight')
42
+ end
43
+
44
+ it 'connects to the solr instance' do
45
+ described_class.new(logger:)
46
+ expect(RSolr).to have_received(:connect).with(
47
+ be_a(Faraday::Connection),
48
+ url: 'http://localhost:8983/solr/geoblacklight'
49
+ )
50
+ end
51
+ end
52
+
53
+ context 'when there is a configured Blacklight connection' do
54
+ before do
55
+ stub_const('Blacklight', FakeBlacklight)
56
+ allow(FakeBlacklight::Repository).to receive(:connection).and_return(
57
+ instance_double(RSolr::Client, base_uri: URI('http://localhost:8983/solr/geoblacklight'))
58
+ )
59
+ end
60
+
61
+ it 'connects to the solr instance' do
62
+ described_class.new(logger:)
63
+ expect(RSolr).to have_received(:connect).with(
64
+ be_a(Faraday::Connection),
65
+ url: 'http://localhost:8983/solr/geoblacklight'
66
+ )
67
+ end
68
+ end
69
+
70
+ context 'when solr url is not set' do
71
+ before do
72
+ stub_const('ENV', {})
73
+ end
74
+
75
+ it 'falls back to the Blacklight default' do
76
+ described_class.new(logger:)
77
+ expect(RSolr).to have_received(:connect).with(
78
+ be_a(Faraday::Connection),
79
+ url: 'http://localhost:8983/solr/blacklight-core'
80
+ )
81
+ end
82
+ end
83
+ end
84
+
85
+ describe '#index' do
86
+ let(:solr_error_msg) { { error: { msg: 'error message' } }.to_json }
87
+ let(:solr_response) { { status: '400', body: solr_error_msg } }
88
+ let(:error) { RSolr::Error::Http.new({ uri: URI('') }, solr_response) }
89
+
90
+ it 'sends records in batches to solr' do
91
+ indexer.index(docs)
92
+ expect(solr).to have_received(:update).with(
93
+ data: "{ add: { doc: {\"id\":\"1\"} },\nadd: { doc: {\"dc_identifier_s\":\"2\"} } }",
94
+ headers: { 'Content-Type' => 'application/json' },
95
+ params: { overwrite: true }
96
+ )
97
+ end
98
+
99
+ it 'commits changes to solr after indexing' do
100
+ indexer.index(docs)
101
+ expect(solr).to have_received(:commit).once
102
+ end
103
+
104
+ it 'returns the count of records successfully indexed' do
105
+ expect(indexer.index(docs)).to eq 2
106
+ end
107
+
108
+ context 'when an error occurs during batch indexing' do
109
+ before do
110
+ allow(solr).to receive(:update).and_raise(error)
111
+ allow(solr).to receive(:add)
112
+ end
113
+
114
+ it 'attempts to index records individually' do
115
+ total = indexer.index(docs)
116
+ expect(solr).to have_received(:add).twice
117
+ expect(total).to eq 2
118
+ end
119
+ end
120
+
121
+ context 'when an error occurs during individual indexing' do
122
+ before do
123
+ allow(solr).to receive(:update).and_raise(error)
124
+ allow(solr).to receive(:add).with(docs[0][0], anything).and_raise(error)
125
+ allow(solr).to receive(:add).with(docs[1][0], anything)
126
+ end
127
+
128
+ it 'continues indexing' do
129
+ total = indexer.index(docs)
130
+ expect(total).to eq 1
131
+ end
132
+ end
133
+ end
134
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe GeoCombine::Migrators::V1AardvarkMigrator do
6
+ include JsonDocs
7
+
8
+ describe '#run' do
9
+ it 'migrates fields to new names and types' do
10
+ input_hash = JSON.parse(full_geoblacklight)
11
+ expected_output = JSON.parse(full_geoblacklight_aardvark)
12
+ expect(described_class.new(v1_hash: input_hash).run).to eq(expected_output)
13
+ end
14
+
15
+ it 'removes deprecated fields' do
16
+ input_hash = JSON.parse(full_geoblacklight)
17
+ output = described_class.new(v1_hash: input_hash).run
18
+ expect(output.keys).not_to include(described_class::SCHEMA_FIELD_MAP.keys)
19
+ expect(output.keys).not_to include('dc_type_s')
20
+ expect(output.keys).not_to include('layer_geom_type_s')
21
+ end
22
+
23
+ it 'leaves custom fields unchanged' do
24
+ input_hash = JSON.parse(full_geoblacklight)
25
+ input_hash['custom_field'] = 'custom_value'
26
+ output = described_class.new(v1_hash: input_hash).run
27
+ expect(output['custom_field']).to eq('custom_value')
28
+ end
29
+
30
+ context 'when the given record is already in aardvark schema' do
31
+ it 'returns the record unchanged' do
32
+ input_hash = JSON.parse(full_geoblacklight_aardvark)
33
+ expect(described_class.new(v1_hash: input_hash).run).to eq(input_hash)
34
+ end
35
+ end
36
+
37
+ context 'when the user supplies a mapping for collection names to ids' do
38
+ it 'converts the collection names to ids' do
39
+ input_hash = JSON.parse(full_geoblacklight)
40
+ collection_id_map = { 'Uganda GIS Maps and Data, 2000-2010' => 'stanford-rb371kw9607' }
41
+ output = described_class.new(v1_hash: input_hash, collection_id_map:).run
42
+ expect(output['dct_isPartOf_sm']).to eq(['stanford-rb371kw9607'])
43
+ end
44
+ end
45
+ end
46
+ end
@@ -2,25 +2,28 @@
2
2
 
3
3
  require 'spec_helper'
4
4
 
5
- RSpec.describe GeoCombine::Metadata do
5
+ RSpec.describe GeoCombine do
6
6
  include XmlDocs
7
- describe '#initialize' do
8
- it 'reads metadata from file if File is readable' do
9
- expect(File).to receive(:readable?).and_return(true)
10
- expect(File).to receive(:read).and_return(simple_xml)
11
- metadata_object = described_class.new('./tmp/fake/file/location')
12
- expect(metadata_object).to be_an described_class
13
- expect(metadata_object.metadata).to be_an Nokogiri::XML::Document
14
- expect(metadata_object.metadata.css('Author').count).to eq 2
15
- end
16
7
 
17
- it 'reads metadata from parameter if File is not readable' do
18
- metadata_object = described_class.new(simple_xml)
19
- expect(metadata_object).to be_an described_class
20
- expect(metadata_object.metadata).to be_an Nokogiri::XML::Document
21
- expect(metadata_object.metadata.css('Author').count).to eq 2
8
+ describe GeoCombine::Metadata do
9
+ describe '#initialize' do
10
+ it 'reads metadata from file if File is readable' do
11
+ expect(File).to receive(:readable?).and_return(true)
12
+ expect(File).to receive(:read).and_return(simple_xml)
13
+ metadata_object = described_class.new('./tmp/fake/file/location')
14
+ expect(metadata_object).to be_an described_class
15
+ expect(metadata_object.metadata).to be_an Nokogiri::XML::Document
16
+ expect(metadata_object.metadata.css('Author').count).to eq 2
17
+ end
18
+
19
+ it 'reads metadata from parameter if File is not readable' do
20
+ metadata_object = described_class.new(simple_xml)
21
+ expect(metadata_object).to be_an described_class
22
+ expect(metadata_object.metadata).to be_an Nokogiri::XML::Document
23
+ expect(metadata_object.metadata.css('Author').count).to eq 2
24
+ end
22
25
  end
26
+ # GeoCombine subclasses should individually test `to_geoblacklight` and
27
+ # `to_html` methods
23
28
  end
24
- # GeoCombine subclasses should individually test `to_geoblacklight` and
25
- # `to_html` methods
26
29
  end
data/spec/spec_helper.rb CHANGED
@@ -7,7 +7,6 @@ SimpleCov.start 'rails' do
7
7
  add_filter 'lib/tasks/geo_combine.rake'
8
8
  add_filter 'lib/geo_combine/version.rb'
9
9
  add_filter 'lib/geo_combine/railtie.rb'
10
- add_filter 'lib/geo_combine/cli.rb'
11
10
  minimum_coverage 95 # When updating this value, update the README badge value
12
11
  end
13
12
 
@@ -23,7 +22,7 @@ require 'webmock/rspec'
23
22
  WebMock.allow_net_connect!
24
23
 
25
24
  # include the spec support files
26
- Dir['./spec/support/**/*.rb'].sort.each { |f| require f }
25
+ Dir['./spec/support/**/*.rb'].each { |f| require f }
27
26
 
28
27
  RSpec.configure do |config|
29
28
  config.include Helpers
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: geo_combine
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jack Reed
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-07 00:00:00.000000000 Z
11
+ date: 2024-02-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -108,6 +108,34 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '2.0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: git
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: faraday-retry
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '2.2'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '2.2'
111
139
  - !ruby/object:Gem::Dependency
112
140
  name: bundler
113
141
  requirement: !ruby/object:Gem::Requirement
@@ -237,8 +265,7 @@ dependencies:
237
265
  description: A Ruby toolkit for managing geospatial metadata
238
266
  email:
239
267
  - pjreed@stanford.edu
240
- executables:
241
- - geocombine
268
+ executables: []
242
269
  extensions: []
243
270
  extra_rdoc_files: []
244
271
  files:
@@ -251,12 +278,10 @@ files:
251
278
  - LICENSE.txt
252
279
  - README.md
253
280
  - Rakefile
254
- - bin/geocombine
255
281
  - geo_combine.gemspec
256
282
  - lib/geo_combine.rb
257
283
  - lib/geo_combine/bounding_box.rb
258
284
  - lib/geo_combine/ckan_metadata.rb
259
- - lib/geo_combine/cli.rb
260
285
  - lib/geo_combine/esri_open_data.rb
261
286
  - lib/geo_combine/exceptions.rb
262
287
  - lib/geo_combine/fgdc.rb
@@ -265,7 +290,11 @@ files:
265
290
  - lib/geo_combine/geo_blacklight_harvester.rb
266
291
  - lib/geo_combine/geoblacklight.rb
267
292
  - lib/geo_combine/geometry_types.rb
293
+ - lib/geo_combine/harvester.rb
294
+ - lib/geo_combine/indexer.rb
268
295
  - lib/geo_combine/iso19139.rb
296
+ - lib/geo_combine/logger.rb
297
+ - lib/geo_combine/migrators/v1_aardvark_migrator.rb
269
298
  - lib/geo_combine/ogp.rb
270
299
  - lib/geo_combine/railtie.rb
271
300
  - lib/geo_combine/subjects.rb
@@ -294,6 +323,7 @@ files:
294
323
  - spec/fixtures/docs/ckan.json
295
324
  - spec/fixtures/docs/esri_open_data.json
296
325
  - spec/fixtures/docs/full_geoblacklight.json
326
+ - spec/fixtures/docs/full_geoblacklight_aardvark.json
297
327
  - spec/fixtures/docs/geoblacklight_pre_v1.json
298
328
  - spec/fixtures/docs/ogp_harvard_line.json
299
329
  - spec/fixtures/docs/ogp_harvard_raster.json
@@ -304,6 +334,7 @@ files:
304
334
  - spec/fixtures/docs/simple_xslt.xsl
305
335
  - spec/fixtures/docs/stanford_iso.xml
306
336
  - spec/fixtures/docs/tufts_fgdc.xml
337
+ - spec/fixtures/indexing/aardvark.json
307
338
  - spec/fixtures/indexing/basic_geoblacklight.json
308
339
  - spec/fixtures/indexing/geoblacklight.json
309
340
  - spec/fixtures/indexing/layers.json
@@ -318,10 +349,12 @@ files:
318
349
  - spec/lib/geo_combine/formatting_spec.rb
319
350
  - spec/lib/geo_combine/geo_blacklight_harvester_spec.rb
320
351
  - spec/lib/geo_combine/geoblacklight_spec.rb
352
+ - spec/lib/geo_combine/harvester_spec.rb
353
+ - spec/lib/geo_combine/indexer_spec.rb
321
354
  - spec/lib/geo_combine/iso19139_spec.rb
355
+ - spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb
322
356
  - spec/lib/geo_combine/ogp_spec.rb
323
357
  - spec/lib/geo_combine_spec.rb
324
- - spec/lib/tasks/geo_combine_spec.rb
325
358
  - spec/spec_helper.rb
326
359
  - spec/support/fixtures.rb
327
360
  homepage: ''
@@ -343,7 +376,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
343
376
  - !ruby/object:Gem::Version
344
377
  version: '0'
345
378
  requirements: []
346
- rubygems_version: 3.1.6
379
+ rubygems_version: 3.3.7
347
380
  signing_key:
348
381
  specification_version: 4
349
382
  summary: A Ruby toolkit for managing geospatial metadata
@@ -354,6 +387,7 @@ test_files:
354
387
  - spec/fixtures/docs/ckan.json
355
388
  - spec/fixtures/docs/esri_open_data.json
356
389
  - spec/fixtures/docs/full_geoblacklight.json
390
+ - spec/fixtures/docs/full_geoblacklight_aardvark.json
357
391
  - spec/fixtures/docs/geoblacklight_pre_v1.json
358
392
  - spec/fixtures/docs/ogp_harvard_line.json
359
393
  - spec/fixtures/docs/ogp_harvard_raster.json
@@ -364,6 +398,7 @@ test_files:
364
398
  - spec/fixtures/docs/simple_xslt.xsl
365
399
  - spec/fixtures/docs/stanford_iso.xml
366
400
  - spec/fixtures/docs/tufts_fgdc.xml
401
+ - spec/fixtures/indexing/aardvark.json
367
402
  - spec/fixtures/indexing/basic_geoblacklight.json
368
403
  - spec/fixtures/indexing/geoblacklight.json
369
404
  - spec/fixtures/indexing/layers.json
@@ -378,9 +413,11 @@ test_files:
378
413
  - spec/lib/geo_combine/formatting_spec.rb
379
414
  - spec/lib/geo_combine/geo_blacklight_harvester_spec.rb
380
415
  - spec/lib/geo_combine/geoblacklight_spec.rb
416
+ - spec/lib/geo_combine/harvester_spec.rb
417
+ - spec/lib/geo_combine/indexer_spec.rb
381
418
  - spec/lib/geo_combine/iso19139_spec.rb
419
+ - spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb
382
420
  - spec/lib/geo_combine/ogp_spec.rb
383
421
  - spec/lib/geo_combine_spec.rb
384
- - spec/lib/tasks/geo_combine_spec.rb
385
422
  - spec/spec_helper.rb
386
423
  - spec/support/fixtures.rb
data/bin/geocombine DELETED
@@ -1,6 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require 'geo_combine/cli'
5
-
6
- GeoCombine::CLI.start(ARGV)
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'thor'
4
- require 'rake'
5
-
6
- root = Gem::Specification.find_by_name('geo_combine').gem_dir
7
- tasks = File.join(root, 'lib/tasks/*.rake')
8
- Dir.glob(tasks).each { |r| load r }
9
-
10
- module GeoCombine
11
- class CLI < Thor
12
- desc 'clone', 'Clone all OpenGeoMetadata repositories'
13
- def clone
14
- Rake::Task['geocombine:clone'].invoke
15
- end
16
-
17
- desc 'pull', '"git pull" OpenGeoMetadata repositories'
18
- def pull
19
- Rake::Task['geocombine:pull'].invoke
20
- end
21
-
22
- desc 'index', 'Index all of the GeoBlacklight documents'
23
- def index
24
- Rake::Task['geocombine:index'].invoke
25
- end
26
- end
27
- end
@@ -1,45 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'spec_helper'
4
- require 'rake'
5
-
6
- describe 'geo_combine.rake' do
7
- before(:all) do
8
- load File.expand_path('../../../lib/tasks/geo_combine.rake', __dir__)
9
- Rake::Task.define_task(:environment)
10
- end
11
-
12
- before do
13
- allow(ENV).to receive(:[]).and_call_original
14
- allow(ENV).to receive(:[]).with('OGM_PATH').and_return(File.join(fixture_dir, 'indexing'))
15
- end
16
-
17
- describe 'geocombine:clone' do
18
- before do
19
- WebMock.disable_net_connect!
20
- end
21
-
22
- after do
23
- WebMock.allow_net_connect!
24
- end
25
-
26
- it 'does not clone repos on deny list' do
27
- stub_request(:get, 'https://api.github.com/orgs/opengeometadata/repos').to_return(status: 200, body: read_fixture('docs/repos.json'))
28
- allow(Kernel).to receive(:system)
29
- Rake::Task['geocombine:clone'].invoke
30
- expect(Kernel).to have_received(:system).exactly(21).times
31
- end
32
- end
33
-
34
- describe 'geocombine:index' do
35
- it 'only indexes .json files but not layers.json' do
36
- rsolr_mock = instance_double(RSolr::Client)
37
- allow(rsolr_mock).to receive(:update)
38
- allow(rsolr_mock).to receive(:commit)
39
- allow(RSolr).to receive(:connect).and_return(rsolr_mock)
40
- Rake::Task['geocombine:index'].invoke
41
- # We expect 2 files to index
42
- expect(rsolr_mock).to have_received(:update).exactly(2).times
43
- end
44
- end
45
- end