geo_combine 0.7.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +7 -16
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +5 -1
  5. data/.rubocop_todo.yml +34 -36
  6. data/README.md +47 -22
  7. data/geo_combine.gemspec +2 -0
  8. data/lib/geo_combine/ckan_metadata.rb +5 -4
  9. data/lib/geo_combine/formatting.rb +1 -1
  10. data/lib/geo_combine/geo_blacklight_harvester.rb +17 -12
  11. data/lib/geo_combine/geoblacklight.rb +1 -1
  12. data/lib/geo_combine/harvester.rb +132 -0
  13. data/lib/geo_combine/indexer.rb +126 -0
  14. data/lib/geo_combine/logger.rb +16 -0
  15. data/lib/geo_combine/migrators/v1_aardvark_migrator.rb +118 -0
  16. data/lib/geo_combine/ogp.rb +1 -1
  17. data/lib/geo_combine/railtie.rb +1 -0
  18. data/lib/geo_combine/version.rb +1 -1
  19. data/lib/geo_combine.rb +3 -0
  20. data/lib/tasks/geo_combine.rake +10 -65
  21. data/spec/fixtures/docs/full_geoblacklight.json +8 -1
  22. data/spec/fixtures/docs/full_geoblacklight_aardvark.json +51 -0
  23. data/spec/fixtures/indexing/aardvark.json +57 -0
  24. data/spec/fixtures/json_docs.rb +6 -0
  25. data/spec/lib/geo_combine/bounding_box_spec.rb +1 -1
  26. data/spec/lib/geo_combine/geo_blacklight_harvester_spec.rb +5 -4
  27. data/spec/lib/geo_combine/geoblacklight_spec.rb +3 -3
  28. data/spec/lib/geo_combine/harvester_spec.rb +133 -0
  29. data/spec/lib/geo_combine/indexer_spec.rb +134 -0
  30. data/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb +46 -0
  31. data/spec/lib/geo_combine_spec.rb +20 -17
  32. data/spec/spec_helper.rb +1 -2
  33. metadata +46 -9
  34. data/bin/geocombine +0 -6
  35. data/lib/geo_combine/cli.rb +0 -27
  36. data/spec/lib/tasks/geo_combine_spec.rb +0 -45
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7d47d9cff1e3bf0e3ec2237c65554524b9a624d8c9faf833d1c99c6fdc4f7c94
4
- data.tar.gz: 7069057b0b5166f2ed5496af51a270cefdd6ee5aa3937456f10a54b2f5f32536
3
+ metadata.gz: 5b168ca81c2b6d5ff2fa0ce75d18154ad9806f5a872cc6f96e41ab0cea628864
4
+ data.tar.gz: 9ba88c0cca642ebe79301182f992f2dfe223bc1cf4abaa9b00ac4687293b78f4
5
5
  SHA512:
6
- metadata.gz: 7208f9b13e73b183571861a7d40bcd6a1c1a466b4af7ff6fead7174bdb992a51955ebb986b07094ce195dd2e1d520a3c8b0b51b7c18178b57b3b0eb6db0c6e4e
7
- data.tar.gz: 7daf75a1d31036a3659d4e96f261f4f1ce20bfea2083827f32e767ee8f15c14d6c112053cd009ca17cdfc6c433b9f7b64ce524fbf4fdaf7fe56877b1b7c07360
6
+ metadata.gz: 8c54ead8b591bd20fc3b62fddbfec2b0ac6830972cd85bf49fd31f230c84c8cf1a7bd4c0b56de702a235eca130fc643e4c796cb84adf15fd93f0b73075161fe2
7
+ data.tar.gz: 28fe9a5209dd77c2f8b60e87fddec8c7892ddf33794afd75522cf09c6827496c445c6c2f52445ad7fa46508ac1a4dcedd0c5068e161d0e176ec7ab232b21d296
@@ -7,12 +7,11 @@ jobs:
7
7
  runs-on: ubuntu-latest
8
8
  steps:
9
9
  - uses: actions/checkout@v2
10
- - name: Set up Ruby
10
+ - name: Set up Ruby and install dependencies
11
11
  uses: ruby/setup-ruby@v1
12
12
  with:
13
- ruby-version: 2.7
14
- - name: Install dependencies
15
- run: bundle install
13
+ ruby-version: 3.1
14
+ bundler-cache: true
16
15
  - name: Run linter
17
16
  run: bundle exec rubocop
18
17
 
@@ -20,24 +19,16 @@ jobs:
20
19
  runs-on: ubuntu-latest
21
20
  strategy:
22
21
  matrix:
23
- ruby: [2.7, 3.0, 3.1]
24
- faraday_version: [''] # Defaults to whatever's the most recent version.
25
- include:
26
- - ruby: 2.7
27
- faraday_version: '~> 1.0'
22
+ ruby: [3.1, 3.2, 3.3]
23
+ faraday_version: ['', '~> 1.0'] # Defaults to whatever's the most recent version.
28
24
  steps:
29
25
  - uses: actions/checkout@v2
30
26
 
31
- - name: Set up Ruby
27
+ - name: Set up Ruby and install dependencies
32
28
  uses: ruby/setup-ruby@v1
33
29
  with:
34
30
  ruby-version: ${{ matrix.ruby }}
35
-
36
- - name: Install bundler
37
- run: gem install bundler -v 2.1.1
38
-
39
- - name: Install dependencies
40
- run: bundle _2.1.1_ install
31
+ bundler-cache: true
41
32
  env:
42
33
  FARADAY_VERSION: ${{ matrix.faraday_version }}
43
34
 
data/.gitignore CHANGED
@@ -14,3 +14,4 @@
14
14
  mkmf.log
15
15
  .tool-versions
16
16
  .byebug_history
17
+ .ruby-version
data/.rubocop.yml CHANGED
@@ -5,16 +5,20 @@ require:
5
5
  inherit_from: .rubocop_todo.yml
6
6
 
7
7
  AllCops:
8
- TargetRubyVersion: 2.7
8
+ TargetRubyVersion: 3.1
9
9
  DisplayCopNames: true
10
10
  NewCops: enable
11
11
  Exclude:
12
12
  - 'geo_combine.gemspec'
13
13
  - 'tmp/**/*'
14
+ - 'vendor/bundle/**/*'
14
15
 
15
16
  RSpec/DescribeClass:
16
17
  Enabled: false
17
18
 
19
+ RSpec/MultipleMemoizedHelpers:
20
+ Enabled: false
21
+
18
22
  RSpec/BeforeAfterAll:
19
23
  Exclude:
20
24
  - 'spec/lib/tasks/geo_combine_spec.rb'
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2022-02-17 18:38:52 UTC using RuboCop version 1.25.1.
3
+ # on 2023-09-13 18:53:11 UTC using RuboCop version 1.56.3.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -12,55 +12,51 @@ Lint/RescueException:
12
12
  - 'spec/helpers.rb'
13
13
 
14
14
  # Offense count: 1
15
+ # This cop supports unsafe autocorrection (--autocorrect-all).
15
16
  Lint/UselessAssignment:
16
17
  Exclude:
17
18
  - 'spec/helpers.rb'
18
19
 
19
20
  # Offense count: 7
20
- # Configuration parameters: IgnoredMethods, CountRepeatedAttributes.
21
+ # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
21
22
  Metrics/AbcSize:
22
23
  Max: 33
23
24
 
24
- # Offense count: 25
25
- # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
26
- # IgnoredMethods: refine
25
+ # Offense count: 1
26
+ # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
27
+ # AllowedMethods: refine
27
28
  Metrics/BlockLength:
28
- Max: 181
29
+ Max: 27
29
30
 
30
31
  # Offense count: 1
31
32
  # Configuration parameters: CountComments, CountAsOne.
32
33
  Metrics/ClassLength:
33
34
  Max: 152
34
35
 
35
- # Offense count: 3
36
- # Configuration parameters: IgnoredMethods.
36
+ # Offense count: 5
37
+ # Configuration parameters: AllowedMethods, AllowedPatterns.
37
38
  Metrics/CyclomaticComplexity:
38
39
  Max: 11
39
40
 
40
- # Offense count: 10
41
- # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
41
+ # Offense count: 13
42
+ # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
42
43
  Metrics/MethodLength:
43
44
  Max: 21
44
45
 
45
- # Offense count: 1
46
- # Configuration parameters: CountComments, CountAsOne.
47
- Metrics/ModuleLength:
48
- Max: 1657
49
-
50
- # Offense count: 1
51
- # Configuration parameters: IgnoredMethods.
46
+ # Offense count: 2
47
+ # Configuration parameters: AllowedMethods, AllowedPatterns.
52
48
  Metrics/PerceivedComplexity:
53
49
  Max: 11
54
50
 
55
51
  # Offense count: 9
56
- # Configuration parameters: Prefixes.
52
+ # Configuration parameters: Prefixes, AllowedPatterns.
57
53
  # Prefixes: when, with, without
58
54
  RSpec/ContextWording:
59
55
  Exclude:
60
56
  - 'spec/lib/geo_combine/geoblacklight_spec.rb'
61
57
  - 'spec/lib/geo_combine/ogp_spec.rb'
62
58
 
63
- # Offense count: 9
59
+ # Offense count: 11
64
60
  # Configuration parameters: CountAsOne.
65
61
  RSpec/ExampleLength:
66
62
  Max: 12
@@ -71,17 +67,10 @@ RSpec/ExpectInHook:
71
67
  - 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
72
68
  - 'spec/lib/geo_combine/geoblacklight_spec.rb'
73
69
 
74
- # Offense count: 1
75
- # Configuration parameters: Include, CustomTransform, IgnoreMethods, SpecSuffixOnly.
76
- # Include: **/*_spec*rb*, **/spec/**/*
77
- RSpec/FilePath:
78
- Exclude:
79
- - 'spec/lib/geo_combine_spec.rb'
80
-
81
- # Configuration parameters: .
70
+ # Offense count: 23
71
+ # Configuration parameters: EnforcedStyle.
82
72
  # SupportedStyles: have_received, receive
83
73
  RSpec/MessageSpies:
84
- EnforcedStyle: have_received
85
74
  Exclude:
86
75
  - 'spec/lib/geo_combine/esri_open_data_spec.rb'
87
76
  - 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
@@ -89,7 +78,7 @@ RSpec/MessageSpies:
89
78
  - 'spec/lib/geo_combine/ogp_spec.rb'
90
79
  - 'spec/lib/geo_combine_spec.rb'
91
80
 
92
- # Offense count: 39
81
+ # Offense count: 48
93
82
  RSpec/MultipleExpectations:
94
83
  Max: 5
95
84
 
@@ -99,12 +88,14 @@ RSpec/MultipleMemoizedHelpers:
99
88
  Max: 7
100
89
 
101
90
  # Offense count: 5
102
- # Configuration parameters: IgnoreSharedExamples.
91
+ # Configuration parameters: EnforcedStyle, IgnoreSharedExamples.
92
+ # SupportedStyles: always, named_only
103
93
  RSpec/NamedSubject:
104
94
  Exclude:
105
95
  - 'spec/lib/geo_combine/formatting_spec.rb'
106
96
 
107
97
  # Offense count: 8
98
+ # Configuration parameters: AllowedGroups.
108
99
  RSpec/NestedGroups:
109
100
  Max: 4
110
101
 
@@ -118,6 +109,14 @@ RSpec/RepeatedExampleGroupBody:
118
109
  Exclude:
119
110
  - 'spec/lib/geo_combine/iso19139_spec.rb'
120
111
 
112
+ # Offense count: 1
113
+ # Configuration parameters: Include, CustomTransform, IgnoreMethods, IgnoreMetadata.
114
+ # Include: **/*_spec.rb
115
+ RSpec/SpecFilePathFormat:
116
+ Exclude:
117
+ - '**/spec/routing/**/*'
118
+ - 'spec/lib/geo_combine_spec.rb'
119
+
121
120
  # Offense count: 19
122
121
  RSpec/StubbedMock:
123
122
  Exclude:
@@ -143,7 +142,7 @@ Security/Open:
143
142
  Exclude:
144
143
  - 'lib/geo_combine/geoblacklight.rb'
145
144
 
146
- # Offense count: 7
145
+ # Offense count: 6
147
146
  # Configuration parameters: AllowedConstants.
148
147
  Style/Documentation:
149
148
  Exclude:
@@ -151,15 +150,14 @@ Style/Documentation:
151
150
  - 'test/**/*'
152
151
  - 'lib/geo_combine/bounding_box.rb'
153
152
  - 'lib/geo_combine/ckan_metadata.rb'
154
- - 'lib/geo_combine/cli.rb'
155
153
  - 'lib/geo_combine/geo_blacklight_harvester.rb'
156
154
  - 'lib/geo_combine/geoblacklight.rb'
157
155
  - 'lib/geo_combine/geometry_types.rb'
158
156
  - 'lib/geo_combine/iso19139.rb'
159
157
 
160
- # Offense count: 7
161
- # Cop supports --auto-correct.
162
- # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
158
+ # Offense count: 12
159
+ # This cop supports safe autocorrection (--autocorrect).
160
+ # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns.
163
161
  # URISchemes: http, https
164
162
  Layout/LineLength:
165
- Max: 159
163
+ Max: 198
data/README.md CHANGED
@@ -1,12 +1,12 @@
1
1
  # GeoCombine
2
2
 
3
- ![CI](https://github.com/OpenGeoMetadata/GeoCombine/actions/workflows/ruby.yml/badge.svg)
3
+ ![CI](https://github.com/OpenGeoMetadata/GeoCombine/actions/workflows/ruby.yml/badge.svg)
4
4
  | [![Coverage Status](https://img.shields.io/badge/coverage-95%25-brightgreen)]()
5
5
  | [![Gem Version](https://img.shields.io/gem/v/geo_combine.svg)](https://github.com/OpenGeoMetadata/GeoCombine/releases)
6
6
 
7
-
8
7
  A Ruby toolkit for managing geospatial metadata, including:
9
- - tasks for cloning, updating, and indexing OpenGeoMetdata metadata
8
+
9
+ - tasks for cloning, updating, and indexing OpenGeoMetadata metadata
10
10
  - library for converting metadata between standards
11
11
 
12
12
  ## Installation
@@ -19,11 +19,15 @@ gem 'geo_combine'
19
19
 
20
20
  And then execute:
21
21
 
22
- $ bundle install
22
+ ```sh
23
+ $ bundle install
24
+ ```
23
25
 
24
26
  Or install it yourself as:
25
27
 
26
- $ gem install geo_combine
28
+ ```sh
29
+ $ gem install geo_combine
30
+ ```
27
31
 
28
32
  ## Usage
29
33
 
@@ -43,8 +47,42 @@ Or install it yourself as:
43
47
  > iso_metadata.to_html
44
48
  ```
45
49
 
50
+ ### Migrating metadata
51
+
52
+ You can use the `GeoCombine::Migrators` to migrate metadata from one schema to another.
53
+
54
+ Currently, the only migrator is `GeoCombine::Migrators::V1AardvarkMigrator`, which migrates from the [GeoBlacklight v1 schema](https://github.com/OpenGeoMetadata/opengeometadata.github.io/blob/main/docs/gbl-1.0.md) to the [Aardvark schema](https://github.com/OpenGeoMetadata/opengeometadata.github.io/blob/main/docs/ogm-aardvark.md).
55
+
56
+ ```ruby
57
+ # Load a record in geoblacklight v1 schema
58
+ record = JSON.parse(File.read('./spec/fixtures/docs/full_geoblacklight.json'))
59
+
60
+ # Migrate it to Aardvark schema
61
+ GeoCombine::Migrators::V1AardvarkMigrator.new(v1_hash: record).run
62
+ ```
63
+
64
+ Some fields cannot be migrated automatically. To handle the migration of collection names to IDs when migrating from v1 to Aardvark, you can provide a mapping of collection names to IDs to the migrator:
65
+
66
+ ```ruby
67
+ # You can store this mapping as a JSON or CSV file and load it into a hash
68
+ id_map = {
69
+ 'My Collection 1' => 'institution:my-collection-1',
70
+ 'My Collection 2' => 'institution:my-collection-2'
71
+ }
72
+
73
+ GeoCombine::Migrators::V1AardvarkMigrator.new(v1_hash: record, collection_id_map: id_map).run
74
+ ```
75
+
46
76
  ### OpenGeoMetadata
47
77
 
78
+ #### Logging
79
+
80
+ Some of the tools and scripts in this gem use Ruby's `Logger` class to print information to `$stderr`. By default, the log level is set to `Logger::INFO`. For more verbose information, you can set the `LOG_LEVEL` environment variable to `DEBUG`:
81
+
82
+ ```sh
83
+ $ LOG_LEVEL=DEBUG bundle exec rake geocombine:clone
84
+ ```
85
+
48
86
  #### Clone OpenGeoMetadata repositories locally
49
87
 
50
88
  ```sh
@@ -63,7 +101,7 @@ You can also specify a single repository:
63
101
  $ bundle exec rake geocombine:clone[edu.stanford.purl]
64
102
  ```
65
103
 
66
- *Note: If you are using zsh, you will need to use escape characters in front of the brackets:*
104
+ _Note: If you are using zsh, you will need to use escape characters in front of the brackets:_
67
105
 
68
106
  ```sh
69
107
  $ bundle exec rake geocombine:clone\[edu.stanford.purl\]
@@ -83,7 +121,7 @@ You can also specify a single repository:
83
121
  $ bundle exec rake geocombine:pull[edu.stanford.purl]
84
122
  ```
85
123
 
86
- *Note: If you are using zsh, you will need to use escape characters in front of the brackets:*
124
+ _Note: If you are using zsh, you will need to use escape characters in front of the brackets:_
87
125
 
88
126
  ```sh
89
127
  $ bundle exec rake geocombine:pull\[edu.stanford.purl\]
@@ -98,23 +136,14 @@ To index into Solr, GeoCombine requires a Solr instance that is running the
98
136
  $ bundle exec rake geocombine:index
99
137
  ```
100
138
 
101
- Indexes the `geoblacklight.json` files in cloned repositories to a Solr index running at http://127.0.0.1:8983/solr
139
+ If Blacklight is installed in the Ruby environment and a Solr index is configured, the rake task will use the Solr index configured in the Blacklight application (this is the case when invoking GeoCombine from your GeoBlacklight installation). If Blacklight is unavailable, the rake task will try to find a Solr instance running at `http://localhost:8983/solr/blacklight-core`.
102
140
 
103
- ##### Custom Solr location
104
-
105
- Solr location can also be specified by an environment variable `SOLR_URL`.
141
+ You can also set the Solr instance URL using the `SOLR_URL` environment variable:
106
142
 
107
143
  ```sh
108
144
  $ SOLR_URL=http://www.example.com:1234/solr/collection bundle exec rake geocombine:index
109
145
  ```
110
146
 
111
- Depending on your Solr instance's performance characteristics, you may want to
112
- change the [`commitWithin` parameter](https://lucene.apache.org/solr/guide/6_6/updatehandlers-in-solrconfig.html) (in milliseconds):
113
-
114
- ```sh
115
- $ SOLR_COMMIT_WITHIN=100 bundle exec rake geocombine:index
116
- ```
117
-
118
147
  ### Harvesting and indexing documents from GeoBlacklight sites
119
148
 
120
149
  GeoCombine provides a Harvester class and rake task to harvest and index content from GeoBlacklight sites (or any site that follows the Blacklight API format). Given that the configurations can change from consumer to consumer and site to site, the class provides a relatively simple configuration API. This can be configured in an initializer, a wrapping rake task, or any other ruby context where the rake task or class would be invoked.
@@ -160,10 +189,6 @@ Crawl delays can be configured (in seconds) either globally for all sites or on
160
189
 
161
190
  Solr's commitWithin option can be configured (in milliseconds) by passing a value under the commit_within key.
162
191
 
163
- ##### Debugging (default: false)
164
-
165
- The harvester and indexer will only `puts` content when errors happen. It is possible to see some progress information by setting the debug configuration option.
166
-
167
192
  #### Transforming Documents
168
193
 
169
194
  You may need to transform documents that are harvested for various purposes (removing fields, adding fields, omitting a document altogether, etc). You can configure some ruby code (a proc) that will take the document in, transform it, and return the transformed document. By default the indexer will remove the `score`, `timestamp`, and `_version_` fields from the documents harvested. If you provide your own transformer, you'll likely want to remove these fields in addition to the other transformations you provide.
data/geo_combine.gemspec CHANGED
@@ -25,6 +25,8 @@ Gem::Specification.new do |spec|
25
25
  spec.add_dependency 'sanitize'
26
26
  spec.add_dependency 'thor'
27
27
  spec.add_dependency 'faraday-net_http_persistent', '~> 2.0'
28
+ spec.add_dependency 'git'
29
+ spec.add_dependency 'faraday-retry', '~> 2.2'
28
30
 
29
31
  spec.add_development_dependency 'bundler'
30
32
  spec.add_development_dependency 'rake'
@@ -44,7 +44,8 @@ module GeoCombine
44
44
  def envelope
45
45
  return envelope_from_bbox unless envelope_from_bbox.nil?
46
46
  return envelope_from_spatial(',') unless envelope_from_spatial(',').nil?
47
- return envelope_from_spatial(' ') unless envelope_from_spatial(' ').nil?
47
+
48
+ envelope_from_spatial(' ') unless envelope_from_spatial(' ').nil?
48
49
  end
49
50
 
50
51
  def envelope_from_bbox
@@ -55,7 +56,7 @@ module GeoCombine
55
56
  north: extras('bbox-north-lat')
56
57
  )
57
58
  begin
58
- return bbox.to_envelope if bbox.valid?
59
+ bbox.to_envelope if bbox.valid?
59
60
  rescue GeoCombine::Exceptions::InvalidGeometry
60
61
  nil
61
62
  end
@@ -64,10 +65,10 @@ module GeoCombine
64
65
  def envelope_from_spatial(delimiter)
65
66
  bbox = GeoCombine::BoundingBox.from_string_delimiter(
66
67
  extras('spatial'),
67
- delimiter: delimiter
68
+ delimiter:
68
69
  )
69
70
  begin
70
- return bbox.to_envelope if bbox.valid?
71
+ bbox.to_envelope if bbox.valid?
71
72
  rescue GeoCombine::Exceptions::InvalidGeometry
72
73
  nil
73
74
  end
@@ -30,7 +30,7 @@ module GeoCombine
30
30
 
31
31
  # slugs should be lowercase and only have a-z, A-Z, 0-9, and -
32
32
  def sluggify(slug)
33
- slug.gsub(/[^a-zA-Z0-9\-]/, '-').gsub(/-+/, '-').downcase
33
+ slug.gsub(/[^a-zA-Z0-9-]/, '-').gsub(/-+/, '-').downcase
34
34
  end
35
35
  end
36
36
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'geo_combine/logger'
4
+
3
5
  module GeoCombine
4
6
  ##
5
7
  # A class to harvest and index results from GeoBlacklight sites
@@ -45,24 +47,25 @@ module GeoCombine
45
47
 
46
48
  attr_reader :site, :site_key
47
49
 
48
- def initialize(site_key)
50
+ def initialize(site_key, logger: GeoCombine::Logger.logger)
49
51
  @site_key = site_key
50
52
  @site = self.class.config[site_key]
53
+ @logger = logger
51
54
 
52
55
  raise ArgumentError, "Site key #{@site_key.inspect} is not configured for #{self.class.name}" unless @site
53
56
  end
54
57
 
55
58
  def index
56
- puts "Fetching page 1 @ #{base_url}&page=1" if self.class.config[:debug]
59
+ @logger.debug "fetching page 1 @ #{base_url}&page=1"
57
60
  response = JSON.parse(Net::HTTP.get(URI("#{base_url}&page=1")))
58
61
  response_class = BlacklightResponseVersionFactory.call(response)
59
62
 
60
- response_class.new(response: response, base_url: base_url).documents.each do |docs|
63
+ response_class.new(response:, base_url:, logger: @logger).documents.each do |docs|
61
64
  docs.map! do |document|
62
65
  self.class.document_transformer&.call(document)
63
66
  end.compact
64
67
 
65
- puts "Adding #{docs.count} documents to solr" if self.class.config[:debug]
68
+ @logger.debug "adding #{docs.count} documents to solr"
66
69
  solr_connection.update params: { commitWithin: commit_within, overwrite: true },
67
70
  data: docs.to_json,
68
71
  headers: { 'Content-Type' => 'application/json' }
@@ -91,10 +94,11 @@ module GeoCombine
91
94
  attr_reader :base_url
92
95
  attr_accessor :response, :page
93
96
 
94
- def initialize(response:, base_url:)
97
+ def initialize(response:, base_url:, logger: GeoCombine::Logger.logger)
95
98
  @base_url = base_url
96
99
  @response = response
97
100
  @page = 1
101
+ @logger = logger
98
102
  end
99
103
 
100
104
  def documents
@@ -106,12 +110,12 @@ module GeoCombine
106
110
  break if current_page == total_pages
107
111
 
108
112
  self.page += 1
109
- puts "Fetching page #{page} @ #{url}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
113
+ @logger.debug "fetching page #{page} @ #{url}"
110
114
 
111
115
  begin
112
116
  self.response = JSON.parse(Net::HTTP.get(URI(url)))
113
117
  rescue StandardError => e
114
- puts "Request for #{url} failed with #{e}"
118
+ @logger.error "request for #{url} failed with #{e}"
115
119
  self.response = nil
116
120
  end
117
121
  end
@@ -138,10 +142,11 @@ module GeoCombine
138
142
  attr_reader :base_url
139
143
  attr_accessor :response, :page
140
144
 
141
- def initialize(response:, base_url:)
145
+ def initialize(response:, base_url:, logger: GeoCombine::Logger.logger)
142
146
  @base_url = base_url
143
147
  @response = response
144
148
  @page = 1
149
+ @logger = logger
145
150
  end
146
151
 
147
152
  def documents
@@ -157,11 +162,11 @@ module GeoCombine
157
162
 
158
163
  url = "#{url}&format=json"
159
164
  self.page += 1
160
- puts "Fetching page #{page} @ #{url}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
165
+ @logger.debug "fetching page #{page} @ #{url}"
161
166
  begin
162
167
  self.response = JSON.parse(Net::HTTP.get(URI(url)))
163
168
  rescue StandardError => e
164
- puts "Request for #{url} failed with #{e}"
169
+ @logger.error "Request for #{url} failed with #{e}"
165
170
  self.response = nil
166
171
  end
167
172
  end
@@ -170,11 +175,11 @@ module GeoCombine
170
175
  private
171
176
 
172
177
  def documents_from_urls(urls)
173
- puts "Fetching #{urls.count} documents for page #{page}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
178
+ @logger.debug "fetching #{urls.count} documents for page #{page}"
174
179
  urls.map do |url|
175
180
  JSON.parse(Net::HTTP.get(URI("#{url}/raw")))
176
181
  rescue StandardError => e
177
- puts "Fetching \"#{url}/raw\" failed with #{e}"
182
+ @logger.error "fetching \"#{url}/raw\" failed with #{e}"
178
183
 
179
184
  nil
180
185
  end.compact
@@ -13,7 +13,7 @@ module GeoCombine
13
13
  attr_reader :metadata
14
14
 
15
15
  GEOBLACKLIGHT_VERSION = '1.0'
16
- SCHEMA_JSON_URL = "https://raw.githubusercontent.com/geoblacklight/geoblacklight/main/schema/geoblacklight-schema-#{GEOBLACKLIGHT_VERSION}.json"
16
+ SCHEMA_JSON_URL = "https://raw.githubusercontent.com/OpenGeoMetadata/opengeometadata.github.io/main/docs/schema/geoblacklight-schema-#{GEOBLACKLIGHT_VERSION}.json".freeze
17
17
  DEPRECATED_KEYS_V1 = %w[
18
18
  uuid
19
19
  georss_polygon_s