geo_combine 0.7.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +7 -16
- data/.gitignore +1 -0
- data/.rubocop.yml +5 -1
- data/.rubocop_todo.yml +34 -36
- data/README.md +47 -22
- data/geo_combine.gemspec +2 -0
- data/lib/geo_combine/ckan_metadata.rb +5 -4
- data/lib/geo_combine/formatting.rb +1 -1
- data/lib/geo_combine/geo_blacklight_harvester.rb +17 -12
- data/lib/geo_combine/geoblacklight.rb +1 -1
- data/lib/geo_combine/harvester.rb +132 -0
- data/lib/geo_combine/indexer.rb +126 -0
- data/lib/geo_combine/logger.rb +16 -0
- data/lib/geo_combine/migrators/v1_aardvark_migrator.rb +118 -0
- data/lib/geo_combine/ogp.rb +1 -1
- data/lib/geo_combine/railtie.rb +1 -0
- data/lib/geo_combine/version.rb +1 -1
- data/lib/geo_combine.rb +3 -0
- data/lib/tasks/geo_combine.rake +10 -65
- data/spec/fixtures/docs/full_geoblacklight.json +8 -1
- data/spec/fixtures/docs/full_geoblacklight_aardvark.json +51 -0
- data/spec/fixtures/indexing/aardvark.json +57 -0
- data/spec/fixtures/json_docs.rb +6 -0
- data/spec/lib/geo_combine/bounding_box_spec.rb +1 -1
- data/spec/lib/geo_combine/geo_blacklight_harvester_spec.rb +5 -4
- data/spec/lib/geo_combine/geoblacklight_spec.rb +3 -3
- data/spec/lib/geo_combine/harvester_spec.rb +133 -0
- data/spec/lib/geo_combine/indexer_spec.rb +134 -0
- data/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb +46 -0
- data/spec/lib/geo_combine_spec.rb +20 -17
- data/spec/spec_helper.rb +1 -2
- metadata +46 -9
- data/bin/geocombine +0 -6
- data/lib/geo_combine/cli.rb +0 -27
- data/spec/lib/tasks/geo_combine_spec.rb +0 -45
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b168ca81c2b6d5ff2fa0ce75d18154ad9806f5a872cc6f96e41ab0cea628864
|
4
|
+
data.tar.gz: 9ba88c0cca642ebe79301182f992f2dfe223bc1cf4abaa9b00ac4687293b78f4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c54ead8b591bd20fc3b62fddbfec2b0ac6830972cd85bf49fd31f230c84c8cf1a7bd4c0b56de702a235eca130fc643e4c796cb84adf15fd93f0b73075161fe2
|
7
|
+
data.tar.gz: 28fe9a5209dd77c2f8b60e87fddec8c7892ddf33794afd75522cf09c6827496c445c6c2f52445ad7fa46508ac1a4dcedd0c5068e161d0e176ec7ab232b21d296
|
data/.github/workflows/ruby.yml
CHANGED
@@ -7,12 +7,11 @@ jobs:
|
|
7
7
|
runs-on: ubuntu-latest
|
8
8
|
steps:
|
9
9
|
- uses: actions/checkout@v2
|
10
|
-
- name: Set up Ruby
|
10
|
+
- name: Set up Ruby and install dependencies
|
11
11
|
uses: ruby/setup-ruby@v1
|
12
12
|
with:
|
13
|
-
ruby-version:
|
14
|
-
|
15
|
-
run: bundle install
|
13
|
+
ruby-version: 3.1
|
14
|
+
bundler-cache: true
|
16
15
|
- name: Run linter
|
17
16
|
run: bundle exec rubocop
|
18
17
|
|
@@ -20,24 +19,16 @@ jobs:
|
|
20
19
|
runs-on: ubuntu-latest
|
21
20
|
strategy:
|
22
21
|
matrix:
|
23
|
-
ruby: [
|
24
|
-
faraday_version: [''] # Defaults to whatever's the most recent version.
|
25
|
-
include:
|
26
|
-
- ruby: 2.7
|
27
|
-
faraday_version: '~> 1.0'
|
22
|
+
ruby: [3.1, 3.2, 3.3]
|
23
|
+
faraday_version: ['', '~> 1.0'] # Defaults to whatever's the most recent version.
|
28
24
|
steps:
|
29
25
|
- uses: actions/checkout@v2
|
30
26
|
|
31
|
-
- name: Set up Ruby
|
27
|
+
- name: Set up Ruby and install dependencies
|
32
28
|
uses: ruby/setup-ruby@v1
|
33
29
|
with:
|
34
30
|
ruby-version: ${{ matrix.ruby }}
|
35
|
-
|
36
|
-
- name: Install bundler
|
37
|
-
run: gem install bundler -v 2.1.1
|
38
|
-
|
39
|
-
- name: Install dependencies
|
40
|
-
run: bundle _2.1.1_ install
|
31
|
+
bundler-cache: true
|
41
32
|
env:
|
42
33
|
FARADAY_VERSION: ${{ matrix.faraday_version }}
|
43
34
|
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -5,16 +5,20 @@ require:
|
|
5
5
|
inherit_from: .rubocop_todo.yml
|
6
6
|
|
7
7
|
AllCops:
|
8
|
-
TargetRubyVersion:
|
8
|
+
TargetRubyVersion: 3.1
|
9
9
|
DisplayCopNames: true
|
10
10
|
NewCops: enable
|
11
11
|
Exclude:
|
12
12
|
- 'geo_combine.gemspec'
|
13
13
|
- 'tmp/**/*'
|
14
|
+
- 'vendor/bundle/**/*'
|
14
15
|
|
15
16
|
RSpec/DescribeClass:
|
16
17
|
Enabled: false
|
17
18
|
|
19
|
+
RSpec/MultipleMemoizedHelpers:
|
20
|
+
Enabled: false
|
21
|
+
|
18
22
|
RSpec/BeforeAfterAll:
|
19
23
|
Exclude:
|
20
24
|
- 'spec/lib/tasks/geo_combine_spec.rb'
|
data/.rubocop_todo.yml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on
|
3
|
+
# on 2023-09-13 18:53:11 UTC using RuboCop version 1.56.3.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
@@ -12,55 +12,51 @@ Lint/RescueException:
|
|
12
12
|
- 'spec/helpers.rb'
|
13
13
|
|
14
14
|
# Offense count: 1
|
15
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
15
16
|
Lint/UselessAssignment:
|
16
17
|
Exclude:
|
17
18
|
- 'spec/helpers.rb'
|
18
19
|
|
19
20
|
# Offense count: 7
|
20
|
-
# Configuration parameters:
|
21
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
|
21
22
|
Metrics/AbcSize:
|
22
23
|
Max: 33
|
23
24
|
|
24
|
-
# Offense count:
|
25
|
-
# Configuration parameters: CountComments, CountAsOne,
|
26
|
-
#
|
25
|
+
# Offense count: 1
|
26
|
+
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
|
27
|
+
# AllowedMethods: refine
|
27
28
|
Metrics/BlockLength:
|
28
|
-
Max:
|
29
|
+
Max: 27
|
29
30
|
|
30
31
|
# Offense count: 1
|
31
32
|
# Configuration parameters: CountComments, CountAsOne.
|
32
33
|
Metrics/ClassLength:
|
33
34
|
Max: 152
|
34
35
|
|
35
|
-
# Offense count:
|
36
|
-
# Configuration parameters:
|
36
|
+
# Offense count: 5
|
37
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns.
|
37
38
|
Metrics/CyclomaticComplexity:
|
38
39
|
Max: 11
|
39
40
|
|
40
|
-
# Offense count:
|
41
|
-
# Configuration parameters: CountComments, CountAsOne,
|
41
|
+
# Offense count: 13
|
42
|
+
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
42
43
|
Metrics/MethodLength:
|
43
44
|
Max: 21
|
44
45
|
|
45
|
-
# Offense count:
|
46
|
-
# Configuration parameters:
|
47
|
-
Metrics/ModuleLength:
|
48
|
-
Max: 1657
|
49
|
-
|
50
|
-
# Offense count: 1
|
51
|
-
# Configuration parameters: IgnoredMethods.
|
46
|
+
# Offense count: 2
|
47
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns.
|
52
48
|
Metrics/PerceivedComplexity:
|
53
49
|
Max: 11
|
54
50
|
|
55
51
|
# Offense count: 9
|
56
|
-
# Configuration parameters: Prefixes.
|
52
|
+
# Configuration parameters: Prefixes, AllowedPatterns.
|
57
53
|
# Prefixes: when, with, without
|
58
54
|
RSpec/ContextWording:
|
59
55
|
Exclude:
|
60
56
|
- 'spec/lib/geo_combine/geoblacklight_spec.rb'
|
61
57
|
- 'spec/lib/geo_combine/ogp_spec.rb'
|
62
58
|
|
63
|
-
# Offense count:
|
59
|
+
# Offense count: 11
|
64
60
|
# Configuration parameters: CountAsOne.
|
65
61
|
RSpec/ExampleLength:
|
66
62
|
Max: 12
|
@@ -71,17 +67,10 @@ RSpec/ExpectInHook:
|
|
71
67
|
- 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
|
72
68
|
- 'spec/lib/geo_combine/geoblacklight_spec.rb'
|
73
69
|
|
74
|
-
# Offense count:
|
75
|
-
# Configuration parameters:
|
76
|
-
# Include: **/*_spec*rb*, **/spec/**/*
|
77
|
-
RSpec/FilePath:
|
78
|
-
Exclude:
|
79
|
-
- 'spec/lib/geo_combine_spec.rb'
|
80
|
-
|
81
|
-
# Configuration parameters: .
|
70
|
+
# Offense count: 23
|
71
|
+
# Configuration parameters: EnforcedStyle.
|
82
72
|
# SupportedStyles: have_received, receive
|
83
73
|
RSpec/MessageSpies:
|
84
|
-
EnforcedStyle: have_received
|
85
74
|
Exclude:
|
86
75
|
- 'spec/lib/geo_combine/esri_open_data_spec.rb'
|
87
76
|
- 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
|
@@ -89,7 +78,7 @@ RSpec/MessageSpies:
|
|
89
78
|
- 'spec/lib/geo_combine/ogp_spec.rb'
|
90
79
|
- 'spec/lib/geo_combine_spec.rb'
|
91
80
|
|
92
|
-
# Offense count:
|
81
|
+
# Offense count: 48
|
93
82
|
RSpec/MultipleExpectations:
|
94
83
|
Max: 5
|
95
84
|
|
@@ -99,12 +88,14 @@ RSpec/MultipleMemoizedHelpers:
|
|
99
88
|
Max: 7
|
100
89
|
|
101
90
|
# Offense count: 5
|
102
|
-
# Configuration parameters: IgnoreSharedExamples.
|
91
|
+
# Configuration parameters: EnforcedStyle, IgnoreSharedExamples.
|
92
|
+
# SupportedStyles: always, named_only
|
103
93
|
RSpec/NamedSubject:
|
104
94
|
Exclude:
|
105
95
|
- 'spec/lib/geo_combine/formatting_spec.rb'
|
106
96
|
|
107
97
|
# Offense count: 8
|
98
|
+
# Configuration parameters: AllowedGroups.
|
108
99
|
RSpec/NestedGroups:
|
109
100
|
Max: 4
|
110
101
|
|
@@ -118,6 +109,14 @@ RSpec/RepeatedExampleGroupBody:
|
|
118
109
|
Exclude:
|
119
110
|
- 'spec/lib/geo_combine/iso19139_spec.rb'
|
120
111
|
|
112
|
+
# Offense count: 1
|
113
|
+
# Configuration parameters: Include, CustomTransform, IgnoreMethods, IgnoreMetadata.
|
114
|
+
# Include: **/*_spec.rb
|
115
|
+
RSpec/SpecFilePathFormat:
|
116
|
+
Exclude:
|
117
|
+
- '**/spec/routing/**/*'
|
118
|
+
- 'spec/lib/geo_combine_spec.rb'
|
119
|
+
|
121
120
|
# Offense count: 19
|
122
121
|
RSpec/StubbedMock:
|
123
122
|
Exclude:
|
@@ -143,7 +142,7 @@ Security/Open:
|
|
143
142
|
Exclude:
|
144
143
|
- 'lib/geo_combine/geoblacklight.rb'
|
145
144
|
|
146
|
-
# Offense count:
|
145
|
+
# Offense count: 6
|
147
146
|
# Configuration parameters: AllowedConstants.
|
148
147
|
Style/Documentation:
|
149
148
|
Exclude:
|
@@ -151,15 +150,14 @@ Style/Documentation:
|
|
151
150
|
- 'test/**/*'
|
152
151
|
- 'lib/geo_combine/bounding_box.rb'
|
153
152
|
- 'lib/geo_combine/ckan_metadata.rb'
|
154
|
-
- 'lib/geo_combine/cli.rb'
|
155
153
|
- 'lib/geo_combine/geo_blacklight_harvester.rb'
|
156
154
|
- 'lib/geo_combine/geoblacklight.rb'
|
157
155
|
- 'lib/geo_combine/geometry_types.rb'
|
158
156
|
- 'lib/geo_combine/iso19139.rb'
|
159
157
|
|
160
|
-
# Offense count:
|
161
|
-
#
|
162
|
-
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives,
|
158
|
+
# Offense count: 12
|
159
|
+
# This cop supports safe autocorrection (--autocorrect).
|
160
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns.
|
163
161
|
# URISchemes: http, https
|
164
162
|
Layout/LineLength:
|
165
|
-
Max:
|
163
|
+
Max: 198
|
data/README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
# GeoCombine
|
2
2
|
|
3
|
-
|
3
|
+
![CI](https://github.com/OpenGeoMetadata/GeoCombine/actions/workflows/ruby.yml/badge.svg)
|
4
4
|
| [![Coverage Status](https://img.shields.io/badge/coverage-95%25-brightgreen)]()
|
5
5
|
| [![Gem Version](https://img.shields.io/gem/v/geo_combine.svg)](https://github.com/OpenGeoMetadata/GeoCombine/releases)
|
6
6
|
|
7
|
-
|
8
7
|
A Ruby toolkit for managing geospatial metadata, including:
|
9
|
-
|
8
|
+
|
9
|
+
- tasks for cloning, updating, and indexing OpenGeoMetadata metadata
|
10
10
|
- library for converting metadata between standards
|
11
11
|
|
12
12
|
## Installation
|
@@ -19,11 +19,15 @@ gem 'geo_combine'
|
|
19
19
|
|
20
20
|
And then execute:
|
21
21
|
|
22
|
-
|
22
|
+
```sh
|
23
|
+
$ bundle install
|
24
|
+
```
|
23
25
|
|
24
26
|
Or install it yourself as:
|
25
27
|
|
26
|
-
|
28
|
+
```sh
|
29
|
+
$ gem install geo_combine
|
30
|
+
```
|
27
31
|
|
28
32
|
## Usage
|
29
33
|
|
@@ -43,8 +47,42 @@ Or install it yourself as:
|
|
43
47
|
> iso_metadata.to_html
|
44
48
|
```
|
45
49
|
|
50
|
+
### Migrating metadata
|
51
|
+
|
52
|
+
You can use the `GeoCombine::Migrators` to migrate metadata from one schema to another.
|
53
|
+
|
54
|
+
Currently, the only migrator is `GeoCombine::Migrators::V1AardvarkMigrator`, which migrates from the [GeoBlacklight v1 schema](https://github.com/OpenGeoMetadata/opengeometadata.github.io/blob/main/docs/gbl-1.0.md) to the [Aardvark schema](https://github.com/OpenGeoMetadata/opengeometadata.github.io/blob/main/docs/ogm-aardvark.md).
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
# Load a record in geoblacklight v1 schema
|
58
|
+
record = JSON.parse(File.read('./spec/fixtures/docs/full_geoblacklight.json'))
|
59
|
+
|
60
|
+
# Migrate it to Aardvark schema
|
61
|
+
GeoCombine::Migrators::V1AardvarkMigrator.new(v1_hash: record).run
|
62
|
+
```
|
63
|
+
|
64
|
+
Some fields cannot be migrated automatically. To handle the migration of collection names to IDs when migrating from v1 to Aardvark, you can provide a mapping of collection names to IDs to the migrator:
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
# You can store this mapping as a JSON or CSV file and load it into a hash
|
68
|
+
id_map = {
|
69
|
+
'My Collection 1' => 'institution:my-collection-1',
|
70
|
+
'My Collection 2' => 'institution:my-collection-2'
|
71
|
+
}
|
72
|
+
|
73
|
+
GeoCombine::Migrators::V1AardvarkMigrator.new(v1_hash: record, collection_id_map: id_map).run
|
74
|
+
```
|
75
|
+
|
46
76
|
### OpenGeoMetadata
|
47
77
|
|
78
|
+
#### Logging
|
79
|
+
|
80
|
+
Some of the tools and scripts in this gem use Ruby's `Logger` class to print information to `$stderr`. By default, the log level is set to `Logger::INFO`. For more verbose information, you can set the `LOG_LEVEL` environment variable to `DEBUG`:
|
81
|
+
|
82
|
+
```sh
|
83
|
+
$ LOG_LEVEL=DEBUG bundle exec rake geocombine:clone
|
84
|
+
```
|
85
|
+
|
48
86
|
#### Clone OpenGeoMetadata repositories locally
|
49
87
|
|
50
88
|
```sh
|
@@ -63,7 +101,7 @@ You can also specify a single repository:
|
|
63
101
|
$ bundle exec rake geocombine:clone[edu.stanford.purl]
|
64
102
|
```
|
65
103
|
|
66
|
-
|
104
|
+
_Note: If you are using zsh, you will need to use escape characters in front of the brackets:_
|
67
105
|
|
68
106
|
```sh
|
69
107
|
$ bundle exec rake geocombine:clone\[edu.stanford.purl\]
|
@@ -83,7 +121,7 @@ You can also specify a single repository:
|
|
83
121
|
$ bundle exec rake geocombine:pull[edu.stanford.purl]
|
84
122
|
```
|
85
123
|
|
86
|
-
|
124
|
+
_Note: If you are using zsh, you will need to use escape characters in front of the brackets:_
|
87
125
|
|
88
126
|
```sh
|
89
127
|
$ bundle exec rake geocombine:pull\[edu.stanford.purl\]
|
@@ -98,23 +136,14 @@ To index into Solr, GeoCombine requires a Solr instance that is running the
|
|
98
136
|
$ bundle exec rake geocombine:index
|
99
137
|
```
|
100
138
|
|
101
|
-
|
139
|
+
If Blacklight is installed in the Ruby environment and a Solr index is configured, the rake task will use the Solr index configured in the Blacklight application (this is the case when invoking GeoCombine from your GeoBlacklight installation). If Blacklight is unavailable, the rake task will try to find a Solr instance running at `http://localhost:8983/solr/blacklight-core`.
|
102
140
|
|
103
|
-
|
104
|
-
|
105
|
-
Solr location can also be specified by an environment variable `SOLR_URL`.
|
141
|
+
You can also set the Solr instance URL using the `SOLR_URL` environment variable:
|
106
142
|
|
107
143
|
```sh
|
108
144
|
$ SOLR_URL=http://www.example.com:1234/solr/collection bundle exec rake geocombine:index
|
109
145
|
```
|
110
146
|
|
111
|
-
Depending on your Solr instance's performance characteristics, you may want to
|
112
|
-
change the [`commitWithin` parameter](https://lucene.apache.org/solr/guide/6_6/updatehandlers-in-solrconfig.html) (in milliseconds):
|
113
|
-
|
114
|
-
```sh
|
115
|
-
$ SOLR_COMMIT_WITHIN=100 bundle exec rake geocombine:index
|
116
|
-
```
|
117
|
-
|
118
147
|
### Harvesting and indexing documents from GeoBlacklight sites
|
119
148
|
|
120
149
|
GeoCombine provides a Harvester class and rake task to harvest and index content from GeoBlacklight sites (or any site that follows the Blacklight API format). Given that the configurations can change from consumer to consumer and site to site, the class provides a relatively simple configuration API. This can be configured in an initializer, a wrapping rake task, or any other ruby context where the rake task or class would be invoked.
|
@@ -160,10 +189,6 @@ Crawl delays can be configured (in seconds) either globally for all sites or on
|
|
160
189
|
|
161
190
|
Solr's commitWithin option can be configured (in milliseconds) by passing a value under the commit_within key.
|
162
191
|
|
163
|
-
##### Debugging (default: false)
|
164
|
-
|
165
|
-
The harvester and indexer will only `puts` content when errors happen. It is possible to see some progress information by setting the debug configuration option.
|
166
|
-
|
167
192
|
#### Transforming Documents
|
168
193
|
|
169
194
|
You may need to transform documents that are harvested for various purposes (removing fields, adding fields, omitting a document altogether, etc.). You can configure some ruby code (a proc) that will take the document in, transform it, and return the transformed document. By default the indexer will remove the `score`, `timestamp`, and `_version_` fields from the documents harvested. If you provide your own transformer, you'll likely want to remove these fields in addition to the other transformations you provide.
|
data/geo_combine.gemspec
CHANGED
@@ -25,6 +25,8 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.add_dependency 'sanitize'
|
26
26
|
spec.add_dependency 'thor'
|
27
27
|
spec.add_dependency 'faraday-net_http_persistent', '~> 2.0'
|
28
|
+
spec.add_dependency 'git'
|
29
|
+
spec.add_dependency 'faraday-retry', '~> 2.2'
|
28
30
|
|
29
31
|
spec.add_development_dependency 'bundler'
|
30
32
|
spec.add_development_dependency 'rake'
|
@@ -44,7 +44,8 @@ module GeoCombine
|
|
44
44
|
def envelope
|
45
45
|
return envelope_from_bbox unless envelope_from_bbox.nil?
|
46
46
|
return envelope_from_spatial(',') unless envelope_from_spatial(',').nil?
|
47
|
-
|
47
|
+
|
48
|
+
envelope_from_spatial(' ') unless envelope_from_spatial(' ').nil?
|
48
49
|
end
|
49
50
|
|
50
51
|
def envelope_from_bbox
|
@@ -55,7 +56,7 @@ module GeoCombine
|
|
55
56
|
north: extras('bbox-north-lat')
|
56
57
|
)
|
57
58
|
begin
|
58
|
-
|
59
|
+
bbox.to_envelope if bbox.valid?
|
59
60
|
rescue GeoCombine::Exceptions::InvalidGeometry
|
60
61
|
nil
|
61
62
|
end
|
@@ -64,10 +65,10 @@ module GeoCombine
|
|
64
65
|
def envelope_from_spatial(delimiter)
|
65
66
|
bbox = GeoCombine::BoundingBox.from_string_delimiter(
|
66
67
|
extras('spatial'),
|
67
|
-
delimiter:
|
68
|
+
delimiter:
|
68
69
|
)
|
69
70
|
begin
|
70
|
-
|
71
|
+
bbox.to_envelope if bbox.valid?
|
71
72
|
rescue GeoCombine::Exceptions::InvalidGeometry
|
72
73
|
nil
|
73
74
|
end
|
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'geo_combine/logger'
|
4
|
+
|
3
5
|
module GeoCombine
|
4
6
|
##
|
5
7
|
# A class to harvest and index results from GeoBlacklight sites
|
@@ -45,24 +47,25 @@ module GeoCombine
|
|
45
47
|
|
46
48
|
attr_reader :site, :site_key
|
47
49
|
|
48
|
-
def initialize(site_key)
|
50
|
+
def initialize(site_key, logger: GeoCombine::Logger.logger)
|
49
51
|
@site_key = site_key
|
50
52
|
@site = self.class.config[site_key]
|
53
|
+
@logger = logger
|
51
54
|
|
52
55
|
raise ArgumentError, "Site key #{@site_key.inspect} is not configured for #{self.class.name}" unless @site
|
53
56
|
end
|
54
57
|
|
55
58
|
def index
|
56
|
-
|
59
|
+
@logger.debug "fetching page 1 @ #{base_url}&page=1"
|
57
60
|
response = JSON.parse(Net::HTTP.get(URI("#{base_url}&page=1")))
|
58
61
|
response_class = BlacklightResponseVersionFactory.call(response)
|
59
62
|
|
60
|
-
response_class.new(response
|
63
|
+
response_class.new(response:, base_url:, logger: @logger).documents.each do |docs|
|
61
64
|
docs.map! do |document|
|
62
65
|
self.class.document_transformer&.call(document)
|
63
66
|
end.compact
|
64
67
|
|
65
|
-
|
68
|
+
@logger.debug "adding #{docs.count} documents to solr"
|
66
69
|
solr_connection.update params: { commitWithin: commit_within, overwrite: true },
|
67
70
|
data: docs.to_json,
|
68
71
|
headers: { 'Content-Type' => 'application/json' }
|
@@ -91,10 +94,11 @@ module GeoCombine
|
|
91
94
|
attr_reader :base_url
|
92
95
|
attr_accessor :response, :page
|
93
96
|
|
94
|
-
def initialize(response:, base_url:)
|
97
|
+
def initialize(response:, base_url:, logger: GeoCombine::Logger.logger)
|
95
98
|
@base_url = base_url
|
96
99
|
@response = response
|
97
100
|
@page = 1
|
101
|
+
@logger = logger
|
98
102
|
end
|
99
103
|
|
100
104
|
def documents
|
@@ -106,12 +110,12 @@ module GeoCombine
|
|
106
110
|
break if current_page == total_pages
|
107
111
|
|
108
112
|
self.page += 1
|
109
|
-
|
113
|
+
@logger.debug "fetching page #{page} @ #{url}"
|
110
114
|
|
111
115
|
begin
|
112
116
|
self.response = JSON.parse(Net::HTTP.get(URI(url)))
|
113
117
|
rescue StandardError => e
|
114
|
-
|
118
|
+
@logger.error "request for #{url} failed with #{e}"
|
115
119
|
self.response = nil
|
116
120
|
end
|
117
121
|
end
|
@@ -138,10 +142,11 @@ module GeoCombine
|
|
138
142
|
attr_reader :base_url
|
139
143
|
attr_accessor :response, :page
|
140
144
|
|
141
|
-
def initialize(response:, base_url:)
|
145
|
+
def initialize(response:, base_url:, logger: GeoCombine::Logger.logger)
|
142
146
|
@base_url = base_url
|
143
147
|
@response = response
|
144
148
|
@page = 1
|
149
|
+
@logger = logger
|
145
150
|
end
|
146
151
|
|
147
152
|
def documents
|
@@ -157,11 +162,11 @@ module GeoCombine
|
|
157
162
|
|
158
163
|
url = "#{url}&format=json"
|
159
164
|
self.page += 1
|
160
|
-
|
165
|
+
@logger.debug "fetching page #{page} @ #{url}"
|
161
166
|
begin
|
162
167
|
self.response = JSON.parse(Net::HTTP.get(URI(url)))
|
163
168
|
rescue StandardError => e
|
164
|
-
|
169
|
+
@logger.error "Request for #{url} failed with #{e}"
|
165
170
|
self.response = nil
|
166
171
|
end
|
167
172
|
end
|
@@ -170,11 +175,11 @@ module GeoCombine
|
|
170
175
|
private
|
171
176
|
|
172
177
|
def documents_from_urls(urls)
|
173
|
-
|
178
|
+
@logger.debug "fetching #{urls.count} documents for page #{page}"
|
174
179
|
urls.map do |url|
|
175
180
|
JSON.parse(Net::HTTP.get(URI("#{url}/raw")))
|
176
181
|
rescue StandardError => e
|
177
|
-
|
182
|
+
@logger.error "fetching \"#{url}/raw\" failed with #{e}"
|
178
183
|
|
179
184
|
nil
|
180
185
|
end.compact
|
@@ -13,7 +13,7 @@ module GeoCombine
|
|
13
13
|
attr_reader :metadata
|
14
14
|
|
15
15
|
GEOBLACKLIGHT_VERSION = '1.0'
|
16
|
-
SCHEMA_JSON_URL = "https://raw.githubusercontent.com/
|
16
|
+
SCHEMA_JSON_URL = "https://raw.githubusercontent.com/OpenGeoMetadata/opengeometadata.github.io/main/docs/schema/geoblacklight-schema-#{GEOBLACKLIGHT_VERSION}.json".freeze
|
17
17
|
DEPRECATED_KEYS_V1 = %w[
|
18
18
|
uuid
|
19
19
|
georss_polygon_s
|