geo_combine 0.5.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +53 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +20 -0
- data/.rubocop_todo.yml +165 -0
- data/Gemfile +2 -1
- data/README.md +20 -2
- data/Rakefile +4 -2
- data/bin/geocombine +1 -0
- data/geo_combine.gemspec +6 -1
- data/lib/geo_combine/bounding_box.rb +7 -1
- data/lib/geo_combine/ckan_metadata.rb +10 -8
- data/lib/geo_combine/cli.rb +3 -1
- data/lib/geo_combine/esri_open_data.rb +2 -0
- data/lib/geo_combine/exceptions.rb +3 -0
- data/lib/geo_combine/fgdc.rb +2 -2
- data/lib/geo_combine/formats.rb +2 -0
- data/lib/geo_combine/formatting.rb +3 -1
- data/lib/geo_combine/geo_blacklight_harvester.rb +21 -13
- data/lib/geo_combine/geoblacklight.rb +20 -6
- data/lib/geo_combine/geometry_types.rb +2 -0
- data/lib/geo_combine/iso19139.rb +2 -1
- data/lib/geo_combine/ogp.rb +13 -11
- data/lib/geo_combine/railtie.rb +2 -0
- data/lib/geo_combine/subjects.rb +2 -0
- data/lib/geo_combine/version.rb +3 -1
- data/lib/geo_combine.rb +4 -3
- data/lib/tasks/geo_combine.rake +50 -29
- data/lib/xslt/fgdc2html.xsl +38 -9
- data/spec/features/fgdc2html_spec.rb +53 -1
- data/spec/features/iso2html_spec.rb +10 -1
- data/spec/fixtures/docs/princeton_fgdc.xml +374 -0
- data/spec/fixtures/docs/repos.json +3224 -0
- data/spec/fixtures/docs/simple_xml.xml +10 -0
- data/spec/fixtures/docs/simple_xslt.xsl +11 -0
- data/spec/fixtures/docs/stanford_iso.xml +652 -0
- data/spec/fixtures/docs/tufts_fgdc.xml +977 -0
- data/spec/fixtures/indexing/basic_geoblacklight.json +27 -0
- data/spec/fixtures/indexing/geoblacklight.json +33 -0
- data/spec/fixtures/indexing/layers.json +16119 -0
- data/spec/fixtures/indexing/test.txt +1 -0
- data/spec/fixtures/json_docs.rb +2 -0
- data/spec/fixtures/xml_docs.rb +9 -1659
- data/spec/helpers.rb +7 -7
- data/spec/lib/geo_combine/bounding_box_spec.rb +18 -0
- data/spec/lib/geo_combine/ckan_metadata_spec.rb +34 -11
- data/spec/lib/geo_combine/esri_open_data_spec.rb +23 -2
- data/spec/lib/geo_combine/fgdc_spec.rb +41 -10
- data/spec/lib/geo_combine/formatting_spec.rb +13 -5
- data/spec/lib/geo_combine/geo_blacklight_harvester_spec.rb +30 -26
- data/spec/lib/geo_combine/geoblacklight_spec.rb +41 -11
- data/spec/lib/geo_combine/iso19139_spec.rb +26 -14
- data/spec/lib/geo_combine/ogp_spec.rb +28 -8
- data/spec/lib/geo_combine_spec.rb +7 -4
- data/spec/lib/tasks/geo_combine_spec.rb +45 -0
- data/spec/spec_helper.rb +19 -84
- data/spec/support/fixtures.rb +9 -0
- metadata +116 -21
- data/.coveralls.yml +0 -1
- data/.travis.yml +0 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7d47d9cff1e3bf0e3ec2237c65554524b9a624d8c9faf833d1c99c6fdc4f7c94
|
4
|
+
data.tar.gz: 7069057b0b5166f2ed5496af51a270cefdd6ee5aa3937456f10a54b2f5f32536
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7208f9b13e73b183571861a7d40bcd6a1c1a466b4af7ff6fead7174bdb992a51955ebb986b07094ce195dd2e1d520a3c8b0b51b7c18178b57b3b0eb6db0c6e4e
|
7
|
+
data.tar.gz: 7daf75a1d31036a3659d4e96f261f4f1ce20bfea2083827f32e767ee8f15c14d6c112053cd009ca17cdfc6c433b9f7b64ce524fbf4fdaf7fe56877b1b7c07360
|
@@ -0,0 +1,53 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
rubocop:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
steps:
|
9
|
+
- uses: actions/checkout@v2
|
10
|
+
- name: Set up Ruby
|
11
|
+
uses: ruby/setup-ruby@v1
|
12
|
+
with:
|
13
|
+
ruby-version: 2.7
|
14
|
+
- name: Install dependencies
|
15
|
+
run: bundle install
|
16
|
+
- name: Run linter
|
17
|
+
run: bundle exec rubocop
|
18
|
+
|
19
|
+
test:
|
20
|
+
runs-on: ubuntu-latest
|
21
|
+
strategy:
|
22
|
+
matrix:
|
23
|
+
ruby: [2.7, 3.0, 3.1]
|
24
|
+
faraday_version: [''] # Defaults to whatever's the most recent version.
|
25
|
+
include:
|
26
|
+
- ruby: 2.7
|
27
|
+
faraday_version: '~> 1.0'
|
28
|
+
steps:
|
29
|
+
- uses: actions/checkout@v2
|
30
|
+
|
31
|
+
- name: Set up Ruby
|
32
|
+
uses: ruby/setup-ruby@v1
|
33
|
+
with:
|
34
|
+
ruby-version: ${{ matrix.ruby }}
|
35
|
+
|
36
|
+
- name: Install bundler
|
37
|
+
run: gem install bundler -v 2.1.1
|
38
|
+
|
39
|
+
- name: Install dependencies
|
40
|
+
run: bundle _2.1.1_ install
|
41
|
+
env:
|
42
|
+
FARADAY_VERSION: ${{ matrix.faraday_version }}
|
43
|
+
|
44
|
+
- name: Run tests
|
45
|
+
run: bundle exec rake spec
|
46
|
+
env:
|
47
|
+
FARADAY_VERSION: ${{ matrix.faraday_version }}
|
48
|
+
|
49
|
+
- name: Upload coverage artifacts
|
50
|
+
uses: actions/upload-artifact@v2
|
51
|
+
with:
|
52
|
+
name: coverage
|
53
|
+
path: coverage/
|
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require:
|
2
|
+
- rubocop-rspec
|
3
|
+
- rubocop-rake
|
4
|
+
|
5
|
+
inherit_from: .rubocop_todo.yml
|
6
|
+
|
7
|
+
AllCops:
|
8
|
+
TargetRubyVersion: 2.7
|
9
|
+
DisplayCopNames: true
|
10
|
+
NewCops: enable
|
11
|
+
Exclude:
|
12
|
+
- 'geo_combine.gemspec'
|
13
|
+
- 'tmp/**/*'
|
14
|
+
|
15
|
+
RSpec/DescribeClass:
|
16
|
+
Enabled: false
|
17
|
+
|
18
|
+
RSpec/BeforeAfterAll:
|
19
|
+
Exclude:
|
20
|
+
- 'spec/lib/tasks/geo_combine_spec.rb'
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2022-02-17 18:38:52 UTC using RuboCop version 1.25.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
Lint/RescueException:
|
11
|
+
Exclude:
|
12
|
+
- 'spec/helpers.rb'
|
13
|
+
|
14
|
+
# Offense count: 1
|
15
|
+
Lint/UselessAssignment:
|
16
|
+
Exclude:
|
17
|
+
- 'spec/helpers.rb'
|
18
|
+
|
19
|
+
# Offense count: 7
|
20
|
+
# Configuration parameters: IgnoredMethods, CountRepeatedAttributes.
|
21
|
+
Metrics/AbcSize:
|
22
|
+
Max: 33
|
23
|
+
|
24
|
+
# Offense count: 25
|
25
|
+
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
|
26
|
+
# IgnoredMethods: refine
|
27
|
+
Metrics/BlockLength:
|
28
|
+
Max: 181
|
29
|
+
|
30
|
+
# Offense count: 1
|
31
|
+
# Configuration parameters: CountComments, CountAsOne.
|
32
|
+
Metrics/ClassLength:
|
33
|
+
Max: 152
|
34
|
+
|
35
|
+
# Offense count: 3
|
36
|
+
# Configuration parameters: IgnoredMethods.
|
37
|
+
Metrics/CyclomaticComplexity:
|
38
|
+
Max: 11
|
39
|
+
|
40
|
+
# Offense count: 10
|
41
|
+
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
|
42
|
+
Metrics/MethodLength:
|
43
|
+
Max: 21
|
44
|
+
|
45
|
+
# Offense count: 1
|
46
|
+
# Configuration parameters: CountComments, CountAsOne.
|
47
|
+
Metrics/ModuleLength:
|
48
|
+
Max: 1657
|
49
|
+
|
50
|
+
# Offense count: 1
|
51
|
+
# Configuration parameters: IgnoredMethods.
|
52
|
+
Metrics/PerceivedComplexity:
|
53
|
+
Max: 11
|
54
|
+
|
55
|
+
# Offense count: 9
|
56
|
+
# Configuration parameters: Prefixes.
|
57
|
+
# Prefixes: when, with, without
|
58
|
+
RSpec/ContextWording:
|
59
|
+
Exclude:
|
60
|
+
- 'spec/lib/geo_combine/geoblacklight_spec.rb'
|
61
|
+
- 'spec/lib/geo_combine/ogp_spec.rb'
|
62
|
+
|
63
|
+
# Offense count: 9
|
64
|
+
# Configuration parameters: CountAsOne.
|
65
|
+
RSpec/ExampleLength:
|
66
|
+
Max: 12
|
67
|
+
|
68
|
+
# Offense count: 4
|
69
|
+
RSpec/ExpectInHook:
|
70
|
+
Exclude:
|
71
|
+
- 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
|
72
|
+
- 'spec/lib/geo_combine/geoblacklight_spec.rb'
|
73
|
+
|
74
|
+
# Offense count: 1
|
75
|
+
# Configuration parameters: Include, CustomTransform, IgnoreMethods, SpecSuffixOnly.
|
76
|
+
# Include: **/*_spec*rb*, **/spec/**/*
|
77
|
+
RSpec/FilePath:
|
78
|
+
Exclude:
|
79
|
+
- 'spec/lib/geo_combine_spec.rb'
|
80
|
+
|
81
|
+
# Configuration parameters: .
|
82
|
+
# SupportedStyles: have_received, receive
|
83
|
+
RSpec/MessageSpies:
|
84
|
+
EnforcedStyle: have_received
|
85
|
+
Exclude:
|
86
|
+
- 'spec/lib/geo_combine/esri_open_data_spec.rb'
|
87
|
+
- 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
|
88
|
+
- 'spec/lib/geo_combine/geoblacklight_spec.rb'
|
89
|
+
- 'spec/lib/geo_combine/ogp_spec.rb'
|
90
|
+
- 'spec/lib/geo_combine_spec.rb'
|
91
|
+
|
92
|
+
# Offense count: 39
|
93
|
+
RSpec/MultipleExpectations:
|
94
|
+
Max: 5
|
95
|
+
|
96
|
+
# Offense count: 3
|
97
|
+
# Configuration parameters: AllowSubject.
|
98
|
+
RSpec/MultipleMemoizedHelpers:
|
99
|
+
Max: 7
|
100
|
+
|
101
|
+
# Offense count: 5
|
102
|
+
# Configuration parameters: IgnoreSharedExamples.
|
103
|
+
RSpec/NamedSubject:
|
104
|
+
Exclude:
|
105
|
+
- 'spec/lib/geo_combine/formatting_spec.rb'
|
106
|
+
|
107
|
+
# Offense count: 8
|
108
|
+
RSpec/NestedGroups:
|
109
|
+
Max: 4
|
110
|
+
|
111
|
+
# Offense count: 1
|
112
|
+
RSpec/OverwritingSetup:
|
113
|
+
Exclude:
|
114
|
+
- 'spec/lib/geo_combine/geoblacklight_spec.rb'
|
115
|
+
|
116
|
+
# Offense count: 2
|
117
|
+
RSpec/RepeatedExampleGroupBody:
|
118
|
+
Exclude:
|
119
|
+
- 'spec/lib/geo_combine/iso19139_spec.rb'
|
120
|
+
|
121
|
+
# Offense count: 19
|
122
|
+
RSpec/StubbedMock:
|
123
|
+
Exclude:
|
124
|
+
- 'spec/lib/geo_combine/esri_open_data_spec.rb'
|
125
|
+
- 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
|
126
|
+
- 'spec/lib/geo_combine/geoblacklight_spec.rb'
|
127
|
+
- 'spec/lib/geo_combine/ogp_spec.rb'
|
128
|
+
- 'spec/lib/geo_combine_spec.rb'
|
129
|
+
|
130
|
+
# Offense count: 5
|
131
|
+
RSpec/SubjectStub:
|
132
|
+
Exclude:
|
133
|
+
- 'spec/lib/geo_combine/ogp_spec.rb'
|
134
|
+
|
135
|
+
# Offense count: 1
|
136
|
+
# Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
|
137
|
+
RSpec/VerifiedDoubles:
|
138
|
+
Exclude:
|
139
|
+
- 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
|
140
|
+
|
141
|
+
# Offense count: 1
|
142
|
+
Security/Open:
|
143
|
+
Exclude:
|
144
|
+
- 'lib/geo_combine/geoblacklight.rb'
|
145
|
+
|
146
|
+
# Offense count: 7
|
147
|
+
# Configuration parameters: AllowedConstants.
|
148
|
+
Style/Documentation:
|
149
|
+
Exclude:
|
150
|
+
- 'spec/**/*'
|
151
|
+
- 'test/**/*'
|
152
|
+
- 'lib/geo_combine/bounding_box.rb'
|
153
|
+
- 'lib/geo_combine/ckan_metadata.rb'
|
154
|
+
- 'lib/geo_combine/cli.rb'
|
155
|
+
- 'lib/geo_combine/geo_blacklight_harvester.rb'
|
156
|
+
- 'lib/geo_combine/geoblacklight.rb'
|
157
|
+
- 'lib/geo_combine/geometry_types.rb'
|
158
|
+
- 'lib/geo_combine/iso19139.rb'
|
159
|
+
|
160
|
+
# Offense count: 7
|
161
|
+
# Cop supports --auto-correct.
|
162
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
|
163
|
+
# URISchemes: http, https
|
164
|
+
Layout/LineLength:
|
165
|
+
Max: 159
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# GeoCombine
|
2
2
|
|
3
|
-
|
3
|
+
![CI](https://github.com/OpenGeoMetadata/GeoCombine/actions/workflows/ruby.yml/badge.svg)
|
4
|
+
| [![Coverage Status](https://img.shields.io/badge/coverage-95%25-brightgreen)]()
|
5
|
+
| [![Gem Version](https://img.shields.io/gem/v/geo_combine.svg)](https://github.com/OpenGeoMetadata/GeoCombine/releases)
|
4
6
|
|
5
7
|
|
6
8
|
A Ruby toolkit for managing geospatial metadata, including:
|
@@ -61,6 +63,12 @@ You can also specify a single repository:
|
|
61
63
|
$ bundle exec rake geocombine:clone[edu.stanford.purl]
|
62
64
|
```
|
63
65
|
|
66
|
+
*Note: If you are using zsh, you will need to use escape characters in front of the brackets:*
|
67
|
+
|
68
|
+
```sh
|
69
|
+
$ bundle exec rake geocombine:clone\[edu.stanford.purl\]
|
70
|
+
```
|
71
|
+
|
64
72
|
#### Update local OpenGeoMetadata repositories
|
65
73
|
|
66
74
|
```sh
|
@@ -75,6 +83,12 @@ You can also specify a single repository:
|
|
75
83
|
$ bundle exec rake geocombine:pull[edu.stanford.purl]
|
76
84
|
```
|
77
85
|
|
86
|
+
*Note: If you are using zsh, you will need to use escape characters in front of the brackets:*
|
87
|
+
|
88
|
+
```sh
|
89
|
+
$ bundle exec rake geocombine:pull\[edu.stanford.purl\]
|
90
|
+
```
|
91
|
+
|
78
92
|
#### Index GeoBlacklight documents
|
79
93
|
|
80
94
|
To index into Solr, GeoCombine requires a Solr instance that is running the
|
@@ -105,12 +119,16 @@ $ SOLR_COMMIT_WITHIN=100 bundle exec rake geocombine:index
|
|
105
119
|
|
106
120
|
GeoCombine provides a Harvester class and rake task to harvest and index content from GeoBlacklight sites (or any site that follows the Blacklight API format). Given that the configurations can change from consumer to consumer and site to site, the class provides a relatively simple configuration API. This can be configured in an initializer, a wrapping rake task, or any other ruby context where the rake task our class would be invoked.
|
107
121
|
|
122
|
+
```sh
|
123
|
+
bundle exec rake geocombine:geoblacklight_harvester:index[YOUR_CONFIGURED_SITE_KEY]
|
124
|
+
```
|
125
|
+
|
108
126
|
#### Harvester configuration
|
109
127
|
|
110
128
|
Only the sites themselves are required to be configured but there are various configuration options that can (optionally) be supplied to modify the harvester's behavior.
|
111
129
|
|
112
130
|
```ruby
|
113
|
-
GeoCombine::
|
131
|
+
GeoCombine::GeoBlacklightHarvester.configure do
|
114
132
|
{
|
115
133
|
commit_within: '10000',
|
116
134
|
crawl_delay: 1, # All sites
|
data/Rakefile
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'bundler/gem_tasks'
|
2
4
|
|
3
|
-
Dir.glob('lib/tasks/*.rake').each { |r| load r}
|
5
|
+
Dir.glob('lib/tasks/*.rake').each { |r| load r }
|
4
6
|
|
5
7
|
desc 'Run console for development'
|
6
8
|
task :console do
|
@@ -17,7 +19,7 @@ begin
|
|
17
19
|
|
18
20
|
RSpec::Core::RakeTask.new(:spec)
|
19
21
|
|
20
|
-
task :
|
22
|
+
task default: :spec
|
21
23
|
rescue LoadError
|
22
24
|
# no rspec available
|
23
25
|
end
|
data/bin/geocombine
CHANGED
data/geo_combine.gemspec
CHANGED
@@ -20,14 +20,19 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.add_dependency 'activesupport'
|
22
22
|
spec.add_dependency 'rsolr'
|
23
|
-
spec.add_dependency 'net-http-persistent', '~> 2.0' # pin since faraday (rsolr) doesn't work correctly with 3.x
|
24
23
|
spec.add_dependency 'nokogiri'
|
25
24
|
spec.add_dependency 'json-schema'
|
26
25
|
spec.add_dependency 'sanitize'
|
27
26
|
spec.add_dependency 'thor'
|
27
|
+
spec.add_dependency 'faraday-net_http_persistent', '~> 2.0'
|
28
28
|
|
29
29
|
spec.add_development_dependency 'bundler'
|
30
30
|
spec.add_development_dependency 'rake'
|
31
31
|
spec.add_development_dependency 'rspec'
|
32
32
|
spec.add_development_dependency 'rspec-html-matchers'
|
33
|
+
spec.add_development_dependency 'rubocop', '~> 1.25'
|
34
|
+
spec.add_development_dependency 'rubocop-rspec', '~> 2.8'
|
35
|
+
spec.add_development_dependency 'rubocop-rake'
|
36
|
+
spec.add_development_dependency 'simplecov'
|
37
|
+
spec.add_development_dependency 'webmock', '~> 3.14'
|
33
38
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module GeoCombine
|
2
4
|
class BoundingBox
|
3
5
|
attr_reader :west, :south, :east, :north
|
@@ -24,11 +26,13 @@ module GeoCombine
|
|
24
26
|
def valid?
|
25
27
|
[south, north].map do |coord|
|
26
28
|
next if (-90..90).cover?(coord)
|
29
|
+
|
27
30
|
raise GeoCombine::Exceptions::InvalidGeometry,
|
28
31
|
"#{coord} should be in range -90 90"
|
29
32
|
end
|
30
33
|
[east, west].map do |coord|
|
31
34
|
next if (-180..180).cover?(coord)
|
35
|
+
|
32
36
|
raise GeoCombine::Exceptions::InvalidGeometry,
|
33
37
|
"#{coord} should be in range -180 180"
|
34
38
|
end
|
@@ -45,7 +49,8 @@ module GeoCombine
|
|
45
49
|
|
46
50
|
def self.from_envelope(envelope)
|
47
51
|
return if envelope.nil?
|
48
|
-
|
52
|
+
|
53
|
+
envelope = envelope[/.*ENVELOPE\(([^)]*)/, 1].split(',')
|
49
54
|
new(
|
50
55
|
west: envelope[0],
|
51
56
|
south: envelope[3],
|
@@ -59,6 +64,7 @@ module GeoCombine
|
|
59
64
|
# @param [String] delimiter "," or " "
|
60
65
|
def self.from_string_delimiter(spatial, delimiter: ',')
|
61
66
|
return if spatial.nil?
|
67
|
+
|
62
68
|
spatial = spatial.split(delimiter)
|
63
69
|
new(
|
64
70
|
west: spatial[0],
|
@@ -1,8 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module GeoCombine
|
2
4
|
class CkanMetadata
|
3
|
-
MAX_STRING_LENGTH =
|
5
|
+
MAX_STRING_LENGTH = 32_765 # Solr limit
|
4
6
|
|
5
7
|
attr_reader :metadata
|
8
|
+
|
6
9
|
def initialize(metadata)
|
7
10
|
@metadata = JSON.parse(metadata)
|
8
11
|
end
|
@@ -31,7 +34,7 @@ module GeoCombine
|
|
31
34
|
dc_subject_sm: subjects,
|
32
35
|
dct_references_s: external_references.to_json.to_s,
|
33
36
|
dc_format_s: downloadable? ? 'ZIP' : nil # TODO: we only allow direct ZIP file downloads
|
34
|
-
}.
|
37
|
+
}.compact
|
35
38
|
end
|
36
39
|
|
37
40
|
def organization
|
@@ -54,7 +57,7 @@ module GeoCombine
|
|
54
57
|
begin
|
55
58
|
return bbox.to_envelope if bbox.valid?
|
56
59
|
rescue GeoCombine::Exceptions::InvalidGeometry
|
57
|
-
|
60
|
+
nil
|
58
61
|
end
|
59
62
|
end
|
60
63
|
|
@@ -66,7 +69,7 @@ module GeoCombine
|
|
66
69
|
begin
|
67
70
|
return bbox.to_envelope if bbox.valid?
|
68
71
|
rescue GeoCombine::Exceptions::InvalidGeometry
|
69
|
-
|
72
|
+
nil
|
70
73
|
end
|
71
74
|
end
|
72
75
|
|
@@ -87,11 +90,9 @@ module GeoCombine
|
|
87
90
|
'http://schema.org/url' => resource_urls('information').first
|
88
91
|
}
|
89
92
|
|
90
|
-
if downloadable?
|
91
|
-
h['http://schema.org/downloadUrl'] = resource_urls('download').first
|
92
|
-
end
|
93
|
+
h['http://schema.org/downloadUrl'] = resource_urls('download').first if downloadable?
|
93
94
|
|
94
|
-
h.
|
95
|
+
h.compact
|
95
96
|
end
|
96
97
|
|
97
98
|
def downloadable?
|
@@ -100,6 +101,7 @@ module GeoCombine
|
|
100
101
|
|
101
102
|
def resources(type)
|
102
103
|
return [] if @metadata['resources'].nil?
|
104
|
+
|
103
105
|
@metadata['resources'].select { |resource| resource['resource_locator_function'] == type }
|
104
106
|
end
|
105
107
|
|
data/lib/geo_combine/cli.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'thor'
|
2
4
|
require 'rake'
|
3
5
|
|
@@ -17,7 +19,7 @@ module GeoCombine
|
|
17
19
|
Rake::Task['geocombine:pull'].invoke
|
18
20
|
end
|
19
21
|
|
20
|
-
desc
|
22
|
+
desc 'index', 'Index all of the GeoBlacklight documents'
|
21
23
|
def index
|
22
24
|
Rake::Task['geocombine:index'].invoke
|
23
25
|
end
|
data/lib/geo_combine/fgdc.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
+
module GeoCombine
|
3
4
|
##
|
4
5
|
# FIXME: FGDC parsing, transformations are still experimental
|
5
6
|
class Fgdc < Metadata
|
6
|
-
|
7
7
|
##
|
8
8
|
# Returns a Nokogiri::XSLT object containing the FGDC to GeoBlacklight XSL
|
9
9
|
# @return [Nokogiri::XSLT]
|
data/lib/geo_combine/formats.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module GeoCombine
|
2
4
|
##
|
3
5
|
# Mixin used for formatting metadata fields
|
@@ -28,7 +30,7 @@ module GeoCombine
|
|
28
30
|
|
29
31
|
# slugs should be lowercase and only have a-z, A-Z, 0-9, and -
|
30
32
|
def sluggify(slug)
|
31
|
-
slug.gsub(/[^a-zA-Z0-9\-]/, '-').gsub(
|
33
|
+
slug.gsub(/[^a-zA-Z0-9\-]/, '-').gsub(/-+/, '-').downcase
|
32
34
|
end
|
33
35
|
end
|
34
36
|
end
|
@@ -30,17 +30,21 @@ module GeoCombine
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def document_transformer
|
33
|
-
@document_transformer ||
|
33
|
+
@document_transformer || lambda do |document|
|
34
34
|
document.delete('_version_')
|
35
35
|
document.delete('score')
|
36
36
|
document.delete('timestamp')
|
37
|
+
document.delete('solr_bboxtype__minX')
|
38
|
+
document.delete('solr_bboxtype__minY')
|
39
|
+
document.delete('solr_bboxtype__maxX')
|
40
|
+
document.delete('solr_bboxtype__maxY')
|
37
41
|
document
|
38
42
|
end
|
39
43
|
end
|
40
44
|
end
|
41
45
|
|
42
|
-
|
43
46
|
attr_reader :site, :site_key
|
47
|
+
|
44
48
|
def initialize(site_key)
|
45
49
|
@site_key = site_key
|
46
50
|
@site = self.class.config[site_key]
|
@@ -55,7 +59,7 @@ module GeoCombine
|
|
55
59
|
|
56
60
|
response_class.new(response: response, base_url: base_url).documents.each do |docs|
|
57
61
|
docs.map! do |document|
|
58
|
-
self.class.document_transformer
|
62
|
+
self.class.document_transformer&.call(document)
|
59
63
|
end.compact
|
60
64
|
|
61
65
|
puts "Adding #{docs.count} documents to solr" if self.class.config[:debug]
|
@@ -77,7 +81,8 @@ module GeoCombine
|
|
77
81
|
elsif keys.any? && %w[links data].all? { |param| keys.include?(param) }
|
78
82
|
ModernBlacklightResponse
|
79
83
|
else
|
80
|
-
raise NotImplementedError,
|
84
|
+
raise NotImplementedError,
|
85
|
+
"The following json response was not able to be parsed by the GeoBlacklightHarvester\n#{json}"
|
81
86
|
end
|
82
87
|
end
|
83
88
|
end
|
@@ -85,6 +90,7 @@ module GeoCombine
|
|
85
90
|
class LegacyBlacklightResponse
|
86
91
|
attr_reader :base_url
|
87
92
|
attr_accessor :response, :page
|
93
|
+
|
88
94
|
def initialize(response:, base_url:)
|
89
95
|
@base_url = base_url
|
90
96
|
@response = response
|
@@ -94,16 +100,17 @@ module GeoCombine
|
|
94
100
|
def documents
|
95
101
|
return enum_for(:documents) unless block_given?
|
96
102
|
|
97
|
-
while current_page && total_pages && (current_page <= total_pages)
|
103
|
+
while current_page && total_pages && (current_page <= total_pages)
|
98
104
|
yield response.dig('response', 'docs')
|
99
105
|
|
100
106
|
break if current_page == total_pages
|
107
|
+
|
101
108
|
self.page += 1
|
102
109
|
puts "Fetching page #{page} @ #{url}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
|
103
110
|
|
104
111
|
begin
|
105
112
|
self.response = JSON.parse(Net::HTTP.get(URI(url)))
|
106
|
-
rescue => e
|
113
|
+
rescue StandardError => e
|
107
114
|
puts "Request for #{url} failed with #{e}"
|
108
115
|
self.response = nil
|
109
116
|
end
|
@@ -130,6 +137,7 @@ module GeoCombine
|
|
130
137
|
class ModernBlacklightResponse
|
131
138
|
attr_reader :base_url
|
132
139
|
attr_accessor :response, :page
|
140
|
+
|
133
141
|
def initialize(response:, base_url:)
|
134
142
|
@base_url = base_url
|
135
143
|
@response = response
|
@@ -146,11 +154,13 @@ module GeoCombine
|
|
146
154
|
|
147
155
|
url = response.dig('links', 'next')
|
148
156
|
break unless url
|
157
|
+
|
158
|
+
url = "#{url}&format=json"
|
149
159
|
self.page += 1
|
150
160
|
puts "Fetching page #{page} @ #{url}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
|
151
161
|
begin
|
152
162
|
self.response = JSON.parse(Net::HTTP.get(URI(url)))
|
153
|
-
rescue => e
|
163
|
+
rescue StandardError => e
|
154
164
|
puts "Request for #{url} failed with #{e}"
|
155
165
|
self.response = nil
|
156
166
|
end
|
@@ -162,13 +172,11 @@ module GeoCombine
|
|
162
172
|
def documents_from_urls(urls)
|
163
173
|
puts "Fetching #{urls.count} documents for page #{page}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
|
164
174
|
urls.map do |url|
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
puts "Fetching \"#{url}/raw\" failed with #{e}"
|
175
|
+
JSON.parse(Net::HTTP.get(URI("#{url}/raw")))
|
176
|
+
rescue StandardError => e
|
177
|
+
puts "Fetching \"#{url}/raw\" failed with #{e}"
|
169
178
|
|
170
|
-
|
171
|
-
end
|
179
|
+
nil
|
172
180
|
end.compact
|
173
181
|
end
|
174
182
|
end
|