geo_combine 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +5 -11
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +27 -26
- data/geo_combine.gemspec +1 -0
- data/lib/geo_combine/formatting.rb +1 -1
- data/lib/geo_combine/geoblacklight.rb +1 -1
- data/lib/geo_combine/harvester.rb +115 -0
- data/lib/geo_combine/indexer.rb +47 -0
- data/lib/geo_combine/migrators/v1_aardvark_migrator.rb +52 -0
- data/lib/geo_combine/railtie.rb +1 -0
- data/lib/geo_combine/version.rb +1 -1
- data/lib/geo_combine.rb +3 -0
- data/lib/tasks/geo_combine.rake +14 -65
- data/spec/fixtures/docs/full_geoblacklight_aardvark.json +33 -0
- data/spec/fixtures/indexing/aardvark.json +57 -0
- data/spec/fixtures/json_docs.rb +6 -0
- data/spec/lib/geo_combine/bounding_box_spec.rb +1 -1
- data/spec/lib/geo_combine/geoblacklight_spec.rb +3 -3
- data/spec/lib/geo_combine/harvester_spec.rb +147 -0
- data/spec/lib/geo_combine/indexer_spec.rb +63 -0
- data/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb +22 -0
- data/spec/spec_helper.rb +0 -1
- metadata +31 -9
- data/bin/geocombine +0 -6
- data/lib/geo_combine/cli.rb +0 -27
- data/spec/lib/tasks/geo_combine_spec.rb +0 -45
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c91fb8acc12d958eb450bd5cebc525073f30b6df664bd55f80622a942d8e6703
|
4
|
+
data.tar.gz: abcc395544fe5530d2fa9b15d1f26f2e758f5b48983fcab35f8fe7305e424e92
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8ad8cee3e3aa481816dffe885db30217daaa3d5167240ff8395954f02226c8d54a8ce9227efa602127ac55f08106a77f1f1ca3ac0a3203fd51c92fe284327ecd
|
7
|
+
data.tar.gz: 523b156be9684071cd06ddcd6a10dbb14b93f3b35970ad976787edb7363667301c72963075563cccc7e330f4c9a0acad26df271f63a799e1eeaa5dc72373f574
|
data/.github/workflows/ruby.yml
CHANGED
@@ -7,12 +7,11 @@ jobs:
|
|
7
7
|
runs-on: ubuntu-latest
|
8
8
|
steps:
|
9
9
|
- uses: actions/checkout@v2
|
10
|
-
- name: Set up Ruby
|
10
|
+
- name: Set up Ruby and install dependencies
|
11
11
|
uses: ruby/setup-ruby@v1
|
12
12
|
with:
|
13
13
|
ruby-version: 2.7
|
14
|
-
|
15
|
-
run: bundle install
|
14
|
+
bundler-cache: true
|
16
15
|
- name: Run linter
|
17
16
|
run: bundle exec rubocop
|
18
17
|
|
@@ -20,7 +19,7 @@ jobs:
|
|
20
19
|
runs-on: ubuntu-latest
|
21
20
|
strategy:
|
22
21
|
matrix:
|
23
|
-
ruby: [2.7, 3.0, 3.1]
|
22
|
+
ruby: [2.7, '3.0', 3.1]
|
24
23
|
faraday_version: [''] # Defaults to whatever's the most recent version.
|
25
24
|
include:
|
26
25
|
- ruby: 2.7
|
@@ -28,16 +27,11 @@ jobs:
|
|
28
27
|
steps:
|
29
28
|
- uses: actions/checkout@v2
|
30
29
|
|
31
|
-
- name: Set up Ruby
|
30
|
+
- name: Set up Ruby and install dependencies
|
32
31
|
uses: ruby/setup-ruby@v1
|
33
32
|
with:
|
34
33
|
ruby-version: ${{ matrix.ruby }}
|
35
|
-
|
36
|
-
- name: Install bundler
|
37
|
-
run: gem install bundler -v 2.1.1
|
38
|
-
|
39
|
-
- name: Install dependencies
|
40
|
-
run: bundle _2.1.1_ install
|
34
|
+
bundler-cache: true
|
41
35
|
env:
|
42
36
|
FARADAY_VERSION: ${{ matrix.faraday_version }}
|
43
37
|
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/.rubocop_todo.yml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on
|
3
|
+
# on 2023-03-27 19:15:05 UTC using RuboCop version 1.48.1.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
@@ -17,50 +17,45 @@ Lint/UselessAssignment:
|
|
17
17
|
- 'spec/helpers.rb'
|
18
18
|
|
19
19
|
# Offense count: 7
|
20
|
-
# Configuration parameters:
|
20
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
|
21
21
|
Metrics/AbcSize:
|
22
22
|
Max: 33
|
23
23
|
|
24
|
-
# Offense count:
|
25
|
-
# Configuration parameters: CountComments, CountAsOne,
|
26
|
-
#
|
24
|
+
# Offense count: 1
|
25
|
+
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
|
26
|
+
# AllowedMethods: refine
|
27
27
|
Metrics/BlockLength:
|
28
|
-
Max:
|
28
|
+
Max: 27
|
29
29
|
|
30
30
|
# Offense count: 1
|
31
31
|
# Configuration parameters: CountComments, CountAsOne.
|
32
32
|
Metrics/ClassLength:
|
33
33
|
Max: 152
|
34
34
|
|
35
|
-
# Offense count:
|
36
|
-
# Configuration parameters:
|
35
|
+
# Offense count: 4
|
36
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns.
|
37
37
|
Metrics/CyclomaticComplexity:
|
38
38
|
Max: 11
|
39
39
|
|
40
|
-
# Offense count:
|
41
|
-
# Configuration parameters: CountComments, CountAsOne,
|
40
|
+
# Offense count: 11
|
41
|
+
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
42
42
|
Metrics/MethodLength:
|
43
43
|
Max: 21
|
44
44
|
|
45
45
|
# Offense count: 1
|
46
|
-
# Configuration parameters:
|
47
|
-
Metrics/ModuleLength:
|
48
|
-
Max: 1657
|
49
|
-
|
50
|
-
# Offense count: 1
|
51
|
-
# Configuration parameters: IgnoredMethods.
|
46
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns.
|
52
47
|
Metrics/PerceivedComplexity:
|
53
48
|
Max: 11
|
54
49
|
|
55
50
|
# Offense count: 9
|
56
|
-
# Configuration parameters: Prefixes.
|
51
|
+
# Configuration parameters: Prefixes, AllowedPatterns.
|
57
52
|
# Prefixes: when, with, without
|
58
53
|
RSpec/ContextWording:
|
59
54
|
Exclude:
|
60
55
|
- 'spec/lib/geo_combine/geoblacklight_spec.rb'
|
61
56
|
- 'spec/lib/geo_combine/ogp_spec.rb'
|
62
57
|
|
63
|
-
# Offense count:
|
58
|
+
# Offense count: 11
|
64
59
|
# Configuration parameters: CountAsOne.
|
65
60
|
RSpec/ExampleLength:
|
66
61
|
Max: 12
|
@@ -78,10 +73,10 @@ RSpec/FilePath:
|
|
78
73
|
Exclude:
|
79
74
|
- 'spec/lib/geo_combine_spec.rb'
|
80
75
|
|
81
|
-
#
|
76
|
+
# Offense count: 23
|
77
|
+
# Configuration parameters: EnforcedStyle.
|
82
78
|
# SupportedStyles: have_received, receive
|
83
79
|
RSpec/MessageSpies:
|
84
|
-
EnforcedStyle: have_received
|
85
80
|
Exclude:
|
86
81
|
- 'spec/lib/geo_combine/esri_open_data_spec.rb'
|
87
82
|
- 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
|
@@ -89,7 +84,7 @@ RSpec/MessageSpies:
|
|
89
84
|
- 'spec/lib/geo_combine/ogp_spec.rb'
|
90
85
|
- 'spec/lib/geo_combine_spec.rb'
|
91
86
|
|
92
|
-
# Offense count:
|
87
|
+
# Offense count: 48
|
93
88
|
RSpec/MultipleExpectations:
|
94
89
|
Max: 5
|
95
90
|
|
@@ -99,12 +94,14 @@ RSpec/MultipleMemoizedHelpers:
|
|
99
94
|
Max: 7
|
100
95
|
|
101
96
|
# Offense count: 5
|
102
|
-
# Configuration parameters: IgnoreSharedExamples.
|
97
|
+
# Configuration parameters: EnforcedStyle, IgnoreSharedExamples.
|
98
|
+
# SupportedStyles: always, named_only
|
103
99
|
RSpec/NamedSubject:
|
104
100
|
Exclude:
|
105
101
|
- 'spec/lib/geo_combine/formatting_spec.rb'
|
106
102
|
|
107
103
|
# Offense count: 8
|
104
|
+
# Configuration parameters: AllowedGroups.
|
108
105
|
RSpec/NestedGroups:
|
109
106
|
Max: 4
|
110
107
|
|
@@ -113,6 +110,11 @@ RSpec/OverwritingSetup:
|
|
113
110
|
Exclude:
|
114
111
|
- 'spec/lib/geo_combine/geoblacklight_spec.rb'
|
115
112
|
|
113
|
+
# Offense count: 1
|
114
|
+
RSpec/PendingWithoutReason:
|
115
|
+
Exclude:
|
116
|
+
- 'spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb'
|
117
|
+
|
116
118
|
# Offense count: 2
|
117
119
|
RSpec/RepeatedExampleGroupBody:
|
118
120
|
Exclude:
|
@@ -151,15 +153,14 @@ Style/Documentation:
|
|
151
153
|
- 'test/**/*'
|
152
154
|
- 'lib/geo_combine/bounding_box.rb'
|
153
155
|
- 'lib/geo_combine/ckan_metadata.rb'
|
154
|
-
- 'lib/geo_combine/cli.rb'
|
155
156
|
- 'lib/geo_combine/geo_blacklight_harvester.rb'
|
156
157
|
- 'lib/geo_combine/geoblacklight.rb'
|
157
158
|
- 'lib/geo_combine/geometry_types.rb'
|
158
159
|
- 'lib/geo_combine/iso19139.rb'
|
159
160
|
|
160
|
-
# Offense count:
|
161
|
-
#
|
162
|
-
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives,
|
161
|
+
# Offense count: 8
|
162
|
+
# This cop supports safe autocorrection (--autocorrect).
|
163
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns.
|
163
164
|
# URISchemes: http, https
|
164
165
|
Layout/LineLength:
|
165
166
|
Max: 159
|
data/geo_combine.gemspec
CHANGED
@@ -25,6 +25,7 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.add_dependency 'sanitize'
|
26
26
|
spec.add_dependency 'thor'
|
27
27
|
spec.add_dependency 'faraday-net_http_persistent', '~> 2.0'
|
28
|
+
spec.add_dependency 'git'
|
28
29
|
|
29
30
|
spec.add_development_dependency 'bundler'
|
30
31
|
spec.add_development_dependency 'rake'
|
@@ -13,7 +13,7 @@ module GeoCombine
|
|
13
13
|
attr_reader :metadata
|
14
14
|
|
15
15
|
GEOBLACKLIGHT_VERSION = '1.0'
|
16
|
-
SCHEMA_JSON_URL = "https://raw.githubusercontent.com/
|
16
|
+
SCHEMA_JSON_URL = "https://raw.githubusercontent.com/OpenGeoMetadata/opengeometadata.github.io/main/docs/schema/geoblacklight-schema-#{GEOBLACKLIGHT_VERSION}.json"
|
17
17
|
DEPRECATED_KEYS_V1 = %w[
|
18
18
|
uuid
|
19
19
|
georss_polygon_s
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'find'
|
5
|
+
require 'git'
|
6
|
+
|
7
|
+
module GeoCombine
|
8
|
+
# Harvests Geoblacklight documents from OpenGeoMetadata for indexing
|
9
|
+
class Harvester
|
10
|
+
attr_reader :ogm_path, :schema_version
|
11
|
+
|
12
|
+
# Non-metadata repositories that shouldn't be harvested
|
13
|
+
def self.denylist
|
14
|
+
[
|
15
|
+
'GeoCombine',
|
16
|
+
'aardvark',
|
17
|
+
'metadata-issues',
|
18
|
+
'ogm_utils-python',
|
19
|
+
'opengeometadata.github.io',
|
20
|
+
'opengeometadata-rails',
|
21
|
+
'gbl-1_to_aardvark'
|
22
|
+
]
|
23
|
+
end
|
24
|
+
|
25
|
+
# GitHub API endpoint for OpenGeoMetadata repositories
|
26
|
+
def self.ogm_api_uri
|
27
|
+
URI('https://api.github.com/orgs/opengeometadata/repos?per_page=1000')
|
28
|
+
end
|
29
|
+
|
30
|
+
def initialize(
|
31
|
+
ogm_path: ENV.fetch('OGM_PATH', 'tmp/opengeometadata'),
|
32
|
+
schema_version: ENV.fetch('SCHEMA_VERSION', '1.0')
|
33
|
+
)
|
34
|
+
@ogm_path = ogm_path
|
35
|
+
@schema_version = schema_version
|
36
|
+
end
|
37
|
+
|
38
|
+
# Enumerable of docs to index, for passing to an indexer
|
39
|
+
def docs_to_index
|
40
|
+
return to_enum(:docs_to_index) unless block_given?
|
41
|
+
|
42
|
+
Find.find(@ogm_path) do |path|
|
43
|
+
# skip non-json and layers.json files
|
44
|
+
next unless File.basename(path).include?('.json') && File.basename(path) != 'layers.json'
|
45
|
+
|
46
|
+
doc = JSON.parse(File.read(path))
|
47
|
+
[doc].flatten.each do |record|
|
48
|
+
# skip indexing if this record has a different schema version than what we want
|
49
|
+
record_schema = record['gbl_mdVersion_s'] || record['geoblacklight_version']
|
50
|
+
next unless record_schema == @schema_version
|
51
|
+
|
52
|
+
yield record, path
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
# Update a repository via git
|
58
|
+
# If the repository doesn't exist, clone it.
|
59
|
+
def pull(repo)
|
60
|
+
repo_path = File.join(@ogm_path, repo)
|
61
|
+
clone(repo) unless File.directory? repo_path
|
62
|
+
|
63
|
+
Git.open(repo_path).pull
|
64
|
+
puts "Updated #{repo}"
|
65
|
+
1
|
66
|
+
end
|
67
|
+
|
68
|
+
# Update all repositories
|
69
|
+
# Return the count of repositories updated
|
70
|
+
def pull_all
|
71
|
+
repositories.map(&method(:pull)).reduce(:+)
|
72
|
+
end
|
73
|
+
|
74
|
+
# Clone a repository via git
|
75
|
+
# If the repository already exists, skip it.
|
76
|
+
def clone(repo)
|
77
|
+
repo_path = File.join(@ogm_path, repo)
|
78
|
+
repo_info = repository_info(repo)
|
79
|
+
|
80
|
+
# Skip if exists; warn if archived or empty
|
81
|
+
if File.directory? repo_path
|
82
|
+
puts "Skipping clone to #{repo_path}; directory exists"
|
83
|
+
return 0
|
84
|
+
end
|
85
|
+
puts "WARNING: repository '#{repo}' is archived" if repo_info['archived']
|
86
|
+
puts "WARNING: repository '#{repo}' is empty" if repo_info['size'].zero?
|
87
|
+
|
88
|
+
repo_url = "https://github.com/OpenGeoMetadata/#{repo}.git"
|
89
|
+
Git.clone(repo_url, nil, path: ogm_path, depth: 1)
|
90
|
+
puts "Cloned #{repo_url}"
|
91
|
+
1
|
92
|
+
end
|
93
|
+
|
94
|
+
# Clone all repositories via git
|
95
|
+
# Return the count of repositories cloned.
|
96
|
+
def clone_all
|
97
|
+
repositories.map(&method(:clone)).reduce(:+)
|
98
|
+
end
|
99
|
+
|
100
|
+
private
|
101
|
+
|
102
|
+
# List of repository names to harvest
|
103
|
+
def repositories
|
104
|
+
@repositories ||= JSON.parse(Net::HTTP.get(self.class.ogm_api_uri))
|
105
|
+
.filter { |repo| repo['size'].positive? }
|
106
|
+
.reject { |repo| repo['archived'] }
|
107
|
+
.map { |repo| repo['name'] }
|
108
|
+
.reject { |name| self.class.denylist.include? name }
|
109
|
+
end
|
110
|
+
|
111
|
+
def repository_info(repo_name)
|
112
|
+
JSON.parse(Net::HTTP.get(URI("https://api.github.com/repos/opengeometadata/#{repo_name}")))
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rsolr'
|
4
|
+
require 'faraday/net_http_persistent'
|
5
|
+
|
6
|
+
module GeoCombine
|
7
|
+
# Indexes Geoblacklight documents into Solr
|
8
|
+
class Indexer
|
9
|
+
attr_reader :solr
|
10
|
+
|
11
|
+
def self.solr(url: ENV.fetch('SOLR_URL', 'http://127.0.0.1:8983/solr/blacklight-core'))
|
12
|
+
RSolr.connect url: url, adapter: :net_http_persistent
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize(solr: GeoCombine::Indexer.solr)
|
16
|
+
@solr = solr
|
17
|
+
end
|
18
|
+
|
19
|
+
def solr_url
|
20
|
+
@solr.options[:url]
|
21
|
+
end
|
22
|
+
|
23
|
+
# Index everything and return the number of docs successfully indexed
|
24
|
+
def index(docs, commit_within: ENV.fetch('SOLR_COMMIT_WITHIN', 5000).to_i)
|
25
|
+
indexed_count = 0
|
26
|
+
|
27
|
+
docs.each do |record, path|
|
28
|
+
# log the unique identifier for the record for debugging
|
29
|
+
id = record['id'] || record['dc_identifier_s']
|
30
|
+
puts "Indexing #{id}: #{path}" if $DEBUG
|
31
|
+
|
32
|
+
# index the record into solr
|
33
|
+
@solr.update params: { commitWithin: commit_within, overwrite: true },
|
34
|
+
data: [record].to_json,
|
35
|
+
headers: { 'Content-Type' => 'application/json' }
|
36
|
+
|
37
|
+
# count the number of records successfully indexed
|
38
|
+
indexed_count += 1
|
39
|
+
rescue RSolr::Error::Http => e
|
40
|
+
puts e
|
41
|
+
end
|
42
|
+
|
43
|
+
@solr.commit
|
44
|
+
indexed_count
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GeoCombine
|
4
|
+
module Migrators
|
5
|
+
# TODO: WARNING! This class is not fully implemented and should not be used in
|
6
|
+
# production. See https://github.com/OpenGeoMetadata/GeoCombine/issues/121
|
7
|
+
# for remaining work.
|
8
|
+
#
|
9
|
+
# migrates the v1 schema to the aardvark schema
|
10
|
+
class V1AardvarkMigrator
|
11
|
+
attr_reader :v1_hash
|
12
|
+
|
13
|
+
# @param v1_hash [Hash] parsed json in the v1 schema
|
14
|
+
def initialize(v1_hash:)
|
15
|
+
@v1_hash = v1_hash
|
16
|
+
end
|
17
|
+
|
18
|
+
def run
|
19
|
+
v2_hash = convert_keys
|
20
|
+
v2_hash['gbl_mdVersion_s'] = 'Aardvark'
|
21
|
+
v2_hash
|
22
|
+
end
|
23
|
+
|
24
|
+
def convert_keys
|
25
|
+
v1_hash.transform_keys do |k|
|
26
|
+
SCHEMA_FIELD_MAP[k] || k
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
SCHEMA_FIELD_MAP = {
|
31
|
+
'dc_title_s' => 'dct_title_s', # new namespace
|
32
|
+
'dc_description_s' => 'dct_description_sm', # new namespace; single to multi-valued
|
33
|
+
'dc_language_s' => 'dct_language_sm', # new namespace; single to multi-valued
|
34
|
+
'dc_language_sm' => 'dct_language_sm', # new namespace; single to multi-valued
|
35
|
+
'dc_creator_sm' => 'dct_creator_sm', # new namespace
|
36
|
+
'dc_publisher_s' => 'dct_publisher_sm', # new namespace; single to multi-valued
|
37
|
+
'dct_provenance_s' => 'schema_provider_s', # new URI name
|
38
|
+
'dc_subject_sm' => 'dct_subject_sm', # new namespace
|
39
|
+
'solr_year_i' => 'gbl_indexYear_im', # new URI name; single to multi-valued
|
40
|
+
'dc_source_sm' => 'dct_source_sm', # new namespace
|
41
|
+
'dc_rights_s' => 'dct_accessRights_s', # new URI name
|
42
|
+
'dc_format_s' => 'dct_format_s', # new namespace
|
43
|
+
'layer_id_s' => 'gbl_wxsIdentifier_s', # new URI name
|
44
|
+
'layer_slug_s' => 'id', # new URI name
|
45
|
+
'dc_identifier_s' => 'dct_identifier_sm', # new namespace; single to multi-valued
|
46
|
+
'layer_modified_dt' => 'gbl_mdModified_dt', # new URI name
|
47
|
+
'geoblacklight_version' => 'gbl_mdVersion_s', # new URI name
|
48
|
+
'suppressed_b' => 'gbl_suppressed_b' # new namespace
|
49
|
+
}.freeze
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/lib/geo_combine/railtie.rb
CHANGED
data/lib/geo_combine/version.rb
CHANGED
data/lib/geo_combine.rb
CHANGED
@@ -72,6 +72,9 @@ require 'geo_combine/ogp'
|
|
72
72
|
# Require harvesting/indexing files
|
73
73
|
require 'geo_combine/geo_blacklight_harvester'
|
74
74
|
|
75
|
+
# Migrators
|
76
|
+
require 'geo_combine/migrators/v1_aardvark_migrator'
|
77
|
+
|
75
78
|
# Require gem files
|
76
79
|
require 'geo_combine/version'
|
77
80
|
require 'geo_combine/railtie' if defined?(Rails)
|
data/lib/tasks/geo_combine.rake
CHANGED
@@ -3,58 +3,33 @@
|
|
3
3
|
require 'json'
|
4
4
|
require 'rsolr'
|
5
5
|
require 'find'
|
6
|
-
require 'geo_combine/geo_blacklight_harvester'
|
7
6
|
require 'faraday/net_http_persistent'
|
7
|
+
require 'geo_combine/harvester'
|
8
|
+
require 'geo_combine/indexer'
|
9
|
+
require 'geo_combine/geo_blacklight_harvester'
|
8
10
|
|
9
11
|
namespace :geocombine do
|
10
12
|
desc 'Clone OpenGeoMetadata repositories'
|
11
13
|
task :clone, [:repo] do |_t, args|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
ogm_api_uri = URI('https://api.github.com/orgs/opengeometadata/repos')
|
16
|
-
ogm_repos = JSON.parse(Net::HTTP.get(ogm_api_uri)).map do |repo|
|
17
|
-
repo['clone_url'] if (repo['size']).positive?
|
18
|
-
end.compact
|
19
|
-
ogm_repos.reject! { |repo| GeoCombineRake.denylist.include?(repo) }
|
20
|
-
end
|
21
|
-
ogm_repos.each do |repo|
|
22
|
-
Kernel.system "echo #{repo} && mkdir -p #{GeoCombineRake.ogm_path} && cd #{GeoCombineRake.ogm_path} && git clone --depth 1 #{repo}"
|
23
|
-
end
|
14
|
+
harvester = GeoCombine::Harvester.new
|
15
|
+
total = args[:repo] ? harvester.clone(args.repo) : harvester.clone_all
|
16
|
+
puts "Cloned #{total} repositories"
|
24
17
|
end
|
25
18
|
|
26
19
|
desc '"git pull" OpenGeoMetadata repositories'
|
27
20
|
task :pull, [:repo] do |_t, args|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
Dir.glob("#{GeoCombineRake.ogm_path}/*")
|
32
|
-
end
|
33
|
-
paths.each do |path|
|
34
|
-
next unless File.directory?(path)
|
35
|
-
|
36
|
-
Kernel.system "echo #{path} && cd #{path} && git pull origin"
|
37
|
-
end
|
21
|
+
harvester = GeoCombine::Harvester.new
|
22
|
+
total = args[:repo] ? harvester.pull(args.repo) : harvester.pull_all
|
23
|
+
puts "Updated #{total} repositories"
|
38
24
|
end
|
39
25
|
|
40
26
|
desc 'Index all JSON documents except Layers.json'
|
41
27
|
task :index do
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
doc = JSON.parse(File.read(path))
|
48
|
-
[doc].flatten.each do |record|
|
49
|
-
puts "Indexing #{record['layer_slug_s']}: #{path}" if $DEBUG
|
50
|
-
solr.update params: { commitWithin: GeoCombineRake.commit_within, overwrite: true },
|
51
|
-
data: [record].to_json,
|
52
|
-
headers: { 'Content-Type' => 'application/json' }
|
53
|
-
rescue RSolr::Error::Http => e
|
54
|
-
puts e
|
55
|
-
end
|
56
|
-
end
|
57
|
-
solr.commit
|
28
|
+
harvester = GeoCombine::Harvester.new
|
29
|
+
indexer = GeoCombine::Indexer.new
|
30
|
+
puts "Indexing #{harvester.ogm_path} into #{indexer.solr_url}"
|
31
|
+
total = indexer.index(harvester.docs_to_index)
|
32
|
+
puts "Indexed #{total} documents"
|
58
33
|
end
|
59
34
|
|
60
35
|
namespace :geoblacklight_harvester do
|
@@ -66,29 +41,3 @@ namespace :geocombine do
|
|
66
41
|
end
|
67
42
|
end
|
68
43
|
end
|
69
|
-
|
70
|
-
# Class to hold helper methods for use in GeoCombine rake tasks
|
71
|
-
class GeoCombineRake
|
72
|
-
def self.commit_within
|
73
|
-
(ENV['SOLR_COMMIT_WITHIN'] || 5000).to_i
|
74
|
-
end
|
75
|
-
|
76
|
-
def self.denylist
|
77
|
-
[
|
78
|
-
'https://github.com/OpenGeoMetadata/GeoCombine.git',
|
79
|
-
'https://github.com/OpenGeoMetadata/aardvark.git',
|
80
|
-
'https://github.com/OpenGeoMetadata/metadata-issues.git',
|
81
|
-
'https://github.com/OpenGeoMetadata/ogm_utils-python.git',
|
82
|
-
'https://github.com/OpenGeoMetadata/opengeometadata.github.io.git',
|
83
|
-
'https://github.com/OpenGeoMetadata/opengeometadata-rails.git'
|
84
|
-
]
|
85
|
-
end
|
86
|
-
|
87
|
-
def self.ogm_path
|
88
|
-
ENV['OGM_PATH'] || 'tmp/opengeometadata'
|
89
|
-
end
|
90
|
-
|
91
|
-
def self.solr_url
|
92
|
-
ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'
|
93
|
-
end
|
94
|
-
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
{
|
2
|
+
"gbl_mdVersion_s":"Aardvark",
|
3
|
+
"dct_identifier_sm":"http://purl.stanford.edu/cz128vq0535",
|
4
|
+
"dct_title_s":"2005 Rural Poverty GIS Database: Uganda",
|
5
|
+
"dct_description_sm":"This polygon shapefile contains 2005 poverty data for 855 rural subcounties in Uganda. These data are intended for researchers, students, policy makers and the general public for reference and mapping purposes, and may be used for basic applications such as viewing, querying, and map output production.",
|
6
|
+
"dct_accessRights_s":"Public",
|
7
|
+
"schema_provider_s":"Stanford",
|
8
|
+
"dct_references_s":"{\"http://schema.org/url\":\"http://purl.stanford.edu/cz128vq0535\",\"http://schema.org/downloadUrl\":\"http://stacks.stanford.edu/file/druid:cz128vq0535/data.zip\",\"http://www.loc.gov/mods/v3\":\"http://purl.stanford.edu/cz128vq0535.mods\",\"http://www.isotc211.org/schemas/2005/gmd/\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/iso19139.xml\",\"http://www.w3.org/1999/xhtml\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/default.html\",\"http://www.opengis.net/def/serviceType/ogc/wfs\":\"https://geowebservices.stanford.edu/geoserver/wfs\",\"http://www.opengis.net/def/serviceType/ogc/wms\":\"https://geowebservices.stanford.edu/geoserver/wms\"}",
|
9
|
+
"gbl_wxsIdentifier_s":"druid:cz128vq0535",
|
10
|
+
"id":"stanford-cz128vq0535",
|
11
|
+
"layer_geom_type_s":"Polygon",
|
12
|
+
"gbl_mdModified_dt":"2015-01-13T18:46:38Z",
|
13
|
+
"dct_format_s":"Shapefile",
|
14
|
+
"dct_language_sm":"English",
|
15
|
+
"dc_type_s":"Dataset",
|
16
|
+
"dct_publisher_sm":"Uganda Bureau of Statistics",
|
17
|
+
"dct_creator_sm":[
|
18
|
+
"Uganda Bureau of Statistics"
|
19
|
+
],
|
20
|
+
"dct_subject_sm":[
|
21
|
+
"Poverty",
|
22
|
+
"Statistics"
|
23
|
+
],
|
24
|
+
"dct_issued_s":"2005",
|
25
|
+
"dct_temporal_sm":[
|
26
|
+
"2005"
|
27
|
+
],
|
28
|
+
"dct_spatial_sm":[
|
29
|
+
"Uganda"
|
30
|
+
],
|
31
|
+
"solr_geom":"ENVELOPE(29.572742, 35.000308, 4.234077, -1.478794)",
|
32
|
+
"gbl_indexYear_im":2005
|
33
|
+
}
|
@@ -0,0 +1,57 @@
|
|
1
|
+
{
|
2
|
+
"dct_title_s": "A description of the coast & city of Goa.",
|
3
|
+
"dct_alternative_sm": [
|
4
|
+
"A description of the coast & city of Goa."
|
5
|
+
],
|
6
|
+
"dct_description_sm": [
|
7
|
+
"Photocopy. Some relief shown pictorially. North oriented to the left. \"The city of Goa & all its dependencies doth justly belong to the crown of England by the mariage [sic] of King Charles the Second with Queen Catherine\"--upper right. \"21\"--upper right. Outside of original margin is image of British Museum rule and \"7-Tab-125.\" Dimensions: 51 x 61 centimeters"
|
8
|
+
],
|
9
|
+
"dct_language_sm": [
|
10
|
+
"eng"
|
11
|
+
],
|
12
|
+
"dct_publisher_sm": [
|
13
|
+
"British Museum? (London?)"
|
14
|
+
],
|
15
|
+
"schema_provider_s": "University of Minnesota",
|
16
|
+
"gbl_resourceClass_sm": [
|
17
|
+
"Maps"
|
18
|
+
],
|
19
|
+
"dcat_keyword_sm": [
|
20
|
+
"Velha Goa (India) Maps",
|
21
|
+
"Maps"
|
22
|
+
],
|
23
|
+
"dct_temporal_sm": [
|
24
|
+
"1900-1999"
|
25
|
+
],
|
26
|
+
"dct_issued_s": "1900 - 1999?",
|
27
|
+
"gbl_indexYear_im": [
|
28
|
+
"1900"
|
29
|
+
],
|
30
|
+
"gbl_dateRange_drsim": [
|
31
|
+
"[1900 TO 1999]"
|
32
|
+
],
|
33
|
+
"dct_spatial_sm": [
|
34
|
+
"India"
|
35
|
+
],
|
36
|
+
"locn_geometry": "ENVELOPE(-2.36,-2.06,11.73,11.5101)",
|
37
|
+
"dcat_bbox": "ENVELOPE(-2.36,-2.06,11.73,11.5101)",
|
38
|
+
"dcat_centroid": "11.620049999999999,-2.21",
|
39
|
+
"pcdm_memberOf_sm": [
|
40
|
+
"64bd8c4c-8e60-4956-b43d-bdc3f93db488"
|
41
|
+
],
|
42
|
+
"dct_isPartOf_sm": [
|
43
|
+
"05d-01"
|
44
|
+
],
|
45
|
+
"dct_rights_sm": [
|
46
|
+
"Use of this item may be governed by US and international copyright laws. You may be able to use this item, but copyright and other considerations may apply. For possible additional information or guidance on your use, please contact the contributing organization."
|
47
|
+
],
|
48
|
+
"dct_accessRights_s": "Public",
|
49
|
+
"dct_format_s": "JPEG",
|
50
|
+
"dct_references_s": "{\"http://schema.org/downloadUrl\":\"http://cdm16022.contentdm.oclc.org/utils/getfile/collection/p16022coll205/id/236/filename/print/page/download/fparams/forcedownload\",\"http://schema.org/url\":\"https://umedia.lib.umn.edu/item/p16022coll205:236\",\"http://iiif.io/api/presentation#manifest\":\"https://cdm16022.contentdm.oclc.org/iiif/info/p16022coll205/236/manifest.json\"}",
|
51
|
+
"id": "p16022coll205:236",
|
52
|
+
"dct_identifier_sm": [
|
53
|
+
"UMN_ALMA:9949551790001701"
|
54
|
+
],
|
55
|
+
"gbl_mdModified_dt": "2022-04-01T15:27:13Z",
|
56
|
+
"gbl_mdVersion_s": "Aardvark"
|
57
|
+
}
|
data/spec/fixtures/json_docs.rb
CHANGED
@@ -15,6 +15,12 @@ module JsonDocs
|
|
15
15
|
File.read(File.join(File.dirname(__FILE__), './docs/full_geoblacklight.json'))
|
16
16
|
end
|
17
17
|
|
18
|
+
##
|
19
|
+
# full_geoblacklight fixture converted to the aardvark schema
|
20
|
+
def full_geoblacklight_aardvark
|
21
|
+
File.read(File.join(File.dirname(__FILE__), './docs/full_geoblacklight_aardvark.json'))
|
22
|
+
end
|
23
|
+
|
18
24
|
##
|
19
25
|
# A sample Esri OpenData metadata record
|
20
26
|
def esri_opendata_metadata
|
@@ -151,8 +151,8 @@ RSpec.describe GeoCombine::Geoblacklight do
|
|
151
151
|
let(:unparseable_json) do
|
152
152
|
<<-JSON
|
153
153
|
{
|
154
|
-
|
155
|
-
|
154
|
+
"http://schema.org/url":"http://example.com/abc123",,
|
155
|
+
"http://schema.org/downloadUrl":"http://example.com/abc123/data.zip"
|
156
156
|
}
|
157
157
|
JSON
|
158
158
|
end
|
@@ -184,7 +184,7 @@ RSpec.describe GeoCombine::Geoblacklight do
|
|
184
184
|
|
185
185
|
describe 'spatial_validate!' do
|
186
186
|
context 'when valid' do
|
187
|
-
it { full_geobl.spatial_validate! }
|
187
|
+
it { expect { full_geobl.spatial_validate! }.not_to raise_error }
|
188
188
|
end
|
189
189
|
|
190
190
|
context 'when invalid' do
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'git'
|
4
|
+
require 'geo_combine/harvester'
|
5
|
+
require 'spec_helper'
|
6
|
+
|
7
|
+
RSpec.describe GeoCombine::Harvester do
|
8
|
+
subject(:harvester) { described_class.new(ogm_path: 'spec/fixtures/indexing') }
|
9
|
+
|
10
|
+
let(:repo_name) { 'my-institution' }
|
11
|
+
let(:repo_path) { File.join(harvester.ogm_path, repo_name) }
|
12
|
+
let(:repo_url) { "https://github.com/OpenGeoMetadata/#{repo_name}.git" }
|
13
|
+
let(:stub_repo) { instance_double(Git::Base) }
|
14
|
+
let(:stub_gh_api) do
|
15
|
+
[
|
16
|
+
{ name: repo_name, size: 100 },
|
17
|
+
{ name: 'another-institution', size: 100 },
|
18
|
+
{ name: 'outdated-institution', size: 100, archived: true }, # archived
|
19
|
+
{ name: 'aardvark', size: 300 }, # on denylist
|
20
|
+
{ name: 'empty', size: 0 } # no data
|
21
|
+
]
|
22
|
+
end
|
23
|
+
|
24
|
+
before do
|
25
|
+
# stub github API requests
|
26
|
+
# use the whole org response, or just a portion for particular repos
|
27
|
+
allow(Net::HTTP).to receive(:get) do |uri|
|
28
|
+
if uri == described_class.ogm_api_uri
|
29
|
+
stub_gh_api.to_json
|
30
|
+
else
|
31
|
+
repo_name = uri.path.split('/').last.gsub('.git', '')
|
32
|
+
stub_gh_api.find { |repo| repo[:name] == repo_name }.to_json
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# stub git commands
|
37
|
+
allow(Git).to receive(:open).and_return(stub_repo)
|
38
|
+
allow(Git).to receive(:clone).and_return(stub_repo)
|
39
|
+
allow(stub_repo).to receive(:pull).and_return(stub_repo)
|
40
|
+
end
|
41
|
+
|
42
|
+
describe '#docs_to_index' do
|
43
|
+
it 'yields each JSON record with its path, skipping layers.JSON' do
|
44
|
+
expect { |b| harvester.docs_to_index(&b) }.to yield_successive_args(
|
45
|
+
[JSON.parse(File.read('spec/fixtures/indexing/basic_geoblacklight.json')), 'spec/fixtures/indexing/basic_geoblacklight.json'],
|
46
|
+
[JSON.parse(File.read('spec/fixtures/indexing/geoblacklight.json')), 'spec/fixtures/indexing/geoblacklight.json']
|
47
|
+
)
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'skips records with a different schema version' do
|
51
|
+
harvester = described_class.new(ogm_path: 'spec/fixtures/indexing/', schema_version: 'Aardvark')
|
52
|
+
expect { |b| harvester.docs_to_index(&b) }.to yield_successive_args(
|
53
|
+
[JSON.parse(File.read('spec/fixtures/indexing/aardvark.json')), 'spec/fixtures/indexing/aardvark.json']
|
54
|
+
)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
describe '#pull' do
|
59
|
+
it 'can pull a single repository' do
|
60
|
+
harvester.pull(repo_name)
|
61
|
+
expect(Git).to have_received(:open).with(repo_path)
|
62
|
+
expect(stub_repo).to have_received(:pull)
|
63
|
+
end
|
64
|
+
|
65
|
+
it 'clones a repo before pulling if it does not exist' do
|
66
|
+
harvester.pull(repo_name)
|
67
|
+
expect(Git).to have_received(:clone)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
describe '#pull_all' do
|
72
|
+
it 'can pull all repositories' do
|
73
|
+
harvester.pull_all
|
74
|
+
expect(Git).to have_received(:open).exactly(2).times
|
75
|
+
expect(stub_repo).to have_received(:pull).exactly(2).times
|
76
|
+
end
|
77
|
+
|
78
|
+
it 'returns the count of repositories pulled' do
|
79
|
+
expect(harvester.pull_all).to eq(2)
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'skips repositories in the denylist' do
|
83
|
+
harvester.pull_all
|
84
|
+
expect(Git).not_to have_received(:open).with('https://github.com/OpenGeoMetadata/aardvark.git')
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'skips archived repositories' do
|
88
|
+
harvester.pull_all
|
89
|
+
expect(Git).not_to have_received(:open).with('https://github.com/OpenGeoMetadata/outdated-institution.git')
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
describe '#clone' do
|
94
|
+
it 'can clone a single repository' do
|
95
|
+
harvester.clone(repo_name)
|
96
|
+
expect(Git).to have_received(:clone).with(
|
97
|
+
repo_url,
|
98
|
+
nil, {
|
99
|
+
depth: 1, # shallow clone
|
100
|
+
path: harvester.ogm_path
|
101
|
+
}
|
102
|
+
)
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'skips repositories that already exist' do
|
106
|
+
allow(File).to receive(:directory?).with(repo_path).and_return(true)
|
107
|
+
harvester.clone(repo_name)
|
108
|
+
expect(Git).not_to have_received(:clone)
|
109
|
+
end
|
110
|
+
|
111
|
+
it 'warns if a repository is empty' do
|
112
|
+
allow(Net::HTTP).to receive(:get).with('https://api.github.com/repos/opengeometadata/empty').and_return('{"size": 0}')
|
113
|
+
expect do
|
114
|
+
harvester.clone('empty')
|
115
|
+
end.to output(/repository 'empty' is empty/).to_stdout
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'warns if a repository is archived' do
|
119
|
+
allow(Net::HTTP).to receive(:get).with('https://api.github.com/repos/opengeometadata/empty').and_return('{"archived": true}')
|
120
|
+
expect do
|
121
|
+
harvester.clone('outdated-institution')
|
122
|
+
end.to output(/repository 'outdated-institution' is archived/).to_stdout
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
describe '#clone_all' do
|
127
|
+
it 'can clone all repositories' do
|
128
|
+
harvester.clone_all
|
129
|
+
expect(Git).to have_received(:clone).exactly(2).times
|
130
|
+
end
|
131
|
+
|
132
|
+
it 'skips repositories in the denylist' do
|
133
|
+
harvester.clone_all
|
134
|
+
expect(Git).not_to have_received(:clone).with('https://github.com/OpenGeoMetadata/aardvark.git')
|
135
|
+
end
|
136
|
+
|
137
|
+
it 'returns the count of repositories cloned' do
|
138
|
+
expect(harvester.clone_all).to eq(2)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
describe '#ogm_api_uri' do
|
143
|
+
it 'includes a per_page param' do
|
144
|
+
expect(described_class.send('ogm_api_uri').to_s).to include('per_page')
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'geo_combine/indexer'
|
4
|
+
require 'spec_helper'
|
5
|
+
|
6
|
+
RSpec.describe GeoCombine::Indexer do
|
7
|
+
subject(:indexer) { described_class.new(solr: solr) }
|
8
|
+
|
9
|
+
let(:solr) { instance_double(RSolr::Client) }
|
10
|
+
let(:docs) do
|
11
|
+
[
|
12
|
+
[{ 'id' => '1' }, 'path/to/record1.json'], # v1.0 schema
|
13
|
+
[{ 'dc_identifier_s' => '2' }, 'path/to/record2.json'] # aardvark schema
|
14
|
+
]
|
15
|
+
end
|
16
|
+
|
17
|
+
before do
|
18
|
+
allow(solr).to receive(:update)
|
19
|
+
allow(solr).to receive(:commit)
|
20
|
+
end
|
21
|
+
|
22
|
+
describe '#initialize' do
|
23
|
+
before do
|
24
|
+
stub_const('ENV', 'SOLR_URL' => 'http://localhost:8983/solr/geoblacklight')
|
25
|
+
allow(RSolr).to receive(:connect).and_return(solr)
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'connects to a solr instance if set in the environment' do
|
29
|
+
described_class.new
|
30
|
+
expect(RSolr).to have_received(:connect).with(
|
31
|
+
url: 'http://localhost:8983/solr/geoblacklight',
|
32
|
+
adapter: :net_http_persistent
|
33
|
+
)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe '#index' do
|
38
|
+
it 'posts each record to solr as JSON' do
|
39
|
+
indexer.index([docs[0]], commit_within: 1)
|
40
|
+
expect(solr).to have_received(:update).with(
|
41
|
+
params: { commitWithin: 1, overwrite: true },
|
42
|
+
data: [docs[0][0]].to_json,
|
43
|
+
headers: { 'Content-Type' => 'application/json' }
|
44
|
+
)
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'prints the id and path of each record in debug mode' do
|
48
|
+
$DEBUG = true
|
49
|
+
expect { indexer.index([docs[0]]) }.to output("Indexing 1: path/to/record1.json\n").to_stdout
|
50
|
+
expect { indexer.index([docs[1]]) }.to output("Indexing 2: path/to/record2.json\n").to_stdout
|
51
|
+
$DEBUG = false
|
52
|
+
end
|
53
|
+
|
54
|
+
it 'commits changes to solr after indexing' do
|
55
|
+
indexer.index(docs)
|
56
|
+
expect(solr).to have_received(:commit).once
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'returns the count of records successfully indexed' do
|
60
|
+
expect(indexer.index(docs)).to eq 2
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe GeoCombine::Migrators::V1AardvarkMigrator do
|
6
|
+
include JsonDocs
|
7
|
+
|
8
|
+
describe '#run' do
|
9
|
+
it 'migrates keys' do
|
10
|
+
input_hash = JSON.parse(full_geoblacklight)
|
11
|
+
# TODO: Note that this fixture has not yet been fully converted to
|
12
|
+
# aardvark. See https://github.com/OpenGeoMetadata/GeoCombine/issues/121
|
13
|
+
# for remaining work.
|
14
|
+
expected_output = JSON.parse(full_geoblacklight_aardvark)
|
15
|
+
expect(described_class.new(v1_hash: input_hash).run).to eq(expected_output)
|
16
|
+
end
|
17
|
+
|
18
|
+
context 'when the given record is already in aardvark schema' do
|
19
|
+
xit 'returns the record unchanged'
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -7,7 +7,6 @@ SimpleCov.start 'rails' do
|
|
7
7
|
add_filter 'lib/tasks/geo_combine.rake'
|
8
8
|
add_filter 'lib/geo_combine/version.rb'
|
9
9
|
add_filter 'lib/geo_combine/railtie.rb'
|
10
|
-
add_filter 'lib/geo_combine/cli.rb'
|
11
10
|
minimum_coverage 95 # When updating this value, update the README badge value
|
12
11
|
end
|
13
12
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: geo_combine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jack Reed
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-04-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '2.0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: git
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: bundler
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -237,8 +251,7 @@ dependencies:
|
|
237
251
|
description: A Ruby toolkit for managing geospatial metadata
|
238
252
|
email:
|
239
253
|
- pjreed@stanford.edu
|
240
|
-
executables:
|
241
|
-
- geocombine
|
254
|
+
executables: []
|
242
255
|
extensions: []
|
243
256
|
extra_rdoc_files: []
|
244
257
|
files:
|
@@ -251,12 +264,10 @@ files:
|
|
251
264
|
- LICENSE.txt
|
252
265
|
- README.md
|
253
266
|
- Rakefile
|
254
|
-
- bin/geocombine
|
255
267
|
- geo_combine.gemspec
|
256
268
|
- lib/geo_combine.rb
|
257
269
|
- lib/geo_combine/bounding_box.rb
|
258
270
|
- lib/geo_combine/ckan_metadata.rb
|
259
|
-
- lib/geo_combine/cli.rb
|
260
271
|
- lib/geo_combine/esri_open_data.rb
|
261
272
|
- lib/geo_combine/exceptions.rb
|
262
273
|
- lib/geo_combine/fgdc.rb
|
@@ -265,7 +276,10 @@ files:
|
|
265
276
|
- lib/geo_combine/geo_blacklight_harvester.rb
|
266
277
|
- lib/geo_combine/geoblacklight.rb
|
267
278
|
- lib/geo_combine/geometry_types.rb
|
279
|
+
- lib/geo_combine/harvester.rb
|
280
|
+
- lib/geo_combine/indexer.rb
|
268
281
|
- lib/geo_combine/iso19139.rb
|
282
|
+
- lib/geo_combine/migrators/v1_aardvark_migrator.rb
|
269
283
|
- lib/geo_combine/ogp.rb
|
270
284
|
- lib/geo_combine/railtie.rb
|
271
285
|
- lib/geo_combine/subjects.rb
|
@@ -294,6 +308,7 @@ files:
|
|
294
308
|
- spec/fixtures/docs/ckan.json
|
295
309
|
- spec/fixtures/docs/esri_open_data.json
|
296
310
|
- spec/fixtures/docs/full_geoblacklight.json
|
311
|
+
- spec/fixtures/docs/full_geoblacklight_aardvark.json
|
297
312
|
- spec/fixtures/docs/geoblacklight_pre_v1.json
|
298
313
|
- spec/fixtures/docs/ogp_harvard_line.json
|
299
314
|
- spec/fixtures/docs/ogp_harvard_raster.json
|
@@ -304,6 +319,7 @@ files:
|
|
304
319
|
- spec/fixtures/docs/simple_xslt.xsl
|
305
320
|
- spec/fixtures/docs/stanford_iso.xml
|
306
321
|
- spec/fixtures/docs/tufts_fgdc.xml
|
322
|
+
- spec/fixtures/indexing/aardvark.json
|
307
323
|
- spec/fixtures/indexing/basic_geoblacklight.json
|
308
324
|
- spec/fixtures/indexing/geoblacklight.json
|
309
325
|
- spec/fixtures/indexing/layers.json
|
@@ -318,10 +334,12 @@ files:
|
|
318
334
|
- spec/lib/geo_combine/formatting_spec.rb
|
319
335
|
- spec/lib/geo_combine/geo_blacklight_harvester_spec.rb
|
320
336
|
- spec/lib/geo_combine/geoblacklight_spec.rb
|
337
|
+
- spec/lib/geo_combine/harvester_spec.rb
|
338
|
+
- spec/lib/geo_combine/indexer_spec.rb
|
321
339
|
- spec/lib/geo_combine/iso19139_spec.rb
|
340
|
+
- spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb
|
322
341
|
- spec/lib/geo_combine/ogp_spec.rb
|
323
342
|
- spec/lib/geo_combine_spec.rb
|
324
|
-
- spec/lib/tasks/geo_combine_spec.rb
|
325
343
|
- spec/spec_helper.rb
|
326
344
|
- spec/support/fixtures.rb
|
327
345
|
homepage: ''
|
@@ -343,7 +361,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
343
361
|
- !ruby/object:Gem::Version
|
344
362
|
version: '0'
|
345
363
|
requirements: []
|
346
|
-
rubygems_version: 3.
|
364
|
+
rubygems_version: 3.3.7
|
347
365
|
signing_key:
|
348
366
|
specification_version: 4
|
349
367
|
summary: A Ruby toolkit for managing geospatial metadata
|
@@ -354,6 +372,7 @@ test_files:
|
|
354
372
|
- spec/fixtures/docs/ckan.json
|
355
373
|
- spec/fixtures/docs/esri_open_data.json
|
356
374
|
- spec/fixtures/docs/full_geoblacklight.json
|
375
|
+
- spec/fixtures/docs/full_geoblacklight_aardvark.json
|
357
376
|
- spec/fixtures/docs/geoblacklight_pre_v1.json
|
358
377
|
- spec/fixtures/docs/ogp_harvard_line.json
|
359
378
|
- spec/fixtures/docs/ogp_harvard_raster.json
|
@@ -364,6 +383,7 @@ test_files:
|
|
364
383
|
- spec/fixtures/docs/simple_xslt.xsl
|
365
384
|
- spec/fixtures/docs/stanford_iso.xml
|
366
385
|
- spec/fixtures/docs/tufts_fgdc.xml
|
386
|
+
- spec/fixtures/indexing/aardvark.json
|
367
387
|
- spec/fixtures/indexing/basic_geoblacklight.json
|
368
388
|
- spec/fixtures/indexing/geoblacklight.json
|
369
389
|
- spec/fixtures/indexing/layers.json
|
@@ -378,9 +398,11 @@ test_files:
|
|
378
398
|
- spec/lib/geo_combine/formatting_spec.rb
|
379
399
|
- spec/lib/geo_combine/geo_blacklight_harvester_spec.rb
|
380
400
|
- spec/lib/geo_combine/geoblacklight_spec.rb
|
401
|
+
- spec/lib/geo_combine/harvester_spec.rb
|
402
|
+
- spec/lib/geo_combine/indexer_spec.rb
|
381
403
|
- spec/lib/geo_combine/iso19139_spec.rb
|
404
|
+
- spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb
|
382
405
|
- spec/lib/geo_combine/ogp_spec.rb
|
383
406
|
- spec/lib/geo_combine_spec.rb
|
384
|
-
- spec/lib/tasks/geo_combine_spec.rb
|
385
407
|
- spec/spec_helper.rb
|
386
408
|
- spec/support/fixtures.rb
|
data/bin/geocombine
DELETED
data/lib/geo_combine/cli.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'thor'
|
4
|
-
require 'rake'
|
5
|
-
|
6
|
-
root = Gem::Specification.find_by_name('geo_combine').gem_dir
|
7
|
-
tasks = File.join(root, 'lib/tasks/*.rake')
|
8
|
-
Dir.glob(tasks).each { |r| load r }
|
9
|
-
|
10
|
-
module GeoCombine
|
11
|
-
class CLI < Thor
|
12
|
-
desc 'clone', 'Clone all OpenGeoMetadata repositories'
|
13
|
-
def clone
|
14
|
-
Rake::Task['geocombine:clone'].invoke
|
15
|
-
end
|
16
|
-
|
17
|
-
desc 'pull', '"git pull" OpenGeoMetadata repositories'
|
18
|
-
def pull
|
19
|
-
Rake::Task['geocombine:pull'].invoke
|
20
|
-
end
|
21
|
-
|
22
|
-
desc 'index', 'Index all of the GeoBlacklight documents'
|
23
|
-
def index
|
24
|
-
Rake::Task['geocombine:index'].invoke
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
@@ -1,45 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'spec_helper'
|
4
|
-
require 'rake'
|
5
|
-
|
6
|
-
describe 'geo_combine.rake' do
|
7
|
-
before(:all) do
|
8
|
-
load File.expand_path('../../../lib/tasks/geo_combine.rake', __dir__)
|
9
|
-
Rake::Task.define_task(:environment)
|
10
|
-
end
|
11
|
-
|
12
|
-
before do
|
13
|
-
allow(ENV).to receive(:[]).and_call_original
|
14
|
-
allow(ENV).to receive(:[]).with('OGM_PATH').and_return(File.join(fixture_dir, 'indexing'))
|
15
|
-
end
|
16
|
-
|
17
|
-
describe 'geocombine:clone' do
|
18
|
-
before do
|
19
|
-
WebMock.disable_net_connect!
|
20
|
-
end
|
21
|
-
|
22
|
-
after do
|
23
|
-
WebMock.allow_net_connect!
|
24
|
-
end
|
25
|
-
|
26
|
-
it 'does not clone repos on deny list' do
|
27
|
-
stub_request(:get, 'https://api.github.com/orgs/opengeometadata/repos').to_return(status: 200, body: read_fixture('docs/repos.json'))
|
28
|
-
allow(Kernel).to receive(:system)
|
29
|
-
Rake::Task['geocombine:clone'].invoke
|
30
|
-
expect(Kernel).to have_received(:system).exactly(21).times
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
describe 'geocombine:index' do
|
35
|
-
it 'only indexes .json files but not layers.json' do
|
36
|
-
rsolr_mock = instance_double(RSolr::Client)
|
37
|
-
allow(rsolr_mock).to receive(:update)
|
38
|
-
allow(rsolr_mock).to receive(:commit)
|
39
|
-
allow(RSolr).to receive(:connect).and_return(rsolr_mock)
|
40
|
-
Rake::Task['geocombine:index'].invoke
|
41
|
-
# We expect 2 files to index
|
42
|
-
expect(rsolr_mock).to have_received(:update).exactly(2).times
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|