geo_combine 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7d47d9cff1e3bf0e3ec2237c65554524b9a624d8c9faf833d1c99c6fdc4f7c94
4
- data.tar.gz: 7069057b0b5166f2ed5496af51a270cefdd6ee5aa3937456f10a54b2f5f32536
3
+ metadata.gz: c91fb8acc12d958eb450bd5cebc525073f30b6df664bd55f80622a942d8e6703
4
+ data.tar.gz: abcc395544fe5530d2fa9b15d1f26f2e758f5b48983fcab35f8fe7305e424e92
5
5
  SHA512:
6
- metadata.gz: 7208f9b13e73b183571861a7d40bcd6a1c1a466b4af7ff6fead7174bdb992a51955ebb986b07094ce195dd2e1d520a3c8b0b51b7c18178b57b3b0eb6db0c6e4e
7
- data.tar.gz: 7daf75a1d31036a3659d4e96f261f4f1ce20bfea2083827f32e767ee8f15c14d6c112053cd009ca17cdfc6c433b9f7b64ce524fbf4fdaf7fe56877b1b7c07360
6
+ metadata.gz: 8ad8cee3e3aa481816dffe885db30217daaa3d5167240ff8395954f02226c8d54a8ce9227efa602127ac55f08106a77f1f1ca3ac0a3203fd51c92fe284327ecd
7
+ data.tar.gz: 523b156be9684071cd06ddcd6a10dbb14b93f3b35970ad976787edb7363667301c72963075563cccc7e330f4c9a0acad26df271f63a799e1eeaa5dc72373f574
@@ -7,12 +7,11 @@ jobs:
7
7
  runs-on: ubuntu-latest
8
8
  steps:
9
9
  - uses: actions/checkout@v2
10
- - name: Set up Ruby
10
+ - name: Set up Ruby and install dependencies
11
11
  uses: ruby/setup-ruby@v1
12
12
  with:
13
13
  ruby-version: 2.7
14
- - name: Install dependencies
15
- run: bundle install
14
+ bundler-cache: true
16
15
  - name: Run linter
17
16
  run: bundle exec rubocop
18
17
 
@@ -20,7 +19,7 @@ jobs:
20
19
  runs-on: ubuntu-latest
21
20
  strategy:
22
21
  matrix:
23
- ruby: [2.7, 3.0, 3.1]
22
+ ruby: [2.7, '3.0', 3.1]
24
23
  faraday_version: [''] # Defaults to whatever's the most recent version.
25
24
  include:
26
25
  - ruby: 2.7
@@ -28,16 +27,11 @@ jobs:
28
27
  steps:
29
28
  - uses: actions/checkout@v2
30
29
 
31
- - name: Set up Ruby
30
+ - name: Set up Ruby and install dependencies
32
31
  uses: ruby/setup-ruby@v1
33
32
  with:
34
33
  ruby-version: ${{ matrix.ruby }}
35
-
36
- - name: Install bundler
37
- run: gem install bundler -v 2.1.1
38
-
39
- - name: Install dependencies
40
- run: bundle _2.1.1_ install
34
+ bundler-cache: true
41
35
  env:
42
36
  FARADAY_VERSION: ${{ matrix.faraday_version }}
43
37
 
data/.gitignore CHANGED
@@ -14,3 +14,4 @@
14
14
  mkmf.log
15
15
  .tool-versions
16
16
  .byebug_history
17
+ .ruby-version
data/.rubocop.yml CHANGED
@@ -11,6 +11,7 @@ AllCops:
11
11
  Exclude:
12
12
  - 'geo_combine.gemspec'
13
13
  - 'tmp/**/*'
14
+ - 'vendor/bundle/**/*'
14
15
 
15
16
  RSpec/DescribeClass:
16
17
  Enabled: false
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2022-02-17 18:38:52 UTC using RuboCop version 1.25.1.
3
+ # on 2023-03-27 19:15:05 UTC using RuboCop version 1.48.1.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -17,50 +17,45 @@ Lint/UselessAssignment:
17
17
  - 'spec/helpers.rb'
18
18
 
19
19
  # Offense count: 7
20
- # Configuration parameters: IgnoredMethods, CountRepeatedAttributes.
20
+ # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
21
21
  Metrics/AbcSize:
22
22
  Max: 33
23
23
 
24
- # Offense count: 25
25
- # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
26
- # IgnoredMethods: refine
24
+ # Offense count: 1
25
+ # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
26
+ # AllowedMethods: refine
27
27
  Metrics/BlockLength:
28
- Max: 181
28
+ Max: 27
29
29
 
30
30
  # Offense count: 1
31
31
  # Configuration parameters: CountComments, CountAsOne.
32
32
  Metrics/ClassLength:
33
33
  Max: 152
34
34
 
35
- # Offense count: 3
36
- # Configuration parameters: IgnoredMethods.
35
+ # Offense count: 4
36
+ # Configuration parameters: AllowedMethods, AllowedPatterns.
37
37
  Metrics/CyclomaticComplexity:
38
38
  Max: 11
39
39
 
40
- # Offense count: 10
41
- # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
40
+ # Offense count: 11
41
+ # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
42
42
  Metrics/MethodLength:
43
43
  Max: 21
44
44
 
45
45
  # Offense count: 1
46
- # Configuration parameters: CountComments, CountAsOne.
47
- Metrics/ModuleLength:
48
- Max: 1657
49
-
50
- # Offense count: 1
51
- # Configuration parameters: IgnoredMethods.
46
+ # Configuration parameters: AllowedMethods, AllowedPatterns.
52
47
  Metrics/PerceivedComplexity:
53
48
  Max: 11
54
49
 
55
50
  # Offense count: 9
56
- # Configuration parameters: Prefixes.
51
+ # Configuration parameters: Prefixes, AllowedPatterns.
57
52
  # Prefixes: when, with, without
58
53
  RSpec/ContextWording:
59
54
  Exclude:
60
55
  - 'spec/lib/geo_combine/geoblacklight_spec.rb'
61
56
  - 'spec/lib/geo_combine/ogp_spec.rb'
62
57
 
63
- # Offense count: 9
58
+ # Offense count: 11
64
59
  # Configuration parameters: CountAsOne.
65
60
  RSpec/ExampleLength:
66
61
  Max: 12
@@ -78,10 +73,10 @@ RSpec/FilePath:
78
73
  Exclude:
79
74
  - 'spec/lib/geo_combine_spec.rb'
80
75
 
81
- # Configuration parameters: .
76
+ # Offense count: 23
77
+ # Configuration parameters: EnforcedStyle.
82
78
  # SupportedStyles: have_received, receive
83
79
  RSpec/MessageSpies:
84
- EnforcedStyle: have_received
85
80
  Exclude:
86
81
  - 'spec/lib/geo_combine/esri_open_data_spec.rb'
87
82
  - 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
@@ -89,7 +84,7 @@ RSpec/MessageSpies:
89
84
  - 'spec/lib/geo_combine/ogp_spec.rb'
90
85
  - 'spec/lib/geo_combine_spec.rb'
91
86
 
92
- # Offense count: 39
87
+ # Offense count: 48
93
88
  RSpec/MultipleExpectations:
94
89
  Max: 5
95
90
 
@@ -99,12 +94,14 @@ RSpec/MultipleMemoizedHelpers:
99
94
  Max: 7
100
95
 
101
96
  # Offense count: 5
102
- # Configuration parameters: IgnoreSharedExamples.
97
+ # Configuration parameters: EnforcedStyle, IgnoreSharedExamples.
98
+ # SupportedStyles: always, named_only
103
99
  RSpec/NamedSubject:
104
100
  Exclude:
105
101
  - 'spec/lib/geo_combine/formatting_spec.rb'
106
102
 
107
103
  # Offense count: 8
104
+ # Configuration parameters: AllowedGroups.
108
105
  RSpec/NestedGroups:
109
106
  Max: 4
110
107
 
@@ -113,6 +110,11 @@ RSpec/OverwritingSetup:
113
110
  Exclude:
114
111
  - 'spec/lib/geo_combine/geoblacklight_spec.rb'
115
112
 
113
+ # Offense count: 1
114
+ RSpec/PendingWithoutReason:
115
+ Exclude:
116
+ - 'spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb'
117
+
116
118
  # Offense count: 2
117
119
  RSpec/RepeatedExampleGroupBody:
118
120
  Exclude:
@@ -151,15 +153,14 @@ Style/Documentation:
151
153
  - 'test/**/*'
152
154
  - 'lib/geo_combine/bounding_box.rb'
153
155
  - 'lib/geo_combine/ckan_metadata.rb'
154
- - 'lib/geo_combine/cli.rb'
155
156
  - 'lib/geo_combine/geo_blacklight_harvester.rb'
156
157
  - 'lib/geo_combine/geoblacklight.rb'
157
158
  - 'lib/geo_combine/geometry_types.rb'
158
159
  - 'lib/geo_combine/iso19139.rb'
159
160
 
160
- # Offense count: 7
161
- # Cop supports --auto-correct.
162
- # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
161
+ # Offense count: 8
162
+ # This cop supports safe autocorrection (--autocorrect).
163
+ # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns.
163
164
  # URISchemes: http, https
164
165
  Layout/LineLength:
165
166
  Max: 159
data/geo_combine.gemspec CHANGED
@@ -25,6 +25,7 @@ Gem::Specification.new do |spec|
25
25
  spec.add_dependency 'sanitize'
26
26
  spec.add_dependency 'thor'
27
27
  spec.add_dependency 'faraday-net_http_persistent', '~> 2.0'
28
+ spec.add_dependency 'git'
28
29
 
29
30
  spec.add_development_dependency 'bundler'
30
31
  spec.add_development_dependency 'rake'
@@ -30,7 +30,7 @@ module GeoCombine
30
30
 
31
31
  # slugs should be lowercase and only have a-z, 0-9, and -
32
32
  def sluggify(slug)
33
- slug.gsub(/[^a-zA-Z0-9\-]/, '-').gsub(/-+/, '-').downcase
33
+ slug.gsub(/[^a-zA-Z0-9-]/, '-').gsub(/-+/, '-').downcase
34
34
  end
35
35
  end
36
36
  end
@@ -13,7 +13,7 @@ module GeoCombine
13
13
  attr_reader :metadata
14
14
 
15
15
  GEOBLACKLIGHT_VERSION = '1.0'
16
- SCHEMA_JSON_URL = "https://raw.githubusercontent.com/geoblacklight/geoblacklight/main/schema/geoblacklight-schema-#{GEOBLACKLIGHT_VERSION}.json"
16
+ SCHEMA_JSON_URL = "https://raw.githubusercontent.com/OpenGeoMetadata/opengeometadata.github.io/main/docs/schema/geoblacklight-schema-#{GEOBLACKLIGHT_VERSION}.json"
17
17
  DEPRECATED_KEYS_V1 = %w[
18
18
  uuid
19
19
  georss_polygon_s
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'find'
5
+ require 'git'
6
+
7
module GeoCombine
  # Harvests Geoblacklight documents from OpenGeoMetadata for indexing
  #
  # Repositories are cloned or pulled beneath +ogm_path+; JSON records found
  # there are yielded when their schema version equals +schema_version+.
  class Harvester
    attr_reader :ogm_path, :schema_version

    # Non-metadata repositories that shouldn't be harvested
    #
    # @return [Array<String>] repository names excluded from the harvest list
    def self.denylist
      %w[
        GeoCombine
        aardvark
        metadata-issues
        ogm_utils-python
        opengeometadata.github.io
        opengeometadata-rails
        gbl-1_to_aardvark
      ]
    end

    # GitHub API endpoint for OpenGeoMetadata repositories
    #
    # NOTE(review): GitHub's REST documentation lists 100 as the maximum
    # per_page value; confirm whether pagination is needed once the
    # organization has more repositories than fit on one page.
    #
    # @return [URI]
    def self.ogm_api_uri
      URI('https://api.github.com/orgs/opengeometadata/repos?per_page=1000')
    end

    # @param ogm_path [String] directory holding the cloned repositories
    #   (defaults to the OGM_PATH environment variable, else 'tmp/opengeometadata')
    # @param schema_version [String] schema version a record must declare to be
    #   yielded (defaults to the SCHEMA_VERSION environment variable, else '1.0')
    def initialize(
      ogm_path: ENV.fetch('OGM_PATH', 'tmp/opengeometadata'),
      schema_version: ENV.fetch('SCHEMA_VERSION', '1.0')
    )
      @ogm_path = ogm_path
      @schema_version = schema_version
    end

    # Enumerable of docs to index, for passing to an indexer
    #
    # @yieldparam record [Hash] a parsed JSON record
    # @yieldparam path [String] the file the record was read from
    # @return [Enumerator] when no block is given
    def docs_to_index
      return to_enum(:docs_to_index) unless block_given?

      Find.find(@ogm_path) do |path|
        # skip non-json and layers.json files
        filename = File.basename(path)
        next unless filename.include?('.json') && filename != 'layers.json'

        doc = JSON.parse(File.read(path))
        [doc].flatten.each do |record|
          # JSON scalars (null, numbers, strings) cannot declare a schema
          # version; skip them instead of raising on the key lookup
          next unless record.is_a?(Hash)

          # skip indexing if this record has a different schema version than what we want
          record_schema = record['gbl_mdVersion_s'] || record['geoblacklight_version']
          next unless record_schema == @schema_version

          yield record, path
        end
      end
    end

    # Update a repository via git
    # If the repository doesn't exist, clone it.
    #
    # @param repo [String] repository name
    # @return [Integer] always 1, so results can be summed by #pull_all
    def pull(repo)
      repo_path = File.join(@ogm_path, repo)
      clone(repo) unless File.directory? repo_path

      Git.open(repo_path).pull
      puts "Updated #{repo}"
      1
    end

    # Update all repositories
    # Return the count of repositories updated
    #
    # @return [Integer] 0 when there is nothing to harvest
    def pull_all
      # sum (unlike reduce(:+)) yields 0 rather than nil for an empty list
      repositories.sum { |repo| pull(repo) }
    end

    # Clone a repository via git
    # If the repository already exists, skip it.
    #
    # @param repo [String] repository name
    # @return [Integer] 1 if cloned, 0 if skipped
    def clone(repo)
      repo_path = File.join(@ogm_path, repo)

      # Check the filesystem first so an existing checkout never costs a
      # GitHub API request
      if File.directory? repo_path
        puts "Skipping clone to #{repo_path}; directory exists"
        return 0
      end

      # Warn if archived or empty; a response without a size is not reported
      # as empty
      repo_info = repository_info(repo)
      puts "WARNING: repository '#{repo}' is archived" if repo_info['archived']
      puts "WARNING: repository '#{repo}' is empty" if repo_info['size']&.zero?

      repo_url = "https://github.com/OpenGeoMetadata/#{repo}.git"
      Git.clone(repo_url, nil, path: ogm_path, depth: 1)
      puts "Cloned #{repo_url}"
      1
    end

    # Clone all repositories via git
    # Return the count of repositories cloned.
    #
    # @return [Integer] 0 when there is nothing to harvest
    def clone_all
      repositories.sum { |repo| clone(repo) }
    end

    private

    # List of repository names to harvest: non-empty, not archived and not on
    # the denylist. Memoized after the first API request.
    def repositories
      @repositories ||= JSON.parse(Net::HTTP.get(self.class.ogm_api_uri))
                            .filter { |repo| repo['size'].positive? }
                            .reject { |repo| repo['archived'] }
                            .map { |repo| repo['name'] }
                            .reject { |name| self.class.denylist.include? name }
    end

    # Fetch the GitHub API description of a single repository
    def repository_info(repo_name)
      JSON.parse(Net::HTTP.get(URI("https://api.github.com/repos/opengeometadata/#{repo_name}")))
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rsolr'
4
+ require 'faraday/net_http_persistent'
5
+
6
module GeoCombine
  # Indexes Geoblacklight documents into Solr
  class Indexer
    attr_reader :solr

    # Build an RSolr connection using the persistent HTTP adapter.
    #
    # @param url [String] Solr core URL (defaults to the SOLR_URL environment
    #   variable, else the local blacklight-core)
    def self.solr(url: ENV.fetch('SOLR_URL', 'http://127.0.0.1:8983/solr/blacklight-core'))
      RSolr.connect(url: url, adapter: :net_http_persistent)
    end

    # @param solr [#update, #commit, #options] connection to index into;
    #   built from the environment when omitted
    def initialize(solr: GeoCombine::Indexer.solr)
      @solr = solr
    end

    # URL the underlying connection was configured with
    def solr_url
      @solr.options[:url]
    end

    # Index everything and return the number of docs successfully indexed
    #
    # @param docs [Enumerable] pairs of (record, path)
    # @param commit_within [Integer] value sent as Solr's commitWithin param
    #   (defaults to the SOLR_COMMIT_WITHIN environment variable, else 5000)
    # @return [Integer] how many records were posted without an HTTP error
    def index(docs, commit_within: ENV.fetch('SOLR_COMMIT_WITHIN', 5000).to_i)
      successes = 0

      docs.each do |record, path|
        begin
          # log the unique identifier for the record for debugging
          identifier = record['id'] || record['dc_identifier_s']
          puts "Indexing #{identifier}: #{path}" if $DEBUG

          # post the single record to solr as a one-element JSON array
          @solr.update(
            params: { commitWithin: commit_within, overwrite: true },
            data: [record].to_json,
            headers: { 'Content-Type' => 'application/json' }
          )

          # only reached when the update did not raise
          successes += 1
        rescue RSolr::Error::Http => e
          # report the failure and carry on with the next record
          puts e
        end
      end

      @solr.commit
      successes
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
module GeoCombine
  module Migrators
    # TODO: WARNING! This class is not fully implemented and should not be used in
    # production. See https://github.com/OpenGeoMetadata/GeoCombine/issues/121
    # for remaining work.
    #
    # migrates the v1 schema to the aardvark schema
    #
    # Only keys are renamed; values are passed through unchanged.
    class V1AardvarkMigrator
      # v1 field name => aardvark field name. Keys absent from this map are
      # kept as-is by #convert_keys.
      SCHEMA_FIELD_MAP = {
        'dc_title_s' => 'dct_title_s', # new namespace
        'dc_description_s' => 'dct_description_sm', # new namespace; single to multi-valued
        'dc_language_s' => 'dct_language_sm', # new namespace; single to multi-valued
        'dc_language_sm' => 'dct_language_sm', # new namespace; single to multi-valued
        'dc_creator_sm' => 'dct_creator_sm', # new namespace
        'dc_publisher_s' => 'dct_publisher_sm', # new namespace; single to multi-valued
        'dct_provenance_s' => 'schema_provider_s', # new URI name
        'dc_subject_sm' => 'dct_subject_sm', # new namespace
        'solr_year_i' => 'gbl_indexYear_im', # new URI name; single to multi-valued
        'dc_source_sm' => 'dct_source_sm', # new namespace
        'dc_rights_s' => 'dct_accessRights_s', # new URI name
        'dc_format_s' => 'dct_format_s', # new namespace
        'layer_id_s' => 'gbl_wxsIdentifier_s', # new URI name
        'layer_slug_s' => 'id', # new URI name
        'dc_identifier_s' => 'dct_identifier_sm', # new namespace; single to multi-valued
        'layer_modified_dt' => 'gbl_mdModified_dt', # new URI name
        'geoblacklight_version' => 'gbl_mdVersion_s', # new URI name
        'suppressed_b' => 'gbl_suppressed_b' # new namespace
      }.freeze

      attr_reader :v1_hash

      # @param v1_hash [Hash] parsed json in the v1 schema
      def initialize(v1_hash:)
        @v1_hash = v1_hash
      end

      # Rename the keys and stamp the result with the Aardvark version marker.
      #
      # @return [Hash] a new hash; the input hash is not modified
      def run
        convert_keys.merge('gbl_mdVersion_s' => 'Aardvark')
      end

      # Rename every key that has an entry in SCHEMA_FIELD_MAP.
      #
      # @return [Hash] a new hash with the same values under the mapped keys
      def convert_keys
        v1_hash.transform_keys { |key| SCHEMA_FIELD_MAP.fetch(key, key) }
      end
    end
  end
end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GeoCombine
4
+ # Railtie for rake tasks
4
5
  class Railtie < Rails::Railtie
5
6
  rake_tasks do
6
7
  load 'tasks/geo_combine.rake'
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GeoCombine
4
- VERSION = '0.7.0'
4
+ VERSION = '0.8.0'
5
5
  end
data/lib/geo_combine.rb CHANGED
@@ -72,6 +72,9 @@ require 'geo_combine/ogp'
72
72
  # Require harvesting/indexing files
73
73
  require 'geo_combine/geo_blacklight_harvester'
74
74
 
75
+ # Migrators
76
+ require 'geo_combine/migrators/v1_aardvark_migrator'
77
+
75
78
  # Require gem files
76
79
  require 'geo_combine/version'
77
80
  require 'geo_combine/railtie' if defined?(Rails)
@@ -3,58 +3,33 @@
3
3
  require 'json'
4
4
  require 'rsolr'
5
5
  require 'find'
6
- require 'geo_combine/geo_blacklight_harvester'
7
6
  require 'faraday/net_http_persistent'
7
+ require 'geo_combine/harvester'
8
+ require 'geo_combine/indexer'
9
+ require 'geo_combine/geo_blacklight_harvester'
8
10
 
9
11
  namespace :geocombine do
10
12
  desc 'Clone OpenGeoMetadata repositories'
11
13
  task :clone, [:repo] do |_t, args|
12
- if args.repo
13
- ogm_repos = ["https://github.com/OpenGeoMetadata/#{args.repo}.git"]
14
- else
15
- ogm_api_uri = URI('https://api.github.com/orgs/opengeometadata/repos')
16
- ogm_repos = JSON.parse(Net::HTTP.get(ogm_api_uri)).map do |repo|
17
- repo['clone_url'] if (repo['size']).positive?
18
- end.compact
19
- ogm_repos.reject! { |repo| GeoCombineRake.denylist.include?(repo) }
20
- end
21
- ogm_repos.each do |repo|
22
- Kernel.system "echo #{repo} && mkdir -p #{GeoCombineRake.ogm_path} && cd #{GeoCombineRake.ogm_path} && git clone --depth 1 #{repo}"
23
- end
14
+ harvester = GeoCombine::Harvester.new
15
+ total = args[:repo] ? harvester.clone(args.repo) : harvester.clone_all
16
+ puts "Cloned #{total} repositories"
24
17
  end
25
18
 
26
19
  desc '"git pull" OpenGeoMetadata repositories'
27
20
  task :pull, [:repo] do |_t, args|
28
- paths = if args.repo
29
- [File.join(GeoCombineRake.ogm_path, args.repo)]
30
- else
31
- Dir.glob("#{GeoCombineRake.ogm_path}/*")
32
- end
33
- paths.each do |path|
34
- next unless File.directory?(path)
35
-
36
- Kernel.system "echo #{path} && cd #{path} && git pull origin"
37
- end
21
+ harvester = GeoCombine::Harvester.new
22
+ total = args[:repo] ? harvester.pull(args.repo) : harvester.pull_all
23
+ puts "Updated #{total} repositories"
38
24
  end
39
25
 
40
26
  desc 'Index all JSON documents except Layers.json'
41
27
  task :index do
42
- puts "Indexing #{GeoCombineRake.ogm_path} into #{GeoCombineRake.solr_url}"
43
- solr = RSolr.connect url: GeoCombineRake.solr_url, adapter: :net_http_persistent
44
- Find.find(GeoCombineRake.ogm_path) do |path|
45
- next unless File.basename(path).include?('.json') && File.basename(path) != 'layers.json'
46
-
47
- doc = JSON.parse(File.read(path))
48
- [doc].flatten.each do |record|
49
- puts "Indexing #{record['layer_slug_s']}: #{path}" if $DEBUG
50
- solr.update params: { commitWithin: GeoCombineRake.commit_within, overwrite: true },
51
- data: [record].to_json,
52
- headers: { 'Content-Type' => 'application/json' }
53
- rescue RSolr::Error::Http => e
54
- puts e
55
- end
56
- end
57
- solr.commit
28
+ harvester = GeoCombine::Harvester.new
29
+ indexer = GeoCombine::Indexer.new
30
+ puts "Indexing #{harvester.ogm_path} into #{indexer.solr_url}"
31
+ total = indexer.index(harvester.docs_to_index)
32
+ puts "Indexed #{total} documents"
58
33
  end
59
34
 
60
35
  namespace :geoblacklight_harvester do
@@ -66,29 +41,3 @@ namespace :geocombine do
66
41
  end
67
42
  end
68
43
  end
69
-
70
- # Class to hold helper methods for use in GeoCombine rake tasks
71
- class GeoCombineRake
72
- def self.commit_within
73
- (ENV['SOLR_COMMIT_WITHIN'] || 5000).to_i
74
- end
75
-
76
- def self.denylist
77
- [
78
- 'https://github.com/OpenGeoMetadata/GeoCombine.git',
79
- 'https://github.com/OpenGeoMetadata/aardvark.git',
80
- 'https://github.com/OpenGeoMetadata/metadata-issues.git',
81
- 'https://github.com/OpenGeoMetadata/ogm_utils-python.git',
82
- 'https://github.com/OpenGeoMetadata/opengeometadata.github.io.git',
83
- 'https://github.com/OpenGeoMetadata/opengeometadata-rails.git'
84
- ]
85
- end
86
-
87
- def self.ogm_path
88
- ENV['OGM_PATH'] || 'tmp/opengeometadata'
89
- end
90
-
91
- def self.solr_url
92
- ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'
93
- end
94
- end
@@ -0,0 +1,33 @@
1
+ {
2
+ "gbl_mdVersion_s":"Aardvark",
3
+ "dct_identifier_sm":"http://purl.stanford.edu/cz128vq0535",
4
+ "dct_title_s":"2005 Rural Poverty GIS Database: Uganda",
5
+ "dct_description_sm":"This polygon shapefile contains 2005 poverty data for 855 rural subcounties in Uganda. These data are intended for researchers, students, policy makers and the general public for reference and mapping purposes, and may be used for basic applications such as viewing, querying, and map output production.",
6
+ "dct_accessRights_s":"Public",
7
+ "schema_provider_s":"Stanford",
8
+ "dct_references_s":"{\"http://schema.org/url\":\"http://purl.stanford.edu/cz128vq0535\",\"http://schema.org/downloadUrl\":\"http://stacks.stanford.edu/file/druid:cz128vq0535/data.zip\",\"http://www.loc.gov/mods/v3\":\"http://purl.stanford.edu/cz128vq0535.mods\",\"http://www.isotc211.org/schemas/2005/gmd/\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/iso19139.xml\",\"http://www.w3.org/1999/xhtml\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/default.html\",\"http://www.opengis.net/def/serviceType/ogc/wfs\":\"https://geowebservices.stanford.edu/geoserver/wfs\",\"http://www.opengis.net/def/serviceType/ogc/wms\":\"https://geowebservices.stanford.edu/geoserver/wms\"}",
9
+ "gbl_wxsIdentifier_s":"druid:cz128vq0535",
10
+ "id":"stanford-cz128vq0535",
11
+ "layer_geom_type_s":"Polygon",
12
+ "gbl_mdModified_dt":"2015-01-13T18:46:38Z",
13
+ "dct_format_s":"Shapefile",
14
+ "dct_language_sm":"English",
15
+ "dc_type_s":"Dataset",
16
+ "dct_publisher_sm":"Uganda Bureau of Statistics",
17
+ "dct_creator_sm":[
18
+ "Uganda Bureau of Statistics"
19
+ ],
20
+ "dct_subject_sm":[
21
+ "Poverty",
22
+ "Statistics"
23
+ ],
24
+ "dct_issued_s":"2005",
25
+ "dct_temporal_sm":[
26
+ "2005"
27
+ ],
28
+ "dct_spatial_sm":[
29
+ "Uganda"
30
+ ],
31
+ "solr_geom":"ENVELOPE(29.572742, 35.000308, 4.234077, -1.478794)",
32
+ "gbl_indexYear_im":2005
33
+ }
@@ -0,0 +1,57 @@
1
+ {
2
+ "dct_title_s": "A description of the coast & city of Goa.",
3
+ "dct_alternative_sm": [
4
+ "A description of the coast & city of Goa."
5
+ ],
6
+ "dct_description_sm": [
7
+ "Photocopy. Some relief shown pictorially. North oriented to the left. \"The city of Goa & all its dependencies doth justly belong to the crown of England by the mariage [sic] of King Charles the Second with Queen Catherine\"--upper right. \"21\"--upper right. Outside of original margin is image of British Museum rule and \"7-Tab-125.\" Dimensions: 51 x 61 centimeters"
8
+ ],
9
+ "dct_language_sm": [
10
+ "eng"
11
+ ],
12
+ "dct_publisher_sm": [
13
+ "British Museum? (London?)"
14
+ ],
15
+ "schema_provider_s": "University of Minnesota",
16
+ "gbl_resourceClass_sm": [
17
+ "Maps"
18
+ ],
19
+ "dcat_keyword_sm": [
20
+ "Velha Goa (India) Maps",
21
+ "Maps"
22
+ ],
23
+ "dct_temporal_sm": [
24
+ "1900-1999"
25
+ ],
26
+ "dct_issued_s": "1900 - 1999?",
27
+ "gbl_indexYear_im": [
28
+ "1900"
29
+ ],
30
+ "gbl_dateRange_drsim": [
31
+ "[1900 TO 1999]"
32
+ ],
33
+ "dct_spatial_sm": [
34
+ "India"
35
+ ],
36
+ "locn_geometry": "ENVELOPE(-2.36,-2.06,11.73,11.5101)",
37
+ "dcat_bbox": "ENVELOPE(-2.36,-2.06,11.73,11.5101)",
38
+ "dcat_centroid": "11.620049999999999,-2.21",
39
+ "pcdm_memberOf_sm": [
40
+ "64bd8c4c-8e60-4956-b43d-bdc3f93db488"
41
+ ],
42
+ "dct_isPartOf_sm": [
43
+ "05d-01"
44
+ ],
45
+ "dct_rights_sm": [
46
+ "Use of this item may be governed by US and international copyright laws. You may be able to use this item, but copyright and other considerations may apply. For possible additional information or guidance on your use, please contact the contributing organization."
47
+ ],
48
+ "dct_accessRights_s": "Public",
49
+ "dct_format_s": "JPEG",
50
+ "dct_references_s": "{\"http://schema.org/downloadUrl\":\"http://cdm16022.contentdm.oclc.org/utils/getfile/collection/p16022coll205/id/236/filename/print/page/download/fparams/forcedownload\",\"http://schema.org/url\":\"https://umedia.lib.umn.edu/item/p16022coll205:236\",\"http://iiif.io/api/presentation#manifest\":\"https://cdm16022.contentdm.oclc.org/iiif/info/p16022coll205/236/manifest.json\"}",
51
+ "id": "p16022coll205:236",
52
+ "dct_identifier_sm": [
53
+ "UMN_ALMA:9949551790001701"
54
+ ],
55
+ "gbl_mdModified_dt": "2022-04-01T15:27:13Z",
56
+ "gbl_mdVersion_s": "Aardvark"
57
+ }
@@ -15,6 +15,12 @@ module JsonDocs
15
15
  File.read(File.join(File.dirname(__FILE__), './docs/full_geoblacklight.json'))
16
16
  end
17
17
 
18
+ ##
19
+ # full_geoblacklight fixture converted to the aardvark schema
20
+ def full_geoblacklight_aardvark
21
+ File.read(File.join(File.dirname(__FILE__), './docs/full_geoblacklight_aardvark.json'))
22
+ end
23
+
18
24
  ##
19
25
  # A sample Esri OpenData metadata record
20
26
  def esri_opendata_metadata
@@ -15,7 +15,7 @@ RSpec.describe GeoCombine::BoundingBox do
15
15
 
16
16
  describe '#valid?' do
17
17
  context 'when valid' do
18
- it { valid.valid? }
18
+ it { expect(valid.valid?).to be true }
19
19
  end
20
20
 
21
21
  context 'when south > north' do
@@ -151,8 +151,8 @@ RSpec.describe GeoCombine::Geoblacklight do
151
151
  let(:unparseable_json) do
152
152
  <<-JSON
153
153
  {
154
- \"http://schema.org/url\":\"http://example.com/abc123\",,
155
- \"http://schema.org/downloadUrl\":\"http://example.com/abc123/data.zip\"
154
+ "http://schema.org/url":"http://example.com/abc123",,
155
+ "http://schema.org/downloadUrl":"http://example.com/abc123/data.zip"
156
156
  }
157
157
  JSON
158
158
  end
@@ -184,7 +184,7 @@ RSpec.describe GeoCombine::Geoblacklight do
184
184
 
185
185
  describe 'spatial_validate!' do
186
186
  context 'when valid' do
187
- it { full_geobl.spatial_validate! }
187
+ it { expect { full_geobl.spatial_validate! }.not_to raise_error }
188
188
  end
189
189
 
190
190
  context 'when invalid' do
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'git'
4
+ require 'geo_combine/harvester'
5
+ require 'spec_helper'
6
+
7
RSpec.describe GeoCombine::Harvester do
  subject(:harvester) { described_class.new(ogm_path: 'spec/fixtures/indexing') }

  let(:repo_name) { 'my-institution' }
  let(:repo_path) { File.join(harvester.ogm_path, repo_name) }
  let(:repo_url) { "https://github.com/OpenGeoMetadata/#{repo_name}.git" }
  let(:stub_repo) { instance_double(Git::Base) }
  let(:stub_gh_api) do
    [
      { name: repo_name, size: 100 },
      { name: 'another-institution', size: 100 },
      { name: 'outdated-institution', size: 100, archived: true }, # archived
      { name: 'aardvark', size: 300 }, # on denylist
      { name: 'empty', size: 0 } # no data
    ]
  end

  before do
    # stub github API requests
    # use the whole org response, or just a portion for particular repos
    allow(Net::HTTP).to receive(:get) do |uri|
      if uri == described_class.ogm_api_uri
        stub_gh_api.to_json
      else
        # named differently from the repo_name helper to avoid shadowing it
        requested = uri.path.split('/').last.gsub('.git', '')
        stub_gh_api.find { |repo| repo[:name] == requested }.to_json
      end
    end

    # stub git commands
    allow(Git).to receive(:open).and_return(stub_repo)
    allow(Git).to receive(:clone).and_return(stub_repo)
    allow(stub_repo).to receive(:pull).and_return(stub_repo)
  end

  describe '#docs_to_index' do
    it 'yields each JSON record with its path, skipping layers.JSON' do
      expect { |b| harvester.docs_to_index(&b) }.to yield_successive_args(
        [JSON.parse(File.read('spec/fixtures/indexing/basic_geoblacklight.json')), 'spec/fixtures/indexing/basic_geoblacklight.json'],
        [JSON.parse(File.read('spec/fixtures/indexing/geoblacklight.json')), 'spec/fixtures/indexing/geoblacklight.json']
      )
    end

    it 'skips records with a different schema version' do
      harvester = described_class.new(ogm_path: 'spec/fixtures/indexing/', schema_version: 'Aardvark')
      expect { |b| harvester.docs_to_index(&b) }.to yield_successive_args(
        [JSON.parse(File.read('spec/fixtures/indexing/aardvark.json')), 'spec/fixtures/indexing/aardvark.json']
      )
    end
  end

  describe '#pull' do
    it 'can pull a single repository' do
      harvester.pull(repo_name)
      expect(Git).to have_received(:open).with(repo_path)
      expect(stub_repo).to have_received(:pull)
    end

    it 'clones a repo before pulling if it does not exist' do
      harvester.pull(repo_name)
      expect(Git).to have_received(:clone)
    end
  end

  describe '#pull_all' do
    it 'can pull all repositories' do
      harvester.pull_all
      expect(Git).to have_received(:open).exactly(2).times
      expect(stub_repo).to have_received(:pull).exactly(2).times
    end

    it 'returns the count of repositories pulled' do
      expect(harvester.pull_all).to eq(2)
    end

    # Git.open receives local checkout paths, so the negative assertions must
    # name a path; matching against a clone URL could never fail
    it 'skips repositories in the denylist' do
      harvester.pull_all
      expect(Git).not_to have_received(:open).with(File.join(harvester.ogm_path, 'aardvark'))
    end

    it 'skips archived repositories' do
      harvester.pull_all
      expect(Git).not_to have_received(:open).with(File.join(harvester.ogm_path, 'outdated-institution'))
    end
  end

  describe '#clone' do
    it 'can clone a single repository' do
      harvester.clone(repo_name)
      expect(Git).to have_received(:clone).with(
        repo_url,
        nil, {
          depth: 1, # shallow clone
          path: harvester.ogm_path
        }
      )
    end

    it 'skips repositories that already exist' do
      allow(File).to receive(:directory?).with(repo_path).and_return(true)
      harvester.clone(repo_name)
      expect(Git).not_to have_received(:clone)
    end

    # The GitHub responses for these repositories come from stub_gh_api via
    # the Net::HTTP stub in the before block
    it 'warns if a repository is empty' do
      expect do
        harvester.clone('empty')
      end.to output(/repository 'empty' is empty/).to_stdout
    end

    it 'warns if a repository is archived' do
      expect do
        harvester.clone('outdated-institution')
      end.to output(/repository 'outdated-institution' is archived/).to_stdout
    end
  end

  describe '#clone_all' do
    it 'can clone all repositories' do
      harvester.clone_all
      expect(Git).to have_received(:clone).exactly(2).times
    end

    # Git.clone is called with extra arguments after the URL, so allow for
    # them; matching on the URL alone could never fail
    it 'skips repositories in the denylist' do
      harvester.clone_all
      expect(Git).not_to have_received(:clone).with('https://github.com/OpenGeoMetadata/aardvark.git', any_args)
    end

    it 'returns the count of repositories cloned' do
      expect(harvester.clone_all).to eq(2)
    end
  end

  describe '#ogm_api_uri' do
    it 'includes a per_page param' do
      expect(described_class.send('ogm_api_uri').to_s).to include('per_page')
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'geo_combine/indexer'
4
+ require 'spec_helper'
5
+
6
+ RSpec.describe GeoCombine::Indexer do
7
+ subject(:indexer) { described_class.new(solr: solr) }
8
+
9
+ let(:solr) { instance_double(RSolr::Client) }
10
+ let(:docs) do
11
+ [
12
+ [{ 'id' => '1' }, 'path/to/record1.json'], # v1.0 schema
13
+ [{ 'dc_identifier_s' => '2' }, 'path/to/record2.json'] # aardvark schema
14
+ ]
15
+ end
16
+
17
+ before do
18
+ allow(solr).to receive(:update)
19
+ allow(solr).to receive(:commit)
20
+ end
21
+
22
+ describe '#initialize' do
23
+ before do
24
+ stub_const('ENV', 'SOLR_URL' => 'http://localhost:8983/solr/geoblacklight')
25
+ allow(RSolr).to receive(:connect).and_return(solr)
26
+ end
27
+
28
+ it 'connects to a solr instance if set in the environment' do
29
+ described_class.new
30
+ expect(RSolr).to have_received(:connect).with(
31
+ url: 'http://localhost:8983/solr/geoblacklight',
32
+ adapter: :net_http_persistent
33
+ )
34
+ end
35
+ end
36
+
37
+ describe '#index' do
38
+ it 'posts each record to solr as JSON' do
39
+ indexer.index([docs[0]], commit_within: 1)
40
+ expect(solr).to have_received(:update).with(
41
+ params: { commitWithin: 1, overwrite: true },
42
+ data: [docs[0][0]].to_json,
43
+ headers: { 'Content-Type' => 'application/json' }
44
+ )
45
+ end
46
+
47
+ it 'prints the id and path of each record in debug mode' do
48
+ $DEBUG = true
49
+ expect { indexer.index([docs[0]]) }.to output("Indexing 1: path/to/record1.json\n").to_stdout
50
+ expect { indexer.index([docs[1]]) }.to output("Indexing 2: path/to/record2.json\n").to_stdout
51
+ $DEBUG = false
52
+ end
53
+
54
+ it 'commits changes to solr after indexing' do
55
+ indexer.index(docs)
56
+ expect(solr).to have_received(:commit).once
57
+ end
58
+
59
+ it 'returns the count of records successfully indexed' do
60
+ expect(indexer.index(docs)).to eq 2
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe GeoCombine::Migrators::V1AardvarkMigrator do
6
+ include JsonDocs
7
+
8
+ describe '#run' do
9
+ it 'migrates keys' do
10
+ input_hash = JSON.parse(full_geoblacklight)
11
+ # TODO: Note that this fixture has not yet been fully converted to
12
+ # aardvark. See https://github.com/OpenGeoMetadata/GeoCombine/issues/121
13
+ # for remaining work.
14
+ expected_output = JSON.parse(full_geoblacklight_aardvark)
15
+ expect(described_class.new(v1_hash: input_hash).run).to eq(expected_output)
16
+ end
17
+
18
+ context 'when the given record is already in aardvark schema' do
19
+ xit 'returns the record unchanged'
20
+ end
21
+ end
22
+ end
data/spec/spec_helper.rb CHANGED
@@ -7,7 +7,6 @@ SimpleCov.start 'rails' do
7
7
  add_filter 'lib/tasks/geo_combine.rake'
8
8
  add_filter 'lib/geo_combine/version.rb'
9
9
  add_filter 'lib/geo_combine/railtie.rb'
10
- add_filter 'lib/geo_combine/cli.rb'
11
10
  minimum_coverage 95 # When updating this value, update the README badge value
12
11
  end
13
12
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: geo_combine
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jack Reed
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-07 00:00:00.000000000 Z
11
+ date: 2023-04-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '2.0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: git
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: bundler
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -237,8 +251,7 @@ dependencies:
237
251
  description: A Ruby toolkit for managing geospatial metadata
238
252
  email:
239
253
  - pjreed@stanford.edu
240
- executables:
241
- - geocombine
254
+ executables: []
242
255
  extensions: []
243
256
  extra_rdoc_files: []
244
257
  files:
@@ -251,12 +264,10 @@ files:
251
264
  - LICENSE.txt
252
265
  - README.md
253
266
  - Rakefile
254
- - bin/geocombine
255
267
  - geo_combine.gemspec
256
268
  - lib/geo_combine.rb
257
269
  - lib/geo_combine/bounding_box.rb
258
270
  - lib/geo_combine/ckan_metadata.rb
259
- - lib/geo_combine/cli.rb
260
271
  - lib/geo_combine/esri_open_data.rb
261
272
  - lib/geo_combine/exceptions.rb
262
273
  - lib/geo_combine/fgdc.rb
@@ -265,7 +276,10 @@ files:
265
276
  - lib/geo_combine/geo_blacklight_harvester.rb
266
277
  - lib/geo_combine/geoblacklight.rb
267
278
  - lib/geo_combine/geometry_types.rb
279
+ - lib/geo_combine/harvester.rb
280
+ - lib/geo_combine/indexer.rb
268
281
  - lib/geo_combine/iso19139.rb
282
+ - lib/geo_combine/migrators/v1_aardvark_migrator.rb
269
283
  - lib/geo_combine/ogp.rb
270
284
  - lib/geo_combine/railtie.rb
271
285
  - lib/geo_combine/subjects.rb
@@ -294,6 +308,7 @@ files:
294
308
  - spec/fixtures/docs/ckan.json
295
309
  - spec/fixtures/docs/esri_open_data.json
296
310
  - spec/fixtures/docs/full_geoblacklight.json
311
+ - spec/fixtures/docs/full_geoblacklight_aardvark.json
297
312
  - spec/fixtures/docs/geoblacklight_pre_v1.json
298
313
  - spec/fixtures/docs/ogp_harvard_line.json
299
314
  - spec/fixtures/docs/ogp_harvard_raster.json
@@ -304,6 +319,7 @@ files:
304
319
  - spec/fixtures/docs/simple_xslt.xsl
305
320
  - spec/fixtures/docs/stanford_iso.xml
306
321
  - spec/fixtures/docs/tufts_fgdc.xml
322
+ - spec/fixtures/indexing/aardvark.json
307
323
  - spec/fixtures/indexing/basic_geoblacklight.json
308
324
  - spec/fixtures/indexing/geoblacklight.json
309
325
  - spec/fixtures/indexing/layers.json
@@ -318,10 +334,12 @@ files:
318
334
  - spec/lib/geo_combine/formatting_spec.rb
319
335
  - spec/lib/geo_combine/geo_blacklight_harvester_spec.rb
320
336
  - spec/lib/geo_combine/geoblacklight_spec.rb
337
+ - spec/lib/geo_combine/harvester_spec.rb
338
+ - spec/lib/geo_combine/indexer_spec.rb
321
339
  - spec/lib/geo_combine/iso19139_spec.rb
340
+ - spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb
322
341
  - spec/lib/geo_combine/ogp_spec.rb
323
342
  - spec/lib/geo_combine_spec.rb
324
- - spec/lib/tasks/geo_combine_spec.rb
325
343
  - spec/spec_helper.rb
326
344
  - spec/support/fixtures.rb
327
345
  homepage: ''
@@ -343,7 +361,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
343
361
  - !ruby/object:Gem::Version
344
362
  version: '0'
345
363
  requirements: []
346
- rubygems_version: 3.1.6
364
+ rubygems_version: 3.3.7
347
365
  signing_key:
348
366
  specification_version: 4
349
367
  summary: A Ruby toolkit for managing geospatial metadata
@@ -354,6 +372,7 @@ test_files:
354
372
  - spec/fixtures/docs/ckan.json
355
373
  - spec/fixtures/docs/esri_open_data.json
356
374
  - spec/fixtures/docs/full_geoblacklight.json
375
+ - spec/fixtures/docs/full_geoblacklight_aardvark.json
357
376
  - spec/fixtures/docs/geoblacklight_pre_v1.json
358
377
  - spec/fixtures/docs/ogp_harvard_line.json
359
378
  - spec/fixtures/docs/ogp_harvard_raster.json
@@ -364,6 +383,7 @@ test_files:
364
383
  - spec/fixtures/docs/simple_xslt.xsl
365
384
  - spec/fixtures/docs/stanford_iso.xml
366
385
  - spec/fixtures/docs/tufts_fgdc.xml
386
+ - spec/fixtures/indexing/aardvark.json
367
387
  - spec/fixtures/indexing/basic_geoblacklight.json
368
388
  - spec/fixtures/indexing/geoblacklight.json
369
389
  - spec/fixtures/indexing/layers.json
@@ -378,9 +398,11 @@ test_files:
378
398
  - spec/lib/geo_combine/formatting_spec.rb
379
399
  - spec/lib/geo_combine/geo_blacklight_harvester_spec.rb
380
400
  - spec/lib/geo_combine/geoblacklight_spec.rb
401
+ - spec/lib/geo_combine/harvester_spec.rb
402
+ - spec/lib/geo_combine/indexer_spec.rb
381
403
  - spec/lib/geo_combine/iso19139_spec.rb
404
+ - spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb
382
405
  - spec/lib/geo_combine/ogp_spec.rb
383
406
  - spec/lib/geo_combine_spec.rb
384
- - spec/lib/tasks/geo_combine_spec.rb
385
407
  - spec/spec_helper.rb
386
408
  - spec/support/fixtures.rb
data/bin/geocombine DELETED
@@ -1,6 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require 'geo_combine/cli'
5
-
6
- GeoCombine::CLI.start(ARGV)
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'thor'
4
- require 'rake'
5
-
6
- root = Gem::Specification.find_by_name('geo_combine').gem_dir
7
- tasks = File.join(root, 'lib/tasks/*.rake')
8
- Dir.glob(tasks).each { |r| load r }
9
-
10
- module GeoCombine
11
- class CLI < Thor
12
- desc 'clone', 'Clone all OpenGeoMetadata repositories'
13
- def clone
14
- Rake::Task['geocombine:clone'].invoke
15
- end
16
-
17
- desc 'pull', '"git pull" OpenGeoMetadata repositories'
18
- def pull
19
- Rake::Task['geocombine:pull'].invoke
20
- end
21
-
22
- desc 'index', 'Index all of the GeoBlacklight documents'
23
- def index
24
- Rake::Task['geocombine:index'].invoke
25
- end
26
- end
27
- end
@@ -1,45 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'spec_helper'
4
- require 'rake'
5
-
6
- describe 'geo_combine.rake' do
7
- before(:all) do
8
- load File.expand_path('../../../lib/tasks/geo_combine.rake', __dir__)
9
- Rake::Task.define_task(:environment)
10
- end
11
-
12
- before do
13
- allow(ENV).to receive(:[]).and_call_original
14
- allow(ENV).to receive(:[]).with('OGM_PATH').and_return(File.join(fixture_dir, 'indexing'))
15
- end
16
-
17
- describe 'geocombine:clone' do
18
- before do
19
- WebMock.disable_net_connect!
20
- end
21
-
22
- after do
23
- WebMock.allow_net_connect!
24
- end
25
-
26
- it 'does not clone repos on deny list' do
27
- stub_request(:get, 'https://api.github.com/orgs/opengeometadata/repos').to_return(status: 200, body: read_fixture('docs/repos.json'))
28
- allow(Kernel).to receive(:system)
29
- Rake::Task['geocombine:clone'].invoke
30
- expect(Kernel).to have_received(:system).exactly(21).times
31
- end
32
- end
33
-
34
- describe 'geocombine:index' do
35
- it 'only indexes .json files but not layers.json' do
36
- rsolr_mock = instance_double(RSolr::Client)
37
- allow(rsolr_mock).to receive(:update)
38
- allow(rsolr_mock).to receive(:commit)
39
- allow(RSolr).to receive(:connect).and_return(rsolr_mock)
40
- Rake::Task['geocombine:index'].invoke
41
- # We expect 2 files to index
42
- expect(rsolr_mock).to have_received(:update).exactly(2).times
43
- end
44
- end
45
- end