geo_combine 0.7.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +7 -16
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +5 -1
  5. data/.rubocop_todo.yml +34 -36
  6. data/README.md +47 -22
  7. data/geo_combine.gemspec +2 -0
  8. data/lib/geo_combine/ckan_metadata.rb +5 -4
  9. data/lib/geo_combine/formatting.rb +1 -1
  10. data/lib/geo_combine/geo_blacklight_harvester.rb +17 -12
  11. data/lib/geo_combine/geoblacklight.rb +1 -1
  12. data/lib/geo_combine/harvester.rb +132 -0
  13. data/lib/geo_combine/indexer.rb +126 -0
  14. data/lib/geo_combine/logger.rb +16 -0
  15. data/lib/geo_combine/migrators/v1_aardvark_migrator.rb +118 -0
  16. data/lib/geo_combine/ogp.rb +1 -1
  17. data/lib/geo_combine/railtie.rb +1 -0
  18. data/lib/geo_combine/version.rb +1 -1
  19. data/lib/geo_combine.rb +3 -0
  20. data/lib/tasks/geo_combine.rake +10 -65
  21. data/spec/fixtures/docs/full_geoblacklight.json +8 -1
  22. data/spec/fixtures/docs/full_geoblacklight_aardvark.json +51 -0
  23. data/spec/fixtures/indexing/aardvark.json +57 -0
  24. data/spec/fixtures/json_docs.rb +6 -0
  25. data/spec/lib/geo_combine/bounding_box_spec.rb +1 -1
  26. data/spec/lib/geo_combine/geo_blacklight_harvester_spec.rb +5 -4
  27. data/spec/lib/geo_combine/geoblacklight_spec.rb +3 -3
  28. data/spec/lib/geo_combine/harvester_spec.rb +133 -0
  29. data/spec/lib/geo_combine/indexer_spec.rb +134 -0
  30. data/spec/lib/geo_combine/migrators/v1_aardvark_migrator_spec.rb +46 -0
  31. data/spec/lib/geo_combine_spec.rb +20 -17
  32. data/spec/spec_helper.rb +1 -2
  33. metadata +46 -9
  34. data/bin/geocombine +0 -6
  35. data/lib/geo_combine/cli.rb +0 -27
  36. data/spec/lib/tasks/geo_combine_spec.rb +0 -45
@@ -0,0 +1,132 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'find'
5
+ require 'git'
6
+ require 'net/http'
7
+ require 'geo_combine/logger'
8
+
9
module GeoCombine
  # Harvests GeoBlacklight documents from OpenGeoMetadata repositories for indexing.
  class Harvester
    attr_reader :ogm_path, :schema_version

    # Non-metadata repositories that shouldn't be harvested
    def self.denylist
      [
        'GeoCombine',
        'aardvark',
        'metadata-issues',
        'ogm_utils-python',
        'opengeometadata.github.io',
        'opengeometadata-rails',
        'gbl-1_to_aardvark'
      ]
    end

    # GitHub API endpoint listing all OpenGeoMetadata repositories
    def self.ogm_api_uri
      URI('https://api.github.com/orgs/opengeometadata/repos?per_page=1000')
    end

    # @param ogm_path [String] root directory where repositories are cloned
    # @param schema_version [String] only records with this schema version are yielded
    # @param logger [Logger] destination for progress/diagnostic messages
    def initialize(
      ogm_path: ENV.fetch('OGM_PATH', 'tmp/opengeometadata'),
      schema_version: ENV.fetch('SCHEMA_VERSION', '1.0'),
      logger: GeoCombine::Logger.logger
    )
      @ogm_path = ogm_path
      @schema_version = schema_version
      @logger = logger
    end

    # Enumerable of [record, path] pairs to index, for passing to an indexer.
    # Skips layers.json, non-JSON files, unparseable JSON, and records whose
    # schema version doesn't match the configured one.
    def docs_to_index
      return to_enum(:docs_to_index) unless block_given?

      @logger.info "loading documents from #{ogm_path}"
      Find.find(@ogm_path) do |path|
        # skip non-json and layers.json files
        if File.basename(path) == 'layers.json' || !File.basename(path).end_with?('.json')
          @logger.debug "skipping #{path}; not a geoblacklight JSON document"
          next
        end

        # a single malformed file shouldn't abort the whole harvest
        doc = begin
          JSON.parse(File.read(path))
        rescue JSON::ParserError => e
          @logger.error "skipping #{path}; invalid JSON: #{e.message}"
          next
        end

        # files may hold a single record or an array of records
        [doc].flatten.each do |record|
          # skip indexing if this record has a different schema version than what we want
          record_schema = record['gbl_mdVersion_s'] || record['geoblacklight_version']
          record_id = record['layer_slug_s'] || record['dc_identifier_s']
          if record_schema != @schema_version
            @logger.debug "skipping #{record_id}; schema version #{record_schema} doesn't match #{@schema_version}"
            next
          end

          @logger.debug "found record #{record_id} at #{path}"
          yield record, path
        end
      end
    end

    # Update a repository via git.
    # If the repository doesn't exist, clone it (a fresh clone is already
    # up to date, so no extra pull is issued).
    def pull(repo)
      repo_path = File.join(@ogm_path, repo)
      return clone(repo) unless File.directory? repo_path

      Git.open(repo_path).pull
      @logger.info "updated #{repo}"
      repo
    end

    # Update all repositories.
    # Return the names of repositories updated.
    def pull_all
      updated = repositories.map(&method(:pull)).compact
      @logger.info "updated #{updated.size} repositories"
      updated
    end

    # Clone a repository via git.
    # If the repository already exists, skip it (checked before hitting the
    # GitHub API so the skip path makes no network request).
    def clone(repo)
      repo_path = File.join(@ogm_path, repo)
      if File.directory? repo_path
        @logger.warn "skipping clone to #{repo_path}; directory exists"
        return nil
      end

      repo_url = "https://github.com/OpenGeoMetadata/#{repo}.git"
      repo_info = repository_info(repo)
      @logger.warn "repository is archived: #{repo_url}" if repo_info['archived']
      @logger.warn "repository is empty: #{repo_url}" if repo_info['size'].zero?

      Git.clone(repo_url, nil, path: ogm_path, depth: 1)
      @logger.info "cloned #{repo_url} to #{repo_path}"
      repo
    end

    # Clone all repositories via git.
    # Return the names of repositories cloned.
    def clone_all
      cloned = repositories.map(&method(:clone)).compact
      @logger.info "cloned #{cloned.size} repositories"
      cloned
    end

    private

    # List of repository names to harvest: non-empty, non-archived,
    # and not on the denylist.
    def repositories
      @repositories ||= JSON.parse(Net::HTTP.get(self.class.ogm_api_uri))
                            .filter { |repo| repo['size'].positive? }
                            .reject { |repo| repo['archived'] }
                            .map { |repo| repo['name'] }
                            .reject { |name| self.class.denylist.include? name }
    end

    # Fetch metadata for a single repository from the GitHub API
    def repository_info(repo_name)
      JSON.parse(Net::HTTP.get(URI("https://api.github.com/repos/opengeometadata/#{repo_name}")))
    end
  end
end
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rsolr'
4
+ require 'faraday/retry'
5
+ require 'faraday/net_http_persistent'
6
+ require 'geo_combine/logger'
7
+
8
module GeoCombine
  # Indexes GeoBlacklight documents into Solr in batches, falling back to
  # one-at-a-time indexing when a batch fails.
  class Indexer
    attr_reader :solr

    # @param solr [RSolr::Client, nil] preconfigured client; built from SOLR_URL if nil
    # @param logger [Logger] destination for progress/diagnostic messages
    def initialize(solr: nil, logger: GeoCombine::Logger.logger)
      @logger = logger
      @batch_size = ENV.fetch('SOLR_BATCH_SIZE', 100).to_i

      # If SOLR_URL is set, use it; if in a Geoblacklight app, use its solr core
      solr_url = ENV.fetch('SOLR_URL', nil)
      solr_url ||= Blacklight.default_index.connection.base_uri.to_s if defined? Blacklight

      # If neither, warn and try to use local Blacklight default solr core
      if solr_url.nil?
        @logger.warn 'SOLR_URL not set; using Blacklight default'
        solr_url = 'http://localhost:8983/solr/blacklight-core'
      end

      @solr = solr || RSolr.connect(client, url: solr_url)
    end

    # Index everything and return the number of docs successfully indexed.
    # @param docs [Enumerable] yields [doc_hash, source_path] pairs
    def index(docs)
      # Track total indexed and time spent
      @logger.info "indexing into #{solr_url}"
      total_indexed = 0
      start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      # Index in batches; set batch size via SOLR_BATCH_SIZE
      batch = []
      docs.each do |doc, path|
        # append BEFORE checking the size so the document that fills the
        # batch is never dropped (the old size-check-first logic lost one
        # document per full batch)
        batch << [doc, path]
        if batch.size >= @batch_size
          total_indexed += index_batch(batch)
          batch = []
        end
      end
      total_indexed += index_batch(batch) unless batch.empty?

      # Issue a commit to make sure all documents are indexed
      @solr.commit
      sec = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
      @logger.info format('indexed %<total_indexed>d documents in %<sec>.2f seconds',
                          total_indexed: total_indexed, sec: sec)
      total_indexed
    end

    # URL to the solr instance being used
    def solr_url
      @solr.options[:url]
    end

    private

    # Index a batch of documents; if the batch fails, retry them individually.
    # Returns the number of documents successfully indexed.
    def index_batch(batch)
      docs = batch.map(&:first)
      @solr.update(data: batch_json(docs), params: params, headers: headers)
      @logger.debug "indexed batch (#{batch.size} docs)"
      batch.size
    rescue RSolr::Error::Http => e
      @logger.error "error indexing batch (#{batch.size} docs): #{format_error(e)}"
      @logger.warn 'retrying documents individually'
      batch.map { |doc, path| index_single(doc, path) }.compact.size
    end

    # Index a single document; if it fails, log the error and continue.
    # Returns the doc on success, nil on failure.
    def index_single(doc, path)
      @solr.add(doc, params: params, headers: headers)
      @logger.debug "indexed #{path}"
      doc
    rescue RSolr::Error::Http => e
      @logger.error "error indexing #{path}: #{format_error(e)}"
      nil
    end

    # Generate a JSON string for the Solr update API: repeated "add" commands.
    # Keys are quoted so the payload is strictly valid JSON (Solr also accepted
    # the previous unquoted form only because its parser is lenient).
    def batch_json(docs)
      commands = docs.map { |doc| "\"add\": { \"doc\": #{doc.to_json} }" }
      "{ #{commands.join(",\n")} }"
    end

    # Generate a friendly error message for logging including status code and message
    def format_error(error)
      code = error.response[:status]
      status_info = "#{code} #{RSolr::Error::Http::STATUS_CODES[code.to_i]}"
      error_info = parse_solr_error(error)
      [status_info, error_info].compact.join(' - ')
    end

    # Extract the specific error message from a solr JSON error response, if any
    def parse_solr_error(error)
      JSON.parse(error.response[:body]).dig('error', 'msg')
    rescue StandardError
      nil
    end

    def headers
      { 'Content-Type' => 'application/json' }
    end

    def params
      { overwrite: true }
    end

    # Faraday client with retry + persistent connections for RSolr
    def client
      @client ||= Faraday.new do |conn|
        conn.request :retry, max: 3, interval: 1, backoff_factor: 2, exceptions: [
          Faraday::TimeoutError,
          Faraday::ConnectionFailed,
          Faraday::TooManyRequestsError
        ]
        conn.response :raise_error
        conn.adapter :net_http_persistent
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'logger'
4
+
5
module GeoCombine
  # Shared logging for the gem.
  class Logger
    class << self
      # Memoized stdlib logger writing to STDERR.
      # Verbosity comes from the LOG_LEVEL environment variable (default: info).
      def logger
        @logger ||= build
      end

      private

      # Construct the underlying ::Logger instance.
      def build
        level = ENV.fetch('LOG_LEVEL', 'info').to_sym
        ::Logger.new($stderr, progname: 'GeoCombine', level: level)
      end
    end
  end
end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support'
4
+
5
module GeoCombine
  module Migrators
    # Migrates a record from the GeoBlacklight v1 schema to the Aardvark schema.
    class V1AardvarkMigrator
      attr_reader :v1_hash

      # @param v1_hash [Hash] parsed json in the v1 schema
      # @param collection_id_map [Hash] a hash mapping collection names to ids for converting dct_isPartOf_sm
      def initialize(v1_hash:, collection_id_map: {})
        @v1_hash = v1_hash
        # Work on a shallow copy so the caller's hash is not destructively
        # mutated by transform_keys! (the old aliasing clobbered the input)
        @v2_hash = v1_hash.dup
        @collection_id_map = collection_id_map
      end

      def run
        # Return unchanged if already in the aardvark schema
        return @v2_hash if @v2_hash['gbl_mdVersion_s'] == 'Aardvark'

        # Convert the record
        convert_keys
        convert_single_to_multi_valued_fields
        convert_non_crosswalked_fields
        remove_deprecated_fields

        # Mark the record as converted and return it
        @v2_hash['gbl_mdVersion_s'] = 'Aardvark'
        @v2_hash
      end

      # Namespace and URI changes to fields
      def convert_keys
        @v2_hash.transform_keys! do |k|
          SCHEMA_FIELD_MAP[k] || k
        end
      end

      # Wrap scalar values of multi-valued (_sm/_im) fields in arrays.
      # Anchored [si] class: the old /.*_[s|i]m/ pattern wrongly matched a
      # literal '|' and matched anywhere in the key, not just the suffix.
      def convert_single_to_multi_valued_fields
        @v2_hash = @v2_hash.to_h do |k, v|
          wrap = !v.is_a?(Array) && k.match?(/_[si]m\z/)
          [k, wrap ? [v] : v]
        end
      end

      # Convert non-crosswalked fields via lookup tables
      def convert_non_crosswalked_fields
        # Keys may or may not include whitespace, so we normalize them.
        # Resource class is required so we default to "Other"; resource type is not required.
        @v2_hash['gbl_resourceClass_sm'] = RESOURCE_CLASS_MAP[@v1_hash['dc_type_s']&.gsub(/\s+/, '')] || ['Other']
        resource_type = RESOURCE_TYPE_MAP[@v1_hash['layer_geom_type_s']&.gsub(/\s+/, '')]
        @v2_hash['gbl_resourceType_sm'] = resource_type unless resource_type.nil?

        # If the user specified a collection id map, use it to convert the collection names to ids.
        # Plain nil/empty checks avoid depending on ActiveSupport's #present?.
        is_part_of = @v1_hash['dct_isPartOf_sm']&.filter_map { |name| @collection_id_map[name] }
        if is_part_of.nil? || is_part_of.empty?
          @v2_hash.delete('dct_isPartOf_sm')
        else
          @v2_hash['dct_isPartOf_sm'] = is_part_of
        end
      end

      # Remove fields that are no longer used
      def remove_deprecated_fields
        @v2_hash = @v2_hash.except(*SCHEMA_FIELD_MAP.keys, 'dc_type_s', 'layer_geom_type_s')
      end

      SCHEMA_FIELD_MAP = {
        'dc_title_s' => 'dct_title_s', # new namespace
        'dc_description_s' => 'dct_description_sm', # new namespace; single to multi-valued
        'dc_language_s' => 'dct_language_sm', # new namespace; single to multi-valued
        'dc_language_sm' => 'dct_language_sm', # new namespace
        'dc_creator_sm' => 'dct_creator_sm', # new namespace
        'dc_publisher_s' => 'dct_publisher_sm', # new namespace; single to multi-valued
        'dct_provenance_s' => 'schema_provider_s', # new URI name
        'dc_subject_sm' => 'dct_subject_sm', # new namespace
        'solr_geom' => 'dcat_bbox', # new URI name
        'solr_year_i' => 'gbl_indexYear_im', # new URI name; single to multi-valued
        'dc_source_sm' => 'dct_source_sm', # new namespace
        'dc_rights_s' => 'dct_accessRights_s', # new URI name
        'dc_format_s' => 'dct_format_s', # new namespace
        'layer_id_s' => 'gbl_wxsIdentifier_s', # new URI name
        'layer_slug_s' => 'id', # new URI name
        'dc_identifier_s' => 'dct_identifier_sm', # new namespace; single to multi-valued
        'layer_modified_dt' => 'gbl_mdModified_dt', # new URI name
        'geoblacklight_version' => 'gbl_mdVersion_s', # new URI name
        'suppressed_b' => 'gbl_suppressed_b' # new namespace
      }.freeze

      # Map Dublin Core types to Aardvark resource class sets
      # See: https://github.com/OpenGeoMetadata/opengeometadata.github.io/blob/main/docs/ogm-aardvark/resource-class.md
      RESOURCE_CLASS_MAP = {
        'Collection' => ['Collections'],
        'Dataset' => ['Datasets'],
        'Image' => ['Imagery'],
        'InteractiveResource' => ['Websites'],
        'Service' => ['Web services'],
        'StillImage' => ['Imagery']
      }.freeze

      # Map geometry types to Aardvark resource type sets
      # See: https://github.com/OpenGeoMetadata/opengeometadata.github.io/blob/main/docs/ogm-aardvark/resource-type.md
      RESOURCE_TYPE_MAP = {
        'Point' => ['Point data'],
        'Line' => ['Line data'],
        'Polygon' => ['Polygon data'],
        'Raster' => ['Raster data'],
        'Table' => ['Table data']
      }.freeze
    end
  end
end
@@ -75,7 +75,7 @@ module GeoCombine
75
75
  dc_publisher_s: metadata['Publisher'],
76
76
  dc_subject_sm: subjects,
77
77
  dc_type_s: 'Dataset'
78
- }.delete_if { |_k, v| v.nil? }
78
+ }.compact
79
79
  end
80
80
 
81
81
  def date
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GeoCombine
4
+ # Railtie for rake tasks
4
5
  class Railtie < Rails::Railtie
5
6
  rake_tasks do
6
7
  load 'tasks/geo_combine.rake'
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GeoCombine
4
- VERSION = '0.7.0'
4
+ VERSION = '0.9.0'
5
5
  end
data/lib/geo_combine.rb CHANGED
@@ -72,6 +72,9 @@ require 'geo_combine/ogp'
72
72
  # Require harvesting/indexing files
73
73
  require 'geo_combine/geo_blacklight_harvester'
74
74
 
75
+ # Migrators
76
+ require 'geo_combine/migrators/v1_aardvark_migrator'
77
+
75
78
  # Require gem files
76
79
  require 'geo_combine/version'
77
80
  require 'geo_combine/railtie' if defined?(Rails)
@@ -3,58 +3,29 @@
3
3
  require 'json'
4
4
  require 'rsolr'
5
5
  require 'find'
6
- require 'geo_combine/geo_blacklight_harvester'
7
6
  require 'faraday/net_http_persistent'
7
+ require 'geo_combine/harvester'
8
+ require 'geo_combine/indexer'
9
+ require 'geo_combine/geo_blacklight_harvester'
8
10
 
9
11
  namespace :geocombine do
10
12
  desc 'Clone OpenGeoMetadata repositories'
11
13
  task :clone, [:repo] do |_t, args|
12
- if args.repo
13
- ogm_repos = ["https://github.com/OpenGeoMetadata/#{args.repo}.git"]
14
- else
15
- ogm_api_uri = URI('https://api.github.com/orgs/opengeometadata/repos')
16
- ogm_repos = JSON.parse(Net::HTTP.get(ogm_api_uri)).map do |repo|
17
- repo['clone_url'] if (repo['size']).positive?
18
- end.compact
19
- ogm_repos.reject! { |repo| GeoCombineRake.denylist.include?(repo) }
20
- end
21
- ogm_repos.each do |repo|
22
- Kernel.system "echo #{repo} && mkdir -p #{GeoCombineRake.ogm_path} && cd #{GeoCombineRake.ogm_path} && git clone --depth 1 #{repo}"
23
- end
14
+ harvester = GeoCombine::Harvester.new
15
+ args[:repo] ? harvester.clone(args.repo) : harvester.clone_all
24
16
  end
25
17
 
26
18
  desc '"git pull" OpenGeoMetadata repositories'
27
19
  task :pull, [:repo] do |_t, args|
28
- paths = if args.repo
29
- [File.join(GeoCombineRake.ogm_path, args.repo)]
30
- else
31
- Dir.glob("#{GeoCombineRake.ogm_path}/*")
32
- end
33
- paths.each do |path|
34
- next unless File.directory?(path)
35
-
36
- Kernel.system "echo #{path} && cd #{path} && git pull origin"
37
- end
20
+ harvester = GeoCombine::Harvester.new
21
+ args[:repo] ? harvester.pull(args.repo) : harvester.pull_all
38
22
  end
39
23
 
40
24
  desc 'Index all JSON documents except Layers.json'
41
25
  task :index do
42
- puts "Indexing #{GeoCombineRake.ogm_path} into #{GeoCombineRake.solr_url}"
43
- solr = RSolr.connect url: GeoCombineRake.solr_url, adapter: :net_http_persistent
44
- Find.find(GeoCombineRake.ogm_path) do |path|
45
- next unless File.basename(path).include?('.json') && File.basename(path) != 'layers.json'
46
-
47
- doc = JSON.parse(File.read(path))
48
- [doc].flatten.each do |record|
49
- puts "Indexing #{record['layer_slug_s']}: #{path}" if $DEBUG
50
- solr.update params: { commitWithin: GeoCombineRake.commit_within, overwrite: true },
51
- data: [record].to_json,
52
- headers: { 'Content-Type' => 'application/json' }
53
- rescue RSolr::Error::Http => e
54
- puts e
55
- end
56
- end
57
- solr.commit
26
+ harvester = GeoCombine::Harvester.new
27
+ indexer = GeoCombine::Indexer.new
28
+ indexer.index(harvester.docs_to_index)
58
29
  end
59
30
 
60
31
  namespace :geoblacklight_harvester do
@@ -66,29 +37,3 @@ namespace :geocombine do
66
37
  end
67
38
  end
68
39
  end
69
-
70
- # Class to hold helper methods for use in GeoCombine rake tasks
71
- class GeoCombineRake
72
- def self.commit_within
73
- (ENV['SOLR_COMMIT_WITHIN'] || 5000).to_i
74
- end
75
-
76
- def self.denylist
77
- [
78
- 'https://github.com/OpenGeoMetadata/GeoCombine.git',
79
- 'https://github.com/OpenGeoMetadata/aardvark.git',
80
- 'https://github.com/OpenGeoMetadata/metadata-issues.git',
81
- 'https://github.com/OpenGeoMetadata/ogm_utils-python.git',
82
- 'https://github.com/OpenGeoMetadata/opengeometadata.github.io.git',
83
- 'https://github.com/OpenGeoMetadata/opengeometadata-rails.git'
84
- ]
85
- end
86
-
87
- def self.ogm_path
88
- ENV['OGM_PATH'] || 'tmp/opengeometadata'
89
- end
90
-
91
- def self.solr_url
92
- ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'
93
- end
94
- end
@@ -28,6 +28,13 @@
28
28
  "dct_spatial_sm":[
29
29
  "Uganda"
30
30
  ],
31
+ "dct_isPartOf_sm":[
32
+ "Uganda GIS Maps and Data, 2000-2010"
33
+ ],
34
+ "dc_source_sm": [
35
+ "stanford-rb371kw9607"
36
+ ],
31
37
  "solr_geom":"ENVELOPE(29.572742, 35.000308, 4.234077, -1.478794)",
32
- "solr_year_i":2005
38
+ "solr_year_i":2005,
39
+ "suppressed_b":false
33
40
  }
@@ -0,0 +1,51 @@
1
+ {
2
+ "gbl_mdVersion_s":"Aardvark",
3
+ "dct_identifier_sm":[
4
+ "http://purl.stanford.edu/cz128vq0535"
5
+ ],
6
+ "dct_title_s":"2005 Rural Poverty GIS Database: Uganda",
7
+ "dct_description_sm":[
8
+ "This polygon shapefile contains 2005 poverty data for 855 rural subcounties in Uganda. These data are intended for researchers, students, policy makers and the general public for reference and mapping purposes, and may be used for basic applications such as viewing, querying, and map output production."
9
+ ],
10
+ "dct_accessRights_s":"Public",
11
+ "schema_provider_s":"Stanford",
12
+ "dct_references_s":"{\"http://schema.org/url\":\"http://purl.stanford.edu/cz128vq0535\",\"http://schema.org/downloadUrl\":\"http://stacks.stanford.edu/file/druid:cz128vq0535/data.zip\",\"http://www.loc.gov/mods/v3\":\"http://purl.stanford.edu/cz128vq0535.mods\",\"http://www.isotc211.org/schemas/2005/gmd/\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/iso19139.xml\",\"http://www.w3.org/1999/xhtml\":\"http://opengeometadata.stanford.edu/metadata/edu.stanford.purl/druid:cz128vq0535/default.html\",\"http://www.opengis.net/def/serviceType/ogc/wfs\":\"https://geowebservices.stanford.edu/geoserver/wfs\",\"http://www.opengis.net/def/serviceType/ogc/wms\":\"https://geowebservices.stanford.edu/geoserver/wms\"}",
13
+ "gbl_wxsIdentifier_s":"druid:cz128vq0535",
14
+ "id":"stanford-cz128vq0535",
15
+ "gbl_resourceType_sm": [
16
+ "Polygon data"
17
+ ],
18
+ "gbl_mdModified_dt":"2015-01-13T18:46:38Z",
19
+ "dct_format_s":"Shapefile",
20
+ "dct_language_sm":[
21
+ "English"
22
+ ],
23
+ "gbl_resourceClass_sm":[
24
+ "Datasets"
25
+ ],
26
+ "dct_publisher_sm":[
27
+ "Uganda Bureau of Statistics"
28
+ ],
29
+ "dct_creator_sm":[
30
+ "Uganda Bureau of Statistics"
31
+ ],
32
+ "dct_subject_sm":[
33
+ "Poverty",
34
+ "Statistics"
35
+ ],
36
+ "dct_issued_s":"2005",
37
+ "dct_temporal_sm":[
38
+ "2005"
39
+ ],
40
+ "dct_spatial_sm":[
41
+ "Uganda"
42
+ ],
43
+ "dct_source_sm": [
44
+ "stanford-rb371kw9607"
45
+ ],
46
+ "dcat_bbox":"ENVELOPE(29.572742, 35.000308, 4.234077, -1.478794)",
47
+ "gbl_indexYear_im":[
48
+ 2005
49
+ ],
50
+ "gbl_suppressed_b":false
51
+ }
@@ -0,0 +1,57 @@
1
+ {
2
+ "dct_title_s": "A description of the coast & city of Goa.",
3
+ "dct_alternative_sm": [
4
+ "A description of the coast & city of Goa."
5
+ ],
6
+ "dct_description_sm": [
7
+ "Photocopy. Some relief shown pictorially. North oriented to the left. \"The city of Goa & all its dependencies doth justly belong to the crown of England by the mariage [sic] of King Charles the Second with Queen Catherine\"--upper right. \"21\"--upper right. Outside of original margin is image of British Museum rule and \"7-Tab-125.\" Dimensions: 51 x 61 centimeters"
8
+ ],
9
+ "dct_language_sm": [
10
+ "eng"
11
+ ],
12
+ "dct_publisher_sm": [
13
+ "British Museum? (London?)"
14
+ ],
15
+ "schema_provider_s": "University of Minnesota",
16
+ "gbl_resourceClass_sm": [
17
+ "Maps"
18
+ ],
19
+ "dcat_keyword_sm": [
20
+ "Velha Goa (India) Maps",
21
+ "Maps"
22
+ ],
23
+ "dct_temporal_sm": [
24
+ "1900-1999"
25
+ ],
26
+ "dct_issued_s": "1900 - 1999?",
27
+ "gbl_indexYear_im": [
28
+ "1900"
29
+ ],
30
+ "gbl_dateRange_drsim": [
31
+ "[1900 TO 1999]"
32
+ ],
33
+ "dct_spatial_sm": [
34
+ "India"
35
+ ],
36
+ "locn_geometry": "ENVELOPE(-2.36,-2.06,11.73,11.5101)",
37
+ "dcat_bbox": "ENVELOPE(-2.36,-2.06,11.73,11.5101)",
38
+ "dcat_centroid": "11.620049999999999,-2.21",
39
+ "pcdm_memberOf_sm": [
40
+ "64bd8c4c-8e60-4956-b43d-bdc3f93db488"
41
+ ],
42
+ "dct_isPartOf_sm": [
43
+ "05d-01"
44
+ ],
45
+ "dct_rights_sm": [
46
+ "Use of this item may be governed by US and international copyright laws. You may be able to use this item, but copyright and other considerations may apply. For possible additional information or guidance on your use, please contact the contributing organization."
47
+ ],
48
+ "dct_accessRights_s": "Public",
49
+ "dct_format_s": "JPEG",
50
+ "dct_references_s": "{\"http://schema.org/downloadUrl\":\"http://cdm16022.contentdm.oclc.org/utils/getfile/collection/p16022coll205/id/236/filename/print/page/download/fparams/forcedownload\",\"http://schema.org/url\":\"https://umedia.lib.umn.edu/item/p16022coll205:236\",\"http://iiif.io/api/presentation#manifest\":\"https://cdm16022.contentdm.oclc.org/iiif/info/p16022coll205/236/manifest.json\"}",
51
+ "id": "p16022coll205:236",
52
+ "dct_identifier_sm": [
53
+ "UMN_ALMA:9949551790001701"
54
+ ],
55
+ "gbl_mdModified_dt": "2022-04-01T15:27:13Z",
56
+ "gbl_mdVersion_s": "Aardvark"
57
+ }
@@ -15,6 +15,12 @@ module JsonDocs
15
15
  File.read(File.join(File.dirname(__FILE__), './docs/full_geoblacklight.json'))
16
16
  end
17
17
 
18
+ ##
19
+ # full_geoblacklight fixture converted to the aardvark schema
20
+ def full_geoblacklight_aardvark
21
+ File.read(File.join(File.dirname(__FILE__), './docs/full_geoblacklight_aardvark.json'))
22
+ end
23
+
18
24
  ##
19
25
  # A sample Esri OpenData metadata record
20
26
  def esri_opendata_metadata
@@ -15,7 +15,7 @@ RSpec.describe GeoCombine::BoundingBox do
15
15
 
16
16
  describe '#valid?' do
17
17
  context 'when valid' do
18
- it { valid.valid? }
18
+ it { expect(valid.valid?).to be true }
19
19
  end
20
20
 
21
21
  context 'when south > north' do