relaton-ogc 1.14.2 → 1.14.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -0
- data/.github/workflows/release.yml +3 -2
- data/Gemfile +3 -0
- data/lib/relaton_ogc/data_fetcher.rb +43 -22
- data/lib/relaton_ogc/hit_collection.rb +15 -62
- data/lib/relaton_ogc/processor.rb +7 -0
- data/lib/relaton_ogc/scrapper.rb +4 -2
- data/lib/relaton_ogc/version.rb +1 -1
- data/lib/relaton_ogc.rb +1 -0
- data/relaton_ogc.gemspec +1 -4
- metadata +9 -37
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: df9aa5c8876eed257c8bba8f85d6f59e917f5239c17a8937f233209c2318a6cc
|
4
|
+
data.tar.gz: 829cb72c24ba23c1762fb90f51f683fb889eaa8cb6a2b318de02932b5c4b07cc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8fcb76f8709b72a4e50f9a163d9e7234c08dd67690cf38b72395047f23347be69feff9c460392b95554889a9fdff40b1b737de892bb489f4632931830792d6c1
|
7
|
+
data.tar.gz: 6b4ebf6cb8538a543db51ca9dca4dba2fab47287229a2ce864516d063c6f634e653a6b25f8fe5817bf74301c4657d66efb1fd7631e56ea8ef5f48d45803e72b4
|
data/.github/workflows/rake.yml
CHANGED
data/Gemfile
CHANGED
data/lib/relaton_ogc/data_fetcher.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module RelatonOgc
|
2
4
|
class DataFetcher
|
3
5
|
module Utils
|
4
|
-
ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"
|
5
|
-
"NamingAuthority/master/incubation/bibliography/"\
|
6
|
-
"bibliography.json".freeze
|
6
|
+
ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/NamingAuthority/master/definitions/docs/docs.json"
|
7
7
|
|
8
8
|
def get_data # rubocop:disable Metrics/AbcSize
|
9
9
|
h = {}
|
@@ -43,45 +43,54 @@ module RelatonOgc
|
|
43
43
|
# Create DataFetcher instance
|
44
44
|
#
|
45
45
|
# @param [String] output directory to save the documents
|
46
|
-
# @param [String] format output format "yaml" or "
|
46
|
+
# @param [String] format output format "yaml", "xml", or "bibxml"
|
47
47
|
#
|
48
48
|
def initialize(output, format)
|
49
49
|
@output = output
|
50
50
|
@etagfile = File.join output, "etag.txt"
|
51
51
|
@format = format
|
52
|
+
@ext = format.sub "bib", ""
|
52
53
|
@docids = []
|
53
|
-
@dupids =
|
54
|
+
@dupids = Set.new
|
55
|
+
end
|
56
|
+
|
57
|
+
def index
|
58
|
+
@index ||= Relaton::Index.find_or_create :ogc, file: "index-v1.yaml"
|
54
59
|
end
|
55
60
|
|
56
61
|
def self.fetch(output: "data", format: "yaml")
|
57
62
|
t1 = Time.now
|
58
63
|
puts "Started at: #{t1}"
|
59
|
-
FileUtils.mkdir_p output
|
64
|
+
FileUtils.mkdir_p output
|
60
65
|
new(output, format).fetch
|
61
66
|
t2 = Time.now
|
62
67
|
puts "Stopped at: #{t2}"
|
63
68
|
puts "Done in: #{(t2 - t1).round} sec."
|
64
69
|
end
|
65
70
|
|
66
|
-
def fetch
|
71
|
+
def fetch
|
67
72
|
get_data do |etag, json|
|
68
73
|
no_errors = true
|
69
|
-
json.each
|
70
|
-
|
71
|
-
|
72
|
-
bib = Scrapper.parse_page hit
|
73
|
-
write_document bib
|
74
|
-
rescue StandardError => e
|
75
|
-
no_errors = false
|
76
|
-
warn "Fetching document: #{hit['identifier']}"
|
77
|
-
warn "#{e.class} #{e.message}"
|
78
|
-
warn e.backtrace
|
79
|
-
end
|
80
|
-
warn "[relaton-ogc] WARNING Duplicated documents: #{@dupids.uniq.join(', ')}" if @dupids.any?
|
74
|
+
json.each { |_, hit| fetch_doc(hit) || no_errors = false }
|
75
|
+
warn "[relaton-ogc] WARNING Duplicated documents: #{@dupids.to_a.join(', ')}" if @dupids.any?
|
81
76
|
self.etag = etag if no_errors
|
77
|
+
index.save
|
82
78
|
end
|
83
79
|
end
|
84
80
|
|
81
|
+
def fetch_doc(hit)
|
82
|
+
return if hit["type"] == "CC"
|
83
|
+
|
84
|
+
bib = Scrapper.parse_page hit
|
85
|
+
write_document bib
|
86
|
+
true
|
87
|
+
rescue StandardError => e
|
88
|
+
warn "Fetching document: #{hit['identifier']}"
|
89
|
+
warn "#{e.class} #{e.message}"
|
90
|
+
warn e.backtrace
|
91
|
+
false
|
92
|
+
end
|
93
|
+
|
85
94
|
def write_document(bib) # rubocop:disable Metrics/AbcSize
|
86
95
|
if @docids.include?(bib.docidentifier[0].id)
|
87
96
|
@dupids << bib.docidentifier[0].id
|
@@ -89,10 +98,22 @@ module RelatonOgc
|
|
89
98
|
end
|
90
99
|
|
91
100
|
@docids << bib.docidentifier[0].id
|
101
|
+
file = file_name bib
|
102
|
+
index.add_or_update bib.docidentifier[0].id, file
|
103
|
+
File.write file, content(bib), encoding: "UTF-8"
|
104
|
+
end
|
105
|
+
|
106
|
+
def file_name(bib)
|
92
107
|
name = bib.docidentifier[0].id.upcase.gsub(/[\s:.]/, "_")
|
93
|
-
|
94
|
-
|
95
|
-
|
108
|
+
"#{@output}/#{name}.#{@ext}"
|
109
|
+
end
|
110
|
+
|
111
|
+
def content(bib)
|
112
|
+
case @format
|
113
|
+
when "xml" then bib.to_xml bibdata: true
|
114
|
+
when "yaml" then bib.to_hash.to_yaml
|
115
|
+
when "bibxml" then bib.to_bibxml
|
116
|
+
end
|
96
117
|
end
|
97
118
|
end
|
98
119
|
end
|
data/lib/relaton_ogc/hit_collection.rb
CHANGED
@@ -4,76 +4,29 @@ require "fileutils"
|
|
4
4
|
|
5
5
|
module RelatonOgc
|
6
6
|
class HitCollection < RelatonBib::HitCollection
|
7
|
-
|
8
|
-
|
9
|
-
# ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
|
10
|
-
# "NamingAuthority/master/incubation/bibliography/"\
|
11
|
-
# "bibliography.json".freeze
|
12
|
-
ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-ogc/main/data/".freeze
|
13
|
-
# DATADIR = File.expand_path ".relaton/ogc/", Dir.home
|
14
|
-
# DATAFILE = File.expand_path "bibliography.json", DATADIR
|
15
|
-
# ETAGFILE = File.expand_path "etag.txt", DATADIR
|
7
|
+
ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-ogc/main/".freeze
|
8
|
+
INDEX_FILE = "index-v1.yaml".freeze
|
16
9
|
|
17
10
|
# @param code [Strig]
|
18
11
|
# @param year [String]
|
19
12
|
# @param opts [Hash]
|
20
13
|
def initialize(code, year = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
21
14
|
super
|
22
|
-
|
23
|
-
|
24
|
-
# hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
|
25
|
-
# rescue ArgumentError
|
26
|
-
# Date.parse "0000-01-01"
|
27
|
-
# end.reverse
|
28
|
-
url = "#{ENDPOINT}#{code.upcase.gsub(/[\s:.]/, '_')}.yaml"
|
29
|
-
resp = Faraday.get url do |req|
|
30
|
-
req.options.timeout = 10
|
31
|
-
end
|
32
|
-
@array = case resp.status
|
33
|
-
when 200
|
34
|
-
hash = YAML.safe_load(resp.body)
|
35
|
-
hash["fetched"] = Date.today.to_s
|
36
|
-
bib = OgcBibliographicItem.from_hash hash
|
37
|
-
[Hit.new(bib, self)]
|
38
|
-
else []
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
# private
|
15
|
+
@array = []
|
16
|
+
return if code.nil? || code.empty?
|
43
17
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
# @param docid [String]
|
48
|
-
# def from_json(docid, **_opts)
|
49
|
-
# ref = docid.sub(/^OGC\s/, "").strip
|
50
|
-
# return [] if ref.empty?
|
18
|
+
index = Relaton::Index.find_or_create :ogc, url: "#{ENDPOINT}index-v1.zip", file: INDEX_FILE
|
19
|
+
row = index.search(code).min_by { |r| r[:id] }
|
20
|
+
return unless row
|
51
21
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
# end
|
22
|
+
url = "#{ENDPOINT}#{row[:file]}"
|
23
|
+
resp = Faraday.get(url) { |req| req.options.timeout = 10 }
|
24
|
+
return unless resp.status == 200
|
56
25
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
# ctime = File.ctime DATAFILE if File.exist? DATAFILE
|
63
|
-
# fetch_data if !ctime || ctime.to_date < Date.today
|
64
|
-
# @data ||= JSON.parse File.read(DATAFILE, encoding: "UTF-8")
|
65
|
-
# end
|
66
|
-
|
67
|
-
#
|
68
|
-
# fetch data form server and save it to file.
|
69
|
-
#
|
70
|
-
# def fetch_data
|
71
|
-
# json = get_data
|
72
|
-
# return unless json
|
73
|
-
|
74
|
-
# FileUtils.mkdir_p DATADIR unless Dir.exist? DATADIR
|
75
|
-
# @data = json
|
76
|
-
# File.write DATAFILE, @data.to_json, encoding: "UTF-8"
|
77
|
-
# end
|
26
|
+
hash = YAML.safe_load(resp.body)
|
27
|
+
hash["fetched"] = Date.today.to_s
|
28
|
+
bib = OgcBibliographicItem.from_hash hash
|
29
|
+
@array = [Hit.new(bib, self)]
|
30
|
+
end
|
78
31
|
end
|
79
32
|
end
|
data/lib/relaton_ogc/processor.rb
CHANGED
@@ -47,5 +47,12 @@ module RelatonOgc
|
|
47
47
|
def grammar_hash
|
48
48
|
@grammar_hash ||= ::RelatonOgc.grammar_hash
|
49
49
|
end
|
50
|
+
|
51
|
+
#
|
52
|
+
# Remove index file
|
53
|
+
#
|
54
|
+
def remove_index_file
|
55
|
+
Relaton::Index.find_or_create(:ogc, url: true, file: HitCollection::INDEX_FILE).remove_file
|
56
|
+
end
|
50
57
|
end
|
51
58
|
end
|
data/lib/relaton_ogc/scrapper.rb
CHANGED
@@ -85,7 +85,7 @@ module RelatonOgc
|
|
85
85
|
end
|
86
86
|
|
87
87
|
# @param stage [String]
|
88
|
-
# @return [RelatonBib::DocumentStatus,
|
88
|
+
# @return [RelatonBib::DocumentStatus, nil]
|
89
89
|
def fetch_status(stage)
|
90
90
|
stage && RelatonBib::DocumentStatus.new(stage: stage)
|
91
91
|
end
|
@@ -134,12 +134,14 @@ module RelatonOgc
|
|
134
134
|
)
|
135
135
|
end
|
136
136
|
|
137
|
-
# @param date [String]
|
137
|
+
# @param date [String, nil]
|
138
138
|
# @return [Array<RelatonBib::BibliographicDate>]
|
139
139
|
def fetch_date(date)
|
140
140
|
return [] unless date
|
141
141
|
|
142
142
|
[RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
143
|
+
rescue Date::Error
|
144
|
+
[]
|
143
145
|
end
|
144
146
|
end
|
145
147
|
end
|
data/lib/relaton_ogc/version.rb
CHANGED
data/lib/relaton_ogc.rb
CHANGED
data/relaton_ogc.gemspec
CHANGED
@@ -25,10 +25,7 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.require_paths = ["lib"]
|
26
26
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
|
27
27
|
|
28
|
-
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
29
|
-
spec.add_development_dependency "rake", "~> 13.0"
|
30
|
-
spec.add_development_dependency "rspec", "~> 3.0"
|
31
|
-
|
32
28
|
spec.add_dependency "faraday", "~> 2.7.0"
|
29
|
+
spec.add_dependency "relaton-index", "~> 0.2.0"
|
33
30
|
spec.add_dependency "relaton-iso-bib", "~> 1.14.0"
|
34
31
|
end
|
metadata
CHANGED
@@ -1,71 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-ogc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.14.2
|
4
|
+
version: 1.14.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-05-
|
11
|
+
date: 2023-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: equivalent-xml
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.6'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0.6'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '13.0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '13.0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rspec
|
14
|
+
name: faraday
|
43
15
|
requirement: !ruby/object:Gem::Requirement
|
44
16
|
requirements:
|
45
17
|
- - "~>"
|
46
18
|
- !ruby/object:Gem::Version
|
47
|
-
version: '3.0'
|
48
|
-
type: :development
|
19
|
+
version: 2.7.0
|
20
|
+
type: :runtime
|
49
21
|
prerelease: false
|
50
22
|
version_requirements: !ruby/object:Gem::Requirement
|
51
23
|
requirements:
|
52
24
|
- - "~>"
|
53
25
|
- !ruby/object:Gem::Version
|
54
|
-
version: '3.0'
|
26
|
+
version: 2.7.0
|
55
27
|
- !ruby/object:Gem::Dependency
|
56
|
-
name: faraday
|
28
|
+
name: relaton-index
|
57
29
|
requirement: !ruby/object:Gem::Requirement
|
58
30
|
requirements:
|
59
31
|
- - "~>"
|
60
32
|
- !ruby/object:Gem::Version
|
61
|
-
version: 2.7.0
|
33
|
+
version: 0.2.0
|
62
34
|
type: :runtime
|
63
35
|
prerelease: false
|
64
36
|
version_requirements: !ruby/object:Gem::Requirement
|
65
37
|
requirements:
|
66
38
|
- - "~>"
|
67
39
|
- !ruby/object:Gem::Version
|
68
|
-
version: 2.7.0
|
40
|
+
version: 0.2.0
|
69
41
|
- !ruby/object:Gem::Dependency
|
70
42
|
name: relaton-iso-bib
|
71
43
|
requirement: !ruby/object:Gem::Requirement
|