relaton-ogc 1.14.2 → 1.14.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -0
- data/.github/workflows/release.yml +3 -2
- data/Gemfile +3 -0
- data/lib/relaton_ogc/data_fetcher.rb +43 -22
- data/lib/relaton_ogc/hit_collection.rb +15 -62
- data/lib/relaton_ogc/processor.rb +7 -0
- data/lib/relaton_ogc/scrapper.rb +4 -2
- data/lib/relaton_ogc/version.rb +1 -1
- data/lib/relaton_ogc.rb +1 -0
- data/relaton_ogc.gemspec +1 -4
- metadata +9 -37
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: df9aa5c8876eed257c8bba8f85d6f59e917f5239c17a8937f233209c2318a6cc
|
4
|
+
data.tar.gz: 829cb72c24ba23c1762fb90f51f683fb889eaa8cb6a2b318de02932b5c4b07cc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8fcb76f8709b72a4e50f9a163d9e7234c08dd67690cf38b72395047f23347be69feff9c460392b95554889a9fdff40b1b737de892bb489f4632931830792d6c1
|
7
|
+
data.tar.gz: 6b4ebf6cb8538a543db51ca9dca4dba2fab47287229a2ce864516d063c6f634e653a6b25f8fe5817bf74301c4657d66efb1fd7631e56ea8ef5f48d45803e72b4
|
data/.github/workflows/rake.yml
CHANGED
data/Gemfile
CHANGED
data/lib/relaton_ogc/data_fetcher.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module RelatonOgc
|
2
4
|
class DataFetcher
|
3
5
|
module Utils
|
4
|
-
ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"
|
5
|
-
"NamingAuthority/master/incubation/bibliography/"\
|
6
|
-
"bibliography.json".freeze
|
6
|
+
ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/NamingAuthority/master/definitions/docs/docs.json"
|
7
7
|
|
8
8
|
def get_data # rubocop:disable Metrics/AbcSize
|
9
9
|
h = {}
|
@@ -43,45 +43,54 @@ module RelatonOgc
|
|
43
43
|
# Create DataFetcher instance
|
44
44
|
#
|
45
45
|
# @param [String] output directory to save the documents
|
46
|
-
# @param [String] format output format "yaml" or "
|
46
|
+
# @param [String] format output format "yaml", "xml", or "bibxml"
|
47
47
|
#
|
48
48
|
def initialize(output, format)
|
49
49
|
@output = output
|
50
50
|
@etagfile = File.join output, "etag.txt"
|
51
51
|
@format = format
|
52
|
+
@ext = format.sub "bib", ""
|
52
53
|
@docids = []
|
53
|
-
@dupids = []
|
54
|
+
@dupids = Set.new
|
55
|
+
end
|
56
|
+
|
57
|
+
def index
|
58
|
+
@index ||= Relaton::Index.find_or_create :ogc, file: "index-v1.yaml"
|
54
59
|
end
|
55
60
|
|
56
61
|
def self.fetch(output: "data", format: "yaml")
|
57
62
|
t1 = Time.now
|
58
63
|
puts "Started at: #{t1}"
|
59
|
-
FileUtils.mkdir_p output
|
64
|
+
FileUtils.mkdir_p output
|
60
65
|
new(output, format).fetch
|
61
66
|
t2 = Time.now
|
62
67
|
puts "Stopped at: #{t2}"
|
63
68
|
puts "Done in: #{(t2 - t1).round} sec."
|
64
69
|
end
|
65
70
|
|
66
|
-
def fetch
|
71
|
+
def fetch
|
67
72
|
get_data do |etag, json|
|
68
73
|
no_errors = true
|
69
|
-
json.each
|
70
|
-
|
71
|
-
|
72
|
-
bib = Scrapper.parse_page hit
|
73
|
-
write_document bib
|
74
|
-
rescue StandardError => e
|
75
|
-
no_errors = false
|
76
|
-
warn "Fetching document: #{hit['identifier']}"
|
77
|
-
warn "#{e.class} #{e.message}"
|
78
|
-
warn e.backtrace
|
79
|
-
end
|
80
|
-
warn "[relaton-ogc] WARNING Duplicated documents: #{@dupids.uniq.join(', ')}" if @dupids.any?
|
74
|
+
json.each { |_, hit| fetch_doc(hit) || no_errors = false }
|
75
|
+
warn "[relaton-ogc] WARNING Duplicated documents: #{@dupids.to_a.join(', ')}" if @dupids.any?
|
81
76
|
self.etag = etag if no_errors
|
77
|
+
index.save
|
82
78
|
end
|
83
79
|
end
|
84
80
|
|
81
|
+
def fetch_doc(hit)
|
82
|
+
return if hit["type"] == "CC"
|
83
|
+
|
84
|
+
bib = Scrapper.parse_page hit
|
85
|
+
write_document bib
|
86
|
+
true
|
87
|
+
rescue StandardError => e
|
88
|
+
warn "Fetching document: #{hit['identifier']}"
|
89
|
+
warn "#{e.class} #{e.message}"
|
90
|
+
warn e.backtrace
|
91
|
+
false
|
92
|
+
end
|
93
|
+
|
85
94
|
def write_document(bib) # rubocop:disable Metrics/AbcSize
|
86
95
|
if @docids.include?(bib.docidentifier[0].id)
|
87
96
|
@dupids << bib.docidentifier[0].id
|
@@ -89,10 +98,22 @@ module RelatonOgc
|
|
89
98
|
end
|
90
99
|
|
91
100
|
@docids << bib.docidentifier[0].id
|
101
|
+
file = file_name bib
|
102
|
+
index.add_or_update bib.docidentifier[0].id, file
|
103
|
+
File.write file, content(bib), encoding: "UTF-8"
|
104
|
+
end
|
105
|
+
|
106
|
+
def file_name(bib)
|
92
107
|
name = bib.docidentifier[0].id.upcase.gsub(/[\s:.]/, "_")
|
93
|
-
|
94
|
-
|
95
|
-
|
108
|
+
"#{@output}/#{name}.#{@ext}"
|
109
|
+
end
|
110
|
+
|
111
|
+
def content(bib)
|
112
|
+
case @format
|
113
|
+
when "xml" then bib.to_xml bibdata: true
|
114
|
+
when "yaml" then bib.to_hash.to_yaml
|
115
|
+
when "bibxml" then bib.to_bibxml
|
116
|
+
end
|
96
117
|
end
|
97
118
|
end
|
98
119
|
end
|
data/lib/relaton_ogc/hit_collection.rb
CHANGED
@@ -4,76 +4,29 @@ require "fileutils"
|
|
4
4
|
|
5
5
|
module RelatonOgc
|
6
6
|
class HitCollection < RelatonBib::HitCollection
|
7
|
-
|
8
|
-
|
9
|
-
# ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
|
10
|
-
# "NamingAuthority/master/incubation/bibliography/"\
|
11
|
-
# "bibliography.json".freeze
|
12
|
-
ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-ogc/main/data/".freeze
|
13
|
-
# DATADIR = File.expand_path ".relaton/ogc/", Dir.home
|
14
|
-
# DATAFILE = File.expand_path "bibliography.json", DATADIR
|
15
|
-
# ETAGFILE = File.expand_path "etag.txt", DATADIR
|
7
|
+
ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-ogc/main/".freeze
|
8
|
+
INDEX_FILE = "index-v1.yaml".freeze
|
16
9
|
|
17
10
|
# @param code [String]
|
18
11
|
# @param year [String]
|
19
12
|
# @param opts [Hash]
|
20
13
|
def initialize(code, year = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
21
14
|
super
|
22
|
-
|
23
|
-
|
24
|
-
# hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
|
25
|
-
# rescue ArgumentError
|
26
|
-
# Date.parse "0000-01-01"
|
27
|
-
# end.reverse
|
28
|
-
url = "#{ENDPOINT}#{code.upcase.gsub(/[\s:.]/, '_')}.yaml"
|
29
|
-
resp = Faraday.get url do |req|
|
30
|
-
req.options.timeout = 10
|
31
|
-
end
|
32
|
-
@array = case resp.status
|
33
|
-
when 200
|
34
|
-
hash = YAML.safe_load(resp.body)
|
35
|
-
hash["fetched"] = Date.today.to_s
|
36
|
-
bib = OgcBibliographicItem.from_hash hash
|
37
|
-
[Hit.new(bib, self)]
|
38
|
-
else []
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
# private
|
15
|
+
@array = []
|
16
|
+
return if code.nil? || code.empty?
|
43
17
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
# @param docid [String]
|
48
|
-
# def from_json(docid, **_opts)
|
49
|
-
# ref = docid.sub(/^OGC\s/, "").strip
|
50
|
-
# return [] if ref.empty?
|
18
|
+
index = Relaton::Index.find_or_create :ogc, url: "#{ENDPOINT}index-v1.zip", file: INDEX_FILE
|
19
|
+
row = index.search(code).min_by { |r| r[:id] }
|
20
|
+
return unless row
|
51
21
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
# end
|
22
|
+
url = "#{ENDPOINT}#{row[:file]}"
|
23
|
+
resp = Faraday.get(url) { |req| req.options.timeout = 10 }
|
24
|
+
return unless resp.status == 200
|
56
25
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
# ctime = File.ctime DATAFILE if File.exist? DATAFILE
|
63
|
-
# fetch_data if !ctime || ctime.to_date < Date.today
|
64
|
-
# @data ||= JSON.parse File.read(DATAFILE, encoding: "UTF-8")
|
65
|
-
# end
|
66
|
-
|
67
|
-
#
|
68
|
-
# fetch data from server and save it to file.
|
69
|
-
#
|
70
|
-
# def fetch_data
|
71
|
-
# json = get_data
|
72
|
-
# return unless json
|
73
|
-
|
74
|
-
# FileUtils.mkdir_p DATADIR unless Dir.exist? DATADIR
|
75
|
-
# @data = json
|
76
|
-
# File.write DATAFILE, @data.to_json, encoding: "UTF-8"
|
77
|
-
# end
|
26
|
+
hash = YAML.safe_load(resp.body)
|
27
|
+
hash["fetched"] = Date.today.to_s
|
28
|
+
bib = OgcBibliographicItem.from_hash hash
|
29
|
+
@array = [Hit.new(bib, self)]
|
30
|
+
end
|
78
31
|
end
|
79
32
|
end
|
data/lib/relaton_ogc/processor.rb
CHANGED
@@ -47,5 +47,12 @@ module RelatonOgc
|
|
47
47
|
def grammar_hash
|
48
48
|
@grammar_hash ||= ::RelatonOgc.grammar_hash
|
49
49
|
end
|
50
|
+
|
51
|
+
#
|
52
|
+
# Remove index file
|
53
|
+
#
|
54
|
+
def remove_index_file
|
55
|
+
Relaton::Index.find_or_create(:ogc, url: true, file: HitCollection::INDEX_FILE).remove_file
|
56
|
+
end
|
50
57
|
end
|
51
58
|
end
|
data/lib/relaton_ogc/scrapper.rb
CHANGED
@@ -85,7 +85,7 @@ module RelatonOgc
|
|
85
85
|
end
|
86
86
|
|
87
87
|
# @param stage [String]
|
88
|
-
# @return [RelatonBib::DocumentStatus,
|
88
|
+
# @return [RelatonBib::DocumentStatus, nil]
|
89
89
|
def fetch_status(stage)
|
90
90
|
stage && RelatonBib::DocumentStatus.new(stage: stage)
|
91
91
|
end
|
@@ -134,12 +134,14 @@ module RelatonOgc
|
|
134
134
|
)
|
135
135
|
end
|
136
136
|
|
137
|
-
# @param date [String]
|
137
|
+
# @param date [String, nil]
|
138
138
|
# @return [Array<RelatonBib::BibliographicDate>]
|
139
139
|
def fetch_date(date)
|
140
140
|
return [] unless date
|
141
141
|
|
142
142
|
[RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
143
|
+
rescue Date::Error
|
144
|
+
[]
|
143
145
|
end
|
144
146
|
end
|
145
147
|
end
|
data/lib/relaton_ogc/version.rb
CHANGED
data/lib/relaton_ogc.rb
CHANGED
data/relaton_ogc.gemspec
CHANGED
@@ -25,10 +25,7 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.require_paths = ["lib"]
|
26
26
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
|
27
27
|
|
28
|
-
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
29
|
-
spec.add_development_dependency "rake", "~> 13.0"
|
30
|
-
spec.add_development_dependency "rspec", "~> 3.0"
|
31
|
-
|
32
28
|
spec.add_dependency "faraday", "~> 2.7.0"
|
29
|
+
spec.add_dependency "relaton-index", "~> 0.2.0"
|
33
30
|
spec.add_dependency "relaton-iso-bib", "~> 1.14.0"
|
34
31
|
end
|
metadata
CHANGED
@@ -1,71 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-ogc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.14.2
|
4
|
+
version: 1.14.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-05-
|
11
|
+
date: 2023-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: equivalent-xml
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.6'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0.6'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '13.0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '13.0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rspec
|
14
|
+
name: faraday
|
43
15
|
requirement: !ruby/object:Gem::Requirement
|
44
16
|
requirements:
|
45
17
|
- - "~>"
|
46
18
|
- !ruby/object:Gem::Version
|
47
|
-
version: '3.0'
|
48
|
-
type: :development
|
19
|
+
version: 2.7.0
|
20
|
+
type: :runtime
|
49
21
|
prerelease: false
|
50
22
|
version_requirements: !ruby/object:Gem::Requirement
|
51
23
|
requirements:
|
52
24
|
- - "~>"
|
53
25
|
- !ruby/object:Gem::Version
|
54
|
-
version: '3.0'
|
26
|
+
version: 2.7.0
|
55
27
|
- !ruby/object:Gem::Dependency
|
56
|
-
name: faraday
|
28
|
+
name: relaton-index
|
57
29
|
requirement: !ruby/object:Gem::Requirement
|
58
30
|
requirements:
|
59
31
|
- - "~>"
|
60
32
|
- !ruby/object:Gem::Version
|
61
|
-
version: 2.7.0
|
33
|
+
version: 0.2.0
|
62
34
|
type: :runtime
|
63
35
|
prerelease: false
|
64
36
|
version_requirements: !ruby/object:Gem::Requirement
|
65
37
|
requirements:
|
66
38
|
- - "~>"
|
67
39
|
- !ruby/object:Gem::Version
|
68
|
-
version: 2.7.0
|
40
|
+
version: 0.2.0
|
69
41
|
- !ruby/object:Gem::Dependency
|
70
42
|
name: relaton-iso-bib
|
71
43
|
requirement: !ruby/object:Gem::Requirement
|