relaton-ogc 1.9.1 → 1.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +19 -0
- data/lib/relaton_ogc/data_fetcher.rb +96 -0
- data/lib/relaton_ogc/hit_collection.rb +14 -37
- data/lib/relaton_ogc/processor.rb +14 -1
- data/lib/relaton_ogc/scrapper.rb +4 -2
- data/lib/relaton_ogc/version.rb +1 -1
- data/lib/relaton_ogc.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d9ddd36656e221ad7ceb82adf637197b15648451b42fbd78069bc02078082c05
|
|
4
|
+
data.tar.gz: 9179d1d896e37f26841c392701c500990eafbfdb1060a4120bb85189014d788e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0ab0f2b5ed50fd8f218c92ae1a8589df818f92bb2480fffc450fbe930c16185cc9855d683dc4aafb5d34ad4f808ffd1cc44ba11b19adc6550d62dc029e7c0b11
|
|
7
|
+
data.tar.gz: c62733901e8f8790eaaccf90345503fd187bba8e8a08655400fdd77aa508df640ec7a4f11ef1146e41b0e456c538954769cbb4ef2d6eacba3b6b2cac73a70e01
|
data/README.adoc
CHANGED
|
@@ -113,6 +113,25 @@ RelatonOgc::OgcBibliographicItem.from_hash hash
|
|
|
113
113
|
...
|
|
114
114
|
----
|
|
115
115
|
|
|
116
|
+
=== Fetch data
|
|
117
|
+
|
|
118
|
+
This gem uses the https://raw.githubusercontent.com/opengeospatial/NamingAuthority/master/incubation/bibliography/bibliography.json dataset as a data source.
|
|
119
|
+
|
|
120
|
+
The method `RelatonOgc::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
|
|
121
|
+
Arguments:
|
|
122
|
+
|
|
123
|
+
- `output` - folder to save documents (default './data').
|
|
124
|
+
- `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml` (default `yaml`).
|
|
125
|
+
|
|
126
|
+
[source,ruby]
|
|
127
|
+
----
|
|
128
|
+
RelatonOgc::DataFetcher.fetch
|
|
129
|
+
Started at: 2021-09-14 11:21:46 +0200
|
|
130
|
+
[relaton-ogc] WARNING Duplicated documents: 15-113r5, 08-094r1, 10-025r1, 12-128r14, 16-079, 16-007r3, 13-026r8, 12-128r12, 15-078r6, 12-176r7, 09-102r3, 14-095, 14-115, 07-147r2, 12-000, 12-006, 09-025r1, 07-036, 07-110r4, 03-105r1, 06-042, 07-165r1, 12-066, 06-104r4, 11-122r1, 09-000, 04-094, 07-006r1, 06-035r1, 03-006r3, 05-134, 04-021r3, 02-058, 01-009
|
|
131
|
+
Stopped at: 2021-09-14 11:21:48 +0200
|
|
132
|
+
=> nil
|
|
133
|
+
----
|
|
134
|
+
|
|
116
135
|
== Development
|
|
117
136
|
|
|
118
137
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
module RelatonOgc
|
|
2
|
+
class DataFetcher
|
|
3
|
+
module Utils
|
|
4
|
+
ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
|
|
5
|
+
"NamingAuthority/master/incubation/bibliography/"\
|
|
6
|
+
"bibliography.json".freeze
|
|
7
|
+
|
|
8
|
+
def get_data # rubocop:disable Metrics/AbcSize
|
|
9
|
+
h = {}
|
|
10
|
+
h["If-None-Match"] = etag if etag
|
|
11
|
+
resp = Faraday.new(ENDPOINT, headers: h).get
|
|
12
|
+
case resp.status
|
|
13
|
+
when 200
|
|
14
|
+
json = JSON.parse(resp.body)
|
|
15
|
+
block_given? ? yield(resp[:etag], json) : json
|
|
16
|
+
when 304 then [] # there aren't any changes since last fetching
|
|
17
|
+
else raise RelatonBib::RequestError, "Could not access #{ENDPOINT}"
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
#
|
|
22
|
+
# Read ETag from file
|
|
23
|
+
#
|
|
24
|
+
# @return [String, NilClass]
|
|
25
|
+
def etag
|
|
26
|
+
@etag ||= if File.exist? @etagfile
|
|
27
|
+
File.read @etagfile, encoding: "UTF-8"
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
#
|
|
32
|
+
# Save ETag to file
|
|
33
|
+
#
|
|
34
|
+
# @param e_tag [String]
|
|
35
|
+
def etag=(e_tag)
|
|
36
|
+
File.write @etagfile, e_tag, encoding: "UTF-8"
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
include Utils
|
|
41
|
+
|
|
42
|
+
#
|
|
43
|
+
# Create DataFetcher instance
|
|
44
|
+
#
|
|
45
|
+
# @param [String] output directory to save the documents
|
|
46
|
+
# @param [String] format output format "yaml" or "xml"
|
|
47
|
+
#
|
|
48
|
+
def initialize(output, format)
|
|
49
|
+
@output = output
|
|
50
|
+
@etagfile = File.join output, "etag.txt"
|
|
51
|
+
@format = format
|
|
52
|
+
@docids = []
|
|
53
|
+
@dupids = []
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def self.fetch(output: "data", format: "yaml")
|
|
57
|
+
t1 = Time.now
|
|
58
|
+
puts "Started at: #{t1}"
|
|
59
|
+
FileUtils.mkdir_p output unless Dir.exist? output
|
|
60
|
+
new(output, format).fetch
|
|
61
|
+
t2 = Time.now
|
|
62
|
+
puts "Stopped at: #{t2}"
|
|
63
|
+
puts "Done in: #{(t2 - t1).round} sec."
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def fetch # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
|
67
|
+
get_data do |etag, json|
|
|
68
|
+
no_errors = true
|
|
69
|
+
json.each do |_, hit|
|
|
70
|
+
bib = Scrapper.parse_page hit
|
|
71
|
+
write_document bib
|
|
72
|
+
rescue StandardError => e
|
|
73
|
+
no_errors = false
|
|
74
|
+
warn "Fetching document: #{hit['identifier']}"
|
|
75
|
+
warn "#{e.class} #{e.message}"
|
|
76
|
+
warn e.backtrace
|
|
77
|
+
end
|
|
78
|
+
warn "[relaton-ogc] WARNING Duplicated documents: #{@dupids.uniq.join(', ')}" if @dupids.any?
|
|
79
|
+
self.etag = etag if no_errors
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def write_document(bib) # rubocop:disable Metrics/AbcSize
|
|
84
|
+
if @docids.include?(bib.docidentifier[0].id)
|
|
85
|
+
@dupids << bib.docidentifier[0].id
|
|
86
|
+
return
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
@docids << bib.docidentifier[0].id
|
|
90
|
+
name = bib.docidentifier[0].id.upcase.gsub(/[\s:.]/, "_")
|
|
91
|
+
file = "#{@output}/#{name}.#{@format}"
|
|
92
|
+
content = @format == "xml" ? bib.to_xml(bibdata: true) : bib.to_hash.to_yaml
|
|
93
|
+
File.write file, content, encoding: "UTF-8"
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
@@ -4,23 +4,25 @@ require "fileutils"
|
|
|
4
4
|
|
|
5
5
|
module RelatonOgc
|
|
6
6
|
class HitCollection < RelatonBib::HitCollection
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
include DataFetcher::Utils
|
|
8
|
+
|
|
9
|
+
# ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
|
|
10
|
+
# "NamingAuthority/master/incubation/bibliography/"\
|
|
11
|
+
# "bibliography.json".freeze
|
|
9
12
|
DATADIR = File.expand_path ".relaton/ogc/", Dir.home
|
|
10
13
|
DATAFILE = File.expand_path "bibliography.json", DATADIR
|
|
11
|
-
ETAGFILE = File.expand_path "etag.txt", DATADIR
|
|
14
|
+
# ETAGFILE = File.expand_path "etag.txt", DATADIR
|
|
12
15
|
|
|
13
16
|
# @param ref [String]
|
|
14
17
|
# @param year [String]
|
|
15
18
|
# @param opts [Hash]
|
|
16
19
|
def initialize(ref, year = nil)
|
|
17
20
|
super
|
|
21
|
+
@etagfile = File.expand_path "etag.txt", DATADIR
|
|
18
22
|
@array = from_json(ref).sort_by do |hit|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
Date.parse "0000-01-01"
|
|
23
|
-
end
|
|
23
|
+
hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
|
|
24
|
+
rescue ArgumentError
|
|
25
|
+
Date.parse "0000-01-01"
|
|
24
26
|
end.reverse
|
|
25
27
|
end
|
|
26
28
|
|
|
@@ -52,38 +54,13 @@ module RelatonOgc
|
|
|
52
54
|
#
|
|
53
55
|
# fetch data from server and save it to file.
|
|
54
56
|
#
|
|
55
|
-
def fetch_data
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
resp = Faraday.new(ENDPOINT, headers: h).get
|
|
59
|
-
# return if there aren't any changes since last fetching
|
|
60
|
-
return if resp.status == 304
|
|
61
|
-
unless resp.status == 200
|
|
62
|
-
raise RelatonBib::RequestError, "Could not access #{ENDPOINT}"
|
|
63
|
-
end
|
|
57
|
+
def fetch_data
|
|
58
|
+
json = get_data
|
|
59
|
+
return unless json
|
|
64
60
|
|
|
65
61
|
FileUtils.mkdir_p DATADIR unless Dir.exist? DATADIR
|
|
66
|
-
|
|
67
|
-
@data = JSON.parse resp.body
|
|
62
|
+
@data = json
|
|
68
63
|
File.write DATAFILE, @data.to_json, encoding: "UTF-8"
|
|
69
64
|
end
|
|
70
|
-
|
|
71
|
-
#
|
|
72
|
-
# Read ETag form file
|
|
73
|
-
#
|
|
74
|
-
# @return [String, NilClass]
|
|
75
|
-
def etag
|
|
76
|
-
@etag ||= if File.exist? ETAGFILE
|
|
77
|
-
File.read ETAGFILE, encoding: "UTF-8"
|
|
78
|
-
end
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
#
|
|
82
|
-
# Save ETag to file
|
|
83
|
-
#
|
|
84
|
-
# @param tag [String]
|
|
85
|
-
def etag=(e_tag)
|
|
86
|
-
File.write ETAGFILE, e_tag, encoding: "UTF-8"
|
|
87
|
-
end
|
|
88
65
|
end
|
|
89
66
|
end
|
|
@@ -2,11 +2,12 @@ require "relaton/processor"
|
|
|
2
2
|
|
|
3
3
|
module RelatonOgc
|
|
4
4
|
class Processor < Relaton::Processor
|
|
5
|
-
def initialize
|
|
5
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
6
6
|
@short = :relaton_ogc
|
|
7
7
|
@prefix = "OGC"
|
|
8
8
|
@defaultprefix = %r{^OGC\s}
|
|
9
9
|
@idtype = "OGC"
|
|
10
|
+
@datasets = %w[ogc-naming-authority]
|
|
10
11
|
end
|
|
11
12
|
|
|
12
13
|
# @param code [String]
|
|
@@ -17,6 +18,18 @@ module RelatonOgc
|
|
|
17
18
|
::RelatonOgc::OgcBibliography.get(code, date, opts)
|
|
18
19
|
end
|
|
19
20
|
|
|
21
|
+
#
|
|
22
|
+
# Fetch all the documents from a source
|
|
23
|
+
#
|
|
24
|
+
# @param [String] _source source name
|
|
25
|
+
# @param [Hash] opts
|
|
26
|
+
# @option opts [String] :output directory to output documents
|
|
27
|
+
# @option opts [String] :format
|
|
28
|
+
#
|
|
29
|
+
def fetch_data(_source, opts)
|
|
30
|
+
DataFetcher.fetch(**opts)
|
|
31
|
+
end
|
|
32
|
+
|
|
20
33
|
# @param xml [String]
|
|
21
34
|
# @return [RelatonOgc::OgcBibliographicItem]
|
|
22
35
|
def from_xml(xml)
|
data/lib/relaton_ogc/scrapper.rb
CHANGED
|
@@ -13,7 +13,7 @@ module RelatonOgc
|
|
|
13
13
|
"IPR" => { type: "engineering-report" },
|
|
14
14
|
"IS" => { type: "standard", subtype: "implementation" },
|
|
15
15
|
"ISC" => { type: "standard", subtype: "implementation" },
|
|
16
|
-
"ISx" => { type: "standard", subtype: "
|
|
16
|
+
"ISx" => { type: "standard", subtype: "extension" },
|
|
17
17
|
"Notes" => { type: "other" },
|
|
18
18
|
"ORM" => { type: "reference-model" },
|
|
19
19
|
"PC" => { type: "standard", subtype: "profile" },
|
|
@@ -88,7 +88,7 @@ module RelatonOgc
|
|
|
88
88
|
# @param stage [String]
|
|
89
89
|
# @return [RelatonBib::DocumentStatus, NilClass]
|
|
90
90
|
def fetch_status(stage)
|
|
91
|
-
stage && RelatonBib::
|
|
91
|
+
stage && RelatonBib::DocumentStatus.new(stage: stage)
|
|
92
92
|
end
|
|
93
93
|
|
|
94
94
|
# @param identifier [String]
|
|
@@ -138,6 +138,8 @@ module RelatonOgc
|
|
|
138
138
|
# @param date [String]
|
|
139
139
|
# @return [Array<RelatonBib::BibliographicDate>]
|
|
140
140
|
def fetch_date(date)
|
|
141
|
+
return [] unless date
|
|
142
|
+
|
|
141
143
|
[RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
|
142
144
|
end
|
|
143
145
|
end
|
data/lib/relaton_ogc/version.rb
CHANGED
data/lib/relaton_ogc.rb
CHANGED
|
@@ -2,6 +2,7 @@ require "relaton_iso_bib"
|
|
|
2
2
|
require "relaton_ogc/version"
|
|
3
3
|
require "relaton_ogc/ogc_bibliographic_item"
|
|
4
4
|
require "relaton_ogc/ogc_bibliography"
|
|
5
|
+
require "relaton_ogc/data_fetcher"
|
|
5
6
|
require "relaton_ogc/hit_collection"
|
|
6
7
|
require "relaton_ogc/scrapper"
|
|
7
8
|
require "relaton_ogc/xml_parser"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-ogc
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.9.
|
|
4
|
+
version: 1.9.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-09-
|
|
11
|
+
date: 2021-09-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: equivalent-xml
|
|
@@ -175,6 +175,7 @@ files:
|
|
|
175
175
|
- grammars/ogc.rng
|
|
176
176
|
- grammars/reqt.rng
|
|
177
177
|
- lib/relaton_ogc.rb
|
|
178
|
+
- lib/relaton_ogc/data_fetcher.rb
|
|
178
179
|
- lib/relaton_ogc/editorial_group.rb
|
|
179
180
|
- lib/relaton_ogc/hash_converter.rb
|
|
180
181
|
- lib/relaton_ogc/hit.rb
|