relaton-ogc 1.7.3 → 1.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.rubocop.yml +1 -1
- data/README.adoc +19 -0
- data/bin/rspec +29 -0
- data/grammars/basicdoc.rng +165 -20
- data/grammars/biblio.rng +5 -6
- data/grammars/isodoc.rng +532 -16
- data/grammars/ogc.rng +33 -13
- data/grammars/reqt.rng +31 -2
- data/lib/relaton_ogc/data_fetcher.rb +96 -0
- data/lib/relaton_ogc/editorial_group.rb +1 -1
- data/lib/relaton_ogc/hash_converter.rb +1 -1
- data/lib/relaton_ogc/hit_collection.rb +14 -37
- data/lib/relaton_ogc/ogc_bibliographic_item.rb +17 -21
- data/lib/relaton_ogc/processor.rb +14 -1
- data/lib/relaton_ogc/scrapper.rb +6 -4
- data/lib/relaton_ogc/version.rb +1 -1
- data/lib/relaton_ogc/xml_parser.rb +9 -9
- data/lib/relaton_ogc.rb +1 -0
- data/relaton_ogc.gemspec +3 -4
- metadata +10 -8
data/grammars/ogc.rng
CHANGED
@@ -63,21 +63,35 @@
|
|
63
63
|
</element>
|
64
64
|
</define>
|
65
65
|
</include>
|
66
|
-
<define name="
|
67
|
-
<
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
<
|
76
|
-
|
77
|
-
|
78
|
-
|
66
|
+
<define name="TextElement" combine="choice">
|
67
|
+
<ref name="hi"/>
|
68
|
+
</define>
|
69
|
+
<define name="PureTextElement" combine="choice">
|
70
|
+
<ref name="hi"/>
|
71
|
+
</define>
|
72
|
+
<define name="hi">
|
73
|
+
<element name="hi">
|
74
|
+
<zeroOrMore>
|
75
|
+
<choice>
|
76
|
+
<ref name="PureTextElement"/>
|
77
|
+
<ref name="stem"/>
|
78
|
+
</choice>
|
79
|
+
</zeroOrMore>
|
79
80
|
</element>
|
80
81
|
</define>
|
82
|
+
<define name="DocumentSubtype">
|
83
|
+
<choice>
|
84
|
+
<value>conceptual-model</value>
|
85
|
+
<value>conceptual-model-and-encoding</value>
|
86
|
+
<value>conceptual-model-and-implementation</value>
|
87
|
+
<value>encoding</value>
|
88
|
+
<value>extension</value>
|
89
|
+
<value>implementation</value>
|
90
|
+
<value>profile</value>
|
91
|
+
<value>profile-with-extension</value>
|
92
|
+
<value>general</value>
|
93
|
+
</choice>
|
94
|
+
</define>
|
81
95
|
<define name="submitters">
|
82
96
|
<element name="submitters">
|
83
97
|
<ref name="Basic-Section"/>
|
@@ -115,6 +129,9 @@
|
|
115
129
|
<zeroOrMore>
|
116
130
|
<ref name="termdocsource"/>
|
117
131
|
</zeroOrMore>
|
132
|
+
<optional>
|
133
|
+
<ref name="misccontainer"/>
|
134
|
+
</optional>
|
118
135
|
<optional>
|
119
136
|
<ref name="boilerplate"/>
|
120
137
|
</optional>
|
@@ -126,6 +143,9 @@
|
|
126
143
|
<ref name="annex"/>
|
127
144
|
</zeroOrMore>
|
128
145
|
<ref name="bibliography"/>
|
146
|
+
<zeroOrMore>
|
147
|
+
<ref name="indexsect"/>
|
148
|
+
</zeroOrMore>
|
129
149
|
</element>
|
130
150
|
</define>
|
131
151
|
</grammar>
|
data/grammars/reqt.rng
CHANGED
@@ -30,15 +30,34 @@
|
|
30
30
|
<data type="boolean"/>
|
31
31
|
</attribute>
|
32
32
|
</optional>
|
33
|
+
<optional>
|
34
|
+
<attribute name="number"/>
|
35
|
+
</optional>
|
33
36
|
<optional>
|
34
37
|
<attribute name="subsequence"/>
|
35
38
|
</optional>
|
39
|
+
<optional>
|
40
|
+
<attribute name="keep-with-next">
|
41
|
+
<data type="boolean"/>
|
42
|
+
</attribute>
|
43
|
+
</optional>
|
44
|
+
<optional>
|
45
|
+
<attribute name="keep-lines-together">
|
46
|
+
<data type="boolean"/>
|
47
|
+
</attribute>
|
48
|
+
</optional>
|
36
49
|
<attribute name="id">
|
37
50
|
<data type="ID"/>
|
38
51
|
</attribute>
|
39
52
|
<optional>
|
40
53
|
<attribute name="filename"/>
|
41
54
|
</optional>
|
55
|
+
<optional>
|
56
|
+
<attribute name="model"/>
|
57
|
+
</optional>
|
58
|
+
<optional>
|
59
|
+
<attribute name="type"/>
|
60
|
+
</optional>
|
42
61
|
<optional>
|
43
62
|
<ref name="reqtitle"/>
|
44
63
|
</optional>
|
@@ -48,9 +67,9 @@
|
|
48
67
|
<optional>
|
49
68
|
<ref name="subject"/>
|
50
69
|
</optional>
|
51
|
-
<
|
70
|
+
<zeroOrMore>
|
52
71
|
<ref name="reqinherit"/>
|
53
|
-
</
|
72
|
+
</zeroOrMore>
|
54
73
|
<zeroOrMore>
|
55
74
|
<ref name="classification"/>
|
56
75
|
</zeroOrMore>
|
@@ -135,6 +154,16 @@
|
|
135
154
|
<data type="boolean"/>
|
136
155
|
</attribute>
|
137
156
|
</optional>
|
157
|
+
<optional>
|
158
|
+
<attribute name="keep-with-next">
|
159
|
+
<data type="boolean"/>
|
160
|
+
</attribute>
|
161
|
+
</optional>
|
162
|
+
<optional>
|
163
|
+
<attribute name="keep-lines-together">
|
164
|
+
<data type="boolean"/>
|
165
|
+
</attribute>
|
166
|
+
</optional>
|
138
167
|
<oneOrMore>
|
139
168
|
<ref name="BasicBlock"/>
|
140
169
|
</oneOrMore>
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module RelatonOgc
|
2
|
+
class DataFetcher
|
3
|
+
module Utils
|
4
|
+
ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
|
5
|
+
"NamingAuthority/master/incubation/bibliography/"\
|
6
|
+
"bibliography.json".freeze
|
7
|
+
|
8
|
+
def get_data # rubocop:disable Metrics/AbcSize
|
9
|
+
h = {}
|
10
|
+
h["If-None-Match"] = etag if etag
|
11
|
+
resp = Faraday.new(ENDPOINT, headers: h).get
|
12
|
+
case resp.status
|
13
|
+
when 200
|
14
|
+
json = JSON.parse(resp.body)
|
15
|
+
block_given? ? yield(resp[:etag], json) : json
|
16
|
+
when 304 then [] # there aren't any changes since last fetching
|
17
|
+
else raise RelatonBib::RequestError, "Could not access #{ENDPOINT}"
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
#
|
22
|
+
# Read ETag from file
|
23
|
+
#
|
24
|
+
# @return [String, NilClass]
|
25
|
+
def etag
|
26
|
+
@etag ||= if File.exist? @etagfile
|
27
|
+
File.read @etagfile, encoding: "UTF-8"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
#
|
32
|
+
# Save ETag to file
|
33
|
+
#
|
34
|
+
# @param e_tag [String]
|
35
|
+
def etag=(e_tag)
|
36
|
+
File.write @etagfile, e_tag, encoding: "UTF-8"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
include Utils
|
41
|
+
|
42
|
+
#
|
43
|
+
# Create DataFetcher instance
|
44
|
+
#
|
45
|
+
# @param [String] output directory to save the documents
|
46
|
+
# @param [String] format output format "yaml" or "xml"
|
47
|
+
#
|
48
|
+
def initialize(output, format)
|
49
|
+
@output = output
|
50
|
+
@etagfile = File.join output, "etag.txt"
|
51
|
+
@format = format
|
52
|
+
@docids = []
|
53
|
+
@dupids = []
|
54
|
+
end
|
55
|
+
|
56
|
+
def self.fetch(output: "data", format: "yaml")
|
57
|
+
t1 = Time.now
|
58
|
+
puts "Started at: #{t1}"
|
59
|
+
FileUtils.mkdir_p output unless Dir.exist? output
|
60
|
+
new(output, format).fetch
|
61
|
+
t2 = Time.now
|
62
|
+
puts "Stopped at: #{t2}"
|
63
|
+
puts "Done in: #{(t2 - t1).round} sec."
|
64
|
+
end
|
65
|
+
|
66
|
+
def fetch # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
67
|
+
get_data do |etag, json|
|
68
|
+
no_errors = true
|
69
|
+
json.each do |_, hit|
|
70
|
+
bib = Scrapper.parse_page hit
|
71
|
+
write_document bib
|
72
|
+
rescue StandardError => e
|
73
|
+
no_errors = false
|
74
|
+
warn "Fetching document: #{hit['identifier']}"
|
75
|
+
warn "#{e.class} #{e.message}"
|
76
|
+
warn e.backtrace
|
77
|
+
end
|
78
|
+
warn "[relaton-ogc] WARNING Duplicated documents: #{@dupids.uniq.join(', ')}" if @dupids.any?
|
79
|
+
self.etag = etag if no_errors
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def write_document(bib) # rubocop:disable Metrics/AbcSize
|
84
|
+
if @docids.include?(bib.docidentifier[0].id)
|
85
|
+
@dupids << bib.docidentifier[0].id
|
86
|
+
return
|
87
|
+
end
|
88
|
+
|
89
|
+
@docids << bib.docidentifier[0].id
|
90
|
+
name = bib.docidentifier[0].id.upcase.gsub(/[\s:.]/, "_")
|
91
|
+
file = "#{@output}/#{name}.#{@format}"
|
92
|
+
content = @format == "xml" ? bib.to_xml(bibdata: true) : bib.to_hash.to_yaml
|
93
|
+
File.write file, content, encoding: "UTF-8"
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
@@ -4,23 +4,25 @@ require "fileutils"
|
|
4
4
|
|
5
5
|
module RelatonOgc
|
6
6
|
class HitCollection < RelatonBib::HitCollection
|
7
|
-
|
8
|
-
|
7
|
+
include DataFetcher::Utils
|
8
|
+
|
9
|
+
# ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
|
10
|
+
# "NamingAuthority/master/incubation/bibliography/"\
|
11
|
+
# "bibliography.json".freeze
|
9
12
|
DATADIR = File.expand_path ".relaton/ogc/", Dir.home
|
10
13
|
DATAFILE = File.expand_path "bibliography.json", DATADIR
|
11
|
-
ETAGFILE = File.expand_path "etag.txt", DATADIR
|
14
|
+
# ETAGFILE = File.expand_path "etag.txt", DATADIR
|
12
15
|
|
13
16
|
# @param ref [String]
|
14
17
|
# @param year [String]
|
15
18
|
# @param opts [Hash]
|
16
19
|
def initialize(ref, year = nil)
|
17
20
|
super
|
21
|
+
@etagfile = File.expand_path "etag.txt", DATADIR
|
18
22
|
@array = from_json(ref).sort_by do |hit|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
Date.parse "0000-01-01"
|
23
|
-
end
|
23
|
+
hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
|
24
|
+
rescue ArgumentError
|
25
|
+
Date.parse "0000-01-01"
|
24
26
|
end.reverse
|
25
27
|
end
|
26
28
|
|
@@ -52,38 +54,13 @@ module RelatonOgc
|
|
52
54
|
#
|
53
55
|
# fetch data from server and save it to file.
|
54
56
|
#
|
55
|
-
def fetch_data
|
56
|
-
|
57
|
-
|
58
|
-
resp = Faraday.new(ENDPOINT, headers: h).get
|
59
|
-
# return if there aren't any changes since last fetching
|
60
|
-
return if resp.status == 304
|
61
|
-
unless resp.status == 200
|
62
|
-
raise RelatonBib::RequestError, "Could not access #{ENDPOINT}"
|
63
|
-
end
|
57
|
+
def fetch_data
|
58
|
+
json = get_data
|
59
|
+
return unless json
|
64
60
|
|
65
61
|
FileUtils.mkdir_p DATADIR unless Dir.exist? DATADIR
|
66
|
-
|
67
|
-
@data = JSON.parse resp.body
|
62
|
+
@data = json
|
68
63
|
File.write DATAFILE, @data.to_json, encoding: "UTF-8"
|
69
64
|
end
|
70
|
-
|
71
|
-
#
|
72
|
-
# Read ETag form file
|
73
|
-
#
|
74
|
-
# @return [String, NilClass]
|
75
|
-
def etag
|
76
|
-
@etag ||= if File.exist? ETAGFILE
|
77
|
-
File.read ETAGFILE, encoding: "UTF-8"
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
#
|
82
|
-
# Save ETag to file
|
83
|
-
#
|
84
|
-
# @param tag [String]
|
85
|
-
def etag=(e_tag)
|
86
|
-
File.write ETAGFILE, e_tag, encoding: "UTF-8"
|
87
|
-
end
|
88
65
|
end
|
89
66
|
end
|
@@ -14,17 +14,13 @@ module RelatonOgc
|
|
14
14
|
profile profile-with-extension general
|
15
15
|
].freeze
|
16
16
|
|
17
|
-
# @return [String]
|
18
|
-
attr_reader :docsubtype
|
19
|
-
|
20
|
-
# @param docsubtype [String]
|
21
17
|
def initialize(**args)
|
22
|
-
if args[:
|
18
|
+
if args[:subdoctype] && !SUBTYPES.include?(args[:subdoctype])
|
23
19
|
warn "[relaton-ogc] WARNING: invalid document "\
|
24
|
-
|
20
|
+
"subtype: #{args[:subdoctype]}"
|
25
21
|
end
|
26
22
|
|
27
|
-
@docsubtype = args.delete :docsubtype
|
23
|
+
# @docsubtype = args.delete :docsubtype
|
28
24
|
# @doctype = args.delete :doctype
|
29
25
|
super
|
30
26
|
end
|
@@ -33,15 +29,15 @@ module RelatonOgc
|
|
33
29
|
# @return [RelatonOgc::OgcBibliographicItem]
|
34
30
|
def self.from_hash(hash)
|
35
31
|
item_hash = ::RelatonOgc::HashConverter.hash_to_bib(hash)
|
36
|
-
new
|
32
|
+
new(**item_hash)
|
37
33
|
end
|
38
34
|
|
39
35
|
# @return [Hash]
|
40
|
-
def to_hash
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
end
|
36
|
+
# def to_hash
|
37
|
+
# hash = super
|
38
|
+
# hash["docsubtype"] = docsubtype if docsubtype
|
39
|
+
# hash
|
40
|
+
# end
|
45
41
|
|
46
42
|
# @param opts [Hash]
|
47
43
|
# @option opts [Nokogiri::XML::Builder] :builder XML builder
|
@@ -50,10 +46,10 @@ module RelatonOgc
|
|
50
46
|
# @option opts [String, Symbol] :lang language
|
51
47
|
# @return [String] XML
|
52
48
|
def to_xml(**opts)
|
53
|
-
super
|
49
|
+
super(**opts) do |b|
|
54
50
|
b.ext do
|
55
51
|
b.doctype doctype if doctype
|
56
|
-
b.
|
52
|
+
b.subdoctype subdoctype if subdoctype
|
57
53
|
editorialgroup&.to_xml b
|
58
54
|
ics.each { |i| i.to_xml b }
|
59
55
|
end
|
@@ -62,11 +58,11 @@ module RelatonOgc
|
|
62
58
|
|
63
59
|
# @param prefix [String]
|
64
60
|
# @return [String]
|
65
|
-
def to_asciibib(prefix = "")
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
end
|
61
|
+
# def to_asciibib(prefix = "")
|
62
|
+
# pref = prefix.empty? ? prefix : prefix + "."
|
63
|
+
# out = super
|
64
|
+
# out += "#{pref}docsubtype:: #{docsubtype}\n" if docsubtype
|
65
|
+
# out
|
66
|
+
# end
|
71
67
|
end
|
72
68
|
end
|
@@ -2,11 +2,12 @@ require "relaton/processor"
|
|
2
2
|
|
3
3
|
module RelatonOgc
|
4
4
|
class Processor < Relaton::Processor
|
5
|
-
def initialize
|
5
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
6
6
|
@short = :relaton_ogc
|
7
7
|
@prefix = "OGC"
|
8
8
|
@defaultprefix = %r{^OGC\s}
|
9
9
|
@idtype = "OGC"
|
10
|
+
@datasets = %w[ogc-naming-authority]
|
10
11
|
end
|
11
12
|
|
12
13
|
# @param code [String]
|
@@ -17,6 +18,18 @@ module RelatonOgc
|
|
17
18
|
::RelatonOgc::OgcBibliography.get(code, date, opts)
|
18
19
|
end
|
19
20
|
|
21
|
+
#
|
22
|
+
# Fetch all the documents from a source
|
23
|
+
#
|
24
|
+
# @param [String] _source source name
|
25
|
+
# @param [Hash] opts
|
26
|
+
# @option opts [String] :output directory to output documents
|
27
|
+
# @option opts [String] :format
|
28
|
+
#
|
29
|
+
def fetch_data(_source, opts)
|
30
|
+
DataFetcher.fetch(**opts)
|
31
|
+
end
|
32
|
+
|
20
33
|
# @param xml [String]
|
21
34
|
# @return [RelatonOgc::OgcBibliographicItem]
|
22
35
|
def from_xml(xml)
|
data/lib/relaton_ogc/scrapper.rb
CHANGED
@@ -13,7 +13,7 @@ module RelatonOgc
|
|
13
13
|
"IPR" => { type: "engineering-report" },
|
14
14
|
"IS" => { type: "standard", subtype: "implementation" },
|
15
15
|
"ISC" => { type: "standard", subtype: "implementation" },
|
16
|
-
"ISx" => { type: "standard", subtype: "
|
16
|
+
"ISx" => { type: "standard", subtype: "extension" },
|
17
17
|
"Notes" => { type: "other" },
|
18
18
|
"ORM" => { type: "reference-model" },
|
19
19
|
"PC" => { type: "standard", subtype: "profile" },
|
@@ -34,7 +34,7 @@ module RelatonOgc
|
|
34
34
|
class << self
|
35
35
|
# @param hit [Hash]
|
36
36
|
# @return [RelatonOgc::OgcBibliographicItem]
|
37
|
-
def parse_page(hit)
|
37
|
+
def parse_page(hit) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
38
38
|
type = fetch_type(hit["type"])
|
39
39
|
OgcBibliographicItem.new(
|
40
40
|
fetched: Date.today.to_s,
|
@@ -43,7 +43,7 @@ module RelatonOgc
|
|
43
43
|
docid: fetch_docid(hit["identifier"]),
|
44
44
|
link: fetch_link(hit["URL"]),
|
45
45
|
doctype: type[:type],
|
46
|
-
|
46
|
+
subdoctype: type[:subtype],
|
47
47
|
docstatus: fetch_status(type[:stage]),
|
48
48
|
edition: fetch_edition(hit["identifier"]),
|
49
49
|
abstract: fetch_abstract(hit["description"]),
|
@@ -88,7 +88,7 @@ module RelatonOgc
|
|
88
88
|
# @param stage [String]
|
89
89
|
# @return [RelatonBib::DocumentStatus, NilClass]
|
90
90
|
def fetch_status(stage)
|
91
|
-
stage && RelatonBib::
|
91
|
+
stage && RelatonBib::DocumentStatus.new(stage: stage)
|
92
92
|
end
|
93
93
|
|
94
94
|
# @param identifier [String]
|
@@ -138,6 +138,8 @@ module RelatonOgc
|
|
138
138
|
# @param date [String]
|
139
139
|
# @return [Array<RelatonBib::BibliographicDate>]
|
140
140
|
def fetch_date(date)
|
141
|
+
return [] unless date
|
142
|
+
|
141
143
|
[RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
142
144
|
end
|
143
145
|
end
|