relaton-calconnect 1.7.0 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +36 -0
- data/.rubocop.yml +1 -1
- data/README.adoc +20 -5
- data/grammars/basicdoc.rng +165 -20
- data/grammars/biblio.rng +5 -6
- data/grammars/csd.rng +9 -0
- data/grammars/isodoc.rng +532 -16
- data/grammars/reqt.rng +31 -2
- data/lib/relaton_calconnect/cc_bibliographic_item.rb +7 -0
- data/lib/relaton_calconnect/cc_bibliography.rb +6 -10
- data/lib/relaton_calconnect/data_fetcher.rb +93 -0
- data/lib/relaton_calconnect/hash_converter.rb +1 -1
- data/lib/relaton_calconnect/hit_collection.rb +3 -3
- data/lib/relaton_calconnect/processor.rb +15 -3
- data/lib/relaton_calconnect/scrapper.rb +65 -10
- data/lib/relaton_calconnect/version.rb +1 -1
- data/lib/relaton_calconnect/xml_parser.rb +2 -2
- data/lib/relaton_calconnect.rb +1 -0
- data/relaton_calconnect.gemspec +3 -5
- metadata +10 -39
- data/.github/workflows/macos.yml +0 -34
- data/.github/workflows/ubuntu.yml +0 -33
- data/.github/workflows/windows.yml +0 -35
data/grammars/reqt.rng
CHANGED
@@ -30,15 +30,34 @@
|
|
30
30
|
<data type="boolean"/>
|
31
31
|
</attribute>
|
32
32
|
</optional>
|
33
|
+
<optional>
|
34
|
+
<attribute name="number"/>
|
35
|
+
</optional>
|
33
36
|
<optional>
|
34
37
|
<attribute name="subsequence"/>
|
35
38
|
</optional>
|
39
|
+
<optional>
|
40
|
+
<attribute name="keep-with-next">
|
41
|
+
<data type="boolean"/>
|
42
|
+
</attribute>
|
43
|
+
</optional>
|
44
|
+
<optional>
|
45
|
+
<attribute name="keep-lines-together">
|
46
|
+
<data type="boolean"/>
|
47
|
+
</attribute>
|
48
|
+
</optional>
|
36
49
|
<attribute name="id">
|
37
50
|
<data type="ID"/>
|
38
51
|
</attribute>
|
39
52
|
<optional>
|
40
53
|
<attribute name="filename"/>
|
41
54
|
</optional>
|
55
|
+
<optional>
|
56
|
+
<attribute name="model"/>
|
57
|
+
</optional>
|
58
|
+
<optional>
|
59
|
+
<attribute name="type"/>
|
60
|
+
</optional>
|
42
61
|
<optional>
|
43
62
|
<ref name="reqtitle"/>
|
44
63
|
</optional>
|
@@ -48,9 +67,9 @@
|
|
48
67
|
<optional>
|
49
68
|
<ref name="subject"/>
|
50
69
|
</optional>
|
51
|
-
<
|
70
|
+
<zeroOrMore>
|
52
71
|
<ref name="reqinherit"/>
|
53
|
-
</
|
72
|
+
</zeroOrMore>
|
54
73
|
<zeroOrMore>
|
55
74
|
<ref name="classification"/>
|
56
75
|
</zeroOrMore>
|
@@ -135,6 +154,16 @@
|
|
135
154
|
<data type="boolean"/>
|
136
155
|
</attribute>
|
137
156
|
</optional>
|
157
|
+
<optional>
|
158
|
+
<attribute name="keep-with-next">
|
159
|
+
<data type="boolean"/>
|
160
|
+
</attribute>
|
161
|
+
</optional>
|
162
|
+
<optional>
|
163
|
+
<attribute name="keep-lines-together">
|
164
|
+
<data type="boolean"/>
|
165
|
+
</attribute>
|
166
|
+
</optional>
|
138
167
|
<oneOrMore>
|
139
168
|
<ref name="BasicBlock"/>
|
140
169
|
</oneOrMore>
|
@@ -4,5 +4,12 @@ module RelatonCalconnect
|
|
4
4
|
directive guide specification standard report administrative amendment
|
5
5
|
technical\ corrigendum advisory
|
6
6
|
].freeze
|
7
|
+
|
8
|
+
# @param hash [Hash]
|
9
|
+
# @return [RelatonCalconnect::CcBibliographicItem]
|
10
|
+
def self.from_hash(hash)
|
11
|
+
item_hash = ::RelatonCalconnect::HashConverter.hash_to_bib(hash)
|
12
|
+
new(**item_hash)
|
13
|
+
end
|
7
14
|
end
|
8
15
|
end
|
@@ -3,7 +3,7 @@ module RelatonCalconnect
|
|
3
3
|
class << self
|
4
4
|
# @param text [String]
|
5
5
|
# @return [RelatonCalconnect::HitCollection]
|
6
|
-
def search(text, year = nil,
|
6
|
+
def search(text, year = nil, _opts = {})
|
7
7
|
HitCollection.new text, year
|
8
8
|
rescue Faraday::ConnectionFailed
|
9
9
|
raise RelatonBib::RequestError, "Could not access https://standards.calconnect.org"
|
@@ -18,11 +18,11 @@ module RelatonCalconnect
|
|
18
18
|
# @option opts [TrueClass, FalseClass] :bibdata
|
19
19
|
#
|
20
20
|
# @return [RelatonCalconnect::CcBibliographicItem]
|
21
|
-
def get(ref, year = nil, opts = {})
|
21
|
+
def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
22
22
|
code = ref
|
23
23
|
|
24
24
|
if year.nil?
|
25
|
-
/^(?<code1>[^\s]+(
|
25
|
+
/^(?<code1>[^\s]+(?:\s\w+)?\s[\d-]+):?(?<year1>\d{4})?/ =~ ref
|
26
26
|
unless code1.nil?
|
27
27
|
code = code1
|
28
28
|
year = year1
|
@@ -30,7 +30,7 @@ module RelatonCalconnect
|
|
30
30
|
end
|
31
31
|
|
32
32
|
warn "[relaton-calconnect] (\"#{ref}\") fetching..."
|
33
|
-
result =
|
33
|
+
result = search(code, year, opts) || (return nil)
|
34
34
|
ret = bib_results_filter(result, year)
|
35
35
|
if ret[:ret]
|
36
36
|
warn "[relaton-calconnect] (\"#{ref}\") found #{ret[:ret].docidentifier.first.id}"
|
@@ -42,10 +42,6 @@ module RelatonCalconnect
|
|
42
42
|
|
43
43
|
private
|
44
44
|
|
45
|
-
def bib_search_filter(code, year, opts)
|
46
|
-
search(code, year, opts)
|
47
|
-
end
|
48
|
-
|
49
45
|
# Sort through the results from RelatonNist, fetching them three at a time,
|
50
46
|
# and return the first result that matches the code,
|
51
47
|
# matches the year (if provided), and which has a title (amendments do not).
|
@@ -80,8 +76,8 @@ module RelatonCalconnect
|
|
80
76
|
warn "[relaton-calconnect] WARNING: no match found online for #{id}. "\
|
81
77
|
"The code must be exactly like it is on the standards website."
|
82
78
|
unless missed_years.empty?
|
83
|
-
warn "[relaton-calconnect] (There was no match for #{year}, though
|
84
|
-
"found for #{missed_years.join(', ')}.)"
|
79
|
+
warn "[relaton-calconnect] (There was no match for #{year}, though "\
|
80
|
+
"there were matches found for #{missed_years.join(', ')}.)"
|
85
81
|
end
|
86
82
|
nil
|
87
83
|
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# frozen_string_literal:true
|
2
|
+
|
3
|
+
module RelatonCalconnect
|
4
|
+
#
|
5
|
+
# Relaton-calconnect data fetcher
|
6
|
+
#
|
7
|
+
class DataFetcher
|
8
|
+
# DOMAIN = "https://standards.calconnect.org/"
|
9
|
+
# SCHEME, HOST = DOMAIN.split(%r{:?/?/})
|
10
|
+
ENDPOINT = "https://standards.calconnect.org/relaton/index.yaml"
|
11
|
+
# DATADIR = "data"
|
12
|
+
# DATAFILE = File.join DATADIR, "bibliography.yml"
|
13
|
+
# ETAGFILE = File.join DATADIR, "etag.txt"
|
14
|
+
|
15
|
+
def initialize(output, format)
|
16
|
+
@output = output
|
17
|
+
@etagfile = File.join output, "etag.txt"
|
18
|
+
@format = format
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.fetch(output: "data", format: "yaml")
|
22
|
+
t1 = Time.now
|
23
|
+
puts "Started at: #{t1}"
|
24
|
+
FileUtils.mkdir_p output unless Dir.exist? output
|
25
|
+
new(output, format).fetch
|
26
|
+
t2 = Time.now
|
27
|
+
puts "Stopped at: #{t2}"
|
28
|
+
puts "Done in: #{(t2 - t1).round} sec."
|
29
|
+
end
|
30
|
+
|
31
|
+
#
|
32
|
+
# fetch data from server and save it to file.
|
33
|
+
#
|
34
|
+
def fetch
|
35
|
+
resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
|
36
|
+
# return if there aren't any changes since last fetching
|
37
|
+
return unless resp.status == 200
|
38
|
+
|
39
|
+
data = YAML.safe_load resp.body
|
40
|
+
all_success = true
|
41
|
+
data["root"]["items"].each do |doc|
|
42
|
+
success = parse_page doc
|
43
|
+
all_success &&= success
|
44
|
+
end
|
45
|
+
self.etag = resp[:etag] if all_success
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
#
|
51
|
+
# Parse document and write it to file
|
52
|
+
#
|
53
|
+
# @param [Hash] doc
|
54
|
+
#
|
55
|
+
def parse_page(doc)
|
56
|
+
bib = Scrapper.parse_page doc
|
57
|
+
# bib.link.each { |l| l.content.merge!(scheme: SCHEME, host: HOST) unless l.content.host }
|
58
|
+
write_doc doc["docid"]["id"], bib
|
59
|
+
true
|
60
|
+
rescue StandardError => e
|
61
|
+
warn "Document: #{doc['docid']['id']}"
|
62
|
+
warn e.message
|
63
|
+
puts e.backtrace
|
64
|
+
false
|
65
|
+
end
|
66
|
+
|
67
|
+
def write_doc(docid, bib)
|
68
|
+
content = @format == "xml" ? bib.to_xml(bibdata: true) : bib.to_hash.to_yaml
|
69
|
+
file = File.join @output, "#{docid.downcase.gsub(%r{[/\s:]}, '_')}.#{@format}"
|
70
|
+
# if File.exist? file
|
71
|
+
# warn "#{file} exist"
|
72
|
+
# else
|
73
|
+
File.write file, content, encoding: "UTF-8"
|
74
|
+
# end
|
75
|
+
end
|
76
|
+
|
77
|
+
#
|
78
|
+
# Read ETag from file
|
79
|
+
#
|
80
|
+
# @return [String, NilClass]
|
81
|
+
def etag
|
82
|
+
@etag ||= File.exist?(@etagfile) ? File.read(@etagfile, encoding: "UTF-8") : nil
|
83
|
+
end
|
84
|
+
|
85
|
+
#
|
86
|
+
# Save ETag to file
|
87
|
+
#
|
88
|
+
# @param e_tag [String]
|
89
|
+
def etag=(e_tag)
|
90
|
+
File.write @etagfile, e_tag, encoding: "UTF-8"
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
@@ -6,7 +6,7 @@ module RelatonCalconnect
|
|
6
6
|
return unless ret[:editorialgroup]
|
7
7
|
|
8
8
|
technical_committee = array(ret[:editorialgroup]).map do |wg|
|
9
|
-
TechnicalCommittee.new RelatonBib::WorkGroup.new(wg)
|
9
|
+
TechnicalCommittee.new RelatonBib::WorkGroup.new(**wg)
|
10
10
|
end
|
11
11
|
ret[:editorialgroup] = RelatonBib::EditorialGroup.new technical_committee
|
12
12
|
end
|
@@ -23,7 +23,7 @@ module RelatonCalconnect
|
|
23
23
|
private
|
24
24
|
|
25
25
|
#
|
26
|
-
# Fetch data
|
26
|
+
# Fetch data from yaml
|
27
27
|
#
|
28
28
|
# @param docid [String]
|
29
29
|
def from_yaml(docid, **_opts)
|
@@ -44,7 +44,7 @@ module RelatonCalconnect
|
|
44
44
|
end
|
45
45
|
|
46
46
|
#
|
47
|
-
# fetch data
|
47
|
+
# fetch data from server and save it to file.
|
48
48
|
#
|
49
49
|
def fetch_data
|
50
50
|
resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
|
@@ -57,7 +57,7 @@ module RelatonCalconnect
|
|
57
57
|
end
|
58
58
|
|
59
59
|
#
|
60
|
-
# Read ETag
|
60
|
+
# Read ETag from file
|
61
61
|
#
|
62
62
|
# @return [String, NilClass]
|
63
63
|
def etag
|
@@ -4,11 +4,12 @@ module RelatonCalconnect
|
|
4
4
|
class Processor < Relaton::Processor
|
5
5
|
attr_reader :idtype
|
6
6
|
|
7
|
-
def initialize
|
7
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
8
8
|
@short = :relaton_calconnect
|
9
9
|
@prefix = "CC"
|
10
10
|
@defaultprefix = %r{^CC\s}
|
11
11
|
@idtype = "CC"
|
12
|
+
@datasets = %w[calconnect-org]
|
12
13
|
end
|
13
14
|
|
14
15
|
# @param code [String]
|
@@ -19,6 +20,18 @@ module RelatonCalconnect
|
|
19
20
|
::RelatonCalconnect::CcBibliography.get(code, date, opts)
|
20
21
|
end
|
21
22
|
|
23
|
+
#
|
24
|
+
# Fetch all the documents from a source
|
25
|
+
#
|
26
|
+
# @param [String] _source source name
|
27
|
+
# @param [Hash] opts
|
28
|
+
# @option opts [String] :output directory to output documents
|
29
|
+
# @option opts [String] :format
|
30
|
+
#
|
31
|
+
def fetch_data(_source, opts)
|
32
|
+
DataFetcher.fetch(**opts)
|
33
|
+
end
|
34
|
+
|
22
35
|
# @param xml [String]
|
23
36
|
# @return [RelatonCalconnect::CcBibliographicItem]
|
24
37
|
def from_xml(xml)
|
@@ -28,8 +41,7 @@ module RelatonCalconnect
|
|
28
41
|
# @param hash [Hash]
|
29
42
|
# @return [RelatonCalconnect::CcBibliographicItem]
|
30
43
|
def hash_to_bib(hash)
|
31
|
-
|
32
|
-
::RelatonCalconnect::CcBibliographicItem.new item_hash
|
44
|
+
::RelatonCalconnect::CcBibliographicItem.from_hash hash
|
33
45
|
end
|
34
46
|
|
35
47
|
# Returns hash of XML grammar
|
@@ -1,17 +1,24 @@
|
|
1
1
|
module RelatonCalconnect
|
2
2
|
module Scrapper
|
3
3
|
DOMAIN = "https://standards.calconnect.org/".freeze
|
4
|
+
SCHEME, HOST = DOMAIN.split(%r{:?/?/})
|
4
5
|
# DOMAIN = "http://127.0.0.1:4000/".freeze
|
5
6
|
|
6
7
|
class << self
|
7
8
|
# @param hit [Hash]
|
8
9
|
# @return [RelatonCalconnect::CcBibliographicItem]
|
9
10
|
def parse_page(hit)
|
10
|
-
|
11
|
+
links = array(hit["link"])
|
12
|
+
link = links.detect { |l| l["type"] == "rxl" }
|
11
13
|
if link
|
12
|
-
|
13
|
-
|
14
|
+
bib = fetch_bib_xml link["content"]
|
15
|
+
update_links bib, links
|
16
|
+
# XMLParser.from_xml bib_xml
|
17
|
+
else
|
18
|
+
bib = RelatonCalconnect::CcBibliographicItem.from_hash doc_to_hash(hit)
|
14
19
|
end
|
20
|
+
bib.link.each { |l| l.content.merge!(scheme: SCHEME, host: HOST) unless l.content.host }
|
21
|
+
bib
|
15
22
|
end
|
16
23
|
|
17
24
|
private
|
@@ -19,15 +26,25 @@ module RelatonCalconnect
|
|
19
26
|
# @param url [String]
|
20
27
|
# @return [RelatonCalconnect::CcBibliographicItem] item parsed from the fetched XML
|
21
28
|
def fetch_bib_xml(url)
|
29
|
+
# rxl = get_rxl url
|
30
|
+
# uri_rxl = rxl.at("uri[@type='rxl']")
|
31
|
+
# return rxl.to_xml unless uri_rxl
|
32
|
+
|
33
|
+
# uri_xml = rxl.xpath("//uri").to_xml
|
34
|
+
# rxl = get_rxl uri_rxl.text
|
35
|
+
# docid = rxl.at "//docidentifier"
|
36
|
+
# docid.add_previous_sibling uri_xml
|
37
|
+
# rxl.to_xml
|
22
38
|
rxl = get_rxl url
|
23
39
|
uri_rxl = rxl.at("uri[@type='rxl']")
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
rxl.to_xml
|
40
|
+
if uri_rxl
|
41
|
+
uri_xml = rxl.xpath("//uri").to_xml
|
42
|
+
rxl = get_rxl uri_rxl.text
|
43
|
+
docid = rxl.at "//docidentifier"
|
44
|
+
docid.add_previous_sibling uri_xml
|
45
|
+
end
|
46
|
+
# Use gsub, not gsub!: gsub! returns nil when no tag is replaced, which would pass nil to the parser.
xml = rxl.to_xml.gsub(%r{(</?)technical-committee(>)}, '\1committee\2')
|
47
|
+
RelatonCalconnect::XMLParser.from_xml xml
|
31
48
|
end
|
32
49
|
|
33
50
|
# @param path [String]
|
@@ -36,6 +53,44 @@ module RelatonCalconnect
|
|
36
53
|
resp = Faraday.get DOMAIN + path
|
37
54
|
Nokogiri::XML resp.body
|
38
55
|
end
|
56
|
+
|
57
|
+
#
|
58
|
+
# Fix editorial group
|
59
|
+
#
|
60
|
+
# @param [Hash] doc
|
61
|
+
#
|
62
|
+
# @return [Hash]
|
63
|
+
#
|
64
|
+
def doc_to_hash(doc)
|
65
|
+
array(doc["editorialgroup"]).each do |eg|
|
66
|
+
tc = eg.delete("technical_committee")
|
67
|
+
eg.merge!(tc) if tc
|
68
|
+
end
|
69
|
+
doc
|
70
|
+
end
|
71
|
+
|
72
|
+
def update_links(bib, links)
|
73
|
+
links.each do |l|
|
74
|
+
tu = l.transform_keys(&:to_sym)
|
75
|
+
bib.link << RelatonBib::TypedUri.new(**tu) unless bib.url(l["type"])
|
76
|
+
end
|
77
|
+
bib
|
78
|
+
end
|
79
|
+
|
80
|
+
#
|
81
|
+
# Wrap into Array if not Array
|
82
|
+
#
|
83
|
+
# @param [Array, Hash, String, nil] content
|
84
|
+
#
|
85
|
+
# @return [Array<Hash, String>]
|
86
|
+
#
|
87
|
+
def array(content)
|
88
|
+
case content
|
89
|
+
when Array then content
|
90
|
+
when nil then []
|
91
|
+
else [content]
|
92
|
+
end
|
93
|
+
end
|
39
94
|
end
|
40
95
|
end
|
41
96
|
end
|
@@ -5,7 +5,7 @@ module RelatonCalconnect
|
|
5
5
|
# @param item_hash [Hash]
|
6
6
|
# @return [RelatonCalconnect::CcBibliographicItem]
|
7
7
|
def bib_item(item_hash)
|
8
|
-
CcBibliographicItem.new
|
8
|
+
CcBibliographicItem.new(**item_hash)
|
9
9
|
end
|
10
10
|
|
11
11
|
# @param ext [Nokogiri::XML::Element]
|
@@ -14,7 +14,7 @@ module RelatonCalconnect
|
|
14
14
|
return unless ext && (eg = ext.at "editorialgroup")
|
15
15
|
|
16
16
|
eg = eg.xpath("committee", "technical-committee").map do |tc|
|
17
|
-
wg = RelatonBib::WorkGroup.new(
|
17
|
+
wg = RelatonBib::WorkGroup.new(name: tc.text, number: tc[:number]&.to_i,
|
18
18
|
type: tc[:type])
|
19
19
|
TechnicalCommittee.new wg
|
20
20
|
end
|
data/lib/relaton_calconnect.rb
CHANGED
@@ -8,6 +8,7 @@ require "relaton_calconnect/technical_committee"
|
|
8
8
|
require "relaton_calconnect/cc_bibliographic_item"
|
9
9
|
require "relaton_calconnect/xml_parser"
|
10
10
|
require "relaton_calconnect/hash_converter"
|
11
|
+
require "relaton_calconnect/data_fetcher"
|
11
12
|
|
12
13
|
module RelatonCalconnect
|
13
14
|
class Error < StandardError; end
|
data/relaton_calconnect.gemspec
CHANGED
@@ -24,18 +24,16 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.bindir = "exe"
|
25
25
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
26
26
|
spec.require_paths = ["lib"]
|
27
|
-
spec.required_ruby_version = Gem::Requirement.new(">= 2.
|
27
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
|
28
28
|
|
29
|
-
spec.add_development_dependency "debase"
|
30
29
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
31
|
-
spec.add_development_dependency "rake", "~>
|
30
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
32
31
|
spec.add_development_dependency "rspec", "~> 3.0"
|
33
|
-
spec.add_development_dependency "ruby-debug-ide"
|
34
32
|
spec.add_development_dependency "ruby-jing"
|
35
33
|
spec.add_development_dependency "simplecov"
|
36
34
|
spec.add_development_dependency "vcr"
|
37
35
|
spec.add_development_dependency "webmock"
|
38
36
|
|
39
37
|
spec.add_dependency "faraday"
|
40
|
-
spec.add_dependency "relaton-bib", "~> 1.
|
38
|
+
spec.add_dependency "relaton-bib", "~> 1.9.0"
|
41
39
|
end
|