relaton-calconnect 1.7.0 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/grammars/reqt.rng CHANGED
@@ -30,15 +30,34 @@
30
30
  <data type="boolean"/>
31
31
  </attribute>
32
32
  </optional>
33
+ <optional>
34
+ <attribute name="number"/>
35
+ </optional>
33
36
  <optional>
34
37
  <attribute name="subsequence"/>
35
38
  </optional>
39
+ <optional>
40
+ <attribute name="keep-with-next">
41
+ <data type="boolean"/>
42
+ </attribute>
43
+ </optional>
44
+ <optional>
45
+ <attribute name="keep-lines-together">
46
+ <data type="boolean"/>
47
+ </attribute>
48
+ </optional>
36
49
  <attribute name="id">
37
50
  <data type="ID"/>
38
51
  </attribute>
39
52
  <optional>
40
53
  <attribute name="filename"/>
41
54
  </optional>
55
+ <optional>
56
+ <attribute name="model"/>
57
+ </optional>
58
+ <optional>
59
+ <attribute name="type"/>
60
+ </optional>
42
61
  <optional>
43
62
  <ref name="reqtitle"/>
44
63
  </optional>
@@ -48,9 +67,9 @@
48
67
  <optional>
49
68
  <ref name="subject"/>
50
69
  </optional>
51
- <optional>
70
+ <zeroOrMore>
52
71
  <ref name="reqinherit"/>
53
- </optional>
72
+ </zeroOrMore>
54
73
  <zeroOrMore>
55
74
  <ref name="classification"/>
56
75
  </zeroOrMore>
@@ -135,6 +154,16 @@
135
154
  <data type="boolean"/>
136
155
  </attribute>
137
156
  </optional>
157
+ <optional>
158
+ <attribute name="keep-with-next">
159
+ <data type="boolean"/>
160
+ </attribute>
161
+ </optional>
162
+ <optional>
163
+ <attribute name="keep-lines-together">
164
+ <data type="boolean"/>
165
+ </attribute>
166
+ </optional>
138
167
  <oneOrMore>
139
168
  <ref name="BasicBlock"/>
140
169
  </oneOrMore>
@@ -4,5 +4,12 @@ module RelatonCalconnect
4
4
  directive guide specification standard report administrative amendment
5
5
  technical\ corrigendum advisory
6
6
  ].freeze
7
+
8
+ # @param hash [Hash]
9
+ # @return [RelatonCalconnect::CcBibliographicItem]
10
+ def self.from_hash(hash)
11
+ item_hash = ::RelatonCalconnect::HashConverter.hash_to_bib(hash)
12
+ new(**item_hash)
13
+ end
7
14
  end
8
15
  end
@@ -3,7 +3,7 @@ module RelatonCalconnect
3
3
  class << self
4
4
  # @param text [String]
5
5
  # @return [RelatonCalconnect::HitCollection]
6
- def search(text, year = nil, opts = {})
6
+ def search(text, year = nil, _opts = {})
7
7
  HitCollection.new text, year
8
8
  rescue Faraday::ConnectionFailed
9
9
  raise RelatonBib::RequestError, "Could not access https://standards.calconnect.org"
@@ -18,11 +18,11 @@ module RelatonCalconnect
18
18
  # @option opts [TrueClass, FalseClass] :bibdata
19
19
  #
20
20
  # @return [RelatonCalconnect::CcBibliographicItem]
21
- def get(ref, year = nil, opts = {})
21
+ def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
22
22
  code = ref
23
23
 
24
24
  if year.nil?
25
- /^(?<code1>[^\s]+(\s\w+)?\s[\d-]+):?(?<year1>\d{4})?/ =~ ref
25
+ /^(?<code1>[^\s]+(?:\s\w+)?\s[\d-]+):?(?<year1>\d{4})?/ =~ ref
26
26
  unless code1.nil?
27
27
  code = code1
28
28
  year = year1
@@ -30,7 +30,7 @@ module RelatonCalconnect
30
30
  end
31
31
 
32
32
  warn "[relaton-calconnect] (\"#{ref}\") fetching..."
33
- result = bib_search_filter(code, year, opts) || (return nil)
33
+ result = search(code, year, opts) || (return nil)
34
34
  ret = bib_results_filter(result, year)
35
35
  if ret[:ret]
36
36
  warn "[relaton-calconnect] (\"#{ref}\") found #{ret[:ret].docidentifier.first.id}"
@@ -42,10 +42,6 @@ module RelatonCalconnect
42
42
 
43
43
  private
44
44
 
45
- def bib_search_filter(code, year, opts)
46
- search(code, year, opts)
47
- end
48
-
49
45
  # Sort through the results from RelatonCalconnect, fetching them three at a time,
50
46
  # and return the first result that matches the code,
51
47
  # matches the year (if provided), and which has a title (amendments do not).
@@ -80,8 +76,8 @@ module RelatonCalconnect
80
76
  warn "[relaton-calconnect] WARNING: no match found online for #{id}. "\
81
77
  "The code must be exactly like it is on the standards website."
82
78
  unless missed_years.empty?
83
- warn "[relaton-calconnect] (There was no match for #{year}, though there were matches "\
84
- "found for #{missed_years.join(', ')}.)"
79
+ warn "[relaton-calconnect] (There was no match for #{year}, though "\
80
+ "there were matches found for #{missed_years.join(', ')}.)"
85
81
  end
86
82
  nil
87
83
  end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal:true
2
+
3
+ module RelatonCalconnect
4
+ #
5
+ # Relaton-calconnect data fetcher
6
+ #
7
+ class DataFetcher
8
+ # DOMAIN = "https://standards.calconnect.org/"
9
+ # SCHEME, HOST = DOMAIN.split(%r{:?/?/})
10
+ ENDPOINT = "https://standards.calconnect.org/relaton/index.yaml"
11
+ # DATADIR = "data"
12
+ # DATAFILE = File.join DATADIR, "bibliography.yml"
13
+ # ETAGFILE = File.join DATADIR, "etag.txt"
14
+
15
+ def initialize(output, format)
16
+ @output = output
17
+ @etagfile = File.join output, "etag.txt"
18
+ @format = format
19
+ end
20
+
21
+ def self.fetch(output: "data", format: "yaml")
22
+ t1 = Time.now
23
+ puts "Started at: #{t1}"
24
+ FileUtils.mkdir_p output unless Dir.exist? output
25
+ new(output, format).fetch
26
+ t2 = Time.now
27
+ puts "Stopped at: #{t2}"
28
+ puts "Done in: #{(t2 - t1).round} sec."
29
+ end
30
+
31
+ #
32
+ # fetch data from server and save it to file.
33
+ #
34
+ def fetch
35
+ resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
36
+ # return if there aren't any changes since last fetching
37
+ return unless resp.status == 200
38
+
39
+ data = YAML.safe_load resp.body
40
+ all_success = true
41
+ data["root"]["items"].each do |doc|
42
+ success = parse_page doc
43
+ all_success &&= success
44
+ end
45
+ self.etag = resp[:etag] if all_success
46
+ end
47
+
48
+ private
49
+
50
+ #
51
+ # Parse document and write it to file
52
+ #
53
+ # @param [Hash] doc
54
+ #
55
+ def parse_page(doc)
56
+ bib = Scrapper.parse_page doc
57
+ # bib.link.each { |l| l.content.merge!(scheme: SCHEME, host: HOST) unless l.content.host }
58
+ write_doc doc["docid"]["id"], bib
59
+ true
60
+ rescue StandardError => e
61
+ warn "Document: #{doc['docid']['id']}"
62
+ warn e.message
63
+ puts e.backtrace
64
+ false
65
+ end
66
+
67
+ def write_doc(docid, bib)
68
+ content = @format == "xml" ? bib.to_xml(bibdata: true) : bib.to_hash.to_yaml
69
+ file = File.join @output, "#{docid.downcase.gsub(%r{[/\s:]}, '_')}.#{@format}"
70
+ # if File.exist? file
71
+ # warn "#{file} exist"
72
+ # else
73
+ File.write file, content, encoding: "UTF-8"
74
+ # end
75
+ end
76
+
77
+ #
78
+ # Read ETag from file
79
+ #
80
+ # @return [String, NilClass]
81
+ def etag
82
+ @etag ||= File.exist?(@etagfile) ? File.read(@etagfile, encoding: "UTF-8") : nil
83
+ end
84
+
85
+ #
86
+ # Save ETag to file
87
+ #
88
+ # @param e_tag [String]
89
+ def etag=(e_tag)
90
+ File.write @etagfile, e_tag, encoding: "UTF-8"
91
+ end
92
+ end
93
+ end
@@ -6,7 +6,7 @@ module RelatonCalconnect
6
6
  return unless ret[:editorialgroup]
7
7
 
8
8
  technical_committee = array(ret[:editorialgroup]).map do |wg|
9
- TechnicalCommittee.new RelatonBib::WorkGroup.new(wg)
9
+ TechnicalCommittee.new RelatonBib::WorkGroup.new(**wg)
10
10
  end
11
11
  ret[:editorialgroup] = RelatonBib::EditorialGroup.new technical_committee
12
12
  end
@@ -23,7 +23,7 @@ module RelatonCalconnect
23
23
  private
24
24
 
25
25
  #
26
- # Fetch data form yaml
26
+ # Fetch data from yaml
27
27
  #
28
28
  # @param docid [String]
29
29
  def from_yaml(docid, **_opts)
@@ -44,7 +44,7 @@ module RelatonCalconnect
44
44
  end
45
45
 
46
46
  #
47
- # fetch data form server and save it to file.
47
+ # fetch data from server and save it to file.
48
48
  #
49
49
  def fetch_data
50
50
  resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
@@ -57,7 +57,7 @@ module RelatonCalconnect
57
57
  end
58
58
 
59
59
  #
60
- # Read ETag form file
60
+ # Read ETag from file
61
61
  #
62
62
  # @return [String, NilClass]
63
63
  def etag
@@ -4,11 +4,12 @@ module RelatonCalconnect
4
4
  class Processor < Relaton::Processor
5
5
  attr_reader :idtype
6
6
 
7
- def initialize
7
+ def initialize # rubocop:disable Lint/MissingSuper
8
8
  @short = :relaton_calconnect
9
9
  @prefix = "CC"
10
10
  @defaultprefix = %r{^CC\s}
11
11
  @idtype = "CC"
12
+ @datasets = %w[calconnect-org]
12
13
  end
13
14
 
14
15
  # @param code [String]
@@ -19,6 +20,18 @@ module RelatonCalconnect
19
20
  ::RelatonCalconnect::CcBibliography.get(code, date, opts)
20
21
  end
21
22
 
23
+ #
24
+ # Fetch all the documents from a source
25
+ #
26
+ # @param [String] _source source name
27
+ # @param [Hash] opts
28
+ # @option opts [String] :output directory to output documents
29
+ # @option opts [String] :format
30
+ #
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
+ end
34
+
22
35
  # @param xml [String]
23
36
  # @return [RelatonCalconnect::CcBibliographicItem]
24
37
  def from_xml(xml)
@@ -28,8 +41,7 @@ module RelatonCalconnect
28
41
  # @param hash [Hash]
29
42
  # @return [RelatonCalconnect::CcBibliographicItem]
30
43
  def hash_to_bib(hash)
31
- item_hash = ::RelatonCalconnect::HashConverter.hash_to_bib(hash)
32
- ::RelatonCalconnect::CcBibliographicItem.new item_hash
44
+ ::RelatonCalconnect::CcBibliographicItem.from_hash hash
33
45
  end
34
46
 
35
47
  # Returns hash of XML grammar
@@ -1,17 +1,24 @@
1
1
  module RelatonCalconnect
2
2
  module Scrapper
3
3
  DOMAIN = "https://standards.calconnect.org/".freeze
4
+ SCHEME, HOST = DOMAIN.split(%r{:?/?/})
4
5
  # DOMAIN = "http://127.0.0.1:4000/".freeze
5
6
 
6
7
  class << self
7
8
  # @param hit [Hash]
8
9
  # @return [RelatonCalconnect::CcBibliographicItem]
9
10
  def parse_page(hit)
10
- link = hit["link"].detect { |l| l["type"] == "rxl" }
11
+ links = array(hit["link"])
12
+ link = links.detect { |l| l["type"] == "rxl" }
11
13
  if link
12
- bib_xml = fetch_bib_xml link["content"]
13
- XMLParser.from_xml bib_xml
14
+ bib = fetch_bib_xml link["content"]
15
+ update_links bib, links
16
+ # XMLParser.from_xml bib_xml
17
+ else
18
+ bib = RelatonCalconnect::CcBibliographicItem.from_hash doc_to_hash(hit)
14
19
  end
20
+ bib.link.each { |l| l.content.merge!(scheme: SCHEME, host: HOST) unless l.content.host }
21
+ bib
15
22
  end
16
23
 
17
24
  private
@@ -19,15 +26,25 @@ module RelatonCalconnect
19
26
  # @param url [String]
20
27
  # @return [RelatonCalconnect::CcBibliographicItem] item parsed from the fetched XML
21
28
  def fetch_bib_xml(url)
29
+ # rxl = get_rxl url
30
+ # uri_rxl = rxl.at("uri[@type='rxl']")
31
+ # return rxl.to_xml unless uri_rxl
32
+
33
+ # uri_xml = rxl.xpath("//uri").to_xml
34
+ # rxl = get_rxl uri_rxl.text
35
+ # docid = rxl.at "//docidentifier"
36
+ # docid.add_previous_sibling uri_xml
37
+ # rxl.to_xml
22
38
  rxl = get_rxl url
23
39
  uri_rxl = rxl.at("uri[@type='rxl']")
24
- return rxl.to_xml unless uri_rxl
25
-
26
- uri_xml = rxl.xpath("//uri").to_xml
27
- rxl = get_rxl uri_rxl.text
28
- docid = rxl.at "//docidentifier"
29
- docid.add_previous_sibling uri_xml
30
- rxl.to_xml
40
+ if uri_rxl
41
+ uri_xml = rxl.xpath("//uri").to_xml
42
+ rxl = get_rxl uri_rxl.text
43
+ docid = rxl.at "//docidentifier"
44
+ docid.add_previous_sibling uri_xml
45
+ end
46
+ xml = rxl.to_xml.gsub!(%r{(</?)technical-committee(>)}, '\1committee\2')
47
+ RelatonCalconnect::XMLParser.from_xml xml
31
48
  end
32
49
 
33
50
  # @param path [String]
@@ -36,6 +53,44 @@ module RelatonCalconnect
36
53
  resp = Faraday.get DOMAIN + path
37
54
  Nokogiri::XML resp.body
38
55
  end
56
+
57
+ #
58
+ # Fix editorial group
59
+ #
60
+ # @param [Hash] doc
61
+ #
62
+ # @return [Hash]
63
+ #
64
+ def doc_to_hash(doc)
65
+ array(doc["editorialgroup"]).each do |eg|
66
+ tc = eg.delete("technical_committee")
67
+ eg.merge!(tc) if tc
68
+ end
69
+ doc
70
+ end
71
+
72
+ def update_links(bib, links)
73
+ links.each do |l|
74
+ tu = l.transform_keys(&:to_sym)
75
+ bib.link << RelatonBib::TypedUri.new(**tu) unless bib.url(l["type"])
76
+ end
77
+ bib
78
+ end
79
+
80
+ #
81
+ # Wrap into Array if not Array
82
+ #
83
+ # @param [Array, Hash, String, nil] content
84
+ #
85
+ # @return [Array<Hash, String>]
86
+ #
87
+ def array(content)
88
+ case content
89
+ when Array then content
90
+ when nil then []
91
+ else [content]
92
+ end
93
+ end
39
94
  end
40
95
  end
41
96
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonCalconnect
2
- VERSION = "1.7.0".freeze
2
+ VERSION = "1.9.1".freeze
3
3
  end
@@ -5,7 +5,7 @@ module RelatonCalconnect
5
5
  # @param item_hash [Hash]
6
6
  # @return [RelatonCalconnect::CcBibliographicItem]
7
7
  def bib_item(item_hash)
8
- CcBibliographicItem.new item_hash
8
+ CcBibliographicItem.new(**item_hash)
9
9
  end
10
10
 
11
11
  # @param ext [Nokogiri::XML::Element]
@@ -14,7 +14,7 @@ module RelatonCalconnect
14
14
  return unless ext && (eg = ext.at "editorialgroup")
15
15
 
16
16
  eg = eg.xpath("committee", "technical-committee").map do |tc|
17
- wg = RelatonBib::WorkGroup.new(content: tc.text, number: tc[:number]&.to_i,
17
+ wg = RelatonBib::WorkGroup.new(name: tc.text, number: tc[:number]&.to_i,
18
18
  type: tc[:type])
19
19
  TechnicalCommittee.new wg
20
20
  end
@@ -8,6 +8,7 @@ require "relaton_calconnect/technical_committee"
8
8
  require "relaton_calconnect/cc_bibliographic_item"
9
9
  require "relaton_calconnect/xml_parser"
10
10
  require "relaton_calconnect/hash_converter"
11
+ require "relaton_calconnect/data_fetcher"
11
12
 
12
13
  module RelatonCalconnect
13
14
  class Error < StandardError; end
@@ -24,18 +24,16 @@ Gem::Specification.new do |spec|
24
24
  spec.bindir = "exe"
25
25
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
26
26
  spec.require_paths = ["lib"]
27
- spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
27
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
28
28
 
29
- spec.add_development_dependency "debase"
30
29
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
31
- spec.add_development_dependency "rake", "~> 10.0"
30
+ spec.add_development_dependency "rake", "~> 13.0"
32
31
  spec.add_development_dependency "rspec", "~> 3.0"
33
- spec.add_development_dependency "ruby-debug-ide"
34
32
  spec.add_development_dependency "ruby-jing"
35
33
  spec.add_development_dependency "simplecov"
36
34
  spec.add_development_dependency "vcr"
37
35
  spec.add_development_dependency "webmock"
38
36
 
39
37
  spec.add_dependency "faraday"
40
- spec.add_dependency "relaton-bib", "~> 1.7.0"
38
+ spec.add_dependency "relaton-bib", "~> 1.9.0"
41
39
  end