relaton-calconnect 1.7.2 → 1.9.2

Sign up to get free protection for your applications and to get access to all the features.
data/grammars/reqt.rng CHANGED
@@ -30,15 +30,34 @@
30
30
  <data type="boolean"/>
31
31
  </attribute>
32
32
  </optional>
33
+ <optional>
34
+ <attribute name="number"/>
35
+ </optional>
33
36
  <optional>
34
37
  <attribute name="subsequence"/>
35
38
  </optional>
39
+ <optional>
40
+ <attribute name="keep-with-next">
41
+ <data type="boolean"/>
42
+ </attribute>
43
+ </optional>
44
+ <optional>
45
+ <attribute name="keep-lines-together">
46
+ <data type="boolean"/>
47
+ </attribute>
48
+ </optional>
36
49
  <attribute name="id">
37
50
  <data type="ID"/>
38
51
  </attribute>
39
52
  <optional>
40
53
  <attribute name="filename"/>
41
54
  </optional>
55
+ <optional>
56
+ <attribute name="model"/>
57
+ </optional>
58
+ <optional>
59
+ <attribute name="type"/>
60
+ </optional>
42
61
  <optional>
43
62
  <ref name="reqtitle"/>
44
63
  </optional>
@@ -48,9 +67,9 @@
48
67
  <optional>
49
68
  <ref name="subject"/>
50
69
  </optional>
51
- <optional>
70
+ <zeroOrMore>
52
71
  <ref name="reqinherit"/>
53
- </optional>
72
+ </zeroOrMore>
54
73
  <zeroOrMore>
55
74
  <ref name="classification"/>
56
75
  </zeroOrMore>
@@ -135,6 +154,16 @@
135
154
  <data type="boolean"/>
136
155
  </attribute>
137
156
  </optional>
157
+ <optional>
158
+ <attribute name="keep-with-next">
159
+ <data type="boolean"/>
160
+ </attribute>
161
+ </optional>
162
+ <optional>
163
+ <attribute name="keep-lines-together">
164
+ <data type="boolean"/>
165
+ </attribute>
166
+ </optional>
138
167
  <oneOrMore>
139
168
  <ref name="BasicBlock"/>
140
169
  </oneOrMore>
@@ -9,7 +9,7 @@ module RelatonCalconnect
9
9
  # @return [RelatonIsoBib::CcBibliographicItem]
10
10
  def self.from_hash(hash)
11
11
  item_hash = ::RelatonCalconnect::HashConverter.hash_to_bib(hash)
12
- new **item_hash
12
+ new(**item_hash)
13
13
  end
14
14
  end
15
15
  end
@@ -3,7 +3,7 @@ module RelatonCalconnect
3
3
  class << self
4
4
  # @param text [String]
5
5
  # @return [RelatonCalconnect::HitCollection]
6
- def search(text, year = nil, opts = {})
6
+ def search(text, year = nil, _opts = {})
7
7
  HitCollection.new text, year
8
8
  rescue Faraday::ConnectionFailed
9
9
  raise RelatonBib::RequestError, "Could not access https://standards.calconnect.org"
@@ -18,11 +18,11 @@ module RelatonCalconnect
18
18
  # @option opts [TrueClass, FalseClass] :bibdata
19
19
  #
20
20
  # @return [RelatonCalconnect::CcBibliographicItem]
21
- def get(ref, year = nil, opts = {})
21
+ def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
22
22
  code = ref
23
23
 
24
24
  if year.nil?
25
- /^(?<code1>[^\s]+(\s\w+)?\s[\d-]+):?(?<year1>\d{4})?/ =~ ref
25
+ /^(?<code1>[^\s]+(?:\s\w+)?\s[\d-]+):?(?<year1>\d{4})?/ =~ ref
26
26
  unless code1.nil?
27
27
  code = code1
28
28
  year = year1
@@ -30,7 +30,7 @@ module RelatonCalconnect
30
30
  end
31
31
 
32
32
  warn "[relaton-calconnect] (\"#{ref}\") fetching..."
33
- result = bib_search_filter(code, year, opts) || (return nil)
33
+ result = search(code, year, opts) || (return nil)
34
34
  ret = bib_results_filter(result, year)
35
35
  if ret[:ret]
36
36
  warn "[relaton-calconnect] (\"#{ref}\") found #{ret[:ret].docidentifier.first.id}"
@@ -42,10 +42,6 @@ module RelatonCalconnect
42
42
 
43
43
  private
44
44
 
45
- def bib_search_filter(code, year, opts)
46
- search(code, year, opts)
47
- end
48
-
49
45
  # Sort through the results from RelatonNist, fetching them three at a time,
50
46
  # and return the first result that matches the code,
51
47
  # matches the year (if provided), and which has a title (amendments do not).
@@ -80,8 +76,8 @@ module RelatonCalconnect
80
76
  warn "[relaton-calconnect] WARNING: no match found online for #{id}. "\
81
77
  "The code must be exactly like it is on the standards website."
82
78
  unless missed_years.empty?
83
- warn "[relaton-calconnect] (There was no match for #{year}, though there were matches "\
84
- "found for #{missed_years.join(', ')}.)"
79
+ warn "[relaton-calconnect] (There was no match for #{year}, though "\
80
+ "there were matches found for #{missed_years.join(', ')}.)"
85
81
  end
86
82
  nil
87
83
  end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal:true
2
+
3
+ module RelatonCalconnect
4
+ #
5
+ # Relaton-calconnect data fetcher
6
+ #
7
+ class DataFetcher
8
+ # DOMAIN = "https://standards.calconnect.org/"
9
+ # SCHEME, HOST = DOMAIN.split(%r{:?/?/})
10
+ ENDPOINT = "https://standards.calconnect.org/relaton/index.yaml"
11
+ # DATADIR = "data"
12
+ # DATAFILE = File.join DATADIR, "bibliography.yml"
13
+ # ETAGFILE = File.join DATADIR, "etag.txt"
14
+
15
+ def initialize(output, format)
16
+ @output = output
17
+ @etagfile = File.join output, "etag.txt"
18
+ @format = format
19
+ end
20
+
21
+ def self.fetch(output: "data", format: "yaml")
22
+ t1 = Time.now
23
+ puts "Started at: #{t1}"
24
+ FileUtils.mkdir_p output unless Dir.exist? output
25
+ new(output, format).fetch
26
+ t2 = Time.now
27
+ puts "Stopped at: #{t2}"
28
+ puts "Done in: #{(t2 - t1).round} sec."
29
+ end
30
+
31
+ #
32
+ # fetch data from server and save it to file.
33
+ #
34
+ def fetch
35
+ resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
36
+ # return if there aren't any changes since last fetching
37
+ return unless resp.status == 200
38
+
39
+ data = YAML.safe_load resp.body
40
+ all_success = true
41
+ data["root"]["items"].each do |doc|
42
+ success = parse_page doc
43
+ all_success &&= success
44
+ end
45
+ self.etag = resp[:etag] if all_success
46
+ end
47
+
48
+ private
49
+
50
+ #
51
+ # Parse document and write it to file
52
+ #
53
+ # @param [Hash] doc
54
+ #
55
+ def parse_page(doc)
56
+ bib = Scrapper.parse_page doc
57
+ # bib.link.each { |l| l.content.merge!(scheme: SCHEME, host: HOST) unless l.content.host }
58
+ write_doc doc["docid"]["id"], bib
59
+ true
60
+ rescue StandardError => e
61
+ warn "Document: #{doc['docid']['id']}"
62
+ warn e.message
63
+ puts e.backtrace
64
+ false
65
+ end
66
+
67
+ def write_doc(docid, bib)
68
+ content = @format == "xml" ? bib.to_xml(bibdata: true) : bib.to_hash.to_yaml
69
+ file = File.join @output, "#{docid.upcase.gsub(%r{[/\s:]}, '_')}.#{@format}"
70
+ # if File.exist? file
71
+ # warn "#{file} exist"
72
+ # else
73
+ File.write file, content, encoding: "UTF-8"
74
+ # end
75
+ end
76
+
77
+ #
78
+ # Read ETag from file
79
+ #
80
+ # @return [String, NilClass]
81
+ def etag
82
+ @etag ||= File.exist?(@etagfile) ? File.read(@etagfile, encoding: "UTF-8") : nil
83
+ end
84
+
85
+ #
86
+ # Save ETag to file
87
+ #
88
+ # @param e_tag [String]
89
+ def etag=(e_tag)
90
+ File.write @etagfile, e_tag, encoding: "UTF-8"
91
+ end
92
+ end
93
+ end
@@ -23,7 +23,7 @@ module RelatonCalconnect
23
23
  private
24
24
 
25
25
  #
26
- # Fetch data form yaml
26
+ # Fetch data from yaml
27
27
  #
28
28
  # @param docid [String]
29
29
  def from_yaml(docid, **_opts)
@@ -44,7 +44,7 @@ module RelatonCalconnect
44
44
  end
45
45
 
46
46
  #
47
- # fetch data form server and save it to file.
47
+ # fetch data from server and save it to file.
48
48
  #
49
49
  def fetch_data
50
50
  resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
@@ -57,7 +57,7 @@ module RelatonCalconnect
57
57
  end
58
58
 
59
59
  #
60
- # Read ETag form file
60
+ # Read ETag from file
61
61
  #
62
62
  # @return [String, NilClass]
63
63
  def etag
@@ -4,11 +4,12 @@ module RelatonCalconnect
4
4
  class Processor < Relaton::Processor
5
5
  attr_reader :idtype
6
6
 
7
- def initialize
7
+ def initialize # rubocop:disable Lint/MissingSuper
8
8
  @short = :relaton_calconnect
9
9
  @prefix = "CC"
10
10
  @defaultprefix = %r{^CC\s}
11
11
  @idtype = "CC"
12
+ @datasets = %w[calconnect-org]
12
13
  end
13
14
 
14
15
  # @param code [String]
@@ -19,6 +20,18 @@ module RelatonCalconnect
19
20
  ::RelatonCalconnect::CcBibliography.get(code, date, opts)
20
21
  end
21
22
 
23
+ #
24
+ # Fetch all the documents from a source
25
+ #
26
+ # @param [String] _source source name
27
+ # @param [Hash] opts
28
+ # @option opts [String] :output directory to output documents
29
+ # @option opts [String] :format
30
+ #
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
+ end
34
+
22
35
  # @param xml [String]
23
36
  # @return [RelatonCalconnect::CcBibliographicItem]
24
37
  def from_xml(xml)
@@ -1,17 +1,24 @@
1
1
  module RelatonCalconnect
2
2
  module Scrapper
3
3
  DOMAIN = "https://standards.calconnect.org/".freeze
4
+ SCHEME, HOST = DOMAIN.split(%r{:?/?/})
4
5
  # DOMAIN = "http://127.0.0.1:4000/".freeze
5
6
 
6
7
  class << self
7
8
  # @param hit [Hash]
8
9
  # @return [RelatonCalconnect::CcBibliographicItem]
9
10
  def parse_page(hit)
10
- link = hit["link"].detect { |l| l["type"] == "rxl" }
11
+ links = array(hit["link"])
12
+ link = links.detect { |l| l["type"] == "rxl" }
11
13
  if link
12
- bib_xml = fetch_bib_xml link["content"]
13
- XMLParser.from_xml bib_xml
14
+ bib = fetch_bib_xml link["content"]
15
+ update_links bib, links
16
+ # XMLParser.from_xml bib_xml
17
+ else
18
+ bib = RelatonCalconnect::CcBibliographicItem.from_hash doc_to_hash(hit)
14
19
  end
20
+ bib.link.each { |l| l.content.merge!(scheme: SCHEME, host: HOST) unless l.content.host }
21
+ bib
15
22
  end
16
23
 
17
24
  private
@@ -19,15 +26,25 @@ module RelatonCalconnect
19
26
  # @param url [String]
20
27
  # @return [String] XML
21
28
  def fetch_bib_xml(url)
29
+ # rxl = get_rxl url
30
+ # uri_rxl = rxl.at("uri[@type='rxl']")
31
+ # return rxl.to_xml unless uri_rxl
32
+
33
+ # uri_xml = rxl.xpath("//uri").to_xml
34
+ # rxl = get_rxl uri_rxl.text
35
+ # docid = rxl.at "//docidentifier"
36
+ # docid.add_previous_sibling uri_xml
37
+ # rxl.to_xml
22
38
  rxl = get_rxl url
23
39
  uri_rxl = rxl.at("uri[@type='rxl']")
24
- return rxl.to_xml unless uri_rxl
25
-
26
- uri_xml = rxl.xpath("//uri").to_xml
27
- rxl = get_rxl uri_rxl.text
28
- docid = rxl.at "//docidentifier"
29
- docid.add_previous_sibling uri_xml
30
- rxl.to_xml
40
+ if uri_rxl
41
+ uri_xml = rxl.xpath("//uri").to_xml
42
+ rxl = get_rxl uri_rxl.text
43
+ docid = rxl.at "//docidentifier"
44
+ docid.add_previous_sibling uri_xml
45
+ end
46
+ xml = rxl.to_xml.gsub!(%r{(</?)technical-committee(>)}, '\1committee\2')
47
+ RelatonCalconnect::XMLParser.from_xml xml
31
48
  end
32
49
 
33
50
  # @param path [String]
@@ -36,6 +53,44 @@ module RelatonCalconnect
36
53
  resp = Faraday.get DOMAIN + path
37
54
  Nokogiri::XML resp.body
38
55
  end
56
+
57
+ #
58
+ # Fix editorial group
59
+ #
60
+ # @param [Hash] doc
61
+ #
62
+ # @return [Hash]
63
+ #
64
+ def doc_to_hash(doc)
65
+ array(doc["editorialgroup"]).each do |eg|
66
+ tc = eg.delete("technical_committee")
67
+ eg.merge!(tc) if tc
68
+ end
69
+ doc
70
+ end
71
+
72
+ def update_links(bib, links)
73
+ links.each do |l|
74
+ tu = l.transform_keys(&:to_sym)
75
+ bib.link << RelatonBib::TypedUri.new(**tu) unless bib.url(l["type"])
76
+ end
77
+ bib
78
+ end
79
+
80
+ #
81
+ # Wrap into Array if not Array
82
+ #
83
+ # @param [Array, Hash, String, nil] content
84
+ #
85
+ # @return [Array<Hash, String>]
86
+ #
87
+ def array(content)
88
+ case content
89
+ when Array then content
90
+ when nil then []
91
+ else [content]
92
+ end
93
+ end
39
94
  end
40
95
  end
41
96
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonCalconnect
2
- VERSION = "1.7.2".freeze
2
+ VERSION = "1.9.2".freeze
3
3
  end
@@ -5,7 +5,7 @@ module RelatonCalconnect
5
5
  # @param item_hash [Hash]
6
6
  # @return [RelatonCalconnect::CcBibliographicItem]
7
7
  def bib_item(item_hash)
8
- CcBibliographicItem.new **item_hash
8
+ CcBibliographicItem.new(**item_hash)
9
9
  end
10
10
 
11
11
  # @param ext [Nokogiri::XML::Element]
@@ -14,7 +14,7 @@ module RelatonCalconnect
14
14
  return unless ext && (eg = ext.at "editorialgroup")
15
15
 
16
16
  eg = eg.xpath("committee", "technical-committee").map do |tc|
17
- wg = RelatonBib::WorkGroup.new(content: tc.text, number: tc[:number]&.to_i,
17
+ wg = RelatonBib::WorkGroup.new(name: tc.text, number: tc[:number]&.to_i,
18
18
  type: tc[:type])
19
19
  TechnicalCommittee.new wg
20
20
  end
@@ -8,6 +8,7 @@ require "relaton_calconnect/technical_committee"
8
8
  require "relaton_calconnect/cc_bibliographic_item"
9
9
  require "relaton_calconnect/xml_parser"
10
10
  require "relaton_calconnect/hash_converter"
11
+ require "relaton_calconnect/data_fetcher"
11
12
 
12
13
  module RelatonCalconnect
13
14
  class Error < StandardError; end
@@ -24,18 +24,16 @@ Gem::Specification.new do |spec|
24
24
  spec.bindir = "exe"
25
25
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
26
26
  spec.require_paths = ["lib"]
27
- spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
27
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
28
28
 
29
- # spec.add_development_dependency "debase"
30
29
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
31
- spec.add_development_dependency "rake", "~> 10.0"
30
+ spec.add_development_dependency "rake", "~> 13.0"
32
31
  spec.add_development_dependency "rspec", "~> 3.0"
33
- # spec.add_development_dependency "ruby-debug-ide"
34
32
  spec.add_development_dependency "ruby-jing"
35
33
  spec.add_development_dependency "simplecov"
36
34
  spec.add_development_dependency "vcr"
37
35
  spec.add_development_dependency "webmock"
38
36
 
39
37
  spec.add_dependency "faraday"
40
- spec.add_dependency "relaton-bib", "~> 1.7.0"
38
+ spec.add_dependency "relaton-bib", "~> 1.9.0"
41
39
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-calconnect
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.2
4
+ version: 1.9.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-03-20 00:00:00.000000000 Z
11
+ date: 2021-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '13.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '13.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -128,14 +128,14 @@ dependencies:
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: 1.7.0
131
+ version: 1.9.0
132
132
  type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: 1.7.0
138
+ version: 1.9.0
139
139
  description: 'RelatonIso: retrieve CC Standards for bibliographic use using the IsoBibliographicItem
140
140
  model'
141
141
  email:
@@ -163,6 +163,7 @@ files:
163
163
  - lib/relaton_calconnect.rb
164
164
  - lib/relaton_calconnect/cc_bibliographic_item.rb
165
165
  - lib/relaton_calconnect/cc_bibliography.rb
166
+ - lib/relaton_calconnect/data_fetcher.rb
166
167
  - lib/relaton_calconnect/hash_converter.rb
167
168
  - lib/relaton_calconnect/hit.rb
168
169
  - lib/relaton_calconnect/hit_collection.rb
@@ -184,14 +185,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
184
185
  requirements:
185
186
  - - ">="
186
187
  - !ruby/object:Gem::Version
187
- version: 2.4.0
188
+ version: 2.5.0
188
189
  required_rubygems_version: !ruby/object:Gem::Requirement
189
190
  requirements:
190
191
  - - ">="
191
192
  - !ruby/object:Gem::Version
192
193
  version: '0'
193
194
  requirements: []
194
- rubygems_version: 3.0.6
195
+ rubygems_version: 3.2.3
195
196
  signing_key:
196
197
  specification_version: 4
197
198
  summary: 'RelatonIso: retrieve CC Standards for bibliographic use using the IsoBibliographicItem