relaton-calconnect 1.14.1 → 1.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e56768bb659378f291f2bd2ec6f57d46ff0db713c6465150b57e3a3ba85aeff0
4
- data.tar.gz: db3c391bd354c06cbb774e2f13f0f746ec615bee53c139876f70e312816fce19
3
+ metadata.gz: 181a7e1a48eb95ff2558f0895621f59fb8a52f167c0ccac81433f5346e52e38a
4
+ data.tar.gz: a24dae24e0803340a07ddf3882a0343bb8ba52f05bb2d923df016d33dfd4e7cb
5
5
  SHA512:
6
- metadata.gz: bb8b6b662b644931211db146edb0e4470d068f9c94987f7594793656655f450902b3e601217f9ecf24975cf7a6434150c5c67c668c18a0ecc6464653d07edbe1
7
- data.tar.gz: a373c880ef6c73092333b85e7f686d765d596ef8bf6be2516124393d63fc179b64fa0005f69a747ccc139bde5117c5d8f2b0937a381cf561e2664894ee9b021a
6
+ metadata.gz: aab8cae2f19844f9a7d8d999b568d16f1a22344d88d3df81f924ef0e7c225f58d0c05616b6f6b73e3a6d3b8b116bc3df7c33e5ca3f3cd187391c1b7d4d4163fc
7
+ data.tar.gz: 12b98b7c9ae142bebd87bf4201889c471f28b5c477dc364936e0d94d3c6a3a0c1dd4a8a01cce9367b44183a2fcde70bdde43a50d2f58da04433318ab2f58939a
data/Gemfile CHANGED
@@ -3,6 +3,9 @@ source "https://rubygems.org"
3
3
  # Specify your gem's dependencies in relaton_calconnect.gemspec
4
4
  gemspec
5
5
 
6
+ gem "equivalent-xml", "~> 0.6"
7
+ gem "rake", "~> 13.0"
8
+ gem "rspec", "~> 3.0"
6
9
  gem "ruby-jing"
7
10
  gem "simplecov"
8
11
  gem "vcr"
data/README.adoc CHANGED
@@ -50,7 +50,7 @@ item.to_xml
50
50
  ...
51
51
  </bibitem>"
52
52
  ----
53
- With argument `bibdata: true` it ouputs XML wrapped by `bibdata` element and adds flavour `ext` element.
53
+ With argument `bibdata: true` it outputs XML wrapped by `bibdata` element and adds flavour `ext` element.
54
54
  [source,ruby]
55
55
  ----
56
56
  item.to_xml bibdata: true
@@ -87,7 +87,7 @@ item.link
87
87
  #<RelatonBib::TypedUri:0x00007ff2d4b23a08 @content=#<Addressable::URI:0x6b8 URI:https://standards.calconnect.org/csd/cc-10005.rxl>, @type="rxl">]
88
88
  ----
89
89
 
90
- === Get code, and year
90
+ === Get document by code and year
91
91
  [source,ruby]
92
92
  ----
93
93
  RelatonCalconnect::CcBibliography.get "CC/DIR 10005", "2019", {}
@@ -119,13 +119,13 @@ RelatonCalconnect::CcBibliographicItem.from_hash hash
119
119
 
120
120
  === Fetch data
121
121
 
122
- This gem uses the https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml dataset as one of data sources.
122
+ This gem uses the https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml dataset as one of the data sources.
123
123
 
124
- The method `RelatonCalconnect::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the datast and save them to the `./data` folder in YAML format.
124
+ The method `RelatonCalconnect::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
125
125
  Arguments:
126
126
 
127
127
  - `output` - folder to save documents (default './data').
128
- - `format` - format in which the documents are saved. Possimle formats are: `yaml`, `xml` (default `yaml`).
128
+ - `format` - the format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
129
129
 
130
130
  [source,ruby]
131
131
  ----
@@ -74,11 +74,11 @@ module RelatonCalconnect
74
74
  # @param missed_years [Array<String>]
75
75
  def fetch_ref_err(code, year, missed_years)
76
76
  id = year ? "#{code} year #{year}" : code
77
- warn "[relaton-calconnect] WARNING: no match found online for #{id}. "\
78
- "The code must be exactly like it is on the standards website."
77
+ warn "[relaton-calconnect] WARNING: no match found online for #{id}. " \
78
+ "The code must be exactly like it is on the standards website."
79
79
  unless missed_years.empty?
80
- warn "[relaton-calconnect] (There was no match for #{year}, though "\
81
- "there were matches found for #{missed_years.join(', ')}.)"
80
+ warn "[relaton-calconnect] (There was no match for #{year}, though " \
81
+ "there were matches found for #{missed_years.join(', ')}.)"
82
82
  end
83
83
  nil
84
84
  end
@@ -16,8 +16,19 @@ module RelatonCalconnect
16
16
  @output = output
17
17
  @etagfile = File.join output, "etag.txt"
18
18
  @format = format
19
+ @ext = format.sub "bibxml", "xml"
20
+ @files = []
21
+ @index = Relaton::Index.find_or_create :CC, file: "index-v1.yaml"
19
22
  end
20
23
 
24
+ #
25
+ # Fetch all the documents from a source
26
+ #
27
+ # @param [String] output directory to output documents, default: "data"
28
+ # @param [String] format output format, default: "yaml"
29
+ #
30
+ # @return [void]
31
+ #
21
32
  def self.fetch(output: "data", format: "yaml")
22
33
  t1 = Time.now
23
34
  puts "Started at: #{t1}"
@@ -31,18 +42,16 @@ module RelatonCalconnect
31
42
  #
32
43
  # fetch data from server and save it to file.
33
44
  #
34
- def fetch
45
+ def fetch # rubocop:disable Metrics/AbcSize
35
46
  resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
36
47
  # return if there aren't any changes since last fetching
37
48
  return unless resp.status == 200
38
49
 
39
50
  data = YAML.safe_load resp.body
40
51
  all_success = true
41
- data["root"]["items"].each do |doc|
42
- success = parse_page doc
43
- all_success &&= success
44
- end
52
+ data["root"]["items"].each { |doc| all_success &&= parse_page doc }
45
53
  self.etag = resp[:etag] if all_success
54
+ @index.save
46
55
  end
47
56
 
48
57
  private
@@ -64,14 +73,20 @@ module RelatonCalconnect
64
73
  false
65
74
  end
66
75
 
67
- def write_doc(docid, bib)
68
- content = @format == "xml" ? bib.to_xml(bibdata: true) : bib.to_hash.to_yaml
69
- file = File.join @output, "#{docid.upcase.gsub(%r{[/\s:]}, '_')}.#{@format}"
70
- # if File.exist? file
71
- # warn "#{file} exist"
72
- # else
76
+ def write_doc(docid, bib) # rubocop:disable Metrics/MethodLength
77
+ content = case @format
78
+ when "xml" then bib.to_xml(bibdata: true)
79
+ when "bibxml" then bib.to_bibxml
80
+ else bib.to_hash.to_yaml
81
+ end
82
+ file = File.join @output, "#{docid.upcase.gsub(%r{[/\s:]}, '_')}.#{@ext}"
83
+ if @files.include? file
84
+ warn "#{file} exist"
85
+ else
86
+ @files << file
87
+ end
88
+ @index.add_or_update docid, file
73
89
  File.write file, content, encoding: "UTF-8"
74
- # end
75
90
  end
76
91
 
77
92
  #
@@ -3,7 +3,12 @@ module RelatonCalconnect
3
3
  # Parse page.
4
4
  # @return [RelatonCalconnect::CcBibliographicItem]
5
5
  def fetch
6
- @fetch ||= Scrapper.parse_page @hit
6
+ # @fetch ||= Scrapper.parse_page @hit
7
+ @fetch ||= begin
8
+ url = "#{HitCollection::GHURL}#{@hit[:file]}"
9
+ resp = Faraday.get url
10
+ CcBibliographicItem.from_hash YAML.safe_load(resp.body)
11
+ end
7
12
  end
8
13
  end
9
14
  end
@@ -4,23 +4,29 @@ require "fileutils"
4
4
 
5
5
  module RelatonCalconnect
6
6
  class HitCollection < RelatonBib::HitCollection
7
- ENDPOINT = "https://standards.calconnect.org/relaton/index.yaml".freeze
7
+ # ENDPOINT = "https://standards.calconnect.org/relaton/index.yaml".freeze
8
8
  # ENDPOINT = "http://127.0.0.1:4000/relaton/index.yaml".freeze
9
- DATADIR = File.expand_path ".relaton/calconnect", Dir.home
10
- DATAFILE = File.expand_path "bibliography.yml", DATADIR
11
- ETAGFILE = File.expand_path "etag.txt", DATADIR
9
+ # DATADIR = File.expand_path ".relaton/calconnect", Dir.home
10
+ # DATAFILE = File.expand_path "bibliography.yml", DATADIR
11
+ # ETAGFILE = File.expand_path "etag.txt", DATADIR
12
+ GHURL = "https://raw.githubusercontent.com/relaton/relaton-data-calconnect/main/".freeze
13
+ INDEX_FILE = "index-v1.yaml".freeze
12
14
 
13
15
  # @param ref [String]
14
16
  # @param year [String]
15
17
  # @param opts [Hash]
16
18
  def initialize(ref, year = nil)
17
19
  super
18
- @array = from_yaml(ref).sort_by do |hit|
19
- hit.hit["revdate"] ? Date.parse(hit.hit["revdate"]) : Date.new
20
- end.reverse
20
+ # @array = from_yaml(ref).sort_by do |hit|
21
+ # hit.hit["revdate"] ? Date.parse(hit.hit["revdate"]) : Date.new
22
+ # end.reverse
23
+ index = Relaton::Index.find_or_create :CC, url: "#{GHURL}index-v1.zip", file: INDEX_FILE
24
+ @array = index.search(ref).map do |row|
25
+ Hit.new(row, self)
26
+ end
21
27
  end
22
28
 
23
- private
29
+ # private
24
30
 
25
31
  #
26
32
  # Fetch data from yaml
@@ -29,52 +35,52 @@ module RelatonCalconnect
29
35
  #
30
36
  # @return [Array<RelatonBib::Hit>]
31
37
  #
32
- def from_yaml(docid, **_opts)
33
- data["root"]["items"].select do |doc|
34
- doc["docid"] && doc["docid"]["id"].include?(docid)
35
- end.map { |h| Hit.new(h, self) }
36
- end
38
+ # def from_yaml(docid, **_opts)
39
+ # data["root"]["items"].select do |doc|
40
+ # doc["docid"] && doc["docid"]["id"].include?(docid)
41
+ # end.map { |h| Hit.new(h, self) }
42
+ # end
37
43
 
38
44
  #
39
45
  # Fetches YAML data
40
46
  #
41
47
  # @return [Hash]
42
- def data
43
- FileUtils.mkdir_p DATADIR
44
- ctime = File.ctime DATAFILE if File.exist? DATAFILE
45
- fetch_data if !ctime || ctime.to_date < Date.today
46
- @data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
47
- end
48
+ # def data
49
+ # FileUtils.mkdir_p DATADIR
50
+ # ctime = File.ctime DATAFILE if File.exist? DATAFILE
51
+ # fetch_data if !ctime || ctime.to_date < Date.today
52
+ # @data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
53
+ # end
48
54
 
49
55
  #
50
56
  # fetch data from server and save it to file.
51
57
  #
52
- def fetch_data
53
- resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
54
- # return if there aren't any changes since last fetching
55
- return unless resp.status == 200
58
+ # def fetch_data
59
+ # resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
60
+ # # return if there aren't any changes since last fetching
61
+ # return unless resp.status == 200
56
62
 
57
- self.etag = resp[:etag]
58
- @data = YAML.safe_load resp.body
59
- File.write DATAFILE, @data.to_yaml, encoding: "UTF-8"
60
- end
63
+ # self.etag = resp[:etag]
64
+ # @data = YAML.safe_load resp.body
65
+ # File.write DATAFILE, @data.to_yaml, encoding: "UTF-8"
66
+ # end
61
67
 
62
68
  #
63
69
  # Read ETag from file
64
70
  #
65
71
  # @return [String, NilClass]
66
- def etag
67
- @etag ||= if File.exist? ETAGFILE
68
- File.read ETAGFILE, encoding: "UTF-8"
69
- end
70
- end
72
+ # def etag
73
+ # @etag ||= if File.exist? ETAGFILE
74
+ # File.read ETAGFILE, encoding: "UTF-8"
75
+ # end
76
+ # end
71
77
 
72
78
  #
73
79
  # Save ETag to file
74
80
  #
75
81
  # @param tag [String]
76
- def etag=(e_tag)
77
- File.write ETAGFILE, e_tag, encoding: "UTF-8"
78
- end
82
+ # def etag=(e_tag)
83
+ # File.write ETAGFILE, e_tag, encoding: "UTF-8"
84
+ # end
79
85
  end
80
86
  end
@@ -49,5 +49,12 @@ module RelatonCalconnect
49
49
  def grammar_hash
50
50
  @grammar_hash ||= ::RelatonCalconnect.grammar_hash
51
51
  end
52
+
53
+ #
54
+ # Remove index file
55
+ #
56
+ def remove_index_file
57
+ Relaton::Index.find_or_create(:CC, url: true, file: HitCollection::INDEX_FILE).remove_file
58
+ end
52
59
  end
53
60
  end
@@ -47,7 +47,7 @@ module RelatonCalconnect
47
47
  docid = rxl.at "//docidentifier"
48
48
  docid.add_previous_sibling uri_xml
49
49
  end
50
- xml = rxl.to_xml.gsub!(%r{(</?)technical-committee(>)}, '\1committee\2')
50
+ xml = rxl.to_xml.gsub(%r{(</?)technical-committee(>)}, '\1committee\2')
51
51
  .gsub(%r{type="(?:csd|CC)"(?=>)}i, '\0 primary="true"')
52
52
  RelatonCalconnect::XMLParser.from_xml xml
53
53
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonCalconnect
2
- VERSION = "1.14.1".freeze
2
+ VERSION = "1.14.2".freeze
3
3
  end
@@ -1,3 +1,4 @@
1
+ require "relaton/index"
1
2
  require "relaton_bib"
2
3
  require "relaton_calconnect/version"
3
4
  require "relaton_calconnect/cc_bibliography"
@@ -26,10 +26,7 @@ Gem::Specification.new do |spec|
26
26
  spec.require_paths = ["lib"]
27
27
  spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
28
28
 
29
- spec.add_development_dependency "equivalent-xml", "~> 0.6"
30
- spec.add_development_dependency "rake", "~> 13.0"
31
- spec.add_development_dependency "rspec", "~> 3.0"
32
-
33
29
  spec.add_dependency "faraday", "~> 2.7.0"
34
30
  spec.add_dependency "relaton-bib", "~> 1.14.0"
31
+ spec.add_dependency "relaton-index", "~> 0.2.0"
35
32
  end
metadata CHANGED
@@ -1,85 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-calconnect
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.14.1
4
+ version: 1.14.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-20 00:00:00.000000000 Z
11
+ date: 2023-05-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: equivalent-xml
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '0.6'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '0.6'
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '13.0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '13.0'
41
- - !ruby/object:Gem::Dependency
42
- name: rspec
14
+ name: faraday
43
15
  requirement: !ruby/object:Gem::Requirement
44
16
  requirements:
45
17
  - - "~>"
46
18
  - !ruby/object:Gem::Version
47
- version: '3.0'
48
- type: :development
19
+ version: 2.7.0
20
+ type: :runtime
49
21
  prerelease: false
50
22
  version_requirements: !ruby/object:Gem::Requirement
51
23
  requirements:
52
24
  - - "~>"
53
25
  - !ruby/object:Gem::Version
54
- version: '3.0'
26
+ version: 2.7.0
55
27
  - !ruby/object:Gem::Dependency
56
- name: faraday
28
+ name: relaton-bib
57
29
  requirement: !ruby/object:Gem::Requirement
58
30
  requirements:
59
31
  - - "~>"
60
32
  - !ruby/object:Gem::Version
61
- version: 2.7.0
33
+ version: 1.14.0
62
34
  type: :runtime
63
35
  prerelease: false
64
36
  version_requirements: !ruby/object:Gem::Requirement
65
37
  requirements:
66
38
  - - "~>"
67
39
  - !ruby/object:Gem::Version
68
- version: 2.7.0
40
+ version: 1.14.0
69
41
  - !ruby/object:Gem::Dependency
70
- name: relaton-bib
42
+ name: relaton-index
71
43
  requirement: !ruby/object:Gem::Requirement
72
44
  requirements:
73
45
  - - "~>"
74
46
  - !ruby/object:Gem::Version
75
- version: 1.14.0
47
+ version: 0.2.0
76
48
  type: :runtime
77
49
  prerelease: false
78
50
  version_requirements: !ruby/object:Gem::Requirement
79
51
  requirements:
80
52
  - - "~>"
81
53
  - !ruby/object:Gem::Version
82
- version: 1.14.0
54
+ version: 0.2.0
83
55
  description: 'RelatonIso: retrieve CC Standards for bibliographic use using the IsoBibliographicItem
84
56
  model'
85
57
  email: