relaton-calconnect 1.14.1 → 1.14.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e56768bb659378f291f2bd2ec6f57d46ff0db713c6465150b57e3a3ba85aeff0
4
- data.tar.gz: db3c391bd354c06cbb774e2f13f0f746ec615bee53c139876f70e312816fce19
3
+ metadata.gz: 181a7e1a48eb95ff2558f0895621f59fb8a52f167c0ccac81433f5346e52e38a
4
+ data.tar.gz: a24dae24e0803340a07ddf3882a0343bb8ba52f05bb2d923df016d33dfd4e7cb
5
5
  SHA512:
6
- metadata.gz: bb8b6b662b644931211db146edb0e4470d068f9c94987f7594793656655f450902b3e601217f9ecf24975cf7a6434150c5c67c668c18a0ecc6464653d07edbe1
7
- data.tar.gz: a373c880ef6c73092333b85e7f686d765d596ef8bf6be2516124393d63fc179b64fa0005f69a747ccc139bde5117c5d8f2b0937a381cf561e2664894ee9b021a
6
+ metadata.gz: aab8cae2f19844f9a7d8d999b568d16f1a22344d88d3df81f924ef0e7c225f58d0c05616b6f6b73e3a6d3b8b116bc3df7c33e5ca3f3cd187391c1b7d4d4163fc
7
+ data.tar.gz: 12b98b7c9ae142bebd87bf4201889c471f28b5c477dc364936e0d94d3c6a3a0c1dd4a8a01cce9367b44183a2fcde70bdde43a50d2f58da04433318ab2f58939a
data/Gemfile CHANGED
@@ -3,6 +3,9 @@ source "https://rubygems.org"
3
3
  # Specify your gem's dependencies in relaton_calconnect.gemspec
4
4
  gemspec
5
5
 
6
+ gem "equivalent-xml", "~> 0.6"
7
+ gem "rake", "~> 13.0"
8
+ gem "rspec", "~> 3.0"
6
9
  gem "ruby-jing"
7
10
  gem "simplecov"
8
11
  gem "vcr"
data/README.adoc CHANGED
@@ -50,7 +50,7 @@ item.to_xml
50
50
  ...
51
51
  </bibitem>"
52
52
  ----
53
- With argument `bibdata: true` it ouputs XML wrapped by `bibdata` element and adds flavour `ext` element.
53
+ With argument `bibdata: true` it outputs XML wrapped by `bibdata` element and adds flavour `ext` element.
54
54
  [source,ruby]
55
55
  ----
56
56
  item.to_xml bibdata: true
@@ -87,7 +87,7 @@ item.link
87
87
  #<RelatonBib::TypedUri:0x00007ff2d4b23a08 @content=#<Addressable::URI:0x6b8 URI:https://standards.calconnect.org/csd/cc-10005.rxl>, @type="rxl">]
88
88
  ----
89
89
 
90
- === Get code, and year
90
+ === Get document by code and year
91
91
  [source,ruby]
92
92
  ----
93
93
  RelatonCalconnect::CcBibliography.get "CC/DIR 10005", "2019", {}
@@ -119,13 +119,13 @@ RelatonCalconnect::CcBibliographicItem.from_hash hash
119
119
 
120
120
  === Fetch data
121
121
 
122
- This gem uses the https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml dataset as one of data sources.
122
+ This gem uses the https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml dataset as one of the data sources.
123
123
 
124
- The method `RelatonCalconnect::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the datast and save them to the `./data` folder in YAML format.
124
+ The method `RelatonCalconnect::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
125
125
  Arguments:
126
126
 
127
127
  - `output` - folder to save documents (default './data').
128
- - `format` - format in which the documents are saved. Possimle formats are: `yaml`, `xml` (default `yaml`).
128
+ - `format` - the format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
129
129
 
130
130
  [source,ruby]
131
131
  ----
@@ -74,11 +74,11 @@ module RelatonCalconnect
74
74
  # @param missed_years [Array<String>]
75
75
  def fetch_ref_err(code, year, missed_years)
76
76
  id = year ? "#{code} year #{year}" : code
77
- warn "[relaton-calconnect] WARNING: no match found online for #{id}. "\
78
- "The code must be exactly like it is on the standards website."
77
+ warn "[relaton-calconnect] WARNING: no match found online for #{id}. " \
78
+ "The code must be exactly like it is on the standards website."
79
79
  unless missed_years.empty?
80
- warn "[relaton-calconnect] (There was no match for #{year}, though "\
81
- "there were matches found for #{missed_years.join(', ')}.)"
80
+ warn "[relaton-calconnect] (There was no match for #{year}, though " \
81
+ "there were matches found for #{missed_years.join(', ')}.)"
82
82
  end
83
83
  nil
84
84
  end
@@ -16,8 +16,19 @@ module RelatonCalconnect
16
16
  @output = output
17
17
  @etagfile = File.join output, "etag.txt"
18
18
  @format = format
19
+ @ext = format.sub "bibxml", "xml"
20
+ @files = []
21
+ @index = Relaton::Index.find_or_create :CC, file: "index-v1.yaml"
19
22
  end
20
23
 
24
+ #
25
+ # Fetch all the documents from a source
26
+ #
27
+ # @param [String] output directory to output documents, default: "data"
28
+ # @param [String] format output format, default: "yaml"
29
+ #
30
+ # @return [void]
31
+ #
21
32
  def self.fetch(output: "data", format: "yaml")
22
33
  t1 = Time.now
23
34
  puts "Started at: #{t1}"
@@ -31,18 +42,16 @@ module RelatonCalconnect
31
42
  #
32
43
  # fetch data from server and save it to file.
33
44
  #
34
- def fetch
45
+ def fetch # rubocop:disable Metrics/AbcSize
35
46
  resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
36
47
  # return if there aren't any changes since last fetching
37
48
  return unless resp.status == 200
38
49
 
39
50
  data = YAML.safe_load resp.body
40
51
  all_success = true
41
- data["root"]["items"].each do |doc|
42
- success = parse_page doc
43
- all_success &&= success
44
- end
52
+ data["root"]["items"].each { |doc| all_success &&= parse_page doc }
45
53
  self.etag = resp[:etag] if all_success
54
+ @index.save
46
55
  end
47
56
 
48
57
  private
@@ -64,14 +73,20 @@ module RelatonCalconnect
64
73
  false
65
74
  end
66
75
 
67
- def write_doc(docid, bib)
68
- content = @format == "xml" ? bib.to_xml(bibdata: true) : bib.to_hash.to_yaml
69
- file = File.join @output, "#{docid.upcase.gsub(%r{[/\s:]}, '_')}.#{@format}"
70
- # if File.exist? file
71
- # warn "#{file} exist"
72
- # else
76
+ def write_doc(docid, bib) # rubocop:disable Metrics/MethodLength
77
+ content = case @format
78
+ when "xml" then bib.to_xml(bibdata: true)
79
+ when "bibxml" then bib.to_bibxml
80
+ else bib.to_hash.to_yaml
81
+ end
82
+ file = File.join @output, "#{docid.upcase.gsub(%r{[/\s:]}, '_')}.#{@ext}"
83
+ if @files.include? file
84
+ warn "#{file} exist"
85
+ else
86
+ @files << file
87
+ end
88
+ @index.add_or_update docid, file
73
89
  File.write file, content, encoding: "UTF-8"
74
- # end
75
90
  end
76
91
 
77
92
  #
@@ -3,7 +3,12 @@ module RelatonCalconnect
3
3
  # Parse page.
4
4
  # @return [RelatonCalconnect::CcBibliographicItem]
5
5
  def fetch
6
- @fetch ||= Scrapper.parse_page @hit
6
+ # @fetch ||= Scrapper.parse_page @hit
7
+ @fetch ||= begin
8
+ url = "#{HitCollection::GHURL}#{@hit[:file]}"
9
+ resp = Faraday.get url
10
+ CcBibliographicItem.from_hash YAML.safe_load(resp.body)
11
+ end
7
12
  end
8
13
  end
9
14
  end
@@ -4,23 +4,29 @@ require "fileutils"
4
4
 
5
5
  module RelatonCalconnect
6
6
  class HitCollection < RelatonBib::HitCollection
7
- ENDPOINT = "https://standards.calconnect.org/relaton/index.yaml".freeze
7
+ # ENDPOINT = "https://standards.calconnect.org/relaton/index.yaml".freeze
8
8
  # ENDPOINT = "http://127.0.0.1:4000/relaton/index.yaml".freeze
9
- DATADIR = File.expand_path ".relaton/calconnect", Dir.home
10
- DATAFILE = File.expand_path "bibliography.yml", DATADIR
11
- ETAGFILE = File.expand_path "etag.txt", DATADIR
9
+ # DATADIR = File.expand_path ".relaton/calconnect", Dir.home
10
+ # DATAFILE = File.expand_path "bibliography.yml", DATADIR
11
+ # ETAGFILE = File.expand_path "etag.txt", DATADIR
12
+ GHURL = "https://raw.githubusercontent.com/relaton/relaton-data-calconnect/main/".freeze
13
+ INDEX_FILE = "index-v1.yaml".freeze
12
14
 
13
15
  # @param ref [String]
14
16
  # @param year [String]
15
17
  # @param opts [Hash]
16
18
  def initialize(ref, year = nil)
17
19
  super
18
- @array = from_yaml(ref).sort_by do |hit|
19
- hit.hit["revdate"] ? Date.parse(hit.hit["revdate"]) : Date.new
20
- end.reverse
20
+ # @array = from_yaml(ref).sort_by do |hit|
21
+ # hit.hit["revdate"] ? Date.parse(hit.hit["revdate"]) : Date.new
22
+ # end.reverse
23
+ index = Relaton::Index.find_or_create :CC, url: "#{GHURL}index-v1.zip", file: INDEX_FILE
24
+ @array = index.search(ref).map do |row|
25
+ Hit.new(row, self)
26
+ end
21
27
  end
22
28
 
23
- private
29
+ # private
24
30
 
25
31
  #
26
32
  # Fetch data from yaml
@@ -29,52 +35,52 @@ module RelatonCalconnect
29
35
  #
30
36
  # @return [Array<RelatonBib::Hit>]
31
37
  #
32
- def from_yaml(docid, **_opts)
33
- data["root"]["items"].select do |doc|
34
- doc["docid"] && doc["docid"]["id"].include?(docid)
35
- end.map { |h| Hit.new(h, self) }
36
- end
38
+ # def from_yaml(docid, **_opts)
39
+ # data["root"]["items"].select do |doc|
40
+ # doc["docid"] && doc["docid"]["id"].include?(docid)
41
+ # end.map { |h| Hit.new(h, self) }
42
+ # end
37
43
 
38
44
  #
39
45
  # Fetches YAML data
40
46
  #
41
47
  # @return [Hash]
42
- def data
43
- FileUtils.mkdir_p DATADIR
44
- ctime = File.ctime DATAFILE if File.exist? DATAFILE
45
- fetch_data if !ctime || ctime.to_date < Date.today
46
- @data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
47
- end
48
+ # def data
49
+ # FileUtils.mkdir_p DATADIR
50
+ # ctime = File.ctime DATAFILE if File.exist? DATAFILE
51
+ # fetch_data if !ctime || ctime.to_date < Date.today
52
+ # @data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
53
+ # end
48
54
 
49
55
  #
50
56
  # fetch data from server and save it to file.
51
57
  #
52
- def fetch_data
53
- resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
54
- # return if there aren't any changes since last fetching
55
- return unless resp.status == 200
58
+ # def fetch_data
59
+ # resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
60
+ # # return if there aren't any changes since last fetching
61
+ # return unless resp.status == 200
56
62
 
57
- self.etag = resp[:etag]
58
- @data = YAML.safe_load resp.body
59
- File.write DATAFILE, @data.to_yaml, encoding: "UTF-8"
60
- end
63
+ # self.etag = resp[:etag]
64
+ # @data = YAML.safe_load resp.body
65
+ # File.write DATAFILE, @data.to_yaml, encoding: "UTF-8"
66
+ # end
61
67
 
62
68
  #
63
69
  # Read ETag from file
64
70
  #
65
71
  # @return [String, NilClass]
66
- def etag
67
- @etag ||= if File.exist? ETAGFILE
68
- File.read ETAGFILE, encoding: "UTF-8"
69
- end
70
- end
72
+ # def etag
73
+ # @etag ||= if File.exist? ETAGFILE
74
+ # File.read ETAGFILE, encoding: "UTF-8"
75
+ # end
76
+ # end
71
77
 
72
78
  #
73
79
  # Save ETag to file
74
80
  #
75
81
  # @param tag [String]
76
- def etag=(e_tag)
77
- File.write ETAGFILE, e_tag, encoding: "UTF-8"
78
- end
82
+ # def etag=(e_tag)
83
+ # File.write ETAGFILE, e_tag, encoding: "UTF-8"
84
+ # end
79
85
  end
80
86
  end
@@ -49,5 +49,12 @@ module RelatonCalconnect
49
49
  def grammar_hash
50
50
  @grammar_hash ||= ::RelatonCalconnect.grammar_hash
51
51
  end
52
+
53
+ #
54
+ # Remove index file
55
+ #
56
+ def remove_index_file
57
+ Relaton::Index.find_or_create(:CC, url: true, file: HitCollection::INDEX_FILE).remove_file
58
+ end
52
59
  end
53
60
  end
@@ -47,7 +47,7 @@ module RelatonCalconnect
47
47
  docid = rxl.at "//docidentifier"
48
48
  docid.add_previous_sibling uri_xml
49
49
  end
50
- xml = rxl.to_xml.gsub!(%r{(</?)technical-committee(>)}, '\1committee\2')
50
+ xml = rxl.to_xml.gsub(%r{(</?)technical-committee(>)}, '\1committee\2')
51
51
  .gsub(%r{type="(?:csd|CC)"(?=>)}i, '\0 primary="true"')
52
52
  RelatonCalconnect::XMLParser.from_xml xml
53
53
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonCalconnect
2
- VERSION = "1.14.1".freeze
2
+ VERSION = "1.14.2".freeze
3
3
  end
@@ -1,3 +1,4 @@
1
+ require "relaton/index"
1
2
  require "relaton_bib"
2
3
  require "relaton_calconnect/version"
3
4
  require "relaton_calconnect/cc_bibliography"
@@ -26,10 +26,7 @@ Gem::Specification.new do |spec|
26
26
  spec.require_paths = ["lib"]
27
27
  spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
28
28
 
29
- spec.add_development_dependency "equivalent-xml", "~> 0.6"
30
- spec.add_development_dependency "rake", "~> 13.0"
31
- spec.add_development_dependency "rspec", "~> 3.0"
32
-
33
29
  spec.add_dependency "faraday", "~> 2.7.0"
34
30
  spec.add_dependency "relaton-bib", "~> 1.14.0"
31
+ spec.add_dependency "relaton-index", "~> 0.2.0"
35
32
  end
metadata CHANGED
@@ -1,85 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-calconnect
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.14.1
4
+ version: 1.14.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-20 00:00:00.000000000 Z
11
+ date: 2023-05-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: equivalent-xml
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '0.6'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '0.6'
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '13.0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '13.0'
41
- - !ruby/object:Gem::Dependency
42
- name: rspec
14
+ name: faraday
43
15
  requirement: !ruby/object:Gem::Requirement
44
16
  requirements:
45
17
  - - "~>"
46
18
  - !ruby/object:Gem::Version
47
- version: '3.0'
48
- type: :development
19
+ version: 2.7.0
20
+ type: :runtime
49
21
  prerelease: false
50
22
  version_requirements: !ruby/object:Gem::Requirement
51
23
  requirements:
52
24
  - - "~>"
53
25
  - !ruby/object:Gem::Version
54
- version: '3.0'
26
+ version: 2.7.0
55
27
  - !ruby/object:Gem::Dependency
56
- name: faraday
28
+ name: relaton-bib
57
29
  requirement: !ruby/object:Gem::Requirement
58
30
  requirements:
59
31
  - - "~>"
60
32
  - !ruby/object:Gem::Version
61
- version: 2.7.0
33
+ version: 1.14.0
62
34
  type: :runtime
63
35
  prerelease: false
64
36
  version_requirements: !ruby/object:Gem::Requirement
65
37
  requirements:
66
38
  - - "~>"
67
39
  - !ruby/object:Gem::Version
68
- version: 2.7.0
40
+ version: 1.14.0
69
41
  - !ruby/object:Gem::Dependency
70
- name: relaton-bib
42
+ name: relaton-index
71
43
  requirement: !ruby/object:Gem::Requirement
72
44
  requirements:
73
45
  - - "~>"
74
46
  - !ruby/object:Gem::Version
75
- version: 1.14.0
47
+ version: 0.2.0
76
48
  type: :runtime
77
49
  prerelease: false
78
50
  version_requirements: !ruby/object:Gem::Requirement
79
51
  requirements:
80
52
  - - "~>"
81
53
  - !ruby/object:Gem::Version
82
- version: 1.14.0
54
+ version: 0.2.0
83
55
  description: 'RelatonIso: retrieve CC Standards for bibliographic use using the IsoBibliographicItem
84
56
  model'
85
57
  email: