relaton-calconnect 1.14.1 → 1.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/README.adoc +5 -5
- data/lib/relaton_calconnect/cc_bibliography.rb +4 -4
- data/lib/relaton_calconnect/data_fetcher.rb +27 -12
- data/lib/relaton_calconnect/hit.rb +6 -1
- data/lib/relaton_calconnect/hit_collection.rb +41 -35
- data/lib/relaton_calconnect/processor.rb +7 -0
- data/lib/relaton_calconnect/scrapper.rb +1 -1
- data/lib/relaton_calconnect/version.rb +1 -1
- data/lib/relaton_calconnect.rb +1 -0
- data/relaton_calconnect.gemspec +1 -4
- metadata +12 -40
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 181a7e1a48eb95ff2558f0895621f59fb8a52f167c0ccac81433f5346e52e38a
|
4
|
+
data.tar.gz: a24dae24e0803340a07ddf3882a0343bb8ba52f05bb2d923df016d33dfd4e7cb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aab8cae2f19844f9a7d8d999b568d16f1a22344d88d3df81f924ef0e7c225f58d0c05616b6f6b73e3a6d3b8b116bc3df7c33e5ca3f3cd187391c1b7d4d4163fc
|
7
|
+
data.tar.gz: 12b98b7c9ae142bebd87bf4201889c471f28b5c477dc364936e0d94d3c6a3a0c1dd4a8a01cce9367b44183a2fcde70bdde43a50d2f58da04433318ab2f58939a
|
data/Gemfile
CHANGED
data/README.adoc
CHANGED
@@ -50,7 +50,7 @@ item.to_xml
|
|
50
50
|
...
|
51
51
|
</bibitem>"
|
52
52
|
----
|
53
|
-
With argument `bibdata: true` it
|
53
|
+
With argument `bibdata: true` it outputs XML wrapped by `bibdata` element and adds flavour `ext` element.
|
54
54
|
[source,ruby]
|
55
55
|
----
|
56
56
|
item.to_xml bibdata: true
|
@@ -87,7 +87,7 @@ item.link
|
|
87
87
|
#<RelatonBib::TypedUri:0x00007ff2d4b23a08 @content=#<Addressable::URI:0x6b8 URI:https://standards.calconnect.org/csd/cc-10005.rxl>, @type="rxl">]
|
88
88
|
----
|
89
89
|
|
90
|
-
=== Get code
|
90
|
+
=== Get document by code and year
|
91
91
|
[source,ruby]
|
92
92
|
----
|
93
93
|
RelatonCalconnect::CcBibliography.get "CC/DIR 10005", "2019", {}
|
@@ -119,13 +119,13 @@ RelatonCalconnect::CcBibliographicItem.from_hash hash
|
|
119
119
|
|
120
120
|
=== Fetch data
|
121
121
|
|
122
|
-
This gem uses the https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml dataset as one of data sources.
|
122
|
+
This gem uses the https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml dataset as one of the data sources.
|
123
123
|
|
124
|
-
The method `RelatonCalconnect::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the
|
124
|
+
The method `RelatonCalconnect::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
|
125
125
|
Arguments:
|
126
126
|
|
127
127
|
- `output` - folder to save documents (default './data').
|
128
|
-
- `format` - format in which the documents are saved.
|
128
|
+
- `format` - the format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
|
129
129
|
|
130
130
|
[source,ruby]
|
131
131
|
----
|
@@ -74,11 +74,11 @@ module RelatonCalconnect
|
|
74
74
|
# @param missed_years [Array<String>]
|
75
75
|
def fetch_ref_err(code, year, missed_years)
|
76
76
|
id = year ? "#{code} year #{year}" : code
|
77
|
-
warn "[relaton-calconnect] WARNING: no match found online for #{id}. "\
|
78
|
-
|
77
|
+
warn "[relaton-calconnect] WARNING: no match found online for #{id}. " \
|
78
|
+
"The code must be exactly like it is on the standards website."
|
79
79
|
unless missed_years.empty?
|
80
|
-
warn "[relaton-calconnect] (There was no match for #{year}, though "\
|
81
|
-
|
80
|
+
warn "[relaton-calconnect] (There was no match for #{year}, though " \
|
81
|
+
"there were matches found for #{missed_years.join(', ')}.)"
|
82
82
|
end
|
83
83
|
nil
|
84
84
|
end
|
@@ -16,8 +16,19 @@ module RelatonCalconnect
|
|
16
16
|
@output = output
|
17
17
|
@etagfile = File.join output, "etag.txt"
|
18
18
|
@format = format
|
19
|
+
@ext = format.sub "bibxml", "xml"
|
20
|
+
@files = []
|
21
|
+
@index = Relaton::Index.find_or_create :CC, file: "index-v1.yaml"
|
19
22
|
end
|
20
23
|
|
24
|
+
#
|
25
|
+
# Fetch all the documents from a source
|
26
|
+
#
|
27
|
+
# @param [String] output directory to output documents, default: "data"
|
28
|
+
# @param [String] format output format, default: "yaml"
|
29
|
+
#
|
30
|
+
# @return [void]
|
31
|
+
#
|
21
32
|
def self.fetch(output: "data", format: "yaml")
|
22
33
|
t1 = Time.now
|
23
34
|
puts "Started at: #{t1}"
|
@@ -31,18 +42,16 @@ module RelatonCalconnect
|
|
31
42
|
#
|
32
43
|
# fetch data from server and save it to file.
|
33
44
|
#
|
34
|
-
def fetch
|
45
|
+
def fetch # rubocop:disable Metrics/AbcSize
|
35
46
|
resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
|
36
47
|
# return if there aren't any changes since last fetching
|
37
48
|
return unless resp.status == 200
|
38
49
|
|
39
50
|
data = YAML.safe_load resp.body
|
40
51
|
all_success = true
|
41
|
-
data["root"]["items"].each
|
42
|
-
success = parse_page doc
|
43
|
-
all_success &&= success
|
44
|
-
end
|
52
|
+
data["root"]["items"].each { |doc| all_success &&= parse_page doc }
|
45
53
|
self.etag = resp[:etag] if all_success
|
54
|
+
@index.save
|
46
55
|
end
|
47
56
|
|
48
57
|
private
|
@@ -64,14 +73,20 @@ module RelatonCalconnect
|
|
64
73
|
false
|
65
74
|
end
|
66
75
|
|
67
|
-
def write_doc(docid, bib)
|
68
|
-
content = @format
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
76
|
+
def write_doc(docid, bib) # rubocop:disable Metrics/MethodLength
|
77
|
+
content = case @format
|
78
|
+
when "xml" then bib.to_xml(bibdata: true)
|
79
|
+
when "bibxml" then bib.to_bibxml
|
80
|
+
else bib.to_hash.to_yaml
|
81
|
+
end
|
82
|
+
file = File.join @output, "#{docid.upcase.gsub(%r{[/\s:]}, '_')}.#{@ext}"
|
83
|
+
if @files.include? file
|
84
|
+
warn "#{file} exist"
|
85
|
+
else
|
86
|
+
@files << file
|
87
|
+
end
|
88
|
+
@index.add_or_update docid, file
|
73
89
|
File.write file, content, encoding: "UTF-8"
|
74
|
-
# end
|
75
90
|
end
|
76
91
|
|
77
92
|
#
|
@@ -3,7 +3,12 @@ module RelatonCalconnect
|
|
3
3
|
# Parse page.
|
4
4
|
# @return [RelatonCalconnect::CcBibliographicItem]
|
5
5
|
def fetch
|
6
|
-
@fetch ||= Scrapper.parse_page @hit
|
6
|
+
# @fetch ||= Scrapper.parse_page @hit
|
7
|
+
@fetch ||= begin
|
8
|
+
url = "#{HitCollection::GHURL}#{@hit[:file]}"
|
9
|
+
resp = Faraday.get url
|
10
|
+
CcBibliographicItem.from_hash YAML.safe_load(resp.body)
|
11
|
+
end
|
7
12
|
end
|
8
13
|
end
|
9
14
|
end
|
@@ -4,23 +4,29 @@ require "fileutils"
|
|
4
4
|
|
5
5
|
module RelatonCalconnect
|
6
6
|
class HitCollection < RelatonBib::HitCollection
|
7
|
-
ENDPOINT = "https://standards.calconnect.org/relaton/index.yaml".freeze
|
7
|
+
# ENDPOINT = "https://standards.calconnect.org/relaton/index.yaml".freeze
|
8
8
|
# ENDPOINT = "http://127.0.0.1:4000/relaton/index.yaml".freeze
|
9
|
-
DATADIR = File.expand_path ".relaton/calconnect", Dir.home
|
10
|
-
DATAFILE = File.expand_path "bibliography.yml", DATADIR
|
11
|
-
ETAGFILE = File.expand_path "etag.txt", DATADIR
|
9
|
+
# DATADIR = File.expand_path ".relaton/calconnect", Dir.home
|
10
|
+
# DATAFILE = File.expand_path "bibliography.yml", DATADIR
|
11
|
+
# ETAGFILE = File.expand_path "etag.txt", DATADIR
|
12
|
+
GHURL = "https://raw.githubusercontent.com/relaton/relaton-data-calconnect/main/".freeze
|
13
|
+
INDEX_FILE = "index-v1.yaml".freeze
|
12
14
|
|
13
15
|
# @param ref [String]
|
14
16
|
# @param year [String]
|
15
17
|
# @param opts [Hash]
|
16
18
|
def initialize(ref, year = nil)
|
17
19
|
super
|
18
|
-
@array = from_yaml(ref).sort_by do |hit|
|
19
|
-
|
20
|
-
end.reverse
|
20
|
+
# @array = from_yaml(ref).sort_by do |hit|
|
21
|
+
# hit.hit["revdate"] ? Date.parse(hit.hit["revdate"]) : Date.new
|
22
|
+
# end.reverse
|
23
|
+
index = Relaton::Index.find_or_create :CC, url: "#{GHURL}index-v1.zip", file: INDEX_FILE
|
24
|
+
@array = index.search(ref).map do |row|
|
25
|
+
Hit.new(row, self)
|
26
|
+
end
|
21
27
|
end
|
22
28
|
|
23
|
-
private
|
29
|
+
# private
|
24
30
|
|
25
31
|
#
|
26
32
|
# Fetch data from yaml
|
@@ -29,52 +35,52 @@ module RelatonCalconnect
|
|
29
35
|
#
|
30
36
|
# @return [Array<RelatonBib::Hit>]
|
31
37
|
#
|
32
|
-
def from_yaml(docid, **_opts)
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
end
|
38
|
+
# def from_yaml(docid, **_opts)
|
39
|
+
# data["root"]["items"].select do |doc|
|
40
|
+
# doc["docid"] && doc["docid"]["id"].include?(docid)
|
41
|
+
# end.map { |h| Hit.new(h, self) }
|
42
|
+
# end
|
37
43
|
|
38
44
|
#
|
39
45
|
# Fetches YAML data
|
40
46
|
#
|
41
47
|
# @return [Hash]
|
42
|
-
def data
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
end
|
48
|
+
# def data
|
49
|
+
# FileUtils.mkdir_p DATADIR
|
50
|
+
# ctime = File.ctime DATAFILE if File.exist? DATAFILE
|
51
|
+
# fetch_data if !ctime || ctime.to_date < Date.today
|
52
|
+
# @data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
|
53
|
+
# end
|
48
54
|
|
49
55
|
#
|
50
56
|
# fetch data from server and save it to file.
|
51
57
|
#
|
52
|
-
def fetch_data
|
53
|
-
|
54
|
-
|
55
|
-
|
58
|
+
# def fetch_data
|
59
|
+
# resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
|
60
|
+
# # return if there aren't any changes since last fetching
|
61
|
+
# return unless resp.status == 200
|
56
62
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
end
|
63
|
+
# self.etag = resp[:etag]
|
64
|
+
# @data = YAML.safe_load resp.body
|
65
|
+
# File.write DATAFILE, @data.to_yaml, encoding: "UTF-8"
|
66
|
+
# end
|
61
67
|
|
62
68
|
#
|
63
69
|
# Read ETag from file
|
64
70
|
#
|
65
71
|
# @return [String, NilClass]
|
66
|
-
def etag
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
end
|
72
|
+
# def etag
|
73
|
+
# @etag ||= if File.exist? ETAGFILE
|
74
|
+
# File.read ETAGFILE, encoding: "UTF-8"
|
75
|
+
# end
|
76
|
+
# end
|
71
77
|
|
72
78
|
#
|
73
79
|
# Save ETag to file
|
74
80
|
#
|
75
81
|
# @param tag [String]
|
76
|
-
def etag=(e_tag)
|
77
|
-
|
78
|
-
end
|
82
|
+
# def etag=(e_tag)
|
83
|
+
# File.write ETAGFILE, e_tag, encoding: "UTF-8"
|
84
|
+
# end
|
79
85
|
end
|
80
86
|
end
|
@@ -49,5 +49,12 @@ module RelatonCalconnect
|
|
49
49
|
def grammar_hash
|
50
50
|
@grammar_hash ||= ::RelatonCalconnect.grammar_hash
|
51
51
|
end
|
52
|
+
|
53
|
+
#
|
54
|
+
# Remove index file
|
55
|
+
#
|
56
|
+
def remove_index_file
|
57
|
+
Relaton::Index.find_or_create(:CC, url: true, file: HitCollection::INDEX_FILE).remove_file
|
58
|
+
end
|
52
59
|
end
|
53
60
|
end
|
@@ -47,7 +47,7 @@ module RelatonCalconnect
|
|
47
47
|
docid = rxl.at "//docidentifier"
|
48
48
|
docid.add_previous_sibling uri_xml
|
49
49
|
end
|
50
|
-
xml = rxl.to_xml.gsub
|
50
|
+
xml = rxl.to_xml.gsub(%r{(</?)technical-committee(>)}, '\1committee\2')
|
51
51
|
.gsub(%r{type="(?:csd|CC)"(?=>)}i, '\0 primary="true"')
|
52
52
|
RelatonCalconnect::XMLParser.from_xml xml
|
53
53
|
end
|
data/lib/relaton_calconnect.rb
CHANGED
data/relaton_calconnect.gemspec
CHANGED
@@ -26,10 +26,7 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.require_paths = ["lib"]
|
27
27
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
|
28
28
|
|
29
|
-
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
30
|
-
spec.add_development_dependency "rake", "~> 13.0"
|
31
|
-
spec.add_development_dependency "rspec", "~> 3.0"
|
32
|
-
|
33
29
|
spec.add_dependency "faraday", "~> 2.7.0"
|
34
30
|
spec.add_dependency "relaton-bib", "~> 1.14.0"
|
31
|
+
spec.add_dependency "relaton-index", "~> 0.2.0"
|
35
32
|
end
|
metadata
CHANGED
@@ -1,85 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-calconnect
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.14.
|
4
|
+
version: 1.14.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-05-
|
11
|
+
date: 2023-05-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.6'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0.6'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '13.0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '13.0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rspec
|
14
|
+
name: faraday
|
43
15
|
requirement: !ruby/object:Gem::Requirement
|
44
16
|
requirements:
|
45
17
|
- - "~>"
|
46
18
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
48
|
-
type: :
|
19
|
+
version: 2.7.0
|
20
|
+
type: :runtime
|
49
21
|
prerelease: false
|
50
22
|
version_requirements: !ruby/object:Gem::Requirement
|
51
23
|
requirements:
|
52
24
|
- - "~>"
|
53
25
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
26
|
+
version: 2.7.0
|
55
27
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
28
|
+
name: relaton-bib
|
57
29
|
requirement: !ruby/object:Gem::Requirement
|
58
30
|
requirements:
|
59
31
|
- - "~>"
|
60
32
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
33
|
+
version: 1.14.0
|
62
34
|
type: :runtime
|
63
35
|
prerelease: false
|
64
36
|
version_requirements: !ruby/object:Gem::Requirement
|
65
37
|
requirements:
|
66
38
|
- - "~>"
|
67
39
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
40
|
+
version: 1.14.0
|
69
41
|
- !ruby/object:Gem::Dependency
|
70
|
-
name: relaton-
|
42
|
+
name: relaton-index
|
71
43
|
requirement: !ruby/object:Gem::Requirement
|
72
44
|
requirements:
|
73
45
|
- - "~>"
|
74
46
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
47
|
+
version: 0.2.0
|
76
48
|
type: :runtime
|
77
49
|
prerelease: false
|
78
50
|
version_requirements: !ruby/object:Gem::Requirement
|
79
51
|
requirements:
|
80
52
|
- - "~>"
|
81
53
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
54
|
+
version: 0.2.0
|
83
55
|
description: 'RelatonIso: retrieve CC Standards for bibliographic use using the IsoBibliographicItem
|
84
56
|
model'
|
85
57
|
email:
|