relaton-bipm 1.18.0 → 1.18.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/relaton_bipm/bipm_bibliography.rb +2 -2
- data/lib/relaton_bipm/bipm_si_brochure_parser.rb +13 -13
- data/lib/relaton_bipm/data_outcomes_parser.rb +1 -1
- data/lib/relaton_bipm/id_parser.rb +146 -59
- data/lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb +9 -1
- data/lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb +3 -3
- data/lib/relaton_bipm/version.rb +1 -1
- data/relaton_bipm.gemspec +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e3b95a4edc9d788cb85a8ac616dcaf8c3ae1957a3138ee45c049e20d92cd168
|
4
|
+
data.tar.gz: bf39e7f015de4adddbda44451bac4efc9691f1e50df4cc6ea5969067677a5f44
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 31ae9b9e3a97df4ee2bbaaa539a496babf1a28de8749fff8dbdd076251d63f188ac1ec88615ba651764fdfa730f242c4dbef535eb2a1fbe1d61d674e8970e631
|
7
|
+
data.tar.gz: 4b883547bd97402ec22c9896c6aa535708748358aabf637755a2f305c7d64a30cbb2526ef689d3398fb4d5592680f5c40c87da61e71e50c4059d6df3dbd2a260
|
@@ -46,7 +46,7 @@ module RelatonBipm
|
|
46
46
|
# @return [RelatonBipm::BipmBibliographicItem]
|
47
47
|
#
|
48
48
|
def get_bipm(reference) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
49
|
-
ref_id = Id.new reference
|
49
|
+
ref_id = Id.new.parse reference
|
50
50
|
rows = index.search { |r| ref_id == r[:id] }
|
51
51
|
return unless rows.any?
|
52
52
|
|
@@ -63,7 +63,7 @@ module RelatonBipm
|
|
63
63
|
|
64
64
|
def index
|
65
65
|
Relaton::Index.find_or_create(
|
66
|
-
:bipm, url: "#{GH_ENDPOINT}index2.zip", file: INDEX_FILE, id_keys: %i[group type number year corr]
|
66
|
+
:bipm, url: "#{GH_ENDPOINT}index2.zip", file: INDEX_FILE, id_keys: %i[group type number year corr part append]
|
67
67
|
)
|
68
68
|
end
|
69
69
|
|
@@ -36,7 +36,7 @@ module RelatonBipm
|
|
36
36
|
basename = File.join @data_fetcher.output, File.basename(f).sub(/(?:-(?:en|fr))?\.rxl$/, "")
|
37
37
|
outfile = "#{basename}.#{@data_fetcher.ext}"
|
38
38
|
key = hash1["docnumber"] || basename
|
39
|
-
@data_fetcher.index2.add_or_update Id.new(key).to_hash, outfile
|
39
|
+
@data_fetcher.index2.add_or_update Id.new.parse(key).to_hash, outfile
|
40
40
|
hash = if File.exist? outfile
|
41
41
|
warn_duplicate = false
|
42
42
|
hash2 = YAML.load_file outfile
|
@@ -60,19 +60,19 @@ module RelatonBipm
|
|
60
60
|
# @return [void]
|
61
61
|
#
|
62
62
|
def fix_si_brochure_id(hash) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
63
|
-
|
64
|
-
|
65
|
-
return unless did["id"] == "BIPM Brochure"
|
63
|
+
# isbn = hash["docid"].detect { |id| id["type"] == "ISBN" }
|
64
|
+
# num = isbn && isbn["id"] == "978-92-822-2272-0" ? "SI Brochure" : "SI Brochure, Appendix 4"
|
66
65
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
hash["
|
75
|
-
|
66
|
+
hash["docid"].each do |id|
|
67
|
+
next unless id["type"] == "BIPM" && id["id"].match?(/BIPM Brochure/i)
|
68
|
+
|
69
|
+
id["primary"] = true
|
70
|
+
id["id"].sub!(/(?<=^BIPM\s)(Brochure)/i, "SI \\1")
|
71
|
+
end
|
72
|
+
|
73
|
+
num = hash["docid"].detect { |id| id["primary"] && id["language"] == "en" }["id"]
|
74
|
+
hash["docnumber"].sub!(/^Brochure$/i, num.sub(/^BIPM\s/, ""))
|
75
|
+
hash["id"] = num.gsub(/[,\s]/, "")
|
76
76
|
end
|
77
77
|
|
78
78
|
#
|
@@ -230,7 +230,7 @@ module RelatonBipm
|
|
230
230
|
# @param [String] path path to YAML file
|
231
231
|
#
|
232
232
|
def add_to_index(item, path) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
233
|
-
key = Id.new(item.docnumber).to_hash
|
233
|
+
key = Id.new.parse(item.docnumber).to_hash
|
234
234
|
@data_fetcher.index2.add_or_update key, path
|
235
235
|
end
|
236
236
|
|
@@ -1,60 +1,63 @@
|
|
1
1
|
module RelatonBipm
|
2
2
|
class Id
|
3
|
-
class Parser < Parslet::Parser
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
3
|
+
# class Parser < Parslet::Parser
|
4
|
+
# rule(:space) { match("\s").repeat(1) }
|
5
|
+
# rule(:space?) { space.maybe }
|
6
|
+
# rule(:comma) { str(",") >> space? }
|
7
|
+
# rule(:lparen) { str("(") }
|
8
|
+
# rule(:rparen) { str(")") }
|
9
|
+
# rule(:slash) { str("/") }
|
10
|
+
# rule(:num) { match["0-9"].repeat(1) }
|
11
|
+
|
12
|
+
# rule(:delimeter) { str("--") >> space }
|
13
|
+
# rule(:delimeter?) { delimeter.maybe }
|
14
|
+
|
15
|
+
# rule(:lang) { comma >> space? >> match["A-Z"].repeat(1, 2).as(:lang) }
|
16
|
+
# rule(:lang?) { lang.maybe }
|
17
|
+
|
18
|
+
# rule(:numdash) { match["A-Z0-9-"].repeat(1).as(:number) }
|
19
|
+
# rule(:number) { numdash >> space? }
|
20
|
+
# rule(:number?) { number.maybe }
|
21
|
+
# rule(:num_suff) { numdash >> match["a-z"].repeat(1, 2) >> space }
|
22
|
+
|
23
|
+
# rule(:year) { match["0-9"].repeat(4, 4).as(:year) }
|
24
|
+
# rule(:year_paren) { lparen >> year >> lang? >> rparen }
|
25
|
+
# rule(:num_year) { number? >> year_paren }
|
26
|
+
# rule(:year_num) { year >> str("-") >> number }
|
27
|
+
# rule(:num_and_year) { num_year | year_num | number }
|
28
|
+
|
29
|
+
# rule(:sect) { lparen >> match["IVX"].repeat >> rparen }
|
30
|
+
# rule(:suff) { match["a-zA-Z-"].repeat(1) }
|
31
|
+
# rule(:cgmp) { str("CGPM") }
|
32
|
+
# rule(:cipm) { str("CIPM") >> (str(" MRA") | match["A-Z-"]).maybe }
|
33
|
+
# rule(:cc) { str("CC") >> suff >> sect.maybe }
|
34
|
+
# rule(:jc) { str("JC") >> suff }
|
35
|
+
# rule(:cec) { str("CEC") }
|
36
|
+
# rule(:wgms) { str("WG-MS") }
|
37
|
+
# rule(:group) { (cgmp | cipm | cc | jc | cec | wgms).as(:group) }
|
38
|
+
|
39
|
+
# rule(:type) { match["[:alpha:]"].repeat(1).as(:type) >> space }
|
40
|
+
|
41
|
+
# rule(:type_group) { type >> group >> slash >> num_and_year }
|
42
|
+
# rule(:group_type) { group >> space >> delimeter? >> type >> num_and_year }
|
43
|
+
# rule(:group_num) { group >> space >> num_suff >> type >> year_paren }
|
44
|
+
# rule(:outcome) { group_num | group_type | type_group }
|
45
|
+
|
46
|
+
# rule(:part_partie) { str("Part") | str("Partie") }
|
47
|
+
# rule(:part) { comma >> part_partie >> space >> num.as(:part) }
|
48
|
+
# rule(:append) { (comma | space) >> (str("Appendix") | str("Annexe")) >> space >> num.as(:append) }
|
49
|
+
# rule(:brochure) { str("SI").as(:group) >> space >> str("Brochure").as(:type) >> (part | append).maybe }
|
50
|
+
|
51
|
+
# rule(:metrologia) { str("Metrologia").as(:group) >> (space >> match["a-zA-Z0-9\s"].repeat(1).as(:number)).maybe }
|
52
|
+
|
53
|
+
# rule(:corr) { space >> str("Corrigendum").as(:corr) }
|
54
|
+
# rule(:corr?) { corr.maybe }
|
55
|
+
# rule(:jcgm) { group >> space >> numdash >> (str(":") >> year).maybe >> corr? }
|
56
|
+
|
57
|
+
# rule(:result) { outcome | brochure | metrologia | jcgm }
|
58
|
+
|
59
|
+
# root :result
|
60
|
+
# end
|
58
61
|
|
59
62
|
TYPES = {
|
60
63
|
"Resolution" => "RES",
|
@@ -75,16 +78,98 @@ module RelatonBipm
|
|
75
78
|
#
|
76
79
|
# Create a new Id object
|
77
80
|
#
|
81
|
+
def initialize
|
82
|
+
# @id = Parser.new.parse(id)
|
83
|
+
# @id = parse(id)
|
84
|
+
# rescue Parslet::ParseFailed => e
|
85
|
+
# Util.warn "WARNING: Incorrect reference: `#{id}`"
|
86
|
+
# warn e.parse_failure_cause.ascii_tree
|
87
|
+
# raise RelatonBib::RequestError, e
|
88
|
+
end
|
89
|
+
|
78
90
|
# @param [String] id id string
|
79
91
|
#
|
80
|
-
def
|
81
|
-
|
82
|
-
|
92
|
+
def parse(id)
|
93
|
+
# str = StringScanner.new id
|
94
|
+
match = parse_outcome(id) || parse_brochure(id) || parse_metrologia(id) || parse_jcgm(id)
|
95
|
+
@id = match.named_captures.compact.transform_keys(&:to_sym)
|
96
|
+
self
|
97
|
+
rescue StandardError => e
|
83
98
|
Util.warn "WARNING: Incorrect reference: `#{id}`"
|
84
|
-
# warn "[relaton-bipm] #{e.parse_failure_cause.ascii_tree}"
|
85
99
|
raise RelatonBib::RequestError, e
|
86
100
|
end
|
87
101
|
|
102
|
+
def parse_outcome(id)
|
103
|
+
parse_group_num(id) || parse_group_type(id) || parse_type_group(id)
|
104
|
+
end
|
105
|
+
|
106
|
+
def parse_group_num(id)
|
107
|
+
%r{^#{group}\s#{number}[a-z]{1,2}\s#{type}\s#{year_lang}$}.match(id)
|
108
|
+
end
|
109
|
+
|
110
|
+
def parse_group_type(id)
|
111
|
+
%r{^#{group}\s(?:--\s)?#{type}\s#{num_and_year}$}.match(id)
|
112
|
+
end
|
113
|
+
|
114
|
+
def parse_type_group(id)
|
115
|
+
%r{^#{type}\s#{group}\/#{num_and_year}$}.match(id)
|
116
|
+
end
|
117
|
+
|
118
|
+
def group
|
119
|
+
"(?<group>CGPM|CIPM(?:\\sMRA|[A-Z-])?|CC[a-zA-Z-]+[IVX]*|JC[a-zA-Z-]+|CEC|WG-MS)"
|
120
|
+
end
|
121
|
+
|
122
|
+
# Regexp source fragment for the document type token.
#
# Matches one or more alphabetic characters and exposes them as the
# named capture `type`. Meant to be interpolated into a larger pattern.
#
# @return [String] regexp source text
def type
  "(?<type>[[:alpha:]]+)"
end
|
123
|
+
# Regexp source fragment for the document number.
#
# Matches one or more uppercase letters, digits or dashes and exposes
# them as the named capture `number`. Meant to be interpolated into a
# larger pattern.
#
# @return [String] regexp source text
def number
  "(?<number>[A-Z0-9-]+)"
end
|
124
|
+
# Regexp source fragment for a four-digit year.
#
# Exposes the digits as the named capture `year`. Meant to be
# interpolated into a larger pattern.
#
# @return [String] regexp source text
def year
  "(?<year>\\d{4})"
end
|
125
|
+
# Regexp source fragment for a language suffix.
#
# Matches a comma, an optional whitespace character, then one or two
# uppercase letters exposed as the named capture `lang`. Meant to be
# interpolated into a larger pattern.
#
# @return [String] regexp source text
def lang
  ",\\s?(?<lang>[A-Z]{1,2})"
end
|
126
|
+
# Regexp source fragment for a parenthesised year with an optional
# language suffix.
#
# Built from the `year` fragment followed by an optional non-capturing
# group wrapping the `lang` fragment, all between literal parentheses.
#
# @return [String] regexp source text
def year_lang
  ["\\(", year, "(?:", lang, ")?\\)"].join
end
|
127
|
+
# Regexp source fragment covering the accepted number/year layouts.
#
# Alternatives, tried in this order:
#   1. optional number and whitespace, then a parenthesised year (`year_lang`)
#   2. year, a dash, then number
#   3. number alone
#
# @return [String] regexp source text (one non-capturing alternation group)
def num_and_year
  number_then_year = "(?:#{number}\\s)?#{year_lang}"
  year_then_number = "#{year}-#{number}"
  alternatives = [number_then_year, year_then_number, number]
  "(?:#{alternatives.join('|')})"
end
|
128
|
+
|
129
|
+
def parse_brochure(id)
|
130
|
+
%r{^
|
131
|
+
(?<group>SI)\s(?<type>Brochure)
|
132
|
+
(?:,?\s(?:(?:Part|Partie)\s(?<part>\d+)|(?:Appendix|Annexe)\s(?<append>\d+)))?
|
133
|
+
$}x.match(id)
|
134
|
+
end
|
135
|
+
|
136
|
+
def parse_metrologia(id)
|
137
|
+
%r{^(?<group>Metrologia)(?:\s(?<number>[a-zA-Z0-9\s]+))?$}.match(id)
|
138
|
+
end
|
139
|
+
|
140
|
+
def parse_jcgm(id)
|
141
|
+
%r{^#{group}\s#{number}(?::#{year})?(?:\s(?<corr>Corrigendum))?$}.match(id)
|
142
|
+
end
|
143
|
+
|
144
|
+
# def parse_group_num(str)
|
145
|
+
# return unless group = parse_group(str)
|
146
|
+
|
147
|
+
# return unless str.scan(" ") && num_suff = parse_num_suff(str)
|
148
|
+
|
149
|
+
# return unless type = parse_type(str)
|
150
|
+
|
151
|
+
# return unless year = parse_year_parent(str)
|
152
|
+
|
153
|
+
# { group: group, number: num_suff, type: type, year: year }
|
154
|
+
# end
|
155
|
+
|
156
|
+
# def parse_group(str)
|
157
|
+
# str.scan %r{CGPM|CIPM(?:\sMRA|[A-Z-])?|CC[a-zA-Z-]+[IVX]*|JC[a-zA-Z-]|CEC|WG-MS}
|
158
|
+
# end
|
159
|
+
|
160
|
+
# def parse_num_suff(str)
|
161
|
+
# num = parse_numdash(str)
|
162
|
+
# num if num && str.scan(/[a-z]{1,2}\s/)
|
163
|
+
# end
|
164
|
+
|
165
|
+
# def parse_number(str)
|
166
|
+
# parse_numdash(str)
|
167
|
+
# end
|
168
|
+
|
169
|
+
# def parse_numdash(str)
|
170
|
+
# str.scan(/[A-Z0-9-]+/)
|
171
|
+
# end
|
172
|
+
|
88
173
|
#
|
89
174
|
# Compare two Id objects
|
90
175
|
#
|
@@ -129,6 +214,8 @@ module RelatonBipm
|
|
129
214
|
hash[:number] = norm_num unless norm_num.nil? || norm_num.empty?
|
130
215
|
hash[:year] = src[:year].to_s if src[:year]
|
131
216
|
hash[:corr] = true if src[:corr]
|
217
|
+
hash[:part] = src[:part].to_s if src[:part]
|
218
|
+
hash[:append] = src[:append].to_s if src[:append]
|
132
219
|
hash[:lang] = src[:lang].to_s if src[:lang]
|
133
220
|
hash
|
134
221
|
end
|
@@ -2,7 +2,7 @@ module RelatonBipm
|
|
2
2
|
module RawdataBipmMetrologia
|
3
3
|
class ArticleParser
|
4
4
|
ATTRS = %i[docid title contributor date copyright abstract relation series
|
5
|
-
extent type doctype].freeze
|
5
|
+
extent type doctype link].freeze
|
6
6
|
#
|
7
7
|
# Create new parser and parse document
|
8
8
|
#
|
@@ -319,6 +319,14 @@ module RelatonBipm
|
|
319
319
|
def parse_doctype
|
320
320
|
DocumentType.new type: "article"
|
321
321
|
end
|
322
|
+
|
323
|
+
def parse_link
|
324
|
+
@meta.xpath("./article-id[@pub-id-type='doi']").each_with_object([]) do |l, a|
|
325
|
+
url = "https://doi.org/#{l.text}"
|
326
|
+
a << RelatonBib::TypedUri.new(content: url, type: "src")
|
327
|
+
a << RelatonBib::TypedUri.new(content: url, type: "doi")
|
328
|
+
end
|
329
|
+
end
|
322
330
|
end
|
323
331
|
end
|
324
332
|
end
|
@@ -33,7 +33,7 @@ module RelatonBipm
|
|
33
33
|
item = ArticleParser.parse path
|
34
34
|
file = "#{item.docidentifier.first.id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
|
35
35
|
out_path = File.join(@data_fetcher.output, file)
|
36
|
-
key = Id.new(item.docidentifier.first.id).to_hash
|
36
|
+
key = Id.new.parse(item.docidentifier.first.id).to_hash
|
37
37
|
@data_fetcher.index2.add_or_update key, out_path
|
38
38
|
@data_fetcher.write_file out_path, item
|
39
39
|
end
|
@@ -76,7 +76,7 @@ module RelatonBipm
|
|
76
76
|
)
|
77
77
|
file = "#{id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
|
78
78
|
path = File.join(@data_fetcher.output, file)
|
79
|
-
@data_fetcher.index2.add_or_update Id.new(id).to_hash, path
|
79
|
+
@data_fetcher.index2.add_or_update Id.new.parse(id).to_hash, path
|
80
80
|
@data_fetcher.write_file path, item
|
81
81
|
end
|
82
82
|
|
@@ -128,7 +128,7 @@ module RelatonBipm
|
|
128
128
|
#
|
129
129
|
# Fetch relations
|
130
130
|
#
|
131
|
-
# @
|
131
|
+
# @see #fetch_metrologia
|
132
132
|
#
|
133
133
|
# @return [Array<RelatonBib::DocumentRelation>] relations
|
134
134
|
#
|
data/lib/relaton_bipm/version.rb
CHANGED
data/relaton_bipm.gemspec
CHANGED
@@ -33,7 +33,7 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
|
|
33
33
|
spec.require_paths = ["lib"]
|
34
34
|
|
35
35
|
spec.add_dependency "faraday", "~> 2.7.0"
|
36
|
-
spec.add_dependency "mechanize", "~> 2.
|
36
|
+
spec.add_dependency "mechanize", "~> 2.10"
|
37
37
|
spec.add_dependency "parslet", "~> 2.0.0"
|
38
38
|
spec.add_dependency "relaton-bib", "~> 1.18.0"
|
39
39
|
spec.add_dependency "relaton-index", "~> 0.2.2"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-bipm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.18.0
|
4
|
+
version: 1.18.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 2.
|
33
|
+
version: '2.10'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 2.
|
40
|
+
version: '2.10'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: parslet
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -165,7 +165,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0'
|
167
167
|
requirements: []
|
168
|
-
rubygems_version: 3.3.
|
168
|
+
rubygems_version: 3.3.27
|
169
169
|
signing_key:
|
170
170
|
specification_version: 4
|
171
171
|
summary: 'RelatonBipm: retrieve BIPM Standards for bibliographic use using the BibliographicItem
|