relaton-bipm 1.18.0 → 1.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/relaton_bipm/bipm_bibliography.rb +2 -2
- data/lib/relaton_bipm/bipm_si_brochure_parser.rb +13 -13
- data/lib/relaton_bipm/data_outcomes_parser.rb +1 -1
- data/lib/relaton_bipm/id_parser.rb +146 -59
- data/lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb +9 -1
- data/lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb +3 -3
- data/lib/relaton_bipm/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a0ceb817fce307673ac91a29c26c09684bb8309aad1b522c33e4d76859db4c90
|
|
4
|
+
data.tar.gz: 075b945ebc0792e814e38d811579a9a91209339cf91874fa2af572402ee05f5e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: db4ad0e7175acc4f78f788f443aa4affddee29e9352cb27d5d0290f8dd27512506724a2c6f0e6a417456a8cfba24b041840e43473700d4501b107ec8b93f99a8
|
|
7
|
+
data.tar.gz: 23638af28bddb83784b67c89e7b5d523e7c09147bdc95cc920d17cbeef713f2c7f7d8d7f7790a9593b1e4716b7f1386c655a8cae5bb89fa889682789180b57ac
|
|
@@ -46,7 +46,7 @@ module RelatonBipm
|
|
|
46
46
|
# @return [RelatonBipm::BipmBibliographicItem]
|
|
47
47
|
#
|
|
48
48
|
def get_bipm(reference) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
49
|
-
ref_id = Id.new reference
|
|
49
|
+
ref_id = Id.new.parse reference
|
|
50
50
|
rows = index.search { |r| ref_id == r[:id] }
|
|
51
51
|
return unless rows.any?
|
|
52
52
|
|
|
@@ -63,7 +63,7 @@ module RelatonBipm
|
|
|
63
63
|
|
|
64
64
|
def index
|
|
65
65
|
Relaton::Index.find_or_create(
|
|
66
|
-
:bipm, url: "#{GH_ENDPOINT}index2.zip", file: INDEX_FILE, id_keys: %i[group type number year corr]
|
|
66
|
+
:bipm, url: "#{GH_ENDPOINT}index2.zip", file: INDEX_FILE, id_keys: %i[group type number year corr part append]
|
|
67
67
|
)
|
|
68
68
|
end
|
|
69
69
|
|
|
@@ -36,7 +36,7 @@ module RelatonBipm
|
|
|
36
36
|
basename = File.join @data_fetcher.output, File.basename(f).sub(/(?:-(?:en|fr))?\.rxl$/, "")
|
|
37
37
|
outfile = "#{basename}.#{@data_fetcher.ext}"
|
|
38
38
|
key = hash1["docnumber"] || basename
|
|
39
|
-
@data_fetcher.index2.add_or_update Id.new(key).to_hash, outfile
|
|
39
|
+
@data_fetcher.index2.add_or_update Id.new.parse(key).to_hash, outfile
|
|
40
40
|
hash = if File.exist? outfile
|
|
41
41
|
warn_duplicate = false
|
|
42
42
|
hash2 = YAML.load_file outfile
|
|
@@ -60,19 +60,19 @@ module RelatonBipm
|
|
|
60
60
|
# @return [void]
|
|
61
61
|
#
|
|
62
62
|
def fix_si_brochure_id(hash) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
return unless did["id"] == "BIPM Brochure"
|
|
63
|
+
# isbn = hash["docid"].detect { |id| id["type"] == "ISBN" }
|
|
64
|
+
# num = isbn && isbn["id"] == "978-92-822-2272-0" ? "SI Brochure" : "SI Brochure, Appendix 4"
|
|
66
65
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
hash["
|
|
75
|
-
|
|
66
|
+
hash["docid"].each do |id|
|
|
67
|
+
next unless id["type"] == "BIPM" && id["id"].match?(/BIPM Brochure/i)
|
|
68
|
+
|
|
69
|
+
id["primary"] = true
|
|
70
|
+
id["id"].sub!(/(?<=^BIPM\s)(Brochure)/i, "SI \\1")
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
num = hash["docid"].detect { |id| id["primary"] && id["language"] == "en" }["id"]
|
|
74
|
+
hash["docnumber"].sub!(/^Brochure$/i, num.sub(/^BIPM\s/, ""))
|
|
75
|
+
hash["id"] = num.gsub(/[,\s]/, "")
|
|
76
76
|
end
|
|
77
77
|
|
|
78
78
|
#
|
|
@@ -230,7 +230,7 @@ module RelatonBipm
|
|
|
230
230
|
# @param [String] path path to YAML file
|
|
231
231
|
#
|
|
232
232
|
def add_to_index(item, path) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
233
|
-
key = Id.new(item.docnumber).to_hash
|
|
233
|
+
key = Id.new.parse(item.docnumber).to_hash
|
|
234
234
|
@data_fetcher.index2.add_or_update key, path
|
|
235
235
|
end
|
|
236
236
|
|
|
@@ -1,60 +1,63 @@
|
|
|
1
1
|
module RelatonBipm
|
|
2
2
|
class Id
|
|
3
|
-
class Parser < Parslet::Parser
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
3
|
+
# class Parser < Parslet::Parser
|
|
4
|
+
# rule(:space) { match("\s").repeat(1) }
|
|
5
|
+
# rule(:space?) { space.maybe }
|
|
6
|
+
# rule(:comma) { str(",") >> space? }
|
|
7
|
+
# rule(:lparen) { str("(") }
|
|
8
|
+
# rule(:rparen) { str(")") }
|
|
9
|
+
# rule(:slash) { str("/") }
|
|
10
|
+
# rule(:num) { match["0-9"].repeat(1) }
|
|
11
|
+
|
|
12
|
+
# rule(:delimeter) { str("--") >> space }
|
|
13
|
+
# rule(:delimeter?) { delimeter.maybe }
|
|
14
|
+
|
|
15
|
+
# rule(:lang) { comma >> space? >> match["A-Z"].repeat(1, 2).as(:lang) }
|
|
16
|
+
# rule(:lang?) { lang.maybe }
|
|
17
|
+
|
|
18
|
+
# rule(:numdash) { match["A-Z0-9-"].repeat(1).as(:number) }
|
|
19
|
+
# rule(:number) { numdash >> space? }
|
|
20
|
+
# rule(:number?) { number.maybe }
|
|
21
|
+
# rule(:num_suff) { numdash >> match["a-z"].repeat(1, 2) >> space }
|
|
22
|
+
|
|
23
|
+
# rule(:year) { match["0-9"].repeat(4, 4).as(:year) }
|
|
24
|
+
# rule(:year_paren) { lparen >> year >> lang? >> rparen }
|
|
25
|
+
# rule(:num_year) { number? >> year_paren }
|
|
26
|
+
# rule(:year_num) { year >> str("-") >> number }
|
|
27
|
+
# rule(:num_and_year) { num_year | year_num | number }
|
|
28
|
+
|
|
29
|
+
# rule(:sect) { lparen >> match["IVX"].repeat >> rparen }
|
|
30
|
+
# rule(:suff) { match["a-zA-Z-"].repeat(1) }
|
|
31
|
+
# rule(:cgmp) { str("CGPM") }
|
|
32
|
+
# rule(:cipm) { str("CIPM") >> (str(" MRA") | match["A-Z-"]).maybe }
|
|
33
|
+
# rule(:cc) { str("CC") >> suff >> sect.maybe }
|
|
34
|
+
# rule(:jc) { str("JC") >> suff }
|
|
35
|
+
# rule(:cec) { str("CEC") }
|
|
36
|
+
# rule(:wgms) { str("WG-MS") }
|
|
37
|
+
# rule(:group) { (cgmp | cipm | cc | jc | cec | wgms).as(:group) }
|
|
38
|
+
|
|
39
|
+
# rule(:type) { match["[:alpha:]"].repeat(1).as(:type) >> space }
|
|
40
|
+
|
|
41
|
+
# rule(:type_group) { type >> group >> slash >> num_and_year }
|
|
42
|
+
# rule(:group_type) { group >> space >> delimeter? >> type >> num_and_year }
|
|
43
|
+
# rule(:group_num) { group >> space >> num_suff >> type >> year_paren }
|
|
44
|
+
# rule(:outcome) { group_num | group_type | type_group }
|
|
45
|
+
|
|
46
|
+
# rule(:part_partie) { str("Part") | str("Partie") }
|
|
47
|
+
# rule(:part) { comma >> part_partie >> space >> num.as(:part) }
|
|
48
|
+
# rule(:append) { (comma | space) >> (str("Appendix") | str("Annexe")) >> space >> num.as(:append) }
|
|
49
|
+
# rule(:brochure) { str("SI").as(:group) >> space >> str("Brochure").as(:type) >> (part | append).maybe }
|
|
50
|
+
|
|
51
|
+
# rule(:metrologia) { str("Metrologia").as(:group) >> (space >> match["a-zA-Z0-9\s"].repeat(1).as(:number)).maybe }
|
|
52
|
+
|
|
53
|
+
# rule(:corr) { space >> str("Corrigendum").as(:corr) }
|
|
54
|
+
# rule(:corr?) { corr.maybe }
|
|
55
|
+
# rule(:jcgm) { group >> space >> numdash >> (str(":") >> year).maybe >> corr? }
|
|
56
|
+
|
|
57
|
+
# rule(:result) { outcome | brochure | metrologia | jcgm }
|
|
58
|
+
|
|
59
|
+
# root :result
|
|
60
|
+
# end
|
|
58
61
|
|
|
59
62
|
TYPES = {
|
|
60
63
|
"Resolution" => "RES",
|
|
@@ -75,16 +78,98 @@ module RelatonBipm
|
|
|
75
78
|
#
|
|
76
79
|
# Create a new Id object
|
|
77
80
|
#
|
|
81
|
+
def initialize
|
|
82
|
+
# @id = Parser.new.parse(id)
|
|
83
|
+
# @id = parse(id)
|
|
84
|
+
# rescue Parslet::ParseFailed => e
|
|
85
|
+
# Util.warn "WARNING: Incorrect reference: `#{id}`"
|
|
86
|
+
# warn e.parse_failure_cause.ascii_tree
|
|
87
|
+
# raise RelatonBib::RequestError, e
|
|
88
|
+
end
|
|
89
|
+
|
|
78
90
|
# @param [String] id id string
|
|
79
91
|
#
|
|
80
|
-
def
|
|
81
|
-
|
|
82
|
-
|
|
92
|
+
def parse(id)
|
|
93
|
+
# str = StringScanner.new id
|
|
94
|
+
match = parse_outcome(id) || parse_brochure(id) || parse_metrologia(id) || parse_jcgm(id)
|
|
95
|
+
@id = match.named_captures.compact.transform_keys(&:to_sym)
|
|
96
|
+
self
|
|
97
|
+
rescue StandardError => e
|
|
83
98
|
Util.warn "WARNING: Incorrect reference: `#{id}`"
|
|
84
|
-
# warn "[relaton-bipm] #{e.parse_failure_cause.ascii_tree}"
|
|
85
99
|
raise RelatonBib::RequestError, e
|
|
86
100
|
end
|
|
87
101
|
|
|
102
|
+
def parse_outcome(id)
|
|
103
|
+
parse_group_num(id) || parse_group_type(id) || parse_type_group(id)
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def parse_group_num(id)
|
|
107
|
+
%r{^#{group}\s#{number}[a-z]{1,2}\s#{type}\s#{year_lang}$}.match(id)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def parse_group_type(id)
|
|
111
|
+
%r{^#{group}\s(?:--\s)?#{type}\s#{num_and_year}$}.match(id)
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def parse_type_group(id)
|
|
115
|
+
%r{^#{type}\s#{group}\/#{num_and_year}$}.match(id)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def group
|
|
119
|
+
"(?<group>CGPM|CIPM(?:\\sMRA|[A-Z-])?|CC[a-zA-Z-]+[IVX]*|JC[a-zA-Z-]+|CEC|WG-MS)"
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
def type; "(?<type>[[:alpha:]]+)"; end
|
|
123
|
+
def number; "(?<number>[A-Z0-9-]+)"; end
|
|
124
|
+
def year; "(?<year>\\d{4})"; end
|
|
125
|
+
def lang; ",\\s?(?<lang>[A-Z]{1,2})"; end
|
|
126
|
+
def year_lang; "\\(#{year}(?:#{lang})?\\)"; end
|
|
127
|
+
def num_and_year; "(?:(?:#{number}\\s)?#{year_lang}|#{year}-#{number}|#{number})"; end
|
|
128
|
+
|
|
129
|
+
def parse_brochure(id)
|
|
130
|
+
%r{^
|
|
131
|
+
(?<group>SI)\s(?<type>Brochure)
|
|
132
|
+
(?:,?\s(?:(?:Part|Partie)\s(?<part>\d+)|(?:Appendix|Annexe)\s(?<append>\d+)))?
|
|
133
|
+
$}x.match(id)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def parse_metrologia(id)
|
|
137
|
+
%r{^(?<group>Metrologia)(?:\s(?<number>[a-zA-Z0-9\s]+))?$}.match(id)
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def parse_jcgm(id)
|
|
141
|
+
%r{^#{group}\s#{number}(?::#{year})?(?:\s(?<corr>Corrigendum))?$}.match(id)
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# def parse_gorup_num(str)
|
|
145
|
+
# return unless group = parse_group(str)
|
|
146
|
+
|
|
147
|
+
# return unless str.scan(" ") && num_suff = parse_num_suff(str)
|
|
148
|
+
|
|
149
|
+
# return unless type = parse_type(str)
|
|
150
|
+
|
|
151
|
+
# return unless year = parse_year_parent(str)
|
|
152
|
+
|
|
153
|
+
# { group: group, number: num_suff, type: type, year: year }
|
|
154
|
+
# end
|
|
155
|
+
|
|
156
|
+
# def parse_group(str)
|
|
157
|
+
# str.scan %r{CGPM|CIPM(?:\sMRA|[A-Z-])?|CC[a-zA-Z-]+[IVX]*|JC[a-zA-Z-]|CEC|WG-MS}
|
|
158
|
+
# end
|
|
159
|
+
|
|
160
|
+
# def parse_num_suff(str)
|
|
161
|
+
# num = parse_numdash(str)
|
|
162
|
+
# num if num && str.scan(/[a-z]{1,2}\s/)
|
|
163
|
+
# end
|
|
164
|
+
|
|
165
|
+
# def parse_number(str)
|
|
166
|
+
# parse_numdash(str)
|
|
167
|
+
# end
|
|
168
|
+
|
|
169
|
+
# def parse_numdash(str)
|
|
170
|
+
# str.scan(/[A-Z0-9-]+/)
|
|
171
|
+
# end
|
|
172
|
+
|
|
88
173
|
#
|
|
89
174
|
# Compare two Id objects
|
|
90
175
|
#
|
|
@@ -129,6 +214,8 @@ module RelatonBipm
|
|
|
129
214
|
hash[:number] = norm_num unless norm_num.nil? || norm_num.empty?
|
|
130
215
|
hash[:year] = src[:year].to_s if src[:year]
|
|
131
216
|
hash[:corr] = true if src[:corr]
|
|
217
|
+
hash[:part] = src[:part].to_s if src[:part]
|
|
218
|
+
hash[:append] = src[:append].to_s if src[:append]
|
|
132
219
|
hash[:lang] = src[:lang].to_s if src[:lang]
|
|
133
220
|
hash
|
|
134
221
|
end
|
|
@@ -2,7 +2,7 @@ module RelatonBipm
|
|
|
2
2
|
module RawdataBipmMetrologia
|
|
3
3
|
class ArticleParser
|
|
4
4
|
ATTRS = %i[docid title contributor date copyright abstract relation series
|
|
5
|
-
extent type doctype].freeze
|
|
5
|
+
extent type doctype link].freeze
|
|
6
6
|
#
|
|
7
7
|
# Create new parser and parse document
|
|
8
8
|
#
|
|
@@ -319,6 +319,14 @@ module RelatonBipm
|
|
|
319
319
|
def parse_doctype
|
|
320
320
|
DocumentType.new type: "article"
|
|
321
321
|
end
|
|
322
|
+
|
|
323
|
+
def parse_link
|
|
324
|
+
@meta.xpath("./article-id[@pub-id-type='doi']").each_with_object([]) do |l, a|
|
|
325
|
+
url = "https://doi.org/#{l.text}"
|
|
326
|
+
a << RelatonBib::TypedUri.new(content: url, type: "src")
|
|
327
|
+
a << RelatonBib::TypedUri.new(content: url, type: "doi")
|
|
328
|
+
end
|
|
329
|
+
end
|
|
322
330
|
end
|
|
323
331
|
end
|
|
324
332
|
end
|
|
@@ -33,7 +33,7 @@ module RelatonBipm
|
|
|
33
33
|
item = ArticleParser.parse path
|
|
34
34
|
file = "#{item.docidentifier.first.id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
|
|
35
35
|
out_path = File.join(@data_fetcher.output, file)
|
|
36
|
-
key = Id.new(item.docidentifier.first.id).to_hash
|
|
36
|
+
key = Id.new.parse(item.docidentifier.first.id).to_hash
|
|
37
37
|
@data_fetcher.index2.add_or_update key, out_path
|
|
38
38
|
@data_fetcher.write_file out_path, item
|
|
39
39
|
end
|
|
@@ -76,7 +76,7 @@ module RelatonBipm
|
|
|
76
76
|
)
|
|
77
77
|
file = "#{id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
|
|
78
78
|
path = File.join(@data_fetcher.output, file)
|
|
79
|
-
@data_fetcher.index2.add_or_update Id.new(id).to_hash, path
|
|
79
|
+
@data_fetcher.index2.add_or_update Id.new.parse(id).to_hash, path
|
|
80
80
|
@data_fetcher.write_file path, item
|
|
81
81
|
end
|
|
82
82
|
|
|
@@ -128,7 +128,7 @@ module RelatonBipm
|
|
|
128
128
|
#
|
|
129
129
|
# Fetch relations
|
|
130
130
|
#
|
|
131
|
-
# @
|
|
131
|
+
# @see #fetch_metrologia
|
|
132
132
|
#
|
|
133
133
|
# @return [Array<RelatonBib::DocumentRelation>] relations
|
|
134
134
|
#
|
data/lib/relaton_bipm/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-bipm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.18.0
|
|
4
|
+
version: 1.18.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-
|
|
11
|
+
date: 2024-03-23 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: faraday
|