relaton-bipm 1.18.0 → 1.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d51b6507b240f142eca33e8276587dc32d0e5e7a3a794d1ab7eaa2b6e2af09ac
4
- data.tar.gz: 491c23ff3ef47f435ec18d605b0bc35806eb872755e338c1d06ce609284bab34
3
+ metadata.gz: a0ceb817fce307673ac91a29c26c09684bb8309aad1b522c33e4d76859db4c90
4
+ data.tar.gz: 075b945ebc0792e814e38d811579a9a91209339cf91874fa2af572402ee05f5e
5
5
  SHA512:
6
- metadata.gz: b362e8a613b038c7890987beb9ebd84f42162a94f8f42149bb763426c2f8bf8536a815006f9a9587b12f1a89e714aaa832f619d22214a88e0549fb9eee11ca06
7
- data.tar.gz: a71ec348afeeb1383bc1f3ea2c5ae11f9c1a34014f0f81462d5bc1cf248566ced81c2043991ff67bd0c40e42b9480a5472abf9d46ed2da1d0b6a3ec9a08a433e
6
+ metadata.gz: db4ad0e7175acc4f78f788f443aa4affddee29e9352cb27d5d0290f8dd27512506724a2c6f0e6a417456a8cfba24b041840e43473700d4501b107ec8b93f99a8
7
+ data.tar.gz: 23638af28bddb83784b67c89e7b5d523e7c09147bdc95cc920d17cbeef713f2c7f7d8d7f7790a9593b1e4716b7f1386c655a8cae5bb89fa889682789180b57ac
@@ -46,7 +46,7 @@ module RelatonBipm
46
46
  # @return [RelatonBipm::BipmBibliographicItem]
47
47
  #
48
48
  def get_bipm(reference) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
49
- ref_id = Id.new reference
49
+ ref_id = Id.new.parse reference
50
50
  rows = index.search { |r| ref_id == r[:id] }
51
51
  return unless rows.any?
52
52
 
@@ -63,7 +63,7 @@ module RelatonBipm
63
63
 
64
64
  def index
65
65
  Relaton::Index.find_or_create(
66
- :bipm, url: "#{GH_ENDPOINT}index2.zip", file: INDEX_FILE, id_keys: %i[group type number year corr]
66
+ :bipm, url: "#{GH_ENDPOINT}index2.zip", file: INDEX_FILE, id_keys: %i[group type number year corr part append]
67
67
  )
68
68
  end
69
69
 
@@ -36,7 +36,7 @@ module RelatonBipm
36
36
  basename = File.join @data_fetcher.output, File.basename(f).sub(/(?:-(?:en|fr))?\.rxl$/, "")
37
37
  outfile = "#{basename}.#{@data_fetcher.ext}"
38
38
  key = hash1["docnumber"] || basename
39
- @data_fetcher.index2.add_or_update Id.new(key).to_hash, outfile
39
+ @data_fetcher.index2.add_or_update Id.new.parse(key).to_hash, outfile
40
40
  hash = if File.exist? outfile
41
41
  warn_duplicate = false
42
42
  hash2 = YAML.load_file outfile
@@ -60,19 +60,19 @@ module RelatonBipm
60
60
  # @return [void]
61
61
  #
62
62
  def fix_si_brochure_id(hash) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
63
- did = hash["docid"].detect { |id| id["type"] == "BIPM" }
64
- did["primary"] = true
65
- return unless did["id"] == "BIPM Brochure"
63
+ # isbn = hash["docid"].detect { |id| id["type"] == "ISBN" }
64
+ # num = isbn && isbn["id"] == "978-92-822-2272-0" ? "SI Brochure" : "SI Brochure, Appendix 4"
66
65
 
67
- isbn = hash["docid"].detect { |id| id["type"] == "ISBN" }
68
- num = if isbn && isbn["id"] == "978-92-822-2272-0"
69
- "SI Brochure"
70
- else
71
- "SI Brochure, Appendix 4"
72
- end
73
- hash["id"] = hash["id"].sub(/(?<=^BIPM)Brochure$/i, num.gsub(/[,\s]/, ""))
74
- hash["docnumber"] = hash["docnumber"].sub(/^Brochure$/i, num)
75
- did["id"] = did["id"].sub(/(?<=^BIPM\s)Brochure$/i, num)
66
+ hash["docid"].each do |id|
67
+ next unless id["type"] == "BIPM" && id["id"].match?(/BIPM Brochure/i)
68
+
69
+ id["primary"] = true
70
+ id["id"].sub!(/(?<=^BIPM\s)(Brochure)/i, "SI \\1")
71
+ end
72
+
73
+ num = hash["docid"].detect { |id| id["primary"] && id["language"] == "en" }["id"]
74
+ hash["docnumber"].sub!(/^Brochure$/i, num.sub(/^BIPM\s/, ""))
75
+ hash["id"] = num.gsub(/[,\s]/, "")
76
76
  end
77
77
 
78
78
  #
@@ -230,7 +230,7 @@ module RelatonBipm
230
230
  # @param [String] path path to YAML file
231
231
  #
232
232
  def add_to_index(item, path) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
233
- key = Id.new(item.docnumber).to_hash
233
+ key = Id.new.parse(item.docnumber).to_hash
234
234
  @data_fetcher.index2.add_or_update key, path
235
235
  end
236
236
 
@@ -1,60 +1,63 @@
1
1
  module RelatonBipm
2
2
  class Id
3
- class Parser < Parslet::Parser
4
- rule(:space) { match("\s").repeat(1) }
5
- rule(:space?) { space.maybe }
6
- rule(:comma) { str(",") >> space? }
7
- rule(:lparen) { str("(") }
8
- rule(:rparen) { str(")") }
9
- rule(:slash) { str("/") }
10
-
11
- rule(:delimeter) { str("--") >> space }
12
- rule(:delimeter?) { delimeter.maybe }
13
-
14
- rule(:lang) { comma >> space? >> match["A-Z"].repeat(1, 2).as(:lang) }
15
- rule(:lang?) { lang.maybe }
16
-
17
- rule(:numdash) { match["A-Z0-9-"].repeat(1).as(:number) }
18
- rule(:number) { numdash >> space? }
19
- rule(:number?) { number.maybe }
20
- rule(:num_suff) { numdash >> match["a-z"].repeat(1, 2) >> space }
21
-
22
- rule(:year) { match["0-9"].repeat(4, 4).as(:year) }
23
- rule(:year_paren) { lparen >> year >> lang? >> rparen }
24
- rule(:num_year) { number? >> year_paren }
25
- rule(:year_num) { year >> str("-") >> number }
26
- rule(:num_and_year) { num_year | year_num | number }
27
-
28
- rule(:sect) { lparen >> match["IVX"].repeat >> rparen }
29
- rule(:suff) { match["a-zA-Z-"].repeat(1) }
30
- rule(:cgmp) { str("CGPM") }
31
- rule(:cipm) { str("CIPM") >> (str(" MRA") | match["A-Z-"]).maybe }
32
- rule(:cc) { str("CC") >> suff >> sect.maybe }
33
- rule(:jc) { str("JC") >> suff }
34
- rule(:cec) { str("CEC") }
35
- rule(:wgms) { str("WG-MS") }
36
- rule(:group) { (cgmp | cipm | cc | jc | cec | wgms).as(:group) }
37
-
38
- rule(:type) { match["[:alpha:]"].repeat(1).as(:type) >> space }
39
-
40
- rule(:type_group) { type >> group >> slash >> num_and_year }
41
- rule(:group_type) { group >> space >> delimeter? >> type >> num_and_year }
42
- rule(:group_num) { group >> space >> num_suff >> type >> year_paren }
43
- rule(:outcome) { group_num | group_type | type_group }
44
-
45
- rule(:append) { comma >> str("Appendix") >> space >> number }
46
- rule(:brochure) { str("SI").as(:group) >> space >> str("Brochure").as(:type) >> append.maybe }
47
-
48
- rule(:metrologia) { str("Metrologia").as(:group) >> (space >> match["a-zA-Z0-9\s"].repeat(1).as(:number)).maybe }
49
-
50
- rule(:corr) { space >> str("Corrigendum").as(:corr) }
51
- rule(:corr?) { corr.maybe }
52
- rule(:jcgm) { group >> space >> numdash >> (str(":") >> year).maybe >> corr? }
53
-
54
- rule(:result) { outcome | brochure | metrologia | jcgm }
55
-
56
- root :result
57
- end
3
+ # class Parser < Parslet::Parser
4
+ # rule(:space) { match("\s").repeat(1) }
5
+ # rule(:space?) { space.maybe }
6
+ # rule(:comma) { str(",") >> space? }
7
+ # rule(:lparen) { str("(") }
8
+ # rule(:rparen) { str(")") }
9
+ # rule(:slash) { str("/") }
10
+ # rule(:num) { match["0-9"].repeat(1) }
11
+
12
+ # rule(:delimeter) { str("--") >> space }
13
+ # rule(:delimeter?) { delimeter.maybe }
14
+
15
+ # rule(:lang) { comma >> space? >> match["A-Z"].repeat(1, 2).as(:lang) }
16
+ # rule(:lang?) { lang.maybe }
17
+
18
+ # rule(:numdash) { match["A-Z0-9-"].repeat(1).as(:number) }
19
+ # rule(:number) { numdash >> space? }
20
+ # rule(:number?) { number.maybe }
21
+ # rule(:num_suff) { numdash >> match["a-z"].repeat(1, 2) >> space }
22
+
23
+ # rule(:year) { match["0-9"].repeat(4, 4).as(:year) }
24
+ # rule(:year_paren) { lparen >> year >> lang? >> rparen }
25
+ # rule(:num_year) { number? >> year_paren }
26
+ # rule(:year_num) { year >> str("-") >> number }
27
+ # rule(:num_and_year) { num_year | year_num | number }
28
+
29
+ # rule(:sect) { lparen >> match["IVX"].repeat >> rparen }
30
+ # rule(:suff) { match["a-zA-Z-"].repeat(1) }
31
+ # rule(:cgmp) { str("CGPM") }
32
+ # rule(:cipm) { str("CIPM") >> (str(" MRA") | match["A-Z-"]).maybe }
33
+ # rule(:cc) { str("CC") >> suff >> sect.maybe }
34
+ # rule(:jc) { str("JC") >> suff }
35
+ # rule(:cec) { str("CEC") }
36
+ # rule(:wgms) { str("WG-MS") }
37
+ # rule(:group) { (cgmp | cipm | cc | jc | cec | wgms).as(:group) }
38
+
39
+ # rule(:type) { match["[:alpha:]"].repeat(1).as(:type) >> space }
40
+
41
+ # rule(:type_group) { type >> group >> slash >> num_and_year }
42
+ # rule(:group_type) { group >> space >> delimeter? >> type >> num_and_year }
43
+ # rule(:group_num) { group >> space >> num_suff >> type >> year_paren }
44
+ # rule(:outcome) { group_num | group_type | type_group }
45
+
46
+ # rule(:part_partie) { str("Part") | str("Partie") }
47
+ # rule(:part) { comma >> part_partie >> space >> num.as(:part) }
48
+ # rule(:append) { (comma | space) >> (str("Appendix") | str("Annexe")) >> space >> num.as(:append) }
49
+ # rule(:brochure) { str("SI").as(:group) >> space >> str("Brochure").as(:type) >> (part | append).maybe }
50
+
51
+ # rule(:metrologia) { str("Metrologia").as(:group) >> (space >> match["a-zA-Z0-9\s"].repeat(1).as(:number)).maybe }
52
+
53
+ # rule(:corr) { space >> str("Corrigendum").as(:corr) }
54
+ # rule(:corr?) { corr.maybe }
55
+ # rule(:jcgm) { group >> space >> numdash >> (str(":") >> year).maybe >> corr? }
56
+
57
+ # rule(:result) { outcome | brochure | metrologia | jcgm }
58
+
59
+ # root :result
60
+ # end
58
61
 
59
62
  TYPES = {
60
63
  "Resolution" => "RES",
@@ -75,16 +78,98 @@ module RelatonBipm
75
78
  #
76
79
  # Create a new Id object
77
80
  #
81
+ def initialize
82
+ # @id = Parser.new.parse(id)
83
+ # @id = parse(id)
84
+ # rescue Parslet::ParseFailed => e
85
+ # Util.warn "WARNING: Incorrect reference: `#{id}`"
86
+ # warn e.parse_failure_cause.ascii_tree
87
+ # raise RelatonBib::RequestError, e
88
+ end
89
+
78
90
  # @param [String] id id string
79
91
  #
80
- def initialize(id)
81
- @id = Parser.new.parse(id)
82
- rescue Parslet::ParseFailed => e
92
+ def parse(id)
93
+ # str = StringScanner.new id
94
+ match = parse_outcome(id) || parse_brochure(id) || parse_metrologia(id) || parse_jcgm(id)
95
+ @id = match.named_captures.compact.transform_keys(&:to_sym)
96
+ self
97
+ rescue StandardError => e
83
98
  Util.warn "WARNING: Incorrect reference: `#{id}`"
84
- # warn "[relaton-bipm] #{e.parse_failure_cause.ascii_tree}"
85
99
  raise RelatonBib::RequestError, e
86
100
  end
87
101
 
102
+ def parse_outcome(id)
103
+ parse_group_num(id) || parse_group_type(id) || parse_type_group(id)
104
+ end
105
+
106
+ def parse_group_num(id)
107
+ %r{^#{group}\s#{number}[a-z]{1,2}\s#{type}\s#{year_lang}$}.match(id)
108
+ end
109
+
110
+ def parse_group_type(id)
111
+ %r{^#{group}\s(?:--\s)?#{type}\s#{num_and_year}$}.match(id)
112
+ end
113
+
114
+ def parse_type_group(id)
115
+ %r{^#{type}\s#{group}\/#{num_and_year}$}.match(id)
116
+ end
117
+
118
+ def group
119
+ "(?<group>CGPM|CIPM(?:\\sMRA|[A-Z-])?|CC[a-zA-Z-]+[IVX]*|JC[a-zA-Z-]+|CEC|WG-MS)"
120
+ end
121
+
122
+ def type; "(?<type>[[:alpha:]]+)"; end
123
+ def number; "(?<number>[A-Z0-9-]+)"; end
124
+ def year; "(?<year>\\d{4})"; end
125
+ def lang; ",\\s?(?<lang>[A-Z]{1,2})"; end
126
+ def year_lang; "\\(#{year}(?:#{lang})?\\)"; end
127
+ def num_and_year; "(?:(?:#{number}\\s)?#{year_lang}|#{year}-#{number}|#{number})"; end
128
+
129
+ def parse_brochure(id)
130
+ %r{^
131
+ (?<group>SI)\s(?<type>Brochure)
132
+ (?:,?\s(?:(?:Part|Partie)\s(?<part>\d+)|(?:Appendix|Annexe)\s(?<append>\d+)))?
133
+ $}x.match(id)
134
+ end
135
+
136
+ def parse_metrologia(id)
137
+ %r{^(?<group>Metrologia)(?:\s(?<number>[a-zA-Z0-9\s]+))?$}.match(id)
138
+ end
139
+
140
+ def parse_jcgm(id)
141
+ %r{^#{group}\s#{number}(?::#{year})?(?:\s(?<corr>Corrigendum))?$}.match(id)
142
+ end
143
+
144
+ # def parse_gorup_num(str)
145
+ # return unless group = parse_group(str)
146
+
147
+ # return unless str.scan(" ") && num_suff = parse_num_suff(str)
148
+
149
+ # return unless type = parse_type(str)
150
+
151
+ # return unless year = parse_year_parent(str)
152
+
153
+ # { group: group, number: num_suff, type: type, year: year }
154
+ # end
155
+
156
+ # def parse_group(str)
157
+ # str.scan %r{CGPM|CIPM(?:\sMRA|[A-Z-])?|CC[a-zA-Z-]+[IVX]*|JC[a-zA-Z-]|CEC|WG-MS}
158
+ # end
159
+
160
+ # def parse_num_suff(str)
161
+ # num = parse_numdash(str)
162
+ # num if num && str.scan(/[a-z]{1,2}\s/)
163
+ # end
164
+
165
+ # def parse_number(str)
166
+ # parse_numdash(str)
167
+ # end
168
+
169
+ # def parse_numdash(str)
170
+ # str.scan(/[A-Z0-9-]+/)
171
+ # end
172
+
88
173
  #
89
174
  # Compare two Id objects
90
175
  #
@@ -129,6 +214,8 @@ module RelatonBipm
129
214
  hash[:number] = norm_num unless norm_num.nil? || norm_num.empty?
130
215
  hash[:year] = src[:year].to_s if src[:year]
131
216
  hash[:corr] = true if src[:corr]
217
+ hash[:part] = src[:part].to_s if src[:part]
218
+ hash[:append] = src[:append].to_s if src[:append]
132
219
  hash[:lang] = src[:lang].to_s if src[:lang]
133
220
  hash
134
221
  end
@@ -2,7 +2,7 @@ module RelatonBipm
2
2
  module RawdataBipmMetrologia
3
3
  class ArticleParser
4
4
  ATTRS = %i[docid title contributor date copyright abstract relation series
5
- extent type doctype].freeze
5
+ extent type doctype link].freeze
6
6
  #
7
7
  # Create new parser and parse document
8
8
  #
@@ -319,6 +319,14 @@ module RelatonBipm
319
319
  def parse_doctype
320
320
  DocumentType.new type: "article"
321
321
  end
322
+
323
+ def parse_link
324
+ @meta.xpath("./article-id[@pub-id-type='doi']").each_with_object([]) do |l, a|
325
+ url = "https://doi.org/#{l.text}"
326
+ a << RelatonBib::TypedUri.new(content: url, type: "src")
327
+ a << RelatonBib::TypedUri.new(content: url, type: "doi")
328
+ end
329
+ end
322
330
  end
323
331
  end
324
332
  end
@@ -33,7 +33,7 @@ module RelatonBipm
33
33
  item = ArticleParser.parse path
34
34
  file = "#{item.docidentifier.first.id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
35
35
  out_path = File.join(@data_fetcher.output, file)
36
- key = Id.new(item.docidentifier.first.id).to_hash
36
+ key = Id.new.parse(item.docidentifier.first.id).to_hash
37
37
  @data_fetcher.index2.add_or_update key, out_path
38
38
  @data_fetcher.write_file out_path, item
39
39
  end
@@ -76,7 +76,7 @@ module RelatonBipm
76
76
  )
77
77
  file = "#{id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
78
78
  path = File.join(@data_fetcher.output, file)
79
- @data_fetcher.index2.add_or_update Id.new(id).to_hash, path
79
+ @data_fetcher.index2.add_or_update Id.new.parse(id).to_hash, path
80
80
  @data_fetcher.write_file path, item
81
81
  end
82
82
 
@@ -128,7 +128,7 @@ module RelatonBipm
128
128
  #
129
129
  # Fetch relations
130
130
  #
131
- # @param (see #fetch_metrologia)
131
+ # @see #fetch_metrologia
132
132
  #
133
133
  # @return [Array<RelatonBib::DocumentRelation>] relations
134
134
  #
@@ -1,3 +1,3 @@
1
1
  module RelatonBipm
2
- VERSION = "1.18.0".freeze
2
+ VERSION = "1.18.1".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-bipm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.18.0
4
+ version: 1.18.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-06 00:00:00.000000000 Z
11
+ date: 2024-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday