relaton-bipm 1.18.0 → 1.18.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d51b6507b240f142eca33e8276587dc32d0e5e7a3a794d1ab7eaa2b6e2af09ac
4
- data.tar.gz: 491c23ff3ef47f435ec18d605b0bc35806eb872755e338c1d06ce609284bab34
3
+ metadata.gz: a0ceb817fce307673ac91a29c26c09684bb8309aad1b522c33e4d76859db4c90
4
+ data.tar.gz: 075b945ebc0792e814e38d811579a9a91209339cf91874fa2af572402ee05f5e
5
5
  SHA512:
6
- metadata.gz: b362e8a613b038c7890987beb9ebd84f42162a94f8f42149bb763426c2f8bf8536a815006f9a9587b12f1a89e714aaa832f619d22214a88e0549fb9eee11ca06
7
- data.tar.gz: a71ec348afeeb1383bc1f3ea2c5ae11f9c1a34014f0f81462d5bc1cf248566ced81c2043991ff67bd0c40e42b9480a5472abf9d46ed2da1d0b6a3ec9a08a433e
6
+ metadata.gz: db4ad0e7175acc4f78f788f443aa4affddee29e9352cb27d5d0290f8dd27512506724a2c6f0e6a417456a8cfba24b041840e43473700d4501b107ec8b93f99a8
7
+ data.tar.gz: 23638af28bddb83784b67c89e7b5d523e7c09147bdc95cc920d17cbeef713f2c7f7d8d7f7790a9593b1e4716b7f1386c655a8cae5bb89fa889682789180b57ac
@@ -46,7 +46,7 @@ module RelatonBipm
46
46
  # @return [RelatonBipm::BipmBibliographicItem]
47
47
  #
48
48
  def get_bipm(reference) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
49
- ref_id = Id.new reference
49
+ ref_id = Id.new.parse reference
50
50
  rows = index.search { |r| ref_id == r[:id] }
51
51
  return unless rows.any?
52
52
 
@@ -63,7 +63,7 @@ module RelatonBipm
63
63
 
64
64
  def index
65
65
  Relaton::Index.find_or_create(
66
- :bipm, url: "#{GH_ENDPOINT}index2.zip", file: INDEX_FILE, id_keys: %i[group type number year corr]
66
+ :bipm, url: "#{GH_ENDPOINT}index2.zip", file: INDEX_FILE, id_keys: %i[group type number year corr part append]
67
67
  )
68
68
  end
69
69
 
@@ -36,7 +36,7 @@ module RelatonBipm
36
36
  basename = File.join @data_fetcher.output, File.basename(f).sub(/(?:-(?:en|fr))?\.rxl$/, "")
37
37
  outfile = "#{basename}.#{@data_fetcher.ext}"
38
38
  key = hash1["docnumber"] || basename
39
- @data_fetcher.index2.add_or_update Id.new(key).to_hash, outfile
39
+ @data_fetcher.index2.add_or_update Id.new.parse(key).to_hash, outfile
40
40
  hash = if File.exist? outfile
41
41
  warn_duplicate = false
42
42
  hash2 = YAML.load_file outfile
@@ -60,19 +60,19 @@ module RelatonBipm
60
60
  # @return [void]
61
61
  #
62
62
  def fix_si_brochure_id(hash) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
63
- did = hash["docid"].detect { |id| id["type"] == "BIPM" }
64
- did["primary"] = true
65
- return unless did["id"] == "BIPM Brochure"
63
+ # isbn = hash["docid"].detect { |id| id["type"] == "ISBN" }
64
+ # num = isbn && isbn["id"] == "978-92-822-2272-0" ? "SI Brochure" : "SI Brochure, Appendix 4"
66
65
 
67
- isbn = hash["docid"].detect { |id| id["type"] == "ISBN" }
68
- num = if isbn && isbn["id"] == "978-92-822-2272-0"
69
- "SI Brochure"
70
- else
71
- "SI Brochure, Appendix 4"
72
- end
73
- hash["id"] = hash["id"].sub(/(?<=^BIPM)Brochure$/i, num.gsub(/[,\s]/, ""))
74
- hash["docnumber"] = hash["docnumber"].sub(/^Brochure$/i, num)
75
- did["id"] = did["id"].sub(/(?<=^BIPM\s)Brochure$/i, num)
66
+ hash["docid"].each do |id|
67
+ next unless id["type"] == "BIPM" && id["id"].match?(/BIPM Brochure/i)
68
+
69
+ id["primary"] = true
70
+ id["id"].sub!(/(?<=^BIPM\s)(Brochure)/i, "SI \\1")
71
+ end
72
+
73
+ num = hash["docid"].detect { |id| id["primary"] && id["language"] == "en" }["id"]
74
+ hash["docnumber"].sub!(/^Brochure$/i, num.sub(/^BIPM\s/, ""))
75
+ hash["id"] = num.gsub(/[,\s]/, "")
76
76
  end
77
77
 
78
78
  #
@@ -230,7 +230,7 @@ module RelatonBipm
230
230
  # @param [String] path path to YAML file
231
231
  #
232
232
  def add_to_index(item, path) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
233
- key = Id.new(item.docnumber).to_hash
233
+ key = Id.new.parse(item.docnumber).to_hash
234
234
  @data_fetcher.index2.add_or_update key, path
235
235
  end
236
236
 
@@ -1,60 +1,63 @@
1
1
  module RelatonBipm
2
2
  class Id
3
- class Parser < Parslet::Parser
4
- rule(:space) { match("\s").repeat(1) }
5
- rule(:space?) { space.maybe }
6
- rule(:comma) { str(",") >> space? }
7
- rule(:lparen) { str("(") }
8
- rule(:rparen) { str(")") }
9
- rule(:slash) { str("/") }
10
-
11
- rule(:delimeter) { str("--") >> space }
12
- rule(:delimeter?) { delimeter.maybe }
13
-
14
- rule(:lang) { comma >> space? >> match["A-Z"].repeat(1, 2).as(:lang) }
15
- rule(:lang?) { lang.maybe }
16
-
17
- rule(:numdash) { match["A-Z0-9-"].repeat(1).as(:number) }
18
- rule(:number) { numdash >> space? }
19
- rule(:number?) { number.maybe }
20
- rule(:num_suff) { numdash >> match["a-z"].repeat(1, 2) >> space }
21
-
22
- rule(:year) { match["0-9"].repeat(4, 4).as(:year) }
23
- rule(:year_paren) { lparen >> year >> lang? >> rparen }
24
- rule(:num_year) { number? >> year_paren }
25
- rule(:year_num) { year >> str("-") >> number }
26
- rule(:num_and_year) { num_year | year_num | number }
27
-
28
- rule(:sect) { lparen >> match["IVX"].repeat >> rparen }
29
- rule(:suff) { match["a-zA-Z-"].repeat(1) }
30
- rule(:cgmp) { str("CGPM") }
31
- rule(:cipm) { str("CIPM") >> (str(" MRA") | match["A-Z-"]).maybe }
32
- rule(:cc) { str("CC") >> suff >> sect.maybe }
33
- rule(:jc) { str("JC") >> suff }
34
- rule(:cec) { str("CEC") }
35
- rule(:wgms) { str("WG-MS") }
36
- rule(:group) { (cgmp | cipm | cc | jc | cec | wgms).as(:group) }
37
-
38
- rule(:type) { match["[:alpha:]"].repeat(1).as(:type) >> space }
39
-
40
- rule(:type_group) { type >> group >> slash >> num_and_year }
41
- rule(:group_type) { group >> space >> delimeter? >> type >> num_and_year }
42
- rule(:group_num) { group >> space >> num_suff >> type >> year_paren }
43
- rule(:outcome) { group_num | group_type | type_group }
44
-
45
- rule(:append) { comma >> str("Appendix") >> space >> number }
46
- rule(:brochure) { str("SI").as(:group) >> space >> str("Brochure").as(:type) >> append.maybe }
47
-
48
- rule(:metrologia) { str("Metrologia").as(:group) >> (space >> match["a-zA-Z0-9\s"].repeat(1).as(:number)).maybe }
49
-
50
- rule(:corr) { space >> str("Corrigendum").as(:corr) }
51
- rule(:corr?) { corr.maybe }
52
- rule(:jcgm) { group >> space >> numdash >> (str(":") >> year).maybe >> corr? }
53
-
54
- rule(:result) { outcome | brochure | metrologia | jcgm }
55
-
56
- root :result
57
- end
3
+ # class Parser < Parslet::Parser
4
+ # rule(:space) { match("\s").repeat(1) }
5
+ # rule(:space?) { space.maybe }
6
+ # rule(:comma) { str(",") >> space? }
7
+ # rule(:lparen) { str("(") }
8
+ # rule(:rparen) { str(")") }
9
+ # rule(:slash) { str("/") }
10
+ # rule(:num) { match["0-9"].repeat(1) }
11
+
12
+ # rule(:delimeter) { str("--") >> space }
13
+ # rule(:delimeter?) { delimeter.maybe }
14
+
15
+ # rule(:lang) { comma >> space? >> match["A-Z"].repeat(1, 2).as(:lang) }
16
+ # rule(:lang?) { lang.maybe }
17
+
18
+ # rule(:numdash) { match["A-Z0-9-"].repeat(1).as(:number) }
19
+ # rule(:number) { numdash >> space? }
20
+ # rule(:number?) { number.maybe }
21
+ # rule(:num_suff) { numdash >> match["a-z"].repeat(1, 2) >> space }
22
+
23
+ # rule(:year) { match["0-9"].repeat(4, 4).as(:year) }
24
+ # rule(:year_paren) { lparen >> year >> lang? >> rparen }
25
+ # rule(:num_year) { number? >> year_paren }
26
+ # rule(:year_num) { year >> str("-") >> number }
27
+ # rule(:num_and_year) { num_year | year_num | number }
28
+
29
+ # rule(:sect) { lparen >> match["IVX"].repeat >> rparen }
30
+ # rule(:suff) { match["a-zA-Z-"].repeat(1) }
31
+ # rule(:cgmp) { str("CGPM") }
32
+ # rule(:cipm) { str("CIPM") >> (str(" MRA") | match["A-Z-"]).maybe }
33
+ # rule(:cc) { str("CC") >> suff >> sect.maybe }
34
+ # rule(:jc) { str("JC") >> suff }
35
+ # rule(:cec) { str("CEC") }
36
+ # rule(:wgms) { str("WG-MS") }
37
+ # rule(:group) { (cgmp | cipm | cc | jc | cec | wgms).as(:group) }
38
+
39
+ # rule(:type) { match["[:alpha:]"].repeat(1).as(:type) >> space }
40
+
41
+ # rule(:type_group) { type >> group >> slash >> num_and_year }
42
+ # rule(:group_type) { group >> space >> delimeter? >> type >> num_and_year }
43
+ # rule(:group_num) { group >> space >> num_suff >> type >> year_paren }
44
+ # rule(:outcome) { group_num | group_type | type_group }
45
+
46
+ # rule(:part_partie) { str("Part") | str("Partie") }
47
+ # rule(:part) { comma >> part_partie >> space >> num.as(:part) }
48
+ # rule(:append) { (comma | space) >> (str("Appendix") | str("Annexe")) >> space >> num.as(:append) }
49
+ # rule(:brochure) { str("SI").as(:group) >> space >> str("Brochure").as(:type) >> (part | append).maybe }
50
+
51
+ # rule(:metrologia) { str("Metrologia").as(:group) >> (space >> match["a-zA-Z0-9\s"].repeat(1).as(:number)).maybe }
52
+
53
+ # rule(:corr) { space >> str("Corrigendum").as(:corr) }
54
+ # rule(:corr?) { corr.maybe }
55
+ # rule(:jcgm) { group >> space >> numdash >> (str(":") >> year).maybe >> corr? }
56
+
57
+ # rule(:result) { outcome | brochure | metrologia | jcgm }
58
+
59
+ # root :result
60
+ # end
58
61
 
59
62
  TYPES = {
60
63
  "Resolution" => "RES",
@@ -75,16 +78,98 @@ module RelatonBipm
75
78
  #
76
79
  # Create a new Id object
77
80
  #
81
+ def initialize
82
+ # @id = Parser.new.parse(id)
83
+ # @id = parse(id)
84
+ # rescue Parslet::ParseFailed => e
85
+ # Util.warn "WARNING: Incorrect reference: `#{id}`"
86
+ # warn e.parse_failure_cause.ascii_tree
87
+ # raise RelatonBib::RequestError, e
88
+ end
89
+
78
90
  # @param [String] id id string
79
91
  #
80
- def initialize(id)
81
- @id = Parser.new.parse(id)
82
- rescue Parslet::ParseFailed => e
92
+ def parse(id)
93
+ # str = StringScanner.new id
94
+ match = parse_outcome(id) || parse_brochure(id) || parse_metrologia(id) || parse_jcgm(id)
95
+ @id = match.named_captures.compact.transform_keys(&:to_sym)
96
+ self
97
+ rescue StandardError => e
83
98
  Util.warn "WARNING: Incorrect reference: `#{id}`"
84
- # warn "[relaton-bipm] #{e.parse_failure_cause.ascii_tree}"
85
99
  raise RelatonBib::RequestError, e
86
100
  end
87
101
 
102
+ def parse_outcome(id)
103
+ parse_group_num(id) || parse_group_type(id) || parse_type_group(id)
104
+ end
105
+
106
+ def parse_group_num(id)
107
+ %r{^#{group}\s#{number}[a-z]{1,2}\s#{type}\s#{year_lang}$}.match(id)
108
+ end
109
+
110
+ def parse_group_type(id)
111
+ %r{^#{group}\s(?:--\s)?#{type}\s#{num_and_year}$}.match(id)
112
+ end
113
+
114
+ def parse_type_group(id)
115
+ %r{^#{type}\s#{group}\/#{num_and_year}$}.match(id)
116
+ end
117
+
118
+ def group
119
+ "(?<group>CGPM|CIPM(?:\\sMRA|[A-Z-])?|CC[a-zA-Z-]+[IVX]*|JC[a-zA-Z-]+|CEC|WG-MS)"
120
+ end
121
+
122
+ def type; "(?<type>[[:alpha:]]+)"; end
123
+ def number; "(?<number>[A-Z0-9-]+)"; end
124
+ def year; "(?<year>\\d{4})"; end
125
+ def lang; ",\\s?(?<lang>[A-Z]{1,2})"; end
126
+ def year_lang; "\\(#{year}(?:#{lang})?\\)"; end
127
+ def num_and_year; "(?:(?:#{number}\\s)?#{year_lang}|#{year}-#{number}|#{number})"; end
128
+
129
+ def parse_brochure(id)
130
+ %r{^
131
+ (?<group>SI)\s(?<type>Brochure)
132
+ (?:,?\s(?:(?:Part|Partie)\s(?<part>\d+)|(?:Appendix|Annexe)\s(?<append>\d+)))?
133
+ $}x.match(id)
134
+ end
135
+
136
+ def parse_metrologia(id)
137
+ %r{^(?<group>Metrologia)(?:\s(?<number>[a-zA-Z0-9\s]+))?$}.match(id)
138
+ end
139
+
140
+ def parse_jcgm(id)
141
+ %r{^#{group}\s#{number}(?::#{year})?(?:\s(?<corr>Corrigendum))?$}.match(id)
142
+ end
143
+
144
+ # def parse_group_num(str)
145
+ # return unless group = parse_group(str)
146
+
147
+ # return unless str.scan(" ") && num_suff = parse_num_suff(str)
148
+
149
+ # return unless type = parse_type(str)
150
+
151
+ # return unless year = parse_year_parent(str)
152
+
153
+ # { group: group, number: num_suff, type: type, year: year }
154
+ # end
155
+
156
+ # def parse_group(str)
157
+ # str.scan %r{CGPM|CIPM(?:\sMRA|[A-Z-])?|CC[a-zA-Z-]+[IVX]*|JC[a-zA-Z-]|CEC|WG-MS}
158
+ # end
159
+
160
+ # def parse_num_suff(str)
161
+ # num = parse_numdash(str)
162
+ # num if num && str.scan(/[a-z]{1,2}\s/)
163
+ # end
164
+
165
+ # def parse_number(str)
166
+ # parse_numdash(str)
167
+ # end
168
+
169
+ # def parse_numdash(str)
170
+ # str.scan(/[A-Z0-9-]+/)
171
+ # end
172
+
88
173
  #
89
174
  # Compare two Id objects
90
175
  #
@@ -129,6 +214,8 @@ module RelatonBipm
129
214
  hash[:number] = norm_num unless norm_num.nil? || norm_num.empty?
130
215
  hash[:year] = src[:year].to_s if src[:year]
131
216
  hash[:corr] = true if src[:corr]
217
+ hash[:part] = src[:part].to_s if src[:part]
218
+ hash[:append] = src[:append].to_s if src[:append]
132
219
  hash[:lang] = src[:lang].to_s if src[:lang]
133
220
  hash
134
221
  end
@@ -2,7 +2,7 @@ module RelatonBipm
2
2
  module RawdataBipmMetrologia
3
3
  class ArticleParser
4
4
  ATTRS = %i[docid title contributor date copyright abstract relation series
5
- extent type doctype].freeze
5
+ extent type doctype link].freeze
6
6
  #
7
7
  # Create new parser and parse document
8
8
  #
@@ -319,6 +319,14 @@ module RelatonBipm
319
319
  def parse_doctype
320
320
  DocumentType.new type: "article"
321
321
  end
322
+
323
+ def parse_link
324
+ @meta.xpath("./article-id[@pub-id-type='doi']").each_with_object([]) do |l, a|
325
+ url = "https://doi.org/#{l.text}"
326
+ a << RelatonBib::TypedUri.new(content: url, type: "src")
327
+ a << RelatonBib::TypedUri.new(content: url, type: "doi")
328
+ end
329
+ end
322
330
  end
323
331
  end
324
332
  end
@@ -33,7 +33,7 @@ module RelatonBipm
33
33
  item = ArticleParser.parse path
34
34
  file = "#{item.docidentifier.first.id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
35
35
  out_path = File.join(@data_fetcher.output, file)
36
- key = Id.new(item.docidentifier.first.id).to_hash
36
+ key = Id.new.parse(item.docidentifier.first.id).to_hash
37
37
  @data_fetcher.index2.add_or_update key, out_path
38
38
  @data_fetcher.write_file out_path, item
39
39
  end
@@ -76,7 +76,7 @@ module RelatonBipm
76
76
  )
77
77
  file = "#{id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
78
78
  path = File.join(@data_fetcher.output, file)
79
- @data_fetcher.index2.add_or_update Id.new(id).to_hash, path
79
+ @data_fetcher.index2.add_or_update Id.new.parse(id).to_hash, path
80
80
  @data_fetcher.write_file path, item
81
81
  end
82
82
 
@@ -128,7 +128,7 @@ module RelatonBipm
128
128
  #
129
129
  # Fetch relations
130
130
  #
131
- # @param (see #fetch_metrologia)
131
+ # @see #fetch_metrologia
132
132
  #
133
133
  # @return [Array<RelatonBib::DocumentRelation>] relations
134
134
  #
@@ -1,3 +1,3 @@
1
1
  module RelatonBipm
2
- VERSION = "1.18.0".freeze
2
+ VERSION = "1.18.1".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-bipm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.18.0
4
+ version: 1.18.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-06 00:00:00.000000000 Z
11
+ date: 2024-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday