bipm-data-importer 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3bf2e9209301b13261c1b15013611cd17cf5355d6152c1f59d131aa486bbec6c
4
- data.tar.gz: d1bf3ab0059a945f134068cf3c9cab045fdb6c9970edd3dc0cb2ce839c3dce9b
3
+ metadata.gz: 738de7ebf04b508b4a769577c1c7f82eb81e48f1c401e0e7895197cbb33b65c0
4
+ data.tar.gz: 37d6c10ed703778e8dd4c968ac2a3e174cc6cc937f6ab0bffb1ede6a3448d0c2
5
5
  SHA512:
6
- metadata.gz: f653f802d49a88b5e9f0f5b6380a1c9a0fdd73e52be093cae3bc62020d9057221dc9b9264a75bf10198b154f16dd8010480056ae624f7a1c49e7659e1b87b6fc
7
- data.tar.gz: dd797598112540212aa451e62089b4dde8dc89e4cbfd85e83e6c653e002e65fbab8783ca4cfaab6d8c10a4442e90cacbb6c057b02615c270bf6b7dc3f531a3e0
6
+ metadata.gz: '06043389fc63d5038a836342accce06736f9fcb68e61aa309ce980ae439f69d66dd79ffec35003c2d6ea121a5d727bb06cb453a4860d1760adedaf52db9c98f7'
7
+ data.tar.gz: 35e740c67821496b97012c251291bf755b19be2983f43f12129cc94b7ef3494fe8d51858269942ec275cb9e28bdec3f6f4d92049dc0c3bfd06e44ee6ed91f8e5
data/exe/bipm-fetch CHANGED
@@ -29,22 +29,6 @@ bodies.each do |bodyid, bodyurl|
29
29
 
30
30
  body = bodyid.to_s.downcase.gsub(" ", "-").to_sym
31
31
 
32
- meetings_en = VCR.use_cassette "#{body}/#{body}-meetings" do
33
- a.get "#{bodyurl}/meetings"
34
- end
35
-
36
- meetings_fr = VCR.use_cassette "#{body}/#{body}-meetings-fr" do
37
- a.get "#{bodyurl.gsub("/en/", "/fr/")}/meetings"
38
- end
39
-
40
- publications_en = VCR.use_cassette "#{body}/#{body}-publications" do
41
- a.get "#{bodyurl}/publications"
42
- end
43
-
44
- publications_fr = VCR.use_cassette "#{body}/#{body}-publications-fr" do
45
- a.get "#{bodyurl.gsub("/en/", "/fr/")}/publications"
46
- end
47
-
48
32
  resolutions = {}
49
33
  %w[en fr].each do |meeting_lang|
50
34
  next if ARGV[0] == '--fork' && fork
@@ -52,8 +36,45 @@ bodies.each do |bodyid, bodyurl|
52
36
  meeting_lang_sfx = (meeting_lang == 'fr') ? "-fr" : ""
53
37
  meeting_lang_sfx_dir = (meeting_lang == 'fr') ? "-fr" : "-en"
54
38
 
55
- meetings = (meeting_lang == 'en') ? meetings_en : meetings_fr
56
- publications = (meeting_lang == 'en') ? publications_en : publications_fr
39
+ bodyurl_local = meeting_lang == "en" ? bodyurl : bodyurl.gsub("/en/", "/fr/")
40
+
41
+ cassfx = meeting_lang == "en" ? "" : "-fr"
42
+
43
+ pages = {}
44
+
45
+ pages[:index] = VCR.use_cassette "#{body}/#{body}-index#{meeting_lang_sfx}" do
46
+ a.get "#{bodyurl_local}"
47
+ end
48
+
49
+ pages[:meetings] = VCR.use_cassette "#{body}/#{body}-meetings#{meeting_lang_sfx}" do
50
+ a.get "#{bodyurl_local}/meetings"
51
+ end
52
+
53
+ pages[:publications] = VCR.use_cassette "#{body}/#{body}-publications#{meeting_lang_sfx}" do
54
+ a.get "#{bodyurl_local}/publications"
55
+ end
56
+
57
+ # CIPM
58
+ pages[:recommendations] = VCR.use_cassette "#{body}/#{body}-recommendations#{meeting_lang_sfx}" do
59
+ a.get "#{bodyurl_local}/recommendations"
60
+ rescue Mechanize::ResponseCodeError
61
+ nil
62
+ end
63
+
64
+ # CIPM has outcomes, JCRB has meeting-outcomes
65
+ # As of 2024-12, no other body has this special case.
66
+ outcomes_path = bodyid == :CIPM ? "outcomes" : "meeting-outcomes"
67
+
68
+ pages[:outcomes] = VCR.use_cassette "#{body}/#{body}-outcomes#{meeting_lang_sfx}" do
69
+ a.get "#{bodyurl_local}/#{outcomes_path}"
70
+ rescue Mechanize::ResponseCodeError
71
+ nil
72
+ end
73
+
74
+ meetings = pages[:meetings]
75
+ publications = pages[:publications]
76
+ recommendations = pages[:recommendations]
77
+ outcomes = pages[:outcomes]
57
78
 
58
79
  index = {
59
80
  "meetings" => {"fr" => [], "en" => []},
@@ -116,6 +137,17 @@ bodies.each do |bodyid, bodyurl|
116
137
  res_div.at_css('a').attr('href')
117
138
  end
118
139
 
140
+ resolutions_additional = recommendations&.css(".bipm-resolutions .publications__content")&.map do |res_div|
141
+ href = res_div.at_css('a').attr('href')
142
+
143
+ # bad case of french data...
144
+ href = href.gsub('/106-2017/', '/104-_1-2015/') if href =~ %r"/ci/cipm/106-2017/resolution-[12]\z"
145
+
146
+ href
147
+ end&.select do |href|
148
+ href.include? "/#{ident}/"
149
+ end || []
150
+
119
151
  # A mistake on a website, resolution 2 listed twice...
120
152
  # https://www.bipm.org/fr/committees/ci/cipm/94-2005/
121
153
  if [bodyid, meeting_lang, meeting_id] == [:CIPM, 'fr', '94'] && resolutions.sort.uniq != resolutions.sort
@@ -124,6 +156,8 @@ bodies.each do |bodyid, bodyurl|
124
156
  end
125
157
  end
126
158
 
159
+ resolutions = (resolutions + resolutions_additional).uniq
160
+
127
161
  h["resolutions"] = resolutions.map do |href|
128
162
  href = href.gsub('/web/guest/', "/#{meeting_lang}/")
129
163
  href = href.sub("www.bipm.org/", "www.bipm.org/#{meeting_lang}/") unless href.include? "/#{meeting_lang}/"
@@ -160,7 +194,31 @@ bodies.each do |bodyid, bodyurl|
160
194
 
161
195
  h["metadata"]["workgroup"] = wg if wg
162
196
 
163
- h["resolutions"] = meeting.css('.bipm-decisions .decisions').map do |titletr|
197
+ decisions = meeting.css('.bipm-decisions .decisions')
198
+
199
+ # For some bodies, decisions/outcomes are on a different page altogether.
200
+ # But then we must select only decisions pertaining to our meeting.
201
+ if outcomes
202
+ decisions_additional = outcomes.css('.bipm-decisions .decisions')
203
+
204
+ decisions_additional = decisions_additional.select do |i|
205
+ pass = true if i["data-meeting_key"] == meeting_id
206
+ pass = true if i["data-meeting_key"] == "#{meeting_id}-0" # Some are with a 0, some without
207
+ pass = true if i["data-meeting"] == meeting_id # Some don't have meeting_key set, but have meeting set
208
+
209
+ pass
210
+ end
211
+
212
+ decisions = decisions.to_a + decisions_additional.to_a
213
+
214
+ # duplicates check...
215
+ duplicates = decisions.map{|i|i.at_css('.title-third').text}
216
+ if duplicates != duplicates.uniq
217
+ pp [:duplicates_found, decisions]
218
+ end
219
+ end
220
+
221
+ h["resolutions"] = decisions.map do |titletr|
164
222
  title = titletr.at_css('.title-third').text.strip
165
223
 
166
224
  type = case title
@@ -176,6 +234,9 @@ bodies.each do |bodyid, bodyurl|
176
234
  "decision"
177
235
  end
178
236
 
237
+ categories = titletr.attr('data-decisioncategories')
238
+ categories ||= "[]"
239
+
179
240
  r = {
180
241
  "dates" => [date.to_s],
181
242
  "subject" => bodyid.to_s,
@@ -185,7 +246,7 @@ bodies.each do |bodyid, bodyurl|
185
246
  "url" => meeting.uri.to_s,
186
247
  #TODO: "reference" => meeting.uri.merge(titletr.attr('data-link')).to_s,
187
248
 
188
- "categories" => JSON.parse(titletr.attr('data-decisioncategories')).map(&:strip).uniq,
249
+ "categories" => JSON.parse(categories).map(&:strip).uniq,
189
250
 
190
251
  "considerations" => [],
191
252
  "actions" => [],
@@ -329,6 +390,9 @@ bodies.each do |bodyid, bodyurl|
329
390
  wg = hs.first["metadata"]["workgroup"]
330
391
  if wg
331
392
  fn = "#{BASE_DIR}/#{body}/workgroups/#{wg}/meetings#{meeting_lang_sfx_dir}/meeting-#{mid}.yml"
393
+ elsif body == :cgpm
394
+ # CGPM old script used numbering like 00, 01, ..., 11, ...
395
+ fn = "#{BASE_DIR}/#{body}/meetings#{meeting_lang_sfx_dir}/meeting-#{"%02d" % mid}.yml"
332
396
  else
333
397
  fn = "#{BASE_DIR}/#{body}/meetings#{meeting_lang_sfx_dir}/meeting-#{mid}.yml"
334
398
  end
@@ -9,10 +9,10 @@ module Bipm
9
9
  SPACE_AFTER=/(?:\Z|(?= |\s|[,()\/.~"^]))/
10
10
 
11
11
  PREFIXES = /m|c|d|k|M|G|T|/
12
- UNITS = /t|m|mol|cal|µ|s|g|W|cd|Hz|J|K|N|V|H|A|C|F|T|Wb|sr|lx|lm|bar|sb|h|rad|°C|°F|°K/
12
+ UNITS = /t|m|mol|cal|µ|s|g|W|cd|Hz|J|K|N|V|H|A|C|F|T|Wb|sr|lx|lm|bar|sb|h|rad|fm|°C|°F|°K/
13
13
 
14
14
  def asciidoc_extract_math str
15
- str.gsub(/\b_(#{MATH}{1,3})_/, 'stem:[\1]')
15
+ str.gsub(/\b_?_(#{MATH}{1,3})_?_/, 'stem:[\1]')
16
16
  .gsub("_,_", ',') # Some mistake in formatting
17
17
  .gsub("^er^", 'ESCUPerESCUP') # French specialities
18
18
  .gsub(/(bar|A) (table|of|key|de|being|full|1)( |,)/, 'ESC\1 \2\3') # A is Ampere, but also a particle, bar is a bar but also a bar
@@ -69,4 +69,4 @@ module Bipm
69
69
  end
70
70
  end
71
71
  end
72
- end
72
+ end
@@ -338,7 +338,7 @@ module Bipm
338
338
 
339
339
  listmarker = nil
340
340
  listitems = []
341
- if (i["message"].split(/(?<!\+)\n/).all? { |j|
341
+ if (i["message"].split(/(?<!\+)\n(?!\+)/).all? { |j|
342
342
  case j
343
343
  when /\A\s*[*_]?#{PREFIX}#{kk}/i
344
344
  true
@@ -3,7 +3,7 @@
3
3
  module Bipm
4
4
  module Data
5
5
  module Importer
6
- VERSION = "0.2.1"
6
+ VERSION = "0.2.2"
7
7
  end
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bipm-data-importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-12-08 00:00:00.000000000 Z
11
+ date: 2024-12-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri