bipm-data-importer 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3bf2e9209301b13261c1b15013611cd17cf5355d6152c1f59d131aa486bbec6c
4
- data.tar.gz: d1bf3ab0059a945f134068cf3c9cab045fdb6c9970edd3dc0cb2ce839c3dce9b
3
+ metadata.gz: 738de7ebf04b508b4a769577c1c7f82eb81e48f1c401e0e7895197cbb33b65c0
4
+ data.tar.gz: 37d6c10ed703778e8dd4c968ac2a3e174cc6cc937f6ab0bffb1ede6a3448d0c2
5
5
  SHA512:
6
- metadata.gz: f653f802d49a88b5e9f0f5b6380a1c9a0fdd73e52be093cae3bc62020d9057221dc9b9264a75bf10198b154f16dd8010480056ae624f7a1c49e7659e1b87b6fc
7
- data.tar.gz: dd797598112540212aa451e62089b4dde8dc89e4cbfd85e83e6c653e002e65fbab8783ca4cfaab6d8c10a4442e90cacbb6c057b02615c270bf6b7dc3f531a3e0
6
+ metadata.gz: '06043389fc63d5038a836342accce06736f9fcb68e61aa309ce980ae439f69d66dd79ffec35003c2d6ea121a5d727bb06cb453a4860d1760adedaf52db9c98f7'
7
+ data.tar.gz: 35e740c67821496b97012c251291bf755b19be2983f43f12129cc94b7ef3494fe8d51858269942ec275cb9e28bdec3f6f4d92049dc0c3bfd06e44ee6ed91f8e5
data/exe/bipm-fetch CHANGED
@@ -29,22 +29,6 @@ bodies.each do |bodyid, bodyurl|
29
29
 
30
30
  body = bodyid.to_s.downcase.gsub(" ", "-").to_sym
31
31
 
32
- meetings_en = VCR.use_cassette "#{body}/#{body}-meetings" do
33
- a.get "#{bodyurl}/meetings"
34
- end
35
-
36
- meetings_fr = VCR.use_cassette "#{body}/#{body}-meetings-fr" do
37
- a.get "#{bodyurl.gsub("/en/", "/fr/")}/meetings"
38
- end
39
-
40
- publications_en = VCR.use_cassette "#{body}/#{body}-publications" do
41
- a.get "#{bodyurl}/publications"
42
- end
43
-
44
- publications_fr = VCR.use_cassette "#{body}/#{body}-publications-fr" do
45
- a.get "#{bodyurl.gsub("/en/", "/fr/")}/publications"
46
- end
47
-
48
32
  resolutions = {}
49
33
  %w[en fr].each do |meeting_lang|
50
34
  next if ARGV[0] == '--fork' && fork
@@ -52,8 +36,45 @@ bodies.each do |bodyid, bodyurl|
52
36
  meeting_lang_sfx = (meeting_lang == 'fr') ? "-fr" : ""
53
37
  meeting_lang_sfx_dir = (meeting_lang == 'fr') ? "-fr" : "-en"
54
38
 
55
- meetings = (meeting_lang == 'en') ? meetings_en : meetings_fr
56
- publications = (meeting_lang == 'en') ? publications_en : publications_fr
39
+ bodyurl_local = meeting_lang == "en" ? bodyurl : bodyurl.gsub("/en/", "/fr/")
40
+
41
+ cassfx = meeting_lang == "en" ? "" : "-fr"
42
+
43
+ pages = {}
44
+
45
+ pages[:index] = VCR.use_cassette "#{body}/#{body}-index#{meeting_lang_sfx}" do
46
+ a.get "#{bodyurl_local}"
47
+ end
48
+
49
+ pages[:meetings] = VCR.use_cassette "#{body}/#{body}-meetings#{meeting_lang_sfx}" do
50
+ a.get "#{bodyurl_local}/meetings"
51
+ end
52
+
53
+ pages[:publications] = VCR.use_cassette "#{body}/#{body}-publications#{meeting_lang_sfx}" do
54
+ a.get "#{bodyurl_local}/publications"
55
+ end
56
+
57
+ # CIPM
58
+ pages[:recommendations] = VCR.use_cassette "#{body}/#{body}-recommendations#{meeting_lang_sfx}" do
59
+ a.get "#{bodyurl_local}/recommendations"
60
+ rescue Mechanize::ResponseCodeError
61
+ nil
62
+ end
63
+
64
+ # CIPM has outcomes, JCRB has meeting-outcomes
65
+ # As of 2024-12, no other body has this special case.
66
+ outcomes_path = bodyid == :CIPM ? "outcomes" : "meeting-outcomes"
67
+
68
+ pages[:outcomes] = VCR.use_cassette "#{body}/#{body}-outcomes#{meeting_lang_sfx}" do
69
+ a.get "#{bodyurl_local}/#{outcomes_path}"
70
+ rescue Mechanize::ResponseCodeError
71
+ nil
72
+ end
73
+
74
+ meetings = pages[:meetings]
75
+ publications = pages[:publications]
76
+ recommendations = pages[:recommendations]
77
+ outcomes = pages[:outcomes]
57
78
 
58
79
  index = {
59
80
  "meetings" => {"fr" => [], "en" => []},
@@ -116,6 +137,17 @@ bodies.each do |bodyid, bodyurl|
116
137
  res_div.at_css('a').attr('href')
117
138
  end
118
139
 
140
+ resolutions_additional = recommendations&.css(".bipm-resolutions .publications__content")&.map do |res_div|
141
+ href = res_div.at_css('a').attr('href')
142
+
143
+ # bad case of french data...
144
+ href = href.gsub('/106-2017/', '/104-_1-2015/') if href =~ %r"/ci/cipm/106-2017/resolution-[12]\z"
145
+
146
+ href
147
+ end&.select do |href|
148
+ href.include? "/#{ident}/"
149
+ end || []
150
+
119
151
  # A mistake on a website, resolution 2 listed twice...
120
152
  # https://www.bipm.org/fr/committees/ci/cipm/94-2005/
121
153
  if [bodyid, meeting_lang, meeting_id] == [:CIPM, 'fr', '94'] && resolutions.sort.uniq != resolutions.sort
@@ -124,6 +156,8 @@ bodies.each do |bodyid, bodyurl|
124
156
  end
125
157
  end
126
158
 
159
+ resolutions = (resolutions + resolutions_additional).uniq
160
+
127
161
  h["resolutions"] = resolutions.map do |href|
128
162
  href = href.gsub('/web/guest/', "/#{meeting_lang}/")
129
163
  href = href.sub("www.bipm.org/", "www.bipm.org/#{meeting_lang}/") unless href.include? "/#{meeting_lang}/"
@@ -160,7 +194,31 @@ bodies.each do |bodyid, bodyurl|
160
194
 
161
195
  h["metadata"]["workgroup"] = wg if wg
162
196
 
163
- h["resolutions"] = meeting.css('.bipm-decisions .decisions').map do |titletr|
197
+ decisions = meeting.css('.bipm-decisions .decisions')
198
+
199
+ # For some bodies, decisions/outcomes are on a different page altogether.
200
+ # But then we must select only decisions pertaining to our meeting.
201
+ if outcomes
202
+ decisions_additional = outcomes.css('.bipm-decisions .decisions')
203
+
204
+ decisions_additional = decisions_additional.select do |i|
205
+ pass = true if i["data-meeting_key"] == meeting_id
206
+ pass = true if i["data-meeting_key"] == "#{meeting_id}-0" # Some are with a 0, some without
207
+ pass = true if i["data-meeting"] == meeting_id # Some don't have meeting_key set, but have meeting set
208
+
209
+ pass
210
+ end
211
+
212
+ decisions = decisions.to_a + decisions_additional.to_a
213
+
214
+ # duplicates check...
215
+ duplicates = decisions.map{|i|i.at_css('.title-third').text}
216
+ if duplicates != duplicates.uniq
217
+ pp [:duplicates_found, decisions]
218
+ end
219
+ end
220
+
221
+ h["resolutions"] = decisions.map do |titletr|
164
222
  title = titletr.at_css('.title-third').text.strip
165
223
 
166
224
  type = case title
@@ -176,6 +234,9 @@ bodies.each do |bodyid, bodyurl|
176
234
  "decision"
177
235
  end
178
236
 
237
+ categories = titletr.attr('data-decisioncategories')
238
+ categories ||= "[]"
239
+
179
240
  r = {
180
241
  "dates" => [date.to_s],
181
242
  "subject" => bodyid.to_s,
@@ -185,7 +246,7 @@ bodies.each do |bodyid, bodyurl|
185
246
  "url" => meeting.uri.to_s,
186
247
  #TODO: "reference" => meeting.uri.merge(titletr.attr('data-link')).to_s,
187
248
 
188
- "categories" => JSON.parse(titletr.attr('data-decisioncategories')).map(&:strip).uniq,
249
+ "categories" => JSON.parse(categories).map(&:strip).uniq,
189
250
 
190
251
  "considerations" => [],
191
252
  "actions" => [],
@@ -329,6 +390,9 @@ bodies.each do |bodyid, bodyurl|
329
390
  wg = hs.first["metadata"]["workgroup"]
330
391
  if wg
331
392
  fn = "#{BASE_DIR}/#{body}/workgroups/#{wg}/meetings#{meeting_lang_sfx_dir}/meeting-#{mid}.yml"
393
+ elsif body == :cgpm
394
+ # CGPM old script used numbering like 00, 01, ..., 11, ...
395
+ fn = "#{BASE_DIR}/#{body}/meetings#{meeting_lang_sfx_dir}/meeting-#{"%02d" % mid}.yml"
332
396
  else
333
397
  fn = "#{BASE_DIR}/#{body}/meetings#{meeting_lang_sfx_dir}/meeting-#{mid}.yml"
334
398
  end
@@ -9,10 +9,10 @@ module Bipm
9
9
  SPACE_AFTER=/(?:\Z|(?= |\s|[,()\/.~"^]))/
10
10
 
11
11
  PREFIXES = /m|c|d|k|M|G|T|/
12
- UNITS = /t|m|mol|cal|µ|s|g|W|cd|Hz|J|K|N|V|H|A|C|F|T|Wb|sr|lx|lm|bar|sb|h|rad|°C|°F|°K/
12
+ UNITS = /t|m|mol|cal|µ|s|g|W|cd|Hz|J|K|N|V|H|A|C|F|T|Wb|sr|lx|lm|bar|sb|h|rad|fm|°C|°F|°K/
13
13
 
14
14
  def asciidoc_extract_math str
15
- str.gsub(/\b_(#{MATH}{1,3})_/, 'stem:[\1]')
15
+ str.gsub(/\b_?_(#{MATH}{1,3})_?_/, 'stem:[\1]')
16
16
  .gsub("_,_", ',') # Some mistake in formatting
17
17
  .gsub("^er^", 'ESCUPerESCUP') # French specialities
18
18
  .gsub(/(bar|A) (table|of|key|de|being|full|1)( |,)/, 'ESC\1 \2\3') # A is Ampere, but also a particle, bar is a bar but also a bar
@@ -69,4 +69,4 @@ module Bipm
69
69
  end
70
70
  end
71
71
  end
72
- end
72
+ end
@@ -338,7 +338,7 @@ module Bipm
338
338
 
339
339
  listmarker = nil
340
340
  listitems = []
341
- if (i["message"].split(/(?<!\+)\n/).all? { |j|
341
+ if (i["message"].split(/(?<!\+)\n(?!\+)/).all? { |j|
342
342
  case j
343
343
  when /\A\s*[*_]?#{PREFIX}#{kk}/i
344
344
  true
@@ -3,7 +3,7 @@
3
3
  module Bipm
4
4
  module Data
5
5
  module Importer
6
- VERSION = "0.2.1"
6
+ VERSION = "0.2.2"
7
7
  end
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bipm-data-importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-12-08 00:00:00.000000000 Z
11
+ date: 2024-12-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri