bipm-data-importer 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/bipm-fetch +84 -20
- data/lib/bipm/data/importer/asciimath.rb +3 -3
- data/lib/bipm/data/importer/common.rb +1 -1
- data/lib/bipm/data/importer/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 738de7ebf04b508b4a769577c1c7f82eb81e48f1c401e0e7895197cbb33b65c0
|
4
|
+
data.tar.gz: 37d6c10ed703778e8dd4c968ac2a3e174cc6cc937f6ab0bffb1ede6a3448d0c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '06043389fc63d5038a836342accce06736f9fcb68e61aa309ce980ae439f69d66dd79ffec35003c2d6ea121a5d727bb06cb453a4860d1760adedaf52db9c98f7'
|
7
|
+
data.tar.gz: 35e740c67821496b97012c251291bf755b19be2983f43f12129cc94b7ef3494fe8d51858269942ec275cb9e28bdec3f6f4d92049dc0c3bfd06e44ee6ed91f8e5
|
data/exe/bipm-fetch
CHANGED
@@ -29,22 +29,6 @@ bodies.each do |bodyid, bodyurl|
|
|
29
29
|
|
30
30
|
body = bodyid.to_s.downcase.gsub(" ", "-").to_sym
|
31
31
|
|
32
|
-
meetings_en = VCR.use_cassette "#{body}/#{body}-meetings" do
|
33
|
-
a.get "#{bodyurl}/meetings"
|
34
|
-
end
|
35
|
-
|
36
|
-
meetings_fr = VCR.use_cassette "#{body}/#{body}-meetings-fr" do
|
37
|
-
a.get "#{bodyurl.gsub("/en/", "/fr/")}/meetings"
|
38
|
-
end
|
39
|
-
|
40
|
-
publications_en = VCR.use_cassette "#{body}/#{body}-publications" do
|
41
|
-
a.get "#{bodyurl}/publications"
|
42
|
-
end
|
43
|
-
|
44
|
-
publications_fr = VCR.use_cassette "#{body}/#{body}-publications-fr" do
|
45
|
-
a.get "#{bodyurl.gsub("/en/", "/fr/")}/publications"
|
46
|
-
end
|
47
|
-
|
48
32
|
resolutions = {}
|
49
33
|
%w[en fr].each do |meeting_lang|
|
50
34
|
next if ARGV[0] == '--fork' && fork
|
@@ -52,8 +36,45 @@ bodies.each do |bodyid, bodyurl|
|
|
52
36
|
meeting_lang_sfx = (meeting_lang == 'fr') ? "-fr" : ""
|
53
37
|
meeting_lang_sfx_dir = (meeting_lang == 'fr') ? "-fr" : "-en"
|
54
38
|
|
55
|
-
|
56
|
-
|
39
|
+
bodyurl_local = meeting_lang == "en" ? bodyurl : bodyurl.gsub("/en/", "/fr/")
|
40
|
+
|
41
|
+
cassfx = meeting_lang == "en" ? "" : "-fr"
|
42
|
+
|
43
|
+
pages = {}
|
44
|
+
|
45
|
+
pages[:index] = VCR.use_cassette "#{body}/#{body}-index#{meeting_lang_sfx}" do
|
46
|
+
a.get "#{bodyurl_local}"
|
47
|
+
end
|
48
|
+
|
49
|
+
pages[:meetings] = VCR.use_cassette "#{body}/#{body}-meetings#{meeting_lang_sfx}" do
|
50
|
+
a.get "#{bodyurl_local}/meetings"
|
51
|
+
end
|
52
|
+
|
53
|
+
pages[:publications] = VCR.use_cassette "#{body}/#{body}-publications#{meeting_lang_sfx}" do
|
54
|
+
a.get "#{bodyurl_local}/publications"
|
55
|
+
end
|
56
|
+
|
57
|
+
# CIPM
|
58
|
+
pages[:recommendations] = VCR.use_cassette "#{body}/#{body}-recommendations#{meeting_lang_sfx}" do
|
59
|
+
a.get "#{bodyurl_local}/recommendations"
|
60
|
+
rescue Mechanize::ResponseCodeError
|
61
|
+
nil
|
62
|
+
end
|
63
|
+
|
64
|
+
# CIPM has outcomes, JCRB has meeting-outcomes
|
65
|
+
# As of 2024-12, no other body has this special case.
|
66
|
+
outcomes_path = bodyid == :CIPM ? "outcomes" : "meeting-outcomes"
|
67
|
+
|
68
|
+
pages[:outcomes] = VCR.use_cassette "#{body}/#{body}-outcomes#{meeting_lang_sfx}" do
|
69
|
+
a.get "#{bodyurl_local}/#{outcomes_path}"
|
70
|
+
rescue Mechanize::ResponseCodeError
|
71
|
+
nil
|
72
|
+
end
|
73
|
+
|
74
|
+
meetings = pages[:meetings]
|
75
|
+
publications = pages[:publications]
|
76
|
+
recommendations = pages[:recommendations]
|
77
|
+
outcomes = pages[:outcomes]
|
57
78
|
|
58
79
|
index = {
|
59
80
|
"meetings" => {"fr" => [], "en" => []},
|
@@ -116,6 +137,17 @@ bodies.each do |bodyid, bodyurl|
|
|
116
137
|
res_div.at_css('a').attr('href')
|
117
138
|
end
|
118
139
|
|
140
|
+
resolutions_additional = recommendations&.css(".bipm-resolutions .publications__content")&.map do |res_div|
|
141
|
+
href = res_div.at_css('a').attr('href')
|
142
|
+
|
143
|
+
# bad case of french data...
|
144
|
+
href = href.gsub('/106-2017/', '/104-_1-2015/') if href =~ %r"/ci/cipm/106-2017/resolution-[12]\z"
|
145
|
+
|
146
|
+
href
|
147
|
+
end&.select do |href|
|
148
|
+
href.include? "/#{ident}/"
|
149
|
+
end || []
|
150
|
+
|
119
151
|
# A mistake on a website, resolution 2 listed twice...
|
120
152
|
# https://www.bipm.org/fr/committees/ci/cipm/94-2005/
|
121
153
|
if [bodyid, meeting_lang, meeting_id] == [:CIPM, 'fr', '94'] && resolutions.sort.uniq != resolutions.sort
|
@@ -124,6 +156,8 @@ bodies.each do |bodyid, bodyurl|
|
|
124
156
|
end
|
125
157
|
end
|
126
158
|
|
159
|
+
resolutions = (resolutions + resolutions_additional).uniq
|
160
|
+
|
127
161
|
h["resolutions"] = resolutions.map do |href|
|
128
162
|
href = href.gsub('/web/guest/', "/#{meeting_lang}/")
|
129
163
|
href = href.sub("www.bipm.org/", "www.bipm.org/#{meeting_lang}/") unless href.include? "/#{meeting_lang}/"
|
@@ -160,7 +194,31 @@ bodies.each do |bodyid, bodyurl|
|
|
160
194
|
|
161
195
|
h["metadata"]["workgroup"] = wg if wg
|
162
196
|
|
163
|
-
|
197
|
+
decisions = meeting.css('.bipm-decisions .decisions')
|
198
|
+
|
199
|
+
# For some bodies, decisions/outcomes are on a different page altogether.
|
200
|
+
# But then we must select only decisions pertaining to our meeting.
|
201
|
+
if outcomes
|
202
|
+
decisions_additional = outcomes.css('.bipm-decisions .decisions')
|
203
|
+
|
204
|
+
decisions_additional = decisions_additional.select do |i|
|
205
|
+
pass = true if i["data-meeting_key"] == meeting_id
|
206
|
+
pass = true if i["data-meeting_key"] == "#{meeting_id}-0" # Some are with a 0, some without
|
207
|
+
pass = true if i["data-meeting"] == meeting_id # Some don't have meeting_key set, but have meeting set
|
208
|
+
|
209
|
+
pass
|
210
|
+
end
|
211
|
+
|
212
|
+
decisions = decisions.to_a + decisions_additional.to_a
|
213
|
+
|
214
|
+
# duplicates check...
|
215
|
+
duplicates = decisions.map{|i|i.at_css('.title-third').text}
|
216
|
+
if duplicates != duplicates.uniq
|
217
|
+
pp [:duplicates_found, decisions]
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
h["resolutions"] = decisions.map do |titletr|
|
164
222
|
title = titletr.at_css('.title-third').text.strip
|
165
223
|
|
166
224
|
type = case title
|
@@ -176,6 +234,9 @@ bodies.each do |bodyid, bodyurl|
|
|
176
234
|
"decision"
|
177
235
|
end
|
178
236
|
|
237
|
+
categories = titletr.attr('data-decisioncategories')
|
238
|
+
categories ||= "[]"
|
239
|
+
|
179
240
|
r = {
|
180
241
|
"dates" => [date.to_s],
|
181
242
|
"subject" => bodyid.to_s,
|
@@ -185,7 +246,7 @@ bodies.each do |bodyid, bodyurl|
|
|
185
246
|
"url" => meeting.uri.to_s,
|
186
247
|
#TODO: "reference" => meeting.uri.merge(titletr.attr('data-link')).to_s,
|
187
248
|
|
188
|
-
"categories" => JSON.parse(
|
249
|
+
"categories" => JSON.parse(categories).map(&:strip).uniq,
|
189
250
|
|
190
251
|
"considerations" => [],
|
191
252
|
"actions" => [],
|
@@ -329,6 +390,9 @@ bodies.each do |bodyid, bodyurl|
|
|
329
390
|
wg = hs.first["metadata"]["workgroup"]
|
330
391
|
if wg
|
331
392
|
fn = "#{BASE_DIR}/#{body}/workgroups/#{wg}/meetings#{meeting_lang_sfx_dir}/meeting-#{mid}.yml"
|
393
|
+
elsif body == :cgpm
|
394
|
+
# CGPM old script used numbering like 00, 01, ..., 11, ...
|
395
|
+
fn = "#{BASE_DIR}/#{body}/meetings#{meeting_lang_sfx_dir}/meeting-#{"%02d" % mid}.yml"
|
332
396
|
else
|
333
397
|
fn = "#{BASE_DIR}/#{body}/meetings#{meeting_lang_sfx_dir}/meeting-#{mid}.yml"
|
334
398
|
end
|
data/lib/bipm/data/importer/asciimath.rb
CHANGED
@@ -9,10 +9,10 @@ module Bipm
|
|
9
9
|
SPACE_AFTER=/(?:\Z|(?= |\s|[,()\/.~"^]))/
|
10
10
|
|
11
11
|
PREFIXES = /m|c|d|k|M|G|T|/
|
12
|
-
UNITS = /t|m|mol|cal|µ|s|g|W|cd|Hz|J|K|N|V|H|A|C|F|T|Wb|sr|lx|lm|bar|sb|h|rad|°C|°F|°K/
|
12
|
+
UNITS = /t|m|mol|cal|µ|s|g|W|cd|Hz|J|K|N|V|H|A|C|F|T|Wb|sr|lx|lm|bar|sb|h|rad|fm|°C|°F|°K/
|
13
13
|
|
14
14
|
def asciidoc_extract_math str
|
15
|
-
str.gsub(/\b_(#{MATH}{1,3})_/, 'stem:[\1]')
|
15
|
+
str.gsub(/\b_?_(#{MATH}{1,3})_?_/, 'stem:[\1]')
|
16
16
|
.gsub("_,_", ',') # Some mistake in formatting
|
17
17
|
.gsub("^er^", 'ESCUPerESCUP') # French specialities
|
18
18
|
.gsub(/(bar|A) (table|of|key|de|being|full|1)( |,)/, 'ESC\1 \2\3') # A is Ampere, but also a particle, bar is a bar but also a bar
|
@@ -69,4 +69,4 @@ module Bipm
|
|
69
69
|
end
|
70
70
|
end
|
71
71
|
end
|
72
|
-
end
|
72
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bipm-data-importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-12-
|
11
|
+
date: 2024-12-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|