bipm-data-importer 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/bipm-fetch +84 -20
- data/lib/bipm/data/importer/asciimath.rb +3 -3
- data/lib/bipm/data/importer/common.rb +1 -1
- data/lib/bipm/data/importer/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 738de7ebf04b508b4a769577c1c7f82eb81e48f1c401e0e7895197cbb33b65c0
|
4
|
+
data.tar.gz: 37d6c10ed703778e8dd4c968ac2a3e174cc6cc937f6ab0bffb1ede6a3448d0c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '06043389fc63d5038a836342accce06736f9fcb68e61aa309ce980ae439f69d66dd79ffec35003c2d6ea121a5d727bb06cb453a4860d1760adedaf52db9c98f7'
|
7
|
+
data.tar.gz: 35e740c67821496b97012c251291bf755b19be2983f43f12129cc94b7ef3494fe8d51858269942ec275cb9e28bdec3f6f4d92049dc0c3bfd06e44ee6ed91f8e5
|
data/exe/bipm-fetch
CHANGED
@@ -29,22 +29,6 @@ bodies.each do |bodyid, bodyurl|
|
|
29
29
|
|
30
30
|
body = bodyid.to_s.downcase.gsub(" ", "-").to_sym
|
31
31
|
|
32
|
-
meetings_en = VCR.use_cassette "#{body}/#{body}-meetings" do
|
33
|
-
a.get "#{bodyurl}/meetings"
|
34
|
-
end
|
35
|
-
|
36
|
-
meetings_fr = VCR.use_cassette "#{body}/#{body}-meetings-fr" do
|
37
|
-
a.get "#{bodyurl.gsub("/en/", "/fr/")}/meetings"
|
38
|
-
end
|
39
|
-
|
40
|
-
publications_en = VCR.use_cassette "#{body}/#{body}-publications" do
|
41
|
-
a.get "#{bodyurl}/publications"
|
42
|
-
end
|
43
|
-
|
44
|
-
publications_fr = VCR.use_cassette "#{body}/#{body}-publications-fr" do
|
45
|
-
a.get "#{bodyurl.gsub("/en/", "/fr/")}/publications"
|
46
|
-
end
|
47
|
-
|
48
32
|
resolutions = {}
|
49
33
|
%w[en fr].each do |meeting_lang|
|
50
34
|
next if ARGV[0] == '--fork' && fork
|
@@ -52,8 +36,45 @@ bodies.each do |bodyid, bodyurl|
|
|
52
36
|
meeting_lang_sfx = (meeting_lang == 'fr') ? "-fr" : ""
|
53
37
|
meeting_lang_sfx_dir = (meeting_lang == 'fr') ? "-fr" : "-en"
|
54
38
|
|
55
|
-
|
56
|
-
|
39
|
+
bodyurl_local = meeting_lang == "en" ? bodyurl : bodyurl.gsub("/en/", "/fr/")
|
40
|
+
|
41
|
+
cassfx = meeting_lang == "en" ? "" : "-fr"
|
42
|
+
|
43
|
+
pages = {}
|
44
|
+
|
45
|
+
pages[:index] = VCR.use_cassette "#{body}/#{body}-index#{meeting_lang_sfx}" do
|
46
|
+
a.get "#{bodyurl_local}"
|
47
|
+
end
|
48
|
+
|
49
|
+
pages[:meetings] = VCR.use_cassette "#{body}/#{body}-meetings#{meeting_lang_sfx}" do
|
50
|
+
a.get "#{bodyurl_local}/meetings"
|
51
|
+
end
|
52
|
+
|
53
|
+
pages[:publications] = VCR.use_cassette "#{body}/#{body}-publications#{meeting_lang_sfx}" do
|
54
|
+
a.get "#{bodyurl_local}/publications"
|
55
|
+
end
|
56
|
+
|
57
|
+
# CIPM
|
58
|
+
pages[:recommendations] = VCR.use_cassette "#{body}/#{body}-recommendations#{meeting_lang_sfx}" do
|
59
|
+
a.get "#{bodyurl_local}/recommendations"
|
60
|
+
rescue Mechanize::ResponseCodeError
|
61
|
+
nil
|
62
|
+
end
|
63
|
+
|
64
|
+
# CIPM has outcomes, JCRB has meeting-outcomes
|
65
|
+
# As of 2024-12, no other body has this special case.
|
66
|
+
outcomes_path = bodyid == :CIPM ? "outcomes" : "meeting-outcomes"
|
67
|
+
|
68
|
+
pages[:outcomes] = VCR.use_cassette "#{body}/#{body}-outcomes#{meeting_lang_sfx}" do
|
69
|
+
a.get "#{bodyurl_local}/#{outcomes_path}"
|
70
|
+
rescue Mechanize::ResponseCodeError
|
71
|
+
nil
|
72
|
+
end
|
73
|
+
|
74
|
+
meetings = pages[:meetings]
|
75
|
+
publications = pages[:publications]
|
76
|
+
recommendations = pages[:recommendations]
|
77
|
+
outcomes = pages[:outcomes]
|
57
78
|
|
58
79
|
index = {
|
59
80
|
"meetings" => {"fr" => [], "en" => []},
|
@@ -116,6 +137,17 @@ bodies.each do |bodyid, bodyurl|
|
|
116
137
|
res_div.at_css('a').attr('href')
|
117
138
|
end
|
118
139
|
|
140
|
+
resolutions_additional = recommendations&.css(".bipm-resolutions .publications__content")&.map do |res_div|
|
141
|
+
href = res_div.at_css('a').attr('href')
|
142
|
+
|
143
|
+
# bad case of french data...
|
144
|
+
href = href.gsub('/106-2017/', '/104-_1-2015/') if href =~ %r"/ci/cipm/106-2017/resolution-[12]\z"
|
145
|
+
|
146
|
+
href
|
147
|
+
end&.select do |href|
|
148
|
+
href.include? "/#{ident}/"
|
149
|
+
end || []
|
150
|
+
|
119
151
|
# A mistake on a website, resolution 2 listed twice...
|
120
152
|
# https://www.bipm.org/fr/committees/ci/cipm/94-2005/
|
121
153
|
if [bodyid, meeting_lang, meeting_id] == [:CIPM, 'fr', '94'] && resolutions.sort.uniq != resolutions.sort
|
@@ -124,6 +156,8 @@ bodies.each do |bodyid, bodyurl|
|
|
124
156
|
end
|
125
157
|
end
|
126
158
|
|
159
|
+
resolutions = (resolutions + resolutions_additional).uniq
|
160
|
+
|
127
161
|
h["resolutions"] = resolutions.map do |href|
|
128
162
|
href = href.gsub('/web/guest/', "/#{meeting_lang}/")
|
129
163
|
href = href.sub("www.bipm.org/", "www.bipm.org/#{meeting_lang}/") unless href.include? "/#{meeting_lang}/"
|
@@ -160,7 +194,31 @@ bodies.each do |bodyid, bodyurl|
|
|
160
194
|
|
161
195
|
h["metadata"]["workgroup"] = wg if wg
|
162
196
|
|
163
|
-
|
197
|
+
decisions = meeting.css('.bipm-decisions .decisions')
|
198
|
+
|
199
|
+
# For some bodies, decisions/outcomes are on a different page altogether.
|
200
|
+
# But then we must select only decisions pertaining to our meeting.
|
201
|
+
if outcomes
|
202
|
+
decisions_additional = outcomes.css('.bipm-decisions .decisions')
|
203
|
+
|
204
|
+
decisions_additional = decisions_additional.select do |i|
|
205
|
+
pass = true if i["data-meeting_key"] == meeting_id
|
206
|
+
pass = true if i["data-meeting_key"] == "#{meeting_id}-0" # Some are with a 0, some without
|
207
|
+
pass = true if i["data-meeting"] == meeting_id # Some don't have meeting_key set, but have meeting set
|
208
|
+
|
209
|
+
pass
|
210
|
+
end
|
211
|
+
|
212
|
+
decisions = decisions.to_a + decisions_additional.to_a
|
213
|
+
|
214
|
+
# duplicates check...
|
215
|
+
duplicates = decisions.map{|i|i.at_css('.title-third').text}
|
216
|
+
if duplicates != duplicates.uniq
|
217
|
+
pp [:duplicates_found, decisions]
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
h["resolutions"] = decisions.map do |titletr|
|
164
222
|
title = titletr.at_css('.title-third').text.strip
|
165
223
|
|
166
224
|
type = case title
|
@@ -176,6 +234,9 @@ bodies.each do |bodyid, bodyurl|
|
|
176
234
|
"decision"
|
177
235
|
end
|
178
236
|
|
237
|
+
categories = titletr.attr('data-decisioncategories')
|
238
|
+
categories ||= "[]"
|
239
|
+
|
179
240
|
r = {
|
180
241
|
"dates" => [date.to_s],
|
181
242
|
"subject" => bodyid.to_s,
|
@@ -185,7 +246,7 @@ bodies.each do |bodyid, bodyurl|
|
|
185
246
|
"url" => meeting.uri.to_s,
|
186
247
|
#TODO: "reference" => meeting.uri.merge(titletr.attr('data-link')).to_s,
|
187
248
|
|
188
|
-
"categories" => JSON.parse(
|
249
|
+
"categories" => JSON.parse(categories).map(&:strip).uniq,
|
189
250
|
|
190
251
|
"considerations" => [],
|
191
252
|
"actions" => [],
|
@@ -329,6 +390,9 @@ bodies.each do |bodyid, bodyurl|
|
|
329
390
|
wg = hs.first["metadata"]["workgroup"]
|
330
391
|
if wg
|
331
392
|
fn = "#{BASE_DIR}/#{body}/workgroups/#{wg}/meetings#{meeting_lang_sfx_dir}/meeting-#{mid}.yml"
|
393
|
+
elsif body == :cgpm
|
394
|
+
# CGPM old script used numbering like 00, 01, ..., 11, ...
|
395
|
+
fn = "#{BASE_DIR}/#{body}/meetings#{meeting_lang_sfx_dir}/meeting-#{"%02d" % mid}.yml"
|
332
396
|
else
|
333
397
|
fn = "#{BASE_DIR}/#{body}/meetings#{meeting_lang_sfx_dir}/meeting-#{mid}.yml"
|
334
398
|
end
|
@@ -9,10 +9,10 @@ module Bipm
|
|
9
9
|
SPACE_AFTER=/(?:\Z|(?= |\s|[,()\/.~"^]))/
|
10
10
|
|
11
11
|
PREFIXES = /m|c|d|k|M|G|T|/
|
12
|
-
UNITS = /t|m|mol|cal|µ|s|g|W|cd|Hz|J|K|N|V|H|A|C|F|T|Wb|sr|lx|lm|bar|sb|h|rad|°C|°F|°K/
|
12
|
+
UNITS = /t|m|mol|cal|µ|s|g|W|cd|Hz|J|K|N|V|H|A|C|F|T|Wb|sr|lx|lm|bar|sb|h|rad|fm|°C|°F|°K/
|
13
13
|
|
14
14
|
def asciidoc_extract_math str
|
15
|
-
str.gsub(/\b_(#{MATH}{1,3})_/, 'stem:[\1]')
|
15
|
+
str.gsub(/\b_?_(#{MATH}{1,3})_?_/, 'stem:[\1]')
|
16
16
|
.gsub("_,_", ',') # Some mistake in formatting
|
17
17
|
.gsub("^er^", 'ESCUPerESCUP') # French specialities
|
18
18
|
.gsub(/(bar|A) (table|of|key|de|being|full|1)( |,)/, 'ESC\1 \2\3') # A is Ampere, but also a particle, bar is a bar but also a bar
|
@@ -69,4 +69,4 @@ module Bipm
|
|
69
69
|
end
|
70
70
|
end
|
71
71
|
end
|
72
|
-
end
|
72
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bipm-data-importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-12-
|
11
|
+
date: 2024-12-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|