relaton-bipm 1.13.11 → 1.13.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +2 -2
- data/lib/relaton_bipm/bipm_bibliographic_item.rb +2 -2
- data/lib/relaton_bipm/bipm_bibliography.rb +16 -13
- data/lib/relaton_bipm/bipm_si_brochure_parser.rb +112 -0
- data/lib/relaton_bipm/data_fetcher.rb +4 -410
- data/lib/relaton_bipm/data_outcomes_parser.rb +450 -0
- data/lib/relaton_bipm/version.rb +1 -1
- data/lib/relaton_bipm/xml_parser.rb +16 -10
- data/lib/relaton_bipm.rb +2 -0
- data/relaton_bipm.gemspec +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54b7b1e3965a37d276f3a5cb7b80b8f3697e5433339a6df730ec5806dcfdce5e
|
4
|
+
data.tar.gz: 9103c18317ab811ac65f19787677a4d5dc3bcbe1480be299b332b15e9a14d2b8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e883e2ba880c5c7853a1070bad87dcc09dcfb1417a9be29905e8e201810d5faa12e79bb6176f277cdbc908098c6adc1561b32677968d5feecc7ece98da42034c
|
7
|
+
data.tar.gz: ee38718ab59b03d7bd683365b640da4df602e1d3ef4ec893cbf4d413f6b04402822e9d5ccbcba7dbcc656da50cb0f503fcbe7ef31ada925a992f606715d0ce18
|
data/README.adoc
CHANGED
@@ -77,7 +77,7 @@ Allowed document names are:
|
|
77
77
|
- `{ISSUE}` - number of issue, optional
|
78
78
|
- `{PAGE}` - number of page, optional
|
79
79
|
|
80
|
-
==== Reference structures for CCTF (CCDS),
|
80
|
+
==== Reference structures for CCTF (CCDS), CGPM, CIPM documents
|
81
81
|
|
82
82
|
- `{BODY} {TYPE} {YEAR}-{2_DIGITS_NUMBER}`
|
83
83
|
- `{BODY} {TYPE} {NUMBER} ({YEAR})` or `{BODY} {TYPE} {NUMBER} {YEAR}`
|
@@ -87,7 +87,7 @@ Allowed document names are:
|
|
87
87
|
|
88
88
|
The parts of the structures:
|
89
89
|
|
90
|
-
- `{BODY}` - could be `CCTF` (or old named `CCDS`),
|
90
|
+
- `{BODY}` - could be `CCTF` (formerly named `CCDS`), `CGPM`, or `CIPM`
|
91
91
|
- `{TYPE}` - could be English word: `Resolution`, `Decision`, `Declaration`, `Recommendation`; or French word: `Résolution`, `Décision`, `Déclaration`, `Recommandation`
|
92
92
|
- `{YEAR}` - year of Resolution/Decision/Declaration/Recommendation
|
93
93
|
- `{NUMBER}` - number of Resolution/Decision/Declaration/Recommendation
|
@@ -27,12 +27,12 @@ module RelatonBipm
|
|
27
27
|
# @param structuredidentifier [RelatonBipm::StructuredIdentifier]
|
28
28
|
def initialize(**args) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
29
29
|
if args[:docstatus] && !STATUSES.include?(args[:docstatus].stage.value)
|
30
|
-
warn "[relaton-bipm] Warning: invalid docstatus: #{args[:docstatus].stage.value}. "\
|
30
|
+
warn "[relaton-bipm] Warning: invalid docstatus: #{args[:docstatus].stage.value}. " \
|
31
31
|
"It should be one of: #{STATUSES}"
|
32
32
|
end
|
33
33
|
|
34
34
|
if args[:si_aspect] && !SI_ASPECTS.include?(args[:si_aspect])
|
35
|
-
warn "[relaton-bipm] Warning: invalid si_aspect: #{args[:si_aspect]}. "\
|
35
|
+
warn "[relaton-bipm] Warning: invalid si_aspect: #{args[:si_aspect]}. " \
|
36
36
|
"It should be one of: #{SI_ASPECTS}"
|
37
37
|
end
|
38
38
|
|
@@ -35,8 +35,8 @@ module RelatonBipm
|
|
35
35
|
def magent # rubocop:disable Metrics/MethodLength
|
36
36
|
a = Mechanize.new
|
37
37
|
a.request_headers = {
|
38
|
-
"Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,"\
|
39
|
-
"image/avif,image/webp,image/apng,"\
|
38
|
+
"Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9," \
|
39
|
+
"image/avif,image/webp,image/apng," \
|
40
40
|
"*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
|
41
41
|
"Accept-Encoding" => "gzip, deflate, br",
|
42
42
|
"Accept-Language" => "en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7",
|
@@ -52,12 +52,12 @@ module RelatonBipm
|
|
52
52
|
# @param agent [Mechanize]
|
53
53
|
# @return [RelatonBipm::BipmBibliographicItem]
|
54
54
|
def get_bipm(ref, agent) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
55
|
-
rf = ref.sub(/(?:(\d{1,2})\s)?\(?(\d{4})(?!-)\)?/) do
|
56
|
-
|
57
|
-
end
|
58
|
-
|
59
|
-
TRANSLATIONS.each { |fr, en| rf.sub! fr, en }
|
60
|
-
path = Index.new.search
|
55
|
+
# rf = ref.sub(/(?:(\d{1,2})\s)?\(?(\d{4})(?!-)\)?/) do
|
56
|
+
# "#{$2}-#{$1.to_s.rjust(2, '0')}"
|
57
|
+
# end
|
58
|
+
ref.sub!("CCDS", "CCTF")
|
59
|
+
# TRANSLATIONS.each { |fr, en| rf.sub! fr, en }
|
60
|
+
path = Index.new.search ref
|
61
61
|
return unless path
|
62
62
|
|
63
63
|
url = "#{GH_ENDPOINT}#{path}"
|
@@ -66,6 +66,7 @@ module RelatonBipm
|
|
66
66
|
return unless resp.code == "200"
|
67
67
|
|
68
68
|
yaml = RelatonBib.parse_yaml resp.body, [Date]
|
69
|
+
yaml["fetched"] = Date.today.to_s
|
69
70
|
bib_hash = HashConverter.hash_to_bib yaml
|
70
71
|
BipmBibliographicItem.new(**bib_hash)
|
71
72
|
end
|
@@ -307,11 +308,13 @@ module RelatonBipm
|
|
307
308
|
|
308
309
|
# @param bibtex [BibTeX::Entry]
|
309
310
|
# @return [Array<Hash>]
|
310
|
-
def btcontrib(bibtex)
|
311
|
-
contribs = [
|
312
|
-
|
313
|
-
|
314
|
-
|
311
|
+
def btcontrib(bibtex) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
312
|
+
contribs = []
|
313
|
+
if bibtex.publisher && !bibtex.publisher.empty?
|
314
|
+
org = RelatonBib::Organization.new name: bibtex.publisher.to_s
|
315
|
+
contribs << { entity: org, role: [{ type: "publisher" }] }
|
316
|
+
end
|
317
|
+
return contribs unless bibtex.author && !bibtex.author.empty?
|
315
318
|
|
316
319
|
bibtex.author.split(" and ").inject(contribs) do |mem, name|
|
317
320
|
cname = RelatonBib::LocalizedString.new name, "en", "Latn"
|
@@ -0,0 +1,112 @@
|
|
1
|
+
module RelatonBipm
|
2
|
+
class BipmSiBrochureParser
|
3
|
+
#
|
4
|
+
# Create new parser
|
5
|
+
#
|
6
|
+
# @param [RelatonBipm::DataFetcher] data_fetcher data fetcher
|
7
|
+
#
|
8
|
+
def initialize(data_fetcher)
|
9
|
+
@data_fetcher = data_fetcher
|
10
|
+
end
|
11
|
+
|
12
|
+
#
|
13
|
+
# Parse documents from SI brochure dataset and write them to YAML files
|
14
|
+
#
|
15
|
+
# @param [RelatonBipm::DataFetcher] data_fetcher data fetcher
|
16
|
+
#
|
17
|
+
def self.parse(data_fetcher)
|
18
|
+
new(data_fetcher).parse
|
19
|
+
end
|
20
|
+
|
21
|
+
#
|
22
|
+
# Parse SI brochure documents and write them to YAML files
|
23
|
+
#
|
24
|
+
def parse # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
25
|
+
# puts "Parsing SI brochure..."
|
26
|
+
# puts "Ls #{Dir['*']}"
|
27
|
+
# puts "Ls #{Dir['bipm-si-brochure/*']}"
|
28
|
+
# puts "Ls #{Dir['bipm-si-brochure/site/*']}"
|
29
|
+
# puts "Ls #{Dir['bipm-si-brochure/site/documents/*']}"
|
30
|
+
Dir["bipm-si-brochure/site/documents/*.rxl"].each do |f|
|
31
|
+
puts "Parsing #{f}"
|
32
|
+
docstd = Nokogiri::XML File.read f
|
33
|
+
doc = docstd.at "/bibdata"
|
34
|
+
hash1 = RelatonBipm::XMLParser.from_xml(doc.to_xml).to_hash
|
35
|
+
fix_si_brochure_id hash1
|
36
|
+
outfile = File.join @data_fetcher.output, File.basename(f).sub(/(?:-(?:en|fr))?\.rxl$/, ".yaml")
|
37
|
+
@data_fetcher.index[[hash1["docnumber"] || File.basename(outfile, ".yaml")]] = outfile
|
38
|
+
hash = if File.exist? outfile
|
39
|
+
warn_duplicate = false
|
40
|
+
hash2 = YAML.load_file outfile
|
41
|
+
fix_si_brochure_id hash2
|
42
|
+
deep_merge hash1, hash2
|
43
|
+
else
|
44
|
+
warn_duplicate = true
|
45
|
+
hash1
|
46
|
+
end
|
47
|
+
item = RelatonBipm::BipmBibliographicItem.from_hash(**hash)
|
48
|
+
@data_fetcher.write_file outfile, item, warn_duplicate: warn_duplicate
|
49
|
+
puts "Saved to #{outfile}"
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
#
|
54
|
+
# Update ID of SI brochure
|
55
|
+
#
|
56
|
+
# @param [Hash] hash hash of bibitem
|
57
|
+
#
|
58
|
+
# @return [void]
|
59
|
+
#
|
60
|
+
def fix_si_brochure_id(hash) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
61
|
+
did = hash["docid"].detect { |id| id["type"] == "BIPM" }
|
62
|
+
did["primary"] = true
|
63
|
+
return unless did["id"] == "BIPM Brochure"
|
64
|
+
|
65
|
+
isbn = hash["docid"].detect { |id| id["type"] == "ISBN" }
|
66
|
+
num = if isbn && isbn["id"] == "978-92-822-2272-0"
|
67
|
+
"SI Brochure"
|
68
|
+
else
|
69
|
+
"SI Brochure, Appendix 4"
|
70
|
+
end
|
71
|
+
hash["id"] = hash["id"].sub(/(?<=^BIPM)Brochure$/i, num.gsub(/[,\s]/, ""))
|
72
|
+
hash["docnumber"] = hash["docnumber"].sub(/^Brochure$/i, num)
|
73
|
+
did["id"] = did["id"].sub(/(?<=^BIPM\s)Brochure$/i, num)
|
74
|
+
end
|
75
|
+
|
76
|
+
#
|
77
|
+
# Deep merge two hashes
|
78
|
+
#
|
79
|
+
# @param [Hash] hash1
|
80
|
+
# @param [Hash] hash2
|
81
|
+
#
|
82
|
+
# @return [Hash] Merged hash
|
83
|
+
#
|
84
|
+
def deep_merge(hash1, hash2) # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
85
|
+
hash1.merge(hash2) do |_, oldval, newval|
|
86
|
+
if oldval.is_a?(Hash) && newval.is_a?(Hash)
|
87
|
+
deep_merge(oldval, newval)
|
88
|
+
elsif oldval.is_a?(Array) && newval.is_a?(Array)
|
89
|
+
(oldval + newval).uniq { |i| downcase_all i }
|
90
|
+
else
|
91
|
+
newval || oldval
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
#
|
97
|
+
# Downcase all values in hash or array
|
98
|
+
#
|
99
|
+
# @param [Array, Hash, String] content hash, array or string
|
100
|
+
#
|
101
|
+
# @return [Array, Hash, String] hash, array or string with downcased values
|
102
|
+
#
|
103
|
+
def downcase_all(content)
|
104
|
+
case content
|
105
|
+
when Hash then content.transform_values { |v| downcase_all v }
|
106
|
+
when Array then content.map { |v| downcase_all v }
|
107
|
+
when String then content.downcase
|
108
|
+
else content
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -1,5 +1,7 @@
|
|
1
1
|
module RelatonBipm
|
2
2
|
class DataFetcher
|
3
|
+
attr_reader :output, :format, :ext, :files, :index
|
4
|
+
|
3
5
|
#
|
4
6
|
# Initialize fetcher
|
5
7
|
#
|
@@ -39,420 +41,12 @@ module RelatonBipm
|
|
39
41
|
#
|
40
42
|
def fetch(source)
|
41
43
|
case source
|
42
|
-
when "bipm-data-outcomes" then
|
43
|
-
when "bipm-si-brochure" then
|
44
|
+
when "bipm-data-outcomes" then DataOutcomesParser.parse(self)
|
45
|
+
when "bipm-si-brochure" then BipmSiBrochureParser.parse(self)
|
44
46
|
end
|
45
47
|
File.write @index_path, @index.to_yaml, encoding: "UTF-8"
|
46
48
|
end
|
47
49
|
|
48
|
-
#
|
49
|
-
# Parse BIPM meeting and write them to YAML files
|
50
|
-
#
|
51
|
-
def parse_bipm_data_outcomes
|
52
|
-
source_path = File.join "bipm-data-outcomes", "{cctf,cgpm,cipm}"
|
53
|
-
Dir[source_path].each { |body_dir| fetch_body(body_dir) }
|
54
|
-
end
|
55
|
-
|
56
|
-
#
|
57
|
-
# Parse SI brochure and write them to YAML files
|
58
|
-
#
|
59
|
-
def parse_si_brochure # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
60
|
-
# puts "Parsing SI brochure..."
|
61
|
-
# puts "Ls #{Dir['*']}"
|
62
|
-
# puts "Ls #{Dir['bipm-si-brochure/*']}"
|
63
|
-
# puts "Ls #{Dir['bipm-si-brochure/site/*']}"
|
64
|
-
# puts "Ls #{Dir['bipm-si-brochure/site/documents/*']}"
|
65
|
-
Dir["bipm-si-brochure/site/documents/*.rxl"].each do |f|
|
66
|
-
puts "Parsing #{f}"
|
67
|
-
docstd = Nokogiri::XML File.read f
|
68
|
-
doc = docstd.at "/bibdata"
|
69
|
-
hash1 = RelatonBipm::XMLParser.from_xml(doc.to_xml).to_hash
|
70
|
-
fix_si_brochure_id hash1
|
71
|
-
outfile = File.join @output, File.basename(f).sub(/(?:-(?:en|fr))?\.rxl$/, ".yaml")
|
72
|
-
@index[[hash1["docnumber"] || File.basename(outfile, ".yaml")]] = outfile
|
73
|
-
hash = if File.exist? outfile
|
74
|
-
warn_duplicate = false
|
75
|
-
hash2 = YAML.load_file outfile
|
76
|
-
fix_si_brochure_id hash2
|
77
|
-
deep_merge hash1, hash2
|
78
|
-
else
|
79
|
-
warn_duplicate = true
|
80
|
-
hash1
|
81
|
-
end
|
82
|
-
item = RelatonBipm::BipmBibliographicItem.from_hash(**hash)
|
83
|
-
write_file outfile, item, warn_duplicate: warn_duplicate
|
84
|
-
puts "Saved to #{outfile}"
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
#
|
89
|
-
# Update ID of SI brochure
|
90
|
-
#
|
91
|
-
# @param [Hash] hash hash of bibitem
|
92
|
-
#
|
93
|
-
# @return [void]
|
94
|
-
#
|
95
|
-
def fix_si_brochure_id(hash) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
96
|
-
did = hash["docid"].detect { |id| id["type"] == "BIPM" }
|
97
|
-
did["primary"] = true
|
98
|
-
return unless did["id"] == "BIPM Brochure"
|
99
|
-
|
100
|
-
isbn = hash["docid"].detect { |id| id["type"] == "ISBN" }
|
101
|
-
num = if isbn && isbn["id"] == "978-92-822-2272-0"
|
102
|
-
"SI Brochure"
|
103
|
-
else
|
104
|
-
"SI Brochure, Appendix 4"
|
105
|
-
end
|
106
|
-
hash["id"] = hash["id"].sub(/(?<=^BIPM)Brochure$/i, num.gsub(/[,\s]/, ""))
|
107
|
-
hash["docnumber"] = hash["docnumber"].sub(/^Brochure$/i, num)
|
108
|
-
did["id"] = did["id"].sub(/(?<=^BIPM\s)Brochure$/i, num)
|
109
|
-
end
|
110
|
-
|
111
|
-
#
|
112
|
-
# Deep merge two hashes
|
113
|
-
#
|
114
|
-
# @param [Hash] hash1
|
115
|
-
# @param [Hash] hash2
|
116
|
-
#
|
117
|
-
# @return [Hash] Merged hash
|
118
|
-
#
|
119
|
-
def deep_merge(hash1, hash2) # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
120
|
-
hash1.merge(hash2) do |_, oldval, newval|
|
121
|
-
if oldval.is_a?(Hash) && newval.is_a?(Hash)
|
122
|
-
deep_merge(oldval, newval)
|
123
|
-
elsif oldval.is_a?(Array) && newval.is_a?(Array)
|
124
|
-
oldval.concat(newval).uniq { |i| downcase_all i }
|
125
|
-
# oldval | newval
|
126
|
-
else
|
127
|
-
newval || oldval
|
128
|
-
end
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
def downcase_all(content)
|
133
|
-
case content
|
134
|
-
when Hash then content.transform_values { |v| downcase_all v }
|
135
|
-
when Array then content.map { |v| downcase_all v }
|
136
|
-
when String then content.downcase
|
137
|
-
else content
|
138
|
-
end
|
139
|
-
end
|
140
|
-
|
141
|
-
#
|
142
|
-
# Search for English meetings in the body directory
|
143
|
-
#
|
144
|
-
# @param [String] dir body directory
|
145
|
-
#
|
146
|
-
def fetch_body(dir)
|
147
|
-
body = dir.split("/").last.upcase
|
148
|
-
Dir[File.join(dir, "*-en")].each { |type_dir| fetch_type type_dir, body }
|
149
|
-
end
|
150
|
-
|
151
|
-
#
|
152
|
-
# Search for meetings
|
153
|
-
#
|
154
|
-
# @param [String] dir meeting directory
|
155
|
-
# @param [String] body name of body
|
156
|
-
#
|
157
|
-
def fetch_type(dir, body) # rubocop:disable Metrics/AbcSize
|
158
|
-
type = dir.split("/").last.split("-").first.sub(/s$/, "")
|
159
|
-
body_dir = File.join @output, body.downcase
|
160
|
-
FileUtils.mkdir_p body_dir
|
161
|
-
outdir = File.join body_dir, type.downcase
|
162
|
-
FileUtils.mkdir_p outdir
|
163
|
-
Dir[File.join(dir, "*.{yml,yaml}")].each { |en_file| fetch_meeting en_file, body, type, outdir }
|
164
|
-
end
|
165
|
-
|
166
|
-
#
|
167
|
-
# Create and write BIPM meeting/resolution
|
168
|
-
#
|
169
|
-
# @param [String] en_file Path to English file
|
170
|
-
# @param [String] body Body name
|
171
|
-
# @param [String] type Type of Recommendation/Decision/Resolution
|
172
|
-
# @param [String] dir output directory
|
173
|
-
#
|
174
|
-
def fetch_meeting(en_file, body, type, dir) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
175
|
-
en = RelatonBib.parse_yaml File.read(en_file, encoding: "UTF-8"), [Date]
|
176
|
-
en_md = en["metadata"]
|
177
|
-
fr_file = en_file.sub "en", "fr"
|
178
|
-
fr = RelatonBib.parse_yaml File.read(fr_file, encoding: "UTF-8"), [Date]
|
179
|
-
fr_md = fr["metadata"]
|
180
|
-
gh_src = "https://raw.githubusercontent.com/metanorma/bipm-data-outcomes/"
|
181
|
-
src_en = gh_src + en_file.split("/")[-3..].unshift("main").join("/")
|
182
|
-
src_fr = gh_src + fr_file.split("/")[-3..].unshift("main").join("/")
|
183
|
-
src = [
|
184
|
-
{ type: "src", content: src_en, language: "en", script: "Latn" },
|
185
|
-
{ type: "src", content: src_fr, language: "fr", script: "Latn" },
|
186
|
-
]
|
187
|
-
|
188
|
-
/^(?<num>\d+)(?:-_(?<part>\d+))?-\d{4}$/ =~ en_md["url"].split("/").last
|
189
|
-
id = "#{body} #{type.capitalize} #{num}"
|
190
|
-
file = "#{num}.yaml"
|
191
|
-
path = File.join dir, file
|
192
|
-
link = "https://raw.githubusercontent.com/relaton/relaton-data-bipm/master/#{path}"
|
193
|
-
hash = bibitem body: body, type: type, en: en_md, fr: fr_md, id: id, num: num, src: src, pdf: en["pdf"]
|
194
|
-
if @files.include?(path) && part
|
195
|
-
add_part hash, part
|
196
|
-
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
197
|
-
yaml = RelatonBib.parse_yaml(File.read(path, encoding: "UTF-8"), [Date])
|
198
|
-
has_part_item = RelatonBipm::BipmBibliographicItem.from_hash(yaml)
|
199
|
-
has_part_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: item)
|
200
|
-
write_file path, has_part_item, warn_duplicate: false
|
201
|
-
path = File.join dir, "#{num}-#{part}.yaml"
|
202
|
-
elsif part
|
203
|
-
hash[:title].each { |t| t[:content] = t[:content].sub(/\s\(.+\)$/, "") }
|
204
|
-
# hash[:link] = [{ type: "src", content: link }]
|
205
|
-
h = bibitem body: body, type: type, en: en_md, fr: fr_md, id: id, num: num, src: src, pdf: en["pdf"]
|
206
|
-
add_part h, part
|
207
|
-
part_item = RelatonBipm::BipmBibliographicItem.new(**h)
|
208
|
-
part_item_path = File.join dir, "#{num}-#{part}.yaml"
|
209
|
-
write_file part_item_path, part_item
|
210
|
-
@index[[h[:docnumber]]] = part_item_path
|
211
|
-
hash[:relation] = [RelatonBib::DocumentRelation.new(type: "partOf", bibitem: part_item)]
|
212
|
-
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
213
|
-
else
|
214
|
-
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
215
|
-
end
|
216
|
-
write_file path, item
|
217
|
-
@index[[hash[:docnumber]]] = path
|
218
|
-
fetch_resolution body: body, en: en, fr: fr, dir: dir, src: src, num: num
|
219
|
-
end
|
220
|
-
|
221
|
-
#
|
222
|
-
# Parse BIPM resolutions and write them to YAML files
|
223
|
-
#
|
224
|
-
# @param [String] body body name
|
225
|
-
# @param [Hash] eng English metadata
|
226
|
-
# @param [Hash] frn French metadata
|
227
|
-
# @param [String] dir output directory
|
228
|
-
# @param [Array<Hash>] src links to bipm-data-outcomes
|
229
|
-
# @param [String] num number of meeting
|
230
|
-
#
|
231
|
-
def fetch_resolution(**args) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
232
|
-
args[:en]["resolutions"].each.with_index do |r, i| # rubocop:disable Metrics/BlockLength
|
233
|
-
hash = {
|
234
|
-
type: "proceedings", title: [],
|
235
|
-
doctype: r["type"], place: [RelatonBib::Place.new(city: "Paris")]
|
236
|
-
}
|
237
|
-
hash[:title] << title(r["title"], "en") if r["title"]
|
238
|
-
fr_r = args[:fr]["resolutions"].fetch(i, nil)
|
239
|
-
hash[:link] = [{ type: "citation", content: r["url"], language: "en", script: "Latn" }]
|
240
|
-
if fr_r
|
241
|
-
hash[:title] << title(fr_r["title"], "fr") if fr_r["title"]
|
242
|
-
hash[:link] << { type: "citation", content: fr_r["url"], language: "fr", script: "Latn" }
|
243
|
-
end
|
244
|
-
hash[:link] += args[:src]
|
245
|
-
hash[:link] << { type: "pdf", content: r["reference"] } if r["reference"]
|
246
|
-
date = r["dates"].first.to_s
|
247
|
-
hash[:date] = [{ type: "published", on: date }]
|
248
|
-
num = r["identifier"].to_s.split("-").last
|
249
|
-
year = date.split("-").first
|
250
|
-
num = "0" if num == year
|
251
|
-
num_justed = num.rjust 2, "0"
|
252
|
-
type = r["type"].capitalize
|
253
|
-
id = "#{args[:body]} #{type}"
|
254
|
-
hash[:id] = "#{args[:body]}-#{type}-#{year}"
|
255
|
-
if num.to_i.positive?
|
256
|
-
id += " #{num}"
|
257
|
-
hash[:id] += "-#{num_justed}"
|
258
|
-
end
|
259
|
-
id += " (#{year})"
|
260
|
-
hash[:docid] = [
|
261
|
-
make_docid(id: id, type: "BIPM", primary: true),
|
262
|
-
make_docid(id: id, type: "BIPM", primary: true, language: "en", script: "Latn"),
|
263
|
-
id_fr(id),
|
264
|
-
]
|
265
|
-
hash[:docnumber] = id
|
266
|
-
hash[:language] = %w[en fr]
|
267
|
-
hash[:script] = ["Latn"]
|
268
|
-
hash[:contributor] = contributors date, args[:body]
|
269
|
-
hash[:structuredidentifier] = RelatonBipm::StructuredIdentifier.new docnumber: num
|
270
|
-
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
271
|
-
file = year
|
272
|
-
file += "-#{num_justed}" if num.size < 4
|
273
|
-
file += ".yaml"
|
274
|
-
out_dir = File.join args[:dir], r["type"].downcase
|
275
|
-
FileUtils.mkdir_p out_dir
|
276
|
-
path = File.join out_dir, file
|
277
|
-
write_file path, item
|
278
|
-
@index[["#{args[:body]} #{type} #{year}-#{num_justed}", "#{args[:body]} #{type} #{args[:num]}-#{num_justed}"]] = path
|
279
|
-
end
|
280
|
-
end
|
281
|
-
|
282
|
-
#
|
283
|
-
# Create contributors
|
284
|
-
#
|
285
|
-
# @param [Strign] date date of publication
|
286
|
-
# @param [Strign] body organization abbreviation (CCTF, CIPM, CGPM)
|
287
|
-
#
|
288
|
-
# @return [Array<Hash>] contributors
|
289
|
-
#
|
290
|
-
def contributors(date, body) # rubocop:disable Metrics/MethodLength
|
291
|
-
case body
|
292
|
-
when "CCTF" then cctf_org date
|
293
|
-
when "CGPM" then cgpm_org
|
294
|
-
when "CIPM" then cipm_org
|
295
|
-
else []
|
296
|
-
end.reduce(
|
297
|
-
[{ entity: {
|
298
|
-
url: "www.bipm.org",
|
299
|
-
name: "Bureau International des Poids et Mesures",
|
300
|
-
abbreviation: "BIPM",
|
301
|
-
},
|
302
|
-
role: [{ type: "publisher" }] }],
|
303
|
-
) { |a, e| a << { entity: e, role: [{ type: "author" }] } }
|
304
|
-
end
|
305
|
-
|
306
|
-
#
|
307
|
-
# Create CCTF organization
|
308
|
-
#
|
309
|
-
# @param [String] date date of meeting
|
310
|
-
#
|
311
|
-
# @return [Array<Hash>] CCTF organization
|
312
|
-
#
|
313
|
-
def cctf_org(date) # rubocop:disable Metrics/MethodLength
|
314
|
-
if Date.parse(date).year < 1999
|
315
|
-
nms = [
|
316
|
-
{ content: "Consultative Committee for the Definition of the Second", language: "en" },
|
317
|
-
{ content: "Comité Consultatif pour la Définition de la Seconde", language: "fr" },
|
318
|
-
]
|
319
|
-
organization nms, "CCDS"
|
320
|
-
else
|
321
|
-
nms = [
|
322
|
-
{ content: "Consultative Committee for Time and Frequency", language: "en" },
|
323
|
-
{ content: "Comité consultatif du temps et des fréquences", language: "fr" },
|
324
|
-
]
|
325
|
-
organization nms, "CCTF"
|
326
|
-
end
|
327
|
-
end
|
328
|
-
|
329
|
-
#
|
330
|
-
# Create organization
|
331
|
-
#
|
332
|
-
# @param [Array<Hash>] names organization names in different languages
|
333
|
-
# @param [String] abbr abbreviation
|
334
|
-
#
|
335
|
-
# @return [Array<Hash>] organization
|
336
|
-
#
|
337
|
-
def organization(names, abbr)
|
338
|
-
names.each { |ctrb| ctrb[:script] = "Latn" }
|
339
|
-
[{ name: names, abbreviation: { content: abbr, language: ["en", "fr"], script: "Latn" } }]
|
340
|
-
end
|
341
|
-
|
342
|
-
#
|
343
|
-
# Create CGPM organization
|
344
|
-
#
|
345
|
-
# @return [Array<Hash>] CGPM organization
|
346
|
-
#
|
347
|
-
def cgpm_org
|
348
|
-
nms = [
|
349
|
-
{ content: "General Conference on Weights and Measures", language: "en" },
|
350
|
-
{ content: "Conférence Générale des Poids et Mesures", language: "fr" },
|
351
|
-
]
|
352
|
-
organization nms, "CGPM"
|
353
|
-
end
|
354
|
-
|
355
|
-
#
|
356
|
-
# Create CIPM organization
|
357
|
-
#
|
358
|
-
# @return [Array<Hash>] CIPM organization
|
359
|
-
#
|
360
|
-
def cipm_org
|
361
|
-
names = [
|
362
|
-
{ content: "International Committee for Weights and Measures", language: "en" },
|
363
|
-
{ content: "Comité International des Poids et Mesures", language: "fr" },
|
364
|
-
]
|
365
|
-
organization names, "CIPM"
|
366
|
-
end
|
367
|
-
|
368
|
-
#
|
369
|
-
# Create a title
|
370
|
-
#
|
371
|
-
# @param [String] content title content
|
372
|
-
# @param [String] language language code (en, fr)
|
373
|
-
#
|
374
|
-
# @return [Hash] title
|
375
|
-
#
|
376
|
-
def title(content, language)
|
377
|
-
{ content: content, language: language, script: "Latn" }
|
378
|
-
end
|
379
|
-
|
380
|
-
#
|
381
|
-
# Add part to ID and structured identifier
|
382
|
-
#
|
383
|
-
# @param [Hash] hash Hash of BIPM meeting
|
384
|
-
# @param [String] session number of meeting
|
385
|
-
#
|
386
|
-
def add_part(hash, part)
|
387
|
-
hash[:id] += "-#{part}"
|
388
|
-
hash[:docnumber] += "-#{part}"
|
389
|
-
id = hash[:docid][0].instance_variable_get(:@id)
|
390
|
-
id += "-#{part}"
|
391
|
-
hash[:docid][0].instance_variable_set(:@id, id)
|
392
|
-
hash[:structuredidentifier].instance_variable_set :@part, part
|
393
|
-
end
|
394
|
-
|
395
|
-
#
|
396
|
-
# Create hash from BIPM meeting/resolution
|
397
|
-
#
|
398
|
-
# @param [Hash] **args Hash of arguments
|
399
|
-
# @option args [String] :type Type of meeting/resolution
|
400
|
-
# @option args [Hash] :en Hash of English metadata
|
401
|
-
# @option args [Hash] :fr Hash of French metadata
|
402
|
-
# @option args [String] :id ID of meeting/resolution
|
403
|
-
# @option args [String] :num Number of meeting/resolution
|
404
|
-
# @option args [Array<Hash>] :src Array of links to bipm-data-outcomes
|
405
|
-
# @option args [String] :pdf link to PDF
|
406
|
-
#
|
407
|
-
# @return [Hash] Hash of BIPM meeting/resolution
|
408
|
-
#
|
409
|
-
def bibitem(**args) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
|
410
|
-
hash = { title: [], type: "proceedings", doctype: args[:type],
|
411
|
-
place: [RelatonBib::Place.new(city: "Paris")] }
|
412
|
-
hash[:title] << title(args[:en]["title"], "en") if args[:en]["title"]
|
413
|
-
hash[:title] << title(args[:fr]["title"], "fr") if args[:fr]["title"]
|
414
|
-
hash[:date] = [{ type: "published", on: args[:en]["date"] }]
|
415
|
-
hash[:docid] = [
|
416
|
-
make_docid(id: args[:id], type: "BIPM", primary: true),
|
417
|
-
make_docid(id: args[:id], type: "BIPM", primary: true, language: "en", script: "Latn"),
|
418
|
-
id_fr(args[:id]),
|
419
|
-
]
|
420
|
-
hash[:id] = args[:id].gsub " ", "-"
|
421
|
-
hash[:docnumber] = args[:id]
|
422
|
-
hash[:link] = [
|
423
|
-
{ type: "citation", content: args[:en]["url"], language: "en", script: "Latn" },
|
424
|
-
{ type: "citation", content: args[:fr]["url"], language: "fr", script: "Latn" },
|
425
|
-
]
|
426
|
-
RelatonBib.array(args[:pdf]).each { |pdf| hash[:link] << { type: "pdf", content: pdf } }
|
427
|
-
hash[:link] += args[:src] if args[:src]&.any?
|
428
|
-
hash[:language] = %w[en fr]
|
429
|
-
hash[:script] = ["Latn"]
|
430
|
-
hash[:contributor] = contributors args[:en]["date"], args[:body]
|
431
|
-
hash[:structuredidentifier] = RelatonBipm::StructuredIdentifier.new docnumber: args[:num]
|
432
|
-
hash
|
433
|
-
end
|
434
|
-
|
435
|
-
def id_fr(en_id)
|
436
|
-
tr = BipmBibliography::TRANSLATIONS.detect { |_, v| en_id.include? v }
|
437
|
-
id = en_id.sub tr[1], tr[0]
|
438
|
-
make_docid(id: id, type: "BIPM", primary: true, language: "fr", script: "Latn")
|
439
|
-
end
|
440
|
-
|
441
|
-
#
|
442
|
-
# Create doucment ID
|
443
|
-
#
|
444
|
-
# @param [String] id ID of document
|
445
|
-
# @param [String] type Type of document
|
446
|
-
# @param [Boolean] primary Primary document
|
447
|
-
# @param [String] language Language of document
|
448
|
-
# @param [String] script Script of document
|
449
|
-
#
|
450
|
-
# @return [RelatonBib::DocumentIdentifier] Document ID
|
451
|
-
#
|
452
|
-
def make_docid(**args)
|
453
|
-
RelatonBib::DocumentIdentifier.new(**args)
|
454
|
-
end
|
455
|
-
|
456
50
|
#
|
457
51
|
# Save document to file
|
458
52
|
#
|
@@ -0,0 +1,450 @@
|
|
1
|
+
module RelatonBipm
|
2
|
+
class DataOutcomesParser
|
3
|
+
TYPEABBREV = {
|
4
|
+
"Resolution" => "RES",
|
5
|
+
"Recommendation" => "REC",
|
6
|
+
"Decision" => "DECN",
|
7
|
+
"Statement" => "DECL",
|
8
|
+
}.freeze
|
9
|
+
|
10
|
+
#
|
11
|
+
# Create data-outcomes parser
|
12
|
+
#
|
13
|
+
# @param [RelatonBipm::DataFetcher] data_fetcher data fetcher
|
14
|
+
#
|
15
|
+
def initialize(data_fetcher)
|
16
|
+
@data_fetcher = data_fetcher
|
17
|
+
end
|
18
|
+
|
19
|
+
#
|
20
|
+
# Parse documents from data-outcomes dataset and write them to YAML files
|
21
|
+
#
|
22
|
+
# @param [RelatonBipm::DataFetcher] data_fetcher data fetcher
|
23
|
+
#
|
24
|
+
def self.parse(data_fetcher)
|
25
|
+
new(data_fetcher).parse
|
26
|
+
end
|
27
|
+
|
28
|
+
#
|
29
|
+
# Parse BIPM meetings and write them to YAML files
|
30
|
+
#
|
31
|
+
def parse
|
32
|
+
dirs = "cctf,cgpm,cipm,ccauv,ccem,ccl,ccm,ccpr,ccqm,ccri,cct,ccu,jcgm,jcrb"
|
33
|
+
source_path = File.join "bipm-data-outcomes", "{#{dirs}}"
|
34
|
+
Dir[source_path].each { |body_dir| fetch_body(body_dir) }
|
35
|
+
end
|
36
|
+
|
37
|
+
#
|
38
|
+
# Search for English meetings in the body directory
|
39
|
+
#
|
40
|
+
# @param [String] dir body directory
|
41
|
+
#
|
42
|
+
def fetch_body(dir)
|
43
|
+
body = dir.split("/").last.upcase
|
44
|
+
Dir[File.join(dir, "*-en")].each { |type_dir| fetch_type type_dir, body }
|
45
|
+
end
|
46
|
+
|
47
|
+
#
|
48
|
+
# Search for meetings
|
49
|
+
#
|
50
|
+
# @param [String] dir meeting directory
|
51
|
+
# @param [String] body name of body
|
52
|
+
#
|
53
|
+
def fetch_type(dir, body) # rubocop:disable Metrics/AbcSize
|
54
|
+
type = dir.split("/").last.split("-").first.sub(/s$/, "")
|
55
|
+
body_dir = File.join @data_fetcher.output, body.downcase
|
56
|
+
FileUtils.mkdir_p body_dir
|
57
|
+
outdir = File.join body_dir, type.downcase
|
58
|
+
FileUtils.mkdir_p outdir
|
59
|
+
Dir[File.join(dir, "*.{yml,yaml}")].each { |en_file| fetch_meeting en_file, body, type, outdir }
|
60
|
+
end
|
61
|
+
|
62
|
+
#
|
63
|
+
# Create and write BIPM meeting/resolution
|
64
|
+
#
|
65
|
+
# @param [String] en_file Path to English file
|
66
|
+
# @param [String] body Body name
|
67
|
+
# @param [String] type Type of Recommendation/Decision/Resolution
|
68
|
+
# @param [String] dir output directory
|
69
|
+
#
|
70
|
+
def fetch_meeting(en_file, body, type, dir) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
71
|
+
en = RelatonBib.parse_yaml File.read(en_file, encoding: "UTF-8"), [Date]
|
72
|
+
en_md = en["metadata"]
|
73
|
+
fr_file = en_file.sub "en", "fr"
|
74
|
+
fr = RelatonBib.parse_yaml File.read(fr_file, encoding: "UTF-8"), [Date]
|
75
|
+
fr_md = fr["metadata"]
|
76
|
+
gh_src = "https://raw.githubusercontent.com/metanorma/bipm-data-outcomes/"
|
77
|
+
src_en = gh_src + en_file.split("/")[-3..].unshift("main").join("/")
|
78
|
+
src_fr = gh_src + fr_file.split("/")[-3..].unshift("main").join("/")
|
79
|
+
src = [
|
80
|
+
{ type: "src", content: src_en, language: "en", script: "Latn" },
|
81
|
+
{ type: "src", content: src_fr, language: "fr", script: "Latn" },
|
82
|
+
]
|
83
|
+
|
84
|
+
/^(?<num>\d+)(?:-_(?<part>\d+))?-\d{4}$/ =~ en_md["url"].split("/").last
|
85
|
+
file = "#{num}.yaml"
|
86
|
+
path = File.join dir, file
|
87
|
+
hash = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
|
88
|
+
if @data_fetcher.files.include?(path) && part
|
89
|
+
add_part hash, part
|
90
|
+
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
91
|
+
yaml = RelatonBib.parse_yaml(File.read(path, encoding: "UTF-8"), [Date])
|
92
|
+
has_part_item = RelatonBipm::BipmBibliographicItem.from_hash(yaml)
|
93
|
+
has_part_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: item)
|
94
|
+
@data_fetcher.write_file path, has_part_item, warn_duplicate: false
|
95
|
+
path = File.join dir, "#{num}-#{part}.yaml"
|
96
|
+
elsif part
|
97
|
+
hash[:title].each { |t| t[:content] = t[:content].sub(/\s\(.+\)$/, "") }
|
98
|
+
h = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
|
99
|
+
add_part h, part
|
100
|
+
part_item = RelatonBipm::BipmBibliographicItem.new(**h)
|
101
|
+
part_item_path = File.join dir, "#{num}-#{part}.yaml"
|
102
|
+
@data_fetcher.write_file part_item_path, part_item
|
103
|
+
add_to_index part_item, part_item_path
|
104
|
+
hash[:relation] = [RelatonBib::DocumentRelation.new(type: "partOf", bibitem: part_item)]
|
105
|
+
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
106
|
+
else
|
107
|
+
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
108
|
+
end
|
109
|
+
@data_fetcher.write_file path, item
|
110
|
+
add_to_index item, path
|
111
|
+
fetch_resolution body: body, en: en, fr: fr, dir: dir, src: src, num: num
|
112
|
+
end
|
113
|
+
|
114
|
+
#
|
115
|
+
# Parse BIPM resolutions and write them to YAML files
|
116
|
+
#
|
117
|
+
# @param [String] body body name
|
118
|
+
# @param [Hash] en English metadata
|
119
|
+
# @param [Hash] fr French metadata
|
120
|
+
# @param [String] dir output directory
|
121
|
+
# @param [Array<Hash>] src links to bipm-data-outcomes
|
122
|
+
# @param [String] num number of meeting
|
123
|
+
#
|
124
|
+
def fetch_resolution(**args) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  # Each English resolution is paired with the French one at the same index.
  args[:en]["resolutions"].each_with_index do |en_res, idx| # rubocop:disable Metrics/BlockLength
    fr_res = args[:fr]["resolutions"].fetch(idx, nil)

    # Titles and citation links: English first, French only when a counterpart exists.
    titles = []
    titles << title(en_res["title"], "en") if en_res["title"]
    links = [{ type: "citation", content: en_res["url"], language: "en", script: "Latn" }]
    if fr_res
      titles << title(fr_res["title"], "fr") if fr_res["title"]
      links << { type: "citation", content: fr_res["url"], language: "fr", script: "Latn" }
    end
    links += args[:src]
    links << { type: "pdf", content: en_res["reference"] } if en_res["reference"]

    # An identifier equal to the year is treated as "no number" ("0").
    date = en_res["dates"].first.to_s
    year = date.split("-").first
    raw_num = en_res["identifier"].to_s
    num = raw_num == year ? "0" : raw_num
    padded_num = num.rjust(2, "0")
    type = en_res["type"].capitalize
    docnum = create_docnum(args[:body], type, num, date)

    item = RelatonBipm::BipmBibliographicItem.new(
      type: "proceedings",
      title: titles,
      doctype: en_res["type"],
      place: [RelatonBib::Place.new(city: "Paris")],
      link: links,
      date: [{ type: "published", on: date }],
      id: create_id(args[:body], type, padded_num, date),
      docid: create_docids(docnum),
      docnumber: docnum,
      language: %w[en fr],
      script: ["Latn"],
      contributor: contributors(date, args[:body]),
      structuredidentifier: RelatonBipm::StructuredIdentifier.new(docnumber: num),
    )

    # Output goes to <dir>/<type>/<year>-<zero padded number>.yaml
    out_dir = File.join(args[:dir], en_res["type"].downcase)
    FileUtils.mkdir_p out_dir
    path = File.join(out_dir, "#{year}-#{padded_num}.yaml")
    @data_fetcher.write_file path, item
    add_to_index item, path
  end
end
|
165
|
+
|
166
|
+
#
|
167
|
+
# Add item to index
|
168
|
+
#
|
169
|
+
# @param [RelatonBipm::BipmBibliographicItem] item bibliographic item
|
170
|
+
# @param [String] path path to YAML file
|
171
|
+
#
|
172
|
+
def add_to_index(item, path) # rubocop:disable Metrics/AbcSize
  docnumber = item.docnumber
  keys = [docnumber]

  # Only the first type name found in the document number is abbreviated.
  long_name, abbrev = TYPEABBREV.find { |name, _| docnumber.include? name }
  if long_name
    # Adds "<abbreviated docnumber> (YYYY, EN)" and "... (YYYY, FR)" variants.
    %w[EN FR].each do |lang|
      keys << docnumber.sub(long_name, abbrev).sub(/(\(\d{4})(\))/, "\\1, #{lang}\\2")
    end
  end

  keys << item.docidentifier.find { |docid| docid.language == "fr" }.id
  @data_fetcher.index[keys] = path
end
|
184
|
+
|
185
|
+
#
|
186
|
+
# Create contributors
|
187
|
+
#
|
188
|
+
# @param [String] date date of publication
|
189
|
+
# @param [String] body organization abbreviation (CCTF, CIPM, CGPM)
|
190
|
+
#
|
191
|
+
# @return [Array<Hash>] contributors
|
192
|
+
#
|
193
|
+
def contributors(date, body) # rubocop:disable Metrics/MethodLength
  # BIPM is always listed first, as publisher.
  publisher = {
    entity: {
      url: "www.bipm.org",
      name: "Bureau International des Poids et Mesures",
      abbreviation: "BIPM",
    },
    role: [{ type: "publisher" }],
  }

  # Bodies other than CCTF, CGPM and CIPM contribute no author entry.
  orgs =
    case body
    when "CCTF" then cctf_org(date)
    when "CGPM" then cgpm_org
    when "CIPM" then cipm_org
    else []
    end

  [publisher] + orgs.map { |org| { entity: org, role: [{ type: "author" }] } }
end
|
208
|
+
|
209
|
+
#
|
210
|
+
# Create CCTF organization
|
211
|
+
#
|
212
|
+
# @param [String] date date of meeting
|
213
|
+
#
|
214
|
+
# @return [Array<Hash>] CCTF organization
|
215
|
+
#
|
216
|
+
def cctf_org(date) # rubocop:disable Metrics/MethodLength
  # Meetings dated before 1999 are attributed to CCDS, later ones to CCTF.
  en_name, fr_name, abbr =
    if Date.parse(date).year < 1999
      ["Consultative Committee for the Definition of the Second",
       "Comité Consultatif pour la Définition de la Seconde",
       "CCDS"]
    else
      ["Consultative Committee for Time and Frequency",
       "Comité consultatif du temps et des fréquences",
       "CCTF"]
    end

  organization [{ content: en_name, language: "en" }, { content: fr_name, language: "fr" }], abbr
end
|
231
|
+
|
232
|
+
#
|
233
|
+
# Create organization
|
234
|
+
#
|
235
|
+
# @param [Array<Hash>] names organization names in different languages
|
236
|
+
# @param [String] abbr abbreviation
|
237
|
+
#
|
238
|
+
# @return [Array<Hash>] organization
|
239
|
+
#
|
240
|
+
def organization(names, abbr)
  # The caller's name hashes are tagged in place with the Latin script.
  names.each { |name| name.store(:script, "Latn") }
  abbreviation = { content: abbr, language: %w[en fr], script: "Latn" }
  [{ name: names, abbreviation: abbreviation }]
end
|
244
|
+
|
245
|
+
#
|
246
|
+
# Create CGPM organization
|
247
|
+
#
|
248
|
+
# @return [Array<Hash>] CGPM organization
|
249
|
+
#
|
250
|
+
def cgpm_org
  # English and French names of the General Conference, abbreviated "CGPM".
  organization(
    [
      { content: "General Conference on Weights and Measures", language: "en" },
      { content: "Conférence Générale des Poids et Mesures", language: "fr" },
    ],
    "CGPM",
  )
end
|
257
|
+
|
258
|
+
#
|
259
|
+
# Create CIPM organization
|
260
|
+
#
|
261
|
+
# @return [Array<Hash>] CIPM organization
|
262
|
+
#
|
263
|
+
def cipm_org
  # English and French names of the International Committee, abbreviated "CIPM".
  organization(
    [
      { content: "International Committee for Weights and Measures", language: "en" },
      { content: "Comité International des Poids et Mesures", language: "fr" },
    ],
    "CIPM",
  )
end
|
270
|
+
|
271
|
+
#
|
272
|
+
# Create a title
|
273
|
+
#
|
274
|
+
# @param [String] content title content
|
275
|
+
# @param [String] language language code (en, fr)
|
276
|
+
#
|
277
|
+
# @return [Hash] title
|
278
|
+
#
|
279
|
+
def title(content, language)
  # Every title is tagged with the Latin script; key order is content, language, script.
  localized = { content: content, language: language }
  localized.merge(script: "Latn")
end
|
282
|
+
|
283
|
+
#
|
284
|
+
# Add part to ID and structured identifier
|
285
|
+
#
|
286
|
+
# @param [Hash] hash Hash of BIPM meeting
|
287
|
+
# @param [String] part part number of meeting
|
288
|
+
#
|
289
|
+
def add_part(hash, part)
  # Matches the leading "<body> -- <type> <num>" or "<type> <body>/<num>" portion
  # of a document number; the part suffix is appended right after <num>.
  regex = /(\p{L}+\s(?:--\s\p{L}+\s|\w+\/)\d+)/
  hash[:id] += "-#{part}"
  # In-place update; the nil returned by `sub!` on no match is deliberately ignored.
  hash[:docnumber].sub!(regex) { |m| "#{m}-#{part}" }
  hash[:docid].select { |id| id.type == "BIPM" }.each do |did|
    # Use non-bang `sub`: `sub!` returns nil when the pattern does not match,
    # which would overwrite the identifier with nil. A non-matching identifier
    # is now kept unchanged.
    id = did.instance_variable_get(:@id).sub(regex) { |m| "#{m}-#{part}" }
    did.instance_variable_set(:@id, id)
  end
  # Written directly to the instance variable, as with @id above.
  hash[:structuredidentifier].instance_variable_set :@part, part
end
|
299
|
+
|
300
|
+
#
|
301
|
+
# Create hash from BIPM meeting/resolution
|
302
|
+
#
|
303
|
+
# @param [Hash] **args Hash of arguments
|
304
|
+
# @option args [String] :type Type of meeting/resolution
|
305
|
+
# @option args [Hash] :en Hash of English metadata
|
306
|
+
# @option args [Hash] :fr Hash of French metadata
|
307
|
+
# @option args [String] :body Body of meeting/resolution
|
308
|
+
# @option args [String] :num Number of meeting/resolution
|
309
|
+
# @option args [Array<Hash>] :src Array of links to bipm-data-outcomes
|
310
|
+
# @option args [String] :pdf link to PDF
|
311
|
+
#
|
312
|
+
# @return [Hash] Hash of BIPM meeting/resolution
|
313
|
+
#
|
314
|
+
def bibitem(**args) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
  body, type, num = args.values_at(:body, :type, :num)
  en_md = args[:en]
  fr_md = args[:fr]
  date = en_md["date"]
  docnum = create_docnum body, type, num, date

  # A title is added only for a language whose metadata provides one.
  titles = []
  titles << title(en_md["title"], "en") if en_md["title"]
  titles << title(fr_md["title"], "fr") if fr_md["title"]

  {
    title: titles,
    type: "proceedings",
    doctype: type,
    place: [RelatonBib::Place.new(city: "Paris")],
    date: [{ type: "published", on: date }],
    docid: create_docids(docnum),
    docnumber: docnum,
    id: create_id(body, type, num, date),
    link: create_links(**args),
    language: %w[en fr],
    script: ["Latn"],
    contributor: contributors(date, body),
    structuredidentifier: RelatonBipm::StructuredIdentifier.new(docnumber: num),
  }
end
|
331
|
+
|
332
|
+
#
|
333
|
+
# Create links
|
334
|
+
#
|
335
|
+
# @param [Hash] **args Hash of arguments
|
336
|
+
#
|
337
|
+
# @return [Array<Hash>] Array of links
|
338
|
+
#
|
339
|
+
def create_links(**args)
  # One citation link per language, English first.
  citations = %w[en fr].map do |lang|
    { type: "citation", content: args[lang.to_sym]["url"], language: lang, script: "Latn" }
  end
  pdfs = RelatonBib.array(args[:pdf]).map { |pdf| { type: "pdf", content: pdf } }
  # Source links are appended only when :src is present and non-empty.
  sources = args[:src]&.any? ? args[:src] : []
  citations + pdfs + sources
end
|
348
|
+
|
349
|
+
#
|
350
|
+
# Create a document number
|
351
|
+
#
|
352
|
+
# @param [String] body body of meeting
|
353
|
+
# @param [String] type type of meeting
|
354
|
+
# @param [String, nil] num number of meeting or resolution
|
355
|
+
# @param [String] date published date
|
356
|
+
#
|
357
|
+
# @return [String] document number
|
358
|
+
#
|
359
|
+
def create_docnum(body, type, num, date)
  year = Date.parse(date).year
  label = type.capitalize
  special = special_id_case?(body, type, year)
  # The number is included only when it converts to a positive integer.
  numbered = num.to_i.positive?

  prefix =
    if special
      numbered ? "#{label} #{body}/#{num}" : "#{label} #{body}"
    else
      numbered ? "#{body} -- #{label} #{num}" : "#{body} -- #{label}"
    end
  "#{prefix} (#{year})"
end
|
370
|
+
|
371
|
+
#
|
372
|
+
# Create ID
|
373
|
+
#
|
374
|
+
# @param [String] body body of meeting
|
375
|
+
# @param [String] type type of meeting
|
376
|
+
# @param [String, nil] num part number
|
377
|
+
# @param [String] date published date
|
378
|
+
#
|
379
|
+
# @return [String] ID
|
380
|
+
#
|
381
|
+
def create_id(body, type, num, date)
  year = Date.parse(date).year
  label = type.capitalize
  # Special cases put the type before the body; all others put the body first.
  parts = special_id_case?(body, type, year) ? [label, body, year] : [body, label, year]
  parts << num if num.to_i.positive?
  parts.join("-")
end
|
391
|
+
|
392
|
+
#
|
393
|
+
# Check if ID is special case
|
394
|
+
#
|
395
|
+
# @param [String] body body of meeting
|
396
|
+
# @param [String] type type of meeting
|
397
|
+
# @param [String] year published year
|
398
|
+
#
|
399
|
+
# @return [Boolean] is special case
|
400
|
+
#
|
401
|
+
def special_id_case?(body, type, year)
  case body
  when "CIPM"
    type == "Decision" && year.to_i > 2011
  when "JCRB"
    # NOTE(review): "recomendation" and "descision" are misspelled, and the list is
    # lowercase while the resolution caller passes a capitalized type, so this branch
    # probably never matches. Kept as is because changing it would alter generated
    # identifiers; confirm the intended JCRB format before fixing.
    %w[recomendation resolution descision].include?(type)
  else
    false
  end
end
|
405
|
+
|
406
|
+
#
|
407
|
+
# Create document IDs
|
408
|
+
#
|
409
|
+
# @param [String] en_id document ID in English
|
410
|
+
#
|
411
|
+
# @return [Array<RelatonBib::DocumentIdentifier>] document IDs
|
412
|
+
#
|
413
|
+
def create_docids(en_id)
  common = { type: "BIPM", primary: true }
  [
    # Each identifier gets its own copy of the string.
    make_docid(id: en_id.clone, **common),
    make_docid(id: en_id.clone, **common, language: "en", script: "Latn"),
    create_docid_fr(en_id),
  ]
end
|
421
|
+
|
422
|
+
#
|
423
|
+
# Create French document ID
|
424
|
+
#
|
425
|
+
# @param [String] en_id English document ID
|
426
|
+
#
|
427
|
+
# @return [RelatonBib::DocumentIdentifier] french document ID
|
428
|
+
#
|
429
|
+
def create_docid_fr(en_id)
  # Each TRANSLATIONS entry is a (French, English) pair; take the first whose
  # English term occurs in the ID and swap in the French term.
  pair = BipmBibliography::TRANSLATIONS.find { |_, en_term| en_id.include? en_term }
  fr_id = en_id.sub(pair.last, pair.first)
  make_docid(id: fr_id, type: "BIPM", primary: true, language: "fr", script: "Latn")
end
|
434
|
+
|
435
|
+
#
|
436
|
+
# Create document ID
|
437
|
+
#
|
438
|
+
# @param [String] id ID of document
|
439
|
+
# @param [String] type Type of document
|
440
|
+
# @param [Boolean] primary Primary document
|
441
|
+
# @param [String] language Language of document
|
442
|
+
# @param [String] script Script of document
|
443
|
+
#
|
444
|
+
# @return [RelatonBib::DocumentIdentifier] Document ID
|
445
|
+
#
|
446
|
+
def make_docid(**args)
  # Thin wrapper: all keyword arguments are forwarded unchanged.
  identifier_class = RelatonBib::DocumentIdentifier
  identifier_class.new(**args)
end
|
449
|
+
end
|
450
|
+
end
|
data/lib/relaton_bipm/version.rb
CHANGED
@@ -71,16 +71,6 @@ module RelatonBipm
|
|
71
71
|
EditorialGroup.new committee: cm, workgroup: wg
|
72
72
|
end
|
73
73
|
|
74
|
-
# @TODO remove this method before next (1.7.0) relaton release
|
75
|
-
# it's in the relaton-bib but hasn't released yet
|
76
|
-
# @param title [Nokogiri::XML::Element]
|
77
|
-
# @return [Array<RelatonBib::LocalizedString>]
|
78
|
-
def variants(elm)
|
79
|
-
elm.xpath("variant").map do |v|
|
80
|
-
RelatonBib::LocalizedString.new v.text, v[:language], v[:script]
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
74
|
# @param ext [Nokogiri::XML::Element]
|
85
75
|
# @return [RelatonBipm::StructuredIdentifier]
|
86
76
|
def fetch_structuredidentifier(ext)
|
@@ -91,6 +81,22 @@ module RelatonBipm
|
|
91
81
|
appendix: sid.at("appendix")&.text
|
92
82
|
)
|
93
83
|
end
|
84
|
+
|
85
|
+
#
|
86
|
+
# Parse contacts from XML.
|
87
|
+
#
|
88
|
+
# @param [Nokogiri::XML::Element] contrib contributor element
|
89
|
+
#
|
90
|
+
# @return [Array<RelatonBib::Address, RelatonBib::Contact>] contacts
|
91
|
+
#
|
92
|
+
# def parse_contact(contrib)
|
93
|
+
# contrib.xpath("formattedAddress").each_with_object(super) do |fa, conts|
|
94
|
+
# city, country = fa.text.split(", ")
|
95
|
+
# next unless city && country
|
96
|
+
|
97
|
+
# conts << RelatonBib::Address.new(city: city, country: country)
|
98
|
+
# end
|
99
|
+
# end
|
94
100
|
end
|
95
101
|
end
|
96
102
|
end
|
data/lib/relaton_bipm.rb
CHANGED
@@ -15,6 +15,8 @@ require "relaton_bipm/hash_converter"
|
|
15
15
|
require "relaton_bipm/xml_parser"
|
16
16
|
require "relaton_bipm/index"
|
17
17
|
require "relaton_bipm/data_fetcher"
|
18
|
+
require "relaton_bipm/data_outcomes_parser"
|
19
|
+
require "relaton_bipm/bipm_si_brochure_parser"
|
18
20
|
|
19
21
|
module RelatonBipm
|
20
22
|
class Error < StandardError; end
|
data/relaton_bipm.gemspec
CHANGED
@@ -42,7 +42,7 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
|
|
42
42
|
|
43
43
|
spec.add_dependency "faraday", "~> 1.0"
|
44
44
|
spec.add_dependency "mechanize", "~> 2.8.0"
|
45
|
-
spec.add_dependency "relaton-bib", "~> 1.13.
|
45
|
+
spec.add_dependency "relaton-bib", "~> 1.13.13"
|
46
46
|
spec.add_dependency "rubyzip", "~> 2.3.0"
|
47
47
|
spec.add_dependency "serrano", "~> 1.0"
|
48
48
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-bipm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.13.
|
4
|
+
version: 1.13.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -142,14 +142,14 @@ dependencies:
|
|
142
142
|
requirements:
|
143
143
|
- - "~>"
|
144
144
|
- !ruby/object:Gem::Version
|
145
|
-
version: 1.13.
|
145
|
+
version: 1.13.13
|
146
146
|
type: :runtime
|
147
147
|
prerelease: false
|
148
148
|
version_requirements: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
|
-
version: 1.13.
|
152
|
+
version: 1.13.13
|
153
153
|
- !ruby/object:Gem::Dependency
|
154
154
|
name: rubyzip
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
@@ -208,9 +208,11 @@ files:
|
|
208
208
|
- lib/relaton_bipm/bibliographic_date.rb
|
209
209
|
- lib/relaton_bipm/bipm_bibliographic_item.rb
|
210
210
|
- lib/relaton_bipm/bipm_bibliography.rb
|
211
|
+
- lib/relaton_bipm/bipm_si_brochure_parser.rb
|
211
212
|
- lib/relaton_bipm/comment_periond.rb
|
212
213
|
- lib/relaton_bipm/committee.rb
|
213
214
|
- lib/relaton_bipm/data_fetcher.rb
|
215
|
+
- lib/relaton_bipm/data_outcomes_parser.rb
|
214
216
|
- lib/relaton_bipm/document_relation.rb
|
215
217
|
- lib/relaton_bipm/editorial_group.rb
|
216
218
|
- lib/relaton_bipm/hash_converter.rb
|