relaton-bipm 1.14.2 → 1.14.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +6 -0
- data/README.adoc +22 -9
- data/grammars/basicdoc.rng +0 -1
- data/grammars/biblio.rng +12 -2
- data/lib/relaton_bipm/bipm_bibliography.rb +11 -310
- data/lib/relaton_bipm/bipm_si_brochure_parser.rb +6 -3
- data/lib/relaton_bipm/data_fetcher.rb +6 -2
- data/lib/relaton_bipm/data_outcomes_parser.rb +64 -23
- data/lib/relaton_bipm/id_parser.rb +134 -0
- data/lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb +29 -19
- data/lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb +8 -3
- data/lib/relaton_bipm/version.rb +1 -1
- data/lib/relaton_bipm.rb +3 -1
- data/relaton_bipm.gemspec +2 -6
- metadata +24 -80
- data/lib/relaton_bipm/index.rb +0 -68
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 34d720b316dbd942e2c5d630d2ae0f07b74331e4ef07f68715e84304bad0fb13
|
4
|
+
data.tar.gz: 38d36e34b998db6e4fa9e9f1a6e5306fadacea45b9a878cd14629eaaca2ef50d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a22261617d5c3de8aad7ed410091331698630f958332c5feb0b215b9fafe9167015530e9e6ecb71046307a6775aa7a2a0a71dd4c12e4f982cb0bd259e021a267
|
7
|
+
data.tar.gz: 376bb090dd4d273b8039357d78280c9bc4f1555918920a55b22ec72cb8be87c4ce0a8469254ffc000256fa95069a271b65b978dc767d735acd4b3e141c5dea24
|
data/Gemfile
CHANGED
data/README.adoc
CHANGED
@@ -70,22 +70,35 @@ Allowed document names are:
|
|
70
70
|
|
71
71
|
==== Reference structure for Metrologia documents
|
72
72
|
|
73
|
-
`BIPM Metrologia {JOURNAL} {VOLUME} {ISSUE}
|
73
|
+
`BIPM Metrologia {JOURNAL} {VOLUME} {ISSUE}`
|
74
74
|
|
75
|
-
- `{JOURNAL}` - number
|
76
|
-
- `{VOLUME}` - number
|
77
|
-
- `{ISSUE}` - number
|
78
|
-
- `{PAGE}` - number of page, optional
|
75
|
+
- `{JOURNAL}` - journal number, required
|
76
|
+
- `{VOLUME}` - volume number, optional
|
77
|
+
- `{ISSUE}` - issue number, optional
|
79
78
|
|
80
79
|
==== Reference structures for CCTF (CCDS), CGPM, CIPM documents
|
81
80
|
|
82
81
|
===== Basic pattern
|
83
82
|
|
84
83
|
----
|
85
|
-
Long:
|
86
|
-
|
84
|
+
Long:
|
85
|
+
{group name} -- {type} {number} ({year})
|
86
|
+
{group name} {type} {number} ({year})
|
87
|
+
{group name} {type} {year}-{zero_leading_number}
|
88
|
+
|
89
|
+
Short:
|
90
|
+
{group name} -- {type-abbrev} {number} ({year}, {lang})
|
91
|
+
{group name} {type-abbrev} {number} ({year}, {lang})
|
87
92
|
----
|
88
93
|
|
94
|
+
`group name` - a name of the group, required. A full list of group names is available https://github.com/metanorma/bipm-editor-guides/blob/main/sources/bipm-outcomes-en.adoc#appendix-a-bipm-groups-and-codes[here].
|
95
|
+
`type` - a type of document, required. A list of types is: Resolution (Résolution), Recommendation (Recommandation), Decision (Décision), Meeting (Réunion), Declaration (Déclaration).
|
96
|
+
`type-abbrev` - an abbreviation of the type, required. A list of abbreviations: RES (Resolution), REC (Recommendation), DECN (Decision).
|
97
|
+
`number` - a number of the document, optional. May include a part, e.g. `1-2`.
|
98
|
+
`zero_leading_number` - a number of the document with a leading zero, required. Can be used when a document has a 1- or 2-digit number. It's `00` for documents without a number.
|
99
|
+
`year` - a year of the document, optional.
|
100
|
+
`lang` - a language of the document, optional. Can be `EN` or `FR`.
|
101
|
+
|
89
102
|
===== Special case pattern
|
90
103
|
|
91
104
|
The basic pattern works fine for all, except for these 2 cases:
|
@@ -295,7 +308,7 @@ bib.link
|
|
295
308
|
#<RelatonBib::TypedUri:0x00007fa6d6a29250 @content=#<Addressable::URI:0xc2b0 URI:https://doi.org/10.1088/0026-1394/29/6/001>, @type="doi">]
|
296
309
|
----
|
297
310
|
|
298
|
-
=== Create bibliographic item from XML
|
311
|
+
=== Create a bibliographic item from XML
|
299
312
|
|
300
313
|
[source,ruby]
|
301
314
|
----
|
@@ -304,7 +317,7 @@ RelatonBipm::XMLParser.from_xml File.read('spec/fixtures/bipm_item.xml')
|
|
304
317
|
...
|
305
318
|
----
|
306
319
|
|
307
|
-
=== Create bibliographic item from YAML
|
320
|
+
=== Create a bibliographic item from YAML
|
308
321
|
[source,ruby]
|
309
322
|
----
|
310
323
|
hash = YAML.load_file 'spec/fixtures/bipm_item.yml'
|
data/grammars/basicdoc.rng
CHANGED
data/grammars/biblio.rng
CHANGED
@@ -216,6 +216,9 @@
|
|
216
216
|
<optional>
|
217
217
|
<ref name="fullname"/>
|
218
218
|
</optional>
|
219
|
+
<zeroOrMore>
|
220
|
+
<ref name="credential"/>
|
221
|
+
</zeroOrMore>
|
219
222
|
<zeroOrMore>
|
220
223
|
<ref name="affiliation"/>
|
221
224
|
</zeroOrMore>
|
@@ -232,6 +235,11 @@
|
|
232
235
|
<ref name="FullNameType"/>
|
233
236
|
</element>
|
234
237
|
</define>
|
238
|
+
<define name="credential">
|
239
|
+
<element name="credential">
|
240
|
+
<text/>
|
241
|
+
</element>
|
242
|
+
</define>
|
235
243
|
<define name="FullNameType">
|
236
244
|
<choice>
|
237
245
|
<group>
|
@@ -305,7 +313,9 @@
|
|
305
313
|
<zeroOrMore>
|
306
314
|
<ref name="affiliationdescription"/>
|
307
315
|
</zeroOrMore>
|
308
|
-
<
|
316
|
+
<optional>
|
317
|
+
<ref name="organization"/>
|
318
|
+
</optional>
|
309
319
|
</element>
|
310
320
|
</define>
|
311
321
|
<define name="affiliationname">
|
@@ -1316,7 +1326,7 @@
|
|
1316
1326
|
<value>commentaryOf</value>
|
1317
1327
|
<value>hasCommentary</value>
|
1318
1328
|
<value>related</value>
|
1319
|
-
<value>
|
1329
|
+
<value>hasComplement</value>
|
1320
1330
|
<value>complementOf</value>
|
1321
1331
|
<value>obsoletes</value>
|
1322
1332
|
<value>obsoletedBy</value>
|
@@ -3,14 +3,6 @@ require "mechanize"
|
|
3
3
|
module RelatonBipm
|
4
4
|
class BipmBibliography
|
5
5
|
GH_ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-bipm/master/".freeze
|
6
|
-
IOP_DOMAIN = "https://iopscience.iop.org".freeze
|
7
|
-
TRANSLATIONS = {
|
8
|
-
"Déclaration" => "Declaration",
|
9
|
-
"Réunion" => "Meeting",
|
10
|
-
"Recommandation" => "Recommendation",
|
11
|
-
"Résolution" => "Resolution",
|
12
|
-
"Décision" => "Decision",
|
13
|
-
}.freeze
|
14
6
|
|
15
7
|
class << self
|
16
8
|
# @param text [String]
|
@@ -18,7 +10,6 @@ module RelatonBipm
|
|
18
10
|
def search(text, _year = nil, _opts = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
19
11
|
warn "[relaton-bipm] (\"#{text}\") fetching..."
|
20
12
|
ref = text.sub(/^BIPM\s/, "")
|
21
|
-
# item = ref.match?(/^Metrologia/i) ? get_metrologia(ref, magent) : get_bipm(ref, magent)
|
22
13
|
item = get_bipm(ref, magent)
|
23
14
|
unless item
|
24
15
|
warn "[relaton-bipm] (\"#{text}\") not found."
|
@@ -26,7 +17,6 @@ module RelatonBipm
|
|
26
17
|
end
|
27
18
|
|
28
19
|
warn("[relaton-bipm] (\"#{text}\") found #{item.docidentifier[0].id}")
|
29
|
-
item.fetched = Date.today.to_s
|
30
20
|
item
|
31
21
|
rescue Mechanize::ResponseCodeError => e
|
32
22
|
raise RelatonBib::RequestError, e.message unless e.response_code == "404"
|
@@ -49,295 +39,28 @@ module RelatonBipm
|
|
49
39
|
a
|
50
40
|
end
|
51
41
|
|
52
|
-
# @param
|
42
|
+
# @param reference [String]
|
53
43
|
# @param agent [Mechanize]
|
54
44
|
# @return [RelatonBipm::BipmBibliographicItem]
|
55
|
-
def get_bipm(
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
# TRANSLATIONS.each { |fr, en| rf.sub! fr, en }
|
61
|
-
path = Index.new.search ref
|
62
|
-
return unless path
|
45
|
+
def get_bipm(reference, agent) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
46
|
+
ref_id = Id.new reference
|
47
|
+
index = Relaton::Index.find_or_create :BIPM, url: "#{GH_ENDPOINT}index2.zip"
|
48
|
+
rows = index.search { |r| ref_id == r[:id] }
|
49
|
+
return unless rows.any?
|
63
50
|
|
64
|
-
url = "#{GH_ENDPOINT}#{
|
51
|
+
url = "#{GH_ENDPOINT}#{rows.first[:file]}"
|
65
52
|
resp = agent.get url
|
66
|
-
check_response resp
|
67
53
|
return unless resp.code == "200"
|
68
54
|
|
69
55
|
yaml = RelatonBib.parse_yaml resp.body, [Date]
|
70
|
-
|
56
|
+
yaml["fetched"] = Date.today.to_s
|
71
57
|
bib_hash = HashConverter.hash_to_bib yaml
|
72
58
|
BipmBibliographicItem.new(**bib_hash)
|
73
59
|
end
|
74
60
|
|
75
|
-
#
|
76
|
-
#
|
77
|
-
#
|
78
|
-
def get_metrologia(ref, agent)
|
79
|
-
agent.redirect_ok = false
|
80
|
-
ref_arr = ref.split
|
81
|
-
case ref_arr.size
|
82
|
-
when 1 then get_journal agent
|
83
|
-
when 2 then get_volume ref_arr[1], agent
|
84
|
-
when 3 then get_issue(*ref_arr[1..2], agent)
|
85
|
-
when 4 then get_article_from_issue(*ref_arr[1..3], agent)
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
# @param agent [Mechanize]
|
90
|
-
# @return [RelatonBipm::BipmBibliographicItem]
|
91
|
-
def get_journal(agent)
|
92
|
-
url = "#{IOP_DOMAIN}/journal/0026-1394"
|
93
|
-
rsp = agent.get url
|
94
|
-
check_response rsp
|
95
|
-
rel = rsp.xpath('//select[@id="allVolumesSelector"]/option').map do |v|
|
96
|
-
{ type: "partOf", bibitem: journal_rel(v) }
|
97
|
-
end
|
98
|
-
did = doc_id []
|
99
|
-
bibitem(formattedref: fref(did.id), docid: [did], link: blink(url), relation: rel)
|
100
|
-
end
|
101
|
-
|
102
|
-
# @param elm [Nokogiri::XML::Element]
|
103
|
-
def journal_rel(elm)
|
104
|
-
vol = elm[:value].split("/").last
|
105
|
-
did = doc_id [vol]
|
106
|
-
url = IOP_DOMAIN + elm[:value]
|
107
|
-
BipmBibliographicItem.new(formattedref: fref(did.id), docid: [did], link: blink(url))
|
108
|
-
end
|
109
|
-
|
110
|
-
# @param vol [String]
|
111
|
-
# @param agent [Mechanize]
|
112
|
-
# @return [RelatonBipm::BipmBibliographicItem]
|
113
|
-
def get_volume(vol, agent)
|
114
|
-
url = "#{IOP_DOMAIN}/volume/0026-1394/#{vol}"
|
115
|
-
rsp = agent.get url
|
116
|
-
check_response rsp
|
117
|
-
rel = rsp.xpath('//li[@itemprop="hasPart"]').map do |i|
|
118
|
-
{ type: "partOf", bibitem: volume_rel(i, vol) }
|
119
|
-
end
|
120
|
-
did = doc_id [vol]
|
121
|
-
bibitem(formattedref: fref(did.id), docid: [did], link: blink(url), date: bdate(rsp), relation: rel,
|
122
|
-
extent: btextent(vol), series: series)
|
123
|
-
end
|
124
|
-
|
125
|
-
def volume_rel(elm, vol) # rubocop:disable Metrics/AbcSize
|
126
|
-
a = elm.at 'a[@itemprop="issueNumber"]'
|
127
|
-
ish = a[:href].split("/").last
|
128
|
-
url = IOP_DOMAIN + a[:href]
|
129
|
-
docid = doc_id [vol, ish]
|
130
|
-
t = elm.at "p"
|
131
|
-
title_fref = t ? { title: titles(t.text) } : { formattedref: fref(docid.id) }
|
132
|
-
BipmBibliographicItem.new(**title_fref, docid: [docid], link: blink(url))
|
133
|
-
end
|
134
|
-
|
135
|
-
# @param title [String]
|
136
|
-
# @return [RelatonBib::TypedTitleStringCollection]
|
137
|
-
def titles(title)
|
138
|
-
RelatonBib::TypedTitleString.from_string title, "en", "Latn", "text/html"
|
139
|
-
end
|
140
|
-
|
141
|
-
# @param vol [String]
|
142
|
-
# @param ish [String]
|
143
|
-
# @param agent [Mechanize]
|
144
|
-
# @return [RelatonBipm::BipmBibliographicItem]
|
145
|
-
def get_issue(vol, ish, agent) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
146
|
-
url = issue_url vol, ish
|
147
|
-
rsp = agent.get url
|
148
|
-
check_response rsp
|
149
|
-
rel = rsp.xpath('//div[@class="art-list-item-body"]').map do |a|
|
150
|
-
{ type: "partOf", bibitem: issue_rel(a, vol, ish) }
|
151
|
-
end
|
152
|
-
did = doc_id [vol, ish]
|
153
|
-
title_fref = { title: issue_title(rsp) }
|
154
|
-
title_fref[:formattedref] = fref did.id unless title_fref[:title].any?
|
155
|
-
bibitem(**title_fref, link: blink(url), relation: rel, docid: [did],
|
156
|
-
date: bdate(rsp), extent: btextent(vol, ish), series: series)
|
157
|
-
end
|
158
|
-
|
159
|
-
# @param ref [String]
|
160
|
-
# @return [RelatonBib::FormattedRef]
|
161
|
-
def fref(ref)
|
162
|
-
RelatonBib::FormattedRef.new content: ref, language: "en", script: "Latn"
|
163
|
-
end
|
164
|
-
|
165
|
-
# @param rsp [Mechanize::Page]
|
166
|
-
# @return [RelatonBib::TypedTitleStringCollection]
|
167
|
-
def issue_title(rsp)
|
168
|
-
t = rsp.at('//div[@id="wd-jnl-issue-title"]/h4')
|
169
|
-
return RelatonBib::TypedTitleStringCollection.new [] unless t
|
170
|
-
|
171
|
-
titles(t.text)
|
172
|
-
end
|
173
|
-
|
174
|
-
# @oaran vol [String]
|
175
|
-
# @param ish [String]
|
176
|
-
# @return [String]
|
177
|
-
def issue_url(vol, ish)
|
178
|
-
"#{IOP_DOMAIN}/issue/0026-1394/#{vol}/#{ish}"
|
179
|
-
end
|
180
|
-
|
181
|
-
# @param elm [Nokogiri::XML::Element]
|
182
|
-
# @param vol [String]
|
183
|
-
# @param ish [String]
|
184
|
-
# @return [RelatonBipm::BipmBibliographicItem]
|
185
|
-
def issue_rel(elm, vol, ish)
|
186
|
-
art = elm.at('div[@class="indexer"]').text
|
187
|
-
ref = elm.at('div/a[@class="art-list-item-title"]')
|
188
|
-
title = titles ref.text.strip
|
189
|
-
docid = doc_id [vol, ish, art]
|
190
|
-
link = blink IOP_DOMAIN + ref[:href]
|
191
|
-
BipmBibliographicItem.new(title: title, docid: [docid], link: link)
|
192
|
-
end
|
193
|
-
|
194
|
-
# @param content [RelatonBib::TypedTitleString]
|
195
|
-
# @return [RelatonBib::TypedTitleString]
|
196
|
-
def btitle(content)
|
197
|
-
RelatonBib::TypedTitleString.new type: "main", content: content, language: "en", script: "Latn"
|
198
|
-
end
|
199
|
-
|
200
|
-
# @param url [String]
|
201
|
-
# @return [String]
|
202
|
-
def blink(url)
|
203
|
-
[RelatonBib::TypedUri.new(type: "src", content: url)]
|
204
|
-
end
|
205
|
-
|
206
|
-
# @param rsp [Mechanize::Page]
|
207
|
-
# @return [Array<RelatonBib::BibliographicDate>]
|
208
|
-
def bdate(rsp)
|
209
|
-
date = rsp.at('//p[@itemprop="issueNumber"]|//h2[@itemprop="volumeNumber"]').text.split(", ").last
|
210
|
-
on = date.match?(/^\d{4}$/) ? date : Date.parse(date).strftime("%Y-%m")
|
211
|
-
[RelatonBib::BibliographicDate.new(type: "published", on: on)]
|
212
|
-
end
|
213
|
-
|
214
|
-
# @param args [Array<String>]
|
215
|
-
# @return [RelatonBib::DocumentIdentifier]
|
216
|
-
def doc_id(args)
|
217
|
-
id = args.clone.unshift "Metrologia"
|
218
|
-
RelatonBib::DocumentIdentifier.new(type: "BIPM", id: id.join(" "), primary: true)
|
219
|
-
end
|
220
|
-
|
221
|
-
# @param vol [String]
|
222
|
-
# @param ish [String]
|
223
|
-
# @param art [String]
|
224
|
-
# @param agent [Mechanize]
|
225
|
-
# @return [RelatonBipm::BipmBibliographicItem]
|
226
|
-
def get_article_from_issue(vol, ish, art, agent) # rubocop:disable Metrics/MethodLength
|
227
|
-
url = issue_url vol, ish
|
228
|
-
rsp = agent.get url
|
229
|
-
check_response rsp
|
230
|
-
link = rsp.at("//div[@class='indexer'][.='#{art}']/../div/a")
|
231
|
-
unless link
|
232
|
-
arts = rsp.xpath("//div[@class='indexer']").map(&:text)
|
233
|
-
warn "[relaton-bipm] No article is available at the specified start page \"#{art}\" in issue \"BIPM Metrologia #{vol} #{ish}\"."
|
234
|
-
warn "[relaton-bipm] Available articles in the issue start at the following pages: (#{arts.join(', ')})"
|
235
|
-
return
|
236
|
-
end
|
237
|
-
|
238
|
-
get_article link[:href], vol, ish, agent
|
239
|
-
end
|
240
|
-
|
241
|
-
# @param path [String]
|
242
|
-
# @param vol [String]
|
243
|
-
# @param ish [String]
|
244
|
-
# @param agent [Mechanize]
|
245
|
-
# @return [RelatonBipm::BipmBibliographicItem]
|
246
|
-
def get_article(path, vol, ish, agent) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
247
|
-
agent.agent.allowed_error_codes = [403]
|
248
|
-
rsp = agent.get path
|
249
|
-
check_response rsp
|
250
|
-
title = rsp.at("//h1[@itemprop='headline']").children.to_xml
|
251
|
-
url = rsp.uri
|
252
|
-
bib = rsp.link_with(text: "BibTeX").href
|
253
|
-
rsp = agent.get bib
|
254
|
-
check_response rsp
|
255
|
-
bt = BibTeX.parse(rsp.body).first
|
256
|
-
bibitem(
|
257
|
-
docid: btdocid(bt), title: titles(title), date: btdate(bt),
|
258
|
-
abstract: btabstract(bt), doctype: bt.type.to_s, series: series,
|
259
|
-
link: btlink(bt, url), contributor: btcontrib(bt),
|
260
|
-
extent: btextent(vol, ish, bt.pages.to_s)
|
261
|
-
)
|
262
|
-
end
|
263
|
-
|
264
|
-
# @param args [Hash]
|
265
|
-
# @return [RelatonBipm::BipmBibliographicItem]
|
266
|
-
def bibitem(**args)
|
267
|
-
BipmBibliographicItem.new(
|
268
|
-
type: "article", language: ["en"], script: ["Latn"], **args,
|
269
|
-
)
|
270
|
-
end
|
271
|
-
|
272
|
-
# @return [Array<RelatonBib::Series>]
|
273
|
-
def series
|
274
|
-
[RelatonBib::Series.new(title: btitle("Metrologia"))]
|
275
|
-
end
|
276
|
-
|
277
|
-
# @param bibtex [BibTeX::Entry]
|
278
|
-
# @return [Array<RelatonBib::DocumentIdentifier>]
|
279
|
-
def btdocid(bibtex)
|
280
|
-
id = "#{bibtex.journal} #{bibtex.volume} #{bibtex.number} #{bibtex.pages.match(/^\d+/)}"
|
281
|
-
[
|
282
|
-
RelatonBib::DocumentIdentifier.new(type: "BIPM", id: id, primary: true),
|
283
|
-
RelatonBib::DocumentIdentifier.new(type: "DOI", id: bibtex.doi),
|
284
|
-
]
|
285
|
-
end
|
286
|
-
|
287
|
-
# @param bibtex [BibTeX::Entry]
|
288
|
-
# @return [Array<RelatonBib::FormattedString>]
|
289
|
-
def btabstract(bibtex)
|
290
|
-
[RelatonBib::FormattedString.new(content: bibtex.abstract.to_s, language: "en", script: "Latn")]
|
291
|
-
end
|
292
|
-
|
293
|
-
# @param bibtex [BibTeX::Entry]
|
294
|
-
# @param ref [URI]
|
295
|
-
# @return [Array<RelatonBib::TypedUri>]
|
296
|
-
def btlink(bibtex, ref)
|
297
|
-
[
|
298
|
-
RelatonBib::TypedUri.new(type: "src", content: ref.to_s),
|
299
|
-
RelatonBib::TypedUri.new(type: "doi", content: bibtex.url.to_s),
|
300
|
-
]
|
301
|
-
end
|
302
|
-
|
303
|
-
# @param bibtex [BibTeX::Entry]
|
304
|
-
# @return [Array<RelatonBib::BibliographicDate>]
|
305
|
-
def btdate(bibtex)
|
306
|
-
on = Date.new(bibtex.year.to_i, bibtex.month_numeric)
|
307
|
-
[RelatonBib::BibliographicDate.new(type: "published", on: on)]
|
308
|
-
end
|
309
|
-
|
310
|
-
# @param bibtex [BibTeX::Entry]
|
311
|
-
# @return [Array<Hash>]
|
312
|
-
def btcontrib(bibtex) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
313
|
-
contribs = []
|
314
|
-
if bibtex.publisher && !bibtex.publisher.empty?
|
315
|
-
org = RelatonBib::Organization.new name: bibtex.publisher.to_s
|
316
|
-
contribs << { entity: org, role: [{ type: "publisher" }] }
|
317
|
-
end
|
318
|
-
return contribs unless bibtex.author && !bibtex.author.empty?
|
319
|
-
|
320
|
-
bibtex.author.split(" and ").inject(contribs) do |mem, name|
|
321
|
-
cname = RelatonBib::LocalizedString.new name, "en", "Latn"
|
322
|
-
name = RelatonBib::FullName.new completename: cname
|
323
|
-
author = RelatonBib::Person.new name: name
|
324
|
-
mem << { entity: author, role: [{ type: "author" }] }
|
325
|
-
end
|
326
|
-
end
|
327
|
-
|
328
|
-
#
|
329
|
-
# @param vol [String] volume
|
330
|
-
# @param ish [String] issue
|
331
|
-
# @param pgs [String] pages
|
332
|
-
#
|
333
|
-
# @return [Array<RelatonBib::BibItemLocality>]
|
334
|
-
#
|
335
|
-
def btextent(vol, ish = nil, pgs = nil)
|
336
|
-
ext = [RelatonBib::Locality.new("volume", vol)]
|
337
|
-
ext << RelatonBib::Locality.new("issue", ish) if ish
|
338
|
-
ext << RelatonBib::Locality.new("page", *pgs.split("--")) if pgs
|
339
|
-
ext
|
340
|
-
end
|
61
|
+
# def match_item(ids, ref_id)
|
62
|
+
# ids.find { |id| Id.new(id) == ref_id }
|
63
|
+
# end
|
341
64
|
|
342
65
|
# @param ref [String] the BIPM standard Code to look up (e.g. "BIPM B-11")
|
343
66
|
# @param year [String] not used
|
@@ -346,28 +69,6 @@ module RelatonBipm
|
|
346
69
|
def get(ref, year = nil, opts = {})
|
347
70
|
search(ref, year, opts)
|
348
71
|
end
|
349
|
-
|
350
|
-
private
|
351
|
-
|
352
|
-
#
|
353
|
-
# Check HTTP response. Warn and rise error if response is not 200
|
354
|
-
# or redirect to CAPTCHA.
|
355
|
-
#
|
356
|
-
# @param [Mechanize] rsp response
|
357
|
-
#
|
358
|
-
# @raise [RelatonBib::RequestError] if response is not 200
|
359
|
-
#
|
360
|
-
def check_response(rsp) # rubocop:disable Metrics/AbcSize
|
361
|
-
if rsp.code == "302"
|
362
|
-
warn "[relaton-bipm] This source employs anti-DDoS measures that unfortunately affects automated requests."
|
363
|
-
warn "[relaton-bipm] Please visit this link in your browser to resolve the CAPTCHA, then retry: #{rsp.uri}"
|
364
|
-
# warn "[relaton-bipm] #{rsp.uri} is redirected to #{rsp.header['location']}"
|
365
|
-
raise RelatonBib::RequestError, "cannot access #{rsp.uri}"
|
366
|
-
elsif rsp.code != "200" && rsp.code != "403"
|
367
|
-
warn "[read_bipm] can't acces #{rsp.uri} #{rsp.code}"
|
368
|
-
raise RelatonBib::RequestError, "cannot acces #{rsp.uri} #{rsp.code}"
|
369
|
-
end
|
370
|
-
end
|
371
72
|
end
|
372
73
|
end
|
373
74
|
end
|
@@ -6,7 +6,7 @@ module RelatonBipm
|
|
6
6
|
# @param [RelatonBipm::DataFetcher] data_fetcher data fetcher
|
7
7
|
#
|
8
8
|
def initialize(data_fetcher)
|
9
|
-
@data_fetcher = data_fetcher
|
9
|
+
@data_fetcher = WeakRef.new data_fetcher
|
10
10
|
end
|
11
11
|
|
12
12
|
#
|
@@ -27,7 +27,7 @@ module RelatonBipm
|
|
27
27
|
# puts "Ls #{Dir['bipm-si-brochure/*']}"
|
28
28
|
# puts "Ls #{Dir['bipm-si-brochure/site/*']}"
|
29
29
|
# puts "Ls #{Dir['bipm-si-brochure/site/documents/*']}"
|
30
|
-
Dir["bipm-si-brochure/
|
30
|
+
Dir["bipm-si-brochure/_site/documents/*.rxl"].each do |f|
|
31
31
|
puts "Parsing #{f}"
|
32
32
|
docstd = Nokogiri::XML File.read f
|
33
33
|
doc = docstd.at "/bibdata"
|
@@ -35,7 +35,10 @@ module RelatonBipm
|
|
35
35
|
fix_si_brochure_id hash1
|
36
36
|
basename = File.join @data_fetcher.output, File.basename(f).sub(/(?:-(?:en|fr))?\.rxl$/, "")
|
37
37
|
outfile = "#{basename}.#{@data_fetcher.ext}"
|
38
|
-
|
38
|
+
key = hash1["docnumber"] || basename
|
39
|
+
@data_fetcher.index[[key]] = outfile
|
40
|
+
@data_fetcher.index_new.add_or_update [key], outfile
|
41
|
+
@data_fetcher.index2.add_or_update Id.new(key).normalized_hash, outfile
|
39
42
|
hash = if File.exist? outfile
|
40
43
|
warn_duplicate = false
|
41
44
|
hash2 = YAML.load_file outfile
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module RelatonBipm
|
2
2
|
class DataFetcher
|
3
|
-
attr_reader :output, :format, :ext, :files, :index
|
3
|
+
attr_reader :output, :format, :ext, :files, :index, :index_new, :index2
|
4
4
|
|
5
5
|
#
|
6
6
|
# Initialize fetcher
|
@@ -15,6 +15,8 @@ module RelatonBipm
|
|
15
15
|
@files = []
|
16
16
|
@index_path = "index.yaml"
|
17
17
|
@index = File.exist?(@index_path) ? YAML.load_file(@index_path) : {}
|
18
|
+
@index_new = Relaton::Index.find_or_create :BIPM, file: "index-bipm.yaml"
|
19
|
+
@index2 = Relaton::Index.find_or_create :BIPM, file: "index2.yaml"
|
18
20
|
end
|
19
21
|
|
20
22
|
#
|
@@ -45,7 +47,9 @@ module RelatonBipm
|
|
45
47
|
when "bipm-si-brochure" then BipmSiBrochureParser.parse(self)
|
46
48
|
when "rawdata-bipm-metrologia" then RawdataBipmMetrologia::Fetcher.fetch(self)
|
47
49
|
end
|
48
|
-
File.write @index_path,
|
50
|
+
File.write @index_path, index.to_yaml, encoding: "UTF-8"
|
51
|
+
index_new.save
|
52
|
+
index2.save
|
49
53
|
end
|
50
54
|
|
51
55
|
#
|
@@ -7,13 +7,21 @@ module RelatonBipm
|
|
7
7
|
"Statement" => "DECL",
|
8
8
|
}.freeze
|
9
9
|
|
10
|
+
TRANSLATIONS = {
|
11
|
+
"Déclaration" => "Declaration",
|
12
|
+
"Réunion" => "Meeting",
|
13
|
+
"Recommandation" => "Recommendation",
|
14
|
+
"Résolution" => "Resolution",
|
15
|
+
"Décision" => "Decision",
|
16
|
+
}.freeze
|
17
|
+
|
10
18
|
#
|
11
19
|
# Create data-outcomes parser
|
12
20
|
#
|
13
21
|
# @param [RelatonBipm::DataFetcher] data_fetcher data fetcher
|
14
22
|
#
|
15
23
|
def initialize(data_fetcher)
|
16
|
-
@data_fetcher = data_fetcher
|
24
|
+
@data_fetcher = WeakRef.new data_fetcher
|
17
25
|
end
|
18
26
|
|
19
27
|
#
|
@@ -68,20 +76,10 @@ module RelatonBipm
|
|
68
76
|
# @param [String] dir output directory
|
69
77
|
#
|
70
78
|
def fetch_meeting(en_file, body, type, dir) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
71
|
-
en =
|
72
|
-
en_md = en
|
73
|
-
|
74
|
-
fr = RelatonBib.parse_yaml File.read(fr_file, encoding: "UTF-8"), [Date]
|
75
|
-
fr_md = fr["metadata"]
|
76
|
-
gh_src = "https://raw.githubusercontent.com/metanorma/bipm-data-outcomes/"
|
77
|
-
src_en = gh_src + en_file.split("/")[-3..].unshift("main").join("/")
|
78
|
-
src_fr = gh_src + fr_file.split("/")[-3..].unshift("main").join("/")
|
79
|
-
src = [
|
80
|
-
{ type: "src", content: src_en, language: "en", script: "Latn" },
|
81
|
-
{ type: "src", content: src_fr, language: "fr", script: "Latn" },
|
82
|
-
]
|
79
|
+
_, en, fr_file, fr = read_files en_file
|
80
|
+
en_md, fr_md, num, part = meeting_md en, fr
|
81
|
+
src = meeting_links en_file, fr_file
|
83
82
|
|
84
|
-
/^(?<num>\d+)(?:-_(?<part>\d+))?-\d{4}$/ =~ en_md["url"].split("/").last
|
85
83
|
file = "#{num}.#{@data_fetcher.ext}"
|
86
84
|
path = File.join dir, file
|
87
85
|
hash = bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"]
|
@@ -111,6 +109,40 @@ module RelatonBipm
|
|
111
109
|
fetch_resolution body: body, en: en, fr: fr, dir: dir, src: src, num: num
|
112
110
|
end
|
113
111
|
|
112
|
+
#
|
113
|
+
# Read English and French files
|
114
|
+
#
|
115
|
+
# @param [String] en_file Path to English file
|
116
|
+
#
|
117
|
+
# @return [Array<String, Hash, nil>] flat array of English path, English data, French path, French data (path and data are nil when the file does not exist)
|
118
|
+
#
|
119
|
+
def read_files(en_file)
|
120
|
+
fr_file = en_file.sub "en", "fr"
|
121
|
+
[en_file, fr_file].map do |file|
|
122
|
+
if File.exist? file
|
123
|
+
data = RelatonBib.parse_yaml(File.read(file, encoding: "UTF-8"), [Date])
|
124
|
+
path = file
|
125
|
+
end
|
126
|
+
[path, data]
|
127
|
+
end.flatten
|
128
|
+
end
|
129
|
+
|
130
|
+
def meeting_md(eng, frn)
|
131
|
+
en_md = eng["metadata"]
|
132
|
+
num, part = en_md["identifier"].to_s.split("-")
|
133
|
+
[en_md, frn&.dig("metadata"), num, part]
|
134
|
+
end
|
135
|
+
|
136
|
+
def meeting_links(en_file, fr_file)
|
137
|
+
gh_src = "https://raw.githubusercontent.com/metanorma/bipm-data-outcomes/"
|
138
|
+
{ "en" => en_file, "fr" => fr_file }.map do |lang, file|
|
139
|
+
next unless file
|
140
|
+
|
141
|
+
src = gh_src + file.split("/")[-3..].unshift("main").join("/")
|
142
|
+
{ type: "src", content: src, language: lang, script: "Latn" }
|
143
|
+
end.compact
|
144
|
+
end
|
145
|
+
|
114
146
|
#
|
115
147
|
# Parse BIPM resolutions and write them to YAML files
|
116
148
|
#
|
@@ -207,6 +239,9 @@ module RelatonBipm
|
|
207
239
|
end
|
208
240
|
key << item.docidentifier.detect { |i| i.language == "fr" }.id
|
209
241
|
@data_fetcher.index[key] = path
|
242
|
+
@data_fetcher.index_new.add_or_update key, path
|
243
|
+
key2 = Id.new(item.docnumber).normalized_hash
|
244
|
+
@data_fetcher.index2.add_or_update key2, path
|
210
245
|
end
|
211
246
|
|
212
247
|
#
|
@@ -342,8 +377,7 @@ module RelatonBipm
|
|
342
377
|
docnum = create_docnum args[:body], args[:type], args[:num], args[:en]["date"]
|
343
378
|
hash = { title: [], type: "proceedings", doctype: args[:type],
|
344
379
|
place: [RelatonBib::Place.new(city: "Paris")] }
|
345
|
-
hash[:title]
|
346
|
-
hash[:title] << create_title(args[:fr]["title"], "fr") if args[:fr]["title"]
|
380
|
+
hash[:title] = create_titles args.slice(:en, :fr)
|
347
381
|
hash[:date] = [{ type: "published", on: args[:en]["date"] }]
|
348
382
|
hash[:docid] = create_docids docnum
|
349
383
|
hash[:docnumber] = docnum # .sub(" --", "").sub(/\s\(\d{4}\)/, "")
|
@@ -356,6 +390,12 @@ module RelatonBipm
|
|
356
390
|
hash
|
357
391
|
end
|
358
392
|
|
393
|
+
def create_titles(data)
|
394
|
+
data.each_with_object([]) do |(lang, md), mem|
|
395
|
+
mem << create_title(md["title"], lang.to_s) if md && md["title"]
|
396
|
+
end
|
397
|
+
end
|
398
|
+
|
359
399
|
#
|
360
400
|
# Create links
|
361
401
|
#
|
@@ -364,12 +404,13 @@ module RelatonBipm
|
|
364
404
|
# @return [Array<Hash>] Array of links
|
365
405
|
#
|
366
406
|
def create_links(**args)
|
367
|
-
links = [
|
368
|
-
|
369
|
-
|
370
|
-
|
407
|
+
links = args.slice(:en, :fr).each_with_object([]) do |(lang, md), mem|
|
408
|
+
next unless md && md["url"]
|
409
|
+
|
410
|
+
mem << { type: "citation", content: md["url"], language: lang.to_s, script: "Latn" }
|
411
|
+
end
|
371
412
|
RelatonBib.array(args[:pdf]).each { |pdf| links << { type: "pdf", content: pdf } }
|
372
|
-
links += args[:src] if args[:src]
|
413
|
+
links += args[:src] if args[:src]
|
373
414
|
links
|
374
415
|
end
|
375
416
|
|
@@ -454,8 +495,8 @@ module RelatonBipm
|
|
454
495
|
# @return [RelatonBib::DocumentIdentifier] french document ID
|
455
496
|
#
|
456
497
|
def create_docid_fr(en_id)
|
457
|
-
tr =
|
458
|
-
id = en_id.sub
|
498
|
+
tr = TRANSLATIONS.detect { |_, v| en_id.include? v }
|
499
|
+
id = tr ? en_id.sub(tr[1], tr[0]) : en_id
|
459
500
|
make_docid(id: id, type: "BIPM", primary: true, language: "fr", script: "Latn")
|
460
501
|
end
|
461
502
|
|
@@ -0,0 +1,134 @@
|
|
1
|
+
module RelatonBipm
|
2
|
+
class Id
|
3
|
+
class Parser < Parslet::Parser
|
4
|
+
rule(:space) { match("\s").repeat(1) }
|
5
|
+
rule(:space?) { space.maybe }
|
6
|
+
rule(:comma) { str(",") >> space? }
|
7
|
+
rule(:lparen) { str("(") }
|
8
|
+
rule(:rparen) { str(")") }
|
9
|
+
rule(:slash) { str("/") }
|
10
|
+
|
11
|
+
rule(:delimeter) { str("--") >> space }
|
12
|
+
rule(:delimeter?) { delimeter.maybe }
|
13
|
+
|
14
|
+
rule(:lang) { comma >> match["A-Z"].repeat(2, 2).as(:lang) }
|
15
|
+
rule(:lang?) { lang.maybe }
|
16
|
+
|
17
|
+
rule(:number) { match["0-9-"].repeat(1).as(:number) >> space? }
|
18
|
+
rule(:number?) { number.maybe }
|
19
|
+
|
20
|
+
rule(:year) { match["0-9"].repeat(4, 4).as(:year) }
|
21
|
+
rule(:year_paren) { lparen >> year >> lang? >> rparen }
|
22
|
+
rule(:num_year) { number? >> year_paren }
|
23
|
+
rule(:year_num) { year >> str("-") >> number }
|
24
|
+
rule(:num_and_year) { num_year | year_num | number }
|
25
|
+
|
26
|
+
rule(:sect) { lparen >> match["IVX"].repeat >> rparen }
|
27
|
+
rule(:suff) { match["a-zA-Z-"].repeat(1) }
|
28
|
+
rule(:cgmp) { str("CGPM") }
|
29
|
+
rule(:cipm) { str("CIPM") >> (str(" MRA") | match["A-Z-"]).maybe }
|
30
|
+
rule(:cc) { str("CC") >> suff >> sect.maybe }
|
31
|
+
rule(:jc) { str("JC") >> suff }
|
32
|
+
rule(:cec) { str("CEC") }
|
33
|
+
rule(:wgms) { str("WG-MS") }
|
34
|
+
rule(:group) { (cgmp | cipm | cc | jc | cec | wgms).as(:group) }
|
35
|
+
|
36
|
+
rule(:type) { match["[:alpha:]"].repeat(1).as(:type) >> space }
|
37
|
+
|
38
|
+
rule(:type_group) { type >> group >> slash >> num_and_year }
|
39
|
+
rule(:group_type) { group >> space >> delimeter? >> type >> num_and_year }
|
40
|
+
rule(:outcome) { group_type | type_group }
|
41
|
+
|
42
|
+
rule(:append) { comma >> str("Appendix") >> space >> number }
|
43
|
+
rule(:brochure) { str("SI").as(:group) >> space >> str("Brochure").as(:type) >> append.maybe }
|
44
|
+
|
45
|
+
rule(:metrologia) { str("Metrologia").as(:group) >> (space >> match["a-zA-Z0-9\s"].repeat(1).as(:number)).maybe }
|
46
|
+
|
47
|
+
rule(:result) { outcome | brochure | metrologia }
|
48
|
+
|
49
|
+
root :result
|
50
|
+
end
|
51
|
+
|
52
|
+
TYPES = {
|
53
|
+
"Resolution" => "RES",
|
54
|
+
"Résolution" => "RES",
|
55
|
+
"Recommendation" => "REC",
|
56
|
+
"Recommandation" => "REC",
|
57
|
+
"Decision" => "DECN",
|
58
|
+
"Décision" => "DECN",
|
59
|
+
"Declaration" => "Déclaration",
|
60
|
+
"Réunion" => "Meeting",
|
61
|
+
}.freeze
|
62
|
+
|
63
|
+
# @return [Hash] the parsed id components
|
64
|
+
attr_accessor :id
|
65
|
+
|
66
|
+
#
|
67
|
+
# Create a new Id object
|
68
|
+
#
|
69
|
+
# @param [String] id id string
|
70
|
+
#
|
71
|
+
def initialize(id)
|
72
|
+
@id = Parser.new.parse(id)
|
73
|
+
rescue Parslet::ParseFailed => e
|
74
|
+
warn "[relaton-bipm] Incorrect reference: #{id}"
|
75
|
+
# warn "[relaton-bipm] #{e.parse_failure_cause.ascii_tree}"
|
76
|
+
raise RelatonBib::RequestError, e
|
77
|
+
end
|
78
|
+
|
79
|
+
#
|
80
|
+
# Compare two Id objects
|
81
|
+
#
|
82
|
+
# @param [RelatonBipm::Id, Hash] other the other Id object
|
83
|
+
#
|
84
|
+
# @return [Boolean] true if the two Id objects are equal
|
85
|
+
#
|
86
|
+
def ==(other)
  # Compare copies only. #normalized_hash is memoized and +other+ may be a
  # Hash owned by the caller; deleting keys in place would permanently strip
  # :year / :lang and make later comparisons (e.g. against a different year)
  # succeed by mistake.
  theirs = (other.is_a?(Id) ? other.normalized_hash : other).dup
  mine = normalized_hash.dup

  # Year and language are optional parts of a reference: they take part in
  # the comparison only when both sides specify them.
  %i[year lang].each do |key|
    next if mine[key] && theirs[key]

    mine.delete(key)
    theirs.delete(key)
  end

  mine == theirs
end
|
95
|
+
|
96
|
+
#
|
97
|
+
# Transform ID parts.
|
98
|
+
# Translate type into abbreviation, remove leading zeros from number
|
99
|
+
#
|
100
|
+
# @return [Hash] the normalized ID parts
|
101
|
+
#
|
102
|
+
def normalized_hash # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
  # Built once per instance; later calls reuse the cached result.
  return @normalized_hash if @normalized_hash

  # The group name "CCDS" is always rewritten to "CCTF".
  parts = { group: id[:group].to_s.sub("CCDS", "CCTF") }
  parts[:type] = normalized_type if id[:type]

  # A number that is missing, or empty once leading zeros are stripped, is omitted.
  number = normalized_number
  parts[:number] = number if number && !number.empty?

  # Optional components are copied as plain strings when present.
  %i[year lang].each { |key| parts[key] = id[key].to_s if id[key] }

  @normalized_hash = parts
end
|
113
|
+
|
114
|
+
#
|
115
|
+
# Translate type into abbreviation
|
116
|
+
#
|
117
|
+
# @return [String] the normalized type
|
118
|
+
#
|
119
|
+
def normalized_type
  # Types listed in TYPES are mapped; anything else is passed through unchanged.
  type = id[:type].to_s
  TYPES.fetch(type, type)
end
|
122
|
+
|
123
|
+
#
|
124
|
+
# Remove leading zeros from number
|
125
|
+
#
|
126
|
+
# @return [String, nil] the normalized number
|
127
|
+
#
|
128
|
+
def normalized_number
  number = id[:number]
  # nil when the reference carries no number; otherwise "007" becomes "7".
  number.to_s.sub(/^0+/, "") if number
end
|
133
|
+
end
|
134
|
+
end
|
@@ -21,6 +21,7 @@ module RelatonBipm
|
|
21
21
|
#
|
22
22
|
def initialize(doc)
|
23
23
|
@doc = doc
|
24
|
+
@meta = @doc.at("./front/article-meta")
|
24
25
|
end
|
25
26
|
|
26
27
|
#
|
@@ -39,9 +40,9 @@ module RelatonBipm
|
|
39
40
|
# @return [Array<RelatonBib::DocumentIdentifier>] array of document identifiers
|
40
41
|
#
|
41
42
|
def parse_docid
|
42
|
-
pubid = "#{journal_title} #{
|
43
|
+
pubid = "#{journal_title} #{volume_issue_article}"
|
43
44
|
primary_id = create_docid pubid, "BIPM", true
|
44
|
-
@
|
45
|
+
@meta.xpath("./article-id[@pub-id-type='doi']")
|
45
46
|
.each_with_object([primary_id]) do |id, m|
|
46
47
|
m << create_docid(id.text, id["pub-id-type"])
|
47
48
|
end
|
@@ -52,17 +53,15 @@ module RelatonBipm
|
|
52
53
|
#
|
53
54
|
# @return [Array<String>] array of volume, issue and page
|
54
55
|
#
|
55
|
-
def
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
[volume, issue, article]
|
61
|
-
end
|
56
|
+
def volume_issue_article
|
57
|
+
volume = @meta.at("./volume").text
|
58
|
+
issue = @meta.at("./issue").text
|
59
|
+
# page = @doc.at("./front/article-meta/fpage")&.text || manuscript
|
60
|
+
[volume, issue, article].join(" ")
|
62
61
|
end
|
63
62
|
|
64
63
|
def article
|
65
|
-
@
|
64
|
+
@meta.at("./article-id[@pub-id-type='manuscript']").text.match(/[^_]+$/).to_s
|
66
65
|
end
|
67
66
|
|
68
67
|
#
|
@@ -93,9 +92,11 @@ module RelatonBipm
|
|
93
92
|
# @return [Array<RelatonBib::TypedTitleString>] array of title strings
|
94
93
|
#
|
95
94
|
def parse_title
|
96
|
-
@
|
95
|
+
@meta.xpath("./title-group/article-title").map do |t|
|
96
|
+
next if t.text.empty?
|
97
|
+
|
97
98
|
RelatonBib::TypedTitleString.new content: t.text, language: t[:"xml:lang"], script: "Latn"
|
98
|
-
end
|
99
|
+
end.compact
|
99
100
|
end
|
100
101
|
|
101
102
|
#
|
@@ -104,7 +105,7 @@ module RelatonBipm
|
|
104
105
|
# @return [Array<RelatonBib::Contributor>] array of contributors
|
105
106
|
#
|
106
107
|
def parse_contributor
|
107
|
-
@
|
108
|
+
@meta.xpath("./contrib-group/contrib").map do |c|
|
108
109
|
entity = create_person(c) || create_organization(c)
|
109
110
|
RelatonBib::ContributionInfo.new(entity: entity, role: [type: c[:"contrib-type"]])
|
110
111
|
end
|
@@ -130,7 +131,7 @@ module RelatonBipm
|
|
130
131
|
#
|
131
132
|
def affiliation(contrib) # rubocop:disable Metrics/AbcSize
|
132
133
|
contrib.xpath("./xref[@ref-type='aff']").map do |x|
|
133
|
-
a = @
|
134
|
+
a = @meta.at("./contrib-group/aff[@id='#{x[:rid]}']/label/following-sibling::node()")
|
134
135
|
parts = a.text.split(", ")
|
135
136
|
orgname = parts[0..-3].join(", ")
|
136
137
|
city, country = parts[-2..]
|
@@ -195,7 +196,7 @@ module RelatonBipm
|
|
195
196
|
# @return [Array<String, Object>] string date or whatever block returns
|
196
197
|
#
|
197
198
|
def dates
|
198
|
-
@
|
199
|
+
@meta.xpath("./pub-date").map do |d|
|
199
200
|
month = date_part(d, "month")
|
200
201
|
day = date_part(d, "day")
|
201
202
|
date = "#{d.at('./year').text}-#{month}-#{day}"
|
@@ -216,7 +217,7 @@ module RelatonBipm
|
|
216
217
|
# @return [Array<RelatonBib::CopyrightAssociation>] array of copyright associations
|
217
218
|
#
|
218
219
|
def parse_copyright
|
219
|
-
@
|
220
|
+
@meta.xpath("./permissions").each_with_object([]) do |l, m|
|
220
221
|
from = l.at("./copyright-year")
|
221
222
|
next unless from
|
222
223
|
|
@@ -235,7 +236,7 @@ module RelatonBipm
|
|
235
236
|
# @return [Array<RelatonBib::FormattedString>] array of abstracts
|
236
237
|
#
|
237
238
|
def parse_abstract
|
238
|
-
@
|
239
|
+
@meta.xpath("./abstract").map do |a|
|
239
240
|
RelatonBib::FormattedString.new(
|
240
241
|
content: a.inner_html, language: a[:"xml:lang"], script: ["Latn"], format: "text/html",
|
241
242
|
)
|
@@ -286,9 +287,18 @@ module RelatonBipm
|
|
286
287
|
# @return [Array<RelatonBib::Extent>] array of extents
|
287
288
|
#
|
288
289
|
def parse_extent
|
289
|
-
|
290
|
-
|
290
|
+
@meta.xpath("./volume|./issue|./fpage").map do |e|
|
291
|
+
if e.name == "fpage"
|
292
|
+
type = "page"
|
293
|
+
to = @meta.at("./lpage")&.text
|
294
|
+
else
|
295
|
+
type = e.name
|
296
|
+
end
|
297
|
+
RelatonBib::Locality.new type, e.text, to
|
291
298
|
end
|
299
|
+
# %w[volume issue page].map.with_index do |t, i|
|
300
|
+
# RelatonBib::Locality.new t, volume_issue_page[i]
|
301
|
+
# end
|
292
302
|
end
|
293
303
|
|
294
304
|
def parse_type
|
@@ -12,7 +12,7 @@ module RelatonBipm
|
|
12
12
|
|
13
13
|
# @param data_fetcher [RelatonBipm::DataFetcher]
|
14
14
|
def initialize(data_fetcher)
|
15
|
-
@data_fetcher = data_fetcher
|
15
|
+
@data_fetcher = WeakRef.new data_fetcher
|
16
16
|
end
|
17
17
|
|
18
18
|
#
|
@@ -28,13 +28,16 @@ module RelatonBipm
|
|
28
28
|
#
|
29
29
|
# Fetch articles from rawdata-bipm-metrologia and save to files
|
30
30
|
#
|
31
|
-
def fetch_articles # rubocop:disable Metrics/AbcSize
|
31
|
+
def fetch_articles # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
32
32
|
Dir["#{DIR}/**/*.xml"].each do |path|
|
33
33
|
doc = Nokogiri::XML File.read(path, encoding: "UTF-8")
|
34
34
|
item = ArticleParser.parse doc.at("/article")
|
35
35
|
file = "#{item.docidentifier.first.id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
|
36
36
|
out_path = File.join(@data_fetcher.output, file)
|
37
37
|
@data_fetcher.index[[item.docidentifier.first.id]] = out_path
|
38
|
+
@data_fetcher.index_new.add_or_update [item.docidentifier.first.id], out_path
|
39
|
+
key = Id.new(item.docidentifier.first.id).normalized_hash
|
40
|
+
@data_fetcher.index2.add_or_update key, out_path
|
38
41
|
@data_fetcher.write_file out_path, item
|
39
42
|
end
|
40
43
|
end
|
@@ -67,7 +70,7 @@ module RelatonBipm
|
|
67
70
|
# @overload set(volume)
|
68
71
|
# @param [String] volume volume number
|
69
72
|
#
|
70
|
-
def fetch_metrologia(*args)
|
73
|
+
def fetch_metrologia(*args) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
71
74
|
id = identifier(*args)
|
72
75
|
item = BipmBibliographicItem.new(
|
73
76
|
type: "article", formattedref: formattedref(id), docid: docidentifier(id),
|
@@ -77,6 +80,8 @@ module RelatonBipm
|
|
77
80
|
file = "#{id.downcase.gsub(' ', '-')}.#{@data_fetcher.ext}"
|
78
81
|
path = File.join(@data_fetcher.output, file)
|
79
82
|
@data_fetcher.index[[id]] = path
|
83
|
+
@data_fetcher.index_new.add_or_update [id], path
|
84
|
+
@data_fetcher.index2.add_or_update Id.new(id).normalized_hash, path
|
80
85
|
@data_fetcher.write_file path, item
|
81
86
|
end
|
82
87
|
|
data/lib/relaton_bipm/version.rb
CHANGED
data/lib/relaton_bipm.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
require "zip"
|
2
2
|
require "fileutils"
|
3
|
+
require "parslet"
|
3
4
|
require "relaton_bib"
|
5
|
+
require "relaton/index"
|
6
|
+
require "relaton_bipm/id_parser"
|
4
7
|
require "relaton_bipm/version"
|
5
8
|
require "relaton_bipm/editorial_group"
|
6
9
|
require "relaton_bipm/committee"
|
@@ -13,7 +16,6 @@ require "relaton_bipm/bipm_bibliographic_item"
|
|
13
16
|
require "relaton_bipm/bipm_bibliography"
|
14
17
|
require "relaton_bipm/hash_converter"
|
15
18
|
require "relaton_bipm/xml_parser"
|
16
|
-
require "relaton_bipm/index"
|
17
19
|
require "relaton_bipm/data_fetcher"
|
18
20
|
require "relaton_bipm/data_outcomes_parser"
|
19
21
|
require "relaton_bipm/bipm_si_brochure_parser"
|
data/relaton_bipm.gemspec
CHANGED
@@ -32,17 +32,13 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
|
|
32
32
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
33
33
|
spec.require_paths = ["lib"]
|
34
34
|
|
35
|
-
spec.add_development_dependency "byebug"
|
36
35
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
37
|
-
spec.add_development_dependency "pry-byebug"
|
38
|
-
spec.add_development_dependency "ruby-jing"
|
39
|
-
spec.add_development_dependency "simplecov"
|
40
|
-
spec.add_development_dependency "vcr"
|
41
|
-
spec.add_development_dependency "webmock"
|
42
36
|
|
43
37
|
spec.add_dependency "faraday", "~> 1.0"
|
44
38
|
spec.add_dependency "mechanize", "~> 2.8.0"
|
39
|
+
spec.add_dependency "parslet", "~> 2.0.0"
|
45
40
|
spec.add_dependency "relaton-bib", "~> 1.14.0"
|
41
|
+
spec.add_dependency "relaton-index", "~> 0.1.0"
|
46
42
|
spec.add_dependency "rubyzip", "~> 2.3.0"
|
47
43
|
spec.add_dependency "serrano", "~> 1.0"
|
48
44
|
end
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-bipm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.14.
|
4
|
+
version: 1.14.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: byebug
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: equivalent-xml
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -39,117 +25,75 @@ dependencies:
|
|
39
25
|
- !ruby/object:Gem::Version
|
40
26
|
version: '0.6'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: ruby-jing
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: simplecov
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: vcr
|
28
|
+
name: faraday
|
85
29
|
requirement: !ruby/object:Gem::Requirement
|
86
30
|
requirements:
|
87
|
-
- - "
|
31
|
+
- - "~>"
|
88
32
|
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :
|
33
|
+
version: '1.0'
|
34
|
+
type: :runtime
|
91
35
|
prerelease: false
|
92
36
|
version_requirements: !ruby/object:Gem::Requirement
|
93
37
|
requirements:
|
94
|
-
- - "
|
38
|
+
- - "~>"
|
95
39
|
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
40
|
+
version: '1.0'
|
97
41
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
42
|
+
name: mechanize
|
99
43
|
requirement: !ruby/object:Gem::Requirement
|
100
44
|
requirements:
|
101
|
-
- - "
|
45
|
+
- - "~>"
|
102
46
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
104
|
-
type: :
|
47
|
+
version: 2.8.0
|
48
|
+
type: :runtime
|
105
49
|
prerelease: false
|
106
50
|
version_requirements: !ruby/object:Gem::Requirement
|
107
51
|
requirements:
|
108
|
-
- - "
|
52
|
+
- - "~>"
|
109
53
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
54
|
+
version: 2.8.0
|
111
55
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
56
|
+
name: parslet
|
113
57
|
requirement: !ruby/object:Gem::Requirement
|
114
58
|
requirements:
|
115
59
|
- - "~>"
|
116
60
|
- !ruby/object:Gem::Version
|
117
|
-
version:
|
61
|
+
version: 2.0.0
|
118
62
|
type: :runtime
|
119
63
|
prerelease: false
|
120
64
|
version_requirements: !ruby/object:Gem::Requirement
|
121
65
|
requirements:
|
122
66
|
- - "~>"
|
123
67
|
- !ruby/object:Gem::Version
|
124
|
-
version:
|
68
|
+
version: 2.0.0
|
125
69
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
70
|
+
name: relaton-bib
|
127
71
|
requirement: !ruby/object:Gem::Requirement
|
128
72
|
requirements:
|
129
73
|
- - "~>"
|
130
74
|
- !ruby/object:Gem::Version
|
131
|
-
version:
|
75
|
+
version: 1.14.0
|
132
76
|
type: :runtime
|
133
77
|
prerelease: false
|
134
78
|
version_requirements: !ruby/object:Gem::Requirement
|
135
79
|
requirements:
|
136
80
|
- - "~>"
|
137
81
|
- !ruby/object:Gem::Version
|
138
|
-
version:
|
82
|
+
version: 1.14.0
|
139
83
|
- !ruby/object:Gem::Dependency
|
140
|
-
name: relaton-
|
84
|
+
name: relaton-index
|
141
85
|
requirement: !ruby/object:Gem::Requirement
|
142
86
|
requirements:
|
143
87
|
- - "~>"
|
144
88
|
- !ruby/object:Gem::Version
|
145
|
-
version: 1.
|
89
|
+
version: 0.1.0
|
146
90
|
type: :runtime
|
147
91
|
prerelease: false
|
148
92
|
version_requirements: !ruby/object:Gem::Requirement
|
149
93
|
requirements:
|
150
94
|
- - "~>"
|
151
95
|
- !ruby/object:Gem::Version
|
152
|
-
version: 1.
|
96
|
+
version: 0.1.0
|
153
97
|
- !ruby/object:Gem::Dependency
|
154
98
|
name: rubyzip
|
155
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -216,7 +160,7 @@ files:
|
|
216
160
|
- lib/relaton_bipm/document_relation.rb
|
217
161
|
- lib/relaton_bipm/editorial_group.rb
|
218
162
|
- lib/relaton_bipm/hash_converter.rb
|
219
|
-
- lib/relaton_bipm/
|
163
|
+
- lib/relaton_bipm/id_parser.rb
|
220
164
|
- lib/relaton_bipm/processor.rb
|
221
165
|
- lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb
|
222
166
|
- lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb
|
data/lib/relaton_bipm/index.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
module RelatonBipm
|
2
|
-
class Index
|
3
|
-
#
|
4
|
-
# Initialize index
|
5
|
-
#
|
6
|
-
def initialize
|
7
|
-
read_index_file || get_index_from_gh
|
8
|
-
end
|
9
|
-
|
10
|
-
#
|
11
|
-
# Search index entry
|
12
|
-
#
|
13
|
-
# @param [String] ref reference
|
14
|
-
#
|
15
|
-
# @return [String] path to document file
|
16
|
-
#
|
17
|
-
def search(ref)
|
18
|
-
@index.detect { |key, _| key.include? ref }&.last
|
19
|
-
end
|
20
|
-
|
21
|
-
private
|
22
|
-
|
23
|
-
#
|
24
|
-
# Create dir if need and return path to index file
|
25
|
-
#
|
26
|
-
# @return [String] path to index file
|
27
|
-
#
|
28
|
-
def path
|
29
|
-
@path ||= begin
|
30
|
-
dir = File.join Dir.home, ".relaton", "bipm"
|
31
|
-
FileUtils.mkdir_p dir
|
32
|
-
File.join dir, "index.yaml"
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
#
|
37
|
-
# Read index from file if it exists and not outdated
|
38
|
-
#
|
39
|
-
# @return [Hash, nil] index content
|
40
|
-
#
|
41
|
-
def read_index_file
|
42
|
-
return if !File.exist?(path) || File.ctime(path).to_date < Date.today
|
43
|
-
|
44
|
-
@index = RelatonBib.parse_yaml File.read(path, encoding: "UTF-8")
|
45
|
-
end
|
46
|
-
|
47
|
-
#
|
48
|
-
# Save index to file
|
49
|
-
#
|
50
|
-
# @return [<Type>] <description>
|
51
|
-
#
|
52
|
-
def save_index_file
|
53
|
-
File.write path, @index.to_yaml, encoding: "UTF-8"
|
54
|
-
end
|
55
|
-
|
56
|
-
#
|
57
|
-
# Get index from a GitHub repository
|
58
|
-
#
|
59
|
-
# @return [Hash] index content
|
60
|
-
#
|
61
|
-
def get_index_from_gh # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
62
|
-
resp = Zip::InputStream.new URI("#{BipmBibliography::GH_ENDPOINT}index.zip").open
|
63
|
-
zip = resp.get_next_entry
|
64
|
-
@index = RelatonBib.parse_yaml zip.get_input_stream.read
|
65
|
-
save_index_file
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|