relaton-bipm 2.1.2 → 2.2.0.pre.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <grammar xmlns="http://relaxng.org/ns/structure/1.0">
3
- <include href="basicdoc.rng"/>
4
- <include href="relaton-bipm.rng"/>
5
- <start>
6
- <choice>
7
- <ref name="bibitem"/>
8
- <ref name="bibdata"/>
9
- </choice>
10
- </start>
11
- </grammar>
@@ -1,89 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <grammar xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0" xmlns="http://relaxng.org/ns/structure/1.0">
3
- <include href="biblio-standoc.rng">
4
- <define name="DocumentType">
5
- <choice>
6
- <value>brochure</value>
7
- <value>mise-en-pratique</value>
8
- <value>rapport</value>
9
- <value>monographie</value>
10
- <value>guide</value>
11
- <value>meeting-report</value>
12
- <value>technical-report</value>
13
- <value>working-party-note</value>
14
- <value>strategy</value>
15
- <value>cipm-mra</value>
16
- <value>resolution</value>
17
- <value>policy</value>
18
- </choice>
19
- </define>
20
- <define name="structuredidentifier">
21
- <element name="structuredidentifier">
22
- <element name="docnumber">
23
- <text/>
24
- </element>
25
- <optional>
26
- <element name="part">
27
- <text/>
28
- </element>
29
- </optional>
30
- <optional>
31
- <element name="appendix">
32
- <text/>
33
- </element>
34
- </optional>
35
- </element>
36
- </define>
37
- </include>
38
- <define name="BibDataExtensionType" combine="interleave">
39
- <optional>
40
- <ref name="comment-period">
41
- <a:documentation>Period during which comments are allowed on the document draft</a:documentation>
42
- </ref>
43
- </optional>
44
- <optional>
45
- <ref name="si-aspect">
46
-         <a:documentation>The domain of SI covered by the document; used to select the logo on the PDF cover page. The logo nominates one primary SI base unit, and the defining constants for that unit and possibly others as well</a:documentation>
47
- </ref>
48
- </optional>
49
- <optional>
50
- <ref name="meeting-note">
51
- <a:documentation>Note on when and where a guide was adopted</a:documentation>
52
- </ref>
53
- </optional>
54
- </define>
55
- <define name="comment-period">
56
- <element name="comment-period">
57
- <element name="from">
58
- <ref name="ISO8601Date"/>
59
- </element>
60
- <element name="to">
61
- <ref name="ISO8601Date"/>
62
- </element>
63
- </element>
64
- </define>
65
- <define name="si-aspect">
66
- <element name="si-aspect">
67
- <choice>
68
- <value>A_e_deltanu</value>
69
- <value>A_e</value>
70
- <value>cd_Kcd_h_deltanu</value>
71
- <value>cd_Kcd</value>
72
- <value>full</value>
73
- <value>K_k_deltanu</value>
74
- <value>K_k</value>
75
- <value>kg_h_c_deltanu</value>
76
- <value>kg_h</value>
77
- <value>m_c_deltanu</value>
78
- <value>m_c</value>
79
- <value>mol_NA</value>
80
- <value>s_deltanu</value>
81
- </choice>
82
- </element>
83
- </define>
84
- <define name="meeting-note">
85
- <element name="meeting-note">
86
- <text/>
87
- </element>
88
- </define>
89
- </grammar>
@@ -1,456 +0,0 @@
1
- module Relaton::Bipm
2
- module RawdataBipmMetrologia
3
- class ArticleParser
4
- ATTRS = %i[docidentifier title contributor date copyright abstract relation series
5
- extent type source ext].freeze
6
- #
7
- # Create new parser and parse document
8
- #
9
- # @param [String] path path to XML file
10
- #
11
-       # @return [Relaton::Bipm::ItemData] document
12
- #
13
- def self.parse(path, errors = {})
14
- doc = Nokogiri::XML(File.read(path, encoding: "UTF-8"))
15
- journal, volume, article = path.split("/")[-2].split("_")[1..]
16
- new(doc, journal, volume, article, errors).parse
17
- end
18
-
19
- #
20
- # Initialize parser
21
- #
22
- # @param [Nokogiri::XML::Document] doc XML document
23
- # @param [String] journal journal
24
- # @param [String] volume volume
25
- # @param [String] article article
26
- # @param [Hash] errors errors hash
27
- #
28
- def initialize(doc, journal, volume, article, errors = {})
29
- @doc = doc.at "/article"
30
- @journal = journal
31
- @volume = volume
32
- @article = article
33
- @meta = doc.at("/article/front/article-meta")
34
- @errors = errors
35
- end
36
-
37
- #
38
- # Create new document
39
- #
40
- # @return [Relaton::Bipm::ItemData] document
41
- #
42
- def parse
43
- attrs = ATTRS.to_h { |a| [a, send("parse_#{a}")] }
44
- ItemData.new(**attrs)
45
- end
46
-
47
- #
48
- # Parse docid
49
- #
50
-       # @return [Array<Relaton::Bib::Docidentifier>] array of document identifiers
51
- #
52
- def parse_docidentifier
53
- primary_id = create_docidentifier pubid, "BIPM", true
54
- result = @meta.xpath("./article-id[@pub-id-type='doi']").each_with_object([primary_id]) do |id, m|
55
- m << create_docidentifier(id.text, id["pub-id-type"])
56
- end
57
- @errors[:article_docidentifier] &&= result.empty?
58
- result
59
- end
60
-
61
- #
62
- # Build primary publication identifier string (e.g. "Metrologia 55 1 125")
63
- #
64
- # @return [String] pubid
65
- #
66
- def pubid
67
- @pubid ||= "#{journal_title} #{volume_issue_article}"
68
- end
69
-
70
- #
71
- # Parse volume, issue and page
72
- #
73
- # @return [String] volume issue page
74
- #
75
- def volume_issue_article
76
- [@journal, @volume, @article].compact.join(" ")
77
- end
78
-
79
- # def article
80
- # @meta.at("./article-id[@pub-id-type='manuscript']").text.match(/[^_]+$/).to_s
81
- # end
82
-
83
- #
84
- # Parse journal title
85
- #
86
- # @return [String] journal title
87
- #
88
- def journal_title
89
- return @journal_title if defined? @journal_title
90
-
91
- @journal_title = @doc.at("./front/journal-meta/journal-title-group/journal-title")&.text
92
- @errors[:journal_title] &&= @journal_title.nil? || @journal_title.empty?
93
- @journal_title
94
- end
95
-
96
- #
97
- # Create document identifier
98
- #
99
- # @param [String] id document id
100
- # @param [String] type id type
101
- # @param [Boolean, nil] primary is primary id
102
- #
103
- # @return [Relaton::Bib::Docidentifier] document identifier
104
- #
105
- def create_docidentifier(id, type, primary = nil)
106
- Relaton::Bib::Docidentifier.new content: id, type: type, primary: primary
107
- end
108
-
109
- #
110
- # Parse title
111
- #
112
-       # @return [Array<Relaton::Bib::Title>] array of titles
113
- #
114
- def parse_title
115
- result = @meta.xpath("./title-group/article-title").map do |t|
116
- next if t.text.empty?
117
-
118
- Relaton::Bib::Title.new(content: t.inner_html, language: t[:"xml:lang"], script: "Latn")
119
- end.compact
120
- @errors[:article_title] &&= result.empty?
121
- result
122
- end
123
-
124
- #
125
- # Parse contributor
126
- #
127
- # @return [Array<Relaton::Bib::Contributor>] array of contributors
128
- #
129
- def parse_contributor
130
- result = @meta.xpath("./contrib-group/contrib").map do |c|
131
- role = Relaton::Bib::Contributor::Role.new(type: c[:"contrib-type"])
132
- attrs = { person: create_person(c), organization: create_organization(c), role: [role] }
133
- Relaton::Bib::Contributor.new(**attrs)
134
- end
135
- @errors[:article_contributor] &&= result.empty?
136
- result
137
- end
138
-
139
- def create_person(contrib)
140
- name = contrib.at("./name")
141
- @errors[:article_contributor_person] &&= name.nil? || name.text.empty?
142
- return if name.nil? || name.text.empty?
143
-
144
- Relaton::Bib::Person.new name: fullname(name), affiliation: affiliation(contrib)
145
- end
146
-
147
- def create_organization(contrib)
148
- org = contrib.at("./collab")
149
- @errors[:article_contributor_organization] &&= org.nil? || org.text.empty?
150
- return if org.nil? || org.text.empty?
151
-
152
- name = Relaton::Bib::TypedLocalizedString.new(content: org.text)
153
- Relaton::Bib::Organization.new name: [name]
154
- end
155
-
156
- #
157
- # Parse affiliations
158
- #
159
- # @param [Nokogiri::XML::Element] contrib contributor element
160
- #
161
- # @return [Array<Relaton::Bib::Affiliation>] array of affiliations
162
- #
163
- def affiliation(contrib)
164
- aff = contrib.xpath("./xref[@ref-type='aff']").map do |x|
165
- a = @meta.at("./contrib-group/aff[@id='#{x[:rid]}']") # /label/following-sibling::node()")
166
- parse_affiliation a
167
- end.compact
168
- @errors[:article_affiliation] &&= aff.empty?
169
- aff
170
- end
171
-
172
- def parse_affiliation(aff)
173
- text = aff.xpath("text()|sup|sub").to_xml.split(",").map(&:strip).reject(&:empty?).join(", ")
174
- text = CGI::unescapeHTML(text)
175
- return if text.include?("Permanent address:") || text == "Germany" ||
176
- text.start_with?("Guest") || text.start_with?("Deceased") ||
177
- text.include?("Author to whom any correspondence should be addressed")
178
-
179
- args = {}
180
- institution = aff.at('institution')
181
- if institution
182
- name = institution.text
183
- return if name == "1005 Southover Lane"
184
-
185
- args[:subdivision] = parse_division(aff)
186
- args[:address] = parse_address(aff)
187
- else
188
- name = text
189
- end
190
- args[:name] = [Relaton::Bib::TypedLocalizedString.new(content: name)]
191
- org = Relaton::Bib::Organization.new(**args)
192
- Relaton::Bib::Affiliation.new(organization: org)
193
- end
194
-
195
- def parse_division(aff)
196
- div = aff.xpath("text()[following-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
197
- @errors[:article_affiliation_division] &&= div.empty?
198
- return [] if div.empty?
199
-
200
- name = Relaton::Bib::TypedLocalizedString.new(content: div, language: "en", script: "Latn")
201
- [Relaton::Bib::Subdivision.new(name: [name])]
202
- end
203
-
204
- def parse_address(aff)
205
- address = []
206
- addr = aff.xpath("text()[preceding-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
207
- address << addr unless addr.empty?
208
- country = aff.at('country')
209
- address << country.text if country && !country.text.empty?
210
- address = address.join(", ")
211
- @errors[:article_affiliation_address] &&= address.empty?
212
- return [] if address.empty?
213
-
214
- [Relaton::Bib::Address.new(formatted_address: address)]
215
- end
216
-
217
- #
218
- # Create full name
219
- #
220
-       # @param [Nokogiri::XML::Element] name name element of a contributor
221
- #
222
- # @return [Relaton::Bib::FullName] full name
223
- #
224
- def fullname(name)
225
- cname = [name.at("./given-names"), name.at("./surname")].compact.map(&:text).join(" ")
226
- @errors[:article_fullname] &&= cname.empty?
227
- return if cname.empty?
228
-
229
- completename = Relaton::Bib::LocalizedString.new content: cname, language: "en", script: "Latn"
230
- Relaton::Bib::FullName.new completename: completename
231
- end
232
-
233
- #
234
- # Parse forename
235
- #
236
- # @param [String] given_name given name
237
- #
238
- # @return [Array<Relaton::Bib::Forename>] array of forenames
239
- #
240
- # def forename(given_name) # rubocop:disable Metrics/MethodLength
241
- # return [] unless given_name
242
-
243
- # given_name.text.scan(/(\w+)(?:\s(\w)(?:\s|$))?/).map do |nm, int|
244
- # if nm.size == 1
245
- # name = nil
246
- # init = nm
247
- # else
248
- # name = nm
249
- # init = int
250
- # end
251
- # Relaton::Bib::Forename.new(content: name, language: ["en"], script: ["Latn"], initial: init)
252
- # end
253
- # end
254
-
255
- #
256
- # Parse date
257
- #
258
- # @return [Array<Relaton::Bib::Date>] array of dates
259
- #
260
- def parse_date
261
- at = dates.min
262
- @errors[:article_date] &&= at.nil?
263
- return [] unless at
264
-
265
- [Relaton::Bib::Date.new(type: "published", at: at)]
266
- end
267
-
268
- #
269
- # Parse date
270
- #
271
- # @yield [date, type] date and type
272
- #
273
- # @return [Array<String, Object>] string date or whatever block returns
274
- #
275
- def dates
276
- @meta.xpath("./pub-date").map do |d|
277
- month = date_part(d, "month")
278
- day = date_part(d, "day")
279
- date = "#{d.at('./year').text}-#{month}-#{day}"
280
- block_given? ? yield(date, d[:"pub-type"]) : date
281
- end
282
- end
283
-
284
- def date_part(date, type)
285
- part = date.at("./#{type}")&.text
286
- return "01" if part.nil? || part.empty?
287
-
288
- part.rjust(2, "0")
289
- end
290
-
291
- #
292
- # Parse copyright
293
- #
294
- # @return [Array<Relaton::Bib::Copyright>] array of copyright associations
295
- #
296
- def parse_copyright
297
- result = @meta.xpath("./permissions").each_with_object([]) do |l, m|
298
- from = l.at("./copyright-year")
299
- next unless from
300
-
301
- owner = l.at("./copyright-statement").text.split(" & ").map do |c|
302
- /(?<name>\p{L}+(?:\s\p{L}+)*)/ =~ c
303
- org_name = Relaton::Bib::TypedLocalizedString.new(content: name, language: "en", script: "Latn")
304
- org = Relaton::Bib::Organization.new name: [org_name]
305
- Relaton::Bib::ContributionInfo.new(organization: org)
306
- end
307
- m << Relaton::Bib::Copyright.new(owner: owner, from: from.text)
308
- end
309
- @errors[:article_copyright] &&= result.empty?
310
- result
311
- end
312
-
313
- #
314
- # Parse abstract
315
- #
316
-       # @return [Array<Relaton::Bib::Abstract>] array of abstracts
317
- #
318
- def parse_abstract
319
- result = @meta.xpath("./abstract").map do |a|
320
- Relaton::Bib::Abstract.new(
321
- content: a.inner_html, language: a[:"xml:lang"], script: "Latn",
322
- )
323
- end
324
- @errors[:article_abstract] &&= result.empty?
325
- result
326
- end
327
-
328
- #
329
-       # Parse relation
330
- #
331
- # @return [Array<Relaton::Bib::Relation>] array of document relations
332
- #
333
- def parse_relation
334
- rels = dates do |d, t|
335
- Relaton::Bib::Relation.new(type: "hasManifestation", bibitem: bibitem(d, t))
336
- end
337
- @errors[:article_relation] &&= rels.empty?
338
- rels + parse_references
339
- end
340
-
341
- #
342
- # Parse back/ref-list references as "cites" relations
343
- #
344
- # @return [Array<Relaton::Bib::Relation>] array of "cites" relations
345
- #
346
- def parse_references
347
- refs = @doc.xpath("./back/ref-list/ref").filter_map do |ref|
348
- citation = ref.at("./element-citation")
349
- next unless citation
350
-
351
- Relaton::Bib::Relation.new(type: "cites", bibitem: citation_bibitem(citation))
352
- end
353
- @errors[:article_references] &&= refs.empty?
354
- refs
355
- end
356
-
357
- #
358
- # Build bibitem from an element-citation
359
- #
360
- # @param [Nokogiri::XML::Element] citation element-citation node
361
- #
362
- # @return [Relaton::Bipm::ItemData] bibitem
363
- #
364
- def citation_bibitem(citation)
365
- attrs = {}
366
- doi = citation.at("./pub-id[@pub-id-type='doi']")&.text
367
- if doi && !doi.empty?
368
- @errors[:article_citation_doi] &&= false
369
- attrs[:docidentifier] = [Relaton::Bib::Docidentifier.new(content: doi, type: "doi")]
370
- attrs[:source] = [Relaton::Bib::Uri.new(content: "https://doi.org/#{doi}", type: "doi")]
371
- else
372
- @errors[:article_citation_doi] &&= true
373
- end
374
- source = citation.at("./source")&.text
375
- if source && !source.empty?
376
- @errors[:article_citation_title] &&= false
377
- attrs[:title] = [Relaton::Bib::Title.new(content: source)]
378
- else
379
- @errors[:article_citation_title] &&= true
380
- end
381
- year = citation.at("./year")&.text
382
- if year && !year.empty?
383
- @errors[:article_citation_date] &&= false
384
- attrs[:date] = [Relaton::Bib::Date.new(type: "published", at: year)]
385
- else
386
- @errors[:article_citation_date] &&= true
387
- end
388
- ItemData.new(**attrs)
389
- end
390
-
391
- #
392
- # Create bibitem
393
- #
394
- # @param [String] date
395
- # @param [String] type date type
396
- #
397
-       # @return [Relaton::Bipm::ItemData] bibitem
398
- #
399
- def bibitem(date, type)
400
- dt = Relaton::Bib::Date.new(type: type, at: date)
401
- carrier = type == "epub" ? "online" : "print"
402
- medium = Relaton::Bib::Medium.new carrier: carrier
403
- fref = Relaton::Bib::Formattedref.new(content: pubid)
404
- docid = [create_docidentifier(pubid, "BIPM", true)]
405
- ItemData.new(formattedref: fref, docidentifier: docid, date: [dt], medium: medium)
406
- end
407
-
408
- #
409
- # Parse series
410
- #
411
- # @return [Array<Relaton::Bib::Series>] array of series
412
- #
413
- def parse_series
414
- title = Relaton::Bib::Title.new(content: journal_title, language: "en", script: "Latn")
415
- [Relaton::Bib::Series.new(title: [title])]
416
- end
417
-
418
- #
419
- # Parse extent
420
- #
421
- # @return [Array<Relaton::Bib::Extent>] array of extents
422
- #
423
- def parse_extent
424
- locs = @meta.xpath("./volume|./issue|./fpage").map do |e|
425
- if e.name == "fpage"
426
- type = "page"
427
- to = @meta.at("./lpage")&.text
428
- else
429
- type = e.name
430
- end
431
- Relaton::Bib::Locality.new type: type, reference_from: e.text, reference_to: to
432
- end
433
- @errors[:article_extent] &&= locs.empty?
434
- return [] if locs.empty?
435
-
436
- [Relaton::Bib::Extent.new(locality: locs)]
437
- end
438
-
439
- def parse_type = "article"
440
-
441
- def parse_source
442
- result = @meta.xpath("./article-id[@pub-id-type='doi']").each_with_object([]) do |l, a|
443
- url = "https://doi.org/#{l.text}"
444
- a << Relaton::Bib::Uri.new(content: url, type: "src")
445
- a << Relaton::Bib::Uri.new(content: url, type: "doi")
446
- end
447
- @errors[:article_source] &&= result.empty?
448
- result
449
- end
450
-
451
- def parse_ext = Ext.new(doctype: parse_doctype)
452
-
453
- def parse_doctype = Doctype.new(content: "article")
454
- end
455
- end
456
- end