relaton-bipm 2.1.1 → 2.2.0.pre.alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +8 -0
- data/Rakefile +1 -1
- data/lib/relaton/bipm/bibliography.rb +1 -1
- data/lib/relaton/bipm/id_parser.rb +23 -0
- data/lib/relaton/bipm/model/ext.rb +1 -0
- data/lib/relaton/bipm/rawdata_bipm_metrologia/fetcher.rb +2 -2
- data/lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb +353 -0
- data/lib/relaton/bipm/si_brochure_parser.rb +18 -6
- data/lib/relaton/bipm/version.rb +1 -1
- data/relaton-bipm.gemspec +5 -4
- metadata +24 -16
- data/.rubocop.yml +0 -12
- data/grammars/basicdoc.rng +0 -2140
- data/grammars/biblio-standoc.rng +0 -268
- data/grammars/biblio.rng +0 -2125
- data/grammars/relaton-bipm-compile.rng +0 -11
- data/grammars/relaton-bipm.rng +0 -89
- data/lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb +0 -456
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bb54f817a493636590193a9dceaa9758c0e0b42aefde145355146d2b46f96bd8
|
|
4
|
+
data.tar.gz: 7940e3c593a1170d942b646aa2aa17de05548c4963547a56910c3a4e3feb6112
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fd892d60b84d85c241bd44c1a886adc0b681b7cbb8f413c2f4d930ad9d70aadd5415641bf4947a01abfc7a3734d4c070775b874f55dc8f8f698c3c8e89c5df61
|
|
7
|
+
data.tar.gz: 5ec15de3ffd3c9168d51a15c51bda5d650a822b0da9fb1a18fb8ea62af670b8df74c6a2d80ad10d78c45fbb8b12150a0a050282c5e3e0bdce3fc61cfa6180672
|
data/Gemfile
CHANGED
|
@@ -3,6 +3,14 @@ source "https://rubygems.org"
|
|
|
3
3
|
# Specify your gem's dependencies in relaton_bipm.gemspec
|
|
4
4
|
gemspec
|
|
5
5
|
|
|
6
|
+
# Use local monorepo sibling gems where available.
|
|
7
|
+
Dir["../*/"].each do |dir|
|
|
8
|
+
name = File.basename(dir)
|
|
9
|
+
next if name == File.basename(__dir__)
|
|
10
|
+
next unless File.exist?(File.join(dir, "#{name}.gemspec"))
|
|
11
|
+
gem name, path: dir
|
|
12
|
+
end
|
|
13
|
+
|
|
6
14
|
gem "byebug"
|
|
7
15
|
gem "equivalent-xml", "~> 0.6"
|
|
8
16
|
gem "pry-byebug"
|
data/Rakefile
CHANGED
|
@@ -11,7 +11,7 @@ namespace :spec do
|
|
|
11
11
|
require "net/http"
|
|
12
12
|
require "uri"
|
|
13
13
|
|
|
14
|
-
url = "https://raw.githubusercontent.com/relaton/relaton-data-bipm/
|
|
14
|
+
url = "https://raw.githubusercontent.com/relaton/relaton-data-bipm/v2/index-v1.zip"
|
|
15
15
|
dest = File.join(__dir__, "spec", "fixtures", "index-v1.zip")
|
|
16
16
|
|
|
17
17
|
puts "Downloading \#{url} ..."
|
|
@@ -3,7 +3,7 @@ require_relative "id_parser"
|
|
|
3
3
|
|
|
4
4
|
module Relaton::Bipm
|
|
5
5
|
class Bibliography
|
|
6
|
-
GH_ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-bipm/refs/heads/
|
|
6
|
+
GH_ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-bipm/refs/heads/v2/".freeze
|
|
7
7
|
|
|
8
8
|
class << self
|
|
9
9
|
# @param text [String]
|
|
@@ -132,9 +132,30 @@ module Relaton
|
|
|
132
132
|
end
|
|
133
133
|
|
|
134
134
|
def parse_si_brochure(id)
|
|
135
|
+
parse_si_brochure_en(id) || parse_si_brochure_fr(id)
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
# English form. Accepts the bare/sectioned forms ("SI Brochure",
|
|
139
|
+
# "SI Brochure, Part 1", "SI Brochure Concise" …) and the edition-tagged
|
|
140
|
+
# docnumber emitted by SI Brochure 9e v3.01:
|
|
141
|
+
# "SI Brochure 9e v3.01 (2019/2024, E)"
|
|
142
|
+
# Edition/version/year/lang are matched but not captured so the index
|
|
143
|
+
# key collapses to {group, type}, as in the prior collection-render flow.
|
|
144
|
+
def parse_si_brochure_en(id)
|
|
135
145
|
%r{^
|
|
136
146
|
(?<group>SI)\s(?<type>Brochure)
|
|
137
147
|
(?:,?\s(?:(?:Part|Partie)\s(?<part>\d+)|(?:Appendix|Annexe)\s(?<append>\d+)|(?<number>Concise|FAQ)))?
|
|
148
|
+
(?:\s\d+e\sv[\d.]+\s\([\d/]+(?:,\s*[A-Z])?\))?
|
|
149
|
+
$}x.match(id)
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
# French form. The SI Brochure 9e v3.01 French bibdata emits its
|
|
153
|
+
# docnumber as "Brochure sur le SI 9e v3.01 (2019/2024, F)". Same
|
|
154
|
+
# document as the English form — collapse to the same {group, type}.
|
|
155
|
+
def parse_si_brochure_fr(id)
|
|
156
|
+
%r{^
|
|
157
|
+
(?<type>Brochure)\ssur\sle\s(?<group>SI)
|
|
158
|
+
(?:\s\d+e\sv[\d.]+\s\([\d/]+(?:,\s*[A-Z])?\))?
|
|
138
159
|
$}x.match(id)
|
|
139
160
|
end
|
|
140
161
|
|
|
@@ -195,6 +216,8 @@ module Relaton
|
|
|
195
216
|
other_hash.delete(:year) unless hash[:year]
|
|
196
217
|
hash.delete(:lang) unless other_hash[:lang]
|
|
197
218
|
other_hash.delete(:lang) unless hash[:lang]
|
|
219
|
+
hash.delete(:part) unless other_hash[:part]
|
|
220
|
+
hash.delete(:append) unless other_hash[:append]
|
|
198
221
|
hash == other_hash
|
|
199
222
|
end
|
|
200
223
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "../id_parser"
|
|
4
|
-
require_relative "
|
|
4
|
+
require_relative "niso_jats_parser"
|
|
5
5
|
|
|
6
6
|
module Relaton::Bipm
|
|
7
7
|
module RawdataBipmMetrologia
|
|
@@ -34,7 +34,7 @@ module Relaton::Bipm
|
|
|
34
34
|
def fetch_articles # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
35
35
|
# aff = Affiliations.parse DIR
|
|
36
36
|
Dir["#{DIR}/**/*.xml"].sort_by { |p| archive_date(p) }.each do |path|
|
|
37
|
-
item =
|
|
37
|
+
item = NisoJatsParser.parse path, @data_fetcher.errors
|
|
38
38
|
file = "#{item.docidentifier.first.content.downcase.tr(' ', '-')}.#{@data_fetcher.ext}"
|
|
39
39
|
out_path = File.join(@data_fetcher.output, file)
|
|
40
40
|
key = Relaton::Bipm::Id.new.parse(item.docidentifier.first.content).to_hash
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
require "niso-jats"
|
|
2
|
+
|
|
3
|
+
module Relaton::Bipm
|
|
4
|
+
module RawdataBipmMetrologia
|
|
5
|
+
class NisoJatsParser
|
|
6
|
+
ATTRS = %i[docidentifier title contributor date copyright abstract relation series
|
|
7
|
+
extent type source ext].freeze
|
|
8
|
+
|
|
9
|
+
# JATS inline phrasing children handled by #serialize_mixed_content
|
|
10
|
+
INLINE_TYPES = %i[italic bold fixed_case monospace overline roman
|
|
11
|
+
sans_serif sc strike underline sub sup].freeze
|
|
12
|
+
private_constant :INLINE_TYPES
|
|
13
|
+
|
|
14
|
+
# @param [Niso::Jats::Article] doc document
|
|
15
|
+
# @param [String] journal journal
|
|
16
|
+
# @param [String] volume volume
|
|
17
|
+
# @param [String] article article
|
|
18
|
+
# @param [Hash] errors errors hash
|
|
19
|
+
def initialize(doc, journal, volume, article, errors = {})
|
|
20
|
+
@doc = doc
|
|
21
|
+
@journal = journal
|
|
22
|
+
@volume = volume
|
|
23
|
+
@article = article
|
|
24
|
+
@errors = errors
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# @param [String] path path to XML file
|
|
28
|
+
# @param [Hash] errors errors hash
|
|
29
|
+
#
|
|
30
|
+
# @return [Relaton::Bipm::ItemData] document
|
|
31
|
+
def self.parse(path, errors = {})
|
|
32
|
+
doc = Niso::Jats::Article.from_xml(File.read(path, encoding: "UTF-8"))
|
|
33
|
+
journal, volume, article = path.split("/")[-2].split("_")[1..]
|
|
34
|
+
new(doc, journal, volume, article, errors).parse
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# @return [Relaton::Bipm::ItemData] document
|
|
38
|
+
def parse
|
|
39
|
+
attrs = ATTRS.to_h { |a| [a, send("parse_#{a}")] }
|
|
40
|
+
ItemData.new(**attrs)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# @return [Array<Relaton::Bib::Docidentifier>] array of document identifiers
|
|
44
|
+
def parse_docidentifier
|
|
45
|
+
ids = [create_docidentifier(pubid, "BIPM", true)]
|
|
46
|
+
ids << create_docidentifier(@doc.doi, "doi") if @doc.doi
|
|
47
|
+
@errors[:article_docidentifier] &&= ids.empty?
|
|
48
|
+
ids
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# @return [String] primary BIPM publication identifier
|
|
52
|
+
def pubid
|
|
53
|
+
@pubid ||= "#{@doc.journal_title} #{volume_issue_article}"
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# @return [String] journal, volume and article joined by spaces (nil parts omitted)
|
|
57
|
+
def volume_issue_article
|
|
58
|
+
[@journal, @volume, @article].compact.join(" ")
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# @return [Array<Relaton::Bib::Title>] array containing the article title
|
|
62
|
+
def parse_title
|
|
63
|
+
title = @doc.front.article_meta.title_group.article_title
|
|
64
|
+
result = [Relaton::Bib::Title.new(
|
|
65
|
+
content: serialize_mixed_content(title), language: title.lang, script: "Latn",
|
|
66
|
+
)]
|
|
67
|
+
@errors[:article_title] &&= result.empty?
|
|
68
|
+
result
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# @return [Array<Relaton::Bib::Contributor>] array of contributors
|
|
72
|
+
def parse_contributor
|
|
73
|
+
result = @doc.contributors.map do |contrib|
|
|
74
|
+
role = Relaton::Bib::Contributor::Role.new(type: contrib.contrib_type)
|
|
75
|
+
attrs = { person: create_person(contrib), organization: create_organization(contrib), role: [role] }
|
|
76
|
+
Relaton::Bib::Contributor.new(**attrs)
|
|
77
|
+
end
|
|
78
|
+
@errors[:article_contributor] &&= result.empty?
|
|
79
|
+
result
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# @return [Array<Relaton::Bib::Date>] array of dates
|
|
83
|
+
def parse_date
|
|
84
|
+
on = @doc.pub_dates.min
|
|
85
|
+
@errors[:article_date] &&= on.nil?
|
|
86
|
+
return [] unless on
|
|
87
|
+
|
|
88
|
+
[Relaton::Bib::Date.new(type: "published", at: on)]
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# @return [Array<Relaton::Bib::Copyright>] array of copyright associations
|
|
92
|
+
def parse_copyright
|
|
93
|
+
permissions = @doc.front.article_meta.permissions
|
|
94
|
+
return [] unless permissions
|
|
95
|
+
|
|
96
|
+
from = permissions.copyright_year.first
|
|
97
|
+
return [] unless from
|
|
98
|
+
|
|
99
|
+
owner = permissions.copyright_statement.inject([]) do |acc, cs|
|
|
100
|
+
acc + Array(cs.content).join.split(" & ").map do |c|
|
|
101
|
+
/(?<name>[A-Za-z]+(?:\s[A-Za-z]+)*)/ =~ c
|
|
102
|
+
org_name = Relaton::Bib::TypedLocalizedString.new(content: name, language: "en", script: "Latn")
|
|
103
|
+
org = Relaton::Bib::Organization.new name: [org_name]
|
|
104
|
+
Relaton::Bib::ContributionInfo.new(organization: org)
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
result = [Relaton::Bib::Copyright.new(owner: owner, from: from.content)]
|
|
108
|
+
@errors[:article_copyright] &&= result.empty?
|
|
109
|
+
result
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# @return [Array<Relaton::Bib::Abstract>] array of abstracts
|
|
113
|
+
def parse_abstract
|
|
114
|
+
abstracts = @doc.front.article_meta.abstract
|
|
115
|
+
return [] unless abstracts
|
|
116
|
+
|
|
117
|
+
result = abstracts.filter_map do |a|
|
|
118
|
+
content_parts = []
|
|
119
|
+
content_parts << Array(a.title.content).join if a.title
|
|
120
|
+
a.p&.each do |paragraph|
|
|
121
|
+
content_parts << "<p>#{extract_paragraph_text(paragraph)}</p>"
|
|
122
|
+
end
|
|
123
|
+
next if content_parts.empty?
|
|
124
|
+
|
|
125
|
+
Relaton::Bib::Abstract.new(
|
|
126
|
+
content: content_parts.join, language: a.lang, script: "Latn",
|
|
127
|
+
)
|
|
128
|
+
end
|
|
129
|
+
@errors[:article_abstract] &&= result.empty?
|
|
130
|
+
result
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def extract_paragraph_text(paragraph)
|
|
134
|
+
serialize_mixed_content(paragraph)
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
# Reconstruct the marked-up string of a niso-jats mixed_content element
|
|
138
|
+
# (Title, Paragraph, …) by walking element_order in document order.
|
|
139
|
+
# Text nodes are emitted verbatim; recognised inline children are
|
|
140
|
+
# wrapped in their original XML tag so JATS markup like <italic> and
|
|
141
|
+
# <sub> survives into the relaton-bib payload instead of being
|
|
142
|
+
# flattened (paragraphs) or serialised as a stringified Array (titles).
|
|
143
|
+
def serialize_mixed_content(element)
|
|
144
|
+
return "" unless element.respond_to?(:element_order) && element.element_order
|
|
145
|
+
|
|
146
|
+
pools = INLINE_TYPES.to_h { |t| [t, element.send(t).to_a.dup] }
|
|
147
|
+
cursor = Hash.new(0)
|
|
148
|
+
out = []
|
|
149
|
+
element.element_order.each do |el|
|
|
150
|
+
case el.type
|
|
151
|
+
when "Text"
|
|
152
|
+
out << el.text_content
|
|
153
|
+
when "Element"
|
|
154
|
+
attr = el.name.tr("-", "_").to_sym
|
|
155
|
+
next unless pools.key?(attr)
|
|
156
|
+
|
|
157
|
+
inst = pools[attr][cursor[attr]]
|
|
158
|
+
cursor[attr] += 1
|
|
159
|
+
next unless inst.respond_to?(:content)
|
|
160
|
+
|
|
161
|
+
inner = inst.content
|
|
162
|
+
inner = inner.join if inner.is_a?(Array)
|
|
163
|
+
out << "<#{el.name}>#{inner}</#{el.name}>"
|
|
164
|
+
end
|
|
165
|
+
end
|
|
166
|
+
out.join
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
# @return [Array<Relaton::Bib::Relation>] array of document relations
|
|
170
|
+
def parse_relation
|
|
171
|
+
pub_dates = @doc.front.article_meta.pub_date
|
|
172
|
+
rels = if pub_dates
|
|
173
|
+
pub_dates.sort_by { |pd| pd.pub_type == "ppub" ? 0 : 1 }.map do |pd|
|
|
174
|
+
type = pd.pub_type == "epub" ? "epub" : "ppub"
|
|
175
|
+
Relaton::Bib::Relation.new(type: "hasManifestation", bibitem: bibitem(pd, type))
|
|
176
|
+
end
|
|
177
|
+
else
|
|
178
|
+
[]
|
|
179
|
+
end
|
|
180
|
+
@errors[:article_relation] &&= rels.empty?
|
|
181
|
+
rels
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
# @return [Array<Relaton::Bib::Series>] array of series
|
|
185
|
+
def parse_series
|
|
186
|
+
title = Relaton::Bib::Title.new(content: @doc.journal_title, language: "en", script: "Latn")
|
|
187
|
+
[Relaton::Bib::Series.new(title: [title])]
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
# @return [Array<Relaton::Bib::Extent>] array of extents
|
|
191
|
+
def parse_extent
|
|
192
|
+
locality = @doc.locality.map { |e| Relaton::Bib::Locality.new(type: e[0], reference_from: e[1], reference_to: e[2]) }
|
|
193
|
+
@errors[:article_extent] &&= locality.empty?
|
|
194
|
+
return [] if locality.empty?
|
|
195
|
+
|
|
196
|
+
[Relaton::Bib::Extent.new(locality: locality)]
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
def parse_type = "article"
|
|
200
|
+
|
|
201
|
+
def parse_source
|
|
202
|
+
result = @doc.doi_links.map { |link| Relaton::Bib::Uri.new(**link) }
|
|
203
|
+
@errors[:article_source] &&= result.empty?
|
|
204
|
+
result
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
def parse_ext = Ext.new(doctype: parse_doctype)
|
|
208
|
+
|
|
209
|
+
def parse_doctype = Doctype.new(content: "article")
|
|
210
|
+
|
|
211
|
+
private
|
|
212
|
+
|
|
213
|
+
# @param [String] id document id
|
|
214
|
+
# @param [String] type id type
|
|
215
|
+
# @param [Boolean, nil] primary is primary id
|
|
216
|
+
#
|
|
217
|
+
# @return [Relaton::Bib::Docidentifier] document identifier
|
|
218
|
+
def create_docidentifier(id, type, primary = nil)
|
|
219
|
+
Relaton::Bib::Docidentifier.new content: id, type: type, primary: primary
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
def create_person(contrib)
|
|
223
|
+
return unless contrib.name&.any?
|
|
224
|
+
|
|
225
|
+
@errors[:article_contributor_person] &&= false
|
|
226
|
+
fullname = fullname(contrib.name[0])
|
|
227
|
+
Relaton::Bib::Person.new name: fullname, affiliation: affiliation(contrib)
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
def create_organization(contrib)
|
|
231
|
+
return unless contrib.collab&.any?
|
|
232
|
+
|
|
233
|
+
@errors[:article_contributor_organization] &&= false
|
|
234
|
+
name = Relaton::Bib::TypedLocalizedString.new(content: contrib.collab.flat_map { |c| Array(c.content) }.join)
|
|
235
|
+
Relaton::Bib::Organization.new name: [name]
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
# @param [Niso::Jats::Name] name name element
|
|
239
|
+
#
|
|
240
|
+
# @return [Relaton::Bib::FullName] full name
|
|
241
|
+
def fullname(name)
|
|
242
|
+
cname = [name.given_names, name.surname].compact.map(&:content).join(" ")
|
|
243
|
+
@errors[:article_fullname] &&= cname.empty?
|
|
244
|
+
return if cname.empty?
|
|
245
|
+
|
|
246
|
+
completename = Relaton::Bib::LocalizedString.new content: cname, language: "en", script: "Latn"
|
|
247
|
+
Relaton::Bib::FullName.new completename: completename
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
# @param [Niso::Jats::Contrib] contrib contributor element
|
|
251
|
+
#
|
|
252
|
+
# @return [Array<Relaton::Bib::Affiliation>] array of affiliations
|
|
253
|
+
def affiliation(contrib)
|
|
254
|
+
aff = contrib.aff_xrefs.filter_map do |xref|
|
|
255
|
+
a = @doc.affiliation(xref.rid)
|
|
256
|
+
parse_affiliation(a[0]) if a.any?
|
|
257
|
+
end
|
|
258
|
+
@errors[:article_affiliation] &&= aff.empty?
|
|
259
|
+
aff
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
def parse_affiliation(aff) # rubocop:disable Metrics/MethodLength
|
|
263
|
+
div, addr = division_address(aff)
|
|
264
|
+
return if addr.include?("Permanent address:") || addr == "Germany" ||
|
|
265
|
+
addr.start_with?("Guest") || addr.start_with?("Deceased") ||
|
|
266
|
+
addr.include?("Author to whom any correspondence should be addressed")
|
|
267
|
+
|
|
268
|
+
args = {}
|
|
269
|
+
institutions = aff.institution || []
|
|
270
|
+
if institutions.any?
|
|
271
|
+
name = Array(institutions[0].content).join
|
|
272
|
+
return if name == "1005 Southover Lane"
|
|
273
|
+
|
|
274
|
+
args[:subdivision] = parse_division(div) if div
|
|
275
|
+
args[:address] = parse_address(aff, addr)
|
|
276
|
+
else
|
|
277
|
+
name = div
|
|
278
|
+
end
|
|
279
|
+
args[:name] = [Relaton::Bib::TypedLocalizedString.new(content: name)]
|
|
280
|
+
org = Relaton::Bib::Organization.new(**args)
|
|
281
|
+
Relaton::Bib::Affiliation.new(organization: org)
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
def division_address(aff)
|
|
285
|
+
div_addr = aff.content.map do |c|
|
|
286
|
+
CGI.unescapeHTML(c.strip.gsub(/^\W*|\W*$/, ""))
|
|
287
|
+
end.reject(&:empty?)
|
|
288
|
+
|
|
289
|
+
institutions = aff.institution || []
|
|
290
|
+
if div_addr.size > 1 && institutions.any?
|
|
291
|
+
# Multiple text nodes around institution: first ones are division, last is address
|
|
292
|
+
div = div_addr[0..-2].join(", ")
|
|
293
|
+
addr = div_addr[-1]
|
|
294
|
+
elsif institutions.any?
|
|
295
|
+
# Single text node with institution: no division text, it's all address
|
|
296
|
+
div = nil
|
|
297
|
+
addr = div_addr[0] || ""
|
|
298
|
+
else
|
|
299
|
+
# No institution: the whole text is the organization name; no address split
|
|
300
|
+
joined = div_addr.join(", ")
|
|
301
|
+
div = joined.empty? ? nil : joined
|
|
302
|
+
addr = ""
|
|
303
|
+
end
|
|
304
|
+
[div, addr]
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
def parse_division(div)
|
|
308
|
+
@errors[:article_affiliation_division] &&= div.empty?
|
|
309
|
+
return [] if div.empty?
|
|
310
|
+
|
|
311
|
+
name = Relaton::Bib::TypedLocalizedString.new(content: div, language: "en", script: "Latn")
|
|
312
|
+
[Relaton::Bib::Subdivision.new(name: [name])]
|
|
313
|
+
end
|
|
314
|
+
|
|
315
|
+
def parse_address(_aff, addr)
|
|
316
|
+
address = []
|
|
317
|
+
address << addr unless addr.empty?
|
|
318
|
+
# NOTE(review): niso-jats parses country into aff.country, but only the addr text is used here (the aff argument is not read) — confirm addr already carries the country
|
|
319
|
+
@errors[:article_affiliation_address] &&= address.empty?
|
|
320
|
+
return [] if address.empty?
|
|
321
|
+
|
|
322
|
+
[Relaton::Bib::Address.new(formatted_address: address.join(", "))]
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
# @param [Niso::Jats::PubDate] pd pub date object
|
|
326
|
+
# @param [String] type date type
|
|
327
|
+
#
|
|
328
|
+
# @return [Relaton::Bipm::ItemData] bibitem
|
|
329
|
+
def bibitem(pd, type)
|
|
330
|
+
dt = Relaton::Bib::Date.new(type: type, at: format_pub_date(pd))
|
|
331
|
+
carrier = type == "epub" ? "online" : "print"
|
|
332
|
+
medium = Relaton::Bib::Medium.new carrier: carrier
|
|
333
|
+
fref = Relaton::Bib::Formattedref.new(content: pubid)
|
|
334
|
+
docid = [create_docidentifier(pubid, "BIPM", true)]
|
|
335
|
+
ItemData.new(formattedref: fref, docidentifier: docid, date: [dt], medium: medium)
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
def format_pub_date(pd)
|
|
339
|
+
year = pd.year&.content
|
|
340
|
+
return nil unless year&.match?(/\A\d{1,4}\z/)
|
|
341
|
+
|
|
342
|
+
parts = [year.rjust(4, "0")]
|
|
343
|
+
month = pd.month&.content
|
|
344
|
+
if month&.match?(/\A\d{1,2}\z/)
|
|
345
|
+
parts << month.rjust(2, "0")
|
|
346
|
+
day = pd.day&.content
|
|
347
|
+
parts << day.rjust(2, "0") if day&.match?(/\A\d{1,2}\z/)
|
|
348
|
+
end
|
|
349
|
+
parts.join("-")
|
|
350
|
+
end
|
|
351
|
+
end
|
|
352
|
+
end
|
|
353
|
+
end
|
|
@@ -25,12 +25,11 @@ module Relaton::Bipm
|
|
|
25
25
|
# Parse SI brochure and write them to YAML files
|
|
26
26
|
#
|
|
27
27
|
def parse # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
28
|
-
#
|
|
29
|
-
#
|
|
30
|
-
#
|
|
31
|
-
#
|
|
32
|
-
|
|
33
|
-
Dir["bipm-si-brochure/_site/documents/*.rxl"].each do |f|
|
|
28
|
+
# metanorma site generate writes per-document outputs into a subdirectory
|
|
29
|
+
# named after the source path (e.g. _site/documents/si-brochure/3.01/
|
|
30
|
+
# si-brochure-en.rxl). The legacy top-level *.rxl glob is kept for
|
|
31
|
+
# backwards compatibility with any older flow that flattened outputs.
|
|
32
|
+
si_brochure_rxls.each do |f|
|
|
34
33
|
puts "Parsing #{f}"
|
|
35
34
|
xml = File.read(f, encoding: "UTF-8")
|
|
36
35
|
xml = xml.force_encoding("UTF-8") if xml.encoding != Encoding::UTF_8
|
|
@@ -71,6 +70,19 @@ module Relaton::Bipm
|
|
|
71
70
|
end
|
|
72
71
|
end
|
|
73
72
|
|
|
73
|
+
#
|
|
74
|
+
# @return [Array<String>] paths to SI Brochure RXL files. Looks at the
|
|
75
|
+
# legacy flat layout first, then the metanorma-cli subdirectory layout
|
|
76
|
+
# (`<source_path_without_'sources'>/<doc>.rxl`) used by current
|
|
77
|
+
# `metanorma site generate` output.
|
|
78
|
+
#
|
|
79
|
+
def si_brochure_rxls
|
|
80
|
+
flat = Dir["bipm-si-brochure/_site/documents/*.rxl"]
|
|
81
|
+
return flat if flat.any?
|
|
82
|
+
|
|
83
|
+
Dir["bipm-si-brochure/_site/documents/**/si-brochure-{en,fr}.rxl"]
|
|
84
|
+
end
|
|
85
|
+
|
|
74
86
|
#
|
|
75
87
|
# Update ID of SI brochure
|
|
76
88
|
#
|
data/lib/relaton/bipm/version.rb
CHANGED
data/relaton-bipm.gemspec
CHANGED
|
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
|
|
|
12
12
|
"bibliographic use using the BibliographicItem model"
|
|
13
13
|
spec.homepage = "https://github.com/relaton/relaton-bipm"
|
|
14
14
|
spec.license = "BSD-2-Clause"
|
|
15
|
-
spec.required_ruby_version = Gem::Requirement.new(">= 3.
|
|
15
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 3.3.0")
|
|
16
16
|
|
|
17
17
|
# spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
|
|
18
18
|
|
|
@@ -34,9 +34,10 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
|
|
|
34
34
|
|
|
35
35
|
spec.add_dependency "faraday", "~> 2.7.0"
|
|
36
36
|
spec.add_dependency "mechanize", "~> 2.10"
|
|
37
|
+
spec.add_dependency "niso-jats", "~> 0.3.4"
|
|
37
38
|
spec.add_dependency "parslet", "~> 2.0.0"
|
|
38
|
-
spec.add_dependency "relaton-bib", "~> 2.
|
|
39
|
-
spec.add_dependency "relaton-index", "~>
|
|
40
|
-
spec.add_dependency "relaton-core", "~>
|
|
39
|
+
spec.add_dependency "relaton-bib", "~> 2.2.0.pre.alpha.1"
|
|
40
|
+
spec.add_dependency "relaton-index", "~> 2.2.0.pre.alpha.1"
|
|
41
|
+
spec.add_dependency "relaton-core", "~> 2.2.0.pre.alpha.1"
|
|
41
42
|
spec.add_dependency "rubyzip", "~> 2.3.0"
|
|
42
43
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-bipm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.
|
|
4
|
+
version: 2.2.0.pre.alpha.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-06-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: faraday
|
|
@@ -38,6 +38,20 @@ dependencies:
|
|
|
38
38
|
- - "~>"
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
40
|
version: '2.10'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: niso-jats
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: 0.3.4
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: 0.3.4
|
|
41
55
|
- !ruby/object:Gem::Dependency
|
|
42
56
|
name: parslet
|
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -58,42 +72,42 @@ dependencies:
|
|
|
58
72
|
requirements:
|
|
59
73
|
- - "~>"
|
|
60
74
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: 2.
|
|
75
|
+
version: 2.2.0.pre.alpha.1
|
|
62
76
|
type: :runtime
|
|
63
77
|
prerelease: false
|
|
64
78
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
79
|
requirements:
|
|
66
80
|
- - "~>"
|
|
67
81
|
- !ruby/object:Gem::Version
|
|
68
|
-
version: 2.
|
|
82
|
+
version: 2.2.0.pre.alpha.1
|
|
69
83
|
- !ruby/object:Gem::Dependency
|
|
70
84
|
name: relaton-index
|
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
|
72
86
|
requirements:
|
|
73
87
|
- - "~>"
|
|
74
88
|
- !ruby/object:Gem::Version
|
|
75
|
-
version:
|
|
89
|
+
version: 2.2.0.pre.alpha.1
|
|
76
90
|
type: :runtime
|
|
77
91
|
prerelease: false
|
|
78
92
|
version_requirements: !ruby/object:Gem::Requirement
|
|
79
93
|
requirements:
|
|
80
94
|
- - "~>"
|
|
81
95
|
- !ruby/object:Gem::Version
|
|
82
|
-
version:
|
|
96
|
+
version: 2.2.0.pre.alpha.1
|
|
83
97
|
- !ruby/object:Gem::Dependency
|
|
84
98
|
name: relaton-core
|
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
|
86
100
|
requirements:
|
|
87
101
|
- - "~>"
|
|
88
102
|
- !ruby/object:Gem::Version
|
|
89
|
-
version:
|
|
103
|
+
version: 2.2.0.pre.alpha.1
|
|
90
104
|
type: :runtime
|
|
91
105
|
prerelease: false
|
|
92
106
|
version_requirements: !ruby/object:Gem::Requirement
|
|
93
107
|
requirements:
|
|
94
108
|
- - "~>"
|
|
95
109
|
- !ruby/object:Gem::Version
|
|
96
|
-
version:
|
|
110
|
+
version: 2.2.0.pre.alpha.1
|
|
97
111
|
- !ruby/object:Gem::Dependency
|
|
98
112
|
name: rubyzip
|
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -120,7 +134,6 @@ files:
|
|
|
120
134
|
- ".github/workflows/release.yml"
|
|
121
135
|
- ".gitignore"
|
|
122
136
|
- ".rspec"
|
|
123
|
-
- ".rubocop.yml"
|
|
124
137
|
- CLAUDE.md
|
|
125
138
|
- Gemfile
|
|
126
139
|
- LICENSE.txt
|
|
@@ -129,11 +142,6 @@ files:
|
|
|
129
142
|
- bin/console
|
|
130
143
|
- bin/rspec
|
|
131
144
|
- bin/setup
|
|
132
|
-
- grammars/basicdoc.rng
|
|
133
|
-
- grammars/biblio-standoc.rng
|
|
134
|
-
- grammars/biblio.rng
|
|
135
|
-
- grammars/relaton-bipm-compile.rng
|
|
136
|
-
- grammars/relaton-bipm.rng
|
|
137
145
|
- lib/relaton/bipm.rb
|
|
138
146
|
- lib/relaton/bipm/bibliography.rb
|
|
139
147
|
- lib/relaton/bipm/converter/asciibib.rb
|
|
@@ -150,8 +158,8 @@ files:
|
|
|
150
158
|
- lib/relaton/bipm/model/structured_identifier.rb
|
|
151
159
|
- lib/relaton/bipm/processor.rb
|
|
152
160
|
- lib/relaton/bipm/rawdata_bipm_metrologia/affiliations.rb
|
|
153
|
-
- lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb
|
|
154
161
|
- lib/relaton/bipm/rawdata_bipm_metrologia/fetcher.rb
|
|
162
|
+
- lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb
|
|
155
163
|
- lib/relaton/bipm/si_brochure_parser.rb
|
|
156
164
|
- lib/relaton/bipm/util.rb
|
|
157
165
|
- lib/relaton/bipm/version.rb
|
|
@@ -170,7 +178,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
170
178
|
requirements:
|
|
171
179
|
- - ">="
|
|
172
180
|
- !ruby/object:Gem::Version
|
|
173
|
-
version: 3.
|
|
181
|
+
version: 3.3.0
|
|
174
182
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
175
183
|
requirements:
|
|
176
184
|
- - ">="
|
data/.rubocop.yml
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
# This project follows the Ribose OSS style guide.
|
|
2
|
-
# https://github.com/riboseinc/oss-guides
|
|
3
|
-
# All project-specific additions and overrides should be specified in this file.
|
|
4
|
-
|
|
5
|
-
require: rubocop-rails
|
|
6
|
-
|
|
7
|
-
inherit_from:
|
|
8
|
-
- https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
|
|
9
|
-
AllCops:
|
|
10
|
-
TargetRubyVersion: 3.2
|
|
11
|
-
Rails:
|
|
12
|
-
Enabled: false
|