relaton-iso 2.1.1 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +5 -2
- data/README.adoc +21 -1
- data/lib/relaton/iso/bibliography.rb +11 -1
- data/lib/relaton/iso/data_fetcher.rb +220 -151
- data/lib/relaton/iso/data_parser.rb +443 -0
- data/lib/relaton/iso/model/docidentifier.rb +7 -2
- data/lib/relaton/iso/processor.rb +8 -5
- data/lib/relaton/iso/type/pubid.rb +50 -0
- data/lib/relaton/iso/version.rb +1 -1
- metadata +5 -12
- data/grammars/basicdoc.rng +0 -2140
- data/grammars/biblio-standoc.rng +0 -268
- data/grammars/biblio.rng +0 -2125
- data/grammars/relaton-iso-compile.rng +0 -11
- data/grammars/relaton-iso.rng +0 -165
- data/lib/relaton/iso/queue.rb +0 -63
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
require_relative "../iso"
|
|
5
|
+
require_relative "scraper"
|
|
6
|
+
|
|
7
|
+
module Relaton
|
|
8
|
+
module Iso
|
|
9
|
+
#
|
|
10
|
+
# Parses one ISO Open Data record (`iso_deliverables_metadata.jsonl` line)
|
|
11
|
+
# into an `Relaton::Iso::ItemData`.
|
|
12
|
+
#
|
|
13
|
+
# See https://www.iso.org/open-data.html for the field reference.
|
|
14
|
+
#
|
|
15
|
+
class DataParser
|
|
16
|
+
ATTRS = %i[
|
|
17
|
+
type docidentifier docnumber edition language script title status ics
|
|
18
|
+
date contributor abstract copyright source relation place
|
|
19
|
+
structuredidentifier ext
|
|
20
|
+
].freeze
|
|
21
|
+
|
|
22
|
+
DOCTYPES = {
|
|
23
|
+
"IS" => "international-standard",
|
|
24
|
+
"TS" => "technical-specification",
|
|
25
|
+
"TR" => "technical-report",
|
|
26
|
+
"PAS" => "publicly-available-specification",
|
|
27
|
+
"GUIDE" => "guide",
|
|
28
|
+
"IWA" => "international-workshop-agreement",
|
|
29
|
+
"R" => "recommendation",
|
|
30
|
+
"ISP" => "international-standard",
|
|
31
|
+
"DATA" => "international-standard",
|
|
32
|
+
"TTA" => "international-standard",
|
|
33
|
+
}.freeze
|
|
34
|
+
|
|
35
|
+
SUPPLEMENT_DOCTYPES = {
|
|
36
|
+
"Amd" => "amendment",
|
|
37
|
+
"Cor" => "technical-corrigendum",
|
|
38
|
+
"Add" => "addendum",
|
|
39
|
+
}.freeze
|
|
40
|
+
|
|
41
|
+
DOC_URL = "https://www.iso.org/standard/%d.html"
|
|
42
|
+
OBP_URL = "https://www.iso.org/obp/ui/en/#!iso:std:%d:en"
|
|
43
|
+
RSS_URL = "https://www.iso.org/contents/data/standard/%s/%s/%d.detail.rss"
|
|
44
|
+
|
|
45
|
+
#
|
|
46
|
+
# @param [Hash] pub one Open Data record
|
|
47
|
+
# @param [Hash{Integer=>String}] ref_index map of Open Data `id` ->
|
|
48
|
+
# `reference`, used to resolve `replaces` / `replacedBy` (which are
|
|
49
|
+
# numeric IDs in the source).
|
|
50
|
+
# @param [Hash] errors error accumulator (`Hash.new(true)`); fields are
|
|
51
|
+
# AND-ed across all records by the `report_errors` machinery.
|
|
52
|
+
# @param [Hash{String=>Hash}] tc_index map of TC/SC reference ->
|
|
53
|
+
# `{ "en" => title, "fr" => title }`, used to resolve the human
|
|
54
|
+
# committee label from the Open Data technical-committees dataset.
|
|
55
|
+
# @param [Hash{String=>Array<String>}] amend_index map of base
|
|
56
|
+
# reference -> list of supplement (Amd/Cor/Add) references that
|
|
57
|
+
# target it. Open Data records the supplement -> base direction only
|
|
58
|
+
# via the reference string, so we pre-build the reverse map.
|
|
59
|
+
# @param [Hash{String=>String}] date_index map of reference ->
|
|
60
|
+
# `publicationDate`, used to attach a `published` date to each
|
|
61
|
+
# emitted relation's bibitem when the related document is itself
|
|
62
|
+
# present in the Open Data feed.
|
|
63
|
+
#
|
|
64
|
+
# Stores the record and the lookup tables on the instance; nothing is parsed
# until `#parse` is called.
def initialize(pub, ref_index = {}, errors = {}, tc_index = {}, amend_index = {}, date_index = {})
  @pub, @ref_index, @errors = pub, ref_index, errors
  @tc_index, @amend_index, @date_index = tc_index, amend_index, date_index
end
|
|
72
|
+
|
|
73
|
+
# Builds the item: each name in ATTRS is resolved by calling the private
# method of the same name, and the results are passed as keywords to ItemData.
def parse
  attrs = ATTRS.to_h { |attr| [attr, send(attr)] }
  ItemData.new(**attrs)
end
|
|
76
|
+
|
|
77
|
+
private
|
|
78
|
+
|
|
79
|
+
# Item type; always the literal "standard".
def type
  "standard"
end
|
|
80
|
+
|
|
81
|
+
# ---- identifiers -----------------------------------------------------
|
|
82
|
+
|
|
83
|
+
# Memoized `reference` field of the record; "" when the field is missing.
def reference
  return @reference if @reference

  @reference = @pub["reference"] || ""
end
|
|
86
|
+
|
|
87
|
+
# Parsed identifier for `reference`, memoized (a failed parse is cached as
# nil, hence the `defined?` check). Parse errors are logged, not raised.
def pubid
  unless defined?(@pubid)
    @pubid =
      begin
        ::Pubid::Iso::Identifier.parse(reference)
      rescue StandardError => e
        Util.warn "Failed to parse pubid `#{reference}`: #{e.message}"
        nil
      end
  end
  @pubid
end
|
|
97
|
+
|
|
98
|
+
# Builds the document identifiers for the record.
#
# With a parsed pubid: the primary "ISO" id, plus an "iso-reference" id and a
# "URN" id when those can be derived. Without one: a single primary "ISO" id
# carrying the raw reference string.
#
# The returned list always has at least one element, so the missing-identifier
# flag cannot be based on `ids.empty?`; it is kept set only when there is
# neither a parsed pubid nor a non-empty reference to fall back on.
#
# @return [Array<Docidentifier>]
def docidentifier
  ids = []
  if pubid
    ids << Docidentifier.new(content: pubid, type: "ISO", primary: true)
    ref = iso_reference_pubid
    ids << Docidentifier.new(content: ref, type: "iso-reference") if ref
    urn = safe_urn_docid
    ids << urn if urn
  else
    ids << Docidentifier.new(content: reference, type: "ISO", primary: true)
  end
  @errors[:docidentifier] &&= pubid.nil? && reference.empty?
  ids
end
|
|
114
|
+
|
|
115
|
+
# Wraps `urn_pubid` in a "URN" Docidentifier; nil when there is no URN pubid
# or when building the identifier raises.
def safe_urn_docid
  urn = urn_pubid
  urn ? Docidentifier.new(content: urn, type: "URN") : nil
rescue StandardError
  nil
end
|
|
122
|
+
|
|
123
|
+
# Re-creates the identifier from the pubid's hash form without `:typed_stage`,
# defaulting `language` to "en" (a language already in the hash wins).
# Any failure yields nil.
def iso_reference_pubid
  attrs = pubid.to_h.reject { |key, _| key == :typed_stage }
  ::Pubid::Iso::Identifier.create(**{ language: "en" }.merge(attrs))
rescue StandardError
  nil
end
|
|
129
|
+
|
|
130
|
+
# Memoized copy of the pubid used for the URN identifier. When the copy
# exposes `stage`/`stage=`, has no stage yet, and the record has a stage code,
# the stage parsed from that code is assigned. Any failure yields nil.
def urn_pubid
  return @urn_pubid if defined?(@urn_pubid)

  @urn_pubid =
    begin
      copy = pubid.dup
      stage_accessors = copy.respond_to?(:stage=) && copy.respond_to?(:stage)
      if stage_accessors && stage_dotted && copy.stage.nil?
        copy.stage = ::Pubid::Iso::Identifier.parse_stage(stage_dotted)
      end
      copy
    rescue StandardError
      nil
    end
end
|
|
144
|
+
|
|
145
|
+
# First run of digits in the identifier string.
#
# Uses the parsed pubid when available and otherwise falls back to the raw
# reference, mirroring the fallback `docidentifier` applies, so records whose
# reference could not be parsed still get a document number.
#
# @return [String, nil] nil when the string contains no digits
def docnumber
  (pubid&.to_s || reference)[/\d+/]
end
|
|
148
|
+
|
|
149
|
+
# Edition of the record as a Bib::Edition (content stringified); nil when the
# `edition` field is absent.
def edition
  raw = @pub["edition"]
  Bib::Edition.new(content: raw.to_s) if raw
end
|
|
154
|
+
|
|
155
|
+
# ---- language / script ----------------------------------------------
|
|
156
|
+
|
|
157
|
+
# De-duplicated `languages` of the record; ["en"] when none are listed.
# Always returns a new array, leaving the record untouched.
def language
  codes = Array(@pub["languages"])
  codes.empty? ? ["en"] : codes.uniq
end
|
|
162
|
+
|
|
163
|
+
# Distinct scripts for the record's languages; languages that `script_for`
# does not map are dropped.
def script
  language.map { |code| script_for(code) }.compact.uniq
end
|
|
166
|
+
|
|
167
|
+
# Script code for a language code: "Latn" for en/fr, "Cyrl" for ru,
# nil for anything else.
def script_for(lang)
  if %w[en fr].include?(lang)
    "Latn"
  elsif lang == "ru"
    "Cyrl"
  end
end
|
|
173
|
+
|
|
174
|
+
# ---- title -----------------------------------------------------------
|
|
175
|
+
|
|
176
|
+
# English titles followed by French titles. The `:title` error flag stays
# set only while no title has been produced.
def title
  titles = %w[en fr].flat_map { |lang| titles_for(lang) }
  @errors[:title] &&= titles.empty?
  titles
end
|
|
183
|
+
|
|
184
|
+
# Titles for one language, built by Bib::Title.from_string from the
# dash-normalized `title[lang]` text; [] when that text is nil or empty.
def titles_for(lang)
  text = @pub.dig("title", lang)
  blank = text.nil? ? true : text.empty?
  return [] if blank

  Bib::Title.from_string(normalize_dashes(text), lang, script_for(lang))
end
|
|
190
|
+
|
|
191
|
+
# Replaces whitespace-delimited em dashes, then en dashes, with " - ".
# The two passes run one after the other, em dash first.
def normalize_dashes(str)
  ["—", "–"].reduce(str) do |text, dash|
    text.gsub(/\s#{dash}\s/, " - ")
  end
end
|
|
194
|
+
|
|
195
|
+
# ---- status ----------------------------------------------------------
|
|
196
|
+
|
|
197
|
+
# Open Data exposes a 4-digit stage code (e.g. 2098 = 20.98, 6060 = 60.60).
|
|
198
|
+
# Records occasionally come through with 2 or 3 digits (zero-padded).
|
|
199
|
+
# Stage code of the record in dotted "NN.NN" form, memoized (nil is cached
# too, hence the `defined?` check).
#
# Accepted `currentStage` values:
#   * a number, or a string of digits, in 0..9999 - left-padded to four
#     digits and split in the middle (2098 -> "20.98", 20 -> "00.20");
#   * a string already in "NN.NN" form - returned as is (stripped).
#
# Anything else (non-numeric text, negative or 5+ digit codes) yields nil
# instead of a fabricated stage: `to_i` would silently turn such input into
# "00.00" or a truncated code.
#
# @return [String, nil]
def stage_dotted
  return @stage_dotted if defined?(@stage_dotted)

  raw = @pub["currentStage"]
  @stage_dotted =
    if raw.is_a?(String) && raw.strip.match?(/\A\d{2}\.\d{2}\z/)
      raw.strip
    elsif raw
      # Base 10 is forced so that zero-padded strings are not read as octal.
      code = raw.is_a?(Numeric) ? raw.to_i : Integer(raw.to_s.strip, 10, exception: false)
      if code&.between?(0, 9999)
        digits = format("%04d", code)
        "#{digits[0, 2]}.#{digits[2, 2]}"
      end
    end
end
|
|
208
|
+
|
|
209
|
+
# Status built from the dotted stage code: the part before the dot becomes
# the stage, the part after it (if any) the substage. nil without a stage.
def status
  dotted = stage_dotted
  return nil unless dotted

  major, minor = dotted.split(".")
  substage = (Bib::Status::Stage.new(content: minor) if minor)
  Bib::Status.new(stage: Bib::Status::Stage.new(content: major), substage: substage)
end
|
|
218
|
+
|
|
219
|
+
# ---- ICS -------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
# One Bib::ICS per `icsCode` entry; the text is the description returned by
# the Isoics lookup, or nil when the lookup yields nothing. [] without codes.
def ics
  codes = @pub["icsCode"]
  return [] unless codes

  Array(codes).map do |code|
    Bib::ICS.new(code: code, text: safe_isoics_fetch(code)&.description)
  end
end
|
|
229
|
+
|
|
230
|
+
# Isoics lookup that returns nil instead of raising when the lookup fails.
def safe_isoics_fetch(code)
  begin
    Isoics.fetch(code)
  rescue StandardError
    nil
  end
end
|
|
235
|
+
|
|
236
|
+
# ---- dates -----------------------------------------------------------
|
|
237
|
+
|
|
238
|
+
# A single "published" date taken from `publicationDate`; [] when that field
# is nil or empty.
def date
  published = @pub["publicationDate"]
  blank = published.nil? || published.empty?
  blank ? [] : [Bib::Date.new(type: "published", at: published)]
end
|
|
244
|
+
|
|
245
|
+
# ---- contributors ----------------------------------------------------
|
|
246
|
+
|
|
247
|
+
# Publisher contributors followed by the committee contributor, if any.
def contributor
  [*publishers, *Array(editorialgroup_contributor)]
end
|
|
250
|
+
|
|
251
|
+
# One "publisher" contributor per slash-separated abbreviation found before
# the first whitespace of the reference. Abbreviations without an entry in
# Scraper::PUBLISHERS are skipped.
def publishers
  prefix = reference.sub(/\s.*/, "")
  prefix.split("/").filter_map do |code|
    details = Scraper::PUBLISHERS[code]
    next unless details

    uris = details[:uri] ? [Bib::Uri.new(content: details[:uri])] : []
    organization = Bib::Organization.new(
      name: [Bib::TypedLocalizedString.new(content: details[:name])],
      abbreviation: Bib::LocalizedString.new(content: code),
      uri: uris,
    )
    Bib::Contributor.new(
      organization: organization,
      role: [Bib::Contributor::Role.new(type: "publisher")],
    )
  end
end
|
|
264
|
+
|
|
265
|
+
# "author" contributor (role description "committee") for `ownerCommittee`;
# nil when that field is nil or empty.
#
# The committee string is split on "/": the first segment is the organization
# code (looked up in Scraper::PUBLISHERS for its full name), and the leading
# capitals of the second segment give the subdivision subtype ("TC" when
# there are none). The subdivision name comes from the English entry of the
# committee index, falling back to the committee string itself.
def editorialgroup_contributor
  committee = @pub["ownerCommittee"]
  return nil if committee.nil? || committee.empty?

  org_code, group = committee.split("/")
  subtype = group&.match(/^[A-Z]+/)&.to_s || "TC"

  known = Scraper::PUBLISHERS[org_code]
  org_names =
    if known || org_code
      [Bib::TypedLocalizedString.new(content: known ? known[:name] : org_code)]
    else
      []
    end
  org_abbreviation = org_code && Bib::LocalizedString.new(content: org_code)

  committee_label = @tc_index.dig(committee, "en") || committee
  committee_unit = Bib::Subdivision.new(
    type: "technical-committee",
    subtype: subtype,
    name: [Bib::TypedLocalizedString.new(content: committee_label)],
    identifier: [Bib::OrganizationType::Identifier.new(content: committee)],
  )

  author_role = Bib::Contributor::Role.new(
    type: "author",
    description: [Bib::LocalizedMarkedUpString.new(content: "committee")],
  )
  organization = Bib::Organization.new(
    name: org_names, subdivision: [committee_unit], abbreviation: org_abbreviation,
  )

  Bib::Contributor.new(role: [author_role], organization: organization)
end
|
|
303
|
+
|
|
304
|
+
# ---- abstract --------------------------------------------------------
|
|
305
|
+
|
|
306
|
+
# Abstracts from the `scope` HTML, English first and then French. A language
# is skipped when its markup is nil/empty or strips down to an empty string.
def abstract
  %w[en fr].each_with_object([]) do |lang, abstracts|
    markup = @pub.dig("scope", lang)
    next if markup.nil? || markup.empty?

    plain = strip_html(markup)
    next if plain.empty?

    abstracts << Bib::Abstract.new(content: plain, language: lang, script: script_for(lang))
  end
end
|
|
317
|
+
|
|
318
|
+
# Text content of an HTML fragment, trimmed, with every whitespace run
# collapsed to a single space.
def strip_html(html)
  plain = Nokogiri::HTML.fragment(html).text
  plain.strip.gsub(/\s+/, " ")
end
|
|
321
|
+
|
|
322
|
+
# ---- copyright -------------------------------------------------------
|
|
323
|
+
|
|
324
|
+
# Copyright entry for the record. The year is the first four digits that
# follow a colon in the reference, else the first four digits of
# `publicationDate`; without a year the result is []. The owner name is the
# part of the reference before its first whitespace character.
def copyright
  year = reference[/(?<=:)\d{4}/] || @pub["publicationDate"]&.match(/\d{4}/)&.to_s
  return [] unless year && !year.empty?

  holder = Bib::Organization.new(
    name: [Bib::TypedLocalizedString.new(content: reference[/.*?(?=\s)/].to_s)],
  )
  owner = Bib::ContributionInfo.new(organization: holder)
  [Bib::Copyright.new(owner: [owner], from: year)]
end
|
|
335
|
+
|
|
336
|
+
# ---- source links ----------------------------------------------------
|
|
337
|
+
|
|
338
|
+
# Links derived from the record `id`: the standard page ("src"), the OBP page
# ("obp") and the RSS feed ("rss"). The RSS path uses the first two pairs of
# digits of the id left-padded to six digits. [] when the record has no id.
def source
  id = @pub["id"]
  return [] unless id

  padded = format("%06d", id)
  {
    "src" => format(DOC_URL, id),
    "obp" => format(OBP_URL, id),
    "rss" => format(RSS_URL, padded[0, 2], padded[2, 2], id),
  }.map { |kind, url| Bib::Uri.new(type: kind, content: url) }
end
|
|
349
|
+
|
|
350
|
+
# ---- relations -------------------------------------------------------
|
|
351
|
+
|
|
352
|
+
# Open Data semantics:
|
|
353
|
+
# * `replaces` - older docs THIS one supersedes -> `obsoletes`
|
|
354
|
+
# * `replacedBy` - newer docs that supersede THIS one -> `obsoletedBy`
|
|
355
|
+
# Amendments/corrigenda/addenda are stitched in via two routes:
|
|
356
|
+
# * on the BASE record, look up `@amend_index` for supplements
|
|
357
|
+
# targeting it (-> `updatedBy`); the index is pre-built in
|
|
358
|
+
# `DataFetcher#build_ref_index` because Open Data only records
|
|
359
|
+
# the supplement -> base direction via the reference string.
|
|
360
|
+
# * on the SUPPLEMENT record itself, derive the base from
|
|
361
|
+
# `pubid.base` and emit the forward `updates` relation.
|
|
362
|
+
# All relations of the record, in order: `replaces` (as "obsoletes"),
# `replacedBy` (as "obsoletedBy"), supplements of this record, then the base
# document this record supplements.
def relation
  [
    build_relations(@pub["replaces"], "obsoletes"),
    build_relations(@pub["replacedBy"], "obsoletedBy"),
    amendment_relations,
    base_relation,
  ].reduce([], :+)
end
|
|
370
|
+
|
|
371
|
+
# Relations of the given type for every id that resolves to a reference in
# the reference index (looked up by the id itself, then by its string form).
# Unresolvable ids are skipped.
def build_relations(ids, type)
  Array(ids).each_with_object([]) do |id, found|
    target = @ref_index[id] || @ref_index[id.to_s]
    next unless target

    rel = relation_for(target, type)
    found << rel if rel
  end
end
|
|
379
|
+
|
|
380
|
+
# "updatedBy" relations for every supplement reference the amendment index
# lists under this record's key (pubid string, or the raw reference when
# there is no pubid).
def amendment_relations
  base_key = pubid&.to_s || reference
  supplements = Array(@amend_index[base_key])
  supplements.map { |supplement| relation_for(supplement, "updatedBy") }
end
|
|
385
|
+
|
|
386
|
+
# A single "updates" relation pointing at the pubid's base document; []
# when there is no pubid, it has no `base` reader, or the base is nil.
def base_relation
  base = (pubid.base if pubid&.respond_to?(:base))
  base ? [relation_for(base.to_s, "updates")] : []
end
|
|
391
|
+
|
|
392
|
+
# Relation of the given type whose bibitem carries the reference as primary
# "ISO" identifier and as formattedref. A "published" date is attached when
# the date index has a non-empty entry for the reference.
def relation_for(ref, type)
  published = @date_index[ref]
  dated = published && !published.empty?
  extra = dated ? { date: [Bib::Date.new(type: "published", at: published)] } : {}

  item = ItemData.new(
    docidentifier: [Docidentifier.new(content: ref, type: "ISO", primary: true)],
    formattedref: Bib::Formattedref.new(content: ref),
    **extra,
  )
  Relation.new(type: type, bibitem: item)
end
|
|
403
|
+
|
|
404
|
+
# ---- structured identifier ------------------------------------------
|
|
405
|
+
|
|
406
|
+
# Structured identifier whose project number is the record `id` (as a
# string) and whose type is the pubid's publisher, or "ISO" when that is
# unavailable. nil when the record has no id.
def structuredidentifier
  id = @pub["id"]
  return nil unless id

  owner = (pubid.publisher if pubid&.respond_to?(:publisher))
  StructuredIdentifier.new(
    project_number: ProjectNumber.new(content: id.to_s),
    type: owner || "ISO",
  )
end
|
|
413
|
+
|
|
414
|
+
# ---- place -----------------------------------------------------------
|
|
415
|
+
|
|
416
|
+
# Publication place: a single Bib::Place with city "Geneva".
def place
  geneva = Bib::Place.new(city: "Geneva")
  [geneva]
end
|
|
419
|
+
|
|
420
|
+
# ---- ext -------------------------------------------------------------
|
|
421
|
+
|
|
422
|
+
# Ext element carrying doctype, flavor "iso", ICS codes and the structured
# identifier. The remaining fields are passed explicitly as nil.
def ext
  unset = { stagename: nil, updates_document_type: nil, fast_track: nil, price_code: nil }
  Ext.new(
    doctype: doctype,
    flavor: "iso",
    ics: ics,
    structuredidentifier: structuredidentifier,
    **unset,
  )
end
|
|
434
|
+
|
|
435
|
+
# Doctype of the record: the mapping of `supplementType` wins, then the
# mapping of `deliverableType`, then "international-standard".
def doctype
  supplement = SUPPLEMENT_DOCTYPES[@pub["supplementType"]]
  deliverable = DOCTYPES[@pub["deliverableType"]]
  Doctype.new(content: supplement || deliverable || "international-standard")
end
|
|
441
|
+
end
|
|
442
|
+
end
|
|
443
|
+
end
|
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
require_relative "../type/pubid"
|
|
2
|
+
|
|
1
3
|
module Relaton
|
|
2
4
|
module Iso
|
|
3
5
|
class Docidentifier < Bib::Docidentifier
|
|
4
|
-
attribute :content,
|
|
6
|
+
attribute :content, Type::Pubid
|
|
5
7
|
|
|
6
8
|
attr_reader :pubid
|
|
7
9
|
|
|
@@ -30,7 +32,10 @@ module Relaton
|
|
|
30
32
|
begin
|
|
31
33
|
::Pubid::Iso::Identifier.parse(value)
|
|
32
34
|
rescue StandardError
|
|
33
|
-
|
|
35
|
+
# Suppress when type is not yet set (lutaml runs the setter
|
|
36
|
+
# once during init before `type` is assigned, then `initialize`
|
|
37
|
+
# re-runs it; only the second pass is authoritative).
|
|
38
|
+
Util.warn "Failed to parse Pubid: #{value}" if type
|
|
34
39
|
nil
|
|
35
40
|
end
|
|
36
41
|
end
|
|
@@ -10,7 +10,7 @@ module Relaton
|
|
|
10
10
|
@prefix = "ISO"
|
|
11
11
|
@defaultprefix = %r{^ISO(/IEC)?\s}
|
|
12
12
|
@idtype = "ISO"
|
|
13
|
-
@datasets = %w[iso-
|
|
13
|
+
@datasets = %w[iso-open-data iso-open-data-all]
|
|
14
14
|
end
|
|
15
15
|
|
|
16
16
|
# @param code [String]
|
|
@@ -23,16 +23,19 @@ module Relaton
|
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
#
|
|
26
|
-
# Fetch all the documents from
|
|
26
|
+
# Fetch all the documents from the ISO Open Data programme
|
|
27
|
+
# (https://www.iso.org/open-data.html).
|
|
27
28
|
#
|
|
28
|
-
# @param [String] source source name
|
|
29
|
+
# @param [String] source source name
|
|
30
|
+
# * `iso-open-data` - skip if upstream `Last-Modified` is unchanged
|
|
31
|
+
# * `iso-open-data-all` - wipe `output` and re-emit every record
|
|
29
32
|
# @param [Hash] opts
|
|
30
33
|
# @option opts [String] :output directory to output documents
|
|
31
34
|
# @option opts [String] :format output format (xml, yaml, bibxml)
|
|
32
35
|
#
|
|
33
|
-
def fetch_data(
|
|
36
|
+
def fetch_data(source, opts)
|
|
34
37
|
require_relative "data_fetcher"
|
|
35
|
-
DataFetcher.fetch(**opts)
|
|
38
|
+
DataFetcher.fetch(source, **opts)
|
|
36
39
|
end
|
|
37
40
|
|
|
38
41
|
# @param xml [String]
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
module Relaton
|
|
2
|
+
module Iso
|
|
3
|
+
module Type
|
|
4
|
+
# Lutaml-model attribute type that preserves `Pubid::Iso::Identifier::Base`
|
|
5
|
+
# instances on the way in and stringifies them on the way out.
|
|
6
|
+
#
|
|
7
|
+
# The default `:string` type calls `.to_s` during `cast`, which loses the
|
|
8
|
+
# parsed structure and forces `Docidentifier#content=` to re-parse the
|
|
9
|
+
# human-readable form. That round-trip can render dual-type strings
|
|
10
|
+
# (e.g. `"ISO/IS TR 17"` from a TR pubid with stage 60.60) that the
|
|
11
|
+
# pubid-iso parslet grammar can't capture cleanly, producing
|
|
12
|
+
# `Duplicate subtrees while merging result of ROOT` warnings.
|
|
13
|
+
class Pubid < Lutaml::Model::Type::Value
  class << self
    # Incoming values are stored as given (nil, the uninitialized marker, or
    # an identifier object); nothing is converted on the way in.
    def cast(value, _options = {})
      return value if untouched?(value)

      value
    end

    # nil and the uninitialized marker pass through unchanged; any other
    # value is written out via `to_s`.
    def serialize(value)
      untouched?(value) ? value : value.to_s
    end

    def default_xsd_type
      "xs:string"
    end

    private

    # True for values that must pass through unchanged: nil and the
    # lutaml-model uninitialized marker.
    def untouched?(value)
      value.nil? || Lutaml::Model::Utils.uninitialized?(value)
    end
  end

  # Every zero-argument output form is the string form of the wrapped value.
  %i[to_s to_yaml to_xml].each do |writer|
    define_method(writer) { value.to_s }
  end

  # Same string form; extra arguments are accepted and ignored.
  def to_json(*_args)
    value.to_s
  end
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
data/lib/relaton/iso/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-iso
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.1.
|
|
4
|
+
version: 2.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: exe
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: isoics
|
|
@@ -117,14 +116,10 @@ files:
|
|
|
117
116
|
- bin/console
|
|
118
117
|
- bin/rspec
|
|
119
118
|
- bin/setup
|
|
120
|
-
- grammars/basicdoc.rng
|
|
121
|
-
- grammars/biblio-standoc.rng
|
|
122
|
-
- grammars/biblio.rng
|
|
123
|
-
- grammars/relaton-iso-compile.rng
|
|
124
|
-
- grammars/relaton-iso.rng
|
|
125
119
|
- lib/relaton/iso.rb
|
|
126
120
|
- lib/relaton/iso/bibliography.rb
|
|
127
121
|
- lib/relaton/iso/data_fetcher.rb
|
|
122
|
+
- lib/relaton/iso/data_parser.rb
|
|
128
123
|
- lib/relaton/iso/hash_parser_v1.rb
|
|
129
124
|
- lib/relaton/iso/hit.rb
|
|
130
125
|
- lib/relaton/iso/hit_collection.rb
|
|
@@ -144,8 +139,8 @@ files:
|
|
|
144
139
|
- lib/relaton/iso/model/stagename.rb
|
|
145
140
|
- lib/relaton/iso/model/structured_identifier.rb
|
|
146
141
|
- lib/relaton/iso/processor.rb
|
|
147
|
-
- lib/relaton/iso/queue.rb
|
|
148
142
|
- lib/relaton/iso/scraper.rb
|
|
143
|
+
- lib/relaton/iso/type/pubid.rb
|
|
149
144
|
- lib/relaton/iso/util.rb
|
|
150
145
|
- lib/relaton/iso/version.rb
|
|
151
146
|
- relaton-iso.gemspec
|
|
@@ -153,7 +148,6 @@ homepage: https://github.com/relaton/relaton-iso
|
|
|
153
148
|
licenses:
|
|
154
149
|
- BSD-2-Clause
|
|
155
150
|
metadata: {}
|
|
156
|
-
post_install_message:
|
|
157
151
|
rdoc_options: []
|
|
158
152
|
require_paths:
|
|
159
153
|
- lib
|
|
@@ -168,8 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
168
162
|
- !ruby/object:Gem::Version
|
|
169
163
|
version: '0'
|
|
170
164
|
requirements: []
|
|
171
|
-
rubygems_version: 3.
|
|
172
|
-
signing_key:
|
|
165
|
+
rubygems_version: 3.6.9
|
|
173
166
|
specification_version: 4
|
|
174
167
|
summary: 'Relaton::Iso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
|
175
168
|
model'
|