relaton-ietf 2.1.1 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/relaton/ietf/data_fetcher.rb +165 -98
- data/lib/relaton/ietf/rfc/entry.rb +4 -3
- data/lib/relaton/ietf/version.rb +1 -1
- data/relaton-ietf.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5a128dcfe956c56d598a18a28d3b463adc25db7d4d326fc00771a191e8e2ba75
|
|
4
|
+
data.tar.gz: 6dd1eb0d196aee09416e6aafcf766e648b95e7c203636fe0f99857859f380d38
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 41fdec2098e1fefc0b9a3e621c9bb9cefbbbb81df55df19c56ca010574c065ce27d2b1aa73d1507b825ee6605d1294c4b7901bc07b7af8653745bc054568b305
|
|
7
|
+
data.tar.gz: 50ce7e81d9cd37f51f05d5800f8fb912c51ca302875293b789baadd60a26af5a6c0987b4e0f06ce1dcf4f71df75a750a8e3a2affc56029102d4c11af9e660ad4
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
require "etc"
|
|
2
|
+
require "parallel"
|
|
1
3
|
require "relaton/core"
|
|
2
4
|
require_relative "../ietf"
|
|
3
5
|
require_relative "bibxml_parser"
|
|
@@ -41,107 +43,152 @@ module Relaton
|
|
|
41
43
|
end
|
|
42
44
|
|
|
43
45
|
#
|
|
44
|
-
# Fetches ietf-internet-drafts documents
|
|
46
|
+
# Fetches ietf-internet-drafts documents.
|
|
45
47
|
#
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
48
|
+
# Each work unit (one series, or one singleton XML) is processed
|
|
49
|
+
# end-to-end in a worker process: parse → link relations → serialize →
|
|
50
|
+
# write. Workers return Marshal-friendly index entries; the parent
|
|
51
|
+
# collects them and updates `Relaton::Index` and the duplicate-check set
|
|
52
|
+
# serially. Set `RELATON_IETF_PARALLEL_WORKERS=0` to force serial
|
|
53
|
+
# execution (useful for tests and debugging).
|
|
54
|
+
#
|
|
55
|
+
def fetch_ieft_internet_drafts
|
|
56
|
+
series_groups, singleton_paths = group_draft_paths
|
|
57
|
+
|
|
58
|
+
series_results = parallelize(series_groups.to_a) do |(series, paths_info)|
|
|
59
|
+
process_series(series, paths_info)
|
|
60
|
+
end.flatten(1)
|
|
61
|
+
|
|
62
|
+
singleton_results = parallelize(singleton_paths) do |path|
|
|
63
|
+
process_singleton(path)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
(series_results + singleton_results).compact.each { |r| record_index_entry(r) }
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
#
|
|
70
|
+
# Run `block` once per item, in parallel worker processes when configured.
|
|
71
|
+
# `Parallel.map(items, in_processes: 0)` runs synchronously in the
|
|
72
|
+
# current process, which keeps tests deterministic and lets mocks work.
|
|
73
|
+
#
|
|
74
|
+
def parallelize(items, &block)
|
|
75
|
+
Parallel.map(items, in_processes: worker_count, &block)
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def worker_count
|
|
79
|
+
ENV.fetch("RELATON_IETF_PARALLEL_WORKERS", Etc.nprocessors.to_s).to_i
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
#
|
|
83
|
+
# Filename-only scan: group versioned drafts by normalized series stem;
|
|
84
|
+
# everything else (non-versioned, non-`D.draft-`) goes to singletons.
|
|
85
|
+
# No XML parsing happens here — workers do that.
|
|
86
|
+
#
|
|
87
|
+
# @return [Array(Hash, Array<String>)]
|
|
88
|
+
# series_groups: { normalized_series => [{path, ver, ref}, ...] }
|
|
89
|
+
# singleton_paths: [path, ...]
|
|
90
|
+
#
|
|
91
|
+
def group_draft_paths
|
|
92
|
+
series_groups = {}
|
|
93
|
+
singleton_paths = []
|
|
94
|
+
Dir["bibxml-ids/*.xml"].each do |path|
|
|
95
|
+
file = File.basename(path, ".xml")
|
|
96
|
+
is_draft = file.include?("D.draft-")
|
|
97
|
+
ver = is_draft ? file[/(\d+)$/, 1] : nil
|
|
98
|
+
ref = file.sub(/^reference\.I-D\./, "").downcase
|
|
99
|
+
stem_match = is_draft && ver ? /^(draft-.+)-(\d{2})$/.match(ref) : nil
|
|
100
|
+
if stem_match
|
|
101
|
+
series = stem_match[1].gsub(/[.\s\/:-]+/, "-")
|
|
102
|
+
(series_groups[series] ||= []) << { path: path, ver: ver, ref: ref }
|
|
103
|
+
else
|
|
104
|
+
singleton_paths << path
|
|
58
105
|
end
|
|
59
|
-
save_doc bib
|
|
60
106
|
end
|
|
61
|
-
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
#
|
|
65
|
-
#
|
|
66
|
-
#
|
|
67
|
-
#
|
|
68
|
-
#
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
107
|
+
[series_groups, singleton_paths]
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
#
|
|
111
|
+
# Worker: parse all files in a series, sort by version, append
|
|
112
|
+
# immediate-neighbor relations (skipped for bibxml), write each version
|
|
113
|
+
# and the un-versioned aggregator doc. Returns an array of index entries
|
|
114
|
+
# for the parent.
|
|
115
|
+
#
|
|
116
|
+
def process_series(series, paths_info)
|
|
117
|
+
sorted = paths_info.sort_by { |p| p[:ver].to_i }.map do |p|
|
|
118
|
+
bib = BibXMLParser.parse(File.read(p[:path], encoding: "UTF-8"))
|
|
119
|
+
bib.version = [Bib::Version.new(draft: p[:ver])]
|
|
120
|
+
p.merge(bib: bib, source: bib.source)
|
|
121
|
+
end
|
|
122
|
+
link_neighbor_relations(sorted) if @format != "bibxml"
|
|
123
|
+
|
|
124
|
+
results = sorted.map { |entry| serialize_and_write(entry[:bib]) }
|
|
125
|
+
results << serialize_and_write(build_unversioned_doc(series, sorted)) if @format != "bibxml"
|
|
126
|
+
results.compact
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
#
|
|
130
|
+
# Worker: parse + serialize + write a single non-grouped XML.
|
|
131
|
+
#
|
|
132
|
+
def process_singleton(path)
|
|
133
|
+
file = File.basename(path, ".xml")
|
|
134
|
+
is_draft = file.include?("D.draft-")
|
|
135
|
+
ver = is_draft ? file[/(\d+)$/, 1] : nil
|
|
136
|
+
bib = BibXMLParser.parse(File.read(path, encoding: "UTF-8"))
|
|
137
|
+
bib.version = [Bib::Version.new(draft: ver)] if ver
|
|
138
|
+
serialize_and_write(bib)
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
#
|
|
142
|
+
# Append immediate-neighbor `updates` / `updatedBy` relations in memory.
|
|
143
|
+
# Single-version series get no relations (no neighbors).
|
|
144
|
+
#
|
|
145
|
+
def link_neighbor_relations(sorted)
|
|
146
|
+
sorted.each_with_index do |entry, i|
|
|
147
|
+
if i.positive?
|
|
148
|
+
prev = sorted[i - 1]
|
|
149
|
+
entry[:bib].relation << version_relation({ ref: prev[:ref], source: prev[:source] }, "updates")
|
|
150
|
+
end
|
|
151
|
+
if i < sorted.size - 1
|
|
152
|
+
nxt = sorted[i + 1]
|
|
153
|
+
entry[:bib].relation << version_relation({ ref: nxt[:ref], source: nxt[:source] }, "updatedBy")
|
|
88
154
|
end
|
|
89
155
|
end
|
|
90
156
|
end
|
|
91
157
|
|
|
92
158
|
#
|
|
93
|
-
#
|
|
159
|
+
# Build (but do not write) the un-versioned series aggregator doc with
|
|
160
|
+
# `includes` relations to every version. Uses the latest version's
|
|
161
|
+
# title/abstract from memory.
|
|
94
162
|
#
|
|
95
|
-
# @
|
|
96
|
-
# @param [Array<String>] versions list of versions
|
|
163
|
+
# @return [Relaton::Ietf::ItemData, nil]
|
|
97
164
|
#
|
|
98
|
-
def
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
return
|
|
165
|
+
def build_unversioned_doc(series, sorted)
|
|
166
|
+
if sorted.empty?
|
|
167
|
+
Util.warn "No versions found for #{series}"
|
|
168
|
+
return nil
|
|
103
169
|
end
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
bib = ItemData.new(
|
|
111
|
-
title: last_v.title, abstract: last_v.abstract, formattedref: Bib::Formattedref.new(content: ref),
|
|
170
|
+
|
|
171
|
+
last_v = sorted.last[:bib]
|
|
172
|
+
docid = Bib::Docidentifier.new(type: "Internet-Draft", content: series, primary: true)
|
|
173
|
+
rel = sorted.map { |e| version_relation({ ref: e[:ref], source: e[:source] }, "includes") }
|
|
174
|
+
ItemData.new(
|
|
175
|
+
title: last_v.title, abstract: last_v.abstract, formattedref: Bib::Formattedref.new(content: series),
|
|
112
176
|
docidentifier: [docid], relation: rel
|
|
113
177
|
)
|
|
114
|
-
save_doc bib
|
|
115
178
|
end
|
|
116
179
|
|
|
117
180
|
#
|
|
118
181
|
# Create bibitem relation
|
|
119
182
|
#
|
|
120
|
-
# @param [
|
|
183
|
+
# @param [Hash] ver version reference, { ref:, source: }
|
|
121
184
|
# @param [String] type relation type
|
|
122
185
|
#
|
|
123
|
-
# @return [Relaton::
|
|
186
|
+
# @return [Relaton::Ietf::Relation] relation
|
|
124
187
|
#
|
|
125
188
|
def version_relation(ver, type)
|
|
126
189
|
docid = Bib::Docidentifier.new(type: "Internet-Draft", content: ver[:ref], primary: true)
|
|
127
190
|
bibitem = ItemData.new(formattedref: Bib::Formattedref.new(content: ver[:ref]), docidentifier: [docid], source: ver[:source])
|
|
128
|
-
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
#
|
|
132
|
-
# Redad saved documents
|
|
133
|
-
#
|
|
134
|
-
# @param [String] file path to file
|
|
135
|
-
#
|
|
136
|
-
# @return [Relaton::Ietf::ItemData] bibliographic item
|
|
137
|
-
#
|
|
138
|
-
def read_doc(file)
|
|
139
|
-
doc = File.read(file, encoding: "UTF-8")
|
|
140
|
-
case @format
|
|
141
|
-
when "xml" then Item.from_xml(doc)
|
|
142
|
-
when "yaml" then Item.from_yaml(doc)
|
|
143
|
-
else BibXMLParser.parse(doc)
|
|
144
|
-
end
|
|
191
|
+
Relaton::Ietf::Relation.new(type: type, bibitem: bibitem)
|
|
145
192
|
end
|
|
146
193
|
|
|
147
194
|
#
|
|
@@ -172,38 +219,58 @@ module Relaton
|
|
|
172
219
|
end
|
|
173
220
|
|
|
174
221
|
#
|
|
175
|
-
# Save document to file
|
|
222
|
+
# Save document to file (sequential path: serialize, write, index).
|
|
223
|
+
# Used by the rfcsubseries / rfc-entries fetchers; the I-D fetcher splits
|
|
224
|
+
# this into worker-safe `serialize_and_write` plus parent-only
|
|
225
|
+
# `record_index_entry` so the index is touched only in the main process.
|
|
176
226
|
#
|
|
177
|
-
# @param [Relaton::Ietf::Rfc::Entry, nil]
|
|
227
|
+
# @param [Relaton::Ietf::Rfc::Entry, nil] entry
|
|
178
228
|
# @param [Boolean] check_duplicate check for duplicate
|
|
179
229
|
#
|
|
180
|
-
def save_doc(entry, check_duplicate: true)
|
|
181
|
-
|
|
230
|
+
def save_doc(entry, check_duplicate: true)
|
|
231
|
+
result = serialize_and_write(entry)
|
|
232
|
+
record_index_entry(result, check_duplicate: check_duplicate) if result
|
|
233
|
+
end
|
|
234
|
+
|
|
235
|
+
#
|
|
236
|
+
# Worker-safe: serialize, compute output filename, write to disk, return
|
|
237
|
+
# a Marshal-friendly hash with the docid+file pair the parent needs to
|
|
238
|
+
# update `Relaton::Index` and `@files`. Does NOT touch instance state
|
|
239
|
+
# that has to stay consistent across workers (`@files`, the index).
|
|
240
|
+
#
|
|
241
|
+
# @param [#to_yaml, #to_xml, #to_rfcxml, nil] entry
|
|
242
|
+
# @return [Hash, nil]
|
|
243
|
+
#
|
|
244
|
+
def serialize_and_write(entry) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
|
|
245
|
+
return nil unless entry
|
|
182
246
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
247
|
+
content = case @format
|
|
248
|
+
when "xml" then entry.to_xml(bibdata: true)
|
|
249
|
+
when "yaml" then entry.to_yaml
|
|
250
|
+
when "bibxml" then entry.to_rfcxml
|
|
251
|
+
else entry.send("to_#{@format}")
|
|
252
|
+
end
|
|
189
253
|
id = if entry.respond_to?(:docidentifier)
|
|
190
254
|
entry.docidentifier.detect { |i| i.type == "Internet-Draft" && i.primary }&.content
|
|
191
255
|
end
|
|
192
256
|
id ||= entry.docnumber || entry.formattedref.content
|
|
193
257
|
file = output_file(id)
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
@files << file
|
|
198
|
-
end
|
|
199
|
-
File.write file, c, encoding: "UTF-8"
|
|
200
|
-
add_to_index entry, file
|
|
258
|
+
File.write file, content, encoding: "UTF-8"
|
|
259
|
+
primary = entry.docidentifier.detect(&:primary) || entry.docidentifier.first
|
|
260
|
+
{ docnumber: entry.docnumber, file: file, index_id: primary.content }
|
|
201
261
|
end
|
|
202
262
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
263
|
+
#
|
|
264
|
+
# Parent-only: dedupe-check `@files` and update `Relaton::Index`. Called
|
|
265
|
+
# serially after workers return so index updates are race-free.
|
|
266
|
+
#
|
|
267
|
+
def record_index_entry(result, check_duplicate: true)
|
|
268
|
+
if check_duplicate && @files.include?(result[:file])
|
|
269
|
+
Util.warn "File #{result[:file]} already exists. Document: #{result[:docnumber]}"
|
|
270
|
+
elsif check_duplicate
|
|
271
|
+
@files << result[:file]
|
|
272
|
+
end
|
|
273
|
+
index.add_or_update result[:index_id], result[:file]
|
|
207
274
|
end
|
|
208
275
|
|
|
209
276
|
end
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "cgi"
|
|
3
4
|
require_relative "rfc_index_namespace"
|
|
4
5
|
require_relative "is_also"
|
|
5
6
|
require_relative "author"
|
|
@@ -212,7 +213,7 @@ module Relaton
|
|
|
212
213
|
is_also.doc_id.map do |ref|
|
|
213
214
|
rfc_entry = rfc_index&.[](ref)
|
|
214
215
|
bibitem = rfc_entry ? rfc_entry.to_rfc_item(wg_names: wg_names) : build_minimal_bibitem(ref)
|
|
215
|
-
|
|
216
|
+
Relaton::Ietf::Relation.new(type: "includes", bibitem: bibitem)
|
|
216
217
|
end.compact
|
|
217
218
|
end
|
|
218
219
|
|
|
@@ -317,7 +318,7 @@ module Relaton
|
|
|
317
318
|
def build_rfc_abstract
|
|
318
319
|
return [] unless abstract&.p&.any?
|
|
319
320
|
|
|
320
|
-
content = abstract.p.map { |para| "<p>#{para.strip}</p>" }.join
|
|
321
|
+
content = abstract.p.map { |para| "<p>#{CGI.escapeHTML(para.strip)}</p>" }.join
|
|
321
322
|
[Bib::Abstract.new(content: content, language: "en", script: "Latn")]
|
|
322
323
|
end
|
|
323
324
|
|
|
@@ -339,7 +340,7 @@ module Relaton
|
|
|
339
340
|
def build_rfc_doc_relation(ref, type)
|
|
340
341
|
docid = Bib::Docidentifier.new(type: "IETF", content: ref, primary: true)
|
|
341
342
|
bibitem = ItemData.new(formattedref: Bib::Formattedref.new(content: ref), docidentifier: [docid])
|
|
342
|
-
|
|
343
|
+
Relaton::Ietf::Relation.new(type: type, bibitem: bibitem)
|
|
343
344
|
end
|
|
344
345
|
|
|
345
346
|
def build_rfc_status
|
data/lib/relaton/ietf/version.rb
CHANGED
data/relaton-ietf.gemspec
CHANGED
|
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
|
|
|
30
30
|
spec.required_ruby_version = Gem::Requirement.new(">= 3.2.0")
|
|
31
31
|
|
|
32
32
|
spec.add_dependency "base64"
|
|
33
|
+
spec.add_dependency "parallel", "~> 1.26"
|
|
33
34
|
spec.add_dependency "relaton-bib", "~> 2.1.0"
|
|
34
35
|
spec.add_dependency "relaton-core", "~> 0.0.13"
|
|
35
36
|
spec.add_dependency "relaton-index", "~> 0.2.3"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-ietf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.1.
|
|
4
|
+
version: 2.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-08 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: base64
|
|
@@ -24,6 +24,20 @@ dependencies:
|
|
|
24
24
|
- - ">="
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: '0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: parallel
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '1.26'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '1.26'
|
|
27
41
|
- !ruby/object:Gem::Dependency
|
|
28
42
|
name: relaton-bib
|
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|