relaton-ietf 2.1.1 → 2.1.2

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ca0c823ba49c0bf4b1f184a0172e38e525312171f55b1ee17a4bf611926d0477
4
- data.tar.gz: bc80ad922e473bf98c412a8dddd35b150093b0b16f18bb7c47f00c7c28983321
3
+ metadata.gz: 5a128dcfe956c56d598a18a28d3b463adc25db7d4d326fc00771a191e8e2ba75
4
+ data.tar.gz: 6dd1eb0d196aee09416e6aafcf766e648b95e7c203636fe0f99857859f380d38
5
5
  SHA512:
6
- metadata.gz: 7394ae875b66647a0d24d6509f266a6396343a5083fb6737c4fb4212fc8fc892aeb55a87faedccdf2b6711641e14f4619dfa8bf05ccae94fd4d39a1ad0132d2b
7
- data.tar.gz: 00a969e3ccfd8296de2417541c2a887e23767798645f8f1ea07321523ab626146e452759cbd6774c5e1a8aea188e3f2c8408f0fc119adb13c626a2f6ab9e5d04
6
+ metadata.gz: 41fdec2098e1fefc0b9a3e621c9bb9cefbbbb81df55df19c56ca010574c065ce27d2b1aa73d1507b825ee6605d1294c4b7901bc07b7af8653745bc054568b305
7
+ data.tar.gz: 50ce7e81d9cd37f51f05d5800f8fb912c51ca302875293b789baadd60a26af5a6c0987b4e0f06ce1dcf4f71df75a750a8e3a2affc56029102d4c11af9e660ad4
@@ -1,3 +1,5 @@
1
+ require "etc"
2
+ require "parallel"
1
3
  require "relaton/core"
2
4
  require_relative "../ietf"
3
5
  require_relative "bibxml_parser"
@@ -41,107 +43,152 @@ module Relaton
41
43
  end
42
44
 
43
45
  #
44
- # Fetches ietf-internet-drafts documents
46
+ # Fetches ietf-internet-drafts documents.
45
47
  #
46
- def fetch_ieft_internet_drafts # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
47
- versions = Dir["bibxml-ids/*.xml"].each_with_object([]) do |path, vers|
48
- file = File.basename path, ".xml"
49
- draft = file.include?("D.draft-")
50
- /(?<ver>\d+)$/ =~ file if draft
51
- bib = BibXMLParser.parse(File.read(path, encoding: "UTF-8"))
52
- if ver
53
- version = Bib::Version.new(draft: ver)
54
- bib.version = [version]
55
- end
56
- if draft
57
- vers << { ref: file.sub(/^reference\.I-D\./, "").downcase, source: bib.source }
48
+ # Each work unit (one series, or one singleton XML) is processed
49
+ # end-to-end in a worker process: parse → link relations → serialize →
50
+ # write. Workers return Marshal-friendly index entries; the parent
51
+ # collects them and updates `Relaton::Index` and the duplicate-check set
52
+ # serially. Set `RELATON_IETF_PARALLEL_WORKERS=0` to force serial
53
+ # execution (useful for tests and debugging).
54
+ #
55
+ def fetch_ieft_internet_drafts
56
+ series_groups, singleton_paths = group_draft_paths
57
+
58
+ series_results = parallelize(series_groups.to_a) do |(series, paths_info)|
59
+ process_series(series, paths_info)
60
+ end.flatten(1)
61
+
62
+ singleton_results = parallelize(singleton_paths) do |path|
63
+ process_singleton(path)
64
+ end
65
+
66
+ (series_results + singleton_results).compact.each { |r| record_index_entry(r) }
67
+ end
68
+
69
+ #
70
+ # Run `block` once per item, in parallel worker processes when configured.
71
+ # `Parallel.map(items, in_processes: 0)` runs synchronously in the
72
+ # current process, which keeps tests deterministic and lets mocks work.
73
+ #
74
+ def parallelize(items, &block)
75
+ Parallel.map(items, in_processes: worker_count, &block)
76
+ end
77
+
78
+ def worker_count
79
+ ENV.fetch("RELATON_IETF_PARALLEL_WORKERS", Etc.nprocessors.to_s).to_i
80
+ end
81
+
82
+ #
83
+ # Filename-only scan: group versioned drafts by normalized series stem;
84
+ # everything else (non-versioned, non-`D.draft-`) goes to singletons.
85
+ # No XML parsing happens here — workers do that.
86
+ #
87
+ # @return [Array(Hash, Array<String>)]
88
+ # series_groups: { normalized_series => [{path, ver, ref}, ...] }
89
+ # singleton_paths: [path, ...]
90
+ #
91
+ def group_draft_paths
92
+ series_groups = {}
93
+ singleton_paths = []
94
+ Dir["bibxml-ids/*.xml"].each do |path|
95
+ file = File.basename(path, ".xml")
96
+ is_draft = file.include?("D.draft-")
97
+ ver = is_draft ? file[/(\d+)$/, 1] : nil
98
+ ref = file.sub(/^reference\.I-D\./, "").downcase
99
+ stem_match = is_draft && ver ? /^(draft-.+)-(\d{2})$/.match(ref) : nil
100
+ if stem_match
101
+ series = stem_match[1].gsub(/[.\s\/:-]+/, "-")
102
+ (series_groups[series] ||= []) << { path: path, ver: ver, ref: ref }
103
+ else
104
+ singleton_paths << path
58
105
  end
59
- save_doc bib
60
106
  end
61
- update_versions(versions) if versions.any? && @format != "bibxml"
62
- end
63
-
64
- #
65
- # Updates I-D's versions
66
- #
67
- # @param [Array<String>] versions list of versions
68
- #
69
- def update_versions(versions) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
70
- series = ""
71
- bib_versions = []
72
- Dir["#{@output}/*.#{@ext}"].each do |file|
73
- match = /(?<series>draft-.+)-(?<ver>\d{2})\.#{@ext}$/.match file
74
- if match
75
- if series != match[:series]
76
- bib_versions = versions.select { |v| v[:ref].downcase.gsub(/[.\s\/:-]+/, "-").match?(/^#{Regexp.quote match[:series]}-\d{2}/) }
77
- create_series match[:series], bib_versions
78
- series = match[:series]
79
- end
80
- lv = bib_versions.select { |v| v[:ref].match(/\d+$/).to_s.to_i < match[:ver].to_i }
81
- hv = bib_versions.select { |v| v[:ref].match(/\d+$/).to_s.to_i > match[:ver].to_i }
82
- if lv.any? || hv.any?
83
- bib = read_doc(file)
84
- bib.relation << version_relation(lv.last, "updates") if lv.any?
85
- bib.relation << version_relation(hv.first, "updatedBy") if hv.any?
86
- save_doc bib, check_duplicate: false
87
- end
107
+ [series_groups, singleton_paths]
108
+ end
109
+
110
+ #
111
+ # Worker: parse all files in a series, sort by version, append
112
+ # immediate-neighbor relations (skipped for bibxml), write each version
113
+ # and the un-versioned aggregator doc. Returns an array of index entries
114
+ # for the parent.
115
+ #
116
+ def process_series(series, paths_info)
117
+ sorted = paths_info.sort_by { |p| p[:ver].to_i }.map do |p|
118
+ bib = BibXMLParser.parse(File.read(p[:path], encoding: "UTF-8"))
119
+ bib.version = [Bib::Version.new(draft: p[:ver])]
120
+ p.merge(bib: bib, source: bib.source)
121
+ end
122
+ link_neighbor_relations(sorted) if @format != "bibxml"
123
+
124
+ results = sorted.map { |entry| serialize_and_write(entry[:bib]) }
125
+ results << serialize_and_write(build_unversioned_doc(series, sorted)) if @format != "bibxml"
126
+ results.compact
127
+ end
128
+
129
+ #
130
+ # Worker: parse + serialize + write a single non-grouped XML.
131
+ #
132
+ def process_singleton(path)
133
+ file = File.basename(path, ".xml")
134
+ is_draft = file.include?("D.draft-")
135
+ ver = is_draft ? file[/(\d+)$/, 1] : nil
136
+ bib = BibXMLParser.parse(File.read(path, encoding: "UTF-8"))
137
+ bib.version = [Bib::Version.new(draft: ver)] if ver
138
+ serialize_and_write(bib)
139
+ end
140
+
141
+ #
142
+ # Append immediate-neighbor `updates` / `updatedBy` relations in memory.
143
+ # Single-version series get no relations (no neighbors).
144
+ #
145
+ def link_neighbor_relations(sorted)
146
+ sorted.each_with_index do |entry, i|
147
+ if i.positive?
148
+ prev = sorted[i - 1]
149
+ entry[:bib].relation << version_relation({ ref: prev[:ref], source: prev[:source] }, "updates")
150
+ end
151
+ if i < sorted.size - 1
152
+ nxt = sorted[i + 1]
153
+ entry[:bib].relation << version_relation({ ref: nxt[:ref], source: nxt[:source] }, "updatedBy")
88
154
  end
89
155
  end
90
156
  end
91
157
 
92
158
  #
93
- # Create unversioned bibliographic item
159
+ # Build (but do not write) the un-versioned series aggregator doc with
160
+ # `includes` relations to every version. Uses the latest version's
161
+ # title/abstract from memory.
94
162
  #
95
- # @param [String] ref reference
96
- # @param [Array<String>] versions list of versions
163
+ # @return [Relaton::Ietf::ItemData, nil]
97
164
  #
98
- def create_series(ref, versions) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
99
- vs = versions.sort_by { |v| v[:ref].match(/\d+$/).to_s.to_i }
100
- if vs.empty?
101
- Util.warn "No versions found for #{ref}"
102
- return
165
+ def build_unversioned_doc(series, sorted)
166
+ if sorted.empty?
167
+ Util.warn "No versions found for #{series}"
168
+ return nil
103
169
  end
104
- file = output_file(vs.last[:ref])
105
- # return unless File.exist?(file)
106
-
107
- docid = Bib::Docidentifier.new(type: "Internet-Draft", content: ref, primary: true)
108
- rel = vs.map { |v| version_relation v, "includes" }
109
- last_v = Item.from_yaml(File.read(file, encoding: "UTF-8"))
110
- bib = ItemData.new(
111
- title: last_v.title, abstract: last_v.abstract, formattedref: Bib::Formattedref.new(content: ref),
170
+
171
+ last_v = sorted.last[:bib]
172
+ docid = Bib::Docidentifier.new(type: "Internet-Draft", content: series, primary: true)
173
+ rel = sorted.map { |e| version_relation({ ref: e[:ref], source: e[:source] }, "includes") }
174
+ ItemData.new(
175
+ title: last_v.title, abstract: last_v.abstract, formattedref: Bib::Formattedref.new(content: series),
112
176
  docidentifier: [docid], relation: rel
113
177
  )
114
- save_doc bib
115
178
  end
116
179
 
117
180
  #
118
181
  # Create bibitem relation
119
182
  #
120
- # @param [String] ref reference
183
+ # @param [Hash] ver version reference, { ref:, source: }
121
184
  # @param [String] type relation type
122
185
  #
123
- # @return [Relaton::Bib::Relation] relation
186
+ # @return [Relaton::Ietf::Relation] relation
124
187
  #
125
188
  def version_relation(ver, type)
126
189
  docid = Bib::Docidentifier.new(type: "Internet-Draft", content: ver[:ref], primary: true)
127
190
  bibitem = ItemData.new(formattedref: Bib::Formattedref.new(content: ver[:ref]), docidentifier: [docid], source: ver[:source])
128
- Bib::Relation.new(type: type, bibitem: bibitem)
129
- end
130
-
131
- #
132
- # Redad saved documents
133
- #
134
- # @param [String] file path to file
135
- #
136
- # @return [Relaton::Ietf::ItemData] bibliographic item
137
- #
138
- def read_doc(file)
139
- doc = File.read(file, encoding: "UTF-8")
140
- case @format
141
- when "xml" then Item.from_xml(doc)
142
- when "yaml" then Item.from_yaml(doc)
143
- else BibXMLParser.parse(doc)
144
- end
191
+ Relaton::Ietf::Relation.new(type: type, bibitem: bibitem)
145
192
  end
146
193
 
147
194
  #
@@ -172,38 +219,58 @@ module Relaton
172
219
  end
173
220
 
174
221
  #
175
- # Save document to file
222
+ # Save document to file (sequential path: serialize, write, index).
223
+ # Used by the rfcsubseries / rfc-entries fetchers; the I-D fetcher splits
224
+ # this into worker-safe `serialize_and_write` plus parent-only
225
+ # `record_index_entry` so the index is touched only in the main process.
176
226
  #
177
- # @param [Relaton::Ietf::Rfc::Entry, nil] rfc index entry
227
+ # @param [Relaton::Ietf::Rfc::Entry, nil] entry
178
228
  # @param [Boolean] check_duplicate check for duplicate
179
229
  #
180
- def save_doc(entry, check_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
181
- return unless entry
230
+ def save_doc(entry, check_duplicate: true)
231
+ result = serialize_and_write(entry)
232
+ record_index_entry(result, check_duplicate: check_duplicate) if result
233
+ end
234
+
235
+ #
236
+ # Worker-safe: serialize, compute output filename, write to disk, return
237
+ # a Marshal-friendly hash with the docid+file pair the parent needs to
238
+ # update `Relaton::Index` and `@files`. Does NOT touch instance state
239
+ # that has to stay consistent across workers (`@files`, the index).
240
+ #
241
+ # @param [#to_yaml, #to_xml, #to_rfcxml, nil] entry
242
+ # @return [Hash, nil]
243
+ #
244
+ def serialize_and_write(entry) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
245
+ return nil unless entry
182
246
 
183
- c = case @format
184
- when "xml" then entry.to_xml(bibdata: true)
185
- when "yaml" then entry.to_yaml
186
- when "bibxml" then entry.to_rfcxml
187
- else entry.send("to_#{@format}")
188
- end
247
+ content = case @format
248
+ when "xml" then entry.to_xml(bibdata: true)
249
+ when "yaml" then entry.to_yaml
250
+ when "bibxml" then entry.to_rfcxml
251
+ else entry.send("to_#{@format}")
252
+ end
189
253
  id = if entry.respond_to?(:docidentifier)
190
254
  entry.docidentifier.detect { |i| i.type == "Internet-Draft" && i.primary }&.content
191
255
  end
192
256
  id ||= entry.docnumber || entry.formattedref.content
193
257
  file = output_file(id)
194
- if check_duplicate && @files.include?(file)
195
- Util.warn "File #{file} already exists. Document: #{entry.docnumber}"
196
- elsif check_duplicate
197
- @files << file
198
- end
199
- File.write file, c, encoding: "UTF-8"
200
- add_to_index entry, file
258
+ File.write file, content, encoding: "UTF-8"
259
+ primary = entry.docidentifier.detect(&:primary) || entry.docidentifier.first
260
+ { docnumber: entry.docnumber, file: file, index_id: primary.content }
201
261
  end
202
262
 
203
- def add_to_index(entry, file)
204
- docid = entry.docidentifier.detect(&:primary)
205
- docid ||= entry.docidentifier.first
206
- index.add_or_update docid.content, file
263
+ #
264
+ # Parent-only: dedupe-check `@files` and update `Relaton::Index`. Called
265
+ # serially after workers return so index updates are race-free.
266
+ #
267
+ def record_index_entry(result, check_duplicate: true)
268
+ if check_duplicate && @files.include?(result[:file])
269
+ Util.warn "File #{result[:file]} already exists. Document: #{result[:docnumber]}"
270
+ elsif check_duplicate
271
+ @files << result[:file]
272
+ end
273
+ index.add_or_update result[:index_id], result[:file]
207
274
  end
208
275
 
209
276
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "cgi"
3
4
  require_relative "rfc_index_namespace"
4
5
  require_relative "is_also"
5
6
  require_relative "author"
@@ -212,7 +213,7 @@ module Relaton
212
213
  is_also.doc_id.map do |ref|
213
214
  rfc_entry = rfc_index&.[](ref)
214
215
  bibitem = rfc_entry ? rfc_entry.to_rfc_item(wg_names: wg_names) : build_minimal_bibitem(ref)
215
- Bib::Relation.new(type: "includes", bibitem: bibitem)
216
+ Relaton::Ietf::Relation.new(type: "includes", bibitem: bibitem)
216
217
  end.compact
217
218
  end
218
219
 
@@ -317,7 +318,7 @@ module Relaton
317
318
  def build_rfc_abstract
318
319
  return [] unless abstract&.p&.any?
319
320
 
320
- content = abstract.p.map { |para| "<p>#{para.strip}</p>" }.join
321
+ content = abstract.p.map { |para| "<p>#{CGI.escapeHTML(para.strip)}</p>" }.join
321
322
  [Bib::Abstract.new(content: content, language: "en", script: "Latn")]
322
323
  end
323
324
 
@@ -339,7 +340,7 @@ module Relaton
339
340
  def build_rfc_doc_relation(ref, type)
340
341
  docid = Bib::Docidentifier.new(type: "IETF", content: ref, primary: true)
341
342
  bibitem = ItemData.new(formattedref: Bib::Formattedref.new(content: ref), docidentifier: [docid])
342
- Bib::Relation.new(type: type, bibitem: bibitem)
343
+ Relaton::Ietf::Relation.new(type: type, bibitem: bibitem)
343
344
  end
344
345
 
345
346
  def build_rfc_status
@@ -1,5 +1,5 @@
1
1
  module Relaton
2
2
  module Ietf
3
- VERSION = "2.1.1".freeze
3
+ VERSION = "2.1.2".freeze
4
4
  end
5
5
  end
data/relaton-ietf.gemspec CHANGED
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
30
30
  spec.required_ruby_version = Gem::Requirement.new(">= 3.2.0")
31
31
 
32
32
  spec.add_dependency "base64"
33
+ spec.add_dependency "parallel", "~> 1.26"
33
34
  spec.add_dependency "relaton-bib", "~> 2.1.0"
34
35
  spec.add_dependency "relaton-core", "~> 0.0.13"
35
36
  spec.add_dependency "relaton-index", "~> 0.2.3"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-ietf
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.1
4
+ version: 2.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-05-06 00:00:00.000000000 Z
11
+ date: 2026-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: parallel
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.26'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.26'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: relaton-bib
29
43
  requirement: !ruby/object:Gem::Requirement