svg_conform 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +79 -13
- data/docs/sax_validation_mode.adoc +576 -0
- data/lib/svg_conform/document.rb +4 -1
- data/lib/svg_conform/document_analyzer.rb +118 -0
- data/lib/svg_conform/errors/base.rb +58 -0
- data/lib/svg_conform/errors/validation_issue.rb +245 -0
- data/lib/svg_conform/errors/validation_notice.rb +30 -0
- data/lib/svg_conform/interfaces/requirement_interface.rb +177 -0
- data/lib/svg_conform/remediations/namespace_remediation.rb +2 -1
- data/lib/svg_conform/requirements/base_requirement.rb +16 -0
- data/lib/svg_conform/requirements/color_restrictions_requirement.rb +19 -61
- data/lib/svg_conform/requirements/font_family_requirement.rb +14 -24
- data/lib/svg_conform/requirements/no_external_css_requirement.rb +19 -77
- data/lib/svg_conform/requirements/viewbox_required_requirement.rb +16 -72
- data/lib/svg_conform/sax_validation_handler.rb +15 -7
- data/lib/svg_conform/validation/error_tracker.rb +103 -0
- data/lib/svg_conform/validation/node_id_manager.rb +35 -0
- data/lib/svg_conform/validation/structural_invalidity_tracker.rb +63 -0
- data/lib/svg_conform/validation_context.rb +112 -459
- data/lib/svg_conform/validation_issue.rb +12 -0
- data/lib/svg_conform/version.rb +1 -1
- data/lib/svg_conform.rb +2 -0
- metadata +11 -2
- data/lib/svg_conform/fast_document_analyzer.rb +0 -82
|
@@ -0,0 +1,576 @@
|
|
|
1
|
+
= Writing SAX-Compatible Requirements
|
|
2
|
+
:toc: left
|
|
3
|
+
:toclevels: 4
|
|
4
|
+
:sectlinks:
|
|
5
|
+
:sectanchors:
|
|
6
|
+
:source-highlighter: rouge
|
|
7
|
+
|
|
8
|
+
== Purpose
|
|
9
|
+
|
|
10
|
+
This guide explains how to write custom requirements that work with SvgConform's SAX-based validation mode. All requirements must support SAX validation for memory-safe processing of large SVG files.
|
|
11
|
+
|
|
12
|
+
== Architecture: Two Operating Modes
|
|
13
|
+
|
|
14
|
+
SvgConform uses two distinct modes:
|
|
15
|
+
|
|
16
|
+
[cols="1,3,3"]
|
|
17
|
+
|===
|
|
18
|
+
| Mode | Purpose | Characteristics
|
|
19
|
+
|
|
20
|
+
| **SAX Validation**
|
|
21
|
+
| Validate structure and rules
|
|
22
|
+
| ✅ Constant memory usage +
|
|
23
|
+
✅ Handles any file size +
|
|
24
|
+
✅ Streaming parser +
|
|
25
|
+
❌ Read-only, cannot modify
|
|
26
|
+
|
|
27
|
+
| **DOM Remediation**
|
|
28
|
+
| Modify and fix documents
|
|
29
|
+
| ✅ Full document tree +
|
|
30
|
+
✅ XPath queries +
|
|
31
|
+
✅ Can modify structure +
|
|
32
|
+
❌ Memory scales with size
|
|
33
|
+
|===
|
|
34
|
+
|
|
35
|
+
**Key Point**: Requirements run in SAX mode (validation), remediations run in DOM mode (fixes).
|
|
36
|
+
|
|
37
|
+
== Why SAX for Requirements?
|
|
38
|
+
|
|
39
|
+
=== Memory Safety
|
|
40
|
+
|
|
41
|
+
SAX (Simple API for XML) is a streaming parser that processes XML sequentially:
|
|
42
|
+
|
|
43
|
+
[source,ruby]
|
|
44
|
+
----
|
|
45
|
+
# SAX: Process one element at a time
|
|
46
|
+
start_element("svg", {"width" => "500"})
|
|
47
|
+
start_element("rect", {"fill" => "red"})
|
|
48
|
+
end_element("rect")
|
|
49
|
+
end_element("svg")
|
|
50
|
+
|
|
51
|
+
# Memory usage: O(1) - constant, regardless of file size
|
|
52
|
+
----
|
|
53
|
+
|
|
54
|
+
[source,ruby]
|
|
55
|
+
----
|
|
56
|
+
# DOM: Load entire tree into memory
|
|
57
|
+
document = Nokogiri::XML(svg_content)
|
|
58
|
+
# Memory usage: O(n) - grows with file size
|
|
59
|
+
# 100MB file = 100MB+ in memory
|
|
60
|
+
----
|
|
61
|
+
|
|
62
|
+
=== Real-World Impact
|
|
63
|
+
|
|
64
|
+
[cols="1,2,2"]
|
|
65
|
+
|===
|
|
66
|
+
| File Size | SAX Performance | DOM Performance
|
|
67
|
+
|
|
68
|
+
| 10 KB
|
|
69
|
+
| < 10ms
|
|
70
|
+
| < 5ms
|
|
71
|
+
|
|
72
|
+
| 100 KB
|
|
73
|
+
| 10-50ms
|
|
74
|
+
| 20-100ms
|
|
75
|
+
|
|
76
|
+
| 1 MB
|
|
77
|
+
| 50-200ms
|
|
78
|
+
| 200-500ms
|
|
79
|
+
|
|
80
|
+
| 10 MB
|
|
81
|
+
| 500ms-1s
|
|
82
|
+
| **HANGS** (can crash)
|
|
83
|
+
|
|
84
|
+
| 100 MB
|
|
85
|
+
| 5-10s
|
|
86
|
+
| **CRASHES** (out of memory)
|
|
87
|
+
|===
|
|
88
|
+
|
|
89
|
+
=== Conclusion
|
|
90
|
+
|
|
91
|
+
SAX validation ensures the library can handle ANY file size safely. This is critical for:
|
|
92
|
+
|
|
93
|
+
- CI/CD pipelines processing many files
|
|
94
|
+
- Production systems with unknown file sizes
|
|
95
|
+
- User-facing applications that must not crash
|
|
96
|
+
|
|
97
|
+
== BaseRequirement API
|
|
98
|
+
|
|
99
|
+
Every requirement inherits from link:../lib/svg_conform/requirements/base_requirement.rb[`BaseRequirement`] and must implement SAX validation methods.
|
|
100
|
+
|
|
101
|
+
=== Required Methods
|
|
102
|
+
|
|
103
|
+
==== validate_sax_element(element, context)
|
|
104
|
+
|
|
105
|
+
Called for each element as it's parsed.
|
|
106
|
+
|
|
107
|
+
**Parameters**:
|
|
108
|
+
|
|
109
|
+
* `element` (link:../lib/svg_conform/element_proxy.rb[`ElementProxy`]) - Lightweight element representation
|
|
110
|
+
* `context` (link:../lib/svg_conform/validation_context.rb[`ValidationContext`]) - Validation state
|
|
111
|
+
|
|
112
|
+
**ElementProxy Properties**:
|
|
113
|
+
|
|
114
|
+
[source,ruby]
|
|
115
|
+
----
|
|
116
|
+
element.name # String: element name (e.g., "rect", "svg")
|
|
117
|
+
element.attributes # Hash: all attributes {name => value}
|
|
118
|
+
element.parent # ElementProxy: parent element (or nil for root)
|
|
119
|
+
element.position # Integer: sibling position (1-based)
|
|
120
|
+
element.path # Array: XPath-style path ["svg[1]", "g[1]", "rect[2]"]
|
|
121
|
+
element.namespace # String: element namespace (or nil)
|
|
122
|
+
----
|
|
123
|
+
|
|
124
|
+
**Example**:
|
|
125
|
+
|
|
126
|
+
[source,ruby]
|
|
127
|
+
----
|
|
128
|
+
class RectMustHaveWidthRequirement < BaseRequirement
|
|
129
|
+
def validate_sax_element(element, context)
|
|
130
|
+
return unless element.name == "rect"
|
|
131
|
+
|
|
132
|
+
unless element.attributes["width"]
|
|
133
|
+
context.add_error(
|
|
134
|
+
requirement_id: id,
|
|
135
|
+
message: "rect elements must have a width attribute",
|
|
136
|
+
node: element,
|
|
137
|
+
severity: :error
|
|
138
|
+
)
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
----
|
|
143
|
+
|
|
144
|
+
=== Optional Methods
|
|
145
|
+
|
|
146
|
+
==== collect_sax_data(element, context)
|
|
147
|
+
|
|
148
|
+
Called during parsing to collect data for deferred validation.
|
|
149
|
+
|
|
150
|
+
Use this when you need to validate relationships between elements after seeing the entire document (e.g., ID references).
|
|
151
|
+
|
|
152
|
+
**Example**:
|
|
153
|
+
|
|
154
|
+
[source,ruby]
|
|
155
|
+
----
|
|
156
|
+
class NoDuplicateIdsRequirement < BaseRequirement
|
|
157
|
+
def collect_sax_data(element, context)
|
|
158
|
+
if id_value = element.attributes["id"]
|
|
159
|
+
context.data[:ids] ||= []
|
|
160
|
+
context.data[:ids] << {
|
|
161
|
+
id: id_value,
|
|
162
|
+
element: element.name,
|
|
163
|
+
line: element.line_number
|
|
164
|
+
}
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
def validate_sax_complete(context)
|
|
169
|
+
ids = context.data[:ids] || []
|
|
170
|
+
id_counts = Hash.new(0)
|
|
171
|
+
|
|
172
|
+
ids.each do |entry|
|
|
173
|
+
id_counts[entry[:id]] += 1
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
id_counts.each do |id, count|
|
|
177
|
+
next if count == 1
|
|
178
|
+
|
|
179
|
+
context.add_error(
|
|
180
|
+
requirement_id: self.id,
|
|
181
|
+
message: "Duplicate ID '#{id}' found #{count} times",
|
|
182
|
+
severity: :error
|
|
183
|
+
)
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
def needs_deferred_validation?
|
|
188
|
+
true
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
----
|
|
192
|
+
|
|
193
|
+
==== validate_sax_complete(context)
|
|
194
|
+
|
|
195
|
+
Called after document parsing completes. Use for validation that requires complete document knowledge.
|
|
196
|
+
|
|
197
|
+
==== needs_deferred_validation?
|
|
198
|
+
|
|
199
|
+
Return `true` if your requirement uses `validate_sax_complete()`.
|
|
200
|
+
|
|
201
|
+
[source,ruby]
|
|
202
|
+
----
|
|
203
|
+
def needs_deferred_validation?
|
|
204
|
+
true # This requirement needs full document context
|
|
205
|
+
end
|
|
206
|
+
----
|
|
207
|
+
|
|
208
|
+
== Validation Patterns
|
|
209
|
+
|
|
210
|
+
=== Pattern 1: Immediate Element Validation
|
|
211
|
+
|
|
212
|
+
Check each element as it's parsed. Most requirements use this pattern.
|
|
213
|
+
|
|
214
|
+
[source,ruby]
|
|
215
|
+
----
|
|
216
|
+
class NoScriptElementsRequirement < BaseRequirement
|
|
217
|
+
def validate_sax_element(element, context)
|
|
218
|
+
if element.name == "script"
|
|
219
|
+
context.add_error(
|
|
220
|
+
requirement_id: id,
|
|
221
|
+
message: "script elements are not allowed",
|
|
222
|
+
node: element,
|
|
223
|
+
severity: :error
|
|
224
|
+
)
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
end
|
|
228
|
+
----
|
|
229
|
+
|
|
230
|
+
=== Pattern 2: Attribute Validation
|
|
231
|
+
|
|
232
|
+
Validate attributes on specific elements.
|
|
233
|
+
|
|
234
|
+
[source,ruby]
|
|
235
|
+
----
|
|
236
|
+
class ViewboxRequiredRequirement < BaseRequirement
|
|
237
|
+
def validate_sax_element(element, context)
|
|
238
|
+
return unless element.name == "svg"
|
|
239
|
+
|
|
240
|
+
unless element.attributes["viewBox"]
|
|
241
|
+
context.add_error(
|
|
242
|
+
requirement_id: id,
|
|
243
|
+
message: "svg element must have viewBox attribute",
|
|
244
|
+
node: element,
|
|
245
|
+
severity: :error
|
|
246
|
+
)
|
|
247
|
+
end
|
|
248
|
+
end
|
|
249
|
+
end
|
|
250
|
+
----
|
|
251
|
+
|
|
252
|
+
=== Pattern 3: Parent-Child Validation
|
|
253
|
+
|
|
254
|
+
Check element context using parent reference.
|
|
255
|
+
|
|
256
|
+
[source,ruby]
|
|
257
|
+
----
|
|
258
|
+
class TextOnlyInTextElementsRequirement < BaseRequirement
|
|
259
|
+
def validate_sax_element(element, context)
|
|
260
|
+
return unless element.name == "text"
|
|
261
|
+
return unless element.parent
|
|
262
|
+
|
|
263
|
+
unless ["text", "tspan", "tref"].include?(element.parent.name)
|
|
264
|
+
context.add_error(
|
|
265
|
+
requirement_id: id,
|
|
266
|
+
message: "text elements must be inside text containers",
|
|
267
|
+
node: element,
|
|
268
|
+
severity: :error
|
|
269
|
+
)
|
|
270
|
+
end
|
|
271
|
+
end
|
|
272
|
+
end
|
|
273
|
+
----
|
|
274
|
+
|
|
275
|
+
=== Pattern 4: Deferred Cross-Reference Validation
|
|
276
|
+
|
|
277
|
+
Collect data during parsing, validate after complete.
|
|
278
|
+
|
|
279
|
+
[source,ruby]
|
|
280
|
+
----
|
|
281
|
+
class IdReferenceRequirement < BaseRequirement
|
|
282
|
+
def collect_sax_data(element, context)
|
|
283
|
+
# Collect all defined IDs
|
|
284
|
+
if id_value = element.attributes["id"]
|
|
285
|
+
context.data[:defined_ids] ||= Set.new
|
|
286
|
+
context.data[:defined_ids] << id_value
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
# Collect all ID references
|
|
290
|
+
%w[href xlink:href].each do |attr| # not fill/stroke: those hold colors like "#fff" or url(#id), never a bare "#id"
|
|
291
|
+
if value = element.attributes[attr]
|
|
292
|
+
if value.start_with?("#")
|
|
293
|
+
context.data[:id_refs] ||= []
|
|
294
|
+
context.data[:id_refs] << {
|
|
295
|
+
ref: value[1..-1],
|
|
296
|
+
element: element.name,
|
|
297
|
+
attribute: attr,
|
|
298
|
+
line: element.line_number
|
|
299
|
+
}
|
|
300
|
+
end
|
|
301
|
+
end
|
|
302
|
+
end
|
|
303
|
+
end
|
|
304
|
+
|
|
305
|
+
def validate_sax_complete(context)
|
|
306
|
+
defined_ids = context.data[:defined_ids] || Set.new
|
|
307
|
+
id_refs = context.data[:id_refs] || []
|
|
308
|
+
|
|
309
|
+
id_refs.each do |ref_info|
|
|
310
|
+
unless defined_ids.include?(ref_info[:ref])
|
|
311
|
+
context.add_error(
|
|
312
|
+
requirement_id: id,
|
|
313
|
+
message: "Reference to undefined ID: #{ref_info[:ref]}",
|
|
314
|
+
severity: :error
|
|
315
|
+
)
|
|
316
|
+
end
|
|
317
|
+
end
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
def needs_deferred_validation?
|
|
321
|
+
true
|
|
322
|
+
end
|
|
323
|
+
end
|
|
324
|
+
----
|
|
325
|
+
|
|
326
|
+
== Common Pitfalls
|
|
327
|
+
|
|
328
|
+
=== Pitfall 1: Assuming DOM Availability
|
|
329
|
+
|
|
330
|
+
[source,ruby]
|
|
331
|
+
----
|
|
332
|
+
# ❌ WRONG: ElementProxy is not a DOM node
|
|
333
|
+
def validate_sax_element(element, context)
|
|
334
|
+
rects = element.xpath("//rect") # FAILS: No such method
|
|
335
|
+
end
|
|
336
|
+
|
|
337
|
+
# ✅ CORRECT: Use element properties
|
|
338
|
+
def validate_sax_element(element, context)
|
|
339
|
+
if element.name == "rect"
|
|
340
|
+
# Process this rect
|
|
341
|
+
end
|
|
342
|
+
end
|
|
343
|
+
----
|
|
344
|
+
|
|
345
|
+
=== Pitfall 2: Attempting Modification
|
|
346
|
+
|
|
347
|
+
[source,ruby]
|
|
348
|
+
----
|
|
349
|
+
# ❌ WRONG: SAX is read-only
|
|
350
|
+
def validate_sax_element(element, context)
|
|
351
|
+
element.attributes["fill"] = "black" # FAILS: Cannot modify
|
|
352
|
+
end
|
|
353
|
+
|
|
354
|
+
# ✅ CORRECT: Report error only (fixing happens in remediations)
|
|
355
|
+
def validate_sax_element(element, context)
|
|
356
|
+
if element.attributes["fill"] == "red"
|
|
357
|
+
context.add_error(
|
|
358
|
+
requirement_id: id,
|
|
359
|
+
message: "Red fill not allowed",
|
|
360
|
+
node: element
|
|
361
|
+
)
|
|
362
|
+
end
|
|
363
|
+
end
|
|
364
|
+
----
|
|
365
|
+
|
|
366
|
+
=== Pitfall 3: Deep Parent Navigation
|
|
367
|
+
|
|
368
|
+
[source,ruby]
|
|
369
|
+
----
|
|
370
|
+
# ❌ FRAGILE: Long parent chains break easily
|
|
371
|
+
def validate_sax_element(element, context)
|
|
372
|
+
if element.parent.parent.parent.name == "svg"
|
|
373
|
+
# This breaks if nesting changes
|
|
374
|
+
end
|
|
375
|
+
end
|
|
376
|
+
|
|
377
|
+
# ✅ BETTER: Use path or collect data
|
|
378
|
+
def validate_sax_element(element, context)
|
|
379
|
+
# Check if "svg" is in the ancestor path
|
|
380
|
+
if element.path.any? { |p| p.start_with?("svg[") }
|
|
381
|
+
# More robust
|
|
382
|
+
end
|
|
383
|
+
end
|
|
384
|
+
----
|
|
385
|
+
|
|
386
|
+
=== Pitfall 4: State Between Elements
|
|
387
|
+
|
|
388
|
+
[source,ruby]
|
|
389
|
+
----
|
|
390
|
+
# ❌ WRONG: Instance variables leak between validations
|
|
391
|
+
def validate_sax_element(element, context)
|
|
392
|
+
@count ||= 0 # DANGER: Shared across files in batch
|
|
393
|
+
@count += 1
|
|
394
|
+
end
|
|
395
|
+
|
|
396
|
+
# ✅ CORRECT: Use context.data for per-document state
|
|
397
|
+
def validate_sax_element(element, context)
|
|
398
|
+
context.data[:count] ||= 0
|
|
399
|
+
context.data[:count] += 1
|
|
400
|
+
end
|
|
401
|
+
----
|
|
402
|
+
|
|
403
|
+
== Complete Example
|
|
404
|
+
|
|
405
|
+
Here's a complete requirement that uses multiple patterns:
|
|
406
|
+
|
|
407
|
+
[source,ruby]
|
|
408
|
+
----
|
|
409
|
+
module SvgConform
|
|
410
|
+
module Requirements
|
|
411
|
+
class CompleteExampleRequirement < BaseRequirement
|
|
412
|
+
# Configuration
|
|
413
|
+
attribute :max_depth, :integer, default: 10
|
|
414
|
+
attribute :allowed_elements, :string, collection: true
|
|
415
|
+
|
|
416
|
+
# Immediate validation: Check depth
|
|
417
|
+
def validate_sax_element(element, context)
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
depth = element.path.length
|
|
421
|
+
|
|
422
|
+
if depth > max_depth
|
|
423
|
+
context.add_error(
|
|
424
|
+
requirement_id: id,
|
|
425
|
+
message: "Nesting too deep: #{depth} levels (max: #{max_depth})",
|
|
426
|
+
node: element,
|
|
427
|
+
severity: :warning
|
|
428
|
+
)
|
|
429
|
+
end
|
|
430
|
+
|
|
431
|
+
# Check allowed elements
|
|
432
|
+
if allowed_elements&.any? && !allowed_elements.include?(element.name)
|
|
433
|
+
context.add_error(
|
|
434
|
+
requirement_id: id,
|
|
435
|
+
message: "Element '#{element.name}' not allowed",
|
|
436
|
+
node: element,
|
|
437
|
+
severity: :error
|
|
438
|
+
)
|
|
439
|
+
end
|
|
440
|
+
end
|
|
441
|
+
|
|
442
|
+
# Data collection: Track element counts
|
|
443
|
+
def collect_sax_data(element, context)
|
|
444
|
+
context.data[:element_counts] ||= Hash.new(0)
|
|
445
|
+
context.data[:element_counts][element.name] += 1
|
|
446
|
+
end
|
|
447
|
+
|
|
448
|
+
# Deferred validation: Report statistics
|
|
449
|
+
def validate_sax_complete(context)
|
|
450
|
+
counts = context.data[:element_counts] || {}
|
|
451
|
+
total = counts.values.sum
|
|
452
|
+
|
|
453
|
+
context.add_info(
|
|
454
|
+
requirement_id: id,
|
|
455
|
+
message: "Document has #{total} elements: #{counts.inspect}"
|
|
456
|
+
)
|
|
457
|
+
end
|
|
458
|
+
|
|
459
|
+
def needs_deferred_validation?
|
|
460
|
+
true
|
|
461
|
+
end
|
|
462
|
+
end
|
|
463
|
+
end
|
|
464
|
+
end
|
|
465
|
+
----
|
|
466
|
+
|
|
467
|
+
== Testing Your Requirement
|
|
468
|
+
|
|
469
|
+
[source,ruby]
|
|
470
|
+
----
|
|
471
|
+
RSpec.describe CompleteExampleRequirement do
|
|
472
|
+
let(:requirement) { described_class.new(max_depth: 5) }
|
|
473
|
+
let(:profile) { SvgConform::Profile.new.tap { |p| p.add_requirement(requirement) } }
|
|
474
|
+
let(:validator) { SvgConform::Validator.new }
|
|
475
|
+
|
|
476
|
+
it "validates depth" do
|
|
477
|
+
svg = <<~SVG
|
|
478
|
+
<svg>
|
|
479
|
+
<g><g><g><g><g><g>
|
|
480
|
+
<rect/>
|
|
481
|
+
</g></g></g></g></g></g>
|
|
482
|
+
</svg>
|
|
483
|
+
SVG
|
|
484
|
+
|
|
485
|
+
result = validator.validate(svg, profile: profile)
|
|
486
|
+
|
|
487
|
+
expect(result.valid?).to be false
|
|
488
|
+
expect(result.errors.first.message).to include("Nesting too deep")
|
|
489
|
+
end
|
|
490
|
+
end
|
|
491
|
+
----
|
|
492
|
+
|
|
493
|
+
== Performance Tips
|
|
494
|
+
|
|
495
|
+
1. **Minimize work per element**: SAX calls your method for EVERY element
|
|
496
|
+
2. **Use early returns**: Skip irrelevant elements quickly
|
|
497
|
+
3. **Collect minimal data**: Only store what you need for deferred validation
|
|
498
|
+
4. **Use Sets for lookups**: `Set#include?` is O(1) vs Array O(n)
|
|
499
|
+
5. **Avoid regex when possible**: String comparison is faster
|
|
500
|
+
|
|
501
|
+
[source,ruby]
|
|
502
|
+
----
|
|
503
|
+
# ✅ FAST: Early return
|
|
504
|
+
def validate_sax_element(element, context)
|
|
505
|
+
return unless element.name == "rect" # Skip 99% of elements
|
|
506
|
+
# Complex logic only for rect elements
|
|
507
|
+
end
|
|
508
|
+
|
|
509
|
+
# ✅ FAST: Set for lookups
|
|
510
|
+
ALLOWED = Set.new(%w[svg g rect circle path])
|
|
511
|
+
|
|
512
|
+
def validate_sax_element(element, context)
|
|
513
|
+
unless ALLOWED.include?(element.name)
|
|
514
|
+
# Error
|
|
515
|
+
end
|
|
516
|
+
end
|
|
517
|
+
----
|
|
518
|
+
|
|
519
|
+
== Migration from DOM Requirements
|
|
520
|
+
|
|
521
|
+
If you have an existing DOM-based requirement:
|
|
522
|
+
|
|
523
|
+
[source,ruby]
|
|
524
|
+
----
|
|
525
|
+
# OLD: DOM-based requirement
|
|
526
|
+
class OldRequirement < BaseRequirement
|
|
527
|
+
def check(node, context)
|
|
528
|
+
return unless node.name == "rect"
|
|
529
|
+
# DOM-specific logic
|
|
530
|
+
end
|
|
531
|
+
end
|
|
532
|
+
----
|
|
533
|
+
|
|
534
|
+
Convert to SAX:
|
|
535
|
+
|
|
536
|
+
[source,ruby]
|
|
537
|
+
----
|
|
538
|
+
# NEW: SAX-compatible requirement
|
|
539
|
+
class NewRequirement < BaseRequirement
|
|
540
|
+
# Keep original DOM logic for backward compatibility
|
|
541
|
+
def check(node, context)
|
|
542
|
+
return unless node.name == "rect"
|
|
543
|
+
# DOM logic still works for tests
|
|
544
|
+
end
|
|
545
|
+
|
|
546
|
+
# Add SAX support
|
|
547
|
+
def validate_sax_element(element, context)
|
|
548
|
+
return unless element.name == "rect"
|
|
549
|
+
# SAX logic (usually same logic, different API)
|
|
550
|
+
end
|
|
551
|
+
end
|
|
552
|
+
----
|
|
553
|
+
|
|
554
|
+
== Conclusion
|
|
555
|
+
|
|
556
|
+
Writing SAX-compatible requirements ensures your validation:
|
|
557
|
+
|
|
558
|
+
- ✅ Works with files of any size
|
|
559
|
+
- ✅ Never hangs or crashes on large files
|
|
560
|
+
- ✅ Integrates seamlessly with SvgConform
|
|
561
|
+
- ✅ Provides consistent performance
|
|
562
|
+
|
|
563
|
+
Remember:
|
|
564
|
+
|
|
565
|
+
1. Implement `validate_sax_element()` for immediate checks
|
|
566
|
+
2. Use `collect_sax_data()` + `validate_sax_complete()` for deferred validation
|
|
567
|
+
3. Never assume DOM availability in requirements
|
|
568
|
+
4. Test with large files to verify performance
|
|
569
|
+
|
|
570
|
+
== Additional Resources
|
|
571
|
+
|
|
572
|
+
* link:../README.adoc[Main Documentation]
|
|
573
|
+
* link:api_reference.adoc[API Reference]
|
|
574
|
+
* link:requirements.adoc[Built-in Requirements]
|
|
575
|
+
* link:../lib/svg_conform/requirements/base_requirement.rb[BaseRequirement Source]
|
|
576
|
+
* link:../lib/svg_conform/element_proxy.rb[ElementProxy Source]
|
data/lib/svg_conform/document.rb
CHANGED
|
@@ -97,7 +97,10 @@ module SvgConform
|
|
|
97
97
|
# Clean up unused namespace declarations if marked by remediations
|
|
98
98
|
if instance_variable_defined?(:@unused_namespace_prefixes)
|
|
99
99
|
prefixes = @unused_namespace_prefixes
|
|
100
|
-
|
|
100
|
+
if prefixes && !prefixes.empty?
|
|
101
|
+
xml = remove_namespace_declarations(xml,
|
|
102
|
+
prefixes)
|
|
103
|
+
end
|
|
101
104
|
# Clear the marker after cleanup
|
|
102
105
|
remove_instance_variable(:@unused_namespace_prefixes)
|
|
103
106
|
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SvgConform
  # Assigns path-style IDs such as "/svg[1]/g[2]/rect[1]" to document nodes.
  #
  # Every path segment is "name[position]", where position is the 1-based
  # index of the node among the same-named children of its parent. Positions
  # are obtained by counting forward (while traversing the document, or over
  # parent.children on demand) instead of walking the previous_sibling chain.
  #
  # IDs are cached under node.object_id.
  # NOTE(review): a cache hit therefore needs the caller to pass the very same
  # Ruby object that #traverse yielded - confirm the document library hands
  # out stable node objects.
  class DocumentAnalyzer
    # @return [Hash{Integer => String}] node.object_id => path ID
    attr_reader :cache

    # Builds the analyzer and pre-computes IDs for every named node that
    # document.traverse yields.
    #
    # @param document [#traverse] yields each node of the document to a block
    def initialize(document)
      @document = document
      @cache = {}
      populate_cache
    end

    # Get path-based ID for a node, from the cache when possible.
    #
    # @param node [Object] node responding to #name (and usually #parent)
    # @return [String, nil] the path ID, or nil when the node has no name
    def get_node_id(node)
      return nil unless named?(node)

      # Return cached ID or compute on-demand (and remember the result)
      @cache[node.object_id] ||= compute_and_cache_path(node)
    end

    private

    # True-ish when +node+ exposes a non-nil name (nil itself does not).
    def named?(node)
      node.respond_to?(:name) && node.name
    end

    # Parent of +node+, or nil when it has none or cannot report one.
    # The rescue keeps the best-effort behaviour for objects that claim to
    # respond to #parent but raise NoMethodError when asked.
    def parent_of(node)
      node.parent if node.respond_to?(:parent)
    rescue NoMethodError
      nil
    end

    # Fills @cache during a single traversal.
    #
    # Same-named siblings are counted per parent, so a node's position does
    # not depend on how deep the traversal currently is, and ancestors are
    # never dropped from the path. The path is the parent's own ID plus this
    # node's segment, which keeps cached IDs identical in shape to the ones
    # compute_and_cache_path builds on demand.
    #
    # Assumes siblings are yielded in document order; parents may be yielded
    # before or after their children.
    def populate_cache
      # parent.object_id => { element_name => count seen so far }
      sibling_counts = Hash.new { |counts, parent_id| counts[parent_id] = Hash.new(0) }

      @document.traverse do |node|
        next unless named?(node)

        parent = parent_of(node)
        position = (sibling_counts[parent.object_id][node.name] += 1)

        @cache[node.object_id] = "#{ancestor_prefix(parent)}/#{node.name}[#{position}]"
      end
    end

    # Path of everything above a node: the parent's ID, or "" at the top.
    def ancestor_prefix(parent)
      return "" unless parent

      # get_node_id is nil for a nameless parent; its named ancestors (if
      # any) still belong in the path, so walk up from it in that case.
      path = get_node_id(parent) || compute_and_cache_path(parent)
      path == "/" ? "" : path
    end

    # Builds the path ID for +node+ by walking up through its ancestors.
    # Nameless ancestors are skipped. The caller stores the result.
    def compute_and_cache_path(node)
      segments = []
      current = node

      while current
        if named?(current)
          # Count position among siblings by iterating forward from parent
          segments.unshift("#{current.name}[#{calculate_position_fast(current)}]")
        end

        current = parent_of(current)
      end

      "/#{segments.join('/')}"
    end

    # 1-based position of +node+ among its parent's children of the same
    # name. Returns 1 when there is no parent to enumerate.
    def calculate_position_fast(node)
      parent = parent_of(node)
      return 1 unless parent.respond_to?(:children)

      position = 0
      parent.children.each do |child|
        next unless child.respond_to?(:name) && child.name == node.name

        position += 1
        break if child.equal?(node)
      end

      position
    end
  end
end
|