safe_image 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,93 +1,574 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "rexml/document"
4
- require "rexml/formatters/default"
5
3
  require "pathname"
6
4
  require "tempfile"
5
+ require_relative "svg_css"
7
6
 
8
7
  module SafeImage
8
+ # Allowlist SVG sanitizer. Parses untrusted SVG with Nokogiri (libxml2) and
9
+ # builds a *fresh* output tree containing only allowlisted elements,
10
+ # attributes, and namespaces — the svg-hush model. Nothing the attacker
11
+ # declared is ever carried over: there is no "remove the bad parts" step
12
+ # because only explicitly allowed content is ever added, so the output's
13
+ # element/attribute/namespace sets are a closed allowlist by construction. A
14
+ # bug therefore tends to drop legitimate content (fails closed, visible)
15
+ # rather than leak attacker content (fails open, silent).
16
+ #
17
+ # The structural caps and the byte-level encoding/DOCTYPE/PI rejection run
18
+ # first, in SvgMetadata, on the raw bytes — libxml2 only ever sees input that
19
+ # already passed those gates, so its default internal-entity expansion is
20
+ # unreachable (a DOCTYPE is rejected before parsing).
9
21
  module SvgSanitizer
10
22
  ALLOWED_ELEMENTS = %w[
11
- svg g defs title desc path rect circle ellipse line polyline polygon text tspan
12
- linearGradient radialGradient stop clipPath mask pattern use symbol
23
+ svg g defs title desc path rect circle ellipse line polyline polygon text tspan textPath
24
+ linearGradient radialGradient stop clipPath mask pattern use symbol style
25
+ marker
13
26
  ].freeze
14
27
 
28
+ # Presentation attributes. The CSS-property names here are mirrored by
29
+ # SvgCss::ALLOWED_PROPERTIES (a test asserts the subset relationship) so a
30
+ # style="" / <style> declaration and its attribute twin are treated alike.
31
+ # Attribute values that may carry url() (fill, stroke, clip-path, mask,
32
+ # marker*) are constrained to #fragment references by dangerous_value?.
15
33
  ALLOWED_ATTRIBUTES = %w[
16
34
  id class x y x1 y1 x2 y2 cx cy r rx ry d points width height viewBox
17
35
  fill stroke stroke-width stroke-linecap stroke-linejoin stroke-miterlimit
18
36
  fill-rule clip-rule opacity fill-opacity stroke-opacity transform
19
37
  gradientUnits gradientTransform offset stop-color stop-opacity clip-path
20
38
  mask href xlink:href xmlns xmlns:xlink version preserveAspectRatio
21
- font-family font-size font-weight text-anchor
39
+ font-family font-size font-weight text-anchor style
40
+ color stroke-dasharray stroke-dashoffset vector-effect
41
+ marker marker-start marker-mid marker-end
42
+ markerWidth markerHeight refX refY orient markerUnits
43
+ display visibility overflow paint-order mix-blend-mode isolation
44
+ shape-rendering image-rendering color-interpolation
45
+ font-style font-variant font-stretch text-decoration
46
+ letter-spacing word-spacing dominant-baseline baseline-shift
47
+ writing-mode direction
22
48
  ].freeze
23
49
 
50
+ SVG_NAMESPACE = "http://www.w3.org/2000/svg"
51
+ XLINK_NAMESPACE = "http://www.w3.org/1999/xlink"
52
+
53
+ # Caller namespace tokens must already be valid id/class idents so the
54
+ # prefixed ids and the scope class are well-formed; rejected, not coerced,
55
+ # so two distinct tokens can never collapse to one.
56
+ NAMESPACE_PATTERN = /\A[A-Za-z][A-Za-z0-9_-]*\z/.freeze
57
+
58
+ # A url() referencing a same-document fragment, with optional matching
59
+ # quotes, any case, surrounding whitespace allowed. This is the ONLY url()
60
+ # form dangerous_value? keeps in a presentation attribute, and exactly the
61
+ # form the namespace rewrite targets (capturing the fragment name) — so the
62
+ # validation and rewrite paths cannot disagree and leave a reference bare.
63
+ URL_FRAGMENT_REF = /url\(\s*(['"]?)#([A-Za-z][\w.-]*)\1\s*\)/i.freeze
64
+
65
+ # ARIA attributes whose values are an id or a space-separated list of ids.
66
+ # They are references like href/url(#…) and must move into the namespace too,
67
+ # or they bind to a host element (or dangle) when the SVG is inlined.
68
+ ARIA_IDREF_ATTRIBUTES = %w[
69
+ aria-activedescendant aria-controls aria-describedby aria-details
70
+ aria-errormessage aria-flowto aria-labelledby aria-owns
71
+ ].freeze
72
+
73
+ # Elements that instantiate a referenced <marker> once per vertex, and the
74
+ # attributes that carry the marker reference. Used by the render-expansion
75
+ # bound.
76
+ REPLICATING_ELEMENTS = %w[path line polyline polygon].freeze
77
+ MARKER_ATTRIBUTES = %w[marker marker-start marker-mid marker-end].freeze
78
+
79
+ # Sentinel marking id_namespace as unsupplied, so omitting it raises an
80
+ # instructive error rather than silently picking a safety posture.
81
+ NAMESPACE_REQUIRED = Object.new.freeze
82
+
24
83
  module_function
25
84
 
26
- def sanitize!(path, max_pixels: nil)
# Sanitizes the SVG file at +path+ in place: the input is parsed, a fresh
# output tree is built from allowlisted content only, and the serialized
# result is written back over the original file.
#
# id_namespace is required and selects where the output may be used:
#
#   * a String token (validated by resolve_namespace) enables the inline-safe
#     passes: namespace_tree!, neutralize_root_overflow!, and, when a <style>
#     survived, apply_scope_class!.
#   * :standalone skips those passes; the output is meant to be served only
#     as an external resource, never spliced into an HTML DOM.
#
# max_pixels is forwarded to SvgMetadata.dimensions_from_attributes. An
# InvalidImageError whose message contains "dimensions are missing" is
# tolerated; any other InvalidImageError from that call propagates.
#
# Returns { format: "svg", sanitized: true, filesize: <size of the rewritten file> }.
#
# Raises ArgumentError for a missing or malformed id_namespace,
# InvalidImageError when the document does not parse or its root is not an
# allowlisted <svg>, and LimitError when reject_render_expansion! trips.
def sanitize!(path, max_pixels: nil, id_namespace: NAMESPACE_REQUIRED)
  # Loaded lazily on purpose: only the sanitizer needs Nokogiri.
  require "nokogiri"

  namespace = resolve_namespace(id_namespace)
  path = Pathname.new(SvgMetadata.safe_svg_path(path))

  # The raw-byte gates in SvgMetadata run before any DOM parse, so libxml2
  # only ever sees input those gates already accepted.
  xml = SvgMetadata.read_svg(path.to_s)
  _root_name, root_attributes = SvgMetadata.scan_svg!(xml)
  begin
    SvgMetadata.dimensions_from_attributes(root_attributes, max_pixels: max_pixels)
  rescue InvalidImageError => e
    raise unless e.message.include?("dimensions are missing")
  end

  in_root = parse(xml).root
  # The root must be <svg> itself, not merely any allowlisted element: a
  # <style> root would be unlinked by build_style_element when its stylesheet
  # sanitizes to nothing, leaving the output document without a root.
  svg_root = in_root && in_root.name.to_s == "svg" && allowed_element?(in_root)
  raise InvalidImageError, "SVG root required" unless svg_root

  out_doc = Nokogiri::XML::Document.new
  # The output root must exist before anything is built under it: it carries
  # the only namespace declarations ever emitted (svg always, xlink lazily).
  out_root = out_doc.create_element(in_root.name)
  out_doc.root = out_root
  out_root.namespace = svg_namespace(out_doc, out_root)
  populate_element(in_root, out_root, out_doc, namespace)

  # Reference namespacing is a separate pass over the fully assembled tree.
  namespace_tree!(out_root, namespace) if namespace

  reject_render_expansion!(out_root)

  if namespace
    neutralize_root_overflow!(out_root)
    apply_scope_class!(out_root, namespace) if contains_style?(out_root)
  end

  atomic_write(path, serialize(out_root))
  { format: "svg", sanitized: true, filesize: File.size(path.to_s) }
end
146
+
# Parses +xml+ with Nokogiri using a deliberately minimal option set. The
# options are assigned (not OR-ed into the defaults), so NONET is the only
# parse flag in effect. DOCTYPE input is rejected upstream before this runs.
#
# Returns the parsed Nokogiri document. A Nokogiri::XML::SyntaxError is
# re-raised as InvalidImageError carrying the parser's message.
def parse(xml)
  offline_only = Nokogiri::XML::ParseOptions::NONET
  Nokogiri::XML(xml) { |config| config.options = offline_only }
rescue Nokogiri::XML::SyntaxError => e
  raise InvalidImageError, "invalid SVG: #{e.message}"
end
46
157
 
47
- def sanitize_element!(element)
48
- element.children.to_a.each do |child|
# Creates the output counterpart of an allowed input element under
# out_parent. The new node is bound to the SVG namespace and attached to its
# parent first, and only then populated, so population always happens on a
# node that is already part of the output tree.
#
# Returns the newly created output node.
def build_element(in_element, out_parent, out_doc, namespace)
  out_doc.create_element(in_element.name).tap do |node|
    node.namespace = svg_namespace(out_doc, node)
    out_parent.add_child(node)
    populate_element(in_element, node, out_doc, namespace)
  end
end
169
+
# Fills an existing output node from its input counterpart.
#
# A <style> element is delegated entirely to build_style_element (its
# attributes are never copied). For every other element the sanitized
# attributes are copied, then the children are walked: text and CDATA nodes
# become plain text nodes, allowed elements are rebuilt recursively, and any
# other child (comments, disallowed elements, ...) is simply never created.
# Reference namespacing is not done here.
def populate_element(in_element, out, out_doc, namespace)
  if in_element.name == "style"
    build_style_element(in_element, out, namespace)
    return
  end

  copy_attributes(in_element, out, out_doc, namespace)

  in_element.children.each do |node|
    if node.is_a?(Nokogiri::XML::CDATA) || node.is_a?(Nokogiri::XML::Text)
      out.add_child(out_doc.create_text_node(node.content.to_s))
    elsif node.is_a?(Nokogiri::XML::Element)
      build_element(node, out, out_doc, namespace) if allowed_element?(node)
    end
  end
end
192
+
# Collapses a <style> element into a single text node holding the sanitized
# stylesheet. The CSS source is the concatenation of the element's text and
# CDATA children; it is passed through SvgCss.sanitize_stylesheet together
# with the namespace. When that returns nothing, the output element is
# unlinked from the tree instead of being left behind empty.
def build_style_element(in_element, out, namespace)
  raw_css = in_element.children.filter_map { |node| node.content if node.text? || node.cdata? }.join
  sanitized = SvgCss.sanitize_stylesheet(raw_css, namespace: namespace)
  return out.unlink unless sanitized

  out.add_child(out.document.create_text_node(sanitized))
end
207
+
# Copies the policy-approved attributes of in_element onto out.
#
# Namespace declarations are skipped. The style attribute is special-cased:
# its value is run through SvgCss.sanitize_declarations and written last, and
# only when something survived. Every other attribute must pass, in order,
# the allowlist, the event-handler check, the value check, and the href
# check before set_attribute is called.
def copy_attributes(in_element, out, out_doc, namespace)
  inline_css = nil

  in_element.attribute_nodes.each do |attribute|
    next if namespace_declaration?(attribute)

    raw = attribute.value.to_s

    if attr_expanded_name(attribute) == "style"
      cleaned = SvgCss.sanitize_declarations(raw, namespace: namespace)
      inline_css = cleaned if cleaned
      next
    end

    rejected = !allowed_attribute?(attribute) ||
               event_attribute?(attribute) ||
               dangerous_value?(raw) ||
               invalid_href?(attribute)
    set_attribute(out, out_doc, attribute, raw) unless rejected
  end

  out["style"] = inline_css if inline_css
end
236
+
# Runs namespace_references! on +element+ and then, depth-first, on every
# element descendant. Non-element children (text nodes) are skipped.
def namespace_tree!(element, namespace)
  namespace_references!(element, namespace)
  element.children.each do |node|
    next unless node.is_a?(Nokogiri::XML::Element)

    namespace_tree!(node, namespace)
  end
end
75
245
 
76
- %w[href xlink:href].each do |href|
77
- next unless element.attributes[href]
78
- element.delete_attribute(href) unless element.attributes[href].to_s.start_with?("#")
# Writes one attribute onto the output node. An href that lives in the XLink
# namespace is written as "xlink:href", after ensure_xlink has made sure the
# prefix is declared on the output root; every other attribute is written
# under its plain local name.
def set_attribute(out, out_doc, attr, value)
  xlink_href = href_attribute?(attr) && attr.namespace&.href == XLINK_NAMESPACE
  ensure_xlink(out_doc) if xlink_href

  target_name = xlink_href ? "xlink:href" : attr.name.to_s
  out[target_name] = value
end
258
+
# Declares the "xlink" prefix on the output document's root unless a
# namespace definition with that prefix is already present there.
def ensure_xlink(out_doc)
  root = out_doc.root
  declared = root.namespace_definitions.map(&:prefix).include?("xlink")
  root.add_namespace_definition("xlink", XLINK_NAMESPACE) unless declared
end
265
+
# Returns the default (prefix-less) SVG namespace to bind an output node to:
# the definition already declared on the document root when there is one,
# otherwise a new default-namespace definition added to +out+ itself.
def svg_namespace(out_doc, out)
  definitions = out_doc.root&.namespace_definitions
  declared = definitions&.find { |ns| ns.prefix.nil? && ns.href == SVG_NAMESPACE }
  return declared if declared

  out.add_namespace_definition(nil, SVG_NAMESPACE)
end
271
+
272
+ # --- policy predicates against Nokogiri's attribute/namespace model ---
273
+
# True when the element's local name is on ALLOWED_ELEMENTS and the element
# is either in no namespace at all or in the SVG namespace.
def allowed_element?(element)
  return false unless ALLOWED_ELEMENTS.include?(element.name.to_s)

  uri = element.namespace&.href.to_s
  uri.empty? || uri == SVG_NAMESPACE
end
278
+
# Decides whether an input attribute may be considered for copying:
#   * a recognised href (see href_attribute?) is always allowed;
#   * any other attribute that carries a namespace is never allowed;
#   * a namespace-less attribute is allowed when it is on ALLOWED_ATTRIBUTES
#     or its name starts with "aria-".
def allowed_attribute?(attr)
  if href_attribute?(attr)
    true
  elsif !attr.namespace.nil?
    false
  else
    key = attr.name.to_s
    key.start_with?("aria-") || ALLOWED_ATTRIBUTES.include?(key)
  end
end
289
+
# Defensive guard for xmlns declarations surfacing as ordinary attributes:
# true when the attribute's name begins with "xmlns" (which covers the bare
# "xmlns" as well) or when its namespace prefix is "xmlns".
def namespace_declaration?(attr)
  attr.name.to_s.start_with?("xmlns") || attr.namespace&.prefix == "xmlns"
end
296
+
# True when the attribute name starts with "on", compared case-insensitively
# (onclick, ONLOAD, ...).
def event_attribute?(attr)
  /\Aon/i.match?(attr.name.to_s)
end
300
+
# True for an attribute whose local name is "href" and which is either in no
# namespace (plain href) or in the XLink namespace (xlink:href).
def href_attribute?(attr)
  return false unless attr.name.to_s == "href"

  ns = attr.namespace
  ns.nil? || ns.href == XLINK_NAMESPACE
end
307
+
# True when the attribute is an href (plain or xlink) whose value is anything
# other than a "#..." same-document reference.
def invalid_href?(attr)
  return false unless href_attribute?(attr)

  !attr.value.to_s.start_with?("#")
end
311
+
# Returns the attribute's name as written in the source: "prefix:name" when
# its namespace has a prefix, otherwise just the local name.
def attr_expanded_name(attr)
  [attr.namespace&.prefix, attr.name].compact.join(":")
end
316
+
# Moves one element's identifiers and same-document references into the
# caller's namespace via SvgCss.apply_namespace, in this order:
#
#   1. the element's own id;
#   2. every whitespace-separated class token (so host-page class selectors
#      cannot match attacker-chosen class names);
#   3. "#fragment" values of href / xlink:href attributes;
#   4. the id lists held by the ARIA_IDREF_ATTRIBUTES;
#   5. url(#fragment) references in every attribute except style, each
#      rewritten to the unquoted form url(#<namespaced>).
#
# The style attribute is skipped in step 5; its url()s are expected to have
# been namespaced already by SvgCss when the attribute was sanitized.
def namespace_references!(element, namespace)
  prefixed = ->(token) { SvgCss.apply_namespace(namespace, token) }

  own_id = element["id"]
  element["id"] = prefixed.call(own_id) if own_id

  class_value = element["class"]
  if class_value
    class_tokens = class_value.split(/\s+/).reject(&:empty?)
    element["class"] = class_tokens.map(&prefixed).join(" ") unless class_tokens.empty?
  end

  element.attribute_nodes.each do |attr|
    next unless href_attribute?(attr)

    target = attr.value.to_s
    attr.value = "##{prefixed.call(target[1..])}" if target.start_with?("#")
  end

  ARIA_IDREF_ATTRIBUTES.each do |aria|
    refs = element[aria].to_s.split(/\s+/).reject(&:empty?)
    next if refs.empty?

    element[aria] = refs.map(&prefixed).join(" ")
  end

  element.attribute_nodes.each do |attr|
    next if attr.name.to_s == "style"

    current = attr.value.to_s
    next unless current.match?(/url\(/i)

    updated = current.gsub(URL_FRAGMENT_REF) { "url(##{prefixed.call(Regexp.last_match(2))})" }
    attr.value = updated unless updated == current
  end
end
360
+
# Translates the id_namespace argument of sanitize! into a namespace token.
#
# Returns nil for :standalone, and the String itself when it matches
# NAMESPACE_PATTERN. Raises ArgumentError for a String that does not match
# (malformed tokens are rejected, never coerced) and for any other value,
# including the "not supplied" sentinel.
def resolve_namespace(id_namespace)
  return nil if :standalone == id_namespace

  unless id_namespace.is_a?(String)
    raise ArgumentError,
          "id_namespace: is required. Pass a stable, per-document String (e.g. the upload sha) " \
          "to make the output safe to inline into HTML, or :standalone if it is only ever served " \
          "as an <img>/CSS-url/file and never spliced into a page's DOM."
  end

  return id_namespace if id_namespace.match?(NAMESPACE_PATTERN)

  raise ArgumentError,
        "id_namespace: #{id_namespace.inspect} is not a valid namespace. It must be a letter " \
        "followed by letters/digits/_/- (e.g. prefix a sha like \"u<sha>\")."
end
381
+
# Ensures the root carries the "<namespace>-scope" class, appending it to any
# classes already present. Does nothing when the class is already there, so
# calling it repeatedly leaves the attribute unchanged.
def apply_scope_class!(root, namespace)
  scope_class = "#{namespace}-scope"
  existing = root["class"].to_s.split(/\s+/)
  return if existing.include?(scope_class)

  root["class"] = [*existing, scope_class].join(" ").strip
end
391
+
# True when +element+ is a <style> element or has one anywhere among its
# element descendants.
def contains_style?(element)
  element.name == "style" ||
    element.children.any? { |node| node.is_a?(Nokogiri::XML::Element) && contains_style?(node) }
end
396
+
# Strips every overflow setting the document placed on its root element, so
# that in inline (namespaced) output the root falls back to the default
# clipping of an outermost <svg> instead of spilling over the host page.
# Descendant elements are not touched.
#
# Both forms are removed: the overflow attribute, and overflow declarations
# inside the root's style attribute. Declarations are matched tolerantly —
# leading whitespace, any letter case, whitespace before the colon, and the
# overflow-x / overflow-y longhands — so the result does not depend on how
# the CSS sanitizer happens to format its output. Unrelated properties such
# as overflow-wrap are kept. When nothing but whitespace remains, the style
# attribute is removed altogether.
def neutralize_root_overflow!(root)
  root.remove_attribute("overflow")
  style = root["style"]
  return unless style

  kept = style.split(";").reject do |declaration|
    declaration.match?(/\A\s*overflow(?:-[xy])?\s*:/i)
  end

  if kept.all? { |declaration| declaration.strip.empty? }
    root.remove_attribute("style")
  else
    root["style"] = kept.join(";")
  end
end
82
415
 
# Returns true when an attribute value must not be copied to the output.
# Checks, in order:
#
#   1. Any backslash. CSS escapes can re-form tokens after pattern checks
#      (\6c is "l", so "ur\6c(" becomes "url("), and no allowlisted attribute
#      legitimately needs one.
#   2. "javascript:" or "data:" anywhere, matched after whitespace and
#      control characters have been removed.
#   3. var( / env( / attr( — functions that resolve against context outside
#      the sanitized document.
#   4. Any "url(" left over once every canonical same-document reference
#      (URL_FRAGMENT_REF) has been stripped from the raw value. This rejects
#      external URLs, mismatched quotes, and unterminated url( alike, and
#      keeps validation aligned with the namespace rewrite, which uses the
#      same pattern.
def dangerous_value?(value)
  raw = value.to_s
  return true if raw.include?("\\")

  squeezed = raw.gsub(/[\u0000-\u0020\u007f]+/, "")
  return true if squeezed.match?(/(?:javascript|data):/i) || squeezed.match?(/(?:var|env|attr)\s*\(/i)

  raw.gsub(URL_FRAGMENT_REF, "").match?(/url\s*\(/i)
end
441
+
# Bounds the render tree the sanitized document would instantiate.
#
# Source-level caps cannot see content that is replicated at render time, so
# the output tree is walked once and an instantiated cost is accumulated
# against a single budget (see check_render_expansion!):
#
#   * <use href="#id"> is charged the cost of its target subtree, which
#     covers exponential "use bombs" and cyclic references;
#   * a geometry element that references markers is charged
#     (vertex count) x (referenced marker subtree cost).
#
# The walk builds an id -> element index first, then delegates to
# subtree_render_cost with empty memo and active-path tables. Returns the
# total cost of the root; raises if the budget is exceeded or a reference
# cycle is found.
def reject_render_expansion!(root)
  elements_by_id = {}.tap { |index| collect_ids(root, index) }
  subtree_render_cost(root, elements_by_id, {}, {})
end
465
+
# Fills id_map (id string -> element) for +element+ and all of its element
# descendants, in document order. When an id occurs more than once, the
# first element encountered keeps the entry.
def collect_ids(element, id_map)
  key = element["id"]&.to_s
  id_map[key] = element unless key.nil? || id_map.key?(key)

  element.children.each do |node|
    next unless node.is_a?(Nokogiri::XML::Element)

    collect_ids(node, id_map)
  end
end
473
+
# Computes the instantiated render cost of +element+: 1 for the element
# itself, plus the cost of each element child, plus the target subtree when
# the element is a <use> with a resolvable reference, plus its marker cost.
# The running total is checked against the budget after every addition.
#
# memo caches finished costs by object_id so shared targets are costed once;
# active holds the elements currently being costed, so reaching one of them
# again means a reference cycle and raises InvalidImageError.
def subtree_render_cost(element, id_map, memo, active)
  key = element.object_id
  known = memo[key]
  return known if known
  raise InvalidImageError, "SVG reference cycle" if active[key]

  active[key] = true
  total = 1
  # Adds to the running total and enforces the budget in one step.
  charge = lambda do |units|
    total += units
    check_render_expansion!(total)
  end

  element.children.each do |node|
    next unless node.is_a?(Nokogiri::XML::Element)

    charge.call(subtree_render_cost(node, id_map, memo, active))
  end

  if use_element?(element) && (target = use_target(element, id_map))
    charge.call(subtree_render_cost(target, id_map, memo, active))
  end

  charge.call(marker_render_cost(element, id_map, memo, active))

  active.delete(key)
  memo[key] = total
end
500
+
# Cost of the markers a geometry element instantiates:
# (vertex count) x (sum of the subtree costs of the distinct markers it
# references). Returns 0 for elements that are not REPLICATING_ELEMENTS, that
# reference no marker, or that have no vertices.
#
# Marker subtrees are costed through subtree_render_cost, so the shared memo
# and active-path tables apply to them as well.
def marker_render_cost(element, id_map, memo, active)
  return 0 unless REPLICATING_ELEMENTS.include?(element.name.to_s)

  referenced = referenced_markers(element, id_map)
  vertex_count = referenced.empty? ? 0 : path_vertex_count(element)
  return 0 if vertex_count.zero?

  per_vertex = referenced.inject(0) do |sum, marker|
    sum + subtree_render_cost(marker, id_map, memo, active)
  end
  vertex_count * per_vertex
end
519
+
# Collects the distinct elements a geometry element may draw as markers.
#
# Sources scanned for url(#fragment) references:
#   * the element's own marker / marker-start / marker-mid / marker-end
#     attributes and its whole style attribute;
#   * the same marker attributes, and the marker declarations inside the
#     style attribute, of every ancestor element. The marker properties are
#     inherited, so a marker set on an enclosing <g> or on the root is drawn
#     on this element's vertices too and has to be charged.
#
# This deliberately over-approximates: a nearer "none" override is ignored
# and every ancestor declaration is counted, which can only raise the cost.
#
# NOTE(review): markers applied by a <style> rule, and markers inherited by
# <use>-instantiated content from the <use> element's ancestors, are not
# visible from here — confirm whether those paths need a bound as well.
#
# Returns an Array of elements from id_map (unknown ids are dropped).
def referenced_markers(element, id_map)
  sources = MARKER_ATTRIBUTES.map { |name| element[name].to_s }
  sources << element["style"].to_s

  ancestor = element.parent
  while ancestor.is_a?(Nokogiri::XML::Element)
    sources.concat(inherited_marker_sources(ancestor))
    ancestor = ancestor.parent
  end

  targets = []
  sources.each do |value|
    value.scan(URL_FRAGMENT_REF) { |_quote, fragment| targets << id_map[fragment] }
  end
  targets.compact.uniq
end

# Marker-bearing values an ancestor passes down by inheritance: its marker
# attributes plus only the marker declarations of its style attribute (other
# url() references there, such as fill, are not markers).
def inherited_marker_sources(node)
  declarations = node["style"].to_s.split(";")
  style_markers = declarations.select { |decl| decl.match?(/\A\s*marker(?:-start|-mid|-end)?\s*:/i) }
  MARKER_ATTRIBUTES.map { |name| node[name].to_s } + style_markers
end
533
+
# A deliberate upper bound on the vertices a geometry element renders — never
# an exact parse of the path grammar. Over-counting only tightens the render
# bound; under-counting would be the bug.
#
# Counted as potential vertices:
#   * every run of digits in `d` / `points` (each vertex needs at least one
#     number, so this is >= the vertices defined by coordinates);
#   * every closepath command (z / Z) in `d`, which adds a vertex without
#     carrying any digits;
#   * plus one, whenever anything was counted.
#
# A <line> takes its geometry from x1/y1/x2/y2 rather than `d` / `points`,
# yet still has a start and an end vertex where markers are drawn, so it is
# never reported as fewer than 2.
#
# Returns 0 only when the element has no countable geometry.
def path_vertex_count(element)
  path_data = element["d"].to_s
  geometry = "#{path_data} #{element['points']}"

  counted = geometry.scan(/\d+(?:\.\d+)?/).length + path_data.count("zZ")
  counted += 1 unless counted.zero?

  element.name.to_s == "line" ? [counted, 2].max : counted
end
544
+
# Raises LimitError once the accumulated render cost exceeds
# SvgMetadata::MAX_SVG_RENDER_UNITS; a cost equal to the limit is accepted.
def check_render_expansion!(cost)
  limit = SvgMetadata::MAX_SVG_RENDER_UNITS
  raise LimitError, "SVG render expansion exceeds #{limit} rendered nodes" if cost > limit
end
550
+
# True when the element is named "use" and is either in no namespace or in
# the SVG namespace.
def use_element?(element)
  return false unless element.name.to_s == "use"

  uri = element.namespace&.href
  uri.to_s.empty? || uri == SVG_NAMESPACE
end
554
+
# Resolves the element a <use> points at. Only the first href attribute
# (plain or xlink) is considered; when its value is a "#fragment" the
# fragment is looked up in id_map. Returns nil when there is no href, the
# value is not a fragment, or the id is unknown.
def use_target(element, id_map)
  href = element.attribute_nodes.find { |attr| href_attribute?(attr) }
  ref = href&.value.to_s
  id_map[ref[1..]] if ref.start_with?("#")
end
567
+
# Serializes the output root with Nokogiri's AS_XML and NO_DECLARATION save
# options combined and returns the resulting string.
def serialize(root)
  flags = Nokogiri::XML::Node::SaveOptions
  root.to_xml(save_with: flags::AS_XML | flags::NO_DECLARATION)
end
92
573
 
93
574
  def atomic_write(path, content)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SafeImage
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
5
5
  end