safe_image 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +193 -0
- data/README.md +166 -11
- data/lib/safe_image/discourse_compat.rb +2 -13
- data/lib/safe_image/ico.rb +1 -1
- data/lib/safe_image/native.rb +24 -15
- data/lib/safe_image/optimizer.rb +79 -4
- data/lib/safe_image/processor.rb +1 -1
- data/lib/safe_image/remote.rb +174 -8
- data/lib/safe_image/runner.rb +9 -1
- data/lib/safe_image/sandbox.rb +41 -14
- data/lib/safe_image/svg_css.rb +314 -0
- data/lib/safe_image/svg_metadata.rb +179 -53
- data/lib/safe_image/svg_sanitizer.rb +524 -43
- data/lib/safe_image/version.rb +1 -1
- data/lib/safe_image/zygote.rb +619 -0
- data/lib/safe_image.rb +12 -0
- metadata +18 -2
|
@@ -1,93 +1,574 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "rexml/document"
|
|
4
|
-
require "rexml/formatters/default"
|
|
5
3
|
require "pathname"
|
|
6
4
|
require "tempfile"
|
|
5
|
+
require_relative "svg_css"
|
|
7
6
|
|
|
8
7
|
module SafeImage
|
|
8
|
+
# Allowlist SVG sanitizer. Parses untrusted SVG with Nokogiri (libxml2) and
|
|
9
|
+
# builds a *fresh* output tree containing only allowlisted elements,
|
|
10
|
+
# attributes, and namespaces — the svg-hush model. Nothing the attacker
|
|
11
|
+
# declared is ever carried over: there is no "remove the bad parts" step
|
|
12
|
+
# because only explicitly allowed content is ever added, so the output's
|
|
13
|
+
# element/attribute/namespace sets are a closed allowlist by construction. A
|
|
14
|
+
# bug therefore tends to drop legitimate content (fails closed, visible)
|
|
15
|
+
# rather than leak attacker content (fails open, silent).
|
|
16
|
+
#
|
|
17
|
+
# The structural caps and the byte-level encoding/DOCTYPE/PI rejection run
|
|
18
|
+
# first, in SvgMetadata, on the raw bytes — libxml2 only ever sees input that
|
|
19
|
+
# already passed those gates, so its default internal-entity expansion is
|
|
20
|
+
# unreachable (a DOCTYPE is rejected before parsing).
|
|
9
21
|
module SvgSanitizer
|
|
10
22
|
# Element names that may be created in the output tree. Membership is checked
# by allowed_element?; anything not listed is never built.
ALLOWED_ELEMENTS = %w[
  svg g defs title desc
  path rect circle ellipse line polyline polygon
  text tspan textPath
  linearGradient radialGradient stop
  clipPath mask pattern use symbol style
  marker
].freeze

# Attribute names that may be copied onto output elements (see
# allowed_attribute?). Values are still vetted by dangerous_value?, which
# limits any url() to a same-document #fragment reference.
ALLOWED_ATTRIBUTES = %w[
  id class
  x y x1 y1 x2 y2 cx cy r rx ry d points width height viewBox
  fill stroke stroke-width stroke-linecap stroke-linejoin stroke-miterlimit
  fill-rule clip-rule opacity fill-opacity stroke-opacity transform
  gradientUnits gradientTransform offset stop-color stop-opacity
  clip-path mask
  href xlink:href xmlns xmlns:xlink version preserveAspectRatio
  font-family font-size font-weight text-anchor style
  color stroke-dasharray stroke-dashoffset vector-effect
  marker marker-start marker-mid marker-end
  markerWidth markerHeight refX refY orient markerUnits
  display visibility overflow paint-order mix-blend-mode isolation
  shape-rendering image-rendering color-interpolation
  font-style font-variant font-stretch text-decoration
  letter-spacing word-spacing dominant-baseline baseline-shift
  writing-mode direction
].freeze

# Namespace URIs the sanitizer recognises on input and emits on output.
SVG_NAMESPACE = "http://www.w3.org/2000/svg"
XLINK_NAMESPACE = "http://www.w3.org/1999/xlink"

# Shape a caller-supplied id_namespace String must have: one ASCII letter, then
# letters, digits, "_" or "-". resolve_namespace rejects anything else instead
# of coercing it, so two distinct tokens cannot collapse into one.
NAMESPACE_PATTERN = /\A[A-Za-z][A-Za-z0-9_-]*\z/.freeze

# url() pointing at a same-document fragment: optional matching quotes, any
# letter case, whitespace allowed inside the parentheses. Group 2 captures the
# fragment name. dangerous_value? accepts only this url() form and the
# namespace rewrite targets exactly this form, so the two stay in agreement.
URL_FRAGMENT_REF = /url\(\s*(['"]?)#([A-Za-z][\w.-]*)\1\s*\)/i.freeze

# ARIA attributes whose value is one id or a whitespace-separated id list.
# namespace_references! prefixes each token so these references follow the
# renamed ids.
ARIA_IDREF_ATTRIBUTES = %w[
  aria-activedescendant aria-controls aria-describedby aria-details
  aria-errormessage aria-flowto aria-labelledby aria-owns
].freeze

# Geometry elements that draw a referenced <marker> per vertex, and the
# attributes that carry the marker reference. Both feed the render-expansion
# bound.
REPLICATING_ELEMENTS = %w[path line polyline polygon].freeze
MARKER_ATTRIBUTES = %w[marker marker-start marker-mid marker-end].freeze

# Default for sanitize!'s id_namespace keyword. It is neither :standalone nor
# a String, so resolve_namespace raises an explanatory ArgumentError when the
# caller omits the argument.
NAMESPACE_REQUIRED = Object.new.freeze
|
|
82
|
+
|
|
24
83
|
module_function
|
|
25
84
|
|
|
26
|
-
|
|
85
|
+
# Sanitizes the SVG file at +path+ in place, rebuilding it as a fresh tree that
# holds only allowlisted elements, attributes and CSS.
#
# path         - location of the SVG; resolved through SvgMetadata.safe_svg_path.
# max_pixels   - forwarded to SvgMetadata.dimensions_from_attributes.
# id_namespace - required. Either a String matching NAMESPACE_PATTERN (ids,
#                classes and same-document references are prefixed with it, the
#                root's overflow is dropped and, when a <style> survives, the
#                root receives the "<ns>-scope" class) or :standalone (no
#                namespacing). See resolve_namespace for the accepted values.
#
# Returns { format: "svg", sanitized: true, filesize: <bytes on disk> }.
#
# Raises ArgumentError for a missing or malformed id_namespace,
# InvalidImageError when the document has no <svg> root (or fails to parse),
# and whatever the render-expansion bound raises for oversized documents.
def sanitize!(path, max_pixels: nil, id_namespace: NAMESPACE_REQUIRED)
  require "nokogiri" # deliberate lazy load: only needed once an SVG is sanitized

  namespace = resolve_namespace(id_namespace)
  path = Pathname.new(SvgMetadata.safe_svg_path(path))

  # The raw bytes go through SvgMetadata's own checks before any DOM parse.
  xml = SvgMetadata.read_svg(path.to_s)
  _root_name, root_attributes = SvgMetadata.scan_svg!(xml)
  begin
    SvgMetadata.dimensions_from_attributes(root_attributes, max_pixels: max_pixels)
  rescue InvalidImageError => e
    # Missing dimensions are tolerated here; every other failure propagates.
    raise unless e.message.include?("dimensions are missing")
  end

  in_doc = parse(xml)
  in_root = in_doc.root
  # The root must really be <svg>. Being on the element allowlist is not
  # enough: a <g> or <style> root would otherwise be written back as a
  # "sanitized SVG", and a <style> root can even be unlinked from the output
  # document by build_style_element.
  unless in_root && in_root.name.to_s == "svg" && allowed_element?(in_root)
    raise InvalidImageError, "SVG root required"
  end

  out_doc = Nokogiri::XML::Document.new
  # The output root is attached before anything is built under it because
  # svg_namespace and ensure_xlink both look at out_doc.root.
  out_root = out_doc.create_element(in_root.name)
  out_doc.root = out_root
  out_root.namespace = svg_namespace(out_doc, out_root)
  populate_element(in_root, out_root, out_doc, namespace)

  # Reference namespacing is a separate pass over the assembled tree.
  namespace_tree!(out_root, namespace) if namespace

  reject_render_expansion!(out_root)

  if namespace
    neutralize_root_overflow!(out_root)
    apply_scope_class!(out_root, namespace) if contains_style?(out_root)
  end

  atomic_write(path, serialize(out_root))
  { format: "svg", sanitized: true, filesize: File.size(path.to_s) }
end
|
|
146
|
+
|
|
147
|
+
# Parses +xml+ with Nokogiri. The parse options are assigned outright, so NONET
# is the only flag in effect. A Nokogiri syntax error is re-raised as
# InvalidImageError with the parser's message appended.
def parse(xml)
  parse_flags = Nokogiri::XML::ParseOptions::NONET
  begin
    Nokogiri::XML(xml) { |config| config.options = parse_flags }
  rescue Nokogiri::XML::SyntaxError => e
    raise InvalidImageError, "invalid SVG: #{e.message}"
  end
end
|
|
46
157
|
|
|
47
|
-
|
|
48
|
-
|
|
158
|
+
# Creates the output twin of +in_element+ under +out_parent+ and returns it.
# The new node is bound to the SVG namespace and attached to its parent first,
# and only then filled in by populate_element.
def build_element(in_element, out_parent, out_doc, namespace)
  out_doc.create_element(in_element.name).tap do |node|
    node.namespace = svg_namespace(out_doc, node)
    out_parent.add_child(node)
    populate_element(in_element, node, out_doc, namespace)
  end
end
|
|
169
|
+
|
|
170
|
+
# Fills the attached output node +out+ from +in_element+.
#
# A <style> element is delegated entirely to build_style_element. For every
# other element the vetted attributes are copied first, then the children are
# walked: text and CDATA become plain text nodes, allowlisted child elements
# are rebuilt through build_element, and every other node kind is skipped.
# No reference namespacing happens here.
def populate_element(in_element, out, out_doc, namespace)
  if in_element.name == "style"
    build_style_element(in_element, out, namespace)
    return
  end

  copy_attributes(in_element, out, out_doc, namespace)

  in_element.children.each do |node|
    if node.is_a?(Nokogiri::XML::CDATA) || node.is_a?(Nokogiri::XML::Text)
      out.add_child(out_doc.create_text_node(node.content.to_s))
    elsif node.is_a?(Nokogiri::XML::Element)
      build_element(node, out, out_doc, namespace) if allowed_element?(node)
    end
  end
end
|
|
192
|
+
|
|
193
|
+
# Turns an input <style> into its output form. The text and CDATA children are
# concatenated and handed to SvgCss.sanitize_stylesheet; a truthy result
# becomes the single text child of +out+, otherwise +out+ is unlinked from the
# output tree. No attribute of the input <style> is copied.
def build_style_element(in_element, out, namespace)
  css_nodes = in_element.children.select { |node| node.text? || node.cdata? }
  raw_css = css_nodes.map(&:content).join
  clean_css = SvgCss.sanitize_stylesheet(raw_css, namespace: namespace)
  return out.unlink unless clean_css

  out.add_child(out.document.create_text_node(clean_css))
end
|
|
207
|
+
|
|
208
|
+
# Copies the attributes of +in_element+ that pass policy onto +out+.
#
# Namespace declarations are skipped. The style attribute is routed through
# SvgCss.sanitize_declarations and written last, and only when a sanitized
# value came back. Every other attribute must be allowlisted, must not be an
# event handler, must carry a non-dangerous value and, if it is an href, must
# be a #fragment. Reference namespacing is not done here.
def copy_attributes(in_element, out, out_doc, namespace)
  pending_style = nil

  in_element.attribute_nodes.each do |attr|
    next if namespace_declaration?(attr)

    raw = attr.value.to_s

    if attr_expanded_name(attr) == "style"
      css = SvgCss.sanitize_declarations(raw, namespace: namespace)
      pending_style = css if css
      next
    end

    rejected = !allowed_attribute?(attr) ||
               event_attribute?(attr) ||
               dangerous_value?(raw) ||
               invalid_href?(attr)
    set_attribute(out, out_doc, attr, raw) unless rejected
  end

  out["style"] = pending_style if pending_style
end
|
|
236
|
+
|
|
237
|
+
# Runs namespace_references! on +element+ and then, depth first, on each of its
# element descendants. Non-element children are ignored.
def namespace_tree!(element, namespace)
  namespace_references!(element, namespace)
  element.children.each do |child|
    next unless child.is_a?(Nokogiri::XML::Element)

    namespace_tree!(child, namespace)
  end
end
|
|
75
245
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
246
|
+
# Writes +value+ onto +out+ under the right attribute name. An href in the
# xlink namespace is written as "xlink:href", after ensure_xlink has declared
# the prefix on the output root; every other attribute is written under its
# own local name.
def set_attribute(out, out_doc, attr, value)
  xlink_href = href_attribute?(attr) && attr.namespace&.href == XLINK_NAMESPACE
  target_name =
    if xlink_href
      ensure_xlink(out_doc)
      "xlink:href"
    else
      attr.name.to_s
    end
  out[target_name] = value
end
|
|
258
|
+
|
|
259
|
+
# Declares the "xlink" prefix on the root of +out_doc+ unless a namespace
# definition with that prefix is already present there.
def ensure_xlink(out_doc)
  root = out_doc.root
  declared = root.namespace_definitions.map(&:prefix).include?("xlink")
  root.add_namespace_definition("xlink", XLINK_NAMESPACE) unless declared
end
|
|
265
|
+
|
|
266
|
+
# Returns the default (prefix-less) SVG namespace definition to bind +out+ to.
# The one already declared on the root of +out_doc+ is reused when present;
# otherwise a new definition is added on +out+ itself and returned.
def svg_namespace(out_doc, out)
  declared = out_doc.root&.namespace_definitions&.find do |definition|
    definition.prefix.nil? && definition.href == SVG_NAMESPACE
  end
  return declared if declared

  out.add_namespace_definition(nil, SVG_NAMESPACE)
end
|
|
271
|
+
|
|
272
|
+
# --- policy predicates against Nokogiri's attribute/namespace model ---
|
|
273
|
+
|
|
274
|
+
# True when the element's local name is on ALLOWED_ELEMENTS and it sits either
# in no namespace or in the SVG namespace.
def allowed_element?(element)
  ns_uri = element.namespace&.href.to_s
  return false unless ALLOWED_ELEMENTS.include?(element.name.to_s)

  ns_uri.empty? || ns_uri == SVG_NAMESPACE
end
|
|
278
|
+
|
|
279
|
+
# Decides whether an attribute may be copied at all. A recognised href (plain
# or xlink) always qualifies. Any other attribute must be in no namespace and
# either appear on ALLOWED_ATTRIBUTES or start with "aria-".
def allowed_attribute?(attr)
  if href_attribute?(attr)
    true
  elsif !attr.namespace.nil?
    false
  else
    local_name = attr.name.to_s
    local_name.start_with?("aria-") || ALLOWED_ATTRIBUTES.include?(local_name)
  end
end
|
|
289
|
+
|
|
290
|
+
# Defensive guard that recognises an xmlns declaration surfacing as an ordinary
# attribute: the local name starts with "xmlns" (which covers the bare "xmlns"
# name) or the attribute's namespace prefix is "xmlns".
def namespace_declaration?(attr)
  local_name = attr.name.to_s
  local_name.start_with?("xmlns") || attr.namespace&.prefix == "xmlns"
end
|
|
296
|
+
|
|
297
|
+
# True when the attribute name begins with "on", compared case-insensitively.
def event_attribute?(attr)
  lowered = attr.name.to_s.downcase
  lowered[0, 2] == "on"
end
|
|
300
|
+
|
|
301
|
+
# True for an attribute whose local name is "href" and which is either in no
# namespace or in the xlink namespace.
def href_attribute?(attr)
  return false unless attr.name.to_s == "href"

  attr_ns = attr.namespace
  attr_ns.nil? || attr_ns.href == XLINK_NAMESPACE
end
|
|
307
|
+
|
|
308
|
+
# True when +attr+ is an href whose value does not start with "#", i.e. it is
# not a same-document fragment reference. Non-href attributes are never invalid
# by this test.
def invalid_href?(attr)
  return false unless href_attribute?(attr)

  !attr.value.to_s.start_with?("#")
end
|
|
311
|
+
|
|
312
|
+
# Returns "prefix:name" when the attribute's namespace has a prefix, and the
# bare name (as a String) otherwise.
def attr_expanded_name(attr)
  ns_prefix = attr.namespace&.prefix
  return attr.name.to_s unless ns_prefix

  "#{ns_prefix}:#{attr.name}"
end
|
|
316
|
+
|
|
317
|
+
# Moves one element into the caller's namespace by rewriting, in this order:
#
#   1. its id;
#   2. each whitespace-separated class token, so host-page class selectors
#      cannot match it;
#   3. the fragment of each href / xlink:href that starts with "#";
#   4. each id token of the attributes listed in ARIA_IDREF_ATTRIBUTES;
#   5. each url(#fragment) in any attribute other than style (the style
#      attribute is skipped here).
#
# All prefixing goes through SvgCss.apply_namespace.
def namespace_references!(element, namespace)
  prefixed = ->(token) { SvgCss.apply_namespace(namespace, token) }
  tokens_of = ->(raw) { raw.split(/\s+/).reject(&:empty?) }

  own_id = element["id"]
  element["id"] = prefixed.call(own_id) if own_id

  if (class_value = element["class"])
    class_names = tokens_of.call(class_value)
    element["class"] = class_names.map(&prefixed).join(" ") unless class_names.empty?
  end

  element.attribute_nodes.each do |attr|
    next unless href_attribute?(attr)

    target = attr.value.to_s
    attr.value = "##{prefixed.call(target[1..])}" if target.start_with?("#")
  end

  ARIA_IDREF_ATTRIBUTES.each do |aria_name|
    raw_refs = element[aria_name]
    next unless raw_refs

    refs = tokens_of.call(raw_refs)
    element[aria_name] = refs.map(&prefixed).join(" ") unless refs.empty?
  end

  element.attribute_nodes.each do |attr|
    next if attr.name.to_s == "style"

    current = attr.value.to_s
    next unless current.match?(/url\(/i)

    updated = current.gsub(URL_FRAGMENT_REF) { "url(##{prefixed.call(Regexp.last_match(2))})" }
    attr.value = updated unless updated == current
  end
end
|
|
360
|
+
|
|
361
|
+
# Translates the caller's id_namespace into the token used for prefixing.
#
# :standalone yields nil (no namespacing). A String is returned unchanged when
# it matches NAMESPACE_PATTERN; a malformed String is rejected rather than
# coerced. Anything else, including the NAMESPACE_REQUIRED default, raises an
# ArgumentError that explains the two valid choices.
def resolve_namespace(id_namespace)
  return nil if id_namespace.equal?(:standalone)

  unless String === id_namespace
    raise ArgumentError,
          "id_namespace: is required. Pass a stable, per-document String (e.g. the upload sha) " \
          "to make the output safe to inline into HTML, or :standalone if it is only ever served " \
          "as an <img>/CSS-url/file and never spliced into a page's DOM."
  end

  unless id_namespace.match?(NAMESPACE_PATTERN)
    raise ArgumentError,
          "id_namespace: #{id_namespace.inspect} is not a valid namespace. It must be a letter " \
          "followed by letters/digits/_/- (e.g. prefix a sha like \"u<sha>\")."
  end

  id_namespace
end
|
|
381
|
+
|
|
382
|
+
# Ensures +root+ carries the "<namespace>-scope" class. The class is appended
# to any existing class list; nothing is written when it is already present,
# so repeated calls leave the attribute unchanged.
def apply_scope_class!(root, namespace)
  scope_class = "#{namespace}-scope"
  current = root["class"].to_s.split(/\s+/)
  root["class"] = [*current, scope_class].join(" ").strip unless current.include?(scope_class)
end
|
|
391
|
+
|
|
392
|
+
# True when +element+ is a <style> element or has one anywhere among its
# element descendants.
def contains_style?(element)
  element.name == "style" ||
    element.children.any? { |child| child.is_a?(Nokogiri::XML::Element) && contains_style?(child) }
end
|
|
396
|
+
|
|
397
|
+
# Strips every overflow setting from the root <svg> so it falls back to the
# default for an outermost svg. Called only in namespaced (inline) mode, where
# a small declared viewport with oversized content could otherwise paint over
# the host page. Inner elements keep their own overflow.
#
# Both the overflow presentation attribute and any overflow declaration in the
# root's style attribute are removed. Declarations are recognised by property
# name after trimming whitespace and lowercasing, so " overflow:visible",
# "OVERFLOW: scroll", "overflow : auto" and the overflow-x / overflow-y
# longhands are dropped too, not only text that literally begins with
# "overflow:". Whitespace-only segments are discarded, and the style attribute
# is removed entirely when nothing is left.
def neutralize_root_overflow!(root)
  root.remove_attribute("overflow")
  style = root["style"]
  return unless style

  kept = style.split(";").reject do |declaration|
    property = declaration.split(":", 2).first.to_s.strip.downcase
    declaration.strip.empty? || %w[overflow overflow-x overflow-y].include?(property)
  end

  if kept.empty?
    root.remove_attribute("style")
  else
    root["style"] = kept.join(";")
  end
end
|
|
82
415
|
|
|
83
416
|
# Returns true when an attribute value must not be copied to the output.
#
# A value is rejected when it:
#   * contains a backslash (CSS escapes could re-form tokens after these
#     checks, e.g. ur\6c( reads as url();
#   * contains "javascript:" or "data:" once whitespace and control characters
#     are stripped;
#   * calls var(), env() or attr(), which resolve against the embedding context;
#   * calls image-set(), image() or src(): these CSS functions accept a bare
#     string as a URL, so they can name an external resource without ever
#     writing url(. NOTE(review): no allowlisted attribute is known to need
#     them; this is defence in depth for attributes that accept CSS image
#     values (e.g. mask) - confirm against the renderers in scope;
#   * contains any url( that is not the canonical same-document form matched by
#     URL_FRAGMENT_REF (external URLs, mismatched quotes, unterminated url().
def dangerous_value?(value)
  raw = value.to_s
  return true if raw.include?("\\")

  normalized = raw.gsub(/[\u0000-\u0020\u007f]+/, "")
  return true if normalized.match?(/(?:javascript|data):/i)
  return true if normalized.match?(/(?:var|env|attr)\s*\(/i)

  # Checked on the raw value: a CSS function token is a name immediately
  # followed by "(", so "image (large)" in a text label is not a call.
  return true if raw.match?(/(?:image-set|image|src)\(/i)

  # Remove the accepted fragment references, then fail closed if any url(
  # introducer is left over.
  raw.gsub(URL_FRAGMENT_REF, "").match?(/url\s*\(/i)
end
|
|
441
|
+
|
|
442
|
+
# Entry point of the render-expansion bound. The sanitized output tree is
# walked once and the cost of everything it would instantiate when rendered is
# accumulated; check_render_expansion! raises once the running total passes
# the budget.
#
# Two features replicate content at render time and are charged here:
#
#   * <use href="#id"> is charged the full cost of its target subtree, so
#     nested uses that double at each level are counted at their expanded size
#     and a reference cycle is reported instead of being followed;
#   * a path/line/polyline/polygon that references a <marker> is charged
#     (vertex count) x (cost of the referenced marker subtrees).
#
# Subtree costs are memoised and an active-path set detects cycles (see
# subtree_render_cost). <use> and marker references are both resolved through
# the id map built by collect_ids.
#
# Returns the total cost computed for +root+.
def reject_render_expansion!(root)
  ids_by_name = {}.tap { |index| collect_ids(root, index) }
  subtree_render_cost(root, ids_by_name, {}, {})
end
|
|
465
|
+
|
|
466
|
+
# Records +element+ and its element descendants in +id_map+, keyed by id. The
# first element seen with a given id keeps the slot; later duplicates are
# ignored.
def collect_ids(element, id_map)
  if (raw_id = element["id"])
    id_key = raw_id.to_s
    id_map[id_key] = element unless id_map.key?(id_key)
  end

  element.children.each do |child|
    next unless child.is_a?(Nokogiri::XML::Element)

    collect_ids(child, id_map)
  end
end
|
|
473
|
+
|
|
474
|
+
# Returns the render cost of +element+: 1 for the node itself, plus the cost of
# each element child, plus the cost of everything a <use> pulls in, plus the
# per-vertex marker charge from marker_render_cost.
#
# element - node whose subtree is being costed.
# id_map  - id => element lookup built by collect_ids.
# memo    - object_id => finished cost, so a shared target is costed once.
# active  - object_ids currently being costed; meeting one again means the
#           references form a cycle.
#
# A <use> is charged for EVERY distinct same-document target named by its href
# attributes, not just the first attribute encountered. When both href and
# xlink:href are present, renderers do not all follow the same one (SVG 2 gives
# href precedence, older renderers only know xlink:href), so charging a single
# attribute lets the other point at an uncounted subtree.
#
# check_render_expansion! runs after every addition. Raises InvalidImageError
# on a reference cycle.
def subtree_render_cost(element, id_map, memo, active)
  key = element.object_id
  cached = memo[key]
  return cached if cached
  raise InvalidImageError, "SVG reference cycle" if active[key]

  active[key] = true
  cost = 1
  element.children.each do |child|
    next unless child.is_a?(Nokogiri::XML::Element)

    cost += subtree_render_cost(child, id_map, memo, active)
    check_render_expansion!(cost)
  end

  if use_element?(element)
    refs = element.attribute_nodes.select { |attr| href_attribute?(attr) }.map { |attr| attr.value.to_s }
    targets = refs.select { |ref| ref.start_with?("#") }.map { |ref| id_map[ref[1..]] }.compact.uniq
    targets.each do |target|
      cost += subtree_render_cost(target, id_map, memo, active)
      check_render_expansion!(cost)
    end
  end

  cost += marker_render_cost(element, id_map, memo, active)
  check_render_expansion!(cost)

  active.delete(key)
  memo[key] = cost
end
|
|
500
|
+
|
|
501
|
+
# Extra cost a geometry element incurs for its markers:
# (vertex count) x (sum of the costs of the distinct marker subtrees it
# references). Marker subtrees are costed through subtree_render_cost, so a
# marker that leads back to itself is caught by the active-path check and a
# marker containing <use> is charged at its expanded size.
#
# Returns 0 for elements outside REPLICATING_ELEMENTS, for elements that
# reference no known marker, and for elements with no vertices.
def marker_render_cost(element, id_map, memo, active)
  return 0 unless REPLICATING_ELEMENTS.include?(element.name.to_s)

  marker_nodes = referenced_markers(element, id_map)
  return 0 if marker_nodes.empty?

  vertex_total = path_vertex_count(element)
  return 0 if vertex_total.zero?

  cost_per_vertex = marker_nodes.reduce(0) do |sum, marker|
    sum + subtree_render_cost(marker, id_map, memo, active)
  end
  vertex_total * cost_per_vertex
end
|
|
519
|
+
|
|
520
|
+
# Collects the distinct marker subtrees that apply to a geometry element.
#
# Marker properties are inherited, so a reference set on an ancestor
# (<g marker-mid="url(#m)"><path .../></g>) is drawn on the path exactly as if
# the path carried it. The element and each of its element ancestors are
# therefore inspected; looking only at the element's own attributes let an
# inherited marker escape the per-vertex charge.
#
#   * On the element itself, the marker-* attributes and the whole style
#     attribute are scanned for url(#fragment) references.
#   * On ancestors, the marker-* attributes and only the marker declarations
#     inside style are scanned, so an ancestor's unrelated fill:url(#gradient)
#     is not mistaken for a marker.
#
# Including every ancestor over-approximates CSS inheritance (a nearer value
# would override a farther one); that only makes the bound more conservative.
# NOTE(review): markers applied through <style> rules are still not seen here.
#
# Returns the matching elements from +id_map+, without nils or duplicates.
def referenced_markers(element, id_map)
  targets = []
  collect = lambda do |text|
    text.to_s.scan(URL_FRAGMENT_REF) { |_quote, fragment| targets << id_map[fragment] }
  end

  node = element
  own = true
  while node.respond_to?(:element?) && node.element?
    MARKER_ATTRIBUTES.each { |name| collect.call(node[name]) }

    style = node["style"].to_s
    if own
      collect.call(style)
    else
      style.scan(/marker(?:-start|-mid|-end)?\s*:[^;]*/i) { |declaration| collect.call(declaration) }
    end

    own = false
    node = node.parent
  end

  targets.compact.uniq
end
|
|
533
|
+
|
|
534
|
+
# A deliberate upper bound on the vertices a geometry element renders; it is
# never an exact parse of the path grammar. Over-counting only tightens the
# render bound, under-counting would weaken it.
#
#   * Every run of digits in d / points counts as one coordinate. Counting
#     digit runs rather than decimal numbers matters: path data may pack
#     numbers as ".5.5.5", which a decimal-number pattern reads as fewer
#     values than the path contains, while h/v commands turn every single
#     number into a vertex.
#   * Every closepath command (z / Z) counts too, since it adds a segment
#     without adding any digits.
#   * A <line> takes its two end points from x1/y1/x2/y2, not from d / points,
#     so it always reports at least 2 vertices; otherwise a marked line would
#     be charged nothing.
#
# Returns 0 for a non-line element with no geometry, otherwise the count + 1.
def path_vertex_count(element)
  geometry = "#{element['d']} #{element['points']}"
  count = geometry.scan(/\d+|[zZ]/).length
  vertices = count.zero? ? 0 : count + 1
  element.name.to_s == "line" ? [vertices, 2].max : vertices
end
|
|
544
|
+
|
|
545
|
+
# Raises LimitError when +cost+ exceeds SvgMetadata::MAX_SVG_RENDER_UNITS;
# returns nil otherwise.
def check_render_expansion!(cost)
  limit = SvgMetadata::MAX_SVG_RENDER_UNITS
  raise LimitError, "SVG render expansion exceeds #{limit} rendered nodes" unless cost <= limit
end
|
|
550
|
+
|
|
551
|
+
# True when +element+ is named "use" and is in no namespace or in the SVG
# namespace.
def use_element?(element)
  return false unless element.name.to_s == "use"

  ns_uri = element.namespace&.href
  ns_uri.to_s.empty? || ns_uri == SVG_NAMESPACE
end
|
|
554
|
+
|
|
555
|
+
# Resolves the element a <use> points at. Only the first href / xlink:href
# attribute, in attribute order, is considered. Returns the +id_map+ entry for
# its fragment, or nil when there is no href, the value is not a "#fragment",
# or the id is unknown.
def use_target(element, id_map)
  link = element.attribute_nodes.find { |attr| href_attribute?(attr) }
  return unless link

  fragment = link.value.to_s
  return unless fragment.start_with?("#")

  id_map[fragment[1..]]
end
|
|
567
|
+
|
|
568
|
+
# Serializes +root+ to an XML string using Nokogiri's AS_XML and NO_DECLARATION
# save options.
def serialize(root)
  flags = Nokogiri::XML::Node::SaveOptions
  root.to_xml(save_with: flags::AS_XML | flags::NO_DECLARATION)
end
|
|
92
573
|
|
|
93
574
|
def atomic_write(path, content)
|
data/lib/safe_image/version.rb
CHANGED