rack-utf8_sanitizer 1.8.0 → 1.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +1 -1
- data/lib/rack/utf8_sanitizer.rb +19 -7
- data/rack-utf8_sanitizer.gemspec +1 -1
- data/test/test_utf8_sanitizer.rb +78 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9893353fe731d6c942263ff57af911b10d1c9f88ba2c2c2b56bb7a76586935e5
|
4
|
+
data.tar.gz: eeeaca39f55d680abe2707fa1aaadfe34eed06de2dd848506d184ae4ff326a8d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28cfbadcc1fbbf0678db3b51259618a9c9897c0cb78bcd8b8f5d87c91eddb44873a7a77fb21b9db47bcc704cb7df17d942c50436475858d0d9fcc17c2a8b59ce
|
7
|
+
data.tar.gz: 7e781931df60e405d533f5eb832416510a431a1c6fa74baeabffb9499533d9eba2f97ffa24f89ed79a193de5f6715c0461fff4fb0403c863a3643e4aaabbcf0a
|
data/.github/workflows/ci.yml
CHANGED
@@ -10,7 +10,7 @@ jobs:
|
|
10
10
|
strategy:
|
11
11
|
fail-fast: false
|
12
12
|
matrix:
|
13
|
-
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
13
|
+
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
14
14
|
|
15
15
|
steps:
|
16
16
|
- uses: actions/checkout@v3
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -7,6 +7,9 @@ module Rack
|
|
7
7
|
class UTF8Sanitizer
|
8
8
|
StringIO = ::StringIO
|
9
9
|
BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
|
10
|
+
NULL_BYTE_REGEX = /\x00/.freeze
|
11
|
+
|
12
|
+
class NullByteInString < StandardError; end
|
10
13
|
|
11
14
|
# options[:sanitizable_content_types] Array
|
12
15
|
# options[:additional_content_types] Array
|
@@ -17,6 +20,7 @@ module Rack
|
|
17
20
|
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
18
21
|
@only = Array(options[:only]).flatten
|
19
22
|
@except = Array(options[:except]).flatten
|
23
|
+
@sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
|
20
24
|
end
|
21
25
|
|
22
26
|
def call(env)
|
@@ -29,21 +33,27 @@ module Rack
|
|
29
33
|
end
|
30
34
|
|
31
35
|
DEFAULT_STRATEGIES = {
|
32
|
-
replace: lambda do |input|
|
36
|
+
replace: lambda do |input, sanitize_null_bytes: false|
|
37
|
+
if sanitize_null_bytes
|
38
|
+
input = input.gsub(NULL_BYTE_REGEX, "")
|
39
|
+
end
|
33
40
|
input.
|
34
41
|
force_encoding(Encoding::ASCII_8BIT).
|
35
42
|
encode!(Encoding::UTF_8,
|
36
43
|
invalid: :replace,
|
37
44
|
undef: :replace)
|
38
45
|
end,
|
39
|
-
exception: lambda do |input|
|
46
|
+
exception: lambda do |input, sanitize_null_bytes: false|
|
47
|
+
if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
|
48
|
+
raise NullByteInString
|
49
|
+
end
|
40
50
|
input.
|
41
51
|
force_encoding(Encoding::ASCII_8BIT).
|
42
52
|
encode!(Encoding::UTF_8)
|
43
53
|
end
|
44
54
|
}.freeze
|
45
55
|
|
46
|
-
#
|
56
|
+
# https://github.com/rack/rack/blob/main/SPEC.rdoc
|
47
57
|
URI_FIELDS = %w(
|
48
58
|
SCRIPT_NAME
|
49
59
|
REQUEST_PATH REQUEST_URI PATH_INFO
|
@@ -207,7 +217,8 @@ module Rack
|
|
207
217
|
|
208
218
|
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
209
219
|
# plus all multibyte UTF-8 characters.
|
210
|
-
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
|
220
|
+
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
|
221
|
+
UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
|
211
222
|
|
212
223
|
# RFC3986, 2.2 states that the characters from 'reserved' group must be
|
213
224
|
# protected during normalization (which is what UTF8Sanitizer does).
|
@@ -218,7 +229,8 @@ module Rack
|
|
218
229
|
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
219
230
|
decoded = $1.hex.chr
|
220
231
|
|
221
|
-
|
232
|
+
decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
|
233
|
+
if decoded =~ decodable_regex
|
222
234
|
decoded
|
223
235
|
else
|
224
236
|
encoded
|
@@ -244,10 +256,10 @@ module Rack
|
|
244
256
|
if input.is_a? String
|
245
257
|
input = input.dup.force_encoding(Encoding::UTF_8)
|
246
258
|
|
247
|
-
if input.valid_encoding?
|
259
|
+
if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
|
248
260
|
input
|
249
261
|
else
|
250
|
-
@strategy.call(input)
|
262
|
+
@strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
|
251
263
|
end
|
252
264
|
else
|
253
265
|
input
|
data/rack-utf8_sanitizer.gemspec
CHANGED
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -337,6 +337,61 @@ describe Rack::UTF8Sanitizer do
|
|
337
337
|
@response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
|
338
338
|
end
|
339
339
|
end
|
340
|
+
|
341
|
+
it "does not sanitize null bytes by default" do
|
342
|
+
input = "foo=bla&quux=bar%00"
|
343
|
+
@rack_input = StringIO.new input
|
344
|
+
|
345
|
+
sanitize_form_data do |sanitized_input|
|
346
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
347
|
+
sanitized_input.should.be.valid_encoding
|
348
|
+
sanitized_input.should == input
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
it "optionally sanitizes null bytes with the replace strategy" do
|
353
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
354
|
+
input = "foo=bla&quux=bar\x00"
|
355
|
+
@rack_input = StringIO.new input
|
356
|
+
|
357
|
+
sanitize_form_data do |sanitized_input|
|
358
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
359
|
+
sanitized_input.should.be.valid_encoding
|
360
|
+
sanitized_input.should == "foo=bla&quux=bar"
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
it "optionally sanitizes encoded null bytes with the replace strategy" do
|
365
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
366
|
+
input = "foo=bla&quux=bar%00"
|
367
|
+
@rack_input = StringIO.new input
|
368
|
+
|
369
|
+
sanitize_form_data do |sanitized_input|
|
370
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
371
|
+
sanitized_input.should.be.valid_encoding
|
372
|
+
sanitized_input.should == "foo=bla&quux=bar"
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
it "optionally raises on null bytes with the exception strategy" do
|
377
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
378
|
+
input = "foo=bla&quux=bar\x00"
|
379
|
+
@rack_input = StringIO.new input
|
380
|
+
|
381
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
382
|
+
sanitize_form_data
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
it "optionally raises on encoded null bytes with the exception strategy" do
|
387
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
388
|
+
input = "foo=bla&quux=bar%00"
|
389
|
+
@rack_input = StringIO.new input
|
390
|
+
|
391
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
392
|
+
sanitize_form_data
|
393
|
+
end
|
394
|
+
end
|
340
395
|
end
|
341
396
|
|
342
397
|
describe "with custom content-type" do
|
@@ -552,7 +607,10 @@ describe Rack::UTF8Sanitizer do
|
|
552
607
|
end
|
553
608
|
|
554
609
|
it "accepts a proc as a strategy" do
|
555
|
-
truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
|
610
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
611
|
+
sanitize_null_bytes.should == false
|
612
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
613
|
+
end
|
556
614
|
|
557
615
|
@app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
|
558
616
|
|
@@ -566,5 +624,24 @@ describe Rack::UTF8Sanitizer do
|
|
566
624
|
sanitized_input.should == 'replace'
|
567
625
|
end
|
568
626
|
end
|
627
|
+
|
628
|
+
it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
|
629
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
630
|
+
sanitize_null_bytes.should == true
|
631
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
632
|
+
end
|
633
|
+
|
634
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
|
635
|
+
input = "foo=bla&quux=bar\x00"
|
636
|
+
|
637
|
+
@rack_input = StringIO.new input
|
638
|
+
|
639
|
+
env = request_env
|
640
|
+
sanitize_data(env) do |sanitized_input|
|
641
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
642
|
+
sanitized_input.should.be.valid_encoding
|
643
|
+
sanitized_input.should == 'replace'
|
644
|
+
end
|
645
|
+
end
|
569
646
|
end
|
570
647
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.0
|
4
|
+
version: 1.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -97,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
|
|
97
97
|
licenses:
|
98
98
|
- MIT
|
99
99
|
metadata: {}
|
100
|
-
post_install_message:
|
100
|
+
post_install_message:
|
101
101
|
rdoc_options: []
|
102
102
|
require_paths:
|
103
103
|
- lib
|
@@ -113,7 +113,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
113
113
|
version: '0'
|
114
114
|
requirements: []
|
115
115
|
rubygems_version: 3.2.5
|
116
|
-
signing_key:
|
116
|
+
signing_key:
|
117
117
|
specification_version: 4
|
118
118
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|
119
119
|
in request URI and headers.
|