rack-utf8_sanitizer 1.8.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +1 -1
- data/lib/rack/utf8_sanitizer.rb +19 -7
- data/rack-utf8_sanitizer.gemspec +1 -1
- data/test/test_utf8_sanitizer.rb +78 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9893353fe731d6c942263ff57af911b10d1c9f88ba2c2c2b56bb7a76586935e5
|
4
|
+
data.tar.gz: eeeaca39f55d680abe2707fa1aaadfe34eed06de2dd848506d184ae4ff326a8d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28cfbadcc1fbbf0678db3b51259618a9c9897c0cb78bcd8b8f5d87c91eddb44873a7a77fb21b9db47bcc704cb7df17d942c50436475858d0d9fcc17c2a8b59ce
|
7
|
+
data.tar.gz: 7e781931df60e405d533f5eb832416510a431a1c6fa74baeabffb9499533d9eba2f97ffa24f89ed79a193de5f6715c0461fff4fb0403c863a3643e4aaabbcf0a
|
data/.github/workflows/ci.yml
CHANGED
@@ -10,7 +10,7 @@ jobs:
|
|
10
10
|
strategy:
|
11
11
|
fail-fast: false
|
12
12
|
matrix:
|
13
|
-
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
13
|
+
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
14
14
|
|
15
15
|
steps:
|
16
16
|
- uses: actions/checkout@v3
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -7,6 +7,9 @@ module Rack
|
|
7
7
|
class UTF8Sanitizer
|
8
8
|
StringIO = ::StringIO
|
9
9
|
BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
|
10
|
+
NULL_BYTE_REGEX = /\x00/.freeze
|
11
|
+
|
12
|
+
class NullByteInString < StandardError; end
|
10
13
|
|
11
14
|
# options[:sanitizable_content_types] Array
|
12
15
|
# options[:additional_content_types] Array
|
@@ -17,6 +20,7 @@ module Rack
|
|
17
20
|
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
18
21
|
@only = Array(options[:only]).flatten
|
19
22
|
@except = Array(options[:except]).flatten
|
23
|
+
@sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
|
20
24
|
end
|
21
25
|
|
22
26
|
def call(env)
|
@@ -29,21 +33,27 @@ module Rack
|
|
29
33
|
end
|
30
34
|
|
31
35
|
DEFAULT_STRATEGIES = {
|
32
|
-
replace: lambda do |input|
|
36
|
+
replace: lambda do |input, sanitize_null_bytes: false|
|
37
|
+
if sanitize_null_bytes
|
38
|
+
input = input.gsub(NULL_BYTE_REGEX, "")
|
39
|
+
end
|
33
40
|
input.
|
34
41
|
force_encoding(Encoding::ASCII_8BIT).
|
35
42
|
encode!(Encoding::UTF_8,
|
36
43
|
invalid: :replace,
|
37
44
|
undef: :replace)
|
38
45
|
end,
|
39
|
-
exception: lambda do |input|
|
46
|
+
exception: lambda do |input, sanitize_null_bytes: false|
|
47
|
+
if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
|
48
|
+
raise NullByteInString
|
49
|
+
end
|
40
50
|
input.
|
41
51
|
force_encoding(Encoding::ASCII_8BIT).
|
42
52
|
encode!(Encoding::UTF_8)
|
43
53
|
end
|
44
54
|
}.freeze
|
45
55
|
|
46
|
-
#
|
56
|
+
# https://github.com/rack/rack/blob/main/SPEC.rdoc
|
47
57
|
URI_FIELDS = %w(
|
48
58
|
SCRIPT_NAME
|
49
59
|
REQUEST_PATH REQUEST_URI PATH_INFO
|
@@ -207,7 +217,8 @@ module Rack
|
|
207
217
|
|
208
218
|
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
209
219
|
# plus all multibyte UTF-8 characters.
|
210
|
-
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
|
220
|
+
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
|
221
|
+
UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
|
211
222
|
|
212
223
|
# RFC3986, 2.2 states that the characters from 'reserved' group must be
|
213
224
|
# protected during normalization (which is what UTF8Sanitizer does).
|
@@ -218,7 +229,8 @@ module Rack
|
|
218
229
|
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
219
230
|
decoded = $1.hex.chr
|
220
231
|
|
221
|
-
if decoded =~ UNRESERVED_OR_UTF8
|
232
|
+
decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
|
233
|
+
if decoded =~ decodable_regex
|
222
234
|
decoded
|
223
235
|
else
|
224
236
|
encoded
|
@@ -244,10 +256,10 @@ module Rack
|
|
244
256
|
if input.is_a? String
|
245
257
|
input = input.dup.force_encoding(Encoding::UTF_8)
|
246
258
|
|
247
|
-
if input.valid_encoding?
|
259
|
+
if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
|
248
260
|
input
|
249
261
|
else
|
250
|
-
@strategy.call(input)
|
262
|
+
@strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
|
251
263
|
end
|
252
264
|
else
|
253
265
|
input
|
data/rack-utf8_sanitizer.gemspec
CHANGED
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -337,6 +337,61 @@ describe Rack::UTF8Sanitizer do
|
|
337
337
|
@response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
|
338
338
|
end
|
339
339
|
end
|
340
|
+
|
341
|
+
it "does not sanitize null bytes by default" do
|
342
|
+
input = "foo=bla&quux=bar%00"
|
343
|
+
@rack_input = StringIO.new input
|
344
|
+
|
345
|
+
sanitize_form_data do |sanitized_input|
|
346
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
347
|
+
sanitized_input.should.be.valid_encoding
|
348
|
+
sanitized_input.should == input
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
it "optionally sanitizes null bytes with the replace strategy" do
|
353
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
354
|
+
input = "foo=bla&quux=bar\x00"
|
355
|
+
@rack_input = StringIO.new input
|
356
|
+
|
357
|
+
sanitize_form_data do |sanitized_input|
|
358
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
359
|
+
sanitized_input.should.be.valid_encoding
|
360
|
+
sanitized_input.should == "foo=bla&quux=bar"
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
it "optionally sanitizes encoded null bytes with the replace strategy" do
|
365
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
366
|
+
input = "foo=bla&quux=bar%00"
|
367
|
+
@rack_input = StringIO.new input
|
368
|
+
|
369
|
+
sanitize_form_data do |sanitized_input|
|
370
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
371
|
+
sanitized_input.should.be.valid_encoding
|
372
|
+
sanitized_input.should == "foo=bla&quux=bar"
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
it "optionally raises on null bytes with the exception strategy" do
|
377
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
378
|
+
input = "foo=bla&quux=bar\x00"
|
379
|
+
@rack_input = StringIO.new input
|
380
|
+
|
381
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
382
|
+
sanitize_form_data
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
it "optionally raises on encoded null bytes with the exception strategy" do
|
387
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
388
|
+
input = "foo=bla&quux=bar%00"
|
389
|
+
@rack_input = StringIO.new input
|
390
|
+
|
391
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
392
|
+
sanitize_form_data
|
393
|
+
end
|
394
|
+
end
|
340
395
|
end
|
341
396
|
|
342
397
|
describe "with custom content-type" do
|
@@ -552,7 +607,10 @@ describe Rack::UTF8Sanitizer do
|
|
552
607
|
end
|
553
608
|
|
554
609
|
it "accepts a proc as a strategy" do
|
555
|
-
truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
|
610
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
611
|
+
sanitize_null_bytes.should == false
|
612
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
613
|
+
end
|
556
614
|
|
557
615
|
@app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
|
558
616
|
|
@@ -566,5 +624,24 @@ describe Rack::UTF8Sanitizer do
|
|
566
624
|
sanitized_input.should == 'replace'
|
567
625
|
end
|
568
626
|
end
|
627
|
+
|
628
|
+
it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
|
629
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
630
|
+
sanitize_null_bytes.should == true
|
631
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
632
|
+
end
|
633
|
+
|
634
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
|
635
|
+
input = "foo=bla&quux=bar\x00"
|
636
|
+
|
637
|
+
@rack_input = StringIO.new input
|
638
|
+
|
639
|
+
env = request_env
|
640
|
+
sanitize_data(env) do |sanitized_input|
|
641
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
642
|
+
sanitized_input.should.be.valid_encoding
|
643
|
+
sanitized_input.should == 'replace'
|
644
|
+
end
|
645
|
+
end
|
569
646
|
end
|
570
647
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.0
|
4
|
+
version: 1.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -97,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
|
|
97
97
|
licenses:
|
98
98
|
- MIT
|
99
99
|
metadata: {}
|
100
|
-
post_install_message:
|
100
|
+
post_install_message:
|
101
101
|
rdoc_options: []
|
102
102
|
require_paths:
|
103
103
|
- lib
|
@@ -113,7 +113,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
113
113
|
version: '0'
|
114
114
|
requirements: []
|
115
115
|
rubygems_version: 3.2.5
|
116
|
-
signing_key:
|
116
|
+
signing_key:
|
117
117
|
specification_version: 4
|
118
118
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|
119
119
|
in request URI and headers.
|