rack-utf8_sanitizer 1.8.0 → 1.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +1 -1
- data/README.md +5 -1
- data/lib/rack/utf8_sanitizer.rb +21 -7
- data/rack-utf8_sanitizer.gemspec +1 -1
- data/test/test_utf8_sanitizer.rb +88 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
|
4
|
+
data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
|
7
|
+
data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
|
data/.github/workflows/ci.yml
CHANGED
@@ -10,7 +10,7 @@ jobs:
|
|
10
10
|
strategy:
|
11
11
|
fail-fast: false
|
12
12
|
matrix:
|
13
|
-
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
13
|
+
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
14
14
|
|
15
15
|
steps:
|
16
16
|
- uses: actions/checkout@v3
|
data/README.md
CHANGED
@@ -113,7 +113,7 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
|
|
113
113
|
```
|
114
114
|
|
115
115
|
```ruby
|
116
|
-
replace_string = lambda do |_invalid|
|
116
|
+
replace_string = lambda do |_invalid, sanitize_null_bytes: false|
|
117
117
|
Rails.logger.warn('Replacing invalid string')
|
118
118
|
|
119
119
|
'<Bad Encoding>'.freeze
|
@@ -122,6 +122,10 @@ end
|
|
122
122
|
config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
|
123
123
|
```
|
124
124
|
|
125
|
+
### Sanitizing Null Bytes
|
126
|
+
|
127
|
+
While null bytes are valid UTF-8, it can be useful to further restrict the valid character set to exclude null bytes. For example, PostgreSQL text columns do not allow storing null bytes. Passing `sanitize_null_bytes: true` in the configuration hash enables sanitizing null bytes, and the two built-in strategies both support this feature. Custom strategies should accept a keyword argument `sanitize_null_bytes` containing this configuration value.
|
128
|
+
|
125
129
|
## Contributing
|
126
130
|
|
127
131
|
1. Fork it
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -7,6 +7,9 @@ module Rack
|
|
7
7
|
class UTF8Sanitizer
|
8
8
|
StringIO = ::StringIO
|
9
9
|
BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
|
10
|
+
NULL_BYTE_REGEX = /\x00/.freeze
|
11
|
+
|
12
|
+
class NullByteInString < StandardError; end
|
10
13
|
|
11
14
|
# options[:sanitizable_content_types] Array
|
12
15
|
# options[:additional_content_types] Array
|
@@ -17,6 +20,7 @@ module Rack
|
|
17
20
|
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
18
21
|
@only = Array(options[:only]).flatten
|
19
22
|
@except = Array(options[:except]).flatten
|
23
|
+
@sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
|
20
24
|
end
|
21
25
|
|
22
26
|
def call(env)
|
@@ -29,21 +33,29 @@ module Rack
|
|
29
33
|
end
|
30
34
|
|
31
35
|
DEFAULT_STRATEGIES = {
|
32
|
-
replace: lambda do |input|
|
36
|
+
replace: lambda do |input, sanitize_null_bytes: false|
|
33
37
|
input.
|
34
38
|
force_encoding(Encoding::ASCII_8BIT).
|
35
39
|
encode!(Encoding::UTF_8,
|
36
40
|
invalid: :replace,
|
37
41
|
undef: :replace)
|
42
|
+
if sanitize_null_bytes
|
43
|
+
input = input.gsub(NULL_BYTE_REGEX, "")
|
44
|
+
end
|
45
|
+
input
|
38
46
|
end,
|
39
|
-
exception: lambda do |input|
|
47
|
+
exception: lambda do |input, sanitize_null_bytes: false|
|
40
48
|
input.
|
41
49
|
force_encoding(Encoding::ASCII_8BIT).
|
42
50
|
encode!(Encoding::UTF_8)
|
51
|
+
if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
|
52
|
+
raise NullByteInString
|
53
|
+
end
|
54
|
+
input
|
43
55
|
end
|
44
56
|
}.freeze
|
45
57
|
|
46
|
-
#
|
58
|
+
# https://github.com/rack/rack/blob/main/SPEC.rdoc
|
47
59
|
URI_FIELDS = %w(
|
48
60
|
SCRIPT_NAME
|
49
61
|
REQUEST_PATH REQUEST_URI PATH_INFO
|
@@ -207,7 +219,8 @@ module Rack
|
|
207
219
|
|
208
220
|
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
209
221
|
# plus all multibyte UTF-8 characters.
|
210
|
-
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
|
222
|
+
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
|
223
|
+
UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
|
211
224
|
|
212
225
|
# RFC3986, 2.2 states that the characters from 'reserved' group must be
|
213
226
|
# protected during normalization (which is what UTF8Sanitizer does).
|
@@ -218,7 +231,8 @@ module Rack
|
|
218
231
|
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
219
232
|
decoded = $1.hex.chr
|
220
233
|
|
221
|
-
|
234
|
+
decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
|
235
|
+
if decoded =~ decodable_regex
|
222
236
|
decoded
|
223
237
|
else
|
224
238
|
encoded
|
@@ -244,10 +258,10 @@ module Rack
|
|
244
258
|
if input.is_a? String
|
245
259
|
input = input.dup.force_encoding(Encoding::UTF_8)
|
246
260
|
|
247
|
-
if input.valid_encoding?
|
261
|
+
if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
|
248
262
|
input
|
249
263
|
else
|
250
|
-
@strategy.call(input)
|
264
|
+
@strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
|
251
265
|
end
|
252
266
|
else
|
253
267
|
input
|
data/rack-utf8_sanitizer.gemspec
CHANGED
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -337,6 +337,71 @@ describe Rack::UTF8Sanitizer do
|
|
337
337
|
@response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
|
338
338
|
end
|
339
339
|
end
|
340
|
+
|
341
|
+
it "does not sanitize null bytes by default" do
|
342
|
+
input = "foo=bla&quux=bar%00"
|
343
|
+
@rack_input = StringIO.new input
|
344
|
+
|
345
|
+
sanitize_form_data do |sanitized_input|
|
346
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
347
|
+
sanitized_input.should.be.valid_encoding
|
348
|
+
sanitized_input.should == input
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
it "optionally sanitizes null bytes with the replace strategy" do
|
353
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
354
|
+
input = "foo=bla\xED&quux=bar\x00"
|
355
|
+
@rack_input = StringIO.new input
|
356
|
+
|
357
|
+
sanitize_form_data do |sanitized_input|
|
358
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
359
|
+
sanitized_input.should.be.valid_encoding
|
360
|
+
sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
it "optionally sanitizes encoded null bytes with the replace strategy" do
|
365
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
366
|
+
input = "foo=bla%ED&quux=bar%00"
|
367
|
+
@rack_input = StringIO.new input
|
368
|
+
|
369
|
+
sanitize_form_data do |sanitized_input|
|
370
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
371
|
+
sanitized_input.should.be.valid_encoding
|
372
|
+
sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
it "optionally raises on null bytes with the exception strategy" do
|
377
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
378
|
+
input = "foo=bla&quux=bar\x00"
|
379
|
+
@rack_input = StringIO.new input
|
380
|
+
|
381
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
382
|
+
sanitize_form_data
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
it "optionally raises on encoded null bytes with the exception strategy" do
|
387
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
388
|
+
input = "foo=bla&quux=bar%00"
|
389
|
+
@rack_input = StringIO.new input
|
390
|
+
|
391
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
392
|
+
sanitize_form_data
|
393
|
+
end
|
394
|
+
end
|
395
|
+
|
396
|
+
it "gives precedence to encoding errors with the exception strategy and null byte sanitisation" do
|
397
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
398
|
+
input = "foo=bla\x00&quux=bar\xED"
|
399
|
+
@rack_input = StringIO.new input
|
400
|
+
|
401
|
+
should.raise(EncodingError) do
|
402
|
+
sanitize_form_data
|
403
|
+
end
|
404
|
+
end
|
340
405
|
end
|
341
406
|
|
342
407
|
describe "with custom content-type" do
|
@@ -552,7 +617,10 @@ describe Rack::UTF8Sanitizer do
|
|
552
617
|
end
|
553
618
|
|
554
619
|
it "accepts a proc as a strategy" do
|
555
|
-
truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
|
620
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
621
|
+
sanitize_null_bytes.should == false
|
622
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
623
|
+
end
|
556
624
|
|
557
625
|
@app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
|
558
626
|
|
@@ -566,5 +634,24 @@ describe Rack::UTF8Sanitizer do
|
|
566
634
|
sanitized_input.should == 'replace'
|
567
635
|
end
|
568
636
|
end
|
637
|
+
|
638
|
+
it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
|
639
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
640
|
+
sanitize_null_bytes.should == true
|
641
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
642
|
+
end
|
643
|
+
|
644
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
|
645
|
+
input = "foo=bla&quux=bar\x00"
|
646
|
+
|
647
|
+
@rack_input = StringIO.new input
|
648
|
+
|
649
|
+
env = request_env
|
650
|
+
sanitize_data(env) do |sanitized_input|
|
651
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
652
|
+
sanitized_input.should.be.valid_encoding
|
653
|
+
sanitized_input.should == 'replace'
|
654
|
+
end
|
655
|
+
end
|
569
656
|
end
|
570
657
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.0
|
4
|
+
version: 1.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -97,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
|
|
97
97
|
licenses:
|
98
98
|
- MIT
|
99
99
|
metadata: {}
|
100
|
-
post_install_message:
|
100
|
+
post_install_message:
|
101
101
|
rdoc_options: []
|
102
102
|
require_paths:
|
103
103
|
- lib
|
@@ -112,8 +112,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
112
|
- !ruby/object:Gem::Version
|
113
113
|
version: '0'
|
114
114
|
requirements: []
|
115
|
-
rubygems_version: 3.
|
116
|
-
signing_key:
|
115
|
+
rubygems_version: 3.3.15
|
116
|
+
signing_key:
|
117
117
|
specification_version: 4
|
118
118
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|
119
119
|
in request URI and headers.
|