rack-utf8_sanitizer 1.8.0 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +1 -1
- data/README.md +5 -1
- data/lib/rack/utf8_sanitizer.rb +21 -7
- data/rack-utf8_sanitizer.gemspec +1 -1
- data/test/test_utf8_sanitizer.rb +88 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
|
4
|
+
data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
|
7
|
+
data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
|
data/.github/workflows/ci.yml
CHANGED
@@ -10,7 +10,7 @@ jobs:
|
|
10
10
|
strategy:
|
11
11
|
fail-fast: false
|
12
12
|
matrix:
|
13
|
-
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
13
|
+
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
14
14
|
|
15
15
|
steps:
|
16
16
|
- uses: actions/checkout@v3
|
data/README.md
CHANGED
@@ -113,7 +113,7 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
|
|
113
113
|
```
|
114
114
|
|
115
115
|
```ruby
|
116
|
-
replace_string = lambda do |_invalid|
|
116
|
+
replace_string = lambda do |_invalid, sanitize_null_bytes: false|
|
117
117
|
Rails.logger.warn('Replacing invalid string')
|
118
118
|
|
119
119
|
'<Bad Encoding>'.freeze
|
@@ -122,6 +122,10 @@ end
|
|
122
122
|
config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
|
123
123
|
```
|
124
124
|
|
125
|
+
### Sanitizing Null Bytes
|
126
|
+
|
127
|
+
While null bytes are valid UTF-8, it can be useful to further restrict the valid character set to exclude null bytes. For example, PostgreSQL text columns do not allow storing null bytes. Passing `sanitize_null_bytes: true` in the configuration hash enables sanitizing null bytes, and the two built-in strategies both support this feature. Custom strategies should accept a keyword argument `sanitize_null_bytes` containing this configuration value.
|
128
|
+
|
125
129
|
## Contributing
|
126
130
|
|
127
131
|
1. Fork it
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -7,6 +7,9 @@ module Rack
|
|
7
7
|
class UTF8Sanitizer
|
8
8
|
StringIO = ::StringIO
|
9
9
|
BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
|
10
|
+
NULL_BYTE_REGEX = /\x00/.freeze
|
11
|
+
|
12
|
+
class NullByteInString < StandardError; end
|
10
13
|
|
11
14
|
# options[:sanitizable_content_types] Array
|
12
15
|
# options[:additional_content_types] Array
|
@@ -17,6 +20,7 @@ module Rack
|
|
17
20
|
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
18
21
|
@only = Array(options[:only]).flatten
|
19
22
|
@except = Array(options[:except]).flatten
|
23
|
+
@sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
|
20
24
|
end
|
21
25
|
|
22
26
|
def call(env)
|
@@ -29,21 +33,29 @@ module Rack
|
|
29
33
|
end
|
30
34
|
|
31
35
|
DEFAULT_STRATEGIES = {
|
32
|
-
replace: lambda do |input|
|
36
|
+
replace: lambda do |input, sanitize_null_bytes: false|
|
33
37
|
input.
|
34
38
|
force_encoding(Encoding::ASCII_8BIT).
|
35
39
|
encode!(Encoding::UTF_8,
|
36
40
|
invalid: :replace,
|
37
41
|
undef: :replace)
|
42
|
+
if sanitize_null_bytes
|
43
|
+
input = input.gsub(NULL_BYTE_REGEX, "")
|
44
|
+
end
|
45
|
+
input
|
38
46
|
end,
|
39
|
-
exception: lambda do |input|
|
47
|
+
exception: lambda do |input, sanitize_null_bytes: false|
|
40
48
|
input.
|
41
49
|
force_encoding(Encoding::ASCII_8BIT).
|
42
50
|
encode!(Encoding::UTF_8)
|
51
|
+
if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
|
52
|
+
raise NullByteInString
|
53
|
+
end
|
54
|
+
input
|
43
55
|
end
|
44
56
|
}.freeze
|
45
57
|
|
46
|
-
#
|
58
|
+
# https://github.com/rack/rack/blob/main/SPEC.rdoc
|
47
59
|
URI_FIELDS = %w(
|
48
60
|
SCRIPT_NAME
|
49
61
|
REQUEST_PATH REQUEST_URI PATH_INFO
|
@@ -207,7 +219,8 @@ module Rack
|
|
207
219
|
|
208
220
|
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
209
221
|
# plus all multibyte UTF-8 characters.
|
210
|
-
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
|
222
|
+
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
|
223
|
+
UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
|
211
224
|
|
212
225
|
# RFC3986, 2.2 states that the characters from 'reserved' group must be
|
213
226
|
# protected during normalization (which is what UTF8Sanitizer does).
|
@@ -218,7 +231,8 @@ module Rack
|
|
218
231
|
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
219
232
|
decoded = $1.hex.chr
|
220
233
|
|
221
|
-
if decoded =~ UNRESERVED_OR_UTF8
|
234
|
+
decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
|
235
|
+
if decoded =~ decodable_regex
|
222
236
|
decoded
|
223
237
|
else
|
224
238
|
encoded
|
@@ -244,10 +258,10 @@ module Rack
|
|
244
258
|
if input.is_a? String
|
245
259
|
input = input.dup.force_encoding(Encoding::UTF_8)
|
246
260
|
|
247
|
-
if input.valid_encoding?
|
261
|
+
if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
|
248
262
|
input
|
249
263
|
else
|
250
|
-
@strategy.call(input)
|
264
|
+
@strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
|
251
265
|
end
|
252
266
|
else
|
253
267
|
input
|
data/rack-utf8_sanitizer.gemspec
CHANGED
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -337,6 +337,71 @@ describe Rack::UTF8Sanitizer do
|
|
337
337
|
@response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
|
338
338
|
end
|
339
339
|
end
|
340
|
+
|
341
|
+
it "does not sanitize null bytes by default" do
|
342
|
+
input = "foo=bla&quux=bar%00"
|
343
|
+
@rack_input = StringIO.new input
|
344
|
+
|
345
|
+
sanitize_form_data do |sanitized_input|
|
346
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
347
|
+
sanitized_input.should.be.valid_encoding
|
348
|
+
sanitized_input.should == input
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
it "optionally sanitizes null bytes with the replace strategy" do
|
353
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
354
|
+
input = "foo=bla\xED&quux=bar\x00"
|
355
|
+
@rack_input = StringIO.new input
|
356
|
+
|
357
|
+
sanitize_form_data do |sanitized_input|
|
358
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
359
|
+
sanitized_input.should.be.valid_encoding
|
360
|
+
sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
it "optionally sanitizes encoded null bytes with the replace strategy" do
|
365
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
366
|
+
input = "foo=bla%ED&quux=bar%00"
|
367
|
+
@rack_input = StringIO.new input
|
368
|
+
|
369
|
+
sanitize_form_data do |sanitized_input|
|
370
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
371
|
+
sanitized_input.should.be.valid_encoding
|
372
|
+
sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
it "optionally raises on null bytes with the exception strategy" do
|
377
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
378
|
+
input = "foo=bla&quux=bar\x00"
|
379
|
+
@rack_input = StringIO.new input
|
380
|
+
|
381
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
382
|
+
sanitize_form_data
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
it "optionally raises on encoded null bytes with the exception strategy" do
|
387
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
388
|
+
input = "foo=bla&quux=bar%00"
|
389
|
+
@rack_input = StringIO.new input
|
390
|
+
|
391
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
392
|
+
sanitize_form_data
|
393
|
+
end
|
394
|
+
end
|
395
|
+
|
396
|
+
it "gives precedence to encoding errors with the exception strategy and null byte sanitisation" do
|
397
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
398
|
+
input = "foo=bla\x00&quux=bar\xED"
|
399
|
+
@rack_input = StringIO.new input
|
400
|
+
|
401
|
+
should.raise(EncodingError) do
|
402
|
+
sanitize_form_data
|
403
|
+
end
|
404
|
+
end
|
340
405
|
end
|
341
406
|
|
342
407
|
describe "with custom content-type" do
|
@@ -552,7 +617,10 @@ describe Rack::UTF8Sanitizer do
|
|
552
617
|
end
|
553
618
|
|
554
619
|
it "accepts a proc as a strategy" do
|
555
|
-
truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
|
620
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
621
|
+
sanitize_null_bytes.should == false
|
622
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
623
|
+
end
|
556
624
|
|
557
625
|
@app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
|
558
626
|
|
@@ -566,5 +634,24 @@ describe Rack::UTF8Sanitizer do
|
|
566
634
|
sanitized_input.should == 'replace'
|
567
635
|
end
|
568
636
|
end
|
637
|
+
|
638
|
+
it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
|
639
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
640
|
+
sanitize_null_bytes.should == true
|
641
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
642
|
+
end
|
643
|
+
|
644
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
|
645
|
+
input = "foo=bla&quux=bar\x00"
|
646
|
+
|
647
|
+
@rack_input = StringIO.new input
|
648
|
+
|
649
|
+
env = request_env
|
650
|
+
sanitize_data(env) do |sanitized_input|
|
651
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
652
|
+
sanitized_input.should.be.valid_encoding
|
653
|
+
sanitized_input.should == 'replace'
|
654
|
+
end
|
655
|
+
end
|
569
656
|
end
|
570
657
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.0
|
4
|
+
version: 1.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -97,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
|
|
97
97
|
licenses:
|
98
98
|
- MIT
|
99
99
|
metadata: {}
|
100
|
-
post_install_message:
|
100
|
+
post_install_message:
|
101
101
|
rdoc_options: []
|
102
102
|
require_paths:
|
103
103
|
- lib
|
@@ -112,8 +112,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
112
|
- !ruby/object:Gem::Version
|
113
113
|
version: '0'
|
114
114
|
requirements: []
|
115
|
-
rubygems_version: 3.
|
116
|
-
signing_key:
|
115
|
+
rubygems_version: 3.3.15
|
116
|
+
signing_key:
|
117
117
|
specification_version: 4
|
118
118
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|
119
119
|
in request URI and headers.
|