rack-utf8_sanitizer 1.7.0 → 1.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.editorconfig +17 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/ci.yml +23 -0
- data/.travis.yml +4 -7
- data/README.md +5 -1
- data/lib/rack/utf8_sanitizer.rb +28 -8
- data/rack-utf8_sanitizer.gemspec +2 -3
- data/test/test_utf8_sanitizer.rb +104 -3
- metadata +11 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
|
4
|
+
data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
|
7
|
+
data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
|
data/.editorconfig
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
root = true
|
2
|
+
|
3
|
+
[*]
|
4
|
+
indent_style = space
|
5
|
+
indent_size = 2
|
6
|
+
end_of_line = lf
|
7
|
+
charset = utf-8
|
8
|
+
trim_trailing_whitespace = true
|
9
|
+
insert_final_newline = true
|
10
|
+
|
11
|
+
[*.md]
|
12
|
+
indent_style = space
|
13
|
+
indent_size = 2
|
14
|
+
|
15
|
+
[*.y{a,}ml]
|
16
|
+
indent_style = space
|
17
|
+
indent_size = 2
|
data/.github/workflows/ci.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
|
8
|
+
runs-on: ubuntu-latest
|
9
|
+
|
10
|
+
strategy:
|
11
|
+
fail-fast: false
|
12
|
+
matrix:
|
13
|
+
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
14
|
+
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v3
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
bundler-cache: true # 'bundle install' and cache gems
|
21
|
+
ruby-version: ${{ matrix.ruby }}
|
22
|
+
- name: Run tests
|
23
|
+
run: bundle exec rake
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -113,7 +113,7 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
|
|
113
113
|
```
|
114
114
|
|
115
115
|
```ruby
|
116
|
-
replace_string = lambda do |_invalid|
|
116
|
+
replace_string = lambda do |_invalid, sanitize_null_bytes: false|
|
117
117
|
Rails.logger.warn('Replacing invalid string')
|
118
118
|
|
119
119
|
'<Bad Encoding>'.freeze
|
@@ -122,6 +122,10 @@ end
|
|
122
122
|
config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
|
123
123
|
```
|
124
124
|
|
125
|
+
### Sanitizing Null Bytes
|
126
|
+
|
127
|
+
While null bytes are valid UTF-8, it can be useful to further restrict the valid character set to exclude null bytes. For example, PostgreSQL text columns do not allow storing null bytes. Passing `sanitize_null_bytes: true` in the configuration hash enables sanitizing null bytes, and the two built-in strategies both support this feature. Custom strategies should accept a keyword argument `sanitize_null_bytes` containing this configuration value.
|
128
|
+
|
125
129
|
## Contributing
|
126
130
|
|
127
131
|
1. Fork it
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -6,6 +6,10 @@ require 'stringio'
|
|
6
6
|
module Rack
|
7
7
|
class UTF8Sanitizer
|
8
8
|
StringIO = ::StringIO
|
9
|
+
BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
|
10
|
+
NULL_BYTE_REGEX = /\x00/.freeze
|
11
|
+
|
12
|
+
class NullByteInString < StandardError; end
|
9
13
|
|
10
14
|
# options[:sanitizable_content_types] Array
|
11
15
|
# options[:additional_content_types] Array
|
@@ -16,28 +20,42 @@ module Rack
|
|
16
20
|
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
17
21
|
@only = Array(options[:only]).flatten
|
18
22
|
@except = Array(options[:except]).flatten
|
23
|
+
@sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
|
19
24
|
end
|
20
25
|
|
21
26
|
def call(env)
|
22
|
-
|
27
|
+
begin
|
28
|
+
env = sanitize(env)
|
29
|
+
rescue EOFError
|
30
|
+
return BAD_REQUEST
|
31
|
+
end
|
32
|
+
@app.call(env)
|
23
33
|
end
|
24
34
|
|
25
35
|
DEFAULT_STRATEGIES = {
|
26
|
-
replace: lambda do |input|
|
36
|
+
replace: lambda do |input, sanitize_null_bytes: false|
|
27
37
|
input.
|
28
38
|
force_encoding(Encoding::ASCII_8BIT).
|
29
39
|
encode!(Encoding::UTF_8,
|
30
40
|
invalid: :replace,
|
31
41
|
undef: :replace)
|
42
|
+
if sanitize_null_bytes
|
43
|
+
input = input.gsub(NULL_BYTE_REGEX, "")
|
44
|
+
end
|
45
|
+
input
|
32
46
|
end,
|
33
|
-
exception: lambda do |input|
|
47
|
+
exception: lambda do |input, sanitize_null_bytes: false|
|
34
48
|
input.
|
35
49
|
force_encoding(Encoding::ASCII_8BIT).
|
36
50
|
encode!(Encoding::UTF_8)
|
51
|
+
if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
|
52
|
+
raise NullByteInString
|
53
|
+
end
|
54
|
+
input
|
37
55
|
end
|
38
56
|
}.freeze
|
39
57
|
|
40
|
-
#
|
58
|
+
# https://github.com/rack/rack/blob/main/SPEC.rdoc
|
41
59
|
URI_FIELDS = %w(
|
42
60
|
SCRIPT_NAME
|
43
61
|
REQUEST_PATH REQUEST_URI PATH_INFO
|
@@ -201,7 +219,8 @@ module Rack
|
|
201
219
|
|
202
220
|
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
203
221
|
# plus all multibyte UTF-8 characters.
|
204
|
-
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
|
222
|
+
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
|
223
|
+
UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
|
205
224
|
|
206
225
|
# RFC3986, 2.2 states that the characters from 'reserved' group must be
|
207
226
|
# protected during normalization (which is what UTF8Sanitizer does).
|
@@ -212,7 +231,8 @@ module Rack
|
|
212
231
|
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
213
232
|
decoded = $1.hex.chr
|
214
233
|
|
215
|
-
|
234
|
+
decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
|
235
|
+
if decoded =~ decodable_regex
|
216
236
|
decoded
|
217
237
|
else
|
218
238
|
encoded
|
@@ -238,10 +258,10 @@ module Rack
|
|
238
258
|
if input.is_a? String
|
239
259
|
input = input.dup.force_encoding(Encoding::UTF_8)
|
240
260
|
|
241
|
-
if input.valid_encoding?
|
261
|
+
if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
|
242
262
|
input
|
243
263
|
else
|
244
|
-
@strategy.call(input)
|
264
|
+
@strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
|
245
265
|
end
|
246
266
|
else
|
247
267
|
input
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.7.0'
|
5
|
+
gem.version = '1.9.1'
|
6
6
|
gem.authors = ["whitequark"]
|
7
7
|
gem.license = "MIT"
|
8
8
|
gem.email = ["whitequark@whitequark.org"]
|
@@ -12,13 +12,12 @@ Gem::Specification.new do |gem|
|
|
12
12
|
gem.homepage = "http://github.com/whitequark/rack-utf8_sanitizer"
|
13
13
|
|
14
14
|
gem.files = `git ls-files`.split($/)
|
15
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
16
15
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
17
16
|
gem.require_paths = ["lib"]
|
18
17
|
|
19
18
|
gem.required_ruby_version = '>= 1.9.3'
|
20
19
|
|
21
|
-
gem.add_dependency "rack", '>= 1.0', '<
|
20
|
+
gem.add_dependency "rack", '>= 1.0', '< 4.0'
|
22
21
|
|
23
22
|
gem.add_development_dependency "bacon"
|
24
23
|
gem.add_development_dependency "bacon-colored_output"
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding:ascii-8bit
|
2
2
|
|
3
3
|
require 'bacon/colored_output'
|
4
|
+
require 'cgi'
|
4
5
|
require 'rack/utf8_sanitizer'
|
5
6
|
|
6
7
|
describe Rack::UTF8Sanitizer do
|
@@ -118,6 +119,7 @@ describe Rack::UTF8Sanitizer do
|
|
118
119
|
describe "with valid, not percent-encoded UTF-8 URI input" do
|
119
120
|
before do
|
120
121
|
@uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
|
122
|
+
@encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
|
121
123
|
end
|
122
124
|
|
123
125
|
it "does not change URI-like entity (REQUEST_PATH)" do
|
@@ -126,7 +128,7 @@ describe Rack::UTF8Sanitizer do
|
|
126
128
|
|
127
129
|
result.encoding.should == Encoding::US_ASCII
|
128
130
|
result.should.be.valid_encoding
|
129
|
-
result.should ==
|
131
|
+
result.should == @encoded
|
130
132
|
end
|
131
133
|
end
|
132
134
|
|
@@ -205,6 +207,18 @@ describe Rack::UTF8Sanitizer do
|
|
205
207
|
@response_env['rack.input'].close
|
206
208
|
end
|
207
209
|
|
210
|
+
class BrokenIO < StringIO
|
211
|
+
def read
|
212
|
+
raise EOFError
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
it "returns HTTP 400 on EOF" do
|
217
|
+
@rack_input = BrokenIO.new
|
218
|
+
@response_env = @app.(request_env)
|
219
|
+
@response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
|
220
|
+
end
|
221
|
+
|
208
222
|
it "sanitizes StringIO rack.input" do
|
209
223
|
input = "foo=bla&quux=bar"
|
210
224
|
@rack_input = StringIO.new input
|
@@ -323,6 +337,71 @@ describe Rack::UTF8Sanitizer do
|
|
323
337
|
@response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
|
324
338
|
end
|
325
339
|
end
|
340
|
+
|
341
|
+
it "does not sanitize null bytes by default" do
|
342
|
+
input = "foo=bla&quux=bar%00"
|
343
|
+
@rack_input = StringIO.new input
|
344
|
+
|
345
|
+
sanitize_form_data do |sanitized_input|
|
346
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
347
|
+
sanitized_input.should.be.valid_encoding
|
348
|
+
sanitized_input.should == input
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
it "optionally sanitizes null bytes with the replace strategy" do
|
353
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
354
|
+
input = "foo=bla\xED&quux=bar\x00"
|
355
|
+
@rack_input = StringIO.new input
|
356
|
+
|
357
|
+
sanitize_form_data do |sanitized_input|
|
358
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
359
|
+
sanitized_input.should.be.valid_encoding
|
360
|
+
sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
it "optionally sanitizes encoded null bytes with the replace strategy" do
|
365
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
366
|
+
input = "foo=bla%ED&quux=bar%00"
|
367
|
+
@rack_input = StringIO.new input
|
368
|
+
|
369
|
+
sanitize_form_data do |sanitized_input|
|
370
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
371
|
+
sanitized_input.should.be.valid_encoding
|
372
|
+
sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
it "optionally raises on null bytes with the exception strategy" do
|
377
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
378
|
+
input = "foo=bla&quux=bar\x00"
|
379
|
+
@rack_input = StringIO.new input
|
380
|
+
|
381
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
382
|
+
sanitize_form_data
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
it "optionally raises on encoded null bytes with the exception strategy" do
|
387
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
388
|
+
input = "foo=bla&quux=bar%00"
|
389
|
+
@rack_input = StringIO.new input
|
390
|
+
|
391
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
392
|
+
sanitize_form_data
|
393
|
+
end
|
394
|
+
end
|
395
|
+
|
396
|
+
it "gives precedence to encoding errors with the exception strategy and null byte sanitisation" do
|
397
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
398
|
+
input = "foo=bla\x00&quux=bar\xED"
|
399
|
+
@rack_input = StringIO.new input
|
400
|
+
|
401
|
+
should.raise(EncodingError) do
|
402
|
+
sanitize_form_data
|
403
|
+
end
|
404
|
+
end
|
326
405
|
end
|
327
406
|
|
328
407
|
describe "with custom content-type" do
|
@@ -538,7 +617,10 @@ describe Rack::UTF8Sanitizer do
|
|
538
617
|
end
|
539
618
|
|
540
619
|
it "accepts a proc as a strategy" do
|
541
|
-
truncate = -> input
|
620
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
621
|
+
sanitize_null_bytes.should == false
|
622
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
623
|
+
end
|
542
624
|
|
543
625
|
@app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
|
544
626
|
|
@@ -549,7 +631,26 @@ describe Rack::UTF8Sanitizer do
|
|
549
631
|
sanitize_data(env) do |sanitized_input|
|
550
632
|
sanitized_input.encoding.should == Encoding::UTF_8
|
551
633
|
sanitized_input.should.be.valid_encoding
|
552
|
-
sanitized_input.should == 'replace'
|
634
|
+
sanitized_input.should == 'replace'
|
635
|
+
end
|
636
|
+
end
|
637
|
+
|
638
|
+
it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
|
639
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
640
|
+
sanitize_null_bytes.should == true
|
641
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
642
|
+
end
|
643
|
+
|
644
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
|
645
|
+
input = "foo=bla&quux=bar\x00"
|
646
|
+
|
647
|
+
@rack_input = StringIO.new input
|
648
|
+
|
649
|
+
env = request_env
|
650
|
+
sanitize_data(env) do |sanitized_input|
|
651
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
652
|
+
sanitized_input.should.be.valid_encoding
|
653
|
+
sanitized_input.should == 'replace'
|
553
654
|
end
|
554
655
|
end
|
555
656
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.0
|
4
|
+
version: 1.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: '1.0'
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '
|
22
|
+
version: '4.0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,7 +29,7 @@ dependencies:
|
|
29
29
|
version: '1.0'
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '
|
32
|
+
version: '4.0'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: bacon
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +80,9 @@ executables: []
|
|
80
80
|
extensions: []
|
81
81
|
extra_rdoc_files: []
|
82
82
|
files:
|
83
|
+
- ".editorconfig"
|
84
|
+
- ".github/dependabot.yml"
|
85
|
+
- ".github/workflows/ci.yml"
|
83
86
|
- ".gitignore"
|
84
87
|
- ".travis.yml"
|
85
88
|
- CHANGELOG.md
|
@@ -94,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
|
|
94
97
|
licenses:
|
95
98
|
- MIT
|
96
99
|
metadata: {}
|
97
|
-
post_install_message:
|
100
|
+
post_install_message:
|
98
101
|
rdoc_options: []
|
99
102
|
require_paths:
|
100
103
|
- lib
|
@@ -109,9 +112,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
112
|
- !ruby/object:Gem::Version
|
110
113
|
version: '0'
|
111
114
|
requirements: []
|
112
|
-
|
113
|
-
|
114
|
-
signing_key:
|
115
|
+
rubygems_version: 3.3.15
|
116
|
+
signing_key:
|
115
117
|
specification_version: 4
|
116
118
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|
117
119
|
in request URI and headers.
|