rack-utf8_sanitizer 1.7.0 → 1.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.editorconfig +17 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/ci.yml +23 -0
- data/.travis.yml +4 -7
- data/lib/rack/utf8_sanitizer.rb +26 -8
- data/rack-utf8_sanitizer.gemspec +2 -3
- data/test/test_utf8_sanitizer.rb +94 -3
- metadata +11 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9893353fe731d6c942263ff57af911b10d1c9f88ba2c2c2b56bb7a76586935e5
|
4
|
+
data.tar.gz: eeeaca39f55d680abe2707fa1aaadfe34eed06de2dd848506d184ae4ff326a8d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28cfbadcc1fbbf0678db3b51259618a9c9897c0cb78bcd8b8f5d87c91eddb44873a7a77fb21b9db47bcc704cb7df17d942c50436475858d0d9fcc17c2a8b59ce
|
7
|
+
data.tar.gz: 7e781931df60e405d533f5eb832416510a431a1c6fa74baeabffb9499533d9eba2f97ffa24f89ed79a193de5f6715c0461fff4fb0403c863a3643e4aaabbcf0a
|
data/.editorconfig
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
root = true
|
2
|
+
|
3
|
+
[*]
|
4
|
+
indent_style = space
|
5
|
+
indent_size = 2
|
6
|
+
end_of_line = lf
|
7
|
+
charset = utf-8
|
8
|
+
trim_trailing_whitespace = true
|
9
|
+
insert_final_newline = true
|
10
|
+
|
11
|
+
[*.md]
|
12
|
+
indent_style = space
|
13
|
+
indent_size = 2
|
14
|
+
|
15
|
+
[*.y{a,}ml]
|
16
|
+
indent_style = space
|
17
|
+
indent_size = 2
|
@@ -0,0 +1,23 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
|
8
|
+
runs-on: ubuntu-latest
|
9
|
+
|
10
|
+
strategy:
|
11
|
+
fail-fast: false
|
12
|
+
matrix:
|
13
|
+
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
14
|
+
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v3
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
bundler-cache: true # 'bundle install' and cache gems
|
21
|
+
ruby-version: ${{ matrix.ruby }}
|
22
|
+
- name: Run tests
|
23
|
+
run: bundle exec rake
|
data/.travis.yml
CHANGED
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -6,6 +6,10 @@ require 'stringio'
|
|
6
6
|
module Rack
|
7
7
|
class UTF8Sanitizer
|
8
8
|
StringIO = ::StringIO
|
9
|
+
BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
|
10
|
+
NULL_BYTE_REGEX = /\x00/.freeze
|
11
|
+
|
12
|
+
class NullByteInString < StandardError; end
|
9
13
|
|
10
14
|
# options[:sanitizable_content_types] Array
|
11
15
|
# options[:additional_content_types] Array
|
@@ -16,28 +20,40 @@ module Rack
|
|
16
20
|
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
17
21
|
@only = Array(options[:only]).flatten
|
18
22
|
@except = Array(options[:except]).flatten
|
23
|
+
@sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
|
19
24
|
end
|
20
25
|
|
21
26
|
def call(env)
|
22
|
-
|
27
|
+
begin
|
28
|
+
env = sanitize(env)
|
29
|
+
rescue EOFError
|
30
|
+
return BAD_REQUEST
|
31
|
+
end
|
32
|
+
@app.call(env)
|
23
33
|
end
|
24
34
|
|
25
35
|
DEFAULT_STRATEGIES = {
|
26
|
-
replace: lambda do |input|
|
36
|
+
replace: lambda do |input, sanitize_null_bytes: false|
|
37
|
+
if sanitize_null_bytes
|
38
|
+
input = input.gsub(NULL_BYTE_REGEX, "")
|
39
|
+
end
|
27
40
|
input.
|
28
41
|
force_encoding(Encoding::ASCII_8BIT).
|
29
42
|
encode!(Encoding::UTF_8,
|
30
43
|
invalid: :replace,
|
31
44
|
undef: :replace)
|
32
45
|
end,
|
33
|
-
exception: lambda do |input|
|
46
|
+
exception: lambda do |input, sanitize_null_bytes: false|
|
47
|
+
if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
|
48
|
+
raise NullByteInString
|
49
|
+
end
|
34
50
|
input.
|
35
51
|
force_encoding(Encoding::ASCII_8BIT).
|
36
52
|
encode!(Encoding::UTF_8)
|
37
53
|
end
|
38
54
|
}.freeze
|
39
55
|
|
40
|
-
#
|
56
|
+
# https://github.com/rack/rack/blob/main/SPEC.rdoc
|
41
57
|
URI_FIELDS = %w(
|
42
58
|
SCRIPT_NAME
|
43
59
|
REQUEST_PATH REQUEST_URI PATH_INFO
|
@@ -201,7 +217,8 @@ module Rack
|
|
201
217
|
|
202
218
|
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
203
219
|
# plus all multibyte UTF-8 characters.
|
204
|
-
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
|
220
|
+
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
|
221
|
+
UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
|
205
222
|
|
206
223
|
# RFC3986, 2.2 states that the characters from 'reserved' group must be
|
207
224
|
# protected during normalization (which is what UTF8Sanitizer does).
|
@@ -212,7 +229,8 @@ module Rack
|
|
212
229
|
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
213
230
|
decoded = $1.hex.chr
|
214
231
|
|
215
|
-
|
232
|
+
decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
|
233
|
+
if decoded =~ decodable_regex
|
216
234
|
decoded
|
217
235
|
else
|
218
236
|
encoded
|
@@ -238,10 +256,10 @@ module Rack
|
|
238
256
|
if input.is_a? String
|
239
257
|
input = input.dup.force_encoding(Encoding::UTF_8)
|
240
258
|
|
241
|
-
if input.valid_encoding?
|
259
|
+
if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
|
242
260
|
input
|
243
261
|
else
|
244
|
-
@strategy.call(input)
|
262
|
+
@strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
|
245
263
|
end
|
246
264
|
else
|
247
265
|
input
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.7.0'
|
5
|
+
gem.version = '1.9.0'
|
6
6
|
gem.authors = ["whitequark"]
|
7
7
|
gem.license = "MIT"
|
8
8
|
gem.email = ["whitequark@whitequark.org"]
|
@@ -12,13 +12,12 @@ Gem::Specification.new do |gem|
|
|
12
12
|
gem.homepage = "http://github.com/whitequark/rack-utf8_sanitizer"
|
13
13
|
|
14
14
|
gem.files = `git ls-files`.split($/)
|
15
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
16
15
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
17
16
|
gem.require_paths = ["lib"]
|
18
17
|
|
19
18
|
gem.required_ruby_version = '>= 1.9.3'
|
20
19
|
|
21
|
-
gem.add_dependency "rack", '>= 1.0', '<
|
20
|
+
gem.add_dependency "rack", '>= 1.0', '< 4.0'
|
22
21
|
|
23
22
|
gem.add_development_dependency "bacon"
|
24
23
|
gem.add_development_dependency "bacon-colored_output"
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding:ascii-8bit
|
2
2
|
|
3
3
|
require 'bacon/colored_output'
|
4
|
+
require 'cgi'
|
4
5
|
require 'rack/utf8_sanitizer'
|
5
6
|
|
6
7
|
describe Rack::UTF8Sanitizer do
|
@@ -118,6 +119,7 @@ describe Rack::UTF8Sanitizer do
|
|
118
119
|
describe "with valid, not percent-encoded UTF-8 URI input" do
|
119
120
|
before do
|
120
121
|
@uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
|
122
|
+
@encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
|
121
123
|
end
|
122
124
|
|
123
125
|
it "does not change URI-like entity (REQUEST_PATH)" do
|
@@ -126,7 +128,7 @@ describe Rack::UTF8Sanitizer do
|
|
126
128
|
|
127
129
|
result.encoding.should == Encoding::US_ASCII
|
128
130
|
result.should.be.valid_encoding
|
129
|
-
result.should ==
|
131
|
+
result.should == @encoded
|
130
132
|
end
|
131
133
|
end
|
132
134
|
|
@@ -205,6 +207,18 @@ describe Rack::UTF8Sanitizer do
|
|
205
207
|
@response_env['rack.input'].close
|
206
208
|
end
|
207
209
|
|
210
|
+
class BrokenIO < StringIO
|
211
|
+
def read
|
212
|
+
raise EOFError
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
it "returns HTTP 400 on EOF" do
|
217
|
+
@rack_input = BrokenIO.new
|
218
|
+
@response_env = @app.(request_env)
|
219
|
+
@response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
|
220
|
+
end
|
221
|
+
|
208
222
|
it "sanitizes StringIO rack.input" do
|
209
223
|
input = "foo=bla&quux=bar"
|
210
224
|
@rack_input = StringIO.new input
|
@@ -323,6 +337,61 @@ describe Rack::UTF8Sanitizer do
|
|
323
337
|
@response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
|
324
338
|
end
|
325
339
|
end
|
340
|
+
|
341
|
+
it "does not sanitize null bytes by default" do
|
342
|
+
input = "foo=bla&quux=bar%00"
|
343
|
+
@rack_input = StringIO.new input
|
344
|
+
|
345
|
+
sanitize_form_data do |sanitized_input|
|
346
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
347
|
+
sanitized_input.should.be.valid_encoding
|
348
|
+
sanitized_input.should == input
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
it "optionally sanitizes null bytes with the replace strategy" do
|
353
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
354
|
+
input = "foo=bla&quux=bar\x00"
|
355
|
+
@rack_input = StringIO.new input
|
356
|
+
|
357
|
+
sanitize_form_data do |sanitized_input|
|
358
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
359
|
+
sanitized_input.should.be.valid_encoding
|
360
|
+
sanitized_input.should == "foo=bla&quux=bar"
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
it "optionally sanitizes encoded null bytes with the replace strategy" do
|
365
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
366
|
+
input = "foo=bla&quux=bar%00"
|
367
|
+
@rack_input = StringIO.new input
|
368
|
+
|
369
|
+
sanitize_form_data do |sanitized_input|
|
370
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
371
|
+
sanitized_input.should.be.valid_encoding
|
372
|
+
sanitized_input.should == "foo=bla&quux=bar"
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
it "optionally raises on null bytes with the exception strategy" do
|
377
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
378
|
+
input = "foo=bla&quux=bar\x00"
|
379
|
+
@rack_input = StringIO.new input
|
380
|
+
|
381
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
382
|
+
sanitize_form_data
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
it "optionally raises on encoded null bytes with the exception strategy" do
|
387
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
388
|
+
input = "foo=bla&quux=bar%00"
|
389
|
+
@rack_input = StringIO.new input
|
390
|
+
|
391
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
392
|
+
sanitize_form_data
|
393
|
+
end
|
394
|
+
end
|
326
395
|
end
|
327
396
|
|
328
397
|
describe "with custom content-type" do
|
@@ -538,7 +607,10 @@ describe Rack::UTF8Sanitizer do
|
|
538
607
|
end
|
539
608
|
|
540
609
|
it "accepts a proc as a strategy" do
|
541
|
-
truncate = -> input
|
610
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
611
|
+
sanitize_null_bytes.should == false
|
612
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
613
|
+
end
|
542
614
|
|
543
615
|
@app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
|
544
616
|
|
@@ -549,7 +621,26 @@ describe Rack::UTF8Sanitizer do
|
|
549
621
|
sanitize_data(env) do |sanitized_input|
|
550
622
|
sanitized_input.encoding.should == Encoding::UTF_8
|
551
623
|
sanitized_input.should.be.valid_encoding
|
552
|
-
sanitized_input.should == 'replace'
|
624
|
+
sanitized_input.should == 'replace'
|
625
|
+
end
|
626
|
+
end
|
627
|
+
|
628
|
+
it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
|
629
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
630
|
+
sanitize_null_bytes.should == true
|
631
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
632
|
+
end
|
633
|
+
|
634
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
|
635
|
+
input = "foo=bla&quux=bar\x00"
|
636
|
+
|
637
|
+
@rack_input = StringIO.new input
|
638
|
+
|
639
|
+
env = request_env
|
640
|
+
sanitize_data(env) do |sanitized_input|
|
641
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
642
|
+
sanitized_input.should.be.valid_encoding
|
643
|
+
sanitized_input.should == 'replace'
|
553
644
|
end
|
554
645
|
end
|
555
646
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.0
|
4
|
+
version: 1.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: '1.0'
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '
|
22
|
+
version: '4.0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,7 +29,7 @@ dependencies:
|
|
29
29
|
version: '1.0'
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '
|
32
|
+
version: '4.0'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: bacon
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +80,9 @@ executables: []
|
|
80
80
|
extensions: []
|
81
81
|
extra_rdoc_files: []
|
82
82
|
files:
|
83
|
+
- ".editorconfig"
|
84
|
+
- ".github/dependabot.yml"
|
85
|
+
- ".github/workflows/ci.yml"
|
83
86
|
- ".gitignore"
|
84
87
|
- ".travis.yml"
|
85
88
|
- CHANGELOG.md
|
@@ -94,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
|
|
94
97
|
licenses:
|
95
98
|
- MIT
|
96
99
|
metadata: {}
|
97
|
-
post_install_message:
|
100
|
+
post_install_message:
|
98
101
|
rdoc_options: []
|
99
102
|
require_paths:
|
100
103
|
- lib
|
@@ -109,9 +112,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
112
|
- !ruby/object:Gem::Version
|
110
113
|
version: '0'
|
111
114
|
requirements: []
|
112
|
-
|
113
|
-
|
114
|
-
signing_key:
|
115
|
+
rubygems_version: 3.2.5
|
116
|
+
signing_key:
|
115
117
|
specification_version: 4
|
116
118
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|
117
119
|
in request URI and headers.
|