rack-utf8_sanitizer 1.7.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.editorconfig +17 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/ci.yml +23 -0
- data/.travis.yml +4 -7
- data/lib/rack/utf8_sanitizer.rb +26 -8
- data/rack-utf8_sanitizer.gemspec +2 -3
- data/test/test_utf8_sanitizer.rb +94 -3
- metadata +11 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9893353fe731d6c942263ff57af911b10d1c9f88ba2c2c2b56bb7a76586935e5
|
4
|
+
data.tar.gz: eeeaca39f55d680abe2707fa1aaadfe34eed06de2dd848506d184ae4ff326a8d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28cfbadcc1fbbf0678db3b51259618a9c9897c0cb78bcd8b8f5d87c91eddb44873a7a77fb21b9db47bcc704cb7df17d942c50436475858d0d9fcc17c2a8b59ce
|
7
|
+
data.tar.gz: 7e781931df60e405d533f5eb832416510a431a1c6fa74baeabffb9499533d9eba2f97ffa24f89ed79a193de5f6715c0461fff4fb0403c863a3643e4aaabbcf0a
|
data/.editorconfig
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
root = true
|
2
|
+
|
3
|
+
[*]
|
4
|
+
indent_style = space
|
5
|
+
indent_size = 2
|
6
|
+
end_of_line = lf
|
7
|
+
charset = utf-8
|
8
|
+
trim_trailing_whitespace = true
|
9
|
+
insert_final_newline = true
|
10
|
+
|
11
|
+
[*.md]
|
12
|
+
indent_style = space
|
13
|
+
indent_size = 2
|
14
|
+
|
15
|
+
[*.y{a,}ml]
|
16
|
+
indent_style = space
|
17
|
+
indent_size = 2
|
data/.github/workflows/ci.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
|
8
|
+
runs-on: ubuntu-latest
|
9
|
+
|
10
|
+
strategy:
|
11
|
+
fail-fast: false
|
12
|
+
matrix:
|
13
|
+
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
14
|
+
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v3
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
bundler-cache: true # 'bundle install' and cache gems
|
21
|
+
ruby-version: ${{ matrix.ruby }}
|
22
|
+
- name: Run tests
|
23
|
+
run: bundle exec rake
|
data/.travis.yml
CHANGED
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -6,6 +6,10 @@ require 'stringio'
|
|
6
6
|
module Rack
|
7
7
|
class UTF8Sanitizer
|
8
8
|
StringIO = ::StringIO
|
9
|
+
BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
|
10
|
+
NULL_BYTE_REGEX = /\x00/.freeze
|
11
|
+
|
12
|
+
class NullByteInString < StandardError; end
|
9
13
|
|
10
14
|
# options[:sanitizable_content_types] Array
|
11
15
|
# options[:additional_content_types] Array
|
@@ -16,28 +20,40 @@ module Rack
|
|
16
20
|
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
17
21
|
@only = Array(options[:only]).flatten
|
18
22
|
@except = Array(options[:except]).flatten
|
23
|
+
@sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
|
19
24
|
end
|
20
25
|
|
21
26
|
def call(env)
|
22
|
-
|
27
|
+
begin
|
28
|
+
env = sanitize(env)
|
29
|
+
rescue EOFError
|
30
|
+
return BAD_REQUEST
|
31
|
+
end
|
32
|
+
@app.call(env)
|
23
33
|
end
|
24
34
|
|
25
35
|
DEFAULT_STRATEGIES = {
|
26
|
-
replace: lambda do |input|
|
36
|
+
replace: lambda do |input, sanitize_null_bytes: false|
|
37
|
+
if sanitize_null_bytes
|
38
|
+
input = input.gsub(NULL_BYTE_REGEX, "")
|
39
|
+
end
|
27
40
|
input.
|
28
41
|
force_encoding(Encoding::ASCII_8BIT).
|
29
42
|
encode!(Encoding::UTF_8,
|
30
43
|
invalid: :replace,
|
31
44
|
undef: :replace)
|
32
45
|
end,
|
33
|
-
exception: lambda do |input|
|
46
|
+
exception: lambda do |input, sanitize_null_bytes: false|
|
47
|
+
if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
|
48
|
+
raise NullByteInString
|
49
|
+
end
|
34
50
|
input.
|
35
51
|
force_encoding(Encoding::ASCII_8BIT).
|
36
52
|
encode!(Encoding::UTF_8)
|
37
53
|
end
|
38
54
|
}.freeze
|
39
55
|
|
40
|
-
#
|
56
|
+
# https://github.com/rack/rack/blob/main/SPEC.rdoc
|
41
57
|
URI_FIELDS = %w(
|
42
58
|
SCRIPT_NAME
|
43
59
|
REQUEST_PATH REQUEST_URI PATH_INFO
|
@@ -201,7 +217,8 @@ module Rack
|
|
201
217
|
|
202
218
|
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
203
219
|
# plus all multibyte UTF-8 characters.
|
204
|
-
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
|
220
|
+
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
|
221
|
+
UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
|
205
222
|
|
206
223
|
# RFC3986, 2.2 states that the characters from 'reserved' group must be
|
207
224
|
# protected during normalization (which is what UTF8Sanitizer does).
|
@@ -212,7 +229,8 @@ module Rack
|
|
212
229
|
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
213
230
|
decoded = $1.hex.chr
|
214
231
|
|
215
|
-
|
232
|
+
decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
|
233
|
+
if decoded =~ decodable_regex
|
216
234
|
decoded
|
217
235
|
else
|
218
236
|
encoded
|
@@ -238,10 +256,10 @@ module Rack
|
|
238
256
|
if input.is_a? String
|
239
257
|
input = input.dup.force_encoding(Encoding::UTF_8)
|
240
258
|
|
241
|
-
if input.valid_encoding?
|
259
|
+
if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
|
242
260
|
input
|
243
261
|
else
|
244
|
-
@strategy.call(input)
|
262
|
+
@strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
|
245
263
|
end
|
246
264
|
else
|
247
265
|
input
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.7.0'
|
5
|
+
gem.version = '1.9.0'
|
6
6
|
gem.authors = ["whitequark"]
|
7
7
|
gem.license = "MIT"
|
8
8
|
gem.email = ["whitequark@whitequark.org"]
|
@@ -12,13 +12,12 @@ Gem::Specification.new do |gem|
|
|
12
12
|
gem.homepage = "http://github.com/whitequark/rack-utf8_sanitizer"
|
13
13
|
|
14
14
|
gem.files = `git ls-files`.split($/)
|
15
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
16
15
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
17
16
|
gem.require_paths = ["lib"]
|
18
17
|
|
19
18
|
gem.required_ruby_version = '>= 1.9.3'
|
20
19
|
|
21
|
-
gem.add_dependency "rack", '>= 1.0', '<
|
20
|
+
gem.add_dependency "rack", '>= 1.0', '< 4.0'
|
22
21
|
|
23
22
|
gem.add_development_dependency "bacon"
|
24
23
|
gem.add_development_dependency "bacon-colored_output"
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding:ascii-8bit
|
2
2
|
|
3
3
|
require 'bacon/colored_output'
|
4
|
+
require 'cgi'
|
4
5
|
require 'rack/utf8_sanitizer'
|
5
6
|
|
6
7
|
describe Rack::UTF8Sanitizer do
|
@@ -118,6 +119,7 @@ describe Rack::UTF8Sanitizer do
|
|
118
119
|
describe "with valid, not percent-encoded UTF-8 URI input" do
|
119
120
|
before do
|
120
121
|
@uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
|
122
|
+
@encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
|
121
123
|
end
|
122
124
|
|
123
125
|
it "does not change URI-like entity (REQUEST_PATH)" do
|
@@ -126,7 +128,7 @@ describe Rack::UTF8Sanitizer do
|
|
126
128
|
|
127
129
|
result.encoding.should == Encoding::US_ASCII
|
128
130
|
result.should.be.valid_encoding
|
129
|
-
result.should ==
|
131
|
+
result.should == @encoded
|
130
132
|
end
|
131
133
|
end
|
132
134
|
|
@@ -205,6 +207,18 @@ describe Rack::UTF8Sanitizer do
|
|
205
207
|
@response_env['rack.input'].close
|
206
208
|
end
|
207
209
|
|
210
|
+
class BrokenIO < StringIO
|
211
|
+
def read
|
212
|
+
raise EOFError
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
it "returns HTTP 400 on EOF" do
|
217
|
+
@rack_input = BrokenIO.new
|
218
|
+
@response_env = @app.(request_env)
|
219
|
+
@response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
|
220
|
+
end
|
221
|
+
|
208
222
|
it "sanitizes StringIO rack.input" do
|
209
223
|
input = "foo=bla&quux=bar"
|
210
224
|
@rack_input = StringIO.new input
|
@@ -323,6 +337,61 @@ describe Rack::UTF8Sanitizer do
|
|
323
337
|
@response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
|
324
338
|
end
|
325
339
|
end
|
340
|
+
|
341
|
+
it "does not sanitize null bytes by default" do
|
342
|
+
input = "foo=bla&quux=bar%00"
|
343
|
+
@rack_input = StringIO.new input
|
344
|
+
|
345
|
+
sanitize_form_data do |sanitized_input|
|
346
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
347
|
+
sanitized_input.should.be.valid_encoding
|
348
|
+
sanitized_input.should == input
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
it "optionally sanitizes null bytes with the replace strategy" do
|
353
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
354
|
+
input = "foo=bla&quux=bar\x00"
|
355
|
+
@rack_input = StringIO.new input
|
356
|
+
|
357
|
+
sanitize_form_data do |sanitized_input|
|
358
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
359
|
+
sanitized_input.should.be.valid_encoding
|
360
|
+
sanitized_input.should == "foo=bla&quux=bar"
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
it "optionally sanitizes encoded null bytes with the replace strategy" do
|
365
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
366
|
+
input = "foo=bla&quux=bar%00"
|
367
|
+
@rack_input = StringIO.new input
|
368
|
+
|
369
|
+
sanitize_form_data do |sanitized_input|
|
370
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
371
|
+
sanitized_input.should.be.valid_encoding
|
372
|
+
sanitized_input.should == "foo=bla&quux=bar"
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
it "optionally raises on null bytes with the exception strategy" do
|
377
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
378
|
+
input = "foo=bla&quux=bar\x00"
|
379
|
+
@rack_input = StringIO.new input
|
380
|
+
|
381
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
382
|
+
sanitize_form_data
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
it "optionally raises on encoded null bytes with the exception strategy" do
|
387
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
388
|
+
input = "foo=bla&quux=bar%00"
|
389
|
+
@rack_input = StringIO.new input
|
390
|
+
|
391
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
392
|
+
sanitize_form_data
|
393
|
+
end
|
394
|
+
end
|
326
395
|
end
|
327
396
|
|
328
397
|
describe "with custom content-type" do
|
@@ -538,7 +607,10 @@ describe Rack::UTF8Sanitizer do
|
|
538
607
|
end
|
539
608
|
|
540
609
|
it "accepts a proc as a strategy" do
|
541
|
-
truncate = -> input
|
610
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
611
|
+
sanitize_null_bytes.should == false
|
612
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
613
|
+
end
|
542
614
|
|
543
615
|
@app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
|
544
616
|
|
@@ -549,7 +621,26 @@ describe Rack::UTF8Sanitizer do
|
|
549
621
|
sanitize_data(env) do |sanitized_input|
|
550
622
|
sanitized_input.encoding.should == Encoding::UTF_8
|
551
623
|
sanitized_input.should.be.valid_encoding
|
552
|
-
sanitized_input.should == 'replace'
|
624
|
+
sanitized_input.should == 'replace'
|
625
|
+
end
|
626
|
+
end
|
627
|
+
|
628
|
+
it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
|
629
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
630
|
+
sanitize_null_bytes.should == true
|
631
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
632
|
+
end
|
633
|
+
|
634
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
|
635
|
+
input = "foo=bla&quux=bar\x00"
|
636
|
+
|
637
|
+
@rack_input = StringIO.new input
|
638
|
+
|
639
|
+
env = request_env
|
640
|
+
sanitize_data(env) do |sanitized_input|
|
641
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
642
|
+
sanitized_input.should.be.valid_encoding
|
643
|
+
sanitized_input.should == 'replace'
|
553
644
|
end
|
554
645
|
end
|
555
646
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.0
|
4
|
+
version: 1.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: '1.0'
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '
|
22
|
+
version: '4.0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,7 +29,7 @@ dependencies:
|
|
29
29
|
version: '1.0'
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '
|
32
|
+
version: '4.0'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: bacon
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +80,9 @@ executables: []
|
|
80
80
|
extensions: []
|
81
81
|
extra_rdoc_files: []
|
82
82
|
files:
|
83
|
+
- ".editorconfig"
|
84
|
+
- ".github/dependabot.yml"
|
85
|
+
- ".github/workflows/ci.yml"
|
83
86
|
- ".gitignore"
|
84
87
|
- ".travis.yml"
|
85
88
|
- CHANGELOG.md
|
@@ -94,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
|
|
94
97
|
licenses:
|
95
98
|
- MIT
|
96
99
|
metadata: {}
|
97
|
-
post_install_message:
|
100
|
+
post_install_message:
|
98
101
|
rdoc_options: []
|
99
102
|
require_paths:
|
100
103
|
- lib
|
@@ -109,9 +112,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
112
|
- !ruby/object:Gem::Version
|
110
113
|
version: '0'
|
111
114
|
requirements: []
|
112
|
-
|
113
|
-
|
114
|
-
signing_key:
|
115
|
+
rubygems_version: 3.2.5
|
116
|
+
signing_key:
|
115
117
|
specification_version: 4
|
116
118
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|
117
119
|
in request URI and headers.
|