rack-utf8_sanitizer 1.7.0 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.editorconfig +17 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/ci.yml +23 -0
- data/.travis.yml +4 -7
- data/README.md +5 -1
- data/lib/rack/utf8_sanitizer.rb +28 -8
- data/rack-utf8_sanitizer.gemspec +2 -3
- data/test/test_utf8_sanitizer.rb +104 -3
- metadata +11 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
|
4
|
+
data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
|
7
|
+
data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
|
data/.editorconfig
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
root = true
|
2
|
+
|
3
|
+
[*]
|
4
|
+
indent_style = space
|
5
|
+
indent_size = 2
|
6
|
+
end_of_line = lf
|
7
|
+
charset = utf-8
|
8
|
+
trim_trailing_whitespace = true
|
9
|
+
insert_final_newline = true
|
10
|
+
|
11
|
+
[*.md]
|
12
|
+
indent_style = space
|
13
|
+
indent_size = 2
|
14
|
+
|
15
|
+
[*.y{a,}ml]
|
16
|
+
indent_style = space
|
17
|
+
indent_size = 2
|
data/.github/workflows/ci.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
|
8
|
+
runs-on: ubuntu-latest
|
9
|
+
|
10
|
+
strategy:
|
11
|
+
fail-fast: false
|
12
|
+
matrix:
|
13
|
+
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
14
|
+
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v3
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
bundler-cache: true # 'bundle install' and cache gems
|
21
|
+
ruby-version: ${{ matrix.ruby }}
|
22
|
+
- name: Run tests
|
23
|
+
run: bundle exec rake
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -113,7 +113,7 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
|
|
113
113
|
```
|
114
114
|
|
115
115
|
```ruby
|
116
|
-
replace_string = lambda do |_invalid|
|
116
|
+
replace_string = lambda do |_invalid, sanitize_null_bytes: false|
|
117
117
|
Rails.logger.warn('Replacing invalid string')
|
118
118
|
|
119
119
|
'<Bad Encoding>'.freeze
|
@@ -122,6 +122,10 @@ end
|
|
122
122
|
config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
|
123
123
|
```
|
124
124
|
|
125
|
+
### Sanitizing Null Bytes
|
126
|
+
|
127
|
+
While null bytes are valid UTF-8, it can be useful to further restrict the valid character set to exclude null bytes. For example, PostgreSQL text columns do not allow storing null bytes. Passing `sanitize_null_bytes: true` in the configuration hash enables sanitizing null bytes, and the two built-in strategies both support this feature. Custom strategies should accept a keyword argument `sanitize_null_bytes` containing this configuration value.
|
128
|
+
|
125
129
|
## Contributing
|
126
130
|
|
127
131
|
1. Fork it
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -6,6 +6,10 @@ require 'stringio'
|
|
6
6
|
module Rack
|
7
7
|
class UTF8Sanitizer
|
8
8
|
StringIO = ::StringIO
|
9
|
+
BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
|
10
|
+
NULL_BYTE_REGEX = /\x00/.freeze
|
11
|
+
|
12
|
+
class NullByteInString < StandardError; end
|
9
13
|
|
10
14
|
# options[:sanitizable_content_types] Array
|
11
15
|
# options[:additional_content_types] Array
|
@@ -16,28 +20,42 @@ module Rack
|
|
16
20
|
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
17
21
|
@only = Array(options[:only]).flatten
|
18
22
|
@except = Array(options[:except]).flatten
|
23
|
+
@sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
|
19
24
|
end
|
20
25
|
|
21
26
|
def call(env)
|
22
|
-
|
27
|
+
begin
|
28
|
+
env = sanitize(env)
|
29
|
+
rescue EOFError
|
30
|
+
return BAD_REQUEST
|
31
|
+
end
|
32
|
+
@app.call(env)
|
23
33
|
end
|
24
34
|
|
25
35
|
DEFAULT_STRATEGIES = {
|
26
|
-
replace: lambda do |input|
|
36
|
+
replace: lambda do |input, sanitize_null_bytes: false|
|
27
37
|
input.
|
28
38
|
force_encoding(Encoding::ASCII_8BIT).
|
29
39
|
encode!(Encoding::UTF_8,
|
30
40
|
invalid: :replace,
|
31
41
|
undef: :replace)
|
42
|
+
if sanitize_null_bytes
|
43
|
+
input = input.gsub(NULL_BYTE_REGEX, "")
|
44
|
+
end
|
45
|
+
input
|
32
46
|
end,
|
33
|
-
exception: lambda do |input|
|
47
|
+
exception: lambda do |input, sanitize_null_bytes: false|
|
34
48
|
input.
|
35
49
|
force_encoding(Encoding::ASCII_8BIT).
|
36
50
|
encode!(Encoding::UTF_8)
|
51
|
+
if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
|
52
|
+
raise NullByteInString
|
53
|
+
end
|
54
|
+
input
|
37
55
|
end
|
38
56
|
}.freeze
|
39
57
|
|
40
|
-
#
|
58
|
+
# https://github.com/rack/rack/blob/main/SPEC.rdoc
|
41
59
|
URI_FIELDS = %w(
|
42
60
|
SCRIPT_NAME
|
43
61
|
REQUEST_PATH REQUEST_URI PATH_INFO
|
@@ -201,7 +219,8 @@ module Rack
|
|
201
219
|
|
202
220
|
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
203
221
|
# plus all multibyte UTF-8 characters.
|
204
|
-
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
|
222
|
+
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
|
223
|
+
UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
|
205
224
|
|
206
225
|
# RFC3986, 2.2 states that the characters from 'reserved' group must be
|
207
226
|
# protected during normalization (which is what UTF8Sanitizer does).
|
@@ -212,7 +231,8 @@ module Rack
|
|
212
231
|
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
213
232
|
decoded = $1.hex.chr
|
214
233
|
|
215
|
-
|
234
|
+
decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
|
235
|
+
if decoded =~ decodable_regex
|
216
236
|
decoded
|
217
237
|
else
|
218
238
|
encoded
|
@@ -238,10 +258,10 @@ module Rack
|
|
238
258
|
if input.is_a? String
|
239
259
|
input = input.dup.force_encoding(Encoding::UTF_8)
|
240
260
|
|
241
|
-
if input.valid_encoding?
|
261
|
+
if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
|
242
262
|
input
|
243
263
|
else
|
244
|
-
@strategy.call(input)
|
264
|
+
@strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
|
245
265
|
end
|
246
266
|
else
|
247
267
|
input
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.7.0'
|
5
|
+
gem.version = '1.9.1'
|
6
6
|
gem.authors = ["whitequark"]
|
7
7
|
gem.license = "MIT"
|
8
8
|
gem.email = ["whitequark@whitequark.org"]
|
@@ -12,13 +12,12 @@ Gem::Specification.new do |gem|
|
|
12
12
|
gem.homepage = "http://github.com/whitequark/rack-utf8_sanitizer"
|
13
13
|
|
14
14
|
gem.files = `git ls-files`.split($/)
|
15
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
16
15
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
17
16
|
gem.require_paths = ["lib"]
|
18
17
|
|
19
18
|
gem.required_ruby_version = '>= 1.9.3'
|
20
19
|
|
21
|
-
gem.add_dependency "rack", '>= 1.0', '<
|
20
|
+
gem.add_dependency "rack", '>= 1.0', '< 4.0'
|
22
21
|
|
23
22
|
gem.add_development_dependency "bacon"
|
24
23
|
gem.add_development_dependency "bacon-colored_output"
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding:ascii-8bit
|
2
2
|
|
3
3
|
require 'bacon/colored_output'
|
4
|
+
require 'cgi'
|
4
5
|
require 'rack/utf8_sanitizer'
|
5
6
|
|
6
7
|
describe Rack::UTF8Sanitizer do
|
@@ -118,6 +119,7 @@ describe Rack::UTF8Sanitizer do
|
|
118
119
|
describe "with valid, not percent-encoded UTF-8 URI input" do
|
119
120
|
before do
|
120
121
|
@uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
|
122
|
+
@encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
|
121
123
|
end
|
122
124
|
|
123
125
|
it "does not change URI-like entity (REQUEST_PATH)" do
|
@@ -126,7 +128,7 @@ describe Rack::UTF8Sanitizer do
|
|
126
128
|
|
127
129
|
result.encoding.should == Encoding::US_ASCII
|
128
130
|
result.should.be.valid_encoding
|
129
|
-
result.should ==
|
131
|
+
result.should == @encoded
|
130
132
|
end
|
131
133
|
end
|
132
134
|
|
@@ -205,6 +207,18 @@ describe Rack::UTF8Sanitizer do
|
|
205
207
|
@response_env['rack.input'].close
|
206
208
|
end
|
207
209
|
|
210
|
+
class BrokenIO < StringIO
|
211
|
+
def read
|
212
|
+
raise EOFError
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
it "returns HTTP 400 on EOF" do
|
217
|
+
@rack_input = BrokenIO.new
|
218
|
+
@response_env = @app.(request_env)
|
219
|
+
@response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
|
220
|
+
end
|
221
|
+
|
208
222
|
it "sanitizes StringIO rack.input" do
|
209
223
|
input = "foo=bla&quux=bar"
|
210
224
|
@rack_input = StringIO.new input
|
@@ -323,6 +337,71 @@ describe Rack::UTF8Sanitizer do
|
|
323
337
|
@response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
|
324
338
|
end
|
325
339
|
end
|
340
|
+
|
341
|
+
it "does not sanitize null bytes by default" do
|
342
|
+
input = "foo=bla&quux=bar%00"
|
343
|
+
@rack_input = StringIO.new input
|
344
|
+
|
345
|
+
sanitize_form_data do |sanitized_input|
|
346
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
347
|
+
sanitized_input.should.be.valid_encoding
|
348
|
+
sanitized_input.should == input
|
349
|
+
end
|
350
|
+
end
|
351
|
+
|
352
|
+
it "optionally sanitizes null bytes with the replace strategy" do
|
353
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
354
|
+
input = "foo=bla\xED&quux=bar\x00"
|
355
|
+
@rack_input = StringIO.new input
|
356
|
+
|
357
|
+
sanitize_form_data do |sanitized_input|
|
358
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
359
|
+
sanitized_input.should.be.valid_encoding
|
360
|
+
sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
it "optionally sanitizes encoded null bytes with the replace strategy" do
|
365
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
366
|
+
input = "foo=bla%ED&quux=bar%00"
|
367
|
+
@rack_input = StringIO.new input
|
368
|
+
|
369
|
+
sanitize_form_data do |sanitized_input|
|
370
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
371
|
+
sanitized_input.should.be.valid_encoding
|
372
|
+
sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
it "optionally raises on null bytes with the exception strategy" do
|
377
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
378
|
+
input = "foo=bla&quux=bar\x00"
|
379
|
+
@rack_input = StringIO.new input
|
380
|
+
|
381
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
382
|
+
sanitize_form_data
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
it "optionally raises on encoded null bytes with the exception strategy" do
|
387
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
388
|
+
input = "foo=bla&quux=bar%00"
|
389
|
+
@rack_input = StringIO.new input
|
390
|
+
|
391
|
+
should.raise(Rack::UTF8Sanitizer::NullByteInString) do
|
392
|
+
sanitize_form_data
|
393
|
+
end
|
394
|
+
end
|
395
|
+
|
396
|
+
it "gives precedence to encoding errors with the exception strategy and null byte sanitisation" do
|
397
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
398
|
+
input = "foo=bla\x00&quux=bar\xED"
|
399
|
+
@rack_input = StringIO.new input
|
400
|
+
|
401
|
+
should.raise(EncodingError) do
|
402
|
+
sanitize_form_data
|
403
|
+
end
|
404
|
+
end
|
326
405
|
end
|
327
406
|
|
328
407
|
describe "with custom content-type" do
|
@@ -538,7 +617,10 @@ describe Rack::UTF8Sanitizer do
|
|
538
617
|
end
|
539
618
|
|
540
619
|
it "accepts a proc as a strategy" do
|
541
|
-
truncate = -> input
|
620
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
621
|
+
sanitize_null_bytes.should == false
|
622
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
623
|
+
end
|
542
624
|
|
543
625
|
@app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
|
544
626
|
|
@@ -549,7 +631,26 @@ describe Rack::UTF8Sanitizer do
|
|
549
631
|
sanitize_data(env) do |sanitized_input|
|
550
632
|
sanitized_input.encoding.should == Encoding::UTF_8
|
551
633
|
sanitized_input.should.be.valid_encoding
|
552
|
-
sanitized_input.should == 'replace'
|
634
|
+
sanitized_input.should == 'replace'
|
635
|
+
end
|
636
|
+
end
|
637
|
+
|
638
|
+
it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
|
639
|
+
truncate = -> (input, sanitize_null_bytes:) do
|
640
|
+
sanitize_null_bytes.should == true
|
641
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
642
|
+
end
|
643
|
+
|
644
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
|
645
|
+
input = "foo=bla&quux=bar\x00"
|
646
|
+
|
647
|
+
@rack_input = StringIO.new input
|
648
|
+
|
649
|
+
env = request_env
|
650
|
+
sanitize_data(env) do |sanitized_input|
|
651
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
652
|
+
sanitized_input.should.be.valid_encoding
|
653
|
+
sanitized_input.should == 'replace'
|
553
654
|
end
|
554
655
|
end
|
555
656
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.0
|
4
|
+
version: 1.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: '1.0'
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '
|
22
|
+
version: '4.0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,7 +29,7 @@ dependencies:
|
|
29
29
|
version: '1.0'
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '
|
32
|
+
version: '4.0'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: bacon
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +80,9 @@ executables: []
|
|
80
80
|
extensions: []
|
81
81
|
extra_rdoc_files: []
|
82
82
|
files:
|
83
|
+
- ".editorconfig"
|
84
|
+
- ".github/dependabot.yml"
|
85
|
+
- ".github/workflows/ci.yml"
|
83
86
|
- ".gitignore"
|
84
87
|
- ".travis.yml"
|
85
88
|
- CHANGELOG.md
|
@@ -94,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
|
|
94
97
|
licenses:
|
95
98
|
- MIT
|
96
99
|
metadata: {}
|
97
|
-
post_install_message:
|
100
|
+
post_install_message:
|
98
101
|
rdoc_options: []
|
99
102
|
require_paths:
|
100
103
|
- lib
|
@@ -109,9 +112,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
112
|
- !ruby/object:Gem::Version
|
110
113
|
version: '0'
|
111
114
|
requirements: []
|
112
|
-
|
113
|
-
|
114
|
-
signing_key:
|
115
|
+
rubygems_version: 3.3.15
|
116
|
+
signing_key:
|
115
117
|
specification_version: 4
|
116
118
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|
117
119
|
in request URI and headers.
|