rack-utf8_sanitizer 1.7.0 → 1.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4156ca74bbd8c43750cdb733ca500a1cb974492ceb823ffa50e9adaa5733d7d9
4
- data.tar.gz: 2acc566fb2020de35fa94822f3fcf018988e9166682ccf2f72f7bb9ca7c209d7
3
+ metadata.gz: 9893353fe731d6c942263ff57af911b10d1c9f88ba2c2c2b56bb7a76586935e5
4
+ data.tar.gz: eeeaca39f55d680abe2707fa1aaadfe34eed06de2dd848506d184ae4ff326a8d
5
5
  SHA512:
6
- metadata.gz: 5332f698e7d2a06427fe009a1e2f368ca56c4ab04d5b4551f79689dabbce3351733d657e9df14266c6416f5627305e45e511aee701f5a2d783f1b28d7a7d4435
7
- data.tar.gz: 7df6257e5945eec1c928ab2ab9e446fe9d853c74b572732b5318e655b1b34d89557d949ee1300004ac67111fd170fddba7ab89b842dbbf65c9ca79717bdf1aa2
6
+ metadata.gz: 28cfbadcc1fbbf0678db3b51259618a9c9897c0cb78bcd8b8f5d87c91eddb44873a7a77fb21b9db47bcc704cb7df17d942c50436475858d0d9fcc17c2a8b59ce
7
+ data.tar.gz: 7e781931df60e405d533f5eb832416510a431a1c6fa74baeabffb9499533d9eba2f97ffa24f89ed79a193de5f6715c0461fff4fb0403c863a3643e4aaabbcf0a
data/.editorconfig ADDED
@@ -0,0 +1,17 @@
1
+ root = true
2
+
3
+ [*]
4
+ indent_style = space
5
+ indent_size = 2
6
+ end_of_line = lf
7
+ charset = utf-8
8
+ trim_trailing_whitespace = true
9
+ insert_final_newline = true
10
+
11
+ [*.md]
12
+ indent_style = space
13
+ indent_size = 2
14
+
15
+ [*.y{a,}ml]
16
+ indent_style = space
17
+ indent_size = 2
data/.github/dependabot.yml ADDED
@@ -0,0 +1,6 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "github-actions"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
data/.github/workflows/ci.yml ADDED
@@ -0,0 +1,23 @@
1
+ name: CI
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ test:
7
+
8
+ runs-on: ubuntu-latest
9
+
10
+ strategy:
11
+ fail-fast: false
12
+ matrix:
13
+ ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
14
+
15
+ steps:
16
+ - uses: actions/checkout@v3
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ bundler-cache: true # 'bundle install' and cache gems
21
+ ruby-version: ${{ matrix.ruby }}
22
+ - name: Run tests
23
+ run: bundle exec rake
data/.travis.yml CHANGED
@@ -1,17 +1,14 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
- - 1.9.3
5
- - 2.0.0
6
- - 2.1
7
- - 2.2
8
4
  - 2.3
9
5
  - 2.4
10
6
  - 2.5
7
+ - 2.6
8
+ - 2.7
9
+ - 3.0
10
+ - 3.1
11
11
  - jruby
12
12
 
13
13
  before_install:
14
14
  - gem install bundler
15
-
16
- script:
17
- - rake spec
data/lib/rack/utf8_sanitizer.rb CHANGED
@@ -6,6 +6,10 @@ require 'stringio'
6
6
  module Rack
7
7
  class UTF8Sanitizer
8
8
  StringIO = ::StringIO
9
+ BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
10
+ NULL_BYTE_REGEX = /\x00/.freeze
11
+
12
+ class NullByteInString < StandardError; end
9
13
 
10
14
  # options[:sanitizable_content_types] Array
11
15
  # options[:additional_content_types] Array
@@ -16,28 +20,40 @@ module Rack
16
20
  @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
17
21
  @only = Array(options[:only]).flatten
18
22
  @except = Array(options[:except]).flatten
23
+ @sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
19
24
  end
20
25
 
21
26
  def call(env)
22
- @app.call(sanitize(env))
27
+ begin
28
+ env = sanitize(env)
29
+ rescue EOFError
30
+ return BAD_REQUEST
31
+ end
32
+ @app.call(env)
23
33
  end
24
34
 
25
35
  DEFAULT_STRATEGIES = {
26
- replace: lambda do |input|
36
+ replace: lambda do |input, sanitize_null_bytes: false|
37
+ if sanitize_null_bytes
38
+ input = input.gsub(NULL_BYTE_REGEX, "")
39
+ end
27
40
  input.
28
41
  force_encoding(Encoding::ASCII_8BIT).
29
42
  encode!(Encoding::UTF_8,
30
43
  invalid: :replace,
31
44
  undef: :replace)
32
45
  end,
33
- exception: lambda do |input|
46
+ exception: lambda do |input, sanitize_null_bytes: false|
47
+ if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
48
+ raise NullByteInString
49
+ end
34
50
  input.
35
51
  force_encoding(Encoding::ASCII_8BIT).
36
52
  encode!(Encoding::UTF_8)
37
53
  end
38
54
  }.freeze
39
55
 
40
- # http://rack.rubyforge.org/doc/SPEC.html
56
+ # https://github.com/rack/rack/blob/main/SPEC.rdoc
41
57
  URI_FIELDS = %w(
42
58
  SCRIPT_NAME
43
59
  REQUEST_PATH REQUEST_URI PATH_INFO
@@ -201,7 +217,8 @@ module Rack
201
217
 
202
218
  # This regexp matches all 'unreserved' characters from RFC3986 (2.3),
203
219
  # plus all multibyte UTF-8 characters.
204
- UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
220
+ UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
221
+ UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
205
222
 
206
223
  # RFC3986, 2.2 states that the characters from 'reserved' group must be
207
224
  # protected during normalization (which is what UTF8Sanitizer does).
@@ -212,7 +229,8 @@ module Rack
212
229
  input.gsub(/%([a-f\d]{2})/i) do |encoded|
213
230
  decoded = $1.hex.chr
214
231
 
215
- if decoded =~ UNRESERVED_OR_UTF8
232
+ decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
233
+ if decoded =~ decodable_regex
216
234
  decoded
217
235
  else
218
236
  encoded
@@ -238,10 +256,10 @@ module Rack
238
256
  if input.is_a? String
239
257
  input = input.dup.force_encoding(Encoding::UTF_8)
240
258
 
241
- if input.valid_encoding?
259
+ if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
242
260
  input
243
261
  else
244
- @strategy.call(input)
262
+ @strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
245
263
  end
246
264
  else
247
265
  input
data/rack-utf8_sanitizer.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.7.0'
5
+ gem.version = '1.9.0'
6
6
  gem.authors = ["whitequark"]
7
7
  gem.license = "MIT"
8
8
  gem.email = ["whitequark@whitequark.org"]
@@ -12,13 +12,12 @@ Gem::Specification.new do |gem|
12
12
  gem.homepage = "http://github.com/whitequark/rack-utf8_sanitizer"
13
13
 
14
14
  gem.files = `git ls-files`.split($/)
15
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
16
15
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
17
16
  gem.require_paths = ["lib"]
18
17
 
19
18
  gem.required_ruby_version = '>= 1.9.3'
20
19
 
21
- gem.add_dependency "rack", '>= 1.0', '< 3.0'
20
+ gem.add_dependency "rack", '>= 1.0', '< 4.0'
22
21
 
23
22
  gem.add_development_dependency "bacon"
24
23
  gem.add_development_dependency "bacon-colored_output"
data/test/test_utf8_sanitizer.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # encoding:ascii-8bit
2
2
 
3
3
  require 'bacon/colored_output'
4
+ require 'cgi'
4
5
  require 'rack/utf8_sanitizer'
5
6
 
6
7
  describe Rack::UTF8Sanitizer do
@@ -118,6 +119,7 @@ describe Rack::UTF8Sanitizer do
118
119
  describe "with valid, not percent-encoded UTF-8 URI input" do
119
120
  before do
120
121
  @uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
122
+ @encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
121
123
  end
122
124
 
123
125
  it "does not change URI-like entity (REQUEST_PATH)" do
@@ -126,7 +128,7 @@ describe Rack::UTF8Sanitizer do
126
128
 
127
129
  result.encoding.should == Encoding::US_ASCII
128
130
  result.should.be.valid_encoding
129
- result.should == URI.encode(@uri_input)
131
+ result.should == @encoded
130
132
  end
131
133
  end
132
134
 
@@ -205,6 +207,18 @@ describe Rack::UTF8Sanitizer do
205
207
  @response_env['rack.input'].close
206
208
  end
207
209
 
210
+ class BrokenIO < StringIO
211
+ def read
212
+ raise EOFError
213
+ end
214
+ end
215
+
216
+ it "returns HTTP 400 on EOF" do
217
+ @rack_input = BrokenIO.new
218
+ @response_env = @app.(request_env)
219
+ @response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
220
+ end
221
+
208
222
  it "sanitizes StringIO rack.input" do
209
223
  input = "foo=bla&quux=bar"
210
224
  @rack_input = StringIO.new input
@@ -323,6 +337,61 @@ describe Rack::UTF8Sanitizer do
323
337
  @response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
324
338
  end
325
339
  end
340
+
341
+ it "does not sanitize null bytes by default" do
342
+ input = "foo=bla&quux=bar%00"
343
+ @rack_input = StringIO.new input
344
+
345
+ sanitize_form_data do |sanitized_input|
346
+ sanitized_input.encoding.should == Encoding::UTF_8
347
+ sanitized_input.should.be.valid_encoding
348
+ sanitized_input.should == input
349
+ end
350
+ end
351
+
352
+ it "optionally sanitizes null bytes with the replace strategy" do
353
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
354
+ input = "foo=bla&quux=bar\x00"
355
+ @rack_input = StringIO.new input
356
+
357
+ sanitize_form_data do |sanitized_input|
358
+ sanitized_input.encoding.should == Encoding::UTF_8
359
+ sanitized_input.should.be.valid_encoding
360
+ sanitized_input.should == "foo=bla&quux=bar"
361
+ end
362
+ end
363
+
364
+ it "optionally sanitizes encoded null bytes with the replace strategy" do
365
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
366
+ input = "foo=bla&quux=bar%00"
367
+ @rack_input = StringIO.new input
368
+
369
+ sanitize_form_data do |sanitized_input|
370
+ sanitized_input.encoding.should == Encoding::UTF_8
371
+ sanitized_input.should.be.valid_encoding
372
+ sanitized_input.should == "foo=bla&quux=bar"
373
+ end
374
+ end
375
+
376
+ it "optionally raises on null bytes with the exception strategy" do
377
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
378
+ input = "foo=bla&quux=bar\x00"
379
+ @rack_input = StringIO.new input
380
+
381
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
382
+ sanitize_form_data
383
+ end
384
+ end
385
+
386
+ it "optionally raises on encoded null bytes with the exception strategy" do
387
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
388
+ input = "foo=bla&quux=bar%00"
389
+ @rack_input = StringIO.new input
390
+
391
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
392
+ sanitize_form_data
393
+ end
394
+ end
326
395
  end
327
396
 
328
397
  describe "with custom content-type" do
@@ -538,7 +607,10 @@ describe Rack::UTF8Sanitizer do
538
607
  end
539
608
 
540
609
  it "accepts a proc as a strategy" do
541
- truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
610
+ truncate = -> (input, sanitize_null_bytes:) do
611
+ sanitize_null_bytes.should == false
612
+ 'replace'.force_encoding(Encoding::UTF_8)
613
+ end
542
614
 
543
615
  @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
544
616
 
@@ -549,7 +621,26 @@ describe Rack::UTF8Sanitizer do
549
621
  sanitize_data(env) do |sanitized_input|
550
622
  sanitized_input.encoding.should == Encoding::UTF_8
551
623
  sanitized_input.should.be.valid_encoding
552
- sanitized_input.should == 'replace'
624
+ sanitized_input.should == 'replace'
625
+ end
626
+ end
627
+
628
+ it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
629
+ truncate = -> (input, sanitize_null_bytes:) do
630
+ sanitize_null_bytes.should == true
631
+ 'replace'.force_encoding(Encoding::UTF_8)
632
+ end
633
+
634
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
635
+ input = "foo=bla&quux=bar\x00"
636
+
637
+ @rack_input = StringIO.new input
638
+
639
+ env = request_env
640
+ sanitize_data(env) do |sanitized_input|
641
+ sanitized_input.encoding.should == Encoding::UTF_8
642
+ sanitized_input.should.be.valid_encoding
643
+ sanitized_input.should == 'replace'
553
644
  end
554
645
  end
555
646
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - whitequark
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-05 00:00:00.000000000 Z
11
+ date: 2023-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -19,7 +19,7 @@ dependencies:
19
19
  version: '1.0'
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
- version: '3.0'
22
+ version: '4.0'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,7 +29,7 @@ dependencies:
29
29
  version: '1.0'
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
- version: '3.0'
32
+ version: '4.0'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: bacon
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,9 @@ executables: []
80
80
  extensions: []
81
81
  extra_rdoc_files: []
82
82
  files:
83
+ - ".editorconfig"
84
+ - ".github/dependabot.yml"
85
+ - ".github/workflows/ci.yml"
83
86
  - ".gitignore"
84
87
  - ".travis.yml"
85
88
  - CHANGELOG.md
@@ -94,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
94
97
  licenses:
95
98
  - MIT
96
99
  metadata: {}
97
- post_install_message:
100
+ post_install_message:
98
101
  rdoc_options: []
99
102
  require_paths:
100
103
  - lib
@@ -109,9 +112,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
112
  - !ruby/object:Gem::Version
110
113
  version: '0'
111
114
  requirements: []
112
- rubyforge_project:
113
- rubygems_version: 2.7.6.2
114
- signing_key:
115
+ rubygems_version: 3.2.5
116
+ signing_key:
115
117
  specification_version: 4
116
118
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
117
119
  in request URI and headers.