rack-utf8_sanitizer 1.7.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4156ca74bbd8c43750cdb733ca500a1cb974492ceb823ffa50e9adaa5733d7d9
4
- data.tar.gz: 2acc566fb2020de35fa94822f3fcf018988e9166682ccf2f72f7bb9ca7c209d7
3
+ metadata.gz: 9893353fe731d6c942263ff57af911b10d1c9f88ba2c2c2b56bb7a76586935e5
4
+ data.tar.gz: eeeaca39f55d680abe2707fa1aaadfe34eed06de2dd848506d184ae4ff326a8d
5
5
  SHA512:
6
- metadata.gz: 5332f698e7d2a06427fe009a1e2f368ca56c4ab04d5b4551f79689dabbce3351733d657e9df14266c6416f5627305e45e511aee701f5a2d783f1b28d7a7d4435
7
- data.tar.gz: 7df6257e5945eec1c928ab2ab9e446fe9d853c74b572732b5318e655b1b34d89557d949ee1300004ac67111fd170fddba7ab89b842dbbf65c9ca79717bdf1aa2
6
+ metadata.gz: 28cfbadcc1fbbf0678db3b51259618a9c9897c0cb78bcd8b8f5d87c91eddb44873a7a77fb21b9db47bcc704cb7df17d942c50436475858d0d9fcc17c2a8b59ce
7
+ data.tar.gz: 7e781931df60e405d533f5eb832416510a431a1c6fa74baeabffb9499533d9eba2f97ffa24f89ed79a193de5f6715c0461fff4fb0403c863a3643e4aaabbcf0a
data/.editorconfig ADDED
@@ -0,0 +1,17 @@
1
+ root = true
2
+
3
+ [*]
4
+ indent_style = space
5
+ indent_size = 2
6
+ end_of_line = lf
7
+ charset = utf-8
8
+ trim_trailing_whitespace = true
9
+ insert_final_newline = true
10
+
11
+ [*.md]
12
+ indent_style = space
13
+ indent_size = 2
14
+
15
+ [*.y{a,}ml]
16
+ indent_style = space
17
+ indent_size = 2
data/.github/dependabot.yml ADDED
@@ -0,0 +1,6 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "github-actions"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
data/.github/workflows/ci.yml ADDED
@@ -0,0 +1,23 @@
1
+ name: CI
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ test:
7
+
8
+ runs-on: ubuntu-latest
9
+
10
+ strategy:
11
+ fail-fast: false
12
+ matrix:
13
+ ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
14
+
15
+ steps:
16
+ - uses: actions/checkout@v3
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ bundler-cache: true # 'bundle install' and cache gems
21
+ ruby-version: ${{ matrix.ruby }}
22
+ - name: Run tests
23
+ run: bundle exec rake
data/.travis.yml CHANGED
@@ -1,17 +1,14 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
- - 1.9.3
5
- - 2.0.0
6
- - 2.1
7
- - 2.2
8
4
  - 2.3
9
5
  - 2.4
10
6
  - 2.5
7
+ - 2.6
8
+ - 2.7
9
+ - 3.0
10
+ - 3.1
11
11
  - jruby
12
12
 
13
13
  before_install:
14
14
  - gem install bundler
15
-
16
- script:
17
- - rake spec
data/lib/rack/utf8_sanitizer.rb CHANGED
@@ -6,6 +6,10 @@ require 'stringio'
6
6
  module Rack
7
7
  class UTF8Sanitizer
8
8
  StringIO = ::StringIO
9
+ BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
10
+ NULL_BYTE_REGEX = /\x00/.freeze
11
+
12
+ class NullByteInString < StandardError; end
9
13
 
10
14
  # options[:sanitizable_content_types] Array
11
15
  # options[:additional_content_types] Array
@@ -16,28 +20,40 @@ module Rack
16
20
  @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
17
21
  @only = Array(options[:only]).flatten
18
22
  @except = Array(options[:except]).flatten
23
+ @sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
19
24
  end
20
25
 
21
26
  def call(env)
22
- @app.call(sanitize(env))
27
+ begin
28
+ env = sanitize(env)
29
+ rescue EOFError
30
+ return BAD_REQUEST
31
+ end
32
+ @app.call(env)
23
33
  end
24
34
 
25
35
  DEFAULT_STRATEGIES = {
26
- replace: lambda do |input|
36
+ replace: lambda do |input, sanitize_null_bytes: false|
37
+ if sanitize_null_bytes
38
+ input = input.gsub(NULL_BYTE_REGEX, "")
39
+ end
27
40
  input.
28
41
  force_encoding(Encoding::ASCII_8BIT).
29
42
  encode!(Encoding::UTF_8,
30
43
  invalid: :replace,
31
44
  undef: :replace)
32
45
  end,
33
- exception: lambda do |input|
46
+ exception: lambda do |input, sanitize_null_bytes: false|
47
+ if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
48
+ raise NullByteInString
49
+ end
34
50
  input.
35
51
  force_encoding(Encoding::ASCII_8BIT).
36
52
  encode!(Encoding::UTF_8)
37
53
  end
38
54
  }.freeze
39
55
 
40
- # http://rack.rubyforge.org/doc/SPEC.html
56
+ # https://github.com/rack/rack/blob/main/SPEC.rdoc
41
57
  URI_FIELDS = %w(
42
58
  SCRIPT_NAME
43
59
  REQUEST_PATH REQUEST_URI PATH_INFO
@@ -201,7 +217,8 @@ module Rack
201
217
 
202
218
  # This regexp matches all 'unreserved' characters from RFC3986 (2.3),
203
219
  # plus all multibyte UTF-8 characters.
204
- UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
220
+ UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
221
+ UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
205
222
 
206
223
  # RFC3986, 2.2 states that the characters from 'reserved' group must be
207
224
  # protected during normalization (which is what UTF8Sanitizer does).
@@ -212,7 +229,8 @@ module Rack
212
229
  input.gsub(/%([a-f\d]{2})/i) do |encoded|
213
230
  decoded = $1.hex.chr
214
231
 
215
- if decoded =~ UNRESERVED_OR_UTF8
232
+ decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
233
+ if decoded =~ decodable_regex
216
234
  decoded
217
235
  else
218
236
  encoded
@@ -238,10 +256,10 @@ module Rack
238
256
  if input.is_a? String
239
257
  input = input.dup.force_encoding(Encoding::UTF_8)
240
258
 
241
- if input.valid_encoding?
259
+ if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
242
260
  input
243
261
  else
244
- @strategy.call(input)
262
+ @strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
245
263
  end
246
264
  else
247
265
  input
data/rack-utf8_sanitizer.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.7.0'
5
+ gem.version = '1.9.0'
6
6
  gem.authors = ["whitequark"]
7
7
  gem.license = "MIT"
8
8
  gem.email = ["whitequark@whitequark.org"]
@@ -12,13 +12,12 @@ Gem::Specification.new do |gem|
12
12
  gem.homepage = "http://github.com/whitequark/rack-utf8_sanitizer"
13
13
 
14
14
  gem.files = `git ls-files`.split($/)
15
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
16
15
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
17
16
  gem.require_paths = ["lib"]
18
17
 
19
18
  gem.required_ruby_version = '>= 1.9.3'
20
19
 
21
- gem.add_dependency "rack", '>= 1.0', '< 3.0'
20
+ gem.add_dependency "rack", '>= 1.0', '< 4.0'
22
21
 
23
22
  gem.add_development_dependency "bacon"
24
23
  gem.add_development_dependency "bacon-colored_output"
@@ -1,6 +1,7 @@
1
1
  # encoding:ascii-8bit
2
2
 
3
3
  require 'bacon/colored_output'
4
+ require 'cgi'
4
5
  require 'rack/utf8_sanitizer'
5
6
 
6
7
  describe Rack::UTF8Sanitizer do
@@ -118,6 +119,7 @@ describe Rack::UTF8Sanitizer do
118
119
  describe "with valid, not percent-encoded UTF-8 URI input" do
119
120
  before do
120
121
  @uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
122
+ @encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
121
123
  end
122
124
 
123
125
  it "does not change URI-like entity (REQUEST_PATH)" do
@@ -126,7 +128,7 @@ describe Rack::UTF8Sanitizer do
126
128
 
127
129
  result.encoding.should == Encoding::US_ASCII
128
130
  result.should.be.valid_encoding
129
- result.should == URI.encode(@uri_input)
131
+ result.should == @encoded
130
132
  end
131
133
  end
132
134
 
@@ -205,6 +207,18 @@ describe Rack::UTF8Sanitizer do
205
207
  @response_env['rack.input'].close
206
208
  end
207
209
 
210
+ class BrokenIO < StringIO
211
+ def read
212
+ raise EOFError
213
+ end
214
+ end
215
+
216
+ it "returns HTTP 400 on EOF" do
217
+ @rack_input = BrokenIO.new
218
+ @response_env = @app.(request_env)
219
+ @response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
220
+ end
221
+
208
222
  it "sanitizes StringIO rack.input" do
209
223
  input = "foo=bla&quux=bar"
210
224
  @rack_input = StringIO.new input
@@ -323,6 +337,61 @@ describe Rack::UTF8Sanitizer do
323
337
  @response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
324
338
  end
325
339
  end
340
+
341
+ it "does not sanitize null bytes by default" do
342
+ input = "foo=bla&quux=bar%00"
343
+ @rack_input = StringIO.new input
344
+
345
+ sanitize_form_data do |sanitized_input|
346
+ sanitized_input.encoding.should == Encoding::UTF_8
347
+ sanitized_input.should.be.valid_encoding
348
+ sanitized_input.should == input
349
+ end
350
+ end
351
+
352
+ it "optionally sanitizes null bytes with the replace strategy" do
353
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
354
+ input = "foo=bla&quux=bar\x00"
355
+ @rack_input = StringIO.new input
356
+
357
+ sanitize_form_data do |sanitized_input|
358
+ sanitized_input.encoding.should == Encoding::UTF_8
359
+ sanitized_input.should.be.valid_encoding
360
+ sanitized_input.should == "foo=bla&quux=bar"
361
+ end
362
+ end
363
+
364
+ it "optionally sanitizes encoded null bytes with the replace strategy" do
365
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
366
+ input = "foo=bla&quux=bar%00"
367
+ @rack_input = StringIO.new input
368
+
369
+ sanitize_form_data do |sanitized_input|
370
+ sanitized_input.encoding.should == Encoding::UTF_8
371
+ sanitized_input.should.be.valid_encoding
372
+ sanitized_input.should == "foo=bla&quux=bar"
373
+ end
374
+ end
375
+
376
+ it "optionally raises on null bytes with the exception strategy" do
377
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
378
+ input = "foo=bla&quux=bar\x00"
379
+ @rack_input = StringIO.new input
380
+
381
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
382
+ sanitize_form_data
383
+ end
384
+ end
385
+
386
+ it "optionally raises on encoded null bytes with the exception strategy" do
387
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
388
+ input = "foo=bla&quux=bar%00"
389
+ @rack_input = StringIO.new input
390
+
391
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
392
+ sanitize_form_data
393
+ end
394
+ end
326
395
  end
327
396
 
328
397
  describe "with custom content-type" do
@@ -538,7 +607,10 @@ describe Rack::UTF8Sanitizer do
538
607
  end
539
608
 
540
609
  it "accepts a proc as a strategy" do
541
- truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
610
+ truncate = -> (input, sanitize_null_bytes:) do
611
+ sanitize_null_bytes.should == false
612
+ 'replace'.force_encoding(Encoding::UTF_8)
613
+ end
542
614
 
543
615
  @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
544
616
 
@@ -549,7 +621,26 @@ describe Rack::UTF8Sanitizer do
549
621
  sanitize_data(env) do |sanitized_input|
550
622
  sanitized_input.encoding.should == Encoding::UTF_8
551
623
  sanitized_input.should.be.valid_encoding
552
- sanitized_input.should == 'replace'
624
+ sanitized_input.should == 'replace'
625
+ end
626
+ end
627
+
628
+ it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
629
+ truncate = -> (input, sanitize_null_bytes:) do
630
+ sanitize_null_bytes.should == true
631
+ 'replace'.force_encoding(Encoding::UTF_8)
632
+ end
633
+
634
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
635
+ input = "foo=bla&quux=bar\x00"
636
+
637
+ @rack_input = StringIO.new input
638
+
639
+ env = request_env
640
+ sanitize_data(env) do |sanitized_input|
641
+ sanitized_input.encoding.should == Encoding::UTF_8
642
+ sanitized_input.should.be.valid_encoding
643
+ sanitized_input.should == 'replace'
553
644
  end
554
645
  end
555
646
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - whitequark
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-05 00:00:00.000000000 Z
11
+ date: 2023-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -19,7 +19,7 @@ dependencies:
19
19
  version: '1.0'
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
- version: '3.0'
22
+ version: '4.0'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,7 +29,7 @@ dependencies:
29
29
  version: '1.0'
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
- version: '3.0'
32
+ version: '4.0'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: bacon
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,9 @@ executables: []
80
80
  extensions: []
81
81
  extra_rdoc_files: []
82
82
  files:
83
+ - ".editorconfig"
84
+ - ".github/dependabot.yml"
85
+ - ".github/workflows/ci.yml"
83
86
  - ".gitignore"
84
87
  - ".travis.yml"
85
88
  - CHANGELOG.md
@@ -94,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
94
97
  licenses:
95
98
  - MIT
96
99
  metadata: {}
97
- post_install_message:
100
+ post_install_message:
98
101
  rdoc_options: []
99
102
  require_paths:
100
103
  - lib
@@ -109,9 +112,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
112
  - !ruby/object:Gem::Version
110
113
  version: '0'
111
114
  requirements: []
112
- rubyforge_project:
113
- rubygems_version: 2.7.6.2
114
- signing_key:
115
+ rubygems_version: 3.2.5
116
+ signing_key:
115
117
  specification_version: 4
116
118
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
117
119
  in request URI and headers.