rack-utf8_sanitizer 1.7.0 → 1.9.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4156ca74bbd8c43750cdb733ca500a1cb974492ceb823ffa50e9adaa5733d7d9
4
- data.tar.gz: 2acc566fb2020de35fa94822f3fcf018988e9166682ccf2f72f7bb9ca7c209d7
3
+ metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
4
+ data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
5
5
  SHA512:
6
- metadata.gz: 5332f698e7d2a06427fe009a1e2f368ca56c4ab04d5b4551f79689dabbce3351733d657e9df14266c6416f5627305e45e511aee701f5a2d783f1b28d7a7d4435
7
- data.tar.gz: 7df6257e5945eec1c928ab2ab9e446fe9d853c74b572732b5318e655b1b34d89557d949ee1300004ac67111fd170fddba7ab89b842dbbf65c9ca79717bdf1aa2
6
+ metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
7
+ data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
data/.editorconfig ADDED
@@ -0,0 +1,17 @@
1
+ root = true
2
+
3
+ [*]
4
+ indent_style = space
5
+ indent_size = 2
6
+ end_of_line = lf
7
+ charset = utf-8
8
+ trim_trailing_whitespace = true
9
+ insert_final_newline = true
10
+
11
+ [*.md]
12
+ indent_style = space
13
+ indent_size = 2
14
+
15
+ [*.y{a,}ml]
16
+ indent_style = space
17
+ indent_size = 2
data/.github/dependabot.yml ADDED
@@ -0,0 +1,6 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "github-actions"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
data/.github/workflows/ci.yml ADDED
@@ -0,0 +1,23 @@
1
+ name: CI
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ test:
7
+
8
+ runs-on: ubuntu-latest
9
+
10
+ strategy:
11
+ fail-fast: false
12
+ matrix:
13
+ ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
14
+
15
+ steps:
16
+ - uses: actions/checkout@v3
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ bundler-cache: true # 'bundle install' and cache gems
21
+ ruby-version: ${{ matrix.ruby }}
22
+ - name: Run tests
23
+ run: bundle exec rake
data/.travis.yml CHANGED
@@ -1,17 +1,14 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
- - 1.9.3
5
- - 2.0.0
6
- - 2.1
7
- - 2.2
8
4
  - 2.3
9
5
  - 2.4
10
6
  - 2.5
7
+ - 2.6
8
+ - 2.7
9
+ - 3.0
10
+ - 3.1
11
11
  - jruby
12
12
 
13
13
  before_install:
14
14
  - gem install bundler
15
-
16
- script:
17
- - rake spec
data/README.md CHANGED
@@ -113,7 +113,7 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
113
113
  ```
114
114
 
115
115
  ```ruby
116
- replace_string = lambda do |_invalid|
116
+ replace_string = lambda do |_invalid, sanitize_null_bytes: false|
117
117
  Rails.logger.warn('Replacing invalid string')
118
118
 
119
119
  '<Bad Encoding>'.freeze
@@ -122,6 +122,10 @@ end
122
122
  config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
123
123
  ```
124
124
 
125
+ ### Sanitizing Null Bytes
126
+
127
+ While null bytes are valid UTF-8, it can be useful to further restrict the valid character set to exclude null bytes. For example, PostgreSQL text columns do not allow storing null bytes. Passing `sanitize_null_bytes: true` in the configuration hash enables sanitizing null bytes, and the two built-in strategies both support this feature. Custom strategies should accept a keyword argument `sanitize_null_bytes` containing this configuration value.
128
+
125
129
  ## Contributing
126
130
 
127
131
  1. Fork it
data/lib/rack/utf8_sanitizer.rb CHANGED
@@ -6,6 +6,10 @@ require 'stringio'
6
6
  module Rack
7
7
  class UTF8Sanitizer
8
8
  StringIO = ::StringIO
9
+ BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
10
+ NULL_BYTE_REGEX = /\x00/.freeze
11
+
12
+ class NullByteInString < StandardError; end
9
13
 
10
14
  # options[:sanitizable_content_types] Array
11
15
  # options[:additional_content_types] Array
@@ -16,28 +20,42 @@ module Rack
16
20
  @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
17
21
  @only = Array(options[:only]).flatten
18
22
  @except = Array(options[:except]).flatten
23
+ @sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
19
24
  end
20
25
 
21
26
  def call(env)
22
- @app.call(sanitize(env))
27
+ begin
28
+ env = sanitize(env)
29
+ rescue EOFError
30
+ return BAD_REQUEST
31
+ end
32
+ @app.call(env)
23
33
  end
24
34
 
25
35
  DEFAULT_STRATEGIES = {
26
- replace: lambda do |input|
36
+ replace: lambda do |input, sanitize_null_bytes: false|
27
37
  input.
28
38
  force_encoding(Encoding::ASCII_8BIT).
29
39
  encode!(Encoding::UTF_8,
30
40
  invalid: :replace,
31
41
  undef: :replace)
42
+ if sanitize_null_bytes
43
+ input = input.gsub(NULL_BYTE_REGEX, "")
44
+ end
45
+ input
32
46
  end,
33
- exception: lambda do |input|
47
+ exception: lambda do |input, sanitize_null_bytes: false|
34
48
  input.
35
49
  force_encoding(Encoding::ASCII_8BIT).
36
50
  encode!(Encoding::UTF_8)
51
+ if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
52
+ raise NullByteInString
53
+ end
54
+ input
37
55
  end
38
56
  }.freeze
39
57
 
40
- # http://rack.rubyforge.org/doc/SPEC.html
58
+ # https://github.com/rack/rack/blob/main/SPEC.rdoc
41
59
  URI_FIELDS = %w(
42
60
  SCRIPT_NAME
43
61
  REQUEST_PATH REQUEST_URI PATH_INFO
@@ -201,7 +219,8 @@ module Rack
201
219
 
202
220
  # This regexp matches all 'unreserved' characters from RFC3986 (2.3),
203
221
  # plus all multibyte UTF-8 characters.
204
- UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
222
+ UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
223
+ UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
205
224
 
206
225
  # RFC3986, 2.2 states that the characters from 'reserved' group must be
207
226
  # protected during normalization (which is what UTF8Sanitizer does).
@@ -212,7 +231,8 @@ module Rack
212
231
  input.gsub(/%([a-f\d]{2})/i) do |encoded|
213
232
  decoded = $1.hex.chr
214
233
 
215
- if decoded =~ UNRESERVED_OR_UTF8
234
+ decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
235
+ if decoded =~ decodable_regex
216
236
  decoded
217
237
  else
218
238
  encoded
@@ -238,10 +258,10 @@ module Rack
238
258
  if input.is_a? String
239
259
  input = input.dup.force_encoding(Encoding::UTF_8)
240
260
 
241
- if input.valid_encoding?
261
+ if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
242
262
  input
243
263
  else
244
- @strategy.call(input)
264
+ @strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
245
265
  end
246
266
  else
247
267
  input
data/rack-utf8_sanitizer.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.7.0'
5
+ gem.version = '1.9.1'
6
6
  gem.authors = ["whitequark"]
7
7
  gem.license = "MIT"
8
8
  gem.email = ["whitequark@whitequark.org"]
@@ -12,13 +12,12 @@ Gem::Specification.new do |gem|
12
12
  gem.homepage = "http://github.com/whitequark/rack-utf8_sanitizer"
13
13
 
14
14
  gem.files = `git ls-files`.split($/)
15
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
16
15
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
17
16
  gem.require_paths = ["lib"]
18
17
 
19
18
  gem.required_ruby_version = '>= 1.9.3'
20
19
 
21
- gem.add_dependency "rack", '>= 1.0', '< 3.0'
20
+ gem.add_dependency "rack", '>= 1.0', '< 4.0'
22
21
 
23
22
  gem.add_development_dependency "bacon"
24
23
  gem.add_development_dependency "bacon-colored_output"
data/test/test_utf8_sanitizer.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # encoding:ascii-8bit
2
2
 
3
3
  require 'bacon/colored_output'
4
+ require 'cgi'
4
5
  require 'rack/utf8_sanitizer'
5
6
 
6
7
  describe Rack::UTF8Sanitizer do
@@ -118,6 +119,7 @@ describe Rack::UTF8Sanitizer do
118
119
  describe "with valid, not percent-encoded UTF-8 URI input" do
119
120
  before do
120
121
  @uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
122
+ @encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
121
123
  end
122
124
 
123
125
  it "does not change URI-like entity (REQUEST_PATH)" do
@@ -126,7 +128,7 @@ describe Rack::UTF8Sanitizer do
126
128
 
127
129
  result.encoding.should == Encoding::US_ASCII
128
130
  result.should.be.valid_encoding
129
- result.should == URI.encode(@uri_input)
131
+ result.should == @encoded
130
132
  end
131
133
  end
132
134
 
@@ -205,6 +207,18 @@ describe Rack::UTF8Sanitizer do
205
207
  @response_env['rack.input'].close
206
208
  end
207
209
 
210
+ class BrokenIO < StringIO
211
+ def read
212
+ raise EOFError
213
+ end
214
+ end
215
+
216
+ it "returns HTTP 400 on EOF" do
217
+ @rack_input = BrokenIO.new
218
+ @response_env = @app.(request_env)
219
+ @response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
220
+ end
221
+
208
222
  it "sanitizes StringIO rack.input" do
209
223
  input = "foo=bla&quux=bar"
210
224
  @rack_input = StringIO.new input
@@ -323,6 +337,71 @@ describe Rack::UTF8Sanitizer do
323
337
  @response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
324
338
  end
325
339
  end
340
+
341
+ it "does not sanitize null bytes by default" do
342
+ input = "foo=bla&quux=bar%00"
343
+ @rack_input = StringIO.new input
344
+
345
+ sanitize_form_data do |sanitized_input|
346
+ sanitized_input.encoding.should == Encoding::UTF_8
347
+ sanitized_input.should.be.valid_encoding
348
+ sanitized_input.should == input
349
+ end
350
+ end
351
+
352
+ it "optionally sanitizes null bytes with the replace strategy" do
353
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
354
+ input = "foo=bla\xED&quux=bar\x00"
355
+ @rack_input = StringIO.new input
356
+
357
+ sanitize_form_data do |sanitized_input|
358
+ sanitized_input.encoding.should == Encoding::UTF_8
359
+ sanitized_input.should.be.valid_encoding
360
+ sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
361
+ end
362
+ end
363
+
364
+ it "optionally sanitizes encoded null bytes with the replace strategy" do
365
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
366
+ input = "foo=bla%ED&quux=bar%00"
367
+ @rack_input = StringIO.new input
368
+
369
+ sanitize_form_data do |sanitized_input|
370
+ sanitized_input.encoding.should == Encoding::UTF_8
371
+ sanitized_input.should.be.valid_encoding
372
+ sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
373
+ end
374
+ end
375
+
376
+ it "optionally raises on null bytes with the exception strategy" do
377
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
378
+ input = "foo=bla&quux=bar\x00"
379
+ @rack_input = StringIO.new input
380
+
381
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
382
+ sanitize_form_data
383
+ end
384
+ end
385
+
386
+ it "optionally raises on encoded null bytes with the exception strategy" do
387
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
388
+ input = "foo=bla&quux=bar%00"
389
+ @rack_input = StringIO.new input
390
+
391
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
392
+ sanitize_form_data
393
+ end
394
+ end
395
+
396
+ it "gives precedence to encoding errors with the exception strategy and null byte sanitisation" do
397
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
398
+ input = "foo=bla\x00&quux=bar\xED"
399
+ @rack_input = StringIO.new input
400
+
401
+ should.raise(EncodingError) do
402
+ sanitize_form_data
403
+ end
404
+ end
326
405
  end
327
406
 
328
407
  describe "with custom content-type" do
@@ -538,7 +617,10 @@ describe Rack::UTF8Sanitizer do
538
617
  end
539
618
 
540
619
  it "accepts a proc as a strategy" do
541
- truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
620
+ truncate = -> (input, sanitize_null_bytes:) do
621
+ sanitize_null_bytes.should == false
622
+ 'replace'.force_encoding(Encoding::UTF_8)
623
+ end
542
624
 
543
625
  @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
544
626
 
@@ -549,7 +631,26 @@ describe Rack::UTF8Sanitizer do
549
631
  sanitize_data(env) do |sanitized_input|
550
632
  sanitized_input.encoding.should == Encoding::UTF_8
551
633
  sanitized_input.should.be.valid_encoding
552
- sanitized_input.should == 'replace'
634
+ sanitized_input.should == 'replace'
635
+ end
636
+ end
637
+
638
+ it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
639
+ truncate = -> (input, sanitize_null_bytes:) do
640
+ sanitize_null_bytes.should == true
641
+ 'replace'.force_encoding(Encoding::UTF_8)
642
+ end
643
+
644
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
645
+ input = "foo=bla&quux=bar\x00"
646
+
647
+ @rack_input = StringIO.new input
648
+
649
+ env = request_env
650
+ sanitize_data(env) do |sanitized_input|
651
+ sanitized_input.encoding.should == Encoding::UTF_8
652
+ sanitized_input.should.be.valid_encoding
653
+ sanitized_input.should == 'replace'
553
654
  end
554
655
  end
555
656
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - whitequark
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-05 00:00:00.000000000 Z
11
+ date: 2023-08-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -19,7 +19,7 @@ dependencies:
19
19
  version: '1.0'
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
- version: '3.0'
22
+ version: '4.0'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,7 +29,7 @@ dependencies:
29
29
  version: '1.0'
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
- version: '3.0'
32
+ version: '4.0'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: bacon
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,9 @@ executables: []
80
80
  extensions: []
81
81
  extra_rdoc_files: []
82
82
  files:
83
+ - ".editorconfig"
84
+ - ".github/dependabot.yml"
85
+ - ".github/workflows/ci.yml"
83
86
  - ".gitignore"
84
87
  - ".travis.yml"
85
88
  - CHANGELOG.md
@@ -94,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
94
97
  licenses:
95
98
  - MIT
96
99
  metadata: {}
97
- post_install_message:
100
+ post_install_message:
98
101
  rdoc_options: []
99
102
  require_paths:
100
103
  - lib
@@ -109,9 +112,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
112
  - !ruby/object:Gem::Version
110
113
  version: '0'
111
114
  requirements: []
112
- rubyforge_project:
113
- rubygems_version: 2.7.6.2
114
- signing_key:
115
+ rubygems_version: 3.3.15
116
+ signing_key:
115
117
  specification_version: 4
116
118
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
117
119
  in request URI and headers.