rack-utf8_sanitizer 1.7.0 → 1.9.1

This diff shows the changes between two publicly released versions of the package, as they appear in the supported registry they were published to. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4156ca74bbd8c43750cdb733ca500a1cb974492ceb823ffa50e9adaa5733d7d9
4
- data.tar.gz: 2acc566fb2020de35fa94822f3fcf018988e9166682ccf2f72f7bb9ca7c209d7
3
+ metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
4
+ data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
5
5
  SHA512:
6
- metadata.gz: 5332f698e7d2a06427fe009a1e2f368ca56c4ab04d5b4551f79689dabbce3351733d657e9df14266c6416f5627305e45e511aee701f5a2d783f1b28d7a7d4435
7
- data.tar.gz: 7df6257e5945eec1c928ab2ab9e446fe9d853c74b572732b5318e655b1b34d89557d949ee1300004ac67111fd170fddba7ab89b842dbbf65c9ca79717bdf1aa2
6
+ metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
7
+ data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
data/.editorconfig ADDED
@@ -0,0 +1,17 @@
1
+ root = true
2
+
3
+ [*]
4
+ indent_style = space
5
+ indent_size = 2
6
+ end_of_line = lf
7
+ charset = utf-8
8
+ trim_trailing_whitespace = true
9
+ insert_final_newline = true
10
+
11
+ [*.md]
12
+ indent_style = space
13
+ indent_size = 2
14
+
15
+ [*.y{a,}ml]
16
+ indent_style = space
17
+ indent_size = 2
data/.github/dependabot.yml ADDED
@@ -0,0 +1,6 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "github-actions"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
data/.github/workflows/ci.yml ADDED
@@ -0,0 +1,23 @@
1
+ name: CI
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ test:
7
+
8
+ runs-on: ubuntu-latest
9
+
10
+ strategy:
11
+ fail-fast: false
12
+ matrix:
13
+ ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
14
+
15
+ steps:
16
+ - uses: actions/checkout@v3
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ bundler-cache: true # 'bundle install' and cache gems
21
+ ruby-version: ${{ matrix.ruby }}
22
+ - name: Run tests
23
+ run: bundle exec rake
data/.travis.yml CHANGED
@@ -1,17 +1,14 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
- - 1.9.3
5
- - 2.0.0
6
- - 2.1
7
- - 2.2
8
4
  - 2.3
9
5
  - 2.4
10
6
  - 2.5
7
+ - 2.6
8
+ - 2.7
9
+ - 3.0
10
+ - 3.1
11
11
  - jruby
12
12
 
13
13
  before_install:
14
14
  - gem install bundler
15
-
16
- script:
17
- - rake spec
data/README.md CHANGED
@@ -113,7 +113,7 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
113
113
  ```
114
114
 
115
115
  ```ruby
116
- replace_string = lambda do |_invalid|
116
+ replace_string = lambda do |_invalid, sanitize_null_bytes: false|
117
117
  Rails.logger.warn('Replacing invalid string')
118
118
 
119
119
  '<Bad Encoding>'.freeze
@@ -122,6 +122,10 @@ end
122
122
  config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
123
123
  ```
124
124
 
125
+ ### Sanitizing Null Bytes
126
+
127
+ While null bytes are valid UTF-8, it can be useful to further restrict the valid character set to exclude null bytes. For example, PostgreSQL text columns do not allow storing null bytes. Passing `sanitize_null_bytes: true` in the configuration hash enables sanitizing null bytes, and the two built-in strategies both support this feature. Custom strategies should accept a keyword argument `sanitize_null_bytes` containing this configuration value.
128
+
125
129
  ## Contributing
126
130
 
127
131
  1. Fork it
data/lib/rack/utf8_sanitizer.rb CHANGED
@@ -6,6 +6,10 @@ require 'stringio'
6
6
  module Rack
7
7
  class UTF8Sanitizer
8
8
  StringIO = ::StringIO
9
+ BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
10
+ NULL_BYTE_REGEX = /\x00/.freeze
11
+
12
+ class NullByteInString < StandardError; end
9
13
 
10
14
  # options[:sanitizable_content_types] Array
11
15
  # options[:additional_content_types] Array
@@ -16,28 +20,42 @@ module Rack
16
20
  @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
17
21
  @only = Array(options[:only]).flatten
18
22
  @except = Array(options[:except]).flatten
23
+ @sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
19
24
  end
20
25
 
21
26
  def call(env)
22
- @app.call(sanitize(env))
27
+ begin
28
+ env = sanitize(env)
29
+ rescue EOFError
30
+ return BAD_REQUEST
31
+ end
32
+ @app.call(env)
23
33
  end
24
34
 
25
35
  DEFAULT_STRATEGIES = {
26
- replace: lambda do |input|
36
+ replace: lambda do |input, sanitize_null_bytes: false|
27
37
  input.
28
38
  force_encoding(Encoding::ASCII_8BIT).
29
39
  encode!(Encoding::UTF_8,
30
40
  invalid: :replace,
31
41
  undef: :replace)
42
+ if sanitize_null_bytes
43
+ input = input.gsub(NULL_BYTE_REGEX, "")
44
+ end
45
+ input
32
46
  end,
33
- exception: lambda do |input|
47
+ exception: lambda do |input, sanitize_null_bytes: false|
34
48
  input.
35
49
  force_encoding(Encoding::ASCII_8BIT).
36
50
  encode!(Encoding::UTF_8)
51
+ if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
52
+ raise NullByteInString
53
+ end
54
+ input
37
55
  end
38
56
  }.freeze
39
57
 
40
- # http://rack.rubyforge.org/doc/SPEC.html
58
+ # https://github.com/rack/rack/blob/main/SPEC.rdoc
41
59
  URI_FIELDS = %w(
42
60
  SCRIPT_NAME
43
61
  REQUEST_PATH REQUEST_URI PATH_INFO
@@ -201,7 +219,8 @@ module Rack
201
219
 
202
220
  # This regexp matches all 'unreserved' characters from RFC3986 (2.3),
203
221
  # plus all multibyte UTF-8 characters.
204
- UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
222
+ UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
223
+ UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
205
224
 
206
225
  # RFC3986, 2.2 states that the characters from 'reserved' group must be
207
226
  # protected during normalization (which is what UTF8Sanitizer does).
@@ -212,7 +231,8 @@ module Rack
212
231
  input.gsub(/%([a-f\d]{2})/i) do |encoded|
213
232
  decoded = $1.hex.chr
214
233
 
215
- if decoded =~ UNRESERVED_OR_UTF8
234
+ decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
235
+ if decoded =~ decodable_regex
216
236
  decoded
217
237
  else
218
238
  encoded
@@ -238,10 +258,10 @@ module Rack
238
258
  if input.is_a? String
239
259
  input = input.dup.force_encoding(Encoding::UTF_8)
240
260
 
241
- if input.valid_encoding?
261
+ if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
242
262
  input
243
263
  else
244
- @strategy.call(input)
264
+ @strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
245
265
  end
246
266
  else
247
267
  input
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.7.0'
5
+ gem.version = '1.9.1'
6
6
  gem.authors = ["whitequark"]
7
7
  gem.license = "MIT"
8
8
  gem.email = ["whitequark@whitequark.org"]
@@ -12,13 +12,12 @@ Gem::Specification.new do |gem|
12
12
  gem.homepage = "http://github.com/whitequark/rack-utf8_sanitizer"
13
13
 
14
14
  gem.files = `git ls-files`.split($/)
15
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
16
15
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
17
16
  gem.require_paths = ["lib"]
18
17
 
19
18
  gem.required_ruby_version = '>= 1.9.3'
20
19
 
21
- gem.add_dependency "rack", '>= 1.0', '< 3.0'
20
+ gem.add_dependency "rack", '>= 1.0', '< 4.0'
22
21
 
23
22
  gem.add_development_dependency "bacon"
24
23
  gem.add_development_dependency "bacon-colored_output"
@@ -1,6 +1,7 @@
1
1
  # encoding:ascii-8bit
2
2
 
3
3
  require 'bacon/colored_output'
4
+ require 'cgi'
4
5
  require 'rack/utf8_sanitizer'
5
6
 
6
7
  describe Rack::UTF8Sanitizer do
@@ -118,6 +119,7 @@ describe Rack::UTF8Sanitizer do
118
119
  describe "with valid, not percent-encoded UTF-8 URI input" do
119
120
  before do
120
121
  @uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
122
+ @encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
121
123
  end
122
124
 
123
125
  it "does not change URI-like entity (REQUEST_PATH)" do
@@ -126,7 +128,7 @@ describe Rack::UTF8Sanitizer do
126
128
 
127
129
  result.encoding.should == Encoding::US_ASCII
128
130
  result.should.be.valid_encoding
129
- result.should == URI.encode(@uri_input)
131
+ result.should == @encoded
130
132
  end
131
133
  end
132
134
 
@@ -205,6 +207,18 @@ describe Rack::UTF8Sanitizer do
205
207
  @response_env['rack.input'].close
206
208
  end
207
209
 
210
+ class BrokenIO < StringIO
211
+ def read
212
+ raise EOFError
213
+ end
214
+ end
215
+
216
+ it "returns HTTP 400 on EOF" do
217
+ @rack_input = BrokenIO.new
218
+ @response_env = @app.(request_env)
219
+ @response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
220
+ end
221
+
208
222
  it "sanitizes StringIO rack.input" do
209
223
  input = "foo=bla&quux=bar"
210
224
  @rack_input = StringIO.new input
@@ -323,6 +337,71 @@ describe Rack::UTF8Sanitizer do
323
337
  @response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
324
338
  end
325
339
  end
340
+
341
+ it "does not sanitize null bytes by default" do
342
+ input = "foo=bla&quux=bar%00"
343
+ @rack_input = StringIO.new input
344
+
345
+ sanitize_form_data do |sanitized_input|
346
+ sanitized_input.encoding.should == Encoding::UTF_8
347
+ sanitized_input.should.be.valid_encoding
348
+ sanitized_input.should == input
349
+ end
350
+ end
351
+
352
+ it "optionally sanitizes null bytes with the replace strategy" do
353
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
354
+ input = "foo=bla\xED&quux=bar\x00"
355
+ @rack_input = StringIO.new input
356
+
357
+ sanitize_form_data do |sanitized_input|
358
+ sanitized_input.encoding.should == Encoding::UTF_8
359
+ sanitized_input.should.be.valid_encoding
360
+ sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
361
+ end
362
+ end
363
+
364
+ it "optionally sanitizes encoded null bytes with the replace strategy" do
365
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
366
+ input = "foo=bla%ED&quux=bar%00"
367
+ @rack_input = StringIO.new input
368
+
369
+ sanitize_form_data do |sanitized_input|
370
+ sanitized_input.encoding.should == Encoding::UTF_8
371
+ sanitized_input.should.be.valid_encoding
372
+ sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
373
+ end
374
+ end
375
+
376
+ it "optionally raises on null bytes with the exception strategy" do
377
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
378
+ input = "foo=bla&quux=bar\x00"
379
+ @rack_input = StringIO.new input
380
+
381
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
382
+ sanitize_form_data
383
+ end
384
+ end
385
+
386
+ it "optionally raises on encoded null bytes with the exception strategy" do
387
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
388
+ input = "foo=bla&quux=bar%00"
389
+ @rack_input = StringIO.new input
390
+
391
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
392
+ sanitize_form_data
393
+ end
394
+ end
395
+
396
+ it "gives precedence to encoding errors with the exception strategy and null byte sanitisation" do
397
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
398
+ input = "foo=bla\x00&quux=bar\xED"
399
+ @rack_input = StringIO.new input
400
+
401
+ should.raise(EncodingError) do
402
+ sanitize_form_data
403
+ end
404
+ end
326
405
  end
327
406
 
328
407
  describe "with custom content-type" do
@@ -538,7 +617,10 @@ describe Rack::UTF8Sanitizer do
538
617
  end
539
618
 
540
619
  it "accepts a proc as a strategy" do
541
- truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
620
+ truncate = -> (input, sanitize_null_bytes:) do
621
+ sanitize_null_bytes.should == false
622
+ 'replace'.force_encoding(Encoding::UTF_8)
623
+ end
542
624
 
543
625
  @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
544
626
 
@@ -549,7 +631,26 @@ describe Rack::UTF8Sanitizer do
549
631
  sanitize_data(env) do |sanitized_input|
550
632
  sanitized_input.encoding.should == Encoding::UTF_8
551
633
  sanitized_input.should.be.valid_encoding
552
- sanitized_input.should == 'replace'
634
+ sanitized_input.should == 'replace'
635
+ end
636
+ end
637
+
638
+ it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
639
+ truncate = -> (input, sanitize_null_bytes:) do
640
+ sanitize_null_bytes.should == true
641
+ 'replace'.force_encoding(Encoding::UTF_8)
642
+ end
643
+
644
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
645
+ input = "foo=bla&quux=bar\x00"
646
+
647
+ @rack_input = StringIO.new input
648
+
649
+ env = request_env
650
+ sanitize_data(env) do |sanitized_input|
651
+ sanitized_input.encoding.should == Encoding::UTF_8
652
+ sanitized_input.should.be.valid_encoding
653
+ sanitized_input.should == 'replace'
553
654
  end
554
655
  end
555
656
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - whitequark
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-05 00:00:00.000000000 Z
11
+ date: 2023-08-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -19,7 +19,7 @@ dependencies:
19
19
  version: '1.0'
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
- version: '3.0'
22
+ version: '4.0'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,7 +29,7 @@ dependencies:
29
29
  version: '1.0'
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
- version: '3.0'
32
+ version: '4.0'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: bacon
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,9 @@ executables: []
80
80
  extensions: []
81
81
  extra_rdoc_files: []
82
82
  files:
83
+ - ".editorconfig"
84
+ - ".github/dependabot.yml"
85
+ - ".github/workflows/ci.yml"
83
86
  - ".gitignore"
84
87
  - ".travis.yml"
85
88
  - CHANGELOG.md
@@ -94,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
94
97
  licenses:
95
98
  - MIT
96
99
  metadata: {}
97
- post_install_message:
100
+ post_install_message:
98
101
  rdoc_options: []
99
102
  require_paths:
100
103
  - lib
@@ -109,9 +112,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
112
  - !ruby/object:Gem::Version
110
113
  version: '0'
111
114
  requirements: []
112
- rubyforge_project:
113
- rubygems_version: 2.7.6.2
114
- signing_key:
115
+ rubygems_version: 3.3.15
116
+ signing_key:
115
117
  specification_version: 4
116
118
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
117
119
  in request URI and headers.