rack-utf8_sanitizer 1.8.0 → 1.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 045740a7d869543a26c071de26ee6585d00e6193eaa2a5a02bfe09142cfe11c1
4
- data.tar.gz: '050977cbbb72a835dea65e4df6bd75d6837c216b2a2f68eecd83701f1153e7ff'
3
+ metadata.gz: 9893353fe731d6c942263ff57af911b10d1c9f88ba2c2c2b56bb7a76586935e5
4
+ data.tar.gz: eeeaca39f55d680abe2707fa1aaadfe34eed06de2dd848506d184ae4ff326a8d
5
5
  SHA512:
6
- metadata.gz: 611f078bdbe5f0247eac8ba3258a174eb70a1d3263b49ca0d4d261a8fd3de1b260da9defc36bc644eb2e6805211038adddf4396a9f7b0bb48fbd932e41991f97
7
- data.tar.gz: 1bc7f43fbd004ac010a7829cb8077b2bdb245500670d2fc4d22372ccc44c959db47a3853aa68f1eb4c1533b5202ee6fdb44d368d722be7fa9439dc6049ddbef2
6
+ metadata.gz: 28cfbadcc1fbbf0678db3b51259618a9c9897c0cb78bcd8b8f5d87c91eddb44873a7a77fb21b9db47bcc704cb7df17d942c50436475858d0d9fcc17c2a8b59ce
7
+ data.tar.gz: 7e781931df60e405d533f5eb832416510a431a1c6fa74baeabffb9499533d9eba2f97ffa24f89ed79a193de5f6715c0461fff4fb0403c863a3643e4aaabbcf0a
@@ -10,7 +10,7 @@ jobs:
10
10
  strategy:
11
11
  fail-fast: false
12
12
  matrix:
13
- ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
13
+ ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
14
14
 
15
15
  steps:
16
16
  - uses: actions/checkout@v3
@@ -7,6 +7,9 @@ module Rack
7
7
  class UTF8Sanitizer
8
8
  StringIO = ::StringIO
9
9
  BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
10
+ NULL_BYTE_REGEX = /\x00/.freeze
11
+
12
+ class NullByteInString < StandardError; end
10
13
 
11
14
  # options[:sanitizable_content_types] Array
12
15
  # options[:additional_content_types] Array
@@ -17,6 +20,7 @@ module Rack
17
20
  @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
18
21
  @only = Array(options[:only]).flatten
19
22
  @except = Array(options[:except]).flatten
23
+ @sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
20
24
  end
21
25
 
22
26
  def call(env)
@@ -29,21 +33,27 @@ module Rack
29
33
  end
30
34
 
31
35
  DEFAULT_STRATEGIES = {
32
- replace: lambda do |input|
36
+ replace: lambda do |input, sanitize_null_bytes: false|
37
+ if sanitize_null_bytes
38
+ input = input.gsub(NULL_BYTE_REGEX, "")
39
+ end
33
40
  input.
34
41
  force_encoding(Encoding::ASCII_8BIT).
35
42
  encode!(Encoding::UTF_8,
36
43
  invalid: :replace,
37
44
  undef: :replace)
38
45
  end,
39
- exception: lambda do |input|
46
+ exception: lambda do |input, sanitize_null_bytes: false|
47
+ if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
48
+ raise NullByteInString
49
+ end
40
50
  input.
41
51
  force_encoding(Encoding::ASCII_8BIT).
42
52
  encode!(Encoding::UTF_8)
43
53
  end
44
54
  }.freeze
45
55
 
46
- # http://rack.rubyforge.org/doc/SPEC.html
56
+ # https://github.com/rack/rack/blob/main/SPEC.rdoc
47
57
  URI_FIELDS = %w(
48
58
  SCRIPT_NAME
49
59
  REQUEST_PATH REQUEST_URI PATH_INFO
@@ -207,7 +217,8 @@ module Rack
207
217
 
208
218
  # This regexp matches all 'unreserved' characters from RFC3986 (2.3),
209
219
  # plus all multibyte UTF-8 characters.
210
- UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
220
+ UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
221
+ UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
211
222
 
212
223
  # RFC3986, 2.2 states that the characters from 'reserved' group must be
213
224
  # protected during normalization (which is what UTF8Sanitizer does).
@@ -218,7 +229,8 @@ module Rack
218
229
  input.gsub(/%([a-f\d]{2})/i) do |encoded|
219
230
  decoded = $1.hex.chr
220
231
 
221
- if decoded =~ UNRESERVED_OR_UTF8
232
+ decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
233
+ if decoded =~ decodable_regex
222
234
  decoded
223
235
  else
224
236
  encoded
@@ -244,10 +256,10 @@ module Rack
244
256
  if input.is_a? String
245
257
  input = input.dup.force_encoding(Encoding::UTF_8)
246
258
 
247
- if input.valid_encoding?
259
+ if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
248
260
  input
249
261
  else
250
- @strategy.call(input)
262
+ @strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
251
263
  end
252
264
  else
253
265
  input
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.8.0'
5
+ gem.version = '1.9.0'
6
6
  gem.authors = ["whitequark"]
7
7
  gem.license = "MIT"
8
8
  gem.email = ["whitequark@whitequark.org"]
@@ -337,6 +337,61 @@ describe Rack::UTF8Sanitizer do
337
337
  @response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
338
338
  end
339
339
  end
340
+
341
+ it "does not sanitize null bytes by default" do
342
+ input = "foo=bla&quux=bar%00"
343
+ @rack_input = StringIO.new input
344
+
345
+ sanitize_form_data do |sanitized_input|
346
+ sanitized_input.encoding.should == Encoding::UTF_8
347
+ sanitized_input.should.be.valid_encoding
348
+ sanitized_input.should == input
349
+ end
350
+ end
351
+
352
+ it "optionally sanitizes null bytes with the replace strategy" do
353
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
354
+ input = "foo=bla&quux=bar\x00"
355
+ @rack_input = StringIO.new input
356
+
357
+ sanitize_form_data do |sanitized_input|
358
+ sanitized_input.encoding.should == Encoding::UTF_8
359
+ sanitized_input.should.be.valid_encoding
360
+ sanitized_input.should == "foo=bla&quux=bar"
361
+ end
362
+ end
363
+
364
+ it "optionally sanitizes encoded null bytes with the replace strategy" do
365
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
366
+ input = "foo=bla&quux=bar%00"
367
+ @rack_input = StringIO.new input
368
+
369
+ sanitize_form_data do |sanitized_input|
370
+ sanitized_input.encoding.should == Encoding::UTF_8
371
+ sanitized_input.should.be.valid_encoding
372
+ sanitized_input.should == "foo=bla&quux=bar"
373
+ end
374
+ end
375
+
376
+ it "optionally raises on null bytes with the exception strategy" do
377
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
378
+ input = "foo=bla&quux=bar\x00"
379
+ @rack_input = StringIO.new input
380
+
381
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
382
+ sanitize_form_data
383
+ end
384
+ end
385
+
386
+ it "optionally raises on encoded null bytes with the exception strategy" do
387
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
388
+ input = "foo=bla&quux=bar%00"
389
+ @rack_input = StringIO.new input
390
+
391
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
392
+ sanitize_form_data
393
+ end
394
+ end
340
395
  end
341
396
 
342
397
  describe "with custom content-type" do
@@ -552,7 +607,10 @@ describe Rack::UTF8Sanitizer do
552
607
  end
553
608
 
554
609
  it "accepts a proc as a strategy" do
555
- truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
610
+ truncate = -> (input, sanitize_null_bytes:) do
611
+ sanitize_null_bytes.should == false
612
+ 'replace'.force_encoding(Encoding::UTF_8)
613
+ end
556
614
 
557
615
  @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
558
616
 
@@ -566,5 +624,24 @@ describe Rack::UTF8Sanitizer do
566
624
  sanitized_input.should == 'replace'
567
625
  end
568
626
  end
627
+
628
+ it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
629
+ truncate = -> (input, sanitize_null_bytes:) do
630
+ sanitize_null_bytes.should == true
631
+ 'replace'.force_encoding(Encoding::UTF_8)
632
+ end
633
+
634
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
635
+ input = "foo=bla&quux=bar\x00"
636
+
637
+ @rack_input = StringIO.new input
638
+
639
+ env = request_env
640
+ sanitize_data(env) do |sanitized_input|
641
+ sanitized_input.encoding.should == Encoding::UTF_8
642
+ sanitized_input.should.be.valid_encoding
643
+ sanitized_input.should == 'replace'
644
+ end
645
+ end
569
646
  end
570
647
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.0
4
+ version: 1.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - whitequark
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-25 00:00:00.000000000 Z
11
+ date: 2023-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -97,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
97
97
  licenses:
98
98
  - MIT
99
99
  metadata: {}
100
- post_install_message:
100
+ post_install_message:
101
101
  rdoc_options: []
102
102
  require_paths:
103
103
  - lib
@@ -113,7 +113,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
113
113
  version: '0'
114
114
  requirements: []
115
115
  rubygems_version: 3.2.5
116
- signing_key:
116
+ signing_key:
117
117
  specification_version: 4
118
118
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
119
119
  in request URI and headers.