rack-utf8_sanitizer 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 045740a7d869543a26c071de26ee6585d00e6193eaa2a5a02bfe09142cfe11c1
4
- data.tar.gz: '050977cbbb72a835dea65e4df6bd75d6837c216b2a2f68eecd83701f1153e7ff'
3
+ metadata.gz: 9893353fe731d6c942263ff57af911b10d1c9f88ba2c2c2b56bb7a76586935e5
4
+ data.tar.gz: eeeaca39f55d680abe2707fa1aaadfe34eed06de2dd848506d184ae4ff326a8d
5
5
  SHA512:
6
- metadata.gz: 611f078bdbe5f0247eac8ba3258a174eb70a1d3263b49ca0d4d261a8fd3de1b260da9defc36bc644eb2e6805211038adddf4396a9f7b0bb48fbd932e41991f97
7
- data.tar.gz: 1bc7f43fbd004ac010a7829cb8077b2bdb245500670d2fc4d22372ccc44c959db47a3853aa68f1eb4c1533b5202ee6fdb44d368d722be7fa9439dc6049ddbef2
6
+ metadata.gz: 28cfbadcc1fbbf0678db3b51259618a9c9897c0cb78bcd8b8f5d87c91eddb44873a7a77fb21b9db47bcc704cb7df17d942c50436475858d0d9fcc17c2a8b59ce
7
+ data.tar.gz: 7e781931df60e405d533f5eb832416510a431a1c6fa74baeabffb9499533d9eba2f97ffa24f89ed79a193de5f6715c0461fff4fb0403c863a3643e4aaabbcf0a
@@ -10,7 +10,7 @@ jobs:
10
10
  strategy:
11
11
  fail-fast: false
12
12
  matrix:
13
- ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
13
+ ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
14
14
 
15
15
  steps:
16
16
  - uses: actions/checkout@v3
@@ -7,6 +7,9 @@ module Rack
7
7
  class UTF8Sanitizer
8
8
  StringIO = ::StringIO
9
9
  BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
10
+ NULL_BYTE_REGEX = /\x00/.freeze
11
+
12
+ class NullByteInString < StandardError; end
10
13
 
11
14
  # options[:sanitizable_content_types] Array
12
15
  # options[:additional_content_types] Array
@@ -17,6 +20,7 @@ module Rack
17
20
  @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
18
21
  @only = Array(options[:only]).flatten
19
22
  @except = Array(options[:except]).flatten
23
+ @sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
20
24
  end
21
25
 
22
26
  def call(env)
@@ -29,21 +33,27 @@ module Rack
29
33
  end
30
34
 
31
35
  DEFAULT_STRATEGIES = {
32
- replace: lambda do |input|
36
+ replace: lambda do |input, sanitize_null_bytes: false|
37
+ if sanitize_null_bytes
38
+ input = input.gsub(NULL_BYTE_REGEX, "")
39
+ end
33
40
  input.
34
41
  force_encoding(Encoding::ASCII_8BIT).
35
42
  encode!(Encoding::UTF_8,
36
43
  invalid: :replace,
37
44
  undef: :replace)
38
45
  end,
39
- exception: lambda do |input|
46
+ exception: lambda do |input, sanitize_null_bytes: false|
47
+ if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
48
+ raise NullByteInString
49
+ end
40
50
  input.
41
51
  force_encoding(Encoding::ASCII_8BIT).
42
52
  encode!(Encoding::UTF_8)
43
53
  end
44
54
  }.freeze
45
55
 
46
- # http://rack.rubyforge.org/doc/SPEC.html
56
+ # https://github.com/rack/rack/blob/main/SPEC.rdoc
47
57
  URI_FIELDS = %w(
48
58
  SCRIPT_NAME
49
59
  REQUEST_PATH REQUEST_URI PATH_INFO
@@ -207,7 +217,8 @@ module Rack
207
217
 
208
218
  # This regexp matches all 'unreserved' characters from RFC3986 (2.3),
209
219
  # plus all multibyte UTF-8 characters.
210
- UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
220
+ UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
221
+ UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
211
222
 
212
223
  # RFC3986, 2.2 states that the characters from 'reserved' group must be
213
224
  # protected during normalization (which is what UTF8Sanitizer does).
@@ -218,7 +229,8 @@ module Rack
218
229
  input.gsub(/%([a-f\d]{2})/i) do |encoded|
219
230
  decoded = $1.hex.chr
220
231
 
221
- if decoded =~ UNRESERVED_OR_UTF8
232
+ decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
233
+ if decoded =~ decodable_regex
222
234
  decoded
223
235
  else
224
236
  encoded
@@ -244,10 +256,10 @@ module Rack
244
256
  if input.is_a? String
245
257
  input = input.dup.force_encoding(Encoding::UTF_8)
246
258
 
247
- if input.valid_encoding?
259
+ if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
248
260
  input
249
261
  else
250
- @strategy.call(input)
262
+ @strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
251
263
  end
252
264
  else
253
265
  input
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.8.0'
5
+ gem.version = '1.9.0'
6
6
  gem.authors = ["whitequark"]
7
7
  gem.license = "MIT"
8
8
  gem.email = ["whitequark@whitequark.org"]
@@ -337,6 +337,61 @@ describe Rack::UTF8Sanitizer do
337
337
  @response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
338
338
  end
339
339
  end
340
+
341
+ it "does not sanitize null bytes by default" do
342
+ input = "foo=bla&quux=bar%00"
343
+ @rack_input = StringIO.new input
344
+
345
+ sanitize_form_data do |sanitized_input|
346
+ sanitized_input.encoding.should == Encoding::UTF_8
347
+ sanitized_input.should.be.valid_encoding
348
+ sanitized_input.should == input
349
+ end
350
+ end
351
+
352
+ it "optionally sanitizes null bytes with the replace strategy" do
353
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
354
+ input = "foo=bla&quux=bar\x00"
355
+ @rack_input = StringIO.new input
356
+
357
+ sanitize_form_data do |sanitized_input|
358
+ sanitized_input.encoding.should == Encoding::UTF_8
359
+ sanitized_input.should.be.valid_encoding
360
+ sanitized_input.should == "foo=bla&quux=bar"
361
+ end
362
+ end
363
+
364
+ it "optionally sanitizes encoded null bytes with the replace strategy" do
365
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
366
+ input = "foo=bla&quux=bar%00"
367
+ @rack_input = StringIO.new input
368
+
369
+ sanitize_form_data do |sanitized_input|
370
+ sanitized_input.encoding.should == Encoding::UTF_8
371
+ sanitized_input.should.be.valid_encoding
372
+ sanitized_input.should == "foo=bla&quux=bar"
373
+ end
374
+ end
375
+
376
+ it "optionally raises on null bytes with the exception strategy" do
377
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
378
+ input = "foo=bla&quux=bar\x00"
379
+ @rack_input = StringIO.new input
380
+
381
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
382
+ sanitize_form_data
383
+ end
384
+ end
385
+
386
+ it "optionally raises on encoded null bytes with the exception strategy" do
387
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
388
+ input = "foo=bla&quux=bar%00"
389
+ @rack_input = StringIO.new input
390
+
391
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
392
+ sanitize_form_data
393
+ end
394
+ end
340
395
  end
341
396
 
342
397
  describe "with custom content-type" do
@@ -552,7 +607,10 @@ describe Rack::UTF8Sanitizer do
552
607
  end
553
608
 
554
609
  it "accepts a proc as a strategy" do
555
- truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
610
+ truncate = -> (input, sanitize_null_bytes:) do
611
+ sanitize_null_bytes.should == false
612
+ 'replace'.force_encoding(Encoding::UTF_8)
613
+ end
556
614
 
557
615
  @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
558
616
 
@@ -566,5 +624,24 @@ describe Rack::UTF8Sanitizer do
566
624
  sanitized_input.should == 'replace'
567
625
  end
568
626
  end
627
+
628
+ it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
629
+ truncate = -> (input, sanitize_null_bytes:) do
630
+ sanitize_null_bytes.should == true
631
+ 'replace'.force_encoding(Encoding::UTF_8)
632
+ end
633
+
634
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
635
+ input = "foo=bla&quux=bar\x00"
636
+
637
+ @rack_input = StringIO.new input
638
+
639
+ env = request_env
640
+ sanitize_data(env) do |sanitized_input|
641
+ sanitized_input.encoding.should == Encoding::UTF_8
642
+ sanitized_input.should.be.valid_encoding
643
+ sanitized_input.should == 'replace'
644
+ end
645
+ end
569
646
  end
570
647
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.0
4
+ version: 1.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - whitequark
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-25 00:00:00.000000000 Z
11
+ date: 2023-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -97,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
97
97
  licenses:
98
98
  - MIT
99
99
  metadata: {}
100
- post_install_message:
100
+ post_install_message:
101
101
  rdoc_options: []
102
102
  require_paths:
103
103
  - lib
@@ -113,7 +113,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
113
113
  version: '0'
114
114
  requirements: []
115
115
  rubygems_version: 3.2.5
116
- signing_key:
116
+ signing_key:
117
117
  specification_version: 4
118
118
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
119
119
  in request URI and headers.