rack-utf8_sanitizer 1.8.0 → 1.9.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 045740a7d869543a26c071de26ee6585d00e6193eaa2a5a02bfe09142cfe11c1
4
- data.tar.gz: '050977cbbb72a835dea65e4df6bd75d6837c216b2a2f68eecd83701f1153e7ff'
3
+ metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
4
+ data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
5
5
  SHA512:
6
- metadata.gz: 611f078bdbe5f0247eac8ba3258a174eb70a1d3263b49ca0d4d261a8fd3de1b260da9defc36bc644eb2e6805211038adddf4396a9f7b0bb48fbd932e41991f97
7
- data.tar.gz: 1bc7f43fbd004ac010a7829cb8077b2bdb245500670d2fc4d22372ccc44c959db47a3853aa68f1eb4c1533b5202ee6fdb44d368d722be7fa9439dc6049ddbef2
6
+ metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
7
+ data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
@@ -10,7 +10,7 @@ jobs:
10
10
  strategy:
11
11
  fail-fast: false
12
12
  matrix:
13
- ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
13
+ ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
14
14
 
15
15
  steps:
16
16
  - uses: actions/checkout@v3
data/README.md CHANGED
@@ -113,7 +113,7 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
113
113
  ```
114
114
 
115
115
  ```ruby
116
- replace_string = lambda do |_invalid|
116
+ replace_string = lambda do |_invalid, sanitize_null_bytes: false|
117
117
  Rails.logger.warn('Replacing invalid string')
118
118
 
119
119
  '<Bad Encoding>'.freeze
@@ -122,6 +122,10 @@ end
122
122
  config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
123
123
  ```
124
124
 
125
+ ### Sanitizing Null Bytes
126
+
127
+ While null bytes are valid UTF-8, it can be useful to further restrict the valid character set to exclude them. For example, PostgreSQL text columns cannot store null bytes. Passing `sanitize_null_bytes: true` in the configuration hash (the default is `false`) enables null byte sanitization, which both built-in strategies support: `:replace` removes null bytes from the input, while `:exception` raises `Rack::UTF8Sanitizer::NullByteInString`. Custom strategies should accept a keyword argument `sanitize_null_bytes`, which receives this configuration value.
128
+
125
129
  ## Contributing
126
130
 
127
131
  1. Fork it
@@ -7,6 +7,9 @@ module Rack
7
7
  class UTF8Sanitizer
8
8
  StringIO = ::StringIO
9
9
  BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
10
+ NULL_BYTE_REGEX = /\x00/.freeze
11
+
12
+ class NullByteInString < StandardError; end
10
13
 
11
14
  # options[:sanitizable_content_types] Array
12
15
  # options[:additional_content_types] Array
@@ -17,6 +20,7 @@ module Rack
17
20
  @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
18
21
  @only = Array(options[:only]).flatten
19
22
  @except = Array(options[:except]).flatten
23
+ @sanitize_null_bytes = options.fetch(:sanitize_null_bytes, false)
20
24
  end
21
25
 
22
26
  def call(env)
@@ -29,21 +33,29 @@ module Rack
29
33
  end
30
34
 
31
35
  DEFAULT_STRATEGIES = {
32
- replace: lambda do |input|
36
+ replace: lambda do |input, sanitize_null_bytes: false|
33
37
  input.
34
38
  force_encoding(Encoding::ASCII_8BIT).
35
39
  encode!(Encoding::UTF_8,
36
40
  invalid: :replace,
37
41
  undef: :replace)
42
+ if sanitize_null_bytes
43
+ input = input.gsub(NULL_BYTE_REGEX, "")
44
+ end
45
+ input
38
46
  end,
39
- exception: lambda do |input|
47
+ exception: lambda do |input, sanitize_null_bytes: false|
40
48
  input.
41
49
  force_encoding(Encoding::ASCII_8BIT).
42
50
  encode!(Encoding::UTF_8)
51
+ if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
52
+ raise NullByteInString
53
+ end
54
+ input
43
55
  end
44
56
  }.freeze
45
57
 
46
- # http://rack.rubyforge.org/doc/SPEC.html
58
+ # https://github.com/rack/rack/blob/main/SPEC.rdoc
47
59
  URI_FIELDS = %w(
48
60
  SCRIPT_NAME
49
61
  REQUEST_PATH REQUEST_URI PATH_INFO
@@ -207,7 +219,8 @@ module Rack
207
219
 
208
220
  # This regexp matches all 'unreserved' characters from RFC3986 (2.3),
209
221
  # plus all multibyte UTF-8 characters.
210
- UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
222
+ UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/.freeze
223
+ UNRESERVED_OR_UTF8_OR_NULL = /[A-Za-z0-9\-._~\x00\x80-\xFF]/.freeze
211
224
 
212
225
  # RFC3986, 2.2 states that the characters from 'reserved' group must be
213
226
  # protected during normalization (which is what UTF8Sanitizer does).
@@ -218,7 +231,8 @@ module Rack
218
231
  input.gsub(/%([a-f\d]{2})/i) do |encoded|
219
232
  decoded = $1.hex.chr
220
233
 
221
- if decoded =~ UNRESERVED_OR_UTF8
234
+ decodable_regex = @sanitize_null_bytes ? UNRESERVED_OR_UTF8_OR_NULL : UNRESERVED_OR_UTF8
235
+ if decoded =~ decodable_regex
222
236
  decoded
223
237
  else
224
238
  encoded
@@ -244,10 +258,10 @@ module Rack
244
258
  if input.is_a? String
245
259
  input = input.dup.force_encoding(Encoding::UTF_8)
246
260
 
247
- if input.valid_encoding?
261
+ if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX)
248
262
  input
249
263
  else
250
- @strategy.call(input)
264
+ @strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes)
251
265
  end
252
266
  else
253
267
  input
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.8.0'
5
+ gem.version = '1.9.1'
6
6
  gem.authors = ["whitequark"]
7
7
  gem.license = "MIT"
8
8
  gem.email = ["whitequark@whitequark.org"]
@@ -337,6 +337,71 @@ describe Rack::UTF8Sanitizer do
337
337
  @response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
338
338
  end
339
339
  end
340
+
341
+ it "does not sanitize null bytes by default" do
342
+ input = "foo=bla&quux=bar%00"
343
+ @rack_input = StringIO.new input
344
+
345
+ sanitize_form_data do |sanitized_input|
346
+ sanitized_input.encoding.should == Encoding::UTF_8
347
+ sanitized_input.should.be.valid_encoding
348
+ sanitized_input.should == input
349
+ end
350
+ end
351
+
352
+ it "optionally sanitizes null bytes with the replace strategy" do
353
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
354
+ input = "foo=bla\xED&quux=bar\x00"
355
+ @rack_input = StringIO.new input
356
+
357
+ sanitize_form_data do |sanitized_input|
358
+ sanitized_input.encoding.should == Encoding::UTF_8
359
+ sanitized_input.should.be.valid_encoding
360
+ sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
361
+ end
362
+ end
363
+
364
+ it "optionally sanitizes encoded null bytes with the replace strategy" do
365
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
366
+ input = "foo=bla%ED&quux=bar%00"
367
+ @rack_input = StringIO.new input
368
+
369
+ sanitize_form_data do |sanitized_input|
370
+ sanitized_input.encoding.should == Encoding::UTF_8
371
+ sanitized_input.should.be.valid_encoding
372
+ sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
373
+ end
374
+ end
375
+
376
+ it "optionally raises on null bytes with the exception strategy" do
377
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
378
+ input = "foo=bla&quux=bar\x00"
379
+ @rack_input = StringIO.new input
380
+
381
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
382
+ sanitize_form_data
383
+ end
384
+ end
385
+
386
+ it "optionally raises on encoded null bytes with the exception strategy" do
387
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
388
+ input = "foo=bla&quux=bar%00"
389
+ @rack_input = StringIO.new input
390
+
391
+ should.raise(Rack::UTF8Sanitizer::NullByteInString) do
392
+ sanitize_form_data
393
+ end
394
+ end
395
+
396
+ it "gives precedence to encoding errors with the exception strategy and null byte sanitisation" do
397
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
398
+ input = "foo=bla\x00&quux=bar\xED"
399
+ @rack_input = StringIO.new input
400
+
401
+ should.raise(EncodingError) do
402
+ sanitize_form_data
403
+ end
404
+ end
340
405
  end
341
406
 
342
407
  describe "with custom content-type" do
@@ -552,7 +617,10 @@ describe Rack::UTF8Sanitizer do
552
617
  end
553
618
 
554
619
  it "accepts a proc as a strategy" do
555
- truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
620
+ truncate = -> (input, sanitize_null_bytes:) do
621
+ sanitize_null_bytes.should == false
622
+ 'replace'.force_encoding(Encoding::UTF_8)
623
+ end
556
624
 
557
625
  @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
558
626
 
@@ -566,5 +634,24 @@ describe Rack::UTF8Sanitizer do
566
634
  sanitized_input.should == 'replace'
567
635
  end
568
636
  end
637
+
638
+ it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
639
+ truncate = -> (input, sanitize_null_bytes:) do
640
+ sanitize_null_bytes.should == true
641
+ 'replace'.force_encoding(Encoding::UTF_8)
642
+ end
643
+
644
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
645
+ input = "foo=bla&quux=bar\x00"
646
+
647
+ @rack_input = StringIO.new input
648
+
649
+ env = request_env
650
+ sanitize_data(env) do |sanitized_input|
651
+ sanitized_input.encoding.should == Encoding::UTF_8
652
+ sanitized_input.should.be.valid_encoding
653
+ sanitized_input.should == 'replace'
654
+ end
655
+ end
569
656
  end
570
657
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.0
4
+ version: 1.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - whitequark
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-25 00:00:00.000000000 Z
11
+ date: 2023-08-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -97,7 +97,7 @@ homepage: http://github.com/whitequark/rack-utf8_sanitizer
97
97
  licenses:
98
98
  - MIT
99
99
  metadata: {}
100
- post_install_message:
100
+ post_install_message:
101
101
  rdoc_options: []
102
102
  require_paths:
103
103
  - lib
@@ -112,8 +112,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
112
  - !ruby/object:Gem::Version
113
113
  version: '0'
114
114
  requirements: []
115
- rubygems_version: 3.2.5
116
- signing_key:
115
+ rubygems_version: 3.3.15
116
+ signing_key:
117
117
  specification_version: 4
118
118
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
119
119
  in request URI and headers.