rack-utf8_sanitizer 1.9.0 → 1.9.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9893353fe731d6c942263ff57af911b10d1c9f88ba2c2c2b56bb7a76586935e5
4
- data.tar.gz: eeeaca39f55d680abe2707fa1aaadfe34eed06de2dd848506d184ae4ff326a8d
3
+ metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
4
+ data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
5
5
  SHA512:
6
- metadata.gz: 28cfbadcc1fbbf0678db3b51259618a9c9897c0cb78bcd8b8f5d87c91eddb44873a7a77fb21b9db47bcc704cb7df17d942c50436475858d0d9fcc17c2a8b59ce
7
- data.tar.gz: 7e781931df60e405d533f5eb832416510a431a1c6fa74baeabffb9499533d9eba2f97ffa24f89ed79a193de5f6715c0461fff4fb0403c863a3643e4aaabbcf0a
6
+ metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
7
+ data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
data/README.md CHANGED
@@ -113,7 +113,7 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
113
113
  ```
114
114
 
115
115
  ```ruby
116
- replace_string = lambda do |_invalid|
116
+ replace_string = lambda do |_invalid, sanitize_null_bytes: false|
117
117
  Rails.logger.warn('Replacing invalid string')
118
118
 
119
119
  '<Bad Encoding>'.freeze
@@ -122,6 +122,10 @@ end
122
122
  config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
123
123
  ```
124
124
 
125
+ ### Sanitizing Null Bytes
126
+
127
+ While null bytes are valid UTF-8, it can be useful to further restrict the valid character set to exclude null bytes. For example, PostgreSQL text columns do not allow storing null bytes. Passing `sanitize_null_bytes: true` in the configuration hash enables sanitizing null bytes, and the two built-in strategies both support this feature. Custom strategies should accept a keyword argument `sanitize_null_bytes` containing this configuration value.
128
+
125
129
  ## Contributing
126
130
 
127
131
  1. Fork it
@@ -34,22 +34,24 @@ module Rack
34
34
 
35
35
  DEFAULT_STRATEGIES = {
36
36
  replace: lambda do |input, sanitize_null_bytes: false|
37
- if sanitize_null_bytes
38
- input = input.gsub(NULL_BYTE_REGEX, "")
39
- end
40
37
  input.
41
38
  force_encoding(Encoding::ASCII_8BIT).
42
39
  encode!(Encoding::UTF_8,
43
40
  invalid: :replace,
44
41
  undef: :replace)
42
+ if sanitize_null_bytes
43
+ input = input.gsub(NULL_BYTE_REGEX, "")
44
+ end
45
+ input
45
46
  end,
46
47
  exception: lambda do |input, sanitize_null_bytes: false|
47
- if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
48
- raise NullByteInString
49
- end
50
48
  input.
51
49
  force_encoding(Encoding::ASCII_8BIT).
52
50
  encode!(Encoding::UTF_8)
51
+ if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
52
+ raise NullByteInString
53
+ end
54
+ input
53
55
  end
54
56
  }.freeze
55
57
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.9.0'
5
+ gem.version = '1.9.1'
6
6
  gem.authors = ["whitequark"]
7
7
  gem.license = "MIT"
8
8
  gem.email = ["whitequark@whitequark.org"]
@@ -351,25 +351,25 @@ describe Rack::UTF8Sanitizer do
351
351
 
352
352
  it "optionally sanitizes null bytes with the replace strategy" do
353
353
  @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
354
- input = "foo=bla&quux=bar\x00"
354
+ input = "foo=bla\xED&quux=bar\x00"
355
355
  @rack_input = StringIO.new input
356
356
 
357
357
  sanitize_form_data do |sanitized_input|
358
358
  sanitized_input.encoding.should == Encoding::UTF_8
359
359
  sanitized_input.should.be.valid_encoding
360
- sanitized_input.should == "foo=bla&quux=bar"
360
+ sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
361
361
  end
362
362
  end
363
363
 
364
364
  it "optionally sanitizes encoded null bytes with the replace strategy" do
365
365
  @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
366
- input = "foo=bla&quux=bar%00"
366
+ input = "foo=bla%ED&quux=bar%00"
367
367
  @rack_input = StringIO.new input
368
368
 
369
369
  sanitize_form_data do |sanitized_input|
370
370
  sanitized_input.encoding.should == Encoding::UTF_8
371
371
  sanitized_input.should.be.valid_encoding
372
- sanitized_input.should == "foo=bla&quux=bar"
372
+ sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
373
373
  end
374
374
  end
375
375
 
@@ -392,6 +392,16 @@ describe Rack::UTF8Sanitizer do
392
392
  sanitize_form_data
393
393
  end
394
394
  end
395
+
396
+ it "gives precedence to encoding errors with the exception strategy and null byte sanitisation" do
397
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
398
+ input = "foo=bla\x00&quux=bar\xED"
399
+ @rack_input = StringIO.new input
400
+
401
+ should.raise(EncodingError) do
402
+ sanitize_form_data
403
+ end
404
+ end
395
405
  end
396
406
 
397
407
  describe "with custom content-type" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.0
4
+ version: 1.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - whitequark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-07-05 00:00:00.000000000 Z
11
+ date: 2023-08-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
112
  - !ruby/object:Gem::Version
113
113
  version: '0'
114
114
  requirements: []
115
- rubygems_version: 3.2.5
115
+ rubygems_version: 3.3.15
116
116
  signing_key:
117
117
  specification_version: 4
118
118
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters