rack-utf8_sanitizer 1.9.0 → 1.9.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9893353fe731d6c942263ff57af911b10d1c9f88ba2c2c2b56bb7a76586935e5
4
- data.tar.gz: eeeaca39f55d680abe2707fa1aaadfe34eed06de2dd848506d184ae4ff326a8d
3
+ metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
4
+ data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
5
5
  SHA512:
6
- metadata.gz: 28cfbadcc1fbbf0678db3b51259618a9c9897c0cb78bcd8b8f5d87c91eddb44873a7a77fb21b9db47bcc704cb7df17d942c50436475858d0d9fcc17c2a8b59ce
7
- data.tar.gz: 7e781931df60e405d533f5eb832416510a431a1c6fa74baeabffb9499533d9eba2f97ffa24f89ed79a193de5f6715c0461fff4fb0403c863a3643e4aaabbcf0a
6
+ metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
7
+ data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
data/README.md CHANGED
@@ -113,7 +113,7 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
113
113
  ```
114
114
 
115
115
  ```ruby
116
- replace_string = lambda do |_invalid|
116
+ replace_string = lambda do |_invalid, sanitize_null_bytes: false|
117
117
  Rails.logger.warn('Replacing invalid string')
118
118
 
119
119
  '<Bad Encoding>'.freeze
@@ -122,6 +122,10 @@ end
122
122
  config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
123
123
  ```
124
124
 
125
+ ### Sanitizing Null Bytes
126
+
127
+ While null bytes are valid UTF-8, it can be useful to further restrict the valid character set to exclude null bytes. For example, PostgreSQL text columns do not allow storing null bytes. Passing `sanitize_null_bytes: true` in the configuration hash enables sanitizing null bytes, and the two built-in strategies both support this feature. Custom strategies should accept a keyword argument `sanitize_null_bytes` containing this configuration value.
128
+
125
129
  ## Contributing
126
130
 
127
131
  1. Fork it
@@ -34,22 +34,24 @@ module Rack
34
34
 
35
35
  DEFAULT_STRATEGIES = {
36
36
  replace: lambda do |input, sanitize_null_bytes: false|
37
- if sanitize_null_bytes
38
- input = input.gsub(NULL_BYTE_REGEX, "")
39
- end
40
37
  input.
41
38
  force_encoding(Encoding::ASCII_8BIT).
42
39
  encode!(Encoding::UTF_8,
43
40
  invalid: :replace,
44
41
  undef: :replace)
42
+ if sanitize_null_bytes
43
+ input = input.gsub(NULL_BYTE_REGEX, "")
44
+ end
45
+ input
45
46
  end,
46
47
  exception: lambda do |input, sanitize_null_bytes: false|
47
- if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
48
- raise NullByteInString
49
- end
50
48
  input.
51
49
  force_encoding(Encoding::ASCII_8BIT).
52
50
  encode!(Encoding::UTF_8)
51
+ if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
52
+ raise NullByteInString
53
+ end
54
+ input
53
55
  end
54
56
  }.freeze
55
57
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.9.0'
5
+ gem.version = '1.9.1'
6
6
  gem.authors = ["whitequark"]
7
7
  gem.license = "MIT"
8
8
  gem.email = ["whitequark@whitequark.org"]
@@ -351,25 +351,25 @@ describe Rack::UTF8Sanitizer do
351
351
 
352
352
  it "optionally sanitizes null bytes with the replace strategy" do
353
353
  @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
354
- input = "foo=bla&quux=bar\x00"
354
+ input = "foo=bla\xED&quux=bar\x00"
355
355
  @rack_input = StringIO.new input
356
356
 
357
357
  sanitize_form_data do |sanitized_input|
358
358
  sanitized_input.encoding.should == Encoding::UTF_8
359
359
  sanitized_input.should.be.valid_encoding
360
- sanitized_input.should == "foo=bla&quux=bar"
360
+ sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
361
361
  end
362
362
  end
363
363
 
364
364
  it "optionally sanitizes encoded null bytes with the replace strategy" do
365
365
  @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
366
- input = "foo=bla&quux=bar%00"
366
+ input = "foo=bla%ED&quux=bar%00"
367
367
  @rack_input = StringIO.new input
368
368
 
369
369
  sanitize_form_data do |sanitized_input|
370
370
  sanitized_input.encoding.should == Encoding::UTF_8
371
371
  sanitized_input.should.be.valid_encoding
372
- sanitized_input.should == "foo=bla&quux=bar"
372
+ sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
373
373
  end
374
374
  end
375
375
 
@@ -392,6 +392,16 @@ describe Rack::UTF8Sanitizer do
392
392
  sanitize_form_data
393
393
  end
394
394
  end
395
+
396
+ it "gives precedence to encoding errors with the exception strategy and null byte sanitisation" do
397
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
398
+ input = "foo=bla\x00&quux=bar\xED"
399
+ @rack_input = StringIO.new input
400
+
401
+ should.raise(EncodingError) do
402
+ sanitize_form_data
403
+ end
404
+ end
395
405
  end
396
406
 
397
407
  describe "with custom content-type" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.0
4
+ version: 1.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - whitequark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-07-05 00:00:00.000000000 Z
11
+ date: 2023-08-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
112
  - !ruby/object:Gem::Version
113
113
  version: '0'
114
114
  requirements: []
115
- rubygems_version: 3.2.5
115
+ rubygems_version: 3.3.15
116
116
  signing_key:
117
117
  specification_version: 4
118
118
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters