rack-utf8_sanitizer 1.9.0 → 1.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/rack/utf8_sanitizer.rb +8 -6
- data/rack-utf8_sanitizer.gemspec +1 -1
- data/test/test_utf8_sanitizer.rb +14 -4
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
|
4
|
+
data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
|
7
|
+
data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
|
data/README.md
CHANGED
@@ -113,7 +113,7 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
|
|
113
113
|
```
|
114
114
|
|
115
115
|
```ruby
|
116
|
-
replace_string = lambda do |_invalid|
|
116
|
+
replace_string = lambda do |_invalid, sanitize_null_bytes: false|
|
117
117
|
Rails.logger.warn('Replacing invalid string')
|
118
118
|
|
119
119
|
'<Bad Encoding>'.freeze
|
@@ -122,6 +122,10 @@ end
|
|
122
122
|
config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
|
123
123
|
```
|
124
124
|
|
125
|
+
### Sanitizing Null Bytes
|
126
|
+
|
127
|
+
While null bytes are valid UTF-8, it can be useful to further restrict the valid character set to exclude null bytes. For example, PostgreSQL text columns do not allow storing null bytes. Passing `sanitize_null_bytes: true` in the configuration hash enables sanitizing null bytes, and the two built-in strategies both support this feature. Custom strategies should accept a keyword argument `sanitize_null_bytes` containing this configuration value.
|
128
|
+
|
125
129
|
## Contributing
|
126
130
|
|
127
131
|
1. Fork it
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -34,22 +34,24 @@ module Rack
|
|
34
34
|
|
35
35
|
DEFAULT_STRATEGIES = {
|
36
36
|
replace: lambda do |input, sanitize_null_bytes: false|
|
37
|
-
if sanitize_null_bytes
|
38
|
-
input = input.gsub(NULL_BYTE_REGEX, "")
|
39
|
-
end
|
40
37
|
input.
|
41
38
|
force_encoding(Encoding::ASCII_8BIT).
|
42
39
|
encode!(Encoding::UTF_8,
|
43
40
|
invalid: :replace,
|
44
41
|
undef: :replace)
|
42
|
+
if sanitize_null_bytes
|
43
|
+
input = input.gsub(NULL_BYTE_REGEX, "")
|
44
|
+
end
|
45
|
+
input
|
45
46
|
end,
|
46
47
|
exception: lambda do |input, sanitize_null_bytes: false|
|
47
|
-
if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
|
48
|
-
raise NullByteInString
|
49
|
-
end
|
50
48
|
input.
|
51
49
|
force_encoding(Encoding::ASCII_8BIT).
|
52
50
|
encode!(Encoding::UTF_8)
|
51
|
+
if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
|
52
|
+
raise NullByteInString
|
53
|
+
end
|
54
|
+
input
|
53
55
|
end
|
54
56
|
}.freeze
|
55
57
|
|
data/rack-utf8_sanitizer.gemspec
CHANGED
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -351,25 +351,25 @@ describe Rack::UTF8Sanitizer do
|
|
351
351
|
|
352
352
|
it "optionally sanitizes null bytes with the replace strategy" do
|
353
353
|
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
354
|
-
input =
|
354
|
+
input = "foo=bla\xED&quux=bar\x00"
|
355
355
|
@rack_input = StringIO.new input
|
356
356
|
|
357
357
|
sanitize_form_data do |sanitized_input|
|
358
358
|
sanitized_input.encoding.should == Encoding::UTF_8
|
359
359
|
sanitized_input.should.be.valid_encoding
|
360
|
-
sanitized_input.should == "foo=bla&quux=bar"
|
360
|
+
sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
|
361
361
|
end
|
362
362
|
end
|
363
363
|
|
364
364
|
it "optionally sanitizes encoded null bytes with the replace strategy" do
|
365
365
|
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
366
|
-
input =
|
366
|
+
input = "foo=bla%ED&quux=bar%00"
|
367
367
|
@rack_input = StringIO.new input
|
368
368
|
|
369
369
|
sanitize_form_data do |sanitized_input|
|
370
370
|
sanitized_input.encoding.should == Encoding::UTF_8
|
371
371
|
sanitized_input.should.be.valid_encoding
|
372
|
-
sanitized_input.should == "foo=bla&quux=bar"
|
372
|
+
sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
|
373
373
|
end
|
374
374
|
end
|
375
375
|
|
@@ -392,6 +392,16 @@ describe Rack::UTF8Sanitizer do
|
|
392
392
|
sanitize_form_data
|
393
393
|
end
|
394
394
|
end
|
395
|
+
|
396
|
+
it "gives precedence to encoding errors with the exception strategy and null byte sanitisation" do
|
397
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
398
|
+
input = "foo=bla\x00&quux=bar\xED"
|
399
|
+
@rack_input = StringIO.new input
|
400
|
+
|
401
|
+
should.raise(EncodingError) do
|
402
|
+
sanitize_form_data
|
403
|
+
end
|
404
|
+
end
|
395
405
|
end
|
396
406
|
|
397
407
|
describe "with custom content-type" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.0
|
4
|
+
version: 1.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
112
|
- !ruby/object:Gem::Version
|
113
113
|
version: '0'
|
114
114
|
requirements: []
|
115
|
-
rubygems_version: 3.
|
115
|
+
rubygems_version: 3.3.15
|
116
116
|
signing_key:
|
117
117
|
specification_version: 4
|
118
118
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|