rack-utf8_sanitizer 1.9.0 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/rack/utf8_sanitizer.rb +8 -6
- data/rack-utf8_sanitizer.gemspec +1 -1
- data/test/test_utf8_sanitizer.rb +14 -4
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
|
4
|
+
data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
|
7
|
+
data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
|
data/README.md
CHANGED
@@ -113,7 +113,7 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
|
|
113
113
|
```
|
114
114
|
|
115
115
|
```ruby
|
116
|
-
replace_string = lambda do |_invalid|
|
116
|
+
replace_string = lambda do |_invalid, sanitize_null_bytes: false|
|
117
117
|
Rails.logger.warn('Replacing invalid string')
|
118
118
|
|
119
119
|
'<Bad Encoding>'.freeze
|
@@ -122,6 +122,10 @@ end
|
|
122
122
|
config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
|
123
123
|
```
|
124
124
|
|
125
|
+
### Sanitizing Null Bytes
|
126
|
+
|
127
|
+
While null bytes are valid UTF-8, it can be useful to further restrict the valid character set to exclude null bytes. For example, PostgreSQL text columns do not allow storing null bytes. Passing `sanitize_null_bytes: true` in the configuration hash enables sanitizing null bytes, and the two built-in strategies both support this feature. Custom strategies should accept a keyword argument `sanitize_null_bytes` containing this configuration value.
|
128
|
+
|
125
129
|
## Contributing
|
126
130
|
|
127
131
|
1. Fork it
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -34,22 +34,24 @@ module Rack
|
|
34
34
|
|
35
35
|
DEFAULT_STRATEGIES = {
|
36
36
|
replace: lambda do |input, sanitize_null_bytes: false|
|
37
|
-
if sanitize_null_bytes
|
38
|
-
input = input.gsub(NULL_BYTE_REGEX, "")
|
39
|
-
end
|
40
37
|
input.
|
41
38
|
force_encoding(Encoding::ASCII_8BIT).
|
42
39
|
encode!(Encoding::UTF_8,
|
43
40
|
invalid: :replace,
|
44
41
|
undef: :replace)
|
42
|
+
if sanitize_null_bytes
|
43
|
+
input = input.gsub(NULL_BYTE_REGEX, "")
|
44
|
+
end
|
45
|
+
input
|
45
46
|
end,
|
46
47
|
exception: lambda do |input, sanitize_null_bytes: false|
|
47
|
-
if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
|
48
|
-
raise NullByteInString
|
49
|
-
end
|
50
48
|
input.
|
51
49
|
force_encoding(Encoding::ASCII_8BIT).
|
52
50
|
encode!(Encoding::UTF_8)
|
51
|
+
if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
|
52
|
+
raise NullByteInString
|
53
|
+
end
|
54
|
+
input
|
53
55
|
end
|
54
56
|
}.freeze
|
55
57
|
|
data/rack-utf8_sanitizer.gemspec
CHANGED
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -351,25 +351,25 @@ describe Rack::UTF8Sanitizer do
|
|
351
351
|
|
352
352
|
it "optionally sanitizes null bytes with the replace strategy" do
|
353
353
|
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
354
|
-
input =
|
354
|
+
input = "foo=bla\xED&quux=bar\x00"
|
355
355
|
@rack_input = StringIO.new input
|
356
356
|
|
357
357
|
sanitize_form_data do |sanitized_input|
|
358
358
|
sanitized_input.encoding.should == Encoding::UTF_8
|
359
359
|
sanitized_input.should.be.valid_encoding
|
360
|
-
sanitized_input.should == "foo=bla&quux=bar"
|
360
|
+
sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
|
361
361
|
end
|
362
362
|
end
|
363
363
|
|
364
364
|
it "optionally sanitizes encoded null bytes with the replace strategy" do
|
365
365
|
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true)
|
366
|
-
input =
|
366
|
+
input = "foo=bla%ED&quux=bar%00"
|
367
367
|
@rack_input = StringIO.new input
|
368
368
|
|
369
369
|
sanitize_form_data do |sanitized_input|
|
370
370
|
sanitized_input.encoding.should == Encoding::UTF_8
|
371
371
|
sanitized_input.should.be.valid_encoding
|
372
|
-
sanitized_input.should == "foo=bla&quux=bar"
|
372
|
+
sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar"
|
373
373
|
end
|
374
374
|
end
|
375
375
|
|
@@ -392,6 +392,16 @@ describe Rack::UTF8Sanitizer do
|
|
392
392
|
sanitize_form_data
|
393
393
|
end
|
394
394
|
end
|
395
|
+
|
396
|
+
it "gives precedence to encoding errors with the exception strategy and null byte sanitisation" do
|
397
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: :exception)
|
398
|
+
input = "foo=bla\x00&quux=bar\xED"
|
399
|
+
@rack_input = StringIO.new input
|
400
|
+
|
401
|
+
should.raise(EncodingError) do
|
402
|
+
sanitize_form_data
|
403
|
+
end
|
404
|
+
end
|
395
405
|
end
|
396
406
|
|
397
407
|
describe "with custom content-type" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.0
|
4
|
+
version: 1.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
112
|
- !ruby/object:Gem::Version
|
113
113
|
version: '0'
|
114
114
|
requirements: []
|
115
|
-
rubygems_version: 3.
|
115
|
+
rubygems_version: 3.3.15
|
116
116
|
signing_key:
|
117
117
|
specification_version: 4
|
118
118
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|