rack-utf8_sanitizer 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +4 -0
- data/README.md +6 -2
- data/lib/rack/utf8_sanitizer.rb +40 -2
- data/rack-utf8_sanitizer.gemspec +1 -1
- data/test/test_utf8_sanitizer.rb +24 -8
- metadata +2 -2
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -18,11 +18,15 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
For Rails, add this to your `application.rb`:
|
20
20
|
|
21
|
-
|
21
|
+
``` ruby
|
22
|
+
config.middleware.insert_before "Rack::Lock", Rack::UTF8Sanitizer
|
23
|
+
```
|
22
24
|
|
23
25
|
For Rack apps, add this to `config.ru`:
|
24
26
|
|
25
|
-
|
27
|
+
``` ruby
|
28
|
+
use Rack::UTF8Sanitizer
|
29
|
+
```
|
26
30
|
|
27
31
|
## Usage
|
28
32
|
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: ascii-8bit
|
2
|
+
|
1
3
|
require 'uri'
|
2
4
|
|
3
5
|
module Rack
|
@@ -30,12 +32,13 @@ module Rack
|
|
30
32
|
#
|
31
33
|
# The result is guaranteed to be UTF-8-safe.
|
32
34
|
|
33
|
-
decoded_value =
|
35
|
+
decoded_value = unescape_unreserved(
|
34
36
|
sanitize_string(value).
|
35
37
|
force_encoding('ASCII-8BIT'))
|
36
38
|
|
37
39
|
env[key] = transfer_frozen(value,
|
38
|
-
|
40
|
+
escape_unreserved(
|
41
|
+
sanitize_string(decoded_value)))
|
39
42
|
|
40
43
|
elsif key =~ /^HTTP_/
|
41
44
|
# Just sanitize the headers and leave them in UTF-8. There is
|
@@ -49,6 +52,41 @@ module Rack
|
|
49
52
|
|
50
53
|
protected
|
51
54
|
|
55
|
+
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
56
|
+
# plus all multibyte UTF-8 characters.
|
57
|
+
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
|
58
|
+
|
59
|
+
# RFC3986, 2.2 states that the characters from 'reserved' group must be
|
60
|
+
# protected during normalization (which is what UTF8Sanitizer does).
|
61
|
+
#
|
62
|
+
# However, the regexp approach used by URI.unescape is not sophisticated
|
63
|
+
# enough for our task.
|
64
|
+
def unescape_unreserved(input)
|
65
|
+
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
66
|
+
decoded = [$1.hex].pack('C')
|
67
|
+
|
68
|
+
if decoded =~ UNRESERVED_OR_UTF8
|
69
|
+
decoded
|
70
|
+
else
|
71
|
+
encoded
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# This regexp matches unsafe characters, i.e. everything except 'reserved'
|
77
|
+
# and 'unreserved' characters from RFC3986 (2.3), and additionally '%',
|
78
|
+
# as percent-encoded unreserved characters could be left over from the
|
79
|
+
# `unescape_unreserved` invocation.
|
80
|
+
#
|
81
|
+
# See also URI::REGEXP::PATTERN::{UNRESERVED,RESERVED}.
|
82
|
+
UNSAFE = /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]%]/
|
83
|
+
|
84
|
+
# Performs the reverse function of `unescape_unreserved`. Unlike
|
85
|
+
# the previous function, we can reuse the logic in URI#escape.
|
86
|
+
def escape_unreserved(input)
|
87
|
+
URI.escape(input, UNSAFE)
|
88
|
+
end
|
89
|
+
|
52
90
|
def sanitize_string(input)
|
53
91
|
if input.is_a? String
|
54
92
|
input = input.dup.force_encoding('UTF-8')
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.0.0'
|
5
|
+
gem.version = '1.1.0'
|
6
6
|
gem.authors = ["Peter Zotov"]
|
7
7
|
gem.email = ["whitequark@whitequark.org"]
|
8
8
|
gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -31,7 +31,7 @@ describe Rack::UTF8Sanitizer do
|
|
31
31
|
describe "with invalid UTF-8 input" do
|
32
32
|
before do
|
33
33
|
@plain_input = "foo\xe0".force_encoding('UTF-8')
|
34
|
-
@uri_input = "foo%E0".force_encoding('UTF-8')
|
34
|
+
@uri_input = "http://bar/foo%E0".force_encoding('UTF-8')
|
35
35
|
end
|
36
36
|
|
37
37
|
behaves_like :does_sanitize_plain
|
@@ -40,7 +40,7 @@ describe Rack::UTF8Sanitizer do
|
|
40
40
|
|
41
41
|
describe "with invalid, incorrectly percent-encoded UTF-8 URI input" do
|
42
42
|
before do
|
43
|
-
@uri_input = "foo%E0\xe0".force_encoding('UTF-8')
|
43
|
+
@uri_input = "http://bar/foo%E0\xe0".force_encoding('UTF-8')
|
44
44
|
end
|
45
45
|
|
46
46
|
behaves_like :does_sanitize_uri
|
@@ -49,7 +49,7 @@ describe Rack::UTF8Sanitizer do
|
|
49
49
|
describe "with invalid ASCII-8BIT input" do
|
50
50
|
before do
|
51
51
|
@plain_input = "foo\xe0"
|
52
|
-
@uri_input = "foo%E0"
|
52
|
+
@uri_input = "http://bar/foo%E0"
|
53
53
|
end
|
54
54
|
|
55
55
|
behaves_like :does_sanitize_plain
|
@@ -58,7 +58,7 @@ describe Rack::UTF8Sanitizer do
|
|
58
58
|
|
59
59
|
describe "with invalid, incorrectly percent-encoded ASCII-8BIT URI input" do
|
60
60
|
before do
|
61
|
-
@uri_input = "foo%E0\xe0"
|
61
|
+
@uri_input = "http://bar/foo%E0\xe0"
|
62
62
|
end
|
63
63
|
|
64
64
|
behaves_like :does_sanitize_uri
|
@@ -89,16 +89,24 @@ describe Rack::UTF8Sanitizer do
|
|
89
89
|
describe "with valid UTF-8 input" do
|
90
90
|
before do
|
91
91
|
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
92
|
-
@uri_input = "foo+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
92
|
+
@uri_input = "http://bar/foo+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
93
93
|
end
|
94
94
|
|
95
95
|
behaves_like :identity_plain
|
96
96
|
behaves_like :identity_uri
|
97
|
+
|
98
|
+
describe "with URI characters from reserved range" do
|
99
|
+
before do
|
100
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
101
|
+
end
|
102
|
+
|
103
|
+
behaves_like :identity_uri
|
104
|
+
end
|
97
105
|
end
|
98
106
|
|
99
107
|
describe "with valid, not percent-encoded UTF-8 URI input" do
|
100
108
|
before do
|
101
|
-
@uri_input = "foo+bar+лол".force_encoding('UTF-8')
|
109
|
+
@uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
|
102
110
|
end
|
103
111
|
|
104
112
|
it "does not change URI-like entity (REQUEST_PATH)" do
|
@@ -114,17 +122,25 @@ describe Rack::UTF8Sanitizer do
|
|
114
122
|
describe "with valid ASCII-8BIT input" do
|
115
123
|
before do
|
116
124
|
@plain_input = "bar baz"
|
117
|
-
@uri_input = "bar+baz"
|
125
|
+
@uri_input = "http://bar/bar+baz"
|
118
126
|
end
|
119
127
|
|
120
128
|
behaves_like :identity_plain
|
121
129
|
behaves_like :identity_uri
|
130
|
+
|
131
|
+
describe "with URI characters from reserved range" do
|
132
|
+
before do
|
133
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB"
|
134
|
+
end
|
135
|
+
|
136
|
+
behaves_like :identity_uri
|
137
|
+
end
|
122
138
|
end
|
123
139
|
|
124
140
|
describe "with frozen strings" do
|
125
141
|
before do
|
126
142
|
@plain_input = "bar baz".freeze
|
127
|
-
@uri_input = "bar+baz".freeze
|
143
|
+
@uri_input = "http://bar/bar+baz".freeze
|
128
144
|
end
|
129
145
|
|
130
146
|
it "preserves the frozen? status of input" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rack
|