rack-utf8_sanitizer 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +4 -0
- data/README.md +6 -2
- data/lib/rack/utf8_sanitizer.rb +40 -2
- data/rack-utf8_sanitizer.gemspec +1 -1
- data/test/test_utf8_sanitizer.rb +24 -8
- metadata +2 -2
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -18,11 +18,15 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
For Rails, add this to your `application.rb`:
|
20
20
|
|
21
|
-
|
21
|
+
``` ruby
|
22
|
+
config.middleware.insert_before "Rack::Lock", Rack::UTF8Sanitizer
|
23
|
+
```
|
22
24
|
|
23
25
|
For Rack apps, add this to `config.ru`:
|
24
26
|
|
25
|
-
|
27
|
+
``` ruby
|
28
|
+
use Rack::UTF8Sanitizer
|
29
|
+
```
|
26
30
|
|
27
31
|
## Usage
|
28
32
|
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: ascii-8bit
|
2
|
+
|
1
3
|
require 'uri'
|
2
4
|
|
3
5
|
module Rack
|
@@ -30,12 +32,13 @@ module Rack
|
|
30
32
|
#
|
31
33
|
# The result is guaranteed to be UTF-8-safe.
|
32
34
|
|
33
|
-
decoded_value =
|
35
|
+
decoded_value = unescape_unreserved(
|
34
36
|
sanitize_string(value).
|
35
37
|
force_encoding('ASCII-8BIT'))
|
36
38
|
|
37
39
|
env[key] = transfer_frozen(value,
|
38
|
-
|
40
|
+
escape_unreserved(
|
41
|
+
sanitize_string(decoded_value)))
|
39
42
|
|
40
43
|
elsif key =~ /^HTTP_/
|
41
44
|
# Just sanitize the headers and leave them in UTF-8. There is
|
@@ -49,6 +52,41 @@ module Rack
|
|
49
52
|
|
50
53
|
protected
|
51
54
|
|
55
|
+
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
56
|
+
# plus all multibyte UTF-8 characters.
|
57
|
+
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
|
58
|
+
|
59
|
+
# RFC3986, 2.2 states that the characters from 'reserved' group must be
|
60
|
+
# protected during normalization (which is what UTF8Sanitizer does).
|
61
|
+
#
|
62
|
+
# However, the regexp approach used by URI.unescape is not sophisticated
|
63
|
+
# enough for our task.
|
64
|
+
def unescape_unreserved(input)
|
65
|
+
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
66
|
+
decoded = [$1.hex].pack('C')
|
67
|
+
|
68
|
+
if decoded =~ UNRESERVED_OR_UTF8
|
69
|
+
decoded
|
70
|
+
else
|
71
|
+
encoded
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# This regexp matches unsafe characters, i.e. everything except 'reserved'
|
77
|
+
# and 'unreserved' characters from RFC3986 (2.3), and additionally '%',
|
78
|
+
# as percent-encoded reserved (or otherwise unsafe) characters could be left over from the
|
79
|
+
# `unescape_unreserved` invocation.
|
80
|
+
#
|
81
|
+
# See also URI::REGEXP::PATTERN::{UNRESERVED,RESERVED}.
|
82
|
+
UNSAFE = /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]%]/
|
83
|
+
|
84
|
+
# Performs the reverse function of `unescape_unreserved`. Unlike
|
85
|
+
# the previous function, we can reuse the logic in URI.escape.
|
86
|
+
def escape_unreserved(input)
|
87
|
+
URI.escape(input, UNSAFE)
|
88
|
+
end
|
89
|
+
|
52
90
|
def sanitize_string(input)
|
53
91
|
if input.is_a? String
|
54
92
|
input = input.dup.force_encoding('UTF-8')
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.0.0'
|
5
|
+
gem.version = '1.1.0'
|
6
6
|
gem.authors = ["Peter Zotov"]
|
7
7
|
gem.email = ["whitequark@whitequark.org"]
|
8
8
|
gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -31,7 +31,7 @@ describe Rack::UTF8Sanitizer do
|
|
31
31
|
describe "with invalid UTF-8 input" do
|
32
32
|
before do
|
33
33
|
@plain_input = "foo\xe0".force_encoding('UTF-8')
|
34
|
-
@uri_input = "foo%E0".force_encoding('UTF-8')
|
34
|
+
@uri_input = "http://bar/foo%E0".force_encoding('UTF-8')
|
35
35
|
end
|
36
36
|
|
37
37
|
behaves_like :does_sanitize_plain
|
@@ -40,7 +40,7 @@ describe Rack::UTF8Sanitizer do
|
|
40
40
|
|
41
41
|
describe "with invalid, incorrectly percent-encoded UTF-8 URI input" do
|
42
42
|
before do
|
43
|
-
@uri_input = "foo%E0\xe0".force_encoding('UTF-8')
|
43
|
+
@uri_input = "http://bar/foo%E0\xe0".force_encoding('UTF-8')
|
44
44
|
end
|
45
45
|
|
46
46
|
behaves_like :does_sanitize_uri
|
@@ -49,7 +49,7 @@ describe Rack::UTF8Sanitizer do
|
|
49
49
|
describe "with invalid ASCII-8BIT input" do
|
50
50
|
before do
|
51
51
|
@plain_input = "foo\xe0"
|
52
|
-
@uri_input = "foo%E0"
|
52
|
+
@uri_input = "http://bar/foo%E0"
|
53
53
|
end
|
54
54
|
|
55
55
|
behaves_like :does_sanitize_plain
|
@@ -58,7 +58,7 @@ describe Rack::UTF8Sanitizer do
|
|
58
58
|
|
59
59
|
describe "with invalid, incorrectly percent-encoded ASCII-8BIT URI input" do
|
60
60
|
before do
|
61
|
-
@uri_input = "foo%E0\xe0"
|
61
|
+
@uri_input = "http://bar/foo%E0\xe0"
|
62
62
|
end
|
63
63
|
|
64
64
|
behaves_like :does_sanitize_uri
|
@@ -89,16 +89,24 @@ describe Rack::UTF8Sanitizer do
|
|
89
89
|
describe "with valid UTF-8 input" do
|
90
90
|
before do
|
91
91
|
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
92
|
-
@uri_input = "foo+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
92
|
+
@uri_input = "http://bar/foo+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
93
93
|
end
|
94
94
|
|
95
95
|
behaves_like :identity_plain
|
96
96
|
behaves_like :identity_uri
|
97
|
+
|
98
|
+
describe "with URI characters from reserved range" do
|
99
|
+
before do
|
100
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
101
|
+
end
|
102
|
+
|
103
|
+
behaves_like :identity_uri
|
104
|
+
end
|
97
105
|
end
|
98
106
|
|
99
107
|
describe "with valid, not percent-encoded UTF-8 URI input" do
|
100
108
|
before do
|
101
|
-
@uri_input = "foo+bar+лол".force_encoding('UTF-8')
|
109
|
+
@uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
|
102
110
|
end
|
103
111
|
|
104
112
|
it "does not change URI-like entity (REQUEST_PATH)" do
|
@@ -114,17 +122,25 @@ describe Rack::UTF8Sanitizer do
|
|
114
122
|
describe "with valid ASCII-8BIT input" do
|
115
123
|
before do
|
116
124
|
@plain_input = "bar baz"
|
117
|
-
@uri_input = "bar+baz"
|
125
|
+
@uri_input = "http://bar/bar+baz"
|
118
126
|
end
|
119
127
|
|
120
128
|
behaves_like :identity_plain
|
121
129
|
behaves_like :identity_uri
|
130
|
+
|
131
|
+
describe "with URI characters from reserved range" do
|
132
|
+
before do
|
133
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB"
|
134
|
+
end
|
135
|
+
|
136
|
+
behaves_like :identity_uri
|
137
|
+
end
|
122
138
|
end
|
123
139
|
|
124
140
|
describe "with frozen strings" do
|
125
141
|
before do
|
126
142
|
@plain_input = "bar baz".freeze
|
127
|
-
@uri_input = "bar+baz".freeze
|
143
|
+
@uri_input = "http://bar/bar+baz".freeze
|
128
144
|
end
|
129
145
|
|
130
146
|
it "preserves the frozen? status of input" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rack
|