rack-utf8_sanitizer 1.9.1 → 1.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
4
- data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
3
+ metadata.gz: c33079dde3e7e3efb8c46742a50303be620b316243b5e11c5f026a89ce29bf7d
4
+ data.tar.gz: 537ff74a7f0c3edfe1bc3904540ae3c95c4ed08f15198a8872bf25a055376673
5
5
  SHA512:
6
- metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
7
- data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
6
+ metadata.gz: 787fbd5b17de52dbd26bcef3e64acb359d026eb6eeb3f5900fa0fe641980235f795c0fa2067693ab2630b2781e8d8a9c03fda141e7ad2fbca6c65f663b571795
7
+ data.tar.gz: be57486cc8be56299013bf3b101363702a8bc2af26746293e6661dd762b11737a5ae76c1f14373d5200772a3e229b19c7687f3c1855fc1950256cf4e750e3ca8
@@ -10,10 +10,10 @@ jobs:
10
10
  strategy:
11
11
  fail-fast: false
12
12
  matrix:
13
- ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
13
+ ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", "3.3", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
14
14
 
15
15
  steps:
16
- - uses: actions/checkout@v3
16
+ - uses: actions/checkout@v4
17
17
  - name: Set up Ruby
18
18
  uses: ruby/setup-ruby@v1
19
19
  with:
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
 
3
5
  # Specify your gem's dependencies in rack-utf8_sanitizer.gemspec
data/Rakefile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "bundler/gem_tasks"
2
4
 
3
5
  task :default => :spec
data/lib/rack/utf8_sanitizer.rb CHANGED
@@ -1,12 +1,13 @@
1
1
  # encoding: ascii-8bit
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'uri'
4
5
  require 'stringio'
6
+ require 'rack/request'
5
7
 
6
8
  module Rack
7
9
  class UTF8Sanitizer
8
10
  StringIO = ::StringIO
9
- BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
10
11
  NULL_BYTE_REGEX = /\x00/.freeze
11
12
 
12
13
  class NullByteInString < StandardError; end
@@ -27,7 +28,7 @@ module Rack
27
28
  begin
28
29
  env = sanitize(env)
29
30
  rescue EOFError
30
- return BAD_REQUEST
31
+ return [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
31
32
  end
32
33
  @app.call(env)
33
34
  end
@@ -48,7 +49,7 @@ module Rack
48
49
  input.
49
50
  force_encoding(Encoding::ASCII_8BIT).
50
51
  encode!(Encoding::UTF_8)
51
- if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
52
+ if sanitize_null_bytes && NULL_BYTE_REGEX.match?(input)
52
53
  raise NullByteInString
53
54
  end
54
55
  input
@@ -64,20 +65,20 @@ module Rack
64
65
  ORIGINAL_FULLPATH
65
66
  ORIGINAL_SCRIPT_NAME
66
67
  SERVER_NAME
67
- ).map(&:freeze).freeze
68
+ ).freeze
68
69
 
69
70
  SANITIZABLE_CONTENT_TYPES = %w(
70
71
  text/plain
71
72
  application/x-www-form-urlencoded
72
73
  application/json
73
74
  text/javascript
74
- ).map(&:freeze).freeze
75
+ ).freeze
75
76
 
76
77
  URI_ENCODED_CONTENT_TYPES = %w(
77
78
  application/x-www-form-urlencoded
78
- ).map(&:freeze).freeze
79
+ ).freeze
79
80
 
80
- HTTP_ = 'HTTP_'.freeze
81
+ HTTP_ = 'HTTP_'
81
82
 
82
83
  def sanitize(env)
83
84
  sanitize_rack_input(env)
@@ -115,17 +116,17 @@ module Rack
115
116
  end
116
117
 
117
118
  def sanitize_rack_input(env)
118
- # https://github.com/rack/rack/blob/master/lib/rack/request.rb#L42
119
- # Logic borrowed from Rack::Request#media_type,#media_type_params,#content_charset
120
- # Ignoring charset in content type.
121
- content_type = env['CONTENT_TYPE']
122
- content_type &&= content_type.split(/\s*[;,]\s*/, 2).first
123
- content_type &&= content_type.downcase
119
+ request = Rack::Request.new(env)
120
+ content_type = request.media_type
124
121
  return unless @sanitizable_content_types.any? {|type| content_type == type }
122
+
123
+ charset = request.content_charset
124
+ return if charset && charset.downcase != 'utf-8'
125
+
125
126
  uri_encoded = URI_ENCODED_CONTENT_TYPES.any? {|type| content_type == type}
126
127
 
127
128
  if env['rack.input']
128
- sanitized_input = sanitize_io(env['rack.input'], uri_encoded)
129
+ sanitized_input = sanitize_io(env['rack.input'], uri_encoded, env['CONTENT_LENGTH']&.to_i)
129
130
 
130
131
  env['rack.input'] = sanitized_input
131
132
  env['CONTENT_LENGTH'] &&= sanitized_input.size.to_s
@@ -167,8 +168,12 @@ module Rack
167
168
  end
168
169
  end
169
170
 
170
- def sanitize_io(io, uri_encoded = false)
171
- input = io.read
171
+ def sanitize_io(io, uri_encoded = false, content_length = nil)
172
+ input = if content_length && content_length >= 0
173
+ io.read(content_length)
174
+ else
175
+ io.read
176
+ end
172
177
  sanitized_input = sanitize_string(strip_byte_order_mark(input))
173
178
  if uri_encoded
174
179
  sanitized_input = sanitize_uri_encoded_string(sanitized_input).
@@ -251,7 +256,7 @@ module Rack
251
256
  # Performs the reverse function of `unescape_unreserved`. Unlike
252
257
  # the previous function, we can reuse the logic in URI#encode
253
258
  def escape_unreserved(input)
254
- URI::DEFAULT_PARSER.escape(input, UNSAFE)
259
+ URI::RFC2396_PARSER.escape(input, UNSAFE)
255
260
  end
256
261
 
257
262
  def sanitize_string(input)
@@ -276,7 +281,7 @@ module Rack
276
281
  end
277
282
  end
278
283
 
279
- UTF8_BOM = "\xef\xbb\xbf".force_encoding(Encoding::BINARY).freeze
284
+ UTF8_BOM = "\xef\xbb\xbf".dup.force_encoding(Encoding::BINARY).freeze
280
285
  UTF8_BOM_SIZE = UTF8_BOM.bytesize
281
286
 
282
287
  def strip_byte_order_mark(input)
data/rack-utf8_sanitizer.gemspec CHANGED
@@ -1,21 +1,22 @@
1
1
  # -*- encoding: utf-8 -*-
2
+ # frozen_string_literal: true
2
3
 
3
4
  Gem::Specification.new do |gem|
4
5
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.9.1'
6
- gem.authors = ["whitequark"]
6
+ gem.version = '1.10.0'
7
+ gem.authors = ["Catherine"]
7
8
  gem.license = "MIT"
8
9
  gem.email = ["whitequark@whitequark.org"]
9
- gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
10
- %{invalid UTF8 characters in request URI and headers.}
10
+ gem.description = "Rack::UTF8Sanitizer is a Rack middleware which cleans up " \
11
+ "invalid UTF8 characters in request URI and headers."
11
12
  gem.summary = gem.description
12
- gem.homepage = "http://github.com/whitequark/rack-utf8_sanitizer"
13
+ gem.homepage = "https://github.com/whitequark/rack-utf8_sanitizer"
13
14
 
14
15
  gem.files = `git ls-files`.split($/)
15
16
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
16
17
  gem.require_paths = ["lib"]
17
18
 
18
- gem.required_ruby_version = '>= 1.9.3'
19
+ gem.required_ruby_version = '>= 2.3'
19
20
 
20
21
  gem.add_dependency "rack", '>= 1.0', '< 4.0'
21
22
 
data/test/test_utf8_sanitizer.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # encoding:ascii-8bit
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'bacon/colored_output'
4
5
  require 'cgi'
@@ -31,7 +32,7 @@ describe Rack::UTF8Sanitizer do
31
32
 
32
33
  describe "with invalid host input" do
33
34
  it "sanitizes host entity (SERVER_NAME)" do
34
- host = "host\xD0".force_encoding('UTF-8')
35
+ host = "host\xD0".dup.force_encoding('UTF-8')
35
36
  env = @app.({ "SERVER_NAME" => host })
36
37
  result = env["SERVER_NAME"]
37
38
 
@@ -42,8 +43,8 @@ describe Rack::UTF8Sanitizer do
42
43
 
43
44
  describe "with invalid UTF-8 input" do
44
45
  before do
45
- @plain_input = "foo\xe0".force_encoding('UTF-8')
46
- @uri_input = "http://bar/foo%E0".force_encoding('UTF-8')
46
+ @plain_input = "foo\xe0".dup.force_encoding('UTF-8')
47
+ @uri_input = "http://bar/foo%E0".dup.force_encoding('UTF-8')
47
48
  end
48
49
 
49
50
  behaves_like :does_sanitize_plain
@@ -52,7 +53,7 @@ describe Rack::UTF8Sanitizer do
52
53
 
53
54
  describe "with invalid, incorrectly percent-encoded UTF-8 URI input" do
54
55
  before do
55
- @uri_input = "http://bar/foo%E0\xe0".force_encoding('UTF-8')
56
+ @uri_input = "http://bar/foo%E0\xe0".dup.force_encoding('UTF-8')
56
57
  end
57
58
 
58
59
  behaves_like :does_sanitize_uri
@@ -100,8 +101,8 @@ describe Rack::UTF8Sanitizer do
100
101
 
101
102
  describe "with valid UTF-8 input" do
102
103
  before do
103
- @plain_input = "foo bar лол".force_encoding('UTF-8')
104
- @uri_input = "http://bar/foo+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
104
+ @plain_input = "foo bar лол".dup.force_encoding('UTF-8')
105
+ @uri_input = "http://bar/foo+bar+%D0%BB%D0%BE%D0%BB".dup.force_encoding('UTF-8')
105
106
  end
106
107
 
107
108
  behaves_like :identity_plain
@@ -109,7 +110,7 @@ describe Rack::UTF8Sanitizer do
109
110
 
110
111
  describe "with URI characters from reserved range" do
111
112
  before do
112
- @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
113
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".dup.force_encoding('UTF-8')
113
114
  end
114
115
 
115
116
  behaves_like :identity_uri
@@ -118,7 +119,7 @@ describe Rack::UTF8Sanitizer do
118
119
 
119
120
  describe "with valid, not percent-encoded UTF-8 URI input" do
120
121
  before do
121
- @uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
122
+ @uri_input = "http://bar/foo+bar+лол".dup.force_encoding('UTF-8')
122
123
  @encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
123
124
  end
124
125
 
@@ -152,8 +153,8 @@ describe Rack::UTF8Sanitizer do
152
153
 
153
154
  describe "with frozen strings" do
154
155
  before do
155
- @plain_input = "bar baz".freeze
156
- @uri_input = "http://bar/bar+baz".freeze
156
+ @plain_input = "bar baz"
157
+ @uri_input = "http://bar/bar+baz"
157
158
  end
158
159
 
159
160
  it "preserves the frozen? status of input" do
@@ -165,9 +166,24 @@ describe Rack::UTF8Sanitizer do
165
166
  end
166
167
  end
167
168
 
169
+ describe "with mutable strings" do
170
+ before do
171
+ @plain_input = "bar baz".dup
172
+ @uri_input = "http://bar/bar+baz".dup
173
+ end
174
+
175
+ it "preserves the frozen? status of input" do
176
+ env = @app.({ "HTTP_USER_AGENT" => @plain_input,
177
+ "REQUEST_PATH" => @uri_input })
178
+
179
+ env["HTTP_USER_AGENT"].should.not.be.frozen
180
+ env["REQUEST_PATH"].should.not.be.frozen
181
+ end
182
+ end
183
+
168
184
  describe "with symbols in the env" do
169
185
  before do
170
- @uri_input = "http://bar/foo%E0\xe0".force_encoding('UTF-8')
186
+ @uri_input = "http://bar/foo%E0\xe0".dup.force_encoding('UTF-8')
171
187
  end
172
188
 
173
189
  it "sanitizes REQUEST_PATH with invalid UTF-8 URI input" do
@@ -183,7 +199,7 @@ describe Rack::UTF8Sanitizer do
183
199
 
184
200
  describe "with form data" do
185
201
  def request_env
186
- @plain_input = "foo bar лол".force_encoding('UTF-8')
202
+ @plain_input = "foo bar лол".dup.force_encoding('UTF-8')
187
203
  {
188
204
  "REQUEST_METHOD" => "POST",
189
205
  "CONTENT_TYPE" => "application/x-www-form-urlencoded;foo=bar",
@@ -193,7 +209,7 @@ describe Rack::UTF8Sanitizer do
193
209
  end
194
210
 
195
211
  def sanitize_form_data(request_env = request_env())
196
- @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
212
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".dup.force_encoding('UTF-8')
197
213
  @response_env = @app.(request_env)
198
214
  sanitized_input = @response_env['rack.input'].read
199
215
 
@@ -219,6 +235,16 @@ describe Rack::UTF8Sanitizer do
219
235
  @response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
220
236
  end
221
237
 
238
+ it "Bad Request response can safety be mutated" do
239
+ @rack_input = BrokenIO.new
240
+ response_env = @app.(request_env)
241
+ response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
242
+ response_env[1]["Set-Cookie"] = "you_are_admin"
243
+
244
+ response_env = @app.(request_env)
245
+ response_env[1]["Set-Cookie"].should == nil
246
+ end
247
+
222
248
  it "sanitizes StringIO rack.input" do
223
249
  input = "foo=bla&quux=bar"
224
250
  @rack_input = StringIO.new input
@@ -252,6 +278,18 @@ describe Rack::UTF8Sanitizer do
252
278
  end
253
279
  end
254
280
 
281
+ it "sanitizes the rack body if the charset is present and utf-8" do
282
+ input = "name=#{CGI.escape("まつもと")}"
283
+ @rack_input = StringIO.new input
284
+
285
+ env = request_env.update('CONTENT_TYPE' => "application/x-www-form-urlencoded; charset=utf-8")
286
+ sanitize_form_data(env) do |sanitized_input|
287
+ sanitized_input.encoding.should == Encoding::UTF_8
288
+ sanitized_input.should.be.valid_encoding
289
+ sanitized_input.should == input
290
+ end
291
+ end
292
+
255
293
  it "strip UTF-8 BOM from StringIO rack.input" do
256
294
  input = %(\xef\xbb\xbf{"Hello": "World"})
257
295
  @rack_input = StringIO.new input
@@ -327,6 +365,18 @@ describe Rack::UTF8Sanitizer do
327
365
  end
328
366
  end
329
367
 
368
+ it "does not sanitize the rack body if the charset is present and not utf-8" do
369
+ input = "name=".encode("Shift_JIS") + CGI.escape("まつもと".encode("Shift_JIS", "UTF-8"))
370
+ @rack_input = StringIO.new input
371
+
372
+ env = request_env.update('CONTENT_TYPE' => "application/x-www-form-urlencoded; charset=Shift_JIS")
373
+ sanitize_form_data(env) do |sanitized_input|
374
+ sanitized_input.encoding.should == Encoding::SHIFT_JIS
375
+ sanitized_input.should.be.valid_encoding
376
+ sanitized_input.should == input
377
+ end
378
+ end
379
+
330
380
  it "adjusts content-length when replacing input" do
331
381
  input = "foo=bla&quux=bar\xED"
332
382
  @rack_input = StringIO.new input
@@ -434,7 +484,7 @@ describe Rack::UTF8Sanitizer do
434
484
 
435
485
  describe "with custom content-type" do
436
486
  def request_env
437
- @plain_input = "foo bar лол".force_encoding('UTF-8')
487
+ @plain_input = "foo bar лол".dup.force_encoding('UTF-8')
438
488
  {
439
489
  "REQUEST_METHOD" => "POST",
440
490
  "CONTENT_TYPE" => "application/vnd.api+json",
@@ -444,7 +494,7 @@ describe Rack::UTF8Sanitizer do
444
494
  end
445
495
 
446
496
  def sanitize_data(request_env = request_env())
447
- @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
497
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".dup.force_encoding('UTF-8')
448
498
  @response_env = @app.(request_env)
449
499
  sanitized_input = @response_env['rack.input'].read
450
500
 
@@ -518,7 +568,7 @@ describe Rack::UTF8Sanitizer do
518
568
 
519
569
  describe "with only and/or except options" do
520
570
  before do
521
- @plain_input = "foo\xe0".force_encoding('UTF-8')
571
+ @plain_input = "foo\xe0".dup.force_encoding('UTF-8')
522
572
  end
523
573
 
524
574
  def request_env
@@ -575,7 +625,7 @@ describe Rack::UTF8Sanitizer do
575
625
 
576
626
  describe "with custom strategy" do
577
627
  def request_env
578
- @plain_input = "foo bar лол".force_encoding('UTF-8')
628
+ @plain_input = "foo bar лол".dup.force_encoding('UTF-8')
579
629
  {
580
630
  "REQUEST_METHOD" => "POST",
581
631
  "CONTENT_TYPE" => "application/json",
@@ -585,7 +635,7 @@ describe Rack::UTF8Sanitizer do
585
635
  end
586
636
 
587
637
  def sanitize_data(request_env = request_env())
588
- @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
638
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".dup.force_encoding('UTF-8')
589
639
  @response_env = @app.(request_env)
590
640
  sanitized_input = @response_env['rack.input'].read
591
641
 
@@ -619,7 +669,7 @@ describe Rack::UTF8Sanitizer do
619
669
  it "accepts a proc as a strategy" do
620
670
  truncate = -> (input, sanitize_null_bytes:) do
621
671
  sanitize_null_bytes.should == false
622
- 'replace'.force_encoding(Encoding::UTF_8)
672
+ "replace".dup.force_encoding(Encoding::UTF_8)
623
673
  end
624
674
 
625
675
  @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
@@ -638,7 +688,7 @@ describe Rack::UTF8Sanitizer do
638
688
  it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
639
689
  truncate = -> (input, sanitize_null_bytes:) do
640
690
  sanitize_null_bytes.should == true
641
- 'replace'.force_encoding(Encoding::UTF_8)
691
+ "replace".dup.force_encoding(Encoding::UTF_8)
642
692
  end
643
693
 
644
694
  @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.1
4
+ version: 1.10.0
5
5
  platform: ruby
6
6
  authors:
7
- - whitequark
7
+ - Catherine
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-08-30 00:00:00.000000000 Z
11
+ date: 2025-01-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -93,7 +93,7 @@ files:
93
93
  - lib/rack/utf8_sanitizer.rb
94
94
  - rack-utf8_sanitizer.gemspec
95
95
  - test/test_utf8_sanitizer.rb
96
- homepage: http://github.com/whitequark/rack-utf8_sanitizer
96
+ homepage: https://github.com/whitequark/rack-utf8_sanitizer
97
97
  licenses:
98
98
  - MIT
99
99
  metadata: {}
@@ -105,7 +105,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
105
105
  requirements:
106
106
  - - ">="
107
107
  - !ruby/object:Gem::Version
108
- version: 1.9.3
108
+ version: '2.3'
109
109
  required_rubygems_version: !ruby/object:Gem::Requirement
110
110
  requirements:
111
111
  - - ">="