rack-utf8_sanitizer 1.9.1 → 1.10.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7825c2fec2176e38043c4d7a3c1fcbe1cf112bcc7a17a7ef42b249fab30118c4
4
- data.tar.gz: 5090e3c92af9a74377d559be48685d343b29315e5a9ce0f76faf36a8b96437ee
3
+ metadata.gz: c33079dde3e7e3efb8c46742a50303be620b316243b5e11c5f026a89ce29bf7d
4
+ data.tar.gz: 537ff74a7f0c3edfe1bc3904540ae3c95c4ed08f15198a8872bf25a055376673
5
5
  SHA512:
6
- metadata.gz: e20607b2c412ecfb3d2ba719a7d0aeb381cc4f685e08c6a7801fb2b60a0993c71cc5c219b9ff17cedb497f7d5d0ee907da94ab91476960143f25c704058f1ebc
7
- data.tar.gz: 7df7e1d357a6d3b12f089c1d7fea0a55eeb31d2d8f7e3d2b2e2e8729c1ae21c6260a9eb370e8c54e6886344986b1dd436d55b404f7321263d6bbf120115d1788
6
+ metadata.gz: 787fbd5b17de52dbd26bcef3e64acb359d026eb6eeb3f5900fa0fe641980235f795c0fa2067693ab2630b2781e8d8a9c03fda141e7ad2fbca6c65f663b571795
7
+ data.tar.gz: be57486cc8be56299013bf3b101363702a8bc2af26746293e6661dd762b11737a5ae76c1f14373d5200772a3e229b19c7687f3c1855fc1950256cf4e750e3ca8
@@ -10,10 +10,10 @@ jobs:
10
10
  strategy:
11
11
  fail-fast: false
12
12
  matrix:
13
- ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
13
+ ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", "3.3", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
14
14
 
15
15
  steps:
16
- - uses: actions/checkout@v3
16
+ - uses: actions/checkout@v4
17
17
  - name: Set up Ruby
18
18
  uses: ruby/setup-ruby@v1
19
19
  with:
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
 
3
5
  # Specify your gem's dependencies in rack-utf8_sanitizer.gemspec
data/Rakefile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "bundler/gem_tasks"
2
4
 
3
5
  task :default => :spec
data/lib/rack/utf8_sanitizer.rb CHANGED
@@ -1,12 +1,13 @@
1
1
  # encoding: ascii-8bit
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'uri'
4
5
  require 'stringio'
6
+ require 'rack/request'
5
7
 
6
8
  module Rack
7
9
  class UTF8Sanitizer
8
10
  StringIO = ::StringIO
9
- BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
10
11
  NULL_BYTE_REGEX = /\x00/.freeze
11
12
 
12
13
  class NullByteInString < StandardError; end
@@ -27,7 +28,7 @@ module Rack
27
28
  begin
28
29
  env = sanitize(env)
29
30
  rescue EOFError
30
- return BAD_REQUEST
31
+ return [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
31
32
  end
32
33
  @app.call(env)
33
34
  end
@@ -48,7 +49,7 @@ module Rack
48
49
  input.
49
50
  force_encoding(Encoding::ASCII_8BIT).
50
51
  encode!(Encoding::UTF_8)
51
- if sanitize_null_bytes && input =~ NULL_BYTE_REGEX
52
+ if sanitize_null_bytes && NULL_BYTE_REGEX.match?(input)
52
53
  raise NullByteInString
53
54
  end
54
55
  input
@@ -64,20 +65,20 @@ module Rack
64
65
  ORIGINAL_FULLPATH
65
66
  ORIGINAL_SCRIPT_NAME
66
67
  SERVER_NAME
67
- ).map(&:freeze).freeze
68
+ ).freeze
68
69
 
69
70
  SANITIZABLE_CONTENT_TYPES = %w(
70
71
  text/plain
71
72
  application/x-www-form-urlencoded
72
73
  application/json
73
74
  text/javascript
74
- ).map(&:freeze).freeze
75
+ ).freeze
75
76
 
76
77
  URI_ENCODED_CONTENT_TYPES = %w(
77
78
  application/x-www-form-urlencoded
78
- ).map(&:freeze).freeze
79
+ ).freeze
79
80
 
80
- HTTP_ = 'HTTP_'.freeze
81
+ HTTP_ = 'HTTP_'
81
82
 
82
83
  def sanitize(env)
83
84
  sanitize_rack_input(env)
@@ -115,17 +116,17 @@ module Rack
115
116
  end
116
117
 
117
118
  def sanitize_rack_input(env)
118
- # https://github.com/rack/rack/blob/master/lib/rack/request.rb#L42
119
- # Logic borrowed from Rack::Request#media_type,#media_type_params,#content_charset
120
- # Ignoring charset in content type.
121
- content_type = env['CONTENT_TYPE']
122
- content_type &&= content_type.split(/\s*[;,]\s*/, 2).first
123
- content_type &&= content_type.downcase
119
+ request = Rack::Request.new(env)
120
+ content_type = request.media_type
124
121
  return unless @sanitizable_content_types.any? {|type| content_type == type }
122
+
123
+ charset = request.content_charset
124
+ return if charset && charset.downcase != 'utf-8'
125
+
125
126
  uri_encoded = URI_ENCODED_CONTENT_TYPES.any? {|type| content_type == type}
126
127
 
127
128
  if env['rack.input']
128
- sanitized_input = sanitize_io(env['rack.input'], uri_encoded)
129
+ sanitized_input = sanitize_io(env['rack.input'], uri_encoded, env['CONTENT_LENGTH']&.to_i)
129
130
 
130
131
  env['rack.input'] = sanitized_input
131
132
  env['CONTENT_LENGTH'] &&= sanitized_input.size.to_s
@@ -167,8 +168,12 @@ module Rack
167
168
  end
168
169
  end
169
170
 
170
- def sanitize_io(io, uri_encoded = false)
171
- input = io.read
171
+ def sanitize_io(io, uri_encoded = false, content_length = nil)
172
+ input = if content_length && content_length >= 0
173
+ io.read(content_length)
174
+ else
175
+ io.read
176
+ end
172
177
  sanitized_input = sanitize_string(strip_byte_order_mark(input))
173
178
  if uri_encoded
174
179
  sanitized_input = sanitize_uri_encoded_string(sanitized_input).
@@ -251,7 +256,7 @@ module Rack
251
256
  # Performs the reverse function of `unescape_unreserved`. Unlike
252
257
  # the previous function, we can reuse the logic in URI#encode
253
258
  def escape_unreserved(input)
254
- URI::DEFAULT_PARSER.escape(input, UNSAFE)
259
+ URI::RFC2396_PARSER.escape(input, UNSAFE)
255
260
  end
256
261
 
257
262
  def sanitize_string(input)
@@ -276,7 +281,7 @@ module Rack
276
281
  end
277
282
  end
278
283
 
279
- UTF8_BOM = "\xef\xbb\xbf".force_encoding(Encoding::BINARY).freeze
284
+ UTF8_BOM = "\xef\xbb\xbf".dup.force_encoding(Encoding::BINARY).freeze
280
285
  UTF8_BOM_SIZE = UTF8_BOM.bytesize
281
286
 
282
287
  def strip_byte_order_mark(input)
data/rack-utf8_sanitizer.gemspec CHANGED
@@ -1,21 +1,22 @@
1
1
  # -*- encoding: utf-8 -*-
2
+ # frozen_string_literal: true
2
3
 
3
4
  Gem::Specification.new do |gem|
4
5
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.9.1'
6
- gem.authors = ["whitequark"]
6
+ gem.version = '1.10.0'
7
+ gem.authors = ["Catherine"]
7
8
  gem.license = "MIT"
8
9
  gem.email = ["whitequark@whitequark.org"]
9
- gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
10
- %{invalid UTF8 characters in request URI and headers.}
10
+ gem.description = "Rack::UTF8Sanitizer is a Rack middleware which cleans up " \
11
+ "invalid UTF8 characters in request URI and headers."
11
12
  gem.summary = gem.description
12
- gem.homepage = "http://github.com/whitequark/rack-utf8_sanitizer"
13
+ gem.homepage = "https://github.com/whitequark/rack-utf8_sanitizer"
13
14
 
14
15
  gem.files = `git ls-files`.split($/)
15
16
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
16
17
  gem.require_paths = ["lib"]
17
18
 
18
- gem.required_ruby_version = '>= 1.9.3'
19
+ gem.required_ruby_version = '>= 2.3'
19
20
 
20
21
  gem.add_dependency "rack", '>= 1.0', '< 4.0'
21
22
 
data/test/test_utf8_sanitizer.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # encoding:ascii-8bit
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'bacon/colored_output'
4
5
  require 'cgi'
@@ -31,7 +32,7 @@ describe Rack::UTF8Sanitizer do
31
32
 
32
33
  describe "with invalid host input" do
33
34
  it "sanitizes host entity (SERVER_NAME)" do
34
- host = "host\xD0".force_encoding('UTF-8')
35
+ host = "host\xD0".dup.force_encoding('UTF-8')
35
36
  env = @app.({ "SERVER_NAME" => host })
36
37
  result = env["SERVER_NAME"]
37
38
 
@@ -42,8 +43,8 @@ describe Rack::UTF8Sanitizer do
42
43
 
43
44
  describe "with invalid UTF-8 input" do
44
45
  before do
45
- @plain_input = "foo\xe0".force_encoding('UTF-8')
46
- @uri_input = "http://bar/foo%E0".force_encoding('UTF-8')
46
+ @plain_input = "foo\xe0".dup.force_encoding('UTF-8')
47
+ @uri_input = "http://bar/foo%E0".dup.force_encoding('UTF-8')
47
48
  end
48
49
 
49
50
  behaves_like :does_sanitize_plain
@@ -52,7 +53,7 @@ describe Rack::UTF8Sanitizer do
52
53
 
53
54
  describe "with invalid, incorrectly percent-encoded UTF-8 URI input" do
54
55
  before do
55
- @uri_input = "http://bar/foo%E0\xe0".force_encoding('UTF-8')
56
+ @uri_input = "http://bar/foo%E0\xe0".dup.force_encoding('UTF-8')
56
57
  end
57
58
 
58
59
  behaves_like :does_sanitize_uri
@@ -100,8 +101,8 @@ describe Rack::UTF8Sanitizer do
100
101
 
101
102
  describe "with valid UTF-8 input" do
102
103
  before do
103
- @plain_input = "foo bar лол".force_encoding('UTF-8')
104
- @uri_input = "http://bar/foo+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
104
+ @plain_input = "foo bar лол".dup.force_encoding('UTF-8')
105
+ @uri_input = "http://bar/foo+bar+%D0%BB%D0%BE%D0%BB".dup.force_encoding('UTF-8')
105
106
  end
106
107
 
107
108
  behaves_like :identity_plain
@@ -109,7 +110,7 @@ describe Rack::UTF8Sanitizer do
109
110
 
110
111
  describe "with URI characters from reserved range" do
111
112
  before do
112
- @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
113
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".dup.force_encoding('UTF-8')
113
114
  end
114
115
 
115
116
  behaves_like :identity_uri
@@ -118,7 +119,7 @@ describe Rack::UTF8Sanitizer do
118
119
 
119
120
  describe "with valid, not percent-encoded UTF-8 URI input" do
120
121
  before do
121
- @uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
122
+ @uri_input = "http://bar/foo+bar+лол".dup.force_encoding('UTF-8')
122
123
  @encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
123
124
  end
124
125
 
@@ -152,8 +153,8 @@ describe Rack::UTF8Sanitizer do
152
153
 
153
154
  describe "with frozen strings" do
154
155
  before do
155
- @plain_input = "bar baz".freeze
156
- @uri_input = "http://bar/bar+baz".freeze
156
+ @plain_input = "bar baz"
157
+ @uri_input = "http://bar/bar+baz"
157
158
  end
158
159
 
159
160
  it "preserves the frozen? status of input" do
@@ -165,9 +166,24 @@ describe Rack::UTF8Sanitizer do
165
166
  end
166
167
  end
167
168
 
169
+ describe "with mutable strings" do
170
+ before do
171
+ @plain_input = "bar baz".dup
172
+ @uri_input = "http://bar/bar+baz".dup
173
+ end
174
+
175
+ it "preserves the frozen? status of input" do
176
+ env = @app.({ "HTTP_USER_AGENT" => @plain_input,
177
+ "REQUEST_PATH" => @uri_input })
178
+
179
+ env["HTTP_USER_AGENT"].should.not.be.frozen
180
+ env["REQUEST_PATH"].should.not.be.frozen
181
+ end
182
+ end
183
+
168
184
  describe "with symbols in the env" do
169
185
  before do
170
- @uri_input = "http://bar/foo%E0\xe0".force_encoding('UTF-8')
186
+ @uri_input = "http://bar/foo%E0\xe0".dup.force_encoding('UTF-8')
171
187
  end
172
188
 
173
189
  it "sanitizes REQUEST_PATH with invalid UTF-8 URI input" do
@@ -183,7 +199,7 @@ describe Rack::UTF8Sanitizer do
183
199
 
184
200
  describe "with form data" do
185
201
  def request_env
186
- @plain_input = "foo bar лол".force_encoding('UTF-8')
202
+ @plain_input = "foo bar лол".dup.force_encoding('UTF-8')
187
203
  {
188
204
  "REQUEST_METHOD" => "POST",
189
205
  "CONTENT_TYPE" => "application/x-www-form-urlencoded;foo=bar",
@@ -193,7 +209,7 @@ describe Rack::UTF8Sanitizer do
193
209
  end
194
210
 
195
211
  def sanitize_form_data(request_env = request_env())
196
- @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
212
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".dup.force_encoding('UTF-8')
197
213
  @response_env = @app.(request_env)
198
214
  sanitized_input = @response_env['rack.input'].read
199
215
 
@@ -219,6 +235,16 @@ describe Rack::UTF8Sanitizer do
219
235
  @response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
220
236
  end
221
237
 
238
+ it "Bad Request response can safety be mutated" do
239
+ @rack_input = BrokenIO.new
240
+ response_env = @app.(request_env)
241
+ response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
242
+ response_env[1]["Set-Cookie"] = "you_are_admin"
243
+
244
+ response_env = @app.(request_env)
245
+ response_env[1]["Set-Cookie"].should == nil
246
+ end
247
+
222
248
  it "sanitizes StringIO rack.input" do
223
249
  input = "foo=bla&quux=bar"
224
250
  @rack_input = StringIO.new input
@@ -252,6 +278,18 @@ describe Rack::UTF8Sanitizer do
252
278
  end
253
279
  end
254
280
 
281
+ it "sanitizes the rack body if the charset is present and utf-8" do
282
+ input = "name=#{CGI.escape("まつもと")}"
283
+ @rack_input = StringIO.new input
284
+
285
+ env = request_env.update('CONTENT_TYPE' => "application/x-www-form-urlencoded; charset=utf-8")
286
+ sanitize_form_data(env) do |sanitized_input|
287
+ sanitized_input.encoding.should == Encoding::UTF_8
288
+ sanitized_input.should.be.valid_encoding
289
+ sanitized_input.should == input
290
+ end
291
+ end
292
+
255
293
  it "strip UTF-8 BOM from StringIO rack.input" do
256
294
  input = %(\xef\xbb\xbf{"Hello": "World"})
257
295
  @rack_input = StringIO.new input
@@ -327,6 +365,18 @@ describe Rack::UTF8Sanitizer do
327
365
  end
328
366
  end
329
367
 
368
+ it "does not sanitize the rack body if the charset is present and not utf-8" do
369
+ input = "name=".encode("Shift_JIS") + CGI.escape("まつもと".encode("Shift_JIS", "UTF-8"))
370
+ @rack_input = StringIO.new input
371
+
372
+ env = request_env.update('CONTENT_TYPE' => "application/x-www-form-urlencoded; charset=Shift_JIS")
373
+ sanitize_form_data(env) do |sanitized_input|
374
+ sanitized_input.encoding.should == Encoding::SHIFT_JIS
375
+ sanitized_input.should.be.valid_encoding
376
+ sanitized_input.should == input
377
+ end
378
+ end
379
+
330
380
  it "adjusts content-length when replacing input" do
331
381
  input = "foo=bla&quux=bar\xED"
332
382
  @rack_input = StringIO.new input
@@ -434,7 +484,7 @@ describe Rack::UTF8Sanitizer do
434
484
 
435
485
  describe "with custom content-type" do
436
486
  def request_env
437
- @plain_input = "foo bar лол".force_encoding('UTF-8')
487
+ @plain_input = "foo bar лол".dup.force_encoding('UTF-8')
438
488
  {
439
489
  "REQUEST_METHOD" => "POST",
440
490
  "CONTENT_TYPE" => "application/vnd.api+json",
@@ -444,7 +494,7 @@ describe Rack::UTF8Sanitizer do
444
494
  end
445
495
 
446
496
  def sanitize_data(request_env = request_env())
447
- @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
497
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".dup.force_encoding('UTF-8')
448
498
  @response_env = @app.(request_env)
449
499
  sanitized_input = @response_env['rack.input'].read
450
500
 
@@ -518,7 +568,7 @@ describe Rack::UTF8Sanitizer do
518
568
 
519
569
  describe "with only and/or except options" do
520
570
  before do
521
- @plain_input = "foo\xe0".force_encoding('UTF-8')
571
+ @plain_input = "foo\xe0".dup.force_encoding('UTF-8')
522
572
  end
523
573
 
524
574
  def request_env
@@ -575,7 +625,7 @@ describe Rack::UTF8Sanitizer do
575
625
 
576
626
  describe "with custom strategy" do
577
627
  def request_env
578
- @plain_input = "foo bar лол".force_encoding('UTF-8')
628
+ @plain_input = "foo bar лол".dup.force_encoding('UTF-8')
579
629
  {
580
630
  "REQUEST_METHOD" => "POST",
581
631
  "CONTENT_TYPE" => "application/json",
@@ -585,7 +635,7 @@ describe Rack::UTF8Sanitizer do
585
635
  end
586
636
 
587
637
  def sanitize_data(request_env = request_env())
588
- @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
638
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".dup.force_encoding('UTF-8')
589
639
  @response_env = @app.(request_env)
590
640
  sanitized_input = @response_env['rack.input'].read
591
641
 
@@ -619,7 +669,7 @@ describe Rack::UTF8Sanitizer do
619
669
  it "accepts a proc as a strategy" do
620
670
  truncate = -> (input, sanitize_null_bytes:) do
621
671
  sanitize_null_bytes.should == false
622
- 'replace'.force_encoding(Encoding::UTF_8)
672
+ "replace".dup.force_encoding(Encoding::UTF_8)
623
673
  end
624
674
 
625
675
  @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
@@ -638,7 +688,7 @@ describe Rack::UTF8Sanitizer do
638
688
  it "accepts a proc as a strategy and passes along sanitize_null_bytes" do
639
689
  truncate = -> (input, sanitize_null_bytes:) do
640
690
  sanitize_null_bytes.should == true
641
- 'replace'.force_encoding(Encoding::UTF_8)
691
+ "replace".dup.force_encoding(Encoding::UTF_8)
642
692
  end
643
693
 
644
694
  @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true, strategy: truncate)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.1
4
+ version: 1.10.0
5
5
  platform: ruby
6
6
  authors:
7
- - whitequark
7
+ - Catherine
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-08-30 00:00:00.000000000 Z
11
+ date: 2025-01-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -93,7 +93,7 @@ files:
93
93
  - lib/rack/utf8_sanitizer.rb
94
94
  - rack-utf8_sanitizer.gemspec
95
95
  - test/test_utf8_sanitizer.rb
96
- homepage: http://github.com/whitequark/rack-utf8_sanitizer
96
+ homepage: https://github.com/whitequark/rack-utf8_sanitizer
97
97
  licenses:
98
98
  - MIT
99
99
  metadata: {}
@@ -105,7 +105,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
105
105
  requirements:
106
106
  - - ">="
107
107
  - !ruby/object:Gem::Version
108
- version: 1.9.3
108
+ version: '2.3'
109
109
  required_rubygems_version: !ruby/object:Gem::Requirement
110
110
  requirements:
111
111
  - - ">="