h1p 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/test.yml +31 -0
- data/.gitignore +57 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +33 -0
- data/LICENSE +21 -0
- data/README.md +5 -0
- data/Rakefile +16 -0
- data/TODO.md +106 -0
- data/benchmarks/bm_http1_parser.rb +85 -0
- data/examples/http_server.rb +41 -0
- data/ext/h1p/extconf.rb +13 -0
- data/ext/h1p/h1p.c +860 -0
- data/ext/h1p/h1p.h +18 -0
- data/ext/h1p/limits.rb +10 -0
- data/h1p.gemspec +25 -0
- data/lib/h1p.rb +31 -0
- data/lib/h1p/version.rb +5 -0
- data/test/helper.rb +15 -0
- data/test/test_h1p.rb +584 -0
- metadata +125 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 4fb3573e65df46350986981759a454c8bc2c4166901a46dd827fdc17a27b08b5
|
4
|
+
data.tar.gz: afc10ead82e3286f61fa2bee1739499f275b75a386ed3e3401988805470c9b3a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 72ba304cda888fcfb1c8afaee1a03b94ddb15fde0110c303886e4036125fb5b464d214ebb731ea91dac792070c9cc3d5eccb789f667c8a590fae01afb88d5665
|
7
|
+
data.tar.gz: 31cd6c6d7bb6695d338596b56be808cd1fed99cc76f11ee5f38fbdf7ff5d1206960a7134932ac82e9ca9c19116a20ee99db8f2a002fb76c1997134ac4d432dae
|
@@ -0,0 +1,31 @@
|
|
1
|
+
name: Tests
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
strategy:
|
8
|
+
fail-fast: false
|
9
|
+
matrix:
|
10
|
+
os: [ubuntu-latest]
|
11
|
+
ruby: ['2.6', '2.7', '3.0'] # quoted: a bare 3.0 is a YAML float and is read as "3"
|
12
|
+
|
13
|
+
name: >-
|
14
|
+
${{matrix.os}}, ${{matrix.ruby}}
|
15
|
+
|
16
|
+
runs-on: ${{matrix.os}}
|
17
|
+
steps:
|
18
|
+
- uses: actions/checkout@v1
|
19
|
+
- uses: actions/setup-ruby@v1
|
20
|
+
with:
|
21
|
+
ruby-version: ${{matrix.ruby}}
|
22
|
+
- name: Install dependencies
|
23
|
+
run: |
|
24
|
+
gem install bundler
|
25
|
+
bundle install
|
26
|
+
- name: Show Linux kernel version
|
27
|
+
run: uname -r
|
28
|
+
- name: Compile C-extension
|
29
|
+
run: bundle exec rake compile
|
30
|
+
- name: Run tests
|
31
|
+
run: bundle exec rake test
|
data/.gitignore
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
# Used by dotenv library to load environment variables.
|
14
|
+
# .env
|
15
|
+
|
16
|
+
# Ignore Byebug command history file.
|
17
|
+
.byebug_history
|
18
|
+
|
19
|
+
## Specific to RubyMotion:
|
20
|
+
.dat*
|
21
|
+
.repl_history
|
22
|
+
build/
|
23
|
+
*.bridgesupport
|
24
|
+
build-iPhoneOS/
|
25
|
+
build-iPhoneSimulator/
|
26
|
+
|
27
|
+
## Specific to RubyMotion (use of CocoaPods):
|
28
|
+
#
|
29
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
30
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
31
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
32
|
+
#
|
33
|
+
# vendor/Pods/
|
34
|
+
|
35
|
+
## Documentation cache and generated files:
|
36
|
+
/.yardoc/
|
37
|
+
/_yardoc/
|
38
|
+
/doc/
|
39
|
+
/rdoc/
|
40
|
+
|
41
|
+
## Environment normalization:
|
42
|
+
/.bundle/
|
43
|
+
/vendor/bundle
|
44
|
+
/lib/bundler/man/
|
45
|
+
|
46
|
+
# for a library or gem, you might want to ignore these files since the code is
|
47
|
+
# intended to run in multiple environments; otherwise, check them in:
|
48
|
+
# Gemfile.lock
|
49
|
+
# .ruby-version
|
50
|
+
# .ruby-gemset
|
51
|
+
|
52
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
53
|
+
.rvmrc
|
54
|
+
|
55
|
+
# Used by RuboCop. Remote config files pulled in from inherit_from directive.
|
56
|
+
# .rubocop-https?--*
|
57
|
+
lib/*.so
|
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
h1p (0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
ansi (1.5.0)
|
10
|
+
builder (3.2.4)
|
11
|
+
minitest (5.11.3)
|
12
|
+
minitest-reporters (1.4.3)
|
13
|
+
ansi
|
14
|
+
builder
|
15
|
+
minitest (>= 5.0)
|
16
|
+
ruby-progressbar
|
17
|
+
rake (12.3.3)
|
18
|
+
rake-compiler (1.1.1)
|
19
|
+
rake
|
20
|
+
ruby-progressbar (1.11.0)
|
21
|
+
|
22
|
+
PLATFORMS
|
23
|
+
ruby
|
24
|
+
|
25
|
+
DEPENDENCIES
|
26
|
+
h1p!
|
27
|
+
minitest (~> 5.11.3)
|
28
|
+
minitest-reporters (~> 1.4.2)
|
29
|
+
rake (~> 12.3.3)
|
30
|
+
rake-compiler (= 1.1.1)
|
31
|
+
|
32
|
+
BUNDLED WITH
|
33
|
+
2.1.4
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2021 Digital Fabric
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "bundler/gem_tasks"
|
4
|
+
require "rake/clean"
|
5
|
+
|
6
|
+
require "rake/extensiontask"
|
7
|
+
Rake::ExtensionTask.new("h1p_ext") do |ext|
|
8
|
+
ext.ext_dir = "ext/h1p"
|
9
|
+
end
|
10
|
+
|
11
|
+
task :recompile => [:clean, :compile]
|
12
|
+
task :default => [:compile, :test]
|
13
|
+
|
14
|
+
task :test do
|
15
|
+
exec 'ruby test/test_h1p.rb'
|
16
|
+
end
|
data/TODO.md
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
## Add an API for reading a request body chunk into an IO (pipe)
|
2
|
+
|
3
|
+
```ruby
|
4
|
+
# currently
|
5
|
+
chunk = req.next_chunk
|
6
|
+
# or
|
7
|
+
req.each_chunk { |c| do_something(c) }
|
8
|
+
|
9
|
+
# what we'd like to do
|
10
|
+
r, w = IO.pipe
|
11
|
+
len = req.splice_chunk(w)
|
12
|
+
sock << "Here comes a chunk of #{len} bytes\n"
|
13
|
+
sock.splice(r, len)
|
14
|
+
|
15
|
+
# or:
|
16
|
+
r, w = IO.pipe
|
17
|
+
req.splice_each_chunk(w) do |len|
|
18
|
+
sock << "Here comes a chunk of #{len} bytes\n"
|
19
|
+
sock.splice(r, len)
|
20
|
+
end
|
21
|
+
```
|
22
|
+
|
23
|
+
# HTTP/1.1 parser
|
24
|
+
|
25
|
+
- http_parser.rb is not actively updated
|
26
|
+
- the http_parser.rb C parser code comes originally from https://github.com/nodejs/llhttp
|
27
|
+
- there's a Ruby gem https://github.com/metabahn/llhttp, but its API is too low-level
|
28
|
+
(lots of callbacks, headers need to be retained across callbacks)
|
29
|
+
- the basic idea is to import the C-code, then build a parser object with the following
|
30
|
+
callbacks:
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
on_headers_complete(headers)
|
34
|
+
on_body_chunk(chunk)
|
35
|
+
on_message_complete
|
36
|
+
```
|
37
|
+
|
38
|
+
- The llhttp gem's C-code is here: https://github.com/metabahn/llhttp/tree/main/mri
|
39
|
+
|
40
|
+
- Actually, if you do a C extension, instead of a callback-based API, we can
|
41
|
+
design a blocking API:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
parser = Tipi::HTTP1::Parser.new
|
45
|
+
parser.each_request(socket) do |headers|
|
46
|
+
request = Request.new(normalize_headers(headers))
|
47
|
+
handle_request(request)
|
48
|
+
end
|
49
|
+
```
|
50
|
+
|
51
|
+
# What about HTTP/2?
|
52
|
+
|
53
|
+
It would be a nice exercise in converting a callback-based API to a blocking
|
54
|
+
one:
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
parser = Tipi::HTTP2::Parser.new(socket)
|
58
|
+
parser.each_stream(socket) do |stream|
|
59
|
+
spin { handle_stream(stream) }
|
60
|
+
end
|
61
|
+
```
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
# DF
|
66
|
+
|
67
|
+
- Add attack protection for IP-address HTTP host:
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
IPV4_REGEXP = /^\d+\.\d+\.\d+\.\d+$/.freeze
|
71
|
+
|
72
|
+
def is_attack_request?(req)
|
73
|
+
return true if req.host =~ IPV4_REGEXP && req.query[:q] != 'ping'
|
74
|
+
end
|
75
|
+
```
|
76
|
+
|
77
|
+
- Add attack route to Qeweney routing API
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
# Roadmap
|
82
|
+
|
83
|
+
- Update README (get rid of non-http stuff)
|
84
|
+
- Improve Rack spec compliance, add tests
|
85
|
+
- Homogenize HTTP 1 and HTTP 2 headers - downcase symbols
|
86
|
+
|
87
|
+
- Use `http-2-next` instead of `http-2` for http/2
|
88
|
+
- https://gitlab.com/honeyryderchuck/http-2-next
|
89
|
+
- Open an issue there, ask what's the difference between the two gems?
|
90
|
+
|
91
|
+
## 0.38
|
92
|
+
|
93
|
+
- Add more poly CLI commands and options:
|
94
|
+
|
95
|
+
- serve static files from given directory
|
96
|
+
- serve from rack up file
|
97
|
+
- serve both http and https
|
98
|
+
- use custom certificate files for SSL
|
99
|
+
- set host address to bind to
|
100
|
+
- set port to bind to
|
101
|
+
- set forking process count
|
102
|
+
|
103
|
+
## 0.39 Working Sinatra application
|
104
|
+
|
105
|
+
- app with database access (postgresql)
|
106
|
+
- benchmarks!
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
|
5
|
+
HTTP_REQUEST = "GET /foo HTTP/1.1\r\nHost: example.com\r\nAccept: */*\r\n\r\n"
|
6
|
+
|
7
|
+
def measure_time_and_allocs
|
8
|
+
4.times { GC.start }
|
9
|
+
GC.disable
|
10
|
+
|
11
|
+
t0 = Time.now
|
12
|
+
a0 = object_count
|
13
|
+
yield
|
14
|
+
t1 = Time.now
|
15
|
+
a1 = object_count
|
16
|
+
[t1 - t0, a1 - a0]
|
17
|
+
ensure
|
18
|
+
GC.enable
|
19
|
+
end
|
20
|
+
|
21
|
+
def object_count
|
22
|
+
count = ObjectSpace.count_objects
|
23
|
+
count[:TOTAL] - count[:FREE]
|
24
|
+
end
|
25
|
+
|
26
|
+
def benchmark_other_http1_parser(iterations)
|
27
|
+
STDOUT << "http_parser.rb: "
|
28
|
+
require 'http_parser.rb'
|
29
|
+
|
30
|
+
i, o = IO.pipe
|
31
|
+
parser = Http::Parser.new
|
32
|
+
done = false
|
33
|
+
headers = nil
|
34
|
+
parser.on_headers_complete = proc do |h|
|
35
|
+
headers = h
|
36
|
+
headers[':method'] = parser.http_method
|
37
|
+
headers[':path'] = parser.request_url
|
38
|
+
end
|
39
|
+
parser.on_message_complete = proc { done = true }
|
40
|
+
|
41
|
+
elapsed, allocated = measure_time_and_allocs do
|
42
|
+
iterations.times do
|
43
|
+
o << HTTP_REQUEST
|
44
|
+
done = false
|
45
|
+
while !done
|
46
|
+
msg = i.readpartial(4096)
|
47
|
+
parser << msg
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
puts(format('elapsed: %f, allocated: %d (%f/req), rate: %f ips', elapsed, allocated, allocated.to_f / iterations, iterations / elapsed))
|
52
|
+
end
|
53
|
+
|
54
|
+
def benchmark_tipi_http1_parser(iterations)
|
55
|
+
STDOUT << "tipi parser: "
|
56
|
+
require_relative '../lib/tipi_ext'
|
57
|
+
i, o = IO.pipe
|
58
|
+
reader = proc { |len| i.readpartial(len) }
|
59
|
+
parser = Tipi::HTTP1Parser.new(reader)
|
60
|
+
|
61
|
+
elapsed, allocated = measure_time_and_allocs do
|
62
|
+
iterations.times do
|
63
|
+
o << HTTP_REQUEST
|
64
|
+
headers = parser.parse_headers
|
65
|
+
end
|
66
|
+
end
|
67
|
+
puts(format('elapsed: %f, allocated: %d (%f/req), rate: %f ips', elapsed, allocated, allocated.to_f / iterations, iterations / elapsed))
|
68
|
+
end
|
69
|
+
|
70
|
+
def fork_benchmark(method, iterations)
|
71
|
+
pid = fork do
|
72
|
+
send(method, iterations)
|
73
|
+
rescue Exception => e
|
74
|
+
p e
|
75
|
+
p e.backtrace
|
76
|
+
exit!
|
77
|
+
end
|
78
|
+
Process.wait(pid)
|
79
|
+
end
|
80
|
+
|
81
|
+
x = 500000
|
82
|
+
# fork_benchmark(:benchmark_other_http1_parser, x)
|
83
|
+
# fork_benchmark(:benchmark_tipi_http1_parser, x)
|
84
|
+
|
85
|
+
benchmark_tipi_http1_parser(x)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'tipi'
|
5
|
+
|
6
|
+
opts = {
|
7
|
+
reuse_addr: true,
|
8
|
+
dont_linger: true
|
9
|
+
}
|
10
|
+
|
11
|
+
puts "pid: #{Process.pid}"
|
12
|
+
puts 'Listening on port 10080...'
|
13
|
+
|
14
|
+
# GC.disable
|
15
|
+
# Thread.current.backend.idle_gc_period = 60
|
16
|
+
|
17
|
+
spin_loop(interval: 10) { p Thread.backend.stats }
|
18
|
+
|
19
|
+
spin_loop(interval: 10) do
|
20
|
+
GC.compact
|
21
|
+
end
|
22
|
+
|
23
|
+
spin do
|
24
|
+
Tipi.serve('0.0.0.0', 10080, opts) do |req|
|
25
|
+
if req.path == '/stream'
|
26
|
+
req.send_headers('Foo' => 'Bar')
|
27
|
+
sleep 1
|
28
|
+
req.send_chunk("foo\n")
|
29
|
+
sleep 1
|
30
|
+
req.send_chunk("bar\n")
|
31
|
+
req.finish
|
32
|
+
elsif req.path == '/upload'
|
33
|
+
body = req.read
|
34
|
+
req.respond("Body: #{body.inspect} (#{body.bytesize} bytes)")
|
35
|
+
else
|
36
|
+
req.respond("Hello world!\n")
|
37
|
+
end
|
38
|
+
# p req.transfer_counts
|
39
|
+
end
|
40
|
+
p 'done...'
|
41
|
+
end.await
|
data/ext/h1p/extconf.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'mkmf'
|
5
|
+
|
6
|
+
$CFLAGS << " -Wno-format-security"
|
7
|
+
CONFIG['optflags'] << ' -fno-strict-aliasing' unless RUBY_PLATFORM =~ /mswin/
|
8
|
+
|
9
|
+
require_relative './limits'
|
10
|
+
H1P_LIMITS.each { |k, v| $defs << "-D#{k.upcase}=#{v}" }
|
11
|
+
|
12
|
+
dir_config 'h1p_ext'
|
13
|
+
create_makefile 'h1p_ext'
|
data/ext/h1p/h1p.c
ADDED
@@ -0,0 +1,860 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "h1p.h"
|
3
|
+
|
4
|
+
// Security-related limits are defined in limits.rb and injected as
|
5
|
+
// defines in extconf.rb
|
6
|
+
|
7
|
+
#define INITIAL_BUFFER_SIZE 4096
|
8
|
+
#define BUFFER_TRIM_MIN_LEN 4096
|
9
|
+
#define BUFFER_TRIM_MIN_POS 2048
|
10
|
+
#define MAX_HEADERS_READ_LENGTH 4096
|
11
|
+
#define MAX_BODY_READ_LENGTH (1 << 20) // 1MB
|
12
|
+
|
13
|
+
#define BODY_READ_MODE_UNKNOWN -2
|
14
|
+
#define BODY_READ_MODE_CHUNKED -1
|
15
|
+
|
16
|
+
ID ID_arity;
|
17
|
+
ID ID_backend_read;
|
18
|
+
ID ID_backend_recv;
|
19
|
+
ID ID_call;
|
20
|
+
ID ID_downcase;
|
21
|
+
ID ID_eof_p;
|
22
|
+
ID ID_eq;
|
23
|
+
ID ID_parser_read_method;
|
24
|
+
ID ID_read;
|
25
|
+
ID ID_readpartial;
|
26
|
+
ID ID_to_i;
|
27
|
+
|
28
|
+
static VALUE mPolyphony = Qnil;
|
29
|
+
static VALUE cError;
|
30
|
+
|
31
|
+
VALUE NUM_max_headers_read_length;
|
32
|
+
VALUE NUM_buffer_start;
|
33
|
+
VALUE NUM_buffer_end;
|
34
|
+
|
35
|
+
VALUE STR_pseudo_method;
|
36
|
+
VALUE STR_pseudo_path;
|
37
|
+
VALUE STR_pseudo_protocol;
|
38
|
+
VALUE STR_pseudo_rx;
|
39
|
+
|
40
|
+
VALUE STR_chunked;
|
41
|
+
VALUE STR_content_length;
|
42
|
+
VALUE STR_transfer_encoding;
|
43
|
+
|
44
|
+
VALUE SYM_backend_read;
|
45
|
+
VALUE SYM_backend_recv;
|
46
|
+
VALUE SYM_stock_readpartial;
|
47
|
+
|
48
|
+
enum read_method {
|
49
|
+
method_readpartial, // receiver.readpartial(len, buf, pos, raise_on_eof: false) (Polyphony-specific)
|
50
|
+
method_backend_read, // Polyphony.backend_read (Polyphony-specific)
|
51
|
+
method_backend_recv, // Polyphony.backend_recv (Polyphony-specific)
|
52
|
+
method_call, // receiver.call(len) (Universal)
|
53
|
+
method_stock_readpartial // receiver.readpartial(len)
|
54
|
+
};
|
55
|
+
|
56
|
+
typedef struct parser {
|
57
|
+
VALUE io;
|
58
|
+
VALUE buffer;
|
59
|
+
VALUE headers;
|
60
|
+
int pos;
|
61
|
+
int current_request_rx;
|
62
|
+
|
63
|
+
enum read_method read_method;
|
64
|
+
int body_read_mode;
|
65
|
+
int body_left;
|
66
|
+
int request_completed;
|
67
|
+
} Parser_t;
|
68
|
+
|
69
|
+
VALUE cParser = Qnil;
|
70
|
+
|
71
|
+
static void Parser_mark(void *ptr) {
|
72
|
+
Parser_t *parser = ptr;
|
73
|
+
rb_gc_mark(parser->io);
|
74
|
+
rb_gc_mark(parser->buffer);
|
75
|
+
rb_gc_mark(parser->headers);
|
76
|
+
}
|
77
|
+
|
78
|
+
static void Parser_free(void *ptr) {
|
79
|
+
xfree(ptr);
|
80
|
+
}
|
81
|
+
|
82
|
+
static size_t Parser_size(const void *ptr) {
|
83
|
+
return sizeof(Parser_t);
|
84
|
+
}
|
85
|
+
|
86
|
+
static const rb_data_type_t Parser_type = {
|
87
|
+
"Parser",
|
88
|
+
{Parser_mark, Parser_free, Parser_size,},
|
89
|
+
0, 0, 0
|
90
|
+
};
|
91
|
+
|
92
|
+
// Allocator for Parser instances: creates the backing struct and wraps it in a
// typed-data object of the given class.
//
// ALLOC returns uninitialized memory, and Parser_mark hands io, buffer and
// headers to rb_gc_mark. The object becomes visible to the GC as soon as it is
// wrapped, which is before #initialize has filled anything in, so every field
// is put into a defined state here to keep the mark callback from reading
// garbage VALUEs.
static VALUE Parser_allocate(VALUE klass) {
  Parser_t *parser = ALLOC(Parser_t);

  parser->io = Qnil;
  parser->buffer = Qnil;
  parser->headers = Qnil;
  parser->pos = 0;
  parser->current_request_rx = 0;
  parser->read_method = method_call;
  parser->body_read_mode = BODY_READ_MODE_UNKNOWN;
  parser->body_left = 0;
  parser->request_completed = 0;

  return TypedData_Wrap_Struct(klass, &Parser_type, parser);
}
|
98
|
+
|
99
|
+
#define GetParser(obj, parser) \
|
100
|
+
TypedData_Get_Struct((obj), Parser_t, &Parser_type, (parser))
|
101
|
+
|
102
|
+
static inline void get_polyphony() {
|
103
|
+
if (mPolyphony != Qnil) return;
|
104
|
+
|
105
|
+
mPolyphony = rb_const_get(rb_cObject, rb_intern("Polyphony"));
|
106
|
+
rb_gc_register_mark_object(mPolyphony);
|
107
|
+
}
|
108
|
+
|
109
|
+
// Decides how the parser will read from the given io object.
//
// - If io responds to the parser-read-method hook, the hook is called and its
//   result selects the strategy: the stock-readpartial symbol maps to
//   method_stock_readpartial; otherwise Polyphony is resolved and the
//   backend_read / backend_recv symbols map to their strategies, with
//   method_readpartial as the fallback for any other result.
// - Otherwise, if io responds to #call, method_call is used.
// - Otherwise a RuntimeError is raised.
enum read_method detect_read_method(VALUE io) {
  if (!rb_respond_to(io, ID_parser_read_method)) {
    if (rb_respond_to(io, ID_call)) return method_call;

    rb_raise(rb_eRuntimeError, "Provided reader should be a callable or respond to #__parser_read_method__");
  }

  VALUE requested = rb_funcall(io, ID_parser_read_method, 0);
  if (requested == SYM_stock_readpartial) return method_stock_readpartial;

  // every remaining strategy goes through Polyphony
  get_polyphony();
  if (requested == SYM_backend_read) return method_backend_read;
  if (requested == SYM_backend_recv) return method_backend_recv;

  return method_readpartial;
}
|
126
|
+
|
127
|
+
// Parser#initialize(io): binds the parser to its reader object, creates the
// pre-sized read buffer and detects which read strategy to use for io.
// Returns self. Raises (via detect_read_method) if io is not a usable reader.
VALUE Parser_initialize(VALUE self, VALUE io) {
  Parser_t *parser;
  GetParser(self, parser);

  // Put every field into a defined state before the first allocation below:
  // allocating may run the GC, and Parser_mark passes io, buffer and headers
  // to rb_gc_mark, so none of them may still hold uninitialized memory.
  parser->io = io;
  parser->buffer = Qnil;
  parser->headers = Qnil;
  parser->pos = 0;
  parser->current_request_rx = 0;
  parser->body_read_mode = BODY_READ_MODE_UNKNOWN;
  parser->body_left = 0;
  parser->request_completed = 0;

  // create and pre-allocate the buffer
  parser->buffer = rb_str_new_literal("");
  rb_str_modify_expand(parser->buffer, INITIAL_BUFFER_SIZE);

  parser->read_method = detect_read_method(io);
  return self;
}
|
144
|
+
|
145
|
+
////////////////////////////////////////////////////////////////////////////////
|
146
|
+
|
147
|
+
#define str_downcase(str) (rb_funcall((str), ID_downcase, 0))
|
148
|
+
|
149
|
+
#define FILL_BUFFER_OR_GOTO_EOF(state) { if (!fill_buffer(state)) goto eof; }
|
150
|
+
|
151
|
+
#define BUFFER_POS(state) ((state)->parser->pos)
|
152
|
+
#define BUFFER_LEN(state) ((state)->len)
|
153
|
+
#define BUFFER_CUR(state) ((state)->ptr[(state)->parser->pos])
|
154
|
+
#define BUFFER_AT(state, pos) ((state)->ptr[pos])
|
155
|
+
#define BUFFER_PTR(state, pos) ((state)->ptr + pos)
|
156
|
+
#define BUFFER_STR(state, pos, len) (rb_obj_freeze(rb_utf8_str_new((state)->ptr + pos, len)))
|
157
|
+
#define BUFFER_STR_DOWNCASE(state, pos, len) (rb_obj_freeze(str_downcase(rb_utf8_str_new((state)->ptr + pos, len))))
|
158
|
+
|
159
|
+
#define INC_BUFFER_POS(state) { \
|
160
|
+
BUFFER_POS(state)++; \
|
161
|
+
if (BUFFER_POS(state) == BUFFER_LEN(state)) FILL_BUFFER_OR_GOTO_EOF(state); \
|
162
|
+
}
|
163
|
+
|
164
|
+
#define INC_BUFFER_POS_NO_FILL(state) BUFFER_POS(state)++;
|
165
|
+
|
166
|
+
// Advances the buffer position past one (possibly multi-byte) character and
// adds its byte width to len. The width is derived from the lead byte only
// (0xf0 -> 4, 0xe0 -> 3, 0xc0 -> 2, anything else -> 1); continuation bytes
// are not validated. More input is read until the whole character is
// buffered, jumping to the caller's eof label if the reader runs dry.
//
// After advancing, the buffer is refilled whenever the position lands exactly
// on the end of the buffered data, for every width. Without that check a
// multi-byte character ending on the buffer boundary would leave the position
// at the end, and the next BUFFER_CUR would read past the received bytes.
#define INC_BUFFER_POS_UTF8(state, len) { \
  unsigned char c = BUFFER_CUR(state); \
  int utf8_char_width = 1; \
  if ((c & 0xf0) == 0xf0) utf8_char_width = 4; \
  else if ((c & 0xe0) == 0xe0) utf8_char_width = 3; \
  else if ((c & 0xc0) == 0xc0) utf8_char_width = 2; \
  while (BUFFER_LEN(state) - BUFFER_POS(state) < utf8_char_width) FILL_BUFFER_OR_GOTO_EOF(state); \
  BUFFER_POS(state) += utf8_char_width; \
  len += utf8_char_width; \
  if (BUFFER_POS(state) == BUFFER_LEN(state)) FILL_BUFFER_OR_GOTO_EOF(state); \
}
|
189
|
+
|
190
|
+
#define INIT_PARSER_STATE(state) { \
|
191
|
+
(state)->len = RSTRING_LEN((state)->parser->buffer); \
|
192
|
+
if (BUFFER_POS(state) == BUFFER_LEN(state)) \
|
193
|
+
FILL_BUFFER_OR_GOTO_EOF(state) \
|
194
|
+
else \
|
195
|
+
(state)->ptr = RSTRING_PTR((state)->parser->buffer); \
|
196
|
+
}
|
197
|
+
|
198
|
+
#define RAISE_BAD_REQUEST(msg) rb_raise(cError, msg)
|
199
|
+
|
200
|
+
#define SET_HEADER_VALUE_FROM_BUFFER(state, headers, key, pos, len) { \
|
201
|
+
VALUE value = BUFFER_STR(state, pos, len); \
|
202
|
+
rb_hash_aset(headers, key, value); \
|
203
|
+
RB_GC_GUARD(value); \
|
204
|
+
}
|
205
|
+
|
206
|
+
#define SET_HEADER_DOWNCASE_VALUE_FROM_BUFFER(state, headers, key, pos, len) { \
|
207
|
+
VALUE value = BUFFER_STR_DOWNCASE(state, pos, len); \
|
208
|
+
rb_hash_aset(headers, key, value); \
|
209
|
+
RB_GC_GUARD(value); \
|
210
|
+
}
|
211
|
+
|
212
|
+
#define CONSUME_CRLF(state) { \
|
213
|
+
INC_BUFFER_POS(state); \
|
214
|
+
if (BUFFER_CUR(state) != '\n') goto bad_request; \
|
215
|
+
INC_BUFFER_POS(state); \
|
216
|
+
}
|
217
|
+
|
218
|
+
#define CONSUME_CRLF_NO_FILL(state) { \
|
219
|
+
INC_BUFFER_POS(state); \
|
220
|
+
if (BUFFER_CUR(state) != '\n') goto bad_request; \
|
221
|
+
INC_BUFFER_POS_NO_FILL(state); \
|
222
|
+
}
|
223
|
+
|
224
|
+
#define GLOBAL_STR(v, s) v = rb_str_new_literal(s); rb_global_variable(&v); rb_obj_freeze(v)
|
225
|
+
|
226
|
+
struct parser_state {
|
227
|
+
struct parser *parser;
|
228
|
+
char *ptr;
|
229
|
+
int len;
|
230
|
+
};
|
231
|
+
|
232
|
+
////////////////////////////////////////////////////////////////////////////////
|
233
|
+
|
234
|
+
// Read strategy for callable readers: invokes io.call(maxlen) and appends the
// returned string to buf. When buf_pos is NUM_buffer_start the buffer is
// emptied first, so the new data replaces its contents.
// Returns buf, or Qnil if the callable returned nil.
static inline VALUE io_call(VALUE io, VALUE maxlen, VALUE buf, VALUE buf_pos) {
  VALUE chunk = rb_funcall(io, ID_call, 1, maxlen);
  if (!NIL_P(chunk)) {
    if (buf_pos == NUM_buffer_start) rb_str_set_len(buf, 0);
    rb_str_append(buf, chunk);
    RB_GC_GUARD(chunk);
    return buf;
  }
  return Qnil;
}
|
243
|
+
|
244
|
+
// Read strategy for plain IO objects: checks io.eof? first and returns Qnil if
// it is truthy, then calls io.readpartial(maxlen).
// - Returns Qnil if readpartial returned nil.
// - Returns the freshly read string as-is when buf is nil.
// - Otherwise appends the read data to buf (emptying buf first when buf_pos is
//   NUM_buffer_start) and returns buf.
static inline VALUE io_stock_readpartial(VALUE io, VALUE maxlen, VALUE buf, VALUE buf_pos) {
  if (RTEST(rb_funcall(io, ID_eof_p, 0))) return Qnil;

  VALUE chunk = rb_funcall(io, ID_readpartial, 1, maxlen);
  if (NIL_P(chunk)) return Qnil;
  if (NIL_P(buf)) return chunk;

  if (buf_pos == NUM_buffer_start) rb_str_set_len(buf, 0);
  rb_str_append(buf, chunk);
  RB_GC_GUARD(chunk);
  return buf;
}
|
257
|
+
|
258
|
+
// Reads up to maxlen bytes from the parser's io into buf at buf_pos, using the
// read strategy selected for this parser. Returns whatever the underlying
// read call returns; an unrecognized strategy yields Qnil.
static inline VALUE parser_io_read(Parser_t *parser, VALUE maxlen, VALUE buf, VALUE buf_pos) {
  VALUE io = parser->io;
  enum read_method how = parser->read_method;

  if (how == method_backend_read)
    return rb_funcall(mPolyphony, ID_backend_read, 5, io, buf, maxlen, Qfalse, buf_pos);
  if (how == method_backend_recv)
    return rb_funcall(mPolyphony, ID_backend_recv, 4, io, buf, maxlen, buf_pos);
  if (how == method_readpartial)
    return rb_funcall(io, ID_readpartial, 4, maxlen, buf, buf_pos, Qfalse);
  if (how == method_call)
    return io_call(io, maxlen, buf, buf_pos);
  if (how == method_stock_readpartial)
    return io_stock_readpartial(io, maxlen, buf, buf_pos);

  return Qnil;
}
|
274
|
+
|
275
|
+
static inline int fill_buffer(struct parser_state *state) {
|
276
|
+
VALUE ret = parser_io_read(state->parser, NUM_max_headers_read_length, state->parser->buffer, NUM_buffer_end);
|
277
|
+
if (ret == Qnil) return 0;
|
278
|
+
|
279
|
+
state->parser->buffer = ret;
|
280
|
+
int len = RSTRING_LEN(state->parser->buffer);
|
281
|
+
int read_bytes = len - state->len;
|
282
|
+
if (!read_bytes) return 0;
|
283
|
+
|
284
|
+
state->ptr = RSTRING_PTR(state->parser->buffer);
|
285
|
+
state->len = len;
|
286
|
+
return read_bytes;
|
287
|
+
}
|
288
|
+
|
289
|
+
static inline void buffer_trim(struct parser_state *state) {
|
290
|
+
int len = RSTRING_LEN(state->parser->buffer);
|
291
|
+
int pos = state->parser->pos;
|
292
|
+
int left = len - pos;
|
293
|
+
|
294
|
+
// The buffer is trimmed only if length and position thresholds are passed,
|
295
|
+
// *and* position is past the halfway point.
|
296
|
+
if (len < BUFFER_TRIM_MIN_LEN ||
|
297
|
+
pos < BUFFER_TRIM_MIN_POS ||
|
298
|
+
left >= pos) return;
|
299
|
+
|
300
|
+
if (left > 0) {
|
301
|
+
char *ptr = RSTRING_PTR(state->parser->buffer);
|
302
|
+
memcpy(ptr, ptr + pos, left);
|
303
|
+
}
|
304
|
+
rb_str_set_len(state->parser->buffer, left);
|
305
|
+
state->parser->pos = 0;
|
306
|
+
}
|
307
|
+
|
308
|
+
// Appends len raw bytes starting at ptr to the Ruby string str, growing the
// string's capacity first and then updating its length.
static inline void str_append_from_buffer(VALUE str, char *ptr, int len) {
  int old_len = RSTRING_LEN(str);

  rb_str_modify_expand(str, len);
  // fetch the pointer only after expanding, then write at the old end
  char *tail = RSTRING_PTR(str) + old_len;
  memcpy(tail, ptr, len);
  rb_str_set_len(str, old_len + len);
}
|
314
|
+
|
315
|
+
////////////////////////////////////////////////////////////////////////////////
|
316
|
+
|
317
|
+
// Parses the request method: everything up to the first space. The method must
// be 1..MAX_METHOD_LENGTH bytes and must not contain CR or LF. On success the
// downcased method is stored in headers under STR_pseudo_method and the
// terminating space is consumed.
// Returns 1 on success, 0 if input ran out; raises on a malformed method.
static inline int parse_method(struct parser_state *state, VALUE headers) {
  int start = BUFFER_POS(state);
  int length = 0;

  for (;;) {
    char ch = BUFFER_CUR(state);

    if (ch == ' ') {
      if (length < 1 || length > MAX_METHOD_LENGTH) goto bad_request;
      INC_BUFFER_POS(state);
      break;
    }
    if (ch == '\r' || ch == '\n') goto bad_request;

    INC_BUFFER_POS(state);
    length++;
    if (length > MAX_METHOD_LENGTH) goto bad_request;
  }

  SET_HEADER_DOWNCASE_VALUE_FROM_BUFFER(state, headers, STR_pseudo_method, start, length);
  return 1;
bad_request:
  RAISE_BAD_REQUEST("Invalid method");
eof:
  return 0;
}
|
345
|
+
|
346
|
+
// Parses the request target: skips leading spaces, then takes everything up to
// the next space. The target must be 1..MAX_PATH_LENGTH bytes and must not
// contain CR or LF. On success it is stored (case preserved) in headers under
// STR_pseudo_path and the terminating space is consumed.
// Returns 1 on success, 0 if input ran out; raises on a malformed target.
static int parse_request_target(struct parser_state *state, VALUE headers) {
  while (BUFFER_CUR(state) == ' ') INC_BUFFER_POS(state);

  int start = BUFFER_POS(state);
  int length = 0;

  for (;;) {
    char ch = BUFFER_CUR(state);

    if (ch == ' ') {
      if (length < 1 || length > MAX_PATH_LENGTH) goto bad_request;
      INC_BUFFER_POS(state);
      break;
    }
    if (ch == '\r' || ch == '\n') goto bad_request;

    INC_BUFFER_POS(state);
    length++;
    if (length > MAX_PATH_LENGTH) goto bad_request;
  }

  SET_HEADER_VALUE_FROM_BUFFER(state, headers, STR_pseudo_path, start, length);
  return 1;
bad_request:
  RAISE_BAD_REQUEST("Invalid request target");
eof:
  return 0;
}
|
374
|
+
|
375
|
+
// case-insensitive compare
|
376
|
+
#define CMP_CI(state, down, up) ((BUFFER_CUR(state) == down) || (BUFFER_CUR(state) == up))
|
377
|
+
|
378
|
+
// Parses the protocol part of the request line: skips leading spaces, expects
// "HTTP/1" (letters case-insensitive), optionally followed by ".0" or ".1",
// and then the line ending (CRLF or a bare LF). The accepted text must be
// 6..8 bytes long. On success the downcased protocol is stored in headers
// under STR_pseudo_protocol.
// Returns 1 on success, 0 if input ran out; raises on a malformed protocol.
static int parse_protocol(struct parser_state *state, VALUE headers) {
  while (BUFFER_CUR(state) == ' ') INC_BUFFER_POS(state);

  int start = BUFFER_POS(state);
  int length = 0;

  if (!CMP_CI(state, 'H', 'h')) goto bad_request;
  INC_BUFFER_POS(state);
  if (!CMP_CI(state, 'T', 't')) goto bad_request;
  INC_BUFFER_POS(state);
  if (!CMP_CI(state, 'T', 't')) goto bad_request;
  INC_BUFFER_POS(state);
  if (!CMP_CI(state, 'P', 'p')) goto bad_request;
  INC_BUFFER_POS(state);
  if (BUFFER_CUR(state) != '/') goto bad_request;
  INC_BUFFER_POS(state);
  if (BUFFER_CUR(state) != '1') goto bad_request;
  INC_BUFFER_POS(state);
  length = 6;

  for (;;) {
    char ch = BUFFER_CUR(state);

    if (ch == '\r') {
      CONSUME_CRLF(state);
      break;
    }
    if (ch == '\n') {
      INC_BUFFER_POS(state);
      break;
    }
    if (ch != '.') goto bad_request;

    // ".0" or ".1" minor version
    INC_BUFFER_POS(state);
    char minor = BUFFER_CUR(state);
    if (minor != '0' && minor != '1') goto bad_request;
    INC_BUFFER_POS(state);
    length += 2;
  }

  // a repeated minor version (e.g. "HTTP/1.1.1") is caught by the upper bound
  if (length < 6 || length > 8) goto bad_request;
  SET_HEADER_DOWNCASE_VALUE_FROM_BUFFER(state, headers, STR_pseudo_protocol, start, length);
  return 1;
bad_request:
  RAISE_BAD_REQUEST("Invalid protocol");
eof:
  return 0;
}
|
420
|
+
|
421
|
+
// Parses the full request line by running the method, request-target and
// protocol parsers in order, stopping at the first one that reports EOF.
// Returns 1 if all three succeeded, 0 otherwise.
int parse_request_line(struct parser_state *state, VALUE headers) {
  return parse_method(state, headers) &&
         parse_request_target(state, headers) &&
         parse_protocol(state, headers);
}
|
430
|
+
|
431
|
+
// Parses a header key up to the ':' separator and stores the downcased key in
// *key. A line ending found right at the start marks the blank line that ends
// the headers. Keys must be 1..MAX_HEADER_KEY_LENGTH bytes and may not contain
// a space, CR or LF.
// Returns 1 when a key was read, -1 for the blank end-of-headers line, 0 if
// input ran out; raises on a malformed key.
static inline int parse_header_key(struct parser_state *state, VALUE *key) {
  int start = BUFFER_POS(state);
  int length = 0;

  for (;;) {
    char ch = BUFFER_CUR(state);

    if (ch == ' ') goto bad_request;
    if (ch == ':') {
      if (length < 1 || length > MAX_HEADER_KEY_LENGTH) goto bad_request;
      INC_BUFFER_POS(state);
      break;
    }
    if (ch == '\r') {
      // a line ending is only legal before any key byte has been read
      if (BUFFER_POS(state) > start) goto bad_request;
      CONSUME_CRLF_NO_FILL(state);
      break;
    }
    if (ch == '\n') {
      if (BUFFER_POS(state) > start) goto bad_request;
      INC_BUFFER_POS_NO_FILL(state);
      break;
    }

    INC_BUFFER_POS(state);
    length++;
    if (length > MAX_HEADER_KEY_LENGTH) goto bad_request;
  }

  if (length == 0) return -1;
  *key = BUFFER_STR_DOWNCASE(state, start, length);
  return 1;
bad_request:
  RAISE_BAD_REQUEST("Invalid header key");
eof:
  return 0;
}
|
469
|
+
|
470
|
+
// Parses a header value: skips leading spaces, then takes everything up to the
// line ending (CRLF or a bare LF), which is consumed. The value must be
// 1..MAX_HEADER_VALUE_LENGTH bytes and is stored (case preserved) in *value.
// Returns 1 on success, 0 if input ran out; raises on a malformed value.
static inline int parse_header_value(struct parser_state *state, VALUE *value) {
  while (BUFFER_CUR(state) == ' ') INC_BUFFER_POS(state);

  int start = BUFFER_POS(state);
  int length = 0;

  for (;;) {
    char ch = BUFFER_CUR(state);

    if (ch == '\r') {
      CONSUME_CRLF(state);
      break;
    }
    if (ch == '\n') {
      INC_BUFFER_POS(state);
      break;
    }

    // advances by a whole character and adds its byte width to length
    INC_BUFFER_POS_UTF8(state, length);
    if (length > MAX_HEADER_VALUE_LENGTH) goto bad_request;
  }

  if (length < 1 || length > MAX_HEADER_VALUE_LENGTH) goto bad_request;
  *value = BUFFER_STR(state, start, length);
  return 1;
bad_request:
  RAISE_BAD_REQUEST("Invalid header value");
eof:
  return 0;
}
|
498
|
+
|
499
|
+
// Parses a single "key: value" header line and stores it in the headers hash.
//
// Returns -1 when parse_header_key reports an empty line, 0 when either the
// key or the value parser returns 0, and 1 after the header has been stored.
//
// When the key is already present, the values are collected in an Array: the
// first repetition turns the stored value into a two-element Array, later
// repetitions are pushed onto it.
static inline int parse_header(struct parser_state *state, VALUE headers) {
  VALUE key, value;

  int key_status = parse_header_key(state, &key);
  if (key_status == -1) return -1;
  if (key_status == 0) return 0;

  if (!parse_header_value(state, &value)) return 0;

  VALUE existing = rb_hash_aref(headers, key);
  if (existing == Qnil) {
    // first occurrence of this key
    rb_hash_aset(headers, key, value);
  }
  else if (TYPE(existing) == T_ARRAY) {
    // third or later occurrence
    rb_ary_push(existing, value);
  }
  else {
    // second occurrence: promote the single value to an Array
    existing = rb_ary_new3(2, existing, value);
    rb_hash_aset(headers, key, existing);
  }

  RB_GC_GUARD(existing);
  RB_GC_GUARD(key);
  RB_GC_GUARD(value);
  return 1;
}
|
528
|
+
|
529
|
+
// Parses the request line and all headers of the next request.
//
// A fresh hash is stored in parser->headers and filled by parse_request_line
// and parse_header. Parsing stops at the first empty header line. More than
// MAX_HEADER_COUNT headers is reported with RAISE_BAD_REQUEST("Too many
// headers").
//
// Returns the headers hash, with the number of bytes consumed stored under
// STR_pseudo_rx, or nil when the eof path is taken (parser->headers is then
// reset to nil). In both cases body_read_mode is reset to
// BODY_READ_MODE_UNKNOWN and the consumed byte count is added to
// current_request_rx.
VALUE Parser_parse_headers(VALUE self) {
  struct parser_state state;
  GetParser(self, state.parser);
  state.parser->headers = rb_hash_new();

  buffer_trim(&state);
  int initial_pos = state.parser->pos;
  INIT_PARSER_STATE(&state);
  state.parser->current_request_rx = 0;

  if (!parse_request_line(&state, state.parser->headers)) goto eof;

  int header_count = 0;
  for (;;) {
    if (header_count > MAX_HEADER_COUNT) RAISE_BAD_REQUEST("Too many headers");

    int status = parse_header(&state, state.parser->headers);
    if (status == -1) goto done;  // empty header => end of headers
    if (status == 0) goto eof;

    header_count++;
  }

eof:
  state.parser->headers = Qnil;
done:
  state.parser->body_read_mode = BODY_READ_MODE_UNKNOWN;
  int read_bytes = BUFFER_POS(&state) - initial_pos;

  state.parser->current_request_rx += read_bytes;
  if (state.parser->headers != Qnil)
    rb_hash_aset(state.parser->headers, STR_pseudo_rx, INT2NUM(read_bytes));
  return state.parser->headers;
}
|
561
|
+
|
562
|
+
////////////////////////////////////////////////////////////////////////////////
|
563
|
+
|
564
|
+
// Converts a Ruby String made only of ASCII decimal digits to a non-negative
// int.
//
// value     - the value to convert
// error_msg - message handed to RAISE_BAD_REQUEST for any invalid input
//
// Invalid input is a value that is not a String, a character outside '0'..'9',
// or a number too large for an int. An empty String yields 0.
static inline int str_to_int(VALUE value, const char *error_msg) {
  // parse_header stores a repeated header as an Array; RSTRING_PTR and
  // RSTRING_LEN must only be applied to an actual String.
  if (TYPE(value) != T_STRING) RAISE_BAD_REQUEST(error_msg);

  // Upper bound for the result, computed locally so that no additional header
  // is needed. It is never larger than the largest representable int.
  const int int_max = (int)(~0U >> 1);

  const char *ptr = RSTRING_PTR(value);
  long len = RSTRING_LEN(value);
  int int_value = 0;

  while (len) {
    char c = *ptr;
    if ((c < '0') || (c > '9')) RAISE_BAD_REQUEST(error_msg);

    int digit = c - '0';
    // Check before multiplying: signed integer overflow is undefined
    // behaviour, so it cannot be detected reliably after the fact.
    if (int_value > (int_max - digit) / 10) RAISE_BAD_REQUEST(error_msg);
    int_value = int_value * 10 + digit;

    len--;
    ptr++;
  }

  return int_value;
}
|
581
|
+
|
582
|
+
// Reads body data for a request whose remaining size is tracked in
// parser->body_left.
//
// parser           - the parser whose buffer, position and counters are used
// read_entire_body - when zero, return after the first read from the
//                    underlying source instead of looping until body_left is 0
// buffered_only    - when non-zero, return only what is already in the
//                    parser buffer and do not read further
//
// Returns the body data as a String, or nil when body_left is not positive or
// nothing was available. request_completed is set once body_left reaches 0.
// Unless buffered_only is set, the STR_pseudo_rx header is updated with
// current_request_rx before returning. A nil result from parser_io_read is
// reported with RAISE_BAD_REQUEST("Incomplete body").
VALUE read_body_with_content_length(Parser_t *parser, int read_entire_body, int buffered_only) {
  if (parser->body_left <= 0) return Qnil;

  VALUE body = Qnil;
  int buffer_len = RSTRING_LEN(parser->buffer);
  int start = parser->pos;

  // first take whatever part of the body already sits in the parser buffer
  if (start < buffer_len) {
    int buffered = buffer_len - start;
    if (buffered > parser->body_left) buffered = parser->body_left;

    body = rb_str_new(RSTRING_PTR(parser->buffer) + start, buffered);
    parser->pos += buffered;
    parser->current_request_rx += buffered;
    parser->body_left -= buffered;
    if (parser->body_left == 0) parser->request_completed = 1;
  }

  if (buffered_only) return body;

  while (parser->body_left) {
    int maxlen = parser->body_left <= MAX_BODY_READ_LENGTH ? parser->body_left : MAX_BODY_READ_LENGTH;
    VALUE incoming = parser_io_read(parser, INT2NUM(maxlen), Qnil, NUM_buffer_start);
    if (incoming == Qnil) RAISE_BAD_REQUEST("Incomplete body");

    if (body == Qnil)
      body = incoming;
    else
      rb_str_append(body, incoming);

    int received = RSTRING_LEN(incoming);
    parser->current_request_rx += received;
    parser->body_left -= received;
    if (parser->body_left == 0) parser->request_completed = 1;
    RB_GC_GUARD(incoming);

    if (!read_entire_body) break;
  }

  rb_hash_aset(parser->headers, STR_pseudo_rx, INT2NUM(parser->current_request_rx));
  RB_GC_GUARD(body);
  return body;
}
|
627
|
+
|
628
|
+
// Returns non-zero when the given Transfer-Encoding header value, passed
// through str_downcase, compares equal (via ID_eq) to STR_chunked. A nil value
// yields 0.
int chunked_encoding_p(VALUE transfer_encoding) {
  if (transfer_encoding == Qnil) return 0;

  VALUE lowered = str_downcase(transfer_encoding);
  VALUE matches = rb_funcall(lowered, ID_eq, 1, STR_chunked);
  return matches == Qtrue;
}
|
632
|
+
|
633
|
+
int parse_chunk_size(struct parser_state *state, int *chunk_size) {
|
634
|
+
int len = 0;
|
635
|
+
int value = 0;
|
636
|
+
int initial_pos = BUFFER_POS(state);
|
637
|
+
|
638
|
+
while (1) {
|
639
|
+
char c = BUFFER_CUR(state);
|
640
|
+
if ((c >= '0') && (c <= '9')) value = (value << 4) + (c - '0');
|
641
|
+
else if ((c >= 'a') && (c <= 'f')) value = (value << 4) + (c - 'a' + 10);
|
642
|
+
else if ((c >= 'A') && (c <= 'F')) value = (value << 4) + (c - 'A' + 10);
|
643
|
+
else switch (c) {
|
644
|
+
case '\r':
|
645
|
+
CONSUME_CRLF_NO_FILL(state);
|
646
|
+
goto done;
|
647
|
+
case '\n':
|
648
|
+
INC_BUFFER_POS_NO_FILL(state);
|
649
|
+
goto done;
|
650
|
+
default:
|
651
|
+
goto bad_request;
|
652
|
+
}
|
653
|
+
INC_BUFFER_POS(state);
|
654
|
+
len++;
|
655
|
+
if (len >= MAX_CHUNKED_ENCODING_CHUNK_SIZE_LENGTH) goto bad_request;
|
656
|
+
}
|
657
|
+
done:
|
658
|
+
if (len == 0) goto bad_request;
|
659
|
+
(*chunk_size) = value;
|
660
|
+
state->parser->current_request_rx += BUFFER_POS(state) - initial_pos;
|
661
|
+
return 1;
|
662
|
+
bad_request:
|
663
|
+
RAISE_BAD_REQUEST("Invalid chunk size");
|
664
|
+
eof:
|
665
|
+
return 0;
|
666
|
+
}
|
667
|
+
|
668
|
+
// Reads the data of one chunk of chunk_size bytes and appends it to *body.
//
// state         - parser state; its parser's buffer, pos and
//                 current_request_rx are used and updated
// body          - in/out: nil on entry means a new String is created (or the
//                 String returned by parser_io_read is adopted)
// chunk_size    - number of data bytes in the chunk
// buffered_only - when non-zero, only data already in the parser buffer is
//                 taken and 1 is returned even if the chunk is not complete
//
// Returns 1 on success and 0 when parser_io_read returns nil.
int read_body_chunk_with_chunked_encoding(struct parser_state *state, VALUE *body, int chunk_size, int buffered_only) {
  Parser_t *parser = state->parser;
  int buffer_len = RSTRING_LEN(parser->buffer);
  int start = parser->pos;
  int remaining = chunk_size;

  // take what is already buffered, but never more than the chunk itself
  if (start < buffer_len) {
    int buffered = buffer_len - start;
    if (buffered > remaining) buffered = remaining;

    if (*body == Qnil)
      *body = rb_str_new(RSTRING_PTR(parser->buffer) + start, buffered);
    else
      str_append_from_buffer(*body, RSTRING_PTR(parser->buffer) + start, buffered);

    parser->pos += buffered;
    parser->current_request_rx += buffered;
    remaining -= buffered;
  }

  if (buffered_only) return 1;

  while (remaining) {
    int maxlen = remaining <= MAX_BODY_READ_LENGTH ? remaining : MAX_BODY_READ_LENGTH;

    VALUE incoming = parser_io_read(parser, INT2NUM(maxlen), Qnil, NUM_buffer_start);
    if (incoming == Qnil) return 0;

    if (*body == Qnil)
      *body = incoming;
    else
      rb_str_append(*body, incoming);

    int received = RSTRING_LEN(incoming);
    parser->current_request_rx += received;
    remaining -= received;
    RB_GC_GUARD(incoming);
  }

  return 1;
}
|
704
|
+
|
705
|
+
static inline int parse_chunk_postfix(struct parser_state *state) {
|
706
|
+
int initial_pos = BUFFER_POS(state);
|
707
|
+
if (initial_pos == BUFFER_LEN(state)) FILL_BUFFER_OR_GOTO_EOF(state);
|
708
|
+
switch (BUFFER_CUR(state)) {
|
709
|
+
case '\r':
|
710
|
+
CONSUME_CRLF_NO_FILL(state);
|
711
|
+
goto done;
|
712
|
+
case '\n':
|
713
|
+
INC_BUFFER_POS_NO_FILL(state);
|
714
|
+
goto done;
|
715
|
+
default:
|
716
|
+
goto bad_request;
|
717
|
+
}
|
718
|
+
done:
|
719
|
+
state->parser->current_request_rx += BUFFER_POS(state) - initial_pos;
|
720
|
+
return 1;
|
721
|
+
bad_request:
|
722
|
+
RAISE_BAD_REQUEST("Invalid protocol");
|
723
|
+
eof:
|
724
|
+
return 0;
|
725
|
+
}
|
726
|
+
|
727
|
+
// Reads body data for a request that uses chunked transfer encoding.
//
// parser           - the parser to read from
// read_entire_body - when zero, stop after the first chunk; otherwise keep
//                    reading until the zero-size chunk
// buffered_only    - forwarded to read_body_chunk_with_chunked_encoding
//
// Each iteration parses a chunk size, reads that chunk's data (if any) and
// then consumes the line break after it. A zero-size chunk marks the request
// as completed. On return the STR_pseudo_rx header is set to
// current_request_rx.
//
// Returns the accumulated body String, or nil if no chunk data was read. A
// failure of any of the three steps is reported with
// RAISE_BAD_REQUEST("Malformed request body"); the eof label reports
// RAISE_BAD_REQUEST("Incomplete request body").
VALUE read_body_with_chunked_encoding(Parser_t *parser, int read_entire_body, int buffered_only) {
  struct parser_state state;
  state.parser = parser;
  buffer_trim(&state);
  INIT_PARSER_STATE(&state);
  VALUE body = Qnil;

  for (;;) {
    int chunk_size = 0;
    if (BUFFER_POS(&state) == BUFFER_LEN(&state)) FILL_BUFFER_OR_GOTO_EOF(&state);
    if (!parse_chunk_size(&state, &chunk_size)) goto bad_request;

    if (chunk_size == 0) {
      // the zero-size chunk terminates the body
      parser->request_completed = 1;
    }
    else if (!read_body_chunk_with_chunked_encoding(&state, &body, chunk_size, buffered_only)) {
      goto bad_request;
    }

    if (!parse_chunk_postfix(&state)) goto bad_request;
    if (chunk_size == 0 || !read_entire_body) goto done;
  }

bad_request:
  RAISE_BAD_REQUEST("Malformed request body");
eof:
  RAISE_BAD_REQUEST("Incomplete request body");
done:
  rb_hash_aset(parser->headers, STR_pseudo_rx, INT2NUM(state.parser->current_request_rx));
  RB_GC_GUARD(body);
  return body;
}
|
756
|
+
|
757
|
+
// Decides how the body of the current request is to be read, based on the
// parsed headers.
//
// - With a content-length header: the value is converted with str_to_int and
//   stored in both body_left and body_read_mode; request_completed is cleared.
// - Otherwise, with a chunked transfer-encoding header: body_read_mode becomes
//   BODY_READ_MODE_CHUNKED and request_completed is cleared.
// - Otherwise: request_completed is set and body_read_mode is left untouched.
static inline void detect_body_read_mode(Parser_t *parser) {
  VALUE content_length = rb_hash_aref(parser->headers, STR_content_length);

  if (content_length == Qnil) {
    VALUE transfer_encoding = rb_hash_aref(parser->headers, STR_transfer_encoding);
    if (chunked_encoding_p(transfer_encoding)) {
      parser->body_read_mode = BODY_READ_MODE_CHUNKED;
      parser->request_completed = 0;
    }
    else {
      // neither header present: the request has no body to wait for
      parser->request_completed = 1;
    }
    return;
  }

  int int_content_length = str_to_int(content_length, "Invalid content length");
  if (int_content_length < 0) RAISE_BAD_REQUEST("Invalid body content length");

  // body_read_mode doubles as the content length in this mode
  parser->body_left = int_content_length;
  parser->body_read_mode = parser->body_left;
  parser->request_completed = 0;
}
|
776
|
+
|
777
|
+
// Shared implementation behind Parser_read_body and Parser_read_body_chunk.
//
// Detects the body read mode if it is still BODY_READ_MODE_UNKNOWN, then
// dispatches to the chunked reader when the mode is BODY_READ_MODE_CHUNKED and
// to the content-length reader otherwise. Both flags are passed through
// unchanged.
static inline VALUE read_body(VALUE self, int read_entire_body, int buffered_only) {
  Parser_t *parser;
  GetParser(self, parser);

  if (parser->body_read_mode == BODY_READ_MODE_UNKNOWN)
    detect_body_read_mode(parser);

  return (parser->body_read_mode == BODY_READ_MODE_CHUNKED)
    ? read_body_with_chunked_encoding(parser, read_entire_body, buffered_only)
    : read_body_with_content_length(parser, read_entire_body, buffered_only);
}
|
788
|
+
|
789
|
+
// Implementation of Parser#read_body: reads the entire request body, reading
// beyond the parser buffer as needed.
VALUE Parser_read_body(VALUE self) {
  const int read_entire_body = 1;
  const int buffered_only = 0;
  return read_body(self, read_entire_body, buffered_only);
}
|
792
|
+
|
793
|
+
// Implementation of Parser#read_body_chunk: reads a single piece of the
// request body. Only a buffered_only argument that is exactly true enables
// buffered-only mode; any other value disables it.
VALUE Parser_read_body_chunk(VALUE self, VALUE buffered_only) {
  int only_buffered = (buffered_only == Qtrue);
  return read_body(self, 0, only_buffered);
}
|
796
|
+
|
797
|
+
// Implementation of Parser#complete?: returns true when the parser's
// request_completed flag is set, false otherwise. The body read mode is
// detected first if it is still BODY_READ_MODE_UNKNOWN, which may itself
// update the flag.
VALUE Parser_complete_p(VALUE self) {
  Parser_t *parser;
  GetParser(self, parser);

  if (parser->body_read_mode == BODY_READ_MODE_UNKNOWN)
    detect_body_read_mode(parser);

  if (parser->request_completed) return Qtrue;
  return Qfalse;
}
|
806
|
+
|
807
|
+
void Init_H1P() {
|
808
|
+
VALUE mH1P;
|
809
|
+
VALUE cParser;
|
810
|
+
|
811
|
+
mH1P = rb_define_module("H1P");
|
812
|
+
rb_gc_register_mark_object(mH1P);
|
813
|
+
cParser = rb_define_class_under(mH1P, "Parser", rb_cObject);
|
814
|
+
rb_define_alloc_func(cParser, Parser_allocate);
|
815
|
+
|
816
|
+
cError = rb_define_class_under(mH1P, "Error", rb_eRuntimeError);
|
817
|
+
rb_gc_register_mark_object(cError);
|
818
|
+
|
819
|
+
// backend methods
|
820
|
+
rb_define_method(cParser, "initialize", Parser_initialize, 1);
|
821
|
+
rb_define_method(cParser, "parse_headers", Parser_parse_headers, 0);
|
822
|
+
rb_define_method(cParser, "read_body", Parser_read_body, 0);
|
823
|
+
rb_define_method(cParser, "read_body_chunk", Parser_read_body_chunk, 1);
|
824
|
+
rb_define_method(cParser, "complete?", Parser_complete_p, 0);
|
825
|
+
|
826
|
+
ID_arity = rb_intern("arity");
|
827
|
+
ID_backend_read = rb_intern("backend_read");
|
828
|
+
ID_backend_recv = rb_intern("backend_recv");
|
829
|
+
ID_call = rb_intern("call");
|
830
|
+
ID_downcase = rb_intern("downcase");
|
831
|
+
ID_eof_p = rb_intern("eof?");
|
832
|
+
ID_eq = rb_intern("==");
|
833
|
+
ID_parser_read_method = rb_intern("__parser_read_method__");
|
834
|
+
ID_read = rb_intern("read");
|
835
|
+
ID_readpartial = rb_intern("readpartial");
|
836
|
+
ID_to_i = rb_intern("to_i");
|
837
|
+
|
838
|
+
NUM_max_headers_read_length = INT2NUM(MAX_HEADERS_READ_LENGTH);
|
839
|
+
NUM_buffer_start = INT2NUM(0);
|
840
|
+
NUM_buffer_end = INT2NUM(-1);
|
841
|
+
|
842
|
+
GLOBAL_STR(STR_pseudo_method, ":method");
|
843
|
+
GLOBAL_STR(STR_pseudo_path, ":path");
|
844
|
+
GLOBAL_STR(STR_pseudo_protocol, ":protocol");
|
845
|
+
GLOBAL_STR(STR_pseudo_rx, ":rx");
|
846
|
+
|
847
|
+
GLOBAL_STR(STR_chunked, "chunked");
|
848
|
+
GLOBAL_STR(STR_content_length, "content-length");
|
849
|
+
GLOBAL_STR(STR_transfer_encoding, "transfer-encoding");
|
850
|
+
|
851
|
+
SYM_backend_read = ID2SYM(ID_backend_read);
|
852
|
+
SYM_backend_recv = ID2SYM(ID_backend_recv);
|
853
|
+
SYM_stock_readpartial = ID2SYM(rb_intern("stock_readpartial"));
|
854
|
+
|
855
|
+
rb_global_variable(&mH1P);
|
856
|
+
}
|
857
|
+
|
858
|
+
// Initialization hook for the h1p_ext library (presumably called by Ruby when
// the extension is loaded - confirm against extconf.rb). All setup is
// delegated to Init_H1P.
void Init_h1p_ext() {
  Init_H1P();
}
|