h1p 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/test.yml +31 -0
- data/.gitignore +57 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +33 -0
- data/LICENSE +21 -0
- data/README.md +5 -0
- data/Rakefile +16 -0
- data/TODO.md +106 -0
- data/benchmarks/bm_http1_parser.rb +85 -0
- data/examples/http_server.rb +41 -0
- data/ext/h1p/extconf.rb +13 -0
- data/ext/h1p/h1p.c +860 -0
- data/ext/h1p/h1p.h +18 -0
- data/ext/h1p/limits.rb +10 -0
- data/h1p.gemspec +25 -0
- data/lib/h1p.rb +31 -0
- data/lib/h1p/version.rb +5 -0
- data/test/helper.rb +15 -0
- data/test/test_h1p.rb +584 -0
- metadata +125 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 4fb3573e65df46350986981759a454c8bc2c4166901a46dd827fdc17a27b08b5
|
4
|
+
data.tar.gz: afc10ead82e3286f61fa2bee1739499f275b75a386ed3e3401988805470c9b3a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 72ba304cda888fcfb1c8afaee1a03b94ddb15fde0110c303886e4036125fb5b464d214ebb731ea91dac792070c9cc3d5eccb789f667c8a590fae01afb88d5665
|
7
|
+
data.tar.gz: 31cd6c6d7bb6695d338596b56be808cd1fed99cc76f11ee5f38fbdf7ff5d1206960a7134932ac82e9ca9c19116a20ee99db8f2a002fb76c1997134ac4d432dae
|
@@ -0,0 +1,31 @@
|
|
1
|
+
name: Tests
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
strategy:
|
8
|
+
fail-fast: false
|
9
|
+
matrix:
|
10
|
+
os: [ubuntu-latest]
|
11
|
+
# Versions are quoted so YAML keeps them as strings; an unquoted 3.0 is parsed
# as the float 3 and would select the latest 3.x release instead of 3.0.
ruby: ['2.6', '2.7', '3.0']
|
12
|
+
|
13
|
+
name: >-
|
14
|
+
${{matrix.os}}, ${{matrix.ruby}}
|
15
|
+
|
16
|
+
runs-on: ${{matrix.os}}
|
17
|
+
steps:
|
18
|
+
- uses: actions/checkout@v1
|
19
|
+
- uses: actions/setup-ruby@v1
|
20
|
+
with:
|
21
|
+
ruby-version: ${{matrix.ruby}}
|
22
|
+
- name: Install dependencies
|
23
|
+
run: |
|
24
|
+
gem install bundler
|
25
|
+
bundle install
|
26
|
+
- name: Show Linux kernel version
|
27
|
+
run: uname -r
|
28
|
+
- name: Compile C-extension
|
29
|
+
run: bundle exec rake compile
|
30
|
+
- name: Run tests
|
31
|
+
run: bundle exec rake test
|
data/.gitignore
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
# Used by dotenv library to load environment variables.
|
14
|
+
# .env
|
15
|
+
|
16
|
+
# Ignore Byebug command history file.
|
17
|
+
.byebug_history
|
18
|
+
|
19
|
+
## Specific to RubyMotion:
|
20
|
+
.dat*
|
21
|
+
.repl_history
|
22
|
+
build/
|
23
|
+
*.bridgesupport
|
24
|
+
build-iPhoneOS/
|
25
|
+
build-iPhoneSimulator/
|
26
|
+
|
27
|
+
## Specific to RubyMotion (use of CocoaPods):
|
28
|
+
#
|
29
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
30
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
31
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
32
|
+
#
|
33
|
+
# vendor/Pods/
|
34
|
+
|
35
|
+
## Documentation cache and generated files:
|
36
|
+
/.yardoc/
|
37
|
+
/_yardoc/
|
38
|
+
/doc/
|
39
|
+
/rdoc/
|
40
|
+
|
41
|
+
## Environment normalization:
|
42
|
+
/.bundle/
|
43
|
+
/vendor/bundle
|
44
|
+
/lib/bundler/man/
|
45
|
+
|
46
|
+
# for a library or gem, you might want to ignore these files since the code is
|
47
|
+
# intended to run in multiple environments; otherwise, check them in:
|
48
|
+
# Gemfile.lock
|
49
|
+
# .ruby-version
|
50
|
+
# .ruby-gemset
|
51
|
+
|
52
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
53
|
+
.rvmrc
|
54
|
+
|
55
|
+
# Used by RuboCop. Remote config files pulled in from inherit_from directive.
|
56
|
+
# .rubocop-https?--*
|
57
|
+
lib/*.so
|
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
h1p (0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
ansi (1.5.0)
|
10
|
+
builder (3.2.4)
|
11
|
+
minitest (5.11.3)
|
12
|
+
minitest-reporters (1.4.3)
|
13
|
+
ansi
|
14
|
+
builder
|
15
|
+
minitest (>= 5.0)
|
16
|
+
ruby-progressbar
|
17
|
+
rake (12.3.3)
|
18
|
+
rake-compiler (1.1.1)
|
19
|
+
rake
|
20
|
+
ruby-progressbar (1.11.0)
|
21
|
+
|
22
|
+
PLATFORMS
|
23
|
+
ruby
|
24
|
+
|
25
|
+
DEPENDENCIES
|
26
|
+
h1p!
|
27
|
+
minitest (~> 5.11.3)
|
28
|
+
minitest-reporters (~> 1.4.2)
|
29
|
+
rake (~> 12.3.3)
|
30
|
+
rake-compiler (= 1.1.1)
|
31
|
+
|
32
|
+
BUNDLED WITH
|
33
|
+
2.1.4
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2021 Digital Fabric
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "bundler/gem_tasks"
|
4
|
+
require "rake/clean"
|
5
|
+
|
6
|
+
require "rake/extensiontask"
|
7
|
+
Rake::ExtensionTask.new("h1p_ext") do |ext|
|
8
|
+
ext.ext_dir = "ext/h1p"
|
9
|
+
end
|
10
|
+
|
11
|
+
task :recompile => [:clean, :compile]
|
12
|
+
task :default => [:compile, :test]
|
13
|
+
|
14
|
+
task :test do
|
15
|
+
exec 'ruby test/test_h1p.rb'
|
16
|
+
end
|
data/TODO.md
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
## Add an API for reading a request body chunk into an IO (pipe)
|
2
|
+
|
3
|
+
```ruby
|
4
|
+
# currently
|
5
|
+
chunk = req.next_chunk
|
6
|
+
# or
|
7
|
+
req.each_chunk { |c| do_something(c) }
|
8
|
+
|
9
|
+
# what we'd like to do
|
10
|
+
r, w = IO.pipe
|
11
|
+
len = req.splice_chunk(w)
|
12
|
+
sock << "Here comes a chunk of #{len} bytes\n"
|
13
|
+
sock.splice(r, len)
|
14
|
+
|
15
|
+
# or:
|
16
|
+
r, w = IO.pipe
|
17
|
+
req.splice_each_chunk(w) do |len|
|
18
|
+
sock << "Here comes a chunk of #{len} bytes\n"
|
19
|
+
sock.splice(r, len)
|
20
|
+
end
|
21
|
+
```
|
22
|
+
|
23
|
+
# HTTP/1.1 parser
|
24
|
+
|
25
|
+
- http_parser.rb is not actively updated
|
26
|
+
- the http_parser.rb C parser code comes originally from https://github.com/nodejs/llhttp
|
27
|
+
- there's a Ruby gem https://github.com/metabahn/llhttp, but its API is too low-level
|
28
|
+
(lots of callbacks, headers need to be retained across callbacks)
|
29
|
+
- the basic idea is to import the C-code, then build a parser object with the following
|
30
|
+
callbacks:
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
on_headers_complete(headers)
|
34
|
+
on_body_chunk(chunk)
|
35
|
+
on_message_complete
|
36
|
+
```
|
37
|
+
|
38
|
+
- The llhttp gem's C-code is here: https://github.com/metabahn/llhttp/tree/main/mri
|
39
|
+
|
40
|
+
- Actually, if you do a C extension, instead of a callback-based API, we can
|
41
|
+
design a blocking API:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
parser = Tipi::HTTP1::Parser.new
|
45
|
+
parser.each_request(socket) do |headers|
|
46
|
+
request = Request.new(normalize_headers(headers))
|
47
|
+
handle_request(request)
|
48
|
+
end
|
49
|
+
```
|
50
|
+
|
51
|
+
# What about HTTP/2?
|
52
|
+
|
53
|
+
It would be a nice exercise in converting a callback-based API to a blocking
|
54
|
+
one:
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
parser = Tipi::HTTP2::Parser.new(socket)
|
58
|
+
parser.each_stream(socket) do |stream|
|
59
|
+
spin { handle_stream(stream) }
|
60
|
+
end
|
61
|
+
```
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
# DF
|
66
|
+
|
67
|
+
- Add attack protection for IP-address HTTP host:
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
IPV4_REGEXP = /^\d+\.\d+\.\d+\.\d+$/.freeze
|
71
|
+
|
72
|
+
def is_attack_request?(req)
|
73
|
+
return true if req.host =~ IPV4_REGEXP && req.query[:q] != 'ping'
|
74
|
+
end
|
75
|
+
```
|
76
|
+
|
77
|
+
- Add attack route to Qeweney routing API
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
# Roadmap
|
82
|
+
|
83
|
+
- Update README (get rid of non-http stuff)
|
84
|
+
- Improve Rack spec compliance, add tests
|
85
|
+
- Homogenize HTTP 1 and HTTP 2 headers - downcase symbols
|
86
|
+
|
87
|
+
- Use `http-2-next` instead of `http-2` for http/2
|
88
|
+
- https://gitlab.com/honeyryderchuck/http-2-next
|
89
|
+
- Open an issue there, ask what's the difference between the two gems?
|
90
|
+
|
91
|
+
## 0.38
|
92
|
+
|
93
|
+
- Add more poly CLI commands and options:
|
94
|
+
|
95
|
+
- serve static files from given directory
|
96
|
+
- serve from rack up file
|
97
|
+
- serve both http and https
|
98
|
+
- use custom certificate files for SSL
|
99
|
+
- set host address to bind to
|
100
|
+
- set port to bind to
|
101
|
+
- set forking process count
|
102
|
+
|
103
|
+
## 0.39 Working Sinatra application
|
104
|
+
|
105
|
+
- app with database access (postgresql)
|
106
|
+
- benchmarks!
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
|
5
|
+
HTTP_REQUEST = "GET /foo HTTP/1.1\r\nHost: example.com\r\nAccept: */*\r\n\r\n"
|
6
|
+
|
7
|
+
def measure_time_and_allocs
|
8
|
+
4.times { GC.start }
|
9
|
+
GC.disable
|
10
|
+
|
11
|
+
t0 = Time.now
|
12
|
+
a0 = object_count
|
13
|
+
yield
|
14
|
+
t1 = Time.now
|
15
|
+
a1 = object_count
|
16
|
+
[t1 - t0, a1 - a0]
|
17
|
+
ensure
|
18
|
+
GC.enable
|
19
|
+
end
|
20
|
+
|
21
|
+
def object_count
|
22
|
+
count = ObjectSpace.count_objects
|
23
|
+
count[:TOTAL] - count[:FREE]
|
24
|
+
end
|
25
|
+
|
26
|
+
def benchmark_other_http1_parser(iterations)
|
27
|
+
STDOUT << "http_parser.rb: "
|
28
|
+
require 'http_parser.rb'
|
29
|
+
|
30
|
+
i, o = IO.pipe
|
31
|
+
parser = Http::Parser.new
|
32
|
+
done = false
|
33
|
+
headers = nil
|
34
|
+
parser.on_headers_complete = proc do |h|
|
35
|
+
headers = h
|
36
|
+
headers[':method'] = parser.http_method
|
37
|
+
headers[':path'] = parser.request_url
|
38
|
+
end
|
39
|
+
parser.on_message_complete = proc { done = true }
|
40
|
+
|
41
|
+
elapsed, allocated = measure_time_and_allocs do
|
42
|
+
iterations.times do
|
43
|
+
o << HTTP_REQUEST
|
44
|
+
done = false
|
45
|
+
while !done
|
46
|
+
msg = i.readpartial(4096)
|
47
|
+
parser << msg
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
puts(format('elapsed: %f, allocated: %d (%f/req), rate: %f ips', elapsed, allocated, allocated.to_f / iterations, iterations / elapsed))
|
52
|
+
end
|
53
|
+
|
54
|
+
# Benchmarks this gem's parser: writes HTTP_REQUEST into a pipe `iterations`
# times and parses the headers of each request through a callable reader.
# Prints elapsed time, allocation count and request rate.
def benchmark_tipi_http1_parser(iterations)
  STDOUT << "tipi parser: "
  # NOTE(review): the original required '../lib/tipi_ext' and used
  # Tipi::HTTP1Parser, neither of which ships with this gem. The H1P::Parser
  # constant is assumed from the gem layout - confirm against lib/h1p.rb.
  require_relative '../lib/h1p'
  i, o = IO.pipe
  reader = proc { |len| i.readpartial(len) }
  parser = H1P::Parser.new(reader)

  elapsed, allocated = measure_time_and_allocs do
    iterations.times do
      o << HTTP_REQUEST
      parser.parse_headers
    end
  end
  puts(format('elapsed: %f, allocated: %d (%f/req), rate: %f ips', elapsed, allocated, allocated.to_f / iterations, iterations / elapsed))
end
|
69
|
+
|
70
|
+
def fork_benchmark(method, iterations)
|
71
|
+
pid = fork do
|
72
|
+
send(method, iterations)
|
73
|
+
rescue Exception => e
|
74
|
+
p e
|
75
|
+
p e.backtrace
|
76
|
+
exit!
|
77
|
+
end
|
78
|
+
Process.wait(pid)
|
79
|
+
end
|
80
|
+
|
81
|
+
x = 500000
|
82
|
+
# fork_benchmark(:benchmark_other_http1_parser, x)
|
83
|
+
# fork_benchmark(:benchmark_tipi_http1_parser, x)
|
84
|
+
|
85
|
+
benchmark_tipi_http1_parser(x)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'tipi'
|
5
|
+
|
6
|
+
opts = {
|
7
|
+
reuse_addr: true,
|
8
|
+
dont_linger: true
|
9
|
+
}
|
10
|
+
|
11
|
+
puts "pid: #{Process.pid}"
|
12
|
+
puts 'Listening on port 10080...'
|
13
|
+
|
14
|
+
# GC.disable
|
15
|
+
# Thread.current.backend.idle_gc_period = 60
|
16
|
+
|
17
|
+
spin_loop(interval: 10) { p Thread.backend.stats }
|
18
|
+
|
19
|
+
spin_loop(interval: 10) do
|
20
|
+
GC.compact
|
21
|
+
end
|
22
|
+
|
23
|
+
spin do
|
24
|
+
Tipi.serve('0.0.0.0', 10080, opts) do |req|
|
25
|
+
if req.path == '/stream'
|
26
|
+
req.send_headers('Foo' => 'Bar')
|
27
|
+
sleep 1
|
28
|
+
req.send_chunk("foo\n")
|
29
|
+
sleep 1
|
30
|
+
req.send_chunk("bar\n")
|
31
|
+
req.finish
|
32
|
+
elsif req.path == '/upload'
|
33
|
+
body = req.read
|
34
|
+
req.respond("Body: #{body.inspect} (#{body.bytesize} bytes)")
|
35
|
+
else
|
36
|
+
req.respond("Hello world!\n")
|
37
|
+
end
|
38
|
+
# p req.transfer_counts
|
39
|
+
end
|
40
|
+
p 'done...'
|
41
|
+
end.await
|
data/ext/h1p/extconf.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'mkmf'
|
5
|
+
|
6
|
+
$CFLAGS << " -Wno-format-security"
|
7
|
+
CONFIG['optflags'] << ' -fno-strict-aliasing' unless RUBY_PLATFORM =~ /mswin/
|
8
|
+
|
9
|
+
require_relative './limits'
|
10
|
+
H1P_LIMITS.each { |k, v| $defs << "-D#{k.upcase}=#{v}" }
|
11
|
+
|
12
|
+
dir_config 'h1p_ext'
|
13
|
+
create_makefile 'h1p_ext'
|
data/ext/h1p/h1p.c
ADDED
@@ -0,0 +1,860 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "h1p.h"
|
3
|
+
|
4
|
+
// Security-related limits are defined in limits.rb and injected as
|
5
|
+
// defines in extconf.rb
|
6
|
+
|
7
|
+
#define INITIAL_BUFFER_SIZE 4096
|
8
|
+
#define BUFFER_TRIM_MIN_LEN 4096
|
9
|
+
#define BUFFER_TRIM_MIN_POS 2048
|
10
|
+
#define MAX_HEADERS_READ_LENGTH 4096
|
11
|
+
#define MAX_BODY_READ_LENGTH (1 << 20) // 1MB
|
12
|
+
|
13
|
+
#define BODY_READ_MODE_UNKNOWN -2
|
14
|
+
#define BODY_READ_MODE_CHUNKED -1
|
15
|
+
|
16
|
+
ID ID_arity;
|
17
|
+
ID ID_backend_read;
|
18
|
+
ID ID_backend_recv;
|
19
|
+
ID ID_call;
|
20
|
+
ID ID_downcase;
|
21
|
+
ID ID_eof_p;
|
22
|
+
ID ID_eq;
|
23
|
+
ID ID_parser_read_method;
|
24
|
+
ID ID_read;
|
25
|
+
ID ID_readpartial;
|
26
|
+
ID ID_to_i;
|
27
|
+
|
28
|
+
static VALUE mPolyphony = Qnil;
|
29
|
+
static VALUE cError;
|
30
|
+
|
31
|
+
VALUE NUM_max_headers_read_length;
|
32
|
+
VALUE NUM_buffer_start;
|
33
|
+
VALUE NUM_buffer_end;
|
34
|
+
|
35
|
+
VALUE STR_pseudo_method;
|
36
|
+
VALUE STR_pseudo_path;
|
37
|
+
VALUE STR_pseudo_protocol;
|
38
|
+
VALUE STR_pseudo_rx;
|
39
|
+
|
40
|
+
VALUE STR_chunked;
|
41
|
+
VALUE STR_content_length;
|
42
|
+
VALUE STR_transfer_encoding;
|
43
|
+
|
44
|
+
VALUE SYM_backend_read;
|
45
|
+
VALUE SYM_backend_recv;
|
46
|
+
VALUE SYM_stock_readpartial;
|
47
|
+
|
48
|
+
enum read_method {
|
49
|
+
method_readpartial, // receiver.readpartial(len, buf, pos, raise_on_eof: false) (Polyphony-specific)
|
50
|
+
method_backend_read, // Polyphony.backend_read (Polyphony-specific)
|
51
|
+
method_backend_recv, // Polyphony.backend_recv (Polyphony-specific)
|
52
|
+
method_call, // receiver.call(len) (Universal)
|
53
|
+
method_stock_readpartial // receiver.readpartial(len)
|
54
|
+
};
|
55
|
+
|
56
|
+
typedef struct parser {
|
57
|
+
VALUE io;
|
58
|
+
VALUE buffer;
|
59
|
+
VALUE headers;
|
60
|
+
int pos;
|
61
|
+
int current_request_rx;
|
62
|
+
|
63
|
+
enum read_method read_method;
|
64
|
+
int body_read_mode;
|
65
|
+
int body_left;
|
66
|
+
int request_completed;
|
67
|
+
} Parser_t;
|
68
|
+
|
69
|
+
VALUE cParser = Qnil;
|
70
|
+
|
71
|
+
static void Parser_mark(void *ptr) {
|
72
|
+
Parser_t *parser = ptr;
|
73
|
+
rb_gc_mark(parser->io);
|
74
|
+
rb_gc_mark(parser->buffer);
|
75
|
+
rb_gc_mark(parser->headers);
|
76
|
+
}
|
77
|
+
|
78
|
+
// GC free callback: releases the parser struct allocated in Parser_allocate.
static void Parser_free(void *parser_ptr) {
  xfree(parser_ptr);
}
|
81
|
+
|
82
|
+
static size_t Parser_size(const void *ptr) {
|
83
|
+
return sizeof(Parser_t);
|
84
|
+
}
|
85
|
+
|
86
|
+
static const rb_data_type_t Parser_type = {
|
87
|
+
"Parser",
|
88
|
+
{Parser_mark, Parser_free, Parser_size,},
|
89
|
+
0, 0, 0
|
90
|
+
};
|
91
|
+
|
92
|
+
// Allocator for the parser class: allocates a Parser_t and wraps it in a Ruby
// object of the given class.
//
// Every field is given a defined value *before* the struct is wrapped. ALLOC
// does not zero memory, and Parser_mark may run as soon as the wrapping object
// exists (i.e. before #initialize is called), so the VALUE fields must never
// hold garbage.
static VALUE Parser_allocate(VALUE klass) {
  Parser_t *parser = ALLOC(Parser_t);

  parser->io = Qnil;
  parser->buffer = Qnil;
  parser->headers = Qnil;
  parser->pos = 0;
  parser->current_request_rx = 0;
  parser->read_method = method_call; // placeholder until #initialize detects it
  parser->body_read_mode = BODY_READ_MODE_UNKNOWN;
  parser->body_left = 0;
  parser->request_completed = 0;

  return TypedData_Wrap_Struct(klass, &Parser_type, parser);
}
|
98
|
+
|
99
|
+
#define GetParser(obj, parser) \
|
100
|
+
TypedData_Get_Struct((obj), Parser_t, &Parser_type, (parser))
|
101
|
+
|
102
|
+
static inline void get_polyphony() {
|
103
|
+
if (mPolyphony != Qnil) return;
|
104
|
+
|
105
|
+
mPolyphony = rb_const_get(rb_cObject, rb_intern("Polyphony"));
|
106
|
+
rb_gc_register_mark_object(mPolyphony);
|
107
|
+
}
|
108
|
+
|
109
|
+
// Determines how data should be read from the given reader object.
//
// - If the reader responds to the method identified by ID_parser_read_method,
//   that method is called and its return value selects the strategy:
//   SYM_stock_readpartial, SYM_backend_read, SYM_backend_recv, or anything
//   else for the Polyphony-style readpartial. Polyphony is looked up for every
//   strategy except the stock readpartial.
// - Otherwise, if the reader responds to #call, it is used as a callable.
// - Otherwise a RuntimeError is raised.
enum read_method detect_read_method(VALUE io) {
  if (!rb_respond_to(io, ID_parser_read_method)) {
    if (rb_respond_to(io, ID_call)) return method_call;

    rb_raise(rb_eRuntimeError, "Provided reader should be a callable or respond to #__parser_read_method__");
  }

  VALUE selected = rb_funcall(io, ID_parser_read_method, 0);
  if (selected == SYM_stock_readpartial) return method_stock_readpartial;

  // all remaining strategies are Polyphony-specific
  get_polyphony();
  if (selected == SYM_backend_read) return method_backend_read;
  if (selected == SYM_backend_recv) return method_backend_recv;

  return method_readpartial;
}
|
126
|
+
|
127
|
+
// Parser#initialize(io)
//
// Sets up a parser reading from the given reader object. The reader must be
// accepted by detect_read_method (which raises otherwise). Returns self.
//
// All struct fields are initialized here, including the per-request counters,
// so no field is left holding whatever the allocator happened to return.
VALUE Parser_initialize(VALUE self, VALUE io) {
  Parser_t *parser;
  GetParser(self, parser);

  parser->io = io;
  parser->buffer = rb_str_new_literal("");
  parser->headers = Qnil;
  parser->pos = 0;
  parser->current_request_rx = 0;
  parser->request_completed = 0;

  // pre-allocate the buffer
  rb_str_modify_expand(parser->buffer, INITIAL_BUFFER_SIZE);

  parser->read_method = detect_read_method(io);
  parser->body_read_mode = BODY_READ_MODE_UNKNOWN;
  parser->body_left = 0;
  return self;
}
|
144
|
+
|
145
|
+
////////////////////////////////////////////////////////////////////////////////
|
146
|
+
|
147
|
+
#define str_downcase(str) (rb_funcall((str), ID_downcase, 0))
|
148
|
+
|
149
|
+
#define FILL_BUFFER_OR_GOTO_EOF(state) { if (!fill_buffer(state)) goto eof; }
|
150
|
+
|
151
|
+
#define BUFFER_POS(state) ((state)->parser->pos)
|
152
|
+
#define BUFFER_LEN(state) ((state)->len)
|
153
|
+
#define BUFFER_CUR(state) ((state)->ptr[(state)->parser->pos])
|
154
|
+
#define BUFFER_AT(state, pos) ((state)->ptr[pos])
|
155
|
+
#define BUFFER_PTR(state, pos) ((state)->ptr + pos)
|
156
|
+
#define BUFFER_STR(state, pos, len) (rb_obj_freeze(rb_utf8_str_new((state)->ptr + pos, len)))
|
157
|
+
#define BUFFER_STR_DOWNCASE(state, pos, len) (rb_obj_freeze(str_downcase(rb_utf8_str_new((state)->ptr + pos, len))))
|
158
|
+
|
159
|
+
#define INC_BUFFER_POS(state) { \
|
160
|
+
BUFFER_POS(state)++; \
|
161
|
+
if (BUFFER_POS(state) == BUFFER_LEN(state)) FILL_BUFFER_OR_GOTO_EOF(state); \
|
162
|
+
}
|
163
|
+
|
164
|
+
#define INC_BUFFER_POS_NO_FILL(state) BUFFER_POS(state)++;
|
165
|
+
|
166
|
+
// Advances the buffer position past one character and adds its byte length to
// len. The byte length (1 to 4) is derived from the high bits of the current
// byte; for multi-byte sequences the buffer is first filled until the whole
// sequence is available.
//
// After advancing, the buffer is refilled if the position reached the end of
// the buffered data, for every sequence length. Without this, a multi-byte
// sequence ending exactly at the end of the buffer left the position equal to
// the buffer length, and the next BUFFER_CUR read past the buffered data.
//
// Jumps to the enclosing function's `eof` label when the reader has no more
// data.
#define INC_BUFFER_POS_UTF8(state, len) { \
  unsigned char c = BUFFER_CUR(state); \
  int char_len = 1; \
  if ((c & 0xf0) == 0xf0) char_len = 4; \
  else if ((c & 0xe0) == 0xe0) char_len = 3; \
  else if ((c & 0xc0) == 0xc0) char_len = 2; \
  while (BUFFER_LEN(state) - BUFFER_POS(state) < char_len) FILL_BUFFER_OR_GOTO_EOF(state); \
  BUFFER_POS(state) += char_len; \
  len += char_len; \
  if (BUFFER_POS(state) == BUFFER_LEN(state)) FILL_BUFFER_OR_GOTO_EOF(state); \
}
|
189
|
+
|
190
|
+
#define INIT_PARSER_STATE(state) { \
|
191
|
+
(state)->len = RSTRING_LEN((state)->parser->buffer); \
|
192
|
+
if (BUFFER_POS(state) == BUFFER_LEN(state)) \
|
193
|
+
FILL_BUFFER_OR_GOTO_EOF(state) \
|
194
|
+
else \
|
195
|
+
(state)->ptr = RSTRING_PTR((state)->parser->buffer); \
|
196
|
+
}
|
197
|
+
|
198
|
+
// Raises the parser error class with the given message. The message is passed
// as an argument to a fixed "%s" format rather than as the format string
// itself, so a '%' in the message can never be interpreted as a directive.
#define RAISE_BAD_REQUEST(msg) rb_raise(cError, "%s", msg)
|
199
|
+
|
200
|
+
#define SET_HEADER_VALUE_FROM_BUFFER(state, headers, key, pos, len) { \
|
201
|
+
VALUE value = BUFFER_STR(state, pos, len); \
|
202
|
+
rb_hash_aset(headers, key, value); \
|
203
|
+
RB_GC_GUARD(value); \
|
204
|
+
}
|
205
|
+
|
206
|
+
#define SET_HEADER_DOWNCASE_VALUE_FROM_BUFFER(state, headers, key, pos, len) { \
|
207
|
+
VALUE value = BUFFER_STR_DOWNCASE(state, pos, len); \
|
208
|
+
rb_hash_aset(headers, key, value); \
|
209
|
+
RB_GC_GUARD(value); \
|
210
|
+
}
|
211
|
+
|
212
|
+
#define CONSUME_CRLF(state) { \
|
213
|
+
INC_BUFFER_POS(state); \
|
214
|
+
if (BUFFER_CUR(state) != '\n') goto bad_request; \
|
215
|
+
INC_BUFFER_POS(state); \
|
216
|
+
}
|
217
|
+
|
218
|
+
#define CONSUME_CRLF_NO_FILL(state) { \
|
219
|
+
INC_BUFFER_POS(state); \
|
220
|
+
if (BUFFER_CUR(state) != '\n') goto bad_request; \
|
221
|
+
INC_BUFFER_POS_NO_FILL(state); \
|
222
|
+
}
|
223
|
+
|
224
|
+
#define GLOBAL_STR(v, s) v = rb_str_new_literal(s); rb_global_variable(&v); rb_obj_freeze(v)
|
225
|
+
|
226
|
+
struct parser_state {
|
227
|
+
struct parser *parser;
|
228
|
+
char *ptr;
|
229
|
+
int len;
|
230
|
+
};
|
231
|
+
|
232
|
+
////////////////////////////////////////////////////////////////////////////////
|
233
|
+
|
234
|
+
// Reads from a callable reader by invoking io.call(maxlen).
//
// Returns Qnil if the callable returns nil. If no destination buffer is given
// (buf is Qnil), the callable's result is returned as is, matching the
// behaviour of io_stock_readpartial. Otherwise the result is written to buf -
// replacing its content when buf_pos is NUM_buffer_start, appending
// otherwise - and buf is returned.
static inline VALUE io_call(VALUE io, VALUE maxlen, VALUE buf, VALUE buf_pos) {
  VALUE result = rb_funcall(io, ID_call, 1, maxlen);
  if (result == Qnil) return Qnil;
  if (buf == Qnil) return result;

  if (buf_pos == NUM_buffer_start) rb_str_set_len(buf, 0);
  rb_str_append(buf, result);
  RB_GC_GUARD(result);
  return buf;
}
|
243
|
+
|
244
|
+
// Reads from a reader using its #eof? and #readpartial(maxlen) methods.
//
// Returns Qnil when the reader reports EOF or readpartial returns nil. With no
// destination buffer (buf is Qnil) the chunk read is returned directly.
// Otherwise the chunk is written to buf - replacing its content when buf_pos
// is NUM_buffer_start, appending otherwise - and buf is returned.
static inline VALUE io_stock_readpartial(VALUE io, VALUE maxlen, VALUE buf, VALUE buf_pos) {
  if (RTEST(rb_funcall(io, ID_eof_p, 0))) return Qnil;

  VALUE chunk = rb_funcall(io, ID_readpartial, 1, maxlen);
  if (chunk == Qnil) return Qnil;
  if (buf == Qnil) return chunk;

  if (buf_pos == NUM_buffer_start) rb_str_set_len(buf, 0);
  rb_str_append(buf, chunk);
  RB_GC_GUARD(chunk);
  return buf;
}
|
257
|
+
|
258
|
+
// Reads up to maxlen bytes from the parser's reader into buf at buf_pos,
// dispatching on the read method detected when the parser was initialized.
// Returns whatever the selected read strategy returns; Qnil for an
// unrecognized read method.
static inline VALUE parser_io_read(Parser_t *parser, VALUE maxlen, VALUE buf, VALUE buf_pos) {
  VALUE io = parser->io;

  switch (parser->read_method) {
    case method_readpartial:
      return rb_funcall(io, ID_readpartial, 4, maxlen, buf, buf_pos, Qfalse);
    case method_backend_read:
      return rb_funcall(mPolyphony, ID_backend_read, 5, io, buf, maxlen, Qfalse, buf_pos);
    case method_backend_recv:
      return rb_funcall(mPolyphony, ID_backend_recv, 4, io, buf, maxlen, buf_pos);
    case method_call:
      return io_call(io, maxlen, buf, buf_pos);
    case method_stock_readpartial:
      return io_stock_readpartial(io, maxlen, buf, buf_pos);
    default:
      break;
  }
  return Qnil;
}
|
274
|
+
|
275
|
+
static inline int fill_buffer(struct parser_state *state) {
|
276
|
+
VALUE ret = parser_io_read(state->parser, NUM_max_headers_read_length, state->parser->buffer, NUM_buffer_end);
|
277
|
+
if (ret == Qnil) return 0;
|
278
|
+
|
279
|
+
state->parser->buffer = ret;
|
280
|
+
int len = RSTRING_LEN(state->parser->buffer);
|
281
|
+
int read_bytes = len - state->len;
|
282
|
+
if (!read_bytes) return 0;
|
283
|
+
|
284
|
+
state->ptr = RSTRING_PTR(state->parser->buffer);
|
285
|
+
state->len = len;
|
286
|
+
return read_bytes;
|
287
|
+
}
|
288
|
+
|
289
|
+
static inline void buffer_trim(struct parser_state *state) {
|
290
|
+
int len = RSTRING_LEN(state->parser->buffer);
|
291
|
+
int pos = state->parser->pos;
|
292
|
+
int left = len - pos;
|
293
|
+
|
294
|
+
// The buffer is trimmed only if length and position thresholds are passed,
|
295
|
+
// *and* position is past the halfway point.
|
296
|
+
if (len < BUFFER_TRIM_MIN_LEN ||
|
297
|
+
pos < BUFFER_TRIM_MIN_POS ||
|
298
|
+
left >= pos) return;
|
299
|
+
|
300
|
+
if (left > 0) {
|
301
|
+
char *ptr = RSTRING_PTR(state->parser->buffer);
|
302
|
+
memcpy(ptr, ptr + pos, left);
|
303
|
+
}
|
304
|
+
rb_str_set_len(state->parser->buffer, left);
|
305
|
+
state->parser->pos = 0;
|
306
|
+
}
|
307
|
+
|
308
|
+
// Appends len bytes starting at ptr to the Ruby string str, growing the
// string's capacity first.
static inline void str_append_from_buffer(VALUE str, char *ptr, int len) {
  const int old_len = RSTRING_LEN(str);

  rb_str_modify_expand(str, len);
  char *dest = RSTRING_PTR(str) + old_len;
  memcpy(dest, ptr, len);
  rb_str_set_len(str, old_len + len);
}
|
314
|
+
|
315
|
+
////////////////////////////////////////////////////////////////////////////////
|
316
|
+
|
317
|
+
// Parses the request method: everything up to the first space. The method is
// stored downcased in headers under STR_pseudo_method.
// Returns 1 on success, 0 if the reader ran out of data. Raises the parser
// error if the method is empty, longer than MAX_METHOD_LENGTH, or interrupted
// by a CR or LF.
static inline int parse_method(struct parser_state *state, VALUE headers) {
  const int start = BUFFER_POS(state);
  int length = 0;

  for (;;) {
    char cur = BUFFER_CUR(state);

    if (cur == ' ') {
      if (length < 1 || length > MAX_METHOD_LENGTH) goto bad_request;
      INC_BUFFER_POS(state);
      break;
    }
    if (cur == '\r' || cur == '\n') goto bad_request;

    INC_BUFFER_POS(state);
    length++;
    if (length > MAX_METHOD_LENGTH) goto bad_request;
  }

  SET_HEADER_DOWNCASE_VALUE_FROM_BUFFER(state, headers, STR_pseudo_method, start, length);
  return 1;
bad_request:
  RAISE_BAD_REQUEST("Invalid method");
eof:
  return 0;
}
|
345
|
+
|
346
|
+
// Parses the request target: skips leading spaces, then takes everything up
// to the next space and stores it (case preserved) in headers under
// STR_pseudo_path.
// Returns 1 on success, 0 if the reader ran out of data. Raises the parser
// error if the target is empty, longer than MAX_PATH_LENGTH, or interrupted
// by a CR or LF.
static int parse_request_target(struct parser_state *state, VALUE headers) {
  while (BUFFER_CUR(state) == ' ') INC_BUFFER_POS(state);

  const int start = BUFFER_POS(state);
  int length = 0;

  for (;;) {
    char cur = BUFFER_CUR(state);

    if (cur == ' ') {
      if (length < 1 || length > MAX_PATH_LENGTH) goto bad_request;
      INC_BUFFER_POS(state);
      break;
    }
    if (cur == '\r' || cur == '\n') goto bad_request;

    INC_BUFFER_POS(state);
    length++;
    if (length > MAX_PATH_LENGTH) goto bad_request;
  }

  SET_HEADER_VALUE_FROM_BUFFER(state, headers, STR_pseudo_path, start, length);
  return 1;
bad_request:
  RAISE_BAD_REQUEST("Invalid request target");
eof:
  return 0;
}
|
374
|
+
|
375
|
+
// case-insensitive compare
|
376
|
+
#define CMP_CI(state, down, up) ((BUFFER_CUR(state) == down) || (BUFFER_CUR(state) == up))
|
377
|
+
|
378
|
+
// Parses the protocol part of the request line: skips leading spaces, matches
// "HTTP/1" (letters case-insensitive), then accepts ".0" or ".1" groups until
// the line ends with CRLF or a bare LF. The protocol is stored downcased in
// headers under STR_pseudo_protocol.
// Returns 1 on success, 0 if the reader ran out of data. Raises the parser
// error on any other input, or if the matched length is not between 6 and 8.
static int parse_protocol(struct parser_state *state, VALUE headers) {
  while (BUFFER_CUR(state) == ' ') INC_BUFFER_POS(state);

  const int start = BUFFER_POS(state);
  int length = 0;

  if (!CMP_CI(state, 'H', 'h')) goto bad_request;
  INC_BUFFER_POS(state);
  if (!CMP_CI(state, 'T', 't')) goto bad_request;
  INC_BUFFER_POS(state);
  if (!CMP_CI(state, 'T', 't')) goto bad_request;
  INC_BUFFER_POS(state);
  if (!CMP_CI(state, 'P', 'p')) goto bad_request;
  INC_BUFFER_POS(state);
  if (BUFFER_CUR(state) != '/') goto bad_request;
  INC_BUFFER_POS(state);
  if (BUFFER_CUR(state) != '1') goto bad_request;
  INC_BUFFER_POS(state);
  length = 6; // "HTTP/1"

  for (;;) {
    char cur = BUFFER_CUR(state);

    if (cur == '\r') {
      CONSUME_CRLF(state);
      break;
    }
    if (cur == '\n') {
      INC_BUFFER_POS(state);
      break;
    }
    if (cur != '.') goto bad_request;

    // a dot must be followed by a minor version digit of 0 or 1
    INC_BUFFER_POS(state);
    cur = BUFFER_CUR(state);
    if (cur != '0' && cur != '1') goto bad_request;
    INC_BUFFER_POS(state);
    length += 2;
  }

  if (length < 6 || length > 8) goto bad_request;
  SET_HEADER_DOWNCASE_VALUE_FROM_BUFFER(state, headers, STR_pseudo_protocol, start, length);
  return 1;
bad_request:
  RAISE_BAD_REQUEST("Invalid protocol");
eof:
  return 0;
}
|
420
|
+
|
421
|
+
// Parses the full request line (method, request target, protocol) into
// headers. Stops at the first part that runs out of data.
// Returns 1 if all three parts were parsed, 0 otherwise.
int parse_request_line(struct parser_state *state, VALUE headers) {
  return parse_method(state, headers) &&
         parse_request_target(state, headers) &&
         parse_protocol(state, headers);
}
|
430
|
+
|
431
|
+
// Parses a header key (everything up to ':') and stores it, downcased and
// frozen, in *key.
// Returns 1 if a key was parsed, -1 if the line is empty (CRLF or bare LF at
// the starting position, i.e. end of headers), 0 if the reader ran out of
// data. Raises the parser error if the key contains a space, is empty or
// longer than MAX_HEADER_KEY_LENGTH, or if a line break occurs mid-key.
static inline int parse_header_key(struct parser_state *state, VALUE *key) {
  const int start = BUFFER_POS(state);
  int length = 0;

  for (;;) {
    char cur = BUFFER_CUR(state);

    if (cur == ' ') goto bad_request;
    if (cur == ':') {
      if (length < 1 || length > MAX_HEADER_KEY_LENGTH) goto bad_request;
      INC_BUFFER_POS(state);
      break;
    }
    if (cur == '\r') {
      if (BUFFER_POS(state) > start) goto bad_request;
      CONSUME_CRLF_NO_FILL(state);
      break;
    }
    if (cur == '\n') {
      if (BUFFER_POS(state) > start) goto bad_request;
      INC_BUFFER_POS_NO_FILL(state);
      break;
    }

    INC_BUFFER_POS(state);
    length++;
    if (length > MAX_HEADER_KEY_LENGTH) goto bad_request;
  }

  // an empty line terminates the header section
  if (length == 0) return -1;

  (*key) = BUFFER_STR_DOWNCASE(state, start, length);
  return 1;
bad_request:
  RAISE_BAD_REQUEST("Invalid header key");
eof:
  return 0;
}
|
469
|
+
|
470
|
+
// Parses a header value: skips leading spaces, then takes everything up to
// the terminating CRLF or bare LF and stores it as a frozen string in *value.
// Returns 1 on success, 0 if the reader ran out of data. Raises the parser
// error if the value is empty or longer than MAX_HEADER_VALUE_LENGTH.
static inline int parse_header_value(struct parser_state *state, VALUE *value) {
  while (BUFFER_CUR(state) == ' ') INC_BUFFER_POS(state);

  const int start = BUFFER_POS(state);
  int length = 0;

  for (;;) {
    char cur = BUFFER_CUR(state);

    if (cur == '\r') {
      CONSUME_CRLF(state);
      break;
    }
    if (cur == '\n') {
      INC_BUFFER_POS(state);
      break;
    }

    INC_BUFFER_POS_UTF8(state, length);
    if (length > MAX_HEADER_VALUE_LENGTH) goto bad_request;
  }

  if (length < 1 || length > MAX_HEADER_VALUE_LENGTH) goto bad_request;
  (*value) = BUFFER_STR(state, start, length);
  return 1;
bad_request:
  RAISE_BAD_REQUEST("Invalid header value");
eof:
  return 0;
}
|
498
|
+
|
499
|
+
// Parses one "key: value" line and records it in the given headers hash.
//
// Returns 1 when a header was stored, -1 when parse_header_key reported an
// empty line, and 0 when either the key or the value parser returned 0.
//
// The first occurrence of a key is stored as a plain value. A second
// occurrence replaces it with a two-element array, and later occurrences are
// pushed onto that array.
static inline int parse_header(struct parser_state *state, VALUE headers) {
  VALUE key = Qnil;
  VALUE value = Qnil;

  int key_status = parse_header_key(state, &key);
  if (key_status == -1) return -1;
  if (key_status == 0) return 0;

  if (!parse_header_value(state, &value)) return 0;

  VALUE existing = rb_hash_aref(headers, key);
  if (existing == Qnil) {
    rb_hash_aset(headers, key, value);
  }
  else if (TYPE(existing) == T_ARRAY) {
    rb_ary_push(existing, value);
  }
  else {
    existing = rb_ary_new3(2, existing, value);
    rb_hash_aset(headers, key, existing);
  }

  RB_GC_GUARD(existing);
  RB_GC_GUARD(key);
  RB_GC_GUARD(value);
  return 1;
}
|
528
|
+
|
529
|
+
// Parser#parse_headers: parses the request line and all header lines of the
// next request.
//
// Returns the headers hash, with STR_pseudo_rx set to the number of buffer
// bytes consumed by this call, or nil when the `eof` path is taken. In both
// cases the body read mode is reset to BODY_READ_MODE_UNKNOWN. More than
// MAX_HEADER_COUNT headers is reported through RAISE_BAD_REQUEST.
VALUE Parser_parse_headers(VALUE self) {
  struct parser_state state;
  Parser_t *parser;
  int initial_pos;
  int read_bytes;

  GetParser(self, state.parser);
  parser = state.parser;
  parser->headers = rb_hash_new();

  buffer_trim(&state);
  initial_pos = parser->pos;
  INIT_PARSER_STATE(&state);
  parser->current_request_rx = 0;

  if (!parse_request_line(&state, parser->headers)) goto eof;

  for (int header_count = 0; ; header_count++) {
    if (header_count > MAX_HEADER_COUNT) RAISE_BAD_REQUEST("Too many headers");

    int status = parse_header(&state, parser->headers);
    if (status == -1) goto done; // empty line => end of headers
    if (status == 0) goto eof;
  }

eof:
  parser->headers = Qnil;
done:
  parser->body_read_mode = BODY_READ_MODE_UNKNOWN;
  read_bytes = BUFFER_POS(&state) - initial_pos;
  parser->current_request_rx += read_bytes;

  if (parser->headers != Qnil)
    rb_hash_aset(parser->headers, STR_pseudo_rx, INT2NUM(read_bytes));
  return parser->headers;
}
|
561
|
+
|
562
|
+
////////////////////////////////////////////////////////////////////////////////
|
563
|
+
|
564
|
+
// Converts a Ruby String made only of ASCII decimal digits into a
// non-negative int.
//
// value      the String to convert; it is read through RSTRING_PTR and
//            RSTRING_LEN, so the caller must pass a String
// error_msg  message handed to RAISE_BAD_REQUEST when the input is rejected
//
// Returns the parsed number (0 for an empty string). A non-digit byte, or a
// number that does not fit in an int, is reported through RAISE_BAD_REQUEST
// instead of overflowing the accumulator.
static inline int str_to_int(VALUE value, const char *error_msg) {
  // Largest int value, computed locally so no extra header is required
  // (equals INT_MAX wherever int has no padding bits).
  const int int_max = (int)(~0U >> 1);

  const char *ptr = RSTRING_PTR(value);
  long len = RSTRING_LEN(value);
  int int_value = 0;

  for (; len > 0; len--, ptr++) {
    char c = *ptr;
    if ((c < '0') || (c > '9')) RAISE_BAD_REQUEST(error_msg);

    int digit = c - '0';
    // int_value * 10 + digit would exceed int_max: reject before computing it,
    // since signed overflow is undefined behaviour.
    if (int_value > (int_max - digit) / 10) RAISE_BAD_REQUEST(error_msg);

    int_value = int_value * 10 + digit;
  }

  return int_value;
}
|
581
|
+
|
582
|
+
// Reads body data for a request whose length is tracked in parser->body_left.
//
// parser            parser holding the buffer, position and byte counters
// read_entire_body  when zero, stop after the first read from the underlying
//                   I/O; otherwise keep reading until body_left reaches zero
// buffered_only     when non-zero, return only what is already in the parser
//                   buffer (possibly nil) without touching the I/O
//
// Returns the body String, or nil when body_left is not positive or nothing
// was available. parser->request_completed is set once body_left reaches
// zero. When parser_io_read returns nil before the body is complete, the
// condition is reported through RAISE_BAD_REQUEST. The STR_pseudo_rx header is
// refreshed on the non-buffered-only path only.
VALUE read_body_with_content_length(Parser_t *parser, int read_entire_body, int buffered_only) {
  if (parser->body_left <= 0) return Qnil;

  VALUE body = Qnil;
  int buffer_len = RSTRING_LEN(parser->buffer);
  int start = parser->pos;

  // First hand out whatever part of the body is already buffered.
  if (start < buffer_len) {
    int take = buffer_len - start;
    if (take > parser->body_left) take = parser->body_left;

    body = rb_str_new(RSTRING_PTR(parser->buffer) + start, take);
    parser->pos += take;
    parser->current_request_rx += take;
    parser->body_left -= take;
    if (parser->body_left == 0) parser->request_completed = 1;
  }

  if (buffered_only) return body;

  // Then pull the remainder from the I/O, at most MAX_BODY_READ_LENGTH per read.
  while (parser->body_left) {
    int want = parser->body_left > MAX_BODY_READ_LENGTH ? MAX_BODY_READ_LENGTH : parser->body_left;
    VALUE piece = parser_io_read(parser, INT2NUM(want), Qnil, NUM_buffer_start);
    if (piece == Qnil) RAISE_BAD_REQUEST("Incomplete body");

    if (body == Qnil) {
      body = piece;
    }
    else {
      rb_str_append(body, piece);
    }

    int received = RSTRING_LEN(piece);
    parser->current_request_rx += received;
    parser->body_left -= received;
    if (parser->body_left == 0) parser->request_completed = 1;
    RB_GC_GUARD(piece);

    if (!read_entire_body) break;
  }

  rb_hash_aset(parser->headers, STR_pseudo_rx, INT2NUM(parser->current_request_rx));
  RB_GC_GUARD(body);
  return body;
}
|
627
|
+
|
628
|
+
// Tells whether a Transfer-Encoding header value selects chunked encoding.
//
// transfer_encoding  the value stored for the header: nil when absent, a
//                    String, or an Array of Strings when the header was
//                    repeated (parse_header stores repeated keys that way)
//
// Returns 1 when the value, compared case-insensitively, is "chunked", and 0
// otherwise. For a repeated header the last entry is examined, as chunked has
// to be the final transfer coding. Values that are neither String nor Array
// (including nil) yield 0 instead of being handed to str_downcase.
int chunked_encoding_p(VALUE transfer_encoding) {
  if (TYPE(transfer_encoding) == T_ARRAY)
    transfer_encoding = rb_ary_entry(transfer_encoding, -1);

  if (TYPE(transfer_encoding) != T_STRING) return 0;

  return rb_funcall(str_downcase(transfer_encoding), ID_eq, 1, STR_chunked) == Qtrue;
}
|
632
|
+
|
633
|
+
int parse_chunk_size(struct parser_state *state, int *chunk_size) {
|
634
|
+
int len = 0;
|
635
|
+
int value = 0;
|
636
|
+
int initial_pos = BUFFER_POS(state);
|
637
|
+
|
638
|
+
while (1) {
|
639
|
+
char c = BUFFER_CUR(state);
|
640
|
+
if ((c >= '0') && (c <= '9')) value = (value << 4) + (c - '0');
|
641
|
+
else if ((c >= 'a') && (c <= 'f')) value = (value << 4) + (c - 'a' + 10);
|
642
|
+
else if ((c >= 'A') && (c <= 'F')) value = (value << 4) + (c - 'A' + 10);
|
643
|
+
else switch (c) {
|
644
|
+
case '\r':
|
645
|
+
CONSUME_CRLF_NO_FILL(state);
|
646
|
+
goto done;
|
647
|
+
case '\n':
|
648
|
+
INC_BUFFER_POS_NO_FILL(state);
|
649
|
+
goto done;
|
650
|
+
default:
|
651
|
+
goto bad_request;
|
652
|
+
}
|
653
|
+
INC_BUFFER_POS(state);
|
654
|
+
len++;
|
655
|
+
if (len >= MAX_CHUNKED_ENCODING_CHUNK_SIZE_LENGTH) goto bad_request;
|
656
|
+
}
|
657
|
+
done:
|
658
|
+
if (len == 0) goto bad_request;
|
659
|
+
(*chunk_size) = value;
|
660
|
+
state->parser->current_request_rx += BUFFER_POS(state) - initial_pos;
|
661
|
+
return 1;
|
662
|
+
bad_request:
|
663
|
+
RAISE_BAD_REQUEST("Invalid chunk size");
|
664
|
+
eof:
|
665
|
+
return 0;
|
666
|
+
}
|
667
|
+
|
668
|
+
// Reads the data of one chunk (chunk_size bytes) and appends it to *body,
// creating the String when *body is still nil.
//
// Buffered bytes are used first. Unless buffered_only is set, the rest is read
// from the I/O in pieces of at most MAX_BODY_READ_LENGTH bytes.
//
// Returns 1 on success and 0 when parser_io_read returns nil before the chunk
// is complete.
//
// NOTE(review): with buffered_only set this returns 1 even if only part of the
// chunk was buffered, and the caller then goes on to parse the chunk postfix;
// confirm that callers only use buffered_only when the whole chunk is
// buffered.
int read_body_chunk_with_chunked_encoding(struct parser_state *state, VALUE *body, int chunk_size, int buffered_only) {
  Parser_t *parser = state->parser;
  int buffer_len = RSTRING_LEN(parser->buffer);
  int start = parser->pos;
  int remaining = chunk_size;

  if (start < buffer_len) {
    int take = buffer_len - start;
    if (take > remaining) take = remaining;

    if (*body == Qnil) {
      *body = rb_str_new(RSTRING_PTR(parser->buffer) + start, take);
    }
    else {
      str_append_from_buffer(*body, RSTRING_PTR(parser->buffer) + start, take);
    }

    parser->pos += take;
    parser->current_request_rx += take;
    remaining -= take;
  }

  if (buffered_only) return 1;

  while (remaining) {
    int want = remaining > MAX_BODY_READ_LENGTH ? MAX_BODY_READ_LENGTH : remaining;

    VALUE piece = parser_io_read(parser, INT2NUM(want), Qnil, NUM_buffer_start);
    if (piece == Qnil) return 0;

    if (*body == Qnil) {
      *body = piece;
    }
    else {
      rb_str_append(*body, piece);
    }

    int received = RSTRING_LEN(piece);
    parser->current_request_rx += received;
    remaining -= received;
    RB_GC_GUARD(piece);
  }

  return 1;
}
|
704
|
+
|
705
|
+
static inline int parse_chunk_postfix(struct parser_state *state) {
|
706
|
+
int initial_pos = BUFFER_POS(state);
|
707
|
+
if (initial_pos == BUFFER_LEN(state)) FILL_BUFFER_OR_GOTO_EOF(state);
|
708
|
+
switch (BUFFER_CUR(state)) {
|
709
|
+
case '\r':
|
710
|
+
CONSUME_CRLF_NO_FILL(state);
|
711
|
+
goto done;
|
712
|
+
case '\n':
|
713
|
+
INC_BUFFER_POS_NO_FILL(state);
|
714
|
+
goto done;
|
715
|
+
default:
|
716
|
+
goto bad_request;
|
717
|
+
}
|
718
|
+
done:
|
719
|
+
state->parser->current_request_rx += BUFFER_POS(state) - initial_pos;
|
720
|
+
return 1;
|
721
|
+
bad_request:
|
722
|
+
RAISE_BAD_REQUEST("Invalid protocol");
|
723
|
+
eof:
|
724
|
+
return 0;
|
725
|
+
}
|
726
|
+
|
727
|
+
// Reads a request body sent with chunked transfer encoding.
//
// parser            parser holding the buffer and counters
// read_entire_body  when zero, return after a single chunk; otherwise continue
//                   until the zero-sized chunk
// buffered_only     forwarded to read_body_chunk_with_chunked_encoding
//
// Each iteration parses a chunk size, reads that chunk's data (if any) and
// consumes the trailing line break. A zero size marks the request as
// completed. Returns the accumulated body String (nil when no data was read)
// after refreshing the STR_pseudo_rx header. A failure in any step is reported
// with "Malformed request body"; failing to refill an exhausted buffer is
// reported with "Incomplete request body".
VALUE read_body_with_chunked_encoding(Parser_t *parser, int read_entire_body, int buffered_only) {
  VALUE body = Qnil;
  struct parser_state state;

  state.parser = parser;
  buffer_trim(&state);
  INIT_PARSER_STATE(&state);

  for (;;) {
    int chunk_size = 0;

    if (BUFFER_POS(&state) == BUFFER_LEN(&state)) {
      FILL_BUFFER_OR_GOTO_EOF(&state);
    }
    if (!parse_chunk_size(&state, &chunk_size)) goto bad_request;

    if (chunk_size == 0) {
      parser->request_completed = 1;
    }
    else if (!read_body_chunk_with_chunked_encoding(&state, &body, chunk_size, buffered_only)) {
      goto bad_request;
    }

    if (!parse_chunk_postfix(&state)) goto bad_request;

    // Stop at the terminating chunk, or after one chunk when reading piecewise.
    if (chunk_size == 0 || !read_entire_body) goto done;
  }

bad_request:
  RAISE_BAD_REQUEST("Malformed request body");
eof:
  RAISE_BAD_REQUEST("Incomplete request body");
done:
  rb_hash_aset(parser->headers, STR_pseudo_rx, INT2NUM(state.parser->current_request_rx));
  RB_GC_GUARD(body);
  return body;
}
|
756
|
+
|
757
|
+
// Decides how the body of the current request is to be read, based on the
// parsed headers, and initialises the related parser fields.
//
// - With a Content-Length header, body_read_mode and body_left are both set
//   to the announced length. The request counts as completed right away when
//   that length is zero, since read_body_with_content_length returns early
//   without ever flagging completion in that case.
// - Otherwise, with a chunked Transfer-Encoding, body_read_mode becomes
//   BODY_READ_MODE_CHUNKED and the request is not yet completed.
// - Otherwise the request has no body and is marked completed.
//
// A Content-Length that is not a single String (parse_header stores a
// repeated header as an Array), is not made of digits, or is negative is
// reported through RAISE_BAD_REQUEST.
static inline void detect_body_read_mode(Parser_t *parser) {
  VALUE content_length = rb_hash_aref(parser->headers, STR_content_length);
  if (content_length != Qnil) {
    // str_to_int reads its argument through RSTRING_PTR/RSTRING_LEN, so
    // anything but a String must be rejected before calling it.
    if (TYPE(content_length) != T_STRING)
      RAISE_BAD_REQUEST("Invalid content length");

    int int_content_length = str_to_int(content_length, "Invalid content length");
    if (int_content_length < 0) RAISE_BAD_REQUEST("Invalid body content length");
    parser->body_read_mode = parser->body_left = int_content_length;
    parser->request_completed = (int_content_length == 0);
    return;
  }

  VALUE transfer_encoding = rb_hash_aref(parser->headers, STR_transfer_encoding);
  if (chunked_encoding_p(transfer_encoding)) {
    parser->body_read_mode = BODY_READ_MODE_CHUNKED;
    parser->request_completed = 0;
    return;
  }

  parser->request_completed = 1;
}
|
776
|
+
|
777
|
+
// Shared implementation behind Parser#read_body and Parser#read_body_chunk.
//
// Detects the body read mode first if it is still unknown, then dispatches to
// the chunked reader when the mode is BODY_READ_MODE_CHUNKED and to the
// content-length reader in every other case. Both flags are passed through
// unchanged.
static inline VALUE read_body(VALUE self, int read_entire_body, int buffered_only) {
  Parser_t *parser;
  GetParser(self, parser);

  if (parser->body_read_mode == BODY_READ_MODE_UNKNOWN) {
    detect_body_read_mode(parser);
  }

  return (parser->body_read_mode == BODY_READ_MODE_CHUNKED)
    ? read_body_with_chunked_encoding(parser, read_entire_body, buffered_only)
    : read_body_with_content_length(parser, read_entire_body, buffered_only);
}
|
788
|
+
|
789
|
+
// Parser#read_body: reads the whole request body, reading from the I/O as
// needed (read_entire_body = 1, buffered_only = 0).
VALUE Parser_read_body(VALUE self) {
  const int read_entire_body = 1;
  const int buffered_only = 0;

  return read_body(self, read_entire_body, buffered_only);
}
|
792
|
+
|
793
|
+
// Parser#read_body_chunk(buffered_only): reads a single piece of the request
// body (read_entire_body = 0). Only the Ruby value `true` enables
// buffered-only mode; any other argument is treated as false.
VALUE Parser_read_body_chunk(VALUE self, VALUE buffered_only) {
  const int read_entire_body = 0;
  int only_buffered = (buffered_only == Qtrue);

  return read_body(self, read_entire_body, only_buffered);
}
|
796
|
+
|
797
|
+
// Parser#complete?: returns true when the parser's request_completed flag is
// set, false otherwise. The body read mode is detected first if it is still
// unknown, because detection initialises that flag.
VALUE Parser_complete_p(VALUE self) {
  Parser_t *parser;
  GetParser(self, parser);

  if (parser->body_read_mode == BODY_READ_MODE_UNKNOWN) {
    detect_body_read_mode(parser);
  }

  if (parser->request_completed) return Qtrue;
  return Qfalse;
}
|
806
|
+
|
807
|
+
void Init_H1P() {
|
808
|
+
VALUE mH1P;
|
809
|
+
VALUE cParser;
|
810
|
+
|
811
|
+
mH1P = rb_define_module("H1P");
|
812
|
+
rb_gc_register_mark_object(mH1P);
|
813
|
+
cParser = rb_define_class_under(mH1P, "Parser", rb_cObject);
|
814
|
+
rb_define_alloc_func(cParser, Parser_allocate);
|
815
|
+
|
816
|
+
cError = rb_define_class_under(mH1P, "Error", rb_eRuntimeError);
|
817
|
+
rb_gc_register_mark_object(cError);
|
818
|
+
|
819
|
+
// backend methods
|
820
|
+
rb_define_method(cParser, "initialize", Parser_initialize, 1);
|
821
|
+
rb_define_method(cParser, "parse_headers", Parser_parse_headers, 0);
|
822
|
+
rb_define_method(cParser, "read_body", Parser_read_body, 0);
|
823
|
+
rb_define_method(cParser, "read_body_chunk", Parser_read_body_chunk, 1);
|
824
|
+
rb_define_method(cParser, "complete?", Parser_complete_p, 0);
|
825
|
+
|
826
|
+
ID_arity = rb_intern("arity");
|
827
|
+
ID_backend_read = rb_intern("backend_read");
|
828
|
+
ID_backend_recv = rb_intern("backend_recv");
|
829
|
+
ID_call = rb_intern("call");
|
830
|
+
ID_downcase = rb_intern("downcase");
|
831
|
+
ID_eof_p = rb_intern("eof?");
|
832
|
+
ID_eq = rb_intern("==");
|
833
|
+
ID_parser_read_method = rb_intern("__parser_read_method__");
|
834
|
+
ID_read = rb_intern("read");
|
835
|
+
ID_readpartial = rb_intern("readpartial");
|
836
|
+
ID_to_i = rb_intern("to_i");
|
837
|
+
|
838
|
+
NUM_max_headers_read_length = INT2NUM(MAX_HEADERS_READ_LENGTH);
|
839
|
+
NUM_buffer_start = INT2NUM(0);
|
840
|
+
NUM_buffer_end = INT2NUM(-1);
|
841
|
+
|
842
|
+
GLOBAL_STR(STR_pseudo_method, ":method");
|
843
|
+
GLOBAL_STR(STR_pseudo_path, ":path");
|
844
|
+
GLOBAL_STR(STR_pseudo_protocol, ":protocol");
|
845
|
+
GLOBAL_STR(STR_pseudo_rx, ":rx");
|
846
|
+
|
847
|
+
GLOBAL_STR(STR_chunked, "chunked");
|
848
|
+
GLOBAL_STR(STR_content_length, "content-length");
|
849
|
+
GLOBAL_STR(STR_transfer_encoding, "transfer-encoding");
|
850
|
+
|
851
|
+
SYM_backend_read = ID2SYM(ID_backend_read);
|
852
|
+
SYM_backend_recv = ID2SYM(ID_backend_recv);
|
853
|
+
SYM_stock_readpartial = ID2SYM(rb_intern("stock_readpartial"));
|
854
|
+
|
855
|
+
rb_global_variable(&mH1P);
|
856
|
+
}
|
857
|
+
|
858
|
+
// Extension entry point: Ruby calls Init_<name> when the shared object named
// h1p_ext is loaded. All setup is delegated to Init_H1P.
void Init_h1p_ext() {
  Init_H1P();
}
|