kcar 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.document +1 -0
- data/.olddoc.yml +13 -6
- data/GIT-VERSION-GEN +2 -2
- data/GNUmakefile +1 -1
- data/HACKING +36 -0
- data/LICENSE +1 -1
- data/README +18 -9
- data/archive/slrnpull.conf +4 -1
- data/ext/kcar/c_util.h +2 -2
- data/ext/kcar/ext_help.h +0 -24
- data/ext/kcar/extconf.rb +30 -5
- data/ext/kcar/kcar.rl +554 -111
- data/ext/kcar/kcar_http_common.rl +36 -5
- data/kcar.gemspec +17 -13
- data/lib/kcar.rb +2 -2
- data/lib/kcar/response.rb +10 -11
- data/pkg.mk +3 -3
- data/test/test_parser.rb +32 -0
- data/test/test_request_parser.rb +1219 -0
- metadata +11 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f7cd96d8a5fa081a4e6ac36feb24d3a737f428550da5fa589ab564c516d01820
|
4
|
+
data.tar.gz: 7383af40cdd8bc3ec9954feb4199d0f89ad04fb468633e22e515f6dd70ffc075
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cccb634b38dc4d944f3a216c2f2acafbc563e6efa782643d6ff845f8b09e5338fe285c1d862cab8363ab1598298b6ed1f05d0509c2fa1153adb5f946be45fddb
|
7
|
+
data.tar.gz: 92f1e61ef2049429835403ddc964179fb611ace06ce33a29f20fd3659db629ae05f532bf473efd44372192e476acea23cc09347bc271d2e0bdff42a70d99add2
|
data/.document
CHANGED
data/.olddoc.yml
CHANGED
@@ -1,7 +1,14 @@
|
|
1
1
|
---
|
2
|
-
cgit_url:
|
3
|
-
git_url:
|
4
|
-
rdoc_url:
|
5
|
-
ml_url:
|
6
|
-
|
7
|
-
|
2
|
+
cgit_url: https://yhbt.net/kcar.git
|
3
|
+
git_url: https://yhbt.net/kcar.git
|
4
|
+
rdoc_url: https://yhbt.net/kcar/
|
5
|
+
ml_url:
|
6
|
+
- https://yhbt.net/kcar-public/
|
7
|
+
- http://ou63pmih66umazou.onion/kcar-public/
|
8
|
+
public_email: kcar-public@yhbt.net
|
9
|
+
source_code:
|
10
|
+
- git clone https://yhbt.net/kcar.git
|
11
|
+
- torsocks git clone http://ou63pmih66umazou.onion/kcar.git
|
12
|
+
nntp_url:
|
13
|
+
- nntp://news.public-inbox.org/inbox.comp.lang.ruby.kcar
|
14
|
+
- nntp://ou63pmih66umazou.onion/inbox.comp.lang.ruby.kcar
|
data/GIT-VERSION-GEN
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
CONSTANT = "Kcar::VERSION"
|
3
3
|
RVF = "lib/kcar/version.rb"
|
4
|
-
DEF_VER = "v0.
|
4
|
+
DEF_VER = "v0.7.0"
|
5
5
|
GVF = "GIT-VERSION-FILE"
|
6
6
|
vn = DEF_VER
|
7
7
|
|
@@ -21,7 +21,7 @@ if File.exist?(".git")
|
|
21
21
|
end
|
22
22
|
|
23
23
|
vn = vn.sub!(/\Av/, "")
|
24
|
-
new_ruby_version = "#{CONSTANT} = '#{vn}'
|
24
|
+
new_ruby_version = "#{CONSTANT} = '#{vn}' # :nodoc:\n"
|
25
25
|
cur_ruby_version = File.read(RVF) rescue nil
|
26
26
|
if new_ruby_version != cur_ruby_version
|
27
27
|
File.open(RVF, "w") { |fp| fp.write(new_ruby_version) }
|
data/GNUmakefile
CHANGED
data/HACKING
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
== development dependencies
|
2
|
+
|
3
|
+
* GNU make - https://www.gnu.org/software/make/
|
4
|
+
* git - https://www.git-scm.com/
|
5
|
+
* ruby - https://www.ruby-lang.org/en/
|
6
|
+
|
7
|
+
git clone https://yhbt.net/kcar.git
|
8
|
+
|
9
|
+
== tests
|
10
|
+
|
11
|
+
* make test - run each test in a separate process (parallelize using -j)
|
12
|
+
|
13
|
+
For non-GNU users, GNU make may be installed as "gmake".
|
14
|
+
|
15
|
+
== test environment
|
16
|
+
|
17
|
+
RUBY - specify an alternative ruby(1) runtime
|
18
|
+
V - set to 1 for verbose test output (may be mangled if multithreaded)
|
19
|
+
|
20
|
+
== installing from git
|
21
|
+
|
22
|
+
* make install-gem
|
23
|
+
|
24
|
+
== contact
|
25
|
+
|
26
|
+
We use git(7) and develop kcar on a public mailing list like git.git
|
27
|
+
developers do. Please send patches via git-send-email(1) to the public
|
28
|
+
mailing list at <mailto:kcar-public@yhbt.net>. Pull requests should be
|
29
|
+
formatted using git-request-pull(1).
|
30
|
+
|
31
|
+
All mail is archived publicly at: https://yhbt.net/kcar-public/
|
32
|
+
and nntp://news.public-inbox.org/inbox.comp.lang.ruby.kcar
|
33
|
+
|
34
|
+
Anonymous contributions will always be welcome.
|
35
|
+
No subscription is necessary to post to the mailing list.
|
36
|
+
Please remember to Cc: all recipients as subscription is optional.
|
data/LICENSE
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
kcar is copyrighted free software by all contributors, see logs in
|
2
2
|
revision control for names and email addresses of all of them. You can
|
3
3
|
redistribute it and/or modify it under either the terms of the
|
4
|
-
{GPLv2}[
|
4
|
+
{GPLv2}[https://www.gnu.org/licenses/gpl-2.0.txt] or later or
|
5
5
|
the conditions below:
|
6
6
|
|
7
7
|
1. You may make and give away verbatim copies of the source form of the
|
data/README
CHANGED
@@ -7,7 +7,7 @@ regular files, FIFOs, StringIOs as well as traditional TCP sockets.
|
|
7
7
|
|
8
8
|
== Features
|
9
9
|
|
10
|
-
* RFC2616-compliant Ragel+C parser adapted from
|
10
|
+
* RFC2616-compliant Ragel+C parser adapted from Mongrel
|
11
11
|
|
12
12
|
* decodes chunked response bodies with an optional pass-through mode
|
13
13
|
(to avoid rechunking with Rack::Chunked)
|
@@ -35,7 +35,7 @@ If you use RubyGems:
|
|
35
35
|
|
36
36
|
Otherwise grab the latest tarball from:
|
37
37
|
|
38
|
-
|
38
|
+
https://yhbt.net/kcar/files/
|
39
39
|
|
40
40
|
Unpack it, and run "ruby setup.rb"
|
41
41
|
|
@@ -62,14 +62,14 @@ through the body with body.each.
|
|
62
62
|
|
63
63
|
You can get the latest source via git from the following locations:
|
64
64
|
|
65
|
-
|
66
|
-
|
65
|
+
https://yhbt.net/kcar.git
|
66
|
+
https://repo.or.cz/kcar.git (mirror)
|
67
67
|
|
68
68
|
You may browse the code from the web and download the latest snapshot
|
69
69
|
tarballs here:
|
70
70
|
|
71
|
-
*
|
72
|
-
*
|
71
|
+
* https://yhbt.net/kcar.git
|
72
|
+
* https://repo.or.cz/w/kcar.git (gitweb)
|
73
73
|
|
74
74
|
Inline patches (from "git format-patch") to the mailing list are
|
75
75
|
preferred because they allow code review and comments in the reply to
|
@@ -83,8 +83,17 @@ don't email the git mailing list or maintainer with kcar patches.
|
|
83
83
|
== Contact
|
84
84
|
|
85
85
|
All feedback (bug reports, user/development discussion, patches, pull
|
86
|
-
requests) go to the mailing list: mailto:kcar-public@
|
86
|
+
requests) goes to the public mailing list: mailto:kcar-public@yhbt.net
|
87
|
+
All mail is archived publicly at: https://yhbt.net/kcar-public/
|
88
|
+
and nntp://news.public-inbox.org/inbox.comp.lang.ruby.kcar
|
87
89
|
|
88
|
-
|
90
|
+
Anonymous posts will always be welcome.
|
89
91
|
|
90
|
-
|
92
|
+
No subscription is necessary to post to the mailing list;
|
93
|
+
but you may subscribe by sending a plain-text mail to:
|
94
|
+
|
95
|
+
mailto:kcar-public+subscribe@yhbt.net
|
96
|
+
|
97
|
+
Keep in mind we suck at delivering email, so using NNTP or
|
98
|
+
Atom feeds might be a better bet.
|
99
|
+
Please remember to Cc: all recipients as subscription is optional.
|
data/archive/slrnpull.conf
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
+
# This config was used to create the import used to migrate
|
2
|
+
# mailing list archives from gmane to https://yhbt.net/kcar-public/
|
3
|
+
|
1
4
|
# group_name max expire headers_only
|
2
5
|
gmane.comp.lang.ruby.kcar.general 1000000000 1000000000 0
|
3
6
|
|
4
|
-
# usage: slrnpull -d $PWD -h news.gmane.
|
7
|
+
# usage: slrnpull -d $PWD -h news.gmane.io --no-post
|
data/ext/kcar/c_util.h
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
/*
|
2
2
|
* Generic C functions and macros go here, there are no dependencies
|
3
|
-
* on
|
3
|
+
* on kcar internal structures or the Ruby C API in here.
|
4
4
|
*/
|
5
5
|
|
6
6
|
#ifndef UH_util_h
|
@@ -49,7 +49,7 @@ static int hexchar2int(int xdigit)
|
|
49
49
|
if (xdigit >= 'a' && xdigit <= 'f')
|
50
50
|
return xdigit - 'a' + 10;
|
51
51
|
|
52
|
-
/* Ragel already does runtime range checking for us
|
52
|
+
/* Ragel already does runtime range checking for us */
|
53
53
|
assert(xdigit >= '0' && xdigit <= '9' && "invalid digit character");
|
54
54
|
|
55
55
|
return xdigit - '0';
|
data/ext/kcar/ext_help.h
CHANGED
@@ -1,26 +1,6 @@
|
|
1
1
|
#ifndef ext_help_h
|
2
2
|
#define ext_help_h
|
3
3
|
|
4
|
-
#ifndef RSTRING_PTR
|
5
|
-
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
|
6
|
-
#endif /* !defined(RSTRING_PTR) */
|
7
|
-
#ifndef RSTRING_LEN
|
8
|
-
#define RSTRING_LEN(s) (RSTRING(s)->len)
|
9
|
-
#endif /* !defined(RSTRING_LEN) */
|
10
|
-
|
11
|
-
#ifndef HAVE_RB_STR_SET_LEN
|
12
|
-
# ifdef RUBINIUS
|
13
|
-
# error we should never get here with current Rubinius (1.x)
|
14
|
-
# endif
|
15
|
-
/* this is taken from Ruby 1.8.7, 1.8.6 may not have it */
|
16
|
-
static void rb_18_str_set_len(VALUE str, long len)
|
17
|
-
{
|
18
|
-
RSTRING(str)->len = len;
|
19
|
-
RSTRING(str)->ptr[len] = '\0';
|
20
|
-
}
|
21
|
-
# define rb_str_set_len(str,len) rb_18_str_set_len(str,len)
|
22
|
-
#endif /* !defined(HAVE_RB_STR_SET_LEN) */
|
23
|
-
|
24
4
|
/* not all Ruby implementations support frozen objects (Rubinius does not) */
|
25
5
|
#if defined(OBJ_FROZEN)
|
26
6
|
# define assert_frozen(f) assert(OBJ_FROZEN(f) && "unfrozen object")
|
@@ -36,10 +16,6 @@ static void rb_18_str_set_len(VALUE str, long len)
|
|
36
16
|
# endif
|
37
17
|
#endif /* ! defined(OFFT2NUM) */
|
38
18
|
|
39
|
-
#ifndef HAVE_RB_STR_MODIFY
|
40
|
-
# define rb_str_modify(x) do {} while (0)
|
41
|
-
#endif /* ! defined(HAVE_RB_STR_MODIFY) */
|
42
|
-
|
43
19
|
static inline int str_cstr_eq(VALUE val, const char *ptr, long len)
|
44
20
|
{
|
45
21
|
return (RSTRING_LEN(val) == len && !memcmp(ptr, RSTRING_PTR(val), len));
|
data/ext/kcar/extconf.rb
CHANGED
@@ -5,10 +5,35 @@ dir_config("kcar")
|
|
5
5
|
|
6
6
|
have_macro("SIZEOF_OFF_T", "ruby.h") or check_sizeof("off_t", "sys/types.h")
|
7
7
|
have_macro("SIZEOF_LONG", "ruby.h") or check_sizeof("long", "sys/types.h")
|
8
|
-
have_func("rb_str_set_len", "ruby.h")
|
9
|
-
have_func("rb_str_modify", "ruby.h")
|
10
8
|
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
message('checking if String#-@ (str_uminus) dedupes... ')
|
10
|
+
begin
|
11
|
+
a = -(%w(t e s t).join)
|
12
|
+
b = -(%w(t e s t).join)
|
13
|
+
if a.equal?(b)
|
14
|
+
$CPPFLAGS += " -DSTR_UMINUS_DEDUPE=1 "
|
15
|
+
message("yes\n")
|
16
|
+
else
|
17
|
+
$CPPFLAGS += " -DSTR_UMINUS_DEDUPE=0 "
|
18
|
+
message("no, needs Ruby 2.5+\n")
|
19
|
+
end
|
20
|
+
rescue NoMethodError
|
21
|
+
$CPPFLAGS += " -DSTR_UMINUS_DEDUPE=0 "
|
22
|
+
message("no, String#-@ not available\n")
|
14
23
|
end
|
24
|
+
|
25
|
+
message('checking if Hash#[]= (rb_hash_aset) dedupes... ')
|
26
|
+
h = {}
|
27
|
+
x = {}
|
28
|
+
r = rand.to_s
|
29
|
+
h[%W(#{r}).join('')] = :foo
|
30
|
+
x[%W(#{r}).join('')] = :foo
|
31
|
+
if x.keys[0].equal?(h.keys[0])
|
32
|
+
$CPPFLAGS += ' -DHASH_ASET_DEDUPE=1 '
|
33
|
+
message("yes\n")
|
34
|
+
else
|
35
|
+
$CPPFLAGS += ' -DHASH_ASET_DEDUPE=0 '
|
36
|
+
message("no, needs Ruby 2.6+\n")
|
37
|
+
end
|
38
|
+
|
39
|
+
create_makefile("kcar_ext")
|
data/ext/kcar/kcar.rl
CHANGED
@@ -10,10 +10,20 @@
|
|
10
10
|
#include <stdlib.h>
|
11
11
|
#include <string.h>
|
12
12
|
#include <sys/types.h>
|
13
|
+
#include <limits.h>
|
13
14
|
#include "c_util.h"
|
14
15
|
|
15
16
|
static VALUE eParserError;
|
16
|
-
static ID id_sq, id_sq_set;
|
17
|
+
static ID id_uminus, id_sq, id_sq_set;
|
18
|
+
static VALUE g_rack_url_scheme,
|
19
|
+
g_80, g_443, g_http, g_https,
|
20
|
+
g_HTTP_HOST, g_HTTP_CONNECTION, g_HTTP_TRAILER, g_HTTP_TRANSFER_ENCODING,
|
21
|
+
g_HTTP_VERSION,
|
22
|
+
g_CONTENT_LENGTH, g_CONTENT_TYPE, g_FRAGMENT,
|
23
|
+
g_PATH_INFO, g_QUERY_STRING,
|
24
|
+
g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI,
|
25
|
+
g_SERVER_NAME, g_SERVER_PORT, g_SERVER_PROTOCOL;
|
26
|
+
static VALUE e413, e414;
|
17
27
|
|
18
28
|
/** Defines common length and error messages for input length validation. */
|
19
29
|
#define DEF_MAX_LENGTH(N, length) \
|
@@ -30,58 +40,104 @@ static ID id_sq, id_sq_set;
|
|
30
40
|
rb_raise(eParserError, MAX_##N##_LENGTH_ERR); \
|
31
41
|
} while (0)
|
32
42
|
|
43
|
+
#define VALIDATE_MAX_URI_LENGTH(len, N) do { \
|
44
|
+
if (len > MAX_##N##_LENGTH) \
|
45
|
+
rb_raise(e414, MAX_##N##_LENGTH_ERR); \
|
46
|
+
} while (0)
|
47
|
+
|
33
48
|
/* Defines the maximum allowed lengths for various input elements.*/
|
34
49
|
DEF_MAX_LENGTH(FIELD_NAME, 256);
|
35
50
|
DEF_MAX_LENGTH(FIELD_VALUE, 80 * 1024);
|
36
51
|
DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32)));
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
#define UH_FL_INBODY 0x4
|
41
|
-
#define UH_FL_HASTRAILER 0x8
|
42
|
-
#define UH_FL_INTRAILER 0x10
|
43
|
-
#define UH_FL_INCHUNK 0x20
|
44
|
-
#define UH_FL_KEEPALIVE 0x40
|
45
|
-
#define UH_FL_HASHEADER 0x80
|
52
|
+
DEF_MAX_LENGTH(REQUEST_URI, 1024 * 15);
|
53
|
+
DEF_MAX_LENGTH(REQUEST_PATH, 4096); /* common PATH_MAX on modern systems */
|
54
|
+
DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10));
|
46
55
|
|
47
56
|
struct http_parser {
|
48
57
|
int cs; /* Ragel internal state */
|
49
|
-
unsigned int
|
50
|
-
|
51
|
-
|
58
|
+
unsigned int is_request:1;
|
59
|
+
unsigned int has_query:1;
|
60
|
+
unsigned int has_scheme:1;
|
61
|
+
unsigned int chunked:1;
|
62
|
+
unsigned int has_body:1;
|
63
|
+
unsigned int in_body:1;
|
64
|
+
unsigned int has_trailer:1;
|
65
|
+
unsigned int in_trailer:1;
|
66
|
+
unsigned int in_chunk:1;
|
67
|
+
unsigned int persistent:1;
|
68
|
+
unsigned int has_header:1;
|
69
|
+
unsigned int body_eof_seen:1;
|
70
|
+
unsigned int is_https:1;
|
71
|
+
unsigned int padding:19;
|
72
|
+
unsigned int mark;
|
73
|
+
unsigned int offset;
|
52
74
|
union { /* these 2 fields don't nest */
|
53
|
-
|
54
|
-
|
75
|
+
unsigned int field;
|
76
|
+
unsigned int query;
|
55
77
|
} start;
|
56
78
|
union {
|
57
|
-
|
58
|
-
|
79
|
+
unsigned int field_len; /* only used during header processing */
|
80
|
+
unsigned int dest_offset; /* only used during body processing */
|
59
81
|
} s;
|
60
82
|
VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */
|
61
|
-
|
83
|
+
union {
|
84
|
+
/* String or Qnil */
|
85
|
+
VALUE status; /* status string for responses */
|
86
|
+
VALUE host; /* Host: header for requests */
|
87
|
+
} v;
|
62
88
|
union {
|
63
89
|
off_t content;
|
64
90
|
off_t chunk;
|
65
91
|
} len;
|
66
92
|
};
|
67
93
|
|
94
|
+
static unsigned int ulong2uint(unsigned long n)
|
95
|
+
{
|
96
|
+
unsigned int i = (unsigned int)n;
|
97
|
+
|
98
|
+
if (sizeof(unsigned int) != sizeof(unsigned long)) {
|
99
|
+
if ((unsigned long)i != n) {
|
100
|
+
rb_raise(rb_eRangeError, "too large to be 32-bit uint: %lu", n);
|
101
|
+
}
|
102
|
+
}
|
103
|
+
return i;
|
104
|
+
}
|
105
|
+
|
68
106
|
#define REMAINING (unsigned long)(pe - p)
|
69
|
-
#define LEN(AT, FPC) (FPC - buffer - hp->AT)
|
70
|
-
#define MARK(M,FPC) (hp->M = (FPC) - buffer)
|
107
|
+
#define LEN(AT, FPC) (ulong2uint(FPC - buffer) - hp->AT)
|
108
|
+
#define MARK(M,FPC) (hp->M = ulong2uint((FPC) - buffer))
|
71
109
|
#define PTR_TO(F) (buffer + hp->F)
|
72
110
|
#define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))
|
73
111
|
#define STRIPPED_STR_NEW(M,FPC) stripped_str_new(PTR_TO(M), LEN(M, FPC))
|
74
112
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
113
|
+
/* Downcases a single ASCII character. Locale-agnostic. */
|
114
|
+
static void downcase_char(char *c)
|
115
|
+
{
|
116
|
+
if (*c >= 'A' && *c <= 'Z')
|
117
|
+
*c |= 0x20;
|
118
|
+
}
|
79
119
|
|
80
120
|
static int is_lws(char c)
|
81
121
|
{
|
82
122
|
return (c == ' ' || c == '\t');
|
83
123
|
}
|
84
124
|
|
125
|
+
/* this will dedupe under Ruby 2.5+ (December 2017) */
|
126
|
+
static VALUE str_dd_freeze(VALUE str)
|
127
|
+
{
|
128
|
+
if (STR_UMINUS_DEDUPE)
|
129
|
+
return rb_funcall(str, id_uminus, 0);
|
130
|
+
|
131
|
+
/* freeze, since it speeds up MRI slightly */
|
132
|
+
OBJ_FREEZE(str);
|
133
|
+
return str;
|
134
|
+
}
|
135
|
+
|
136
|
+
static VALUE str_new_dd_freeze(const char *ptr, long len)
|
137
|
+
{
|
138
|
+
return str_dd_freeze(rb_str_new(ptr, len));
|
139
|
+
}
|
140
|
+
|
85
141
|
static VALUE stripped_str_new(const char *str, long len)
|
86
142
|
{
|
87
143
|
long end;
|
@@ -91,10 +147,69 @@ static VALUE stripped_str_new(const char *str, long len)
|
|
91
147
|
return rb_str_new(str, end + 1);
|
92
148
|
}
|
93
149
|
|
94
|
-
static
|
150
|
+
static VALUE request_host_val(struct http_parser *hp)
|
151
|
+
{
|
152
|
+
assert(hp->is_request == 1 && "not a request");
|
153
|
+
return NIL_P(hp->v.host) ? Qfalse : hp->v.host;
|
154
|
+
}
|
155
|
+
|
156
|
+
static void set_server_vars(struct http_parser *hp, VALUE env, VALUE host)
|
157
|
+
{
|
158
|
+
char *host_ptr = RSTRING_PTR(host);
|
159
|
+
long host_len = RSTRING_LEN(host);
|
160
|
+
char *colon;
|
161
|
+
VALUE server_name = host;
|
162
|
+
VALUE server_port = hp->has_scheme ? (hp->is_https ? g_443 : g_80) : Qfalse;
|
163
|
+
|
164
|
+
if (*host_ptr == '[') { /* ipv6 address format */
|
165
|
+
char *rbracket = memchr(host_ptr + 1, ']', host_len - 1);
|
166
|
+
|
167
|
+
if (rbracket)
|
168
|
+
colon = (rbracket[1] == ':') ? rbracket + 1 : NULL;
|
169
|
+
else
|
170
|
+
colon = memchr(host_ptr + 1, ':', host_len - 1);
|
171
|
+
} else {
|
172
|
+
colon = memchr(host_ptr, ':', host_len);
|
173
|
+
}
|
174
|
+
|
175
|
+
if (colon) {
|
176
|
+
long port_start = colon - host_ptr + 1;
|
177
|
+
long port_len = host_len - port_start;
|
178
|
+
|
179
|
+
server_name = rb_str_substr(host, 0, colon - host_ptr);
|
180
|
+
server_name = str_dd_freeze(server_name);
|
181
|
+
if (port_len > 0) {
|
182
|
+
server_port = rb_str_substr(host, port_start, port_len);
|
183
|
+
server_port = str_dd_freeze(server_port);
|
184
|
+
}
|
185
|
+
}
|
186
|
+
rb_hash_aset(env, g_SERVER_NAME, server_name);
|
187
|
+
if (server_port != Qfalse)
|
188
|
+
rb_hash_aset(env, g_SERVER_PORT, server_port);
|
189
|
+
}
|
190
|
+
|
191
|
+
static void finalize_header(struct http_parser *hp, VALUE hdr)
|
95
192
|
{
|
96
|
-
if (
|
193
|
+
if (hp->has_trailer && !hp->chunked)
|
97
194
|
rb_raise(eParserError, "trailer but not chunked");
|
195
|
+
if (hp->is_request) {
|
196
|
+
if (hp->chunked) {
|
197
|
+
if (hp->len.chunk >= 0)
|
198
|
+
rb_raise(eParserError, "Content-Length set with chunked encoding");
|
199
|
+
else
|
200
|
+
hp->len.chunk = 0;
|
201
|
+
} else if (hp->len.content < 0) {
|
202
|
+
hp->len.content = 0;
|
203
|
+
}
|
204
|
+
|
205
|
+
if (!hp->has_query)
|
206
|
+
rb_hash_aset(hdr, g_QUERY_STRING, rb_str_new(NULL, 0));
|
207
|
+
if (hp->has_header) {
|
208
|
+
VALUE host = request_host_val(hp);
|
209
|
+
if (host != Qfalse)
|
210
|
+
set_server_vars(hp, hdr, host);
|
211
|
+
}
|
212
|
+
}
|
98
213
|
}
|
99
214
|
|
100
215
|
/*
|
@@ -107,28 +222,116 @@ static void hp_keepalive_connection(struct http_parser *hp, VALUE val)
|
|
107
222
|
/* REQUEST_METHOD is always set before any headers */
|
108
223
|
if (STR_CSTR_CASE_EQ(val, "keep-alive")) {
|
109
224
|
/* basically have HTTP/1.0 masquerade as HTTP/1.1+ */
|
110
|
-
|
225
|
+
hp->persistent = 1;
|
111
226
|
} else if (STR_CSTR_CASE_EQ(val, "close")) {
|
112
227
|
/*
|
113
228
|
* it doesn't matter what HTTP version or request method we have,
|
114
229
|
* if a server says "Connection: close", we disable keepalive
|
115
230
|
*/
|
116
|
-
|
231
|
+
hp->persistent = 0;
|
117
232
|
} else {
|
118
233
|
/*
|
119
234
|
* server could've sent anything, ignore it for now. Maybe
|
120
|
-
* "
|
235
|
+
* "hp->persistent = 0;" just in case?
|
121
236
|
* Raising an exception might be too mean...
|
122
237
|
*/
|
123
238
|
}
|
124
239
|
}
|
125
240
|
|
126
241
|
static void
|
127
|
-
|
242
|
+
request_method(VALUE env, const char *ptr, size_t len)
|
243
|
+
{
|
244
|
+
rb_hash_aset(env, g_REQUEST_METHOD, str_new_dd_freeze(ptr, len));
|
245
|
+
}
|
246
|
+
|
247
|
+
static void
|
248
|
+
url_scheme(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
|
249
|
+
{
|
250
|
+
VALUE val;
|
251
|
+
|
252
|
+
hp->has_scheme = 1;
|
253
|
+
/* Ragel machine downcases and enforces this as "http" or "https" */
|
254
|
+
if (len == 5) {
|
255
|
+
hp->is_https = 1;
|
256
|
+
assert(CONST_MEM_EQ("https", ptr, len) && "len == 5 but not 'https'");
|
257
|
+
val = g_https;
|
258
|
+
} else {
|
259
|
+
assert(CONST_MEM_EQ("http", ptr, len) && "len != 4 but not 'http'");
|
260
|
+
val = g_http;
|
261
|
+
}
|
262
|
+
rb_hash_aset(env, g_rack_url_scheme, val);
|
263
|
+
}
|
264
|
+
|
265
|
+
static void
|
266
|
+
request_host(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
|
267
|
+
{
|
268
|
+
VALUE val = rb_str_new(ptr, len);
|
269
|
+
|
270
|
+
rb_hash_aset(env, g_HTTP_HOST, val);
|
271
|
+
hp->v.host = val;
|
272
|
+
}
|
273
|
+
|
274
|
+
static void
|
275
|
+
set_fragment(VALUE env, const char *ptr, size_t len)
|
276
|
+
{
|
277
|
+
VALUE val = rb_str_new(ptr, len);
|
278
|
+
rb_hash_aset(env, g_FRAGMENT, val);
|
279
|
+
}
|
280
|
+
|
281
|
+
static void
|
282
|
+
request_uri(VALUE env, const char *ptr, size_t len)
|
283
|
+
{
|
284
|
+
VALUE val;
|
285
|
+
|
286
|
+
VALIDATE_MAX_URI_LENGTH(len, REQUEST_URI);
|
287
|
+
val = rb_str_new(ptr, len);
|
288
|
+
rb_hash_aset(env, g_REQUEST_URI, val);
|
289
|
+
|
290
|
+
/*
|
291
|
+
* rack says PATH_INFO must start with "/" or be empty,
|
292
|
+
* but "OPTIONS *" is a valid request
|
293
|
+
*/
|
294
|
+
if (CONST_MEM_EQ("*", ptr, len)) {
|
295
|
+
val = rb_str_new(NULL, 0);
|
296
|
+
rb_hash_aset(env, g_PATH_INFO, val);
|
297
|
+
rb_hash_aset(env, g_REQUEST_PATH, val);
|
298
|
+
}
|
299
|
+
}
|
300
|
+
|
301
|
+
static void
|
302
|
+
query_string(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
|
303
|
+
{
|
304
|
+
VALIDATE_MAX_URI_LENGTH(len, QUERY_STRING);
|
305
|
+
|
306
|
+
hp->has_query = 1;
|
307
|
+
rb_hash_aset(env, g_QUERY_STRING, rb_str_new(ptr, len));
|
308
|
+
}
|
309
|
+
|
310
|
+
static void
|
311
|
+
request_path(VALUE env, const char *ptr, size_t len)
|
312
|
+
{
|
313
|
+
VALUE val;
|
314
|
+
|
315
|
+
VALIDATE_MAX_URI_LENGTH(len, REQUEST_PATH);
|
316
|
+
val = rb_str_new(ptr, len);
|
317
|
+
|
318
|
+
rb_hash_aset(env, g_REQUEST_PATH, val);
|
319
|
+
rb_hash_aset(env, g_PATH_INFO, val);
|
320
|
+
}
|
321
|
+
|
322
|
+
static void
|
323
|
+
http_version(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
|
128
324
|
{
|
129
325
|
if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
|
130
326
|
/* HTTP/1.1 implies keepalive unless "Connection: close" is set */
|
131
|
-
|
327
|
+
hp->persistent = 1;
|
328
|
+
}
|
329
|
+
if (hp->is_request) {
|
330
|
+
VALUE v = str_new_dd_freeze(ptr, len);
|
331
|
+
hp->has_header = 1;
|
332
|
+
|
333
|
+
rb_hash_aset(env, g_SERVER_PROTOCOL, v);
|
334
|
+
rb_hash_aset(env, g_HTTP_VERSION, v);
|
132
335
|
}
|
133
336
|
}
|
134
337
|
|
@@ -137,21 +340,21 @@ status_phrase(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
|
|
137
340
|
{
|
138
341
|
long nr;
|
139
342
|
|
140
|
-
hp->status =
|
343
|
+
hp->v.status = str_new_dd_freeze(ptr, len);
|
141
344
|
|
142
345
|
/* RSTRING_PTR is null terminated, ptr is not */
|
143
|
-
nr = strtol(RSTRING_PTR(hp->status), NULL, 10);
|
346
|
+
nr = strtol(RSTRING_PTR(hp->v.status), NULL, 10);
|
144
347
|
|
145
348
|
if (nr < 100 || nr > 999)
|
146
|
-
rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->status));
|
349
|
+
rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->v.status));
|
147
350
|
|
148
351
|
if ( !((nr >= 100 && nr <= 199) || nr == 204 || nr == 304) )
|
149
|
-
|
352
|
+
hp->has_body = 1;
|
150
353
|
}
|
151
354
|
|
152
355
|
static inline void invalid_if_trailer(struct http_parser *hp)
|
153
356
|
{
|
154
|
-
if (
|
357
|
+
if (hp->in_trailer)
|
155
358
|
rb_raise(eParserError, "invalid Trailer");
|
156
359
|
}
|
157
360
|
|
@@ -185,15 +388,15 @@ static void write_cont_value(struct http_parser *hp,
|
|
185
388
|
|
186
389
|
/* normalize tab to space */
|
187
390
|
if (cont_len > 0) {
|
188
|
-
assert((
|
391
|
+
assert(is_lws(*vptr) && "invalid leading white space");
|
189
392
|
*vptr = ' ';
|
190
393
|
}
|
191
394
|
for (end = len - 1; end >= 0 && is_lws(vptr[end]); end--);
|
192
395
|
rb_str_buf_cat(hp->cont, vptr, end + 1);
|
193
396
|
}
|
194
397
|
|
195
|
-
static void
|
196
|
-
|
398
|
+
static void write_response_value(struct http_parser *hp, VALUE hdr,
|
399
|
+
const char *buffer, const char *p)
|
197
400
|
{
|
198
401
|
VALUE f, v;
|
199
402
|
VALUE hclass;
|
@@ -202,7 +405,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
|
|
202
405
|
const char *vptr;
|
203
406
|
size_t vlen;
|
204
407
|
|
205
|
-
|
408
|
+
hp->has_header = 1;
|
206
409
|
|
207
410
|
/* Rack does not like Status headers, so we never send them */
|
208
411
|
if (CSTR_CASE_EQ(fptr, flen, "status")) {
|
@@ -214,7 +417,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
|
|
214
417
|
vlen = LEN(mark, p);
|
215
418
|
VALIDATE_MAX_LENGTH(vlen, FIELD_VALUE);
|
216
419
|
VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
|
217
|
-
f =
|
420
|
+
f = str_new_dd_freeze(fptr, (long)flen);
|
218
421
|
v = stripped_str_new(vptr, (long)vlen);
|
219
422
|
|
220
423
|
/* needs more tests for error-checking here */
|
@@ -226,9 +429,9 @@ static void write_value(VALUE hdr, struct http_parser *hp,
|
|
226
429
|
if (STR_CSTR_CASE_EQ(f, "connection")) {
|
227
430
|
hp_keepalive_connection(hp, v);
|
228
431
|
} else if (STR_CSTR_CASE_EQ(f, "content-length")) {
|
229
|
-
if (!
|
432
|
+
if (!hp->has_body)
|
230
433
|
rb_raise(eParserError, "Content-Length with no body expected");
|
231
|
-
if (
|
434
|
+
if (hp->chunked)
|
232
435
|
rb_raise(eParserError,
|
233
436
|
"Content-Length when chunked Transfer-Encoding is set");
|
234
437
|
hp->len.content = parse_length(vptr, vlen);
|
@@ -239,7 +442,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
|
|
239
442
|
invalid_if_trailer(hp);
|
240
443
|
} else if (STR_CSTR_CASE_EQ(f, "transfer-encoding")) {
|
241
444
|
if (STR_CSTR_CASE_EQ(v, "chunked")) {
|
242
|
-
if (!
|
445
|
+
if (!hp->has_body)
|
243
446
|
rb_raise(eParserError,
|
244
447
|
"chunked Transfer-Encoding with no body expected");
|
245
448
|
if (hp->len.content >= 0)
|
@@ -247,13 +450,13 @@ static void write_value(VALUE hdr, struct http_parser *hp,
|
|
247
450
|
"chunked Transfer-Encoding when Content-Length is set");
|
248
451
|
|
249
452
|
hp->len.chunk = 0;
|
250
|
-
|
453
|
+
hp->chunked = 1;
|
251
454
|
}
|
252
455
|
invalid_if_trailer(hp);
|
253
456
|
} else if (STR_CSTR_CASE_EQ(f, "trailer")) {
|
254
|
-
if (!
|
457
|
+
if (!hp->has_body)
|
255
458
|
rb_raise(eParserError, "trailer with no body");
|
256
|
-
|
459
|
+
hp->has_trailer = 1;
|
257
460
|
invalid_if_trailer(hp);
|
258
461
|
}
|
259
462
|
|
@@ -272,9 +475,6 @@ static void write_value(VALUE hdr, struct http_parser *hp,
|
|
272
475
|
e = rb_funcall(hdr, id_sq, 1, f);
|
273
476
|
|
274
477
|
if (NIL_P(e)) {
|
275
|
-
/* new value, freeze it since it speeds up MRI slightly */
|
276
|
-
OBJ_FREEZE(f);
|
277
|
-
|
278
478
|
if (hclass == rb_cHash)
|
279
479
|
rb_hash_aset(hdr, f, v);
|
280
480
|
else
|
@@ -295,6 +495,112 @@ static void write_value(VALUE hdr, struct http_parser *hp,
|
|
295
495
|
}
|
296
496
|
}
|
297
497
|
|
498
|
+
static VALUE req_field(const char *ptr, size_t len)
|
499
|
+
{
|
500
|
+
size_t pfxlen = sizeof("HTTP_") - 1;
|
501
|
+
VALUE str = rb_str_new(NULL, pfxlen + len);
|
502
|
+
char *dst = RSTRING_PTR(str);
|
503
|
+
|
504
|
+
memcpy(dst, "HTTP_", pfxlen);
|
505
|
+
memcpy(dst + pfxlen, ptr, len);
|
506
|
+
assert(*(dst + RSTRING_LEN(str)) == '\0' &&
|
507
|
+
"string didn't end with \\0"); /* paranoia */
|
508
|
+
|
509
|
+
return str;
|
510
|
+
}
|
511
|
+
|
512
|
+
static void snake_upcase(char *ptr, size_t len)
|
513
|
+
{
|
514
|
+
char *c;
|
515
|
+
|
516
|
+
for (c = ptr; len--; c++) {
|
517
|
+
if (*c >= 'a' && *c <= 'z')
|
518
|
+
*c &= ~0x20;
|
519
|
+
else if (*c == '-')
|
520
|
+
*c = '_';
|
521
|
+
}
|
522
|
+
}
|
523
|
+
|
524
|
+
static void write_request_value(struct http_parser *hp, VALUE env,
|
525
|
+
char *buffer, const char *p)
|
526
|
+
{
|
527
|
+
char *fptr = PTR_TO(start.field);
|
528
|
+
size_t flen = hp->s.field_len;
|
529
|
+
char *vptr = PTR_TO(mark);
|
530
|
+
size_t vlen = LEN(mark, p);
|
531
|
+
VALUE key, val;
|
532
|
+
VALUE existing;
|
533
|
+
|
534
|
+
VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
|
535
|
+
VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
|
536
|
+
snake_upcase(fptr, flen);
|
537
|
+
|
538
|
+
/*
|
539
|
+
* ignore "Version" headers since they conflict with the HTTP_VERSION
|
540
|
+
* rack env variable.
|
541
|
+
*/
|
542
|
+
if (CONST_MEM_EQ("VERSION", fptr, flen)) {
|
543
|
+
hp->cont = Qnil;
|
544
|
+
return;
|
545
|
+
}
|
546
|
+
val = vlen == 0 ? rb_str_new(0, 0) : stripped_str_new(vptr, vlen);
|
547
|
+
|
548
|
+
if (CONST_MEM_EQ("CONNECTION", fptr, flen)) {
|
549
|
+
key = g_HTTP_CONNECTION;
|
550
|
+
hp_keepalive_connection(hp, val);
|
551
|
+
} else if (CONST_MEM_EQ("CONTENT_LENGTH", fptr, flen)) {
|
552
|
+
key = g_CONTENT_LENGTH;
|
553
|
+
hp->len.content = parse_length(vptr, vlen);
|
554
|
+
if (hp->len.content < 0)
|
555
|
+
rb_raise(eParserError, "invalid Content-Length");
|
556
|
+
if (hp->len.content != 0)
|
557
|
+
hp->has_body = 1;
|
558
|
+
invalid_if_trailer(hp);
|
559
|
+
} else if (CONST_MEM_EQ("CONTENT_TYPE", fptr, flen)) {
|
560
|
+
key = g_CONTENT_TYPE;
|
561
|
+
} else if (CONST_MEM_EQ("TRANSFER_ENCODING", fptr, flen)) {
|
562
|
+
key = g_HTTP_TRANSFER_ENCODING;
|
563
|
+
if (STR_CSTR_CASE_EQ(val, "chunked")) {
|
564
|
+
hp->chunked = 1;
|
565
|
+
hp->has_body = 1;
|
566
|
+
}
|
567
|
+
invalid_if_trailer(hp);
|
568
|
+
} else if (CONST_MEM_EQ("TRAILER", fptr, flen)) {
|
569
|
+
key = g_HTTP_TRAILER;
|
570
|
+
hp->has_trailer = 1;
|
571
|
+
invalid_if_trailer(hp);
|
572
|
+
} else if (CONST_MEM_EQ("HOST", fptr, flen)) {
|
573
|
+
key = g_HTTP_HOST;
|
574
|
+
if (NIL_P(hp->v.host))
|
575
|
+
hp->v.host = val;
|
576
|
+
} else {
|
577
|
+
key = req_field(fptr, flen);
|
578
|
+
if (!HASH_ASET_DEDUPE)
|
579
|
+
key = str_dd_freeze(key);
|
580
|
+
}
|
581
|
+
existing = rb_hash_aref(env, key);
|
582
|
+
if (NIL_P(existing)) {
|
583
|
+
hp->cont = rb_hash_aset(env, key, val);
|
584
|
+
/*
|
585
|
+
* Ignore repeated Host headers and favor host set by absolute URIs.
|
586
|
+
* absoluteURI Request-URI takes precedence over
|
587
|
+
* the Host: header (ref: rfc 2616, section 5.2.1)
|
588
|
+
*/
|
589
|
+
} else if (key == g_HTTP_HOST) {
|
590
|
+
hp->cont = Qnil;
|
591
|
+
} else {
|
592
|
+
rb_str_buf_cat(existing, ",", 1);
|
593
|
+
hp->cont = rb_str_buf_append(existing, val);
|
594
|
+
}
|
595
|
+
}
|
596
|
+
|
597
|
+
static void write_value(struct http_parser *hp, VALUE hdr,
|
598
|
+
char *buf, const char *p)
|
599
|
+
{
|
600
|
+
hp->is_request ? write_request_value(hp, hdr, buf, p) :
|
601
|
+
write_response_value(hp, hdr, buf, p);
|
602
|
+
}
|
603
|
+
|
298
604
|
/** Machine **/
|
299
605
|
|
300
606
|
%%{
|
@@ -302,10 +608,22 @@ static void write_value(VALUE hdr, struct http_parser *hp,
|
|
302
608
|
|
303
609
|
action mark {MARK(mark, fpc); }
|
304
610
|
|
611
|
+
action snake_upcase_field { snake_upcase_char(deconst(fpc)); }
|
612
|
+
action downcase_char { downcase_char(deconst(fpc)); }
|
613
|
+
action request_method { request_method(hdr, PTR_TO(mark), LEN(mark, fpc)); }
|
614
|
+
action url_scheme { url_scheme(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
|
615
|
+
action host { request_host(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
|
616
|
+
action request_uri { request_uri(hdr, PTR_TO(mark), LEN(mark, fpc)); }
|
617
|
+
action fragment { set_fragment(hdr, PTR_TO(mark), LEN(mark, fpc)); }
|
618
|
+
action start_query { MARK(start.query, fpc); }
|
619
|
+
action query_string {
|
620
|
+
query_string(hp, hdr, PTR_TO(start.query), LEN(start.query, fpc));
|
621
|
+
}
|
622
|
+
action request_path { request_path(hdr, PTR_TO(mark), LEN(mark, fpc)); }
|
305
623
|
action start_field { MARK(start.field, fpc); }
|
306
624
|
action write_field { hp->s.field_len = LEN(start.field, fpc); }
|
307
625
|
action start_value { MARK(mark, fpc); }
|
308
|
-
action write_value { write_value(
|
626
|
+
action write_value { write_value(hp, hdr, buffer, fpc); }
|
309
627
|
action write_cont_value { write_cont_value(hp, buffer, fpc); }
|
310
628
|
action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
|
311
629
|
action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
|
@@ -316,10 +634,10 @@ static void write_value(VALUE hdr, struct http_parser *hp,
|
|
316
634
|
rb_raise(eParserError, "invalid chunk size");
|
317
635
|
}
|
318
636
|
action header_done {
|
319
|
-
finalize_header(hp);
|
637
|
+
finalize_header(hp, hdr);
|
320
638
|
cs = http_parser_first_final;
|
321
639
|
|
322
|
-
if (
|
640
|
+
if (hp->chunked)
|
323
641
|
cs = http_parser_en_ChunkedBody;
|
324
642
|
|
325
643
|
/*
|
@@ -335,7 +653,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
|
|
335
653
|
}
|
336
654
|
|
337
655
|
action end_chunked_body {
|
338
|
-
|
656
|
+
hp->in_trailer = 1;
|
339
657
|
cs = http_parser_en_Trailers;
|
340
658
|
++p;
|
341
659
|
assert(p <= pe && "buffer overflow after chunked body");
|
@@ -351,7 +669,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
|
|
351
669
|
p += nr;
|
352
670
|
assert(hp->len.chunk >= 0 && "negative chunk length");
|
353
671
|
if ((size_t)hp->len.chunk > REMAINING) {
|
354
|
-
|
672
|
+
hp->in_chunk = 1;
|
355
673
|
goto post_exec;
|
356
674
|
} else {
|
357
675
|
fhold;
|
@@ -370,7 +688,7 @@ static void http_parser_init(struct http_parser *hp)
|
|
370
688
|
int cs = 0;
|
371
689
|
memset(hp, 0, sizeof(struct http_parser));
|
372
690
|
hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */
|
373
|
-
hp->status = Qnil;
|
691
|
+
hp->v.status = Qnil;
|
374
692
|
hp->len.content = -1;
|
375
693
|
%% write init;
|
376
694
|
hp->cs = cs;
|
@@ -395,43 +713,55 @@ static void http_parser_execute(struct http_parser *hp,
|
|
395
713
|
assert((void *)(pe - p) == (void *)(len - off) &&
|
396
714
|
"pointers aren't same distance");
|
397
715
|
|
398
|
-
if (
|
399
|
-
|
716
|
+
if (hp->in_chunk) {
|
717
|
+
hp->in_chunk = 0;
|
400
718
|
goto skip_chunk_data_hack;
|
401
719
|
}
|
402
720
|
%% write exec;
|
403
721
|
post_exec: /* "_out:" also goes here */
|
404
722
|
if (hp->cs != http_parser_error)
|
405
723
|
hp->cs = cs;
|
406
|
-
hp->offset = p - buffer;
|
724
|
+
hp->offset = ulong2uint(p - buffer);
|
407
725
|
|
408
726
|
assert(p <= pe && "buffer overflow after parsing execute");
|
409
727
|
assert(hp->offset <= len && "offset longer than length");
|
410
728
|
}
|
411
729
|
|
412
|
-
static
|
730
|
+
static void kcar_mark(void *ptr)
|
413
731
|
{
|
414
|
-
struct http_parser *hp;
|
732
|
+
struct http_parser *hp = ptr;
|
415
733
|
|
416
|
-
|
417
|
-
|
418
|
-
return hp;
|
734
|
+
rb_gc_mark(hp->cont);
|
735
|
+
rb_gc_mark(hp->v.status);
|
419
736
|
}
|
420
737
|
|
421
|
-
static
|
738
|
+
static size_t kcar_memsize(const void *ptr)
|
422
739
|
{
|
423
|
-
struct http_parser
|
740
|
+
return sizeof(struct http_parser);
|
741
|
+
}
|
424
742
|
|
425
|
-
|
426
|
-
|
743
|
+
static const rb_data_type_t kcar_type = {
|
744
|
+
"kcar_parser",
|
745
|
+
{ kcar_mark, RUBY_TYPED_DEFAULT_FREE, kcar_memsize, /* reserved */ },
|
746
|
+
/* parent, data, [ flags ] */
|
747
|
+
};
|
748
|
+
|
749
|
+
static VALUE kcar_alloc(VALUE klass)
|
750
|
+
{
|
751
|
+
struct http_parser *hp;
|
752
|
+
return TypedData_Make_Struct(klass, struct http_parser, &kcar_type, hp);
|
427
753
|
}
|
428
754
|
|
429
|
-
static
|
755
|
+
static struct http_parser *data_get(VALUE self)
|
430
756
|
{
|
431
757
|
struct http_parser *hp;
|
432
|
-
|
758
|
+
|
759
|
+
TypedData_Get_Struct(self, struct http_parser, &kcar_type, hp);
|
760
|
+
assert(hp && "failed to extract http_parser struct");
|
761
|
+
return hp;
|
433
762
|
}
|
434
763
|
|
764
|
+
|
435
765
|
/**
|
436
766
|
* call-seq:
|
437
767
|
* Kcar::Parser.new => parser
|
@@ -485,7 +815,7 @@ static VALUE body_bytes_left(VALUE self)
|
|
485
815
|
{
|
486
816
|
struct http_parser *hp = data_get(self);
|
487
817
|
|
488
|
-
if (
|
818
|
+
if (hp->chunked)
|
489
819
|
return Qnil;
|
490
820
|
if (hp->len.content >= 0)
|
491
821
|
return OFFT2NUM(hp->len.content);
|
@@ -505,9 +835,11 @@ static VALUE body_bytes_left_set(VALUE self, VALUE bytes)
|
|
505
835
|
{
|
506
836
|
struct http_parser *hp = data_get(self);
|
507
837
|
|
508
|
-
if (
|
838
|
+
if (hp->chunked)
|
509
839
|
rb_raise(rb_eRuntimeError, "body_bytes_left= is not for chunked bodies");
|
510
840
|
hp->len.content = NUM2OFFT(bytes);
|
841
|
+
if (hp->len.content == 0)
|
842
|
+
hp->body_eof_seen = 1;
|
511
843
|
return bytes;
|
512
844
|
}
|
513
845
|
|
@@ -522,7 +854,30 @@ static VALUE chunked(VALUE self)
|
|
522
854
|
{
|
523
855
|
struct http_parser *hp = data_get(self);
|
524
856
|
|
525
|
-
return
|
857
|
+
return hp->chunked ? Qtrue : Qfalse;
|
858
|
+
}
|
859
|
+
|
860
|
+
static void check_buffer_size(long dlen)
|
861
|
+
{
|
862
|
+
if ((uint64_t)dlen > UINT_MAX)
|
863
|
+
rb_raise(rb_eRangeError, "headers too large to process (%ld bytes)", dlen);
|
864
|
+
}
|
865
|
+
|
866
|
+
static void parser_execute(struct http_parser *hp, VALUE hdr, VALUE buf)
|
867
|
+
{
|
868
|
+
char *ptr;
|
869
|
+
long len;
|
870
|
+
|
871
|
+
Check_Type(buf, T_STRING);
|
872
|
+
rb_str_modify(buf);
|
873
|
+
ptr = RSTRING_PTR(buf);
|
874
|
+
len = RSTRING_LEN(buf);
|
875
|
+
check_buffer_size(len);
|
876
|
+
|
877
|
+
http_parser_execute(hp, hdr, ptr, len);
|
878
|
+
|
879
|
+
if (hp->cs == http_parser_error)
|
880
|
+
rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
|
526
881
|
}
|
527
882
|
|
528
883
|
/**
|
@@ -541,28 +896,49 @@ static VALUE headers(VALUE self, VALUE hdr, VALUE data)
|
|
541
896
|
{
|
542
897
|
struct http_parser *hp = data_get(self);
|
543
898
|
|
544
|
-
|
899
|
+
if (hp->is_request)
|
900
|
+
rb_raise(rb_eRuntimeError, "parser is handling a request, not response");
|
901
|
+
|
902
|
+
parser_execute(hp, hdr, data);
|
545
903
|
VALIDATE_MAX_LENGTH(hp->offset, HEADER);
|
546
904
|
|
547
905
|
if (hp->cs == http_parser_first_final ||
|
548
906
|
hp->cs == http_parser_en_ChunkedBody) {
|
549
907
|
advance_str(data, hp->offset + 1);
|
550
908
|
hp->offset = 0;
|
551
|
-
if (
|
909
|
+
if (hp->in_trailer)
|
552
910
|
return hdr;
|
553
911
|
else
|
554
|
-
return rb_ary_new3(2, hp->status, hdr);
|
912
|
+
return rb_ary_new3(2, hp->v.status, hdr);
|
555
913
|
}
|
556
914
|
|
557
|
-
if (hp->cs == http_parser_error)
|
558
|
-
rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
|
559
|
-
|
560
915
|
return Qnil;
|
561
916
|
}
|
562
917
|
|
918
|
+
static VALUE request(VALUE self, VALUE env, VALUE buf)
|
919
|
+
{
|
920
|
+
struct http_parser *hp = data_get(self);
|
921
|
+
|
922
|
+
hp->is_request = 1;
|
923
|
+
Check_Type(buf, T_STRING);
|
924
|
+
parser_execute(hp, env, buf);
|
925
|
+
|
926
|
+
if (hp->cs == http_parser_first_final ||
|
927
|
+
hp->cs == http_parser_en_ChunkedBody) {
|
928
|
+
advance_str(buf, hp->offset + 1);
|
929
|
+
hp->offset = 0;
|
930
|
+
if (hp->in_trailer)
|
931
|
+
hp->body_eof_seen = 1;
|
932
|
+
|
933
|
+
return env;
|
934
|
+
}
|
935
|
+
return Qnil; /* incomplete */
|
936
|
+
}
|
937
|
+
|
938
|
+
|
563
939
|
static int chunked_eof(struct http_parser *hp)
|
564
940
|
{
|
565
|
-
return ((hp->cs == http_parser_first_final) ||
|
941
|
+
return ((hp->cs == http_parser_first_final) || hp->in_trailer);
|
566
942
|
}
|
567
943
|
|
568
944
|
/**
|
@@ -576,13 +952,13 @@ static VALUE body_eof(VALUE self)
|
|
576
952
|
{
|
577
953
|
struct http_parser *hp = data_get(self);
|
578
954
|
|
579
|
-
if (!
|
955
|
+
if (!hp->has_header && hp->persistent)
|
580
956
|
return Qtrue;
|
581
957
|
|
582
|
-
if (
|
958
|
+
if (hp->chunked)
|
583
959
|
return chunked_eof(hp) ? Qtrue : Qfalse;
|
584
960
|
|
585
|
-
if (!
|
961
|
+
if (!hp->has_body)
|
586
962
|
return Qtrue;
|
587
963
|
|
588
964
|
return hp->len.content == 0 ? Qtrue : Qfalse;
|
@@ -604,10 +980,14 @@ static VALUE keepalive(VALUE self)
|
|
604
980
|
{
|
605
981
|
struct http_parser *hp = data_get(self);
|
606
982
|
|
607
|
-
if (
|
608
|
-
if (
|
609
|
-
if (
|
610
|
-
|
983
|
+
if (hp->persistent) {
|
984
|
+
if (hp->has_header && hp->has_body) {
|
985
|
+
if (hp->chunked || (hp->len.content >= 0)) {
|
986
|
+
if (!hp->is_request)
|
987
|
+
return Qtrue;
|
988
|
+
else
|
989
|
+
return hp->body_eof_seen ? Qtrue : Qfalse;
|
990
|
+
}
|
611
991
|
|
612
992
|
/* unknown Content-Length and not chunked, we must assume close */
|
613
993
|
return Qfalse;
|
@@ -621,54 +1001,77 @@ static VALUE keepalive(VALUE self)
|
|
621
1001
|
|
622
1002
|
/**
|
623
1003
|
* call-seq:
|
624
|
-
* parser.filter_body(
|
1004
|
+
* parser.filter_body(dst, src) => nil/dst
|
625
1005
|
*
|
626
|
-
* Takes a String of +
|
627
|
-
* Returns +nil+ if there is more
|
628
|
-
* +
|
629
|
-
* it may modify +
|
1006
|
+
* Takes a String of +src+, will modify src if dechunking is done.
|
1007
|
+
* Returns +nil+ if there is more +src+ left to process. Returns
|
1008
|
+
* +dst+ if body processing is complete. When returning +dst+,
|
1009
|
+
* it may modify +src+ so the start of the string points to where
|
630
1010
|
* the body ended so that trailer processing can begin.
|
631
1011
|
*
|
632
1012
|
* Raises ParserError if there are dechunking errors.
|
633
|
-
* Basically this is a glorified memcpy(3) that copies +
|
634
|
-
* into +
|
1013
|
+
* Basically this is a glorified memcpy(3) that copies +src+
|
1014
|
+
* into +dst+ while filtering it through the dechunker.
|
635
1015
|
*/
|
636
|
-
static VALUE filter_body(VALUE self, VALUE
|
1016
|
+
static VALUE filter_body(VALUE self, VALUE dst, VALUE src)
|
637
1017
|
{
|
638
1018
|
struct http_parser *hp = data_get(self);
|
639
|
-
char *
|
640
|
-
long
|
1019
|
+
char *sptr;
|
1020
|
+
long slen;
|
641
1021
|
|
642
|
-
|
643
|
-
|
1022
|
+
sptr = RSTRING_PTR(src);
|
1023
|
+
slen = RSTRING_LEN(src);
|
1024
|
+
check_buffer_size(slen);
|
644
1025
|
|
645
|
-
StringValue(
|
646
|
-
rb_str_modify(
|
647
|
-
|
648
|
-
OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */
|
1026
|
+
StringValue(dst);
|
1027
|
+
rb_str_modify(dst);
|
1028
|
+
OBJ_TAINT(dst); /* keep weirdo $SAFE users happy */
|
649
1029
|
|
650
|
-
|
1030
|
+
/*
|
1031
|
+
* for now, only support filter_body for identity requests,
|
1032
|
+
* not responses; it's rather inefficient to blindly memcpy
|
1033
|
+
* giant request bodies; on the other hand, it simplifies
|
1034
|
+
* server-side code.
|
1035
|
+
*/
|
1036
|
+
if (hp->is_request && !hp->chunked) {
|
1037
|
+
/* no need to enter the Ragel machine for unchunked transfers */
|
1038
|
+
assert(hp->len.content >= 0 && "negative Content-Length");
|
1039
|
+
if (hp->len.content > 0) {
|
1040
|
+
long nr = MIN(slen, hp->len.content);
|
1041
|
+
|
1042
|
+
rb_str_resize(dst, nr);
|
1043
|
+
memcpy(RSTRING_PTR(dst), sptr, nr);
|
1044
|
+
hp->len.content -= nr;
|
1045
|
+
if (hp->len.content == 0)
|
1046
|
+
hp->body_eof_seen = 1;
|
1047
|
+
advance_str(src, nr);
|
1048
|
+
}
|
1049
|
+
return dst;
|
1050
|
+
}
|
1051
|
+
|
1052
|
+
if (!hp->chunked)
|
651
1053
|
rb_raise(rb_eRuntimeError, "filter_body is only for chunked bodies");
|
652
1054
|
|
1055
|
+
rb_str_resize(dst, slen); /* we can never copy more than slen bytes */
|
653
1056
|
if (!chunked_eof(hp)) {
|
654
1057
|
hp->s.dest_offset = 0;
|
655
|
-
http_parser_execute(hp,
|
1058
|
+
http_parser_execute(hp, dst, sptr, slen);
|
656
1059
|
if (hp->cs == http_parser_error)
|
657
1060
|
rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
|
658
1061
|
|
659
1062
|
assert(hp->s.dest_offset <= hp->offset &&
|
660
1063
|
"destination buffer overflow");
|
661
|
-
advance_str(
|
662
|
-
rb_str_set_len(
|
1064
|
+
advance_str(src, hp->offset);
|
1065
|
+
rb_str_set_len(dst, hp->s.dest_offset);
|
663
1066
|
|
664
|
-
if (RSTRING_LEN(
|
1067
|
+
if (RSTRING_LEN(dst) == 0 && chunked_eof(hp)) {
|
665
1068
|
assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
|
666
1069
|
} else {
|
667
|
-
|
1070
|
+
dst = Qnil;
|
668
1071
|
}
|
669
1072
|
}
|
670
1073
|
hp->offset = 0; /* for trailer parsing */
|
671
|
-
return
|
1074
|
+
return dst;
|
672
1075
|
}
|
673
1076
|
|
674
1077
|
void Init_kcar_ext(void)
|
@@ -676,11 +1079,21 @@ void Init_kcar_ext(void)
|
|
676
1079
|
VALUE mKcar = rb_define_module("Kcar");
|
677
1080
|
VALUE cParser = rb_define_class_under(mKcar, "Parser", rb_cObject);
|
678
1081
|
|
1082
|
+
/*
|
1083
|
+
* Document-class: Kcar::ParserError
|
1084
|
+
*
|
1085
|
+
* This is raised if there are parsing errors.
|
1086
|
+
*/
|
679
1087
|
eParserError = rb_define_class_under(mKcar, "ParserError", rb_eIOError);
|
1088
|
+
e413 = rb_define_class_under(mKcar, "RequestEntityTooLargeError",
|
1089
|
+
eParserError);
|
1090
|
+
e414 = rb_define_class_under(mKcar, "RequestURITooLongError",
|
1091
|
+
eParserError);
|
680
1092
|
|
681
|
-
rb_define_alloc_func(cParser,
|
1093
|
+
rb_define_alloc_func(cParser, kcar_alloc);
|
682
1094
|
rb_define_method(cParser, "initialize", initialize, 0);
|
683
1095
|
rb_define_method(cParser, "reset", initialize, 0);
|
1096
|
+
rb_define_method(cParser, "request", request, 2);
|
684
1097
|
rb_define_method(cParser, "headers", headers, 2);
|
685
1098
|
rb_define_method(cParser, "trailers", headers, 2);
|
686
1099
|
rb_define_method(cParser, "filter_body", filter_body, 2);
|
@@ -706,4 +1119,34 @@ void Init_kcar_ext(void)
|
|
706
1119
|
rb_define_const(cParser, "LENGTH_MAX", OFFT2NUM(UH_OFF_T_MAX));
|
707
1120
|
id_sq = rb_intern("[]");
|
708
1121
|
id_sq_set = rb_intern("[]=");
|
1122
|
+
id_uminus = rb_intern("-@");
|
1123
|
+
|
1124
|
+
/* TODO: gperf to make a perfect hash of common strings */
|
1125
|
+
#define C(var, cstr) do { \
|
1126
|
+
var = str_new_dd_freeze((cstr), sizeof(cstr) - 1); \
|
1127
|
+
rb_gc_register_mark_object((var)); \
|
1128
|
+
} while (0);
|
1129
|
+
|
1130
|
+
C(g_CONTENT_LENGTH, "CONTENT_LENGTH");
|
1131
|
+
C(g_CONTENT_TYPE, "CONTENT_TYPE");
|
1132
|
+
C(g_FRAGMENT, "FRAGMENT");
|
1133
|
+
C(g_HTTP_HOST, "HTTP_HOST");
|
1134
|
+
C(g_HTTP_CONNECTION, "HTTP_CONNECTION");
|
1135
|
+
C(g_HTTP_TRAILER, "HTTP_TRAILER");
|
1136
|
+
C(g_HTTP_TRANSFER_ENCODING, "HTTP_TRANSFER_ENCODING");
|
1137
|
+
C(g_HTTP_VERSION, "HTTP_VERSION");
|
1138
|
+
C(g_PATH_INFO, "PATH_INFO");
|
1139
|
+
C(g_QUERY_STRING, "QUERY_STRING");
|
1140
|
+
C(g_REQUEST_METHOD, "REQUEST_METHOD");
|
1141
|
+
C(g_REQUEST_PATH, "REQUEST_PATH");
|
1142
|
+
C(g_REQUEST_URI, "REQUEST_URI");
|
1143
|
+
C(g_SERVER_NAME, "SERVER_NAME");
|
1144
|
+
C(g_SERVER_PORT, "SERVER_PORT");
|
1145
|
+
C(g_SERVER_PROTOCOL, "SERVER_PROTOCOL");
|
1146
|
+
C(g_rack_url_scheme, "rack.url_scheme");
|
1147
|
+
C(g_http, "http");
|
1148
|
+
C(g_https, "https");
|
1149
|
+
C(g_80, "80");
|
1150
|
+
C(g_443, "443");
|
1151
|
+
#undef C
|
709
1152
|
}
|