kcar 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 11fd8bf65267d66726c60a780beb44140d7ce64e
4
- data.tar.gz: a6508831c72ee890f06a1a6b70858b15a3817ddb
2
+ SHA256:
3
+ metadata.gz: f7cd96d8a5fa081a4e6ac36feb24d3a737f428550da5fa589ab564c516d01820
4
+ data.tar.gz: 7383af40cdd8bc3ec9954feb4199d0f89ad04fb468633e22e515f6dd70ffc075
5
5
  SHA512:
6
- metadata.gz: 7c28c23285c4c881749facc72bb8388b7e66d8231201b1b5ac87c83ddb2482d8b9324ea47d450eae0e63cdccb2b5bb2f42274ca9835276a92b0cabe7b6ab90c8
7
- data.tar.gz: 2b5ee78603d2e7e36df30e60b06bf3f701b1aa240655b8df84f71e91b0254333b587c2be955816505673b1a92eb234f0151f39ff7e6749c80e540ad9cb645821
6
+ metadata.gz: cccb634b38dc4d944f3a216c2f2acafbc563e6efa782643d6ff845f8b09e5338fe285c1d862cab8363ab1598298b6ed1f05d0509c2fa1153adb5f946be45fddb
7
+ data.tar.gz: 92f1e61ef2049429835403ddc964179fb611ace06ce33a29f20fd3659db629ae05f532bf473efd44372192e476acea23cc09347bc271d2e0bdff42a70d99add2
data/.document CHANGED
@@ -1,5 +1,6 @@
1
1
  LICENSE
2
2
  README
3
3
  NEWS
4
+ HACKING
4
5
  lib
5
6
  ext/kcar/kcar.c
@@ -1,7 +1,14 @@
1
1
  ---
2
- cgit_url: http://bogomips.org/kcar.git
3
- git_url: git://bogomips.org/kcar.git
4
- rdoc_url: http://bogomips.org/kcar/
5
- ml_url: http://bogomips.org/kcar-public/
6
- private_email: kcar@bogomips.org
7
- public_email: kcar-public@bogomips.org
2
+ cgit_url: https://yhbt.net/kcar.git
3
+ git_url: https://yhbt.net/kcar.git
4
+ rdoc_url: https://yhbt.net/kcar/
5
+ ml_url:
6
+ - https://yhbt.net/kcar-public/
7
+ - http://ou63pmih66umazou.onion/kcar-public/
8
+ public_email: kcar-public@yhbt.net
9
+ source_code:
10
+ - git clone https://yhbt.net/kcar.git
11
+ - torsocks git clone http://ou63pmih66umazou.onion/kcar.git
12
+ nntp_url:
13
+ - nntp://news.public-inbox.org/inbox.comp.lang.ruby.kcar
14
+ - nntp://ou63pmih66umazou.onion/inbox.comp.lang.ruby.kcar
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  CONSTANT = "Kcar::VERSION"
3
3
  RVF = "lib/kcar/version.rb"
4
- DEF_VER = "v0.6.0"
4
+ DEF_VER = "v0.7.0"
5
5
  GVF = "GIT-VERSION-FILE"
6
6
  vn = DEF_VER
7
7
 
@@ -21,7 +21,7 @@ if File.exist?(".git")
21
21
  end
22
22
 
23
23
  vn = vn.sub!(/\Av/, "")
24
- new_ruby_version = "#{CONSTANT} = '#{vn}'\n"
24
+ new_ruby_version = "#{CONSTANT} = '#{vn}' # :nodoc:\n"
25
25
  cur_ruby_version = File.read(RVF) rescue nil
26
26
  if new_ruby_version != cur_ruby_version
27
27
  File.open(RVF, "w") { |fp| fp.write(new_ruby_version) }
@@ -1,5 +1,5 @@
1
1
  all::
2
- RSYNC_DEST := bogomips.org:/srv/bogomips/kcar
2
+ RSYNC_DEST := yhbt.net:/srv/bogomips/kcar
3
3
  RAGEL = ragel
4
4
  RLFLAGS = -G2
5
5
  rfpackage := kcar
data/HACKING ADDED
@@ -0,0 +1,36 @@
1
+ == development dependencies
2
+
3
+ * GNU make - https://www.gnu.org/software/make/
4
+ * git - https://www.git-scm.com/
5
+ * ruby - https://www.ruby-lang.org/en/
6
+
7
+ git clone https://yhbt.net/kcar.git
8
+
9
+ == tests
10
+
11
+ * make test - run each test in a separate process (parallelize using -j)
12
+
13
+ For non-GNU users, GNU make may be installed as "gmake".
14
+
15
+ == test environment
16
+
17
+ RUBY - specify an alternative ruby(1) runtime
18
+ V - set to 1 for verbose test output (may be mangled if multithreaded)
19
+
20
+ == installing from git
21
+
22
+ * make install-gem
23
+
24
+ == contact
25
+
26
+ We use git(7) and develop kcar on a public mailing list like git.git
27
+ developers do. Please send patches via git-send-email(1) to the public
28
+ mailing list at <mailto:kcar-public@yhbt.net>. Pull requests should be
29
+ formatted using git-request-pull(1).
30
+
31
+ All mail is archived publicly at: https://yhbt.net/kcar-public/
32
+ and nntp://news.public-inbox.org/inbox.comp.lang.ruby.kcar
33
+
34
+ Anonymous contributions will always be welcome.
35
+ No subscription is necessary to post to the mailing list.
36
+ Please remember to Cc: all recipients as subscription is optional.
data/LICENSE CHANGED
@@ -1,7 +1,7 @@
1
1
  kcar is copyrighted free software by all contributors, see logs in
2
2
  revision control for names and email addresses of all of them. You can
3
3
  redistribute it and/or modify it under either the terms of the
4
- {GPLv2}[http://www.gnu.org/licenses/gpl-2.0.txt] or later or
4
+ {GPLv2}[https://www.gnu.org/licenses/gpl-2.0.txt] or later or
5
5
  the conditions below:
6
6
 
7
7
  1. You may make and give away verbatim copies of the source form of the
data/README CHANGED
@@ -7,7 +7,7 @@ regular files, FIFOs, StringIOs as well as traditional TCP sockets.
7
7
 
8
8
  == Features
9
9
 
10
- * RFC2616-compliant Ragel+C parser adapted from Unicorn and Mongrel
10
+ * RFC2616-compliant Ragel+C parser adapted from Mongrel
11
11
 
12
12
  * decodes chunked response bodies with an optional pass-through mode
13
13
  (to avoid rechunking with Rack::Chunked)
@@ -35,7 +35,7 @@ If you use RubyGems:
35
35
 
36
36
  Otherwise grab the latest tarball from:
37
37
 
38
- http://bogomips.org/kcar/files/
38
+ https://yhbt.net/kcar/files/
39
39
 
40
40
  Unpack it, and run "ruby setup.rb"
41
41
 
@@ -62,14 +62,14 @@ through the body with body.each.
62
62
 
63
63
  You can get the latest source via git from the following locations:
64
64
 
65
- git://bogomips.org/kcar.git
66
- git://repo.or.cz/kcar.git (mirror)
65
+ https://yhbt.net/kcar.git
66
+ https://repo.or.cz/kcar.git (mirror)
67
67
 
68
68
  You may browse the code from the web and download the latest snapshot
69
69
  tarballs here:
70
70
 
71
- * http://bogomips.org/cgit/kcar.git (cgit)
72
- * http://repo.or.cz/w/kcar.git (gitweb)
71
+ * https://yhbt.net/kcar.git
72
+ * https://repo.or.cz/w/kcar.git (gitweb)
73
73
 
74
74
  Inline patches (from "git format-patch") to the mailing list are
75
75
  preferred because they allow code review and comments in the reply to
@@ -83,8 +83,17 @@ don't email the git mailing list or maintainer with kcar patches.
83
83
  == Contact
84
84
 
85
85
  All feedback (bug reports, user/development discussion, patches, pull
86
- requests) go to the mailing list: mailto:kcar-public@bogomips.org
86
+ requests) go to the public mailing list: mailto:kcar-public@yhbt.net
87
+ All mail is archived publicly at: https://yhbt.net/kcar-public/
88
+ and nntp://news.public-inbox.org/inbox.comp.lang.ruby.kcar
87
89
 
88
- Mailing list archives are available here:
90
+ Anonymous posts will always be welcome.
89
91
 
90
- http://bogomips.org/kcar-public/
92
+ No subscription is necessary to post to the mailing list;
93
+ but you may subscribe by sending a plain-text mail to:
94
+
95
+ mailto:kcar-public+subscribe@yhbt.net
96
+
97
+ Keep in mind we suck at delivering email, so using NNTP or
98
+ Atom feeds might be a better bet.
99
+ Please remember to Cc: all recipients as subscription is optional.
@@ -1,4 +1,7 @@
1
+ # This config was used to create the import used to migrate
2
+ # mailing list archives from gmane to https://yhbt.net/kcar-public/
3
+
1
4
  # group_name max expire headers_only
2
5
  gmane.comp.lang.ruby.kcar.general 1000000000 1000000000 0
3
6
 
4
- # usage: slrnpull -d $PWD -h news.gmane.org --no-post
7
+ # usage: slrnpull -d $PWD -h news.gmane.io --no-post
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  * Generic C functions and macros go here, there are no dependencies
3
- * on Unicorn internal structures or the Ruby C API in here.
3
+ * on kcar internal structures or the Ruby C API in here.
4
4
  */
5
5
 
6
6
  #ifndef UH_util_h
@@ -49,7 +49,7 @@ static int hexchar2int(int xdigit)
49
49
  if (xdigit >= 'a' && xdigit <= 'f')
50
50
  return xdigit - 'a' + 10;
51
51
 
52
- /* Ragel already does runtime range checking for us in Unicorn: */
52
+ /* Ragel already does runtime range checking for us */
53
53
  assert(xdigit >= '0' && xdigit <= '9' && "invalid digit character");
54
54
 
55
55
  return xdigit - '0';
@@ -1,26 +1,6 @@
1
1
  #ifndef ext_help_h
2
2
  #define ext_help_h
3
3
 
4
- #ifndef RSTRING_PTR
5
- #define RSTRING_PTR(s) (RSTRING(s)->ptr)
6
- #endif /* !defined(RSTRING_PTR) */
7
- #ifndef RSTRING_LEN
8
- #define RSTRING_LEN(s) (RSTRING(s)->len)
9
- #endif /* !defined(RSTRING_LEN) */
10
-
11
- #ifndef HAVE_RB_STR_SET_LEN
12
- # ifdef RUBINIUS
13
- # error we should never get here with current Rubinius (1.x)
14
- # endif
15
- /* this is taken from Ruby 1.8.7, 1.8.6 may not have it */
16
- static void rb_18_str_set_len(VALUE str, long len)
17
- {
18
- RSTRING(str)->len = len;
19
- RSTRING(str)->ptr[len] = '\0';
20
- }
21
- # define rb_str_set_len(str,len) rb_18_str_set_len(str,len)
22
- #endif /* !defined(HAVE_RB_STR_SET_LEN) */
23
-
24
4
  /* not all Ruby implementations support frozen objects (Rubinius does not) */
25
5
  #if defined(OBJ_FROZEN)
26
6
  # define assert_frozen(f) assert(OBJ_FROZEN(f) && "unfrozen object")
@@ -36,10 +16,6 @@ static void rb_18_str_set_len(VALUE str, long len)
36
16
  # endif
37
17
  #endif /* ! defined(OFFT2NUM) */
38
18
 
39
- #ifndef HAVE_RB_STR_MODIFY
40
- # define rb_str_modify(x) do {} while (0)
41
- #endif /* ! defined(HAVE_RB_STR_MODIFY) */
42
-
43
19
  static inline int str_cstr_eq(VALUE val, const char *ptr, long len)
44
20
  {
45
21
  return (RSTRING_LEN(val) == len && !memcmp(ptr, RSTRING_PTR(val), len));
@@ -5,10 +5,35 @@ dir_config("kcar")
5
5
 
6
6
  have_macro("SIZEOF_OFF_T", "ruby.h") or check_sizeof("off_t", "sys/types.h")
7
7
  have_macro("SIZEOF_LONG", "ruby.h") or check_sizeof("long", "sys/types.h")
8
- have_func("rb_str_set_len", "ruby.h")
9
- have_func("rb_str_modify", "ruby.h")
10
8
 
11
- # -fPIC is needed for Rubinius, MRI already uses it regardless
12
- with_cflags($CFLAGS + " -fPIC ") do
13
- create_makefile("kcar_ext")
9
+ message('checking if String#-@ (str_uminus) dedupes... ')
10
+ begin
11
+ a = -(%w(t e s t).join)
12
+ b = -(%w(t e s t).join)
13
+ if a.equal?(b)
14
+ $CPPFLAGS += " -DSTR_UMINUS_DEDUPE=1 "
15
+ message("yes\n")
16
+ else
17
+ $CPPFLAGS += " -DSTR_UMINUS_DEDUPE=0 "
18
+ message("no, needs Ruby 2.5+\n")
19
+ end
20
+ rescue NoMethodError
21
+ $CPPFLAGS += " -DSTR_UMINUS_DEDUPE=0 "
22
+ message("no, String#-@ not available\n")
14
23
  end
24
+
25
+ message('checking if Hash#[]= (rb_hash_aset) dedupes... ')
26
+ h = {}
27
+ x = {}
28
+ r = rand.to_s
29
+ h[%W(#{r}).join('')] = :foo
30
+ x[%W(#{r}).join('')] = :foo
31
+ if x.keys[0].equal?(h.keys[0])
32
+ $CPPFLAGS += ' -DHASH_ASET_DEDUPE=1 '
33
+ message("yes\n")
34
+ else
35
+ $CPPFLAGS += ' -DHASH_ASET_DEDUPE=0 '
36
+ message("no, needs Ruby 2.6+\n")
37
+ end
38
+
39
+ create_makefile("kcar_ext")
@@ -10,10 +10,20 @@
10
10
  #include <stdlib.h>
11
11
  #include <string.h>
12
12
  #include <sys/types.h>
13
+ #include <limits.h>
13
14
  #include "c_util.h"
14
15
 
15
16
  static VALUE eParserError;
16
- static ID id_sq, id_sq_set;
17
+ static ID id_uminus, id_sq, id_sq_set;
18
+ static VALUE g_rack_url_scheme,
19
+ g_80, g_443, g_http, g_https,
20
+ g_HTTP_HOST, g_HTTP_CONNECTION, g_HTTP_TRAILER, g_HTTP_TRANSFER_ENCODING,
21
+ g_HTTP_VERSION,
22
+ g_CONTENT_LENGTH, g_CONTENT_TYPE, g_FRAGMENT,
23
+ g_PATH_INFO, g_QUERY_STRING,
24
+ g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI,
25
+ g_SERVER_NAME, g_SERVER_PORT, g_SERVER_PROTOCOL;
26
+ static VALUE e413, e414;
17
27
 
18
28
  /** Defines common length and error messages for input length validation. */
19
29
  #define DEF_MAX_LENGTH(N, length) \
@@ -30,58 +40,104 @@ static ID id_sq, id_sq_set;
30
40
  rb_raise(eParserError, MAX_##N##_LENGTH_ERR); \
31
41
  } while (0)
32
42
 
43
+ #define VALIDATE_MAX_URI_LENGTH(len, N) do { \
44
+ if (len > MAX_##N##_LENGTH) \
45
+ rb_raise(e414, MAX_##N##_LENGTH_ERR); \
46
+ } while (0)
47
+
33
48
  /* Defines the maximum allowed lengths for various input elements.*/
34
49
  DEF_MAX_LENGTH(FIELD_NAME, 256);
35
50
  DEF_MAX_LENGTH(FIELD_VALUE, 80 * 1024);
36
51
  DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32)));
37
-
38
- #define UH_FL_CHUNKED 0x1
39
- #define UH_FL_HASBODY 0x2
40
- #define UH_FL_INBODY 0x4
41
- #define UH_FL_HASTRAILER 0x8
42
- #define UH_FL_INTRAILER 0x10
43
- #define UH_FL_INCHUNK 0x20
44
- #define UH_FL_KEEPALIVE 0x40
45
- #define UH_FL_HASHEADER 0x80
52
+ DEF_MAX_LENGTH(REQUEST_URI, 1024 * 15);
53
+ DEF_MAX_LENGTH(REQUEST_PATH, 4096); /* common PATH_MAX on modern systems */
54
+ DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10));
46
55
 
47
56
  struct http_parser {
48
57
  int cs; /* Ragel internal state */
49
- unsigned int flags;
50
- size_t mark;
51
- size_t offset;
58
+ unsigned int is_request:1;
59
+ unsigned int has_query:1;
60
+ unsigned int has_scheme:1;
61
+ unsigned int chunked:1;
62
+ unsigned int has_body:1;
63
+ unsigned int in_body:1;
64
+ unsigned int has_trailer:1;
65
+ unsigned int in_trailer:1;
66
+ unsigned int in_chunk:1;
67
+ unsigned int persistent:1;
68
+ unsigned int has_header:1;
69
+ unsigned int body_eof_seen:1;
70
+ unsigned int is_https:1;
71
+ unsigned int padding:19;
72
+ unsigned int mark;
73
+ unsigned int offset;
52
74
  union { /* these 2 fields don't nest */
53
- size_t field;
54
- size_t query;
75
+ unsigned int field;
76
+ unsigned int query;
55
77
  } start;
56
78
  union {
57
- size_t field_len; /* only used during header processing */
58
- size_t dest_offset; /* only used during body processing */
79
+ unsigned int field_len; /* only used during header processing */
80
+ unsigned int dest_offset; /* only used during body processing */
59
81
  } s;
60
82
  VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */
61
- VALUE status; /* String or Qnil */
83
+ union {
84
+ /* String or Qnil */
85
+ VALUE status; /* status string for responses */
86
+ VALUE host; /* Host: header for requests */
87
+ } v;
62
88
  union {
63
89
  off_t content;
64
90
  off_t chunk;
65
91
  } len;
66
92
  };
67
93
 
94
+ static unsigned int ulong2uint(unsigned long n)
95
+ {
96
+ unsigned int i = (unsigned int)n;
97
+
98
+ if (sizeof(unsigned int) != sizeof(unsigned long)) {
99
+ if ((unsigned long)i != n) {
100
+ rb_raise(rb_eRangeError, "too large to be 32-bit uint: %lu", n);
101
+ }
102
+ }
103
+ return i;
104
+ }
105
+
68
106
  #define REMAINING (unsigned long)(pe - p)
69
- #define LEN(AT, FPC) (FPC - buffer - hp->AT)
70
- #define MARK(M,FPC) (hp->M = (FPC) - buffer)
107
+ #define LEN(AT, FPC) (ulong2uint(FPC - buffer) - hp->AT)
108
+ #define MARK(M,FPC) (hp->M = ulong2uint((FPC) - buffer))
71
109
  #define PTR_TO(F) (buffer + hp->F)
72
110
  #define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))
73
111
  #define STRIPPED_STR_NEW(M,FPC) stripped_str_new(PTR_TO(M), LEN(M, FPC))
74
112
 
75
- #define HP_FL_TEST(hp,fl) ((hp)->flags & (UH_FL_##fl))
76
- #define HP_FL_SET(hp,fl) ((hp)->flags |= (UH_FL_##fl))
77
- #define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl))
78
- #define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl))
113
+ /* Downcases a single ASCII character. Locale-agnostic. */
114
+ static void downcase_char(char *c)
115
+ {
116
+ if (*c >= 'A' && *c <= 'Z')
117
+ *c |= 0x20;
118
+ }
79
119
 
80
120
  static int is_lws(char c)
81
121
  {
82
122
  return (c == ' ' || c == '\t');
83
123
  }
84
124
 
125
+ /* this will dedupe under Ruby 2.5+ (December 2017) */
126
+ static VALUE str_dd_freeze(VALUE str)
127
+ {
128
+ if (STR_UMINUS_DEDUPE)
129
+ return rb_funcall(str, id_uminus, 0);
130
+
131
+ /* freeze, since it speeds up MRI slightly */
132
+ OBJ_FREEZE(str);
133
+ return str;
134
+ }
135
+
136
+ static VALUE str_new_dd_freeze(const char *ptr, long len)
137
+ {
138
+ return str_dd_freeze(rb_str_new(ptr, len));
139
+ }
140
+
85
141
  static VALUE stripped_str_new(const char *str, long len)
86
142
  {
87
143
  long end;
@@ -91,10 +147,69 @@ static VALUE stripped_str_new(const char *str, long len)
91
147
  return rb_str_new(str, end + 1);
92
148
  }
93
149
 
94
- static void finalize_header(struct http_parser *hp)
150
+ static VALUE request_host_val(struct http_parser *hp)
151
+ {
152
+ assert(hp->is_request == 1 && "not a request");
153
+ return NIL_P(hp->v.host) ? Qfalse : hp->v.host;
154
+ }
155
+
156
+ static void set_server_vars(struct http_parser *hp, VALUE env, VALUE host)
157
+ {
158
+ char *host_ptr = RSTRING_PTR(host);
159
+ long host_len = RSTRING_LEN(host);
160
+ char *colon;
161
+ VALUE server_name = host;
162
+ VALUE server_port = hp->has_scheme ? (hp->is_https ? g_443 : g_80) : Qfalse;
163
+
164
+ if (*host_ptr == '[') { /* ipv6 address format */
165
+ char *rbracket = memchr(host_ptr + 1, ']', host_len - 1);
166
+
167
+ if (rbracket)
168
+ colon = (rbracket[1] == ':') ? rbracket + 1 : NULL;
169
+ else
170
+ colon = memchr(host_ptr + 1, ':', host_len - 1);
171
+ } else {
172
+ colon = memchr(host_ptr, ':', host_len);
173
+ }
174
+
175
+ if (colon) {
176
+ long port_start = colon - host_ptr + 1;
177
+ long port_len = host_len - port_start;
178
+
179
+ server_name = rb_str_substr(host, 0, colon - host_ptr);
180
+ server_name = str_dd_freeze(server_name);
181
+ if (port_len > 0) {
182
+ server_port = rb_str_substr(host, port_start, port_len);
183
+ server_port = str_dd_freeze(server_port);
184
+ }
185
+ }
186
+ rb_hash_aset(env, g_SERVER_NAME, server_name);
187
+ if (server_port != Qfalse)
188
+ rb_hash_aset(env, g_SERVER_PORT, server_port);
189
+ }
190
+
191
+ static void finalize_header(struct http_parser *hp, VALUE hdr)
95
192
  {
96
- if ((HP_FL_TEST(hp, HASTRAILER) && ! HP_FL_TEST(hp, CHUNKED)))
193
+ if (hp->has_trailer && !hp->chunked)
97
194
  rb_raise(eParserError, "trailer but not chunked");
195
+ if (hp->is_request) {
196
+ if (hp->chunked) {
197
+ if (hp->len.chunk >= 0)
198
+ rb_raise(eParserError, "Content-Length set with chunked encoding");
199
+ else
200
+ hp->len.chunk = 0;
201
+ } else if (hp->len.content < 0) {
202
+ hp->len.content = 0;
203
+ }
204
+
205
+ if (!hp->has_query)
206
+ rb_hash_aset(hdr, g_QUERY_STRING, rb_str_new(NULL, 0));
207
+ if (hp->has_header) {
208
+ VALUE host = request_host_val(hp);
209
+ if (host != Qfalse)
210
+ set_server_vars(hp, hdr, host);
211
+ }
212
+ }
98
213
  }
99
214
 
100
215
  /*
@@ -107,28 +222,116 @@ static void hp_keepalive_connection(struct http_parser *hp, VALUE val)
107
222
  /* REQUEST_METHOD is always set before any headers */
108
223
  if (STR_CSTR_CASE_EQ(val, "keep-alive")) {
109
224
  /* basically have HTTP/1.0 masquerade as HTTP/1.1+ */
110
- HP_FL_SET(hp, KEEPALIVE);
225
+ hp->persistent = 1;
111
226
  } else if (STR_CSTR_CASE_EQ(val, "close")) {
112
227
  /*
113
228
  * it doesn't matter what HTTP version or request method we have,
114
229
  * if a server says "Connection: close", we disable keepalive
115
230
  */
116
- HP_FL_UNSET(hp, KEEPALIVE);
231
+ hp->persistent = 0;
117
232
  } else {
118
233
  /*
119
234
  * server could've sent anything, ignore it for now. Maybe
120
- * "HP_FL_UNSET(hp, KEEPALIVE);" just in case?
235
+ * "hp->persistent = 0;" just in case?
121
236
  * Raising an exception might be too mean...
122
237
  */
123
238
  }
124
239
  }
125
240
 
126
241
  static void
127
- http_version(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
242
+ request_method(VALUE env, const char *ptr, size_t len)
243
+ {
244
+ rb_hash_aset(env, g_REQUEST_METHOD, str_new_dd_freeze(ptr, len));
245
+ }
246
+
247
+ static void
248
+ url_scheme(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
249
+ {
250
+ VALUE val;
251
+
252
+ hp->has_scheme = 1;
253
+ /* Ragel machine downcases and enforces this as "http" or "https" */
254
+ if (len == 5) {
255
+ hp->is_https = 1;
256
+ assert(CONST_MEM_EQ("https", ptr, len) && "len == 5 but not 'https'");
257
+ val = g_https;
258
+ } else {
259
+ assert(CONST_MEM_EQ("http", ptr, len) && "len != 4 but not 'http'");
260
+ val = g_http;
261
+ }
262
+ rb_hash_aset(env, g_rack_url_scheme, val);
263
+ }
264
+
265
+ static void
266
+ request_host(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
267
+ {
268
+ VALUE val = rb_str_new(ptr, len);
269
+
270
+ rb_hash_aset(env, g_HTTP_HOST, val);
271
+ hp->v.host = val;
272
+ }
273
+
274
+ static void
275
+ set_fragment(VALUE env, const char *ptr, size_t len)
276
+ {
277
+ VALUE val = rb_str_new(ptr, len);
278
+ rb_hash_aset(env, g_FRAGMENT, val);
279
+ }
280
+
281
+ static void
282
+ request_uri(VALUE env, const char *ptr, size_t len)
283
+ {
284
+ VALUE val;
285
+
286
+ VALIDATE_MAX_URI_LENGTH(len, REQUEST_URI);
287
+ val = rb_str_new(ptr, len);
288
+ rb_hash_aset(env, g_REQUEST_URI, val);
289
+
290
+ /*
291
+ * rack says PATH_INFO must start with "/" or be empty,
292
+ * but "OPTIONS *" is a valid request
293
+ */
294
+ if (CONST_MEM_EQ("*", ptr, len)) {
295
+ val = rb_str_new(NULL, 0);
296
+ rb_hash_aset(env, g_PATH_INFO, val);
297
+ rb_hash_aset(env, g_REQUEST_PATH, val);
298
+ }
299
+ }
300
+
301
+ static void
302
+ query_string(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
303
+ {
304
+ VALIDATE_MAX_URI_LENGTH(len, QUERY_STRING);
305
+
306
+ hp->has_query = 1;
307
+ rb_hash_aset(env, g_QUERY_STRING, rb_str_new(ptr, len));
308
+ }
309
+
310
+ static void
311
+ request_path(VALUE env, const char *ptr, size_t len)
312
+ {
313
+ VALUE val;
314
+
315
+ VALIDATE_MAX_URI_LENGTH(len, REQUEST_PATH);
316
+ val = rb_str_new(ptr, len);
317
+
318
+ rb_hash_aset(env, g_REQUEST_PATH, val);
319
+ rb_hash_aset(env, g_PATH_INFO, val);
320
+ }
321
+
322
+ static void
323
+ http_version(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
128
324
  {
129
325
  if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
130
326
  /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
131
- HP_FL_SET(hp, KEEPALIVE);
327
+ hp->persistent = 1;
328
+ }
329
+ if (hp->is_request) {
330
+ VALUE v = str_new_dd_freeze(ptr, len);
331
+ hp->has_header = 1;
332
+
333
+ rb_hash_aset(env, g_SERVER_PROTOCOL, v);
334
+ rb_hash_aset(env, g_HTTP_VERSION, v);
132
335
  }
133
336
  }
134
337
 
@@ -137,21 +340,21 @@ status_phrase(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
137
340
  {
138
341
  long nr;
139
342
 
140
- hp->status = rb_str_new(ptr, len);
343
+ hp->v.status = str_new_dd_freeze(ptr, len);
141
344
 
142
345
  /* RSTRING_PTR is null terminated, ptr is not */
143
- nr = strtol(RSTRING_PTR(hp->status), NULL, 10);
346
+ nr = strtol(RSTRING_PTR(hp->v.status), NULL, 10);
144
347
 
145
348
  if (nr < 100 || nr > 999)
146
- rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->status));
349
+ rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->v.status));
147
350
 
148
351
  if ( !((nr >= 100 && nr <= 199) || nr == 204 || nr == 304) )
149
- HP_FL_SET(hp, HASBODY);
352
+ hp->has_body = 1;
150
353
  }
151
354
 
152
355
  static inline void invalid_if_trailer(struct http_parser *hp)
153
356
  {
154
- if (HP_FL_TEST(hp, INTRAILER))
357
+ if (hp->in_trailer)
155
358
  rb_raise(eParserError, "invalid Trailer");
156
359
  }
157
360
 
@@ -185,15 +388,15 @@ static void write_cont_value(struct http_parser *hp,
185
388
 
186
389
  /* normalize tab to space */
187
390
  if (cont_len > 0) {
188
- assert((' ' == *vptr || '\t' == *vptr) && "invalid leading white space");
391
+ assert(is_lws(*vptr) && "invalid leading white space");
189
392
  *vptr = ' ';
190
393
  }
191
394
  for (end = len - 1; end >= 0 && is_lws(vptr[end]); end--);
192
395
  rb_str_buf_cat(hp->cont, vptr, end + 1);
193
396
  }
194
397
 
195
- static void write_value(VALUE hdr, struct http_parser *hp,
196
- const char *buffer, const char *p)
398
+ static void write_response_value(struct http_parser *hp, VALUE hdr,
399
+ const char *buffer, const char *p)
197
400
  {
198
401
  VALUE f, v;
199
402
  VALUE hclass;
@@ -202,7 +405,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
202
405
  const char *vptr;
203
406
  size_t vlen;
204
407
 
205
- HP_FL_SET(hp, HASHEADER);
408
+ hp->has_header = 1;
206
409
 
207
410
  /* Rack does not like Status headers, so we never send them */
208
411
  if (CSTR_CASE_EQ(fptr, flen, "status")) {
@@ -214,7 +417,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
214
417
  vlen = LEN(mark, p);
215
418
  VALIDATE_MAX_LENGTH(vlen, FIELD_VALUE);
216
419
  VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
217
- f = rb_str_new(fptr, (long)flen);
420
+ f = str_new_dd_freeze(fptr, (long)flen);
218
421
  v = stripped_str_new(vptr, (long)vlen);
219
422
 
220
423
  /* needs more tests for error-checking here */
@@ -226,9 +429,9 @@ static void write_value(VALUE hdr, struct http_parser *hp,
226
429
  if (STR_CSTR_CASE_EQ(f, "connection")) {
227
430
  hp_keepalive_connection(hp, v);
228
431
  } else if (STR_CSTR_CASE_EQ(f, "content-length")) {
229
- if (! HP_FL_TEST(hp, HASBODY))
432
+ if (!hp->has_body)
230
433
  rb_raise(eParserError, "Content-Length with no body expected");
231
- if (HP_FL_TEST(hp, CHUNKED))
434
+ if (hp->chunked)
232
435
  rb_raise(eParserError,
233
436
  "Content-Length when chunked Transfer-Encoding is set");
234
437
  hp->len.content = parse_length(vptr, vlen);
@@ -239,7 +442,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
239
442
  invalid_if_trailer(hp);
240
443
  } else if (STR_CSTR_CASE_EQ(f, "transfer-encoding")) {
241
444
  if (STR_CSTR_CASE_EQ(v, "chunked")) {
242
- if (! HP_FL_TEST(hp, HASBODY))
445
+ if (!hp->has_body)
243
446
  rb_raise(eParserError,
244
447
  "chunked Transfer-Encoding with no body expected");
245
448
  if (hp->len.content >= 0)
@@ -247,13 +450,13 @@ static void write_value(VALUE hdr, struct http_parser *hp,
247
450
  "chunked Transfer-Encoding when Content-Length is set");
248
451
 
249
452
  hp->len.chunk = 0;
250
- HP_FL_SET(hp, CHUNKED);
453
+ hp->chunked = 1;
251
454
  }
252
455
  invalid_if_trailer(hp);
253
456
  } else if (STR_CSTR_CASE_EQ(f, "trailer")) {
254
- if (! HP_FL_TEST(hp, HASBODY))
457
+ if (!hp->has_body)
255
458
  rb_raise(eParserError, "trailer with no body");
256
- HP_FL_SET(hp, HASTRAILER);
459
+ hp->has_trailer = 1;
257
460
  invalid_if_trailer(hp);
258
461
  }
259
462
 
@@ -272,9 +475,6 @@ static void write_value(VALUE hdr, struct http_parser *hp,
272
475
  e = rb_funcall(hdr, id_sq, 1, f);
273
476
 
274
477
  if (NIL_P(e)) {
275
- /* new value, freeze it since it speeds up MRI slightly */
276
- OBJ_FREEZE(f);
277
-
278
478
  if (hclass == rb_cHash)
279
479
  rb_hash_aset(hdr, f, v);
280
480
  else
@@ -295,6 +495,112 @@ static void write_value(VALUE hdr, struct http_parser *hp,
295
495
  }
296
496
  }
297
497
 
498
+ static VALUE req_field(const char *ptr, size_t len)
499
+ {
500
+ size_t pfxlen = sizeof("HTTP_") - 1;
501
+ VALUE str = rb_str_new(NULL, pfxlen + len);
502
+ char *dst = RSTRING_PTR(str);
503
+
504
+ memcpy(dst, "HTTP_", pfxlen);
505
+ memcpy(dst + pfxlen, ptr, len);
506
+ assert(*(dst + RSTRING_LEN(str)) == '\0' &&
507
+ "string didn't end with \\0"); /* paranoia */
508
+
509
+ return str;
510
+ }
511
+
512
+ static void snake_upcase(char *ptr, size_t len)
513
+ {
514
+ char *c;
515
+
516
+ for (c = ptr; len--; c++) {
517
+ if (*c >= 'a' && *c <= 'z')
518
+ *c &= ~0x20;
519
+ else if (*c == '-')
520
+ *c = '_';
521
+ }
522
+ }
523
+
524
+ static void write_request_value(struct http_parser *hp, VALUE env,
525
+ char *buffer, const char *p)
526
+ {
527
+ char *fptr = PTR_TO(start.field);
528
+ size_t flen = hp->s.field_len;
529
+ char *vptr = PTR_TO(mark);
530
+ size_t vlen = LEN(mark, p);
531
+ VALUE key, val;
532
+ VALUE existing;
533
+
534
+ VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
535
+ VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
536
+ snake_upcase(fptr, flen);
537
+
538
+ /*
539
+ * ignore "Version" headers since they conflict with the HTTP_VERSION
540
+ * rack env variable.
541
+ */
542
+ if (CONST_MEM_EQ("VERSION", fptr, flen)) {
543
+ hp->cont = Qnil;
544
+ return;
545
+ }
546
+ val = vlen == 0 ? rb_str_new(0, 0) : stripped_str_new(vptr, vlen);
547
+
548
+ if (CONST_MEM_EQ("CONNECTION", fptr, flen)) {
549
+ key = g_HTTP_CONNECTION;
550
+ hp_keepalive_connection(hp, val);
551
+ } else if (CONST_MEM_EQ("CONTENT_LENGTH", fptr, flen)) {
552
+ key = g_CONTENT_LENGTH;
553
+ hp->len.content = parse_length(vptr, vlen);
554
+ if (hp->len.content < 0)
555
+ rb_raise(eParserError, "invalid Content-Length");
556
+ if (hp->len.content != 0)
557
+ hp->has_body = 1;
558
+ invalid_if_trailer(hp);
559
+ } else if (CONST_MEM_EQ("CONTENT_TYPE", fptr, flen)) {
560
+ key = g_CONTENT_TYPE;
561
+ } else if (CONST_MEM_EQ("TRANSFER_ENCODING", fptr, flen)) {
562
+ key = g_HTTP_TRANSFER_ENCODING;
563
+ if (STR_CSTR_CASE_EQ(val, "chunked")) {
564
+ hp->chunked = 1;
565
+ hp->has_body = 1;
566
+ }
567
+ invalid_if_trailer(hp);
568
+ } else if (CONST_MEM_EQ("TRAILER", fptr, flen)) {
569
+ key = g_HTTP_TRAILER;
570
+ hp->has_trailer = 1;
571
+ invalid_if_trailer(hp);
572
+ } else if (CONST_MEM_EQ("HOST", fptr, flen)) {
573
+ key = g_HTTP_HOST;
574
+ if (NIL_P(hp->v.host))
575
+ hp->v.host = val;
576
+ } else {
577
+ key = req_field(fptr, flen);
578
+ if (!HASH_ASET_DEDUPE)
579
+ key = str_dd_freeze(key);
580
+ }
581
+ existing = rb_hash_aref(env, key);
582
+ if (NIL_P(existing)) {
583
+ hp->cont = rb_hash_aset(env, key, val);
584
+ /*
585
+ * Ignore repeated Host headers and favor host set by absolute URIs.
586
+ * absoluteURI Request-URI takes precedence over
587
+ * the Host: header (ref: rfc 2616, section 5.2.1)
588
+ */
589
+ } else if (key == g_HTTP_HOST) {
590
+ hp->cont = Qnil;
591
+ } else {
592
+ rb_str_buf_cat(existing, ",", 1);
593
+ hp->cont = rb_str_buf_append(existing, val);
594
+ }
595
+ }
596
+
597
+ static void write_value(struct http_parser *hp, VALUE hdr,
598
+ char *buf, const char *p)
599
+ {
600
+ hp->is_request ? write_request_value(hp, hdr, buf, p) :
601
+ write_response_value(hp, hdr, buf, p);
602
+ }
603
+
298
604
  /** Machine **/
299
605
 
300
606
  %%{
@@ -302,10 +608,22 @@ static void write_value(VALUE hdr, struct http_parser *hp,
302
608
 
303
609
  action mark {MARK(mark, fpc); }
304
610
 
611
+ action snake_upcase_field { snake_upcase_char(deconst(fpc)); }
612
+ action downcase_char { downcase_char(deconst(fpc)); }
613
+ action request_method { request_method(hdr, PTR_TO(mark), LEN(mark, fpc)); }
614
+ action url_scheme { url_scheme(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
615
+ action host { request_host(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
616
+ action request_uri { request_uri(hdr, PTR_TO(mark), LEN(mark, fpc)); }
617
+ action fragment { set_fragment(hdr, PTR_TO(mark), LEN(mark, fpc)); }
618
+ action start_query { MARK(start.query, fpc); }
619
+ action query_string {
620
+ query_string(hp, hdr, PTR_TO(start.query), LEN(start.query, fpc));
621
+ }
622
+ action request_path { request_path(hdr, PTR_TO(mark), LEN(mark, fpc)); }
305
623
  action start_field { MARK(start.field, fpc); }
306
624
  action write_field { hp->s.field_len = LEN(start.field, fpc); }
307
625
  action start_value { MARK(mark, fpc); }
308
- action write_value { write_value(hdr, hp, buffer, fpc); }
626
+ action write_value { write_value(hp, hdr, buffer, fpc); }
309
627
  action write_cont_value { write_cont_value(hp, buffer, fpc); }
310
628
  action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
311
629
  action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
@@ -316,10 +634,10 @@ static void write_value(VALUE hdr, struct http_parser *hp,
316
634
  rb_raise(eParserError, "invalid chunk size");
317
635
  }
318
636
  action header_done {
319
- finalize_header(hp);
637
+ finalize_header(hp, hdr);
320
638
  cs = http_parser_first_final;
321
639
 
322
- if (HP_FL_TEST(hp, CHUNKED))
640
+ if (hp->chunked)
323
641
  cs = http_parser_en_ChunkedBody;
324
642
 
325
643
  /*
@@ -335,7 +653,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
335
653
  }
336
654
 
337
655
  action end_chunked_body {
338
- HP_FL_SET(hp, INTRAILER);
656
+ hp->in_trailer = 1;
339
657
  cs = http_parser_en_Trailers;
340
658
  ++p;
341
659
  assert(p <= pe && "buffer overflow after chunked body");
@@ -351,7 +669,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
351
669
  p += nr;
352
670
  assert(hp->len.chunk >= 0 && "negative chunk length");
353
671
  if ((size_t)hp->len.chunk > REMAINING) {
354
- HP_FL_SET(hp, INCHUNK);
672
+ hp->in_chunk = 1;
355
673
  goto post_exec;
356
674
  } else {
357
675
  fhold;
@@ -370,7 +688,7 @@ static void http_parser_init(struct http_parser *hp)
370
688
  int cs = 0;
371
689
  memset(hp, 0, sizeof(struct http_parser));
372
690
  hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */
373
- hp->status = Qnil;
691
+ hp->v.status = Qnil;
374
692
  hp->len.content = -1;
375
693
  %% write init;
376
694
  hp->cs = cs;
@@ -395,43 +713,55 @@ static void http_parser_execute(struct http_parser *hp,
395
713
  assert((void *)(pe - p) == (void *)(len - off) &&
396
714
  "pointers aren't same distance");
397
715
 
398
- if (HP_FL_TEST(hp, INCHUNK)) {
399
- HP_FL_UNSET(hp, INCHUNK);
716
+ if (hp->in_chunk) {
717
+ hp->in_chunk = 0;
400
718
  goto skip_chunk_data_hack;
401
719
  }
402
720
  %% write exec;
403
721
  post_exec: /* "_out:" also goes here */
404
722
  if (hp->cs != http_parser_error)
405
723
  hp->cs = cs;
406
- hp->offset = p - buffer;
724
+ hp->offset = ulong2uint(p - buffer);
407
725
 
408
726
  assert(p <= pe && "buffer overflow after parsing execute");
409
727
  assert(hp->offset <= len && "offset longer than length");
410
728
  }
411
729
 
412
- static struct http_parser *data_get(VALUE self)
730
+ static void kcar_mark(void *ptr)
413
731
  {
414
- struct http_parser *hp;
732
+ struct http_parser *hp = ptr;
415
733
 
416
- Data_Get_Struct(self, struct http_parser, hp);
417
- assert(hp && "failed to extract http_parser struct");
418
- return hp;
734
+ rb_gc_mark(hp->cont);
735
+ rb_gc_mark(hp->v.status);
419
736
  }
420
737
 
421
- static void mark(void *ptr)
738
+ static size_t kcar_memsize(const void *ptr)
422
739
  {
423
- struct http_parser *hp = ptr;
740
+ return sizeof(struct http_parser);
741
+ }
424
742
 
425
- rb_gc_mark(hp->cont);
426
- rb_gc_mark(hp->status);
743
+ static const rb_data_type_t kcar_type = {
744
+ "kcar_parser",
745
+ { kcar_mark, RUBY_TYPED_DEFAULT_FREE, kcar_memsize, /* reserved */ },
746
+ /* parent, data, [ flags ] */
747
+ };
748
+
749
+ static VALUE kcar_alloc(VALUE klass)
750
+ {
751
+ struct http_parser *hp;
752
+ return TypedData_Make_Struct(klass, struct http_parser, &kcar_type, hp);
427
753
  }
428
754
 
429
- static VALUE alloc(VALUE klass)
755
+ static struct http_parser *data_get(VALUE self)
430
756
  {
431
757
  struct http_parser *hp;
432
- return Data_Make_Struct(klass, struct http_parser, mark, -1, hp);
758
+
759
+ TypedData_Get_Struct(self, struct http_parser, &kcar_type, hp);
760
+ assert(hp && "failed to extract http_parser struct");
761
+ return hp;
433
762
  }
434
763
 
764
+
435
765
  /**
436
766
  * call-seq:
437
767
  * Kcar::Parser.new => parser
@@ -485,7 +815,7 @@ static VALUE body_bytes_left(VALUE self)
485
815
  {
486
816
  struct http_parser *hp = data_get(self);
487
817
 
488
- if (HP_FL_TEST(hp, CHUNKED))
818
+ if (hp->chunked)
489
819
  return Qnil;
490
820
  if (hp->len.content >= 0)
491
821
  return OFFT2NUM(hp->len.content);
@@ -505,9 +835,11 @@ static VALUE body_bytes_left_set(VALUE self, VALUE bytes)
505
835
  {
506
836
  struct http_parser *hp = data_get(self);
507
837
 
508
- if (HP_FL_TEST(hp, CHUNKED))
838
+ if (hp->chunked)
509
839
  rb_raise(rb_eRuntimeError, "body_bytes_left= is not for chunked bodies");
510
840
  hp->len.content = NUM2OFFT(bytes);
841
+ if (hp->len.content == 0)
842
+ hp->body_eof_seen = 1;
511
843
  return bytes;
512
844
  }
513
845
 
@@ -522,7 +854,30 @@ static VALUE chunked(VALUE self)
522
854
  {
523
855
  struct http_parser *hp = data_get(self);
524
856
 
525
- return HP_FL_TEST(hp, CHUNKED) ? Qtrue : Qfalse;
857
+ return hp->chunked ? Qtrue : Qfalse;
858
+ }
859
+
860
+ static void check_buffer_size(long dlen)
861
+ {
862
+ if ((uint64_t)dlen > UINT_MAX)
863
+ rb_raise(rb_eRangeError, "headers too large to process (%ld bytes)", dlen);
864
+ }
865
+
866
+ static void parser_execute(struct http_parser *hp, VALUE hdr, VALUE buf)
867
+ {
868
+ char *ptr;
869
+ long len;
870
+
871
+ Check_Type(buf, T_STRING);
872
+ rb_str_modify(buf);
873
+ ptr = RSTRING_PTR(buf);
874
+ len = RSTRING_LEN(buf);
875
+ check_buffer_size(len);
876
+
877
+ http_parser_execute(hp, hdr, ptr, len);
878
+
879
+ if (hp->cs == http_parser_error)
880
+ rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
526
881
  }
527
882
 
528
883
  /**
@@ -541,28 +896,49 @@ static VALUE headers(VALUE self, VALUE hdr, VALUE data)
541
896
  {
542
897
  struct http_parser *hp = data_get(self);
543
898
 
544
- http_parser_execute(hp, hdr, RSTRING_PTR(data), RSTRING_LEN(data));
899
+ if (hp->is_request)
900
+ rb_raise(rb_eRuntimeError, "parser is handling a request, not response");
901
+
902
+ parser_execute(hp, hdr, data);
545
903
  VALIDATE_MAX_LENGTH(hp->offset, HEADER);
546
904
 
547
905
  if (hp->cs == http_parser_first_final ||
548
906
  hp->cs == http_parser_en_ChunkedBody) {
549
907
  advance_str(data, hp->offset + 1);
550
908
  hp->offset = 0;
551
- if (HP_FL_TEST(hp, INTRAILER))
909
+ if (hp->in_trailer)
552
910
  return hdr;
553
911
  else
554
- return rb_ary_new3(2, hp->status, hdr);
912
+ return rb_ary_new3(2, hp->v.status, hdr);
555
913
  }
556
914
 
557
- if (hp->cs == http_parser_error)
558
- rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
559
-
560
915
  return Qnil;
561
916
  }
562
917
 
918
+ static VALUE request(VALUE self, VALUE env, VALUE buf)
919
+ {
920
+ struct http_parser *hp = data_get(self);
921
+
922
+ hp->is_request = 1;
923
+ Check_Type(buf, T_STRING);
924
+ parser_execute(hp, env, buf);
925
+
926
+ if (hp->cs == http_parser_first_final ||
927
+ hp->cs == http_parser_en_ChunkedBody) {
928
+ advance_str(buf, hp->offset + 1);
929
+ hp->offset = 0;
930
+ if (hp->in_trailer)
931
+ hp->body_eof_seen = 1;
932
+
933
+ return env;
934
+ }
935
+ return Qnil; /* incomplete */
936
+ }
937
+
938
+
563
939
  static int chunked_eof(struct http_parser *hp)
564
940
  {
565
- return ((hp->cs == http_parser_first_final) || HP_FL_TEST(hp, INTRAILER));
941
+ return ((hp->cs == http_parser_first_final) || hp->in_trailer);
566
942
  }
567
943
 
568
944
  /**
@@ -576,13 +952,13 @@ static VALUE body_eof(VALUE self)
576
952
  {
577
953
  struct http_parser *hp = data_get(self);
578
954
 
579
- if (!HP_FL_TEST(hp, HASHEADER) && HP_FL_ALL(hp, KEEPALIVE))
955
+ if (!hp->has_header && hp->persistent)
580
956
  return Qtrue;
581
957
 
582
- if (HP_FL_TEST(hp, CHUNKED))
958
+ if (hp->chunked)
583
959
  return chunked_eof(hp) ? Qtrue : Qfalse;
584
960
 
585
- if (! HP_FL_TEST(hp, HASBODY))
961
+ if (!hp->has_body)
586
962
  return Qtrue;
587
963
 
588
964
  return hp->len.content == 0 ? Qtrue : Qfalse;
@@ -604,10 +980,14 @@ static VALUE keepalive(VALUE self)
604
980
  {
605
981
  struct http_parser *hp = data_get(self);
606
982
 
607
- if (HP_FL_ALL(hp, KEEPALIVE)) {
608
- if (HP_FL_TEST(hp, HASHEADER) && HP_FL_TEST(hp, HASBODY) ) {
609
- if (HP_FL_TEST(hp, CHUNKED) || (hp->len.content >= 0))
610
- return Qtrue;
983
+ if (hp->persistent) {
984
+ if (hp->has_header && hp->has_body) {
985
+ if (hp->chunked || (hp->len.content >= 0)) {
986
+ if (!hp->is_request)
987
+ return Qtrue;
988
+ else
989
+ return hp->body_eof_seen ? Qtrue : Qfalse;
990
+ }
611
991
 
612
992
  /* unknown Content-Length and not chunked, we must assume close */
613
993
  return Qfalse;
@@ -621,54 +1001,77 @@ static VALUE keepalive(VALUE self)
621
1001
 
622
1002
  /**
623
1003
  * call-seq:
624
- * parser.filter_body(buf, data) => nil/data
1004
+ * parser.filter_body(dst, src) => nil/dst
625
1005
  *
626
- * Takes a String of +data+, will modify data if dechunking is done.
627
- * Returns +nil+ if there is more data left to process. Returns
628
- * +data+ if body processing is complete. When returning +data+,
629
- * it may modify +data+ so the start of the string points to where
1006
+ * Takes a String of +src+, will modify src if dechunking is done.
1007
+ * Returns +nil+ if there is more +src+ left to process. Returns
1008
+ * +dst+ if body processing is complete. When returning +dst+,
1009
+ * it may modify +src+ so the start of the string points to where
630
1010
  * the body ended so that trailer processing can begin.
631
1011
  *
632
1012
  * Raises ParserError if there are dechunking errors.
633
- * Basically this is a glorified memcpy(3) that copies +data+
634
- * into +buf+ while filtering it through the dechunker.
1013
+ * Basically this is a glorified memcpy(3) that copies +src+
1014
+ * into +dst+ while filtering it through the dechunker.
635
1015
  */
636
- static VALUE filter_body(VALUE self, VALUE buf, VALUE data)
1016
+ static VALUE filter_body(VALUE self, VALUE dst, VALUE src)
637
1017
  {
638
1018
  struct http_parser *hp = data_get(self);
639
- char *dptr;
640
- long dlen;
1019
+ char *sptr;
1020
+ long slen;
641
1021
 
642
- dptr = RSTRING_PTR(data);
643
- dlen = RSTRING_LEN(data);
1022
+ sptr = RSTRING_PTR(src);
1023
+ slen = RSTRING_LEN(src);
1024
+ check_buffer_size(slen);
644
1025
 
645
- StringValue(buf);
646
- rb_str_modify(buf);
647
- rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */
648
- OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */
1026
+ StringValue(dst);
1027
+ rb_str_modify(dst);
1028
+ OBJ_TAINT(dst); /* keep weirdo $SAFE users happy */
649
1029
 
650
- if (!HP_FL_TEST(hp, CHUNKED))
1030
+ /*
1031
+ * for now, only support filter_body for identity requests,
1032
+ * not responses; it's rather inefficient to blindly memcpy
1033
+ * giant request bodies; on the other hand, it simplifies
1034
+ * server-side code.
1035
+ */
1036
+ if (hp->is_request && !hp->chunked) {
1037
+ /* no need to enter the Ragel machine for unchunked transfers */
1038
+ assert(hp->len.content >= 0 && "negative Content-Length");
1039
+ if (hp->len.content > 0) {
1040
+ long nr = MIN(slen, hp->len.content);
1041
+
1042
+ rb_str_resize(dst, nr);
1043
+ memcpy(RSTRING_PTR(dst), sptr, nr);
1044
+ hp->len.content -= nr;
1045
+ if (hp->len.content == 0)
1046
+ hp->body_eof_seen = 1;
1047
+ advance_str(src, nr);
1048
+ }
1049
+ return dst;
1050
+ }
1051
+
1052
+ if (!hp->chunked)
651
1053
  rb_raise(rb_eRuntimeError, "filter_body is only for chunked bodies");
652
1054
 
1055
+ rb_str_resize(dst, slen); /* we can never copy more than slen bytes */
653
1056
  if (!chunked_eof(hp)) {
654
1057
  hp->s.dest_offset = 0;
655
- http_parser_execute(hp, buf, dptr, dlen);
1058
+ http_parser_execute(hp, dst, sptr, slen);
656
1059
  if (hp->cs == http_parser_error)
657
1060
  rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
658
1061
 
659
1062
  assert(hp->s.dest_offset <= hp->offset &&
660
1063
  "destination buffer overflow");
661
- advance_str(data, hp->offset);
662
- rb_str_set_len(buf, hp->s.dest_offset);
1064
+ advance_str(src, hp->offset);
1065
+ rb_str_set_len(dst, hp->s.dest_offset);
663
1066
 
664
- if (RSTRING_LEN(buf) == 0 && chunked_eof(hp)) {
1067
+ if (RSTRING_LEN(dst) == 0 && chunked_eof(hp)) {
665
1068
  assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
666
1069
  } else {
667
- data = Qnil;
1070
+ dst = Qnil;
668
1071
  }
669
1072
  }
670
1073
  hp->offset = 0; /* for trailer parsing */
671
- return data;
1074
+ return dst;
672
1075
  }
673
1076
 
674
1077
  void Init_kcar_ext(void)
@@ -676,11 +1079,21 @@ void Init_kcar_ext(void)
676
1079
  VALUE mKcar = rb_define_module("Kcar");
677
1080
  VALUE cParser = rb_define_class_under(mKcar, "Parser", rb_cObject);
678
1081
 
1082
+ /*
1083
+ * Document-class: Kcar::ParserError
1084
+ *
1085
+ * This is raised if there are parsing errors.
1086
+ */
679
1087
  eParserError = rb_define_class_under(mKcar, "ParserError", rb_eIOError);
1088
+ e413 = rb_define_class_under(mKcar, "RequestEntityTooLargeError",
1089
+ eParserError);
1090
+ e414 = rb_define_class_under(mKcar, "RequestURITooLongError",
1091
+ eParserError);
680
1092
 
681
- rb_define_alloc_func(cParser, alloc);
1093
+ rb_define_alloc_func(cParser, kcar_alloc);
682
1094
  rb_define_method(cParser, "initialize", initialize, 0);
683
1095
  rb_define_method(cParser, "reset", initialize, 0);
1096
+ rb_define_method(cParser, "request", request, 2);
684
1097
  rb_define_method(cParser, "headers", headers, 2);
685
1098
  rb_define_method(cParser, "trailers", headers, 2);
686
1099
  rb_define_method(cParser, "filter_body", filter_body, 2);
@@ -706,4 +1119,34 @@ void Init_kcar_ext(void)
706
1119
  rb_define_const(cParser, "LENGTH_MAX", OFFT2NUM(UH_OFF_T_MAX));
707
1120
  id_sq = rb_intern("[]");
708
1121
  id_sq_set = rb_intern("[]=");
1122
+ id_uminus = rb_intern("-@");
1123
+
1124
+ /* TODO: gperf to make a perfect hash of common strings */
1125
+ #define C(var, cstr) do { \
1126
+ var = str_new_dd_freeze((cstr), sizeof(cstr) - 1); \
1127
+ rb_gc_register_mark_object((var)); \
1128
+ } while (0);
1129
+
1130
+ C(g_CONTENT_LENGTH, "CONTENT_LENGTH");
1131
+ C(g_CONTENT_TYPE, "CONTENT_TYPE");
1132
+ C(g_FRAGMENT, "FRAGMENT");
1133
+ C(g_HTTP_HOST, "HTTP_HOST");
1134
+ C(g_HTTP_CONNECTION, "HTTP_CONNECTION");
1135
+ C(g_HTTP_TRAILER, "HTTP_TRAILER");
1136
+ C(g_HTTP_TRANSFER_ENCODING, "HTTP_TRANSFER_ENCODING");
1137
+ C(g_HTTP_VERSION, "HTTP_VERSION");
1138
+ C(g_PATH_INFO, "PATH_INFO");
1139
+ C(g_QUERY_STRING, "QUERY_STRING");
1140
+ C(g_REQUEST_METHOD, "REQUEST_METHOD");
1141
+ C(g_REQUEST_PATH, "REQUEST_PATH");
1142
+ C(g_REQUEST_URI, "REQUEST_URI");
1143
+ C(g_SERVER_NAME, "SERVER_NAME");
1144
+ C(g_SERVER_PORT, "SERVER_PORT");
1145
+ C(g_SERVER_PROTOCOL, "SERVER_PROTOCOL");
1146
+ C(g_rack_url_scheme, "rack.url_scheme");
1147
+ C(g_http, "http");
1148
+ C(g_https, "https");
1149
+ C(g_80, "80");
1150
+ C(g_443, "443");
1151
+ #undef C
709
1152
  }