kcar 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 11fd8bf65267d66726c60a780beb44140d7ce64e
4
- data.tar.gz: a6508831c72ee890f06a1a6b70858b15a3817ddb
2
+ SHA256:
3
+ metadata.gz: f7cd96d8a5fa081a4e6ac36feb24d3a737f428550da5fa589ab564c516d01820
4
+ data.tar.gz: 7383af40cdd8bc3ec9954feb4199d0f89ad04fb468633e22e515f6dd70ffc075
5
5
  SHA512:
6
- metadata.gz: 7c28c23285c4c881749facc72bb8388b7e66d8231201b1b5ac87c83ddb2482d8b9324ea47d450eae0e63cdccb2b5bb2f42274ca9835276a92b0cabe7b6ab90c8
7
- data.tar.gz: 2b5ee78603d2e7e36df30e60b06bf3f701b1aa240655b8df84f71e91b0254333b587c2be955816505673b1a92eb234f0151f39ff7e6749c80e540ad9cb645821
6
+ metadata.gz: cccb634b38dc4d944f3a216c2f2acafbc563e6efa782643d6ff845f8b09e5338fe285c1d862cab8363ab1598298b6ed1f05d0509c2fa1153adb5f946be45fddb
7
+ data.tar.gz: 92f1e61ef2049429835403ddc964179fb611ace06ce33a29f20fd3659db629ae05f532bf473efd44372192e476acea23cc09347bc271d2e0bdff42a70d99add2
data/.document CHANGED
@@ -1,5 +1,6 @@
1
1
  LICENSE
2
2
  README
3
3
  NEWS
4
+ HACKING
4
5
  lib
5
6
  ext/kcar/kcar.c
@@ -1,7 +1,14 @@
1
1
  ---
2
- cgit_url: http://bogomips.org/kcar.git
3
- git_url: git://bogomips.org/kcar.git
4
- rdoc_url: http://bogomips.org/kcar/
5
- ml_url: http://bogomips.org/kcar-public/
6
- private_email: kcar@bogomips.org
7
- public_email: kcar-public@bogomips.org
2
+ cgit_url: https://yhbt.net/kcar.git
3
+ git_url: https://yhbt.net/kcar.git
4
+ rdoc_url: https://yhbt.net/kcar/
5
+ ml_url:
6
+ - https://yhbt.net/kcar-public/
7
+ - http://ou63pmih66umazou.onion/kcar-public/
8
+ public_email: kcar-public@yhbt.net
9
+ source_code:
10
+ - git clone https://yhbt.net/kcar.git
11
+ - torsocks git clone http://ou63pmih66umazou.onion/kcar.git
12
+ nntp_url:
13
+ - nntp://news.public-inbox.org/inbox.comp.lang.ruby.kcar
14
+ - nntp://ou63pmih66umazou.onion/inbox.comp.lang.ruby.kcar
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  CONSTANT = "Kcar::VERSION"
3
3
  RVF = "lib/kcar/version.rb"
4
- DEF_VER = "v0.6.0"
4
+ DEF_VER = "v0.7.0"
5
5
  GVF = "GIT-VERSION-FILE"
6
6
  vn = DEF_VER
7
7
 
@@ -21,7 +21,7 @@ if File.exist?(".git")
21
21
  end
22
22
 
23
23
  vn = vn.sub!(/\Av/, "")
24
- new_ruby_version = "#{CONSTANT} = '#{vn}'\n"
24
+ new_ruby_version = "#{CONSTANT} = '#{vn}' # :nodoc:\n"
25
25
  cur_ruby_version = File.read(RVF) rescue nil
26
26
  if new_ruby_version != cur_ruby_version
27
27
  File.open(RVF, "w") { |fp| fp.write(new_ruby_version) }
@@ -1,5 +1,5 @@
1
1
  all::
2
- RSYNC_DEST := bogomips.org:/srv/bogomips/kcar
2
+ RSYNC_DEST := yhbt.net:/srv/bogomips/kcar
3
3
  RAGEL = ragel
4
4
  RLFLAGS = -G2
5
5
  rfpackage := kcar
data/HACKING ADDED
@@ -0,0 +1,36 @@
1
+ == development dependencies
2
+
3
+ * GNU make - https://www.gnu.org/software/make/
4
+ * git - https://www.git-scm.com/
5
+ * ruby - https://www.ruby-lang.org/en/
6
+
7
+ git clone https://yhbt.net/kcar.git
8
+
9
+ == tests
10
+
11
+ * make test - run each test in a separate process (parallelize using -j)
12
+
13
+ For non-GNU users, GNU make may be installed as "gmake".
14
+
15
+ == test environment
16
+
17
+ RUBY - specify an alternative ruby(1) runtime
18
+ V - set to 1 for verbose test output (may be mangled if multithreaded)
19
+
20
+ == installing from git
21
+
22
+ * make install-gem
23
+
24
+ == contact
25
+
26
+ We use git(7) and develop kcar on a public mailing list like git.git
27
+ developers do. Please send patches via git-send-email(1) to the public
28
+ mailing list at <mailto:kcar-public@yhbt.net>. Pull requests should be
29
+ formatted using git-request-pull(1).
30
+
31
+ All mail is archived publicly at: https://yhbt.net/kcar-public/
32
+ and nntp://news.public-inbox.org/inbox.comp.lang.ruby.kcar
33
+
34
+ Anonymous contributions will always be welcome.
35
+ No subscription is necessary to post to the mailing list.
36
+ Please remember to Cc: all recipients as subscription is optional.
data/LICENSE CHANGED
@@ -1,7 +1,7 @@
1
1
  kcar is copyrighted free software by all contributors, see logs in
2
2
  revision control for names and email addresses of all of them. You can
3
3
  redistribute it and/or modify it under either the terms of the
4
- {GPLv2}[http://www.gnu.org/licenses/gpl-2.0.txt] or later or
4
+ {GPLv2}[https://www.gnu.org/licenses/gpl-2.0.txt] or later or
5
5
  the conditions below:
6
6
 
7
7
  1. You may make and give away verbatim copies of the source form of the
data/README CHANGED
@@ -7,7 +7,7 @@ regular files, FIFOs, StringIOs as well as traditional TCP sockets.
7
7
 
8
8
  == Features
9
9
 
10
- * RFC2616-compliant Ragel+C parser adapted from Unicorn and Mongrel
10
+ * RFC2616-compliant Ragel+C parser adapted from Mongrel
11
11
 
12
12
  * decodes chunked response bodies with an optional pass-through mode
13
13
  (to avoid rechunking with Rack::Chunked)
@@ -35,7 +35,7 @@ If you use RubyGems:
35
35
 
36
36
  Otherwise grab the latest tarball from:
37
37
 
38
- http://bogomips.org/kcar/files/
38
+ https://yhbt.net/kcar/files/
39
39
 
40
40
  Unpack it, and run "ruby setup.rb"
41
41
 
@@ -62,14 +62,14 @@ through the body with body.each.
62
62
 
63
63
  You can get the latest source via git from the following locations:
64
64
 
65
- git://bogomips.org/kcar.git
66
- git://repo.or.cz/kcar.git (mirror)
65
+ https://yhbt.net/kcar.git
66
+ https://repo.or.cz/kcar.git (mirror)
67
67
 
68
68
  You may browse the code from the web and download the latest snapshot
69
69
  tarballs here:
70
70
 
71
- * http://bogomips.org/cgit/kcar.git (cgit)
72
- * http://repo.or.cz/w/kcar.git (gitweb)
71
+ * https://yhbt.net/kcar.git
72
+ * https://repo.or.cz/w/kcar.git (gitweb)
73
73
 
74
74
  Inline patches (from "git format-patch") to the mailing list are
75
75
  preferred because they allow code review and comments in the reply to
@@ -83,8 +83,17 @@ don't email the git mailing list or maintainer with kcar patches.
83
83
  == Contact
84
84
 
85
85
  All feedback (bug reports, user/development discussion, patches, pull
86
- requests) go to the mailing list: mailto:kcar-public@bogomips.org
86
+ requests) goes to the public mailing list: mailto:kcar-public@yhbt.net
87
+ All mail is archived publicly at: https://yhbt.net/kcar-public/
88
+ and nntp://news.public-inbox.org/inbox.comp.lang.ruby.kcar
87
89
 
88
- Mailing list archives are available here:
90
+ Anonymous posts will always be welcome.
89
91
 
90
- http://bogomips.org/kcar-public/
92
+ No subscription is necessary to post to the mailing list;
93
+ but you may subscribe by sending a plain-text mail to:
94
+
95
+ mailto:kcar-public+subscribe@yhbt.net
96
+
97
+ Keep in mind we suck at delivering email, so using NNTP or
98
+ Atom feeds might be a better bet.
99
+ Please remember to Cc: all recipients as subscription is optional.
@@ -1,4 +1,7 @@
1
+ # This config was used to create the import used to migrate
2
+ # mailing list archives from gmane to https://yhbt.net/kcar-public/
3
+
1
4
  # group_name max expire headers_only
2
5
  gmane.comp.lang.ruby.kcar.general 1000000000 1000000000 0
3
6
 
4
- # usage: slrnpull -d $PWD -h news.gmane.org --no-post
7
+ # usage: slrnpull -d $PWD -h news.gmane.io --no-post
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  * Generic C functions and macros go here, there are no dependencies
3
- * on Unicorn internal structures or the Ruby C API in here.
3
+ * on kcar internal structures or the Ruby C API in here.
4
4
  */
5
5
 
6
6
  #ifndef UH_util_h
@@ -49,7 +49,7 @@ static int hexchar2int(int xdigit)
49
49
  if (xdigit >= 'a' && xdigit <= 'f')
50
50
  return xdigit - 'a' + 10;
51
51
 
52
- /* Ragel already does runtime range checking for us in Unicorn: */
52
+ /* Ragel already does runtime range checking for us */
53
53
  assert(xdigit >= '0' && xdigit <= '9' && "invalid digit character");
54
54
 
55
55
  return xdigit - '0';
@@ -1,26 +1,6 @@
1
1
  #ifndef ext_help_h
2
2
  #define ext_help_h
3
3
 
4
- #ifndef RSTRING_PTR
5
- #define RSTRING_PTR(s) (RSTRING(s)->ptr)
6
- #endif /* !defined(RSTRING_PTR) */
7
- #ifndef RSTRING_LEN
8
- #define RSTRING_LEN(s) (RSTRING(s)->len)
9
- #endif /* !defined(RSTRING_LEN) */
10
-
11
- #ifndef HAVE_RB_STR_SET_LEN
12
- # ifdef RUBINIUS
13
- # error we should never get here with current Rubinius (1.x)
14
- # endif
15
- /* this is taken from Ruby 1.8.7, 1.8.6 may not have it */
16
- static void rb_18_str_set_len(VALUE str, long len)
17
- {
18
- RSTRING(str)->len = len;
19
- RSTRING(str)->ptr[len] = '\0';
20
- }
21
- # define rb_str_set_len(str,len) rb_18_str_set_len(str,len)
22
- #endif /* !defined(HAVE_RB_STR_SET_LEN) */
23
-
24
4
  /* not all Ruby implementations support frozen objects (Rubinius does not) */
25
5
  #if defined(OBJ_FROZEN)
26
6
  # define assert_frozen(f) assert(OBJ_FROZEN(f) && "unfrozen object")
@@ -36,10 +16,6 @@ static void rb_18_str_set_len(VALUE str, long len)
36
16
  # endif
37
17
  #endif /* ! defined(OFFT2NUM) */
38
18
 
39
- #ifndef HAVE_RB_STR_MODIFY
40
- # define rb_str_modify(x) do {} while (0)
41
- #endif /* ! defined(HAVE_RB_STR_MODIFY) */
42
-
43
19
  static inline int str_cstr_eq(VALUE val, const char *ptr, long len)
44
20
  {
45
21
  return (RSTRING_LEN(val) == len && !memcmp(ptr, RSTRING_PTR(val), len));
@@ -5,10 +5,35 @@ dir_config("kcar")
5
5
 
6
6
  have_macro("SIZEOF_OFF_T", "ruby.h") or check_sizeof("off_t", "sys/types.h")
7
7
  have_macro("SIZEOF_LONG", "ruby.h") or check_sizeof("long", "sys/types.h")
8
- have_func("rb_str_set_len", "ruby.h")
9
- have_func("rb_str_modify", "ruby.h")
10
8
 
11
- # -fPIC is needed for Rubinius, MRI already uses it regardless
12
- with_cflags($CFLAGS + " -fPIC ") do
13
- create_makefile("kcar_ext")
9
+ message('checking if String#-@ (str_uminus) dedupes... ')
10
+ begin
11
+ a = -(%w(t e s t).join)
12
+ b = -(%w(t e s t).join)
13
+ if a.equal?(b)
14
+ $CPPFLAGS += " -DSTR_UMINUS_DEDUPE=1 "
15
+ message("yes\n")
16
+ else
17
+ $CPPFLAGS += " -DSTR_UMINUS_DEDUPE=0 "
18
+ message("no, needs Ruby 2.5+\n")
19
+ end
20
+ rescue NoMethodError
21
+ $CPPFLAGS += " -DSTR_UMINUS_DEDUPE=0 "
22
+ message("no, String#-@ not available\n")
14
23
  end
24
+
25
+ message('checking if Hash#[]= (rb_hash_aset) dedupes... ')
26
+ h = {}
27
+ x = {}
28
+ r = rand.to_s
29
+ h[%W(#{r}).join('')] = :foo
30
+ x[%W(#{r}).join('')] = :foo
31
+ if x.keys[0].equal?(h.keys[0])
32
+ $CPPFLAGS += ' -DHASH_ASET_DEDUPE=1 '
33
+ message("yes\n")
34
+ else
35
+ $CPPFLAGS += ' -DHASH_ASET_DEDUPE=0 '
36
+ message("no, needs Ruby 2.6+\n")
37
+ end
38
+
39
+ create_makefile("kcar_ext")
@@ -10,10 +10,20 @@
10
10
  #include <stdlib.h>
11
11
  #include <string.h>
12
12
  #include <sys/types.h>
13
+ #include <limits.h>
13
14
  #include "c_util.h"
14
15
 
15
16
  static VALUE eParserError;
16
- static ID id_sq, id_sq_set;
17
+ static ID id_uminus, id_sq, id_sq_set;
18
+ static VALUE g_rack_url_scheme,
19
+ g_80, g_443, g_http, g_https,
20
+ g_HTTP_HOST, g_HTTP_CONNECTION, g_HTTP_TRAILER, g_HTTP_TRANSFER_ENCODING,
21
+ g_HTTP_VERSION,
22
+ g_CONTENT_LENGTH, g_CONTENT_TYPE, g_FRAGMENT,
23
+ g_PATH_INFO, g_QUERY_STRING,
24
+ g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI,
25
+ g_SERVER_NAME, g_SERVER_PORT, g_SERVER_PROTOCOL;
26
+ static VALUE e413, e414;
17
27
 
18
28
  /** Defines common length and error messages for input length validation. */
19
29
  #define DEF_MAX_LENGTH(N, length) \
@@ -30,58 +40,104 @@ static ID id_sq, id_sq_set;
30
40
  rb_raise(eParserError, MAX_##N##_LENGTH_ERR); \
31
41
  } while (0)
32
42
 
43
+ #define VALIDATE_MAX_URI_LENGTH(len, N) do { \
44
+ if (len > MAX_##N##_LENGTH) \
45
+ rb_raise(e414, MAX_##N##_LENGTH_ERR); \
46
+ } while (0)
47
+
33
48
  /* Defines the maximum allowed lengths for various input elements.*/
34
49
  DEF_MAX_LENGTH(FIELD_NAME, 256);
35
50
  DEF_MAX_LENGTH(FIELD_VALUE, 80 * 1024);
36
51
  DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32)));
37
-
38
- #define UH_FL_CHUNKED 0x1
39
- #define UH_FL_HASBODY 0x2
40
- #define UH_FL_INBODY 0x4
41
- #define UH_FL_HASTRAILER 0x8
42
- #define UH_FL_INTRAILER 0x10
43
- #define UH_FL_INCHUNK 0x20
44
- #define UH_FL_KEEPALIVE 0x40
45
- #define UH_FL_HASHEADER 0x80
52
+ DEF_MAX_LENGTH(REQUEST_URI, 1024 * 15);
53
+ DEF_MAX_LENGTH(REQUEST_PATH, 4096); /* common PATH_MAX on modern systems */
54
+ DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10));
46
55
 
47
56
  struct http_parser {
48
57
  int cs; /* Ragel internal state */
49
- unsigned int flags;
50
- size_t mark;
51
- size_t offset;
58
+ unsigned int is_request:1;
59
+ unsigned int has_query:1;
60
+ unsigned int has_scheme:1;
61
+ unsigned int chunked:1;
62
+ unsigned int has_body:1;
63
+ unsigned int in_body:1;
64
+ unsigned int has_trailer:1;
65
+ unsigned int in_trailer:1;
66
+ unsigned int in_chunk:1;
67
+ unsigned int persistent:1;
68
+ unsigned int has_header:1;
69
+ unsigned int body_eof_seen:1;
70
+ unsigned int is_https:1;
71
+ unsigned int padding:19;
72
+ unsigned int mark;
73
+ unsigned int offset;
52
74
  union { /* these 2 fields don't nest */
53
- size_t field;
54
- size_t query;
75
+ unsigned int field;
76
+ unsigned int query;
55
77
  } start;
56
78
  union {
57
- size_t field_len; /* only used during header processing */
58
- size_t dest_offset; /* only used during body processing */
79
+ unsigned int field_len; /* only used during header processing */
80
+ unsigned int dest_offset; /* only used during body processing */
59
81
  } s;
60
82
  VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */
61
- VALUE status; /* String or Qnil */
83
+ union {
84
+ /* String or Qnil */
85
+ VALUE status; /* status string for responses */
86
+ VALUE host; /* Host: header for requests */
87
+ } v;
62
88
  union {
63
89
  off_t content;
64
90
  off_t chunk;
65
91
  } len;
66
92
  };
67
93
 
94
+ static unsigned int ulong2uint(unsigned long n)
95
+ {
96
+ unsigned int i = (unsigned int)n;
97
+
98
+ if (sizeof(unsigned int) != sizeof(unsigned long)) {
99
+ if ((unsigned long)i != n) {
100
+ rb_raise(rb_eRangeError, "too large to be 32-bit uint: %lu", n);
101
+ }
102
+ }
103
+ return i;
104
+ }
105
+
68
106
  #define REMAINING (unsigned long)(pe - p)
69
- #define LEN(AT, FPC) (FPC - buffer - hp->AT)
70
- #define MARK(M,FPC) (hp->M = (FPC) - buffer)
107
+ #define LEN(AT, FPC) (ulong2uint(FPC - buffer) - hp->AT)
108
+ #define MARK(M,FPC) (hp->M = ulong2uint((FPC) - buffer))
71
109
  #define PTR_TO(F) (buffer + hp->F)
72
110
  #define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))
73
111
  #define STRIPPED_STR_NEW(M,FPC) stripped_str_new(PTR_TO(M), LEN(M, FPC))
74
112
 
75
- #define HP_FL_TEST(hp,fl) ((hp)->flags & (UH_FL_##fl))
76
- #define HP_FL_SET(hp,fl) ((hp)->flags |= (UH_FL_##fl))
77
- #define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl))
78
- #define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl))
113
+ /* Downcases a single ASCII character. Locale-agnostic. */
114
+ static void downcase_char(char *c)
115
+ {
116
+ if (*c >= 'A' && *c <= 'Z')
117
+ *c |= 0x20;
118
+ }
79
119
 
80
120
  static int is_lws(char c)
81
121
  {
82
122
  return (c == ' ' || c == '\t');
83
123
  }
84
124
 
125
+ /* this will dedupe under Ruby 2.5+ (December 2017) */
126
+ static VALUE str_dd_freeze(VALUE str)
127
+ {
128
+ if (STR_UMINUS_DEDUPE)
129
+ return rb_funcall(str, id_uminus, 0);
130
+
131
+ /* freeze,since it speeds up MRI slightly */
132
+ OBJ_FREEZE(str);
133
+ return str;
134
+ }
135
+
136
+ static VALUE str_new_dd_freeze(const char *ptr, long len)
137
+ {
138
+ return str_dd_freeze(rb_str_new(ptr, len));
139
+ }
140
+
85
141
  static VALUE stripped_str_new(const char *str, long len)
86
142
  {
87
143
  long end;
@@ -91,10 +147,69 @@ static VALUE stripped_str_new(const char *str, long len)
91
147
  return rb_str_new(str, end + 1);
92
148
  }
93
149
 
94
- static void finalize_header(struct http_parser *hp)
150
+ static VALUE request_host_val(struct http_parser *hp)
151
+ {
152
+ assert(hp->is_request == 1 && "not a request");
153
+ return NIL_P(hp->v.host) ? Qfalse : hp->v.host;
154
+ }
155
+
156
+ static void set_server_vars(struct http_parser *hp, VALUE env, VALUE host)
157
+ {
158
+ char *host_ptr = RSTRING_PTR(host);
159
+ long host_len = RSTRING_LEN(host);
160
+ char *colon;
161
+ VALUE server_name = host;
162
+ VALUE server_port = hp->has_scheme ? (hp->is_https ? g_443 : g_80) : Qfalse;
163
+
164
+ if (*host_ptr == '[') { /* ipv6 address format */
165
+ char *rbracket = memchr(host_ptr + 1, ']', host_len - 1);
166
+
167
+ if (rbracket)
168
+ colon = (rbracket[1] == ':') ? rbracket + 1 : NULL;
169
+ else
170
+ colon = memchr(host_ptr + 1, ':', host_len - 1);
171
+ } else {
172
+ colon = memchr(host_ptr, ':', host_len);
173
+ }
174
+
175
+ if (colon) {
176
+ long port_start = colon - host_ptr + 1;
177
+ long port_len = host_len - port_start;
178
+
179
+ server_name = rb_str_substr(host, 0, colon - host_ptr);
180
+ server_name = str_dd_freeze(server_name);
181
+ if (port_len > 0) {
182
+ server_port = rb_str_substr(host, port_start, port_len);
183
+ server_port = str_dd_freeze(server_port);
184
+ }
185
+ }
186
+ rb_hash_aset(env, g_SERVER_NAME, server_name);
187
+ if (server_port != Qfalse)
188
+ rb_hash_aset(env, g_SERVER_PORT, server_port);
189
+ }
190
+
191
+ static void finalize_header(struct http_parser *hp, VALUE hdr)
95
192
  {
96
- if ((HP_FL_TEST(hp, HASTRAILER) && ! HP_FL_TEST(hp, CHUNKED)))
193
+ if (hp->has_trailer && !hp->chunked)
97
194
  rb_raise(eParserError, "trailer but not chunked");
195
+ if (hp->is_request) {
196
+ if (hp->chunked) {
197
+ if (hp->len.chunk >= 0)
198
+ rb_raise(eParserError, "Content-Length set with chunked encoding");
199
+ else
200
+ hp->len.chunk = 0;
201
+ } else if (hp->len.content < 0) {
202
+ hp->len.content = 0;
203
+ }
204
+
205
+ if (!hp->has_query)
206
+ rb_hash_aset(hdr, g_QUERY_STRING, rb_str_new(NULL, 0));
207
+ if (hp->has_header) {
208
+ VALUE host = request_host_val(hp);
209
+ if (host != Qfalse)
210
+ set_server_vars(hp, hdr, host);
211
+ }
212
+ }
98
213
  }
99
214
 
100
215
  /*
@@ -107,28 +222,116 @@ static void hp_keepalive_connection(struct http_parser *hp, VALUE val)
107
222
  /* REQUEST_METHOD is always set before any headers */
108
223
  if (STR_CSTR_CASE_EQ(val, "keep-alive")) {
109
224
  /* basically have HTTP/1.0 masquerade as HTTP/1.1+ */
110
- HP_FL_SET(hp, KEEPALIVE);
225
+ hp->persistent = 1;
111
226
  } else if (STR_CSTR_CASE_EQ(val, "close")) {
112
227
  /*
113
228
  * it doesn't matter what HTTP version or request method we have,
114
229
  * if a server says "Connection: close", we disable keepalive
115
230
  */
116
- HP_FL_UNSET(hp, KEEPALIVE);
231
+ hp->persistent = 0;
117
232
  } else {
118
233
  /*
119
234
  * server could've sent anything, ignore it for now. Maybe
120
- * "HP_FL_UNSET(hp, KEEPALIVE);" just in case?
235
+ * "hp->persistent = 0;" just in case?
121
236
  * Raising an exception might be too mean...
122
237
  */
123
238
  }
124
239
  }
125
240
 
126
241
  static void
127
- http_version(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
242
+ request_method(VALUE env, const char *ptr, size_t len)
243
+ {
244
+ rb_hash_aset(env, g_REQUEST_METHOD, str_new_dd_freeze(ptr, len));
245
+ }
246
+
247
+ static void
248
+ url_scheme(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
249
+ {
250
+ VALUE val;
251
+
252
+ hp->has_scheme = 1;
253
+ /* Ragel machine downcases and enforces this as "http" or "https" */
254
+ if (len == 5) {
255
+ hp->is_https = 1;
256
+ assert(CONST_MEM_EQ("https", ptr, len) && "len == 5 but not 'https'");
257
+ val = g_https;
258
+ } else {
259
+ assert(CONST_MEM_EQ("http", ptr, len) && "len != 4 but not 'http'");
260
+ val = g_http;
261
+ }
262
+ rb_hash_aset(env, g_rack_url_scheme, val);
263
+ }
264
+
265
+ static void
266
+ request_host(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
267
+ {
268
+ VALUE val = rb_str_new(ptr, len);
269
+
270
+ rb_hash_aset(env, g_HTTP_HOST, val);
271
+ hp->v.host = val;
272
+ }
273
+
274
+ static void
275
+ set_fragment(VALUE env, const char *ptr, size_t len)
276
+ {
277
+ VALUE val = rb_str_new(ptr, len);
278
+ rb_hash_aset(env, g_FRAGMENT, val);
279
+ }
280
+
281
+ static void
282
+ request_uri(VALUE env, const char *ptr, size_t len)
283
+ {
284
+ VALUE val;
285
+
286
+ VALIDATE_MAX_URI_LENGTH(len, REQUEST_URI);
287
+ val = rb_str_new(ptr, len);
288
+ rb_hash_aset(env, g_REQUEST_URI, val);
289
+
290
+ /*
291
+ * rack says PATH_INFO must start with "/" or be empty,
292
+ * but "OPTIONS *" is a valid request
293
+ */
294
+ if (CONST_MEM_EQ("*", ptr, len)) {
295
+ val = rb_str_new(NULL, 0);
296
+ rb_hash_aset(env, g_PATH_INFO, val);
297
+ rb_hash_aset(env, g_REQUEST_PATH, val);
298
+ }
299
+ }
300
+
301
+ static void
302
+ query_string(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
303
+ {
304
+ VALIDATE_MAX_URI_LENGTH(len, QUERY_STRING);
305
+
306
+ hp->has_query = 1;
307
+ rb_hash_aset(env, g_QUERY_STRING, rb_str_new(ptr, len));
308
+ }
309
+
310
+ static void
311
+ request_path(VALUE env, const char *ptr, size_t len)
312
+ {
313
+ VALUE val;
314
+
315
+ VALIDATE_MAX_URI_LENGTH(len, REQUEST_PATH);
316
+ val = rb_str_new(ptr, len);
317
+
318
+ rb_hash_aset(env, g_REQUEST_PATH, val);
319
+ rb_hash_aset(env, g_PATH_INFO, val);
320
+ }
321
+
322
+ static void
323
+ http_version(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
128
324
  {
129
325
  if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
130
326
  /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
131
- HP_FL_SET(hp, KEEPALIVE);
327
+ hp->persistent = 1;
328
+ }
329
+ if (hp->is_request) {
330
+ VALUE v = str_new_dd_freeze(ptr, len);
331
+ hp->has_header = 1;
332
+
333
+ rb_hash_aset(env, g_SERVER_PROTOCOL, v);
334
+ rb_hash_aset(env, g_HTTP_VERSION, v);
132
335
  }
133
336
  }
134
337
 
@@ -137,21 +340,21 @@ status_phrase(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
137
340
  {
138
341
  long nr;
139
342
 
140
- hp->status = rb_str_new(ptr, len);
343
+ hp->v.status = str_new_dd_freeze(ptr, len);
141
344
 
142
345
  /* RSTRING_PTR is null terminated, ptr is not */
143
- nr = strtol(RSTRING_PTR(hp->status), NULL, 10);
346
+ nr = strtol(RSTRING_PTR(hp->v.status), NULL, 10);
144
347
 
145
348
  if (nr < 100 || nr > 999)
146
- rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->status));
349
+ rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->v.status));
147
350
 
148
351
  if ( !((nr >= 100 && nr <= 199) || nr == 204 || nr == 304) )
149
- HP_FL_SET(hp, HASBODY);
352
+ hp->has_body = 1;
150
353
  }
151
354
 
152
355
  static inline void invalid_if_trailer(struct http_parser *hp)
153
356
  {
154
- if (HP_FL_TEST(hp, INTRAILER))
357
+ if (hp->in_trailer)
155
358
  rb_raise(eParserError, "invalid Trailer");
156
359
  }
157
360
 
@@ -185,15 +388,15 @@ static void write_cont_value(struct http_parser *hp,
185
388
 
186
389
  /* normalize tab to space */
187
390
  if (cont_len > 0) {
188
- assert((' ' == *vptr || '\t' == *vptr) && "invalid leading white space");
391
+ assert(is_lws(*vptr) && "invalid leading white space");
189
392
  *vptr = ' ';
190
393
  }
191
394
  for (end = len - 1; end >= 0 && is_lws(vptr[end]); end--);
192
395
  rb_str_buf_cat(hp->cont, vptr, end + 1);
193
396
  }
194
397
 
195
- static void write_value(VALUE hdr, struct http_parser *hp,
196
- const char *buffer, const char *p)
398
+ static void write_response_value(struct http_parser *hp, VALUE hdr,
399
+ const char *buffer, const char *p)
197
400
  {
198
401
  VALUE f, v;
199
402
  VALUE hclass;
@@ -202,7 +405,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
202
405
  const char *vptr;
203
406
  size_t vlen;
204
407
 
205
- HP_FL_SET(hp, HASHEADER);
408
+ hp->has_header = 1;
206
409
 
207
410
  /* Rack does not like Status headers, so we never send them */
208
411
  if (CSTR_CASE_EQ(fptr, flen, "status")) {
@@ -214,7 +417,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
214
417
  vlen = LEN(mark, p);
215
418
  VALIDATE_MAX_LENGTH(vlen, FIELD_VALUE);
216
419
  VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
217
- f = rb_str_new(fptr, (long)flen);
420
+ f = str_new_dd_freeze(fptr, (long)flen);
218
421
  v = stripped_str_new(vptr, (long)vlen);
219
422
 
220
423
  /* needs more tests for error-checking here */
@@ -226,9 +429,9 @@ static void write_value(VALUE hdr, struct http_parser *hp,
226
429
  if (STR_CSTR_CASE_EQ(f, "connection")) {
227
430
  hp_keepalive_connection(hp, v);
228
431
  } else if (STR_CSTR_CASE_EQ(f, "content-length")) {
229
- if (! HP_FL_TEST(hp, HASBODY))
432
+ if (!hp->has_body)
230
433
  rb_raise(eParserError, "Content-Length with no body expected");
231
- if (HP_FL_TEST(hp, CHUNKED))
434
+ if (hp->chunked)
232
435
  rb_raise(eParserError,
233
436
  "Content-Length when chunked Transfer-Encoding is set");
234
437
  hp->len.content = parse_length(vptr, vlen);
@@ -239,7 +442,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
239
442
  invalid_if_trailer(hp);
240
443
  } else if (STR_CSTR_CASE_EQ(f, "transfer-encoding")) {
241
444
  if (STR_CSTR_CASE_EQ(v, "chunked")) {
242
- if (! HP_FL_TEST(hp, HASBODY))
445
+ if (!hp->has_body)
243
446
  rb_raise(eParserError,
244
447
  "chunked Transfer-Encoding with no body expected");
245
448
  if (hp->len.content >= 0)
@@ -247,13 +450,13 @@ static void write_value(VALUE hdr, struct http_parser *hp,
247
450
  "chunked Transfer-Encoding when Content-Length is set");
248
451
 
249
452
  hp->len.chunk = 0;
250
- HP_FL_SET(hp, CHUNKED);
453
+ hp->chunked = 1;
251
454
  }
252
455
  invalid_if_trailer(hp);
253
456
  } else if (STR_CSTR_CASE_EQ(f, "trailer")) {
254
- if (! HP_FL_TEST(hp, HASBODY))
457
+ if (!hp->has_body)
255
458
  rb_raise(eParserError, "trailer with no body");
256
- HP_FL_SET(hp, HASTRAILER);
459
+ hp->has_trailer = 1;
257
460
  invalid_if_trailer(hp);
258
461
  }
259
462
 
@@ -272,9 +475,6 @@ static void write_value(VALUE hdr, struct http_parser *hp,
272
475
  e = rb_funcall(hdr, id_sq, 1, f);
273
476
 
274
477
  if (NIL_P(e)) {
275
- /* new value, freeze it since it speeds up MRI slightly */
276
- OBJ_FREEZE(f);
277
-
278
478
  if (hclass == rb_cHash)
279
479
  rb_hash_aset(hdr, f, v);
280
480
  else
@@ -295,6 +495,112 @@ static void write_value(VALUE hdr, struct http_parser *hp,
295
495
  }
296
496
  }
297
497
 
498
+ static VALUE req_field(const char *ptr, size_t len)
499
+ {
500
+ size_t pfxlen = sizeof("HTTP_") - 1;
501
+ VALUE str = rb_str_new(NULL, pfxlen + len);
502
+ char *dst = RSTRING_PTR(str);
503
+
504
+ memcpy(dst, "HTTP_", pfxlen);
505
+ memcpy(dst + pfxlen, ptr, len);
506
+ assert(*(dst + RSTRING_LEN(str)) == '\0' &&
507
+ "string didn't end with \\0"); /* paranoia */
508
+
509
+ return str;
510
+ }
511
+
512
+ static void snake_upcase(char *ptr, size_t len)
513
+ {
514
+ char *c;
515
+
516
+ for (c = ptr; len--; c++) {
517
+ if (*c >= 'a' && *c <= 'z')
518
+ *c &= ~0x20;
519
+ else if (*c == '-')
520
+ *c = '_';
521
+ }
522
+ }
523
+
524
+ static void write_request_value(struct http_parser *hp, VALUE env,
525
+ char *buffer, const char *p)
526
+ {
527
+ char *fptr = PTR_TO(start.field);
528
+ size_t flen = hp->s.field_len;
529
+ char *vptr = PTR_TO(mark);
530
+ size_t vlen = LEN(mark, p);
531
+ VALUE key, val;
532
+ VALUE existing;
533
+
534
+ VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
535
+ VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
536
+ snake_upcase(fptr, flen);
537
+
538
+ /*
539
+ * ignore "Version" headers since they conflict with the HTTP_VERSION
540
+ * rack env variable.
541
+ */
542
+ if (CONST_MEM_EQ("VERSION", fptr, flen)) {
543
+ hp->cont = Qnil;
544
+ return;
545
+ }
546
+ val = vlen == 0 ? rb_str_new(0, 0) : stripped_str_new(vptr, vlen);
547
+
548
+ if (CONST_MEM_EQ("CONNECTION", fptr, flen)) {
549
+ key = g_HTTP_CONNECTION;
550
+ hp_keepalive_connection(hp, val);
551
+ } else if (CONST_MEM_EQ("CONTENT_LENGTH", fptr, flen)) {
552
+ key = g_CONTENT_LENGTH;
553
+ hp->len.content = parse_length(vptr, vlen);
554
+ if (hp->len.content < 0)
555
+ rb_raise(eParserError, "invalid Content-Length");
556
+ if (hp->len.content != 0)
557
+ hp->has_body = 1;
558
+ invalid_if_trailer(hp);
559
+ } else if (CONST_MEM_EQ("CONTENT_TYPE", fptr, flen)) {
560
+ key = g_CONTENT_TYPE;
561
+ } else if (CONST_MEM_EQ("TRANSFER_ENCODING", fptr, flen)) {
562
+ key = g_HTTP_TRANSFER_ENCODING;
563
+ if (STR_CSTR_CASE_EQ(val, "chunked")) {
564
+ hp->chunked = 1;
565
+ hp->has_body = 1;
566
+ }
567
+ invalid_if_trailer(hp);
568
+ } else if (CONST_MEM_EQ("TRAILER", fptr, flen)) {
569
+ key = g_HTTP_TRAILER;
570
+ hp->has_trailer = 1;
571
+ invalid_if_trailer(hp);
572
+ } else if (CONST_MEM_EQ("HOST", fptr, flen)) {
573
+ key = g_HTTP_HOST;
574
+ if (NIL_P(hp->v.host))
575
+ hp->v.host = val;
576
+ } else {
577
+ key = req_field(fptr, flen);
578
+ if (!HASH_ASET_DEDUPE)
579
+ key = str_dd_freeze(key);
580
+ }
581
+ existing = rb_hash_aref(env, key);
582
+ if (NIL_P(existing)) {
583
+ hp->cont = rb_hash_aset(env, key, val);
584
+ /*
585
+ * Ignore repeated Host headers and favor host set by absolute URIs.
586
+ * absoluteURI Request-URI takes precedence over
587
+ * the Host: header (ref: rfc 2616, section 5.2.1)
588
+ */
589
+ } else if (key == g_HTTP_HOST) {
590
+ hp->cont = Qnil;
591
+ } else {
592
+ rb_str_buf_cat(existing, ",", 1);
593
+ hp->cont = rb_str_buf_append(existing, val);
594
+ }
595
+ }
596
+
597
+ static void write_value(struct http_parser *hp, VALUE hdr,
598
+ char *buf, const char *p)
599
+ {
600
+ hp->is_request ? write_request_value(hp, hdr, buf, p) :
601
+ write_response_value(hp, hdr, buf, p);
602
+ }
603
+
298
604
  /** Machine **/
299
605
 
300
606
  %%{
@@ -302,10 +608,22 @@ static void write_value(VALUE hdr, struct http_parser *hp,
302
608
 
303
609
  action mark {MARK(mark, fpc); }
304
610
 
611
+ action snake_upcase_field { snake_upcase_char(deconst(fpc)); }
612
+ action downcase_char { downcase_char(deconst(fpc)); }
613
+ action request_method { request_method(hdr, PTR_TO(mark), LEN(mark, fpc)); }
614
+ action url_scheme { url_scheme(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
615
+ action host { request_host(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
616
+ action request_uri { request_uri(hdr, PTR_TO(mark), LEN(mark, fpc)); }
617
+ action fragment { set_fragment(hdr, PTR_TO(mark), LEN(mark, fpc)); }
618
+ action start_query { MARK(start.query, fpc); }
619
+ action query_string {
620
+ query_string(hp, hdr, PTR_TO(start.query), LEN(start.query, fpc));
621
+ }
622
+ action request_path { request_path(hdr, PTR_TO(mark), LEN(mark, fpc)); }
305
623
  action start_field { MARK(start.field, fpc); }
306
624
  action write_field { hp->s.field_len = LEN(start.field, fpc); }
307
625
  action start_value { MARK(mark, fpc); }
308
- action write_value { write_value(hdr, hp, buffer, fpc); }
626
+ action write_value { write_value(hp, hdr, buffer, fpc); }
309
627
  action write_cont_value { write_cont_value(hp, buffer, fpc); }
310
628
  action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
311
629
  action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
@@ -316,10 +634,10 @@ static void write_value(VALUE hdr, struct http_parser *hp,
316
634
  rb_raise(eParserError, "invalid chunk size");
317
635
  }
318
636
  action header_done {
319
- finalize_header(hp);
637
+ finalize_header(hp, hdr);
320
638
  cs = http_parser_first_final;
321
639
 
322
- if (HP_FL_TEST(hp, CHUNKED))
640
+ if (hp->chunked)
323
641
  cs = http_parser_en_ChunkedBody;
324
642
 
325
643
  /*
@@ -335,7 +653,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
335
653
  }
336
654
 
337
655
  action end_chunked_body {
338
- HP_FL_SET(hp, INTRAILER);
656
+ hp->in_trailer = 1;
339
657
  cs = http_parser_en_Trailers;
340
658
  ++p;
341
659
  assert(p <= pe && "buffer overflow after chunked body");
@@ -351,7 +669,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
351
669
  p += nr;
352
670
  assert(hp->len.chunk >= 0 && "negative chunk length");
353
671
  if ((size_t)hp->len.chunk > REMAINING) {
354
- HP_FL_SET(hp, INCHUNK);
672
+ hp->in_chunk = 1;
355
673
  goto post_exec;
356
674
  } else {
357
675
  fhold;
@@ -370,7 +688,7 @@ static void http_parser_init(struct http_parser *hp)
370
688
  int cs = 0;
371
689
  memset(hp, 0, sizeof(struct http_parser));
372
690
  hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */
373
- hp->status = Qnil;
691
+ hp->v.status = Qnil;
374
692
  hp->len.content = -1;
375
693
  %% write init;
376
694
  hp->cs = cs;
@@ -395,43 +713,55 @@ static void http_parser_execute(struct http_parser *hp,
395
713
  assert((void *)(pe - p) == (void *)(len - off) &&
396
714
  "pointers aren't same distance");
397
715
 
398
- if (HP_FL_TEST(hp, INCHUNK)) {
399
- HP_FL_UNSET(hp, INCHUNK);
716
+ if (hp->in_chunk) {
717
+ hp->in_chunk = 0;
400
718
  goto skip_chunk_data_hack;
401
719
  }
402
720
  %% write exec;
403
721
  post_exec: /* "_out:" also goes here */
404
722
  if (hp->cs != http_parser_error)
405
723
  hp->cs = cs;
406
- hp->offset = p - buffer;
724
+ hp->offset = ulong2uint(p - buffer);
407
725
 
408
726
  assert(p <= pe && "buffer overflow after parsing execute");
409
727
  assert(hp->offset <= len && "offset longer than length");
410
728
  }
411
729
 
412
- static struct http_parser *data_get(VALUE self)
730
+ static void kcar_mark(void *ptr) /* GC mark callback; wired into kcar_type below */
413
731
  {
414
- struct http_parser *hp;
732
+ struct http_parser *hp = ptr;
415
733
 
416
- Data_Get_Struct(self, struct http_parser, hp);
417
- assert(hp && "failed to extract http_parser struct");
418
- return hp;
734
+ rb_gc_mark(hp->cont); /* mark the Ruby objects referenced from the struct so the GC keeps them */
735
+ rb_gc_mark(hp->v.status);
419
736
  }
420
737
 
421
- static void mark(void *ptr)
738
+ static size_t kcar_memsize(const void *ptr) /* size callback for kcar_type: reports the fixed struct size and ignores ptr */
422
739
  {
423
- struct http_parser *hp = ptr;
740
+ return sizeof(struct http_parser);
741
+ }
424
742
 
425
- rb_gc_mark(hp->cont);
426
- rb_gc_mark(hp->status);
743
+ static const rb_data_type_t kcar_type = { /* typed-data descriptor used by kcar_alloc() and data_get() */
744
+ "kcar_parser", /* name given to this data type */
745
+ { kcar_mark, RUBY_TYPED_DEFAULT_FREE, kcar_memsize, /* reserved */ }, /* callbacks in order: mark, free, size */
746
+ /* parent, data, [ flags ] */ /* these trailing members are not initialized explicitly */
747
+ };
748
+
749
+ static VALUE kcar_alloc(VALUE klass) /* allocation function; registered with rb_define_alloc_func() in Init_kcar_ext */
750
+ {
751
+ struct http_parser *hp;
752
+ return TypedData_Make_Struct(klass, struct http_parser, &kcar_type, hp); /* new object of klass wrapping a struct http_parser, tagged with kcar_type */
427
753
  }
428
754
 
429
- static VALUE alloc(VALUE klass)
755
+ static struct http_parser *data_get(VALUE self) /* returns the struct http_parser wrapped by self, looked up through kcar_type */
430
756
  {
431
757
  struct http_parser *hp;
432
- return Data_Make_Struct(klass, struct http_parser, mark, -1, hp);
758
+
759
+ TypedData_Get_Struct(self, struct http_parser, &kcar_type, hp);
760
+ assert(hp && "failed to extract http_parser struct");
761
+ return hp;
433
762
  }
434
763
 
764
+
435
765
  /**
436
766
  * call-seq:
437
767
  * Kcar::Parser.new => parser
@@ -485,7 +815,7 @@ static VALUE body_bytes_left(VALUE self)
485
815
  {
486
816
  struct http_parser *hp = data_get(self);
487
817
 
488
- if (HP_FL_TEST(hp, CHUNKED))
818
+ if (hp->chunked)
489
819
  return Qnil;
490
820
  if (hp->len.content >= 0)
491
821
  return OFFT2NUM(hp->len.content);
@@ -505,9 +835,11 @@ static VALUE body_bytes_left_set(VALUE self, VALUE bytes)
505
835
  {
506
836
  struct http_parser *hp = data_get(self);
507
837
 
508
- if (HP_FL_TEST(hp, CHUNKED))
838
+ if (hp->chunked)
509
839
  rb_raise(rb_eRuntimeError, "body_bytes_left= is not for chunked bodies");
510
840
  hp->len.content = NUM2OFFT(bytes);
841
+ if (hp->len.content == 0)
842
+ hp->body_eof_seen = 1;
511
843
  return bytes;
512
844
  }
513
845
 
@@ -522,7 +854,30 @@ static VALUE chunked(VALUE self)
522
854
  {
523
855
  struct http_parser *hp = data_get(self);
524
856
 
525
- return HP_FL_TEST(hp, CHUNKED) ? Qtrue : Qfalse;
857
+ return hp->chunked ? Qtrue : Qfalse;
858
+ }
859
+
860
+ static void check_buffer_size(long dlen) /* raises RangeError unless dlen fits in an unsigned int; returns normally otherwise */
861
+ { /* presumably needed because offsets are narrowed with ulong2uint() in http_parser_execute -- confirm */
862
+ if ((uint64_t)dlen > UINT_MAX) /* the cast makes a negative dlen compare as a huge value, so it is rejected too */
863
+ rb_raise(rb_eRangeError, "headers too large to process (%ld bytes)", dlen); /* NOTE(review): filter_body() also calls this with a body buffer length, so "headers" can be misleading there */
864
+ }
865
+
866
+ static void parser_execute(struct http_parser *hp, VALUE hdr, VALUE buf) /* shared driver for headers() and request(): validates buf, runs the machine over it, raises ParserError if it ends in the error state */
867
+ {
868
+ char *ptr;
869
+ long len;
870
+
871
+ Check_Type(buf, T_STRING); /* must be a String before RSTRING_PTR/RSTRING_LEN are used on it */
872
+ rb_str_modify(buf); /* presumably required because actions such as snake_upcase_field/downcase_char pass deconst(fpc) to helpers that rewrite bytes -- confirm */
873
+ ptr = RSTRING_PTR(buf);
874
+ len = RSTRING_LEN(buf);
875
+ check_buffer_size(len); /* RangeError if len exceeds UINT_MAX */
876
+
877
+ http_parser_execute(hp, hdr, ptr, len);
878
+
879
+ if (hp->cs == http_parser_error) /* the removed rows in headers() show this check used to live in the caller */
880
+ rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
526
881
  }
527
882
 
528
883
  /**
@@ -541,28 +896,49 @@ static VALUE headers(VALUE self, VALUE hdr, VALUE data)
541
896
  {
542
897
  struct http_parser *hp = data_get(self);
543
898
 
544
- http_parser_execute(hp, hdr, RSTRING_PTR(data), RSTRING_LEN(data));
899
+ if (hp->is_request)
900
+ rb_raise(rb_eRuntimeError, "parser is handling a request, not response");
901
+
902
+ parser_execute(hp, hdr, data);
545
903
  VALIDATE_MAX_LENGTH(hp->offset, HEADER);
546
904
 
547
905
  if (hp->cs == http_parser_first_final ||
548
906
  hp->cs == http_parser_en_ChunkedBody) {
549
907
  advance_str(data, hp->offset + 1);
550
908
  hp->offset = 0;
551
- if (HP_FL_TEST(hp, INTRAILER))
909
+ if (hp->in_trailer)
552
910
  return hdr;
553
911
  else
554
- return rb_ary_new3(2, hp->status, hdr);
912
+ return rb_ary_new3(2, hp->v.status, hdr);
555
913
  }
556
914
 
557
- if (hp->cs == http_parser_error)
558
- rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
559
-
560
915
  return Qnil;
561
916
  }
562
917
 
918
+ static VALUE request(VALUE self, VALUE env, VALUE buf) /* Parser#request(env, buf), registered in Init_kcar_ext with arity 2; env is handed to the parser as its header argument; returns env once the header section is complete, nil otherwise */
919
+ {
920
+ struct http_parser *hp = data_get(self);
921
+
922
+ hp->is_request = 1; /* set before any validation, so it stays set even if the calls below raise; headers() refuses parsers carrying this flag */
923
+ Check_Type(buf, T_STRING); /* NOTE(review): parser_execute() repeats this exact check; looks redundant */
924
+ parser_execute(hp, env, buf);
925
+
926
+ if (hp->cs == http_parser_first_final || /* same completion test as headers(): final state or chunked-body entry state */
927
+ hp->cs == http_parser_en_ChunkedBody) {
928
+ advance_str(buf, hp->offset + 1); /* presumably drops the consumed bytes from the front of buf -- confirm against advance_str() */
929
+ hp->offset = 0;
930
+ if (hp->in_trailer) /* in_trailer is set by the end_chunked_body action, i.e. after the chunked body ended */
931
+ hp->body_eof_seen = 1; /* keepalive() consults this flag for requests */
932
+
933
+ return env;
934
+ }
935
+ return Qnil; /* incomplete */
936
+ }
937
+
938
+
563
939
  static int chunked_eof(struct http_parser *hp) /* nonzero once the machine sits in http_parser_first_final or trailer parsing has begun (in_trailer) */
564
940
  {
565
- return ((hp->cs == http_parser_first_final) || HP_FL_TEST(hp, INTRAILER));
941
+ return ((hp->cs == http_parser_first_final) || hp->in_trailer);
566
942
  }
567
943
 
568
944
  /**
@@ -576,13 +952,13 @@ static VALUE body_eof(VALUE self)
576
952
  {
577
953
  struct http_parser *hp = data_get(self);
578
954
 
579
- if (!HP_FL_TEST(hp, HASHEADER) && HP_FL_ALL(hp, KEEPALIVE))
955
+ if (!hp->has_header && hp->persistent)
580
956
  return Qtrue;
581
957
 
582
- if (HP_FL_TEST(hp, CHUNKED))
958
+ if (hp->chunked)
583
959
  return chunked_eof(hp) ? Qtrue : Qfalse;
584
960
 
585
- if (! HP_FL_TEST(hp, HASBODY))
961
+ if (!hp->has_body)
586
962
  return Qtrue;
587
963
 
588
964
  return hp->len.content == 0 ? Qtrue : Qfalse;
@@ -604,10 +980,14 @@ static VALUE keepalive(VALUE self)
604
980
  {
605
981
  struct http_parser *hp = data_get(self);
606
982
 
607
- if (HP_FL_ALL(hp, KEEPALIVE)) {
608
- if (HP_FL_TEST(hp, HASHEADER) && HP_FL_TEST(hp, HASBODY) ) {
609
- if (HP_FL_TEST(hp, CHUNKED) || (hp->len.content >= 0))
610
- return Qtrue;
983
+ if (hp->persistent) {
984
+ if (hp->has_header && hp->has_body) {
985
+ if (hp->chunked || (hp->len.content >= 0)) {
986
+ if (!hp->is_request)
987
+ return Qtrue;
988
+ else
989
+ return hp->body_eof_seen ? Qtrue : Qfalse;
990
+ }
611
991
 
612
992
  /* unknown Content-Length and not chunked, we must assume close */
613
993
  return Qfalse;
@@ -621,54 +1001,77 @@ static VALUE keepalive(VALUE self)
621
1001
 
622
1002
  /**
623
1003
  * call-seq:
624
- * parser.filter_body(buf, data) => nil/data
1004
+ * parser.filter_body(dst, src) => nil/dst
625
1005
  *
626
- * Takes a String of +data+, will modify data if dechunking is done.
627
- * Returns +nil+ if there is more data left to process. Returns
628
- * +data+ if body processing is complete. When returning +data+,
629
- * it may modify +data+ so the start of the string points to where
1006
+ * Takes a String of +src+, will modify src if dechunking is done.
1007
+ * Returns +nil+ if there is more +src+ left to process. Returns
1008
+ * +dst+ if body processing is complete. When returning +dst+,
1009
+ * it may modify +src+ so the start of the string points to where
630
1010
  * the body ended so that trailer processing can begin.
631
1011
  *
632
1012
  * Raises ParserError if there are dechunking errors.
633
- * Basically this is a glorified memcpy(3) that copies +data+
634
- * into +buf+ while filtering it through the dechunker.
1013
+ * Basically this is a glorified memcpy(3) that copies +src+
1014
+ * into +dst+ while filtering it through the dechunker. For a non-chunked request it instead copies at most the remaining Content-Length bytes of +src+ into +dst+ and always returns +dst+; a non-chunked response raises RuntimeError. Raises RangeError if +src+ is longer than UINT_MAX bytes.
635
1015
  */
636
- static VALUE filter_body(VALUE self, VALUE buf, VALUE data)
1016
+ static VALUE filter_body(VALUE self, VALUE dst, VALUE src)
637
1017
  {
638
1018
  struct http_parser *hp = data_get(self);
639
- char *dptr;
640
- long dlen;
1019
+ char *sptr;
1020
+ long slen;
641
1021
 
642
- dptr = RSTRING_PTR(data);
643
- dlen = RSTRING_LEN(data);
1022
+ sptr = RSTRING_PTR(src); /* NOTE(review): src is dereferenced without a Check_Type/StringValue guard (parser_execute() checks its buffer first) -- confirm callers always pass a String */
1023
+ slen = RSTRING_LEN(src);
1024
+ check_buffer_size(slen); /* RangeError if slen exceeds UINT_MAX */
644
1025
 
645
- StringValue(buf);
646
- rb_str_modify(buf);
647
- rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */
648
- OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */
1026
+ StringValue(dst); /* NOTE(review): runs after sptr/slen were captured; if dst and src are the same object, or the conversion touches src, sptr may be stale -- confirm */
1027
+ rb_str_modify(dst);
1028
+ OBJ_TAINT(dst); /* keep weirdo $SAFE users happy */
649
1029
 
650
- if (!HP_FL_TEST(hp, CHUNKED))
1030
+ /*
1031
+ * for now, only support filter_body for identity requests,
1032
+ * not responses; it's rather inefficient to blindly memcpy
1033
+ * giant request bodies; on the other hand, it simplifies
1034
+ * server-side code.
1035
+ */
1036
+ if (hp->is_request && !hp->chunked) {
1037
+ /* no need to enter the Ragel machine for unchunked transfers */
1038
+ assert(hp->len.content >= 0 && "negative Content-Length");
1039
+ if (hp->len.content > 0) { /* NOTE(review): when nothing is left, dst is returned without being resized, so it keeps its previous contents -- confirm callers expect that */
1040
+ long nr = MIN(slen, hp->len.content); /* copy no more than the body bytes still expected */
1041
+
1042
+ rb_str_resize(dst, nr);
1043
+ memcpy(RSTRING_PTR(dst), sptr, nr);
1044
+ hp->len.content -= nr;
1045
+ if (hp->len.content == 0)
1046
+ hp->body_eof_seen = 1; /* keepalive() consults this flag for requests */
1047
+ advance_str(src, nr); /* presumably drops the copied bytes from the front of src -- confirm against advance_str() */
1048
+ }
1049
+ return dst;
1050
+ }
1051
+
1052
+ if (!hp->chunked) /* only non-chunked responses reach this point */
651
1053
  rb_raise(rb_eRuntimeError, "filter_body is only for chunked bodies");
652
1054
 
1055
+ rb_str_resize(dst, slen); /* we can never copy more than slen bytes */
653
1056
  if (!chunked_eof(hp)) { /* the machine is skipped once the final state or the trailers were reached */
654
1057
  hp->s.dest_offset = 0; /* presumably the write position inside dst used while copying chunk data -- confirm */
655
- http_parser_execute(hp, buf, dptr, dlen);
1058
+ http_parser_execute(hp, dst, sptr, slen);
656
1059
  if (hp->cs == http_parser_error)
657
1060
  rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
658
1061
 
659
1062
  assert(hp->s.dest_offset <= hp->offset &&
660
1063
  "destination buffer overflow");
661
- advance_str(data, hp->offset);
662
- rb_str_set_len(buf, hp->s.dest_offset);
1064
+ advance_str(src, hp->offset);
1065
+ rb_str_set_len(dst, hp->s.dest_offset); /* shrink dst from slen to dest_offset */
663
1066
 
664
- if (RSTRING_LEN(buf) == 0 && chunked_eof(hp)) {
1067
+ if (RSTRING_LEN(dst) == 0 && chunked_eof(hp)) { /* nothing copied and the end was reached: the (empty) dst is returned to signal completion */
665
1068
  assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
666
1069
  } else {
667
- data = Qnil;
1070
+ dst = Qnil; /* only the local is cleared: nil asks the caller to call again, and any copied bytes stay in the caller's String */
668
1071
  }
669
1072
  }
670
1073
  hp->offset = 0; /* for trailer parsing */
671
- return data;
1074
+ return dst;
672
1075
  }
673
1076
 
674
1077
  void Init_kcar_ext(void)
@@ -676,11 +1079,21 @@ void Init_kcar_ext(void)
676
1079
  VALUE mKcar = rb_define_module("Kcar");
677
1080
  VALUE cParser = rb_define_class_under(mKcar, "Parser", rb_cObject);
678
1081
 
1082
+ /*
1083
+ * Document-class: Kcar::ParserError
1084
+ *
1085
+ * This is raised if there are parsing errors.
1086
+ */
679
1087
  eParserError = rb_define_class_under(mKcar, "ParserError", rb_eIOError);
1088
+ e413 = rb_define_class_under(mKcar, "RequestEntityTooLargeError",
1089
+ eParserError);
1090
+ e414 = rb_define_class_under(mKcar, "RequestURITooLongError",
1091
+ eParserError);
680
1092
 
681
- rb_define_alloc_func(cParser, alloc);
1093
+ rb_define_alloc_func(cParser, kcar_alloc);
682
1094
  rb_define_method(cParser, "initialize", initialize, 0);
683
1095
  rb_define_method(cParser, "reset", initialize, 0);
1096
+ rb_define_method(cParser, "request", request, 2);
684
1097
  rb_define_method(cParser, "headers", headers, 2);
685
1098
  rb_define_method(cParser, "trailers", headers, 2);
686
1099
  rb_define_method(cParser, "filter_body", filter_body, 2);
@@ -706,4 +1119,34 @@ void Init_kcar_ext(void)
706
1119
  rb_define_const(cParser, "LENGTH_MAX", OFFT2NUM(UH_OFF_T_MAX));
707
1120
  id_sq = rb_intern("[]");
708
1121
  id_sq_set = rb_intern("[]=");
1122
+ id_uminus = rb_intern("-@");
1123
+
1124
+ /* TODO: gperf to make a perfect hash of common strings */
1125
+ #define C(var, cstr) do { /* var = str_new_dd_freeze() of the literal, then handed to rb_gc_register_mark_object() */ \
1126
+ var = str_new_dd_freeze((cstr), sizeof(cstr) - 1); /* sizeof - 1 leaves out the literal's terminating NUL */ \
1127
+ rb_gc_register_mark_object((var)); \
1128
+ } while (0) /* no ';' here: every C(...); call site supplies it, so each use is exactly one statement */
1129
+
1130
+ C(g_CONTENT_LENGTH, "CONTENT_LENGTH");
1131
+ C(g_CONTENT_TYPE, "CONTENT_TYPE");
1132
+ C(g_FRAGMENT, "FRAGMENT");
1133
+ C(g_HTTP_HOST, "HTTP_HOST");
1134
+ C(g_HTTP_CONNECTION, "HTTP_CONNECTION");
1135
+ C(g_HTTP_TRAILER, "HTTP_TRAILER");
1136
+ C(g_HTTP_TRANSFER_ENCODING, "HTTP_TRANSFER_ENCODING");
1137
+ C(g_HTTP_VERSION, "HTTP_VERSION");
1138
+ C(g_PATH_INFO, "PATH_INFO");
1139
+ C(g_QUERY_STRING, "QUERY_STRING");
1140
+ C(g_REQUEST_METHOD, "REQUEST_METHOD");
1141
+ C(g_REQUEST_PATH, "REQUEST_PATH");
1142
+ C(g_REQUEST_URI, "REQUEST_URI");
1143
+ C(g_SERVER_NAME, "SERVER_NAME");
1144
+ C(g_SERVER_PORT, "SERVER_PORT");
1145
+ C(g_SERVER_PROTOCOL, "SERVER_PROTOCOL");
1146
+ C(g_rack_url_scheme, "rack.url_scheme");
1147
+ C(g_http, "http");
1148
+ C(g_https, "https");
1149
+ C(g_80, "80");
1150
+ C(g_443, "443");
1151
+ #undef C
709
1152
  }