http_parser.rb 0.5.0-x86-mingw32 → 0.5.1-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source :rubygems
2
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,32 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ http_parser.rb (0.5.1)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.1.2)
10
+ json (1.4.6)
11
+ rake (0.8.7)
12
+ rake-compiler (0.7.0)
13
+ rake (>= 0.8.3, < 0.9)
14
+ rspec (2.0.1)
15
+ rspec-core (~> 2.0.1)
16
+ rspec-expectations (~> 2.0.1)
17
+ rspec-mocks (~> 2.0.1)
18
+ rspec-core (2.0.1)
19
+ rspec-expectations (2.0.1)
20
+ diff-lcs (>= 1.1.2)
21
+ rspec-mocks (2.0.1)
22
+ rspec-core (~> 2.0.1)
23
+ rspec-expectations (~> 2.0.1)
24
+
25
+ PLATFORMS
26
+ ruby
27
+
28
+ DEPENDENCIES
29
+ http_parser.rb!
30
+ json (>= 1.4.6)
31
+ rake-compiler (>= 0.7.0)
32
+ rspec (>= 2.0.1)
data/README.md CHANGED
@@ -20,14 +20,15 @@ This gem aims to work on all major Ruby platforms, including:
20
20
 
21
21
  parser = Http::Parser.new
22
22
 
23
- parser.on_headers_complete = proc do |headers|
24
- p parser.http_method
23
+ parser.on_headers_complete = proc do
25
24
  p parser.http_version
26
25
 
27
- p parser.request_url # for requests
26
+ p parser.http_method # for requests
27
+ p parser.request_url
28
+
28
29
  p parser.status_code # for responses
29
30
 
30
- p headers
31
+ p parser.headers
31
32
  end
32
33
 
33
34
  parser.on_body = proc do |chunk|
@@ -43,3 +44,42 @@ This gem aims to work on all major Ruby platforms, including:
43
44
  # Feed raw data from the socket to the parser
44
45
  parser << raw_data
45
46
 
47
+ ## Advanced Usage
48
+
49
+ ### Accept callbacks on an object
50
+
51
+ module MyHttpConnection
52
+ def connection_completed
53
+ @parser = Http::Parser.new(self)
54
+ end
55
+
56
+ def receive_data(data)
57
+ @parser << data
58
+ end
59
+
60
+ def on_message_begin
61
+ @headers = nil
62
+ @body = ''
63
+ end
64
+
65
+ def on_headers_complete
66
+ @headers = @parser.headers
67
+ end
68
+
69
+ def on_body(chunk)
70
+ @body << chunk
71
+ end
72
+
73
+ def on_message_complete
74
+ p [@headers, @body]
75
+ end
76
+ end
77
+
78
+ ### Stop parsing after headers
79
+
80
+ parser = Http::Parser.new
81
+ parser.on_headers_complete = proc{ :stop }
82
+
83
+ offset = parser << request_data
84
+ body = request_data[offset..-1]
85
+
@@ -1,5 +1,13 @@
1
1
  require 'mkmf'
2
2
 
3
+ # check out code if it hasn't been already
4
+ if Dir[File.expand_path('../vendor/http-parser/*', __FILE__)].empty?
5
+ Dir.chdir(File.expand_path('../../../', __FILE__)) do
6
+ xsystem 'git submodule init'
7
+ xsystem 'git submodule update'
8
+ end
9
+ end
10
+
3
11
  # mongrel and http-parser both define http_parser_(init|execute), so we
4
12
  # rename functions in http-parser before using them.
5
13
  vendor_dir = File.expand_path('../vendor/http-parser/', __FILE__)
@@ -120,7 +120,7 @@ public class RubyHttpParser extends RubyObject {
120
120
  if (_current_header == null)
121
121
  _current_header = new String(data);
122
122
  else
123
- _current_header.concat(new String(data));
123
+ _current_header = _current_header.concat(new String(data));
124
124
 
125
125
  return 0;
126
126
  }
@@ -30,10 +30,10 @@ typedef struct ParserWrapper {
30
30
 
31
31
  VALUE callback_object;
32
32
  VALUE stopped;
33
+ VALUE completed;
33
34
 
34
35
  VALUE last_field_name;
35
- const char *last_field_name_at;
36
- size_t last_field_name_length;
36
+ VALUE curr_field_name;
37
37
 
38
38
  enum ryah_http_parser_type type;
39
39
  } ParserWrapper;
@@ -50,10 +50,10 @@ void ParserWrapper_init(ParserWrapper *wrapper) {
50
50
  wrapper->fragment = Qnil;
51
51
 
52
52
  wrapper->headers = Qnil;
53
+ wrapper->completed = Qfalse;
53
54
 
54
55
  wrapper->last_field_name = Qnil;
55
- wrapper->last_field_name_at = NULL;
56
- wrapper->last_field_name_length = 0;
56
+ wrapper->curr_field_name = Qnil;
57
57
  }
58
58
 
59
59
  void ParserWrapper_mark(void *data) {
@@ -70,6 +70,7 @@ void ParserWrapper_mark(void *data) {
70
70
  rb_gc_mark_maybe(wrapper->on_message_complete);
71
71
  rb_gc_mark_maybe(wrapper->callback_object);
72
72
  rb_gc_mark_maybe(wrapper->last_field_name);
73
+ rb_gc_mark_maybe(wrapper->curr_field_name);
73
74
  }
74
75
  }
75
76
 
@@ -147,13 +148,11 @@ int on_fragment(ryah_http_parser *parser, const char *at, size_t length) {
147
148
  int on_header_field(ryah_http_parser *parser, const char *at, size_t length) {
148
149
  GET_WRAPPER(wrapper, parser);
149
150
 
150
- wrapper->last_field_name = Qnil;
151
-
152
- if (wrapper->last_field_name_at == NULL) {
153
- wrapper->last_field_name_at = at;
154
- wrapper->last_field_name_length = length;
151
+ if (wrapper->curr_field_name == Qnil) {
152
+ wrapper->last_field_name = Qnil;
153
+ wrapper->curr_field_name = rb_str_new(at, length);
155
154
  } else {
156
- wrapper->last_field_name_length += length;
155
+ rb_str_cat(wrapper->curr_field_name, at, length);
157
156
  }
158
157
 
159
158
  return 0;
@@ -163,15 +162,13 @@ int on_header_value(ryah_http_parser *parser, const char *at, size_t length) {
163
162
  GET_WRAPPER(wrapper, parser);
164
163
 
165
164
  if (wrapper->last_field_name == Qnil) {
166
- wrapper->last_field_name = rb_str_new(wrapper->last_field_name_at, wrapper->last_field_name_length);
165
+ wrapper->last_field_name = wrapper->curr_field_name;
166
+ wrapper->curr_field_name = Qnil;
167
167
 
168
168
  VALUE val = rb_hash_aref(wrapper->headers, wrapper->last_field_name);
169
169
  if (val != Qnil) {
170
170
  rb_str_cat(val, ", ", 2);
171
171
  }
172
-
173
- wrapper->last_field_name_at = NULL;
174
- wrapper->last_field_name_length = 0;
175
172
  }
176
173
 
177
174
  HASH_CAT(wrapper->headers, wrapper->last_field_name, at, length);
@@ -221,6 +218,7 @@ int on_message_complete(ryah_http_parser *parser) {
221
218
  GET_WRAPPER(wrapper, parser);
222
219
 
223
220
  VALUE ret = Qnil;
221
+ wrapper->completed = Qtrue;
224
222
 
225
223
  if (wrapper->callback_object != Qnil && rb_respond_to(wrapper->callback_object, Ion_message_complete)) {
226
224
  ret = rb_funcall(wrapper->callback_object, Ion_message_complete, 0);
@@ -290,6 +288,8 @@ VALUE Parser_initialize(int argc, VALUE *argv, VALUE self) {
290
288
 
291
289
  VALUE Parser_execute(VALUE self, VALUE data) {
292
290
  ParserWrapper *wrapper = NULL;
291
+
292
+ Check_Type(data, T_STRING);
293
293
  char *ptr = RSTRING_PTR(data);
294
294
  long len = RSTRING_LEN(data);
295
295
 
@@ -301,7 +301,7 @@ VALUE Parser_execute(VALUE self, VALUE data) {
301
301
  if (wrapper->parser.upgrade) {
302
302
  // upgrade request
303
303
  } else if (nparsed != len) {
304
- if (!RTEST(wrapper->stopped))
304
+ if (!RTEST(wrapper->stopped) && !RTEST(wrapper->completed))
305
305
  rb_raise(eParserError, "Could not parse data entirely");
306
306
  else
307
307
  nparsed += 1; // error states fail on the current character
@@ -19,16 +19,6 @@
19
19
  * IN THE SOFTWARE.
20
20
  */
21
21
  #include <http_parser.h>
22
- #if defined(_WIN32) && !defined(__MINGW32__)
23
- typedef __int8 int8_t;
24
- typedef unsigned __int8 uint8_t;
25
- typedef __int16 int16_t;
26
- typedef unsigned __int16 uint16_t;
27
- typedef __int16 int32_t;
28
- typedef unsigned __int32 uint32_t;
29
- #else
30
- #include <stdint.h>
31
- #endif
32
22
  #include <assert.h>
33
23
  #include <stddef.h>
34
24
 
@@ -103,12 +93,21 @@ static const char *method_strings[] =
103
93
  , "MKACTIVITY"
104
94
  , "CHECKOUT"
105
95
  , "MERGE"
96
+ , "M-SEARCH"
97
+ , "NOTIFY"
98
+ , "SUBSCRIBE"
99
+ , "UNSUBSCRIBE"
106
100
  };
107
101
 
108
102
 
109
- /* ' ', '_', '-' and all alpha-numeric ascii characters are accepted by acceptable_header.
110
- The 'A'-'Z' are lower-cased. */
111
- static const char acceptable_header[256] = {
103
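+ /* Tokens as defined by rfc 2616. Also lowercases them. NOTE: the table below additionally accepts SP, '"', '/' and '}', which rfc 2616 lists as separators.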
104
+ * token = 1*<any CHAR except CTLs or separators>
105
+ * separators = "(" | ")" | "<" | ">" | "@"
106
+ * | "," | ";" | ":" | "\" | <">
107
+ * | "/" | "[" | "]" | "?" | "="
108
+ * | "{" | "}" | SP | HT
109
+ */
110
+ static const char tokens[256] = {
112
111
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
113
112
  0, 0, 0, 0, 0, 0, 0, 0,
114
113
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
@@ -118,9 +117,9 @@ static const char acceptable_header[256] = {
118
117
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
119
118
  0, 0, 0, 0, 0, 0, 0, 0,
120
119
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
121
- ' ', 0, 0, 0, 0, 0, 0, 0,
120
+ ' ', '!', '"', '#', '$', '%', '&', '\'',
122
121
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
123
- 0, 0, 0, 0, 0, '-', 0, 0,
122
+ 0, 0, '*', '+', 0, '-', '.', '/',
124
123
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
125
124
  '0', '1', '2', '3', '4', '5', '6', '7',
126
125
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
@@ -132,15 +131,15 @@ static const char acceptable_header[256] = {
132
131
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
133
132
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
134
133
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
135
- 'x', 'y', 'z', 0, 0, 0, 0, '_',
134
+ 'x', 'y', 'z', 0, 0, 0, '^', '_',
136
135
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
137
- 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
136
+ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
138
137
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
139
138
  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
140
139
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
141
140
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
142
141
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
143
- 'x', 'y', 'z', 0, 0, 0, 0, 0 };
142
+ 'x', 'y', 'z', 0, '|', '}', '~', 0 };
144
143
 
145
144
 
146
145
  static const int8_t unhex[256] =
@@ -302,6 +301,7 @@ enum flags
302
301
  #define CR '\r'
303
302
  #define LF '\n'
304
303
  #define LOWER(c) (unsigned char)(c | 0x20)
304
+ #define TOKEN(c) tokens[(unsigned char)c]
305
305
 
306
306
 
307
307
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
@@ -579,12 +579,14 @@ size_t http_parser_execute (http_parser *parser,
579
579
  case 'G': parser->method = HTTP_GET; break;
580
580
  case 'H': parser->method = HTTP_HEAD; break;
581
581
  case 'L': parser->method = HTTP_LOCK; break;
582
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE */ break;
582
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
583
+ case 'N': parser->method = HTTP_NOTIFY; break;
583
584
  case 'O': parser->method = HTTP_OPTIONS; break;
584
585
  case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
585
586
  case 'R': parser->method = HTTP_REPORT; break;
587
+ case 'S': parser->method = HTTP_SUBSCRIBE; break;
586
588
  case 'T': parser->method = HTTP_TRACE; break;
587
- case 'U': parser->method = HTTP_UNLOCK; break;
589
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
588
590
  default: goto error;
589
591
  }
590
592
  state = s_req_method;
@@ -612,6 +614,8 @@ size_t http_parser_execute (http_parser *parser,
612
614
  parser->method = HTTP_MOVE;
613
615
  } else if (index == 1 && ch == 'E') {
614
616
  parser->method = HTTP_MERGE;
617
+ } else if (index == 1 && ch == '-') {
618
+ parser->method = HTTP_MSEARCH;
615
619
  } else if (index == 2 && ch == 'A') {
616
620
  parser->method = HTTP_MKACTIVITY;
617
621
  }
@@ -619,6 +623,8 @@ size_t http_parser_execute (http_parser *parser,
619
623
  parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
620
624
  } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
621
625
  parser->method = HTTP_PUT;
626
+ } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
627
+ parser->method = HTTP_UNSUBSCRIBE;
622
628
  } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
623
629
  parser->method = HTTP_PROPPATCH;
624
630
  } else {
@@ -632,7 +638,7 @@ size_t http_parser_execute (http_parser *parser,
632
638
  {
633
639
  if (ch == ' ') break;
634
640
 
635
- if (ch == '/') {
641
+ if (ch == '/' || ch == '*') {
636
642
  MARK(url);
637
643
  MARK(path);
638
644
  state = s_req_path;
@@ -662,6 +668,9 @@ size_t http_parser_execute (http_parser *parser,
662
668
  } else if (ch == '.') {
663
669
  state = s_req_host;
664
670
  break;
671
+ } else if ('0' <= ch && ch <= '9') {
672
+ state = s_req_host;
673
+ break;
665
674
  }
666
675
 
667
676
  goto error;
@@ -739,12 +748,14 @@ size_t http_parser_execute (http_parser *parser,
739
748
  case CR:
740
749
  CALLBACK(url);
741
750
  CALLBACK(path);
751
+ parser->http_major = 0;
742
752
  parser->http_minor = 9;
743
753
  state = s_req_line_almost_done;
744
754
  break;
745
755
  case LF:
746
756
  CALLBACK(url);
747
757
  CALLBACK(path);
758
+ parser->http_major = 0;
748
759
  parser->http_minor = 9;
749
760
  state = s_header_field_start;
750
761
  break;
@@ -779,11 +790,13 @@ size_t http_parser_execute (http_parser *parser,
779
790
  break;
780
791
  case CR:
781
792
  CALLBACK(url);
793
+ parser->http_major = 0;
782
794
  parser->http_minor = 9;
783
795
  state = s_req_line_almost_done;
784
796
  break;
785
797
  case LF:
786
798
  CALLBACK(url);
799
+ parser->http_major = 0;
787
800
  parser->http_minor = 9;
788
801
  state = s_header_field_start;
789
802
  break;
@@ -812,12 +825,14 @@ size_t http_parser_execute (http_parser *parser,
812
825
  case CR:
813
826
  CALLBACK(url);
814
827
  CALLBACK(query_string);
828
+ parser->http_major = 0;
815
829
  parser->http_minor = 9;
816
830
  state = s_req_line_almost_done;
817
831
  break;
818
832
  case LF:
819
833
  CALLBACK(url);
820
834
  CALLBACK(query_string);
835
+ parser->http_major = 0;
821
836
  parser->http_minor = 9;
822
837
  state = s_header_field_start;
823
838
  break;
@@ -846,11 +861,13 @@ size_t http_parser_execute (http_parser *parser,
846
861
  break;
847
862
  case CR:
848
863
  CALLBACK(url);
864
+ parser->http_major = 0;
849
865
  parser->http_minor = 9;
850
866
  state = s_req_line_almost_done;
851
867
  break;
852
868
  case LF:
853
869
  CALLBACK(url);
870
+ parser->http_major = 0;
854
871
  parser->http_minor = 9;
855
872
  state = s_header_field_start;
856
873
  break;
@@ -879,12 +896,14 @@ size_t http_parser_execute (http_parser *parser,
879
896
  case CR:
880
897
  CALLBACK(url);
881
898
  CALLBACK(fragment);
899
+ parser->http_major = 0;
882
900
  parser->http_minor = 9;
883
901
  state = s_req_line_almost_done;
884
902
  break;
885
903
  case LF:
886
904
  CALLBACK(url);
887
905
  CALLBACK(fragment);
906
+ parser->http_major = 0;
888
907
  parser->http_minor = 9;
889
908
  state = s_header_field_start;
890
909
  break;
@@ -1006,9 +1025,9 @@ size_t http_parser_execute (http_parser *parser,
1006
1025
  goto headers_almost_done;
1007
1026
  }
1008
1027
 
1009
- c = LOWER(ch);
1028
+ c = TOKEN(ch);
1010
1029
 
1011
- if (c < 'a' || 'z' < c) goto error;
1030
+ if (!c) goto error;
1012
1031
 
1013
1032
  MARK(header_field);
1014
1033
 
@@ -1041,7 +1060,7 @@ size_t http_parser_execute (http_parser *parser,
1041
1060
 
1042
1061
  case s_header_field:
1043
1062
  {
1044
- c = acceptable_header[(unsigned char)ch];
1063
+ c = TOKEN(ch);
1045
1064
 
1046
1065
  if (c) {
1047
1066
  switch (header_state) {
@@ -1177,23 +1196,18 @@ size_t http_parser_execute (http_parser *parser,
1177
1196
  state = s_header_value;
1178
1197
  index = 0;
1179
1198
 
1180
- c = acceptable_header[(unsigned char)ch];
1181
-
1182
- if (!c) {
1183
- if (ch == CR) {
1184
- CALLBACK(header_value);
1185
- header_state = h_general;
1186
- state = s_header_almost_done;
1187
- break;
1188
- }
1189
-
1190
- if (ch == LF) {
1191
- CALLBACK(header_value);
1192
- state = s_header_field_start;
1193
- break;
1194
- }
1199
+ c = LOWER(ch);
1195
1200
 
1201
+ if (ch == CR) {
1202
+ CALLBACK(header_value);
1196
1203
  header_state = h_general;
1204
+ state = s_header_almost_done;
1205
+ break;
1206
+ }
1207
+
1208
+ if (ch == LF) {
1209
+ CALLBACK(header_value);
1210
+ state = s_header_field_start;
1197
1211
  break;
1198
1212
  }
1199
1213
 
@@ -1238,22 +1252,19 @@ size_t http_parser_execute (http_parser *parser,
1238
1252
 
1239
1253
  case s_header_value:
1240
1254
  {
1241
- c = acceptable_header[(unsigned char)ch];
1242
-
1243
- if (!c) {
1244
- if (ch == CR) {
1245
- CALLBACK(header_value);
1246
- state = s_header_almost_done;
1247
- break;
1248
- }
1255
+ c = LOWER(ch);
1249
1256
 
1250
- if (ch == LF) {
1251
- CALLBACK(header_value);
1252
- goto header_almost_done;
1253
- }
1257
+ if (ch == CR) {
1258
+ CALLBACK(header_value);
1259
+ state = s_header_almost_done;
1254
1260
  break;
1255
1261
  }
1256
1262
 
1263
+ if (ch == LF) {
1264
+ CALLBACK(header_value);
1265
+ goto header_almost_done;
1266
+ }
1267
+
1257
1268
  switch (header_state) {
1258
1269
  case h_general:
1259
1270
  break;
@@ -1373,6 +1384,7 @@ size_t http_parser_execute (http_parser *parser,
1373
1384
  break;
1374
1385
 
1375
1386
  default:
1387
+ parser->state = state;
1376
1388
  return p - data; /* Error */
1377
1389
  }
1378
1390
  }
@@ -26,11 +26,20 @@ extern "C" {
26
26
 
27
27
 
28
28
  #include <sys/types.h>
29
- #include <stdint.h>
30
-
31
29
  #if defined(_WIN32) && !defined(__MINGW32__)
30
+ typedef __int8 int8_t;
31
+ typedef unsigned __int8 uint8_t;
32
+ typedef __int16 int16_t;
33
+ typedef unsigned __int16 uint16_t;
34
+ typedef __int32 int32_t;
35
+ typedef unsigned __int32 uint32_t;
36
+ typedef __int64 int64_t;
37
+ typedef unsigned __int64 uint64_t;
38
+
32
39
  typedef unsigned int size_t;
33
40
  typedef int ssize_t;
41
+ #else
42
+ #include <stdint.h>
34
43
  #endif
35
44
 
36
45
  /* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run
@@ -92,6 +101,11 @@ enum http_method
92
101
  , HTTP_MKACTIVITY
93
102
  , HTTP_CHECKOUT
94
103
  , HTTP_MERGE
104
+ /* upnp */
105
+ , HTTP_MSEARCH
106
+ , HTTP_NOTIFY
107
+ , HTTP_SUBSCRIBE
108
+ , HTTP_UNSUBSCRIBE
95
109
  };
96
110
 
97
111
 
@@ -31,7 +31,7 @@
31
31
  #undef FALSE
32
32
  #define FALSE 0
33
33
 
34
- #define MAX_HEADERS 10
34
+ #define MAX_HEADERS 13
35
35
  #define MAX_ELEMENT_SIZE 500
36
36
 
37
37
  #define MIN(a,b) ((a) < (b) ? (a) : (b))
@@ -498,7 +498,7 @@ const struct message requests[] =
498
498
  #define CONNECT_REQUEST 17
499
499
  , {.name = "connect request"
500
500
  ,.type= HTTP_REQUEST
501
- ,.raw= "CONNECT home.netscape.com:443 HTTP/1.0\r\n"
501
+ ,.raw= "CONNECT home0.netscape.com:443 HTTP/1.0\r\n"
502
502
  "User-agent: Mozilla/1.1N\r\n"
503
503
  "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n"
504
504
  "\r\n"
@@ -510,7 +510,7 @@ const struct message requests[] =
510
510
  ,.query_string= ""
511
511
  ,.fragment= ""
512
512
  ,.request_path= ""
513
- ,.request_url= "home.netscape.com:443"
513
+ ,.request_url= "home0.netscape.com:443"
514
514
  ,.num_headers= 2
515
515
  ,.upgrade=1
516
516
  ,.headers= { { "User-agent", "Mozilla/1.1N" }
@@ -538,6 +538,50 @@ const struct message requests[] =
538
538
  ,.body= ""
539
539
  }
540
540
 
541
+ #define NO_HTTP_VERSION 19
542
+ , {.name= "request with no http version"
543
+ ,.type= HTTP_REQUEST
544
+ ,.raw= "GET /\r\n"
545
+ "\r\n"
546
+ ,.should_keep_alive= FALSE
547
+ ,.message_complete_on_eof= FALSE
548
+ ,.http_major= 0
549
+ ,.http_minor= 9
550
+ ,.method= HTTP_GET
551
+ ,.query_string= ""
552
+ ,.fragment= ""
553
+ ,.request_path= "/"
554
+ ,.request_url= "/"
555
+ ,.num_headers= 0
556
+ ,.headers= {}
557
+ ,.body= ""
558
+ }
559
+
560
+ #define MSEARCH_REQ 20 /* position of the "m-search request" entry; it directly follows NO_HTTP_VERSION (19) */
561
+ , {.name= "m-search request"
562
+ ,.type= HTTP_REQUEST
563
+ ,.raw= "M-SEARCH * HTTP/1.1\r\n"
564
+ "HOST: 239.255.255.250:1900\r\n"
565
+ "MAN: \"ssdp:discover\"\r\n"
566
+ "ST: \"ssdp:all\"\r\n"
567
+ "\r\n"
568
+ ,.should_keep_alive= TRUE
569
+ ,.message_complete_on_eof= FALSE
570
+ ,.http_major= 1
571
+ ,.http_minor= 1
572
+ ,.method= HTTP_MSEARCH
573
+ ,.query_string= ""
574
+ ,.fragment= ""
575
+ ,.request_path= "*"
576
+ ,.request_url= "*"
577
+ ,.num_headers= 3
578
+ ,.headers= { { "HOST", "239.255.255.250:1900" }
579
+ , { "MAN", "\"ssdp:discover\"" }
580
+ , { "ST", "\"ssdp:all\"" }
581
+ }
582
+ ,.body= ""
583
+ }
584
+
541
585
  , {.name= NULL } /* sentinel */
542
586
  };
543
587
 
@@ -551,9 +595,10 @@ const struct message responses[] =
551
595
  "Content-Type: text/html; charset=UTF-8\r\n"
552
596
  "Date: Sun, 26 Apr 2009 11:11:49 GMT\r\n"
553
597
  "Expires: Tue, 26 May 2009 11:11:49 GMT\r\n"
598
+ "X-$PrototypeBI-Version: 1.6.0.3\r\n" /* $ char in header field */
554
599
  "Cache-Control: public, max-age=2592000\r\n"
555
600
  "Server: gws\r\n"
556
- "Content-Length: 219\r\n"
601
+ "Content-Length: 219 \r\n"
557
602
  "\r\n"
558
603
  "<HTML><HEAD><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n"
559
604
  "<TITLE>301 Moved</TITLE></HEAD><BODY>\n"
@@ -566,15 +611,16 @@ const struct message responses[] =
566
611
  ,.http_major= 1
567
612
  ,.http_minor= 1
568
613
  ,.status_code= 301
569
- ,.num_headers= 7
614
+ ,.num_headers= 8
570
615
  ,.headers=
571
616
  { { "Location", "http://www.google.com/" }
572
617
  , { "Content-Type", "text/html; charset=UTF-8" }
573
618
  , { "Date", "Sun, 26 Apr 2009 11:11:49 GMT" }
574
619
  , { "Expires", "Tue, 26 May 2009 11:11:49 GMT" }
620
+ , { "X-$PrototypeBI-Version", "1.6.0.3" }
575
621
  , { "Cache-Control", "public, max-age=2592000" }
576
622
  , { "Server", "gws" }
577
- , { "Content-Length", "219" }
623
+ , { "Content-Length", "219 " }
578
624
  }
579
625
  ,.body= "<HTML><HEAD><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n"
580
626
  "<TITLE>301 Moved</TITLE></HEAD><BODY>\n"
@@ -833,6 +879,71 @@ const struct message responses[] =
833
879
  ,.body= "<xml>hello</xml>"
834
880
  }
835
881
 
882
+
883
+ #define RES_FIELD_UNDERSCORE 10
884
+ /* Should handle header fields that start with a non-alphabetic token character such as '_' or '.' */
885
+ , {.name= "field underscore"
886
+ ,.type= HTTP_RESPONSE
887
+ ,.raw= "HTTP/1.1 200 OK\r\n"
888
+ "Date: Tue, 28 Sep 2010 01:14:13 GMT\r\n"
889
+ "Server: Apache\r\n"
890
+ "Cache-Control: no-cache, must-revalidate\r\n"
891
+ "Expires: Mon, 26 Jul 1997 05:00:00 GMT\r\n"
892
+ ".et-Cookie: PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com\r\n"
893
+ "Vary: Accept-Encoding\r\n"
894
+ "_eep-Alive: timeout=45\r\n" /* semantic value ignored */
895
+ "_onnection: Keep-Alive\r\n" /* semantic value ignored */
896
+ "Transfer-Encoding: chunked\r\n"
897
+ "Content-Type: text/html\r\n"
898
+ "Connection: close\r\n"
899
+ "\r\n"
900
+ "0\r\n\r\n"
901
+ ,.should_keep_alive= FALSE
902
+ ,.message_complete_on_eof= FALSE
903
+ ,.http_major= 1
904
+ ,.http_minor= 1
905
+ ,.status_code= 200
906
+ ,.num_headers= 11
907
+ ,.headers=
908
+ { { "Date", "Tue, 28 Sep 2010 01:14:13 GMT" }
909
+ , { "Server", "Apache" }
910
+ , { "Cache-Control", "no-cache, must-revalidate" }
911
+ , { "Expires", "Mon, 26 Jul 1997 05:00:00 GMT" }
912
+ , { ".et-Cookie", "PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com" }
913
+ , { "Vary", "Accept-Encoding" }
914
+ , { "_eep-Alive", "timeout=45" }
915
+ , { "_onnection", "Keep-Alive" }
916
+ , { "Transfer-Encoding", "chunked" }
917
+ , { "Content-Type", "text/html" }
918
+ , { "Connection", "close" }
919
+ }
920
+ ,.body= ""
921
+ }
922
+
923
+ #define NON_ASCII_IN_STATUS_LINE 11
924
+ /* Should handle non-ASCII in status line */
925
+ , {.name= "non-ASCII in status line"
926
+ ,.type= HTTP_RESPONSE
927
+ ,.raw= "HTTP/1.1 500 Oriëntatieprobleem\r\n"
928
+ "Date: Fri, 5 Nov 2010 23:07:12 GMT+2\r\n"
929
+ "Content-Length: 0\r\n"
930
+ "Connection: close\r\n"
931
+ "\r\n"
932
+ ,.should_keep_alive= FALSE
933
+ ,.message_complete_on_eof= FALSE
934
+ ,.http_major= 1
935
+ ,.http_minor= 1
936
+ ,.status_code= 500
937
+ ,.num_headers= 3
938
+ ,.headers=
939
+ { { "Date", "Fri, 5 Nov 2010 23:07:12 GMT+2" }
940
+ , { "Content-Length", "0" }
941
+ , { "Connection", "close" }
942
+ }
943
+ ,.body= ""
944
+ }
945
+
946
+
836
947
  , {.name= NULL } /* sentinel */
837
948
  };
838
949
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "http_parser.rb"
3
- s.version = "0.5.0"
3
+ s.version = "0.5.1"
4
4
  s.summary = "Simple callback-based HTTP request/response parser"
5
5
  s.description = "Ruby bindings to http://github.com/ry/http-parser and http://github.com/a2800276/http-parser.java"
6
6
 
@@ -12,4 +12,8 @@ Gem::Specification.new do |s|
12
12
 
13
13
  s.require_paths = ["lib"]
14
14
  s.extensions = ["ext/ruby_http_parser/extconf.rb"]
15
+
16
+ s.add_development_dependency 'rake-compiler', '>= 0.7.5'
17
+ s.add_development_dependency 'rspec', '>= 2.0.1'
18
+ s.add_development_dependency 'json', '>= 1.4.6'
15
19
  end
Binary file
Binary file
data/spec/parser_spec.rb CHANGED
@@ -115,6 +115,23 @@ describe HTTP::Parser do
115
115
  @done.should be_true
116
116
  end
117
117
 
118
+ it "should parse headers incrementally" do
119
+ request =
120
+ "GET / HTTP/1.0\r\n" +
121
+ "Header1: value 1\r\n" +
122
+ "Header2: value 2\r\n" +
123
+ "\r\n"
124
+
125
+ while chunk = request.slice!(0,2) and !chunk.empty?
126
+ @parser << chunk
127
+ end
128
+
129
+ @parser.headers.should == {
130
+ 'Header1' => 'value 1',
131
+ 'Header2' => 'value 2'
132
+ }
133
+ end
134
+
118
135
  it "should handle multiple headers" do
119
136
  @parser <<
120
137
  "GET / HTTP/1.0\r\n" +
@@ -143,6 +160,18 @@ describe HTTP::Parser do
143
160
  @done.should be_true
144
161
  end
145
162
 
163
+ it "should ignore extra content beyond specified length" do
164
+ @parser <<
165
+ "GET / HTTP/1.0\r\n" +
166
+ "Content-Length: 5\r\n" +
167
+ "\r\n" +
168
+ "hello" +
169
+ " \n"
170
+
171
+ @body.should == 'hello'
172
+ @done.should be_true
173
+ end
174
+
146
175
  %w[ request response ].each do |type|
147
176
  JSON.parse(File.read(File.expand_path("../support/#{type}s.json", __FILE__))).each do |test|
148
177
  test['headers'] ||= {}
data/spec/spec_helper.rb CHANGED
@@ -1,2 +1 @@
1
- require "rubygems"
2
1
  require "http_parser"
@@ -341,7 +341,7 @@
341
341
  {
342
342
  "name": "connect request",
343
343
  "type": "HTTP_REQUEST",
344
- "raw": "CONNECT home.netscape.com:443 HTTP/1.0\r\nUser-agent: Mozilla/1.1N\r\nProxy-authorization: basic aGVsbG86d29ybGQ=\r\n\r\n",
344
+ "raw": "CONNECT home0.netscape.com:443 HTTP/1.0\r\nUser-agent: Mozilla/1.1N\r\nProxy-authorization: basic aGVsbG86d29ybGQ=\r\n\r\n",
345
345
  "should_keep_alive": false,
346
346
  "message_complete_on_eof": false,
347
347
  "http_major": 1,
@@ -350,7 +350,7 @@
350
350
  "query_string": "",
351
351
  "fragment": "",
352
352
  "request_path": "",
353
- "request_url": "home.netscape.com:443",
353
+ "request_url": "home0.netscape.com:443",
354
354
  "num_headers": 2,
355
355
  "upgrade": 1,
356
356
  "headers": {
@@ -375,6 +375,25 @@
375
375
  "num_headers": 0,
376
376
  "headers": {
377
377
 
378
+ },
379
+ "body": ""
380
+ },
381
+ {
382
+ "name": "request with no http version",
383
+ "type": "HTTP_REQUEST",
384
+ "raw": "GET /\r\n\r\n",
385
+ "should_keep_alive": false,
386
+ "message_complete_on_eof": false,
387
+ "http_major": 0,
388
+ "http_minor": 9,
389
+ "method": "GET",
390
+ "query_string": "",
391
+ "fragment": "",
392
+ "request_path": "/",
393
+ "request_url": "/",
394
+ "num_headers": 0,
395
+ "headers": {
396
+
378
397
  },
379
398
  "body": ""
380
399
  }
@@ -2,21 +2,22 @@
2
2
  {
3
3
  "name": "google 301",
4
4
  "type": "HTTP_RESPONSE",
5
- "raw": "HTTP/1.1 301 Moved Permanently\r\nLocation: http://www.google.com/\r\nContent-Type: text/html; charset=UTF-8\r\nDate: Sun, 26 Apr 2009 11:11:49 GMT\r\nExpires: Tue, 26 May 2009 11:11:49 GMT\r\nCache-Control: public, max-age=2592000\r\nServer: gws\r\nContent-Length: 219\r\n\r\n<HTML><HEAD><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<TITLE>301 Moved</TITLE></HEAD><BODY>\n<H1>301 Moved</H1>\nThe document has moved\n<A HREF=\"http://www.google.com/\">here</A>.\r\n</BODY></HTML>\r\n",
5
+ "raw": "HTTP/1.1 301 Moved Permanently\r\nLocation: http://www.google.com/\r\nContent-Type: text/html; charset=UTF-8\r\nDate: Sun, 26 Apr 2009 11:11:49 GMT\r\nExpires: Tue, 26 May 2009 11:11:49 GMT\r\nX-$PrototypeBI-Version: 1.6.0.3\r\nCache-Control: public, max-age=2592000\r\nServer: gws\r\nContent-Length: 219 \r\n\r\n<HTML><HEAD><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<TITLE>301 Moved</TITLE></HEAD><BODY>\n<H1>301 Moved</H1>\nThe document has moved\n<A HREF=\"http://www.google.com/\">here</A>.\r\n</BODY></HTML>\r\n",
6
6
  "should_keep_alive": true,
7
7
  "message_complete_on_eof": false,
8
8
  "http_major": 1,
9
9
  "http_minor": 1,
10
10
  "status_code": 301,
11
- "num_headers": 7,
11
+ "num_headers": 8,
12
12
  "headers": {
13
13
  "Location": "http://www.google.com/",
14
14
  "Content-Type": "text/html; charset=UTF-8",
15
15
  "Date": "Sun, 26 Apr 2009 11:11:49 GMT",
16
16
  "Expires": "Tue, 26 May 2009 11:11:49 GMT",
17
+ "X-$PrototypeBI-Version": "1.6.0.3",
17
18
  "Cache-Control": "public, max-age=2592000",
18
19
  "Server": "gws",
19
- "Content-Length": "219"
20
+ "Content-Length": "219 "
20
21
  },
21
22
  "body": "<HTML><HEAD><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<TITLE>301 Moved</TITLE></HEAD><BODY>\n<H1>301 Moved</H1>\nThe document has moved\n<A HREF=\"http://www.google.com/\">here</A>.\r\n</BODY></HTML>\r\n"
22
23
  },
@@ -182,5 +183,47 @@
182
183
  "Connection": "keep-alive"
183
184
  },
184
185
  "body": "<xml>hello</xml>"
186
+ },
187
+ {
188
+ "name": "field underscore",
189
+ "type": "HTTP_RESPONSE",
190
+ "raw": "HTTP/1.1 200 OK\r\nDate: Tue, 28 Sep 2010 01:14:13 GMT\r\nServer: Apache\r\nCache-Control: no-cache, must-revalidate\r\nExpires: Mon, 26 Jul 1997 05:00:00 GMT\r\n.et-Cookie: PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com\r\nVary: Accept-Encoding\r\n_eep-Alive: timeout=45\r\n_onnection: Keep-Alive\r\nTransfer-Encoding: chunked\r\nContent-Type: text/html\r\nConnection: close\r\n\r\n0\r\n\r\n",
191
+ "should_keep_alive": false,
192
+ "message_complete_on_eof": false,
193
+ "http_major": 1,
194
+ "http_minor": 1,
195
+ "status_code": 200,
196
+ "num_headers": 11,
197
+ "headers": {
198
+ "Date": "Tue, 28 Sep 2010 01:14:13 GMT",
199
+ "Server": "Apache",
200
+ "Cache-Control": "no-cache, must-revalidate",
201
+ "Expires": "Mon, 26 Jul 1997 05:00:00 GMT",
202
+ ".et-Cookie": "PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com",
203
+ "Vary": "Accept-Encoding",
204
+ "_eep-Alive": "timeout=45",
205
+ "_onnection": "Keep-Alive",
206
+ "Transfer-Encoding": "chunked",
207
+ "Content-Type": "text/html",
208
+ "Connection": "close"
209
+ },
210
+ "body": ""
211
+ },
212
+ {
213
+ "name": "non-ASCII in status line",
214
+ "type": "HTTP_RESPONSE",
215
+ "raw": "HTTP/1.1 500 Oriëntatieprobleem\r\nDate: Fri, 5 Nov 2010 23:07:12 GMT+2\r\nContent-Length: 0\r\nConnection: close\r\n\r\n",
216
+ "should_keep_alive": false,
217
+ "message_complete_on_eof": false,
218
+ "http_major": 1,
219
+ "http_minor": 1,
220
+ "status_code": 500,
221
+ "num_headers": 3,
222
+ "headers": {
223
+ "Date": "Fri, 5 Nov 2010 23:07:12 GMT+2",
224
+ "Content-Length": "0",
225
+ "Connection": "close"
226
+ },
227
+ "body": ""
185
228
  }
186
229
  ]
data/tasks/compile.rake CHANGED
@@ -32,6 +32,7 @@ RUBY_VERSION =~ /(\\d+.\\d+)/
32
32
  require "\#{$1}/ruby_http_parser"
33
33
  eoruby
34
34
  end
35
+ at_exit{ FileUtils.rm t.name if File.exists?(t.name) }
35
36
  end
36
37
 
37
38
  if Rake::Task.task_defined?(:cross)
@@ -0,0 +1,65 @@
1
+ desc "Generate test fixtures"
2
+ task :fixtures => :submodules do
3
+ require 'yajl'
4
+ data = File.read File.expand_path('../../ext/ruby_http_parser/vendor/http-parser/test.c', __FILE__)
5
+
6
+ %w[ requests responses ].each do |type|
7
+ # find test definitions in between requests/responses[]= and .name=NULL
8
+ tmp = data[/#{type}\[\]\s*=(.+?),\s*\{\s*\.name=\s*NULL/m, 1]
9
+
10
+ # replace first { with a [ (parsing an array of test cases)
11
+ tmp.sub!('{','[')
12
+
13
+ # replace booleans
14
+ tmp.gsub!('TRUE', 'true')
15
+ tmp.gsub!('FALSE', 'false')
16
+
17
+ # remove macros and comments
18
+ tmp.gsub!(/^#define.+$/,'')
19
+ tmp.gsub!(/\/\*(.+?)\*\/$/,'')
20
+
21
+ # HTTP_* enums become strings
22
+ tmp.gsub!(/(= )(HTTP_\w+)/){
23
+ "#{$1}#{$2.dump}"
24
+ }
25
+
26
+ # join multiline strings for body and raw data
27
+ tmp.gsub!(/((body|raw)\s*=)(.+?)(\n\s+[\},])/m){
28
+ before, after = $1, $4
29
+ raw = $3.split("\n").map{ |l| l.strip[1..-2] }.join('')
30
+ "#{before} \"#{raw}\" #{after}"
31
+ }
32
+
33
+ # make headers an array of array tuples
34
+ tmp.gsub!(/(\.headers\s*=)(.+?)(\s*,\.)/m){
35
+ before, after = $1, $3
36
+ raw = $2.gsub('{', '[').gsub('}', ']')
37
+ "#{before} #{raw} #{after}"
38
+ }
39
+
40
+ # .name= becomes "name":
41
+ tmp.gsub!(/^(.{2,5})\.(\w+)\s*=/){
42
+ "#{$1}#{$2.dump}: "
43
+ }
44
+
45
+ # evaluate addition expressions
46
+ tmp.gsub!(/(body_size\":\s*)(\d+)\+(\d+)/){
47
+ "#{$1}#{$2.to_i+$3.to_i}"
48
+ }
49
+
50
+ # end result array
51
+ tmp << ']'
52
+
53
+ # normalize data
54
+ results = Yajl.load(tmp, :symbolize_keys => true)
55
+ results.map{ |res|
56
+ res[:headers] and res[:headers] = Hash[*res[:headers].flatten]
57
+ res[:method] and res[:method].gsub!(/^HTTP_/, '')
58
+ }
59
+
60
+ # write to a file
61
+ File.open("spec/support/#{type}.json", 'w'){ |f|
62
+ f.write Yajl.dump(results, :pretty => true)
63
+ }
64
+ end
65
+ end
metadata CHANGED
@@ -1,12 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_parser.rb
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 5
8
- - 0
9
- version: 0.5.0
4
+ version: 0.5.1
10
5
  platform: x86-mingw32
11
6
  authors:
12
7
  - Marc-Andre Cournoyer
@@ -15,10 +10,39 @@ autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
12
 
18
- date: 2010-10-28 00:00:00 -07:00
13
+ date: 2011-01-25 00:00:00 -08:00
19
14
  default_executable:
20
- dependencies: []
21
-
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: rake-compiler
18
+ type: :development
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: 0.7.5
25
+ version:
26
+ - !ruby/object:Gem::Dependency
27
+ name: rspec
28
+ type: :development
29
+ version_requirement:
30
+ version_requirements: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: 2.0.1
35
+ version:
36
+ - !ruby/object:Gem::Dependency
37
+ name: json
38
+ type: :development
39
+ version_requirement:
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: 1.4.6
45
+ version:
22
46
  description: Ruby bindings to http://github.com/ry/http-parser and http://github.com/a2800276/http-parser.java
23
47
  email:
24
48
  - macournoyer@gmail.com
@@ -32,6 +56,8 @@ extra_rdoc_files: []
32
56
  files:
33
57
  - .gitignore
34
58
  - .gitmodules
59
+ - Gemfile
60
+ - Gemfile.lock
35
61
  - README.md
36
62
  - Rakefile
37
63
  - bench/thin.rb
@@ -50,6 +76,7 @@ files:
50
76
  - spec/support/requests.json
51
77
  - spec/support/responses.json
52
78
  - tasks/compile.rake
79
+ - tasks/fixtures.rake
53
80
  - tasks/spec.rake
54
81
  - tasks/submodules.rake
55
82
  - ext/ruby_http_parser/vendor/http-parser/CONTRIBUTIONS
@@ -103,20 +130,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
103
130
  requirements:
104
131
  - - ">="
105
132
  - !ruby/object:Gem::Version
106
- segments:
107
- - 0
108
133
  version: "0"
134
+ version:
109
135
  required_rubygems_version: !ruby/object:Gem::Requirement
110
136
  requirements:
111
137
  - - ">="
112
138
  - !ruby/object:Gem::Version
113
- segments:
114
- - 0
115
139
  version: "0"
140
+ version:
116
141
  requirements: []
117
142
 
118
143
  rubyforge_project:
119
- rubygems_version: 1.3.6
144
+ rubygems_version: 1.3.5
120
145
  signing_key:
121
146
  specification_version: 3
122
147
  summary: Simple callback-based HTTP request/response parser