http_parser.rb 0.5.0-java → 0.5.1-java

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source :rubygems
2
+ gemspec
@@ -0,0 +1,32 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ http_parser.rb (0.5.1)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.1.2)
10
+ json (1.5.1)
11
+ json (1.5.1-java)
12
+ rake (0.8.7)
13
+ rake-compiler (0.7.5)
14
+ rake
15
+ rspec (2.4.0)
16
+ rspec-core (~> 2.4.0)
17
+ rspec-expectations (~> 2.4.0)
18
+ rspec-mocks (~> 2.4.0)
19
+ rspec-core (2.4.0)
20
+ rspec-expectations (2.4.0)
21
+ diff-lcs (~> 1.1.2)
22
+ rspec-mocks (2.4.0)
23
+
24
+ PLATFORMS
25
+ java
26
+ ruby
27
+
28
+ DEPENDENCIES
29
+ http_parser.rb!
30
+ json (>= 1.4.6)
31
+ rake-compiler (>= 0.7.5)
32
+ rspec (>= 2.0.1)
data/README.md CHANGED
@@ -20,14 +20,15 @@ This gem aims to work on all major Ruby platforms, including:
20
20
 
21
21
  parser = Http::Parser.new
22
22
 
23
- parser.on_headers_complete = proc do |headers|
24
- p parser.http_method
23
+ parser.on_headers_complete = proc do
25
24
  p parser.http_version
26
25
 
27
- p parser.request_url # for requests
26
+ p parser.http_method # for requests
27
+ p parser.request_url
28
+
28
29
  p parser.status_code # for responses
29
30
 
30
- p headers
31
+ p parser.headers
31
32
  end
32
33
 
33
34
  parser.on_body = proc do |chunk|
@@ -43,3 +44,42 @@ This gem aims to work on all major Ruby platforms, including:
43
44
  # Feed raw data from the socket to the parser
44
45
  parser << raw_data
45
46
 
47
+ ## Advanced Usage
48
+
49
+ ### Accept callbacks on an object
50
+
51
+ module MyHttpConnection
52
+ def connection_completed
53
+ @parser = Http::Parser.new(self)
54
+ end
55
+
56
+ def receive_data(data)
57
+ @parser << data
58
+ end
59
+
60
+ def on_message_begin
61
+ @headers = nil
62
+ @body = ''
63
+ end
64
+
65
+ def on_headers_complete
66
+ @headers = @parser.headers
67
+ end
68
+
69
+ def on_body(chunk)
70
+ @body << chunk
71
+ end
72
+
73
+ def on_message_complete
74
+ p [@headers, @body]
75
+ end
76
+ end
77
+
78
+ ### Stop parsing after headers
79
+
80
+ parser = Http::Parser.new
81
+ parser.on_headers_complete = proc{ :stop }
82
+
83
+ offset = parser << request_data
84
+ body = request_data[offset..-1]
85
+
@@ -1,5 +1,13 @@
1
1
  require 'mkmf'
2
2
 
3
+ # check out code if it hasn't been already
4
+ if Dir[File.expand_path('../vendor/http-parser/*', __FILE__)].empty?
5
+ Dir.chdir(File.expand_path('../../../', __FILE__)) do
6
+ xsystem 'git submodule init'
7
+ xsystem 'git submodule update'
8
+ end
9
+ end
10
+
3
11
  # mongrel and http-parser both define http_parser_(init|execute), so we
4
12
  # rename functions in http-parser before using them.
5
13
  vendor_dir = File.expand_path('../vendor/http-parser/', __FILE__)
@@ -120,7 +120,7 @@ public class RubyHttpParser extends RubyObject {
120
120
  if (_current_header == null)
121
121
  _current_header = new String(data);
122
122
  else
123
- _current_header.concat(new String(data));
123
+ _current_header = _current_header.concat(new String(data));
124
124
 
125
125
  return 0;
126
126
  }
@@ -30,10 +30,10 @@ typedef struct ParserWrapper {
30
30
 
31
31
  VALUE callback_object;
32
32
  VALUE stopped;
33
+ VALUE completed;
33
34
 
34
35
  VALUE last_field_name;
35
- const char *last_field_name_at;
36
- size_t last_field_name_length;
36
+ VALUE curr_field_name;
37
37
 
38
38
  enum ryah_http_parser_type type;
39
39
  } ParserWrapper;
@@ -50,10 +50,10 @@ void ParserWrapper_init(ParserWrapper *wrapper) {
50
50
  wrapper->fragment = Qnil;
51
51
 
52
52
  wrapper->headers = Qnil;
53
+ wrapper->completed = Qfalse;
53
54
 
54
55
  wrapper->last_field_name = Qnil;
55
- wrapper->last_field_name_at = NULL;
56
- wrapper->last_field_name_length = 0;
56
+ wrapper->curr_field_name = Qnil;
57
57
  }
58
58
 
59
59
  void ParserWrapper_mark(void *data) {
@@ -70,6 +70,7 @@ void ParserWrapper_mark(void *data) {
70
70
  rb_gc_mark_maybe(wrapper->on_message_complete);
71
71
  rb_gc_mark_maybe(wrapper->callback_object);
72
72
  rb_gc_mark_maybe(wrapper->last_field_name);
73
+ rb_gc_mark_maybe(wrapper->curr_field_name);
73
74
  }
74
75
  }
75
76
 
@@ -147,13 +148,11 @@ int on_fragment(ryah_http_parser *parser, const char *at, size_t length) {
147
148
  int on_header_field(ryah_http_parser *parser, const char *at, size_t length) {
148
149
  GET_WRAPPER(wrapper, parser);
149
150
 
150
- wrapper->last_field_name = Qnil;
151
-
152
- if (wrapper->last_field_name_at == NULL) {
153
- wrapper->last_field_name_at = at;
154
- wrapper->last_field_name_length = length;
151
+ if (wrapper->curr_field_name == Qnil) {
152
+ wrapper->last_field_name = Qnil;
153
+ wrapper->curr_field_name = rb_str_new(at, length);
155
154
  } else {
156
- wrapper->last_field_name_length += length;
155
+ rb_str_cat(wrapper->curr_field_name, at, length);
157
156
  }
158
157
 
159
158
  return 0;
@@ -163,15 +162,13 @@ int on_header_value(ryah_http_parser *parser, const char *at, size_t length) {
163
162
  GET_WRAPPER(wrapper, parser);
164
163
 
165
164
  if (wrapper->last_field_name == Qnil) {
166
- wrapper->last_field_name = rb_str_new(wrapper->last_field_name_at, wrapper->last_field_name_length);
165
+ wrapper->last_field_name = wrapper->curr_field_name;
166
+ wrapper->curr_field_name = Qnil;
167
167
 
168
168
  VALUE val = rb_hash_aref(wrapper->headers, wrapper->last_field_name);
169
169
  if (val != Qnil) {
170
170
  rb_str_cat(val, ", ", 2);
171
171
  }
172
-
173
- wrapper->last_field_name_at = NULL;
174
- wrapper->last_field_name_length = 0;
175
172
  }
176
173
 
177
174
  HASH_CAT(wrapper->headers, wrapper->last_field_name, at, length);
@@ -221,6 +218,7 @@ int on_message_complete(ryah_http_parser *parser) {
221
218
  GET_WRAPPER(wrapper, parser);
222
219
 
223
220
  VALUE ret = Qnil;
221
+ wrapper->completed = Qtrue;
224
222
 
225
223
  if (wrapper->callback_object != Qnil && rb_respond_to(wrapper->callback_object, Ion_message_complete)) {
226
224
  ret = rb_funcall(wrapper->callback_object, Ion_message_complete, 0);
@@ -290,6 +288,8 @@ VALUE Parser_initialize(int argc, VALUE *argv, VALUE self) {
290
288
 
291
289
  VALUE Parser_execute(VALUE self, VALUE data) {
292
290
  ParserWrapper *wrapper = NULL;
291
+
292
+ Check_Type(data, T_STRING);
293
293
  char *ptr = RSTRING_PTR(data);
294
294
  long len = RSTRING_LEN(data);
295
295
 
@@ -301,7 +301,7 @@ VALUE Parser_execute(VALUE self, VALUE data) {
301
301
  if (wrapper->parser.upgrade) {
302
302
  // upgrade request
303
303
  } else if (nparsed != len) {
304
- if (!RTEST(wrapper->stopped))
304
+ if (!RTEST(wrapper->stopped) && !RTEST(wrapper->completed))
305
305
  rb_raise(eParserError, "Could not parse data entirely");
306
306
  else
307
307
  nparsed += 1; // error states fail on the current character
@@ -19,16 +19,6 @@
19
19
  * IN THE SOFTWARE.
20
20
  */
21
21
  #include <http_parser.h>
22
- #if defined(_WIN32) && !defined(__MINGW32__)
23
- typedef __int8 int8_t;
24
- typedef unsigned __int8 uint8_t;
25
- typedef __int16 int16_t;
26
- typedef unsigned __int16 uint16_t;
27
- typedef __int16 int32_t;
28
- typedef unsigned __int32 uint32_t;
29
- #else
30
- #include <stdint.h>
31
- #endif
32
22
  #include <assert.h>
33
23
  #include <stddef.h>
34
24
 
@@ -103,12 +93,21 @@ static const char *method_strings[] =
103
93
  , "MKACTIVITY"
104
94
  , "CHECKOUT"
105
95
  , "MERGE"
96
+ , "M-SEARCH"
97
+ , "NOTIFY"
98
+ , "SUBSCRIBE"
99
+ , "UNSUBSCRIBE"
106
100
  };
107
101
 
108
102
 
109
- /* ' ', '_', '-' and all alpha-numeric ascii characters are accepted by acceptable_header.
110
- The 'A'-'Z' are lower-cased. */
111
- static const char acceptable_header[256] = {
103
+ /* Tokens as defined by rfc 2616. Also lowercases them.
104
+ * token = 1*<any CHAR except CTLs or separators>
105
+ * separators = "(" | ")" | "<" | ">" | "@"
106
+ * | "," | ";" | ":" | "\" | <">
107
+ * | "/" | "[" | "]" | "?" | "="
108
+ * | "{" | "}" | SP | HT
109
+ */
110
+ static const char tokens[256] = {
112
111
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
113
112
  0, 0, 0, 0, 0, 0, 0, 0,
114
113
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
@@ -118,9 +117,9 @@ static const char acceptable_header[256] = {
118
117
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
119
118
  0, 0, 0, 0, 0, 0, 0, 0,
120
119
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
121
- ' ', 0, 0, 0, 0, 0, 0, 0,
120
+ ' ', '!', '"', '#', '$', '%', '&', '\'',
122
121
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
123
- 0, 0, 0, 0, 0, '-', 0, 0,
122
+ 0, 0, '*', '+', 0, '-', '.', '/',
124
123
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
125
124
  '0', '1', '2', '3', '4', '5', '6', '7',
126
125
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
@@ -132,15 +131,15 @@ static const char acceptable_header[256] = {
132
131
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
133
132
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
134
133
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
135
- 'x', 'y', 'z', 0, 0, 0, 0, '_',
134
+ 'x', 'y', 'z', 0, 0, 0, '^', '_',
136
135
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
137
- 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
136
+ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
138
137
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
139
138
  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
140
139
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
141
140
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
142
141
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
143
- 'x', 'y', 'z', 0, 0, 0, 0, 0 };
142
+ 'x', 'y', 'z', 0, '|', '}', '~', 0 };
144
143
 
145
144
 
146
145
  static const int8_t unhex[256] =
@@ -302,6 +301,7 @@ enum flags
302
301
  #define CR '\r'
303
302
  #define LF '\n'
304
303
  #define LOWER(c) (unsigned char)(c | 0x20)
304
+ #define TOKEN(c) tokens[(unsigned char)c]
305
305
 
306
306
 
307
307
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
@@ -579,12 +579,14 @@ size_t http_parser_execute (http_parser *parser,
579
579
  case 'G': parser->method = HTTP_GET; break;
580
580
  case 'H': parser->method = HTTP_HEAD; break;
581
581
  case 'L': parser->method = HTTP_LOCK; break;
582
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE */ break;
582
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
583
+ case 'N': parser->method = HTTP_NOTIFY; break;
583
584
  case 'O': parser->method = HTTP_OPTIONS; break;
584
585
  case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
585
586
  case 'R': parser->method = HTTP_REPORT; break;
587
+ case 'S': parser->method = HTTP_SUBSCRIBE; break;
586
588
  case 'T': parser->method = HTTP_TRACE; break;
587
- case 'U': parser->method = HTTP_UNLOCK; break;
589
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
588
590
  default: goto error;
589
591
  }
590
592
  state = s_req_method;
@@ -612,6 +614,8 @@ size_t http_parser_execute (http_parser *parser,
612
614
  parser->method = HTTP_MOVE;
613
615
  } else if (index == 1 && ch == 'E') {
614
616
  parser->method = HTTP_MERGE;
617
+ } else if (index == 1 && ch == '-') {
618
+ parser->method = HTTP_MSEARCH;
615
619
  } else if (index == 2 && ch == 'A') {
616
620
  parser->method = HTTP_MKACTIVITY;
617
621
  }
@@ -619,6 +623,8 @@ size_t http_parser_execute (http_parser *parser,
619
623
  parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
620
624
  } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
621
625
  parser->method = HTTP_PUT;
626
+ } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
627
+ parser->method = HTTP_UNSUBSCRIBE;
622
628
  } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
623
629
  parser->method = HTTP_PROPPATCH;
624
630
  } else {
@@ -632,7 +638,7 @@ size_t http_parser_execute (http_parser *parser,
632
638
  {
633
639
  if (ch == ' ') break;
634
640
 
635
- if (ch == '/') {
641
+ if (ch == '/' || ch == '*') {
636
642
  MARK(url);
637
643
  MARK(path);
638
644
  state = s_req_path;
@@ -662,6 +668,9 @@ size_t http_parser_execute (http_parser *parser,
662
668
  } else if (ch == '.') {
663
669
  state = s_req_host;
664
670
  break;
671
+ } else if ('0' <= ch && ch <= '9') {
672
+ state = s_req_host;
673
+ break;
665
674
  }
666
675
 
667
676
  goto error;
@@ -739,12 +748,14 @@ size_t http_parser_execute (http_parser *parser,
739
748
  case CR:
740
749
  CALLBACK(url);
741
750
  CALLBACK(path);
751
+ parser->http_major = 0;
742
752
  parser->http_minor = 9;
743
753
  state = s_req_line_almost_done;
744
754
  break;
745
755
  case LF:
746
756
  CALLBACK(url);
747
757
  CALLBACK(path);
758
+ parser->http_major = 0;
748
759
  parser->http_minor = 9;
749
760
  state = s_header_field_start;
750
761
  break;
@@ -779,11 +790,13 @@ size_t http_parser_execute (http_parser *parser,
779
790
  break;
780
791
  case CR:
781
792
  CALLBACK(url);
793
+ parser->http_major = 0;
782
794
  parser->http_minor = 9;
783
795
  state = s_req_line_almost_done;
784
796
  break;
785
797
  case LF:
786
798
  CALLBACK(url);
799
+ parser->http_major = 0;
787
800
  parser->http_minor = 9;
788
801
  state = s_header_field_start;
789
802
  break;
@@ -812,12 +825,14 @@ size_t http_parser_execute (http_parser *parser,
812
825
  case CR:
813
826
  CALLBACK(url);
814
827
  CALLBACK(query_string);
828
+ parser->http_major = 0;
815
829
  parser->http_minor = 9;
816
830
  state = s_req_line_almost_done;
817
831
  break;
818
832
  case LF:
819
833
  CALLBACK(url);
820
834
  CALLBACK(query_string);
835
+ parser->http_major = 0;
821
836
  parser->http_minor = 9;
822
837
  state = s_header_field_start;
823
838
  break;
@@ -846,11 +861,13 @@ size_t http_parser_execute (http_parser *parser,
846
861
  break;
847
862
  case CR:
848
863
  CALLBACK(url);
864
+ parser->http_major = 0;
849
865
  parser->http_minor = 9;
850
866
  state = s_req_line_almost_done;
851
867
  break;
852
868
  case LF:
853
869
  CALLBACK(url);
870
+ parser->http_major = 0;
854
871
  parser->http_minor = 9;
855
872
  state = s_header_field_start;
856
873
  break;
@@ -879,12 +896,14 @@ size_t http_parser_execute (http_parser *parser,
879
896
  case CR:
880
897
  CALLBACK(url);
881
898
  CALLBACK(fragment);
899
+ parser->http_major = 0;
882
900
  parser->http_minor = 9;
883
901
  state = s_req_line_almost_done;
884
902
  break;
885
903
  case LF:
886
904
  CALLBACK(url);
887
905
  CALLBACK(fragment);
906
+ parser->http_major = 0;
888
907
  parser->http_minor = 9;
889
908
  state = s_header_field_start;
890
909
  break;
@@ -1006,9 +1025,9 @@ size_t http_parser_execute (http_parser *parser,
1006
1025
  goto headers_almost_done;
1007
1026
  }
1008
1027
 
1009
- c = LOWER(ch);
1028
+ c = TOKEN(ch);
1010
1029
 
1011
- if (c < 'a' || 'z' < c) goto error;
1030
+ if (!c) goto error;
1012
1031
 
1013
1032
  MARK(header_field);
1014
1033
 
@@ -1041,7 +1060,7 @@ size_t http_parser_execute (http_parser *parser,
1041
1060
 
1042
1061
  case s_header_field:
1043
1062
  {
1044
- c = acceptable_header[(unsigned char)ch];
1063
+ c = TOKEN(ch);
1045
1064
 
1046
1065
  if (c) {
1047
1066
  switch (header_state) {
@@ -1177,23 +1196,18 @@ size_t http_parser_execute (http_parser *parser,
1177
1196
  state = s_header_value;
1178
1197
  index = 0;
1179
1198
 
1180
- c = acceptable_header[(unsigned char)ch];
1181
-
1182
- if (!c) {
1183
- if (ch == CR) {
1184
- CALLBACK(header_value);
1185
- header_state = h_general;
1186
- state = s_header_almost_done;
1187
- break;
1188
- }
1189
-
1190
- if (ch == LF) {
1191
- CALLBACK(header_value);
1192
- state = s_header_field_start;
1193
- break;
1194
- }
1199
+ c = LOWER(ch);
1195
1200
 
1201
+ if (ch == CR) {
1202
+ CALLBACK(header_value);
1196
1203
  header_state = h_general;
1204
+ state = s_header_almost_done;
1205
+ break;
1206
+ }
1207
+
1208
+ if (ch == LF) {
1209
+ CALLBACK(header_value);
1210
+ state = s_header_field_start;
1197
1211
  break;
1198
1212
  }
1199
1213
 
@@ -1238,22 +1252,19 @@ size_t http_parser_execute (http_parser *parser,
1238
1252
 
1239
1253
  case s_header_value:
1240
1254
  {
1241
- c = acceptable_header[(unsigned char)ch];
1242
-
1243
- if (!c) {
1244
- if (ch == CR) {
1245
- CALLBACK(header_value);
1246
- state = s_header_almost_done;
1247
- break;
1248
- }
1255
+ c = LOWER(ch);
1249
1256
 
1250
- if (ch == LF) {
1251
- CALLBACK(header_value);
1252
- goto header_almost_done;
1253
- }
1257
+ if (ch == CR) {
1258
+ CALLBACK(header_value);
1259
+ state = s_header_almost_done;
1254
1260
  break;
1255
1261
  }
1256
1262
 
1263
+ if (ch == LF) {
1264
+ CALLBACK(header_value);
1265
+ goto header_almost_done;
1266
+ }
1267
+
1257
1268
  switch (header_state) {
1258
1269
  case h_general:
1259
1270
  break;
@@ -1373,6 +1384,7 @@ size_t http_parser_execute (http_parser *parser,
1373
1384
  break;
1374
1385
 
1375
1386
  default:
1387
+ parser->state = state;
1376
1388
  return p - data; /* Error */
1377
1389
  }
1378
1390
  }
@@ -26,11 +26,20 @@ extern "C" {
26
26
 
27
27
 
28
28
  #include <sys/types.h>
29
- #include <stdint.h>
30
-
31
29
  #if defined(_WIN32) && !defined(__MINGW32__)
30
+ typedef __int8 int8_t;
31
+ typedef unsigned __int8 uint8_t;
32
+ typedef __int16 int16_t;
33
+ typedef unsigned __int16 uint16_t;
34
+ typedef __int32 int32_t;
35
+ typedef unsigned __int32 uint32_t;
36
+ typedef __int64 int64_t;
37
+ typedef unsigned __int64 uint64_t;
38
+
32
39
  typedef unsigned int size_t;
33
40
  typedef int ssize_t;
41
+ #else
42
+ #include <stdint.h>
34
43
  #endif
35
44
 
36
45
  /* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run
@@ -92,6 +101,11 @@ enum http_method
92
101
  , HTTP_MKACTIVITY
93
102
  , HTTP_CHECKOUT
94
103
  , HTTP_MERGE
104
+ /* upnp */
105
+ , HTTP_MSEARCH
106
+ , HTTP_NOTIFY
107
+ , HTTP_SUBSCRIBE
108
+ , HTTP_UNSUBSCRIBE
95
109
  };
96
110
 
97
111
 
@@ -31,7 +31,7 @@
31
31
  #undef FALSE
32
32
  #define FALSE 0
33
33
 
34
- #define MAX_HEADERS 10
34
+ #define MAX_HEADERS 13
35
35
  #define MAX_ELEMENT_SIZE 500
36
36
 
37
37
  #define MIN(a,b) ((a) < (b) ? (a) : (b))
@@ -498,7 +498,7 @@ const struct message requests[] =
498
498
  #define CONNECT_REQUEST 17
499
499
  , {.name = "connect request"
500
500
  ,.type= HTTP_REQUEST
501
- ,.raw= "CONNECT home.netscape.com:443 HTTP/1.0\r\n"
501
+ ,.raw= "CONNECT home0.netscape.com:443 HTTP/1.0\r\n"
502
502
  "User-agent: Mozilla/1.1N\r\n"
503
503
  "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n"
504
504
  "\r\n"
@@ -510,7 +510,7 @@ const struct message requests[] =
510
510
  ,.query_string= ""
511
511
  ,.fragment= ""
512
512
  ,.request_path= ""
513
- ,.request_url= "home.netscape.com:443"
513
+ ,.request_url= "home0.netscape.com:443"
514
514
  ,.num_headers= 2
515
515
  ,.upgrade=1
516
516
  ,.headers= { { "User-agent", "Mozilla/1.1N" }
@@ -538,6 +538,50 @@ const struct message requests[] =
538
538
  ,.body= ""
539
539
  }
540
540
 
541
+ #define NO_HTTP_VERSION 19
542
+ , {.name= "request with no http version"
543
+ ,.type= HTTP_REQUEST
544
+ ,.raw= "GET /\r\n"
545
+ "\r\n"
546
+ ,.should_keep_alive= FALSE
547
+ ,.message_complete_on_eof= FALSE
548
+ ,.http_major= 0
549
+ ,.http_minor= 9
550
+ ,.method= HTTP_GET
551
+ ,.query_string= ""
552
+ ,.fragment= ""
553
+ ,.request_path= "/"
554
+ ,.request_url= "/"
555
+ ,.num_headers= 0
556
+ ,.headers= {}
557
+ ,.body= ""
558
+ }
559
+
560
+ #define MSEARCH_REQ 19
561
+ , {.name= "m-search request"
562
+ ,.type= HTTP_REQUEST
563
+ ,.raw= "M-SEARCH * HTTP/1.1\r\n"
564
+ "HOST: 239.255.255.250:1900\r\n"
565
+ "MAN: \"ssdp:discover\"\r\n"
566
+ "ST: \"ssdp:all\"\r\n"
567
+ "\r\n"
568
+ ,.should_keep_alive= TRUE
569
+ ,.message_complete_on_eof= FALSE
570
+ ,.http_major= 1
571
+ ,.http_minor= 1
572
+ ,.method= HTTP_MSEARCH
573
+ ,.query_string= ""
574
+ ,.fragment= ""
575
+ ,.request_path= "*"
576
+ ,.request_url= "*"
577
+ ,.num_headers= 3
578
+ ,.headers= { { "HOST", "239.255.255.250:1900" }
579
+ , { "MAN", "\"ssdp:discover\"" }
580
+ , { "ST", "\"ssdp:all\"" }
581
+ }
582
+ ,.body= ""
583
+ }
584
+
541
585
  , {.name= NULL } /* sentinel */
542
586
  };
543
587
 
@@ -551,9 +595,10 @@ const struct message responses[] =
551
595
  "Content-Type: text/html; charset=UTF-8\r\n"
552
596
  "Date: Sun, 26 Apr 2009 11:11:49 GMT\r\n"
553
597
  "Expires: Tue, 26 May 2009 11:11:49 GMT\r\n"
598
+ "X-$PrototypeBI-Version: 1.6.0.3\r\n" /* $ char in header field */
554
599
  "Cache-Control: public, max-age=2592000\r\n"
555
600
  "Server: gws\r\n"
556
- "Content-Length: 219\r\n"
601
+ "Content-Length: 219 \r\n"
557
602
  "\r\n"
558
603
  "<HTML><HEAD><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n"
559
604
  "<TITLE>301 Moved</TITLE></HEAD><BODY>\n"
@@ -566,15 +611,16 @@ const struct message responses[] =
566
611
  ,.http_major= 1
567
612
  ,.http_minor= 1
568
613
  ,.status_code= 301
569
- ,.num_headers= 7
614
+ ,.num_headers= 8
570
615
  ,.headers=
571
616
  { { "Location", "http://www.google.com/" }
572
617
  , { "Content-Type", "text/html; charset=UTF-8" }
573
618
  , { "Date", "Sun, 26 Apr 2009 11:11:49 GMT" }
574
619
  , { "Expires", "Tue, 26 May 2009 11:11:49 GMT" }
620
+ , { "X-$PrototypeBI-Version", "1.6.0.3" }
575
621
  , { "Cache-Control", "public, max-age=2592000" }
576
622
  , { "Server", "gws" }
577
- , { "Content-Length", "219" }
623
+ , { "Content-Length", "219 " }
578
624
  }
579
625
  ,.body= "<HTML><HEAD><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n"
580
626
  "<TITLE>301 Moved</TITLE></HEAD><BODY>\n"
@@ -833,6 +879,71 @@ const struct message responses[] =
833
879
  ,.body= "<xml>hello</xml>"
834
880
  }
835
881
 
882
+
883
+ #define RES_FIELD_UNDERSCORE 10
884
+ /* Should handle header field names that begin with '_' or '.' */
885
+ , {.name= "field underscore"
886
+ ,.type= HTTP_RESPONSE
887
+ ,.raw= "HTTP/1.1 200 OK\r\n"
888
+ "Date: Tue, 28 Sep 2010 01:14:13 GMT\r\n"
889
+ "Server: Apache\r\n"
890
+ "Cache-Control: no-cache, must-revalidate\r\n"
891
+ "Expires: Mon, 26 Jul 1997 05:00:00 GMT\r\n"
892
+ ".et-Cookie: PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com\r\n"
893
+ "Vary: Accept-Encoding\r\n"
894
+ "_eep-Alive: timeout=45\r\n" /* semantic value ignored */
895
+ "_onnection: Keep-Alive\r\n" /* semantic value ignored */
896
+ "Transfer-Encoding: chunked\r\n"
897
+ "Content-Type: text/html\r\n"
898
+ "Connection: close\r\n"
899
+ "\r\n"
900
+ "0\r\n\r\n"
901
+ ,.should_keep_alive= FALSE
902
+ ,.message_complete_on_eof= FALSE
903
+ ,.http_major= 1
904
+ ,.http_minor= 1
905
+ ,.status_code= 200
906
+ ,.num_headers= 11
907
+ ,.headers=
908
+ { { "Date", "Tue, 28 Sep 2010 01:14:13 GMT" }
909
+ , { "Server", "Apache" }
910
+ , { "Cache-Control", "no-cache, must-revalidate" }
911
+ , { "Expires", "Mon, 26 Jul 1997 05:00:00 GMT" }
912
+ , { ".et-Cookie", "PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com" }
913
+ , { "Vary", "Accept-Encoding" }
914
+ , { "_eep-Alive", "timeout=45" }
915
+ , { "_onnection", "Keep-Alive" }
916
+ , { "Transfer-Encoding", "chunked" }
917
+ , { "Content-Type", "text/html" }
918
+ , { "Connection", "close" }
919
+ }
920
+ ,.body= ""
921
+ }
922
+
923
+ #define NON_ASCII_IN_STATUS_LINE 11
924
+ /* Should handle non-ASCII in status line */
925
+ , {.name= "non-ASCII in status line"
926
+ ,.type= HTTP_RESPONSE
927
+ ,.raw= "HTTP/1.1 500 Oriëntatieprobleem\r\n"
928
+ "Date: Fri, 5 Nov 2010 23:07:12 GMT+2\r\n"
929
+ "Content-Length: 0\r\n"
930
+ "Connection: close\r\n"
931
+ "\r\n"
932
+ ,.should_keep_alive= FALSE
933
+ ,.message_complete_on_eof= FALSE
934
+ ,.http_major= 1
935
+ ,.http_minor= 1
936
+ ,.status_code= 500
937
+ ,.num_headers= 3
938
+ ,.headers=
939
+ { { "Date", "Fri, 5 Nov 2010 23:07:12 GMT+2" }
940
+ , { "Content-Length", "0" }
941
+ , { "Connection", "close" }
942
+ }
943
+ ,.body= ""
944
+ }
945
+
946
+
836
947
  , {.name= NULL } /* sentinel */
837
948
  };
838
949
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "http_parser.rb"
3
- s.version = "0.5.0"
3
+ s.version = "0.5.1"
4
4
  s.summary = "Simple callback-based HTTP request/response parser"
5
5
  s.description = "Ruby bindings to http://github.com/ry/http-parser and http://github.com/a2800276/http-parser.java"
6
6
 
@@ -12,4 +12,8 @@ Gem::Specification.new do |s|
12
12
 
13
13
  s.require_paths = ["lib"]
14
14
  s.extensions = ["ext/ruby_http_parser/extconf.rb"]
15
+
16
+ s.add_development_dependency 'rake-compiler', '>= 0.7.5'
17
+ s.add_development_dependency 'rspec', '>= 2.0.1'
18
+ s.add_development_dependency 'json', '>= 1.4.6'
15
19
  end
@@ -115,6 +115,23 @@ describe HTTP::Parser do
115
115
  @done.should be_true
116
116
  end
117
117
 
118
+ it "should parse headers incrementally" do
119
+ request =
120
+ "GET / HTTP/1.0\r\n" +
121
+ "Header1: value 1\r\n" +
122
+ "Header2: value 2\r\n" +
123
+ "\r\n"
124
+
125
+ while chunk = request.slice!(0,2) and !chunk.empty?
126
+ @parser << chunk
127
+ end
128
+
129
+ @parser.headers.should == {
130
+ 'Header1' => 'value 1',
131
+ 'Header2' => 'value 2'
132
+ }
133
+ end
134
+
118
135
  it "should handle multiple headers" do
119
136
  @parser <<
120
137
  "GET / HTTP/1.0\r\n" +
@@ -143,6 +160,18 @@ describe HTTP::Parser do
143
160
  @done.should be_true
144
161
  end
145
162
 
163
+ it "should ignore extra content beyond specified length" do
164
+ @parser <<
165
+ "GET / HTTP/1.0\r\n" +
166
+ "Content-Length: 5\r\n" +
167
+ "\r\n" +
168
+ "hello" +
169
+ " \n"
170
+
171
+ @body.should == 'hello'
172
+ @done.should be_true
173
+ end
174
+
146
175
  %w[ request response ].each do |type|
147
176
  JSON.parse(File.read(File.expand_path("../support/#{type}s.json", __FILE__))).each do |test|
148
177
  test['headers'] ||= {}
@@ -1,2 +1 @@
1
- require "rubygems"
2
1
  require "http_parser"
@@ -341,7 +341,7 @@
341
341
  {
342
342
  "name": "connect request",
343
343
  "type": "HTTP_REQUEST",
344
- "raw": "CONNECT home.netscape.com:443 HTTP/1.0\r\nUser-agent: Mozilla/1.1N\r\nProxy-authorization: basic aGVsbG86d29ybGQ=\r\n\r\n",
344
+ "raw": "CONNECT home0.netscape.com:443 HTTP/1.0\r\nUser-agent: Mozilla/1.1N\r\nProxy-authorization: basic aGVsbG86d29ybGQ=\r\n\r\n",
345
345
  "should_keep_alive": false,
346
346
  "message_complete_on_eof": false,
347
347
  "http_major": 1,
@@ -350,7 +350,7 @@
350
350
  "query_string": "",
351
351
  "fragment": "",
352
352
  "request_path": "",
353
- "request_url": "home.netscape.com:443",
353
+ "request_url": "home0.netscape.com:443",
354
354
  "num_headers": 2,
355
355
  "upgrade": 1,
356
356
  "headers": {
@@ -375,6 +375,25 @@
375
375
  "num_headers": 0,
376
376
  "headers": {
377
377
 
378
+ },
379
+ "body": ""
380
+ },
381
+ {
382
+ "name": "request with no http version",
383
+ "type": "HTTP_REQUEST",
384
+ "raw": "GET /\r\n\r\n",
385
+ "should_keep_alive": false,
386
+ "message_complete_on_eof": false,
387
+ "http_major": 0,
388
+ "http_minor": 9,
389
+ "method": "GET",
390
+ "query_string": "",
391
+ "fragment": "",
392
+ "request_path": "/",
393
+ "request_url": "/",
394
+ "num_headers": 0,
395
+ "headers": {
396
+
378
397
  },
379
398
  "body": ""
380
399
  }
@@ -2,21 +2,22 @@
2
2
  {
3
3
  "name": "google 301",
4
4
  "type": "HTTP_RESPONSE",
5
- "raw": "HTTP/1.1 301 Moved Permanently\r\nLocation: http://www.google.com/\r\nContent-Type: text/html; charset=UTF-8\r\nDate: Sun, 26 Apr 2009 11:11:49 GMT\r\nExpires: Tue, 26 May 2009 11:11:49 GMT\r\nCache-Control: public, max-age=2592000\r\nServer: gws\r\nContent-Length: 219\r\n\r\n<HTML><HEAD><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<TITLE>301 Moved</TITLE></HEAD><BODY>\n<H1>301 Moved</H1>\nThe document has moved\n<A HREF=\"http://www.google.com/\">here</A>.\r\n</BODY></HTML>\r\n",
5
+ "raw": "HTTP/1.1 301 Moved Permanently\r\nLocation: http://www.google.com/\r\nContent-Type: text/html; charset=UTF-8\r\nDate: Sun, 26 Apr 2009 11:11:49 GMT\r\nExpires: Tue, 26 May 2009 11:11:49 GMT\r\nX-$PrototypeBI-Version: 1.6.0.3\r\nCache-Control: public, max-age=2592000\r\nServer: gws\r\nContent-Length: 219 \r\n\r\n<HTML><HEAD><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<TITLE>301 Moved</TITLE></HEAD><BODY>\n<H1>301 Moved</H1>\nThe document has moved\n<A HREF=\"http://www.google.com/\">here</A>.\r\n</BODY></HTML>\r\n",
6
6
  "should_keep_alive": true,
7
7
  "message_complete_on_eof": false,
8
8
  "http_major": 1,
9
9
  "http_minor": 1,
10
10
  "status_code": 301,
11
- "num_headers": 7,
11
+ "num_headers": 8,
12
12
  "headers": {
13
13
  "Location": "http://www.google.com/",
14
14
  "Content-Type": "text/html; charset=UTF-8",
15
15
  "Date": "Sun, 26 Apr 2009 11:11:49 GMT",
16
16
  "Expires": "Tue, 26 May 2009 11:11:49 GMT",
17
+ "X-$PrototypeBI-Version": "1.6.0.3",
17
18
  "Cache-Control": "public, max-age=2592000",
18
19
  "Server": "gws",
19
- "Content-Length": "219"
20
+ "Content-Length": "219 "
20
21
  },
21
22
  "body": "<HTML><HEAD><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<TITLE>301 Moved</TITLE></HEAD><BODY>\n<H1>301 Moved</H1>\nThe document has moved\n<A HREF=\"http://www.google.com/\">here</A>.\r\n</BODY></HTML>\r\n"
22
23
  },
@@ -182,5 +183,47 @@
182
183
  "Connection": "keep-alive"
183
184
  },
184
185
  "body": "<xml>hello</xml>"
186
+ },
187
+ {
188
+ "name": "field underscore",
189
+ "type": "HTTP_RESPONSE",
190
+ "raw": "HTTP/1.1 200 OK\r\nDate: Tue, 28 Sep 2010 01:14:13 GMT\r\nServer: Apache\r\nCache-Control: no-cache, must-revalidate\r\nExpires: Mon, 26 Jul 1997 05:00:00 GMT\r\n.et-Cookie: PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com\r\nVary: Accept-Encoding\r\n_eep-Alive: timeout=45\r\n_onnection: Keep-Alive\r\nTransfer-Encoding: chunked\r\nContent-Type: text/html\r\nConnection: close\r\n\r\n0\r\n\r\n",
191
+ "should_keep_alive": false,
192
+ "message_complete_on_eof": false,
193
+ "http_major": 1,
194
+ "http_minor": 1,
195
+ "status_code": 200,
196
+ "num_headers": 11,
197
+ "headers": {
198
+ "Date": "Tue, 28 Sep 2010 01:14:13 GMT",
199
+ "Server": "Apache",
200
+ "Cache-Control": "no-cache, must-revalidate",
201
+ "Expires": "Mon, 26 Jul 1997 05:00:00 GMT",
202
+ ".et-Cookie": "PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com",
203
+ "Vary": "Accept-Encoding",
204
+ "_eep-Alive": "timeout=45",
205
+ "_onnection": "Keep-Alive",
206
+ "Transfer-Encoding": "chunked",
207
+ "Content-Type": "text/html",
208
+ "Connection": "close"
209
+ },
210
+ "body": ""
211
+ },
212
+ {
213
+ "name": "non-ASCII in status line",
214
+ "type": "HTTP_RESPONSE",
215
+ "raw": "HTTP/1.1 500 Oriëntatieprobleem\r\nDate: Fri, 5 Nov 2010 23:07:12 GMT+2\r\nContent-Length: 0\r\nConnection: close\r\n\r\n",
216
+ "should_keep_alive": false,
217
+ "message_complete_on_eof": false,
218
+ "http_major": 1,
219
+ "http_minor": 1,
220
+ "status_code": 500,
221
+ "num_headers": 3,
222
+ "headers": {
223
+ "Date": "Fri, 5 Nov 2010 23:07:12 GMT+2",
224
+ "Content-Length": "0",
225
+ "Connection": "close"
226
+ },
227
+ "body": ""
185
228
  }
186
229
  ]
@@ -32,6 +32,7 @@ RUBY_VERSION =~ /(\\d+.\\d+)/
32
32
  require "\#{$1}/ruby_http_parser"
33
33
  eoruby
34
34
  end
35
+ at_exit{ FileUtils.rm t.name if File.exists?(t.name) }
35
36
  end
36
37
 
37
38
  if Rake::Task.task_defined?(:cross)
@@ -0,0 +1,65 @@
1
+ desc "Generate test fixtures"
2
+ task :fixtures => :submodules do
3
+ require 'yajl'
4
+ data = File.read File.expand_path('../../ext/ruby_http_parser/vendor/http-parser/test.c', __FILE__)
5
+
6
+ %w[ requests responses ].each do |type|
7
+ # find test definitions in between requests/responses[]= and .name=NULL
8
+ tmp = data[/#{type}\[\]\s*=(.+?),\s*\{\s*\.name=\s*NULL/m, 1]
9
+
10
+ # replace first { with a [ (parsing an array of test cases)
11
+ tmp.sub!('{','[')
12
+
13
+ # replace booleans
14
+ tmp.gsub!('TRUE', 'true')
15
+ tmp.gsub!('FALSE', 'false')
16
+
17
+ # remove macros and comments
18
+ tmp.gsub!(/^#define.+$/,'')
19
+ tmp.gsub!(/\/\*(.+?)\*\/$/,'')
20
+
21
+ # HTTP_* enums become strings
22
+ tmp.gsub!(/(= )(HTTP_\w+)/){
23
+ "#{$1}#{$2.dump}"
24
+ }
25
+
26
+ # join multiline strings for body and raw data
27
+ tmp.gsub!(/((body|raw)\s*=)(.+?)(\n\s+[\},])/m){
28
+ before, after = $1, $4
29
+ raw = $3.split("\n").map{ |l| l.strip[1..-2] }.join('')
30
+ "#{before} \"#{raw}\" #{after}"
31
+ }
32
+
33
+ # make headers an array of array tuples
34
+ tmp.gsub!(/(\.headers\s*=)(.+?)(\s*,\.)/m){
35
+ before, after = $1, $3
36
+ raw = $2.gsub('{', '[').gsub('}', ']')
37
+ "#{before} #{raw} #{after}"
38
+ }
39
+
40
+ # .name= becomes "name":
41
+ tmp.gsub!(/^(.{2,5})\.(\w+)\s*=/){
42
+ "#{$1}#{$2.dump}: "
43
+ }
44
+
45
+ # evaluate addition expressions
46
+ tmp.gsub!(/(body_size\":\s*)(\d+)\+(\d+)/){
47
+ "#{$1}#{$2.to_i+$3.to_i}"
48
+ }
49
+
50
+ # end result array
51
+ tmp << ']'
52
+
53
+ # normalize data
54
+ results = Yajl.load(tmp, :symbolize_keys => true)
55
+ results.map{ |res|
56
+ res[:headers] and res[:headers] = Hash[*res[:headers].flatten]
57
+ res[:method] and res[:method].gsub!(/^HTTP_/, '')
58
+ }
59
+
60
+ # write to a file
61
+ File.open("spec/support/#{type}.json", 'w'){ |f|
62
+ f.write Yajl.dump(results, :pretty => true)
63
+ }
64
+ end
65
+ end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 5
8
- - 0
9
- version: 0.5.0
8
+ - 1
9
+ version: 0.5.1
10
10
  platform: java
11
11
  authors:
12
12
  - Marc-Andre Cournoyer
@@ -15,10 +15,51 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-10-28 00:00:00 -07:00
18
+ date: 2011-01-25 00:00:00 -08:00
19
19
  default_executable:
20
- dependencies: []
21
-
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rake-compiler
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ - 7
31
+ - 5
32
+ version: 0.7.5
33
+ type: :development
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rspec
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ segments:
43
+ - 2
44
+ - 0
45
+ - 1
46
+ version: 2.0.1
47
+ type: :development
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: json
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ segments:
57
+ - 1
58
+ - 4
59
+ - 6
60
+ version: 1.4.6
61
+ type: :development
62
+ version_requirements: *id003
22
63
  description: Ruby bindings to http://github.com/ry/http-parser and http://github.com/a2800276/http-parser.java
23
64
  email:
24
65
  - macournoyer@gmail.com
@@ -32,6 +73,8 @@ extra_rdoc_files: []
32
73
  files:
33
74
  - .gitignore
34
75
  - .gitmodules
76
+ - Gemfile
77
+ - Gemfile.lock
35
78
  - README.md
36
79
  - Rakefile
37
80
  - bench/thin.rb
@@ -50,6 +93,7 @@ files:
50
93
  - spec/support/requests.json
51
94
  - spec/support/responses.json
52
95
  - tasks/compile.rake
96
+ - tasks/fixtures.rake
53
97
  - tasks/spec.rake
54
98
  - tasks/submodules.rake
55
99
  - ext/ruby_http_parser/vendor/http-parser/CONTRIBUTIONS