hpricot 0.7 → 0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -19,20 +19,26 @@ VALUE hpricot_css(VALUE, VALUE, VALUE, VALUE, VALUE);
19
19
  #define NO_WAY_SERIOUSLY "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!"
20
20
 
21
21
  static VALUE sym_xmldecl, sym_doctype, sym_procins, sym_stag, sym_etag, sym_emptytag, sym_comment,
22
- sym_cdata, sym_text, sym_EMPTY, sym_CDATA;
22
+ sym_cdata, sym_name, sym_parent, sym_raw_attributes, sym_raw_string, sym_tagno,
23
+ sym_allowed, sym_text, sym_children, sym_EMPTY, sym_CDATA;
23
24
  static VALUE mHpricot, rb_eHpricotParseError;
24
- static VALUE cBaseEle, cBogusETag, cCData, cComment, cDoc, cDocType, cElem, cETag, cText,
25
+ static VALUE cBogusETag, cCData, cComment, cDoc, cDocType, cElem, cText,
25
26
  cXMLDecl, cProcIns, symAllow, symDeny;
26
27
  static ID s_ElementContent;
27
28
  static ID s_downcase, s_new, s_parent, s_read, s_to_str;
28
- static ID iv_parent;
29
29
  static VALUE reProcInsParse;
30
30
 
31
- typedef struct {
32
- int name;
33
- VALUE tag, attr, etag, raw, EC;
34
- VALUE parent, children;
35
- } hpricot_ele;
/*
 * Slot indices for the positional storage of an element object. These added
 * lines replace the fields of the removed hpricot_ele C struct. The order
 * matches the member list passed to make_hpricot_struct for structElem:
 * name, parent, raw_attributes, etag, raw_string, allowed, tagno, children.
 */
31
+ #define H_ELE_TAG 0
32
+ #define H_ELE_PARENT 1
33
+ #define H_ELE_ATTR 2
34
+ #define H_ELE_ETAG 3
35
+ #define H_ELE_RAW 4
36
+ #define H_ELE_EC 5
37
+ #define H_ELE_HASH 6
38
+ #define H_ELE_CHILDREN 7
39
+
/*
 * Raw slot access through RSTRUCT_PTR. Neither macro checks idx against the
 * number of slots the object was allocated with, and the 2-member and
 * 3-member struct classes have fewer slots than the 8 indices above.
 * H_ELE_SET expands to a bare, unparenthesized assignment expression, so its
 * value is the assigned val and it should be used as a full statement or as
 * a return operand only.
 */
40
+ #define H_ELE_GET(ele, idx) RSTRUCT_PTR(ele)[idx]
41
+ #define H_ELE_SET(ele, idx, val) RSTRUCT_PTR(ele)[idx] = val
36
42
 
37
43
  #define OPT(opts, key) (!NIL_P(opts) && RTEST(rb_hash_aref(opts, ID2SYM(rb_intern("" # key)))))
38
44
 
@@ -107,7 +113,7 @@ typedef struct {
107
113
  action tag { SET(tag, p); }
108
114
  action tagc { SET(tag, p-1); }
109
115
  action aval { SET(aval, p); }
110
- action aunq {
116
+ action aunq {
111
117
  if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); }
112
118
  else { SET(aval, p); }
113
119
  }
@@ -118,14 +124,14 @@ typedef struct {
118
124
  action pubid { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); }
119
125
  action sysid { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); }
120
126
 
121
- action new_attr {
127
+ action new_attr {
122
128
  akey = Qnil;
123
129
  aval = Qnil;
124
130
  mark_akey = NULL;
125
131
  mark_aval = NULL;
126
132
  }
127
133
 
128
- action save_attr {
134
+ action save_attr {
129
135
  ATTR(akey, aval);
130
136
  }
131
137
 
@@ -144,7 +150,7 @@ void rb_yield_tokens(VALUE sym, VALUE tag, VALUE attr, VALUE raw, int taint)
144
150
  raw = tag;
145
151
  }
146
152
  ary = rb_ary_new3(4, sym, tag, attr, raw);
147
- if (taint) {
153
+ if (taint) {
148
154
  OBJ_TAINT(ary);
149
155
  OBJ_TAINT(tag);
150
156
  OBJ_TAINT(attr);
@@ -153,6 +159,7 @@ void rb_yield_tokens(VALUE sym, VALUE tag, VALUE attr, VALUE raw, int taint)
153
159
  rb_yield(ary);
154
160
  }
155
161
 
162
+ #ifndef RHASH_TBL
156
163
  /* rb_hash_lookup() is only in Ruby 1.8.7 */
157
164
  static VALUE
158
165
  our_rb_hash_lookup(VALUE hash, VALUE key)
@@ -165,17 +172,17 @@ our_rb_hash_lookup(VALUE hash, VALUE key)
165
172
 
166
173
  return val;
167
174
  }
175
+ #define rb_hash_lookup our_rb_hash_lookup
176
+ #endif
168
177
 
/*
 * Appends ele to the children array of focus and records focus as the parent
 * of ele.
 *
 * focus: object whose H_ELE_CHILDREN slot holds nil or an array.
 * ele:   object whose H_ELE_PARENT slot is overwritten with focus.
 *
 * The removed (-) lines did the same through Data_Get_Struct on hpricot_ele.
 * The added (+) lines use slot access instead, and create the children array
 * lazily with rb_ary_new2(1) the first time a child is added.
 */
169
178
  static void
170
179
  rb_hpricot_add(VALUE focus, VALUE ele)
171
180
  {
172
- hpricot_ele *he, *he2;
173
- Data_Get_Struct(focus, hpricot_ele, he);
174
- Data_Get_Struct(ele, hpricot_ele, he2);
175
- if (NIL_P(he->children))
176
- he->children = rb_ary_new();
177
- rb_ary_push(he->children, ele);
178
- he2->parent = focus;
181
+ VALUE children = H_ELE_GET(focus, H_ELE_CHILDREN);
182
+ if (NIL_P(children))
183
+ H_ELE_SET(focus, H_ELE_CHILDREN, (children = rb_ary_new2(1)));
184
+ rb_ary_push(children, ele);
185
+ H_ELE_SET(ele, H_ELE_PARENT, focus);
179
186
  }
180
187
 
181
188
  typedef struct {
@@ -186,102 +193,70 @@ typedef struct {
186
193
  unsigned char xml, strict, fixup;
187
194
  } hpricot_state;
188
195
 
189
- static void
190
- hpricot_ele_mark(hpricot_ele *he)
191
- {
192
- rb_gc_mark(he->tag);
193
- rb_gc_mark(he->attr);
194
- rb_gc_mark(he->etag);
195
- rb_gc_mark(he->raw);
196
- rb_gc_mark(he->parent);
197
- rb_gc_mark(he->children);
198
- }
199
-
200
- static void
201
- hpricot_ele_free(hpricot_ele *he)
202
- {
203
- free(he);
204
- }
205
-
/*
 * H_PROP generates static accessor functions for one storage slot.
 *
 * Old form (-): H_PROP(prop) emitted hpricot_ele_set_<prop> and
 * hpricot_ele_get_<prop>, reading and writing he->prop on the wrapped
 * hpricot_ele.
 *
 * New form (+): H_PROP(prop, idx) emits three functions over slot idx:
 *   hpricot_ele_set_<prop>(self, x)  stores x and returns self
 *   hpricot_ele_clear_<prop>(self)   stores Qnil and returns Qtrue
 *   hpricot_ele_get_<prop>(self)     returns the slot value
 */
206
- #define H_PROP(prop) \
196
+ #define H_PROP(prop, idx) \
207
197
  static VALUE hpricot_ele_set_##prop(VALUE self, VALUE x) { \
208
- hpricot_ele *he; \
209
- Data_Get_Struct(self, hpricot_ele, he); \
210
- he->prop = x; \
198
+ H_ELE_SET(self, idx, x); \
211
199
  return self; \
212
200
  } \
201
+ static VALUE hpricot_ele_clear_##prop(VALUE self) { \
202
+ H_ELE_SET(self, idx, Qnil); \
203
+ return Qtrue; \
204
+ } \
213
205
  static VALUE hpricot_ele_get_##prop(VALUE self) { \
214
- hpricot_ele *he; \
215
- Data_Get_Struct(self, hpricot_ele, he); \
216
- return he->prop; \
206
+ return H_ELE_GET(self, idx); \
217
207
  }
218
208
 
/*
 * H_ATTR(prop) generates hpricot_ele_set_<prop> and hpricot_ele_get_<prop>
 * for a value kept inside the attribute hash rather than in its own slot.
 * The key is the symbol made from the stringized prop name. The setter
 * returns self and the getter returns the rb_hash_aref result.
 *
 * The only change in this diff is where the hash comes from: he->attr in the
 * removed lines, the H_ELE_ATTR slot in the added lines. The slot is passed
 * to rb_hash_aset/rb_hash_aref without checking that it holds a hash.
 */
219
209
  #define H_ATTR(prop) \
220
210
  static VALUE hpricot_ele_set_##prop(VALUE self, VALUE x) { \
221
- hpricot_ele *he; \
222
- Data_Get_Struct(self, hpricot_ele, he); \
223
- rb_hash_aset(he->attr, ID2SYM(rb_intern("" # prop)), x); \
211
+ rb_hash_aset(H_ELE_GET(self, H_ELE_ATTR), ID2SYM(rb_intern("" # prop)), x); \
224
212
  return self; \
225
213
  } \
226
214
  static VALUE hpricot_ele_get_##prop(VALUE self) { \
227
- hpricot_ele *he; \
228
- Data_Get_Struct(self, hpricot_ele, he); \
229
- return rb_hash_aref(he->attr, ID2SYM(rb_intern("" # prop))); \
215
+ return rb_hash_aref(H_ELE_GET(self, H_ELE_ATTR), ID2SYM(rb_intern("" # prop))); \
230
216
  }
231
217
 
/*
 * Accessor instantiations. The added H_PROP lines bind a function-name
 * suffix to a slot index, for example name to H_ELE_TAG and raw to
 * H_ELE_RAW. The tag suffix of the removed lines is gone, so the generated
 * functions are now called hpricot_ele_get_name and so on. H_ATTR(target) is
 * new and stores target in the attribute hash.
 */
232
- H_PROP(tag);
233
- H_PROP(attr);
234
- H_PROP(etag);
235
- H_PROP(parent);
236
- H_PROP(children);
218
+ H_PROP(name, H_ELE_TAG);
219
+ H_PROP(raw, H_ELE_RAW);
220
+ H_PROP(parent, H_ELE_PARENT);
221
+ H_PROP(attr, H_ELE_ATTR);
222
+ H_PROP(etag, H_ELE_ETAG);
223
+ H_PROP(children, H_ELE_CHILDREN);
224
+ H_ATTR(target);
237
225
  H_ATTR(encoding);
238
226
  H_ATTR(version);
239
227
  H_ATTR(standalone);
240
228
  H_ATTR(system_id);
241
229
  H_ATTR(public_id);
242
230
 
243
- static VALUE
244
- hpricot_ele_get_raw(VALUE self, VALUE x) {
245
- hpricot_ele *he;
246
- Data_Get_Struct(self, hpricot_ele, he);
247
- return he->raw;
248
- }
249
-
250
- static VALUE
251
- hpricot_ele_clear_raw(VALUE self)
252
- {
253
- hpricot_ele *he;
254
- Data_Get_Struct(self, hpricot_ele, he);
255
- he->raw = Qnil;
256
- return Qtrue;
257
- }
258
-
/*
 * H_ELE(klass) builds a node of class klass and remembers it in S->last.
 * It is a statement sequence, not an expression. It reads ele, tag, attr,
 * ec, raw, rawlen, sym and S from the expansion site, and may reassign tag.
 *
 * New (+) behaviour, by class:
 *   cElem:       TAG = tag, ATTR = attr, EC = ec. RAW is set to a new string
 *                of raw/rawlen only when raw is non-NULL and sym is
 *                sym_emptytag, sym_stag or sym_doctype.
 *   cBogusETag:  TAG = tag. When raw is non-NULL the ATTR slot holds the raw
 *                string.
 *   cDocType, cXMLDecl: ATTR = attr, and TAG holds the raw string (nil when
 *                raw is NULL). For cDocType the original tag is first passed
 *                to ATTR(...) under the target key.
 *   cProcIns:    ATTR = attr, TAG = tag.
 *   any other:   TAG = tag only.
 *
 * NOTE(review): ATTR is a macro defined outside this view. It is used as the
 * unbraced body of an if, so it must expand to a single statement. TODO confirm.
 *
 * The removed (-) lines allocated a hpricot_ele with ALLOC and wrapped it
 * with Data_Wrap_Struct. The added lines rely on rb_obj_alloc(klass).
 */
259
231
  #define H_ELE(klass) \
260
- hpricot_ele *he = ALLOC(hpricot_ele); \
261
- he->name = 0; \
262
- he->tag = tag; \
263
- he->attr = attr; \
264
- he->raw = Qnil; \
265
- he->EC = ec; \
266
- he->etag = he->parent = he->children = Qnil; \
267
- if (raw != NULL && (sym == sym_emptytag || sym == sym_stag || sym == sym_etag || sym == sym_doctype)) { \
268
- he->raw = rb_str_new(raw, rawlen); \
232
+ ele = rb_obj_alloc(klass); \
233
+ if (klass == cElem) { \
234
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
235
+ H_ELE_SET(ele, H_ELE_ATTR, attr); \
236
+ H_ELE_SET(ele, H_ELE_EC, ec); \
237
+ if (raw != NULL && (sym == sym_emptytag || sym == sym_stag || sym == sym_doctype)) { \
238
+ H_ELE_SET(ele, H_ELE_RAW, rb_str_new(raw, rawlen)); \
239
+ } \
240
+ } else if (klass == cDocType || klass == cProcIns || klass == cXMLDecl || klass == cBogusETag) { \
241
+ if (klass == cBogusETag) { \
242
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
243
+ if (raw != NULL) \
244
+ H_ELE_SET(ele, H_ELE_ATTR, rb_str_new(raw, rawlen)); \
245
+ } else { \
246
+ if (klass == cDocType) \
247
+ ATTR(ID2SYM(rb_intern("target")), tag); \
248
+ H_ELE_SET(ele, H_ELE_ATTR, attr); \
249
+ if (klass != cProcIns) { \
250
+ tag = Qnil; \
251
+ if (raw != NULL) tag = rb_str_new(raw, rawlen); \
252
+ } \
253
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
254
+ } \
255
+ } else { \
256
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
269
257
  } \
270
- ele = Data_Wrap_Struct(klass, hpricot_ele_mark, hpricot_ele_free, he); \
271
258
  S->last = ele
272
259
 
273
- VALUE
274
- hpricot_ele_alloc(VALUE klass)
275
- {
276
- VALUE ele;
277
- hpricot_ele *he = ALLOC(hpricot_ele);
278
- he->name = 0;
279
- he->tag = he->attr = he->raw = he->EC = Qnil;
280
- he->etag = he->parent = he->children = Qnil;
281
- ele = Data_Wrap_Struct(klass, hpricot_ele_mark, hpricot_ele_free, he);
282
- return ele;
283
- }
284
-
285
260
  //
286
261
  // the swift, compact parser logic. most of the complicated stuff is done
287
262
  // in the lexer. this step just pairs up the start and end tags.
@@ -295,22 +270,23 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
295
270
  // in html mode, fix up start tags incorrectly formed as empty tags
296
271
  //
297
272
  if (!S->xml) {
298
- hpricot_ele *last;
299
- Data_Get_Struct(S->focus, hpricot_ele, last);
300
- if (last->EC == sym_CDATA &&
301
- (sym != sym_procins && sym != sym_comment && sym != sym_cdata && sym != sym_text) &&
302
- !(sym == sym_etag && rb_str_hash(tag) == last->name))
303
- {
304
- sym = sym_text;
305
- tag = rb_str_new(raw, rawlen);
306
- }
307
-
308
273
  if (sym == sym_emptytag || sym == sym_stag || sym == sym_etag) {
309
274
  ec = rb_hash_aref(S->EC, tag);
310
275
  if (NIL_P(ec)) {
311
276
  tag = rb_funcall(tag, s_downcase, 0);
312
277
  ec = rb_hash_aref(S->EC, tag);
313
278
  }
279
+ }
280
+
281
+ if (H_ELE_GET(S->focus, H_ELE_EC) == sym_CDATA &&
282
+ (sym != sym_procins && sym != sym_comment && sym != sym_cdata && sym != sym_text) &&
283
+ !(sym == sym_etag && INT2NUM(rb_str_hash(tag)) == H_ELE_GET(S->focus, H_ELE_HASH)))
284
+ {
285
+ sym = sym_text;
286
+ tag = rb_str_new(raw, rawlen);
287
+ }
288
+
289
+ if (!NIL_P(ec)) {
314
290
  if (sym == sym_emptytag) {
315
291
  if (ec != sym_EMPTY)
316
292
  sym = sym_stag;
@@ -322,19 +298,19 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
322
298
  }
323
299
 
324
300
  if (sym == sym_emptytag || sym == sym_stag) {
301
+ VALUE name = INT2NUM(rb_str_hash(tag));
325
302
  H_ELE(cElem);
326
- he->name = rb_str_hash(tag);
303
+ H_ELE_SET(ele, H_ELE_HASH, name);
327
304
 
328
305
  if (!S->xml) {
329
306
  VALUE match = Qnil, e = S->focus;
330
307
  while (e != S->doc)
331
308
  {
332
- hpricot_ele *hee;
333
- Data_Get_Struct(e, hpricot_ele, hee);
309
+ VALUE hEC = H_ELE_GET(e, H_ELE_EC);
334
310
 
335
- if (TYPE(hee->EC) == T_HASH)
311
+ if (TYPE(hEC) == T_HASH)
336
312
  {
337
- VALUE has = our_rb_hash_lookup(hee->EC, INT2NUM(he->name));
313
+ VALUE has = rb_hash_lookup(hEC, name);
338
314
  if (has != Qnil) {
339
315
  if (has == Qtrue) {
340
316
  if (match == Qnil)
@@ -347,7 +323,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
347
323
  }
348
324
  }
349
325
 
350
- e = hee->parent;
326
+ e = H_ELE_GET(e, H_ELE_PARENT);
351
327
  }
352
328
 
353
329
  if (match == Qnil)
@@ -369,8 +345,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
369
345
  }
370
346
  }
371
347
  } else if (sym == sym_etag) {
372
- int name;
373
- VALUE match = Qnil, e = S->focus;
348
+ VALUE name, match = Qnil, e = S->focus;
374
349
  if (S->strict) {
375
350
  if (NIL_P(rb_hash_aref(S->EC, tag))) {
376
351
  tag = rb_str_new2("div");
@@ -383,19 +358,16 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
383
358
  //
384
359
  // (see also: the search above for fixups)
385
360
  //
386
- name = rb_str_hash(tag);
361
+ name = INT2NUM(rb_str_hash(tag));
387
362
  while (e != S->doc)
388
363
  {
389
- hpricot_ele *he;
390
- Data_Get_Struct(e, hpricot_ele, he);
391
-
392
- if (he->name == name)
364
+ if (H_ELE_GET(e, H_ELE_HASH) == name)
393
365
  {
394
366
  match = e;
395
367
  break;
396
368
  }
397
369
 
398
- e = he->parent;
370
+ e = H_ELE_GET(e, H_ELE_PARENT);
399
371
  }
400
372
 
401
373
  if (NIL_P(match))
@@ -405,10 +377,11 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
405
377
  }
406
378
  else
407
379
  {
408
- H_ELE(cETag);
409
- Data_Get_Struct(match, hpricot_ele, he);
410
- he->etag = ele;
411
- S->focus = he->parent;
380
+ VALUE ele = Qnil;
381
+ if (raw != NULL)
382
+ ele = rb_str_new(raw, rawlen);
383
+ H_ELE_SET(match, H_ELE_ETAG, ele);
384
+ S->focus = H_ELE_GET(match, H_ELE_PARENT);
412
385
  S->last = Qnil;
413
386
  }
414
387
  } else if (sym == sym_cdata) {
@@ -429,15 +402,13 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
429
402
  tag = rb_reg_nth_match(1, match);
430
403
  attr = rb_reg_nth_match(2, match);
431
404
  {
432
- H_ELE(cProcIns);
433
- rb_hpricot_add(S->focus, ele);
405
+ H_ELE(cProcIns);
406
+ rb_hpricot_add(S->focus, ele);
434
407
  }
435
408
  } else if (sym == sym_text) {
436
409
  // TODO: add raw_string as well?
437
410
  if (!NIL_P(S->last) && RBASIC(S->last)->klass == cText) {
438
- hpricot_ele *he;
439
- Data_Get_Struct(S->last, hpricot_ele, he);
440
- rb_str_append(he->tag, tag);
411
+ rb_str_append(H_ELE_GET(S->last, H_ELE_TAG), tag);
441
412
  } else {
442
413
  H_ELE(cText);
443
414
  rb_hpricot_add(S->focus, ele);
@@ -480,11 +451,8 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
480
451
 
481
452
  if (!rb_block_given_p())
482
453
  {
483
- hpricot_ele *he = ALLOC(hpricot_ele);
484
454
  S = ALLOC(hpricot_state);
485
- MEMZERO(he, hpricot_ele, 1);
486
- he->tag = he->attr = he->etag = he->parent = he->children = Qnil;
487
- S->doc = Data_Wrap_Struct(cDoc, hpricot_ele_mark, hpricot_ele_free, he);
455
+ S->doc = rb_obj_alloc(cDoc);
488
456
  rb_gc_register_address(&S->doc);
489
457
  S->focus = S->doc;
490
458
  S->last = Qnil;
@@ -509,7 +477,7 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
509
477
  buf = ALLOC_N(char, buffer_size);
510
478
 
511
479
  %% write init;
512
-
480
+
513
481
  while (!done) {
514
482
  VALUE str;
515
483
  char *p, *pe;
@@ -561,7 +529,7 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
561
529
 
562
530
  pe = p + len;
563
531
  %% write exec;
564
-
532
+
565
533
  if (cs == hpricot_scan_error) {
566
534
  if (buf != NULL)
567
535
  free(buf);
@@ -574,7 +542,7 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
574
542
  rb_raise(rb_eHpricotParseError, "parse error on line %d.\n" NO_WAY_SERIOUSLY, curline);
575
543
  }
576
544
  }
577
-
545
+
578
546
  if (done && ele_open)
579
547
  {
580
548
  ele_open = 0;
@@ -635,66 +603,103 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
635
603
  return Qnil;
636
604
  }
637
605
 
638
- void Init_hpricot_scan()
/*
 * Allocation function installed by make_hpricot_struct with
 * rb_define_alloc_func. It creates a T_STRUCT object of class klass with n
 * storage slots, where n is read from the __size__ value that
 * make_hpricot_struct stores on the generated class. All slots are cleared
 * with rb_mem_clear before the object is returned.
 *
 * NOTE(review): the node classes are defined as subclasses of the generated
 * class, so this presumably relies on rb_struct_iv_get finding __size__ on
 * an ancestor. TODO confirm against the Ruby version in use.
 *
 * In this diff rendering the added (+) function body is interleaved with
 * removed (-) lines. Those belong to the old Init_hpricot_scan body and are
 * not part of this function.
 */
606
+ static VALUE
607
+ alloc_hpricot_struct(VALUE klass)
639
608
  {
640
- mHpricot = rb_define_module("Hpricot");
641
- rb_define_attr(rb_singleton_class(mHpricot), "buffer_size", 1, 1);
642
- rb_define_singleton_method(mHpricot, "scan", hpricot_scan, -1);
643
- rb_define_singleton_method(mHpricot, "css", hpricot_css, 3);
644
- rb_eHpricotParseError = rb_define_class_under(mHpricot, "ParseError", rb_eStandardError);
609
+ VALUE size;
610
+ long n;
611
+ NEWOBJ(st, struct RStruct);
612
+ OBJSETUP(st, klass, T_STRUCT);
613
+
614
+ size = rb_struct_iv_get(klass, "__size__");
615
+ n = FIX2LONG(size);
616
+
/*
 * Two RStruct layouts are handled. Without RSTRUCT_EMBED_LEN_MAX the slots
 * always live in a separately allocated st->ptr array. With it, a count in
 * the range 1..RSTRUCT_EMBED_LEN_MAX is stored inline in st->as.ary with the
 * length encoded in the object flags, and any other count goes to
 * st->as.heap.
 */
617
+ #ifndef RSTRUCT_EMBED_LEN_MAX
618
+ st->ptr = ALLOC_N(VALUE, n);
619
+ rb_mem_clear(st->ptr, n);
620
+ st->len = n;
621
+ #else
622
+ if (0 < n && n <= RSTRUCT_EMBED_LEN_MAX) {
623
+ RBASIC(st)->flags &= ~RSTRUCT_EMBED_LEN_MASK;
624
+ RBASIC(st)->flags |= n << RSTRUCT_EMBED_LEN_SHIFT;
625
+ rb_mem_clear(st->as.ary, n);
626
+ } else {
627
+ st->as.heap.ptr = ALLOC_N(VALUE, n);
628
+ rb_mem_clear(st->as.heap.ptr, n);
629
+ st->as.heap.len = n;
630
+ }
631
+ #endif
645
632
 
/* The removed lines below are the old class registration code from Init_hpricot_scan. */
646
- cDoc = rb_define_class_under(mHpricot, "Doc", rb_cObject);
647
- rb_define_alloc_func(cDoc, hpricot_ele_alloc);
648
- rb_define_method(cDoc, "children", hpricot_ele_get_children, 0);
649
- rb_define_method(cDoc, "children=", hpricot_ele_set_children, 1);
650
-
651
- cBaseEle = rb_define_class_under(mHpricot, "BaseEle", rb_cObject);
652
- rb_define_alloc_func(cBaseEle, hpricot_ele_alloc);
653
- rb_define_method(cBaseEle, "raw_string", hpricot_ele_get_raw, 0);
654
- rb_define_method(cBaseEle, "clear_raw", hpricot_ele_clear_raw, 0);
655
- rb_define_method(cBaseEle, "parent", hpricot_ele_get_parent, 0);
656
- rb_define_method(cBaseEle, "parent=", hpricot_ele_set_parent, 1);
657
- cCData = rb_define_class_under(mHpricot, "CData", cBaseEle);
658
- rb_define_method(cCData, "content", hpricot_ele_get_tag, 0);
659
- rb_define_method(cCData, "content=", hpricot_ele_set_tag, 1);
660
- cComment = rb_define_class_under(mHpricot, "Comment", cBaseEle);
661
- rb_define_method(cComment, "content", hpricot_ele_get_tag, 0);
662
- rb_define_method(cComment, "content=", hpricot_ele_set_tag, 1);
663
- cDocType = rb_define_class_under(mHpricot, "DocType", cBaseEle);
664
- rb_define_method(cDocType, "target", hpricot_ele_get_tag, 0);
665
- rb_define_method(cDocType, "target=", hpricot_ele_set_tag, 1);
666
- rb_define_method(cDocType, "public_id", hpricot_ele_get_public_id, 0);
667
- rb_define_method(cDocType, "public_id=", hpricot_ele_set_public_id, 1);
668
- rb_define_method(cDocType, "system_id", hpricot_ele_get_system_id, 0);
669
- rb_define_method(cDocType, "system_id=", hpricot_ele_set_system_id, 1);
670
- cElem = rb_define_class_under(mHpricot, "Elem", cBaseEle);
671
- rb_define_method(cElem, "raw_attributes", hpricot_ele_get_attr, 0);
672
- rb_define_method(cElem, "raw_attributes=", hpricot_ele_set_attr, 1);
673
- rb_define_method(cElem, "children", hpricot_ele_get_children, 0);
674
- rb_define_method(cElem, "children=", hpricot_ele_set_children, 1);
675
- rb_define_method(cElem, "etag", hpricot_ele_get_etag, 0);
676
- rb_define_method(cElem, "etag=", hpricot_ele_set_etag, 1);
677
- rb_define_method(cElem, "name", hpricot_ele_get_tag, 0);
678
- rb_define_method(cElem, "name=", hpricot_ele_set_tag, 1);
679
- cETag = rb_define_class_under(mHpricot, "ETag", cBaseEle);
680
- rb_define_method(cETag, "name", hpricot_ele_get_tag, 0);
681
- rb_define_method(cETag, "name=", hpricot_ele_set_tag, 1);
682
- cBogusETag = rb_define_class_under(mHpricot, "BogusETag", cETag);
683
- cText = rb_define_class_under(mHpricot, "Text", cBaseEle);
684
- rb_define_method(cText, "content", hpricot_ele_get_tag, 0);
685
- rb_define_method(cText, "content=", hpricot_ele_set_tag, 1);
686
- cXMLDecl = rb_define_class_under(mHpricot, "XMLDecl", cBaseEle);
687
- rb_define_method(cXMLDecl, "encoding", hpricot_ele_get_encoding, 0);
688
- rb_define_method(cXMLDecl, "encoding=", hpricot_ele_set_encoding, 1);
689
- rb_define_method(cXMLDecl, "standalone", hpricot_ele_get_standalone, 0);
690
- rb_define_method(cXMLDecl, "standalone=", hpricot_ele_set_standalone, 1);
691
- rb_define_method(cXMLDecl, "version", hpricot_ele_get_version, 0);
692
- rb_define_method(cXMLDecl, "version=", hpricot_ele_set_version, 1);
693
- cProcIns = rb_define_class_under(mHpricot, "ProcIns", cBaseEle);
694
- rb_define_method(cProcIns, "target", hpricot_ele_get_tag, 0);
695
- rb_define_method(cProcIns, "target=", hpricot_ele_set_tag, 1);
696
- rb_define_method(cProcIns, "content", hpricot_ele_get_attr, 0);
697
- rb_define_method(cProcIns, "content=", hpricot_ele_set_attr, 1);
633
+ return (VALUE)st;
634
+ }
635
+
/*
 * Reader thunks, one per slot index 0..9, collected in ref_func so that
 * make_hpricot_struct can bind member i of a struct class to ref_func[i].
 * Each returns the raw slot value of obj with no bounds check. The member
 * lists visible in this diff have at most 8 entries, so entries 8 and 9 are
 * not bound by them.
 */
636
+ static VALUE hpricot_struct_ref0(VALUE obj) {return H_ELE_GET(obj, 0);}
637
+ static VALUE hpricot_struct_ref1(VALUE obj) {return H_ELE_GET(obj, 1);}
638
+ static VALUE hpricot_struct_ref2(VALUE obj) {return H_ELE_GET(obj, 2);}
639
+ static VALUE hpricot_struct_ref3(VALUE obj) {return H_ELE_GET(obj, 3);}
640
+ static VALUE hpricot_struct_ref4(VALUE obj) {return H_ELE_GET(obj, 4);}
641
+ static VALUE hpricot_struct_ref5(VALUE obj) {return H_ELE_GET(obj, 5);}
642
+ static VALUE hpricot_struct_ref6(VALUE obj) {return H_ELE_GET(obj, 6);}
643
+ static VALUE hpricot_struct_ref7(VALUE obj) {return H_ELE_GET(obj, 7);}
644
+ static VALUE hpricot_struct_ref8(VALUE obj) {return H_ELE_GET(obj, 8);}
645
+ static VALUE hpricot_struct_ref9(VALUE obj) {return H_ELE_GET(obj, 9);}
646
+
/* Lookup table indexed by member position. It is declared with an unprototyped () parameter list. */
647
+ static VALUE (*ref_func[10])() = {
648
+ hpricot_struct_ref0,
649
+ hpricot_struct_ref1,
650
+ hpricot_struct_ref2,
651
+ hpricot_struct_ref3,
652
+ hpricot_struct_ref4,
653
+ hpricot_struct_ref5,
654
+ hpricot_struct_ref6,
655
+ hpricot_struct_ref7,
656
+ hpricot_struct_ref8,
657
+ hpricot_struct_ref9,
658
+ };
659
+
/*
 * Writer thunks, one per slot index 0..9, collected in set_func so that
 * make_hpricot_struct can bind the attribute-assignment method of member i
 * to set_func[i]. Each stores val in the slot and returns the value of that
 * assignment, because H_ELE_SET expands to a plain assignment expression.
 * There is no bounds check against the slot count of obj.
 */
660
+ static VALUE hpricot_struct_set0(VALUE obj, VALUE val) {return H_ELE_SET(obj, 0, val);}
661
+ static VALUE hpricot_struct_set1(VALUE obj, VALUE val) {return H_ELE_SET(obj, 1, val);}
662
+ static VALUE hpricot_struct_set2(VALUE obj, VALUE val) {return H_ELE_SET(obj, 2, val);}
663
+ static VALUE hpricot_struct_set3(VALUE obj, VALUE val) {return H_ELE_SET(obj, 3, val);}
664
+ static VALUE hpricot_struct_set4(VALUE obj, VALUE val) {return H_ELE_SET(obj, 4, val);}
665
+ static VALUE hpricot_struct_set5(VALUE obj, VALUE val) {return H_ELE_SET(obj, 5, val);}
666
+ static VALUE hpricot_struct_set6(VALUE obj, VALUE val) {return H_ELE_SET(obj, 6, val);}
667
+ static VALUE hpricot_struct_set7(VALUE obj, VALUE val) {return H_ELE_SET(obj, 7, val);}
668
+ static VALUE hpricot_struct_set8(VALUE obj, VALUE val) {return H_ELE_SET(obj, 8, val);}
669
+ static VALUE hpricot_struct_set9(VALUE obj, VALUE val) {return H_ELE_SET(obj, 9, val);}
670
+
/* Lookup table indexed by member position, parallel to ref_func. */
671
+ static VALUE (*set_func[10])() = {
672
+ hpricot_struct_set0,
673
+ hpricot_struct_set1,
674
+ hpricot_struct_set2,
675
+ hpricot_struct_set3,
676
+ hpricot_struct_set4,
677
+ hpricot_struct_set5,
678
+ hpricot_struct_set6,
679
+ hpricot_struct_set7,
680
+ hpricot_struct_set8,
681
+ hpricot_struct_set9,
682
+ };
683
+
/*
 * Builds and returns a new anonymous class (a direct subclass of rb_cObject)
 * whose instances keep their members in positional slots.
 *
 * members: Ruby array of symbols. Element i names the member in slot i.
 *
 * Steps performed:
 *   - stores the member count on the class under __size__, which
 *     alloc_hpricot_struct reads back
 *   - installs alloc_hpricot_struct as the allocation function
 *   - defines a singleton new that forwards to rb_class_new_instance
 *   - for each member, defines a reader bound to ref_func[i] and a writer
 *     (rb_id_attrset of the member id) bound to set_func[i]
 *
 * NOTE(review): ref_func and set_func have 10 entries and the loop does not
 * check the member count against that. The visible callers pass 8, 3 and 2.
 */
684
+ static VALUE
685
+ make_hpricot_struct(VALUE members)
686
+ {
687
+ int i = 0;
688
+ VALUE klass = rb_class_new(rb_cObject);
689
+ rb_iv_set(klass, "__size__", INT2NUM(RARRAY_LEN(members)));
690
+ rb_define_alloc_func(klass, alloc_hpricot_struct);
691
+ rb_define_singleton_method(klass, "new", rb_class_new_instance, -1);
692
+ for (i = 0; i < RARRAY_LEN(members); i++) {
693
+ ID id = SYM2ID(RARRAY_PTR(members)[i]);
694
+ rb_define_method_id(klass, id, ref_func[i], 0);
695
+ rb_define_method_id(klass, rb_id_attrset(id), set_func[i], 1);
696
+ }
697
+ return klass;
698
+ }
699
+
700
+ void Init_hpricot_scan()
701
+ {
702
+ VALUE structElem, structAttr, structBasic;
698
703
 
699
704
  s_ElementContent = rb_intern("ElementContent");
700
705
  symAllow = ID2SYM(rb_intern("allow"));
@@ -704,19 +709,78 @@ void Init_hpricot_scan()
704
709
  s_parent = rb_intern("parent");
705
710
  s_read = rb_intern("read");
706
711
  s_to_str = rb_intern("to_str");
707
- iv_parent = rb_intern("parent");
708
712
  sym_xmldecl = ID2SYM(rb_intern("xmldecl"));
709
713
  sym_doctype = ID2SYM(rb_intern("doctype"));
710
714
  sym_procins = ID2SYM(rb_intern("procins"));
711
715
  sym_stag = ID2SYM(rb_intern("stag"));
712
716
  sym_etag = ID2SYM(rb_intern("etag"));
713
717
  sym_emptytag = ID2SYM(rb_intern("emptytag"));
718
+ sym_allowed = ID2SYM(rb_intern("allowed"));
719
+ sym_children = ID2SYM(rb_intern("children"));
714
720
  sym_comment = ID2SYM(rb_intern("comment"));
715
721
  sym_cdata = ID2SYM(rb_intern("cdata"));
722
+ sym_name = ID2SYM(rb_intern("name"));
723
+ sym_parent = ID2SYM(rb_intern("parent"));
724
+ sym_raw_attributes = ID2SYM(rb_intern("raw_attributes"));
725
+ sym_raw_string = ID2SYM(rb_intern("raw_string"));
726
+ sym_tagno = ID2SYM(rb_intern("tagno"));
716
727
  sym_text = ID2SYM(rb_intern("text"));
717
728
  sym_EMPTY = ID2SYM(rb_intern("EMPTY"));
718
729
  sym_CDATA = ID2SYM(rb_intern("CDATA"));
719
730
 
731
+ mHpricot = rb_define_module("Hpricot");
732
+ rb_define_attr(rb_singleton_class(mHpricot), "buffer_size", 1, 1);
733
+ rb_define_singleton_method(mHpricot, "scan", hpricot_scan, -1);
734
+ rb_define_singleton_method(mHpricot, "css", hpricot_css, 3);
735
+ rb_eHpricotParseError = rb_define_class_under(mHpricot, "ParseError", rb_eStandardError);
736
+
737
+ structElem = make_hpricot_struct(rb_ary_new3(8, sym_name, sym_parent,
738
+ sym_raw_attributes, sym_etag, sym_raw_string, sym_allowed,
739
+ sym_tagno, sym_children));
740
+ structAttr = make_hpricot_struct(rb_ary_new3(3, sym_name, sym_parent, sym_raw_attributes));
741
+ structBasic = make_hpricot_struct(rb_ary_new3(2, sym_name, sym_parent));
742
+
743
+ cDoc = rb_define_class_under(mHpricot, "Doc", structElem);
744
+ cCData = rb_define_class_under(mHpricot, "CData", structBasic);
745
+ rb_define_method(cCData, "content", hpricot_ele_get_name, 0);
746
+ rb_define_method(cCData, "content=", hpricot_ele_set_name, 1);
747
+ cComment = rb_define_class_under(mHpricot, "Comment", structBasic);
748
+ rb_define_method(cComment, "content", hpricot_ele_get_name, 0);
749
+ rb_define_method(cComment, "content=", hpricot_ele_set_name, 1);
750
+ cDocType = rb_define_class_under(mHpricot, "DocType", structAttr);
751
+ rb_define_method(cDocType, "raw_string", hpricot_ele_get_name, 0);
752
+ rb_define_method(cDocType, "clear_raw", hpricot_ele_clear_name, 0);
753
+ rb_define_method(cDocType, "target", hpricot_ele_get_target, 0);
754
+ rb_define_method(cDocType, "target=", hpricot_ele_set_target, 1);
755
+ rb_define_method(cDocType, "public_id", hpricot_ele_get_public_id, 0);
756
+ rb_define_method(cDocType, "public_id=", hpricot_ele_set_public_id, 1);
757
+ rb_define_method(cDocType, "system_id", hpricot_ele_get_system_id, 0);
758
+ rb_define_method(cDocType, "system_id=", hpricot_ele_set_system_id, 1);
759
+ cElem = rb_define_class_under(mHpricot, "Elem", structElem);
760
+ rb_define_method(cElem, "clear_raw", hpricot_ele_clear_raw, 0);
761
+ cBogusETag = rb_define_class_under(mHpricot, "BogusETag", structAttr);
762
+ rb_define_method(cBogusETag, "raw_string", hpricot_ele_get_attr, 0);
763
+ rb_define_method(cBogusETag, "clear_raw", hpricot_ele_clear_attr, 0);
764
+ cText = rb_define_class_under(mHpricot, "Text", structBasic);
765
+ rb_define_method(cText, "raw_string", hpricot_ele_get_name, 0);
766
+ rb_define_method(cText, "clear_raw", hpricot_ele_clear_name, 0);
767
+ rb_define_method(cText, "content", hpricot_ele_get_name, 0);
768
+ rb_define_method(cText, "content=", hpricot_ele_set_name, 1);
769
+ cXMLDecl = rb_define_class_under(mHpricot, "XMLDecl", structAttr);
770
+ rb_define_method(cXMLDecl, "raw_string", hpricot_ele_get_name, 0);
771
+ rb_define_method(cXMLDecl, "clear_raw", hpricot_ele_clear_name, 0);
772
+ rb_define_method(cXMLDecl, "encoding", hpricot_ele_get_encoding, 0);
773
+ rb_define_method(cXMLDecl, "encoding=", hpricot_ele_set_encoding, 1);
774
+ rb_define_method(cXMLDecl, "standalone", hpricot_ele_get_standalone, 0);
775
+ rb_define_method(cXMLDecl, "standalone=", hpricot_ele_set_standalone, 1);
776
+ rb_define_method(cXMLDecl, "version", hpricot_ele_get_version, 0);
777
+ rb_define_method(cXMLDecl, "version=", hpricot_ele_set_version, 1);
778
+ cProcIns = rb_define_class_under(mHpricot, "ProcIns", structAttr);
779
+ rb_define_method(cProcIns, "target", hpricot_ele_get_name, 0);
780
+ rb_define_method(cProcIns, "target=", hpricot_ele_set_name, 1);
781
+ rb_define_method(cProcIns, "content", hpricot_ele_get_attr, 0);
782
+ rb_define_method(cProcIns, "content=", hpricot_ele_set_attr, 1);
783
+
720
784
  rb_const_set(mHpricot, rb_intern("ProcInsParse"),
721
785
  reProcInsParse = rb_eval_string("/\\A<\\?(\\S+)\\s+(.+)/m"));
722
786
  }