hpricot 0.7-x86-mswin32 → 0.8-x86-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,20 +19,26 @@ VALUE hpricot_css(VALUE, VALUE, VALUE, VALUE, VALUE);
19
19
  #define NO_WAY_SERIOUSLY "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!"
20
20
 
21
21
  static VALUE sym_xmldecl, sym_doctype, sym_procins, sym_stag, sym_etag, sym_emptytag, sym_comment,
22
- sym_cdata, sym_text, sym_EMPTY, sym_CDATA;
22
+ sym_cdata, sym_name, sym_parent, sym_raw_attributes, sym_raw_string, sym_tagno,
23
+ sym_allowed, sym_text, sym_children, sym_EMPTY, sym_CDATA;
23
24
  static VALUE mHpricot, rb_eHpricotParseError;
24
- static VALUE cBaseEle, cBogusETag, cCData, cComment, cDoc, cDocType, cElem, cETag, cText,
25
+ static VALUE cBogusETag, cCData, cComment, cDoc, cDocType, cElem, cText,
25
26
  cXMLDecl, cProcIns, symAllow, symDeny;
26
27
  static ID s_ElementContent;
27
28
  static ID s_downcase, s_new, s_parent, s_read, s_to_str;
28
- static ID iv_parent;
29
29
  static VALUE reProcInsParse;
30
30
 
31
- typedef struct {
32
- int name;
33
- VALUE tag, attr, etag, raw, EC;
34
- VALUE parent, children;
35
- } hpricot_ele;
31
+ #define H_ELE_TAG 0
32
+ #define H_ELE_PARENT 1
33
+ #define H_ELE_ATTR 2
34
+ #define H_ELE_ETAG 3
35
+ #define H_ELE_RAW 4
36
+ #define H_ELE_EC 5
37
+ #define H_ELE_HASH 6
38
+ #define H_ELE_CHILDREN 7
39
+
40
+ #define H_ELE_GET(ele, idx) RSTRUCT_PTR(ele)[idx]
41
+ #define H_ELE_SET(ele, idx, val) RSTRUCT_PTR(ele)[idx] = val
36
42
 
37
43
  #define OPT(opts, key) (!NIL_P(opts) && RTEST(rb_hash_aref(opts, ID2SYM(rb_intern("" # key)))))
38
44
 
@@ -107,7 +113,7 @@ typedef struct {
107
113
  action tag { SET(tag, p); }
108
114
  action tagc { SET(tag, p-1); }
109
115
  action aval { SET(aval, p); }
110
- action aunq {
116
+ action aunq {
111
117
  if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); }
112
118
  else { SET(aval, p); }
113
119
  }
@@ -118,14 +124,14 @@ typedef struct {
118
124
  action pubid { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); }
119
125
  action sysid { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); }
120
126
 
121
- action new_attr {
127
+ action new_attr {
122
128
  akey = Qnil;
123
129
  aval = Qnil;
124
130
  mark_akey = NULL;
125
131
  mark_aval = NULL;
126
132
  }
127
133
 
128
- action save_attr {
134
+ action save_attr {
129
135
  ATTR(akey, aval);
130
136
  }
131
137
 
@@ -144,7 +150,7 @@ void rb_yield_tokens(VALUE sym, VALUE tag, VALUE attr, VALUE raw, int taint)
144
150
  raw = tag;
145
151
  }
146
152
  ary = rb_ary_new3(4, sym, tag, attr, raw);
147
- if (taint) {
153
+ if (taint) {
148
154
  OBJ_TAINT(ary);
149
155
  OBJ_TAINT(tag);
150
156
  OBJ_TAINT(attr);
@@ -153,6 +159,7 @@ void rb_yield_tokens(VALUE sym, VALUE tag, VALUE attr, VALUE raw, int taint)
153
159
  rb_yield(ary);
154
160
  }
155
161
 
162
+ #ifndef RHASH_TBL
156
163
  /* rb_hash_lookup() is only in Ruby 1.8.7 */
157
164
  static VALUE
158
165
  our_rb_hash_lookup(VALUE hash, VALUE key)
@@ -165,17 +172,17 @@ our_rb_hash_lookup(VALUE hash, VALUE key)
165
172
 
166
173
  return val;
167
174
  }
175
+ #define rb_hash_lookup our_rb_hash_lookup
176
+ #endif
168
177
 
169
178
  static void
170
179
  rb_hpricot_add(VALUE focus, VALUE ele)
171
180
  {
172
- hpricot_ele *he, *he2;
173
- Data_Get_Struct(focus, hpricot_ele, he);
174
- Data_Get_Struct(ele, hpricot_ele, he2);
175
- if (NIL_P(he->children))
176
- he->children = rb_ary_new();
177
- rb_ary_push(he->children, ele);
178
- he2->parent = focus;
181
+ VALUE children = H_ELE_GET(focus, H_ELE_CHILDREN);
182
+ if (NIL_P(children))
183
+ H_ELE_SET(focus, H_ELE_CHILDREN, (children = rb_ary_new2(1)));
184
+ rb_ary_push(children, ele);
185
+ H_ELE_SET(ele, H_ELE_PARENT, focus);
179
186
  }
180
187
 
181
188
  typedef struct {
@@ -186,102 +193,70 @@ typedef struct {
186
193
  unsigned char xml, strict, fixup;
187
194
  } hpricot_state;
188
195
 
189
- static void
190
- hpricot_ele_mark(hpricot_ele *he)
191
- {
192
- rb_gc_mark(he->tag);
193
- rb_gc_mark(he->attr);
194
- rb_gc_mark(he->etag);
195
- rb_gc_mark(he->raw);
196
- rb_gc_mark(he->parent);
197
- rb_gc_mark(he->children);
198
- }
199
-
200
- static void
201
- hpricot_ele_free(hpricot_ele *he)
202
- {
203
- free(he);
204
- }
205
-
206
- #define H_PROP(prop) \
196
+ #define H_PROP(prop, idx) \
207
197
  static VALUE hpricot_ele_set_##prop(VALUE self, VALUE x) { \
208
- hpricot_ele *he; \
209
- Data_Get_Struct(self, hpricot_ele, he); \
210
- he->prop = x; \
198
+ H_ELE_SET(self, idx, x); \
211
199
  return self; \
212
200
  } \
201
+ static VALUE hpricot_ele_clear_##prop(VALUE self) { \
202
+ H_ELE_SET(self, idx, Qnil); \
203
+ return Qtrue; \
204
+ } \
213
205
  static VALUE hpricot_ele_get_##prop(VALUE self) { \
214
- hpricot_ele *he; \
215
- Data_Get_Struct(self, hpricot_ele, he); \
216
- return he->prop; \
206
+ return H_ELE_GET(self, idx); \
217
207
  }
218
208
 
219
209
  #define H_ATTR(prop) \
220
210
  static VALUE hpricot_ele_set_##prop(VALUE self, VALUE x) { \
221
- hpricot_ele *he; \
222
- Data_Get_Struct(self, hpricot_ele, he); \
223
- rb_hash_aset(he->attr, ID2SYM(rb_intern("" # prop)), x); \
211
+ rb_hash_aset(H_ELE_GET(self, H_ELE_ATTR), ID2SYM(rb_intern("" # prop)), x); \
224
212
  return self; \
225
213
  } \
226
214
  static VALUE hpricot_ele_get_##prop(VALUE self) { \
227
- hpricot_ele *he; \
228
- Data_Get_Struct(self, hpricot_ele, he); \
229
- return rb_hash_aref(he->attr, ID2SYM(rb_intern("" # prop))); \
215
+ return rb_hash_aref(H_ELE_GET(self, H_ELE_ATTR), ID2SYM(rb_intern("" # prop))); \
230
216
  }
231
217
 
232
- H_PROP(tag);
233
- H_PROP(attr);
234
- H_PROP(etag);
235
- H_PROP(parent);
236
- H_PROP(children);
218
+ H_PROP(name, H_ELE_TAG);
219
+ H_PROP(raw, H_ELE_RAW);
220
+ H_PROP(parent, H_ELE_PARENT);
221
+ H_PROP(attr, H_ELE_ATTR);
222
+ H_PROP(etag, H_ELE_ETAG);
223
+ H_PROP(children, H_ELE_CHILDREN);
224
+ H_ATTR(target);
237
225
  H_ATTR(encoding);
238
226
  H_ATTR(version);
239
227
  H_ATTR(standalone);
240
228
  H_ATTR(system_id);
241
229
  H_ATTR(public_id);
242
230
 
243
- static VALUE
244
- hpricot_ele_get_raw(VALUE self, VALUE x) {
245
- hpricot_ele *he;
246
- Data_Get_Struct(self, hpricot_ele, he);
247
- return he->raw;
248
- }
249
-
250
- static VALUE
251
- hpricot_ele_clear_raw(VALUE self)
252
- {
253
- hpricot_ele *he;
254
- Data_Get_Struct(self, hpricot_ele, he);
255
- he->raw = Qnil;
256
- return Qtrue;
257
- }
258
-
259
231
  #define H_ELE(klass) \
260
- hpricot_ele *he = ALLOC(hpricot_ele); \
261
- he->name = 0; \
262
- he->tag = tag; \
263
- he->attr = attr; \
264
- he->raw = Qnil; \
265
- he->EC = ec; \
266
- he->etag = he->parent = he->children = Qnil; \
267
- if (raw != NULL && (sym == sym_emptytag || sym == sym_stag || sym == sym_etag || sym == sym_doctype)) { \
268
- he->raw = rb_str_new(raw, rawlen); \
232
+ ele = rb_obj_alloc(klass); \
233
+ if (klass == cElem) { \
234
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
235
+ H_ELE_SET(ele, H_ELE_ATTR, attr); \
236
+ H_ELE_SET(ele, H_ELE_EC, ec); \
237
+ if (raw != NULL && (sym == sym_emptytag || sym == sym_stag || sym == sym_doctype)) { \
238
+ H_ELE_SET(ele, H_ELE_RAW, rb_str_new(raw, rawlen)); \
239
+ } \
240
+ } else if (klass == cDocType || klass == cProcIns || klass == cXMLDecl || klass == cBogusETag) { \
241
+ if (klass == cBogusETag) { \
242
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
243
+ if (raw != NULL) \
244
+ H_ELE_SET(ele, H_ELE_ATTR, rb_str_new(raw, rawlen)); \
245
+ } else { \
246
+ if (klass == cDocType) \
247
+ ATTR(ID2SYM(rb_intern("target")), tag); \
248
+ H_ELE_SET(ele, H_ELE_ATTR, attr); \
249
+ if (klass != cProcIns) { \
250
+ tag = Qnil; \
251
+ if (raw != NULL) tag = rb_str_new(raw, rawlen); \
252
+ } \
253
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
254
+ } \
255
+ } else { \
256
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
269
257
  } \
270
- ele = Data_Wrap_Struct(klass, hpricot_ele_mark, hpricot_ele_free, he); \
271
258
  S->last = ele
272
259
 
273
- VALUE
274
- hpricot_ele_alloc(VALUE klass)
275
- {
276
- VALUE ele;
277
- hpricot_ele *he = ALLOC(hpricot_ele);
278
- he->name = 0;
279
- he->tag = he->attr = he->raw = he->EC = Qnil;
280
- he->etag = he->parent = he->children = Qnil;
281
- ele = Data_Wrap_Struct(klass, hpricot_ele_mark, hpricot_ele_free, he);
282
- return ele;
283
- }
284
-
285
260
  //
286
261
  // the swift, compact parser logic. most of the complicated stuff is done
287
262
  // in the lexer. this step just pairs up the start and end tags.
@@ -295,22 +270,23 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
295
270
  // in html mode, fix up start tags incorrectly formed as empty tags
296
271
  //
297
272
  if (!S->xml) {
298
- hpricot_ele *last;
299
- Data_Get_Struct(S->focus, hpricot_ele, last);
300
- if (last->EC == sym_CDATA &&
301
- (sym != sym_procins && sym != sym_comment && sym != sym_cdata && sym != sym_text) &&
302
- !(sym == sym_etag && rb_str_hash(tag) == last->name))
303
- {
304
- sym = sym_text;
305
- tag = rb_str_new(raw, rawlen);
306
- }
307
-
308
273
  if (sym == sym_emptytag || sym == sym_stag || sym == sym_etag) {
309
274
  ec = rb_hash_aref(S->EC, tag);
310
275
  if (NIL_P(ec)) {
311
276
  tag = rb_funcall(tag, s_downcase, 0);
312
277
  ec = rb_hash_aref(S->EC, tag);
313
278
  }
279
+ }
280
+
281
+ if (H_ELE_GET(S->focus, H_ELE_EC) == sym_CDATA &&
282
+ (sym != sym_procins && sym != sym_comment && sym != sym_cdata && sym != sym_text) &&
283
+ !(sym == sym_etag && INT2NUM(rb_str_hash(tag)) == H_ELE_GET(S->focus, H_ELE_HASH)))
284
+ {
285
+ sym = sym_text;
286
+ tag = rb_str_new(raw, rawlen);
287
+ }
288
+
289
+ if (!NIL_P(ec)) {
314
290
  if (sym == sym_emptytag) {
315
291
  if (ec != sym_EMPTY)
316
292
  sym = sym_stag;
@@ -322,19 +298,19 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
322
298
  }
323
299
 
324
300
  if (sym == sym_emptytag || sym == sym_stag) {
301
+ VALUE name = INT2NUM(rb_str_hash(tag));
325
302
  H_ELE(cElem);
326
- he->name = rb_str_hash(tag);
303
+ H_ELE_SET(ele, H_ELE_HASH, name);
327
304
 
328
305
  if (!S->xml) {
329
306
  VALUE match = Qnil, e = S->focus;
330
307
  while (e != S->doc)
331
308
  {
332
- hpricot_ele *hee;
333
- Data_Get_Struct(e, hpricot_ele, hee);
309
+ VALUE hEC = H_ELE_GET(e, H_ELE_EC);
334
310
 
335
- if (TYPE(hee->EC) == T_HASH)
311
+ if (TYPE(hEC) == T_HASH)
336
312
  {
337
- VALUE has = our_rb_hash_lookup(hee->EC, INT2NUM(he->name));
313
+ VALUE has = rb_hash_lookup(hEC, name);
338
314
  if (has != Qnil) {
339
315
  if (has == Qtrue) {
340
316
  if (match == Qnil)
@@ -347,7 +323,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
347
323
  }
348
324
  }
349
325
 
350
- e = hee->parent;
326
+ e = H_ELE_GET(e, H_ELE_PARENT);
351
327
  }
352
328
 
353
329
  if (match == Qnil)
@@ -369,8 +345,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
369
345
  }
370
346
  }
371
347
  } else if (sym == sym_etag) {
372
- int name;
373
- VALUE match = Qnil, e = S->focus;
348
+ VALUE name, match = Qnil, e = S->focus;
374
349
  if (S->strict) {
375
350
  if (NIL_P(rb_hash_aref(S->EC, tag))) {
376
351
  tag = rb_str_new2("div");
@@ -383,19 +358,16 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
383
358
  //
384
359
  // (see also: the search above for fixups)
385
360
  //
386
- name = rb_str_hash(tag);
361
+ name = INT2NUM(rb_str_hash(tag));
387
362
  while (e != S->doc)
388
363
  {
389
- hpricot_ele *he;
390
- Data_Get_Struct(e, hpricot_ele, he);
391
-
392
- if (he->name == name)
364
+ if (H_ELE_GET(e, H_ELE_HASH) == name)
393
365
  {
394
366
  match = e;
395
367
  break;
396
368
  }
397
369
 
398
- e = he->parent;
370
+ e = H_ELE_GET(e, H_ELE_PARENT);
399
371
  }
400
372
 
401
373
  if (NIL_P(match))
@@ -405,10 +377,11 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
405
377
  }
406
378
  else
407
379
  {
408
- H_ELE(cETag);
409
- Data_Get_Struct(match, hpricot_ele, he);
410
- he->etag = ele;
411
- S->focus = he->parent;
380
+ VALUE ele = Qnil;
381
+ if (raw != NULL)
382
+ ele = rb_str_new(raw, rawlen);
383
+ H_ELE_SET(match, H_ELE_ETAG, ele);
384
+ S->focus = H_ELE_GET(match, H_ELE_PARENT);
412
385
  S->last = Qnil;
413
386
  }
414
387
  } else if (sym == sym_cdata) {
@@ -429,15 +402,13 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
429
402
  tag = rb_reg_nth_match(1, match);
430
403
  attr = rb_reg_nth_match(2, match);
431
404
  {
432
- H_ELE(cProcIns);
433
- rb_hpricot_add(S->focus, ele);
405
+ H_ELE(cProcIns);
406
+ rb_hpricot_add(S->focus, ele);
434
407
  }
435
408
  } else if (sym == sym_text) {
436
409
  // TODO: add raw_string as well?
437
410
  if (!NIL_P(S->last) && RBASIC(S->last)->klass == cText) {
438
- hpricot_ele *he;
439
- Data_Get_Struct(S->last, hpricot_ele, he);
440
- rb_str_append(he->tag, tag);
411
+ rb_str_append(H_ELE_GET(S->last, H_ELE_TAG), tag);
441
412
  } else {
442
413
  H_ELE(cText);
443
414
  rb_hpricot_add(S->focus, ele);
@@ -480,11 +451,8 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
480
451
 
481
452
  if (!rb_block_given_p())
482
453
  {
483
- hpricot_ele *he = ALLOC(hpricot_ele);
484
454
  S = ALLOC(hpricot_state);
485
- MEMZERO(he, hpricot_ele, 1);
486
- he->tag = he->attr = he->etag = he->parent = he->children = Qnil;
487
- S->doc = Data_Wrap_Struct(cDoc, hpricot_ele_mark, hpricot_ele_free, he);
455
+ S->doc = rb_obj_alloc(cDoc);
488
456
  rb_gc_register_address(&S->doc);
489
457
  S->focus = S->doc;
490
458
  S->last = Qnil;
@@ -509,7 +477,7 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
509
477
  buf = ALLOC_N(char, buffer_size);
510
478
 
511
479
  %% write init;
512
-
480
+
513
481
  while (!done) {
514
482
  VALUE str;
515
483
  char *p, *pe;
@@ -561,7 +529,7 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
561
529
 
562
530
  pe = p + len;
563
531
  %% write exec;
564
-
532
+
565
533
  if (cs == hpricot_scan_error) {
566
534
  if (buf != NULL)
567
535
  free(buf);
@@ -574,7 +542,7 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
574
542
  rb_raise(rb_eHpricotParseError, "parse error on line %d.\n" NO_WAY_SERIOUSLY, curline);
575
543
  }
576
544
  }
577
-
545
+
578
546
  if (done && ele_open)
579
547
  {
580
548
  ele_open = 0;
@@ -635,66 +603,103 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
635
603
  return Qnil;
636
604
  }
637
605
 
638
- void Init_hpricot_scan()
606
+ static VALUE
607
+ alloc_hpricot_struct(VALUE klass)
639
608
  {
640
- mHpricot = rb_define_module("Hpricot");
641
- rb_define_attr(rb_singleton_class(mHpricot), "buffer_size", 1, 1);
642
- rb_define_singleton_method(mHpricot, "scan", hpricot_scan, -1);
643
- rb_define_singleton_method(mHpricot, "css", hpricot_css, 3);
644
- rb_eHpricotParseError = rb_define_class_under(mHpricot, "ParseError", rb_eStandardError);
609
+ VALUE size;
610
+ long n;
611
+ NEWOBJ(st, struct RStruct);
612
+ OBJSETUP(st, klass, T_STRUCT);
613
+
614
+ size = rb_struct_iv_get(klass, "__size__");
615
+ n = FIX2LONG(size);
616
+
617
+ #ifndef RSTRUCT_EMBED_LEN_MAX
618
+ st->ptr = ALLOC_N(VALUE, n);
619
+ rb_mem_clear(st->ptr, n);
620
+ st->len = n;
621
+ #else
622
+ if (0 < n && n <= RSTRUCT_EMBED_LEN_MAX) {
623
+ RBASIC(st)->flags &= ~RSTRUCT_EMBED_LEN_MASK;
624
+ RBASIC(st)->flags |= n << RSTRUCT_EMBED_LEN_SHIFT;
625
+ rb_mem_clear(st->as.ary, n);
626
+ } else {
627
+ st->as.heap.ptr = ALLOC_N(VALUE, n);
628
+ rb_mem_clear(st->as.heap.ptr, n);
629
+ st->as.heap.len = n;
630
+ }
631
+ #endif
645
632
 
646
- cDoc = rb_define_class_under(mHpricot, "Doc", rb_cObject);
647
- rb_define_alloc_func(cDoc, hpricot_ele_alloc);
648
- rb_define_method(cDoc, "children", hpricot_ele_get_children, 0);
649
- rb_define_method(cDoc, "children=", hpricot_ele_set_children, 1);
650
-
651
- cBaseEle = rb_define_class_under(mHpricot, "BaseEle", rb_cObject);
652
- rb_define_alloc_func(cBaseEle, hpricot_ele_alloc);
653
- rb_define_method(cBaseEle, "raw_string", hpricot_ele_get_raw, 0);
654
- rb_define_method(cBaseEle, "clear_raw", hpricot_ele_clear_raw, 0);
655
- rb_define_method(cBaseEle, "parent", hpricot_ele_get_parent, 0);
656
- rb_define_method(cBaseEle, "parent=", hpricot_ele_set_parent, 1);
657
- cCData = rb_define_class_under(mHpricot, "CData", cBaseEle);
658
- rb_define_method(cCData, "content", hpricot_ele_get_tag, 0);
659
- rb_define_method(cCData, "content=", hpricot_ele_set_tag, 1);
660
- cComment = rb_define_class_under(mHpricot, "Comment", cBaseEle);
661
- rb_define_method(cComment, "content", hpricot_ele_get_tag, 0);
662
- rb_define_method(cComment, "content=", hpricot_ele_set_tag, 1);
663
- cDocType = rb_define_class_under(mHpricot, "DocType", cBaseEle);
664
- rb_define_method(cDocType, "target", hpricot_ele_get_tag, 0);
665
- rb_define_method(cDocType, "target=", hpricot_ele_set_tag, 1);
666
- rb_define_method(cDocType, "public_id", hpricot_ele_get_public_id, 0);
667
- rb_define_method(cDocType, "public_id=", hpricot_ele_set_public_id, 1);
668
- rb_define_method(cDocType, "system_id", hpricot_ele_get_system_id, 0);
669
- rb_define_method(cDocType, "system_id=", hpricot_ele_set_system_id, 1);
670
- cElem = rb_define_class_under(mHpricot, "Elem", cBaseEle);
671
- rb_define_method(cElem, "raw_attributes", hpricot_ele_get_attr, 0);
672
- rb_define_method(cElem, "raw_attributes=", hpricot_ele_set_attr, 1);
673
- rb_define_method(cElem, "children", hpricot_ele_get_children, 0);
674
- rb_define_method(cElem, "children=", hpricot_ele_set_children, 1);
675
- rb_define_method(cElem, "etag", hpricot_ele_get_etag, 0);
676
- rb_define_method(cElem, "etag=", hpricot_ele_set_etag, 1);
677
- rb_define_method(cElem, "name", hpricot_ele_get_tag, 0);
678
- rb_define_method(cElem, "name=", hpricot_ele_set_tag, 1);
679
- cETag = rb_define_class_under(mHpricot, "ETag", cBaseEle);
680
- rb_define_method(cETag, "name", hpricot_ele_get_tag, 0);
681
- rb_define_method(cETag, "name=", hpricot_ele_set_tag, 1);
682
- cBogusETag = rb_define_class_under(mHpricot, "BogusETag", cETag);
683
- cText = rb_define_class_under(mHpricot, "Text", cBaseEle);
684
- rb_define_method(cText, "content", hpricot_ele_get_tag, 0);
685
- rb_define_method(cText, "content=", hpricot_ele_set_tag, 1);
686
- cXMLDecl = rb_define_class_under(mHpricot, "XMLDecl", cBaseEle);
687
- rb_define_method(cXMLDecl, "encoding", hpricot_ele_get_encoding, 0);
688
- rb_define_method(cXMLDecl, "encoding=", hpricot_ele_set_encoding, 1);
689
- rb_define_method(cXMLDecl, "standalone", hpricot_ele_get_standalone, 0);
690
- rb_define_method(cXMLDecl, "standalone=", hpricot_ele_set_standalone, 1);
691
- rb_define_method(cXMLDecl, "version", hpricot_ele_get_version, 0);
692
- rb_define_method(cXMLDecl, "version=", hpricot_ele_set_version, 1);
693
- cProcIns = rb_define_class_under(mHpricot, "ProcIns", cBaseEle);
694
- rb_define_method(cProcIns, "target", hpricot_ele_get_tag, 0);
695
- rb_define_method(cProcIns, "target=", hpricot_ele_set_tag, 1);
696
- rb_define_method(cProcIns, "content", hpricot_ele_get_attr, 0);
697
- rb_define_method(cProcIns, "content=", hpricot_ele_set_attr, 1);
633
+ return (VALUE)st;
634
+ }
635
+
636
+ static VALUE hpricot_struct_ref0(VALUE obj) {return H_ELE_GET(obj, 0);}
637
+ static VALUE hpricot_struct_ref1(VALUE obj) {return H_ELE_GET(obj, 1);}
638
+ static VALUE hpricot_struct_ref2(VALUE obj) {return H_ELE_GET(obj, 2);}
639
+ static VALUE hpricot_struct_ref3(VALUE obj) {return H_ELE_GET(obj, 3);}
640
+ static VALUE hpricot_struct_ref4(VALUE obj) {return H_ELE_GET(obj, 4);}
641
+ static VALUE hpricot_struct_ref5(VALUE obj) {return H_ELE_GET(obj, 5);}
642
+ static VALUE hpricot_struct_ref6(VALUE obj) {return H_ELE_GET(obj, 6);}
643
+ static VALUE hpricot_struct_ref7(VALUE obj) {return H_ELE_GET(obj, 7);}
644
+ static VALUE hpricot_struct_ref8(VALUE obj) {return H_ELE_GET(obj, 8);}
645
+ static VALUE hpricot_struct_ref9(VALUE obj) {return H_ELE_GET(obj, 9);}
646
+
647
+ static VALUE (*ref_func[10])() = {
648
+ hpricot_struct_ref0,
649
+ hpricot_struct_ref1,
650
+ hpricot_struct_ref2,
651
+ hpricot_struct_ref3,
652
+ hpricot_struct_ref4,
653
+ hpricot_struct_ref5,
654
+ hpricot_struct_ref6,
655
+ hpricot_struct_ref7,
656
+ hpricot_struct_ref8,
657
+ hpricot_struct_ref9,
658
+ };
659
+
660
+ static VALUE hpricot_struct_set0(VALUE obj, VALUE val) {return H_ELE_SET(obj, 0, val);}
661
+ static VALUE hpricot_struct_set1(VALUE obj, VALUE val) {return H_ELE_SET(obj, 1, val);}
662
+ static VALUE hpricot_struct_set2(VALUE obj, VALUE val) {return H_ELE_SET(obj, 2, val);}
663
+ static VALUE hpricot_struct_set3(VALUE obj, VALUE val) {return H_ELE_SET(obj, 3, val);}
664
+ static VALUE hpricot_struct_set4(VALUE obj, VALUE val) {return H_ELE_SET(obj, 4, val);}
665
+ static VALUE hpricot_struct_set5(VALUE obj, VALUE val) {return H_ELE_SET(obj, 5, val);}
666
+ static VALUE hpricot_struct_set6(VALUE obj, VALUE val) {return H_ELE_SET(obj, 6, val);}
667
+ static VALUE hpricot_struct_set7(VALUE obj, VALUE val) {return H_ELE_SET(obj, 7, val);}
668
+ static VALUE hpricot_struct_set8(VALUE obj, VALUE val) {return H_ELE_SET(obj, 8, val);}
669
+ static VALUE hpricot_struct_set9(VALUE obj, VALUE val) {return H_ELE_SET(obj, 9, val);}
670
+
671
+ static VALUE (*set_func[10])() = {
672
+ hpricot_struct_set0,
673
+ hpricot_struct_set1,
674
+ hpricot_struct_set2,
675
+ hpricot_struct_set3,
676
+ hpricot_struct_set4,
677
+ hpricot_struct_set5,
678
+ hpricot_struct_set6,
679
+ hpricot_struct_set7,
680
+ hpricot_struct_set8,
681
+ hpricot_struct_set9,
682
+ };
683
+
684
+ static VALUE
685
+ make_hpricot_struct(VALUE members)
686
+ {
687
+ int i = 0;
688
+ VALUE klass = rb_class_new(rb_cObject);
689
+ rb_iv_set(klass, "__size__", INT2NUM(RARRAY_LEN(members)));
690
+ rb_define_alloc_func(klass, alloc_hpricot_struct);
691
+ rb_define_singleton_method(klass, "new", rb_class_new_instance, -1);
692
+ for (i = 0; i < RARRAY_LEN(members); i++) {
693
+ ID id = SYM2ID(RARRAY_PTR(members)[i]);
694
+ rb_define_method_id(klass, id, ref_func[i], 0);
695
+ rb_define_method_id(klass, rb_id_attrset(id), set_func[i], 1);
696
+ }
697
+ return klass;
698
+ }
699
+
700
+ void Init_hpricot_scan()
701
+ {
702
+ VALUE structElem, structAttr, structBasic;
698
703
 
699
704
  s_ElementContent = rb_intern("ElementContent");
700
705
  symAllow = ID2SYM(rb_intern("allow"));
@@ -704,19 +709,78 @@ void Init_hpricot_scan()
704
709
  s_parent = rb_intern("parent");
705
710
  s_read = rb_intern("read");
706
711
  s_to_str = rb_intern("to_str");
707
- iv_parent = rb_intern("parent");
708
712
  sym_xmldecl = ID2SYM(rb_intern("xmldecl"));
709
713
  sym_doctype = ID2SYM(rb_intern("doctype"));
710
714
  sym_procins = ID2SYM(rb_intern("procins"));
711
715
  sym_stag = ID2SYM(rb_intern("stag"));
712
716
  sym_etag = ID2SYM(rb_intern("etag"));
713
717
  sym_emptytag = ID2SYM(rb_intern("emptytag"));
718
+ sym_allowed = ID2SYM(rb_intern("allowed"));
719
+ sym_children = ID2SYM(rb_intern("children"));
714
720
  sym_comment = ID2SYM(rb_intern("comment"));
715
721
  sym_cdata = ID2SYM(rb_intern("cdata"));
722
+ sym_name = ID2SYM(rb_intern("name"));
723
+ sym_parent = ID2SYM(rb_intern("parent"));
724
+ sym_raw_attributes = ID2SYM(rb_intern("raw_attributes"));
725
+ sym_raw_string = ID2SYM(rb_intern("raw_string"));
726
+ sym_tagno = ID2SYM(rb_intern("tagno"));
716
727
  sym_text = ID2SYM(rb_intern("text"));
717
728
  sym_EMPTY = ID2SYM(rb_intern("EMPTY"));
718
729
  sym_CDATA = ID2SYM(rb_intern("CDATA"));
719
730
 
731
+ mHpricot = rb_define_module("Hpricot");
732
+ rb_define_attr(rb_singleton_class(mHpricot), "buffer_size", 1, 1);
733
+ rb_define_singleton_method(mHpricot, "scan", hpricot_scan, -1);
734
+ rb_define_singleton_method(mHpricot, "css", hpricot_css, 3);
735
+ rb_eHpricotParseError = rb_define_class_under(mHpricot, "ParseError", rb_eStandardError);
736
+
737
+ structElem = make_hpricot_struct(rb_ary_new3(8, sym_name, sym_parent,
738
+ sym_raw_attributes, sym_etag, sym_raw_string, sym_allowed,
739
+ sym_tagno, sym_children));
740
+ structAttr = make_hpricot_struct(rb_ary_new3(3, sym_name, sym_parent, sym_raw_attributes));
741
+ structBasic = make_hpricot_struct(rb_ary_new3(2, sym_name, sym_parent));
742
+
743
+ cDoc = rb_define_class_under(mHpricot, "Doc", structElem);
744
+ cCData = rb_define_class_under(mHpricot, "CData", structBasic);
745
+ rb_define_method(cCData, "content", hpricot_ele_get_name, 0);
746
+ rb_define_method(cCData, "content=", hpricot_ele_set_name, 1);
747
+ cComment = rb_define_class_under(mHpricot, "Comment", structBasic);
748
+ rb_define_method(cComment, "content", hpricot_ele_get_name, 0);
749
+ rb_define_method(cComment, "content=", hpricot_ele_set_name, 1);
750
+ cDocType = rb_define_class_under(mHpricot, "DocType", structAttr);
751
+ rb_define_method(cDocType, "raw_string", hpricot_ele_get_name, 0);
752
+ rb_define_method(cDocType, "clear_raw", hpricot_ele_clear_name, 0);
753
+ rb_define_method(cDocType, "target", hpricot_ele_get_target, 0);
754
+ rb_define_method(cDocType, "target=", hpricot_ele_set_target, 1);
755
+ rb_define_method(cDocType, "public_id", hpricot_ele_get_public_id, 0);
756
+ rb_define_method(cDocType, "public_id=", hpricot_ele_set_public_id, 1);
757
+ rb_define_method(cDocType, "system_id", hpricot_ele_get_system_id, 0);
758
+ rb_define_method(cDocType, "system_id=", hpricot_ele_set_system_id, 1);
759
+ cElem = rb_define_class_under(mHpricot, "Elem", structElem);
760
+ rb_define_method(cElem, "clear_raw", hpricot_ele_clear_raw, 0);
761
+ cBogusETag = rb_define_class_under(mHpricot, "BogusETag", structAttr);
762
+ rb_define_method(cBogusETag, "raw_string", hpricot_ele_get_attr, 0);
763
+ rb_define_method(cBogusETag, "clear_raw", hpricot_ele_clear_attr, 0);
764
+ cText = rb_define_class_under(mHpricot, "Text", structBasic);
765
+ rb_define_method(cText, "raw_string", hpricot_ele_get_name, 0);
766
+ rb_define_method(cText, "clear_raw", hpricot_ele_clear_name, 0);
767
+ rb_define_method(cText, "content", hpricot_ele_get_name, 0);
768
+ rb_define_method(cText, "content=", hpricot_ele_set_name, 1);
769
+ cXMLDecl = rb_define_class_under(mHpricot, "XMLDecl", structAttr);
770
+ rb_define_method(cXMLDecl, "raw_string", hpricot_ele_get_name, 0);
771
+ rb_define_method(cXMLDecl, "clear_raw", hpricot_ele_clear_name, 0);
772
+ rb_define_method(cXMLDecl, "encoding", hpricot_ele_get_encoding, 0);
773
+ rb_define_method(cXMLDecl, "encoding=", hpricot_ele_set_encoding, 1);
774
+ rb_define_method(cXMLDecl, "standalone", hpricot_ele_get_standalone, 0);
775
+ rb_define_method(cXMLDecl, "standalone=", hpricot_ele_set_standalone, 1);
776
+ rb_define_method(cXMLDecl, "version", hpricot_ele_get_version, 0);
777
+ rb_define_method(cXMLDecl, "version=", hpricot_ele_set_version, 1);
778
+ cProcIns = rb_define_class_under(mHpricot, "ProcIns", structAttr);
779
+ rb_define_method(cProcIns, "target", hpricot_ele_get_name, 0);
780
+ rb_define_method(cProcIns, "target=", hpricot_ele_set_name, 1);
781
+ rb_define_method(cProcIns, "content", hpricot_ele_get_attr, 0);
782
+ rb_define_method(cProcIns, "content=", hpricot_ele_set_attr, 1);
783
+
720
784
  rb_const_set(mHpricot, rb_intern("ProcInsParse"),
721
785
  reProcInsParse = rb_eval_string("/\\A<\\?(\\S+)\\s+(.+)/m"));
722
786
  }