hpricot 0.8.2 → 0.8.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -137,62 +137,62 @@ public class HpricotScanService implements BasicLibraryService {
137
137
 
138
138
  // H_ATTR(target)
139
139
  public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
140
- ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("target"), x);
140
+ H_ELE_GET_asHash(self, H_ELE_ATTR).fastASet(self.getRuntime().newSymbol("target"), x);
141
141
  return self;
142
142
  }
143
143
 
144
144
  public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
145
- return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
145
+ return H_ELE_GET_asHash(self, H_ELE_ATTR).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
146
146
  }
147
147
 
148
148
  // H_ATTR(encoding)
149
149
  public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
150
- ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
150
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
151
151
  return self;
152
152
  }
153
153
 
154
154
  public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
155
- return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
155
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
156
156
  }
157
157
 
158
158
  // H_ATTR(version)
159
159
  public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
160
- ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
160
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
161
161
  return self;
162
162
  }
163
163
 
164
164
  public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
165
- return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
165
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
166
166
  }
167
167
 
168
168
  // H_ATTR(standalone)
169
169
  public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
170
- ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
170
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
171
171
  return self;
172
172
  }
173
173
 
174
174
  public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
175
- return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
175
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
176
176
  }
177
177
 
178
178
  // H_ATTR(system_id)
179
179
  public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
180
- ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
180
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
181
181
  return self;
182
182
  }
183
183
 
184
184
  public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
185
- return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
185
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
186
186
  }
187
187
 
188
188
  // H_ATTR(public_id)
189
189
  public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
190
- ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
190
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
191
191
  return self;
192
192
  }
193
193
 
194
194
  public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
195
- return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
195
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
196
196
  }
197
197
 
198
198
  public static class Scanner {
@@ -950,7 +950,7 @@ public class HpricotScanService implements BasicLibraryService {
950
950
  }
951
951
  }
952
952
 
953
- public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
953
+ public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues. So sorry!";
954
954
 
955
955
  public final static int H_ELE_TAG = 0;
956
956
  public final static int H_ELE_PARENT = 1;
@@ -965,6 +965,15 @@ public class HpricotScanService implements BasicLibraryService {
965
965
  return ((IRubyObject[])recv.dataGetStruct())[n];
966
966
  }
967
967
 
968
+ public static RubyHash H_ELE_GET_asHash(IRubyObject recv, int n) {
969
+ IRubyObject obj = ((IRubyObject[])recv.dataGetStruct())[n];
970
+ if(obj.isNil()) {
971
+ obj = RubyHash.newHash(recv.getRuntime());
972
+ ((IRubyObject[])recv.dataGetStruct())[n] = obj;
973
+ }
974
+ return (RubyHash)obj;
975
+ }
976
+
968
977
  public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
969
978
  ((IRubyObject[])recv.dataGetStruct())[n] = value;
970
979
  return value;
@@ -4,9 +4,15 @@
4
4
  * $Author: why $
5
5
  * $Date: 2006-05-08 22:03:50 -0600 (Mon, 08 May 2006) $
6
6
  *
7
- * Copyright (C) 2006 why the lucky stiff
7
+ * Copyright (C) 2006, 2010 why the lucky stiff
8
8
  */
9
9
  #include <ruby.h>
10
+ #include <assert.h>
11
+
12
+ struct hpricot_struct {
13
+ int len;
14
+ VALUE* ptr;
15
+ };
10
16
 
11
17
  #ifndef RARRAY_LEN
12
18
  #define RARRAY_LEN(arr) RARRAY(arr)->len
@@ -16,7 +22,7 @@
16
22
 
17
23
  VALUE hpricot_css(VALUE, VALUE, VALUE, VALUE, VALUE);
18
24
 
19
- #define NO_WAY_SERIOUSLY "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!"
25
+ #define NO_WAY_SERIOUSLY "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues. So sorry!"
20
26
 
21
27
  static VALUE sym_xmldecl, sym_doctype, sym_procins, sym_stag, sym_etag, sym_emptytag, sym_comment,
22
28
  sym_cdata, sym_name, sym_parent, sym_raw_attributes, sym_raw_string, sym_tagno,
@@ -37,32 +43,49 @@ static VALUE reProcInsParse;
37
43
  #define H_ELE_HASH 6
38
44
  #define H_ELE_CHILDREN 7
39
45
 
40
- #define H_ELE_GET(ele, idx) RSTRUCT_PTR(ele)[idx]
41
- #define H_ELE_SET(ele, idx, val) RSTRUCT_PTR(ele)[idx] = val
46
+ #define HSTRUCT_PTR(ele) ((struct hpricot_struct*)DATA_PTR(ele))->ptr
47
+
48
+ #define H_ELE_GET(ele, idx) HSTRUCT_PTR(ele)[idx]
49
+ #define H_ELE_SET(ele, idx, val) HSTRUCT_PTR(ele)[idx] = val
42
50
 
43
51
  #define OPT(opts, key) (!NIL_P(opts) && RTEST(rb_hash_aref(opts, ID2SYM(rb_intern("" # key)))))
44
52
 
45
- #define ELE(N) \
46
- if (te > ts || text == 1) { \
47
- char *raw = NULL; \
48
- int rawlen = 0; \
49
- ele_open = 0; text = 0; \
53
+ #ifdef HAVE_RUBY_ENCODING_H
54
+ #include <ruby/encoding.h>
55
+ # define ASSOCIATE_INDEX(s) rb_enc_associate_index((s), encoding_index)
56
+ # define ENCODING_INDEX , encoding_index
57
+ #else
58
+ # define ASSOCIATE_INDEX(s)
59
+ # define ENCODING_INDEX
60
+ #endif
61
+
62
+ #define ELE(N) \
63
+ if (te > ts || text == 1) { \
64
+ char *raw = NULL; \
65
+ int rawlen = 0; \
66
+ ele_open = 0; text = 0; \
50
67
  if (ts != 0 && sym_##N != sym_cdata && sym_##N != sym_text && sym_##N != sym_procins && sym_##N != sym_comment) { \
51
- raw = ts; rawlen = te - ts; \
52
- } \
53
- if (rb_block_given_p()) { \
54
- VALUE raw_string = Qnil; \
55
- if (raw != NULL) raw_string = rb_str_new(raw, rawlen); \
56
- rb_yield_tokens(sym_##N, tag, attr, Qnil, taint); \
57
- } else \
58
- rb_hpricot_token(S, sym_##N, tag, attr, raw, rawlen, taint); \
68
+ raw = ts; rawlen = te - ts; \
69
+ } \
70
+ if (rb_block_given_p()) { \
71
+ VALUE raw_string = Qnil; \
72
+ if (raw != NULL) { \
73
+ raw_string = rb_str_new(raw, rawlen); \
74
+ ASSOCIATE_INDEX(raw_string); \
75
+ } \
76
+ rb_yield_tokens(sym_##N, tag, attr, Qnil, taint); \
77
+ } else \
78
+ rb_hpricot_token(S, sym_##N, tag, attr, raw, rawlen, taint ENCODING_INDEX); \
59
79
  }
60
80
 
61
- #define SET(N, E) \
62
- if (mark_##N == NULL || E == mark_##N) \
63
- N = rb_str_new2(""); \
64
- else if (E > mark_##N) \
65
- N = rb_str_new(mark_##N, E - mark_##N);
81
+ #define SET(N, E) \
82
+ if (mark_##N == NULL || E == mark_##N) { \
83
+ N = rb_str_new2(""); \
84
+ ASSOCIATE_INDEX(N); \
85
+ } else if (E > mark_##N) { \
86
+ N = rb_str_new(mark_##N, E - mark_##N); \
87
+ ASSOCIATE_INDEX(N); \
88
+ }
66
89
 
67
90
  #define CAT(N, E) if (NIL_P(N)) { SET(N, E); } else { rb_str_cat(N, mark_##N, E - mark_##N); }
68
91
 
@@ -119,7 +142,18 @@ static VALUE reProcInsParse;
119
142
  }
120
143
  action akey { SET(akey, p); }
121
144
  action xmlver { SET(aval, p); ATTR(ID2SYM(rb_intern("version")), aval); }
122
- action xmlenc { SET(aval, p); ATTR(ID2SYM(rb_intern("encoding")), aval); }
145
+ action xmlenc {
146
+ #ifdef HAVE_RUBY_ENCODING_H
147
+ if (mark_aval < p) {
148
+ char psave = *p;
149
+ *p = '\0';
150
+ encoding_index = rb_enc_find_index(mark_aval);
151
+ *p = psave;
152
+ }
153
+ #endif
154
+ SET(aval, p);
155
+ ATTR(ID2SYM(rb_intern("encoding")), aval);
156
+ }
123
157
  action xmlsd { SET(aval, p); ATTR(ID2SYM(rb_intern("standalone")), aval); }
124
158
  action pubid { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); }
125
159
  action sysid { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); }
@@ -132,7 +166,7 @@ static VALUE reProcInsParse;
132
166
  }
133
167
 
134
168
  action save_attr {
135
- if (!S->xml)
169
+ if (!S->xml && !NIL_P(akey))
136
170
  akey = rb_funcall(akey, s_downcase, 0);
137
171
  ATTR(akey, aval);
138
172
  }
@@ -230,33 +264,41 @@ H_ATTR(standalone);
230
264
  H_ATTR(system_id);
231
265
  H_ATTR(public_id);
232
266
 
233
- #define H_ELE(klass) \
234
- ele = rb_obj_alloc(klass); \
235
- if (klass == cElem) { \
236
- H_ELE_SET(ele, H_ELE_TAG, tag); \
237
- H_ELE_SET(ele, H_ELE_ATTR, attr); \
238
- H_ELE_SET(ele, H_ELE_EC, ec); \
267
+ #define H_ELE(klass) \
268
+ ele = rb_obj_alloc(klass); \
269
+ if (klass == cElem) { \
270
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
271
+ H_ELE_SET(ele, H_ELE_ATTR, attr); \
272
+ H_ELE_SET(ele, H_ELE_EC, ec); \
239
273
  if (raw != NULL && (sym == sym_emptytag || sym == sym_stag || sym == sym_doctype)) { \
240
- H_ELE_SET(ele, H_ELE_RAW, rb_str_new(raw, rawlen)); \
241
- } \
274
+ VALUE raw_str = rb_str_new(raw, rawlen); \
275
+ ASSOCIATE_INDEX(raw_str); \
276
+ H_ELE_SET(ele, H_ELE_RAW, raw_str); \
277
+ } \
242
278
  } else if (klass == cDocType || klass == cProcIns || klass == cXMLDecl || klass == cBogusETag) { \
243
- if (klass == cBogusETag) { \
244
- H_ELE_SET(ele, H_ELE_TAG, tag); \
245
- if (raw != NULL) \
246
- H_ELE_SET(ele, H_ELE_ATTR, rb_str_new(raw, rawlen)); \
247
- } else { \
248
- if (klass == cDocType) \
249
- ATTR(ID2SYM(rb_intern("target")), tag); \
250
- H_ELE_SET(ele, H_ELE_ATTR, attr); \
251
- if (klass != cProcIns) { \
252
- tag = Qnil; \
253
- if (raw != NULL) tag = rb_str_new(raw, rawlen); \
254
- } \
255
- H_ELE_SET(ele, H_ELE_TAG, tag); \
256
- } \
257
- } else { \
258
- H_ELE_SET(ele, H_ELE_TAG, tag); \
259
- } \
279
+ if (klass == cBogusETag) { \
280
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
281
+ if (raw != NULL) { \
282
+ VALUE raw_str = rb_str_new(raw, rawlen); \
283
+ ASSOCIATE_INDEX(raw_str); \
284
+ H_ELE_SET(ele, H_ELE_ATTR, raw_str); \
285
+ } \
286
+ } else { \
287
+ if (klass == cDocType) \
288
+ ATTR(ID2SYM(rb_intern("target")), tag); \
289
+ H_ELE_SET(ele, H_ELE_ATTR, attr); \
290
+ if (klass != cProcIns) { \
291
+ tag = Qnil; \
292
+ if (raw != NULL) { \
293
+ tag = rb_str_new(raw, rawlen); \
294
+ ASSOCIATE_INDEX(tag); \
295
+ } \
296
+ } \
297
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
298
+ } \
299
+ } else { \
300
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
301
+ } \
260
302
  S->last = ele
261
303
 
262
304
  //
@@ -264,7 +306,12 @@ H_ATTR(public_id);
264
306
  // in the lexer. this step just pairs up the start and end tags.
265
307
  //
266
308
  void
267
- rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw, int rawlen, int taint)
309
+ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr,
310
+ char *raw, int rawlen, int taint
311
+ #ifdef HAVE_RUBY_ENCODING_H
312
+ , int encoding_index
313
+ #endif
314
+ )
268
315
  {
269
316
  VALUE ele, ec = Qnil;
270
317
 
@@ -286,6 +333,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
286
333
  {
287
334
  sym = sym_text;
288
335
  tag = rb_str_new(raw, rawlen);
336
+ ASSOCIATE_INDEX(tag);
289
337
  }
290
338
 
291
339
  if (!NIL_P(ec)) {
@@ -351,6 +399,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
351
399
  if (S->strict) {
352
400
  if (NIL_P(rb_hash_aref(S->EC, tag))) {
353
401
  tag = rb_str_new2("div");
402
+ ASSOCIATE_INDEX(tag);
354
403
  }
355
404
  }
356
405
 
@@ -380,8 +429,10 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
380
429
  else
381
430
  {
382
431
  VALUE ele = Qnil;
383
- if (raw != NULL)
432
+ if (raw != NULL) {
384
433
  ele = rb_str_new(raw, rawlen);
434
+ ASSOCIATE_INDEX(ele);
435
+ }
385
436
  H_ELE_SET(match, H_ELE_ETAG, ele);
386
437
  S->focus = H_ELE_GET(match, H_ELE_PARENT);
387
438
  S->last = Qnil;
@@ -395,8 +446,13 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
395
446
  } else if (sym == sym_doctype) {
396
447
  H_ELE(cDocType);
397
448
  if (S->strict) {
398
- rb_hash_aset(attr, ID2SYM(rb_intern("system_id")), rb_str_new2("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
399
- rb_hash_aset(attr, ID2SYM(rb_intern("public_id")), rb_str_new2("-//W3C//DTD XHTML 1.0 Strict//EN"));
449
+ VALUE id;
450
+ id = rb_str_new2("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
451
+ ASSOCIATE_INDEX(id);
452
+ rb_hash_aset(attr, ID2SYM(rb_intern("system_id")), id);
453
+ id = rb_str_new2("-//W3C//DTD XHTML 1.0 Strict//EN");
454
+ ASSOCIATE_INDEX(id);
455
+ rb_hash_aset(attr, ID2SYM(rb_intern("public_id")), id);
400
456
  }
401
457
  rb_hpricot_add(S->focus, ele);
402
458
  } else if (sym == sym_procins) {
@@ -409,7 +465,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
409
465
  }
410
466
  } else if (sym == sym_text) {
411
467
  // TODO: add raw_string as well?
412
- if (!NIL_P(S->last) && RBASIC(S->last)->klass == cText) {
468
+ if (!NIL_P(S->last) && RTEST(rb_obj_is_instance_of(S->last, cText))) {
413
469
  rb_str_append(H_ELE_GET(S->last, H_ELE_TAG), tag);
414
470
  } else {
415
471
  H_ELE(cText);
@@ -431,6 +487,9 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
431
487
  VALUE attr = Qnil, tag = Qnil, akey = Qnil, aval = Qnil, bufsize = Qnil;
432
488
  char *mark_tag = 0, *mark_akey = 0, *mark_aval = 0;
433
489
  int done = 0, ele_open = 0, buffer_size = 0, taint = 0;
490
+ #ifdef HAVE_RUBY_ENCODING_H
491
+ int encoding_index = rb_enc_to_index(rb_default_external_encoding());
492
+ #endif
434
493
 
435
494
  rb_scan_args(argc, argv, "11", &port, &opts);
436
495
  taint = OBJ_TAINTED(port);
@@ -605,34 +664,68 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
605
664
  return Qnil;
606
665
  }
607
666
 
667
+ void hstruct_mark(void* ptr) {
668
+ struct hpricot_struct* st = (struct hpricot_struct*)ptr;
669
+ int i;
670
+
671
+ for(i = 0; i < st->len; i++) {
672
+ rb_gc_mark(st->ptr[i]);
673
+ }
674
+ }
675
+
676
+ void hstruct_free(void* ptr) {
677
+ struct hpricot_struct* st = (struct hpricot_struct*)ptr;
678
+
679
+ free(st->ptr);
680
+ free(st);
681
+ }
682
+
608
683
  static VALUE
609
- alloc_hpricot_struct(VALUE klass)
684
+ alloc_hpricot_struct8(VALUE klass)
610
685
  {
611
- VALUE size;
612
- long n;
613
- NEWOBJ(st, struct RStruct);
614
- OBJSETUP(st, klass, T_STRUCT);
615
-
616
- size = rb_struct_iv_get(klass, "__size__");
617
- n = FIX2LONG(size);
618
-
619
- #ifndef RSTRUCT_EMBED_LEN_MAX
620
- st->ptr = ALLOC_N(VALUE, n);
621
- rb_mem_clear(st->ptr, n);
622
- st->len = n;
623
- #else
624
- if (0 < n && n <= RSTRUCT_EMBED_LEN_MAX) {
625
- RBASIC(st)->flags &= ~RSTRUCT_EMBED_LEN_MASK;
626
- RBASIC(st)->flags |= n << RSTRUCT_EMBED_LEN_SHIFT;
627
- rb_mem_clear(st->as.ary, n);
628
- } else {
629
- st->as.heap.ptr = ALLOC_N(VALUE, n);
630
- rb_mem_clear(st->as.heap.ptr, n);
631
- st->as.heap.len = n;
632
- }
633
- #endif
686
+ VALUE obj;
687
+ struct hpricot_struct* st;
634
688
 
635
- return (VALUE)st;
689
+ obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st);
690
+
691
+ st->len = 8;
692
+ st->ptr = ALLOC_N(VALUE, 8);
693
+
694
+ rb_mem_clear(st->ptr, 8);
695
+
696
+ return obj;
697
+ }
698
+
699
+ static VALUE
700
+ alloc_hpricot_struct2(VALUE klass)
701
+ {
702
+ VALUE obj;
703
+ struct hpricot_struct* st;
704
+
705
+ obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st);
706
+
707
+ st->len = 2;
708
+ st->ptr = ALLOC_N(VALUE, 2);
709
+
710
+ rb_mem_clear(st->ptr, 2);
711
+
712
+ return obj;
713
+ }
714
+
715
+ static VALUE
716
+ alloc_hpricot_struct3(VALUE klass)
717
+ {
718
+ VALUE obj;
719
+ struct hpricot_struct* st;
720
+
721
+ obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st);
722
+
723
+ st->len = 3;
724
+ st->ptr = ALLOC_N(VALUE, 3);
725
+
726
+ rb_mem_clear(st->ptr, 3);
727
+
728
+ return obj;
636
729
  }
637
730
 
638
731
  static VALUE hpricot_struct_ref0(VALUE obj) {return H_ELE_GET(obj, 0);}
@@ -684,17 +777,28 @@ static VALUE (*set_func[10])() = {
684
777
  };
685
778
 
686
779
  static VALUE
687
- make_hpricot_struct(VALUE members)
780
+ make_hpricot_struct(VALUE members, VALUE (*alloc)(VALUE klass))
688
781
  {
689
782
  int i = 0;
783
+ char attr_set[128];
784
+
690
785
  VALUE klass = rb_class_new(rb_cObject);
691
- rb_iv_set(klass, "__size__", INT2NUM(RARRAY_LEN(members)));
692
- rb_define_alloc_func(klass, alloc_hpricot_struct);
693
- rb_define_singleton_method(klass, "new", rb_class_new_instance, -1);
694
- for (i = 0; i < RARRAY_LEN(members); i++) {
695
- ID id = SYM2ID(RARRAY_PTR(members)[i]);
696
- rb_define_method_id(klass, id, ref_func[i], 0);
697
- rb_define_method_id(klass, rb_id_attrset(id), set_func[i], 1);
786
+ rb_define_alloc_func(klass, alloc);
787
+
788
+ int len = RARRAY_LEN(members);
789
+ assert(len < 10);
790
+
791
+ for (i = 0; i < len; i++) {
792
+ ID id = SYM2ID(rb_ary_entry(members, i));
793
+ const char* name = rb_id2name(id);
794
+ int len = strlen(name);
795
+
796
+ memcpy(attr_set, name, strlen(name));
797
+ attr_set[len] = '=';
798
+ attr_set[len+1] = 0;
799
+
800
+ rb_define_method(klass, name, ref_func[i], 0);
801
+ rb_define_method(klass, attr_set, set_func[i], 1);
698
802
  }
699
803
  return klass;
700
804
  }
@@ -738,9 +842,13 @@ void Init_hpricot_scan()
738
842
 
739
843
  structElem = make_hpricot_struct(rb_ary_new3(8, sym_name, sym_parent,
740
844
  sym_raw_attributes, sym_etag, sym_raw_string, sym_allowed,
741
- sym_tagno, sym_children));
742
- structAttr = make_hpricot_struct(rb_ary_new3(3, sym_name, sym_parent, sym_raw_attributes));
743
- structBasic = make_hpricot_struct(rb_ary_new3(2, sym_name, sym_parent));
845
+ sym_tagno, sym_children), alloc_hpricot_struct8);
846
+ structAttr = make_hpricot_struct(
847
+ rb_ary_new3(3, sym_name, sym_parent, sym_raw_attributes),
848
+ alloc_hpricot_struct3);
849
+ structBasic = make_hpricot_struct(
850
+ rb_ary_new3(2, sym_name, sym_parent),
851
+ alloc_hpricot_struct2);
744
852
 
745
853
  cDoc = rb_define_class_under(mHpricot, "Doc", structElem);
746
854
  cCData = rb_define_class_under(mHpricot, "CData", structBasic);