hpricot 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -137,62 +137,62 @@ public class HpricotScanService implements BasicLibraryService {
137
137
 
138
138
  // H_ATTR(target)
139
139
  public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
140
- ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("target"), x);
140
+ H_ELE_GET_asHash(self, H_ELE_ATTR).fastASet(self.getRuntime().newSymbol("target"), x);
141
141
  return self;
142
142
  }
143
143
 
144
144
  public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
145
- return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
145
+ return H_ELE_GET_asHash(self, H_ELE_ATTR).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
146
146
  }
147
147
 
148
148
  // H_ATTR(encoding)
149
149
  public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
150
- ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
150
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
151
151
  return self;
152
152
  }
153
153
 
154
154
  public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
155
- return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
155
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
156
156
  }
157
157
 
158
158
  // H_ATTR(version)
159
159
  public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
160
- ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
160
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
161
161
  return self;
162
162
  }
163
163
 
164
164
  public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
165
- return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
165
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
166
166
  }
167
167
 
168
168
  // H_ATTR(standalone)
169
169
  public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
170
- ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
170
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
171
171
  return self;
172
172
  }
173
173
 
174
174
  public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
175
- return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
175
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
176
176
  }
177
177
 
178
178
  // H_ATTR(system_id)
179
179
  public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
180
- ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
180
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
181
181
  return self;
182
182
  }
183
183
 
184
184
  public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
185
- return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
185
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
186
186
  }
187
187
 
188
188
  // H_ATTR(public_id)
189
189
  public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
190
- ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
190
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
191
191
  return self;
192
192
  }
193
193
 
194
194
  public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
195
- return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
195
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
196
196
  }
197
197
 
198
198
  public static class Scanner {
@@ -950,7 +950,7 @@ public class HpricotScanService implements BasicLibraryService {
950
950
  }
951
951
  }
952
952
 
953
- public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
953
+ public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues. So sorry!";
954
954
 
955
955
  public final static int H_ELE_TAG = 0;
956
956
  public final static int H_ELE_PARENT = 1;
@@ -965,6 +965,15 @@ public class HpricotScanService implements BasicLibraryService {
965
965
  return ((IRubyObject[])recv.dataGetStruct())[n];
966
966
  }
967
967
 
968
+ public static RubyHash H_ELE_GET_asHash(IRubyObject recv, int n) {
969
+ IRubyObject obj = ((IRubyObject[])recv.dataGetStruct())[n];
970
+ if(obj.isNil()) {
971
+ obj = RubyHash.newHash(recv.getRuntime());
972
+ ((IRubyObject[])recv.dataGetStruct())[n] = obj;
973
+ }
974
+ return (RubyHash)obj;
975
+ }
976
+
968
977
  public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
969
978
  ((IRubyObject[])recv.dataGetStruct())[n] = value;
970
979
  return value;
@@ -4,9 +4,15 @@
4
4
  * $Author: why $
5
5
  * $Date: 2006-05-08 22:03:50 -0600 (Mon, 08 May 2006) $
6
6
  *
7
- * Copyright (C) 2006 why the lucky stiff
7
+ * Copyright (C) 2006, 2010 why the lucky stiff
8
8
  */
9
9
  #include <ruby.h>
10
+ #include <assert.h>
11
+
12
+ struct hpricot_struct {
13
+ int len;
14
+ VALUE* ptr;
15
+ };
10
16
 
11
17
  #ifndef RARRAY_LEN
12
18
  #define RARRAY_LEN(arr) RARRAY(arr)->len
@@ -16,7 +22,7 @@
16
22
 
17
23
  VALUE hpricot_css(VALUE, VALUE, VALUE, VALUE, VALUE);
18
24
 
19
- #define NO_WAY_SERIOUSLY "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!"
25
+ #define NO_WAY_SERIOUSLY "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues. So sorry!"
20
26
 
21
27
  static VALUE sym_xmldecl, sym_doctype, sym_procins, sym_stag, sym_etag, sym_emptytag, sym_comment,
22
28
  sym_cdata, sym_name, sym_parent, sym_raw_attributes, sym_raw_string, sym_tagno,
@@ -37,32 +43,49 @@ static VALUE reProcInsParse;
37
43
  #define H_ELE_HASH 6
38
44
  #define H_ELE_CHILDREN 7
39
45
 
40
- #define H_ELE_GET(ele, idx) RSTRUCT_PTR(ele)[idx]
41
- #define H_ELE_SET(ele, idx, val) RSTRUCT_PTR(ele)[idx] = val
46
+ #define HSTRUCT_PTR(ele) ((struct hpricot_struct*)DATA_PTR(ele))->ptr
47
+
48
+ #define H_ELE_GET(ele, idx) HSTRUCT_PTR(ele)[idx]
49
+ #define H_ELE_SET(ele, idx, val) HSTRUCT_PTR(ele)[idx] = val
42
50
 
43
51
  #define OPT(opts, key) (!NIL_P(opts) && RTEST(rb_hash_aref(opts, ID2SYM(rb_intern("" # key)))))
44
52
 
45
- #define ELE(N) \
46
- if (te > ts || text == 1) { \
47
- char *raw = NULL; \
48
- int rawlen = 0; \
49
- ele_open = 0; text = 0; \
53
+ #ifdef HAVE_RUBY_ENCODING_H
54
+ #include <ruby/encoding.h>
55
+ # define ASSOCIATE_INDEX(s) rb_enc_associate_index((s), encoding_index)
56
+ # define ENCODING_INDEX , encoding_index
57
+ #else
58
+ # define ASSOCIATE_INDEX(s)
59
+ # define ENCODING_INDEX
60
+ #endif
61
+
62
+ #define ELE(N) \
63
+ if (te > ts || text == 1) { \
64
+ char *raw = NULL; \
65
+ int rawlen = 0; \
66
+ ele_open = 0; text = 0; \
50
67
  if (ts != 0 && sym_##N != sym_cdata && sym_##N != sym_text && sym_##N != sym_procins && sym_##N != sym_comment) { \
51
- raw = ts; rawlen = te - ts; \
52
- } \
53
- if (rb_block_given_p()) { \
54
- VALUE raw_string = Qnil; \
55
- if (raw != NULL) raw_string = rb_str_new(raw, rawlen); \
56
- rb_yield_tokens(sym_##N, tag, attr, Qnil, taint); \
57
- } else \
58
- rb_hpricot_token(S, sym_##N, tag, attr, raw, rawlen, taint); \
68
+ raw = ts; rawlen = te - ts; \
69
+ } \
70
+ if (rb_block_given_p()) { \
71
+ VALUE raw_string = Qnil; \
72
+ if (raw != NULL) { \
73
+ raw_string = rb_str_new(raw, rawlen); \
74
+ ASSOCIATE_INDEX(raw_string); \
75
+ } \
76
+ rb_yield_tokens(sym_##N, tag, attr, Qnil, taint); \
77
+ } else \
78
+ rb_hpricot_token(S, sym_##N, tag, attr, raw, rawlen, taint ENCODING_INDEX); \
59
79
  }
60
80
 
61
- #define SET(N, E) \
62
- if (mark_##N == NULL || E == mark_##N) \
63
- N = rb_str_new2(""); \
64
- else if (E > mark_##N) \
65
- N = rb_str_new(mark_##N, E - mark_##N);
81
+ #define SET(N, E) \
82
+ if (mark_##N == NULL || E == mark_##N) { \
83
+ N = rb_str_new2(""); \
84
+ ASSOCIATE_INDEX(N); \
85
+ } else if (E > mark_##N) { \
86
+ N = rb_str_new(mark_##N, E - mark_##N); \
87
+ ASSOCIATE_INDEX(N); \
88
+ }
66
89
 
67
90
  #define CAT(N, E) if (NIL_P(N)) { SET(N, E); } else { rb_str_cat(N, mark_##N, E - mark_##N); }
68
91
 
@@ -119,7 +142,18 @@ static VALUE reProcInsParse;
119
142
  }
120
143
  action akey { SET(akey, p); }
121
144
  action xmlver { SET(aval, p); ATTR(ID2SYM(rb_intern("version")), aval); }
122
- action xmlenc { SET(aval, p); ATTR(ID2SYM(rb_intern("encoding")), aval); }
145
+ action xmlenc {
146
+ #ifdef HAVE_RUBY_ENCODING_H
147
+ if (mark_aval < p) {
148
+ char psave = *p;
149
+ *p = '\0';
150
+ encoding_index = rb_enc_find_index(mark_aval);
151
+ *p = psave;
152
+ }
153
+ #endif
154
+ SET(aval, p);
155
+ ATTR(ID2SYM(rb_intern("encoding")), aval);
156
+ }
123
157
  action xmlsd { SET(aval, p); ATTR(ID2SYM(rb_intern("standalone")), aval); }
124
158
  action pubid { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); }
125
159
  action sysid { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); }
@@ -132,7 +166,7 @@ static VALUE reProcInsParse;
132
166
  }
133
167
 
134
168
  action save_attr {
135
- if (!S->xml)
169
+ if (!S->xml && !NIL_P(akey))
136
170
  akey = rb_funcall(akey, s_downcase, 0);
137
171
  ATTR(akey, aval);
138
172
  }
@@ -230,33 +264,41 @@ H_ATTR(standalone);
230
264
  H_ATTR(system_id);
231
265
  H_ATTR(public_id);
232
266
 
233
- #define H_ELE(klass) \
234
- ele = rb_obj_alloc(klass); \
235
- if (klass == cElem) { \
236
- H_ELE_SET(ele, H_ELE_TAG, tag); \
237
- H_ELE_SET(ele, H_ELE_ATTR, attr); \
238
- H_ELE_SET(ele, H_ELE_EC, ec); \
267
+ #define H_ELE(klass) \
268
+ ele = rb_obj_alloc(klass); \
269
+ if (klass == cElem) { \
270
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
271
+ H_ELE_SET(ele, H_ELE_ATTR, attr); \
272
+ H_ELE_SET(ele, H_ELE_EC, ec); \
239
273
  if (raw != NULL && (sym == sym_emptytag || sym == sym_stag || sym == sym_doctype)) { \
240
- H_ELE_SET(ele, H_ELE_RAW, rb_str_new(raw, rawlen)); \
241
- } \
274
+ VALUE raw_str = rb_str_new(raw, rawlen); \
275
+ ASSOCIATE_INDEX(raw_str); \
276
+ H_ELE_SET(ele, H_ELE_RAW, raw_str); \
277
+ } \
242
278
  } else if (klass == cDocType || klass == cProcIns || klass == cXMLDecl || klass == cBogusETag) { \
243
- if (klass == cBogusETag) { \
244
- H_ELE_SET(ele, H_ELE_TAG, tag); \
245
- if (raw != NULL) \
246
- H_ELE_SET(ele, H_ELE_ATTR, rb_str_new(raw, rawlen)); \
247
- } else { \
248
- if (klass == cDocType) \
249
- ATTR(ID2SYM(rb_intern("target")), tag); \
250
- H_ELE_SET(ele, H_ELE_ATTR, attr); \
251
- if (klass != cProcIns) { \
252
- tag = Qnil; \
253
- if (raw != NULL) tag = rb_str_new(raw, rawlen); \
254
- } \
255
- H_ELE_SET(ele, H_ELE_TAG, tag); \
256
- } \
257
- } else { \
258
- H_ELE_SET(ele, H_ELE_TAG, tag); \
259
- } \
279
+ if (klass == cBogusETag) { \
280
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
281
+ if (raw != NULL) { \
282
+ VALUE raw_str = rb_str_new(raw, rawlen); \
283
+ ASSOCIATE_INDEX(raw_str); \
284
+ H_ELE_SET(ele, H_ELE_ATTR, raw_str); \
285
+ } \
286
+ } else { \
287
+ if (klass == cDocType) \
288
+ ATTR(ID2SYM(rb_intern("target")), tag); \
289
+ H_ELE_SET(ele, H_ELE_ATTR, attr); \
290
+ if (klass != cProcIns) { \
291
+ tag = Qnil; \
292
+ if (raw != NULL) { \
293
+ tag = rb_str_new(raw, rawlen); \
294
+ ASSOCIATE_INDEX(tag); \
295
+ } \
296
+ } \
297
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
298
+ } \
299
+ } else { \
300
+ H_ELE_SET(ele, H_ELE_TAG, tag); \
301
+ } \
260
302
  S->last = ele
261
303
 
262
304
  //
@@ -264,7 +306,12 @@ H_ATTR(public_id);
264
306
  // in the lexer. this step just pairs up the start and end tags.
265
307
  //
266
308
  void
267
- rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw, int rawlen, int taint)
309
+ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr,
310
+ char *raw, int rawlen, int taint
311
+ #ifdef HAVE_RUBY_ENCODING_H
312
+ , int encoding_index
313
+ #endif
314
+ )
268
315
  {
269
316
  VALUE ele, ec = Qnil;
270
317
 
@@ -286,6 +333,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
286
333
  {
287
334
  sym = sym_text;
288
335
  tag = rb_str_new(raw, rawlen);
336
+ ASSOCIATE_INDEX(tag);
289
337
  }
290
338
 
291
339
  if (!NIL_P(ec)) {
@@ -351,6 +399,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
351
399
  if (S->strict) {
352
400
  if (NIL_P(rb_hash_aref(S->EC, tag))) {
353
401
  tag = rb_str_new2("div");
402
+ ASSOCIATE_INDEX(tag);
354
403
  }
355
404
  }
356
405
 
@@ -380,8 +429,10 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
380
429
  else
381
430
  {
382
431
  VALUE ele = Qnil;
383
- if (raw != NULL)
432
+ if (raw != NULL) {
384
433
  ele = rb_str_new(raw, rawlen);
434
+ ASSOCIATE_INDEX(ele);
435
+ }
385
436
  H_ELE_SET(match, H_ELE_ETAG, ele);
386
437
  S->focus = H_ELE_GET(match, H_ELE_PARENT);
387
438
  S->last = Qnil;
@@ -395,8 +446,13 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
395
446
  } else if (sym == sym_doctype) {
396
447
  H_ELE(cDocType);
397
448
  if (S->strict) {
398
- rb_hash_aset(attr, ID2SYM(rb_intern("system_id")), rb_str_new2("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
399
- rb_hash_aset(attr, ID2SYM(rb_intern("public_id")), rb_str_new2("-//W3C//DTD XHTML 1.0 Strict//EN"));
449
+ VALUE id;
450
+ id = rb_str_new2("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
451
+ ASSOCIATE_INDEX(id);
452
+ rb_hash_aset(attr, ID2SYM(rb_intern("system_id")), id);
453
+ id = rb_str_new2("-//W3C//DTD XHTML 1.0 Strict//EN");
454
+ ASSOCIATE_INDEX(id);
455
+ rb_hash_aset(attr, ID2SYM(rb_intern("public_id")), id);
400
456
  }
401
457
  rb_hpricot_add(S->focus, ele);
402
458
  } else if (sym == sym_procins) {
@@ -409,7 +465,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
409
465
  }
410
466
  } else if (sym == sym_text) {
411
467
  // TODO: add raw_string as well?
412
- if (!NIL_P(S->last) && RBASIC(S->last)->klass == cText) {
468
+ if (!NIL_P(S->last) && RTEST(rb_obj_is_instance_of(S->last, cText))) {
413
469
  rb_str_append(H_ELE_GET(S->last, H_ELE_TAG), tag);
414
470
  } else {
415
471
  H_ELE(cText);
@@ -431,6 +487,9 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
431
487
  VALUE attr = Qnil, tag = Qnil, akey = Qnil, aval = Qnil, bufsize = Qnil;
432
488
  char *mark_tag = 0, *mark_akey = 0, *mark_aval = 0;
433
489
  int done = 0, ele_open = 0, buffer_size = 0, taint = 0;
490
+ #ifdef HAVE_RUBY_ENCODING_H
491
+ int encoding_index = rb_enc_to_index(rb_default_external_encoding());
492
+ #endif
434
493
 
435
494
  rb_scan_args(argc, argv, "11", &port, &opts);
436
495
  taint = OBJ_TAINTED(port);
@@ -605,34 +664,68 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
605
664
  return Qnil;
606
665
  }
607
666
 
667
+ void hstruct_mark(void* ptr) {
668
+ struct hpricot_struct* st = (struct hpricot_struct*)ptr;
669
+ int i;
670
+
671
+ for(i = 0; i < st->len; i++) {
672
+ rb_gc_mark(st->ptr[i]);
673
+ }
674
+ }
675
+
676
+ void hstruct_free(void* ptr) {
677
+ struct hpricot_struct* st = (struct hpricot_struct*)ptr;
678
+
679
+ free(st->ptr);
680
+ free(st);
681
+ }
682
+
608
683
  static VALUE
609
- alloc_hpricot_struct(VALUE klass)
684
+ alloc_hpricot_struct8(VALUE klass)
610
685
  {
611
- VALUE size;
612
- long n;
613
- NEWOBJ(st, struct RStruct);
614
- OBJSETUP(st, klass, T_STRUCT);
615
-
616
- size = rb_struct_iv_get(klass, "__size__");
617
- n = FIX2LONG(size);
618
-
619
- #ifndef RSTRUCT_EMBED_LEN_MAX
620
- st->ptr = ALLOC_N(VALUE, n);
621
- rb_mem_clear(st->ptr, n);
622
- st->len = n;
623
- #else
624
- if (0 < n && n <= RSTRUCT_EMBED_LEN_MAX) {
625
- RBASIC(st)->flags &= ~RSTRUCT_EMBED_LEN_MASK;
626
- RBASIC(st)->flags |= n << RSTRUCT_EMBED_LEN_SHIFT;
627
- rb_mem_clear(st->as.ary, n);
628
- } else {
629
- st->as.heap.ptr = ALLOC_N(VALUE, n);
630
- rb_mem_clear(st->as.heap.ptr, n);
631
- st->as.heap.len = n;
632
- }
633
- #endif
686
+ VALUE obj;
687
+ struct hpricot_struct* st;
634
688
 
635
- return (VALUE)st;
689
+ obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st);
690
+
691
+ st->len = 8;
692
+ st->ptr = ALLOC_N(VALUE, 8);
693
+
694
+ rb_mem_clear(st->ptr, 8);
695
+
696
+ return obj;
697
+ }
698
+
699
+ static VALUE
700
+ alloc_hpricot_struct2(VALUE klass)
701
+ {
702
+ VALUE obj;
703
+ struct hpricot_struct* st;
704
+
705
+ obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st);
706
+
707
+ st->len = 2;
708
+ st->ptr = ALLOC_N(VALUE, 2);
709
+
710
+ rb_mem_clear(st->ptr, 2);
711
+
712
+ return obj;
713
+ }
714
+
715
+ static VALUE
716
+ alloc_hpricot_struct3(VALUE klass)
717
+ {
718
+ VALUE obj;
719
+ struct hpricot_struct* st;
720
+
721
+ obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st);
722
+
723
+ st->len = 3;
724
+ st->ptr = ALLOC_N(VALUE, 3);
725
+
726
+ rb_mem_clear(st->ptr, 3);
727
+
728
+ return obj;
636
729
  }
637
730
 
638
731
  static VALUE hpricot_struct_ref0(VALUE obj) {return H_ELE_GET(obj, 0);}
@@ -684,17 +777,28 @@ static VALUE (*set_func[10])() = {
684
777
  };
685
778
 
686
779
  static VALUE
687
- make_hpricot_struct(VALUE members)
780
+ make_hpricot_struct(VALUE members, VALUE (*alloc)(VALUE klass))
688
781
  {
689
782
  int i = 0;
783
+ char attr_set[128];
784
+
690
785
  VALUE klass = rb_class_new(rb_cObject);
691
- rb_iv_set(klass, "__size__", INT2NUM(RARRAY_LEN(members)));
692
- rb_define_alloc_func(klass, alloc_hpricot_struct);
693
- rb_define_singleton_method(klass, "new", rb_class_new_instance, -1);
694
- for (i = 0; i < RARRAY_LEN(members); i++) {
695
- ID id = SYM2ID(RARRAY_PTR(members)[i]);
696
- rb_define_method_id(klass, id, ref_func[i], 0);
697
- rb_define_method_id(klass, rb_id_attrset(id), set_func[i], 1);
786
+ rb_define_alloc_func(klass, alloc);
787
+
788
+ int len = RARRAY_LEN(members);
789
+ assert(len < 10);
790
+
791
+ for (i = 0; i < len; i++) {
792
+ ID id = SYM2ID(rb_ary_entry(members, i));
793
+ const char* name = rb_id2name(id);
794
+ int len = strlen(name);
795
+
796
+ memcpy(attr_set, name, strlen(name));
797
+ attr_set[len] = '=';
798
+ attr_set[len+1] = 0;
799
+
800
+ rb_define_method(klass, name, ref_func[i], 0);
801
+ rb_define_method(klass, attr_set, set_func[i], 1);
698
802
  }
699
803
  return klass;
700
804
  }
@@ -738,9 +842,13 @@ void Init_hpricot_scan()
738
842
 
739
843
  structElem = make_hpricot_struct(rb_ary_new3(8, sym_name, sym_parent,
740
844
  sym_raw_attributes, sym_etag, sym_raw_string, sym_allowed,
741
- sym_tagno, sym_children));
742
- structAttr = make_hpricot_struct(rb_ary_new3(3, sym_name, sym_parent, sym_raw_attributes));
743
- structBasic = make_hpricot_struct(rb_ary_new3(2, sym_name, sym_parent));
845
+ sym_tagno, sym_children), alloc_hpricot_struct8);
846
+ structAttr = make_hpricot_struct(
847
+ rb_ary_new3(3, sym_name, sym_parent, sym_raw_attributes),
848
+ alloc_hpricot_struct3);
849
+ structBasic = make_hpricot_struct(
850
+ rb_ary_new3(2, sym_name, sym_parent),
851
+ alloc_hpricot_struct2);
744
852
 
745
853
  cDoc = rb_define_class_under(mHpricot, "Doc", structElem);
746
854
  cCData = rb_define_class_under(mHpricot, "CData", structBasic);