hpricot 0.8.2 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +19 -3
- data/{README → README.md} +84 -83
- data/Rakefile +85 -123
- data/ext/fast_xs/FastXsService.java +94 -1
- data/ext/fast_xs/fast_xs.c +60 -51
- data/ext/hpricot_scan/HpricotCss.java +50 -31
- data/ext/hpricot_scan/HpricotScanService.java +77 -64
- data/ext/hpricot_scan/extconf.rb +3 -0
- data/ext/hpricot_scan/hpricot_css.c +331 -323
- data/ext/hpricot_scan/hpricot_css.rl +6 -1
- data/ext/hpricot_scan/hpricot_scan.c +922 -810
- data/ext/hpricot_scan/hpricot_scan.java.rl +22 -13
- data/ext/hpricot_scan/hpricot_scan.rl +198 -90
- data/extras/hpricot.png +0 -0
- data/lib/hpricot/builder.rb +1 -1
- data/lib/hpricot/elements.rb +28 -24
- data/lib/hpricot/htmlinfo.rb +1 -1
- data/lib/hpricot/tag.rb +2 -2
- data/lib/hpricot/tags.rb +4 -4
- data/lib/hpricot/traverse.rb +25 -25
- data/lib/hpricot/xchar.rb +3 -3
- data/test/files/basic.xhtml +1 -1
- data/test/test_parser.rb +31 -2
- metadata +21 -9
- data/extras/mingw-rbconfig.rb +0 -176
@@ -137,62 +137,62 @@ public class HpricotScanService implements BasicLibraryService {
|
|
137
137
|
|
138
138
|
// H_ATTR(target)
|
139
139
|
public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
|
140
|
-
(
|
140
|
+
H_ELE_GET_asHash(self, H_ELE_ATTR).fastASet(self.getRuntime().newSymbol("target"), x);
|
141
141
|
return self;
|
142
142
|
}
|
143
143
|
|
144
144
|
public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
|
145
|
-
return (
|
145
|
+
return H_ELE_GET_asHash(self, H_ELE_ATTR).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
|
146
146
|
}
|
147
147
|
|
148
148
|
// H_ATTR(encoding)
|
149
149
|
public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
|
150
|
-
((RubyHash)
|
150
|
+
((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
|
151
151
|
return self;
|
152
152
|
}
|
153
153
|
|
154
154
|
public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
|
155
|
-
return ((RubyHash)
|
155
|
+
return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
|
156
156
|
}
|
157
157
|
|
158
158
|
// H_ATTR(version)
|
159
159
|
public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
|
160
|
-
((RubyHash)
|
160
|
+
((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
|
161
161
|
return self;
|
162
162
|
}
|
163
163
|
|
164
164
|
public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
|
165
|
-
return ((RubyHash)
|
165
|
+
return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
|
166
166
|
}
|
167
167
|
|
168
168
|
// H_ATTR(standalone)
|
169
169
|
public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
|
170
|
-
((RubyHash)
|
170
|
+
((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
|
171
171
|
return self;
|
172
172
|
}
|
173
173
|
|
174
174
|
public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
|
175
|
-
return ((RubyHash)
|
175
|
+
return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
|
176
176
|
}
|
177
177
|
|
178
178
|
// H_ATTR(system_id)
|
179
179
|
public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
|
180
|
-
((RubyHash)
|
180
|
+
((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
|
181
181
|
return self;
|
182
182
|
}
|
183
183
|
|
184
184
|
public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
|
185
|
-
return ((RubyHash)
|
185
|
+
return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
|
186
186
|
}
|
187
187
|
|
188
188
|
// H_ATTR(public_id)
|
189
189
|
public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
|
190
|
-
((RubyHash)
|
190
|
+
((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
|
191
191
|
return self;
|
192
192
|
}
|
193
193
|
|
194
194
|
public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
|
195
|
-
return ((RubyHash)
|
195
|
+
return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
|
196
196
|
}
|
197
197
|
|
198
198
|
public static class Scanner {
|
@@ -950,7 +950,7 @@ public class HpricotScanService implements BasicLibraryService {
|
|
950
950
|
}
|
951
951
|
}
|
952
952
|
|
953
|
-
public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please
|
953
|
+
public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues. So sorry!";
|
954
954
|
|
955
955
|
public final static int H_ELE_TAG = 0;
|
956
956
|
public final static int H_ELE_PARENT = 1;
|
@@ -965,6 +965,15 @@ public class HpricotScanService implements BasicLibraryService {
|
|
965
965
|
return ((IRubyObject[])recv.dataGetStruct())[n];
|
966
966
|
}
|
967
967
|
|
968
|
+
public static RubyHash H_ELE_GET_asHash(IRubyObject recv, int n) {
|
969
|
+
IRubyObject obj = ((IRubyObject[])recv.dataGetStruct())[n];
|
970
|
+
if(obj.isNil()) {
|
971
|
+
obj = RubyHash.newHash(recv.getRuntime());
|
972
|
+
((IRubyObject[])recv.dataGetStruct())[n] = obj;
|
973
|
+
}
|
974
|
+
return (RubyHash)obj;
|
975
|
+
}
|
976
|
+
|
968
977
|
public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
|
969
978
|
((IRubyObject[])recv.dataGetStruct())[n] = value;
|
970
979
|
return value;
|
@@ -4,9 +4,15 @@
|
|
4
4
|
* $Author: why $
|
5
5
|
* $Date: 2006-05-08 22:03:50 -0600 (Mon, 08 May 2006) $
|
6
6
|
*
|
7
|
-
* Copyright (C) 2006 why the lucky stiff
|
7
|
+
* Copyright (C) 2006, 2010 why the lucky stiff
|
8
8
|
*/
|
9
9
|
#include <ruby.h>
|
10
|
+
#include <assert.h>
|
11
|
+
|
12
|
+
struct hpricot_struct {
|
13
|
+
int len;
|
14
|
+
VALUE* ptr;
|
15
|
+
};
|
10
16
|
|
11
17
|
#ifndef RARRAY_LEN
|
12
18
|
#define RARRAY_LEN(arr) RARRAY(arr)->len
|
@@ -16,7 +22,7 @@
|
|
16
22
|
|
17
23
|
VALUE hpricot_css(VALUE, VALUE, VALUE, VALUE, VALUE);
|
18
24
|
|
19
|
-
#define NO_WAY_SERIOUSLY "*** This should not happen, please
|
25
|
+
#define NO_WAY_SERIOUSLY "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues. So sorry!"
|
20
26
|
|
21
27
|
static VALUE sym_xmldecl, sym_doctype, sym_procins, sym_stag, sym_etag, sym_emptytag, sym_comment,
|
22
28
|
sym_cdata, sym_name, sym_parent, sym_raw_attributes, sym_raw_string, sym_tagno,
|
@@ -37,32 +43,49 @@ static VALUE reProcInsParse;
|
|
37
43
|
#define H_ELE_HASH 6
|
38
44
|
#define H_ELE_CHILDREN 7
|
39
45
|
|
40
|
-
#define
|
41
|
-
|
46
|
+
#define HSTRUCT_PTR(ele) ((struct hpricot_struct*)DATA_PTR(ele))->ptr
|
47
|
+
|
48
|
+
#define H_ELE_GET(ele, idx) HSTRUCT_PTR(ele)[idx]
|
49
|
+
#define H_ELE_SET(ele, idx, val) HSTRUCT_PTR(ele)[idx] = val
|
42
50
|
|
43
51
|
#define OPT(opts, key) (!NIL_P(opts) && RTEST(rb_hash_aref(opts, ID2SYM(rb_intern("" # key)))))
|
44
52
|
|
45
|
-
#
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
53
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
54
|
+
#include <ruby/encoding.h>
|
55
|
+
# define ASSOCIATE_INDEX(s) rb_enc_associate_index((s), encoding_index)
|
56
|
+
# define ENCODING_INDEX , encoding_index
|
57
|
+
#else
|
58
|
+
# define ASSOCIATE_INDEX(s)
|
59
|
+
# define ENCODING_INDEX
|
60
|
+
#endif
|
61
|
+
|
62
|
+
#define ELE(N) \
|
63
|
+
if (te > ts || text == 1) { \
|
64
|
+
char *raw = NULL; \
|
65
|
+
int rawlen = 0; \
|
66
|
+
ele_open = 0; text = 0; \
|
50
67
|
if (ts != 0 && sym_##N != sym_cdata && sym_##N != sym_text && sym_##N != sym_procins && sym_##N != sym_comment) { \
|
51
|
-
raw = ts; rawlen = te - ts;
|
52
|
-
}
|
53
|
-
if (rb_block_given_p()) {
|
54
|
-
VALUE raw_string = Qnil;
|
55
|
-
if (raw != NULL)
|
56
|
-
|
57
|
-
|
58
|
-
|
68
|
+
raw = ts; rawlen = te - ts; \
|
69
|
+
} \
|
70
|
+
if (rb_block_given_p()) { \
|
71
|
+
VALUE raw_string = Qnil; \
|
72
|
+
if (raw != NULL) { \
|
73
|
+
raw_string = rb_str_new(raw, rawlen); \
|
74
|
+
ASSOCIATE_INDEX(raw_string); \
|
75
|
+
} \
|
76
|
+
rb_yield_tokens(sym_##N, tag, attr, Qnil, taint); \
|
77
|
+
} else \
|
78
|
+
rb_hpricot_token(S, sym_##N, tag, attr, raw, rawlen, taint ENCODING_INDEX); \
|
59
79
|
}
|
60
80
|
|
61
|
-
#define SET(N, E)
|
62
|
-
if (mark_##N == NULL || E == mark_##N) \
|
63
|
-
N = rb_str_new2("");
|
64
|
-
|
65
|
-
|
81
|
+
#define SET(N, E) \
|
82
|
+
if (mark_##N == NULL || E == mark_##N) { \
|
83
|
+
N = rb_str_new2(""); \
|
84
|
+
ASSOCIATE_INDEX(N); \
|
85
|
+
} else if (E > mark_##N) { \
|
86
|
+
N = rb_str_new(mark_##N, E - mark_##N); \
|
87
|
+
ASSOCIATE_INDEX(N); \
|
88
|
+
}
|
66
89
|
|
67
90
|
#define CAT(N, E) if (NIL_P(N)) { SET(N, E); } else { rb_str_cat(N, mark_##N, E - mark_##N); }
|
68
91
|
|
@@ -119,7 +142,18 @@ static VALUE reProcInsParse;
|
|
119
142
|
}
|
120
143
|
action akey { SET(akey, p); }
|
121
144
|
action xmlver { SET(aval, p); ATTR(ID2SYM(rb_intern("version")), aval); }
|
122
|
-
action xmlenc {
|
145
|
+
action xmlenc {
|
146
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
147
|
+
if (mark_aval < p) {
|
148
|
+
char psave = *p;
|
149
|
+
*p = '\0';
|
150
|
+
encoding_index = rb_enc_find_index(mark_aval);
|
151
|
+
*p = psave;
|
152
|
+
}
|
153
|
+
#endif
|
154
|
+
SET(aval, p);
|
155
|
+
ATTR(ID2SYM(rb_intern("encoding")), aval);
|
156
|
+
}
|
123
157
|
action xmlsd { SET(aval, p); ATTR(ID2SYM(rb_intern("standalone")), aval); }
|
124
158
|
action pubid { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); }
|
125
159
|
action sysid { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); }
|
@@ -132,7 +166,7 @@ static VALUE reProcInsParse;
|
|
132
166
|
}
|
133
167
|
|
134
168
|
action save_attr {
|
135
|
-
if (!S->xml)
|
169
|
+
if (!S->xml && !NIL_P(akey))
|
136
170
|
akey = rb_funcall(akey, s_downcase, 0);
|
137
171
|
ATTR(akey, aval);
|
138
172
|
}
|
@@ -230,33 +264,41 @@ H_ATTR(standalone);
|
|
230
264
|
H_ATTR(system_id);
|
231
265
|
H_ATTR(public_id);
|
232
266
|
|
233
|
-
#define H_ELE(klass)
|
234
|
-
ele = rb_obj_alloc(klass);
|
235
|
-
if (klass == cElem) {
|
236
|
-
H_ELE_SET(ele, H_ELE_TAG, tag);
|
237
|
-
H_ELE_SET(ele, H_ELE_ATTR, attr);
|
238
|
-
H_ELE_SET(ele, H_ELE_EC, ec);
|
267
|
+
#define H_ELE(klass) \
|
268
|
+
ele = rb_obj_alloc(klass); \
|
269
|
+
if (klass == cElem) { \
|
270
|
+
H_ELE_SET(ele, H_ELE_TAG, tag); \
|
271
|
+
H_ELE_SET(ele, H_ELE_ATTR, attr); \
|
272
|
+
H_ELE_SET(ele, H_ELE_EC, ec); \
|
239
273
|
if (raw != NULL && (sym == sym_emptytag || sym == sym_stag || sym == sym_doctype)) { \
|
240
|
-
|
241
|
-
|
274
|
+
VALUE raw_str = rb_str_new(raw, rawlen); \
|
275
|
+
ASSOCIATE_INDEX(raw_str); \
|
276
|
+
H_ELE_SET(ele, H_ELE_RAW, raw_str); \
|
277
|
+
} \
|
242
278
|
} else if (klass == cDocType || klass == cProcIns || klass == cXMLDecl || klass == cBogusETag) { \
|
243
|
-
if (klass == cBogusETag) {
|
244
|
-
H_ELE_SET(ele, H_ELE_TAG, tag);
|
245
|
-
if (raw != NULL) \
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
if (klass
|
252
|
-
tag
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
279
|
+
if (klass == cBogusETag) { \
|
280
|
+
H_ELE_SET(ele, H_ELE_TAG, tag); \
|
281
|
+
if (raw != NULL) { \
|
282
|
+
VALUE raw_str = rb_str_new(raw, rawlen); \
|
283
|
+
ASSOCIATE_INDEX(raw_str); \
|
284
|
+
H_ELE_SET(ele, H_ELE_ATTR, raw_str); \
|
285
|
+
} \
|
286
|
+
} else { \
|
287
|
+
if (klass == cDocType) \
|
288
|
+
ATTR(ID2SYM(rb_intern("target")), tag); \
|
289
|
+
H_ELE_SET(ele, H_ELE_ATTR, attr); \
|
290
|
+
if (klass != cProcIns) { \
|
291
|
+
tag = Qnil; \
|
292
|
+
if (raw != NULL) { \
|
293
|
+
tag = rb_str_new(raw, rawlen); \
|
294
|
+
ASSOCIATE_INDEX(tag); \
|
295
|
+
} \
|
296
|
+
} \
|
297
|
+
H_ELE_SET(ele, H_ELE_TAG, tag); \
|
298
|
+
} \
|
299
|
+
} else { \
|
300
|
+
H_ELE_SET(ele, H_ELE_TAG, tag); \
|
301
|
+
} \
|
260
302
|
S->last = ele
|
261
303
|
|
262
304
|
//
|
@@ -264,7 +306,12 @@ H_ATTR(public_id);
|
|
264
306
|
// in the lexer. this step just pairs up the start and end tags.
|
265
307
|
//
|
266
308
|
void
|
267
|
-
rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr,
|
309
|
+
rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr,
|
310
|
+
char *raw, int rawlen, int taint
|
311
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
312
|
+
, int encoding_index
|
313
|
+
#endif
|
314
|
+
)
|
268
315
|
{
|
269
316
|
VALUE ele, ec = Qnil;
|
270
317
|
|
@@ -286,6 +333,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
|
|
286
333
|
{
|
287
334
|
sym = sym_text;
|
288
335
|
tag = rb_str_new(raw, rawlen);
|
336
|
+
ASSOCIATE_INDEX(tag);
|
289
337
|
}
|
290
338
|
|
291
339
|
if (!NIL_P(ec)) {
|
@@ -351,6 +399,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
|
|
351
399
|
if (S->strict) {
|
352
400
|
if (NIL_P(rb_hash_aref(S->EC, tag))) {
|
353
401
|
tag = rb_str_new2("div");
|
402
|
+
ASSOCIATE_INDEX(tag);
|
354
403
|
}
|
355
404
|
}
|
356
405
|
|
@@ -380,8 +429,10 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
|
|
380
429
|
else
|
381
430
|
{
|
382
431
|
VALUE ele = Qnil;
|
383
|
-
if (raw != NULL)
|
432
|
+
if (raw != NULL) {
|
384
433
|
ele = rb_str_new(raw, rawlen);
|
434
|
+
ASSOCIATE_INDEX(ele);
|
435
|
+
}
|
385
436
|
H_ELE_SET(match, H_ELE_ETAG, ele);
|
386
437
|
S->focus = H_ELE_GET(match, H_ELE_PARENT);
|
387
438
|
S->last = Qnil;
|
@@ -395,8 +446,13 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
|
|
395
446
|
} else if (sym == sym_doctype) {
|
396
447
|
H_ELE(cDocType);
|
397
448
|
if (S->strict) {
|
398
|
-
|
399
|
-
|
449
|
+
VALUE id;
|
450
|
+
id = rb_str_new2("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
|
451
|
+
ASSOCIATE_INDEX(id);
|
452
|
+
rb_hash_aset(attr, ID2SYM(rb_intern("system_id")), id);
|
453
|
+
id = rb_str_new2("-//W3C//DTD XHTML 1.0 Strict//EN");
|
454
|
+
ASSOCIATE_INDEX(id);
|
455
|
+
rb_hash_aset(attr, ID2SYM(rb_intern("public_id")), id);
|
400
456
|
}
|
401
457
|
rb_hpricot_add(S->focus, ele);
|
402
458
|
} else if (sym == sym_procins) {
|
@@ -409,7 +465,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
|
|
409
465
|
}
|
410
466
|
} else if (sym == sym_text) {
|
411
467
|
// TODO: add raw_string as well?
|
412
|
-
if (!NIL_P(S->last) &&
|
468
|
+
if (!NIL_P(S->last) && RTEST(rb_obj_is_instance_of(S->last, cText))) {
|
413
469
|
rb_str_append(H_ELE_GET(S->last, H_ELE_TAG), tag);
|
414
470
|
} else {
|
415
471
|
H_ELE(cText);
|
@@ -431,6 +487,9 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
|
|
431
487
|
VALUE attr = Qnil, tag = Qnil, akey = Qnil, aval = Qnil, bufsize = Qnil;
|
432
488
|
char *mark_tag = 0, *mark_akey = 0, *mark_aval = 0;
|
433
489
|
int done = 0, ele_open = 0, buffer_size = 0, taint = 0;
|
490
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
491
|
+
int encoding_index = rb_enc_to_index(rb_default_external_encoding());
|
492
|
+
#endif
|
434
493
|
|
435
494
|
rb_scan_args(argc, argv, "11", &port, &opts);
|
436
495
|
taint = OBJ_TAINTED(port);
|
@@ -605,34 +664,68 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
|
|
605
664
|
return Qnil;
|
606
665
|
}
|
607
666
|
|
667
|
+
void hstruct_mark(void* ptr) {
|
668
|
+
struct hpricot_struct* st = (struct hpricot_struct*)ptr;
|
669
|
+
int i;
|
670
|
+
|
671
|
+
for(i = 0; i < st->len; i++) {
|
672
|
+
rb_gc_mark(st->ptr[i]);
|
673
|
+
}
|
674
|
+
}
|
675
|
+
|
676
|
+
void hstruct_free(void* ptr) {
|
677
|
+
struct hpricot_struct* st = (struct hpricot_struct*)ptr;
|
678
|
+
|
679
|
+
free(st->ptr);
|
680
|
+
free(st);
|
681
|
+
}
|
682
|
+
|
608
683
|
static VALUE
|
609
|
-
|
684
|
+
alloc_hpricot_struct8(VALUE klass)
|
610
685
|
{
|
611
|
-
VALUE
|
612
|
-
|
613
|
-
NEWOBJ(st, struct RStruct);
|
614
|
-
OBJSETUP(st, klass, T_STRUCT);
|
615
|
-
|
616
|
-
size = rb_struct_iv_get(klass, "__size__");
|
617
|
-
n = FIX2LONG(size);
|
618
|
-
|
619
|
-
#ifndef RSTRUCT_EMBED_LEN_MAX
|
620
|
-
st->ptr = ALLOC_N(VALUE, n);
|
621
|
-
rb_mem_clear(st->ptr, n);
|
622
|
-
st->len = n;
|
623
|
-
#else
|
624
|
-
if (0 < n && n <= RSTRUCT_EMBED_LEN_MAX) {
|
625
|
-
RBASIC(st)->flags &= ~RSTRUCT_EMBED_LEN_MASK;
|
626
|
-
RBASIC(st)->flags |= n << RSTRUCT_EMBED_LEN_SHIFT;
|
627
|
-
rb_mem_clear(st->as.ary, n);
|
628
|
-
} else {
|
629
|
-
st->as.heap.ptr = ALLOC_N(VALUE, n);
|
630
|
-
rb_mem_clear(st->as.heap.ptr, n);
|
631
|
-
st->as.heap.len = n;
|
632
|
-
}
|
633
|
-
#endif
|
686
|
+
VALUE obj;
|
687
|
+
struct hpricot_struct* st;
|
634
688
|
|
635
|
-
|
689
|
+
obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st);
|
690
|
+
|
691
|
+
st->len = 8;
|
692
|
+
st->ptr = ALLOC_N(VALUE, 8);
|
693
|
+
|
694
|
+
rb_mem_clear(st->ptr, 8);
|
695
|
+
|
696
|
+
return obj;
|
697
|
+
}
|
698
|
+
|
699
|
+
static VALUE
|
700
|
+
alloc_hpricot_struct2(VALUE klass)
|
701
|
+
{
|
702
|
+
VALUE obj;
|
703
|
+
struct hpricot_struct* st;
|
704
|
+
|
705
|
+
obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st);
|
706
|
+
|
707
|
+
st->len = 2;
|
708
|
+
st->ptr = ALLOC_N(VALUE, 2);
|
709
|
+
|
710
|
+
rb_mem_clear(st->ptr, 2);
|
711
|
+
|
712
|
+
return obj;
|
713
|
+
}
|
714
|
+
|
715
|
+
static VALUE
|
716
|
+
alloc_hpricot_struct3(VALUE klass)
|
717
|
+
{
|
718
|
+
VALUE obj;
|
719
|
+
struct hpricot_struct* st;
|
720
|
+
|
721
|
+
obj = Data_Make_Struct(klass, struct hpricot_struct, hstruct_mark, hstruct_free, st);
|
722
|
+
|
723
|
+
st->len = 3;
|
724
|
+
st->ptr = ALLOC_N(VALUE, 3);
|
725
|
+
|
726
|
+
rb_mem_clear(st->ptr, 3);
|
727
|
+
|
728
|
+
return obj;
|
636
729
|
}
|
637
730
|
|
638
731
|
static VALUE hpricot_struct_ref0(VALUE obj) {return H_ELE_GET(obj, 0);}
|
@@ -684,17 +777,28 @@ static VALUE (*set_func[10])() = {
|
|
684
777
|
};
|
685
778
|
|
686
779
|
static VALUE
|
687
|
-
make_hpricot_struct(VALUE members)
|
780
|
+
make_hpricot_struct(VALUE members, VALUE (*alloc)(VALUE klass))
|
688
781
|
{
|
689
782
|
int i = 0;
|
783
|
+
char attr_set[128];
|
784
|
+
|
690
785
|
VALUE klass = rb_class_new(rb_cObject);
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
786
|
+
rb_define_alloc_func(klass, alloc);
|
787
|
+
|
788
|
+
int len = RARRAY_LEN(members);
|
789
|
+
assert(len < 10);
|
790
|
+
|
791
|
+
for (i = 0; i < len; i++) {
|
792
|
+
ID id = SYM2ID(rb_ary_entry(members, i));
|
793
|
+
const char* name = rb_id2name(id);
|
794
|
+
int len = strlen(name);
|
795
|
+
|
796
|
+
memcpy(attr_set, name, strlen(name));
|
797
|
+
attr_set[len] = '=';
|
798
|
+
attr_set[len+1] = 0;
|
799
|
+
|
800
|
+
rb_define_method(klass, name, ref_func[i], 0);
|
801
|
+
rb_define_method(klass, attr_set, set_func[i], 1);
|
698
802
|
}
|
699
803
|
return klass;
|
700
804
|
}
|
@@ -738,9 +842,13 @@ void Init_hpricot_scan()
|
|
738
842
|
|
739
843
|
structElem = make_hpricot_struct(rb_ary_new3(8, sym_name, sym_parent,
|
740
844
|
sym_raw_attributes, sym_etag, sym_raw_string, sym_allowed,
|
741
|
-
sym_tagno, sym_children));
|
742
|
-
structAttr = make_hpricot_struct(
|
743
|
-
|
845
|
+
sym_tagno, sym_children), alloc_hpricot_struct8);
|
846
|
+
structAttr = make_hpricot_struct(
|
847
|
+
rb_ary_new3(3, sym_name, sym_parent, sym_raw_attributes),
|
848
|
+
alloc_hpricot_struct3);
|
849
|
+
structBasic = make_hpricot_struct(
|
850
|
+
rb_ary_new3(2, sym_name, sym_parent),
|
851
|
+
alloc_hpricot_struct2);
|
744
852
|
|
745
853
|
cDoc = rb_define_class_under(mHpricot, "Doc", structElem);
|
746
854
|
cCData = rb_define_class_under(mHpricot, "CData", structBasic);
|