hpricot 0.7-x86-mswin32 → 0.8-x86-mswin32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +7 -0
- data/Rakefile +1 -1
- data/ext/fast_xs/fast_xs.c +1 -0
- data/ext/hpricot_scan/hpricot_css.c +2109 -2105
- data/ext/hpricot_scan/hpricot_scan.c +944 -880
- data/ext/hpricot_scan/hpricot_scan.rl +255 -191
- data/lib/fast_xs.so +0 -0
- data/lib/hpricot/inspect.rb +2 -2
- data/lib/hpricot/modules.rb +2 -0
- data/lib/hpricot/tag.rb +12 -10
- data/lib/hpricot_scan.so +0 -0
- data/test/test_parser.rb +11 -0
- metadata +2 -2
@@ -19,20 +19,26 @@ VALUE hpricot_css(VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
19
19
|
#define NO_WAY_SERIOUSLY "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!"
|
20
20
|
|
21
21
|
static VALUE sym_xmldecl, sym_doctype, sym_procins, sym_stag, sym_etag, sym_emptytag, sym_comment,
|
22
|
-
sym_cdata,
|
22
|
+
sym_cdata, sym_name, sym_parent, sym_raw_attributes, sym_raw_string, sym_tagno,
|
23
|
+
sym_allowed, sym_text, sym_children, sym_EMPTY, sym_CDATA;
|
23
24
|
static VALUE mHpricot, rb_eHpricotParseError;
|
24
|
-
static VALUE
|
25
|
+
static VALUE cBogusETag, cCData, cComment, cDoc, cDocType, cElem, cText,
|
25
26
|
cXMLDecl, cProcIns, symAllow, symDeny;
|
26
27
|
static ID s_ElementContent;
|
27
28
|
static ID s_downcase, s_new, s_parent, s_read, s_to_str;
|
28
|
-
static ID iv_parent;
|
29
29
|
static VALUE reProcInsParse;
|
30
30
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
31
|
+
#define H_ELE_TAG 0
|
32
|
+
#define H_ELE_PARENT 1
|
33
|
+
#define H_ELE_ATTR 2
|
34
|
+
#define H_ELE_ETAG 3
|
35
|
+
#define H_ELE_RAW 4
|
36
|
+
#define H_ELE_EC 5
|
37
|
+
#define H_ELE_HASH 6
|
38
|
+
#define H_ELE_CHILDREN 7
|
39
|
+
|
40
|
+
#define H_ELE_GET(ele, idx) RSTRUCT_PTR(ele)[idx]
|
41
|
+
#define H_ELE_SET(ele, idx, val) RSTRUCT_PTR(ele)[idx] = val
|
36
42
|
|
37
43
|
#define OPT(opts, key) (!NIL_P(opts) && RTEST(rb_hash_aref(opts, ID2SYM(rb_intern("" # key)))))
|
38
44
|
|
@@ -107,7 +113,7 @@ typedef struct {
|
|
107
113
|
action tag { SET(tag, p); }
|
108
114
|
action tagc { SET(tag, p-1); }
|
109
115
|
action aval { SET(aval, p); }
|
110
|
-
action aunq {
|
116
|
+
action aunq {
|
111
117
|
if (*(p-1) == '"' || *(p-1) == '\'') { SET(aval, p-1); }
|
112
118
|
else { SET(aval, p); }
|
113
119
|
}
|
@@ -118,14 +124,14 @@ typedef struct {
|
|
118
124
|
action pubid { SET(aval, p); ATTR(ID2SYM(rb_intern("public_id")), aval); }
|
119
125
|
action sysid { SET(aval, p); ATTR(ID2SYM(rb_intern("system_id")), aval); }
|
120
126
|
|
121
|
-
action new_attr {
|
127
|
+
action new_attr {
|
122
128
|
akey = Qnil;
|
123
129
|
aval = Qnil;
|
124
130
|
mark_akey = NULL;
|
125
131
|
mark_aval = NULL;
|
126
132
|
}
|
127
133
|
|
128
|
-
action save_attr {
|
134
|
+
action save_attr {
|
129
135
|
ATTR(akey, aval);
|
130
136
|
}
|
131
137
|
|
@@ -144,7 +150,7 @@ void rb_yield_tokens(VALUE sym, VALUE tag, VALUE attr, VALUE raw, int taint)
|
|
144
150
|
raw = tag;
|
145
151
|
}
|
146
152
|
ary = rb_ary_new3(4, sym, tag, attr, raw);
|
147
|
-
if (taint) {
|
153
|
+
if (taint) {
|
148
154
|
OBJ_TAINT(ary);
|
149
155
|
OBJ_TAINT(tag);
|
150
156
|
OBJ_TAINT(attr);
|
@@ -153,6 +159,7 @@ void rb_yield_tokens(VALUE sym, VALUE tag, VALUE attr, VALUE raw, int taint)
|
|
153
159
|
rb_yield(ary);
|
154
160
|
}
|
155
161
|
|
162
|
+
#ifndef RHASH_TBL
|
156
163
|
/* rb_hash_lookup() is only in Ruby 1.8.7 */
|
157
164
|
static VALUE
|
158
165
|
our_rb_hash_lookup(VALUE hash, VALUE key)
|
@@ -165,17 +172,17 @@ our_rb_hash_lookup(VALUE hash, VALUE key)
|
|
165
172
|
|
166
173
|
return val;
|
167
174
|
}
|
175
|
+
#define rb_hash_lookup our_rb_hash_lookup
|
176
|
+
#endif
|
168
177
|
|
169
178
|
static void
|
170
179
|
rb_hpricot_add(VALUE focus, VALUE ele)
|
171
180
|
{
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
rb_ary_push(he->children, ele);
|
178
|
-
he2->parent = focus;
|
181
|
+
VALUE children = H_ELE_GET(focus, H_ELE_CHILDREN);
|
182
|
+
if (NIL_P(children))
|
183
|
+
H_ELE_SET(focus, H_ELE_CHILDREN, (children = rb_ary_new2(1)));
|
184
|
+
rb_ary_push(children, ele);
|
185
|
+
H_ELE_SET(ele, H_ELE_PARENT, focus);
|
179
186
|
}
|
180
187
|
|
181
188
|
typedef struct {
|
@@ -186,102 +193,70 @@ typedef struct {
|
|
186
193
|
unsigned char xml, strict, fixup;
|
187
194
|
} hpricot_state;
|
188
195
|
|
189
|
-
|
190
|
-
hpricot_ele_mark(hpricot_ele *he)
|
191
|
-
{
|
192
|
-
rb_gc_mark(he->tag);
|
193
|
-
rb_gc_mark(he->attr);
|
194
|
-
rb_gc_mark(he->etag);
|
195
|
-
rb_gc_mark(he->raw);
|
196
|
-
rb_gc_mark(he->parent);
|
197
|
-
rb_gc_mark(he->children);
|
198
|
-
}
|
199
|
-
|
200
|
-
static void
|
201
|
-
hpricot_ele_free(hpricot_ele *he)
|
202
|
-
{
|
203
|
-
free(he);
|
204
|
-
}
|
205
|
-
|
206
|
-
#define H_PROP(prop) \
|
196
|
+
#define H_PROP(prop, idx) \
|
207
197
|
static VALUE hpricot_ele_set_##prop(VALUE self, VALUE x) { \
|
208
|
-
|
209
|
-
Data_Get_Struct(self, hpricot_ele, he); \
|
210
|
-
he->prop = x; \
|
198
|
+
H_ELE_SET(self, idx, x); \
|
211
199
|
return self; \
|
212
200
|
} \
|
201
|
+
static VALUE hpricot_ele_clear_##prop(VALUE self) { \
|
202
|
+
H_ELE_SET(self, idx, Qnil); \
|
203
|
+
return Qtrue; \
|
204
|
+
} \
|
213
205
|
static VALUE hpricot_ele_get_##prop(VALUE self) { \
|
214
|
-
|
215
|
-
Data_Get_Struct(self, hpricot_ele, he); \
|
216
|
-
return he->prop; \
|
206
|
+
return H_ELE_GET(self, idx); \
|
217
207
|
}
|
218
208
|
|
219
209
|
#define H_ATTR(prop) \
|
220
210
|
static VALUE hpricot_ele_set_##prop(VALUE self, VALUE x) { \
|
221
|
-
|
222
|
-
Data_Get_Struct(self, hpricot_ele, he); \
|
223
|
-
rb_hash_aset(he->attr, ID2SYM(rb_intern("" # prop)), x); \
|
211
|
+
rb_hash_aset(H_ELE_GET(self, H_ELE_ATTR), ID2SYM(rb_intern("" # prop)), x); \
|
224
212
|
return self; \
|
225
213
|
} \
|
226
214
|
static VALUE hpricot_ele_get_##prop(VALUE self) { \
|
227
|
-
|
228
|
-
Data_Get_Struct(self, hpricot_ele, he); \
|
229
|
-
return rb_hash_aref(he->attr, ID2SYM(rb_intern("" # prop))); \
|
215
|
+
return rb_hash_aref(H_ELE_GET(self, H_ELE_ATTR), ID2SYM(rb_intern("" # prop))); \
|
230
216
|
}
|
231
217
|
|
232
|
-
H_PROP(
|
233
|
-
H_PROP(
|
234
|
-
H_PROP(
|
235
|
-
H_PROP(
|
236
|
-
H_PROP(
|
218
|
+
H_PROP(name, H_ELE_TAG);
|
219
|
+
H_PROP(raw, H_ELE_RAW);
|
220
|
+
H_PROP(parent, H_ELE_PARENT);
|
221
|
+
H_PROP(attr, H_ELE_ATTR);
|
222
|
+
H_PROP(etag, H_ELE_ETAG);
|
223
|
+
H_PROP(children, H_ELE_CHILDREN);
|
224
|
+
H_ATTR(target);
|
237
225
|
H_ATTR(encoding);
|
238
226
|
H_ATTR(version);
|
239
227
|
H_ATTR(standalone);
|
240
228
|
H_ATTR(system_id);
|
241
229
|
H_ATTR(public_id);
|
242
230
|
|
243
|
-
static VALUE
|
244
|
-
hpricot_ele_get_raw(VALUE self, VALUE x) {
|
245
|
-
hpricot_ele *he;
|
246
|
-
Data_Get_Struct(self, hpricot_ele, he);
|
247
|
-
return he->raw;
|
248
|
-
}
|
249
|
-
|
250
|
-
static VALUE
|
251
|
-
hpricot_ele_clear_raw(VALUE self)
|
252
|
-
{
|
253
|
-
hpricot_ele *he;
|
254
|
-
Data_Get_Struct(self, hpricot_ele, he);
|
255
|
-
he->raw = Qnil;
|
256
|
-
return Qtrue;
|
257
|
-
}
|
258
|
-
|
259
231
|
#define H_ELE(klass) \
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
232
|
+
ele = rb_obj_alloc(klass); \
|
233
|
+
if (klass == cElem) { \
|
234
|
+
H_ELE_SET(ele, H_ELE_TAG, tag); \
|
235
|
+
H_ELE_SET(ele, H_ELE_ATTR, attr); \
|
236
|
+
H_ELE_SET(ele, H_ELE_EC, ec); \
|
237
|
+
if (raw != NULL && (sym == sym_emptytag || sym == sym_stag || sym == sym_doctype)) { \
|
238
|
+
H_ELE_SET(ele, H_ELE_RAW, rb_str_new(raw, rawlen)); \
|
239
|
+
} \
|
240
|
+
} else if (klass == cDocType || klass == cProcIns || klass == cXMLDecl || klass == cBogusETag) { \
|
241
|
+
if (klass == cBogusETag) { \
|
242
|
+
H_ELE_SET(ele, H_ELE_TAG, tag); \
|
243
|
+
if (raw != NULL) \
|
244
|
+
H_ELE_SET(ele, H_ELE_ATTR, rb_str_new(raw, rawlen)); \
|
245
|
+
} else { \
|
246
|
+
if (klass == cDocType) \
|
247
|
+
ATTR(ID2SYM(rb_intern("target")), tag); \
|
248
|
+
H_ELE_SET(ele, H_ELE_ATTR, attr); \
|
249
|
+
if (klass != cProcIns) { \
|
250
|
+
tag = Qnil; \
|
251
|
+
if (raw != NULL) tag = rb_str_new(raw, rawlen); \
|
252
|
+
} \
|
253
|
+
H_ELE_SET(ele, H_ELE_TAG, tag); \
|
254
|
+
} \
|
255
|
+
} else { \
|
256
|
+
H_ELE_SET(ele, H_ELE_TAG, tag); \
|
269
257
|
} \
|
270
|
-
ele = Data_Wrap_Struct(klass, hpricot_ele_mark, hpricot_ele_free, he); \
|
271
258
|
S->last = ele
|
272
259
|
|
273
|
-
VALUE
|
274
|
-
hpricot_ele_alloc(VALUE klass)
|
275
|
-
{
|
276
|
-
VALUE ele;
|
277
|
-
hpricot_ele *he = ALLOC(hpricot_ele);
|
278
|
-
he->name = 0;
|
279
|
-
he->tag = he->attr = he->raw = he->EC = Qnil;
|
280
|
-
he->etag = he->parent = he->children = Qnil;
|
281
|
-
ele = Data_Wrap_Struct(klass, hpricot_ele_mark, hpricot_ele_free, he);
|
282
|
-
return ele;
|
283
|
-
}
|
284
|
-
|
285
260
|
//
|
286
261
|
// the swift, compact parser logic. most of the complicated stuff is done
|
287
262
|
// in the lexer. this step just pairs up the start and end tags.
|
@@ -295,22 +270,23 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
|
|
295
270
|
// in html mode, fix up start tags incorrectly formed as empty tags
|
296
271
|
//
|
297
272
|
if (!S->xml) {
|
298
|
-
hpricot_ele *last;
|
299
|
-
Data_Get_Struct(S->focus, hpricot_ele, last);
|
300
|
-
if (last->EC == sym_CDATA &&
|
301
|
-
(sym != sym_procins && sym != sym_comment && sym != sym_cdata && sym != sym_text) &&
|
302
|
-
!(sym == sym_etag && rb_str_hash(tag) == last->name))
|
303
|
-
{
|
304
|
-
sym = sym_text;
|
305
|
-
tag = rb_str_new(raw, rawlen);
|
306
|
-
}
|
307
|
-
|
308
273
|
if (sym == sym_emptytag || sym == sym_stag || sym == sym_etag) {
|
309
274
|
ec = rb_hash_aref(S->EC, tag);
|
310
275
|
if (NIL_P(ec)) {
|
311
276
|
tag = rb_funcall(tag, s_downcase, 0);
|
312
277
|
ec = rb_hash_aref(S->EC, tag);
|
313
278
|
}
|
279
|
+
}
|
280
|
+
|
281
|
+
if (H_ELE_GET(S->focus, H_ELE_EC) == sym_CDATA &&
|
282
|
+
(sym != sym_procins && sym != sym_comment && sym != sym_cdata && sym != sym_text) &&
|
283
|
+
!(sym == sym_etag && INT2NUM(rb_str_hash(tag)) == H_ELE_GET(S->focus, H_ELE_HASH)))
|
284
|
+
{
|
285
|
+
sym = sym_text;
|
286
|
+
tag = rb_str_new(raw, rawlen);
|
287
|
+
}
|
288
|
+
|
289
|
+
if (!NIL_P(ec)) {
|
314
290
|
if (sym == sym_emptytag) {
|
315
291
|
if (ec != sym_EMPTY)
|
316
292
|
sym = sym_stag;
|
@@ -322,19 +298,19 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
|
|
322
298
|
}
|
323
299
|
|
324
300
|
if (sym == sym_emptytag || sym == sym_stag) {
|
301
|
+
VALUE name = INT2NUM(rb_str_hash(tag));
|
325
302
|
H_ELE(cElem);
|
326
|
-
|
303
|
+
H_ELE_SET(ele, H_ELE_HASH, name);
|
327
304
|
|
328
305
|
if (!S->xml) {
|
329
306
|
VALUE match = Qnil, e = S->focus;
|
330
307
|
while (e != S->doc)
|
331
308
|
{
|
332
|
-
|
333
|
-
Data_Get_Struct(e, hpricot_ele, hee);
|
309
|
+
VALUE hEC = H_ELE_GET(e, H_ELE_EC);
|
334
310
|
|
335
|
-
if (TYPE(
|
311
|
+
if (TYPE(hEC) == T_HASH)
|
336
312
|
{
|
337
|
-
VALUE has =
|
313
|
+
VALUE has = rb_hash_lookup(hEC, name);
|
338
314
|
if (has != Qnil) {
|
339
315
|
if (has == Qtrue) {
|
340
316
|
if (match == Qnil)
|
@@ -347,7 +323,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
|
|
347
323
|
}
|
348
324
|
}
|
349
325
|
|
350
|
-
e =
|
326
|
+
e = H_ELE_GET(e, H_ELE_PARENT);
|
351
327
|
}
|
352
328
|
|
353
329
|
if (match == Qnil)
|
@@ -369,8 +345,7 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
|
|
369
345
|
}
|
370
346
|
}
|
371
347
|
} else if (sym == sym_etag) {
|
372
|
-
|
373
|
-
VALUE match = Qnil, e = S->focus;
|
348
|
+
VALUE name, match = Qnil, e = S->focus;
|
374
349
|
if (S->strict) {
|
375
350
|
if (NIL_P(rb_hash_aref(S->EC, tag))) {
|
376
351
|
tag = rb_str_new2("div");
|
@@ -383,19 +358,16 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
|
|
383
358
|
//
|
384
359
|
// (see also: the search above for fixups)
|
385
360
|
//
|
386
|
-
name = rb_str_hash(tag);
|
361
|
+
name = INT2NUM(rb_str_hash(tag));
|
387
362
|
while (e != S->doc)
|
388
363
|
{
|
389
|
-
|
390
|
-
Data_Get_Struct(e, hpricot_ele, he);
|
391
|
-
|
392
|
-
if (he->name == name)
|
364
|
+
if (H_ELE_GET(e, H_ELE_HASH) == name)
|
393
365
|
{
|
394
366
|
match = e;
|
395
367
|
break;
|
396
368
|
}
|
397
369
|
|
398
|
-
e =
|
370
|
+
e = H_ELE_GET(e, H_ELE_PARENT);
|
399
371
|
}
|
400
372
|
|
401
373
|
if (NIL_P(match))
|
@@ -405,10 +377,11 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
|
|
405
377
|
}
|
406
378
|
else
|
407
379
|
{
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
380
|
+
VALUE ele = Qnil;
|
381
|
+
if (raw != NULL)
|
382
|
+
ele = rb_str_new(raw, rawlen);
|
383
|
+
H_ELE_SET(match, H_ELE_ETAG, ele);
|
384
|
+
S->focus = H_ELE_GET(match, H_ELE_PARENT);
|
412
385
|
S->last = Qnil;
|
413
386
|
}
|
414
387
|
} else if (sym == sym_cdata) {
|
@@ -429,15 +402,13 @@ rb_hpricot_token(hpricot_state *S, VALUE sym, VALUE tag, VALUE attr, char *raw,
|
|
429
402
|
tag = rb_reg_nth_match(1, match);
|
430
403
|
attr = rb_reg_nth_match(2, match);
|
431
404
|
{
|
432
|
-
|
433
|
-
|
405
|
+
H_ELE(cProcIns);
|
406
|
+
rb_hpricot_add(S->focus, ele);
|
434
407
|
}
|
435
408
|
} else if (sym == sym_text) {
|
436
409
|
// TODO: add raw_string as well?
|
437
410
|
if (!NIL_P(S->last) && RBASIC(S->last)->klass == cText) {
|
438
|
-
|
439
|
-
Data_Get_Struct(S->last, hpricot_ele, he);
|
440
|
-
rb_str_append(he->tag, tag);
|
411
|
+
rb_str_append(H_ELE_GET(S->last, H_ELE_TAG), tag);
|
441
412
|
} else {
|
442
413
|
H_ELE(cText);
|
443
414
|
rb_hpricot_add(S->focus, ele);
|
@@ -480,11 +451,8 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
|
|
480
451
|
|
481
452
|
if (!rb_block_given_p())
|
482
453
|
{
|
483
|
-
hpricot_ele *he = ALLOC(hpricot_ele);
|
484
454
|
S = ALLOC(hpricot_state);
|
485
|
-
|
486
|
-
he->tag = he->attr = he->etag = he->parent = he->children = Qnil;
|
487
|
-
S->doc = Data_Wrap_Struct(cDoc, hpricot_ele_mark, hpricot_ele_free, he);
|
455
|
+
S->doc = rb_obj_alloc(cDoc);
|
488
456
|
rb_gc_register_address(&S->doc);
|
489
457
|
S->focus = S->doc;
|
490
458
|
S->last = Qnil;
|
@@ -509,7 +477,7 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
|
|
509
477
|
buf = ALLOC_N(char, buffer_size);
|
510
478
|
|
511
479
|
%% write init;
|
512
|
-
|
480
|
+
|
513
481
|
while (!done) {
|
514
482
|
VALUE str;
|
515
483
|
char *p, *pe;
|
@@ -561,7 +529,7 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
|
|
561
529
|
|
562
530
|
pe = p + len;
|
563
531
|
%% write exec;
|
564
|
-
|
532
|
+
|
565
533
|
if (cs == hpricot_scan_error) {
|
566
534
|
if (buf != NULL)
|
567
535
|
free(buf);
|
@@ -574,7 +542,7 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
|
|
574
542
|
rb_raise(rb_eHpricotParseError, "parse error on line %d.\n" NO_WAY_SERIOUSLY, curline);
|
575
543
|
}
|
576
544
|
}
|
577
|
-
|
545
|
+
|
578
546
|
if (done && ele_open)
|
579
547
|
{
|
580
548
|
ele_open = 0;
|
@@ -635,66 +603,103 @@ VALUE hpricot_scan(int argc, VALUE *argv, VALUE self)
|
|
635
603
|
return Qnil;
|
636
604
|
}
|
637
605
|
|
638
|
-
|
606
|
+
static VALUE
|
607
|
+
alloc_hpricot_struct(VALUE klass)
|
639
608
|
{
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
609
|
+
VALUE size;
|
610
|
+
long n;
|
611
|
+
NEWOBJ(st, struct RStruct);
|
612
|
+
OBJSETUP(st, klass, T_STRUCT);
|
613
|
+
|
614
|
+
size = rb_struct_iv_get(klass, "__size__");
|
615
|
+
n = FIX2LONG(size);
|
616
|
+
|
617
|
+
#ifndef RSTRUCT_EMBED_LEN_MAX
|
618
|
+
st->ptr = ALLOC_N(VALUE, n);
|
619
|
+
rb_mem_clear(st->ptr, n);
|
620
|
+
st->len = n;
|
621
|
+
#else
|
622
|
+
if (0 < n && n <= RSTRUCT_EMBED_LEN_MAX) {
|
623
|
+
RBASIC(st)->flags &= ~RSTRUCT_EMBED_LEN_MASK;
|
624
|
+
RBASIC(st)->flags |= n << RSTRUCT_EMBED_LEN_SHIFT;
|
625
|
+
rb_mem_clear(st->as.ary, n);
|
626
|
+
} else {
|
627
|
+
st->as.heap.ptr = ALLOC_N(VALUE, n);
|
628
|
+
rb_mem_clear(st->as.heap.ptr, n);
|
629
|
+
st->as.heap.len = n;
|
630
|
+
}
|
631
|
+
#endif
|
645
632
|
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
633
|
+
return (VALUE)st;
|
634
|
+
}
|
635
|
+
|
636
|
+
static VALUE hpricot_struct_ref0(VALUE obj) {return H_ELE_GET(obj, 0);}
|
637
|
+
static VALUE hpricot_struct_ref1(VALUE obj) {return H_ELE_GET(obj, 1);}
|
638
|
+
static VALUE hpricot_struct_ref2(VALUE obj) {return H_ELE_GET(obj, 2);}
|
639
|
+
static VALUE hpricot_struct_ref3(VALUE obj) {return H_ELE_GET(obj, 3);}
|
640
|
+
static VALUE hpricot_struct_ref4(VALUE obj) {return H_ELE_GET(obj, 4);}
|
641
|
+
static VALUE hpricot_struct_ref5(VALUE obj) {return H_ELE_GET(obj, 5);}
|
642
|
+
static VALUE hpricot_struct_ref6(VALUE obj) {return H_ELE_GET(obj, 6);}
|
643
|
+
static VALUE hpricot_struct_ref7(VALUE obj) {return H_ELE_GET(obj, 7);}
|
644
|
+
static VALUE hpricot_struct_ref8(VALUE obj) {return H_ELE_GET(obj, 8);}
|
645
|
+
static VALUE hpricot_struct_ref9(VALUE obj) {return H_ELE_GET(obj, 9);}
|
646
|
+
|
647
|
+
static VALUE (*ref_func[10])() = {
|
648
|
+
hpricot_struct_ref0,
|
649
|
+
hpricot_struct_ref1,
|
650
|
+
hpricot_struct_ref2,
|
651
|
+
hpricot_struct_ref3,
|
652
|
+
hpricot_struct_ref4,
|
653
|
+
hpricot_struct_ref5,
|
654
|
+
hpricot_struct_ref6,
|
655
|
+
hpricot_struct_ref7,
|
656
|
+
hpricot_struct_ref8,
|
657
|
+
hpricot_struct_ref9,
|
658
|
+
};
|
659
|
+
|
660
|
+
static VALUE hpricot_struct_set0(VALUE obj, VALUE val) {return H_ELE_SET(obj, 0, val);}
|
661
|
+
static VALUE hpricot_struct_set1(VALUE obj, VALUE val) {return H_ELE_SET(obj, 1, val);}
|
662
|
+
static VALUE hpricot_struct_set2(VALUE obj, VALUE val) {return H_ELE_SET(obj, 2, val);}
|
663
|
+
static VALUE hpricot_struct_set3(VALUE obj, VALUE val) {return H_ELE_SET(obj, 3, val);}
|
664
|
+
static VALUE hpricot_struct_set4(VALUE obj, VALUE val) {return H_ELE_SET(obj, 4, val);}
|
665
|
+
static VALUE hpricot_struct_set5(VALUE obj, VALUE val) {return H_ELE_SET(obj, 5, val);}
|
666
|
+
static VALUE hpricot_struct_set6(VALUE obj, VALUE val) {return H_ELE_SET(obj, 6, val);}
|
667
|
+
static VALUE hpricot_struct_set7(VALUE obj, VALUE val) {return H_ELE_SET(obj, 7, val);}
|
668
|
+
static VALUE hpricot_struct_set8(VALUE obj, VALUE val) {return H_ELE_SET(obj, 8, val);}
|
669
|
+
static VALUE hpricot_struct_set9(VALUE obj, VALUE val) {return H_ELE_SET(obj, 9, val);}
|
670
|
+
|
671
|
+
static VALUE (*set_func[10])() = {
|
672
|
+
hpricot_struct_set0,
|
673
|
+
hpricot_struct_set1,
|
674
|
+
hpricot_struct_set2,
|
675
|
+
hpricot_struct_set3,
|
676
|
+
hpricot_struct_set4,
|
677
|
+
hpricot_struct_set5,
|
678
|
+
hpricot_struct_set6,
|
679
|
+
hpricot_struct_set7,
|
680
|
+
hpricot_struct_set8,
|
681
|
+
hpricot_struct_set9,
|
682
|
+
};
|
683
|
+
|
684
|
+
static VALUE
|
685
|
+
make_hpricot_struct(VALUE members)
|
686
|
+
{
|
687
|
+
int i = 0;
|
688
|
+
VALUE klass = rb_class_new(rb_cObject);
|
689
|
+
rb_iv_set(klass, "__size__", INT2NUM(RARRAY_LEN(members)));
|
690
|
+
rb_define_alloc_func(klass, alloc_hpricot_struct);
|
691
|
+
rb_define_singleton_method(klass, "new", rb_class_new_instance, -1);
|
692
|
+
for (i = 0; i < RARRAY_LEN(members); i++) {
|
693
|
+
ID id = SYM2ID(RARRAY_PTR(members)[i]);
|
694
|
+
rb_define_method_id(klass, id, ref_func[i], 0);
|
695
|
+
rb_define_method_id(klass, rb_id_attrset(id), set_func[i], 1);
|
696
|
+
}
|
697
|
+
return klass;
|
698
|
+
}
|
699
|
+
|
700
|
+
void Init_hpricot_scan()
|
701
|
+
{
|
702
|
+
VALUE structElem, structAttr, structBasic;
|
698
703
|
|
699
704
|
s_ElementContent = rb_intern("ElementContent");
|
700
705
|
symAllow = ID2SYM(rb_intern("allow"));
|
@@ -704,19 +709,78 @@ void Init_hpricot_scan()
|
|
704
709
|
s_parent = rb_intern("parent");
|
705
710
|
s_read = rb_intern("read");
|
706
711
|
s_to_str = rb_intern("to_str");
|
707
|
-
iv_parent = rb_intern("parent");
|
708
712
|
sym_xmldecl = ID2SYM(rb_intern("xmldecl"));
|
709
713
|
sym_doctype = ID2SYM(rb_intern("doctype"));
|
710
714
|
sym_procins = ID2SYM(rb_intern("procins"));
|
711
715
|
sym_stag = ID2SYM(rb_intern("stag"));
|
712
716
|
sym_etag = ID2SYM(rb_intern("etag"));
|
713
717
|
sym_emptytag = ID2SYM(rb_intern("emptytag"));
|
718
|
+
sym_allowed = ID2SYM(rb_intern("allowed"));
|
719
|
+
sym_children = ID2SYM(rb_intern("children"));
|
714
720
|
sym_comment = ID2SYM(rb_intern("comment"));
|
715
721
|
sym_cdata = ID2SYM(rb_intern("cdata"));
|
722
|
+
sym_name = ID2SYM(rb_intern("name"));
|
723
|
+
sym_parent = ID2SYM(rb_intern("parent"));
|
724
|
+
sym_raw_attributes = ID2SYM(rb_intern("raw_attributes"));
|
725
|
+
sym_raw_string = ID2SYM(rb_intern("raw_string"));
|
726
|
+
sym_tagno = ID2SYM(rb_intern("tagno"));
|
716
727
|
sym_text = ID2SYM(rb_intern("text"));
|
717
728
|
sym_EMPTY = ID2SYM(rb_intern("EMPTY"));
|
718
729
|
sym_CDATA = ID2SYM(rb_intern("CDATA"));
|
719
730
|
|
731
|
+
mHpricot = rb_define_module("Hpricot");
|
732
|
+
rb_define_attr(rb_singleton_class(mHpricot), "buffer_size", 1, 1);
|
733
|
+
rb_define_singleton_method(mHpricot, "scan", hpricot_scan, -1);
|
734
|
+
rb_define_singleton_method(mHpricot, "css", hpricot_css, 3);
|
735
|
+
rb_eHpricotParseError = rb_define_class_under(mHpricot, "ParseError", rb_eStandardError);
|
736
|
+
|
737
|
+
structElem = make_hpricot_struct(rb_ary_new3(8, sym_name, sym_parent,
|
738
|
+
sym_raw_attributes, sym_etag, sym_raw_string, sym_allowed,
|
739
|
+
sym_tagno, sym_children));
|
740
|
+
structAttr = make_hpricot_struct(rb_ary_new3(3, sym_name, sym_parent, sym_raw_attributes));
|
741
|
+
structBasic = make_hpricot_struct(rb_ary_new3(2, sym_name, sym_parent));
|
742
|
+
|
743
|
+
cDoc = rb_define_class_under(mHpricot, "Doc", structElem);
|
744
|
+
cCData = rb_define_class_under(mHpricot, "CData", structBasic);
|
745
|
+
rb_define_method(cCData, "content", hpricot_ele_get_name, 0);
|
746
|
+
rb_define_method(cCData, "content=", hpricot_ele_set_name, 1);
|
747
|
+
cComment = rb_define_class_under(mHpricot, "Comment", structBasic);
|
748
|
+
rb_define_method(cComment, "content", hpricot_ele_get_name, 0);
|
749
|
+
rb_define_method(cComment, "content=", hpricot_ele_set_name, 1);
|
750
|
+
cDocType = rb_define_class_under(mHpricot, "DocType", structAttr);
|
751
|
+
rb_define_method(cDocType, "raw_string", hpricot_ele_get_name, 0);
|
752
|
+
rb_define_method(cDocType, "clear_raw", hpricot_ele_clear_name, 0);
|
753
|
+
rb_define_method(cDocType, "target", hpricot_ele_get_target, 0);
|
754
|
+
rb_define_method(cDocType, "target=", hpricot_ele_set_target, 1);
|
755
|
+
rb_define_method(cDocType, "public_id", hpricot_ele_get_public_id, 0);
|
756
|
+
rb_define_method(cDocType, "public_id=", hpricot_ele_set_public_id, 1);
|
757
|
+
rb_define_method(cDocType, "system_id", hpricot_ele_get_system_id, 0);
|
758
|
+
rb_define_method(cDocType, "system_id=", hpricot_ele_set_system_id, 1);
|
759
|
+
cElem = rb_define_class_under(mHpricot, "Elem", structElem);
|
760
|
+
rb_define_method(cElem, "clear_raw", hpricot_ele_clear_raw, 0);
|
761
|
+
cBogusETag = rb_define_class_under(mHpricot, "BogusETag", structAttr);
|
762
|
+
rb_define_method(cBogusETag, "raw_string", hpricot_ele_get_attr, 0);
|
763
|
+
rb_define_method(cBogusETag, "clear_raw", hpricot_ele_clear_attr, 0);
|
764
|
+
cText = rb_define_class_under(mHpricot, "Text", structBasic);
|
765
|
+
rb_define_method(cText, "raw_string", hpricot_ele_get_name, 0);
|
766
|
+
rb_define_method(cText, "clear_raw", hpricot_ele_clear_name, 0);
|
767
|
+
rb_define_method(cText, "content", hpricot_ele_get_name, 0);
|
768
|
+
rb_define_method(cText, "content=", hpricot_ele_set_name, 1);
|
769
|
+
cXMLDecl = rb_define_class_under(mHpricot, "XMLDecl", structAttr);
|
770
|
+
rb_define_method(cXMLDecl, "raw_string", hpricot_ele_get_name, 0);
|
771
|
+
rb_define_method(cXMLDecl, "clear_raw", hpricot_ele_clear_name, 0);
|
772
|
+
rb_define_method(cXMLDecl, "encoding", hpricot_ele_get_encoding, 0);
|
773
|
+
rb_define_method(cXMLDecl, "encoding=", hpricot_ele_set_encoding, 1);
|
774
|
+
rb_define_method(cXMLDecl, "standalone", hpricot_ele_get_standalone, 0);
|
775
|
+
rb_define_method(cXMLDecl, "standalone=", hpricot_ele_set_standalone, 1);
|
776
|
+
rb_define_method(cXMLDecl, "version", hpricot_ele_get_version, 0);
|
777
|
+
rb_define_method(cXMLDecl, "version=", hpricot_ele_set_version, 1);
|
778
|
+
cProcIns = rb_define_class_under(mHpricot, "ProcIns", structAttr);
|
779
|
+
rb_define_method(cProcIns, "target", hpricot_ele_get_name, 0);
|
780
|
+
rb_define_method(cProcIns, "target=", hpricot_ele_set_name, 1);
|
781
|
+
rb_define_method(cProcIns, "content", hpricot_ele_get_attr, 0);
|
782
|
+
rb_define_method(cProcIns, "content=", hpricot_ele_set_attr, 1);
|
783
|
+
|
720
784
|
rb_const_set(mHpricot, rb_intern("ProcInsParse"),
|
721
785
|
reProcInsParse = rb_eval_string("/\\A<\\?(\\S+)\\s+(.+)/m"));
|
722
786
|
}
|