thbar-hpricot 0.8.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. data/CHANGELOG +104 -0
  2. data/COPYING +18 -0
  3. data/README.md +276 -0
  4. data/Rakefile +234 -0
  5. data/ext/fast_xs/FastXsService.java +1123 -0
  6. data/ext/fast_xs/extconf.rb +4 -0
  7. data/ext/fast_xs/fast_xs.c +210 -0
  8. data/ext/hpricot_scan/HpricotCss.java +850 -0
  9. data/ext/hpricot_scan/HpricotScanService.java +2099 -0
  10. data/ext/hpricot_scan/extconf.rb +9 -0
  11. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  12. data/ext/hpricot_scan/hpricot_css.c +3511 -0
  13. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  14. data/ext/hpricot_scan/hpricot_css.rl +120 -0
  15. data/ext/hpricot_scan/hpricot_scan.c +7045 -0
  16. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  17. data/ext/hpricot_scan/hpricot_scan.java.rl +1161 -0
  18. data/ext/hpricot_scan/hpricot_scan.rl +902 -0
  19. data/extras/hpricot.png +0 -0
  20. data/lib/hpricot.rb +26 -0
  21. data/lib/hpricot/blankslate.rb +63 -0
  22. data/lib/hpricot/builder.rb +216 -0
  23. data/lib/hpricot/elements.rb +514 -0
  24. data/lib/hpricot/htmlinfo.rb +691 -0
  25. data/lib/hpricot/inspect.rb +103 -0
  26. data/lib/hpricot/modules.rb +40 -0
  27. data/lib/hpricot/parse.rb +38 -0
  28. data/lib/hpricot/tag.rb +219 -0
  29. data/lib/hpricot/tags.rb +164 -0
  30. data/lib/hpricot/traverse.rb +839 -0
  31. data/lib/hpricot/xchar.rb +94 -0
  32. data/test/files/basic.xhtml +17 -0
  33. data/test/files/boingboing.html +2266 -0
  34. data/test/files/cy0.html +3653 -0
  35. data/test/files/immob.html +400 -0
  36. data/test/files/pace_application.html +1320 -0
  37. data/test/files/tenderlove.html +16 -0
  38. data/test/files/uswebgen.html +220 -0
  39. data/test/files/utf8.html +1054 -0
  40. data/test/files/week9.html +1723 -0
  41. data/test/files/why.xml +19 -0
  42. data/test/load_files.rb +7 -0
  43. data/test/nokogiri-bench.rb +64 -0
  44. data/test/test_alter.rb +96 -0
  45. data/test/test_builder.rb +37 -0
  46. data/test/test_parser.rb +457 -0
  47. data/test/test_paths.rb +25 -0
  48. data/test/test_preserved.rb +88 -0
  49. data/test/test_xml.rb +28 -0
  50. metadata +124 -0
@@ -0,0 +1,79 @@
1
+ /*
2
+ * hpricot_scan.h
3
+ *
4
+ * $Author: why $
5
+ * $Date: 2006-05-08 22:03:50 -0600 (Mon, 08 May 2006) $
6
+ *
7
+ * Copyright (C) 2006 why the lucky stiff
8
+ * You can redistribute it and/or modify it under the same terms as Ruby.
9
+ */
10
+
11
+ #ifndef hpricot_scan_h
12
+ #define hpricot_scan_h
13
+
14
+ #include <sys/types.h>
15
+
16
+ #if defined(_WIN32)
17
+ #include <stddef.h>
18
+ #endif
19
+
20
+ /*
21
+ * Memory Allocation
22
+ */
23
+ #if defined(HAVE_ALLOCA_H) && !defined(__GNUC__)
24
+ #include <alloca.h>
25
+ #endif
26
+
27
+ #ifndef NULL
28
+ # define NULL (void *)0
29
+ #endif
30
+
31
/* Size constant (16384) shared with the scanner's buffering code. */
#define BUFSIZE 16384

/*
 * Allocation helpers: thin wrappers over malloc/realloc/free/alloca.
 * Each expansion is fully parenthesized so it composes with surrounding
 * operators (indexing, member access, arithmetic) the way a function call
 * would. The including file is expected to provide <stdlib.h>.
 */
#define S_ALLOC_N(type,n) ((type*)malloc(sizeof(type)*(n)))
#define S_ALLOC(type) ((type*)malloc(sizeof(type)))
/* NOTE: (var) is overwritten with realloc's result, so a failed call leaves it NULL. */
#define S_REALLOC_N(var,type,n) ((var)=(type*)realloc((char*)(var),sizeof(type)*(n)))
/*
 * Frees n and resets it to NULL. Wrapped in do { } while (0) so that
 * `if (c) S_FREE(p); else ...` parses as a single statement; the previous
 * two-statement expansion detached the assignment from the `if`.
 */
#define S_FREE(n) do { free(n); (n) = NULL; } while (0)

#define S_ALLOCA_N(type,n) ((type*)alloca(sizeof(type)*(n)))

/* Typed wrappers over the <string.h> memory routines; n counts elements, not bytes. */
#define S_MEMZERO(p,type,n) memset((p), 0, sizeof(type)*(n))
#define S_MEMCPY(p1,p2,type,n) memcpy((p1), (p2), sizeof(type)*(n))
#define S_MEMMOVE(p1,p2,type,n) memmove((p1), (p2), sizeof(type)*(n))
#define S_MEMCMP(p1,p2,type,n) memcmp((p1), (p2), sizeof(type)*(n))
44
+
45
/* One scanned token: opaque handles for its name and its attributes. */
typedef struct {
  void *name;
  void *attributes;
} hpricot_element;

/* Callback signature: receives the scanner's user data pointer and the token. */
typedef void (*hpricot_element_cb)(void *data, hpricot_element *token);

/*
 * Scanner state plus one callback slot per token kind.
 *
 * The type is exported as `hpricot_scan`, matching the struct tag and the
 * prototypes sketched at the bottom of this header. `http_scan` is kept as
 * an alias so existing references to the old name still compile.
 */
typedef struct hpricot_scan {
  int lineno;
  int cs;        /* presumably the Ragel current-state variable -- TODO confirm */
  size_t nread;
  size_t mark;

  void *data;    /* handed to every callback as its first argument (see hpricot_element_cb) */

  hpricot_element_cb xmldecl;
  hpricot_element_cb doctype;
  hpricot_element_cb xmlprocins;
  hpricot_element_cb starttag;
  hpricot_element_cb endtag;
  hpricot_element_cb emptytag;
  hpricot_element_cb comment;
  hpricot_element_cb cdata;

} hpricot_scan, http_scan;
70
+
71
+ // int hpricot_scan_init(hpricot_scan *scan);
72
+ // int hpricot_scan_finish(hpricot_scan *scan);
73
+ // size_t hpricot_scan_execute(hpricot_scan *scan, const char *data, size_t len, size_t off);
74
+ // int hpricot_scan_has_error(hpricot_scan *scan);
75
+ // int hpricot_scan_is_finished(hpricot_scan *scan);
76
+ //
77
+ // #define hpricot_scan_nread(scan) (scan)->nread
78
+
79
+ #endif
@@ -0,0 +1,1161 @@
1
+
2
+ import java.io.IOException;
3
+
4
+ import org.jruby.Ruby;
5
+ import org.jruby.RubyArray;
6
+ import org.jruby.RubyClass;
7
+ import org.jruby.RubyHash;
8
+ import org.jruby.RubyModule;
9
+ import org.jruby.RubyNumeric;
10
+ import org.jruby.RubyObject;
11
+ import org.jruby.RubyObjectAdapter;
12
+ import org.jruby.RubyRegexp;
13
+ import org.jruby.RubyString;
14
+ import org.jruby.anno.JRubyMethod;
15
+ import org.jruby.exceptions.RaiseException;
16
+ import org.jruby.javasupport.JavaEmbedUtils;
17
+ import org.jruby.runtime.Arity;
18
+ import org.jruby.runtime.Block;
19
+ import org.jruby.runtime.ObjectAllocator;
20
+ import org.jruby.runtime.ThreadContext;
21
+ import org.jruby.runtime.builtin.IRubyObject;
22
+ import org.jruby.runtime.callback.Callback;
23
+ import org.jruby.exceptions.RaiseException;
24
+ import org.jruby.runtime.load.BasicLibraryService;
25
+ import org.jruby.util.ByteList;
26
+
27
+ public class HpricotScanService implements BasicLibraryService {
28
+ public static byte[] realloc(byte[] input, int size) {
29
+ byte[] newArray = new byte[size];
30
+ System.arraycopy(input, 0, newArray, 0, input.length);
31
+ return newArray;
32
+ }
33
+
34
+ // hpricot_state
35
+ public static class State {
36
+ public IRubyObject doc;
37
+ public IRubyObject focus;
38
+ public IRubyObject last;
39
+ public IRubyObject EC;
40
+ public boolean xml, strict, fixup;
41
+ }
42
+
43
+ static boolean OPT(IRubyObject opts, String key) {
44
+ Ruby runtime = opts.getRuntime();
45
+ return !opts.isNil() && ((RubyHash)opts).op_aref(runtime.getCurrentContext(), runtime.newSymbol(key)).isTrue();
46
+ }
47
+
48
+ // H_PROP(name, H_ELE_TAG)
49
+ public static IRubyObject hpricot_ele_set_name(IRubyObject self, IRubyObject x) {
50
+ H_ELE_SET(self, H_ELE_TAG, x);
51
+ return self;
52
+ }
53
+
54
+ public static IRubyObject hpricot_ele_clear_name(IRubyObject self) {
55
+ H_ELE_SET(self, H_ELE_TAG, self.getRuntime().getNil());
56
+ return self.getRuntime().getTrue();
57
+ }
58
+
59
+ public static IRubyObject hpricot_ele_get_name(IRubyObject self) {
60
+ return H_ELE_GET(self, H_ELE_TAG);
61
+ }
62
+
63
+ // H_PROP(raw, H_ELE_RAW)
64
+ public static IRubyObject hpricot_ele_set_raw(IRubyObject self, IRubyObject x) {
65
+ H_ELE_SET(self, H_ELE_RAW, x);
66
+ return self;
67
+ }
68
+
69
+ public static IRubyObject hpricot_ele_clear_raw(IRubyObject self) {
70
+ H_ELE_SET(self, H_ELE_RAW, self.getRuntime().getNil());
71
+ return self.getRuntime().getTrue();
72
+ }
73
+
74
+ public static IRubyObject hpricot_ele_get_raw(IRubyObject self) {
75
+ return H_ELE_GET(self, H_ELE_RAW);
76
+ }
77
+
78
+ // H_PROP(parent, H_ELE_PARENT)
79
+ public static IRubyObject hpricot_ele_set_parent(IRubyObject self, IRubyObject x) {
80
+ H_ELE_SET(self, H_ELE_PARENT, x);
81
+ return self;
82
+ }
83
+
84
+ public static IRubyObject hpricot_ele_clear_parent(IRubyObject self) {
85
+ H_ELE_SET(self, H_ELE_PARENT, self.getRuntime().getNil());
86
+ return self.getRuntime().getTrue();
87
+ }
88
+
89
+ public static IRubyObject hpricot_ele_get_parent(IRubyObject self) {
90
+ return H_ELE_GET(self, H_ELE_PARENT);
91
+ }
92
+
93
+ // H_PROP(attr, H_ELE_ATTR)
94
+ public static IRubyObject hpricot_ele_set_attr(IRubyObject self, IRubyObject x) {
95
+ H_ELE_SET(self, H_ELE_ATTR, x);
96
+ return self;
97
+ }
98
+
99
+ public static IRubyObject hpricot_ele_clear_attr(IRubyObject self) {
100
+ H_ELE_SET(self, H_ELE_ATTR, self.getRuntime().getNil());
101
+ return self.getRuntime().getTrue();
102
+ }
103
+
104
+ public static IRubyObject hpricot_ele_get_attr(IRubyObject self) {
105
+ return H_ELE_GET(self, H_ELE_ATTR);
106
+ }
107
+
108
+ // H_PROP(etag, H_ELE_ETAG)
109
+ public static IRubyObject hpricot_ele_set_etag(IRubyObject self, IRubyObject x) {
110
+ H_ELE_SET(self, H_ELE_ETAG, x);
111
+ return self;
112
+ }
113
+
114
+ public static IRubyObject hpricot_ele_clear_etag(IRubyObject self) {
115
+ H_ELE_SET(self, H_ELE_ETAG, self.getRuntime().getNil());
116
+ return self.getRuntime().getTrue();
117
+ }
118
+
119
+ public static IRubyObject hpricot_ele_get_etag(IRubyObject self) {
120
+ return H_ELE_GET(self, H_ELE_ETAG);
121
+ }
122
+
123
+ // H_PROP(children, H_ELE_CHILDREN)
124
+ public static IRubyObject hpricot_ele_set_children(IRubyObject self, IRubyObject x) {
125
+ H_ELE_SET(self, H_ELE_CHILDREN, x);
126
+ return self;
127
+ }
128
+
129
+ public static IRubyObject hpricot_ele_clear_children(IRubyObject self) {
130
+ H_ELE_SET(self, H_ELE_CHILDREN, self.getRuntime().getNil());
131
+ return self.getRuntime().getTrue();
132
+ }
133
+
134
+ public static IRubyObject hpricot_ele_get_children(IRubyObject self) {
135
+ return H_ELE_GET(self, H_ELE_CHILDREN);
136
+ }
137
+
138
+ // H_ATTR(target)
139
+ public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
140
+ H_ELE_GET_asHash(self, H_ELE_ATTR).fastASet(self.getRuntime().newSymbol("target"), x);
141
+ return self;
142
+ }
143
+
144
+ public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
145
+ return H_ELE_GET_asHash(self, H_ELE_ATTR).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
146
+ }
147
+
148
+ // H_ATTR(encoding)
149
+ public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
150
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
151
+ return self;
152
+ }
153
+
154
+ public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
155
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
156
+ }
157
+
158
+ // H_ATTR(version)
159
+ public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
160
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
161
+ return self;
162
+ }
163
+
164
+ public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
165
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
166
+ }
167
+
168
+ // H_ATTR(standalone)
169
+ public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
170
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
171
+ return self;
172
+ }
173
+
174
+ public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
175
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
176
+ }
177
+
178
+ // H_ATTR(system_id)
179
+ public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
180
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
181
+ return self;
182
+ }
183
+
184
+ public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
185
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
186
+ }
187
+
188
+ // H_ATTR(public_id)
189
+ public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
190
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
191
+ return self;
192
+ }
193
+
194
+ public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
195
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
196
+ }
197
+
198
+ public static class Scanner {
199
+ public IRubyObject SET(int mark, int E, IRubyObject org) {
200
+ if(mark == -1 || E == mark) {
201
+ return runtime.newString("");
202
+ } else if(E > mark) {
203
+ return RubyString.newString(runtime, data, mark, E-mark);
204
+ } else {
205
+ return org;
206
+ }
207
+ }
208
+
209
+ public int SLIDE(int N) {
210
+ if(N > ts) {
211
+ return N - ts;
212
+ } else {
213
+ return N;
214
+ }
215
+ }
216
+
217
+ public IRubyObject CAT(IRubyObject N, int mark, int E) {
218
+ if(N.isNil()) {
219
+ return SET(mark, E, N);
220
+ } else {
221
+ ((RubyString)N).cat(data, mark, E-mark);
222
+ return N;
223
+ }
224
+ }
225
+
226
+ public void ATTR(IRubyObject K, IRubyObject V) {
227
+ if(!K.isNil()) {
228
+ if(attr.isNil()) {
229
+ attr = RubyHash.newHash(runtime);
230
+ }
231
+ ((RubyHash)attr).fastASet(K, V);
232
+ }
233
+ }
234
+
235
+ public void TEXT_PASS() {
236
+ if(!text) {
237
+ if(ele_open) {
238
+ ele_open = false;
239
+ if(ts != -1) {
240
+ mark_tag = ts;
241
+ }
242
+ } else {
243
+ mark_tag = p;
244
+ }
245
+ attr = runtime.getNil();
246
+ tag = runtime.getNil();
247
+ text = true;
248
+ }
249
+ }
250
+
251
+ public void ELE(IRubyObject N) {
252
+ if(te > ts || text) {
253
+ int raw = -1;
254
+ int rawlen = 0;
255
+ ele_open = false;
256
+ text = false;
257
+
258
+ if(ts != -1 && N != x.sym_cdata && N != x.sym_text && N != x.sym_procins && N != x.sym_comment) {
259
+ raw = ts;
260
+ rawlen = te - ts;
261
+ }
262
+
263
+ if(block.isGiven()) {
264
+ IRubyObject raw_string = runtime.getNil();
265
+ if(raw != -1) {
266
+ raw_string = RubyString.newString(runtime, data, raw, rawlen);
267
+ }
268
+ yieldTokens(N, tag, attr, runtime.getNil(), taint);
269
+ } else {
270
+ hpricotToken(S, N, tag, attr, raw, rawlen, taint);
271
+ }
272
+ }
273
+ }
274
+
275
+
276
+ public void EBLK(IRubyObject N, int T) {
277
+ tag = CAT(tag, mark_tag, p - T + 1);
278
+ ELE(N);
279
+ }
280
+
281
+ public void hpricotAdd(IRubyObject focus, IRubyObject ele) {
282
+ IRubyObject children = H_ELE_GET(focus, H_ELE_CHILDREN);
283
+ if(children.isNil()) {
284
+ H_ELE_SET(focus, H_ELE_CHILDREN, children = RubyArray.newArray(runtime, 1));
285
+ }
286
+ ((RubyArray)children).append(ele);
287
+ H_ELE_SET(ele, H_ELE_PARENT, focus);
288
+ }
289
+
290
+ private static class TokenInfo {
291
+ public IRubyObject sym;
292
+ public IRubyObject tag;
293
+ public IRubyObject attr;
294
+ public int raw;
295
+ public int rawlen;
296
+ public IRubyObject ec;
297
+ public IRubyObject ele;
298
+ public Extra x;
299
+ public Ruby runtime;
300
+ public Scanner scanner;
301
+ public State S;
302
+
303
+ public void H_ELE(RubyClass klass) {
304
+ ele = klass.allocate();
305
+ if(klass == x.cElem) {
306
+ H_ELE_SET(ele, H_ELE_TAG, tag);
307
+ H_ELE_SET(ele, H_ELE_ATTR, attr);
308
+ H_ELE_SET(ele, H_ELE_EC, ec);
309
+ if(raw != -1 && (sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_doctype)) {
310
+ H_ELE_SET(ele, H_ELE_RAW, RubyString.newString(runtime, scanner.data, raw, rawlen));
311
+ }
312
+ } else if(klass == x.cDocType || klass == x.cProcIns || klass == x.cXMLDecl || klass == x.cBogusETag) {
313
+ if(klass == x.cBogusETag) {
314
+ H_ELE_SET(ele, H_ELE_TAG, tag);
315
+ if(raw != -1) {
316
+ H_ELE_SET(ele, H_ELE_ATTR, RubyString.newString(runtime, scanner.data, raw, rawlen));
317
+ }
318
+ } else {
319
+ if(klass == x.cDocType) {
320
+ scanner.ATTR(runtime.newSymbol("target"), tag);
321
+ }
322
+ H_ELE_SET(ele, H_ELE_ATTR, attr);
323
+ if(klass != x.cProcIns) {
324
+ tag = runtime.getNil();
325
+ if(raw != -1) {
326
+ tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
327
+ }
328
+ }
329
+ H_ELE_SET(ele, H_ELE_TAG, tag);
330
+ }
331
+ } else {
332
+ H_ELE_SET(ele, H_ELE_TAG, tag);
333
+ }
334
+ S.last = ele;
335
+ }
336
+
337
+ public void hpricotToken(boolean taint) {
338
+ //
339
+ // in html mode, fix up start tags incorrectly formed as empty tags
340
+ //
341
+ if(!S.xml) {
342
+ if(sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_etag) {
343
+ ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
344
+ if(ec.isNil()) {
345
+ tag = tag.callMethod(scanner.ctx, "downcase");
346
+ ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
347
+ }
348
+ }
349
+
350
+ if(H_ELE_GET(S.focus, H_ELE_EC) == x.sym_CDATA &&
351
+ (sym != x.sym_procins && sym != x.sym_comment && sym != x.sym_cdata && sym != x.sym_text) &&
352
+ !(sym == x.sym_etag && runtime.newFixnum(tag.hashCode()).equals(H_ELE_GET(S.focus, H_ELE_HASH)))) {
353
+ sym = x.sym_text;
354
+ tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
355
+ }
356
+
357
+ if(!ec.isNil()) {
358
+ if(sym == x.sym_emptytag) {
359
+ if(ec != x.sym_EMPTY) {
360
+ sym = x.sym_stag;
361
+ }
362
+ } else if(sym == x.sym_stag) {
363
+ if(ec == x.sym_EMPTY) {
364
+ sym = x.sym_emptytag;
365
+ }
366
+ }
367
+ }
368
+ }
369
+
370
+ if(sym == x.sym_emptytag || sym == x.sym_stag) {
371
+ IRubyObject name = runtime.newFixnum(tag.hashCode());
372
+ H_ELE(x.cElem);
373
+ H_ELE_SET(ele, H_ELE_HASH, name);
374
+
375
+ if(!S.xml) {
376
+ IRubyObject match = runtime.getNil(), e = S.focus;
377
+ while(e != S.doc) {
378
+ IRubyObject hEC = H_ELE_GET(e, H_ELE_EC);
379
+ if(hEC instanceof RubyHash) {
380
+ IRubyObject has = ((RubyHash)hEC).op_aref(scanner.ctx, name);
381
+ if(!has.isNil()) {
382
+ if(has == runtime.getTrue()) {
383
+ if(match.isNil()) {
384
+ match = e;
385
+ }
386
+ } else if(has == x.symAllow) {
387
+ match = S.focus;
388
+ } else if(has == x.symDeny) {
389
+ match = runtime.getNil();
390
+ }
391
+ }
392
+ }
393
+ e = H_ELE_GET(e, H_ELE_PARENT);
394
+ }
395
+
396
+ if(match.isNil()) {
397
+ match = S.focus;
398
+ }
399
+ S.focus = match;
400
+ }
401
+
402
+ scanner.hpricotAdd(S.focus, ele);
403
+
404
+ //
405
+ // in the case of a start tag that should be empty, just
406
+ // skip the step that focuses the element. focusing moves
407
+ // us deeper into the document.
408
+ //
409
+ if(sym == x.sym_stag) {
410
+ if(S.xml || ec != x.sym_EMPTY) {
411
+ S.focus = ele;
412
+ S.last = runtime.getNil();
413
+ }
414
+ }
415
+ } else if(sym == x.sym_etag) {
416
+ IRubyObject name, match = runtime.getNil(), e = S.focus;
417
+ if(S.strict) {
418
+ if(((RubyHash)S.EC).op_aref(scanner.ctx, tag).isNil()) {
419
+ tag = runtime.newString("div");
420
+ }
421
+ }
422
+
423
+ name = runtime.newFixnum(tag.hashCode());
424
+ while(e != S.doc) {
425
+ if(H_ELE_GET(e, H_ELE_HASH).equals(name)) {
426
+ match = e;
427
+ break;
428
+ }
429
+ e = H_ELE_GET(e, H_ELE_PARENT);
430
+
431
+ }
432
+ if(match.isNil()) {
433
+ H_ELE(x.cBogusETag);
434
+ scanner.hpricotAdd(S.focus, ele);
435
+ } else {
436
+ ele = runtime.getNil();
437
+ if(raw != -1) {
438
+ ele = RubyString.newString(runtime, scanner.data, raw, rawlen);
439
+ }
440
+ H_ELE_SET(match, H_ELE_ETAG, ele);
441
+ S.focus = H_ELE_GET(match, H_ELE_PARENT);
442
+ S.last = runtime.getNil();
443
+
444
+ }
445
+ } else if(sym == x.sym_cdata) {
446
+ H_ELE(x.cCData);
447
+ scanner.hpricotAdd(S.focus, ele);
448
+ } else if(sym == x.sym_comment) {
449
+ H_ELE(x.cComment);
450
+ scanner.hpricotAdd(S.focus, ele);
451
+ } else if(sym == x.sym_doctype) {
452
+ H_ELE(x.cDocType);
453
+ if(S.strict) {
454
+ RubyHash h = (RubyHash)attr;
455
+ h.fastASet(runtime.newSymbol("system_id"), runtime.newString("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
456
+ h.fastASet(runtime.newSymbol("public_id"), runtime.newString("-//W3C//DTD XHTML 1.0 Strict//EN"));
457
+ }
458
+ scanner.hpricotAdd(S.focus, ele);
459
+ } else if(sym == x.sym_procins) {
460
+ IRubyObject match = tag.callMethod(scanner.ctx, "match", x.reProcInsParse);
461
+ tag = RubyRegexp.nth_match(1, match);
462
+ attr = RubyRegexp.nth_match(2, match);
463
+ H_ELE(x.cProcIns);
464
+ scanner.hpricotAdd(S.focus, ele);
465
+ } else if(sym == x.sym_text) {
466
+ if(!S.last.isNil() && S.last.getType() == x.cText) {
467
+ ((RubyString)H_ELE_GET(S.last, H_ELE_TAG)).append(tag);
468
+ } else {
469
+ H_ELE(x.cText);
470
+ scanner.hpricotAdd(S.focus, ele);
471
+ }
472
+ } else if(sym == x.sym_xmldecl) {
473
+ H_ELE(x.cXMLDecl);
474
+ scanner.hpricotAdd(S.focus, ele);
475
+ }
476
+ }
477
+ }
478
+
479
+ public void hpricotToken(State S, IRubyObject _sym, IRubyObject _tag, IRubyObject _attr, int _raw, int _rawlen, boolean taint) {
480
+ TokenInfo t = new TokenInfo();
481
+ t.sym = _sym;
482
+ t.tag = _tag;
483
+ t.attr = _attr;
484
+ t.raw = _raw;
485
+ t.rawlen = _rawlen;
486
+ t.ec = runtime.getNil();
487
+ t.ele = runtime.getNil();
488
+ t.x = x;
489
+ t.runtime = runtime;
490
+ t.scanner = this;
491
+ t.S = S;
492
+
493
+ t.hpricotToken(taint);
494
+ }
495
+
496
+ public void yieldTokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
497
+ if(sym == x.sym_text) {
498
+ raw = tag;
499
+ }
500
+ IRubyObject ary = RubyArray.newArrayNoCopy(runtime, new IRubyObject[]{sym, tag, attr, raw});
501
+ if(taint) {
502
+ ary.setTaint(true);
503
+ tag.setTaint(true);
504
+ attr.setTaint(true);
505
+ raw.setTaint(true);
506
+ }
507
+
508
+ block.yield(ctx, ary);
509
+ }
510
+
511
%%{
  machine hpricot_scan;

  # Flush any pending text as a text token, then reset per-element state.
  action newEle {
    if(text) {
      tag = CAT(tag, mark_tag, p);
      ELE(x.sym_text);
      text = false;
    }
    attr = runtime.getNil();
    tag = runtime.getNil();
    mark_tag = -1;
    ele_open = true;
  }

  # Mark the start of a tag name, attribute value or attribute key.
  action _tag { mark_tag = p; }
  action _aval { mark_aval = p; }
  action _akey { mark_akey = p; }

  # Capture the marked span into tag / aval / akey.
  action tag { tag = SET(mark_tag, p, tag); }
  action tagc { tag = SET(mark_tag, p-1, tag); }
  action aval { aval = SET(mark_aval, p, aval); }
  action aunq {
    if(data[p-1] == '"' || data[p-1] == '\'') {
      /* drop the trailing quote character from the value */
      aval = SET(mark_aval, p-1, aval);
    } else {
      aval = SET(mark_aval, p, aval);
    }
  }
  action akey { akey = SET(mark_akey, p, akey); }

  # Capture a value and store it under a fixed attribute name.
  action xmlver { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("version"), aval); }
  action xmlenc { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("encoding"), aval); }
  action xmlsd { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("standalone"), aval); }
  action pubid { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("public_id"), aval); }
  action sysid { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("system_id"), aval); }

  action new_attr {
    akey = runtime.getNil();
    aval = runtime.getNil();
    mark_akey = -1;
    mark_aval = -1;
  }

  action save_attr {
    /* S is only created when no block is given, so guard against null before
       reading S.xml; block mode gets the non-XML behaviour (keys are down-cased). */
    if(S == null || !S.xml) {
      akey = akey.callMethod(runtime.getCurrentContext(), "downcase");
    }
    ATTR(akey, aval);
  }

  include hpricot_common "hpricot_common.rl";
}%%
562
+
563
+ %% write data nofinal;
564
+
565
+ public final static int BUFSIZE = 16384;
566
+
567
+
568
+ private int cs, act, have = 0, nread = 0, curline = 1;
569
+ private int ts = 0, te = 0, eof = -1, p = -1, pe = -1, buf = 0;
570
+ private byte[] data;
571
+ private State S = null;
572
+ private IRubyObject port, opts, attr, tag, akey, aval, bufsize;
573
+ private int mark_tag = -1, mark_akey = -1, mark_aval = -1;
574
+ private boolean done = false, ele_open = false, taint = false, io = false, text = false;
575
+ private int buffer_size = 0;
576
+
577
+ private Extra x;
578
+
579
+ private IRubyObject self;
580
+ private Ruby runtime;
581
+ private ThreadContext ctx;
582
+ private Block block;
583
+
584
+ private IRubyObject xmldecl, doctype, stag, etag, emptytag, comment, cdata, procins;
585
+
586
+ private RaiseException newRaiseException(RubyClass exceptionClass, String message) {
587
+ return new RaiseException(runtime, exceptionClass, message, true);
588
+ }
589
+
590
+ public Scanner(IRubyObject self, IRubyObject[] args, Block block) {
591
+ this.self = self;
592
+ this.runtime = self.getRuntime();
593
+ this.ctx = runtime.getCurrentContext();
594
+ this.block = block;
595
+ attr = runtime.getNil();
596
+ tag = runtime.getNil();
597
+ akey = runtime.getNil();
598
+ aval = runtime.getNil();
599
+ bufsize = runtime.getNil();
600
+
601
+ this.x = (Extra)this.runtime.getModule("Hpricot").dataGetStruct();
602
+
603
+ this.xmldecl = x.sym_xmldecl;
604
+ this.doctype = x.sym_doctype;
605
+ this.stag = x.sym_stag;
606
+ this.etag = x.sym_etag;
607
+ this.emptytag = x.sym_emptytag;
608
+ this.comment = x.sym_comment;
609
+ this.cdata = x.sym_cdata;
610
+ this.procins = x.sym_procins;
611
+
612
+ port = args[0];
613
+ if(args.length == 2) {
614
+ opts = args[1];
615
+ } else {
616
+ opts = runtime.getNil();
617
+ }
618
+
619
+ taint = port.isTaint();
620
+ io = port.respondsTo("read");
621
+ if(!io) {
622
+ if(port.respondsTo("to_str")) {
623
+ port = port.callMethod(ctx, "to_str");
624
+ port = port.convertToString();
625
+ } else {
626
+ throw runtime.newArgumentError("an Hpricot document must be built from an input source (a String or IO object.)");
627
+ }
628
+ }
629
+
630
+ if(!(opts instanceof RubyHash)) {
631
+ opts = runtime.getNil();
632
+ }
633
+
634
+ if(!block.isGiven()) {
635
+ S = new State();
636
+ S.doc = x.cDoc.allocate();
637
+ S.focus = S.doc;
638
+ S.last = runtime.getNil();
639
+ S.xml = OPT(opts, "xml");
640
+ S.strict = OPT(opts, "xhtml_strict");
641
+ S.fixup = OPT(opts, "fixup_tags");
642
+ if(S.strict) {
643
+ S.fixup = true;
644
+ }
645
+ S.doc.getInstanceVariables().fastSetInstanceVariable("@options", opts);
646
+ S.EC = x.mHpricot.getConstant("ElementContent");
647
+ }
648
+
649
+ buffer_size = BUFSIZE;
650
+ if(self.getInstanceVariables().fastHasInstanceVariable("@buffer_size")) {
651
+ bufsize = self.getInstanceVariables().fastGetInstanceVariable("@buffer_size");
652
+ if(!bufsize.isNil()) {
653
+ buffer_size = RubyNumeric.fix2int(bufsize);
654
+ }
655
+ }
656
+
657
+ if(io) {
658
+ buf = 0;
659
+ data = new byte[buffer_size];
660
+ }
661
+ }
662
+
663
+ private int len, space;
664
+ // hpricot_scan
665
/**
 * Runs the scanner over the whole input.
 *
 * <p>IO sources are read chunk by chunk through {@code port.read(space)}; string sources are
 * scanned in one pass. Bytes of a token that is still open at the end of a chunk are moved to
 * the front of the buffer before the next read.
 *
 * @return the document node when a tree is being built, otherwise nil
 * @throws RaiseException (Hpricot parse error) when the state machine enters its error state
 */
public IRubyObject scan() {
    %% write init;
    while(!done) {
        p = pe = len = buf;
        space = buffer_size - have;

        if(io) {
            if(space == 0) {
                /* We've used up the entire buffer storing an already-parsed token
                 * prefix that must be preserved.  Likely caused by super-long attributes.
                 * Increase buffer size and continue  */
                buffer_size += BUFSIZE;
                data = realloc(data, buffer_size);
                space = buffer_size - have;
            }

            p = have;
            IRubyObject str = port.callMethod(ctx, "read", runtime.newFixnum(space));
            if(str.isNil()) {
                /* read(n) answers nil once the stream is exhausted (empty input, or input
                 * whose size is an exact multiple of the read size).  Treat it as a
                 * zero-length chunk so the EOF handling below finishes the scan instead
                 * of failing on the nil-to-String conversion. */
                len = 0;
            } else {
                ByteList bl = str.convertToString().getByteList();
                len = bl.realSize;
                System.arraycopy(bl.bytes, bl.begin, data, p, len);
            }
        } else {
            ByteList bl = port.convertToString().getByteList();
            data = bl.bytes;
            buf = bl.begin;
            p = bl.begin;
            /* one extra position past the string's bytes; copy into a private array
             * when the backing array has no room for it */
            len = bl.realSize + 1;
            if(p + len >= data.length) {
                data = new byte[len];
                System.arraycopy(bl.bytes, bl.begin, data, 0, bl.realSize);
                p = 0;
                buf = 0;
            }
            done = true;
            eof = p + len;
        }

        nread += len;

        /* If this is the last buffer, tack on an EOF. */
        if(io && len < space) {
            data[p + len++] = 0;
            eof = p + len;
            done = true;
        }

        pe = p + len;

        %% write exec;

        if(cs == hpricot_scan_error) {
            if(!tag.isNil()) {
                throw newRaiseException(x.rb_eHpricotParseError, "parse error on element <" + tag + ">, starting on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
            } else {
                throw newRaiseException(x.rb_eHpricotParseError, "parse error on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
            }
        }

        /* Input ended inside an element: treat what was collected as text. */
        if(done && ele_open) {
            ele_open = false;
            if(ts > 0) {
                mark_tag = ts;
                ts = 0;
                text = true;
            }
        }

        if(ts == -1) {
            /* No token in progress: nothing has to be carried into the next chunk. */
            have = 0;
            if(mark_tag != -1 && text) {
                if(done) {
                    if(mark_tag < p - 1) {
                        tag = CAT(tag, mark_tag, p-1);
                        ELE(x.sym_text);
                    }
                } else {
                    tag = CAT(tag, mark_tag, p);
                }
            }
            if(io) {
                mark_tag = 0;
            } else {
                mark_tag = ((RubyString)port).getByteList().begin;
            }
        } else if(io) {
            /* Keep the unfinished token: move it to the buffer start and rebase the marks. */
            have = pe - ts;
            System.arraycopy(data, ts, data, buf, have);
            mark_tag = SLIDE(mark_tag);
            mark_akey = SLIDE(mark_akey);
            mark_aval = SLIDE(mark_aval);
            te -= ts;
            ts = 0;
        }
    }

    if(S != null) {
        return S.doc;
    }

    return runtime.getNil();
}
766
+ }
767
+
768
+ public static class HpricotModule {
769
+ // hpricot_scan
770
+ @JRubyMethod(module = true, optional = 1, required = 1, frame = true)
771
+ public static IRubyObject scan(IRubyObject self, IRubyObject[] args, Block block) {
772
+ return new Scanner(self, args, block).scan();
773
+ }
774
+
775
+ // hpricot_css
776
+ @JRubyMethod(module = true)
777
+ public static IRubyObject css(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
778
+ return new HpricotCss(self, mod, str, node).scan();
779
+ }
780
+ }
781
+
782
+ public static class CData {
783
+ @JRubyMethod
784
+ public static IRubyObject content(IRubyObject self) {
785
+ return hpricot_ele_get_name(self);
786
+ }
787
+
788
+ @JRubyMethod(name = "content=")
789
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
790
+ return hpricot_ele_set_name(self, value);
791
+ }
792
+ }
793
+
794
+ public static class Comment {
795
+ @JRubyMethod
796
+ public static IRubyObject content(IRubyObject self) {
797
+ return hpricot_ele_get_name(self);
798
+ }
799
+
800
+ @JRubyMethod(name = "content=")
801
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
802
+ return hpricot_ele_set_name(self, value);
803
+ }
804
+ }
805
+
806
+ public static class DocType {
807
+ @JRubyMethod
808
+ public static IRubyObject raw_string(IRubyObject self) {
809
+ return hpricot_ele_get_name(self);
810
+ }
811
+
812
+ @JRubyMethod
813
+ public static IRubyObject clear_raw(IRubyObject self) {
814
+ return hpricot_ele_clear_name(self);
815
+ }
816
+
817
+ @JRubyMethod
818
+ public static IRubyObject target(IRubyObject self) {
819
+ return hpricot_ele_get_target(self);
820
+ }
821
+
822
+ @JRubyMethod(name = "target=")
823
+ public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
824
+ return hpricot_ele_set_target(self, value);
825
+ }
826
+
827
+ @JRubyMethod
828
+ public static IRubyObject public_id(IRubyObject self) {
829
+ return hpricot_ele_get_public_id(self);
830
+ }
831
+
832
+ @JRubyMethod(name = "public_id=")
833
+ public static IRubyObject public_id_set(IRubyObject self, IRubyObject value) {
834
+ return hpricot_ele_set_public_id(self, value);
835
+ }
836
+
837
+ @JRubyMethod
838
+ public static IRubyObject system_id(IRubyObject self) {
839
+ return hpricot_ele_get_system_id(self);
840
+ }
841
+
842
+ @JRubyMethod(name = "system_id=")
843
+ public static IRubyObject system_id_set(IRubyObject self, IRubyObject value) {
844
+ return hpricot_ele_set_system_id(self, value);
845
+ }
846
+ }
847
+
848
+ public static class Elem {
849
+ @JRubyMethod
850
+ public static IRubyObject clear_raw(IRubyObject self) {
851
+ return hpricot_ele_clear_raw(self);
852
+ }
853
+ }
854
+
855
+ public static class BogusETag {
856
+ @JRubyMethod
857
+ public static IRubyObject raw_string(IRubyObject self) {
858
+ return hpricot_ele_get_attr(self);
859
+ }
860
+
861
+ @JRubyMethod
862
+ public static IRubyObject clear_raw(IRubyObject self) {
863
+ return hpricot_ele_clear_attr(self);
864
+ }
865
+ }
866
+
867
+ public static class Text {
868
+ @JRubyMethod
869
+ public static IRubyObject raw_string(IRubyObject self) {
870
+ return hpricot_ele_get_name(self);
871
+ }
872
+
873
+ @JRubyMethod
874
+ public static IRubyObject clear_raw(IRubyObject self) {
875
+ return hpricot_ele_clear_name(self);
876
+ }
877
+
878
+ @JRubyMethod
879
+ public static IRubyObject content(IRubyObject self) {
880
+ return hpricot_ele_get_name(self);
881
+ }
882
+
883
+ @JRubyMethod(name = "content=")
884
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
885
+ return hpricot_ele_set_name(self, value);
886
+ }
887
+ }
888
+
889
+ public static class XMLDecl {
890
+ @JRubyMethod
891
+ public static IRubyObject raw_string(IRubyObject self) {
892
+ return hpricot_ele_get_name(self);
893
+ }
894
+
895
+ @JRubyMethod
896
+ public static IRubyObject clear_raw(IRubyObject self) {
897
+ return hpricot_ele_clear_name(self);
898
+ }
899
+
900
+ @JRubyMethod
901
+ public static IRubyObject encoding(IRubyObject self) {
902
+ return hpricot_ele_get_encoding(self);
903
+ }
904
+
905
+ @JRubyMethod(name = "encoding=")
906
+ public static IRubyObject encoding_set(IRubyObject self, IRubyObject value) {
907
+ return hpricot_ele_set_encoding(self, value);
908
+ }
909
+
910
+ @JRubyMethod
911
+ public static IRubyObject standalone(IRubyObject self) {
912
+ return hpricot_ele_get_standalone(self);
913
+ }
914
+
915
+ @JRubyMethod(name = "standalone=")
916
+ public static IRubyObject standalone_set(IRubyObject self, IRubyObject value) {
917
+ return hpricot_ele_set_standalone(self, value);
918
+ }
919
+
920
+ @JRubyMethod
921
+ public static IRubyObject version(IRubyObject self) {
922
+ return hpricot_ele_get_version(self);
923
+ }
924
+
925
+ @JRubyMethod(name = "version=")
926
+ public static IRubyObject version_set(IRubyObject self, IRubyObject value) {
927
+ return hpricot_ele_set_version(self, value);
928
+ }
929
+ }
930
+
931
+ public static class ProcIns {
932
+ @JRubyMethod
933
+ public static IRubyObject target(IRubyObject self) {
934
+ return hpricot_ele_get_name(self);
935
+ }
936
+
937
+ @JRubyMethod(name = "target=")
938
+ public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
939
+ return hpricot_ele_set_name(self, value);
940
+ }
941
+
942
+ @JRubyMethod
943
+ public static IRubyObject content(IRubyObject self) {
944
+ return hpricot_ele_get_attr(self);
945
+ }
946
+
947
+ @JRubyMethod(name = "content=")
948
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
949
+ return hpricot_ele_set_attr(self, value);
950
+ }
951
+ }
952
+
953
+ public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues. So sorry!";
954
+
955
+ public final static int H_ELE_TAG = 0;
956
+ public final static int H_ELE_PARENT = 1;
957
+ public final static int H_ELE_ATTR = 2;
958
+ public final static int H_ELE_ETAG = 3;
959
+ public final static int H_ELE_RAW = 4;
960
+ public final static int H_ELE_EC = 5;
961
+ public final static int H_ELE_HASH = 6;
962
+ public final static int H_ELE_CHILDREN = 7;
963
+
964
+ public static IRubyObject H_ELE_GET(IRubyObject recv, int n) {
965
+ return ((IRubyObject[])recv.dataGetStruct())[n];
966
+ }
967
+
968
+ public static RubyHash H_ELE_GET_asHash(IRubyObject recv, int n) {
969
+ IRubyObject obj = ((IRubyObject[])recv.dataGetStruct())[n];
970
+ if(obj.isNil()) {
971
+ obj = RubyHash.newHash(recv.getRuntime());
972
+ ((IRubyObject[])recv.dataGetStruct())[n] = obj;
973
+ }
974
+ return (RubyHash)obj;
975
+ }
976
+
977
+ public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
978
+ ((IRubyObject[])recv.dataGetStruct())[n] = value;
979
+ return value;
980
+ }
981
+
982
+ private static class RefCallback implements Callback {
983
+ private final int n;
984
+ public RefCallback(int n) { this.n = n; }
985
+
986
+ public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
987
+ return H_ELE_GET(recv, n);
988
+ }
989
+
990
+ public Arity getArity() {
991
+ return Arity.NO_ARGUMENTS;
992
+ }
993
+ }
994
+
995
+ private static class SetCallback implements Callback {
996
+ private final int n;
997
+ public SetCallback(int n) { this.n = n; }
998
+
999
+ public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
1000
+ return H_ELE_SET(recv, n, args[0]);
1001
+ }
1002
+
1003
+ public Arity getArity() {
1004
+ return Arity.ONE_ARGUMENT;
1005
+ }
1006
+ }
1007
+
1008
/**
 * Reader callbacks indexed by slot number: entry {@code i} reads slot {@code i}.
 * The table holds ten entries (slots 0 through 9).
 */
private final static Callback[] ref_func = new Callback[10];
static {
    for (int slot = 0; slot < ref_func.length; slot++) {
        ref_func[slot] = new RefCallback(slot);
    }
}
1019
+
1020
/**
 * Writer callbacks indexed by slot number: entry {@code i} writes slot {@code i}.
 * The table holds ten entries (slots 0 through 9).
 */
private final static Callback[] set_func = new Callback[10];
static {
    for (int slot = 0; slot < set_func.length; slot++) {
        set_func[slot] = new SetCallback(slot);
    }
}
1031
+
1032
+ public final static ObjectAllocator alloc_hpricot_struct = new ObjectAllocator() {
1033
+ // alloc_hpricot_struct
1034
+ public IRubyObject allocate(Ruby runtime, RubyClass klass) {
1035
+ RubyClass kurrent = klass;
1036
+ Object sz = kurrent.fastGetInternalVariable("__size__");
1037
+ while(sz == null && kurrent != null) {
1038
+ kurrent = kurrent.getSuperClass();
1039
+ sz = kurrent.fastGetInternalVariable("__size__");
1040
+ }
1041
+ int size = RubyNumeric.fix2int((RubyObject)sz);
1042
+ RubyObject obj = new RubyObject(runtime, klass);
1043
+ IRubyObject[] all = new IRubyObject[size];
1044
+ java.util.Arrays.fill(all, runtime.getNil());
1045
+ obj.dataWrapStruct(all);
1046
+ return obj;
1047
+ }
1048
+ };
1049
+
1050
+ public static RubyClass makeHpricotStruct(Ruby runtime, IRubyObject[] members) {
1051
+ RubyClass klass = RubyClass.newClass(runtime, runtime.getObject());
1052
+ klass.fastSetInternalVariable("__size__", runtime.newFixnum(members.length));
1053
+ klass.setAllocator(alloc_hpricot_struct);
1054
+
1055
+ for(int i = 0; i < members.length; i++) {
1056
+ String id = members[i].toString();
1057
+ klass.defineMethod(id, ref_func[i]);
1058
+ klass.defineMethod(id + "=", set_func[i]);
1059
+ }
1060
+
1061
+ return klass;
1062
+ }
1063
+
1064
+ public boolean basicLoad(final Ruby runtime) throws IOException {
1065
+ Init_hpricot_scan(runtime);
1066
+ return true;
1067
+ }
1068
+
1069
+ public static class Extra {
1070
+ IRubyObject symAllow, symDeny, sym_xmldecl, sym_doctype,
1071
+ sym_procins, sym_stag, sym_etag, sym_emptytag,
1072
+ sym_allowed, sym_children, sym_comment,
1073
+ sym_cdata, sym_name, sym_parent,
1074
+ sym_raw_attributes, sym_raw_string, sym_tagno,
1075
+ sym_text, sym_EMPTY, sym_CDATA;
1076
+
1077
+ public RubyModule mHpricot;
1078
+ public RubyClass structElem;
1079
+ public RubyClass structAttr;
1080
+ public RubyClass structBasic;
1081
+ public RubyClass cDoc;
1082
+ public RubyClass cCData;
1083
+ public RubyClass cComment;
1084
+ public RubyClass cDocType;
1085
+ public RubyClass cElem;
1086
+ public RubyClass cBogusETag;
1087
+ public RubyClass cText;
1088
+ public RubyClass cXMLDecl;
1089
+ public RubyClass cProcIns;
1090
+ public RubyClass rb_eHpricotParseError;
1091
+ public IRubyObject reProcInsParse;
1092
+
1093
+ public Extra(Ruby runtime) {
1094
+ symAllow = runtime.newSymbol("allow");
1095
+ symDeny = runtime.newSymbol("deny");
1096
+ sym_xmldecl = runtime.newSymbol("xmldecl");
1097
+ sym_doctype = runtime.newSymbol("doctype");
1098
+ sym_procins = runtime.newSymbol("procins");
1099
+ sym_stag = runtime.newSymbol("stag");
1100
+ sym_etag = runtime.newSymbol("etag");
1101
+ sym_emptytag = runtime.newSymbol("emptytag");
1102
+ sym_allowed = runtime.newSymbol("allowed");
1103
+ sym_children = runtime.newSymbol("children");
1104
+ sym_comment = runtime.newSymbol("comment");
1105
+ sym_cdata = runtime.newSymbol("cdata");
1106
+ sym_name = runtime.newSymbol("name");
1107
+ sym_parent = runtime.newSymbol("parent");
1108
+ sym_raw_attributes = runtime.newSymbol("raw_attributes");
1109
+ sym_raw_string = runtime.newSymbol("raw_string");
1110
+ sym_tagno = runtime.newSymbol("tagno");
1111
+ sym_text = runtime.newSymbol("text");
1112
+ sym_EMPTY = runtime.newSymbol("EMPTY");
1113
+ sym_CDATA = runtime.newSymbol("CDATA");
1114
+ }
1115
+ }
1116
+
1117
+ public static void Init_hpricot_scan(Ruby runtime) {
1118
+ Extra x = new Extra(runtime);
1119
+
1120
+ x.mHpricot = runtime.defineModule("Hpricot");
1121
+ x.mHpricot.dataWrapStruct(x);
1122
+
1123
+ x.mHpricot.getSingletonClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
1124
+ x.mHpricot.defineAnnotatedMethods(HpricotModule.class);
1125
+
1126
+ x.rb_eHpricotParseError = x.mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
1127
+
1128
+ x.structElem = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes, x.sym_etag, x.sym_raw_string, x.sym_allowed, x.sym_tagno, x.sym_children});
1129
+ x.structAttr = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes});
1130
+ x.structBasic= makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent});
1131
+
1132
+ x.cDoc = x.mHpricot.defineClassUnder("Doc", x.structElem, x.structElem.getAllocator());
1133
+
1134
+ x.cCData = x.mHpricot.defineClassUnder("CData", x.structBasic, x.structBasic.getAllocator());
1135
+ x.cCData.defineAnnotatedMethods(CData.class);
1136
+
1137
+ x.cComment = x.mHpricot.defineClassUnder("Comment", x.structBasic, x.structBasic.getAllocator());
1138
+ x.cComment.defineAnnotatedMethods(Comment.class);
1139
+
1140
+ x.cDocType = x.mHpricot.defineClassUnder("DocType", x.structAttr, x.structAttr.getAllocator());
1141
+ x.cDocType.defineAnnotatedMethods(DocType.class);
1142
+
1143
+ x.cElem = x.mHpricot.defineClassUnder("Elem", x.structElem, x.structElem.getAllocator());
1144
+ x.cElem.defineAnnotatedMethods(Elem.class);
1145
+
1146
+ x.cBogusETag = x.mHpricot.defineClassUnder("BogusETag", x.structAttr, x.structAttr.getAllocator());
1147
+ x.cBogusETag.defineAnnotatedMethods(BogusETag.class);
1148
+
1149
+ x.cText = x.mHpricot.defineClassUnder("Text", x.structBasic, x.structBasic.getAllocator());
1150
+ x.cText.defineAnnotatedMethods(Text.class);
1151
+
1152
+ x.cXMLDecl = x.mHpricot.defineClassUnder("XMLDecl", x.structAttr, x.structAttr.getAllocator());
1153
+ x.cXMLDecl.defineAnnotatedMethods(XMLDecl.class);
1154
+
1155
+ x.cProcIns = x.mHpricot.defineClassUnder("ProcIns", x.structAttr, x.structAttr.getAllocator());
1156
+ x.cProcIns.defineAnnotatedMethods(ProcIns.class);
1157
+
1158
+ x.reProcInsParse = runtime.evalScriptlet("/\\A<\\?(\\S+)\\s+(.+)/m");
1159
+ x.mHpricot.setConstant("ProcInsParse", x.reProcInsParse);
1160
+ }
1161
+ }