webtranslateit-hpricot 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/CHANGELOG +122 -0
  4. data/COPYING +18 -0
  5. data/README.md +295 -0
  6. data/Rakefile +237 -0
  7. data/ext/fast_xs/FastXsService.java +1123 -0
  8. data/ext/fast_xs/extconf.rb +4 -0
  9. data/ext/fast_xs/fast_xs.c +210 -0
  10. data/ext/hpricot_scan/HpricotCss.java +850 -0
  11. data/ext/hpricot_scan/HpricotScanService.java +2085 -0
  12. data/ext/hpricot_scan/MANIFEST +0 -0
  13. data/ext/hpricot_scan/extconf.rb +9 -0
  14. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  15. data/ext/hpricot_scan/hpricot_css.c +3511 -0
  16. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  17. data/ext/hpricot_scan/hpricot_css.rl +120 -0
  18. data/ext/hpricot_scan/hpricot_scan.c +6848 -0
  19. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  20. data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
  21. data/ext/hpricot_scan/hpricot_scan.rl +911 -0
  22. data/extras/hpricot.png +0 -0
  23. data/hpricot.gemspec +18 -0
  24. data/lib/hpricot/blankslate.rb +63 -0
  25. data/lib/hpricot/builder.rb +217 -0
  26. data/lib/hpricot/elements.rb +514 -0
  27. data/lib/hpricot/htmlinfo.rb +691 -0
  28. data/lib/hpricot/inspect.rb +103 -0
  29. data/lib/hpricot/modules.rb +40 -0
  30. data/lib/hpricot/parse.rb +38 -0
  31. data/lib/hpricot/tag.rb +219 -0
  32. data/lib/hpricot/tags.rb +164 -0
  33. data/lib/hpricot/traverse.rb +839 -0
  34. data/lib/hpricot/xchar.rb +95 -0
  35. data/lib/hpricot.rb +26 -0
  36. data/setup.rb +1585 -0
  37. data/test/files/basic.xhtml +17 -0
  38. data/test/files/boingboing.html +2266 -0
  39. data/test/files/cy0.html +3653 -0
  40. data/test/files/immob.html +400 -0
  41. data/test/files/pace_application.html +1320 -0
  42. data/test/files/tenderlove.html +16 -0
  43. data/test/files/uswebgen.html +220 -0
  44. data/test/files/utf8.html +1054 -0
  45. data/test/files/week9.html +1723 -0
  46. data/test/files/why.xml +19 -0
  47. data/test/load_files.rb +7 -0
  48. data/test/nokogiri-bench.rb +64 -0
  49. data/test/test_alter.rb +96 -0
  50. data/test/test_builder.rb +37 -0
  51. data/test/test_parser.rb +496 -0
  52. data/test/test_paths.rb +25 -0
  53. data/test/test_preserved.rb +88 -0
  54. data/test/test_xml.rb +28 -0
  55. metadata +106 -0
@@ -0,0 +1,1173 @@
1
+
2
+ import java.io.IOException;
3
+
4
+ import org.jruby.Ruby;
5
+ import org.jruby.RubyArray;
6
+ import org.jruby.RubyClass;
7
+ import org.jruby.RubyHash;
8
+ import org.jruby.RubyModule;
9
+ import org.jruby.RubyNumeric;
10
+ import org.jruby.RubyObject;
11
+ import org.jruby.RubyObjectAdapter;
12
+ import org.jruby.RubyRegexp;
13
+ import org.jruby.RubyString;
14
+ import org.jruby.anno.JRubyMethod;
15
+ import org.jruby.exceptions.RaiseException;
16
+ import org.jruby.javasupport.JavaEmbedUtils;
17
+ import org.jruby.runtime.Arity;
18
+ import org.jruby.runtime.Block;
19
+ import org.jruby.runtime.ObjectAllocator;
20
+ import org.jruby.runtime.ThreadContext;
21
+ import org.jruby.runtime.builtin.IRubyObject;
22
+ import org.jruby.runtime.callback.Callback;
23
+ import org.jruby.exceptions.RaiseException;
24
+ import org.jruby.runtime.load.BasicLibraryService;
25
+ import org.jruby.util.ByteList;
26
+
27
+ public class HpricotScanService implements BasicLibraryService {
28
+ public static byte[] realloc(byte[] input, int size) {
29
+ byte[] newArray = new byte[size];
30
+ System.arraycopy(input, 0, newArray, 0, input.length);
31
+ return newArray;
32
+ }
33
+
34
+ // hpricot_state
35
+ public static class State {
36
+ public IRubyObject doc;
37
+ public IRubyObject focus;
38
+ public IRubyObject last;
39
+ public IRubyObject EC;
40
+ public boolean xml, strict, fixup;
41
+ }
42
+
43
+ static boolean OPT(IRubyObject opts, String key) {
44
+ Ruby runtime = opts.getRuntime();
45
+ return !opts.isNil() && ((RubyHash)opts).op_aref(runtime.getCurrentContext(), runtime.newSymbol(key)).isTrue();
46
+ }
47
+
48
+ // H_PROP(name, H_ELE_TAG)
49
+ public static IRubyObject hpricot_ele_set_name(IRubyObject self, IRubyObject x) {
50
+ H_ELE_SET(self, H_ELE_TAG, x);
51
+ return self;
52
+ }
53
+
54
+ public static IRubyObject hpricot_ele_clear_name(IRubyObject self) {
55
+ H_ELE_SET(self, H_ELE_TAG, self.getRuntime().getNil());
56
+ return self.getRuntime().getTrue();
57
+ }
58
+
59
+ public static IRubyObject hpricot_ele_get_name(IRubyObject self) {
60
+ return H_ELE_GET(self, H_ELE_TAG);
61
+ }
62
+
63
+ // H_PROP(raw, H_ELE_RAW)
64
+ public static IRubyObject hpricot_ele_set_raw(IRubyObject self, IRubyObject x) {
65
+ H_ELE_SET(self, H_ELE_RAW, x);
66
+ return self;
67
+ }
68
+
69
+ public static IRubyObject hpricot_ele_clear_raw(IRubyObject self) {
70
+ H_ELE_SET(self, H_ELE_RAW, self.getRuntime().getNil());
71
+ return self.getRuntime().getTrue();
72
+ }
73
+
74
+ public static IRubyObject hpricot_ele_get_raw(IRubyObject self) {
75
+ return H_ELE_GET(self, H_ELE_RAW);
76
+ }
77
+
78
+ // H_PROP(parent, H_ELE_PARENT)
79
+ public static IRubyObject hpricot_ele_set_parent(IRubyObject self, IRubyObject x) {
80
+ H_ELE_SET(self, H_ELE_PARENT, x);
81
+ return self;
82
+ }
83
+
84
+ public static IRubyObject hpricot_ele_clear_parent(IRubyObject self) {
85
+ H_ELE_SET(self, H_ELE_PARENT, self.getRuntime().getNil());
86
+ return self.getRuntime().getTrue();
87
+ }
88
+
89
+ public static IRubyObject hpricot_ele_get_parent(IRubyObject self) {
90
+ return H_ELE_GET(self, H_ELE_PARENT);
91
+ }
92
+
93
+ // H_PROP(attr, H_ELE_ATTR)
94
+ public static IRubyObject hpricot_ele_set_attr(IRubyObject self, IRubyObject x) {
95
+ H_ELE_SET(self, H_ELE_ATTR, x);
96
+ return self;
97
+ }
98
+
99
+ public static IRubyObject hpricot_ele_clear_attr(IRubyObject self) {
100
+ H_ELE_SET(self, H_ELE_ATTR, self.getRuntime().getNil());
101
+ return self.getRuntime().getTrue();
102
+ }
103
+
104
+ public static IRubyObject hpricot_ele_get_attr(IRubyObject self) {
105
+ return H_ELE_GET(self, H_ELE_ATTR);
106
+ }
107
+
108
+ // H_PROP(etag, H_ELE_ETAG)
109
+ public static IRubyObject hpricot_ele_set_etag(IRubyObject self, IRubyObject x) {
110
+ H_ELE_SET(self, H_ELE_ETAG, x);
111
+ return self;
112
+ }
113
+
114
+ public static IRubyObject hpricot_ele_clear_etag(IRubyObject self) {
115
+ H_ELE_SET(self, H_ELE_ETAG, self.getRuntime().getNil());
116
+ return self.getRuntime().getTrue();
117
+ }
118
+
119
+ public static IRubyObject hpricot_ele_get_etag(IRubyObject self) {
120
+ return H_ELE_GET(self, H_ELE_ETAG);
121
+ }
122
+
123
+ // H_PROP(children, H_ELE_CHILDREN)
124
+ public static IRubyObject hpricot_ele_set_children(IRubyObject self, IRubyObject x) {
125
+ H_ELE_SET(self, H_ELE_CHILDREN, x);
126
+ return self;
127
+ }
128
+
129
+ public static IRubyObject hpricot_ele_clear_children(IRubyObject self) {
130
+ H_ELE_SET(self, H_ELE_CHILDREN, self.getRuntime().getNil());
131
+ return self.getRuntime().getTrue();
132
+ }
133
+
134
+ public static IRubyObject hpricot_ele_get_children(IRubyObject self) {
135
+ return H_ELE_GET(self, H_ELE_CHILDREN);
136
+ }
137
+
138
+ // H_ATTR(target)
139
+ public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
140
+ H_ELE_GET_asHash(self, H_ELE_ATTR).fastASet(self.getRuntime().newSymbol("target"), x);
141
+ return self;
142
+ }
143
+
144
+ public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
145
+ return H_ELE_GET_asHash(self, H_ELE_ATTR).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
146
+ }
147
+
148
+ // H_ATTR(encoding)
149
+ public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
150
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
151
+ return self;
152
+ }
153
+
154
+ public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
155
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
156
+ }
157
+
158
+ // H_ATTR(version)
159
+ public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
160
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
161
+ return self;
162
+ }
163
+
164
+ public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
165
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
166
+ }
167
+
168
+ // H_ATTR(standalone)
169
+ public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
170
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
171
+ return self;
172
+ }
173
+
174
+ public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
175
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
176
+ }
177
+
178
+ // H_ATTR(system_id)
179
+ public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
180
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
181
+ return self;
182
+ }
183
+
184
+ public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
185
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
186
+ }
187
+
188
+ // H_ATTR(public_id)
189
+ public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
190
+ ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
191
+ return self;
192
+ }
193
+
194
+ public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
195
+ return ((RubyHash)H_ELE_GET_asHash(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
196
+ }
197
+
198
+ public static class Scanner {
199
+ public IRubyObject SET(int mark, int E, IRubyObject org) {
200
+ if(mark == -1 || E == mark) {
201
+ return runtime.newString("");
202
+ } else if(E > mark) {
203
+ return RubyString.newString(runtime, data, mark, E-mark);
204
+ } else {
205
+ return org;
206
+ }
207
+ }
208
+
209
+ public int SLIDE(int N) {
210
+ if(N > ts) {
211
+ return N - ts;
212
+ } else {
213
+ return N;
214
+ }
215
+ }
216
+
217
+ public IRubyObject CAT(IRubyObject N, int mark, int E) {
218
+ if(N.isNil()) {
219
+ return SET(mark, E, N);
220
+ } else {
221
+ ((RubyString)N).cat(data, mark, E-mark);
222
+ return N;
223
+ }
224
+ }
225
+
226
+ public void ATTR(IRubyObject K, IRubyObject V) {
227
+ if(!K.isNil()) {
228
+ if(attr.isNil()) {
229
+ attr = RubyHash.newHash(runtime);
230
+ }
231
+ ((RubyHash)attr).fastASet(K, V);
232
+ }
233
+ }
234
+
235
+ public void TEXT_PASS() {
236
+ if(!text) {
237
+ if(ele_open) {
238
+ ele_open = false;
239
+ if(ts != -1) {
240
+ mark_tag = ts;
241
+ }
242
+ } else {
243
+ mark_tag = p;
244
+ }
245
+ attr = runtime.getNil();
246
+ tag = runtime.getNil();
247
+ text = true;
248
+ }
249
+ }
250
+
251
+ public void ELE(IRubyObject N) {
252
+ if(te > ts || text) {
253
+ int raw = -1;
254
+ int rawlen = 0;
255
+ ele_open = false;
256
+ text = false;
257
+
258
+ if(ts != -1 && N != x.sym_cdata && N != x.sym_text && N != x.sym_procins && N != x.sym_comment) {
259
+ raw = ts;
260
+ rawlen = te - ts;
261
+ }
262
+
263
+ if(block.isGiven()) {
264
+ IRubyObject raw_string = runtime.getNil();
265
+ if(raw != -1) {
266
+ raw_string = RubyString.newString(runtime, data, raw, rawlen);
267
+ }
268
+ yieldTokens(N, tag, attr, runtime.getNil(), taint);
269
+ } else {
270
+ hpricotToken(S, N, tag, attr, raw, rawlen, taint);
271
+ }
272
+ }
273
+ }
274
+
275
+
276
+ public void EBLK(IRubyObject N, int T) {
277
+ tag = CAT(tag, mark_tag, p - T + 1);
278
+ ELE(N);
279
+ }
280
+
281
+ public void hpricotAdd(IRubyObject focus, IRubyObject ele) {
282
+ IRubyObject children = H_ELE_GET(focus, H_ELE_CHILDREN);
283
+ if(children.isNil()) {
284
+ H_ELE_SET(focus, H_ELE_CHILDREN, children = RubyArray.newArray(runtime, 1));
285
+ }
286
+ ((RubyArray)children).append(ele);
287
+ H_ELE_SET(ele, H_ELE_PARENT, focus);
288
+ }
289
+
290
+ private static class TokenInfo {
291
+ public IRubyObject sym;
292
+ public IRubyObject tag;
293
+ public IRubyObject attr;
294
+ public int raw;
295
+ public int rawlen;
296
+ public IRubyObject ec;
297
+ public IRubyObject ele;
298
+ public Extra x;
299
+ public Ruby runtime;
300
+ public Scanner scanner;
301
+ public State S;
302
+
303
+ public void H_ELE(RubyClass klass) {
304
+ ele = klass.allocate();
305
+ if(klass == x.cElem) {
306
+ H_ELE_SET(ele, H_ELE_TAG, tag);
307
+ H_ELE_SET(ele, H_ELE_ATTR, attr);
308
+ H_ELE_SET(ele, H_ELE_EC, ec);
309
+ if(raw != -1 && (sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_doctype)) {
310
+ H_ELE_SET(ele, H_ELE_RAW, RubyString.newString(runtime, scanner.data, raw, rawlen));
311
+ }
312
+ } else if(klass == x.cDocType || klass == x.cProcIns || klass == x.cXMLDecl || klass == x.cBogusETag) {
313
+ if(klass == x.cBogusETag) {
314
+ H_ELE_SET(ele, H_ELE_TAG, tag);
315
+ if(raw != -1) {
316
+ H_ELE_SET(ele, H_ELE_ATTR, RubyString.newString(runtime, scanner.data, raw, rawlen));
317
+ }
318
+ } else {
319
+ if(klass == x.cDocType) {
320
+ scanner.ATTR(runtime.newSymbol("target"), tag);
321
+ }
322
+ H_ELE_SET(ele, H_ELE_ATTR, attr);
323
+ if(klass != x.cProcIns) {
324
+ tag = runtime.getNil();
325
+ if(raw != -1) {
326
+ tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
327
+ }
328
+ }
329
+ H_ELE_SET(ele, H_ELE_TAG, tag);
330
+ }
331
+ } else {
332
+ H_ELE_SET(ele, H_ELE_TAG, tag);
333
+ }
334
+ S.last = ele;
335
+ }
336
+
337
+ public void hpricotToken(boolean taint) {
338
+ //
339
+ // in html mode, fix up start tags incorrectly formed as empty tags
340
+ //
341
+ if(!S.xml) {
342
+ if(sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_etag) {
343
+ ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
344
+ if(ec.isNil()) {
345
+ tag = tag.callMethod(scanner.ctx, "downcase");
346
+ ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
347
+ }
348
+ }
349
+
350
+ if(H_ELE_GET(S.focus, H_ELE_EC) == x.sym_CDATA &&
351
+ (sym != x.sym_procins && sym != x.sym_comment && sym != x.sym_cdata && sym != x.sym_text) &&
352
+ !(sym == x.sym_etag && runtime.newFixnum(tag.hashCode()).equals(H_ELE_GET(S.focus, H_ELE_HASH)))) {
353
+ sym = x.sym_text;
354
+ tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
355
+ }
356
+
357
+ if(!ec.isNil()) {
358
+ if(sym == x.sym_emptytag) {
359
+ if(ec != x.sym_EMPTY) {
360
+ sym = x.sym_stag;
361
+ }
362
+ } else if(sym == x.sym_stag) {
363
+ if(ec == x.sym_EMPTY) {
364
+ sym = x.sym_emptytag;
365
+ }
366
+ }
367
+ }
368
+ }
369
+
370
+ if(sym == x.sym_emptytag || sym == x.sym_stag) {
371
+ IRubyObject name = runtime.newFixnum(tag.hashCode());
372
+ H_ELE(x.cElem);
373
+ H_ELE_SET(ele, H_ELE_HASH, name);
374
+
375
+ if(!S.xml) {
376
+ IRubyObject match = runtime.getNil(), e = S.focus;
377
+ while(e != S.doc) {
378
+ if (ec.isNil()) {
379
+ // Anything can contain an unknown element
380
+ if(match.isNil()) {
381
+ match = e;
382
+ }
383
+ } else {
384
+ IRubyObject hEC = H_ELE_GET(e, H_ELE_EC);
385
+ if(hEC instanceof RubyHash) {
386
+ IRubyObject has = ((RubyHash)hEC).op_aref(scanner.ctx, name);
387
+ if(!has.isNil()) {
388
+ if(has == runtime.getTrue()) {
389
+ if(match.isNil()) {
390
+ match = e;
391
+ }
392
+ } else if(has == x.symAllow) {
393
+ match = S.focus;
394
+ } else if(has == x.symDeny) {
395
+ match = runtime.getNil();
396
+ }
397
+ }
398
+ } else {
399
+ // Unknown elements can contain anything
400
+ if(match.isNil()) {
401
+ match = e;
402
+ }
403
+ }
404
+ }
405
+ e = H_ELE_GET(e, H_ELE_PARENT);
406
+ }
407
+
408
+ if(match.isNil()) {
409
+ match = S.focus;
410
+ }
411
+ S.focus = match;
412
+ }
413
+
414
+ scanner.hpricotAdd(S.focus, ele);
415
+
416
+ //
417
+ // in the case of a start tag that should be empty, just
418
+ // skip the step that focuses the element. focusing moves
419
+ // us deeper into the document.
420
+ //
421
+ if(sym == x.sym_stag) {
422
+ if(S.xml || ec != x.sym_EMPTY) {
423
+ S.focus = ele;
424
+ S.last = runtime.getNil();
425
+ }
426
+ }
427
+ } else if(sym == x.sym_etag) {
428
+ IRubyObject name, match = runtime.getNil(), e = S.focus;
429
+ if(S.strict) {
430
+ if(((RubyHash)S.EC).op_aref(scanner.ctx, tag).isNil()) {
431
+ tag = runtime.newString("div");
432
+ }
433
+ }
434
+
435
+ name = runtime.newFixnum(tag.hashCode());
436
+ while(e != S.doc) {
437
+ if(H_ELE_GET(e, H_ELE_HASH).equals(name)) {
438
+ match = e;
439
+ break;
440
+ }
441
+ e = H_ELE_GET(e, H_ELE_PARENT);
442
+
443
+ }
444
+ if(match.isNil()) {
445
+ H_ELE(x.cBogusETag);
446
+ scanner.hpricotAdd(S.focus, ele);
447
+ } else {
448
+ ele = runtime.getNil();
449
+ if(raw != -1) {
450
+ ele = RubyString.newString(runtime, scanner.data, raw, rawlen);
451
+ }
452
+ H_ELE_SET(match, H_ELE_ETAG, ele);
453
+ S.focus = H_ELE_GET(match, H_ELE_PARENT);
454
+ S.last = runtime.getNil();
455
+
456
+ }
457
+ } else if(sym == x.sym_cdata) {
458
+ H_ELE(x.cCData);
459
+ scanner.hpricotAdd(S.focus, ele);
460
+ } else if(sym == x.sym_comment) {
461
+ H_ELE(x.cComment);
462
+ scanner.hpricotAdd(S.focus, ele);
463
+ } else if(sym == x.sym_doctype) {
464
+ H_ELE(x.cDocType);
465
+ if(S.strict) {
466
+ RubyHash h = (RubyHash)attr;
467
+ h.fastASet(runtime.newSymbol("system_id"), runtime.newString("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
468
+ h.fastASet(runtime.newSymbol("public_id"), runtime.newString("-//W3C//DTD XHTML 1.0 Strict//EN"));
469
+ }
470
+ scanner.hpricotAdd(S.focus, ele);
471
+ } else if(sym == x.sym_procins) {
472
+ IRubyObject match = tag.callMethod(scanner.ctx, "match", x.reProcInsParse);
473
+ tag = RubyRegexp.nth_match(1, match);
474
+ attr = RubyRegexp.nth_match(2, match);
475
+ H_ELE(x.cProcIns);
476
+ scanner.hpricotAdd(S.focus, ele);
477
+ } else if(sym == x.sym_text) {
478
+ if(!S.last.isNil() && S.last.getType() == x.cText) {
479
+ ((RubyString)H_ELE_GET(S.last, H_ELE_TAG)).append(tag);
480
+ } else {
481
+ H_ELE(x.cText);
482
+ scanner.hpricotAdd(S.focus, ele);
483
+ }
484
+ } else if(sym == x.sym_xmldecl) {
485
+ H_ELE(x.cXMLDecl);
486
+ scanner.hpricotAdd(S.focus, ele);
487
+ }
488
+ }
489
+ }
490
+
491
+ public void hpricotToken(State S, IRubyObject _sym, IRubyObject _tag, IRubyObject _attr, int _raw, int _rawlen, boolean taint) {
492
+ TokenInfo t = new TokenInfo();
493
+ t.sym = _sym;
494
+ t.tag = _tag;
495
+ t.attr = _attr;
496
+ t.raw = _raw;
497
+ t.rawlen = _rawlen;
498
+ t.ec = runtime.getNil();
499
+ t.ele = runtime.getNil();
500
+ t.x = x;
501
+ t.runtime = runtime;
502
+ t.scanner = this;
503
+ t.S = S;
504
+
505
+ t.hpricotToken(taint);
506
+ }
507
+
508
+ public void yieldTokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
509
+ if(sym == x.sym_text) {
510
+ raw = tag;
511
+ }
512
+ IRubyObject ary = RubyArray.newArrayNoCopy(runtime, new IRubyObject[]{sym, tag, attr, raw});
513
+ if(taint) {
514
+ ary.setTaint(true);
515
+ tag.setTaint(true);
516
+ attr.setTaint(true);
517
+ raw.setTaint(true);
518
+ }
519
+
520
+ block.yield(ctx, ary);
521
+ }
522
+
523
+ %%{
524
+ machine hpricot_scan;
525
+
526
+ action newEle {
527
+ if(text) {
528
+ tag = CAT(tag, mark_tag, p);
529
+ ELE(x.sym_text);
530
+ text = false;
531
+ }
532
+ attr = runtime.getNil();
533
+ tag = runtime.getNil();
534
+ mark_tag = -1;
535
+ ele_open = true;
536
+ }
537
+
538
+ action _tag { mark_tag = p; }
539
+ action _aval { mark_aval = p; }
540
+ action _akey { mark_akey = p; }
541
+ action tag { tag = SET(mark_tag, p, tag); }
542
+ action tagc { tag = SET(mark_tag, p-1, tag); }
543
+ action aval { aval = SET(mark_aval, p, aval); }
544
+ action aunq {
545
+ if(data[p-1] == '"' || data[p-1] == '\'') {
546
+ aval = SET(mark_aval, p-1, aval);
547
+ } else {
548
+ aval = SET(mark_aval, p, aval);
549
+ }
550
+ }
551
+ action akey { akey = SET(mark_akey, p, akey); }
552
+ action xmlver { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("version"), aval); }
553
+ action xmlenc { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("encoding"), aval); }
554
+ action xmlsd { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("standalone"), aval); }
555
+ action pubid { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("public_id"), aval); }
556
+ action sysid { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("system_id"), aval); }
557
+
558
+ action new_attr {
559
+ akey = runtime.getNil();
560
+ aval = runtime.getNil();
561
+ mark_akey = -1;
562
+ mark_aval = -1;
563
+ }
564
+
565
+ action save_attr {
566
+ if(!S.xml && !akey.isNil()) {
567
+ akey = akey.callMethod(runtime.getCurrentContext(), "downcase");
568
+ }
569
+ ATTR(akey, aval);
570
+ }
571
+
572
+ include hpricot_common "hpricot_common.rl";
573
+ }%%
574
+
575
+ %% write data nofinal;
576
+
577
// Default buffer size; also the increment used when the buffer must grow.
+ public final static int BUFSIZE = 16384;
578
+
579
+
580
// `have` counts bytes carried over at the start of the buffer between passes.
+ private int cs, act, have = 0, nread = 0, curline = 1;
581
+ private int ts = 0, te = 0, eof = -1, p = -1, pe = -1, buf = 0;
582
// Bytes being scanned: a read buffer for IO input, otherwise the string's bytes.
+ private byte[] data;
583
// Tree-building state; stays null when a block is given.
+ private State S = null;
584
+ private IRubyObject port, opts, attr, tag, akey, aval, bufsize;
585
// Start offsets of the current tag / attribute key / attribute value; -1 when unset.
+ private int mark_tag = -1, mark_akey = -1, mark_aval = -1;
586
+ private boolean done = false, ele_open = false, taint = false, io = false, text = false;
587
+ private int buffer_size = 0;
588
+
589
// Shared symbols and classes, fetched from the Hpricot module's data struct.
+ private Extra x;
590
+
591
+ private IRubyObject self;
592
+ private Ruby runtime;
593
+ private ThreadContext ctx;
594
+ private Block block;
595
+
596
// Copies of the token-kind symbols held in `x`.
+ private IRubyObject xmldecl, doctype, stag, etag, emptytag, comment, cdata, procins;
598
+ private RaiseException newRaiseException(RubyClass exceptionClass, String message) {
599
+ return new RaiseException(runtime, exceptionClass, message, true);
600
+ }
601
+
602
+ public Scanner(IRubyObject self, IRubyObject[] args, Block block) {
603
+ this.self = self;
604
+ this.runtime = self.getRuntime();
605
+ this.ctx = runtime.getCurrentContext();
606
+ this.block = block;
607
+ attr = runtime.getNil();
608
+ tag = runtime.getNil();
609
+ akey = runtime.getNil();
610
+ aval = runtime.getNil();
611
+ bufsize = runtime.getNil();
612
+
613
+ this.x = (Extra)this.runtime.getModule("Hpricot").dataGetStruct();
614
+
615
+ this.xmldecl = x.sym_xmldecl;
616
+ this.doctype = x.sym_doctype;
617
+ this.stag = x.sym_stag;
618
+ this.etag = x.sym_etag;
619
+ this.emptytag = x.sym_emptytag;
620
+ this.comment = x.sym_comment;
621
+ this.cdata = x.sym_cdata;
622
+ this.procins = x.sym_procins;
623
+
624
+ port = args[0];
625
+ if(args.length == 2) {
626
+ opts = args[1];
627
+ } else {
628
+ opts = runtime.getNil();
629
+ }
630
+
631
+ taint = port.isTaint();
632
+ io = port.respondsTo("read");
633
+ if(!io) {
634
+ if(port.respondsTo("to_str")) {
635
+ port = port.callMethod(ctx, "to_str");
636
+ port = port.convertToString();
637
+ } else {
638
+ throw runtime.newArgumentError("an Hpricot document must be built from an input source (a String or IO object.)");
639
+ }
640
+ }
641
+
642
+ if(!(opts instanceof RubyHash)) {
643
+ opts = runtime.getNil();
644
+ }
645
+
646
+ if(!block.isGiven()) {
647
+ S = new State();
648
+ S.doc = x.cDoc.allocate();
649
+ S.focus = S.doc;
650
+ S.last = runtime.getNil();
651
+ S.xml = OPT(opts, "xml");
652
+ S.strict = OPT(opts, "xhtml_strict");
653
+ S.fixup = OPT(opts, "fixup_tags");
654
+ if(S.strict) {
655
+ S.fixup = true;
656
+ }
657
+ S.doc.getInstanceVariables().fastSetInstanceVariable("@options", opts);
658
+ S.EC = x.mHpricot.getConstant("ElementContent");
659
+ }
660
+
661
+ buffer_size = BUFSIZE;
662
+ if(self.getInstanceVariables().fastHasInstanceVariable("@buffer_size")) {
663
+ bufsize = self.getInstanceVariables().fastGetInstanceVariable("@buffer_size");
664
+ if(!bufsize.isNil()) {
665
+ buffer_size = RubyNumeric.fix2int(bufsize);
666
+ }
667
+ }
668
+
669
+ if(io) {
670
+ buf = 0;
671
+ data = new byte[buffer_size];
672
+ }
673
+ }
674
+
675
+ private int len, space;
676
+ // hpricot_scan
677
/**
 * Runs the Ragel machine over the whole input.
 *
 * IO input is read in chunks; the unfinished token at the end of a chunk is
 * moved to the front of the buffer before the next read. String input is
 * scanned in a single pass with one extra trailing byte as the EOF marker.
 *
 * @return the built document, or nil when tokens were yielded to a block
 * @throws RaiseException (Hpricot parse error) when the machine enters its error state
 */
public IRubyObject scan() {
  %% write init;
  while(!done) {
    p = pe = len = buf;
    space = buffer_size - have;

    if(io) {
      if(space == 0) {
        /* We've used up the entire buffer storing an already-parsed token
         * prefix that must be preserved. Likely caused by super-long attributes.
         * Increase buffer size and continue */
        buffer_size += BUFSIZE;
        data = realloc(data, buffer_size);
        space = buffer_size - have;
      }

      p = have;
      IRubyObject str = port.callMethod(ctx, "read", runtime.newFixnum(space));
      if(str.isNil()) {
        // read(length) answers nil at end of input, e.g. when the previous
        // chunk filled the buffer exactly. Treat it as an empty final read
        // instead of failing on the string conversion.
        len = 0;
      } else {
        ByteList bl = str.convertToString().getByteList();
        len = bl.realSize;
        System.arraycopy(bl.bytes, bl.begin, data, p, len);
      }
    } else {
      ByteList bl = port.convertToString().getByteList();
      data = bl.bytes;
      buf = bl.begin;
      p = bl.begin;
      // One byte past the content serves as the EOF marker.
      len = bl.realSize + 1;
      if(p + len >= data.length) {
        // No room for the marker in the backing array: scan a padded copy.
        data = new byte[len];
        System.arraycopy(bl.bytes, bl.begin, data, 0, bl.realSize);
        p = 0;
        buf = 0;
      }
      done = true;
      eof = p + len;
    }

    nread += len;

    /* If this is the last buffer, tack on an EOF. */
    if(io && len < space) {
      data[p + len++] = 0;
      eof = p + len;
      done = true;
    }

    pe = p + len;

    %% write exec;

    if(cs == hpricot_scan_error) {
      if(!tag.isNil()) {
        throw newRaiseException(x.rb_eHpricotParseError, "parse error on element <" + tag + ">, starting on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
      } else {
        throw newRaiseException(x.rb_eHpricotParseError, "parse error on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
      }
    }

    // Input ended inside an unfinished element: treat what was read as text.
    if(done && ele_open) {
      ele_open = false;
      if(ts > 0) {
        mark_tag = ts;
        ts = 0;
        text = true;
      }
    }

    if(ts == -1) {
      // No token in progress: nothing to carry over to the next pass.
      have = 0;
      if(mark_tag != -1 && text) {
        if(done) {
          // Flush trailing text, excluding the EOF marker byte.
          if(mark_tag < p - 1) {
            tag = CAT(tag, mark_tag, p-1);
            ELE(x.sym_text);
          }
        } else {
          tag = CAT(tag, mark_tag, p);
        }
      }
      if(io) {
        mark_tag = 0;
      } else {
        mark_tag = ((RubyString)port).getByteList().begin;
      }
    } else if(io) {
      // Move the unfinished token to the buffer start and rebase the marks.
      have = pe - ts;
      System.arraycopy(data, ts, data, buf, have);
      mark_tag = SLIDE(mark_tag);
      mark_akey = SLIDE(mark_akey);
      mark_aval = SLIDE(mark_aval);
      te -= ts;
      ts = 0;
    }
  }

  if(S != null) {
    return S.doc;
  }

  return runtime.getNil();
}
778
+ }
779
+
780
+ public static class HpricotModule {
781
+ // hpricot_scan
782
+ @JRubyMethod(module = true, optional = 1, required = 1, frame = true)
783
+ public static IRubyObject scan(IRubyObject self, IRubyObject[] args, Block block) {
784
+ return new Scanner(self, args, block).scan();
785
+ }
786
+
787
+ // hpricot_css
788
+ @JRubyMethod(module = true)
789
+ public static IRubyObject css(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
790
+ return new HpricotCss(self, mod, str, node).scan();
791
+ }
792
+ }
793
+
794
+ public static class CData {
795
+ @JRubyMethod
796
+ public static IRubyObject content(IRubyObject self) {
797
+ return hpricot_ele_get_name(self);
798
+ }
799
+
800
+ @JRubyMethod(name = "content=")
801
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
802
+ return hpricot_ele_set_name(self, value);
803
+ }
804
+ }
805
+
806
+ public static class Comment {
807
+ @JRubyMethod
808
+ public static IRubyObject content(IRubyObject self) {
809
+ return hpricot_ele_get_name(self);
810
+ }
811
+
812
+ @JRubyMethod(name = "content=")
813
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
814
+ return hpricot_ele_set_name(self, value);
815
+ }
816
+ }
817
+
818
+ public static class DocType {
819
+ @JRubyMethod
820
+ public static IRubyObject raw_string(IRubyObject self) {
821
+ return hpricot_ele_get_name(self);
822
+ }
823
+
824
+ @JRubyMethod
825
+ public static IRubyObject clear_raw(IRubyObject self) {
826
+ return hpricot_ele_clear_name(self);
827
+ }
828
+
829
+ @JRubyMethod
830
+ public static IRubyObject target(IRubyObject self) {
831
+ return hpricot_ele_get_target(self);
832
+ }
833
+
834
+ @JRubyMethod(name = "target=")
835
+ public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
836
+ return hpricot_ele_set_target(self, value);
837
+ }
838
+
839
+ @JRubyMethod
840
+ public static IRubyObject public_id(IRubyObject self) {
841
+ return hpricot_ele_get_public_id(self);
842
+ }
843
+
844
+ @JRubyMethod(name = "public_id=")
845
+ public static IRubyObject public_id_set(IRubyObject self, IRubyObject value) {
846
+ return hpricot_ele_set_public_id(self, value);
847
+ }
848
+
849
+ @JRubyMethod
850
+ public static IRubyObject system_id(IRubyObject self) {
851
+ return hpricot_ele_get_system_id(self);
852
+ }
853
+
854
+ @JRubyMethod(name = "system_id=")
855
+ public static IRubyObject system_id_set(IRubyObject self, IRubyObject value) {
856
+ return hpricot_ele_set_system_id(self, value);
857
+ }
858
+ }
859
+
860
+ public static class Elem {
861
+ @JRubyMethod
862
+ public static IRubyObject clear_raw(IRubyObject self) {
863
+ return hpricot_ele_clear_raw(self);
864
+ }
865
+ }
866
+
867
+ public static class BogusETag {
868
+ @JRubyMethod
869
+ public static IRubyObject raw_string(IRubyObject self) {
870
+ return hpricot_ele_get_attr(self);
871
+ }
872
+
873
+ @JRubyMethod
874
+ public static IRubyObject clear_raw(IRubyObject self) {
875
+ return hpricot_ele_clear_attr(self);
876
+ }
877
+ }
878
+
879
+ public static class Text {
880
+ @JRubyMethod
881
+ public static IRubyObject raw_string(IRubyObject self) {
882
+ return hpricot_ele_get_name(self);
883
+ }
884
+
885
+ @JRubyMethod
886
+ public static IRubyObject clear_raw(IRubyObject self) {
887
+ return hpricot_ele_clear_name(self);
888
+ }
889
+
890
+ @JRubyMethod
891
+ public static IRubyObject content(IRubyObject self) {
892
+ return hpricot_ele_get_name(self);
893
+ }
894
+
895
+ @JRubyMethod(name = "content=")
896
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
897
+ return hpricot_ele_set_name(self, value);
898
+ }
899
+ }
900
+
901
+ public static class XMLDecl {
902
+ @JRubyMethod
903
+ public static IRubyObject raw_string(IRubyObject self) {
904
+ return hpricot_ele_get_name(self);
905
+ }
906
+
907
+ @JRubyMethod
908
+ public static IRubyObject clear_raw(IRubyObject self) {
909
+ return hpricot_ele_clear_name(self);
910
+ }
911
+
912
+ @JRubyMethod
913
+ public static IRubyObject encoding(IRubyObject self) {
914
+ return hpricot_ele_get_encoding(self);
915
+ }
916
+
917
+ @JRubyMethod(name = "encoding=")
918
+ public static IRubyObject encoding_set(IRubyObject self, IRubyObject value) {
919
+ return hpricot_ele_set_encoding(self, value);
920
+ }
921
+
922
+ @JRubyMethod
923
+ public static IRubyObject standalone(IRubyObject self) {
924
+ return hpricot_ele_get_standalone(self);
925
+ }
926
+
927
+ @JRubyMethod(name = "standalone=")
928
+ public static IRubyObject standalone_set(IRubyObject self, IRubyObject value) {
929
+ return hpricot_ele_set_standalone(self, value);
930
+ }
931
+
932
+ @JRubyMethod
933
+ public static IRubyObject version(IRubyObject self) {
934
+ return hpricot_ele_get_version(self);
935
+ }
936
+
937
+ @JRubyMethod(name = "version=")
938
+ public static IRubyObject version_set(IRubyObject self, IRubyObject value) {
939
+ return hpricot_ele_set_version(self, value);
940
+ }
941
+ }
942
+
943
+ public static class ProcIns {
944
+ @JRubyMethod
945
+ public static IRubyObject target(IRubyObject self) {
946
+ return hpricot_ele_get_name(self);
947
+ }
948
+
949
+ @JRubyMethod(name = "target=")
950
+ public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
951
+ return hpricot_ele_set_name(self, value);
952
+ }
953
+
954
+ @JRubyMethod
955
+ public static IRubyObject content(IRubyObject self) {
956
+ return hpricot_ele_get_attr(self);
957
+ }
958
+
959
+ @JRubyMethod(name = "content=")
960
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
961
+ return hpricot_ele_set_attr(self, value);
962
+ }
963
+ }
964
+
965
+ public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please file a bug report with the HTML you're parsing at http://github.com/hpricot/hpricot/issues. So sorry!";
966
+
967
+ public final static int H_ELE_TAG = 0;
968
+ public final static int H_ELE_PARENT = 1;
969
+ public final static int H_ELE_ATTR = 2;
970
+ public final static int H_ELE_ETAG = 3;
971
+ public final static int H_ELE_RAW = 4;
972
+ public final static int H_ELE_EC = 5;
973
+ public final static int H_ELE_HASH = 6;
974
+ public final static int H_ELE_CHILDREN = 7;
975
+
976
+ public static IRubyObject H_ELE_GET(IRubyObject recv, int n) {
977
+ return ((IRubyObject[])recv.dataGetStruct())[n];
978
+ }
979
+
980
+ public static RubyHash H_ELE_GET_asHash(IRubyObject recv, int n) {
981
+ IRubyObject obj = ((IRubyObject[])recv.dataGetStruct())[n];
982
+ if(obj.isNil()) {
983
+ obj = RubyHash.newHash(recv.getRuntime());
984
+ ((IRubyObject[])recv.dataGetStruct())[n] = obj;
985
+ }
986
+ return (RubyHash)obj;
987
+ }
988
+
989
+ public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
990
+ ((IRubyObject[])recv.dataGetStruct())[n] = value;
991
+ return value;
992
+ }
993
+
994
+ private static class RefCallback implements Callback {
995
+ private final int n;
996
+ public RefCallback(int n) { this.n = n; }
997
+
998
+ public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
999
+ return H_ELE_GET(recv, n);
1000
+ }
1001
+
1002
+ public Arity getArity() {
1003
+ return Arity.NO_ARGUMENTS;
1004
+ }
1005
+ }
1006
+
1007
+ private static class SetCallback implements Callback {
1008
+ private final int n;
1009
+ public SetCallback(int n) { this.n = n; }
1010
+
1011
+ public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
1012
+ return H_ELE_SET(recv, n, args[0]);
1013
+ }
1014
+
1015
+ public Arity getArity() {
1016
+ return Arity.ONE_ARGUMENT;
1017
+ }
1018
+ }
1019
+
1020
+ private final static Callback[] ref_func = new Callback[]{
1021
+ new RefCallback(0),
1022
+ new RefCallback(1),
1023
+ new RefCallback(2),
1024
+ new RefCallback(3),
1025
+ new RefCallback(4),
1026
+ new RefCallback(5),
1027
+ new RefCallback(6),
1028
+ new RefCallback(7),
1029
+ new RefCallback(8),
1030
+ new RefCallback(9)};
1031
+
1032
+ private final static Callback[] set_func = new Callback[]{
1033
+ new SetCallback(0),
1034
+ new SetCallback(1),
1035
+ new SetCallback(2),
1036
+ new SetCallback(3),
1037
+ new SetCallback(4),
1038
+ new SetCallback(5),
1039
+ new SetCallback(6),
1040
+ new SetCallback(7),
1041
+ new SetCallback(8),
1042
+ new SetCallback(9)};
1043
+
1044
+ public final static ObjectAllocator alloc_hpricot_struct = new ObjectAllocator() {
1045
+ // alloc_hpricot_struct
1046
+ public IRubyObject allocate(Ruby runtime, RubyClass klass) {
1047
+ RubyClass kurrent = klass;
1048
+ Object sz = kurrent.fastGetInternalVariable("__size__");
1049
+ while(sz == null && kurrent != null) {
1050
+ kurrent = kurrent.getSuperClass();
1051
+ sz = kurrent.fastGetInternalVariable("__size__");
1052
+ }
1053
+ int size = RubyNumeric.fix2int((RubyObject)sz);
1054
+ RubyObject obj = new RubyObject(runtime, klass);
1055
+ IRubyObject[] all = new IRubyObject[size];
1056
+ java.util.Arrays.fill(all, runtime.getNil());
1057
+ obj.dataWrapStruct(all);
1058
+ return obj;
1059
+ }
1060
+ };
1061
+
1062
+ public static RubyClass makeHpricotStruct(Ruby runtime, IRubyObject[] members) {
1063
+ RubyClass klass = RubyClass.newClass(runtime, runtime.getObject());
1064
+ klass.fastSetInternalVariable("__size__", runtime.newFixnum(members.length));
1065
+ klass.setAllocator(alloc_hpricot_struct);
1066
+
1067
+ for(int i = 0; i < members.length; i++) {
1068
+ String id = members[i].toString();
1069
+ klass.defineMethod(id, ref_func[i]);
1070
+ klass.defineMethod(id + "=", set_func[i]);
1071
+ }
1072
+
1073
+ return klass;
1074
+ }
1075
+
1076
+ public boolean basicLoad(final Ruby runtime) throws IOException {
1077
+ Init_hpricot_scan(runtime);
1078
+ return true;
1079
+ }
1080
+
1081
+ public static class Extra {
1082
+ IRubyObject symAllow, symDeny, sym_xmldecl, sym_doctype,
1083
+ sym_procins, sym_stag, sym_etag, sym_emptytag,
1084
+ sym_allowed, sym_children, sym_comment,
1085
+ sym_cdata, sym_name, sym_parent,
1086
+ sym_raw_attributes, sym_raw_string, sym_tagno,
1087
+ sym_text, sym_EMPTY, sym_CDATA;
1088
+
1089
+ public RubyModule mHpricot;
1090
+ public RubyClass structElem;
1091
+ public RubyClass structAttr;
1092
+ public RubyClass structBasic;
1093
+ public RubyClass cDoc;
1094
+ public RubyClass cCData;
1095
+ public RubyClass cComment;
1096
+ public RubyClass cDocType;
1097
+ public RubyClass cElem;
1098
+ public RubyClass cBogusETag;
1099
+ public RubyClass cText;
1100
+ public RubyClass cXMLDecl;
1101
+ public RubyClass cProcIns;
1102
+ public RubyClass rb_eHpricotParseError;
1103
+ public IRubyObject reProcInsParse;
1104
+
1105
+ public Extra(Ruby runtime) {
1106
+ symAllow = runtime.newSymbol("allow");
1107
+ symDeny = runtime.newSymbol("deny");
1108
+ sym_xmldecl = runtime.newSymbol("xmldecl");
1109
+ sym_doctype = runtime.newSymbol("doctype");
1110
+ sym_procins = runtime.newSymbol("procins");
1111
+ sym_stag = runtime.newSymbol("stag");
1112
+ sym_etag = runtime.newSymbol("etag");
1113
+ sym_emptytag = runtime.newSymbol("emptytag");
1114
+ sym_allowed = runtime.newSymbol("allowed");
1115
+ sym_children = runtime.newSymbol("children");
1116
+ sym_comment = runtime.newSymbol("comment");
1117
+ sym_cdata = runtime.newSymbol("cdata");
1118
+ sym_name = runtime.newSymbol("name");
1119
+ sym_parent = runtime.newSymbol("parent");
1120
+ sym_raw_attributes = runtime.newSymbol("raw_attributes");
1121
+ sym_raw_string = runtime.newSymbol("raw_string");
1122
+ sym_tagno = runtime.newSymbol("tagno");
1123
+ sym_text = runtime.newSymbol("text");
1124
+ sym_EMPTY = runtime.newSymbol("EMPTY");
1125
+ sym_CDATA = runtime.newSymbol("CDATA");
1126
+ }
1127
+ }
1128
+
1129
+ public static void Init_hpricot_scan(Ruby runtime) {
1130
+ Extra x = new Extra(runtime);
1131
+
1132
+ x.mHpricot = runtime.defineModule("Hpricot");
1133
+ x.mHpricot.dataWrapStruct(x);
1134
+
1135
+ x.mHpricot.getSingletonClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
1136
+ x.mHpricot.defineAnnotatedMethods(HpricotModule.class);
1137
+
1138
+ x.rb_eHpricotParseError = x.mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
1139
+
1140
+ x.structElem = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes, x.sym_etag, x.sym_raw_string, x.sym_allowed, x.sym_tagno, x.sym_children});
1141
+ x.structAttr = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes});
1142
+ x.structBasic= makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent});
1143
+
1144
+ x.cDoc = x.mHpricot.defineClassUnder("Doc", x.structElem, x.structElem.getAllocator());
1145
+
1146
+ x.cCData = x.mHpricot.defineClassUnder("CData", x.structBasic, x.structBasic.getAllocator());
1147
+ x.cCData.defineAnnotatedMethods(CData.class);
1148
+
1149
+ x.cComment = x.mHpricot.defineClassUnder("Comment", x.structBasic, x.structBasic.getAllocator());
1150
+ x.cComment.defineAnnotatedMethods(Comment.class);
1151
+
1152
+ x.cDocType = x.mHpricot.defineClassUnder("DocType", x.structAttr, x.structAttr.getAllocator());
1153
+ x.cDocType.defineAnnotatedMethods(DocType.class);
1154
+
1155
+ x.cElem = x.mHpricot.defineClassUnder("Elem", x.structElem, x.structElem.getAllocator());
1156
+ x.cElem.defineAnnotatedMethods(Elem.class);
1157
+
1158
+ x.cBogusETag = x.mHpricot.defineClassUnder("BogusETag", x.structAttr, x.structAttr.getAllocator());
1159
+ x.cBogusETag.defineAnnotatedMethods(BogusETag.class);
1160
+
1161
+ x.cText = x.mHpricot.defineClassUnder("Text", x.structBasic, x.structBasic.getAllocator());
1162
+ x.cText.defineAnnotatedMethods(Text.class);
1163
+
1164
+ x.cXMLDecl = x.mHpricot.defineClassUnder("XMLDecl", x.structAttr, x.structAttr.getAllocator());
1165
+ x.cXMLDecl.defineAnnotatedMethods(XMLDecl.class);
1166
+
1167
+ x.cProcIns = x.mHpricot.defineClassUnder("ProcIns", x.structAttr, x.structAttr.getAllocator());
1168
+ x.cProcIns.defineAnnotatedMethods(ProcIns.class);
1169
+
1170
+ x.reProcInsParse = runtime.evalScriptlet("/\\A<\\?(\\S+)\\s+(.+)/m");
1171
+ x.mHpricot.setConstant("ProcInsParse", x.reProcInsParse);
1172
+ }
1173
+ }