hpricot 0.8.1-x86-mswin32 → 0.8.2-x86-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,152 +1,519 @@
1
- // line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
1
+ // line 1 "hpricot_scan.java.rl"
2
2
 
3
3
  import java.io.IOException;
4
4
 
5
5
  import org.jruby.Ruby;
6
+ import org.jruby.RubyArray;
6
7
  import org.jruby.RubyClass;
7
8
  import org.jruby.RubyHash;
8
9
  import org.jruby.RubyModule;
9
10
  import org.jruby.RubyNumeric;
11
+ import org.jruby.RubyObject;
10
12
  import org.jruby.RubyObjectAdapter;
13
+ import org.jruby.RubyRegexp;
11
14
  import org.jruby.RubyString;
15
+ import org.jruby.anno.JRubyMethod;
16
+ import org.jruby.exceptions.RaiseException;
12
17
  import org.jruby.javasupport.JavaEmbedUtils;
18
+ import org.jruby.runtime.Arity;
13
19
  import org.jruby.runtime.Block;
14
- import org.jruby.runtime.CallbackFactory;
20
+ import org.jruby.runtime.ObjectAllocator;
21
+ import org.jruby.runtime.ThreadContext;
15
22
  import org.jruby.runtime.builtin.IRubyObject;
23
+ import org.jruby.runtime.callback.Callback;
16
24
  import org.jruby.exceptions.RaiseException;
17
25
  import org.jruby.runtime.load.BasicLibraryService;
26
+ import org.jruby.util.ByteList;
18
27
 
19
28
  public class HpricotScanService implements BasicLibraryService {
20
- public static String NO_WAY_SERIOUSLY="*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
21
- private static RubyObjectAdapter rubyApi;
22
-
23
- public void ELE(IRubyObject N) {
24
- if (te > ts || text) {
25
- IRubyObject raw_string = runtime.getNil();
26
- ele_open = false; text = false;
27
- if (ts != -1 && N != cdata && N != sym_text && N != procins && N != comment) {
28
- raw_string = runtime.newString(new String(buf,ts,te-ts));
29
- }
30
- rb_yield_tokens(N, tag[0], attr, raw_string, taint);
31
- }
32
- }
33
-
34
- public void SET(IRubyObject[] N, int E) {
35
- int mark = 0;
36
- if(N == tag) {
37
- if(mark_tag == -1 || E == mark_tag) {
38
- tag[0] = runtime.newString("");
39
- } else if(E > mark_tag) {
40
- tag[0] = runtime.newString(new String(buf,mark_tag, E-mark_tag));
41
- }
42
- } else if(N == akey) {
43
- if(mark_akey == -1 || E == mark_akey) {
44
- akey[0] = runtime.newString("");
45
- } else if(E > mark_akey) {
46
- akey[0] = runtime.newString(new String(buf,mark_akey, E-mark_akey));
47
- }
48
- } else if(N == aval) {
49
- if(mark_aval == -1 || E == mark_aval) {
50
- aval[0] = runtime.newString("");
51
- } else if(E > mark_aval) {
52
- aval[0] = runtime.newString(new String(buf,mark_aval, E-mark_aval));
53
- }
54
- }
55
- }
56
-
57
- public void CAT(IRubyObject[] N, int E) {
58
- if(N[0].isNil()) {
59
- SET(N,E);
60
- } else {
61
- int mark = 0;
62
- if(N == tag) {
63
- mark = mark_tag;
64
- } else if(N == akey) {
65
- mark = mark_akey;
66
- } else if(N == aval) {
67
- mark = mark_aval;
68
- }
69
- ((RubyString)(N[0])).append(runtime.newString(new String(buf, mark, E-mark)));
70
- }
71
- }
72
-
73
- public void SLIDE(Object N) {
74
- int mark = 0;
75
- if(N == tag) {
76
- mark = mark_tag;
77
- } else if(N == akey) {
78
- mark = mark_akey;
79
- } else if(N == aval) {
80
- mark = mark_aval;
81
- }
82
- if(mark > ts) {
83
- if(N == tag) {
84
- mark_tag -= ts;
85
- } else if(N == akey) {
86
- mark_akey -= ts;
87
- } else if(N == aval) {
88
- mark_aval -= ts;
89
- }
90
- }
91
- }
92
-
93
- public void ATTR(IRubyObject K, IRubyObject V) {
94
- if(!K.isNil()) {
95
- if(attr.isNil()) {
96
- attr = RubyHash.newHash(runtime);
97
- }
98
- ((RubyHash)attr).op_aset(runtime.getCurrentContext(),K,V);
99
- // ((RubyHash)attr).aset(K,V);
100
- }
101
- }
102
-
103
- public void ATTR(IRubyObject[] K, IRubyObject V) {
104
- ATTR(K[0],V);
105
- }
106
-
107
- public void ATTR(IRubyObject K, IRubyObject[] V) {
108
- ATTR(K,V[0]);
109
- }
110
-
111
- public void ATTR(IRubyObject[] K, IRubyObject[] V) {
112
- ATTR(K[0],V[0]);
113
- }
114
-
115
- public void TEXT_PASS() {
116
- if(!text) {
117
- if(ele_open) {
118
- ele_open = false;
119
- if(ts > -1) {
120
- mark_tag = ts;
121
- }
122
- } else {
123
- mark_tag = p;
124
- }
125
- attr = runtime.getNil();
126
- tag[0] = runtime.getNil();
127
- text = true;
128
- }
129
- }
130
-
131
- public void EBLK(IRubyObject N, int T) {
132
- CAT(tag, p - T + 1);
133
- ELE(N);
134
- }
135
-
136
-
137
- public void rb_raise(RubyClass error, String message) {
138
- throw new RaiseException(runtime, error, message, true);
139
- }
140
-
141
- public IRubyObject rb_str_new2(String s) {
142
- return runtime.newString(s);
143
- }
144
-
145
- // line 189 "ext/hpricot_scan/hpricot_scan.java.rl"
146
-
147
-
148
-
149
- // line 150 "ext/hpricot_scan/HpricotScanService.java"
29
+ public static byte[] realloc(byte[] input, int size) {
30
+ byte[] newArray = new byte[size];
31
+ System.arraycopy(input, 0, newArray, 0, input.length);
32
+ return newArray;
33
+ }
34
+
35
+ // hpricot_state
36
+ public static class State {
37
+ public IRubyObject doc;
38
+ public IRubyObject focus;
39
+ public IRubyObject last;
40
+ public IRubyObject EC;
41
+ public boolean xml, strict, fixup;
42
+ }
43
+
44
+ static boolean OPT(IRubyObject opts, String key) {
45
+ Ruby runtime = opts.getRuntime();
46
+ return !opts.isNil() && ((RubyHash)opts).op_aref(runtime.getCurrentContext(), runtime.newSymbol(key)).isTrue();
47
+ }
48
+
49
+ // H_PROP(name, H_ELE_TAG)
50
+ public static IRubyObject hpricot_ele_set_name(IRubyObject self, IRubyObject x) {
51
+ H_ELE_SET(self, H_ELE_TAG, x);
52
+ return self;
53
+ }
54
+
55
+ public static IRubyObject hpricot_ele_clear_name(IRubyObject self) {
56
+ H_ELE_SET(self, H_ELE_TAG, self.getRuntime().getNil());
57
+ return self.getRuntime().getTrue();
58
+ }
59
+
60
+ public static IRubyObject hpricot_ele_get_name(IRubyObject self) {
61
+ return H_ELE_GET(self, H_ELE_TAG);
62
+ }
63
+
64
+ // H_PROP(raw, H_ELE_RAW)
65
+ public static IRubyObject hpricot_ele_set_raw(IRubyObject self, IRubyObject x) {
66
+ H_ELE_SET(self, H_ELE_RAW, x);
67
+ return self;
68
+ }
69
+
70
+ public static IRubyObject hpricot_ele_clear_raw(IRubyObject self) {
71
+ H_ELE_SET(self, H_ELE_RAW, self.getRuntime().getNil());
72
+ return self.getRuntime().getTrue();
73
+ }
74
+
75
+ public static IRubyObject hpricot_ele_get_raw(IRubyObject self) {
76
+ return H_ELE_GET(self, H_ELE_RAW);
77
+ }
78
+
79
+ // H_PROP(parent, H_ELE_PARENT)
80
+ public static IRubyObject hpricot_ele_set_parent(IRubyObject self, IRubyObject x) {
81
+ H_ELE_SET(self, H_ELE_PARENT, x);
82
+ return self;
83
+ }
84
+
85
+ public static IRubyObject hpricot_ele_clear_parent(IRubyObject self) {
86
+ H_ELE_SET(self, H_ELE_PARENT, self.getRuntime().getNil());
87
+ return self.getRuntime().getTrue();
88
+ }
89
+
90
+ public static IRubyObject hpricot_ele_get_parent(IRubyObject self) {
91
+ return H_ELE_GET(self, H_ELE_PARENT);
92
+ }
93
+
94
+ // H_PROP(attr, H_ELE_ATTR)
95
+ public static IRubyObject hpricot_ele_set_attr(IRubyObject self, IRubyObject x) {
96
+ H_ELE_SET(self, H_ELE_ATTR, x);
97
+ return self;
98
+ }
99
+
100
+ public static IRubyObject hpricot_ele_clear_attr(IRubyObject self) {
101
+ H_ELE_SET(self, H_ELE_ATTR, self.getRuntime().getNil());
102
+ return self.getRuntime().getTrue();
103
+ }
104
+
105
+ public static IRubyObject hpricot_ele_get_attr(IRubyObject self) {
106
+ return H_ELE_GET(self, H_ELE_ATTR);
107
+ }
108
+
109
+ // H_PROP(etag, H_ELE_ETAG)
110
+ public static IRubyObject hpricot_ele_set_etag(IRubyObject self, IRubyObject x) {
111
+ H_ELE_SET(self, H_ELE_ETAG, x);
112
+ return self;
113
+ }
114
+
115
+ public static IRubyObject hpricot_ele_clear_etag(IRubyObject self) {
116
+ H_ELE_SET(self, H_ELE_ETAG, self.getRuntime().getNil());
117
+ return self.getRuntime().getTrue();
118
+ }
119
+
120
+ public static IRubyObject hpricot_ele_get_etag(IRubyObject self) {
121
+ return H_ELE_GET(self, H_ELE_ETAG);
122
+ }
123
+
124
+ // H_PROP(children, H_ELE_CHILDREN)
125
+ public static IRubyObject hpricot_ele_set_children(IRubyObject self, IRubyObject x) {
126
+ H_ELE_SET(self, H_ELE_CHILDREN, x);
127
+ return self;
128
+ }
129
+
130
+ public static IRubyObject hpricot_ele_clear_children(IRubyObject self) {
131
+ H_ELE_SET(self, H_ELE_CHILDREN, self.getRuntime().getNil());
132
+ return self.getRuntime().getTrue();
133
+ }
134
+
135
+ public static IRubyObject hpricot_ele_get_children(IRubyObject self) {
136
+ return H_ELE_GET(self, H_ELE_CHILDREN);
137
+ }
138
+
139
+ // H_ATTR(target)
140
+ public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
141
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("target"), x);
142
+ return self;
143
+ }
144
+
145
+ public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
146
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
147
+ }
148
+
149
+ // H_ATTR(encoding)
150
+ public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
151
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
152
+ return self;
153
+ }
154
+
155
+ public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
156
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
157
+ }
158
+
159
+ // H_ATTR(version)
160
+ public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
161
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
162
+ return self;
163
+ }
164
+
165
+ public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
166
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
167
+ }
168
+
169
+ // H_ATTR(standalone)
170
+ public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
171
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
172
+ return self;
173
+ }
174
+
175
+ public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
176
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
177
+ }
178
+
179
+ // H_ATTR(system_id)
180
+ public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
181
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
182
+ return self;
183
+ }
184
+
185
+ public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
186
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
187
+ }
188
+
189
+ // H_ATTR(public_id)
190
+ public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
191
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
192
+ return self;
193
+ }
194
+
195
+ public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
196
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
197
+ }
198
+
199
+ public static class Scanner {
200
+ public IRubyObject SET(int mark, int E, IRubyObject org) {
201
+ if(mark == -1 || E == mark) {
202
+ return runtime.newString("");
203
+ } else if(E > mark) {
204
+ return RubyString.newString(runtime, data, mark, E-mark);
205
+ } else {
206
+ return org;
207
+ }
208
+ }
209
+
210
+ public int SLIDE(int N) {
211
+ if(N > ts) {
212
+ return N - ts;
213
+ } else {
214
+ return N;
215
+ }
216
+ }
217
+
218
+ public IRubyObject CAT(IRubyObject N, int mark, int E) {
219
+ if(N.isNil()) {
220
+ return SET(mark, E, N);
221
+ } else {
222
+ ((RubyString)N).cat(data, mark, E-mark);
223
+ return N;
224
+ }
225
+ }
226
+
227
+ public void ATTR(IRubyObject K, IRubyObject V) {
228
+ if(!K.isNil()) {
229
+ if(attr.isNil()) {
230
+ attr = RubyHash.newHash(runtime);
231
+ }
232
+ ((RubyHash)attr).fastASet(K, V);
233
+ }
234
+ }
235
+
236
+ public void TEXT_PASS() {
237
+ if(!text) {
238
+ if(ele_open) {
239
+ ele_open = false;
240
+ if(ts != -1) {
241
+ mark_tag = ts;
242
+ }
243
+ } else {
244
+ mark_tag = p;
245
+ }
246
+ attr = runtime.getNil();
247
+ tag = runtime.getNil();
248
+ text = true;
249
+ }
250
+ }
251
+
252
+ public void ELE(IRubyObject N) {
253
+ if(te > ts || text) {
254
+ int raw = -1;
255
+ int rawlen = 0;
256
+ ele_open = false;
257
+ text = false;
258
+
259
+ if(ts != -1 && N != x.sym_cdata && N != x.sym_text && N != x.sym_procins && N != x.sym_comment) {
260
+ raw = ts;
261
+ rawlen = te - ts;
262
+ }
263
+
264
+ if(block.isGiven()) {
265
+ IRubyObject raw_string = runtime.getNil();
266
+ if(raw != -1) {
267
+ raw_string = RubyString.newString(runtime, data, raw, rawlen);
268
+ }
269
+ yieldTokens(N, tag, attr, runtime.getNil(), taint);
270
+ } else {
271
+ hpricotToken(S, N, tag, attr, raw, rawlen, taint);
272
+ }
273
+ }
274
+ }
275
+
276
+
277
+ public void EBLK(IRubyObject N, int T) {
278
+ tag = CAT(tag, mark_tag, p - T + 1);
279
+ ELE(N);
280
+ }
281
+
282
+ public void hpricotAdd(IRubyObject focus, IRubyObject ele) {
283
+ IRubyObject children = H_ELE_GET(focus, H_ELE_CHILDREN);
284
+ if(children.isNil()) {
285
+ H_ELE_SET(focus, H_ELE_CHILDREN, children = RubyArray.newArray(runtime, 1));
286
+ }
287
+ ((RubyArray)children).append(ele);
288
+ H_ELE_SET(ele, H_ELE_PARENT, focus);
289
+ }
290
+
291
+ private static class TokenInfo {
292
+ public IRubyObject sym;
293
+ public IRubyObject tag;
294
+ public IRubyObject attr;
295
+ public int raw;
296
+ public int rawlen;
297
+ public IRubyObject ec;
298
+ public IRubyObject ele;
299
+ public Extra x;
300
+ public Ruby runtime;
301
+ public Scanner scanner;
302
+ public State S;
303
+
304
+ public void H_ELE(RubyClass klass) {
305
+ ele = klass.allocate();
306
+ if(klass == x.cElem) {
307
+ H_ELE_SET(ele, H_ELE_TAG, tag);
308
+ H_ELE_SET(ele, H_ELE_ATTR, attr);
309
+ H_ELE_SET(ele, H_ELE_EC, ec);
310
+ if(raw != -1 && (sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_doctype)) {
311
+ H_ELE_SET(ele, H_ELE_RAW, RubyString.newString(runtime, scanner.data, raw, rawlen));
312
+ }
313
+ } else if(klass == x.cDocType || klass == x.cProcIns || klass == x.cXMLDecl || klass == x.cBogusETag) {
314
+ if(klass == x.cBogusETag) {
315
+ H_ELE_SET(ele, H_ELE_TAG, tag);
316
+ if(raw != -1) {
317
+ H_ELE_SET(ele, H_ELE_ATTR, RubyString.newString(runtime, scanner.data, raw, rawlen));
318
+ }
319
+ } else {
320
+ if(klass == x.cDocType) {
321
+ scanner.ATTR(runtime.newSymbol("target"), tag);
322
+ }
323
+ H_ELE_SET(ele, H_ELE_ATTR, attr);
324
+ if(klass != x.cProcIns) {
325
+ tag = runtime.getNil();
326
+ if(raw != -1) {
327
+ tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
328
+ }
329
+ }
330
+ H_ELE_SET(ele, H_ELE_TAG, tag);
331
+ }
332
+ } else {
333
+ H_ELE_SET(ele, H_ELE_TAG, tag);
334
+ }
335
+ S.last = ele;
336
+ }
337
+
338
+ public void hpricotToken(boolean taint) {
339
+ //
340
+ // in html mode, fix up start tags incorrectly formed as empty tags
341
+ //
342
+ if(!S.xml) {
343
+ if(sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_etag) {
344
+ ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
345
+ if(ec.isNil()) {
346
+ tag = tag.callMethod(scanner.ctx, "downcase");
347
+ ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
348
+ }
349
+ }
350
+
351
+ if(H_ELE_GET(S.focus, H_ELE_EC) == x.sym_CDATA &&
352
+ (sym != x.sym_procins && sym != x.sym_comment && sym != x.sym_cdata && sym != x.sym_text) &&
353
+ !(sym == x.sym_etag && runtime.newFixnum(tag.hashCode()).equals(H_ELE_GET(S.focus, H_ELE_HASH)))) {
354
+ sym = x.sym_text;
355
+ tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
356
+ }
357
+
358
+ if(!ec.isNil()) {
359
+ if(sym == x.sym_emptytag) {
360
+ if(ec != x.sym_EMPTY) {
361
+ sym = x.sym_stag;
362
+ }
363
+ } else if(sym == x.sym_stag) {
364
+ if(ec == x.sym_EMPTY) {
365
+ sym = x.sym_emptytag;
366
+ }
367
+ }
368
+ }
369
+ }
370
+
371
+ if(sym == x.sym_emptytag || sym == x.sym_stag) {
372
+ IRubyObject name = runtime.newFixnum(tag.hashCode());
373
+ H_ELE(x.cElem);
374
+ H_ELE_SET(ele, H_ELE_HASH, name);
375
+
376
+ if(!S.xml) {
377
+ IRubyObject match = runtime.getNil(), e = S.focus;
378
+ while(e != S.doc) {
379
+ IRubyObject hEC = H_ELE_GET(e, H_ELE_EC);
380
+ if(hEC instanceof RubyHash) {
381
+ IRubyObject has = ((RubyHash)hEC).op_aref(scanner.ctx, name);
382
+ if(!has.isNil()) {
383
+ if(has == runtime.getTrue()) {
384
+ if(match.isNil()) {
385
+ match = e;
386
+ }
387
+ } else if(has == x.symAllow) {
388
+ match = S.focus;
389
+ } else if(has == x.symDeny) {
390
+ match = runtime.getNil();
391
+ }
392
+ }
393
+ }
394
+ e = H_ELE_GET(e, H_ELE_PARENT);
395
+ }
396
+
397
+ if(match.isNil()) {
398
+ match = S.focus;
399
+ }
400
+ S.focus = match;
401
+ }
402
+
403
+ scanner.hpricotAdd(S.focus, ele);
404
+
405
+ //
406
+ // in the case of a start tag that should be empty, just
407
+ // skip the step that focuses the element. focusing moves
408
+ // us deeper into the document.
409
+ //
410
+ if(sym == x.sym_stag) {
411
+ if(S.xml || ec != x.sym_EMPTY) {
412
+ S.focus = ele;
413
+ S.last = runtime.getNil();
414
+ }
415
+ }
416
+ } else if(sym == x.sym_etag) {
417
+ IRubyObject name, match = runtime.getNil(), e = S.focus;
418
+ if(S.strict) {
419
+ if(((RubyHash)S.EC).op_aref(scanner.ctx, tag).isNil()) {
420
+ tag = runtime.newString("div");
421
+ }
422
+ }
423
+
424
+ name = runtime.newFixnum(tag.hashCode());
425
+ while(e != S.doc) {
426
+ if(H_ELE_GET(e, H_ELE_HASH).equals(name)) {
427
+ match = e;
428
+ break;
429
+ }
430
+ e = H_ELE_GET(e, H_ELE_PARENT);
431
+
432
+ }
433
+ if(match.isNil()) {
434
+ H_ELE(x.cBogusETag);
435
+ scanner.hpricotAdd(S.focus, ele);
436
+ } else {
437
+ ele = runtime.getNil();
438
+ if(raw != -1) {
439
+ ele = RubyString.newString(runtime, scanner.data, raw, rawlen);
440
+ }
441
+ H_ELE_SET(match, H_ELE_ETAG, ele);
442
+ S.focus = H_ELE_GET(match, H_ELE_PARENT);
443
+ S.last = runtime.getNil();
444
+
445
+ }
446
+ } else if(sym == x.sym_cdata) {
447
+ H_ELE(x.cCData);
448
+ scanner.hpricotAdd(S.focus, ele);
449
+ } else if(sym == x.sym_comment) {
450
+ H_ELE(x.cComment);
451
+ scanner.hpricotAdd(S.focus, ele);
452
+ } else if(sym == x.sym_doctype) {
453
+ H_ELE(x.cDocType);
454
+ if(S.strict) {
455
+ RubyHash h = (RubyHash)attr;
456
+ h.fastASet(runtime.newSymbol("system_id"), runtime.newString("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
457
+ h.fastASet(runtime.newSymbol("public_id"), runtime.newString("-//W3C//DTD XHTML 1.0 Strict//EN"));
458
+ }
459
+ scanner.hpricotAdd(S.focus, ele);
460
+ } else if(sym == x.sym_procins) {
461
+ IRubyObject match = tag.callMethod(scanner.ctx, "match", x.reProcInsParse);
462
+ tag = RubyRegexp.nth_match(1, match);
463
+ attr = RubyRegexp.nth_match(2, match);
464
+ H_ELE(x.cProcIns);
465
+ scanner.hpricotAdd(S.focus, ele);
466
+ } else if(sym == x.sym_text) {
467
+ if(!S.last.isNil() && S.last.getType() == x.cText) {
468
+ ((RubyString)H_ELE_GET(S.last, H_ELE_TAG)).append(tag);
469
+ } else {
470
+ H_ELE(x.cText);
471
+ scanner.hpricotAdd(S.focus, ele);
472
+ }
473
+ } else if(sym == x.sym_xmldecl) {
474
+ H_ELE(x.cXMLDecl);
475
+ scanner.hpricotAdd(S.focus, ele);
476
+ }
477
+ }
478
+ }
479
+
480
+ public void hpricotToken(State S, IRubyObject _sym, IRubyObject _tag, IRubyObject _attr, int _raw, int _rawlen, boolean taint) {
481
+ TokenInfo t = new TokenInfo();
482
+ t.sym = _sym;
483
+ t.tag = _tag;
484
+ t.attr = _attr;
485
+ t.raw = _raw;
486
+ t.rawlen = _rawlen;
487
+ t.ec = runtime.getNil();
488
+ t.ele = runtime.getNil();
489
+ t.x = x;
490
+ t.runtime = runtime;
491
+ t.scanner = this;
492
+ t.S = S;
493
+
494
+ t.hpricotToken(taint);
495
+ }
496
+
497
+ public void yieldTokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
498
+ if(sym == x.sym_text) {
499
+ raw = tag;
500
+ }
501
+ IRubyObject ary = RubyArray.newArrayNoCopy(runtime, new IRubyObject[]{sym, tag, attr, raw});
502
+ if(taint) {
503
+ ary.setTaint(true);
504
+ tag.setTaint(true);
505
+ attr.setTaint(true);
506
+ raw.setTaint(true);
507
+ }
508
+
509
+ block.yield(ctx, ary);
510
+ }
511
+
512
+ // line 561 "hpricot_scan.java.rl"
513
+
514
+
515
+
516
+ // line 517 "HpricotScanService.java"
150
517
  private static byte[] init__hpricot_scan_actions_0()
151
518
  {
152
519
  return new byte [] {
@@ -568,7 +935,7 @@ private static short[] init__hpricot_scan_indicies_0()
568
935
  private static final short _hpricot_scan_indicies[] = init__hpricot_scan_indicies_0();
569
936
 
570
937
 
571
- private static short[] init__hpricot_scan_trans_targs_0()
938
+ private static short[] init__hpricot_scan_trans_targs_wi_0()
572
939
  {
573
940
  return new short [] {
574
941
  204, 1, 2, 53, 204, 3, 4, 5, 6, 7, 8, 9,
@@ -611,10 +978,10 @@ private static short[] init__hpricot_scan_trans_targs_0()
611
978
  };
612
979
  }
613
980
 
614
- private static final short _hpricot_scan_trans_targs[] = init__hpricot_scan_trans_targs_0();
981
+ private static final short _hpricot_scan_trans_targs_wi[] = init__hpricot_scan_trans_targs_wi_0();
615
982
 
616
983
 
617
- private static short[] init__hpricot_scan_trans_actions_0()
984
+ private static short[] init__hpricot_scan_trans_actions_wi_0()
618
985
  {
619
986
  return new short [] {
620
987
  73, 0, 0, 0, 59, 0, 0, 0, 0, 0, 0, 0,
@@ -657,7 +1024,7 @@ private static short[] init__hpricot_scan_trans_actions_0()
657
1024
  };
658
1025
  }
659
1026
 
660
- private static final short _hpricot_scan_trans_actions[] = init__hpricot_scan_trans_actions_0();
1027
+ private static final short _hpricot_scan_trans_actions_wi[] = init__hpricot_scan_trans_actions_wi_0();
661
1028
 
662
1029
 
663
1030
  private static short[] init__hpricot_scan_to_state_actions_0()
@@ -752,121 +1119,166 @@ static final int hpricot_scan_en_html_cdata = 216;
752
1119
  static final int hpricot_scan_en_html_procins = 218;
753
1120
  static final int hpricot_scan_en_main = 204;
754
1121
 
755
- // line 192 "ext/hpricot_scan/hpricot_scan.java.rl"
1122
+ // line 564 "hpricot_scan.java.rl"
756
1123
 
757
- public final static int BUFSIZE=16384;
1124
+ public final static int BUFSIZE = 16384;
758
1125
 
759
- private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
760
- IRubyObject ary;
761
- if (sym == runtime.newSymbol("text")) {
762
- raw = tag;
763
- }
764
- ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw});
765
- if (taint) {
766
- ary.setTaint(true);
767
- tag.setTaint(true);
768
- attr.setTaint(true);
769
- raw.setTaint(true);
770
- }
771
- block.yield(runtime.getCurrentContext(), ary, null, null, false);
772
- }
773
1126
 
1127
+ private int cs, act, have = 0, nread = 0, curline = 1;
1128
+ private int ts = 0, te = 0, eof = -1, p = -1, pe = -1, buf = 0;
1129
+ private byte[] data;
1130
+ private State S = null;
1131
+ private IRubyObject port, opts, attr, tag, akey, aval, bufsize;
1132
+ private int mark_tag = -1, mark_akey = -1, mark_aval = -1;
1133
+ private boolean done = false, ele_open = false, taint = false, io = false, text = false;
1134
+ private int buffer_size = 0;
774
1135
 
775
- int cs, act, have = 0, nread = 0, curline = 1, p=-1;
776
- boolean text = false;
777
- int ts=-1, te;
778
- int eof=-1;
779
- char[] buf;
780
- Ruby runtime;
781
- IRubyObject attr, bufsize;
782
- IRubyObject[] tag, akey, aval;
783
- int mark_tag, mark_akey, mark_aval;
784
- boolean done = false, ele_open = false;
785
- int buffer_size = 0;
786
- boolean taint = false;
787
- Block block = null;
788
-
789
-
790
- IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment,
791
- cdata, sym_text;
792
-
793
- IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) {
794
- attr = bufsize = runtime.getNil();
795
- tag = new IRubyObject[]{runtime.getNil()};
796
- akey = new IRubyObject[]{runtime.getNil()};
797
- aval = new IRubyObject[]{runtime.getNil()};
798
-
799
- RubyClass rb_eHpricotParseError = runtime.getModule("Hpricot").getClass("ParseError");
800
-
801
- taint = port.isTaint();
802
- if ( !port.respondsTo("read")) {
803
- if ( port.respondsTo("to_str")) {
804
- port = port.callMethod(runtime.getCurrentContext(),"to_str");
805
- } else {
806
- throw runtime.newArgumentError("bad Hpricot argument, String or IO only please.");
807
- }
808
- }
1136
+ private Extra x;
809
1137
 
810
- buffer_size = BUFSIZE;
811
- if (rubyApi.getInstanceVariable(recv, "@buffer_size") != null) {
812
- bufsize = rubyApi.getInstanceVariable(recv, "@buffer_size");
813
- if (!bufsize.isNil()) {
814
- buffer_size = RubyNumeric.fix2int(bufsize);
815
- }
816
- }
817
- buf = new char[buffer_size];
1138
+ private IRubyObject self;
1139
+ private Ruby runtime;
1140
+ private ThreadContext ctx;
1141
+ private Block block;
1142
+
1143
+ private IRubyObject xmldecl, doctype, stag, etag, emptytag, comment, cdata, procins;
1144
+
1145
+ private RaiseException newRaiseException(RubyClass exceptionClass, String message) {
1146
+ return new RaiseException(runtime, exceptionClass, message, true);
1147
+ }
818
1148
 
819
-
820
- // line 821 "ext/hpricot_scan/HpricotScanService.java"
1149
+ public Scanner(IRubyObject self, IRubyObject[] args, Block block) {
1150
+ this.self = self;
1151
+ this.runtime = self.getRuntime();
1152
+ this.ctx = runtime.getCurrentContext();
1153
+ this.block = block;
1154
+ attr = runtime.getNil();
1155
+ tag = runtime.getNil();
1156
+ akey = runtime.getNil();
1157
+ aval = runtime.getNil();
1158
+ bufsize = runtime.getNil();
1159
+
1160
+ this.x = (Extra)this.runtime.getModule("Hpricot").dataGetStruct();
1161
+
1162
+ this.xmldecl = x.sym_xmldecl;
1163
+ this.doctype = x.sym_doctype;
1164
+ this.stag = x.sym_stag;
1165
+ this.etag = x.sym_etag;
1166
+ this.emptytag = x.sym_emptytag;
1167
+ this.comment = x.sym_comment;
1168
+ this.cdata = x.sym_cdata;
1169
+ this.procins = x.sym_procins;
1170
+
1171
+ port = args[0];
1172
+ if(args.length == 2) {
1173
+ opts = args[1];
1174
+ } else {
1175
+ opts = runtime.getNil();
1176
+ }
1177
+
1178
+ taint = port.isTaint();
1179
+ io = port.respondsTo("read");
1180
+ if(!io) {
1181
+ if(port.respondsTo("to_str")) {
1182
+ port = port.callMethod(ctx, "to_str");
1183
+ port = port.convertToString();
1184
+ } else {
1185
+ throw runtime.newArgumentError("an Hpricot document must be built from an input source (a String or IO object.)");
1186
+ }
1187
+ }
1188
+
1189
+ if(!(opts instanceof RubyHash)) {
1190
+ opts = runtime.getNil();
1191
+ }
1192
+
1193
+ if(!block.isGiven()) {
1194
+ S = new State();
1195
+ S.doc = x.cDoc.allocate();
1196
+ S.focus = S.doc;
1197
+ S.last = runtime.getNil();
1198
+ S.xml = OPT(opts, "xml");
1199
+ S.strict = OPT(opts, "xhtml_strict");
1200
+ S.fixup = OPT(opts, "fixup_tags");
1201
+ if(S.strict) {
1202
+ S.fixup = true;
1203
+ }
1204
+ S.doc.getInstanceVariables().fastSetInstanceVariable("@options", opts);
1205
+ S.EC = x.mHpricot.getConstant("ElementContent");
1206
+ }
1207
+
1208
+ buffer_size = BUFSIZE;
1209
+ if(self.getInstanceVariables().fastHasInstanceVariable("@buffer_size")) {
1210
+ bufsize = self.getInstanceVariables().fastGetInstanceVariable("@buffer_size");
1211
+ if(!bufsize.isNil()) {
1212
+ buffer_size = RubyNumeric.fix2int(bufsize);
1213
+ }
1214
+ }
1215
+
1216
+ if(io) {
1217
+ buf = 0;
1218
+ data = new byte[buffer_size];
1219
+ }
1220
+ }
1221
+
1222
+ private int len, space;
1223
+ // hpricot_scan
1224
+ public IRubyObject scan() {
1225
+
1226
+ // line 1227 "HpricotScanService.java"
821
1227
  {
822
1228
  cs = hpricot_scan_start;
823
1229
  ts = -1;
824
1230
  te = -1;
825
1231
  act = 0;
826
1232
  }
827
- // line 256 "ext/hpricot_scan/hpricot_scan.java.rl"
828
-
829
- while( !done ) {
830
- IRubyObject str;
831
- p = have;
832
- int pe;
833
- int len, space = buffer_size - have;
834
-
835
- if ( space == 0 ) {
836
- /* We've used up the entire buffer storing an already-parsed token
837
- * prefix that must be preserved. Likely caused by super-long attributes.
838
- * See ticket #13. */
839
- buffer_size += BUFSIZE;
840
- char[] new_buf = new char[buffer_size];
841
- System.arraycopy(buf, 0, new_buf, 0, buf.length);
842
- buf = new_buf;
843
- space = buffer_size - have;
844
- }
845
-
846
- if (port.respondsTo("read")) {
847
- str = port.callMethod(runtime.getCurrentContext(),"read",runtime.newFixnum(space));
848
- } else {
849
- str = ((RubyString)port).substr(nread,space);
850
- }
851
-
852
- str = str.convertToString();
853
- String sss = str.toString();
854
- char[] chars = sss.toCharArray();
855
- System.arraycopy(chars,0,buf,p,chars.length);
856
-
857
- len = sss.length();
858
- nread += len;
859
-
860
- if ( len < space ) {
861
- len++;
862
- done = true;
863
- }
864
-
865
- pe = p + len;
866
- char[] data = buf;
867
-
868
-
869
- // line 870 "ext/hpricot_scan/HpricotScanService.java"
1233
+ // line 667 "hpricot_scan.java.rl"
1234
+ while(!done) {
1235
+ p = pe = len = buf;
1236
+ space = buffer_size - have;
1237
+
1238
+ if(io) {
1239
+ if(space == 0) {
1240
+ /* We've used up the entire buffer storing an already-parsed token
1241
+ * prefix that must be preserved. Likely caused by super-long attributes.
1242
+ * Increase buffer size and continue */
1243
+ buffer_size += BUFSIZE;
1244
+ data = realloc(data, buffer_size);
1245
+ space = buffer_size - have;
1246
+ }
1247
+
1248
+ p = have;
1249
+ IRubyObject str = port.callMethod(ctx, "read", runtime.newFixnum(space));
1250
+ ByteList bl = str.convertToString().getByteList();
1251
+ len = bl.realSize;
1252
+ System.arraycopy(bl.bytes, bl.begin, data, p, len);
1253
+ } else {
1254
+ ByteList bl = port.convertToString().getByteList();
1255
+ data = bl.bytes;
1256
+ buf = bl.begin;
1257
+ p = bl.begin;
1258
+ len = bl.realSize + 1;
1259
+ if(p + len >= data.length) {
1260
+ data = new byte[len];
1261
+ System.arraycopy(bl.bytes, bl.begin, data, 0, bl.realSize);
1262
+ p = 0;
1263
+ buf = 0;
1264
+ }
1265
+ done = true;
1266
+ eof = p + len;
1267
+ }
1268
+
1269
+ nread += len;
1270
+
1271
+ /* If this is the last buffer, tack on an EOF. */
1272
+ if(io && len < space) {
1273
+ data[p + len++] = 0;
1274
+ eof = p + len;
1275
+ done = true;
1276
+ }
1277
+
1278
+ pe = p + len;
1279
+
1280
+
1281
+ // line 1282 "HpricotScanService.java"
870
1282
  {
871
1283
  int _klen;
872
1284
  int _trans = 0;
@@ -888,10 +1300,10 @@ case 1:
888
1300
  while ( _nacts-- > 0 ) {
889
1301
  switch ( _hpricot_scan_actions[_acts++] ) {
890
1302
  case 21:
891
- // line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
1303
+ // line 1 "hpricot_scan.java.rl"
892
1304
  {ts = p;}
893
1305
  break;
894
- // line 895 "ext/hpricot_scan/HpricotScanService.java"
1306
+ // line 1307 "HpricotScanService.java"
895
1307
  }
896
1308
  }
897
1309
 
@@ -946,233 +1358,239 @@ case 1:
946
1358
 
947
1359
  _trans = _hpricot_scan_indicies[_trans];
948
1360
  case 3:
949
- cs = _hpricot_scan_trans_targs[_trans];
1361
+ cs = _hpricot_scan_trans_targs_wi[_trans];
950
1362
 
951
- if ( _hpricot_scan_trans_actions[_trans] != 0 ) {
952
- _acts = _hpricot_scan_trans_actions[_trans];
1363
+ if ( _hpricot_scan_trans_actions_wi[_trans] != 0 ) {
1364
+ _acts = _hpricot_scan_trans_actions_wi[_trans];
953
1365
  _nacts = (int) _hpricot_scan_actions[_acts++];
954
1366
  while ( _nacts-- > 0 )
955
1367
  {
956
1368
  switch ( _hpricot_scan_actions[_acts++] )
957
1369
  {
958
1370
  case 0:
959
- // line 147 "ext/hpricot_scan/hpricot_scan.java.rl"
1371
+ // line 514 "hpricot_scan.java.rl"
960
1372
  {
961
- if (text) {
962
- CAT(tag, p);
963
- ELE(sym_text);
964
- text = false;
1373
+ if(text) {
1374
+ tag = CAT(tag, mark_tag, p);
1375
+ ELE(x.sym_text);
1376
+ text = false;
965
1377
  }
966
1378
  attr = runtime.getNil();
967
- tag[0] = runtime.getNil();
1379
+ tag = runtime.getNil();
968
1380
  mark_tag = -1;
969
1381
  ele_open = true;
970
1382
  }
971
1383
  break;
972
1384
  case 1:
973
- // line 159 "ext/hpricot_scan/hpricot_scan.java.rl"
1385
+ // line 526 "hpricot_scan.java.rl"
974
1386
  { mark_tag = p; }
975
1387
  break;
976
1388
  case 2:
977
- // line 160 "ext/hpricot_scan/hpricot_scan.java.rl"
1389
+ // line 527 "hpricot_scan.java.rl"
978
1390
  { mark_aval = p; }
979
1391
  break;
980
1392
  case 3:
981
- // line 161 "ext/hpricot_scan/hpricot_scan.java.rl"
1393
+ // line 528 "hpricot_scan.java.rl"
982
1394
  { mark_akey = p; }
983
1395
  break;
984
1396
  case 4:
985
- // line 162 "ext/hpricot_scan/hpricot_scan.java.rl"
986
- { SET(tag, p); }
1397
+ // line 529 "hpricot_scan.java.rl"
1398
+ { tag = SET(mark_tag, p, tag); }
987
1399
  break;
988
1400
  case 5:
989
- // line 164 "ext/hpricot_scan/hpricot_scan.java.rl"
990
- { SET(aval, p); }
1401
+ // line 531 "hpricot_scan.java.rl"
1402
+ { aval = SET(mark_aval, p, aval); }
991
1403
  break;
992
1404
  case 6:
993
- // line 165 "ext/hpricot_scan/hpricot_scan.java.rl"
994
- {
995
- if (buf[p-1] == '"' || buf[p-1] == '\'') { SET(aval, p-1); }
996
- else { SET(aval, p); }
1405
+ // line 532 "hpricot_scan.java.rl"
1406
+ {
1407
+ if(data[p-1] == '"' || data[p-1] == '\'') {
1408
+ aval = SET(mark_aval, p-1, aval);
1409
+ } else {
1410
+ aval = SET(mark_aval, p, aval);
1411
+ }
997
1412
  }
998
1413
  break;
999
1414
  case 7:
1000
- // line 169 "ext/hpricot_scan/hpricot_scan.java.rl"
1001
- { SET(akey, p); }
1415
+ // line 539 "hpricot_scan.java.rl"
1416
+ { akey = SET(mark_akey, p, akey); }
1002
1417
  break;
1003
1418
  case 8:
1004
- // line 170 "ext/hpricot_scan/hpricot_scan.java.rl"
1005
- { SET(aval, p); ATTR(rb_str_new2("version"), aval); }
1419
+ // line 540 "hpricot_scan.java.rl"
1420
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("version"), aval); }
1006
1421
  break;
1007
1422
  case 9:
1008
- // line 171 "ext/hpricot_scan/hpricot_scan.java.rl"
1009
- { SET(aval, p); ATTR(rb_str_new2("encoding"), aval); }
1423
+ // line 541 "hpricot_scan.java.rl"
1424
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("encoding"), aval); }
1010
1425
  break;
1011
1426
  case 10:
1012
- // line 172 "ext/hpricot_scan/hpricot_scan.java.rl"
1013
- { SET(aval, p); ATTR(rb_str_new2("standalone"), aval); }
1427
+ // line 542 "hpricot_scan.java.rl"
1428
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("standalone"), aval); }
1014
1429
  break;
1015
1430
  case 11:
1016
- // line 173 "ext/hpricot_scan/hpricot_scan.java.rl"
1017
- { SET(aval, p); ATTR(rb_str_new2("public_id"), aval); }
1431
+ // line 543 "hpricot_scan.java.rl"
1432
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("public_id"), aval); }
1018
1433
  break;
1019
1434
  case 12:
1020
- // line 174 "ext/hpricot_scan/hpricot_scan.java.rl"
1021
- { SET(aval, p); ATTR(rb_str_new2("system_id"), aval); }
1435
+ // line 544 "hpricot_scan.java.rl"
1436
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("system_id"), aval); }
1022
1437
  break;
1023
1438
  case 13:
1024
- // line 176 "ext/hpricot_scan/hpricot_scan.java.rl"
1025
- {
1026
- akey[0] = runtime.getNil();
1027
- aval[0] = runtime.getNil();
1028
- mark_akey = -1;
1029
- mark_aval = -1;
1439
+ // line 546 "hpricot_scan.java.rl"
1440
+ {
1441
+ akey = runtime.getNil();
1442
+ aval = runtime.getNil();
1443
+ mark_akey = -1;
1444
+ mark_aval = -1;
1030
1445
  }
1031
1446
  break;
1032
1447
  case 14:
1033
- // line 183 "ext/hpricot_scan/hpricot_scan.java.rl"
1034
- {
1035
- ATTR(akey, aval);
1448
+ // line 553 "hpricot_scan.java.rl"
1449
+ {
1450
+ if(!S.xml) {
1451
+ akey = akey.callMethod(runtime.getCurrentContext(), "downcase");
1452
+ }
1453
+ ATTR(akey, aval);
1036
1454
  }
1037
1455
  break;
1038
1456
  case 15:
1039
- // line 9 "ext/hpricot_scan/hpricot_scan.java.rl"
1457
+ // line 9 "hpricot_scan.java.rl"
1040
1458
  {curline += 1;}
1041
1459
  break;
1042
1460
  case 16:
1043
- // line 46 "ext/hpricot_scan/hpricot_scan.java.rl"
1461
+ // line 46 "hpricot_scan.java.rl"
1044
1462
  { TEXT_PASS(); }
1045
1463
  break;
1046
1464
  case 17:
1047
- // line 50 "ext/hpricot_scan/hpricot_scan.java.rl"
1465
+ // line 50 "hpricot_scan.java.rl"
1048
1466
  { EBLK(comment, 3); {cs = 204; _goto_targ = 2; if (true) continue _goto;} }
1049
1467
  break;
1050
1468
  case 18:
1051
- // line 55 "ext/hpricot_scan/hpricot_scan.java.rl"
1469
+ // line 55 "hpricot_scan.java.rl"
1052
1470
  { EBLK(cdata, 3); {cs = 204; _goto_targ = 2; if (true) continue _goto;} }
1053
1471
  break;
1054
1472
  case 19:
1055
- // line 60 "ext/hpricot_scan/hpricot_scan.java.rl"
1473
+ // line 60 "hpricot_scan.java.rl"
1056
1474
  { EBLK(procins, 2); {cs = 204; _goto_targ = 2; if (true) continue _goto;} }
1057
1475
  break;
1058
1476
  case 22:
1059
- // line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
1477
+ // line 1 "hpricot_scan.java.rl"
1060
1478
  {te = p+1;}
1061
1479
  break;
1062
1480
  case 23:
1063
- // line 50 "ext/hpricot_scan/hpricot_scan.java.rl"
1481
+ // line 50 "hpricot_scan.java.rl"
1064
1482
  {te = p+1;}
1065
1483
  break;
1066
1484
  case 24:
1067
- // line 51 "ext/hpricot_scan/hpricot_scan.java.rl"
1485
+ // line 51 "hpricot_scan.java.rl"
1068
1486
  {te = p+1;{ TEXT_PASS(); }}
1069
1487
  break;
1070
1488
  case 25:
1071
- // line 51 "ext/hpricot_scan/hpricot_scan.java.rl"
1489
+ // line 51 "hpricot_scan.java.rl"
1072
1490
  {te = p;p--;{ TEXT_PASS(); }}
1073
1491
  break;
1074
1492
  case 26:
1075
- // line 51 "ext/hpricot_scan/hpricot_scan.java.rl"
1493
+ // line 51 "hpricot_scan.java.rl"
1076
1494
  {{p = ((te))-1;}{ TEXT_PASS(); }}
1077
1495
  break;
1078
1496
  case 27:
1079
- // line 55 "ext/hpricot_scan/hpricot_scan.java.rl"
1497
+ // line 55 "hpricot_scan.java.rl"
1080
1498
  {te = p+1;}
1081
1499
  break;
1082
1500
  case 28:
1083
- // line 56 "ext/hpricot_scan/hpricot_scan.java.rl"
1501
+ // line 56 "hpricot_scan.java.rl"
1084
1502
  {te = p+1;{ TEXT_PASS(); }}
1085
1503
  break;
1086
1504
  case 29:
1087
- // line 56 "ext/hpricot_scan/hpricot_scan.java.rl"
1505
+ // line 56 "hpricot_scan.java.rl"
1088
1506
  {te = p;p--;{ TEXT_PASS(); }}
1089
1507
  break;
1090
1508
  case 30:
1091
- // line 56 "ext/hpricot_scan/hpricot_scan.java.rl"
1509
+ // line 56 "hpricot_scan.java.rl"
1092
1510
  {{p = ((te))-1;}{ TEXT_PASS(); }}
1093
1511
  break;
1094
1512
  case 31:
1095
- // line 60 "ext/hpricot_scan/hpricot_scan.java.rl"
1513
+ // line 60 "hpricot_scan.java.rl"
1096
1514
  {te = p+1;}
1097
1515
  break;
1098
1516
  case 32:
1099
- // line 61 "ext/hpricot_scan/hpricot_scan.java.rl"
1517
+ // line 61 "hpricot_scan.java.rl"
1100
1518
  {te = p+1;{ TEXT_PASS(); }}
1101
1519
  break;
1102
1520
  case 33:
1103
- // line 61 "ext/hpricot_scan/hpricot_scan.java.rl"
1521
+ // line 61 "hpricot_scan.java.rl"
1104
1522
  {te = p;p--;{ TEXT_PASS(); }}
1105
1523
  break;
1106
1524
  case 34:
1107
- // line 66 "ext/hpricot_scan/hpricot_scan.java.rl"
1525
+ // line 66 "hpricot_scan.java.rl"
1108
1526
  {act = 8;}
1109
1527
  break;
1110
1528
  case 35:
1111
- // line 68 "ext/hpricot_scan/hpricot_scan.java.rl"
1529
+ // line 68 "hpricot_scan.java.rl"
1112
1530
  {act = 10;}
1113
1531
  break;
1114
1532
  case 36:
1115
- // line 70 "ext/hpricot_scan/hpricot_scan.java.rl"
1533
+ // line 70 "hpricot_scan.java.rl"
1116
1534
  {act = 12;}
1117
1535
  break;
1118
1536
  case 37:
1119
- // line 73 "ext/hpricot_scan/hpricot_scan.java.rl"
1537
+ // line 73 "hpricot_scan.java.rl"
1120
1538
  {act = 15;}
1121
1539
  break;
1122
1540
  case 38:
1123
- // line 65 "ext/hpricot_scan/hpricot_scan.java.rl"
1541
+ // line 65 "hpricot_scan.java.rl"
1124
1542
  {te = p+1;{ ELE(xmldecl); }}
1125
1543
  break;
1126
1544
  case 39:
1127
- // line 66 "ext/hpricot_scan/hpricot_scan.java.rl"
1545
+ // line 66 "hpricot_scan.java.rl"
1128
1546
  {te = p+1;{ ELE(doctype); }}
1129
1547
  break;
1130
1548
  case 40:
1131
- // line 68 "ext/hpricot_scan/hpricot_scan.java.rl"
1549
+ // line 68 "hpricot_scan.java.rl"
1132
1550
  {te = p+1;{ ELE(stag); }}
1133
1551
  break;
1134
1552
  case 41:
1135
- // line 69 "ext/hpricot_scan/hpricot_scan.java.rl"
1553
+ // line 69 "hpricot_scan.java.rl"
1136
1554
  {te = p+1;{ ELE(etag); }}
1137
1555
  break;
1138
1556
  case 42:
1139
- // line 70 "ext/hpricot_scan/hpricot_scan.java.rl"
1557
+ // line 70 "hpricot_scan.java.rl"
1140
1558
  {te = p+1;{ ELE(emptytag); }}
1141
1559
  break;
1142
1560
  case 43:
1143
- // line 71 "ext/hpricot_scan/hpricot_scan.java.rl"
1561
+ // line 71 "hpricot_scan.java.rl"
1144
1562
  {te = p+1;{ {cs = 214; _goto_targ = 2; if (true) continue _goto;} }}
1145
1563
  break;
1146
1564
  case 44:
1147
- // line 72 "ext/hpricot_scan/hpricot_scan.java.rl"
1565
+ // line 72 "hpricot_scan.java.rl"
1148
1566
  {te = p+1;{ {cs = 216; _goto_targ = 2; if (true) continue _goto;} }}
1149
1567
  break;
1150
1568
  case 45:
1151
- // line 73 "ext/hpricot_scan/hpricot_scan.java.rl"
1569
+ // line 73 "hpricot_scan.java.rl"
1152
1570
  {te = p+1;{ TEXT_PASS(); }}
1153
1571
  break;
1154
1572
  case 46:
1155
- // line 66 "ext/hpricot_scan/hpricot_scan.java.rl"
1573
+ // line 66 "hpricot_scan.java.rl"
1156
1574
  {te = p;p--;{ ELE(doctype); }}
1157
1575
  break;
1158
1576
  case 47:
1159
- // line 67 "ext/hpricot_scan/hpricot_scan.java.rl"
1577
+ // line 67 "hpricot_scan.java.rl"
1160
1578
  {te = p;p--;{ {cs = 218; _goto_targ = 2; if (true) continue _goto;} }}
1161
1579
  break;
1162
1580
  case 48:
1163
- // line 73 "ext/hpricot_scan/hpricot_scan.java.rl"
1581
+ // line 73 "hpricot_scan.java.rl"
1164
1582
  {te = p;p--;{ TEXT_PASS(); }}
1165
1583
  break;
1166
1584
  case 49:
1167
- // line 67 "ext/hpricot_scan/hpricot_scan.java.rl"
1585
+ // line 67 "hpricot_scan.java.rl"
1168
1586
  {{p = ((te))-1;}{ {cs = 218; _goto_targ = 2; if (true) continue _goto;} }}
1169
1587
  break;
1170
1588
  case 50:
1171
- // line 73 "ext/hpricot_scan/hpricot_scan.java.rl"
1589
+ // line 73 "hpricot_scan.java.rl"
1172
1590
  {{p = ((te))-1;}{ TEXT_PASS(); }}
1173
1591
  break;
1174
1592
  case 51:
1175
- // line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
1593
+ // line 1 "hpricot_scan.java.rl"
1176
1594
  { switch( act ) {
1177
1595
  case 8:
1178
1596
  {{p = ((te))-1;} ELE(doctype); }
@@ -1186,10 +1604,11 @@ case 3:
1186
1604
  case 15:
1187
1605
  {{p = ((te))-1;} TEXT_PASS(); }
1188
1606
  break;
1607
+ default: break;
1189
1608
  }
1190
1609
  }
1191
1610
  break;
1192
- // line 1193 "ext/hpricot_scan/HpricotScanService.java"
1611
+ // line 1612 "HpricotScanService.java"
1193
1612
  }
1194
1613
  }
1195
1614
  }
@@ -1200,10 +1619,10 @@ case 2:
1200
1619
  while ( _nacts-- > 0 ) {
1201
1620
  switch ( _hpricot_scan_actions[_acts++] ) {
1202
1621
  case 20:
1203
- // line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
1622
+ // line 1 "hpricot_scan.java.rl"
1204
1623
  {ts = -1;}
1205
1624
  break;
1206
- // line 1207 "ext/hpricot_scan/HpricotScanService.java"
1625
+ // line 1626 "HpricotScanService.java"
1207
1626
  }
1208
1627
  }
1209
1628
 
@@ -1225,81 +1644,443 @@ case 5:
1225
1644
  }
1226
1645
  break; }
1227
1646
  }
1228
- // line 297 "ext/hpricot_scan/hpricot_scan.java.rl"
1229
-
1230
- if ( cs == hpricot_scan_error ) {
1231
- if(!tag[0].isNil()) {
1232
- rb_raise(rb_eHpricotParseError, "parse error on element <"+tag.toString()+">, starting on line "+curline+".\n" + NO_WAY_SERIOUSLY);
1233
- } else {
1234
- rb_raise(rb_eHpricotParseError, "parse error on line "+curline+".\n" + NO_WAY_SERIOUSLY);
1235
- }
1647
+ // line 714 "hpricot_scan.java.rl"
1648
+
1649
+ if(cs == hpricot_scan_error) {
1650
+ if(!tag.isNil()) {
1651
+ throw newRaiseException(x.rb_eHpricotParseError, "parse error on element <" + tag + ">, starting on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
1652
+ } else {
1653
+ throw newRaiseException(x.rb_eHpricotParseError, "parse error on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
1654
+ }
1655
+ }
1656
+
1657
+ if(done && ele_open) {
1658
+ ele_open = false;
1659
+ if(ts > 0) {
1660
+ mark_tag = ts;
1661
+ ts = 0;
1662
+ text = true;
1663
+ }
1664
+ }
1665
+
1666
+ if(ts == -1) {
1667
+ have = 0;
1668
+ if(mark_tag != -1 && text) {
1669
+ if(done) {
1670
+ if(mark_tag < p - 1) {
1671
+ tag = CAT(tag, mark_tag, p-1);
1672
+ ELE(x.sym_text);
1673
+ }
1674
+ } else {
1675
+ tag = CAT(tag, mark_tag, p);
1676
+ }
1677
+ }
1678
+ if(io) {
1679
+ mark_tag = 0;
1680
+ } else {
1681
+ mark_tag = ((RubyString)port).getByteList().begin;
1682
+ }
1683
+ } else if(io) {
1684
+ have = pe - ts;
1685
+ System.arraycopy(data, ts, data, buf, have);
1686
+ mark_tag = SLIDE(mark_tag);
1687
+ mark_akey = SLIDE(mark_akey);
1688
+ mark_aval = SLIDE(mark_aval);
1689
+ te -= ts;
1690
+ ts = 0;
1691
+ }
1692
+ }
1693
+
1694
+ if(S != null) {
1695
+ return S.doc;
1696
+ }
1697
+
1698
+ return runtime.getNil();
1699
+ }
1700
+ }
1701
+
1702
+ public static class HpricotModule {
1703
+ // hpricot_scan
1704
+ @JRubyMethod(module = true, optional = 1, required = 1, frame = true)
1705
+ public static IRubyObject scan(IRubyObject self, IRubyObject[] args, Block block) {
1706
+ return new Scanner(self, args, block).scan();
1707
+ }
1708
+
1709
+ // hpricot_css
1710
+ @JRubyMethod(module = true)
1711
+ public static IRubyObject css(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
1712
+ return new HpricotCss(self, mod, str, node).scan();
1713
+ }
1714
+ }
1715
+
1716
+ public static class CData {
1717
+ @JRubyMethod
1718
+ public static IRubyObject content(IRubyObject self) {
1719
+ return hpricot_ele_get_name(self);
1720
+ }
1721
+
1722
+ @JRubyMethod(name = "content=")
1723
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
1724
+ return hpricot_ele_set_name(self, value);
1725
+ }
1726
+ }
1727
+
1728
+ public static class Comment {
1729
+ @JRubyMethod
1730
+ public static IRubyObject content(IRubyObject self) {
1731
+ return hpricot_ele_get_name(self);
1732
+ }
1733
+
1734
+ @JRubyMethod(name = "content=")
1735
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
1736
+ return hpricot_ele_set_name(self, value);
1737
+ }
1738
+ }
1739
+
1740
+ public static class DocType {
1741
+ @JRubyMethod
1742
+ public static IRubyObject raw_string(IRubyObject self) {
1743
+ return hpricot_ele_get_name(self);
1744
+ }
1745
+
1746
+ @JRubyMethod
1747
+ public static IRubyObject clear_raw(IRubyObject self) {
1748
+ return hpricot_ele_clear_name(self);
1749
+ }
1750
+
1751
+ @JRubyMethod
1752
+ public static IRubyObject target(IRubyObject self) {
1753
+ return hpricot_ele_get_target(self);
1754
+ }
1755
+
1756
+ @JRubyMethod(name = "target=")
1757
+ public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
1758
+ return hpricot_ele_set_target(self, value);
1759
+ }
1760
+
1761
+ @JRubyMethod
1762
+ public static IRubyObject public_id(IRubyObject self) {
1763
+ return hpricot_ele_get_public_id(self);
1764
+ }
1765
+
1766
+ @JRubyMethod(name = "public_id=")
1767
+ public static IRubyObject public_id_set(IRubyObject self, IRubyObject value) {
1768
+ return hpricot_ele_set_public_id(self, value);
1769
+ }
1770
+
1771
+ @JRubyMethod
1772
+ public static IRubyObject system_id(IRubyObject self) {
1773
+ return hpricot_ele_get_system_id(self);
1774
+ }
1775
+
1776
+ @JRubyMethod(name = "system_id=")
1777
+ public static IRubyObject system_id_set(IRubyObject self, IRubyObject value) {
1778
+ return hpricot_ele_set_system_id(self, value);
1779
+ }
1780
+ }
1781
+
1782
+ public static class Elem {
1783
+ @JRubyMethod
1784
+ public static IRubyObject clear_raw(IRubyObject self) {
1785
+ return hpricot_ele_clear_raw(self);
1786
+ }
1787
+ }
1788
+
1789
+ public static class BogusETag {
1790
+ @JRubyMethod
1791
+ public static IRubyObject raw_string(IRubyObject self) {
1792
+ return hpricot_ele_get_attr(self);
1793
+ }
1794
+
1795
+ @JRubyMethod
1796
+ public static IRubyObject clear_raw(IRubyObject self) {
1797
+ return hpricot_ele_clear_attr(self);
1798
+ }
1799
+ }
1800
+
1801
+ public static class Text {
1802
+ @JRubyMethod
1803
+ public static IRubyObject raw_string(IRubyObject self) {
1804
+ return hpricot_ele_get_name(self);
1805
+ }
1806
+
1807
+ @JRubyMethod
1808
+ public static IRubyObject clear_raw(IRubyObject self) {
1809
+ return hpricot_ele_clear_name(self);
1810
+ }
1811
+
1812
+ @JRubyMethod
1813
+ public static IRubyObject content(IRubyObject self) {
1814
+ return hpricot_ele_get_name(self);
1815
+ }
1816
+
1817
+ @JRubyMethod(name = "content=")
1818
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
1819
+ return hpricot_ele_set_name(self, value);
1820
+ }
1821
+ }
1822
+
1823
+ public static class XMLDecl {
1824
+ @JRubyMethod
1825
+ public static IRubyObject raw_string(IRubyObject self) {
1826
+ return hpricot_ele_get_name(self);
1827
+ }
1828
+
1829
+ @JRubyMethod
1830
+ public static IRubyObject clear_raw(IRubyObject self) {
1831
+ return hpricot_ele_clear_name(self);
1832
+ }
1833
+
1834
+ @JRubyMethod
1835
+ public static IRubyObject encoding(IRubyObject self) {
1836
+ return hpricot_ele_get_encoding(self);
1837
+ }
1838
+
1839
+ @JRubyMethod(name = "encoding=")
1840
+ public static IRubyObject encoding_set(IRubyObject self, IRubyObject value) {
1841
+ return hpricot_ele_set_encoding(self, value);
1842
+ }
1843
+
1844
+ @JRubyMethod
1845
+ public static IRubyObject standalone(IRubyObject self) {
1846
+ return hpricot_ele_get_standalone(self);
1847
+ }
1848
+
1849
+ @JRubyMethod(name = "standalone=")
1850
+ public static IRubyObject standalone_set(IRubyObject self, IRubyObject value) {
1851
+ return hpricot_ele_set_standalone(self, value);
1852
+ }
1853
+
1854
+ @JRubyMethod
1855
+ public static IRubyObject version(IRubyObject self) {
1856
+ return hpricot_ele_get_version(self);
1857
+ }
1858
+
1859
+ @JRubyMethod(name = "version=")
1860
+ public static IRubyObject version_set(IRubyObject self, IRubyObject value) {
1861
+ return hpricot_ele_set_version(self, value);
1862
+ }
1863
+ }
1864
+
1865
+ public static class ProcIns {
1866
+ @JRubyMethod
1867
+ public static IRubyObject target(IRubyObject self) {
1868
+ return hpricot_ele_get_name(self);
1869
+ }
1870
+
1871
+ @JRubyMethod(name = "target=")
1872
+ public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
1873
+ return hpricot_ele_set_name(self, value);
1874
+ }
1875
+
1876
+ @JRubyMethod
1877
+ public static IRubyObject content(IRubyObject self) {
1878
+ return hpricot_ele_get_attr(self);
1879
+ }
1880
+
1881
+ @JRubyMethod(name = "content=")
1882
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
1883
+ return hpricot_ele_set_attr(self, value);
1884
+ }
1885
+ }
1886
+
1887
+ public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
1888
+
1889
+ public final static int H_ELE_TAG = 0;
1890
+ public final static int H_ELE_PARENT = 1;
1891
+ public final static int H_ELE_ATTR = 2;
1892
+ public final static int H_ELE_ETAG = 3;
1893
+ public final static int H_ELE_RAW = 4;
1894
+ public final static int H_ELE_EC = 5;
1895
+ public final static int H_ELE_HASH = 6;
1896
+ public final static int H_ELE_CHILDREN = 7;
1897
+
1898
+ public static IRubyObject H_ELE_GET(IRubyObject recv, int n) {
1899
+ return ((IRubyObject[])recv.dataGetStruct())[n];
1900
+ }
1901
+
1902
+ public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
1903
+ ((IRubyObject[])recv.dataGetStruct())[n] = value;
1904
+ return value;
1905
+ }
1906
+
1907
+ private static class RefCallback implements Callback {
1908
+ private final int n;
1909
+ public RefCallback(int n) { this.n = n; }
1910
+
1911
+ public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
1912
+ return H_ELE_GET(recv, n);
1913
+ }
1914
+
1915
+ public Arity getArity() {
1916
+ return Arity.NO_ARGUMENTS;
1917
+ }
1918
+ }
1919
+
1920
+ private static class SetCallback implements Callback {
1921
+ private final int n;
1922
+ public SetCallback(int n) { this.n = n; }
1923
+
1924
+ public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
1925
+ return H_ELE_SET(recv, n, args[0]);
1926
+ }
1927
+
1928
+ public Arity getArity() {
1929
+ return Arity.ONE_ARGUMENT;
1930
+ }
1236
1931
  }
1932
+
1933
+ private final static Callback[] ref_func = new Callback[]{
1934
+ new RefCallback(0),
1935
+ new RefCallback(1),
1936
+ new RefCallback(2),
1937
+ new RefCallback(3),
1938
+ new RefCallback(4),
1939
+ new RefCallback(5),
1940
+ new RefCallback(6),
1941
+ new RefCallback(7),
1942
+ new RefCallback(8),
1943
+ new RefCallback(9)};
1944
+
1945
+ private final static Callback[] set_func = new Callback[]{
1946
+ new SetCallback(0),
1947
+ new SetCallback(1),
1948
+ new SetCallback(2),
1949
+ new SetCallback(3),
1950
+ new SetCallback(4),
1951
+ new SetCallback(5),
1952
+ new SetCallback(6),
1953
+ new SetCallback(7),
1954
+ new SetCallback(8),
1955
+ new SetCallback(9)};
1956
+
1957
+ public final static ObjectAllocator alloc_hpricot_struct = new ObjectAllocator() {
1958
+ // alloc_hpricot_struct
1959
+ public IRubyObject allocate(Ruby runtime, RubyClass klass) {
1960
+ RubyClass kurrent = klass;
1961
+ Object sz = kurrent.fastGetInternalVariable("__size__");
1962
+ while(sz == null && kurrent != null) {
1963
+ kurrent = kurrent.getSuperClass();
1964
+ sz = kurrent.fastGetInternalVariable("__size__");
1965
+ }
1966
+ int size = RubyNumeric.fix2int((RubyObject)sz);
1967
+ RubyObject obj = new RubyObject(runtime, klass);
1968
+ IRubyObject[] all = new IRubyObject[size];
1969
+ java.util.Arrays.fill(all, runtime.getNil());
1970
+ obj.dataWrapStruct(all);
1971
+ return obj;
1972
+ }
1973
+ };
1974
+
1975
+ public static RubyClass makeHpricotStruct(Ruby runtime, IRubyObject[] members) {
1976
+ RubyClass klass = RubyClass.newClass(runtime, runtime.getObject());
1977
+ klass.fastSetInternalVariable("__size__", runtime.newFixnum(members.length));
1978
+ klass.setAllocator(alloc_hpricot_struct);
1979
+
1980
+ for(int i = 0; i < members.length; i++) {
1981
+ String id = members[i].toString();
1982
+ klass.defineMethod(id, ref_func[i]);
1983
+ klass.defineMethod(id + "=", set_func[i]);
1984
+ }
1237
1985
 
1238
- if ( done && ele_open ) {
1239
- ele_open = false;
1240
- if(ts > -1) {
1241
- mark_tag = ts;
1242
- ts = -1;
1243
- text = true;
1244
- }
1986
+ return klass;
1987
+ }
1988
+
1989
+ public boolean basicLoad(final Ruby runtime) throws IOException {
1990
+ Init_hpricot_scan(runtime);
1991
+ return true;
1245
1992
  }
1246
1993
 
1247
- if(ts == -1) {
1248
- have = 0;
1249
- /* text nodes have no ts because each byte is parsed alone */
1250
- if(mark_tag != -1 && text) {
1251
- if (done) {
1252
- if(mark_tag < p-1) {
1253
- CAT(tag, p-1);
1254
- ELE(sym_text);
1255
- }
1256
- } else {
1257
- CAT(tag, p);
1994
+ public static class Extra {
1995
+ IRubyObject symAllow, symDeny, sym_xmldecl, sym_doctype,
1996
+ sym_procins, sym_stag, sym_etag, sym_emptytag,
1997
+ sym_allowed, sym_children, sym_comment,
1998
+ sym_cdata, sym_name, sym_parent,
1999
+ sym_raw_attributes, sym_raw_string, sym_tagno,
2000
+ sym_text, sym_EMPTY, sym_CDATA;
2001
+
2002
+ public RubyModule mHpricot;
2003
+ public RubyClass structElem;
2004
+ public RubyClass structAttr;
2005
+ public RubyClass structBasic;
2006
+ public RubyClass cDoc;
2007
+ public RubyClass cCData;
2008
+ public RubyClass cComment;
2009
+ public RubyClass cDocType;
2010
+ public RubyClass cElem;
2011
+ public RubyClass cBogusETag;
2012
+ public RubyClass cText;
2013
+ public RubyClass cXMLDecl;
2014
+ public RubyClass cProcIns;
2015
+ public RubyClass rb_eHpricotParseError;
2016
+ public IRubyObject reProcInsParse;
2017
+
2018
+ public Extra(Ruby runtime) {
2019
+ symAllow = runtime.newSymbol("allow");
2020
+ symDeny = runtime.newSymbol("deny");
2021
+ sym_xmldecl = runtime.newSymbol("xmldecl");
2022
+ sym_doctype = runtime.newSymbol("doctype");
2023
+ sym_procins = runtime.newSymbol("procins");
2024
+ sym_stag = runtime.newSymbol("stag");
2025
+ sym_etag = runtime.newSymbol("etag");
2026
+ sym_emptytag = runtime.newSymbol("emptytag");
2027
+ sym_allowed = runtime.newSymbol("allowed");
2028
+ sym_children = runtime.newSymbol("children");
2029
+ sym_comment = runtime.newSymbol("comment");
2030
+ sym_cdata = runtime.newSymbol("cdata");
2031
+ sym_name = runtime.newSymbol("name");
2032
+ sym_parent = runtime.newSymbol("parent");
2033
+ sym_raw_attributes = runtime.newSymbol("raw_attributes");
2034
+ sym_raw_string = runtime.newSymbol("raw_string");
2035
+ sym_tagno = runtime.newSymbol("tagno");
2036
+ sym_text = runtime.newSymbol("text");
2037
+ sym_EMPTY = runtime.newSymbol("EMPTY");
2038
+ sym_CDATA = runtime.newSymbol("CDATA");
1258
2039
  }
1259
- }
1260
- mark_tag = 0;
1261
- } else {
1262
- have = pe - ts;
1263
- System.arraycopy(buf,ts,buf,0,have);
1264
- SLIDE(tag);
1265
- SLIDE(akey);
1266
- SLIDE(aval);
1267
- te = (te - ts);
1268
- ts = 0;
1269
2040
  }
1270
- }
1271
- return runtime.getNil();
1272
- }
1273
2041
 
1274
- public static IRubyObject __hpricot_scan(IRubyObject recv, IRubyObject port, Block block) {
1275
- Ruby runtime = recv.getRuntime();
1276
- HpricotScanService service = new HpricotScanService();
1277
- service.runtime = runtime;
1278
- service.xmldecl = runtime.newSymbol("xmldecl");
1279
- service.doctype = runtime.newSymbol("doctype");
1280
- service.procins = runtime.newSymbol("procins");
1281
- service.stag = runtime.newSymbol("stag");
1282
- service.etag = runtime.newSymbol("etag");
1283
- service.emptytag = runtime.newSymbol("emptytag");
1284
- service.comment = runtime.newSymbol("comment");
1285
- service.cdata = runtime.newSymbol("cdata");
1286
- service.sym_text = runtime.newSymbol("text");
1287
- service.block = block;
1288
- return service.hpricot_scan(recv, port);
1289
- }
2042
+ public static void Init_hpricot_scan(Ruby runtime) {
2043
+ Extra x = new Extra(runtime);
1290
2044
 
2045
+ x.mHpricot = runtime.defineModule("Hpricot");
2046
+ x.mHpricot.dataWrapStruct(x);
1291
2047
 
1292
- public boolean basicLoad(final Ruby runtime) throws IOException {
1293
- Init_hpricot_scan(runtime);
1294
- return true;
1295
- }
2048
+ x.mHpricot.getSingletonClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
2049
+ x.mHpricot.defineAnnotatedMethods(HpricotModule.class);
1296
2050
 
1297
- public static void Init_hpricot_scan(Ruby runtime) {
1298
- RubyModule mHpricot = runtime.defineModule("Hpricot");
1299
- mHpricot.getMetaClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
1300
- CallbackFactory fact = runtime.callbackFactory(HpricotScanService.class);
1301
- mHpricot.getMetaClass().defineMethod("scan",fact.getSingletonMethod("__hpricot_scan",IRubyObject.class));
1302
- mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
1303
- rubyApi = JavaEmbedUtils.newObjectAdapter();
1304
- }
2051
+ x.rb_eHpricotParseError = x.mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
2052
+
2053
+ x.structElem = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes, x.sym_etag, x.sym_raw_string, x.sym_allowed, x.sym_tagno, x.sym_children});
2054
+ x.structAttr = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes});
2055
+ x.structBasic= makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent});
2056
+
2057
+ x.cDoc = x.mHpricot.defineClassUnder("Doc", x.structElem, x.structElem.getAllocator());
2058
+
2059
+ x.cCData = x.mHpricot.defineClassUnder("CData", x.structBasic, x.structBasic.getAllocator());
2060
+ x.cCData.defineAnnotatedMethods(CData.class);
2061
+
2062
+ x.cComment = x.mHpricot.defineClassUnder("Comment", x.structBasic, x.structBasic.getAllocator());
2063
+ x.cComment.defineAnnotatedMethods(Comment.class);
2064
+
2065
+ x.cDocType = x.mHpricot.defineClassUnder("DocType", x.structAttr, x.structAttr.getAllocator());
2066
+ x.cDocType.defineAnnotatedMethods(DocType.class);
2067
+
2068
+ x.cElem = x.mHpricot.defineClassUnder("Elem", x.structElem, x.structElem.getAllocator());
2069
+ x.cElem.defineAnnotatedMethods(Elem.class);
2070
+
2071
+ x.cBogusETag = x.mHpricot.defineClassUnder("BogusETag", x.structAttr, x.structAttr.getAllocator());
2072
+ x.cBogusETag.defineAnnotatedMethods(BogusETag.class);
2073
+
2074
+ x.cText = x.mHpricot.defineClassUnder("Text", x.structBasic, x.structBasic.getAllocator());
2075
+ x.cText.defineAnnotatedMethods(Text.class);
2076
+
2077
+ x.cXMLDecl = x.mHpricot.defineClassUnder("XMLDecl", x.structAttr, x.structAttr.getAllocator());
2078
+ x.cXMLDecl.defineAnnotatedMethods(XMLDecl.class);
2079
+
2080
+ x.cProcIns = x.mHpricot.defineClassUnder("ProcIns", x.structAttr, x.structAttr.getAllocator());
2081
+ x.cProcIns.defineAnnotatedMethods(ProcIns.class);
2082
+
2083
+ x.reProcInsParse = runtime.evalScriptlet("/\\A<\\?(\\S+)\\s+(.+)/m");
2084
+ x.mHpricot.setConstant("ProcInsParse", x.reProcInsParse);
2085
+ }
1305
2086
  }