hpricot 0.8.1-x86-mswin32 → 0.8.2-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,372 +2,1151 @@
2
2
  import java.io.IOException;
3
3
 
4
4
  import org.jruby.Ruby;
5
+ import org.jruby.RubyArray;
5
6
  import org.jruby.RubyClass;
6
7
  import org.jruby.RubyHash;
7
8
  import org.jruby.RubyModule;
8
9
  import org.jruby.RubyNumeric;
10
+ import org.jruby.RubyObject;
9
11
  import org.jruby.RubyObjectAdapter;
12
+ import org.jruby.RubyRegexp;
10
13
  import org.jruby.RubyString;
14
+ import org.jruby.anno.JRubyMethod;
15
+ import org.jruby.exceptions.RaiseException;
11
16
  import org.jruby.javasupport.JavaEmbedUtils;
17
+ import org.jruby.runtime.Arity;
12
18
  import org.jruby.runtime.Block;
13
- import org.jruby.runtime.CallbackFactory;
19
+ import org.jruby.runtime.ObjectAllocator;
20
+ import org.jruby.runtime.ThreadContext;
14
21
  import org.jruby.runtime.builtin.IRubyObject;
22
+ import org.jruby.runtime.callback.Callback;
15
23
  import org.jruby.exceptions.RaiseException;
16
24
  import org.jruby.runtime.load.BasicLibraryService;
25
+ import org.jruby.util.ByteList;
17
26
 
18
27
  public class HpricotScanService implements BasicLibraryService {
19
- public static String NO_WAY_SERIOUSLY="*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
20
- private static RubyObjectAdapter rubyApi;
21
-
22
- public void ELE(IRubyObject N) {
23
- if (te > ts || text) {
24
- IRubyObject raw_string = runtime.getNil();
25
- ele_open = false; text = false;
26
- if (ts != -1 && N != cdata && N != sym_text && N != procins && N != comment) {
27
- raw_string = runtime.newString(new String(buf,ts,te-ts));
28
- }
29
- rb_yield_tokens(N, tag[0], attr, raw_string, taint);
30
- }
31
- }
32
-
33
- public void SET(IRubyObject[] N, int E) {
34
- int mark = 0;
35
- if(N == tag) {
36
- if(mark_tag == -1 || E == mark_tag) {
37
- tag[0] = runtime.newString("");
38
- } else if(E > mark_tag) {
39
- tag[0] = runtime.newString(new String(buf,mark_tag, E-mark_tag));
40
- }
41
- } else if(N == akey) {
42
- if(mark_akey == -1 || E == mark_akey) {
43
- akey[0] = runtime.newString("");
44
- } else if(E > mark_akey) {
45
- akey[0] = runtime.newString(new String(buf,mark_akey, E-mark_akey));
46
- }
47
- } else if(N == aval) {
48
- if(mark_aval == -1 || E == mark_aval) {
49
- aval[0] = runtime.newString("");
50
- } else if(E > mark_aval) {
51
- aval[0] = runtime.newString(new String(buf,mark_aval, E-mark_aval));
52
- }
53
- }
54
- }
55
-
56
- public void CAT(IRubyObject[] N, int E) {
57
- if(N[0].isNil()) {
58
- SET(N,E);
59
- } else {
60
- int mark = 0;
61
- if(N == tag) {
62
- mark = mark_tag;
63
- } else if(N == akey) {
64
- mark = mark_akey;
65
- } else if(N == aval) {
66
- mark = mark_aval;
67
- }
68
- ((RubyString)(N[0])).append(runtime.newString(new String(buf, mark, E-mark)));
69
- }
70
- }
71
-
72
- public void SLIDE(Object N) {
73
- int mark = 0;
74
- if(N == tag) {
75
- mark = mark_tag;
76
- } else if(N == akey) {
77
- mark = mark_akey;
78
- } else if(N == aval) {
79
- mark = mark_aval;
80
- }
81
- if(mark > ts) {
82
- if(N == tag) {
83
- mark_tag -= ts;
84
- } else if(N == akey) {
85
- mark_akey -= ts;
86
- } else if(N == aval) {
87
- mark_aval -= ts;
88
- }
89
- }
90
- }
91
-
92
- public void ATTR(IRubyObject K, IRubyObject V) {
93
- if(!K.isNil()) {
94
- if(attr.isNil()) {
95
- attr = RubyHash.newHash(runtime);
96
- }
97
- ((RubyHash)attr).op_aset(runtime.getCurrentContext(),K,V);
98
- // ((RubyHash)attr).aset(K,V);
99
- }
100
- }
101
-
102
- public void ATTR(IRubyObject[] K, IRubyObject V) {
103
- ATTR(K[0],V);
104
- }
105
-
106
- public void ATTR(IRubyObject K, IRubyObject[] V) {
107
- ATTR(K,V[0]);
108
- }
109
-
110
- public void ATTR(IRubyObject[] K, IRubyObject[] V) {
111
- ATTR(K[0],V[0]);
112
- }
113
-
114
- public void TEXT_PASS() {
115
- if(!text) {
116
- if(ele_open) {
117
- ele_open = false;
118
- if(ts > -1) {
119
- mark_tag = ts;
120
- }
121
- } else {
122
- mark_tag = p;
123
- }
124
- attr = runtime.getNil();
125
- tag[0] = runtime.getNil();
126
- text = true;
127
- }
128
- }
129
-
130
- public void EBLK(IRubyObject N, int T) {
131
- CAT(tag, p - T + 1);
132
- ELE(N);
133
- }
134
-
135
-
136
- public void rb_raise(RubyClass error, String message) {
137
- throw new RaiseException(runtime, error, message, true);
138
- }
139
-
140
- public IRubyObject rb_str_new2(String s) {
141
- return runtime.newString(s);
142
- }
28
// Returns a freshly allocated byte array of length `size` whose leading bytes
// are a copy of every byte of `input`; the remainder is zero-filled by Java.
// System.arraycopy copies input.length bytes, so `size` must be at least
// input.length, otherwise an IndexOutOfBoundsException is thrown.
+ public static byte[] realloc(byte[] input, int size) {
29
+ byte[] newArray = new byte[size];
30
+ System.arraycopy(input, 0, newArray, 0, input.length);
31
+ return newArray;
32
+ }
33
+
34
+ // hpricot_state
// Mutable parse state used when scan() builds a document tree instead of
// yielding tokens to a block (the Scanner constructor only creates it when
// no block is given).
//   doc    - the document object, allocated from x.cDoc by the constructor.
//   focus  - the element new nodes are currently added to; starts as doc.
//   last   - the element most recently created by TokenInfo.H_ELE, or nil.
//   EC     - the Hpricot::ElementContent constant, indexed by tag name.
//   xml / strict / fixup - copies of the :xml, :xhtml_strict and :fixup_tags
//            options; the constructor forces fixup on when strict is set.
35
+ public static class State {
36
+ public IRubyObject doc;
37
+ public IRubyObject focus;
38
+ public IRubyObject last;
39
+ public IRubyObject EC;
40
+ public boolean xml, strict, fixup;
41
+ }
42
+
43
// Reports whether the option named `key` is switched on in `opts`.
// Returns false when `opts` is nil; otherwise looks up the symbol `key` in the
// hash and returns the Ruby truthiness of the stored value.
// A non-nil `opts` is cast to RubyHash without a type check; the Scanner
// constructor replaces any non-hash opts with nil before calling this.
+ static boolean OPT(IRubyObject opts, String key) {
44
+ Ruby runtime = opts.getRuntime();
45
+ return !opts.isNil() && ((RubyHash)opts).op_aref(runtime.getCurrentContext(), runtime.newSymbol(key)).isTrue();
46
+ }
47
+
48
+ // H_PROP(name, H_ELE_TAG)
49
+ public static IRubyObject hpricot_ele_set_name(IRubyObject self, IRubyObject x) {
50
+ H_ELE_SET(self, H_ELE_TAG, x);
51
+ return self;
52
+ }
53
+
54
+ public static IRubyObject hpricot_ele_clear_name(IRubyObject self) {
55
+ H_ELE_SET(self, H_ELE_TAG, self.getRuntime().getNil());
56
+ return self.getRuntime().getTrue();
57
+ }
58
+
59
+ public static IRubyObject hpricot_ele_get_name(IRubyObject self) {
60
+ return H_ELE_GET(self, H_ELE_TAG);
61
+ }
62
+
63
+ // H_PROP(raw, H_ELE_RAW)
64
+ public static IRubyObject hpricot_ele_set_raw(IRubyObject self, IRubyObject x) {
65
+ H_ELE_SET(self, H_ELE_RAW, x);
66
+ return self;
67
+ }
68
+
69
+ public static IRubyObject hpricot_ele_clear_raw(IRubyObject self) {
70
+ H_ELE_SET(self, H_ELE_RAW, self.getRuntime().getNil());
71
+ return self.getRuntime().getTrue();
72
+ }
73
+
74
+ public static IRubyObject hpricot_ele_get_raw(IRubyObject self) {
75
+ return H_ELE_GET(self, H_ELE_RAW);
76
+ }
77
+
78
+ // H_PROP(parent, H_ELE_PARENT)
79
+ public static IRubyObject hpricot_ele_set_parent(IRubyObject self, IRubyObject x) {
80
+ H_ELE_SET(self, H_ELE_PARENT, x);
81
+ return self;
82
+ }
83
+
84
+ public static IRubyObject hpricot_ele_clear_parent(IRubyObject self) {
85
+ H_ELE_SET(self, H_ELE_PARENT, self.getRuntime().getNil());
86
+ return self.getRuntime().getTrue();
87
+ }
88
+
89
+ public static IRubyObject hpricot_ele_get_parent(IRubyObject self) {
90
+ return H_ELE_GET(self, H_ELE_PARENT);
91
+ }
92
+
93
+ // H_PROP(attr, H_ELE_ATTR)
94
+ public static IRubyObject hpricot_ele_set_attr(IRubyObject self, IRubyObject x) {
95
+ H_ELE_SET(self, H_ELE_ATTR, x);
96
+ return self;
97
+ }
98
+
99
+ public static IRubyObject hpricot_ele_clear_attr(IRubyObject self) {
100
+ H_ELE_SET(self, H_ELE_ATTR, self.getRuntime().getNil());
101
+ return self.getRuntime().getTrue();
102
+ }
103
+
104
+ public static IRubyObject hpricot_ele_get_attr(IRubyObject self) {
105
+ return H_ELE_GET(self, H_ELE_ATTR);
106
+ }
107
+
108
+ // H_PROP(etag, H_ELE_ETAG)
109
+ public static IRubyObject hpricot_ele_set_etag(IRubyObject self, IRubyObject x) {
110
+ H_ELE_SET(self, H_ELE_ETAG, x);
111
+ return self;
112
+ }
113
+
114
+ public static IRubyObject hpricot_ele_clear_etag(IRubyObject self) {
115
+ H_ELE_SET(self, H_ELE_ETAG, self.getRuntime().getNil());
116
+ return self.getRuntime().getTrue();
117
+ }
118
+
119
+ public static IRubyObject hpricot_ele_get_etag(IRubyObject self) {
120
+ return H_ELE_GET(self, H_ELE_ETAG);
121
+ }
122
+
123
+ // H_PROP(children, H_ELE_CHILDREN)
124
+ public static IRubyObject hpricot_ele_set_children(IRubyObject self, IRubyObject x) {
125
+ H_ELE_SET(self, H_ELE_CHILDREN, x);
126
+ return self;
127
+ }
128
+
129
+ public static IRubyObject hpricot_ele_clear_children(IRubyObject self) {
130
+ H_ELE_SET(self, H_ELE_CHILDREN, self.getRuntime().getNil());
131
+ return self.getRuntime().getTrue();
132
+ }
133
+
134
+ public static IRubyObject hpricot_ele_get_children(IRubyObject self) {
135
+ return H_ELE_GET(self, H_ELE_CHILDREN);
136
+ }
137
+
138
+ // H_ATTR(target)
139
+ public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
140
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("target"), x);
141
+ return self;
142
+ }
143
+
144
+ public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
145
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
146
+ }
147
+
148
+ // H_ATTR(encoding)
149
+ public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
150
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
151
+ return self;
152
+ }
153
+
154
+ public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
155
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
156
+ }
157
+
158
+ // H_ATTR(version)
159
+ public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
160
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
161
+ return self;
162
+ }
163
+
164
+ public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
165
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
166
+ }
167
+
168
+ // H_ATTR(standalone)
169
+ public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
170
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
171
+ return self;
172
+ }
173
+
174
+ public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
175
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
176
+ }
177
+
178
+ // H_ATTR(system_id)
179
+ public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
180
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
181
+ return self;
182
+ }
183
+
184
+ public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
185
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
186
+ }
187
+
188
+ // H_ATTR(public_id)
189
+ public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
190
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
191
+ return self;
192
+ }
193
+
194
+ public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
195
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
196
+ }
197
+
198
+ public static class Scanner {
199
// Produces the Ruby string for the buffer span that starts at `mark` and ends
// just before `E`.
//   - mark == -1 (no mark recorded) or an empty span: a new empty string.
//   - E > mark: a new string built from data[mark .. E-1].
//   - E < mark: `org` is returned unchanged.
// The caller assigns the result back to the field it is updating.
+ public IRubyObject SET(int mark, int E, IRubyObject org) {
200
+ if(mark == -1 || E == mark) {
201
+ return runtime.newString("");
202
+ } else if(E > mark) {
203
+ return RubyString.newString(runtime, data, mark, E-mark);
204
+ } else {
205
+ return org;
206
+ }
207
+ }
208
+
209
// Rebases a mark offset after scan() has copied the unconsumed bytes
// (starting at ts) down to the front of the buffer: marks beyond ts are
// reduced by ts, any other value (including -1) is returned unchanged.
// Must be called before ts itself is reset, as scan() does.
+ public int SLIDE(int N) {
210
+ if(N > ts) {
211
+ return N - ts;
212
+ } else {
213
+ return N;
214
+ }
215
+ }
216
+
217
// Appends the buffer span data[mark .. E-1] to `N` and returns the result.
// When `N` is nil this delegates to SET, which creates the string; otherwise
// `N` is cast to RubyString, extended in place and returned.
// NOTE(review): unlike SET there is no guard for E < mark on the append path,
// so a negative length would be passed to cat() - confirm callers never do so.
+ public IRubyObject CAT(IRubyObject N, int mark, int E) {
218
+ if(N.isNil()) {
219
+ return SET(mark, E, N);
220
+ } else {
221
+ ((RubyString)N).cat(data, mark, E-mark);
222
+ return N;
223
+ }
224
+ }
225
+
226
// Records attribute K => V on the scanner's current `attr` hash.
// A nil key is ignored. The hash is created lazily the first time a non-nil
// key is stored, so `attr` stays nil for tags without attributes.
+ public void ATTR(IRubyObject K, IRubyObject V) {
227
+ if(!K.isNil()) {
228
+ if(attr.isNil()) {
229
+ attr = RubyHash.newHash(runtime);
230
+ }
231
+ ((RubyHash)attr).fastASet(K, V);
232
+ }
233
+ }
234
+
235
// Switches the scanner into text-accumulation mode; does nothing if it is
// already in that mode.
// The text mark is placed at the token start (ts) when an element was open
// and ts is set, or at the current position p when no element was open.
// The pending tag and attributes are discarded because the input is now
// being treated as plain text.
+ public void TEXT_PASS() {
236
+ if(!text) {
237
+ if(ele_open) {
238
+ ele_open = false;
239
+ if(ts != -1) {
240
+ mark_tag = ts;
241
+ }
242
+ } else {
243
+ mark_tag = p;
244
+ }
245
+ attr = runtime.getNil();
246
+ tag = runtime.getNil();
247
+ text = true;
248
+ }
249
+ }
250
+
251
// Emits the token of kind `N` (one of the x.sym_* symbols) for the current
// match. Nothing happens unless the match is non-empty (te > ts) or text is
// pending. Clears the ele_open and text flags, then either yields the token
// to the caller's block or, when no block was given, feeds it to the tree
// builder via hpricotToken.
+ public void ELE(IRubyObject N) {
252
+ if(te > ts || text) {
253
+ int raw = -1;
254
+ int rawlen = 0;
255
+ ele_open = false;
256
+ text = false;
257
+
// The raw source span is only recorded for token kinds other than cdata,
// text, procins and comment, and only when a token start is set.
258
+ if(ts != -1 && N != x.sym_cdata && N != x.sym_text && N != x.sym_procins && N != x.sym_comment) {
259
+ raw = ts;
260
+ rawlen = te - ts;
261
+ }
262
+
263
+ if(block.isGiven()) {
264
+ IRubyObject raw_string = runtime.getNil();
265
+ if(raw != -1) {
266
+ raw_string = RubyString.newString(runtime, data, raw, rawlen);
267
+ }
// NOTE(review): raw_string is built above but never used; nil is passed as
// the raw argument instead - confirm whether raw_string was intended here.
268
+ yieldTokens(N, tag, attr, runtime.getNil(), taint);
269
+ } else {
270
+ hpricotToken(S, N, tag, attr, raw, rawlen, taint);
271
+ }
272
+ }
273
+ }
274
+
275
+
276
// Finishes a block-style token: extends `tag` with the buffer content from
// mark_tag up to (p - T + 1), then emits the token of kind `N` through ELE.
// presumably T is the length of the block's closing delimiter - confirm
// against the Ragel grammar that invokes this.
+ public void EBLK(IRubyObject N, int T) {
277
+ tag = CAT(tag, mark_tag, p - T + 1);
278
+ ELE(N);
279
+ }
280
+
281
// Appends `ele` to the children of `focus` and sets `focus` as its parent.
// The children array is created on first use and stored back on `focus`.
+ public void hpricotAdd(IRubyObject focus, IRubyObject ele) {
282
+ IRubyObject children = H_ELE_GET(focus, H_ELE_CHILDREN);
283
+ if(children.isNil()) {
284
+ H_ELE_SET(focus, H_ELE_CHILDREN, children = RubyArray.newArray(runtime, 1));
285
+ }
286
+ ((RubyArray)children).append(ele);
287
+ H_ELE_SET(ele, H_ELE_PARENT, focus);
288
+ }
289
+
290
+ private static class TokenInfo {
291
+ public IRubyObject sym;
292
+ public IRubyObject tag;
293
+ public IRubyObject attr;
294
+ public int raw;
295
+ public int rawlen;
296
+ public IRubyObject ec;
297
+ public IRubyObject ele;
298
+ public Extra x;
299
+ public Ruby runtime;
300
+ public Scanner scanner;
301
+ public State S;
302
+
303
// Allocates a node of class `klass`, fills its slots from this token's
// fields, stores it in `ele` and records it as S.last.
// Which slots are filled depends on the class:
//   - x.cElem: tag, attr and ec, plus the raw source text for emptytag, stag
//     and doctype tokens when a raw span is available.
//   - x.cBogusETag: tag, with the raw source text stored in the ATTR slot.
//   - x.cDocType / x.cProcIns / x.cXMLDecl: attr, and a TAG slot that holds
//     the raw source text (or nil) except for ProcIns, which keeps `tag`.
//   - any other class: tag only.
+ public void H_ELE(RubyClass klass) {
304
+ ele = klass.allocate();
305
+ if(klass == x.cElem) {
306
+ H_ELE_SET(ele, H_ELE_TAG, tag);
307
+ H_ELE_SET(ele, H_ELE_ATTR, attr);
308
+ H_ELE_SET(ele, H_ELE_EC, ec);
309
+ if(raw != -1 && (sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_doctype)) {
310
+ H_ELE_SET(ele, H_ELE_RAW, RubyString.newString(runtime, scanner.data, raw, rawlen));
311
+ }
312
+ } else if(klass == x.cDocType || klass == x.cProcIns || klass == x.cXMLDecl || klass == x.cBogusETag) {
313
+ if(klass == x.cBogusETag) {
314
+ H_ELE_SET(ele, H_ELE_TAG, tag);
315
+ if(raw != -1) {
316
+ H_ELE_SET(ele, H_ELE_ATTR, RubyString.newString(runtime, scanner.data, raw, rawlen));
317
+ }
318
+ } else {
319
+ if(klass == x.cDocType) {
// NOTE(review): scanner.ATTR writes to the Scanner's own attr field (creating
// a hash there if it is nil), while the H_ELE_SET below reads this
// TokenInfo's attr. If this.attr is nil the :target entry would not reach
// the element - confirm the two always refer to the same hash here.
320
+ scanner.ATTR(runtime.newSymbol("target"), tag);
321
+ }
322
+ H_ELE_SET(ele, H_ELE_ATTR, attr);
323
+ if(klass != x.cProcIns) {
324
+ tag = runtime.getNil();
325
+ if(raw != -1) {
326
+ tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
327
+ }
328
+ }
329
+ H_ELE_SET(ele, H_ELE_TAG, tag);
330
+ }
331
+ } else {
332
+ H_ELE_SET(ele, H_ELE_TAG, tag);
333
+ }
334
+ S.last = ele;
335
+ }
336
+
337
+ public void hpricotToken(boolean taint) {
338
+ //
339
+ // in html mode, fix up start tags incorrectly formed as empty tags
340
+ //
341
+ if(!S.xml) {
342
+ if(sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_etag) {
343
+ ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
344
+ if(ec.isNil()) {
345
+ tag = tag.callMethod(scanner.ctx, "downcase");
346
+ ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
347
+ }
348
+ }
349
+
350
+ if(H_ELE_GET(S.focus, H_ELE_EC) == x.sym_CDATA &&
351
+ (sym != x.sym_procins && sym != x.sym_comment && sym != x.sym_cdata && sym != x.sym_text) &&
352
+ !(sym == x.sym_etag && runtime.newFixnum(tag.hashCode()).equals(H_ELE_GET(S.focus, H_ELE_HASH)))) {
353
+ sym = x.sym_text;
354
+ tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
355
+ }
356
+
357
+ if(!ec.isNil()) {
358
+ if(sym == x.sym_emptytag) {
359
+ if(ec != x.sym_EMPTY) {
360
+ sym = x.sym_stag;
361
+ }
362
+ } else if(sym == x.sym_stag) {
363
+ if(ec == x.sym_EMPTY) {
364
+ sym = x.sym_emptytag;
365
+ }
366
+ }
367
+ }
368
+ }
369
+
370
+ if(sym == x.sym_emptytag || sym == x.sym_stag) {
371
+ IRubyObject name = runtime.newFixnum(tag.hashCode());
372
+ H_ELE(x.cElem);
373
+ H_ELE_SET(ele, H_ELE_HASH, name);
374
+
375
+ if(!S.xml) {
376
+ IRubyObject match = runtime.getNil(), e = S.focus;
377
+ while(e != S.doc) {
378
+ IRubyObject hEC = H_ELE_GET(e, H_ELE_EC);
379
+ if(hEC instanceof RubyHash) {
380
+ IRubyObject has = ((RubyHash)hEC).op_aref(scanner.ctx, name);
381
+ if(!has.isNil()) {
382
+ if(has == runtime.getTrue()) {
383
+ if(match.isNil()) {
384
+ match = e;
385
+ }
386
+ } else if(has == x.symAllow) {
387
+ match = S.focus;
388
+ } else if(has == x.symDeny) {
389
+ match = runtime.getNil();
390
+ }
391
+ }
392
+ }
393
+ e = H_ELE_GET(e, H_ELE_PARENT);
394
+ }
395
+
396
+ if(match.isNil()) {
397
+ match = S.focus;
398
+ }
399
+ S.focus = match;
400
+ }
401
+
402
+ scanner.hpricotAdd(S.focus, ele);
403
+
404
+ //
405
+ // in the case of a start tag that should be empty, just
406
+ // skip the step that focuses the element. focusing moves
407
+ // us deeper into the document.
408
+ //
409
+ if(sym == x.sym_stag) {
410
+ if(S.xml || ec != x.sym_EMPTY) {
411
+ S.focus = ele;
412
+ S.last = runtime.getNil();
413
+ }
414
+ }
415
+ } else if(sym == x.sym_etag) {
416
+ IRubyObject name, match = runtime.getNil(), e = S.focus;
417
+ if(S.strict) {
418
+ if(((RubyHash)S.EC).op_aref(scanner.ctx, tag).isNil()) {
419
+ tag = runtime.newString("div");
420
+ }
421
+ }
422
+
423
+ name = runtime.newFixnum(tag.hashCode());
424
+ while(e != S.doc) {
425
+ if(H_ELE_GET(e, H_ELE_HASH).equals(name)) {
426
+ match = e;
427
+ break;
428
+ }
429
+ e = H_ELE_GET(e, H_ELE_PARENT);
430
+
431
+ }
432
+ if(match.isNil()) {
433
+ H_ELE(x.cBogusETag);
434
+ scanner.hpricotAdd(S.focus, ele);
435
+ } else {
436
+ ele = runtime.getNil();
437
+ if(raw != -1) {
438
+ ele = RubyString.newString(runtime, scanner.data, raw, rawlen);
439
+ }
440
+ H_ELE_SET(match, H_ELE_ETAG, ele);
441
+ S.focus = H_ELE_GET(match, H_ELE_PARENT);
442
+ S.last = runtime.getNil();
443
+
444
+ }
445
+ } else if(sym == x.sym_cdata) {
446
+ H_ELE(x.cCData);
447
+ scanner.hpricotAdd(S.focus, ele);
448
+ } else if(sym == x.sym_comment) {
449
+ H_ELE(x.cComment);
450
+ scanner.hpricotAdd(S.focus, ele);
451
+ } else if(sym == x.sym_doctype) {
452
+ H_ELE(x.cDocType);
453
+ if(S.strict) {
454
+ RubyHash h = (RubyHash)attr;
455
+ h.fastASet(runtime.newSymbol("system_id"), runtime.newString("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
456
+ h.fastASet(runtime.newSymbol("public_id"), runtime.newString("-//W3C//DTD XHTML 1.0 Strict//EN"));
457
+ }
458
+ scanner.hpricotAdd(S.focus, ele);
459
+ } else if(sym == x.sym_procins) {
460
+ IRubyObject match = tag.callMethod(scanner.ctx, "match", x.reProcInsParse);
461
+ tag = RubyRegexp.nth_match(1, match);
462
+ attr = RubyRegexp.nth_match(2, match);
463
+ H_ELE(x.cProcIns);
464
+ scanner.hpricotAdd(S.focus, ele);
465
+ } else if(sym == x.sym_text) {
466
+ if(!S.last.isNil() && S.last.getType() == x.cText) {
467
+ ((RubyString)H_ELE_GET(S.last, H_ELE_TAG)).append(tag);
468
+ } else {
469
+ H_ELE(x.cText);
470
+ scanner.hpricotAdd(S.focus, ele);
471
+ }
472
+ } else if(sym == x.sym_xmldecl) {
473
+ H_ELE(x.cXMLDecl);
474
+ scanner.hpricotAdd(S.focus, ele);
475
+ }
476
+ }
477
+ }
478
+
479
// Tree-building entry point used by ELE when no block was given.
// Packs the token (symbol, tag, attributes, raw span) together with the
// scanner's shared context into a fresh TokenInfo, with ec and ele starting
// as nil, and runs TokenInfo.hpricotToken on it.
+ public void hpricotToken(State S, IRubyObject _sym, IRubyObject _tag, IRubyObject _attr, int _raw, int _rawlen, boolean taint) {
480
+ TokenInfo t = new TokenInfo();
481
+ t.sym = _sym;
482
+ t.tag = _tag;
483
+ t.attr = _attr;
484
+ t.raw = _raw;
485
+ t.rawlen = _rawlen;
486
+ t.ec = runtime.getNil();
487
+ t.ele = runtime.getNil();
488
+ t.x = x;
489
+ t.runtime = runtime;
490
+ t.scanner = this;
491
+ t.S = S;
492
+
493
+ t.hpricotToken(taint);
494
+ }
495
+
496
// Yields one token to the caller's block as the array [sym, tag, attr, raw].
// For text tokens the raw slot is replaced by the tag, i.e. the text itself.
// When `taint` is set, the array and its tag, attr and raw entries are
// marked tainted before yielding.
+ public void yieldTokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
497
+ if(sym == x.sym_text) {
498
+ raw = tag;
499
+ }
500
+ IRubyObject ary = RubyArray.newArrayNoCopy(runtime, new IRubyObject[]{sym, tag, attr, raw});
501
+ if(taint) {
// NOTE(review): tag, attr and raw may be the nil object here (ELE passes
// runtime.getNil() for raw) - confirm that tainting nil is harmless.
502
+ ary.setTaint(true);
503
+ tag.setTaint(true);
504
+ attr.setTaint(true);
505
+ raw.setTaint(true);
506
+ }
507
+
508
+ block.yield(ctx, ary);
509
+ }
143
510
 
144
511
  %%{
145
512
  machine hpricot_scan;
146
513
 
147
514
  action newEle {
148
- if (text) {
149
- CAT(tag, p);
150
- ELE(sym_text);
151
- text = false;
515
+ if(text) {
516
+ tag = CAT(tag, mark_tag, p);
517
+ ELE(x.sym_text);
518
+ text = false;
152
519
  }
153
520
  attr = runtime.getNil();
154
- tag[0] = runtime.getNil();
521
+ tag = runtime.getNil();
155
522
  mark_tag = -1;
156
523
  ele_open = true;
157
524
  }
158
525
 
159
- action _tag { mark_tag = p; }
526
+ action _tag { mark_tag = p; }
160
527
  action _aval { mark_aval = p; }
161
528
  action _akey { mark_akey = p; }
162
- action tag { SET(tag, p); }
163
- action tagc { SET(tag, p-1); }
164
- action aval { SET(aval, p); }
165
- action aunq {
166
- if (buf[p-1] == '"' || buf[p-1] == '\'') { SET(aval, p-1); }
167
- else { SET(aval, p); }
529
+ action tag { tag = SET(mark_tag, p, tag); }
530
+ action tagc { tag = SET(mark_tag, p-1, tag); }
531
+ action aval { aval = SET(mark_aval, p, aval); }
532
+ action aunq {
533
+ if(data[p-1] == '"' || data[p-1] == '\'') {
534
+ aval = SET(mark_aval, p-1, aval);
535
+ } else {
536
+ aval = SET(mark_aval, p, aval);
537
+ }
168
538
  }
169
- action akey { SET(akey, p); }
170
- action xmlver { SET(aval, p); ATTR(rb_str_new2("version"), aval); }
171
- action xmlenc { SET(aval, p); ATTR(rb_str_new2("encoding"), aval); }
172
- action xmlsd { SET(aval, p); ATTR(rb_str_new2("standalone"), aval); }
173
- action pubid { SET(aval, p); ATTR(rb_str_new2("public_id"), aval); }
174
- action sysid { SET(aval, p); ATTR(rb_str_new2("system_id"), aval); }
175
-
176
- action new_attr {
177
- akey[0] = runtime.getNil();
178
- aval[0] = runtime.getNil();
179
- mark_akey = -1;
180
- mark_aval = -1;
539
+ action akey { akey = SET(mark_akey, p, akey); }
540
+ action xmlver { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("version"), aval); }
541
+ action xmlenc { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("encoding"), aval); }
542
+ action xmlsd { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("standalone"), aval); }
543
+ action pubid { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("public_id"), aval); }
544
+ action sysid { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("system_id"), aval); }
545
+
546
+ action new_attr {
547
+ akey = runtime.getNil();
548
+ aval = runtime.getNil();
549
+ mark_akey = -1;
550
+ mark_aval = -1;
181
551
  }
182
552
 
183
- action save_attr {
184
- ATTR(akey, aval);
553
+ action save_attr {
554
+ if(!S.xml) {
555
+ akey = akey.callMethod(runtime.getCurrentContext(), "downcase");
556
+ }
557
+ ATTR(akey, aval);
185
558
  }
186
559
 
187
560
  include hpricot_common "hpricot_common.rl";
188
-
189
561
  }%%
190
562
 
191
563
  %% write data nofinal;
192
564
 
193
- public final static int BUFSIZE=16384;
565
+ public final static int BUFSIZE = 16384;
194
566
 
195
- private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
196
- IRubyObject ary;
197
- if (sym == runtime.newSymbol("text")) {
198
- raw = tag;
199
- }
200
- ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw});
201
- if (taint) {
202
- ary.setTaint(true);
203
- tag.setTaint(true);
204
- attr.setTaint(true);
205
- raw.setTaint(true);
206
- }
207
- block.yield(runtime.getCurrentContext(), ary, null, null, false);
208
- }
209
567
 
568
+ private int cs, act, have = 0, nread = 0, curline = 1;
569
+ private int ts = 0, te = 0, eof = -1, p = -1, pe = -1, buf = 0;
570
+ private byte[] data;
571
+ private State S = null;
572
+ private IRubyObject port, opts, attr, tag, akey, aval, bufsize;
573
+ private int mark_tag = -1, mark_akey = -1, mark_aval = -1;
574
+ private boolean done = false, ele_open = false, taint = false, io = false, text = false;
575
+ private int buffer_size = 0;
210
576
 
211
- int cs, act, have = 0, nread = 0, curline = 1, p=-1;
212
- boolean text = false;
213
- int ts=-1, te;
214
- int eof=-1;
215
- char[] buf;
216
- Ruby runtime;
217
- IRubyObject attr, bufsize;
218
- IRubyObject[] tag, akey, aval;
219
- int mark_tag, mark_akey, mark_aval;
220
- boolean done = false, ele_open = false;
221
- int buffer_size = 0;
222
- boolean taint = false;
223
- Block block = null;
577
+ private Extra x;
578
+
579
+ private IRubyObject self;
580
+ private Ruby runtime;
581
+ private ThreadContext ctx;
582
+ private Block block;
583
+
584
+ private IRubyObject xmldecl, doctype, stag, etag, emptytag, comment, cdata, procins;
585
+
586
// Builds (does not throw) a Ruby exception of class `exceptionClass` carrying
// `message`, bound to this scanner's runtime; callers throw the result.
+ private RaiseException newRaiseException(RubyClass exceptionClass, String message) {
587
+ return new RaiseException(runtime, exceptionClass, message, true);
588
+ }
224
589
 
590
+ public Scanner(IRubyObject self, IRubyObject[] args, Block block) {
591
+ this.self = self;
592
+ this.runtime = self.getRuntime();
593
+ this.ctx = runtime.getCurrentContext();
594
+ this.block = block;
595
+ attr = runtime.getNil();
596
+ tag = runtime.getNil();
597
+ akey = runtime.getNil();
598
+ aval = runtime.getNil();
599
+ bufsize = runtime.getNil();
225
600
 
226
- IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment,
227
- cdata, sym_text;
601
+ this.x = (Extra)this.runtime.getModule("Hpricot").dataGetStruct();
228
602
 
229
- IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) {
230
- attr = bufsize = runtime.getNil();
231
- tag = new IRubyObject[]{runtime.getNil()};
232
- akey = new IRubyObject[]{runtime.getNil()};
233
- aval = new IRubyObject[]{runtime.getNil()};
603
+ this.xmldecl = x.sym_xmldecl;
604
+ this.doctype = x.sym_doctype;
605
+ this.stag = x.sym_stag;
606
+ this.etag = x.sym_etag;
607
+ this.emptytag = x.sym_emptytag;
608
+ this.comment = x.sym_comment;
609
+ this.cdata = x.sym_cdata;
610
+ this.procins = x.sym_procins;
611
+
612
+ port = args[0];
613
+ if(args.length == 2) {
614
+ opts = args[1];
615
+ } else {
616
+ opts = runtime.getNil();
617
+ }
618
+
619
+ taint = port.isTaint();
620
+ io = port.respondsTo("read");
621
+ if(!io) {
622
+ if(port.respondsTo("to_str")) {
623
+ port = port.callMethod(ctx, "to_str");
624
+ port = port.convertToString();
625
+ } else {
626
+ throw runtime.newArgumentError("an Hpricot document must be built from an input source (a String or IO object.)");
627
+ }
628
+ }
629
+
630
+ if(!(opts instanceof RubyHash)) {
631
+ opts = runtime.getNil();
632
+ }
633
+
634
+ if(!block.isGiven()) {
635
+ S = new State();
636
+ S.doc = x.cDoc.allocate();
637
+ S.focus = S.doc;
638
+ S.last = runtime.getNil();
639
+ S.xml = OPT(opts, "xml");
640
+ S.strict = OPT(opts, "xhtml_strict");
641
+ S.fixup = OPT(opts, "fixup_tags");
642
+ if(S.strict) {
643
+ S.fixup = true;
644
+ }
645
+ S.doc.getInstanceVariables().fastSetInstanceVariable("@options", opts);
646
+ S.EC = x.mHpricot.getConstant("ElementContent");
647
+ }
648
+
649
+ buffer_size = BUFSIZE;
650
+ if(self.getInstanceVariables().fastHasInstanceVariable("@buffer_size")) {
651
+ bufsize = self.getInstanceVariables().fastGetInstanceVariable("@buffer_size");
652
+ if(!bufsize.isNil()) {
653
+ buffer_size = RubyNumeric.fix2int(bufsize);
654
+ }
655
+ }
656
+
657
+ if(io) {
658
+ buf = 0;
659
+ data = new byte[buffer_size];
660
+ }
661
+ }
662
+
663
+ private int len, space;
664
+ // hpricot_scan
665
+ public IRubyObject scan() {
666
+ %% write init;
667
+ while(!done) {
668
+ p = pe = len = buf;
669
+ space = buffer_size - have;
670
+
671
+ if(io) {
672
+ if(space == 0) {
673
+ /* We've used up the entire buffer storing an already-parsed token
674
+ * prefix that must be preserved. Likely caused by super-long attributes.
675
+ * Increase buffer size and continue */
676
+ buffer_size += BUFSIZE;
677
+ data = realloc(data, buffer_size);
678
+ space = buffer_size - have;
679
+ }
234
680
 
235
- RubyClass rb_eHpricotParseError = runtime.getModule("Hpricot").getClass("ParseError");
681
+ p = have;
682
+ IRubyObject str = port.callMethod(ctx, "read", runtime.newFixnum(space));
683
+ ByteList bl = str.convertToString().getByteList();
684
+ len = bl.realSize;
685
+ System.arraycopy(bl.bytes, bl.begin, data, p, len);
686
+ } else {
687
+ ByteList bl = port.convertToString().getByteList();
688
+ data = bl.bytes;
689
+ buf = bl.begin;
690
+ p = bl.begin;
691
+ len = bl.realSize + 1;
692
+ if(p + len >= data.length) {
693
+ data = new byte[len];
694
+ System.arraycopy(bl.bytes, bl.begin, data, 0, bl.realSize);
695
+ p = 0;
696
+ buf = 0;
697
+ }
698
+ done = true;
699
+ eof = p + len;
700
+ }
236
701
 
237
- taint = port.isTaint();
238
- if ( !port.respondsTo("read")) {
239
- if ( port.respondsTo("to_str")) {
240
- port = port.callMethod(runtime.getCurrentContext(),"to_str");
241
- } else {
242
- throw runtime.newArgumentError("bad Hpricot argument, String or IO only please.");
702
+ nread += len;
703
+
704
+ /* If this is the last buffer, tack on an EOF. */
705
+ if(io && len < space) {
706
+ data[p + len++] = 0;
707
+ eof = p + len;
708
+ done = true;
709
+ }
710
+
711
+ pe = p + len;
712
+
713
+ %% write exec;
714
+
715
+ if(cs == hpricot_scan_error) {
716
+ if(!tag.isNil()) {
717
+ throw newRaiseException(x.rb_eHpricotParseError, "parse error on element <" + tag + ">, starting on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
718
+ } else {
719
+ throw newRaiseException(x.rb_eHpricotParseError, "parse error on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
720
+ }
721
+ }
722
+
723
+ if(done && ele_open) {
724
+ ele_open = false;
725
+ if(ts > 0) {
726
+ mark_tag = ts;
727
+ ts = 0;
728
+ text = true;
729
+ }
730
+ }
731
+
732
+ if(ts == -1) {
733
+ have = 0;
734
+ if(mark_tag != -1 && text) {
735
+ if(done) {
736
+ if(mark_tag < p - 1) {
737
+ tag = CAT(tag, mark_tag, p-1);
738
+ ELE(x.sym_text);
739
+ }
740
+ } else {
741
+ tag = CAT(tag, mark_tag, p);
742
+ }
743
+ }
744
+ if(io) {
745
+ mark_tag = 0;
746
+ } else {
747
+ mark_tag = ((RubyString)port).getByteList().begin;
748
+ }
749
+ } else if(io) {
750
+ have = pe - ts;
751
+ System.arraycopy(data, ts, data, buf, have);
752
+ mark_tag = SLIDE(mark_tag);
753
+ mark_akey = SLIDE(mark_akey);
754
+ mark_aval = SLIDE(mark_aval);
755
+ te -= ts;
756
+ ts = 0;
757
+ }
758
+ }
759
+
760
+ if(S != null) {
761
+ return S.doc;
762
+ }
763
+
764
+ return runtime.getNil();
765
+ }
243
766
  }
244
- }
245
767
 
246
- buffer_size = BUFSIZE;
247
- if (rubyApi.getInstanceVariable(recv, "@buffer_size") != null) {
248
- bufsize = rubyApi.getInstanceVariable(recv, "@buffer_size");
249
- if (!bufsize.isNil()) {
250
- buffer_size = RubyNumeric.fix2int(bufsize);
768
+ public static class HpricotModule {
769
+ // hpricot_scan
770
+ @JRubyMethod(module = true, optional = 1, required = 1, frame = true)
771
+ public static IRubyObject scan(IRubyObject self, IRubyObject[] args, Block block) {
772
+ return new Scanner(self, args, block).scan();
773
+ }
774
+
775
+ // hpricot_css
776
+ @JRubyMethod(module = true)
777
+ public static IRubyObject css(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
778
+ return new HpricotCss(self, mod, str, node).scan();
779
+ }
251
780
  }
252
- }
253
- buf = new char[buffer_size];
254
781
 
255
- %% write init;
782
+ public static class CData {
783
+ @JRubyMethod
784
+ public static IRubyObject content(IRubyObject self) {
785
+ return hpricot_ele_get_name(self);
786
+ }
256
787
 
257
- while( !done ) {
258
- IRubyObject str;
259
- p = have;
260
- int pe;
261
- int len, space = buffer_size - have;
788
+ @JRubyMethod(name = "content=")
789
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
790
+ return hpricot_ele_set_name(self, value);
791
+ }
792
+ }
793
+
794
+ public static class Comment {
795
+ @JRubyMethod
796
+ public static IRubyObject content(IRubyObject self) {
797
+ return hpricot_ele_get_name(self);
798
+ }
262
799
 
263
- if ( space == 0 ) {
264
- /* We've used up the entire buffer storing an already-parsed token
265
- * prefix that must be preserved. Likely caused by super-long attributes.
266
- * See ticket #13. */
267
- buffer_size += BUFSIZE;
268
- char[] new_buf = new char[buffer_size];
269
- System.arraycopy(buf, 0, new_buf, 0, buf.length);
270
- buf = new_buf;
271
- space = buffer_size - have;
800
+ @JRubyMethod(name = "content=")
801
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
802
+ return hpricot_ele_set_name(self, value);
803
+ }
272
804
  }
273
805
 
274
- if (port.respondsTo("read")) {
275
- str = port.callMethod(runtime.getCurrentContext(),"read",runtime.newFixnum(space));
276
- } else {
277
- str = ((RubyString)port).substr(nread,space);
806
+ public static class DocType {
807
+ @JRubyMethod
808
+ public static IRubyObject raw_string(IRubyObject self) {
809
+ return hpricot_ele_get_name(self);
810
+ }
811
+
812
+ @JRubyMethod
813
+ public static IRubyObject clear_raw(IRubyObject self) {
814
+ return hpricot_ele_clear_name(self);
815
+ }
816
+
817
+ @JRubyMethod
818
+ public static IRubyObject target(IRubyObject self) {
819
+ return hpricot_ele_get_target(self);
820
+ }
821
+
822
+ @JRubyMethod(name = "target=")
823
+ public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
824
+ return hpricot_ele_set_target(self, value);
825
+ }
826
+
827
+ @JRubyMethod
828
+ public static IRubyObject public_id(IRubyObject self) {
829
+ return hpricot_ele_get_public_id(self);
830
+ }
831
+
832
+ @JRubyMethod(name = "public_id=")
833
+ public static IRubyObject public_id_set(IRubyObject self, IRubyObject value) {
834
+ return hpricot_ele_set_public_id(self, value);
835
+ }
836
+
837
+ @JRubyMethod
838
+ public static IRubyObject system_id(IRubyObject self) {
839
+ return hpricot_ele_get_system_id(self);
840
+ }
841
+
842
+ @JRubyMethod(name = "system_id=")
843
+ public static IRubyObject system_id_set(IRubyObject self, IRubyObject value) {
844
+ return hpricot_ele_set_system_id(self, value);
845
+ }
278
846
  }
279
847
 
280
- str = str.convertToString();
281
- String sss = str.toString();
282
- char[] chars = sss.toCharArray();
283
- System.arraycopy(chars,0,buf,p,chars.length);
848
+ public static class Elem {
849
+ @JRubyMethod
850
+ public static IRubyObject clear_raw(IRubyObject self) {
851
+ return hpricot_ele_clear_raw(self);
852
+ }
853
+ }
284
854
 
285
- len = sss.length();
286
- nread += len;
855
+ public static class BogusETag {
856
+ @JRubyMethod
857
+ public static IRubyObject raw_string(IRubyObject self) {
858
+ return hpricot_ele_get_attr(self);
859
+ }
287
860
 
288
- if ( len < space ) {
289
- len++;
290
- done = true;
861
+ @JRubyMethod
862
+ public static IRubyObject clear_raw(IRubyObject self) {
863
+ return hpricot_ele_clear_attr(self);
864
+ }
291
865
  }
292
866
 
293
- pe = p + len;
294
- char[] data = buf;
867
+ public static class Text {
868
+ @JRubyMethod
869
+ public static IRubyObject raw_string(IRubyObject self) {
870
+ return hpricot_ele_get_name(self);
871
+ }
295
872
 
296
- %% write exec;
297
-
298
- if ( cs == hpricot_scan_error ) {
299
- if(!tag[0].isNil()) {
300
- rb_raise(rb_eHpricotParseError, "parse error on element <"+tag.toString()+">, starting on line "+curline+".\n" + NO_WAY_SERIOUSLY);
301
- } else {
302
- rb_raise(rb_eHpricotParseError, "parse error on line "+curline+".\n" + NO_WAY_SERIOUSLY);
303
- }
873
+ @JRubyMethod
874
+ public static IRubyObject clear_raw(IRubyObject self) {
875
+ return hpricot_ele_clear_name(self);
876
+ }
877
+
878
+ @JRubyMethod
879
+ public static IRubyObject content(IRubyObject self) {
880
+ return hpricot_ele_get_name(self);
881
+ }
882
+
883
+ @JRubyMethod(name = "content=")
884
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
885
+ return hpricot_ele_set_name(self, value);
886
+ }
304
887
  }
888
+
889
+ public static class XMLDecl {
890
+ @JRubyMethod
891
+ public static IRubyObject raw_string(IRubyObject self) {
892
+ return hpricot_ele_get_name(self);
893
+ }
894
+
895
+ @JRubyMethod
896
+ public static IRubyObject clear_raw(IRubyObject self) {
897
+ return hpricot_ele_clear_name(self);
898
+ }
899
+
900
+ @JRubyMethod
901
+ public static IRubyObject encoding(IRubyObject self) {
902
+ return hpricot_ele_get_encoding(self);
903
+ }
904
+
905
+ @JRubyMethod(name = "encoding=")
906
+ public static IRubyObject encoding_set(IRubyObject self, IRubyObject value) {
907
+ return hpricot_ele_set_encoding(self, value);
908
+ }
909
+
910
+ @JRubyMethod
911
+ public static IRubyObject standalone(IRubyObject self) {
912
+ return hpricot_ele_get_standalone(self);
913
+ }
914
+
915
+ @JRubyMethod(name = "standalone=")
916
+ public static IRubyObject standalone_set(IRubyObject self, IRubyObject value) {
917
+ return hpricot_ele_set_standalone(self, value);
918
+ }
919
+
920
+ @JRubyMethod
921
+ public static IRubyObject version(IRubyObject self) {
922
+ return hpricot_ele_get_version(self);
923
+ }
924
+
925
+ @JRubyMethod(name = "version=")
926
+ public static IRubyObject version_set(IRubyObject self, IRubyObject value) {
927
+ return hpricot_ele_set_version(self, value);
928
+ }
929
+ }
930
+
931
+ public static class ProcIns {
932
+ @JRubyMethod
933
+ public static IRubyObject target(IRubyObject self) {
934
+ return hpricot_ele_get_name(self);
935
+ }
936
+
937
+ @JRubyMethod(name = "target=")
938
+ public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
939
+ return hpricot_ele_set_name(self, value);
940
+ }
941
+
942
+ @JRubyMethod
943
+ public static IRubyObject content(IRubyObject self) {
944
+ return hpricot_ele_get_attr(self);
945
+ }
946
+
947
+ @JRubyMethod(name = "content=")
948
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
949
+ return hpricot_ele_set_attr(self, value);
950
+ }
951
+ }
952
+
953
+ public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
954
+
955
+ public final static int H_ELE_TAG = 0;
956
+ public final static int H_ELE_PARENT = 1;
957
+ public final static int H_ELE_ATTR = 2;
958
+ public final static int H_ELE_ETAG = 3;
959
+ public final static int H_ELE_RAW = 4;
960
+ public final static int H_ELE_EC = 5;
961
+ public final static int H_ELE_HASH = 6;
962
+ public final static int H_ELE_CHILDREN = 7;
963
+
964
+ public static IRubyObject H_ELE_GET(IRubyObject recv, int n) {
965
+ return ((IRubyObject[])recv.dataGetStruct())[n];
966
+ }
967
+
968
+ public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
969
+ ((IRubyObject[])recv.dataGetStruct())[n] = value;
970
+ return value;
971
+ }
972
+
973
+ private static class RefCallback implements Callback {
974
+ private final int n;
975
+ public RefCallback(int n) { this.n = n; }
976
+
977
+ public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
978
+ return H_ELE_GET(recv, n);
979
+ }
980
+
981
+ public Arity getArity() {
982
+ return Arity.NO_ARGUMENTS;
983
+ }
984
+ }
985
+
986
+ private static class SetCallback implements Callback {
987
+ private final int n;
988
+ public SetCallback(int n) { this.n = n; }
989
+
990
+ public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
991
+ return H_ELE_SET(recv, n, args[0]);
992
+ }
993
+
994
+ public Arity getArity() {
995
+ return Arity.ONE_ARGUMENT;
996
+ }
997
+ }
998
+
999
/** Reader callbacks for slots 0 through 9; entry {@code i} reads slot {@code i}. */
private final static Callback[] ref_func = new Callback[10];
static {
    for (int slot = 0; slot < ref_func.length; slot++) {
        ref_func[slot] = new RefCallback(slot);
    }
}
1010
+
1011
/** Writer callbacks for slots 0 through 9; entry {@code i} writes slot {@code i}. */
private final static Callback[] set_func = new Callback[10];
static {
    for (int slot = 0; slot < set_func.length; slot++) {
        set_func[slot] = new SetCallback(slot);
    }
}
1022
+
1023
+ public final static ObjectAllocator alloc_hpricot_struct = new ObjectAllocator() {
1024
+ // alloc_hpricot_struct
1025
+ public IRubyObject allocate(Ruby runtime, RubyClass klass) {
1026
+ RubyClass kurrent = klass;
1027
+ Object sz = kurrent.fastGetInternalVariable("__size__");
1028
+ while(sz == null && kurrent != null) {
1029
+ kurrent = kurrent.getSuperClass();
1030
+ sz = kurrent.fastGetInternalVariable("__size__");
1031
+ }
1032
+ int size = RubyNumeric.fix2int((RubyObject)sz);
1033
+ RubyObject obj = new RubyObject(runtime, klass);
1034
+ IRubyObject[] all = new IRubyObject[size];
1035
+ java.util.Arrays.fill(all, runtime.getNil());
1036
+ obj.dataWrapStruct(all);
1037
+ return obj;
1038
+ }
1039
+ };
1040
+
1041
+ public static RubyClass makeHpricotStruct(Ruby runtime, IRubyObject[] members) {
1042
+ RubyClass klass = RubyClass.newClass(runtime, runtime.getObject());
1043
+ klass.fastSetInternalVariable("__size__", runtime.newFixnum(members.length));
1044
+ klass.setAllocator(alloc_hpricot_struct);
1045
+
1046
+ for(int i = 0; i < members.length; i++) {
1047
+ String id = members[i].toString();
1048
+ klass.defineMethod(id, ref_func[i]);
1049
+ klass.defineMethod(id + "=", set_func[i]);
1050
+ }
305
1051
 
306
- if ( done && ele_open ) {
307
- ele_open = false;
308
- if(ts > -1) {
309
- mark_tag = ts;
310
- ts = -1;
311
- text = true;
312
- }
1052
+ return klass;
313
1053
  }
314
1054
 
315
- if(ts == -1) {
316
- have = 0;
317
- /* text nodes have no ts because each byte is parsed alone */
318
- if(mark_tag != -1 && text) {
319
- if (done) {
320
- if(mark_tag < p-1) {
321
- CAT(tag, p-1);
322
- ELE(sym_text);
323
- }
324
- } else {
325
- CAT(tag, p);
1055
+ public boolean basicLoad(final Ruby runtime) throws IOException {
1056
+ Init_hpricot_scan(runtime);
1057
+ return true;
1058
+ }
1059
+
1060
+ public static class Extra {
1061
+ IRubyObject symAllow, symDeny, sym_xmldecl, sym_doctype,
1062
+ sym_procins, sym_stag, sym_etag, sym_emptytag,
1063
+ sym_allowed, sym_children, sym_comment,
1064
+ sym_cdata, sym_name, sym_parent,
1065
+ sym_raw_attributes, sym_raw_string, sym_tagno,
1066
+ sym_text, sym_EMPTY, sym_CDATA;
1067
+
1068
+ public RubyModule mHpricot;
1069
+ public RubyClass structElem;
1070
+ public RubyClass structAttr;
1071
+ public RubyClass structBasic;
1072
+ public RubyClass cDoc;
1073
+ public RubyClass cCData;
1074
+ public RubyClass cComment;
1075
+ public RubyClass cDocType;
1076
+ public RubyClass cElem;
1077
+ public RubyClass cBogusETag;
1078
+ public RubyClass cText;
1079
+ public RubyClass cXMLDecl;
1080
+ public RubyClass cProcIns;
1081
+ public RubyClass rb_eHpricotParseError;
1082
+ public IRubyObject reProcInsParse;
1083
+
1084
/**
 * Creates the symbols the scanner refers to. The module, class and regexp
 * fields are left unset here; Init_hpricot_scan fills them in.
 *
 * @param runtime runtime used to create the symbols
 */
public Extra(Ruby runtime) {
    this.symAllow           = runtime.newSymbol("allow");
    this.symDeny            = runtime.newSymbol("deny");
    this.sym_xmldecl        = runtime.newSymbol("xmldecl");
    this.sym_doctype        = runtime.newSymbol("doctype");
    this.sym_procins        = runtime.newSymbol("procins");
    this.sym_stag           = runtime.newSymbol("stag");
    this.sym_etag           = runtime.newSymbol("etag");
    this.sym_emptytag       = runtime.newSymbol("emptytag");
    this.sym_allowed        = runtime.newSymbol("allowed");
    this.sym_children       = runtime.newSymbol("children");
    this.sym_comment        = runtime.newSymbol("comment");
    this.sym_cdata          = runtime.newSymbol("cdata");
    this.sym_name           = runtime.newSymbol("name");
    this.sym_parent         = runtime.newSymbol("parent");
    this.sym_raw_attributes = runtime.newSymbol("raw_attributes");
    this.sym_raw_string     = runtime.newSymbol("raw_string");
    this.sym_tagno          = runtime.newSymbol("tagno");
    this.sym_text           = runtime.newSymbol("text");
    this.sym_EMPTY          = runtime.newSymbol("EMPTY");
    this.sym_CDATA          = runtime.newSymbol("CDATA");
}
327
- }
328
- mark_tag = 0;
329
- } else {
330
- have = pe - ts;
331
- System.arraycopy(buf,ts,buf,0,have);
332
- SLIDE(tag);
333
- SLIDE(akey);
334
- SLIDE(aval);
335
- te = (te - ts);
336
- ts = 0;
337
1106
  }
338
- }
339
- return runtime.getNil();
340
- }
341
1107
 
342
- public static IRubyObject __hpricot_scan(IRubyObject recv, IRubyObject port, Block block) {
343
- Ruby runtime = recv.getRuntime();
344
- HpricotScanService service = new HpricotScanService();
345
- service.runtime = runtime;
346
- service.xmldecl = runtime.newSymbol("xmldecl");
347
- service.doctype = runtime.newSymbol("doctype");
348
- service.procins = runtime.newSymbol("procins");
349
- service.stag = runtime.newSymbol("stag");
350
- service.etag = runtime.newSymbol("etag");
351
- service.emptytag = runtime.newSymbol("emptytag");
352
- service.comment = runtime.newSymbol("comment");
353
- service.cdata = runtime.newSymbol("cdata");
354
- service.sym_text = runtime.newSymbol("text");
355
- service.block = block;
356
- return service.hpricot_scan(recv, port);
357
- }
1108
+ public static void Init_hpricot_scan(Ruby runtime) {
1109
+ Extra x = new Extra(runtime);
358
1110
 
1111
+ x.mHpricot = runtime.defineModule("Hpricot");
1112
+ x.mHpricot.dataWrapStruct(x);
359
1113
 
360
- public boolean basicLoad(final Ruby runtime) throws IOException {
361
- Init_hpricot_scan(runtime);
362
- return true;
363
- }
1114
+ x.mHpricot.getSingletonClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
1115
+ x.mHpricot.defineAnnotatedMethods(HpricotModule.class);
364
1116
 
365
- public static void Init_hpricot_scan(Ruby runtime) {
366
- RubyModule mHpricot = runtime.defineModule("Hpricot");
367
- mHpricot.getMetaClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
368
- CallbackFactory fact = runtime.callbackFactory(HpricotScanService.class);
369
- mHpricot.getMetaClass().defineMethod("scan",fact.getSingletonMethod("__hpricot_scan",IRubyObject.class));
370
- mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
371
- rubyApi = JavaEmbedUtils.newObjectAdapter();
372
- }
1117
+ x.rb_eHpricotParseError = x.mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
1118
+
1119
+ x.structElem = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes, x.sym_etag, x.sym_raw_string, x.sym_allowed, x.sym_tagno, x.sym_children});
1120
+ x.structAttr = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes});
1121
+ x.structBasic= makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent});
1122
+
1123
+ x.cDoc = x.mHpricot.defineClassUnder("Doc", x.structElem, x.structElem.getAllocator());
1124
+
1125
+ x.cCData = x.mHpricot.defineClassUnder("CData", x.structBasic, x.structBasic.getAllocator());
1126
+ x.cCData.defineAnnotatedMethods(CData.class);
1127
+
1128
+ x.cComment = x.mHpricot.defineClassUnder("Comment", x.structBasic, x.structBasic.getAllocator());
1129
+ x.cComment.defineAnnotatedMethods(Comment.class);
1130
+
1131
+ x.cDocType = x.mHpricot.defineClassUnder("DocType", x.structAttr, x.structAttr.getAllocator());
1132
+ x.cDocType.defineAnnotatedMethods(DocType.class);
1133
+
1134
+ x.cElem = x.mHpricot.defineClassUnder("Elem", x.structElem, x.structElem.getAllocator());
1135
+ x.cElem.defineAnnotatedMethods(Elem.class);
1136
+
1137
+ x.cBogusETag = x.mHpricot.defineClassUnder("BogusETag", x.structAttr, x.structAttr.getAllocator());
1138
+ x.cBogusETag.defineAnnotatedMethods(BogusETag.class);
1139
+
1140
+ x.cText = x.mHpricot.defineClassUnder("Text", x.structBasic, x.structBasic.getAllocator());
1141
+ x.cText.defineAnnotatedMethods(Text.class);
1142
+
1143
+ x.cXMLDecl = x.mHpricot.defineClassUnder("XMLDecl", x.structAttr, x.structAttr.getAllocator());
1144
+ x.cXMLDecl.defineAnnotatedMethods(XMLDecl.class);
1145
+
1146
+ x.cProcIns = x.mHpricot.defineClassUnder("ProcIns", x.structAttr, x.structAttr.getAllocator());
1147
+ x.cProcIns.defineAnnotatedMethods(ProcIns.class);
1148
+
1149
+ x.reProcInsParse = runtime.evalScriptlet("/\\A<\\?(\\S+)\\s+(.+)/m");
1150
+ x.mHpricot.setConstant("ProcInsParse", x.reProcInsParse);
1151
+ }
373
1152
  }