hpricot 0.6-jruby
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +62 -0
- data/COPYING +18 -0
- data/README +284 -0
- data/Rakefile +211 -0
- data/ext/hpricot_scan/HpricotScanService.java +1340 -0
- data/ext/hpricot_scan/extconf.rb +6 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_scan.c +5976 -0
- data/ext/hpricot_scan/hpricot_scan.h +79 -0
- data/ext/hpricot_scan/hpricot_scan.java.rl +363 -0
- data/ext/hpricot_scan/hpricot_scan.rl +273 -0
- data/extras/mingw-rbconfig.rb +176 -0
- data/lib/hpricot.rb +26 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +200 -0
- data/lib/hpricot/elements.rb +510 -0
- data/lib/hpricot/htmlinfo.rb +672 -0
- data/lib/hpricot/inspect.rb +107 -0
- data/lib/hpricot/modules.rb +37 -0
- data/lib/hpricot/parse.rb +297 -0
- data/lib/hpricot/tag.rb +228 -0
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +821 -0
- data/lib/hpricot/xchar.rb +94 -0
- data/lib/i686-linux/hpricot_scan.jar +0 -0
- data/test/files/basic.xhtml +17 -0
- data/test/files/boingboing.html +2266 -0
- data/test/files/cy0.html +3653 -0
- data/test/files/immob.html +400 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/uswebgen.html +220 -0
- data/test/files/utf8.html +1054 -0
- data/test/files/week9.html +1723 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +7 -0
- data/test/test_alter.rb +65 -0
- data/test/test_builder.rb +24 -0
- data/test/test_parser.rb +379 -0
- data/test/test_paths.rb +16 -0
- data/test/test_preserved.rb +66 -0
- data/test/test_xml.rb +28 -0
- metadata +98 -0
@@ -0,0 +1,1340 @@
|
|
1
|
+
|
2
|
+
import java.io.IOException;
|
3
|
+
|
4
|
+
import org.jruby.Ruby;
|
5
|
+
import org.jruby.RubyClass;
|
6
|
+
import org.jruby.RubyHash;
|
7
|
+
import org.jruby.RubyModule;
|
8
|
+
import org.jruby.RubyNumeric;
|
9
|
+
import org.jruby.RubyString;
|
10
|
+
import org.jruby.runtime.Block;
|
11
|
+
import org.jruby.runtime.CallbackFactory;
|
12
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
13
|
+
import org.jruby.exceptions.RaiseException;
|
14
|
+
import org.jruby.runtime.load.BasicLibraryService;
|
15
|
+
|
16
|
+
public class HpricotScanService implements BasicLibraryService {
|
17
|
+
/**
 * Message text for an internal "this should not happen" failure; it asks the
 * user to send a bug report together with the HTML being parsed.
 * NOTE(review): the code that raises this message is not visible in this chunk.
 */
public static String NO_WAY_SERIOUSLY="*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
|
18
|
+
|
19
|
+
public void ELE(IRubyObject N) {
|
20
|
+
if (tokend > tokstart || text) {
|
21
|
+
IRubyObject raw_string = runtime.getNil();
|
22
|
+
ele_open = false; text = false;
|
23
|
+
if (tokstart != -1 && N != cdata && N != sym_text && N != procins && N != comment) {
|
24
|
+
raw_string = runtime.newString(new String(buf,tokstart,tokend-tokstart));
|
25
|
+
}
|
26
|
+
rb_yield_tokens(N, tag[0], attr, raw_string, taint);
|
27
|
+
}
|
28
|
+
}
|
29
|
+
|
30
|
+
public void SET(IRubyObject[] N, int E) {
|
31
|
+
int mark = 0;
|
32
|
+
if(N == tag) {
|
33
|
+
if(mark_tag == -1 || E == mark_tag) {
|
34
|
+
tag[0] = runtime.newString("");
|
35
|
+
} else if(E > mark_tag) {
|
36
|
+
tag[0] = runtime.newString(new String(buf,mark_tag, E-mark_tag));
|
37
|
+
}
|
38
|
+
} else if(N == akey) {
|
39
|
+
if(mark_akey == -1 || E == mark_akey) {
|
40
|
+
akey[0] = runtime.newString("");
|
41
|
+
} else if(E > mark_akey) {
|
42
|
+
akey[0] = runtime.newString(new String(buf,mark_akey, E-mark_akey));
|
43
|
+
}
|
44
|
+
} else if(N == aval) {
|
45
|
+
if(mark_aval == -1 || E == mark_aval) {
|
46
|
+
aval[0] = runtime.newString("");
|
47
|
+
} else if(E > mark_aval) {
|
48
|
+
aval[0] = runtime.newString(new String(buf,mark_aval, E-mark_aval));
|
49
|
+
}
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
53
|
+
public void CAT(IRubyObject[] N, int E) {
|
54
|
+
if(N[0].isNil()) {
|
55
|
+
SET(N,E);
|
56
|
+
} else {
|
57
|
+
int mark = 0;
|
58
|
+
if(N == tag) {
|
59
|
+
mark = mark_tag;
|
60
|
+
} else if(N == akey) {
|
61
|
+
mark = mark_akey;
|
62
|
+
} else if(N == aval) {
|
63
|
+
mark = mark_aval;
|
64
|
+
}
|
65
|
+
((RubyString)(N[0])).append(runtime.newString(new String(buf, mark, E-mark)));
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
public void SLIDE(Object N) {
|
70
|
+
int mark = 0;
|
71
|
+
if(N == tag) {
|
72
|
+
mark = mark_tag;
|
73
|
+
} else if(N == akey) {
|
74
|
+
mark = mark_akey;
|
75
|
+
} else if(N == aval) {
|
76
|
+
mark = mark_aval;
|
77
|
+
}
|
78
|
+
if(mark > tokstart) {
|
79
|
+
if(N == tag) {
|
80
|
+
mark_tag -= tokstart;
|
81
|
+
} else if(N == akey) {
|
82
|
+
mark_akey -= tokstart;
|
83
|
+
} else if(N == aval) {
|
84
|
+
mark_aval -= tokstart;
|
85
|
+
}
|
86
|
+
}
|
87
|
+
}
|
88
|
+
|
89
|
+
public void ATTR(IRubyObject K, IRubyObject V) {
|
90
|
+
if(!K.isNil()) {
|
91
|
+
if(attr.isNil()) {
|
92
|
+
attr = RubyHash.newHash(runtime);
|
93
|
+
}
|
94
|
+
((RubyHash)attr).aset(K,V);
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
public void ATTR(IRubyObject[] K, IRubyObject V) {
|
99
|
+
ATTR(K[0],V);
|
100
|
+
}
|
101
|
+
|
102
|
+
public void ATTR(IRubyObject K, IRubyObject[] V) {
|
103
|
+
ATTR(K,V[0]);
|
104
|
+
}
|
105
|
+
|
106
|
+
public void ATTR(IRubyObject[] K, IRubyObject[] V) {
|
107
|
+
ATTR(K[0],V[0]);
|
108
|
+
}
|
109
|
+
|
110
|
+
public void TEXT_PASS() {
|
111
|
+
if(!text) {
|
112
|
+
if(ele_open) {
|
113
|
+
ele_open = false;
|
114
|
+
if(tokstart > -1) {
|
115
|
+
mark_tag = tokstart;
|
116
|
+
}
|
117
|
+
} else {
|
118
|
+
mark_tag = p;
|
119
|
+
}
|
120
|
+
attr = runtime.getNil();
|
121
|
+
tag[0] = runtime.getNil();
|
122
|
+
text = true;
|
123
|
+
}
|
124
|
+
}
|
125
|
+
|
126
|
+
public void EBLK(IRubyObject N, int T) {
|
127
|
+
CAT(tag, p - T + 1);
|
128
|
+
ELE(N);
|
129
|
+
}
|
130
|
+
|
131
|
+
|
132
|
+
public void rb_raise(RubyClass error, String message) {
|
133
|
+
throw new RaiseException(runtime, error, message, true);
|
134
|
+
}
|
135
|
+
|
136
|
+
public IRubyObject rb_str_new2(String s) {
|
137
|
+
return runtime.newString(s);
|
138
|
+
}
|
139
|
+
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
static final byte[] _hpricot_scan_actions = {
|
144
|
+
0, 1, 1, 1, 2, 1, 4, 1,
|
145
|
+
5, 1, 6, 1, 7, 1, 8, 1,
|
146
|
+
9, 1, 10, 1, 11, 1, 12, 1,
|
147
|
+
14, 1, 16, 1, 20, 1, 21, 1,
|
148
|
+
22, 1, 24, 1, 25, 1, 26, 1,
|
149
|
+
28, 1, 29, 1, 30, 1, 32, 1,
|
150
|
+
33, 1, 38, 1, 39, 1, 40, 1,
|
151
|
+
41, 1, 42, 1, 43, 1, 44, 1,
|
152
|
+
45, 1, 46, 1, 47, 1, 48, 1,
|
153
|
+
49, 1, 50, 2, 2, 5, 2, 2,
|
154
|
+
6, 2, 2, 11, 2, 2, 12, 2,
|
155
|
+
2, 14, 2, 4, 39, 2, 4, 40,
|
156
|
+
2, 4, 41, 2, 5, 2, 2, 6,
|
157
|
+
14, 2, 7, 6, 2, 7, 14, 2,
|
158
|
+
11, 12, 2, 13, 3, 2, 14, 6,
|
159
|
+
2, 14, 40, 2, 15, 24, 2, 15,
|
160
|
+
28, 2, 15, 32, 2, 15, 45, 2,
|
161
|
+
17, 23, 2, 18, 27, 2, 19, 31,
|
162
|
+
2, 22, 34, 2, 22, 36, 3, 2,
|
163
|
+
6, 14, 3, 2, 14, 6, 3, 6,
|
164
|
+
7, 14, 3, 6, 14, 40, 3, 7,
|
165
|
+
14, 40, 3, 14, 6, 40, 3, 14,
|
166
|
+
13, 3, 3, 22, 0, 37, 3, 22,
|
167
|
+
2, 34, 3, 22, 14, 35, 4, 2,
|
168
|
+
14, 13, 3, 4, 6, 7, 14, 40,
|
169
|
+
4, 22, 2, 14, 35, 4, 22, 6,
|
170
|
+
14, 35, 4, 22, 7, 14, 35, 4,
|
171
|
+
22, 14, 6, 35, 5, 22, 2, 6,
|
172
|
+
14, 35, 5, 22, 2, 14, 6, 35,
|
173
|
+
5, 22, 6, 7, 14, 35
|
174
|
+
};
|
175
|
+
|
176
|
+
static final short[] _hpricot_scan_key_offsets = {
|
177
|
+
0, 3, 4, 5, 6, 7, 8, 9,
|
178
|
+
10, 13, 22, 37, 44, 45, 46, 47,
|
179
|
+
48, 49, 52, 57, 69, 81, 86, 93,
|
180
|
+
94, 95, 100, 101, 105, 106, 107, 121,
|
181
|
+
135, 152, 169, 186, 203, 210, 212, 214,
|
182
|
+
220, 222, 227, 232, 238, 240, 245, 251,
|
183
|
+
265, 266, 267, 268, 269, 270, 271, 272,
|
184
|
+
273, 274, 275, 276, 282, 296, 300, 313,
|
185
|
+
326, 340, 354, 355, 366, 375, 388, 405,
|
186
|
+
423, 441, 450, 461, 480, 499, 510, 521,
|
187
|
+
536, 538, 540, 556, 572, 575, 587, 599,
|
188
|
+
619, 639, 658, 677, 697, 717, 728, 739,
|
189
|
+
751, 763, 775, 791, 794, 809, 811, 813,
|
190
|
+
829, 845, 848, 860, 871, 890, 910, 930,
|
191
|
+
941, 952, 964, 984, 1004, 1016, 1036, 1057,
|
192
|
+
1074, 1091, 1095, 1098, 1110, 1122, 1142, 1162,
|
193
|
+
1182, 1194, 1206, 1226, 1242, 1258, 1270, 1291,
|
194
|
+
1310, 1313, 1328, 1340, 1355, 1358, 1369, 1371,
|
195
|
+
1373, 1384, 1391, 1404, 1418, 1432, 1445, 1446,
|
196
|
+
1447, 1448, 1449, 1450, 1451, 1455, 1460, 1469,
|
197
|
+
1479, 1484, 1491, 1492, 1493, 1494, 1495, 1496,
|
198
|
+
1497, 1498, 1499, 1503, 1508, 1512, 1522, 1527,
|
199
|
+
1533, 1534, 1535, 1536, 1537, 1538, 1539, 1540,
|
200
|
+
1541, 1542, 1546, 1551, 1553, 1554, 1555, 1560,
|
201
|
+
1561, 1562, 1564, 1565, 1566, 1567, 1568, 1572,
|
202
|
+
1582, 1591, 1601, 1602, 1603, 1605, 1614, 1615,
|
203
|
+
1616, 1617, 1619, 1621, 1624, 1627, 1631, 1633,
|
204
|
+
1634, 1636, 1637, 1640
|
205
|
+
};
|
206
|
+
|
207
|
+
static final char[] _hpricot_scan_trans_keys = {
|
208
|
+
45, 68, 91, 45, 79, 67, 84, 89,
|
209
|
+
80, 69, 32, 9, 13, 32, 58, 95,
|
210
|
+
9, 13, 65, 90, 97, 122, 32, 62,
|
211
|
+
63, 91, 95, 9, 13, 45, 46, 48,
|
212
|
+
58, 65, 90, 97, 122, 32, 62, 80,
|
213
|
+
83, 91, 9, 13, 85, 66, 76, 73,
|
214
|
+
67, 32, 9, 13, 32, 34, 39, 9,
|
215
|
+
13, 9, 34, 61, 95, 32, 37, 39,
|
216
|
+
59, 63, 90, 97, 122, 9, 34, 61,
|
217
|
+
95, 32, 37, 39, 59, 63, 90, 97,
|
218
|
+
122, 32, 62, 91, 9, 13, 32, 34,
|
219
|
+
39, 62, 91, 9, 13, 34, 34, 32,
|
220
|
+
62, 91, 9, 13, 93, 32, 62, 9,
|
221
|
+
13, 39, 39, 9, 39, 61, 95, 32,
|
222
|
+
33, 35, 37, 40, 59, 63, 90, 97,
|
223
|
+
122, 9, 39, 61, 95, 32, 33, 35,
|
224
|
+
37, 40, 59, 63, 90, 97, 122, 9,
|
225
|
+
32, 33, 39, 62, 91, 95, 10, 13,
|
226
|
+
35, 37, 40, 59, 61, 90, 97, 122,
|
227
|
+
9, 32, 34, 39, 62, 91, 95, 10,
|
228
|
+
13, 33, 37, 40, 59, 61, 90, 97,
|
229
|
+
122, 9, 32, 33, 39, 62, 91, 95,
|
230
|
+
10, 13, 35, 37, 40, 59, 61, 90,
|
231
|
+
97, 122, 9, 32, 34, 39, 62, 91,
|
232
|
+
95, 10, 13, 33, 37, 40, 59, 61,
|
233
|
+
90, 97, 122, 32, 34, 39, 62, 91,
|
234
|
+
9, 13, 34, 39, 34, 39, 32, 39,
|
235
|
+
62, 91, 9, 13, 39, 93, 32, 62,
|
236
|
+
93, 9, 13, 32, 39, 62, 9, 13,
|
237
|
+
32, 34, 62, 91, 9, 13, 34, 93,
|
238
|
+
32, 34, 62, 9, 13, 32, 39, 62,
|
239
|
+
91, 9, 13, 9, 39, 61, 95, 32,
|
240
|
+
33, 35, 37, 40, 59, 63, 90, 97,
|
241
|
+
122, 89, 83, 84, 69, 77, 67, 68,
|
242
|
+
65, 84, 65, 91, 58, 95, 65, 90,
|
243
|
+
97, 122, 32, 62, 63, 95, 9, 13,
|
244
|
+
45, 46, 48, 58, 65, 90, 97, 122,
|
245
|
+
32, 62, 9, 13, 32, 47, 62, 63,
|
246
|
+
95, 9, 13, 45, 58, 65, 90, 97,
|
247
|
+
122, 32, 47, 62, 63, 95, 9, 13,
|
248
|
+
45, 58, 65, 90, 97, 122, 32, 47,
|
249
|
+
61, 62, 63, 95, 9, 13, 45, 58,
|
250
|
+
65, 90, 97, 122, 32, 47, 61, 62,
|
251
|
+
63, 95, 9, 13, 45, 58, 65, 90,
|
252
|
+
97, 122, 62, 13, 32, 34, 39, 47,
|
253
|
+
60, 62, 9, 10, 11, 12, 13, 32,
|
254
|
+
47, 60, 62, 9, 10, 11, 12, 32,
|
255
|
+
47, 62, 63, 95, 9, 13, 45, 58,
|
256
|
+
65, 90, 97, 122, 13, 32, 47, 60,
|
257
|
+
62, 63, 95, 9, 10, 11, 12, 45,
|
258
|
+
58, 65, 90, 97, 122, 13, 32, 47,
|
259
|
+
60, 61, 62, 63, 95, 9, 10, 11,
|
260
|
+
12, 45, 58, 65, 90, 97, 122, 13,
|
261
|
+
32, 47, 60, 61, 62, 63, 95, 9,
|
262
|
+
10, 11, 12, 45, 58, 65, 90, 97,
|
263
|
+
122, 13, 32, 47, 60, 62, 9, 10,
|
264
|
+
11, 12, 13, 32, 34, 39, 47, 60,
|
265
|
+
62, 9, 10, 11, 12, 13, 32, 34,
|
266
|
+
39, 47, 60, 62, 63, 95, 9, 10,
|
267
|
+
11, 12, 45, 58, 65, 90, 97, 122,
|
268
|
+
13, 32, 34, 39, 47, 60, 62, 63,
|
269
|
+
95, 9, 10, 11, 12, 45, 58, 65,
|
270
|
+
90, 97, 122, 13, 32, 34, 47, 60,
|
271
|
+
62, 92, 9, 10, 11, 12, 13, 32,
|
272
|
+
34, 47, 60, 62, 92, 9, 10, 11,
|
273
|
+
12, 32, 34, 47, 62, 63, 92, 95,
|
274
|
+
9, 13, 45, 58, 65, 90, 97, 122,
|
275
|
+
34, 92, 34, 92, 32, 34, 47, 61,
|
276
|
+
62, 63, 92, 95, 9, 13, 45, 58,
|
277
|
+
65, 90, 97, 122, 32, 34, 47, 61,
|
278
|
+
62, 63, 92, 95, 9, 13, 45, 58,
|
279
|
+
65, 90, 97, 122, 34, 62, 92, 13,
|
280
|
+
32, 34, 39, 47, 60, 62, 92, 9,
|
281
|
+
10, 11, 12, 13, 32, 34, 39, 47,
|
282
|
+
60, 62, 92, 9, 10, 11, 12, 13,
|
283
|
+
32, 34, 39, 47, 60, 62, 63, 92,
|
284
|
+
95, 9, 10, 11, 12, 45, 58, 65,
|
285
|
+
90, 97, 122, 13, 32, 34, 39, 47,
|
286
|
+
60, 62, 63, 92, 95, 9, 10, 11,
|
287
|
+
12, 45, 58, 65, 90, 97, 122, 13,
|
288
|
+
32, 34, 47, 60, 62, 63, 92, 95,
|
289
|
+
9, 10, 11, 12, 45, 58, 65, 90,
|
290
|
+
97, 122, 13, 32, 34, 47, 60, 62,
|
291
|
+
63, 92, 95, 9, 10, 11, 12, 45,
|
292
|
+
58, 65, 90, 97, 122, 13, 32, 34,
|
293
|
+
47, 60, 61, 62, 63, 92, 95, 9,
|
294
|
+
10, 11, 12, 45, 58, 65, 90, 97,
|
295
|
+
122, 13, 32, 34, 47, 60, 61, 62,
|
296
|
+
63, 92, 95, 9, 10, 11, 12, 45,
|
297
|
+
58, 65, 90, 97, 122, 13, 32, 34,
|
298
|
+
47, 60, 62, 92, 9, 10, 11, 12,
|
299
|
+
13, 32, 34, 47, 60, 62, 92, 9,
|
300
|
+
10, 11, 12, 13, 32, 34, 39, 47,
|
301
|
+
60, 62, 92, 9, 10, 11, 12, 13,
|
302
|
+
32, 34, 39, 47, 60, 62, 92, 9,
|
303
|
+
10, 11, 12, 13, 32, 34, 39, 47,
|
304
|
+
60, 62, 92, 9, 10, 11, 12, 32,
|
305
|
+
34, 39, 47, 62, 63, 92, 95, 9,
|
306
|
+
13, 45, 58, 65, 90, 97, 122, 34,
|
307
|
+
39, 92, 32, 39, 47, 62, 63, 92,
|
308
|
+
95, 9, 13, 45, 58, 65, 90, 97,
|
309
|
+
122, 39, 92, 39, 92, 32, 39, 47,
|
310
|
+
61, 62, 63, 92, 95, 9, 13, 45,
|
311
|
+
58, 65, 90, 97, 122, 32, 39, 47,
|
312
|
+
61, 62, 63, 92, 95, 9, 13, 45,
|
313
|
+
58, 65, 90, 97, 122, 39, 62, 92,
|
314
|
+
13, 32, 34, 39, 47, 60, 62, 92,
|
315
|
+
9, 10, 11, 12, 13, 32, 39, 47,
|
316
|
+
60, 62, 92, 9, 10, 11, 12, 13,
|
317
|
+
32, 39, 47, 60, 62, 63, 92, 95,
|
318
|
+
9, 10, 11, 12, 45, 58, 65, 90,
|
319
|
+
97, 122, 13, 32, 39, 47, 60, 61,
|
320
|
+
62, 63, 92, 95, 9, 10, 11, 12,
|
321
|
+
45, 58, 65, 90, 97, 122, 13, 32,
|
322
|
+
39, 47, 60, 61, 62, 63, 92, 95,
|
323
|
+
9, 10, 11, 12, 45, 58, 65, 90,
|
324
|
+
97, 122, 13, 32, 39, 47, 60, 62,
|
325
|
+
92, 9, 10, 11, 12, 13, 32, 39,
|
326
|
+
47, 60, 62, 92, 9, 10, 11, 12,
|
327
|
+
13, 32, 34, 39, 47, 60, 62, 92,
|
328
|
+
9, 10, 11, 12, 13, 32, 34, 39,
|
329
|
+
47, 60, 62, 63, 92, 95, 9, 10,
|
330
|
+
11, 12, 45, 58, 65, 90, 97, 122,
|
331
|
+
13, 32, 34, 39, 47, 60, 62, 63,
|
332
|
+
92, 95, 9, 10, 11, 12, 45, 58,
|
333
|
+
65, 90, 97, 122, 13, 32, 34, 39,
|
334
|
+
47, 60, 62, 92, 9, 10, 11, 12,
|
335
|
+
13, 32, 34, 39, 47, 60, 62, 63,
|
336
|
+
92, 95, 9, 10, 11, 12, 45, 58,
|
337
|
+
65, 90, 97, 122, 13, 32, 34, 39,
|
338
|
+
47, 60, 61, 62, 63, 92, 95, 9,
|
339
|
+
10, 11, 12, 45, 58, 65, 90, 97,
|
340
|
+
122, 32, 34, 39, 47, 61, 62, 63,
|
341
|
+
92, 95, 9, 13, 45, 58, 65, 90,
|
342
|
+
97, 122, 32, 34, 39, 47, 61, 62,
|
343
|
+
63, 92, 95, 9, 13, 45, 58, 65,
|
344
|
+
90, 97, 122, 34, 39, 62, 92, 34,
|
345
|
+
39, 92, 13, 32, 34, 39, 47, 60,
|
346
|
+
62, 92, 9, 10, 11, 12, 13, 32,
|
347
|
+
34, 39, 47, 60, 62, 92, 9, 10,
|
348
|
+
11, 12, 13, 32, 34, 39, 47, 60,
|
349
|
+
62, 63, 92, 95, 9, 10, 11, 12,
|
350
|
+
45, 58, 65, 90, 97, 122, 13, 32,
|
351
|
+
34, 39, 47, 60, 62, 63, 92, 95,
|
352
|
+
9, 10, 11, 12, 45, 58, 65, 90,
|
353
|
+
97, 122, 13, 32, 34, 39, 47, 60,
|
354
|
+
62, 63, 92, 95, 9, 10, 11, 12,
|
355
|
+
45, 58, 65, 90, 97, 122, 13, 32,
|
356
|
+
34, 39, 47, 60, 62, 92, 9, 10,
|
357
|
+
11, 12, 13, 32, 34, 39, 47, 60,
|
358
|
+
62, 92, 9, 10, 11, 12, 13, 32,
|
359
|
+
34, 39, 47, 60, 62, 63, 92, 95,
|
360
|
+
9, 10, 11, 12, 45, 58, 65, 90,
|
361
|
+
97, 122, 32, 34, 39, 47, 62, 63,
|
362
|
+
92, 95, 9, 13, 45, 58, 65, 90,
|
363
|
+
97, 122, 32, 34, 39, 47, 62, 63,
|
364
|
+
92, 95, 9, 13, 45, 58, 65, 90,
|
365
|
+
97, 122, 13, 32, 34, 39, 47, 60,
|
366
|
+
62, 92, 9, 10, 11, 12, 13, 32,
|
367
|
+
34, 39, 47, 60, 61, 62, 63, 92,
|
368
|
+
95, 9, 10, 11, 12, 45, 58, 65,
|
369
|
+
90, 97, 122, 13, 32, 39, 47, 60,
|
370
|
+
62, 63, 92, 95, 9, 10, 11, 12,
|
371
|
+
45, 58, 65, 90, 97, 122, 34, 39,
|
372
|
+
92, 32, 39, 47, 62, 63, 92, 95,
|
373
|
+
9, 13, 45, 58, 65, 90, 97, 122,
|
374
|
+
13, 32, 34, 39, 47, 60, 62, 92,
|
375
|
+
9, 10, 11, 12, 32, 34, 47, 62,
|
376
|
+
63, 92, 95, 9, 13, 45, 58, 65,
|
377
|
+
90, 97, 122, 34, 39, 92, 13, 32,
|
378
|
+
39, 47, 60, 62, 92, 9, 10, 11,
|
379
|
+
12, 34, 92, 39, 92, 13, 32, 34,
|
380
|
+
39, 47, 60, 62, 9, 10, 11, 12,
|
381
|
+
58, 95, 120, 65, 90, 97, 122, 32,
|
382
|
+
63, 95, 9, 13, 45, 46, 48, 58,
|
383
|
+
65, 90, 97, 122, 32, 63, 95, 109,
|
384
|
+
9, 13, 45, 46, 48, 58, 65, 90,
|
385
|
+
97, 122, 32, 63, 95, 108, 9, 13,
|
386
|
+
45, 46, 48, 58, 65, 90, 97, 122,
|
387
|
+
32, 63, 95, 9, 13, 45, 46, 48,
|
388
|
+
58, 65, 90, 97, 122, 101, 114, 115,
|
389
|
+
105, 111, 110, 32, 61, 9, 13, 32,
|
390
|
+
34, 39, 9, 13, 95, 45, 46, 48,
|
391
|
+
58, 65, 90, 97, 122, 34, 95, 45,
|
392
|
+
46, 48, 58, 65, 90, 97, 122, 32,
|
393
|
+
62, 63, 9, 13, 32, 62, 63, 101,
|
394
|
+
115, 9, 13, 62, 110, 99, 111, 100,
|
395
|
+
105, 110, 103, 32, 61, 9, 13, 32,
|
396
|
+
34, 39, 9, 13, 65, 90, 97, 122,
|
397
|
+
34, 95, 45, 46, 48, 57, 65, 90,
|
398
|
+
97, 122, 32, 62, 63, 9, 13, 32,
|
399
|
+
62, 63, 115, 9, 13, 116, 97, 110,
|
400
|
+
100, 97, 108, 111, 110, 101, 32, 61,
|
401
|
+
9, 13, 32, 34, 39, 9, 13, 110,
|
402
|
+
121, 111, 34, 32, 62, 63, 9, 13,
|
403
|
+
101, 115, 110, 121, 111, 39, 101, 115,
|
404
|
+
65, 90, 97, 122, 39, 95, 45, 46,
|
405
|
+
48, 57, 65, 90, 97, 122, 95, 45,
|
406
|
+
46, 48, 58, 65, 90, 97, 122, 39,
|
407
|
+
95, 45, 46, 48, 58, 65, 90, 97,
|
408
|
+
122, 62, 62, 10, 60, 33, 47, 58,
|
409
|
+
63, 95, 65, 90, 97, 122, 39, 93,
|
410
|
+
34, 34, 92, 39, 92, 34, 39, 92,
|
411
|
+
32, 9, 13, 32, 118, 9, 13, 10,
|
412
|
+
45, 45, 10, 93, 93, 10, 62, 63,
|
413
|
+
62, 0
|
414
|
+
};
|
415
|
+
|
416
|
+
static final byte[] _hpricot_scan_single_lengths = {
|
417
|
+
3, 1, 1, 1, 1, 1, 1, 1,
|
418
|
+
1, 3, 5, 5, 1, 1, 1, 1,
|
419
|
+
1, 1, 3, 4, 4, 3, 5, 1,
|
420
|
+
1, 3, 1, 2, 1, 1, 4, 4,
|
421
|
+
7, 7, 7, 7, 5, 2, 2, 4,
|
422
|
+
2, 3, 3, 4, 2, 3, 4, 4,
|
423
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
424
|
+
1, 1, 1, 2, 4, 2, 5, 5,
|
425
|
+
6, 6, 1, 7, 5, 5, 7, 8,
|
426
|
+
8, 5, 7, 9, 9, 7, 7, 7,
|
427
|
+
2, 2, 8, 8, 3, 8, 8, 10,
|
428
|
+
10, 9, 9, 10, 10, 7, 7, 8,
|
429
|
+
8, 8, 8, 3, 7, 2, 2, 8,
|
430
|
+
8, 3, 8, 7, 9, 10, 10, 7,
|
431
|
+
7, 8, 10, 10, 8, 10, 11, 9,
|
432
|
+
9, 4, 3, 8, 8, 10, 10, 10,
|
433
|
+
8, 8, 10, 8, 8, 8, 11, 9,
|
434
|
+
3, 7, 8, 7, 3, 7, 2, 2,
|
435
|
+
7, 3, 3, 4, 4, 3, 1, 1,
|
436
|
+
1, 1, 1, 1, 2, 3, 1, 2,
|
437
|
+
3, 5, 1, 1, 1, 1, 1, 1,
|
438
|
+
1, 1, 2, 3, 0, 2, 3, 4,
|
439
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
440
|
+
1, 2, 3, 2, 1, 1, 3, 1,
|
441
|
+
1, 2, 1, 1, 1, 1, 0, 2,
|
442
|
+
1, 2, 1, 1, 2, 5, 1, 1,
|
443
|
+
1, 2, 2, 3, 1, 2, 2, 1,
|
444
|
+
2, 1, 3, 1
|
445
|
+
};
|
446
|
+
|
447
|
+
static final byte[] _hpricot_scan_range_lengths = {
|
448
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
449
|
+
1, 3, 5, 1, 0, 0, 0, 0,
|
450
|
+
0, 1, 1, 4, 4, 1, 1, 0,
|
451
|
+
0, 1, 0, 1, 0, 0, 5, 5,
|
452
|
+
5, 5, 5, 5, 1, 0, 0, 1,
|
453
|
+
0, 1, 1, 1, 0, 1, 1, 5,
|
454
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
455
|
+
0, 0, 0, 2, 5, 1, 4, 4,
|
456
|
+
4, 4, 0, 2, 2, 4, 5, 5,
|
457
|
+
5, 2, 2, 5, 5, 2, 2, 4,
|
458
|
+
0, 0, 4, 4, 0, 2, 2, 5,
|
459
|
+
5, 5, 5, 5, 5, 2, 2, 2,
|
460
|
+
2, 2, 4, 0, 4, 0, 0, 4,
|
461
|
+
4, 0, 2, 2, 5, 5, 5, 2,
|
462
|
+
2, 2, 5, 5, 2, 5, 5, 4,
|
463
|
+
4, 0, 0, 2, 2, 5, 5, 5,
|
464
|
+
2, 2, 5, 4, 4, 2, 5, 5,
|
465
|
+
0, 4, 2, 4, 0, 2, 0, 0,
|
466
|
+
2, 2, 5, 5, 5, 5, 0, 0,
|
467
|
+
0, 0, 0, 0, 1, 1, 4, 4,
|
468
|
+
1, 1, 0, 0, 0, 0, 0, 0,
|
469
|
+
0, 0, 1, 1, 2, 4, 1, 1,
|
470
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
471
|
+
0, 1, 1, 0, 0, 0, 1, 0,
|
472
|
+
0, 0, 0, 0, 0, 0, 2, 4,
|
473
|
+
4, 4, 0, 0, 0, 2, 0, 0,
|
474
|
+
0, 0, 0, 0, 1, 1, 0, 0,
|
475
|
+
0, 0, 0, 0
|
476
|
+
};
|
477
|
+
|
478
|
+
static final short[] _hpricot_scan_index_offsets = {
|
479
|
+
0, 4, 6, 8, 10, 12, 14, 16,
|
480
|
+
18, 21, 28, 39, 46, 48, 50, 52,
|
481
|
+
54, 56, 59, 64, 73, 82, 87, 94,
|
482
|
+
96, 98, 103, 105, 109, 111, 113, 123,
|
483
|
+
133, 146, 159, 172, 185, 192, 195, 198,
|
484
|
+
204, 207, 212, 217, 223, 226, 231, 237,
|
485
|
+
247, 249, 251, 253, 255, 257, 259, 261,
|
486
|
+
263, 265, 267, 269, 274, 284, 288, 298,
|
487
|
+
308, 319, 330, 332, 342, 350, 360, 373,
|
488
|
+
387, 401, 409, 419, 434, 449, 459, 469,
|
489
|
+
481, 484, 487, 500, 513, 517, 528, 539,
|
490
|
+
555, 571, 586, 601, 617, 633, 643, 653,
|
491
|
+
664, 675, 686, 699, 703, 715, 718, 721,
|
492
|
+
734, 747, 751, 762, 772, 787, 803, 819,
|
493
|
+
829, 839, 850, 866, 882, 893, 909, 926,
|
494
|
+
940, 954, 959, 963, 974, 985, 1001, 1017,
|
495
|
+
1033, 1044, 1055, 1071, 1084, 1097, 1108, 1125,
|
496
|
+
1140, 1144, 1156, 1167, 1179, 1183, 1193, 1196,
|
497
|
+
1199, 1209, 1215, 1224, 1234, 1244, 1253, 1255,
|
498
|
+
1257, 1259, 1261, 1263, 1265, 1269, 1274, 1280,
|
499
|
+
1287, 1292, 1299, 1301, 1303, 1305, 1307, 1309,
|
500
|
+
1311, 1313, 1315, 1319, 1324, 1327, 1334, 1339,
|
501
|
+
1345, 1347, 1349, 1351, 1353, 1355, 1357, 1359,
|
502
|
+
1361, 1363, 1367, 1372, 1375, 1377, 1379, 1384,
|
503
|
+
1386, 1388, 1391, 1393, 1395, 1397, 1399, 1402,
|
504
|
+
1409, 1415, 1422, 1424, 1426, 1429, 1437, 1439,
|
505
|
+
1441, 1443, 1446, 1449, 1453, 1456, 1460, 1463,
|
506
|
+
1465, 1468, 1470, 1474
|
507
|
+
};
|
508
|
+
|
509
|
+
static final short[] _hpricot_scan_indicies = {
|
510
|
+
335, 336, 337, 296, 356, 296, 349, 296,
|
511
|
+
399, 296, 401, 296, 354, 296, 350, 296,
|
512
|
+
400, 296, 308, 308, 296, 308, 309, 309,
|
513
|
+
308, 309, 309, 296, 328, 330, 329, 331,
|
514
|
+
329, 328, 329, 329, 329, 329, 296, 310,
|
515
|
+
302, 311, 312, 0, 310, 296, 353, 296,
|
516
|
+
342, 296, 347, 296, 346, 296, 343, 296,
|
517
|
+
304, 304, 296, 304, 305, 306, 304, 296,
|
518
|
+
321, 320, 321, 321, 321, 321, 321, 321,
|
519
|
+
296, 319, 320, 319, 319, 319, 319, 319,
|
520
|
+
319, 296, 298, 302, 0, 298, 296, 298,
|
521
|
+
300, 307, 302, 0, 298, 296, 6, 222,
|
522
|
+
6, 13, 358, 302, 0, 358, 69, 1,
|
523
|
+
0, 1, 302, 1, 69, 6, 182, 6,
|
524
|
+
5, 322, 323, 322, 322, 322, 322, 322,
|
525
|
+
322, 322, 296, 299, 303, 299, 299, 299,
|
526
|
+
299, 299, 299, 299, 296, 297, 297, 299,
|
527
|
+
303, 302, 0, 299, 298, 299, 299, 299,
|
528
|
+
299, 296, 297, 297, 300, 301, 302, 0,
|
529
|
+
299, 298, 299, 299, 299, 299, 296, 186,
|
530
|
+
186, 188, 42, 184, 185, 188, 187, 188,
|
531
|
+
188, 188, 188, 182, 43, 43, 38, 44,
|
532
|
+
40, 34, 41, 37, 41, 41, 41, 41,
|
533
|
+
5, 37, 38, 39, 40, 34, 37, 5,
|
534
|
+
63, 224, 223, 63, 64, 62, 371, 6,
|
535
|
+
40, 34, 371, 5, 35, 36, 34, 26,
|
536
|
+
27, 1, 26, 0, 36, 6, 40, 36,
|
537
|
+
5, 60, 6, 61, 58, 60, 13, 35,
|
538
|
+
59, 58, 59, 6, 61, 59, 13, 183,
|
539
|
+
6, 184, 185, 183, 182, 41, 42, 41,
|
540
|
+
41, 41, 41, 41, 41, 41, 5, 403,
|
541
|
+
296, 351, 296, 352, 296, 345, 296, 348,
|
542
|
+
296, 398, 296, 344, 296, 341, 296, 402,
|
543
|
+
296, 397, 296, 355, 296, 338, 338, 338,
|
544
|
+
338, 296, 332, 334, 333, 333, 332, 333,
|
545
|
+
333, 333, 333, 296, 313, 314, 313, 296,
|
546
|
+
324, 326, 327, 325, 325, 324, 325, 325,
|
547
|
+
325, 296, 315, 317, 318, 316, 316, 315,
|
548
|
+
316, 316, 316, 296, 364, 366, 367, 368,
|
549
|
+
365, 365, 364, 365, 365, 365, 69, 359,
|
550
|
+
361, 362, 162, 360, 360, 359, 360, 360,
|
551
|
+
360, 69, 369, 69, 157, 157, 159, 160,
|
552
|
+
161, 69, 162, 157, 158, 156, 66, 66,
|
553
|
+
68, 69, 70, 66, 67, 65, 363, 361,
|
554
|
+
162, 360, 360, 363, 360, 360, 360, 69,
|
555
|
+
66, 66, 74, 69, 76, 73, 73, 66,
|
556
|
+
67, 73, 73, 73, 65, 132, 132, 135,
|
557
|
+
69, 136, 137, 134, 134, 132, 133, 134,
|
558
|
+
134, 134, 65, 71, 71, 74, 69, 75,
|
559
|
+
76, 73, 73, 71, 72, 73, 73, 73,
|
560
|
+
65, 66, 66, 68, 69, 70, 66, 67,
|
561
|
+
65, 226, 226, 228, 229, 230, 69, 70,
|
562
|
+
226, 227, 156, 163, 163, 159, 160, 161,
|
563
|
+
69, 162, 165, 165, 163, 164, 165, 165,
|
564
|
+
165, 156, 226, 226, 228, 229, 231, 69,
|
565
|
+
76, 165, 165, 226, 227, 165, 165, 165,
|
566
|
+
156, 248, 248, 84, 246, 199, 250, 195,
|
567
|
+
248, 249, 189, 92, 92, 84, 95, 7,
|
568
|
+
96, 97, 92, 93, 91, 372, 3, 48,
|
569
|
+
50, 47, 8, 47, 372, 47, 47, 47,
|
570
|
+
7, 3, 8, 7, 11, 8, 7, 122,
|
571
|
+
3, 124, 125, 126, 123, 8, 123, 122,
|
572
|
+
123, 123, 123, 7, 46, 3, 48, 49,
|
573
|
+
50, 47, 8, 47, 46, 47, 47, 47,
|
574
|
+
7, 3, 45, 8, 7, 190, 190, 192,
|
575
|
+
193, 194, 7, 50, 195, 190, 191, 189,
|
576
|
+
196, 196, 192, 193, 194, 7, 50, 195,
|
577
|
+
196, 197, 189, 196, 196, 192, 193, 194,
|
578
|
+
7, 50, 198, 195, 198, 196, 197, 198,
|
579
|
+
198, 198, 189, 242, 242, 244, 245, 247,
|
580
|
+
7, 103, 198, 195, 198, 242, 243, 198,
|
581
|
+
198, 198, 189, 248, 248, 84, 247, 199,
|
582
|
+
251, 198, 195, 198, 248, 249, 198, 198,
|
583
|
+
198, 189, 92, 92, 84, 101, 7, 103,
|
584
|
+
100, 97, 100, 92, 93, 100, 100, 100,
|
585
|
+
91, 144, 144, 84, 147, 7, 148, 149,
|
586
|
+
146, 97, 146, 144, 145, 146, 146, 146,
|
587
|
+
91, 98, 98, 84, 101, 7, 102, 103,
|
588
|
+
100, 97, 100, 98, 99, 100, 100, 100,
|
589
|
+
91, 92, 92, 84, 95, 7, 96, 97,
|
590
|
+
92, 93, 91, 92, 92, 94, 95, 7,
|
591
|
+
96, 97, 92, 93, 91, 242, 242, 244,
|
592
|
+
245, 246, 7, 96, 195, 242, 243, 189,
|
593
|
+
258, 258, 263, 94, 256, 215, 261, 211,
|
594
|
+
258, 259, 205, 105, 105, 80, 94, 108,
|
595
|
+
9, 109, 110, 105, 106, 104, 373, 10,
|
596
|
+
11, 55, 57, 54, 12, 54, 373, 54,
|
597
|
+
54, 54, 9, 10, 11, 12, 9, 370,
|
598
|
+
3, 31, 33, 30, 4, 30, 370, 30,
|
599
|
+
30, 30, 2, 3, 4, 2, 10, 4,
|
600
|
+
2, 117, 3, 119, 120, 121, 118, 4,
|
601
|
+
118, 117, 118, 118, 118, 2, 29, 3,
|
602
|
+
31, 32, 33, 30, 4, 30, 29, 30,
|
603
|
+
30, 30, 2, 3, 28, 4, 2, 167,
|
604
|
+
167, 169, 170, 171, 2, 33, 172, 167,
|
605
|
+
168, 166, 78, 78, 84, 81, 2, 82,
|
606
|
+
83, 78, 79, 77, 78, 78, 84, 88,
|
607
|
+
2, 90, 87, 83, 87, 78, 79, 87,
|
608
|
+
87, 87, 77, 138, 138, 84, 141, 2,
|
609
|
+
142, 143, 140, 83, 140, 138, 139, 140,
|
610
|
+
140, 140, 77, 85, 85, 84, 88, 2,
|
611
|
+
89, 90, 87, 83, 87, 85, 86, 87,
|
612
|
+
87, 87, 77, 78, 78, 84, 81, 2,
|
613
|
+
82, 83, 78, 79, 77, 78, 78, 80,
|
614
|
+
81, 2, 82, 83, 78, 79, 77, 232,
|
615
|
+
232, 234, 235, 236, 2, 82, 172, 232,
|
616
|
+
233, 166, 173, 173, 169, 170, 171, 2,
|
617
|
+
33, 175, 172, 175, 173, 174, 175, 175,
|
618
|
+
175, 166, 232, 232, 234, 235, 237, 2,
|
619
|
+
90, 175, 172, 175, 232, 233, 175, 175,
|
620
|
+
175, 166, 258, 258, 80, 260, 256, 215,
|
621
|
+
261, 211, 258, 259, 205, 105, 105, 80,
|
622
|
+
94, 114, 9, 116, 113, 110, 113, 105,
|
623
|
+
106, 113, 113, 113, 104, 150, 150, 80,
|
624
|
+
94, 153, 9, 154, 155, 152, 110, 152,
|
625
|
+
150, 151, 152, 152, 152, 104, 53, 10,
|
626
|
+
11, 55, 56, 57, 54, 12, 54, 53,
|
627
|
+
54, 54, 54, 9, 127, 10, 11, 129,
|
628
|
+
130, 131, 128, 12, 128, 127, 128, 128,
|
629
|
+
128, 9, 10, 11, 52, 12, 9, 51,
|
630
|
+
51, 12, 9, 206, 206, 208, 209, 210,
|
631
|
+
9, 57, 211, 206, 207, 205, 212, 212,
|
632
|
+
208, 209, 210, 9, 57, 211, 212, 213,
|
633
|
+
205, 212, 212, 208, 209, 210, 9, 57,
|
634
|
+
214, 211, 214, 212, 213, 214, 214, 214,
|
635
|
+
205, 252, 252, 254, 255, 257, 9, 116,
|
636
|
+
214, 211, 214, 252, 253, 214, 214, 214,
|
637
|
+
205, 258, 258, 80, 260, 257, 215, 262,
|
638
|
+
214, 211, 214, 258, 259, 214, 214, 214,
|
639
|
+
205, 105, 105, 80, 94, 108, 9, 109,
|
640
|
+
110, 105, 106, 104, 105, 105, 107, 107,
|
641
|
+
108, 9, 109, 110, 105, 106, 104, 258,
|
642
|
+
258, 263, 94, 257, 215, 262, 214, 211,
|
643
|
+
214, 258, 259, 214, 214, 214, 205, 218,
|
644
|
+
10, 216, 220, 221, 219, 217, 219, 218,
|
645
|
+
219, 219, 219, 215, 218, 225, 11, 220,
|
646
|
+
221, 219, 217, 219, 218, 219, 219, 219,
|
647
|
+
215, 252, 252, 254, 255, 256, 9, 109,
|
648
|
+
211, 252, 253, 205, 111, 111, 80, 94,
|
649
|
+
114, 9, 115, 116, 113, 110, 113, 111,
|
650
|
+
112, 113, 113, 113, 104, 238, 238, 84,
|
651
|
+
237, 176, 241, 175, 172, 175, 238, 239,
|
652
|
+
175, 175, 175, 166, 10, 216, 217, 215,
|
653
|
+
178, 3, 180, 181, 179, 177, 179, 178,
|
654
|
+
179, 179, 179, 176, 173, 173, 169, 170,
|
655
|
+
171, 2, 33, 172, 173, 174, 166, 201,
|
656
|
+
3, 203, 204, 202, 200, 202, 201, 202,
|
657
|
+
202, 202, 199, 225, 11, 217, 215, 238,
|
658
|
+
238, 84, 236, 176, 240, 172, 238, 239,
|
659
|
+
166, 3, 200, 199, 3, 177, 176, 163,
|
660
|
+
163, 159, 160, 161, 69, 162, 163, 164,
|
661
|
+
156, 339, 339, 340, 339, 339, 296, 15,
|
662
|
+
357, 357, 15, 357, 357, 357, 357, 296,
|
663
|
+
15, 357, 357, 408, 15, 357, 357, 357,
|
664
|
+
357, 296, 15, 357, 357, 404, 15, 357,
|
665
|
+
357, 357, 357, 296, 16, 357, 357, 16,
|
666
|
+
357, 357, 357, 357, 296, 287, 264, 294,
|
667
|
+
264, 396, 264, 387, 264, 393, 264, 268,
|
668
|
+
264, 268, 265, 268, 264, 265, 266, 267,
|
669
|
+
265, 264, 282, 282, 282, 282, 282, 264,
|
670
|
+
275, 276, 276, 276, 276, 276, 264, 269,
|
671
|
+
270, 271, 269, 264, 269, 270, 271, 272,
|
672
|
+
273, 269, 264, 270, 264, 388, 264, 285,
|
673
|
+
264, 394, 264, 385, 264, 289, 264, 390,
|
674
|
+
264, 288, 264, 288, 374, 288, 264, 374,
|
675
|
+
375, 376, 374, 264, 283, 283, 264, 277,
|
676
|
+
278, 278, 278, 278, 278, 264, 274, 270,
|
677
|
+
271, 274, 264, 274, 270, 271, 273, 274,
|
678
|
+
264, 295, 264, 384, 264, 389, 264, 286,
|
679
|
+
264, 284, 264, 290, 264, 395, 264, 391,
|
680
|
+
264, 380, 264, 380, 377, 380, 264, 377,
|
681
|
+
378, 379, 377, 264, 291, 292, 264, 293,
|
682
|
+
264, 279, 264, 381, 270, 271, 381, 264,
|
683
|
+
386, 264, 293, 264, 405, 406, 264, 392,
|
684
|
+
264, 279, 264, 407, 264, 392, 264, 383,
|
685
|
+
383, 264, 277, 281, 281, 281, 281, 281,
|
686
|
+
264, 382, 382, 382, 382, 382, 264, 275,
|
687
|
+
280, 280, 280, 280, 280, 264, 415, 414,
|
688
|
+
422, 421, 24, 25, 23, 19, 20, 21,
|
689
|
+
22, 21, 21, 21, 18, 6, 5, 1,
|
690
|
+
0, 6, 13, 3, 8, 7, 3, 4,
|
691
|
+
2, 10, 11, 12, 9, 15, 15, 14,
|
692
|
+
16, 17, 16, 14, 412, 413, 411, 410,
|
693
|
+
409, 419, 420, 418, 417, 416, 426, 424,
|
694
|
+
427, 425, 424, 423, 0
|
695
|
+
};
|
696
|
+
|
697
|
+
static final short[] _hpricot_scan_trans_targs_wi = {
|
698
|
+
26, 27, 101, 69, 102, 29, 25, 80,
|
699
|
+
81, 99, 100, 79, 122, 24, 204, 212,
|
700
|
+
213, 150, 204, 0, 59, 62, 145, 204,
|
701
|
+
204, 205, 41, 207, 210, 104, 103, 105,
|
702
|
+
106, 210, 40, 41, 42, 36, 37, 46,
|
703
|
+
206, 47, 32, 35, 34, 209, 83, 82,
|
704
|
+
84, 85, 209, 98, 211, 119, 120, 121,
|
705
|
+
123, 211, 44, 45, 43, 208, 38, 39,
|
706
|
+
43, 68, 69, 70, 73, 204, 204, 65,
|
707
|
+
72, 71, 73, 74, 204, 107, 100, 108,
|
708
|
+
108, 111, 210, 112, 70, 104, 110, 109,
|
709
|
+
111, 113, 210, 78, 79, 90, 90, 93,
|
710
|
+
209, 94, 83, 92, 91, 93, 95, 209,
|
711
|
+
97, 98, 117, 117, 128, 211, 129, 119,
|
712
|
+
134, 118, 128, 133, 211, 104, 103, 105,
|
713
|
+
106, 210, 83, 82, 84, 85, 209, 119,
|
714
|
+
120, 121, 123, 211, 65, 72, 71, 73,
|
715
|
+
74, 204, 104, 110, 109, 111, 113, 210,
|
716
|
+
83, 92, 91, 93, 95, 209, 119, 134,
|
717
|
+
118, 128, 133, 211, 68, 144, 74, 142,
|
718
|
+
143, 73, 204, 75, 76, 71, 107, 138,
|
719
|
+
113, 136, 137, 111, 112, 114, 115, 109,
|
720
|
+
101, 102, 100, 103, 105, 210, 29, 39,
|
721
|
+
206, 40, 35, 36, 47, 78, 86, 95,
|
722
|
+
139, 140, 93, 94, 87, 88, 91, 80,
|
723
|
+
81, 79, 82, 84, 209, 97, 124, 133,
|
724
|
+
131, 132, 128, 129, 125, 126, 118, 99,
|
725
|
+
79, 122, 98, 120, 121, 211, 24, 38,
|
726
|
+
43, 100, 75, 76, 77, 141, 73, 73,
|
727
|
+
114, 115, 116, 135, 111, 111, 100, 108,
|
728
|
+
210, 210, 87, 88, 89, 96, 93, 93,
|
729
|
+
79, 90, 209, 209, 125, 126, 127, 130,
|
730
|
+
128, 128, 98, 117, 90, 211, 211, 108,
|
731
|
+
204, 157, 158, 200, 156, 161, 204, 162,
|
732
|
+
163, 176, 175, 160, 159, 174, 173, 190,
|
733
|
+
201, 199, 159, 173, 181, 165, 180, 151,
|
734
|
+
170, 168, 182, 188, 191, 189, 152, 177,
|
735
|
+
204, 33, 22, 31, 23, 34, 204, 32,
|
736
|
+
18, 19, 30, 28, 9, 10, 11, 12,
|
737
|
+
48, 61, 204, 63, 64, 66, 204, 20,
|
738
|
+
21, 20, 31, 32, 63, 62, 66, 204,
|
739
|
+
11, 10, 204, 26, 61, 60, 204, 1,
|
740
|
+
2, 53, 60, 146, 147, 56, 14, 17,
|
741
|
+
55, 52, 16, 15, 21, 3, 7, 50,
|
742
|
+
51, 13, 6, 204, 204, 146, 25, 65,
|
743
|
+
64, 66, 67, 69, 65, 64, 66, 67,
|
744
|
+
204, 204, 100, 39, 79, 98, 171, 172,
|
745
|
+
198, 186, 187, 193, 185, 190, 201, 199,
|
746
|
+
178, 167, 192, 154, 164, 179, 169, 184,
|
747
|
+
195, 155, 166, 183, 153, 58, 54, 4,
|
748
|
+
8, 5, 57, 49, 149, 194, 196, 197,
|
749
|
+
148, 214, 202, 214, 214, 215, 214, 214,
|
750
|
+
216, 203, 216, 216, 217, 216, 216, 218,
|
751
|
+
218, 218, 218, 219
|
752
|
+
};
|
753
|
+
|
754
|
+
/*
 * Generated scanner table: actions attached to each transition.
 * The scan loop indexes it with the transition number obtained from
 * _hpricot_scan_indicies. Each entry is an offset into _hpricot_scan_actions,
 * where an action count is followed by that many action ids. An entry of 0
 * means the transition runs no actions. Do not edit the values by hand.
 */
static final short[] _hpricot_scan_trans_actions_wi = {
|
755
|
+
0, 0, 0, 7, 0, 0, 21, 0,
|
756
|
+
0, 0, 7, 7, 0, 0, 65, 0,
|
757
|
+
31, 0, 67, 0, 0, 1, 0, 63,
|
758
|
+
132, 178, 0, 144, 147, 0, 174, 23,
|
759
|
+
0, 186, 0, 21, 0, 0, 0, 21,
|
760
|
+
144, 0, 111, 0, 111, 147, 0, 174,
|
761
|
+
23, 0, 186, 7, 147, 0, 174, 23,
|
762
|
+
0, 186, 0, 0, 0, 144, 0, 21,
|
763
|
+
21, 0, 9, 9, 102, 73, 162, 9,
|
764
|
+
9, 174, 117, 0, 170, 0, 9, 9,
|
765
|
+
7, 102, 205, 0, 7, 9, 9, 174,
|
766
|
+
117, 0, 215, 0, 9, 9, 7, 102,
|
767
|
+
205, 0, 9, 9, 174, 117, 0, 215,
|
768
|
+
0, 9, 9, 7, 102, 205, 0, 9,
|
769
|
+
9, 174, 117, 0, 215, 11, 0, 108,
|
770
|
+
11, 210, 11, 0, 108, 11, 210, 11,
|
771
|
+
0, 108, 11, 210, 105, 105, 0, 158,
|
772
|
+
11, 195, 105, 105, 0, 158, 11, 232,
|
773
|
+
105, 105, 0, 158, 11, 232, 105, 105,
|
774
|
+
0, 158, 11, 232, 3, 3, 3, 0,
|
775
|
+
0, 87, 120, 3, 3, 190, 3, 3,
|
776
|
+
3, 0, 7, 87, 3, 3, 3, 190,
|
777
|
+
3, 3, 3, 190, 87, 200, 3, 3,
|
778
|
+
182, 3, 3, 3, 3, 3, 3, 3,
|
779
|
+
7, 0, 87, 3, 3, 3, 190, 3,
|
780
|
+
3, 3, 190, 87, 200, 3, 3, 3,
|
781
|
+
7, 7, 87, 3, 3, 3, 190, 3,
|
782
|
+
75, 3, 3, 190, 87, 200, 3, 3,
|
783
|
+
84, 99, 78, 78, 0, 0, 150, 154,
|
784
|
+
78, 78, 0, 7, 150, 154, 78, 78,
|
785
|
+
220, 226, 78, 78, 7, 0, 150, 154,
|
786
|
+
78, 78, 220, 226, 78, 78, 7, 7,
|
787
|
+
150, 154, 78, 78, 75, 220, 226, 99,
|
788
|
+
69, 0, 0, 0, 0, 0, 49, 0,
|
789
|
+
0, 0, 0, 13, 0, 15, 0, 17,
|
790
|
+
0, 0, 3, 3, 0, 0, 0, 0,
|
791
|
+
0, 0, 0, 3, 3, 0, 0, 0,
|
792
|
+
71, 0, 0, 0, 0, 19, 51, 19,
|
793
|
+
0, 0, 0, 0, 0, 1, 0, 0,
|
794
|
+
0, 0, 55, 0, 114, 0, 53, 0,
|
795
|
+
19, 3, 3, 81, 5, 0, 5, 93,
|
796
|
+
5, 0, 90, 5, 5, 0, 96, 0,
|
797
|
+
0, 0, 1, 25, 25, 0, 0, 0,
|
798
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
799
|
+
0, 0, 0, 61, 59, 0, 0, 0,
|
800
|
+
174, 23, 0, 0, 11, 0, 108, 11,
|
801
|
+
166, 57, 0, 0, 0, 0, 0, 0,
|
802
|
+
0, 0, 0, 0, 0, 0, 3, 3,
|
803
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
804
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
805
|
+
0, 0, 0, 0, 0, 3, 3, 0,
|
806
|
+
0, 35, 0, 33, 123, 31, 37, 135,
|
807
|
+
41, 0, 39, 126, 31, 43, 138, 47,
|
808
|
+
141, 45, 129, 0
|
809
|
+
};
|
810
|
+
|
811
|
+
/*
 * Generated scanner table: actions run after a transition, keyed by the
 * state just entered (the scan loop reads _hpricot_scan_to_state_actions[cs]).
 * Each entry is an offset into _hpricot_scan_actions; 0 means no action.
 * Only indices 204, 214, 216 and 218 are non-zero. The only to-state action
 * handled by the scan loop is id 20, which resets tokstart to -1.
 */
static final short[] _hpricot_scan_to_state_actions = {
|
812
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
813
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
814
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
815
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
816
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
817
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
818
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
819
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
820
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
821
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
822
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
823
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
824
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
825
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
826
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
827
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
828
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
829
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
830
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
831
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
832
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
833
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
834
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
835
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
836
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
837
|
+
0, 0, 0, 0, 27, 0, 0, 0,
|
838
|
+
0, 0, 0, 0, 0, 0, 27, 0,
|
839
|
+
27, 0, 27, 0
|
840
|
+
};
|
841
|
+
|
842
|
+
/*
 * Generated scanner table: actions run before a transition is looked up,
 * keyed by the current state (the scan loop reads
 * _hpricot_scan_from_state_actions[cs]). Each entry is an offset into
 * _hpricot_scan_actions; 0 means no action. Only indices 204, 214, 216 and
 * 218 are non-zero. The only from-state action handled by the scan loop is
 * id 21, which records the current position in tokstart.
 */
static final short[] _hpricot_scan_from_state_actions = {
|
843
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
844
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
845
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
846
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
847
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
848
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
849
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
850
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
851
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
852
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
853
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
854
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
855
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
856
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
857
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
858
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
859
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
860
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
861
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
862
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
863
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
864
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
865
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
866
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
867
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
868
|
+
0, 0, 0, 0, 29, 0, 0, 0,
|
869
|
+
0, 0, 0, 0, 0, 0, 29, 0,
|
870
|
+
29, 0, 29, 0
|
871
|
+
};
|
872
|
+
|
873
|
+
/* State the scanner is placed in before any input is consumed (cs is initialised to this). */
static final int hpricot_scan_start = 204;
|
874
|
+
|
875
|
+
/* Value of cs that hpricot_scan treats as a parse error and reports via rb_raise. */
static final int hpricot_scan_error = -1;
|
876
|
+
|
877
|
+
|
878
|
+
/* Default buffer length in chars; used unless the receiver's @buffer_size is set and non-nil. */
public final static int BUFSIZE=16384;
|
879
|
+
|
880
|
+
private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
|
881
|
+
IRubyObject ary;
|
882
|
+
if (sym == runtime.newSymbol("text")) {
|
883
|
+
raw = tag;
|
884
|
+
}
|
885
|
+
ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw});
|
886
|
+
if (taint) {
|
887
|
+
ary.setTaint(true);
|
888
|
+
tag.setTaint(true);
|
889
|
+
attr.setTaint(true);
|
890
|
+
raw.setTaint(true);
|
891
|
+
}
|
892
|
+
block.yield(runtime.getCurrentContext(), ary, null, null, false);
|
893
|
+
}
|
894
|
+
|
895
|
+
|
896
|
+
int cs, act, have = 0, nread = 0, curline = 1, p=-1;
|
897
|
+
boolean text = false;
|
898
|
+
int tokstart=-1, tokend;
|
899
|
+
char[] buf;
|
900
|
+
Ruby runtime;
|
901
|
+
IRubyObject attr, bufsize;
|
902
|
+
IRubyObject[] tag, akey, aval;
|
903
|
+
int mark_tag, mark_akey, mark_aval;
|
904
|
+
boolean done = false, ele_open = false;
|
905
|
+
int buffer_size = 0;
|
906
|
+
boolean taint = false;
|
907
|
+
Block block = null;
|
908
|
+
|
909
|
+
|
910
|
+
IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment,
|
911
|
+
cdata, sym_text;
|
912
|
+
|
913
|
+
IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) {
|
914
|
+
attr = bufsize = runtime.getNil();
|
915
|
+
tag = new IRubyObject[]{runtime.getNil()};
|
916
|
+
akey = new IRubyObject[]{runtime.getNil()};
|
917
|
+
aval = new IRubyObject[]{runtime.getNil()};
|
918
|
+
|
919
|
+
RubyClass rb_eHpricotParseError = runtime.getModule("Hpricot").getClass("ParseError");
|
920
|
+
|
921
|
+
taint = port.isTaint();
|
922
|
+
if ( !port.respondsTo("read")) {
|
923
|
+
if ( port.respondsTo("to_str")) {
|
924
|
+
port = port.callMethod(runtime.getCurrentContext(),"to_str");
|
925
|
+
} else {
|
926
|
+
throw runtime.newArgumentError("bad Hpricot argument, String or IO only please.");
|
927
|
+
}
|
928
|
+
}
|
929
|
+
|
930
|
+
buffer_size = BUFSIZE;
|
931
|
+
if (recv.getInstanceVariable("@buffer_size") != null) {
|
932
|
+
bufsize = recv.getInstanceVariable("@buffer_size");
|
933
|
+
if (!bufsize.isNil()) {
|
934
|
+
buffer_size = RubyNumeric.fix2int(bufsize);
|
935
|
+
}
|
936
|
+
}
|
937
|
+
buf = new char[buffer_size];
|
938
|
+
|
939
|
+
|
940
|
+
{
|
941
|
+
cs = hpricot_scan_start;
|
942
|
+
tokstart = -1;
|
943
|
+
tokend = -1;
|
944
|
+
act = 0;
|
945
|
+
}
|
946
|
+
|
947
|
+
while( !done ) {
|
948
|
+
IRubyObject str;
|
949
|
+
p = have;
|
950
|
+
int pe;
|
951
|
+
int len, space = buffer_size - have;
|
952
|
+
|
953
|
+
if ( space == 0 ) {
|
954
|
+
/* We've used up the entire buffer storing an already-parsed token
|
955
|
+
* prefix that must be preserved. Likely caused by super-long attributes.
|
956
|
+
* See ticket #13. */
|
957
|
+
rb_raise(rb_eHpricotParseError, "ran out of buffer space on element <" + tag.toString() + ">, starting on line "+curline+".");
|
958
|
+
}
|
959
|
+
|
960
|
+
if (port.respondsTo("read")) {
|
961
|
+
str = port.callMethod(runtime.getCurrentContext(),"read",runtime.newFixnum(space));
|
962
|
+
} else {
|
963
|
+
str = ((RubyString)port).substr(nread,space);
|
964
|
+
}
|
965
|
+
|
966
|
+
str = str.convertToString();
|
967
|
+
String sss = str.toString();
|
968
|
+
char[] chars = sss.toCharArray();
|
969
|
+
System.arraycopy(chars,0,buf,p,chars.length);
|
970
|
+
|
971
|
+
len = sss.length();
|
972
|
+
nread += len;
|
973
|
+
|
974
|
+
if ( len < space ) {
|
975
|
+
len++;
|
976
|
+
done = true;
|
977
|
+
}
|
978
|
+
|
979
|
+
pe = p + len;
|
980
|
+
char[] data = buf;
|
981
|
+
|
982
|
+
|
983
|
+
{
|
984
|
+
int _klen;
|
985
|
+
int _trans;
|
986
|
+
int _acts;
|
987
|
+
int _nacts;
|
988
|
+
int _keys;
|
989
|
+
|
990
|
+
if ( p != pe ) {
|
991
|
+
_resume: while ( true ) {
|
992
|
+
_again: do {
|
993
|
+
_acts = _hpricot_scan_from_state_actions[cs];
|
994
|
+
_nacts = (int) _hpricot_scan_actions[_acts++];
|
995
|
+
while ( _nacts-- > 0 ) {
|
996
|
+
switch ( _hpricot_scan_actions[_acts++] ) {
|
997
|
+
case 21:
|
998
|
+
{tokstart = p;}
|
999
|
+
break;
|
1000
|
+
}
|
1001
|
+
}
|
1002
|
+
|
1003
|
+
_match: do {
|
1004
|
+
_keys = _hpricot_scan_key_offsets[cs];
|
1005
|
+
_trans = _hpricot_scan_index_offsets[cs];
|
1006
|
+
_klen = _hpricot_scan_single_lengths[cs];
|
1007
|
+
if ( _klen > 0 ) {
|
1008
|
+
int _lower = _keys;
|
1009
|
+
int _mid;
|
1010
|
+
int _upper = _keys + _klen - 1;
|
1011
|
+
while (true) {
|
1012
|
+
if ( _upper < _lower )
|
1013
|
+
break;
|
1014
|
+
|
1015
|
+
_mid = _lower + ((_upper-_lower) >> 1);
|
1016
|
+
if ( data[p] < _hpricot_scan_trans_keys[_mid] )
|
1017
|
+
_upper = _mid - 1;
|
1018
|
+
else if ( data[p] > _hpricot_scan_trans_keys[_mid] )
|
1019
|
+
_lower = _mid + 1;
|
1020
|
+
else {
|
1021
|
+
_trans += (_mid - _keys);
|
1022
|
+
break _match;
|
1023
|
+
}
|
1024
|
+
}
|
1025
|
+
_keys += _klen;
|
1026
|
+
_trans += _klen;
|
1027
|
+
}
|
1028
|
+
|
1029
|
+
_klen = _hpricot_scan_range_lengths[cs];
|
1030
|
+
if ( _klen > 0 ) {
|
1031
|
+
int _lower = _keys;
|
1032
|
+
int _mid;
|
1033
|
+
int _upper = _keys + (_klen<<1) - 2;
|
1034
|
+
while (true) {
|
1035
|
+
if ( _upper < _lower )
|
1036
|
+
break;
|
1037
|
+
|
1038
|
+
_mid = _lower + (((_upper-_lower) >> 1) & ~1);
|
1039
|
+
if ( data[p] < _hpricot_scan_trans_keys[_mid] )
|
1040
|
+
_upper = _mid - 2;
|
1041
|
+
else if ( data[p] > _hpricot_scan_trans_keys[_mid+1] )
|
1042
|
+
_lower = _mid + 2;
|
1043
|
+
else {
|
1044
|
+
_trans += ((_mid - _keys)>>1);
|
1045
|
+
break _match;
|
1046
|
+
}
|
1047
|
+
}
|
1048
|
+
_trans += _klen;
|
1049
|
+
}
|
1050
|
+
} while (false);
|
1051
|
+
|
1052
|
+
_trans = _hpricot_scan_indicies[_trans];
|
1053
|
+
cs = _hpricot_scan_trans_targs_wi[_trans];
|
1054
|
+
|
1055
|
+
if ( _hpricot_scan_trans_actions_wi[_trans] == 0 )
|
1056
|
+
break _again;
|
1057
|
+
|
1058
|
+
_acts = _hpricot_scan_trans_actions_wi[_trans];
|
1059
|
+
_nacts = (int) _hpricot_scan_actions[_acts++];
|
1060
|
+
while ( _nacts-- > 0 )
|
1061
|
+
{
|
1062
|
+
switch ( _hpricot_scan_actions[_acts++] )
|
1063
|
+
{
|
1064
|
+
case 0:
|
1065
|
+
{
|
1066
|
+
if (text) {
|
1067
|
+
CAT(tag, p);
|
1068
|
+
ELE(sym_text);
|
1069
|
+
text = false;
|
1070
|
+
}
|
1071
|
+
attr = runtime.getNil();
|
1072
|
+
tag[0] = runtime.getNil();
|
1073
|
+
mark_tag = -1;
|
1074
|
+
ele_open = true;
|
1075
|
+
}
|
1076
|
+
break;
|
1077
|
+
case 1:
|
1078
|
+
{ mark_tag = p; }
|
1079
|
+
break;
|
1080
|
+
case 2:
|
1081
|
+
{ mark_aval = p; }
|
1082
|
+
break;
|
1083
|
+
case 3:
|
1084
|
+
{ mark_akey = p; }
|
1085
|
+
break;
|
1086
|
+
case 4:
|
1087
|
+
{ SET(tag, p); }
|
1088
|
+
break;
|
1089
|
+
case 5:
|
1090
|
+
{ SET(aval, p); }
|
1091
|
+
break;
|
1092
|
+
case 6:
|
1093
|
+
{
|
1094
|
+
if (buf[p-1] == '"' || buf[p-1] == '\'') { SET(aval, p-1); }
|
1095
|
+
else { SET(aval, p); }
|
1096
|
+
}
|
1097
|
+
break;
|
1098
|
+
case 7:
|
1099
|
+
{ SET(akey, p); }
|
1100
|
+
break;
|
1101
|
+
case 8:
|
1102
|
+
{ SET(aval, p); ATTR(rb_str_new2("version"), aval); }
|
1103
|
+
break;
|
1104
|
+
case 9:
|
1105
|
+
{ SET(aval, p); ATTR(rb_str_new2("encoding"), aval); }
|
1106
|
+
break;
|
1107
|
+
case 10:
|
1108
|
+
{ SET(aval, p); ATTR(rb_str_new2("standalone"), aval); }
|
1109
|
+
break;
|
1110
|
+
case 11:
|
1111
|
+
{ SET(aval, p); ATTR(rb_str_new2("public_id"), aval); }
|
1112
|
+
break;
|
1113
|
+
case 12:
|
1114
|
+
{ SET(aval, p); ATTR(rb_str_new2("system_id"), aval); }
|
1115
|
+
break;
|
1116
|
+
case 13:
|
1117
|
+
{
|
1118
|
+
akey[0] = runtime.getNil();
|
1119
|
+
aval[0] = runtime.getNil();
|
1120
|
+
mark_akey = -1;
|
1121
|
+
mark_aval = -1;
|
1122
|
+
}
|
1123
|
+
break;
|
1124
|
+
case 14:
|
1125
|
+
{
|
1126
|
+
ATTR(akey, aval);
|
1127
|
+
}
|
1128
|
+
break;
|
1129
|
+
case 15:
|
1130
|
+
{curline += 1;}
|
1131
|
+
break;
|
1132
|
+
case 16:
|
1133
|
+
{ TEXT_PASS(); }
|
1134
|
+
break;
|
1135
|
+
case 17:
|
1136
|
+
{ EBLK(comment, 3); {cs = 204; if (true) break _again;} }
|
1137
|
+
break;
|
1138
|
+
case 18:
|
1139
|
+
{ EBLK(cdata, 3); {cs = 204; if (true) break _again;} }
|
1140
|
+
break;
|
1141
|
+
case 19:
|
1142
|
+
{ EBLK(procins, 2); {cs = 204; if (true) break _again;} }
|
1143
|
+
break;
|
1144
|
+
case 22:
|
1145
|
+
{tokend = p+1;}
|
1146
|
+
break;
|
1147
|
+
case 23:
|
1148
|
+
{tokend = p+1;{p = ((tokend))-1;}}
|
1149
|
+
break;
|
1150
|
+
case 24:
|
1151
|
+
{tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
1152
|
+
break;
|
1153
|
+
case 25:
|
1154
|
+
{tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
1155
|
+
break;
|
1156
|
+
case 26:
|
1157
|
+
{{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
1158
|
+
break;
|
1159
|
+
case 27:
|
1160
|
+
{tokend = p+1;{p = ((tokend))-1;}}
|
1161
|
+
break;
|
1162
|
+
case 28:
|
1163
|
+
{tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
1164
|
+
break;
|
1165
|
+
case 29:
|
1166
|
+
{tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
1167
|
+
break;
|
1168
|
+
case 30:
|
1169
|
+
{{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
1170
|
+
break;
|
1171
|
+
case 31:
|
1172
|
+
{tokend = p+1;{p = ((tokend))-1;}}
|
1173
|
+
break;
|
1174
|
+
case 32:
|
1175
|
+
{tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
1176
|
+
break;
|
1177
|
+
case 33:
|
1178
|
+
{tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
1179
|
+
break;
|
1180
|
+
case 34:
|
1181
|
+
{act = 8;}
|
1182
|
+
break;
|
1183
|
+
case 35:
|
1184
|
+
{act = 10;}
|
1185
|
+
break;
|
1186
|
+
case 36:
|
1187
|
+
{act = 12;}
|
1188
|
+
break;
|
1189
|
+
case 37:
|
1190
|
+
{act = 15;}
|
1191
|
+
break;
|
1192
|
+
case 38:
|
1193
|
+
{tokend = p+1;{ ELE(xmldecl); }{p = ((tokend))-1;}}
|
1194
|
+
break;
|
1195
|
+
case 39:
|
1196
|
+
{tokend = p+1;{ ELE(doctype); }{p = ((tokend))-1;}}
|
1197
|
+
break;
|
1198
|
+
case 40:
|
1199
|
+
{tokend = p+1;{ ELE(stag); }{p = ((tokend))-1;}}
|
1200
|
+
break;
|
1201
|
+
case 41:
|
1202
|
+
{tokend = p+1;{ ELE(etag); }{p = ((tokend))-1;}}
|
1203
|
+
break;
|
1204
|
+
case 42:
|
1205
|
+
{tokend = p+1;{ ELE(emptytag); }{p = ((tokend))-1;}}
|
1206
|
+
break;
|
1207
|
+
case 43:
|
1208
|
+
{tokend = p+1;{ {{p = ((tokend))-1;}{cs = 214; if (true) break _again;}} }{p = ((tokend))-1;}}
|
1209
|
+
break;
|
1210
|
+
case 44:
|
1211
|
+
{tokend = p+1;{ {{p = ((tokend))-1;}{cs = 216; if (true) break _again;}} }{p = ((tokend))-1;}}
|
1212
|
+
break;
|
1213
|
+
case 45:
|
1214
|
+
{tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
1215
|
+
break;
|
1216
|
+
case 46:
|
1217
|
+
{tokend = p;{ {{p = ((tokend))-1;}{cs = 218; if (true) break _again;}} }{p = ((tokend))-1;}}
|
1218
|
+
break;
|
1219
|
+
case 47:
|
1220
|
+
{tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
1221
|
+
break;
|
1222
|
+
case 48:
|
1223
|
+
{{ {{p = ((tokend))-1;}{cs = 218; if (true) break _again;}} }{p = ((tokend))-1;}}
|
1224
|
+
break;
|
1225
|
+
case 49:
|
1226
|
+
{{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
1227
|
+
break;
|
1228
|
+
case 50:
|
1229
|
+
{ switch( act ) {
|
1230
|
+
case 8:
|
1231
|
+
{ ELE(doctype); }
|
1232
|
+
break;
|
1233
|
+
case 10:
|
1234
|
+
{ ELE(stag); }
|
1235
|
+
break;
|
1236
|
+
case 12:
|
1237
|
+
{ ELE(emptytag); }
|
1238
|
+
break;
|
1239
|
+
case 15:
|
1240
|
+
{ TEXT_PASS(); }
|
1241
|
+
break;
|
1242
|
+
default: break;
|
1243
|
+
}
|
1244
|
+
{p = ((tokend))-1;}}
|
1245
|
+
break;
|
1246
|
+
}
|
1247
|
+
}
|
1248
|
+
|
1249
|
+
} while (false);
|
1250
|
+
_acts = _hpricot_scan_to_state_actions[cs];
|
1251
|
+
_nacts = (int) _hpricot_scan_actions[_acts++];
|
1252
|
+
while ( _nacts-- > 0 ) {
|
1253
|
+
switch ( _hpricot_scan_actions[_acts++] ) {
|
1254
|
+
case 20:
|
1255
|
+
{tokstart = -1;}
|
1256
|
+
break;
|
1257
|
+
}
|
1258
|
+
}
|
1259
|
+
|
1260
|
+
if ( ++p == pe )
|
1261
|
+
break _resume;
|
1262
|
+
}
|
1263
|
+
}
|
1264
|
+
}
|
1265
|
+
|
1266
|
+
if ( cs == hpricot_scan_error ) {
|
1267
|
+
if(!tag[0].isNil()) {
|
1268
|
+
rb_raise(rb_eHpricotParseError, "parse error on element <"+tag.toString()+">, starting on line "+curline+".\n" + NO_WAY_SERIOUSLY);
|
1269
|
+
} else {
|
1270
|
+
rb_raise(rb_eHpricotParseError, "parse error on line "+curline+".\n" + NO_WAY_SERIOUSLY);
|
1271
|
+
}
|
1272
|
+
}
|
1273
|
+
|
1274
|
+
if ( done && ele_open ) {
|
1275
|
+
ele_open = false;
|
1276
|
+
if(tokstart > -1) {
|
1277
|
+
mark_tag = tokstart;
|
1278
|
+
tokstart = -1;
|
1279
|
+
text = true;
|
1280
|
+
}
|
1281
|
+
}
|
1282
|
+
|
1283
|
+
if(tokstart == -1) {
|
1284
|
+
have = 0;
|
1285
|
+
/* text nodes have no tokstart because each byte is parsed alone */
|
1286
|
+
if(mark_tag != -1 && text) {
|
1287
|
+
if (done) {
|
1288
|
+
if(mark_tag < p-1) {
|
1289
|
+
CAT(tag, p-1);
|
1290
|
+
ELE(sym_text);
|
1291
|
+
}
|
1292
|
+
} else {
|
1293
|
+
CAT(tag, p);
|
1294
|
+
}
|
1295
|
+
}
|
1296
|
+
mark_tag = 0;
|
1297
|
+
} else {
|
1298
|
+
have = pe - tokstart;
|
1299
|
+
System.arraycopy(buf,tokstart,buf,0,have);
|
1300
|
+
SLIDE(tag);
|
1301
|
+
SLIDE(akey);
|
1302
|
+
SLIDE(aval);
|
1303
|
+
tokend = (tokend - tokstart);
|
1304
|
+
tokstart = 0;
|
1305
|
+
}
|
1306
|
+
}
|
1307
|
+
return runtime.getNil();
|
1308
|
+
}
|
1309
|
+
|
1310
|
+
public static IRubyObject __hpricot_scan(IRubyObject recv, IRubyObject port, Block block) {
|
1311
|
+
Ruby runtime = recv.getRuntime();
|
1312
|
+
HpricotScanService service = new HpricotScanService();
|
1313
|
+
service.runtime = runtime;
|
1314
|
+
service.xmldecl = runtime.newSymbol("xmldecl");
|
1315
|
+
service.doctype = runtime.newSymbol("doctype");
|
1316
|
+
service.procins = runtime.newSymbol("procins");
|
1317
|
+
service.stag = runtime.newSymbol("stag");
|
1318
|
+
service.etag = runtime.newSymbol("etag");
|
1319
|
+
service.emptytag = runtime.newSymbol("emptytag");
|
1320
|
+
service.comment = runtime.newSymbol("comment");
|
1321
|
+
service.cdata = runtime.newSymbol("cdata");
|
1322
|
+
service.sym_text = runtime.newSymbol("text");
|
1323
|
+
service.block = block;
|
1324
|
+
return service.hpricot_scan(recv, port);
|
1325
|
+
}
|
1326
|
+
|
1327
|
+
|
1328
|
+
public boolean basicLoad(final Ruby runtime) throws IOException {
|
1329
|
+
Init_hpricot_scan(runtime);
|
1330
|
+
return true;
|
1331
|
+
}
|
1332
|
+
|
1333
|
+
public static void Init_hpricot_scan(Ruby runtime) {
|
1334
|
+
RubyModule mHpricot = runtime.defineModule("Hpricot");
|
1335
|
+
mHpricot.getMetaClass().attr_accessor(new IRubyObject[]{runtime.newSymbol("buffer_size")});
|
1336
|
+
CallbackFactory fact = runtime.callbackFactory(HpricotScanService.class);
|
1337
|
+
mHpricot.getMetaClass().defineMethod("scan",fact.getSingletonMethod("__hpricot_scan",IRubyObject.class));
|
1338
|
+
mHpricot.defineClassUnder("ParseError",runtime.getClass("Exception"),runtime.getClass("Exception").getAllocator());
|
1339
|
+
}
|
1340
|
+
}
|