jerryvos-hpricot 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/CHANGELOG +75 -0
  2. data/COPYING +18 -0
  3. data/README +284 -0
  4. data/Rakefile +260 -0
  5. data/ext/fast_xs/FastXsService.java +1018 -0
  6. data/ext/fast_xs/extconf.rb +4 -0
  7. data/ext/fast_xs/fast_xs.c +201 -0
  8. data/ext/hpricot_scan/HpricotScanService.java +1305 -0
  9. data/ext/hpricot_scan/extconf.rb +6 -0
  10. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  11. data/ext/hpricot_scan/hpricot_css.c +3502 -0
  12. data/ext/hpricot_scan/hpricot_scan.c +6768 -0
  13. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  14. data/ext/hpricot_scan/hpricot_scan.java.rl +373 -0
  15. data/ext/hpricot_scan/hpricot_scan.rl +786 -0
  16. data/extras/mingw-rbconfig.rb +176 -0
  17. data/lib/hpricot.rb +26 -0
  18. data/lib/hpricot/blankslate.rb +63 -0
  19. data/lib/hpricot/builder.rb +216 -0
  20. data/lib/hpricot/elements.rb +510 -0
  21. data/lib/hpricot/htmlinfo.rb +691 -0
  22. data/lib/hpricot/inspect.rb +103 -0
  23. data/lib/hpricot/modules.rb +40 -0
  24. data/lib/hpricot/parse.rb +38 -0
  25. data/lib/hpricot/tag.rb +200 -0
  26. data/lib/hpricot/tags.rb +164 -0
  27. data/lib/hpricot/traverse.rb +838 -0
  28. data/lib/hpricot/xchar.rb +94 -0
  29. data/test/files/basic.xhtml +17 -0
  30. data/test/files/boingboing.html +2266 -0
  31. data/test/files/cy0.html +3653 -0
  32. data/test/files/immob.html +400 -0
  33. data/test/files/pace_application.html +1320 -0
  34. data/test/files/tenderlove.html +16 -0
  35. data/test/files/uswebgen.html +220 -0
  36. data/test/files/utf8.html +1054 -0
  37. data/test/files/week9.html +1723 -0
  38. data/test/files/why.xml +19 -0
  39. data/test/load_files.rb +7 -0
  40. data/test/test_alter.rb +77 -0
  41. data/test/test_builder.rb +37 -0
  42. data/test/test_parser.rb +420 -0
  43. data/test/test_paths.rb +25 -0
  44. data/test/test_preserved.rb +70 -0
  45. data/test/test_xml.rb +28 -0
  46. metadata +107 -0
@@ -0,0 +1,4 @@
1
# Build configuration for the fast_xs C extension.
require 'mkmf'

# Stop immediately when the stdio.h header check fails.
exit unless have_header('stdio.h')

dir_config('fast_xs')
create_makefile('fast_xs')
@@ -0,0 +1,201 @@
1
+ #define VERSION "0.1"
2
+
3
+ #include <ruby.h>
4
+ #include <assert.h>
5
+ /* #include <stdio.h> */
6
+
7
/*
 * Fallback accessors for Ruby headers that do not define the
 * RARRAY_* / RSTRING_* length and pointer macros.
 *
 * Each macro has its own guard, so a header that supplies only some of
 * them is never redefined, and each expansion is parenthesized so it
 * behaves as a single expression wherever it is used.
 */
#ifndef RARRAY_LEN
#define RARRAY_LEN(arr) (RARRAY(arr)->len)
#endif
#ifndef RARRAY_PTR
#define RARRAY_PTR(arr) (RARRAY(arr)->ptr)
#endif
#ifndef RSTRING_LEN
#define RSTRING_LEN(str) (RSTRING(str)->len)
#endif
#ifndef RSTRING_PTR
#define RSTRING_PTR(str) (RSTRING(str)->ptr)
#endif
13
+
14
+ static ID unpack_id;
15
+ static VALUE U_fmt, C_fmt;
16
+
17
/*
 * Branch-prediction hints.  On GCC 3 and newer the condition is
 * normalized to 0/1 and passed to __builtin_expect; on any other
 * compiler both macros simply evaluate to their argument.
 * The escaping code uses them to mark the ASCII path as the common one.
 */
#if defined(__GNUC__) && (__GNUC__ >= 3)
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x)   (x)
#define unlikely(x) (x)
#endif
26
+
27
/*
 * CP-1252 -> Unicode translation table for bytes 128..159, indexed by
 * (byte - 128).
 *
 * Slots wrapped in p() have no CP-1252 assignment and are passed through
 * unchanged, i.e. the table holds the byte's own value (129 => 129).
 * p() previously expanded to (x-128), which stored 1, 13, 15, 16 and 29
 * in those slots -- contradicting the per-entry comments and causing the
 * bytes to be rejected by the later validity check.
 */
#define p(x) (x)

static const int cp_1252[] = {
    8364,   /* 128 => 8364, euro sign */
    p(129), /* 129 => 129, pass-through */
    8218,   /* 130 => 8218, single low-9 quotation mark */
    402,    /* 131 => 402, latin small letter f with hook */
    8222,   /* 132 => 8222, double low-9 quotation mark */
    8230,   /* 133 => 8230, horizontal ellipsis */
    8224,   /* 134 => 8224, dagger */
    8225,   /* 135 => 8225, double dagger */
    710,    /* 136 => 710, modifier letter circumflex accent */
    8240,   /* 137 => 8240, per mille sign */
    352,    /* 138 => 352, latin capital letter s with caron */
    8249,   /* 139 => 8249, single left-pointing angle quotation mark */
    338,    /* 140 => 338, latin capital ligature oe */
    p(141), /* 141 => 141, pass-through */
    381,    /* 142 => 381, latin capital letter z with caron */
    p(143), /* 143 => 143, pass-through */
    p(144), /* 144 => 144, pass-through */
    8216,   /* 145 => 8216, left single quotation mark */
    8217,   /* 146 => 8217, right single quotation mark */
    8220,   /* 147 => 8220, left double quotation mark */
    8221,   /* 148 => 8221, right double quotation mark */
    8226,   /* 149 => 8226, bullet */
    8211,   /* 150 => 8211, en dash */
    8212,   /* 151 => 8212, em dash */
    732,    /* 152 => 732, small tilde */
    8482,   /* 153 => 8482, trade mark sign */
    353,    /* 154 => 353, latin small letter s with caron */
    8250,   /* 155 => 8250, single right-pointing angle quotation mark */
    339,    /* 156 => 339, latin small ligature oe */
    p(157), /* 157 => 157, pass-through */
    382,    /* 158 => 382, latin small letter z with caron */
    376     /* 159 => 376, latin capital letter y with diaeresis */
};

/* p() is only meaningful inside the table above */
#undef p
64
+
65
/*
 * True when n lies in one of the three code point ranges accepted for
 * numeric escaping: 0x20..0xD7FF, 0xE000..0xFFFD, 0x10000..0x10FFFF.
 * The argument and the whole expansion are parenthesized so the macro
 * can be negated or combined with other operators safely.
 */
#define VALID_VALUE(n) \
    (((n) >= 0x20 && (n) <= 0xD7FF) || \
     ((n) >= 0xE000 && (n) <= 0xFFFD) || \
     ((n) >= 0x10000 && (n) <= 0x10FFFF))

/*
 * Replace n (which must be a modifiable lvalue) with its cp_1252 table
 * entry when it is in the range 128..159; other values are untouched.
 */
#define CP_1252_ESCAPE(n) do { \
    if ((n) >= 128 && (n) <= 159) \
        (n) = cp_1252[(n) - 128]; \
} while (0)

/*
 * Copy the string literal x (without its terminating NUL) into the
 * enclosing function's `buf` and return the number of bytes copied.
 * Only usable inside a function that has a `buf` in scope.
 */
#define return_const_len(x) do { \
    memcpy(buf, (x), sizeof(x) - 1); \
    return (sizeof(x) - 1); \
} while (0)
79
+
80
/*
 * Number of bytes occupied by the numeric character reference "&#N;"
 * for n.  The result is never smaller than the three-digit form
 * (6 bytes) and never larger than the seven-digit form (10 bytes);
 * callers are not expected to pass anything above 0x10FFFF.
 */
static inline size_t bytes_for(int n)
{
    size_t len = sizeof("&#999;") - 1;
    int limit = 1000;

    /* grow by one byte per extra decimal digit, up to seven digits */
    while (len < sizeof("&#9999999;") - 1 && n >= limit) {
        limit *= 10;
        ++len;
    }
    return len;
}
93
+
94
/*
 * Write the escaped form of code point n to buf and return the number
 * of bytes written.  No NUL terminator is added.
 *
 *  - n < 128: '"', '&', '<' and '>' become their named entities; tab,
 *    LF, CR and everything from 0x20 up are copied as one byte; any
 *    other value becomes '*'.
 *  - n >= 128: n is first translated through CP_1252_ESCAPE, then
 *    written as a decimal reference "&#N;" if VALID_VALUE accepts it,
 *    otherwise as '*'.
 *
 * buf must have room for the result (at most 10 bytes); fast_xs()
 * sizes its buffer with escaped_len() for this purpose.
 */
static long escape(char *buf, int n)
{
    /* handle ASCII first */
    if (likely(n < 128)) {
        if (likely(n >= 0x20 || n == 0x9 || n == 0xA || n == 0xD)) {
            if (unlikely(n == 34))
                return_const_len("&quot;");
            if (unlikely(n == 38))
                return_const_len("&amp;");
            if (unlikely(n == 60))
                return_const_len("&lt;");
            if (unlikely(n == 62))
                return_const_len("&gt;");
            buf[0] = (char)n;
            return 1;
        }

        buf[0] = '*';
        return 1;
    }

    CP_1252_ESCAPE(n);

    if (VALID_VALUE(n)) {
        long rv = 3; /* '&', '#' and ';' */

        /*
         * Emit the reference back to front: jump to the end of the
         * slot reported by bytes_for() and walk backwards.  Digits are
         * produced arithmetically rather than through Ruby's
         * ruby_digitmap table, which no header declares.
         */
        buf += bytes_for(n);
        *--buf = ';';
        do {
            *--buf = (char)('0' + n % 10);
            ++rv;
        } while (n /= 10);
        *--buf = '#';
        *--buf = '&';
        return rv;
    }

    buf[0] = '*';
    return 1;
}
134
+
135
+ #undef return_const_len
136
+
137
/*
 * Number of bytes escape() produces for code point n, computed without
 * writing anything.
 */
static long escaped_len(int n)
{
    /* non-ASCII: translate through the CP-1252 table, then size it */
    if (unlikely(n >= 128)) {
        CP_1252_ESCAPE(n);

        if (VALID_VALUE(n))
            return bytes_for(n);
        return 1;
    }

    switch (n) {
    case 34:
        return (sizeof("&quot;") - 1);
    case 38:
        return (sizeof("&amp;") - 1);
    case 60: /* "&lt;" and "&gt;" have the same length */
    case 62:
        return (sizeof("&gt;") - 1);
    default:
        return 1;
    }
}
155
+
156
+ static VALUE unpack_utf8(VALUE self)
157
+ {
158
+ return rb_funcall(self, unpack_id, 1, U_fmt);
159
+ }
160
+
161
+ static VALUE unpack_uchar(VALUE self)
162
+ {
163
+ return rb_funcall(self, unpack_id, 1, C_fmt);
164
+ }
165
+
166
+ VALUE fast_xs(VALUE self)
167
+ {
168
+ long i;
169
+ struct RArray *array;
170
+ char *s, *c;
171
+ long s_len = 0;
172
+ VALUE *tmp;
173
+
174
+ array = RARRAY(rb_rescue(unpack_utf8, self, unpack_uchar, self));
175
+
176
+ tmp = RARRAY_PTR(array);
177
+ for (i = RARRAY_LEN(array); --i >= 0; tmp++)
178
+ s_len += escaped_len(NUM2INT(*tmp));
179
+
180
+ c = s = alloca(s_len + 1);
181
+
182
+ tmp = RARRAY_PTR(array);
183
+ for (i = RARRAY_LEN(array); --i >= 0; tmp++)
184
+ c += escape(c, NUM2INT(*tmp));
185
+
186
+ *c = '\0';
187
+ return rb_str_new(s, s_len);
188
+ }
189
+
190
+ void Init_fast_xs(void)
191
+ {
192
+ assert(cp_1252[159 - 128] == 376); /* just in case I skipped a line */
193
+
194
+ unpack_id = rb_intern("unpack");
195
+ U_fmt = rb_str_new("U*", 2);
196
+ C_fmt = rb_str_new("C*", 2);
197
+ rb_global_variable(&U_fmt);
198
+ rb_global_variable(&C_fmt);
199
+
200
+ rb_define_method(rb_cString, "fast_xs", fast_xs, 0);
201
+ }
@@ -0,0 +1,1305 @@
1
+ // line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
2
+
3
+ import java.io.IOException;
4
+
5
+ import org.jruby.Ruby;
6
+ import org.jruby.RubyClass;
7
+ import org.jruby.RubyHash;
8
+ import org.jruby.RubyModule;
9
+ import org.jruby.RubyNumeric;
10
+ import org.jruby.RubyObjectAdapter;
11
+ import org.jruby.RubyString;
12
+ import org.jruby.javasupport.JavaEmbedUtils;
13
+ import org.jruby.runtime.Block;
14
+ import org.jruby.runtime.CallbackFactory;
15
+ import org.jruby.runtime.builtin.IRubyObject;
16
+ import org.jruby.exceptions.RaiseException;
17
+ import org.jruby.runtime.load.BasicLibraryService;
18
+
19
+ public class HpricotScanService implements BasicLibraryService {
20
+ public static String NO_WAY_SERIOUSLY="*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
21
+ private static RubyObjectAdapter rubyApi;
22
+
23
+ public void ELE(IRubyObject N) {
24
+ if (te > ts || text) {
25
+ IRubyObject raw_string = runtime.getNil();
26
+ ele_open = false; text = false;
27
+ if (ts != -1 && N != cdata && N != sym_text && N != procins && N != comment) {
28
+ raw_string = runtime.newString(new String(buf,ts,te-ts));
29
+ }
30
+ rb_yield_tokens(N, tag[0], attr, raw_string, taint);
31
+ }
32
+ }
33
+
34
+ public void SET(IRubyObject[] N, int E) {
35
+ int mark = 0;
36
+ if(N == tag) {
37
+ if(mark_tag == -1 || E == mark_tag) {
38
+ tag[0] = runtime.newString("");
39
+ } else if(E > mark_tag) {
40
+ tag[0] = runtime.newString(new String(buf,mark_tag, E-mark_tag));
41
+ }
42
+ } else if(N == akey) {
43
+ if(mark_akey == -1 || E == mark_akey) {
44
+ akey[0] = runtime.newString("");
45
+ } else if(E > mark_akey) {
46
+ akey[0] = runtime.newString(new String(buf,mark_akey, E-mark_akey));
47
+ }
48
+ } else if(N == aval) {
49
+ if(mark_aval == -1 || E == mark_aval) {
50
+ aval[0] = runtime.newString("");
51
+ } else if(E > mark_aval) {
52
+ aval[0] = runtime.newString(new String(buf,mark_aval, E-mark_aval));
53
+ }
54
+ }
55
+ }
56
+
57
+ public void CAT(IRubyObject[] N, int E) {
58
+ if(N[0].isNil()) {
59
+ SET(N,E);
60
+ } else {
61
+ int mark = 0;
62
+ if(N == tag) {
63
+ mark = mark_tag;
64
+ } else if(N == akey) {
65
+ mark = mark_akey;
66
+ } else if(N == aval) {
67
+ mark = mark_aval;
68
+ }
69
+ ((RubyString)(N[0])).append(runtime.newString(new String(buf, mark, E-mark)));
70
+ }
71
+ }
72
+
73
+ public void SLIDE(Object N) {
74
+ int mark = 0;
75
+ if(N == tag) {
76
+ mark = mark_tag;
77
+ } else if(N == akey) {
78
+ mark = mark_akey;
79
+ } else if(N == aval) {
80
+ mark = mark_aval;
81
+ }
82
+ if(mark > ts) {
83
+ if(N == tag) {
84
+ mark_tag -= ts;
85
+ } else if(N == akey) {
86
+ mark_akey -= ts;
87
+ } else if(N == aval) {
88
+ mark_aval -= ts;
89
+ }
90
+ }
91
+ }
92
+
93
+ public void ATTR(IRubyObject K, IRubyObject V) {
94
+ if(!K.isNil()) {
95
+ if(attr.isNil()) {
96
+ attr = RubyHash.newHash(runtime);
97
+ }
98
+ ((RubyHash)attr).op_aset(runtime.getCurrentContext(),K,V);
99
+ // ((RubyHash)attr).aset(K,V);
100
+ }
101
+ }
102
+
103
+ public void ATTR(IRubyObject[] K, IRubyObject V) {
104
+ ATTR(K[0],V);
105
+ }
106
+
107
+ public void ATTR(IRubyObject K, IRubyObject[] V) {
108
+ ATTR(K,V[0]);
109
+ }
110
+
111
+ public void ATTR(IRubyObject[] K, IRubyObject[] V) {
112
+ ATTR(K[0],V[0]);
113
+ }
114
+
115
+ public void TEXT_PASS() {
116
+ if(!text) {
117
+ if(ele_open) {
118
+ ele_open = false;
119
+ if(ts > -1) {
120
+ mark_tag = ts;
121
+ }
122
+ } else {
123
+ mark_tag = p;
124
+ }
125
+ attr = runtime.getNil();
126
+ tag[0] = runtime.getNil();
127
+ text = true;
128
+ }
129
+ }
130
+
131
+ public void EBLK(IRubyObject N, int T) {
132
+ CAT(tag, p - T + 1);
133
+ ELE(N);
134
+ }
135
+
136
+
137
+ public void rb_raise(RubyClass error, String message) {
138
+ throw new RaiseException(runtime, error, message, true);
139
+ }
140
+
141
+ public IRubyObject rb_str_new2(String s) {
142
+ return runtime.newString(s);
143
+ }
144
+
145
+ // line 189 "ext/hpricot_scan/hpricot_scan.java.rl"
146
+
147
+
148
+
149
+ // line 150 "ext/hpricot_scan/HpricotScanService.java"
150
+ private static byte[] init__hpricot_scan_actions_0()
151
+ {
152
+ return new byte [] {
153
+ 0, 1, 1, 1, 2, 1, 4, 1, 5, 1, 6, 1,
154
+ 7, 1, 8, 1, 9, 1, 10, 1, 11, 1, 12, 1,
155
+ 14, 1, 16, 1, 20, 1, 21, 1, 22, 1, 24, 1,
156
+ 25, 1, 26, 1, 28, 1, 29, 1, 30, 1, 32, 1,
157
+ 33, 1, 38, 1, 39, 1, 40, 1, 41, 1, 42, 1,
158
+ 43, 1, 44, 1, 45, 1, 46, 1, 47, 1, 48, 1,
159
+ 49, 1, 50, 1, 51, 2, 2, 5, 2, 2, 6, 2,
160
+ 2, 11, 2, 2, 12, 2, 2, 14, 2, 4, 39, 2,
161
+ 4, 40, 2, 4, 41, 2, 5, 2, 2, 6, 14, 2,
162
+ 7, 6, 2, 7, 14, 2, 11, 12, 2, 13, 3, 2,
163
+ 14, 6, 2, 14, 40, 2, 15, 24, 2, 15, 28, 2,
164
+ 15, 32, 2, 15, 45, 2, 17, 23, 2, 18, 27, 2,
165
+ 19, 31, 2, 22, 34, 2, 22, 36, 3, 2, 6, 14,
166
+ 3, 2, 14, 6, 3, 6, 7, 14, 3, 6, 14, 40,
167
+ 3, 7, 14, 40, 3, 11, 2, 12, 3, 14, 6, 40,
168
+ 3, 14, 13, 3, 3, 22, 0, 37, 3, 22, 2, 34,
169
+ 3, 22, 14, 35, 4, 2, 14, 13, 3, 4, 6, 7,
170
+ 14, 40, 4, 22, 2, 14, 35, 4, 22, 6, 14, 35,
171
+ 4, 22, 7, 14, 35, 4, 22, 14, 6, 35, 5, 22,
172
+ 2, 6, 14, 35, 5, 22, 2, 14, 6, 35, 5, 22,
173
+ 6, 7, 14, 35
174
+ };
175
+ }
176
+
177
+ private static final byte _hpricot_scan_actions[] = init__hpricot_scan_actions_0();
178
+
179
+
180
+ private static short[] init__hpricot_scan_key_offsets_0()
181
+ {
182
+ return new short [] {
183
+ 0, 3, 4, 5, 6, 7, 8, 9, 10, 13, 22, 37,
184
+ 44, 45, 46, 47, 48, 49, 52, 57, 69, 81, 86, 93,
185
+ 94, 95, 100, 101, 105, 106, 107, 121, 135, 152, 169, 186,
186
+ 203, 210, 212, 214, 220, 222, 227, 232, 238, 240, 245, 251,
187
+ 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
188
+ 282, 296, 300, 313, 326, 340, 354, 355, 366, 375, 388, 405,
189
+ 423, 441, 450, 461, 480, 499, 510, 521, 536, 538, 540, 556,
190
+ 572, 575, 587, 599, 619, 639, 658, 677, 697, 717, 728, 739,
191
+ 751, 763, 775, 791, 794, 809, 811, 813, 829, 845, 848, 860,
192
+ 871, 890, 910, 930, 941, 952, 964, 984, 1004, 1016, 1036, 1057,
193
+ 1074, 1091, 1095, 1098, 1110, 1122, 1142, 1162, 1182, 1194, 1206, 1226,
194
+ 1242, 1258, 1270, 1291, 1310, 1313, 1328, 1340, 1355, 1358, 1369, 1371,
195
+ 1373, 1384, 1391, 1404, 1418, 1432, 1445, 1446, 1447, 1448, 1449, 1450,
196
+ 1451, 1455, 1460, 1469, 1479, 1484, 1491, 1492, 1493, 1494, 1495, 1496,
197
+ 1497, 1498, 1499, 1503, 1508, 1512, 1522, 1527, 1533, 1534, 1535, 1536,
198
+ 1537, 1538, 1539, 1540, 1541, 1542, 1546, 1551, 1553, 1554, 1555, 1560,
199
+ 1561, 1562, 1564, 1565, 1566, 1567, 1568, 1572, 1582, 1591, 1601, 1602,
200
+ 1603, 1605, 1614, 1615, 1616, 1617, 1619, 1621, 1624, 1627, 1631, 1633,
201
+ 1634, 1636, 1637, 1640
202
+ };
203
+ }
204
+
205
+ private static final short _hpricot_scan_key_offsets[] = init__hpricot_scan_key_offsets_0();
206
+
207
+
208
+ private static char[] init__hpricot_scan_trans_keys_0()
209
+ {
210
+ return new char [] {
211
+ 45, 68, 91, 45, 79, 67, 84, 89, 80, 69, 32, 9,
212
+ 13, 32, 58, 95, 9, 13, 65, 90, 97, 122, 32, 62,
213
+ 63, 91, 95, 9, 13, 45, 46, 48, 58, 65, 90, 97,
214
+ 122, 32, 62, 80, 83, 91, 9, 13, 85, 66, 76, 73,
215
+ 67, 32, 9, 13, 32, 34, 39, 9, 13, 9, 34, 61,
216
+ 95, 32, 37, 39, 59, 63, 90, 97, 122, 9, 34, 61,
217
+ 95, 32, 37, 39, 59, 63, 90, 97, 122, 32, 62, 91,
218
+ 9, 13, 32, 34, 39, 62, 91, 9, 13, 34, 34, 32,
219
+ 62, 91, 9, 13, 93, 32, 62, 9, 13, 39, 39, 9,
220
+ 39, 61, 95, 32, 33, 35, 37, 40, 59, 63, 90, 97,
221
+ 122, 9, 39, 61, 95, 32, 33, 35, 37, 40, 59, 63,
222
+ 90, 97, 122, 9, 32, 33, 39, 62, 91, 95, 10, 13,
223
+ 35, 37, 40, 59, 61, 90, 97, 122, 9, 32, 34, 39,
224
+ 62, 91, 95, 10, 13, 33, 37, 40, 59, 61, 90, 97,
225
+ 122, 9, 32, 33, 39, 62, 91, 95, 10, 13, 35, 37,
226
+ 40, 59, 61, 90, 97, 122, 9, 32, 34, 39, 62, 91,
227
+ 95, 10, 13, 33, 37, 40, 59, 61, 90, 97, 122, 32,
228
+ 34, 39, 62, 91, 9, 13, 34, 39, 34, 39, 32, 39,
229
+ 62, 91, 9, 13, 39, 93, 32, 62, 93, 9, 13, 32,
230
+ 39, 62, 9, 13, 32, 34, 62, 91, 9, 13, 34, 93,
231
+ 32, 34, 62, 9, 13, 32, 39, 62, 91, 9, 13, 9,
232
+ 39, 61, 95, 32, 33, 35, 37, 40, 59, 63, 90, 97,
233
+ 122, 89, 83, 84, 69, 77, 67, 68, 65, 84, 65, 91,
234
+ 58, 95, 65, 90, 97, 122, 32, 62, 63, 95, 9, 13,
235
+ 45, 46, 48, 58, 65, 90, 97, 122, 32, 62, 9, 13,
236
+ 32, 47, 62, 63, 95, 9, 13, 45, 58, 65, 90, 97,
237
+ 122, 32, 47, 62, 63, 95, 9, 13, 45, 58, 65, 90,
238
+ 97, 122, 32, 47, 61, 62, 63, 95, 9, 13, 45, 58,
239
+ 65, 90, 97, 122, 32, 47, 61, 62, 63, 95, 9, 13,
240
+ 45, 58, 65, 90, 97, 122, 62, 13, 32, 34, 39, 47,
241
+ 60, 62, 9, 10, 11, 12, 13, 32, 47, 60, 62, 9,
242
+ 10, 11, 12, 32, 47, 62, 63, 95, 9, 13, 45, 58,
243
+ 65, 90, 97, 122, 13, 32, 47, 60, 62, 63, 95, 9,
244
+ 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 47,
245
+ 60, 61, 62, 63, 95, 9, 10, 11, 12, 45, 58, 65,
246
+ 90, 97, 122, 13, 32, 47, 60, 61, 62, 63, 95, 9,
247
+ 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 47,
248
+ 60, 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60,
249
+ 62, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62,
250
+ 63, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122,
251
+ 13, 32, 34, 39, 47, 60, 62, 63, 95, 9, 10, 11,
252
+ 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 47, 60,
253
+ 62, 92, 9, 10, 11, 12, 13, 32, 34, 47, 60, 62,
254
+ 92, 9, 10, 11, 12, 32, 34, 47, 62, 63, 92, 95,
255
+ 9, 13, 45, 58, 65, 90, 97, 122, 34, 92, 34, 92,
256
+ 32, 34, 47, 61, 62, 63, 92, 95, 9, 13, 45, 58,
257
+ 65, 90, 97, 122, 32, 34, 47, 61, 62, 63, 92, 95,
258
+ 9, 13, 45, 58, 65, 90, 97, 122, 34, 62, 92, 13,
259
+ 32, 34, 39, 47, 60, 62, 92, 9, 10, 11, 12, 13,
260
+ 32, 34, 39, 47, 60, 62, 92, 9, 10, 11, 12, 13,
261
+ 32, 34, 39, 47, 60, 62, 63, 92, 95, 9, 10, 11,
262
+ 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39, 47,
263
+ 60, 62, 63, 92, 95, 9, 10, 11, 12, 45, 58, 65,
264
+ 90, 97, 122, 13, 32, 34, 47, 60, 62, 63, 92, 95,
265
+ 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32,
266
+ 34, 47, 60, 62, 63, 92, 95, 9, 10, 11, 12, 45,
267
+ 58, 65, 90, 97, 122, 13, 32, 34, 47, 60, 61, 62,
268
+ 63, 92, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97,
269
+ 122, 13, 32, 34, 47, 60, 61, 62, 63, 92, 95, 9,
270
+ 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34,
271
+ 47, 60, 62, 92, 9, 10, 11, 12, 13, 32, 34, 47,
272
+ 60, 62, 92, 9, 10, 11, 12, 13, 32, 34, 39, 47,
273
+ 60, 62, 92, 9, 10, 11, 12, 13, 32, 34, 39, 47,
274
+ 60, 62, 92, 9, 10, 11, 12, 13, 32, 34, 39, 47,
275
+ 60, 62, 92, 9, 10, 11, 12, 32, 34, 39, 47, 62,
276
+ 63, 92, 95, 9, 13, 45, 58, 65, 90, 97, 122, 34,
277
+ 39, 92, 32, 39, 47, 62, 63, 92, 95, 9, 13, 45,
278
+ 58, 65, 90, 97, 122, 39, 92, 39, 92, 32, 39, 47,
279
+ 61, 62, 63, 92, 95, 9, 13, 45, 58, 65, 90, 97,
280
+ 122, 32, 39, 47, 61, 62, 63, 92, 95, 9, 13, 45,
281
+ 58, 65, 90, 97, 122, 39, 62, 92, 13, 32, 34, 39,
282
+ 47, 60, 62, 92, 9, 10, 11, 12, 13, 32, 39, 47,
283
+ 60, 62, 92, 9, 10, 11, 12, 13, 32, 39, 47, 60,
284
+ 62, 63, 92, 95, 9, 10, 11, 12, 45, 58, 65, 90,
285
+ 97, 122, 13, 32, 39, 47, 60, 61, 62, 63, 92, 95,
286
+ 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32,
287
+ 39, 47, 60, 61, 62, 63, 92, 95, 9, 10, 11, 12,
288
+ 45, 58, 65, 90, 97, 122, 13, 32, 39, 47, 60, 62,
289
+ 92, 9, 10, 11, 12, 13, 32, 39, 47, 60, 62, 92,
290
+ 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 92,
291
+ 9, 10, 11, 12, 13, 32, 34, 39, 47, 60, 62, 63,
292
+ 92, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97, 122,
293
+ 13, 32, 34, 39, 47, 60, 62, 63, 92, 95, 9, 10,
294
+ 11, 12, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39,
295
+ 47, 60, 62, 92, 9, 10, 11, 12, 13, 32, 34, 39,
296
+ 47, 60, 62, 63, 92, 95, 9, 10, 11, 12, 45, 58,
297
+ 65, 90, 97, 122, 13, 32, 34, 39, 47, 60, 61, 62,
298
+ 63, 92, 95, 9, 10, 11, 12, 45, 58, 65, 90, 97,
299
+ 122, 32, 34, 39, 47, 61, 62, 63, 92, 95, 9, 13,
300
+ 45, 58, 65, 90, 97, 122, 32, 34, 39, 47, 61, 62,
301
+ 63, 92, 95, 9, 13, 45, 58, 65, 90, 97, 122, 34,
302
+ 39, 62, 92, 34, 39, 92, 13, 32, 34, 39, 47, 60,
303
+ 62, 92, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60,
304
+ 62, 92, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60,
305
+ 62, 63, 92, 95, 9, 10, 11, 12, 45, 58, 65, 90,
306
+ 97, 122, 13, 32, 34, 39, 47, 60, 62, 63, 92, 95,
307
+ 9, 10, 11, 12, 45, 58, 65, 90, 97, 122, 13, 32,
308
+ 34, 39, 47, 60, 62, 63, 92, 95, 9, 10, 11, 12,
309
+ 45, 58, 65, 90, 97, 122, 13, 32, 34, 39, 47, 60,
310
+ 62, 92, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60,
311
+ 62, 92, 9, 10, 11, 12, 13, 32, 34, 39, 47, 60,
312
+ 62, 63, 92, 95, 9, 10, 11, 12, 45, 58, 65, 90,
313
+ 97, 122, 32, 34, 39, 47, 62, 63, 92, 95, 9, 13,
314
+ 45, 58, 65, 90, 97, 122, 32, 34, 39, 47, 62, 63,
315
+ 92, 95, 9, 13, 45, 58, 65, 90, 97, 122, 13, 32,
316
+ 34, 39, 47, 60, 62, 92, 9, 10, 11, 12, 13, 32,
317
+ 34, 39, 47, 60, 61, 62, 63, 92, 95, 9, 10, 11,
318
+ 12, 45, 58, 65, 90, 97, 122, 13, 32, 39, 47, 60,
319
+ 62, 63, 92, 95, 9, 10, 11, 12, 45, 58, 65, 90,
320
+ 97, 122, 34, 39, 92, 32, 39, 47, 62, 63, 92, 95,
321
+ 9, 13, 45, 58, 65, 90, 97, 122, 13, 32, 34, 39,
322
+ 47, 60, 62, 92, 9, 10, 11, 12, 32, 34, 47, 62,
323
+ 63, 92, 95, 9, 13, 45, 58, 65, 90, 97, 122, 34,
324
+ 39, 92, 13, 32, 39, 47, 60, 62, 92, 9, 10, 11,
325
+ 12, 34, 92, 39, 92, 13, 32, 34, 39, 47, 60, 62,
326
+ 9, 10, 11, 12, 58, 95, 120, 65, 90, 97, 122, 32,
327
+ 63, 95, 9, 13, 45, 46, 48, 58, 65, 90, 97, 122,
328
+ 32, 63, 95, 109, 9, 13, 45, 46, 48, 58, 65, 90,
329
+ 97, 122, 32, 63, 95, 108, 9, 13, 45, 46, 48, 58,
330
+ 65, 90, 97, 122, 32, 63, 95, 9, 13, 45, 46, 48,
331
+ 58, 65, 90, 97, 122, 101, 114, 115, 105, 111, 110, 32,
332
+ 61, 9, 13, 32, 34, 39, 9, 13, 95, 45, 46, 48,
333
+ 58, 65, 90, 97, 122, 34, 95, 45, 46, 48, 58, 65,
334
+ 90, 97, 122, 32, 62, 63, 9, 13, 32, 62, 63, 101,
335
+ 115, 9, 13, 62, 110, 99, 111, 100, 105, 110, 103, 32,
336
+ 61, 9, 13, 32, 34, 39, 9, 13, 65, 90, 97, 122,
337
+ 34, 95, 45, 46, 48, 57, 65, 90, 97, 122, 32, 62,
338
+ 63, 9, 13, 32, 62, 63, 115, 9, 13, 116, 97, 110,
339
+ 100, 97, 108, 111, 110, 101, 32, 61, 9, 13, 32, 34,
340
+ 39, 9, 13, 110, 121, 111, 34, 32, 62, 63, 9, 13,
341
+ 101, 115, 110, 121, 111, 39, 101, 115, 65, 90, 97, 122,
342
+ 39, 95, 45, 46, 48, 57, 65, 90, 97, 122, 95, 45,
343
+ 46, 48, 58, 65, 90, 97, 122, 39, 95, 45, 46, 48,
344
+ 58, 65, 90, 97, 122, 62, 62, 10, 60, 33, 47, 58,
345
+ 63, 95, 65, 90, 97, 122, 39, 93, 34, 34, 92, 39,
346
+ 92, 34, 39, 92, 32, 9, 13, 32, 118, 9, 13, 10,
347
+ 45, 45, 10, 93, 93, 10, 62, 63, 62, 0
348
+ };
349
+ }
350
+
351
+ private static final char _hpricot_scan_trans_keys[] = init__hpricot_scan_trans_keys_0();
352
+
353
+
354
+ private static byte[] init__hpricot_scan_single_lengths_0()
355
+ {
356
+ return new byte [] {
357
+ 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 5, 5,
358
+ 1, 1, 1, 1, 1, 1, 3, 4, 4, 3, 5, 1,
359
+ 1, 3, 1, 2, 1, 1, 4, 4, 7, 7, 7, 7,
360
+ 5, 2, 2, 4, 2, 3, 3, 4, 2, 3, 4, 4,
361
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
362
+ 4, 2, 5, 5, 6, 6, 1, 7, 5, 5, 7, 8,
363
+ 8, 5, 7, 9, 9, 7, 7, 7, 2, 2, 8, 8,
364
+ 3, 8, 8, 10, 10, 9, 9, 10, 10, 7, 7, 8,
365
+ 8, 8, 8, 3, 7, 2, 2, 8, 8, 3, 8, 7,
366
+ 9, 10, 10, 7, 7, 8, 10, 10, 8, 10, 11, 9,
367
+ 9, 4, 3, 8, 8, 10, 10, 10, 8, 8, 10, 8,
368
+ 8, 8, 11, 9, 3, 7, 8, 7, 3, 7, 2, 2,
369
+ 7, 3, 3, 4, 4, 3, 1, 1, 1, 1, 1, 1,
370
+ 2, 3, 1, 2, 3, 5, 1, 1, 1, 1, 1, 1,
371
+ 1, 1, 2, 3, 0, 2, 3, 4, 1, 1, 1, 1,
372
+ 1, 1, 1, 1, 1, 2, 3, 2, 1, 1, 3, 1,
373
+ 1, 2, 1, 1, 1, 1, 0, 2, 1, 2, 1, 1,
374
+ 2, 5, 1, 1, 1, 2, 2, 3, 1, 2, 2, 1,
375
+ 2, 1, 3, 1
376
+ };
377
+ }
378
+
379
+ private static final byte _hpricot_scan_single_lengths[] = init__hpricot_scan_single_lengths_0();
380
+
381
+
382
+ private static byte[] init__hpricot_scan_range_lengths_0()
383
+ {
384
+ return new byte [] {
385
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 1,
386
+ 0, 0, 0, 0, 0, 1, 1, 4, 4, 1, 1, 0,
387
+ 0, 1, 0, 1, 0, 0, 5, 5, 5, 5, 5, 5,
388
+ 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 5,
389
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
390
+ 5, 1, 4, 4, 4, 4, 0, 2, 2, 4, 5, 5,
391
+ 5, 2, 2, 5, 5, 2, 2, 4, 0, 0, 4, 4,
392
+ 0, 2, 2, 5, 5, 5, 5, 5, 5, 2, 2, 2,
393
+ 2, 2, 4, 0, 4, 0, 0, 4, 4, 0, 2, 2,
394
+ 5, 5, 5, 2, 2, 2, 5, 5, 2, 5, 5, 4,
395
+ 4, 0, 0, 2, 2, 5, 5, 5, 2, 2, 5, 4,
396
+ 4, 2, 5, 5, 0, 4, 2, 4, 0, 2, 0, 0,
397
+ 2, 2, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0,
398
+ 1, 1, 4, 4, 1, 1, 0, 0, 0, 0, 0, 0,
399
+ 0, 0, 1, 1, 2, 4, 1, 1, 0, 0, 0, 0,
400
+ 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0,
401
+ 0, 0, 0, 0, 0, 0, 2, 4, 4, 4, 0, 0,
402
+ 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
403
+ 0, 0, 0, 0
404
+ };
405
+ }
406
+
407
+ private static final byte _hpricot_scan_range_lengths[] = init__hpricot_scan_range_lengths_0();
408
+
409
+
410
+ private static short[] init__hpricot_scan_index_offsets_0()
411
+ {
412
+ return new short [] {
413
+ 0, 4, 6, 8, 10, 12, 14, 16, 18, 21, 28, 39,
414
+ 46, 48, 50, 52, 54, 56, 59, 64, 73, 82, 87, 94,
415
+ 96, 98, 103, 105, 109, 111, 113, 123, 133, 146, 159, 172,
416
+ 185, 192, 195, 198, 204, 207, 212, 217, 223, 226, 231, 237,
417
+ 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269,
418
+ 274, 284, 288, 298, 308, 319, 330, 332, 342, 350, 360, 373,
419
+ 387, 401, 409, 419, 434, 449, 459, 469, 481, 484, 487, 500,
420
+ 513, 517, 528, 539, 555, 571, 586, 601, 617, 633, 643, 653,
421
+ 664, 675, 686, 699, 703, 715, 718, 721, 734, 747, 751, 762,
422
+ 772, 787, 803, 819, 829, 839, 850, 866, 882, 893, 909, 926,
423
+ 940, 954, 959, 963, 974, 985, 1001, 1017, 1033, 1044, 1055, 1071,
424
+ 1084, 1097, 1108, 1125, 1140, 1144, 1156, 1167, 1179, 1183, 1193, 1196,
425
+ 1199, 1209, 1215, 1224, 1234, 1244, 1253, 1255, 1257, 1259, 1261, 1263,
426
+ 1265, 1269, 1274, 1280, 1287, 1292, 1299, 1301, 1303, 1305, 1307, 1309,
427
+ 1311, 1313, 1315, 1319, 1324, 1327, 1334, 1339, 1345, 1347, 1349, 1351,
428
+ 1353, 1355, 1357, 1359, 1361, 1363, 1367, 1372, 1375, 1377, 1379, 1384,
429
+ 1386, 1388, 1391, 1393, 1395, 1397, 1399, 1402, 1409, 1415, 1422, 1424,
430
+ 1426, 1429, 1437, 1439, 1441, 1443, 1446, 1449, 1453, 1456, 1460, 1463,
431
+ 1465, 1468, 1470, 1474
432
+ };
433
+ }
434
+
435
+ private static final short _hpricot_scan_index_offsets[] = init__hpricot_scan_index_offsets_0();
436
+
437
+
438
+ private static short[] init__hpricot_scan_indicies_0()
439
+ {
440
+ return new short [] {
441
+ 1, 2, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0,
442
+ 8, 0, 9, 0, 10, 0, 11, 11, 0, 11, 12, 12,
443
+ 11, 12, 12, 0, 13, 15, 14, 16, 14, 13, 14, 14,
444
+ 14, 14, 0, 17, 18, 19, 20, 21, 17, 0, 22, 0,
445
+ 23, 0, 24, 0, 25, 0, 26, 0, 27, 27, 0, 27,
446
+ 28, 29, 27, 0, 30, 31, 30, 30, 30, 30, 30, 30,
447
+ 0, 32, 33, 32, 32, 32, 32, 32, 32, 0, 34, 18,
448
+ 21, 34, 0, 34, 35, 36, 18, 21, 34, 0, 38, 37,
449
+ 41, 40, 42, 18, 21, 42, 39, 43, 21, 43, 18, 43,
450
+ 39, 38, 44, 41, 45, 46, 47, 46, 46, 46, 46, 46,
451
+ 46, 46, 0, 48, 49, 48, 48, 48, 48, 48, 48, 48,
452
+ 0, 50, 50, 48, 49, 18, 21, 48, 34, 48, 48, 48,
453
+ 48, 0, 50, 50, 35, 51, 18, 21, 48, 34, 48, 48,
454
+ 48, 48, 0, 52, 52, 54, 55, 56, 57, 54, 53, 54,
455
+ 54, 54, 54, 44, 58, 58, 61, 62, 63, 64, 60, 59,
456
+ 60, 60, 60, 60, 45, 59, 61, 65, 63, 64, 59, 45,
457
+ 67, 68, 66, 70, 71, 69, 72, 41, 63, 64, 72, 45,
458
+ 73, 74, 64, 75, 76, 43, 75, 21, 74, 41, 63, 74,
459
+ 45, 77, 41, 78, 79, 77, 40, 73, 80, 79, 80, 41,
460
+ 78, 80, 40, 81, 38, 56, 57, 81, 44, 60, 82, 60,
461
+ 60, 60, 60, 60, 60, 60, 45, 83, 0, 84, 0, 85,
462
+ 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 91,
463
+ 0, 92, 0, 93, 0, 94, 94, 94, 94, 0, 95, 97,
464
+ 96, 96, 95, 96, 96, 96, 96, 0, 98, 99, 98, 0,
465
+ 100, 102, 103, 101, 101, 100, 101, 101, 101, 0, 104, 106,
466
+ 107, 105, 105, 104, 105, 105, 105, 0, 108, 110, 111, 112,
467
+ 109, 109, 108, 109, 109, 109, 39, 113, 115, 116, 117, 114,
468
+ 114, 113, 114, 114, 114, 39, 118, 39, 120, 120, 122, 123,
469
+ 124, 39, 117, 120, 121, 119, 126, 126, 128, 39, 129, 126,
470
+ 127, 125, 130, 115, 117, 114, 114, 130, 114, 114, 114, 39,
471
+ 126, 126, 132, 39, 133, 131, 131, 126, 127, 131, 131, 131,
472
+ 125, 134, 134, 137, 39, 138, 139, 136, 136, 134, 135, 136,
473
+ 136, 136, 125, 140, 140, 132, 39, 142, 133, 131, 131, 140,
474
+ 141, 131, 131, 131, 125, 126, 126, 128, 39, 129, 126, 127,
475
+ 125, 143, 143, 145, 146, 147, 39, 129, 143, 144, 119, 148,
476
+ 148, 122, 123, 124, 39, 117, 150, 150, 148, 149, 150, 150,
477
+ 150, 119, 143, 143, 145, 146, 151, 39, 133, 150, 150, 143,
478
+ 144, 150, 150, 150, 119, 153, 153, 155, 156, 157, 158, 159,
479
+ 153, 154, 152, 161, 161, 163, 164, 165, 166, 167, 161, 162,
480
+ 160, 168, 169, 171, 172, 170, 173, 170, 168, 170, 170, 170,
481
+ 165, 169, 173, 165, 174, 173, 165, 175, 169, 177, 178, 179,
482
+ 176, 173, 176, 175, 176, 176, 176, 165, 180, 169, 171, 181,
483
+ 172, 170, 173, 170, 180, 170, 170, 170, 165, 169, 182, 173,
484
+ 165, 183, 183, 185, 186, 187, 165, 172, 159, 183, 184, 152,
485
+ 188, 188, 185, 186, 187, 165, 172, 159, 188, 189, 152, 188,
486
+ 188, 185, 186, 187, 165, 172, 190, 159, 190, 188, 189, 190,
487
+ 190, 190, 152, 191, 191, 193, 194, 195, 165, 196, 190, 159,
488
+ 190, 191, 192, 190, 190, 190, 152, 153, 153, 155, 195, 157,
489
+ 197, 190, 159, 190, 153, 154, 190, 190, 190, 152, 161, 161,
490
+ 163, 199, 165, 196, 198, 167, 198, 161, 162, 198, 198, 198,
491
+ 160, 200, 200, 163, 203, 165, 204, 205, 202, 167, 202, 200,
492
+ 201, 202, 202, 202, 160, 206, 206, 163, 199, 165, 208, 196,
493
+ 198, 167, 198, 206, 207, 198, 198, 198, 160, 161, 161, 163,
494
+ 164, 165, 166, 167, 161, 162, 160, 161, 161, 209, 164, 165,
495
+ 166, 167, 161, 162, 160, 191, 191, 193, 194, 156, 165, 166,
496
+ 159, 191, 192, 152, 211, 211, 213, 214, 215, 216, 217, 218,
497
+ 211, 212, 210, 220, 220, 222, 209, 223, 224, 225, 226, 220,
498
+ 221, 219, 227, 228, 174, 230, 231, 229, 232, 229, 227, 229,
499
+ 229, 229, 224, 228, 174, 232, 224, 234, 169, 236, 237, 235,
500
+ 238, 235, 234, 235, 235, 235, 233, 169, 238, 233, 228, 238,
501
+ 233, 239, 169, 241, 242, 243, 240, 238, 240, 239, 240, 240,
502
+ 240, 233, 244, 169, 236, 245, 237, 235, 238, 235, 244, 235,
503
+ 235, 235, 233, 169, 246, 238, 233, 248, 248, 250, 251, 252,
504
+ 233, 237, 253, 248, 249, 247, 255, 255, 163, 257, 233, 258,
505
+ 259, 255, 256, 254, 255, 255, 163, 261, 233, 262, 260, 259,
506
+ 260, 255, 256, 260, 260, 260, 254, 263, 263, 163, 266, 233,
507
+ 267, 268, 265, 259, 265, 263, 264, 265, 265, 265, 254, 269,
508
+ 269, 163, 261, 233, 271, 262, 260, 259, 260, 269, 270, 260,
509
+ 260, 260, 254, 255, 255, 163, 257, 233, 258, 259, 255, 256,
510
+ 254, 255, 255, 222, 257, 233, 258, 259, 255, 256, 254, 272,
511
+ 272, 274, 275, 276, 233, 258, 253, 272, 273, 247, 277, 277,
512
+ 250, 251, 252, 233, 237, 279, 253, 279, 277, 278, 279, 279,
513
+ 279, 247, 272, 272, 274, 275, 280, 233, 262, 279, 253, 279,
514
+ 272, 273, 279, 279, 279, 247, 211, 211, 281, 214, 215, 216,
515
+ 217, 218, 211, 212, 210, 220, 220, 222, 209, 283, 224, 284,
516
+ 282, 226, 282, 220, 221, 282, 282, 282, 219, 285, 285, 222,
517
+ 209, 288, 224, 289, 290, 287, 226, 287, 285, 286, 287, 287,
518
+ 287, 219, 291, 228, 174, 230, 292, 231, 229, 232, 229, 291,
519
+ 229, 229, 229, 224, 293, 228, 174, 295, 296, 297, 294, 232,
520
+ 294, 293, 294, 294, 294, 224, 228, 174, 298, 232, 224, 299,
521
+ 299, 232, 224, 300, 300, 302, 303, 304, 224, 231, 218, 300,
522
+ 301, 210, 305, 305, 302, 303, 304, 224, 231, 218, 305, 306,
523
+ 210, 305, 305, 302, 303, 304, 224, 231, 307, 218, 307, 305,
524
+ 306, 307, 307, 307, 210, 308, 308, 310, 311, 312, 224, 284,
525
+ 307, 218, 307, 308, 309, 307, 307, 307, 210, 211, 211, 281,
526
+ 214, 312, 216, 313, 307, 218, 307, 211, 212, 307, 307, 307,
527
+ 210, 220, 220, 222, 209, 223, 224, 225, 226, 220, 221, 219,
528
+ 220, 220, 314, 314, 223, 224, 225, 226, 220, 221, 219, 211,
529
+ 211, 213, 214, 312, 216, 313, 307, 218, 307, 211, 212, 307,
530
+ 307, 307, 210, 315, 316, 317, 319, 320, 318, 321, 318, 315,
531
+ 318, 318, 318, 216, 315, 322, 317, 319, 320, 318, 321, 318,
532
+ 315, 318, 318, 318, 216, 308, 308, 310, 311, 215, 224, 225,
533
+ 218, 308, 309, 210, 323, 323, 222, 209, 283, 224, 325, 284,
534
+ 282, 226, 282, 323, 324, 282, 282, 282, 219, 326, 326, 155,
535
+ 280, 328, 329, 279, 253, 279, 326, 327, 279, 279, 279, 247,
536
+ 316, 317, 321, 216, 330, 331, 333, 334, 332, 335, 332, 330,
537
+ 332, 332, 332, 328, 277, 277, 250, 251, 252, 233, 237, 253,
538
+ 277, 278, 247, 336, 331, 338, 339, 337, 340, 337, 336, 337,
539
+ 337, 337, 157, 322, 317, 321, 216, 326, 326, 155, 276, 328,
540
+ 341, 253, 326, 327, 247, 331, 340, 157, 331, 335, 328, 148,
541
+ 148, 122, 123, 124, 39, 117, 148, 149, 119, 342, 342, 343,
542
+ 342, 342, 0, 344, 345, 345, 344, 345, 345, 345, 345, 0,
543
+ 344, 345, 345, 346, 344, 345, 345, 345, 345, 0, 344, 345,
544
+ 345, 347, 344, 345, 345, 345, 345, 0, 348, 345, 345, 348,
545
+ 345, 345, 345, 345, 0, 350, 349, 351, 349, 352, 349, 353,
546
+ 349, 354, 349, 355, 349, 355, 356, 355, 349, 356, 357, 358,
547
+ 356, 349, 359, 359, 359, 359, 359, 349, 360, 361, 361, 361,
548
+ 361, 361, 349, 362, 363, 364, 362, 349, 362, 363, 364, 365,
549
+ 366, 362, 349, 363, 349, 367, 349, 368, 349, 369, 349, 370,
550
+ 349, 371, 349, 372, 349, 373, 349, 373, 374, 373, 349, 374,
551
+ 375, 376, 374, 349, 377, 377, 349, 378, 379, 379, 379, 379,
552
+ 379, 349, 380, 363, 364, 380, 349, 380, 363, 364, 366, 380,
553
+ 349, 381, 349, 382, 349, 383, 349, 384, 349, 385, 349, 386,
554
+ 349, 387, 349, 388, 349, 389, 349, 389, 390, 389, 349, 390,
555
+ 391, 392, 390, 349, 393, 394, 349, 395, 349, 396, 349, 397,
556
+ 363, 364, 397, 349, 398, 349, 395, 349, 399, 400, 349, 401,
557
+ 349, 396, 349, 402, 349, 401, 349, 403, 403, 349, 378, 404,
558
+ 404, 404, 404, 404, 349, 405, 405, 405, 405, 405, 349, 360,
559
+ 406, 406, 406, 406, 406, 349, 408, 407, 410, 409, 412, 413,
560
+ 411, 415, 416, 417, 418, 417, 417, 417, 414, 41, 45, 43,
561
+ 21, 41, 40, 169, 173, 165, 169, 238, 233, 228, 174, 232,
562
+ 224, 344, 344, 420, 348, 421, 348, 420, 423, 424, 422, 426,
563
+ 425, 428, 429, 427, 431, 430, 433, 434, 435, 432, 434, 436,
564
+ 0
565
+ };
566
+ }
567
+
568
// Indirection table: hpricot_scan() looks up the slot found by its key search
// here to obtain the transition id used to index the trans_targs/trans_actions tables.
+ private static final short _hpricot_scan_indicies[] = init__hpricot_scan_indicies_0();
569
+
570
+
571
/**
 * Builds the transition-target table of the generated scanner.
 * hpricot_scan() indexes it with a transition id and stores the result
 * in `cs`, i.e. each entry is the state entered when that transition fires.
 * Machine-generated data: do not edit individual values by hand.
 */
+ private static short[] init__hpricot_scan_trans_targs_0()
572
+ {
573
+ return new short [] {
574
+ 204, 1, 2, 53, 204, 3, 4, 5, 6, 7, 8, 9,
575
+ 10, 11, 10, 204, 26, 11, 204, 12, 48, 26, 13, 14,
576
+ 15, 16, 17, 18, 19, 30, 20, 21, 20, 21, 22, 23,
577
+ 28, 24, 25, 204, 24, 25, 25, 27, 29, 29, 31, 32,
578
+ 31, 32, 33, 34, 35, 36, 47, 32, 206, 40, 35, 36,
579
+ 47, 37, 34, 206, 40, 46, 38, 39, 43, 38, 39, 43,
580
+ 39, 41, 42, 41, 207, 43, 208, 44, 45, 39, 32, 49,
581
+ 50, 51, 52, 21, 54, 55, 56, 57, 58, 204, 60, 61,
582
+ 60, 204, 61, 204, 63, 62, 66, 204, 63, 64, 66, 204,
583
+ 65, 64, 66, 67, 204, 65, 64, 66, 67, 204, 204, 68,
584
+ 144, 74, 142, 143, 73, 68, 69, 70, 73, 204, 69, 71,
585
+ 73, 204, 65, 72, 71, 73, 74, 204, 65, 72, 74, 75,
586
+ 76, 77, 141, 73, 75, 76, 71, 73, 78, 79, 90, 70,
587
+ 93, 80, 209, 94, 78, 79, 90, 70, 93, 80, 209, 94,
588
+ 79, 69, 82, 84, 209, 81, 79, 83, 82, 84, 85, 209,
589
+ 83, 85, 209, 86, 95, 139, 140, 93, 87, 88, 91, 87,
590
+ 88, 89, 96, 93, 209, 209, 91, 93, 83, 92, 91, 93,
591
+ 95, 209, 83, 92, 95, 90, 97, 98, 117, 108, 90, 128,
592
+ 99, 211, 129, 97, 98, 117, 108, 128, 99, 211, 129, 98,
593
+ 100, 120, 121, 211, 122, 101, 100, 103, 105, 210, 102, 104,
594
+ 103, 105, 106, 210, 104, 106, 210, 107, 138, 113, 136, 137,
595
+ 111, 112, 107, 100, 108, 111, 210, 112, 109, 111, 210, 104,
596
+ 110, 109, 111, 113, 210, 104, 110, 113, 114, 115, 116, 135,
597
+ 111, 114, 115, 109, 111, 108, 118, 128, 211, 119, 134, 118,
598
+ 128, 133, 211, 119, 123, 119, 120, 121, 123, 211, 211, 98,
599
+ 124, 133, 131, 132, 128, 125, 126, 118, 125, 126, 127, 130,
600
+ 128, 211, 117, 98, 100, 79, 120, 121, 211, 122, 100, 119,
601
+ 134, 133, 100, 108, 101, 210, 100, 69, 103, 105, 210, 102,
602
+ 79, 82, 84, 209, 81, 210, 146, 147, 212, 146, 148, 149,
603
+ 213, 204, 151, 152, 153, 154, 155, 156, 157, 158, 200, 159,
604
+ 160, 159, 161, 204, 162, 163, 176, 164, 165, 166, 167, 168,
605
+ 169, 170, 171, 172, 198, 173, 174, 173, 175, 177, 178, 179,
606
+ 180, 181, 182, 183, 184, 185, 186, 187, 193, 188, 191, 189,
607
+ 190, 190, 192, 194, 196, 195, 197, 199, 199, 201, 201, 214,
608
+ 214, 216, 216, 204, 204, 205, 204, 0, 59, 62, 145, 204,
609
+ 204, 150, 214, 214, 215, 214, 202, 216, 216, 217, 216, 203,
610
+ 218, 218, 218, 219, 218
611
+ };
612
+ }
613
+
614
// Transition id -> next state; read by hpricot_scan() as `cs = _hpricot_scan_trans_targs[_trans]`.
+ private static final short _hpricot_scan_trans_targs[] = init__hpricot_scan_trans_targs_0();
615
+
616
+
617
/**
 * Builds the per-transition action table of the generated scanner.
 * hpricot_scan() indexes it with a transition id; a non-zero entry is used
 * as an offset into _hpricot_scan_actions, where the first element read is
 * the number of actions and the following elements are the action ids to run.
 * A zero entry means the transition runs no actions.
 * Machine-generated data: do not edit individual values by hand.
 */
+ private static short[] init__hpricot_scan_trans_actions_0()
618
+ {
619
+ return new short [] {
620
+ 73, 0, 0, 0, 59, 0, 0, 0, 0, 0, 0, 0,
621
+ 1, 5, 0, 92, 5, 0, 51, 0, 0, 0, 0, 0,
622
+ 0, 0, 0, 0, 0, 0, 3, 83, 0, 19, 0, 0,
623
+ 0, 3, 86, 75, 0, 21, 0, 0, 3, 0, 3, 83,
624
+ 0, 19, 0, 19, 3, 3, 3, 172, 188, 3, 0, 0,
625
+ 0, 0, 113, 146, 0, 21, 3, 86, 86, 0, 21, 21,
626
+ 0, 21, 0, 0, 146, 0, 146, 0, 0, 3, 113, 0,
627
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 1, 5,
628
+ 0, 98, 0, 55, 5, 0, 5, 95, 0, 116, 0, 53,
629
+ 11, 0, 110, 11, 168, 0, 180, 23, 0, 122, 57, 3,
630
+ 3, 3, 0, 0, 89, 0, 9, 9, 104, 164, 0, 180,
631
+ 119, 176, 107, 107, 0, 160, 11, 201, 9, 9, 0, 80,
632
+ 80, 0, 0, 152, 3, 3, 196, 156, 3, 80, 80, 77,
633
+ 152, 3, 226, 3, 0, 9, 9, 7, 104, 0, 211, 0,
634
+ 0, 7, 180, 23, 192, 0, 7, 11, 0, 110, 11, 216,
635
+ 0, 0, 149, 3, 3, 7, 0, 89, 3, 3, 196, 80,
636
+ 80, 7, 0, 156, 221, 232, 180, 119, 107, 107, 0, 160,
637
+ 11, 238, 9, 9, 0, 7, 3, 80, 80, 101, 77, 152,
638
+ 3, 226, 3, 0, 9, 9, 7, 104, 0, 211, 0, 0,
639
+ 7, 180, 23, 192, 0, 0, 0, 180, 23, 192, 0, 11,
640
+ 0, 110, 11, 216, 0, 0, 149, 3, 3, 3, 0, 7,
641
+ 89, 3, 0, 9, 9, 104, 211, 0, 180, 119, 221, 107,
642
+ 107, 0, 160, 11, 238, 9, 9, 0, 80, 80, 0, 7,
643
+ 152, 3, 3, 196, 156, 77, 180, 119, 221, 107, 107, 0,
644
+ 160, 11, 238, 0, 0, 11, 0, 110, 11, 216, 149, 7,
645
+ 3, 3, 7, 7, 89, 3, 3, 196, 80, 80, 7, 7,
646
+ 156, 232, 7, 3, 77, 77, 196, 89, 206, 3, 101, 9,
647
+ 9, 0, 80, 80, 3, 232, 3, 77, 196, 89, 206, 3,
648
+ 3, 196, 89, 206, 3, 226, 25, 25, 0, 0, 0, 0,
649
+ 31, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
650
+ 13, 0, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0,
651
+ 0, 0, 0, 0, 0, 3, 15, 0, 0, 0, 0, 0,
652
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0,
653
+ 17, 0, 0, 3, 3, 0, 0, 3, 0, 3, 0, 37,
654
+ 137, 43, 140, 63, 134, 184, 69, 0, 0, 1, 0, 65,
655
+ 67, 0, 33, 125, 31, 35, 0, 39, 128, 31, 41, 0,
656
+ 45, 131, 143, 0, 47
657
+ };
658
+ }
659
+
660
// Transition id -> offset of its action list in _hpricot_scan_actions (0 = no actions).
+ private static final short _hpricot_scan_trans_actions[] = init__hpricot_scan_trans_actions_0();
661
+
662
+
663
/**
 * Builds the to-state action table of the generated scanner.
 * hpricot_scan() indexes it with the current state `cs` after a transition
 * has been taken and uses the entry as an offset into _hpricot_scan_actions.
 * The only action dispatched from this table in hpricot_scan() is action 20,
 * which resets the token start (`ts = -1`).
 * NOTE(review): offset 0 presumably selects an empty action list; the
 * _hpricot_scan_actions table is defined outside this section, so confirm there.
 * Machine-generated data: do not edit individual values by hand.
 */
+ private static short[] init__hpricot_scan_to_state_actions_0()
664
+ {
665
+ return new short [] {
666
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
667
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
668
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
669
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
670
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
671
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
672
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
673
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
674
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
675
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
676
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
677
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
678
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
679
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
680
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
681
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
682
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
683
+ 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0,
684
+ 27, 0, 27, 0
685
+ };
686
+ }
687
+
688
// State -> offset of the action list run on entering that state.
+ private static final short _hpricot_scan_to_state_actions[] = init__hpricot_scan_to_state_actions_0();
689
+
690
+
691
/**
 * Builds the from-state action table of the generated scanner.
 * hpricot_scan() indexes it with the current state `cs` before matching the
 * next character and uses the entry as an offset into _hpricot_scan_actions.
 * The only action dispatched from this table in hpricot_scan() is action 21,
 * which records the token start (`ts = p`).
 * NOTE(review): offset 0 presumably selects an empty action list; the
 * _hpricot_scan_actions table is defined outside this section, so confirm there.
 * Machine-generated data: do not edit individual values by hand.
 */
+ private static short[] init__hpricot_scan_from_state_actions_0()
692
+ {
693
+ return new short [] {
694
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
695
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
696
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
697
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
698
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
699
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
700
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
701
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
702
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
703
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
704
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
705
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
706
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
707
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
708
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
709
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
710
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
711
+ 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 0,
712
+ 29, 0, 29, 0
713
+ };
714
+ }
715
+
716
// State -> offset of the action list run before leaving that state.
+ private static final short _hpricot_scan_from_state_actions[] = init__hpricot_scan_from_state_actions_0();
717
+
718
+
719
/**
 * Builds the end-of-input transition table of the generated scanner.
 * hpricot_scan() consults it, indexed by the current state `cs`, only when
 * `p == eof`: an entry greater than zero is decremented by one and used as
 * the transition id to take; an entry of zero means no EOF transition.
 * Machine-generated data: do not edit individual values by hand.
 */
+ private static short[] init__hpricot_scan_eof_trans_0()
720
+ {
721
+ return new short [] {
722
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
723
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
724
+ 40, 40, 40, 40, 1, 40, 1, 1, 1, 1, 1, 1,
725
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
726
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
727
+ 1, 1, 1, 1, 40, 40, 40, 40, 40, 40, 40, 40,
728
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
729
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
730
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
731
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
732
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
733
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
734
+ 40, 1, 1, 1, 1, 1, 350, 350, 350, 350, 350, 350,
735
+ 350, 350, 350, 350, 350, 350, 350, 350, 350, 350, 350, 350,
736
+ 350, 350, 350, 350, 350, 350, 350, 350, 350, 350, 350, 350,
737
+ 350, 350, 350, 350, 350, 350, 350, 350, 350, 350, 350, 350,
738
+ 350, 350, 350, 350, 350, 350, 350, 350, 350, 350, 408, 410,
739
+ 0, 415, 420, 420, 420, 40, 40, 40, 421, 421, 0, 426,
740
+ 0, 431, 0, 437
741
+ };
742
+ }
743
+
744
// State -> (transition id + 1) to take at end of input, or 0 for none.
+ private static final short _hpricot_scan_eof_trans[] = init__hpricot_scan_eof_trans_0();
745
+
746
+
747
// Initial value assigned to `cs` before scanning begins.
+ static final int hpricot_scan_start = 204;
748
// Value of `cs` that hpricot_scan() treats as a parse failure.
+ static final int hpricot_scan_error = -1;
749
+
750
// Entry states of the scanner. The action code in hpricot_scan() jumps to
// 214, 216 and 218 by literal value and returns to 204 after a comment,
// CDATA or processing-instruction block has been emitted.
+ static final int hpricot_scan_en_html_comment = 214;
751
+ static final int hpricot_scan_en_html_cdata = 216;
752
+ static final int hpricot_scan_en_html_procins = 218;
753
+ static final int hpricot_scan_en_main = 204;
754
+
755
+ // line 192 "ext/hpricot_scan/hpricot_scan.java.rl"
756
+
757
// Default read-buffer size in chars; also the increment by which hpricot_scan()
// grows the buffer when a single token fills it completely.
+ public final static int BUFSIZE=16384;
758
+
759
+ private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
760
+ IRubyObject ary;
761
+ if (sym == runtime.newSymbol("text")) {
762
+ raw = tag;
763
+ }
764
+ ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw});
765
+ if (taint) {
766
+ ary.setTaint(true);
767
+ tag.setTaint(true);
768
+ attr.setTaint(true);
769
+ raw.setTaint(true);
770
+ }
771
+ block.yield(runtime.getCurrentContext(), ary, null, null, false);
772
+ }
773
+
774
+
775
// Per-scan state used by hpricot_scan():
//   cs      - current state of the scanner state machine
//   act     - id of the last matched alternative, consulted by action 51
//   have    - number of chars carried over at the front of buf between reads
//   nread   - total number of chars read from the port so far
//   curline - line counter, incremented by action 15; used in parse-error messages
//   p       - current position in buf
+ int cs, act, have = 0, nread = 0, curline = 1, p=-1;
776
// True while a run of plain text is pending and not yet emitted.
+ boolean text = false;
777
// Start/end positions in buf of the token being matched; ts == -1 means no token is open.
+ int ts=-1, te;
778
// End-of-input position compared against p; it is never assigned in hpricot_scan() and stays -1 there.
+ int eof=-1;
779
// Read buffer; grown by BUFSIZE when a single token fills it.
+ char[] buf;
780
+ Ruby runtime;
781
+ IRubyObject attr, bufsize;
782
// One-element arrays so helper methods can replace the held value in place.
+ IRubyObject[] tag, akey, aval;
783
// Positions in buf where the current tag / attribute key / attribute value started (-1 = unset).
+ int mark_tag, mark_akey, mark_aval;
784
+ boolean done = false, ele_open = false;
785
+ int buffer_size = 0;
786
// Taint flag copied from the input port at the start of hpricot_scan().
+ boolean taint = false;
787
// Ruby block that receives each token via rb_yield_tokens().
+ Block block = null;
788
+
789
+
790
// Token-kind symbols; assigned by __hpricot_scan() before scanning starts.
+ IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment,
791
+ cdata, sym_text;
792
+
793
+ IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) {
794
+ attr = bufsize = runtime.getNil();
795
+ tag = new IRubyObject[]{runtime.getNil()};
796
+ akey = new IRubyObject[]{runtime.getNil()};
797
+ aval = new IRubyObject[]{runtime.getNil()};
798
+
799
+ RubyClass rb_eHpricotParseError = runtime.getModule("Hpricot").getClass("ParseError");
800
+
801
+ taint = port.isTaint();
802
+ if ( !port.respondsTo("read")) {
803
+ if ( port.respondsTo("to_str")) {
804
+ port = port.callMethod(runtime.getCurrentContext(),"to_str");
805
+ } else {
806
+ throw runtime.newArgumentError("bad Hpricot argument, String or IO only please.");
807
+ }
808
+ }
809
+
810
+ buffer_size = BUFSIZE;
811
+ if (rubyApi.getInstanceVariable(recv, "@buffer_size") != null) {
812
+ bufsize = rubyApi.getInstanceVariable(recv, "@buffer_size");
813
+ if (!bufsize.isNil()) {
814
+ buffer_size = RubyNumeric.fix2int(bufsize);
815
+ }
816
+ }
817
+ buf = new char[buffer_size];
818
+
819
+
820
+ // line 821 "ext/hpricot_scan/HpricotScanService.java"
821
+ {
822
+ cs = hpricot_scan_start;
823
+ ts = -1;
824
+ te = -1;
825
+ act = 0;
826
+ }
827
+ // line 256 "ext/hpricot_scan/hpricot_scan.java.rl"
828
+
829
+ while( !done ) {
830
+ IRubyObject str;
831
+ p = have;
832
+ int pe;
833
+ int len, space = buffer_size - have;
834
+
835
+ if ( space == 0 ) {
836
+ /* We've used up the entire buffer storing an already-parsed token
837
+ * prefix that must be preserved. Likely caused by super-long attributes.
838
+ * See ticket #13. */
839
+ buffer_size += BUFSIZE;
840
+ char[] new_buf = new char[buffer_size];
841
+ System.arraycopy(buf, 0, new_buf, 0, buf.length);
842
+ buf = new_buf;
843
+ space = buffer_size - have;
844
+ }
845
+
846
+ if (port.respondsTo("read")) {
847
+ str = port.callMethod(runtime.getCurrentContext(),"read",runtime.newFixnum(space));
848
+ } else {
849
+ str = ((RubyString)port).substr(nread,space);
850
+ }
851
+
852
+ str = str.convertToString();
853
+ String sss = str.toString();
854
+ char[] chars = sss.toCharArray();
855
+ System.arraycopy(chars,0,buf,p,chars.length);
856
+
857
+ len = sss.length();
858
+ nread += len;
859
+
860
+ if ( len < space ) {
861
+ len++;
862
+ done = true;
863
+ }
864
+
865
+ pe = p + len;
866
+ char[] data = buf;
867
+
868
+
869
+ // line 870 "ext/hpricot_scan/HpricotScanService.java"
870
+ {
871
+ int _klen;
872
+ int _trans = 0;
873
+ int _acts;
874
+ int _nacts;
875
+ int _keys;
876
+ int _goto_targ = 0;
877
+
878
+ _goto: while (true) {
879
+ switch ( _goto_targ ) {
880
+ case 0:
881
+ if ( p == pe ) {
882
+ _goto_targ = 4;
883
+ continue _goto;
884
+ }
885
+ case 1:
886
+ _acts = _hpricot_scan_from_state_actions[cs];
887
+ _nacts = (int) _hpricot_scan_actions[_acts++];
888
+ while ( _nacts-- > 0 ) {
889
+ switch ( _hpricot_scan_actions[_acts++] ) {
890
+ case 21:
891
+ // line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
892
+ {ts = p;}
893
+ break;
894
+ // line 895 "ext/hpricot_scan/HpricotScanService.java"
895
+ }
896
+ }
897
+
898
+ _match: do {
899
+ _keys = _hpricot_scan_key_offsets[cs];
900
+ _trans = _hpricot_scan_index_offsets[cs];
901
+ _klen = _hpricot_scan_single_lengths[cs];
902
+ if ( _klen > 0 ) {
903
+ int _lower = _keys;
904
+ int _mid;
905
+ int _upper = _keys + _klen - 1;
906
+ while (true) {
907
+ if ( _upper < _lower )
908
+ break;
909
+
910
+ _mid = _lower + ((_upper-_lower) >> 1);
911
+ if ( data[p] < _hpricot_scan_trans_keys[_mid] )
912
+ _upper = _mid - 1;
913
+ else if ( data[p] > _hpricot_scan_trans_keys[_mid] )
914
+ _lower = _mid + 1;
915
+ else {
916
+ _trans += (_mid - _keys);
917
+ break _match;
918
+ }
919
+ }
920
+ _keys += _klen;
921
+ _trans += _klen;
922
+ }
923
+
924
+ _klen = _hpricot_scan_range_lengths[cs];
925
+ if ( _klen > 0 ) {
926
+ int _lower = _keys;
927
+ int _mid;
928
+ int _upper = _keys + (_klen<<1) - 2;
929
+ while (true) {
930
+ if ( _upper < _lower )
931
+ break;
932
+
933
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
934
+ if ( data[p] < _hpricot_scan_trans_keys[_mid] )
935
+ _upper = _mid - 2;
936
+ else if ( data[p] > _hpricot_scan_trans_keys[_mid+1] )
937
+ _lower = _mid + 2;
938
+ else {
939
+ _trans += ((_mid - _keys)>>1);
940
+ break _match;
941
+ }
942
+ }
943
+ _trans += _klen;
944
+ }
945
+ } while (false);
946
+
947
+ _trans = _hpricot_scan_indicies[_trans];
948
+ case 3:
949
+ cs = _hpricot_scan_trans_targs[_trans];
950
+
951
+ if ( _hpricot_scan_trans_actions[_trans] != 0 ) {
952
+ _acts = _hpricot_scan_trans_actions[_trans];
953
+ _nacts = (int) _hpricot_scan_actions[_acts++];
954
+ while ( _nacts-- > 0 )
955
+ {
956
+ switch ( _hpricot_scan_actions[_acts++] )
957
+ {
958
+ case 0:
959
+ // line 147 "ext/hpricot_scan/hpricot_scan.java.rl"
960
+ {
961
+ if (text) {
962
+ CAT(tag, p);
963
+ ELE(sym_text);
964
+ text = false;
965
+ }
966
+ attr = runtime.getNil();
967
+ tag[0] = runtime.getNil();
968
+ mark_tag = -1;
969
+ ele_open = true;
970
+ }
971
+ break;
972
+ case 1:
973
+ // line 159 "ext/hpricot_scan/hpricot_scan.java.rl"
974
+ { mark_tag = p; }
975
+ break;
976
+ case 2:
977
+ // line 160 "ext/hpricot_scan/hpricot_scan.java.rl"
978
+ { mark_aval = p; }
979
+ break;
980
+ case 3:
981
+ // line 161 "ext/hpricot_scan/hpricot_scan.java.rl"
982
+ { mark_akey = p; }
983
+ break;
984
+ case 4:
985
+ // line 162 "ext/hpricot_scan/hpricot_scan.java.rl"
986
+ { SET(tag, p); }
987
+ break;
988
+ case 5:
989
+ // line 164 "ext/hpricot_scan/hpricot_scan.java.rl"
990
+ { SET(aval, p); }
991
+ break;
992
+ case 6:
993
+ // line 165 "ext/hpricot_scan/hpricot_scan.java.rl"
994
+ {
995
+ if (buf[p-1] == '"' || buf[p-1] == '\'') { SET(aval, p-1); }
996
+ else { SET(aval, p); }
997
+ }
998
+ break;
999
+ case 7:
1000
+ // line 169 "ext/hpricot_scan/hpricot_scan.java.rl"
1001
+ { SET(akey, p); }
1002
+ break;
1003
+ case 8:
1004
+ // line 170 "ext/hpricot_scan/hpricot_scan.java.rl"
1005
+ { SET(aval, p); ATTR(rb_str_new2("version"), aval); }
1006
+ break;
1007
+ case 9:
1008
+ // line 171 "ext/hpricot_scan/hpricot_scan.java.rl"
1009
+ { SET(aval, p); ATTR(rb_str_new2("encoding"), aval); }
1010
+ break;
1011
+ case 10:
1012
+ // line 172 "ext/hpricot_scan/hpricot_scan.java.rl"
1013
+ { SET(aval, p); ATTR(rb_str_new2("standalone"), aval); }
1014
+ break;
1015
+ case 11:
1016
+ // line 173 "ext/hpricot_scan/hpricot_scan.java.rl"
1017
+ { SET(aval, p); ATTR(rb_str_new2("public_id"), aval); }
1018
+ break;
1019
+ case 12:
1020
+ // line 174 "ext/hpricot_scan/hpricot_scan.java.rl"
1021
+ { SET(aval, p); ATTR(rb_str_new2("system_id"), aval); }
1022
+ break;
1023
+ case 13:
1024
+ // line 176 "ext/hpricot_scan/hpricot_scan.java.rl"
1025
+ {
1026
+ akey[0] = runtime.getNil();
1027
+ aval[0] = runtime.getNil();
1028
+ mark_akey = -1;
1029
+ mark_aval = -1;
1030
+ }
1031
+ break;
1032
+ case 14:
1033
+ // line 183 "ext/hpricot_scan/hpricot_scan.java.rl"
1034
+ {
1035
+ ATTR(akey, aval);
1036
+ }
1037
+ break;
1038
+ case 15:
1039
+ // line 9 "ext/hpricot_scan/hpricot_scan.java.rl"
1040
+ {curline += 1;}
1041
+ break;
1042
+ case 16:
1043
+ // line 46 "ext/hpricot_scan/hpricot_scan.java.rl"
1044
+ { TEXT_PASS(); }
1045
+ break;
1046
+ case 17:
1047
+ // line 50 "ext/hpricot_scan/hpricot_scan.java.rl"
1048
+ { EBLK(comment, 3); {cs = 204; _goto_targ = 2; if (true) continue _goto;} }
1049
+ break;
1050
+ case 18:
1051
+ // line 55 "ext/hpricot_scan/hpricot_scan.java.rl"
1052
+ { EBLK(cdata, 3); {cs = 204; _goto_targ = 2; if (true) continue _goto;} }
1053
+ break;
1054
+ case 19:
1055
+ // line 60 "ext/hpricot_scan/hpricot_scan.java.rl"
1056
+ { EBLK(procins, 2); {cs = 204; _goto_targ = 2; if (true) continue _goto;} }
1057
+ break;
1058
+ case 22:
1059
+ // line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
1060
+ {te = p+1;}
1061
+ break;
1062
+ case 23:
1063
+ // line 50 "ext/hpricot_scan/hpricot_scan.java.rl"
1064
+ {te = p+1;}
1065
+ break;
1066
+ case 24:
1067
+ // line 51 "ext/hpricot_scan/hpricot_scan.java.rl"
1068
+ {te = p+1;{ TEXT_PASS(); }}
1069
+ break;
1070
+ case 25:
1071
+ // line 51 "ext/hpricot_scan/hpricot_scan.java.rl"
1072
+ {te = p;p--;{ TEXT_PASS(); }}
1073
+ break;
1074
+ case 26:
1075
+ // line 51 "ext/hpricot_scan/hpricot_scan.java.rl"
1076
+ {{p = ((te))-1;}{ TEXT_PASS(); }}
1077
+ break;
1078
+ case 27:
1079
+ // line 55 "ext/hpricot_scan/hpricot_scan.java.rl"
1080
+ {te = p+1;}
1081
+ break;
1082
+ case 28:
1083
+ // line 56 "ext/hpricot_scan/hpricot_scan.java.rl"
1084
+ {te = p+1;{ TEXT_PASS(); }}
1085
+ break;
1086
+ case 29:
1087
+ // line 56 "ext/hpricot_scan/hpricot_scan.java.rl"
1088
+ {te = p;p--;{ TEXT_PASS(); }}
1089
+ break;
1090
+ case 30:
1091
+ // line 56 "ext/hpricot_scan/hpricot_scan.java.rl"
1092
+ {{p = ((te))-1;}{ TEXT_PASS(); }}
1093
+ break;
1094
+ case 31:
1095
+ // line 60 "ext/hpricot_scan/hpricot_scan.java.rl"
1096
+ {te = p+1;}
1097
+ break;
1098
+ case 32:
1099
+ // line 61 "ext/hpricot_scan/hpricot_scan.java.rl"
1100
+ {te = p+1;{ TEXT_PASS(); }}
1101
+ break;
1102
+ case 33:
1103
+ // line 61 "ext/hpricot_scan/hpricot_scan.java.rl"
1104
+ {te = p;p--;{ TEXT_PASS(); }}
1105
+ break;
1106
+ case 34:
1107
+ // line 66 "ext/hpricot_scan/hpricot_scan.java.rl"
1108
+ {act = 8;}
1109
+ break;
1110
+ case 35:
1111
+ // line 68 "ext/hpricot_scan/hpricot_scan.java.rl"
1112
+ {act = 10;}
1113
+ break;
1114
+ case 36:
1115
+ // line 70 "ext/hpricot_scan/hpricot_scan.java.rl"
1116
+ {act = 12;}
1117
+ break;
1118
+ case 37:
1119
+ // line 73 "ext/hpricot_scan/hpricot_scan.java.rl"
1120
+ {act = 15;}
1121
+ break;
1122
+ case 38:
1123
+ // line 65 "ext/hpricot_scan/hpricot_scan.java.rl"
1124
+ {te = p+1;{ ELE(xmldecl); }}
1125
+ break;
1126
+ case 39:
1127
+ // line 66 "ext/hpricot_scan/hpricot_scan.java.rl"
1128
+ {te = p+1;{ ELE(doctype); }}
1129
+ break;
1130
+ case 40:
1131
+ // line 68 "ext/hpricot_scan/hpricot_scan.java.rl"
1132
+ {te = p+1;{ ELE(stag); }}
1133
+ break;
1134
+ case 41:
1135
+ // line 69 "ext/hpricot_scan/hpricot_scan.java.rl"
1136
+ {te = p+1;{ ELE(etag); }}
1137
+ break;
1138
+ case 42:
1139
+ // line 70 "ext/hpricot_scan/hpricot_scan.java.rl"
1140
+ {te = p+1;{ ELE(emptytag); }}
1141
+ break;
1142
+ case 43:
1143
+ // line 71 "ext/hpricot_scan/hpricot_scan.java.rl"
1144
+ {te = p+1;{ {cs = 214; _goto_targ = 2; if (true) continue _goto;} }}
1145
+ break;
1146
+ case 44:
1147
+ // line 72 "ext/hpricot_scan/hpricot_scan.java.rl"
1148
+ {te = p+1;{ {cs = 216; _goto_targ = 2; if (true) continue _goto;} }}
1149
+ break;
1150
+ case 45:
1151
+ // line 73 "ext/hpricot_scan/hpricot_scan.java.rl"
1152
+ {te = p+1;{ TEXT_PASS(); }}
1153
+ break;
1154
+ case 46:
1155
+ // line 66 "ext/hpricot_scan/hpricot_scan.java.rl"
1156
+ {te = p;p--;{ ELE(doctype); }}
1157
+ break;
1158
+ case 47:
1159
+ // line 67 "ext/hpricot_scan/hpricot_scan.java.rl"
1160
+ {te = p;p--;{ {cs = 218; _goto_targ = 2; if (true) continue _goto;} }}
1161
+ break;
1162
+ case 48:
1163
+ // line 73 "ext/hpricot_scan/hpricot_scan.java.rl"
1164
+ {te = p;p--;{ TEXT_PASS(); }}
1165
+ break;
1166
+ case 49:
1167
+ // line 67 "ext/hpricot_scan/hpricot_scan.java.rl"
1168
+ {{p = ((te))-1;}{ {cs = 218; _goto_targ = 2; if (true) continue _goto;} }}
1169
+ break;
1170
+ case 50:
1171
+ // line 73 "ext/hpricot_scan/hpricot_scan.java.rl"
1172
+ {{p = ((te))-1;}{ TEXT_PASS(); }}
1173
+ break;
1174
+ case 51:
1175
+ // line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
1176
+ { switch( act ) {
1177
+ case 8:
1178
+ {{p = ((te))-1;} ELE(doctype); }
1179
+ break;
1180
+ case 10:
1181
+ {{p = ((te))-1;} ELE(stag); }
1182
+ break;
1183
+ case 12:
1184
+ {{p = ((te))-1;} ELE(emptytag); }
1185
+ break;
1186
+ case 15:
1187
+ {{p = ((te))-1;} TEXT_PASS(); }
1188
+ break;
1189
+ }
1190
+ }
1191
+ break;
1192
+ // line 1193 "ext/hpricot_scan/HpricotScanService.java"
1193
+ }
1194
+ }
1195
+ }
1196
+
1197
+ case 2:
1198
+ _acts = _hpricot_scan_to_state_actions[cs];
1199
+ _nacts = (int) _hpricot_scan_actions[_acts++];
1200
+ while ( _nacts-- > 0 ) {
1201
+ switch ( _hpricot_scan_actions[_acts++] ) {
1202
+ case 20:
1203
+ // line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
1204
+ {ts = -1;}
1205
+ break;
1206
+ // line 1207 "ext/hpricot_scan/HpricotScanService.java"
1207
+ }
1208
+ }
1209
+
1210
+ if ( ++p != pe ) {
1211
+ _goto_targ = 1;
1212
+ continue _goto;
1213
+ }
1214
+ case 4:
1215
+ if ( p == eof )
1216
+ {
1217
+ if ( _hpricot_scan_eof_trans[cs] > 0 ) {
1218
+ _trans = _hpricot_scan_eof_trans[cs] - 1;
1219
+ _goto_targ = 3;
1220
+ continue _goto;
1221
+ }
1222
+ }
1223
+
1224
+ case 5:
1225
+ }
1226
+ break; }
1227
+ }
1228
+ // line 297 "ext/hpricot_scan/hpricot_scan.java.rl"
1229
+
1230
+ if ( cs == hpricot_scan_error ) {
1231
+ if(!tag[0].isNil()) {
1232
+ rb_raise(rb_eHpricotParseError, "parse error on element <"+tag.toString()+">, starting on line "+curline+".\n" + NO_WAY_SERIOUSLY);
1233
+ } else {
1234
+ rb_raise(rb_eHpricotParseError, "parse error on line "+curline+".\n" + NO_WAY_SERIOUSLY);
1235
+ }
1236
+ }
1237
+
1238
+ if ( done && ele_open ) {
1239
+ ele_open = false;
1240
+ if(ts > -1) {
1241
+ mark_tag = ts;
1242
+ ts = -1;
1243
+ text = true;
1244
+ }
1245
+ }
1246
+
1247
+ if(ts == -1) {
1248
+ have = 0;
1249
+ /* text nodes have no ts because each byte is parsed alone */
1250
+ if(mark_tag != -1 && text) {
1251
+ if (done) {
1252
+ if(mark_tag < p-1) {
1253
+ CAT(tag, p-1);
1254
+ ELE(sym_text);
1255
+ }
1256
+ } else {
1257
+ CAT(tag, p);
1258
+ }
1259
+ }
1260
+ mark_tag = 0;
1261
+ } else {
1262
+ have = pe - ts;
1263
+ System.arraycopy(buf,ts,buf,0,have);
1264
+ SLIDE(tag);
1265
+ SLIDE(akey);
1266
+ SLIDE(aval);
1267
+ te = (te - ts);
1268
+ ts = 0;
1269
+ }
1270
+ }
1271
+ return runtime.getNil();
1272
+ }
1273
+
1274
+ public static IRubyObject __hpricot_scan(IRubyObject recv, IRubyObject port, Block block) {
1275
+ Ruby runtime = recv.getRuntime();
1276
+ HpricotScanService service = new HpricotScanService();
1277
+ service.runtime = runtime;
1278
+ service.xmldecl = runtime.newSymbol("xmldecl");
1279
+ service.doctype = runtime.newSymbol("doctype");
1280
+ service.procins = runtime.newSymbol("procins");
1281
+ service.stag = runtime.newSymbol("stag");
1282
+ service.etag = runtime.newSymbol("etag");
1283
+ service.emptytag = runtime.newSymbol("emptytag");
1284
+ service.comment = runtime.newSymbol("comment");
1285
+ service.cdata = runtime.newSymbol("cdata");
1286
+ service.sym_text = runtime.newSymbol("text");
1287
+ service.block = block;
1288
+ return service.hpricot_scan(recv, port);
1289
+ }
1290
+
1291
+
1292
+ public boolean basicLoad(final Ruby runtime) throws IOException {
1293
+ Init_hpricot_scan(runtime);
1294
+ return true;
1295
+ }
1296
+
1297
+ public static void Init_hpricot_scan(Ruby runtime) {
1298
+ RubyModule mHpricot = runtime.defineModule("Hpricot");
1299
+ mHpricot.getMetaClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
1300
+ CallbackFactory fact = runtime.callbackFactory(HpricotScanService.class);
1301
+ mHpricot.getMetaClass().defineMethod("scan",fact.getSingletonMethod("__hpricot_scan",IRubyObject.class));
1302
+ mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
1303
+ rubyApi = JavaEmbedUtils.newObjectAdapter();
1304
+ }
1305
+ }