hpricot 0.8.1-x86-mswin32 → 0.8.2-x86-mswin32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +13 -0
- data/README +6 -15
- data/Rakefile +40 -28
- data/ext/fast_xs/FastXsService.java +13 -1
- data/ext/hpricot_scan/HpricotCss.java +831 -0
- data/ext/hpricot_scan/HpricotScanService.java +1168 -387
- data/ext/hpricot_scan/hpricot_css.c +101 -100
- data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
- data/ext/hpricot_scan/hpricot_scan.c +287 -128
- data/ext/hpricot_scan/hpricot_scan.java.rl +1078 -299
- data/ext/hpricot_scan/hpricot_scan.rl +2 -0
- data/lib/fast_xs.so +0 -0
- data/lib/hpricot/tag.rb +31 -12
- data/lib/hpricot/traverse.rb +1 -0
- data/lib/hpricot_scan.so +0 -0
- data/test/test_alter.rb +21 -2
- data/test/test_parser.rb +8 -0
- data/test/test_preserved.rb +18 -0
- metadata +30 -29
- data/ext/hpricot_scan/test.rb +0 -4
@@ -1,152 +1,519 @@
|
|
1
|
-
// line 1 "
|
1
|
+
// line 1 "hpricot_scan.java.rl"
|
2
2
|
|
3
3
|
import java.io.IOException;
|
4
4
|
|
5
5
|
import org.jruby.Ruby;
|
6
|
+
import org.jruby.RubyArray;
|
6
7
|
import org.jruby.RubyClass;
|
7
8
|
import org.jruby.RubyHash;
|
8
9
|
import org.jruby.RubyModule;
|
9
10
|
import org.jruby.RubyNumeric;
|
11
|
+
import org.jruby.RubyObject;
|
10
12
|
import org.jruby.RubyObjectAdapter;
|
13
|
+
import org.jruby.RubyRegexp;
|
11
14
|
import org.jruby.RubyString;
|
15
|
+
import org.jruby.anno.JRubyMethod;
|
16
|
+
import org.jruby.exceptions.RaiseException;
|
12
17
|
import org.jruby.javasupport.JavaEmbedUtils;
|
18
|
+
import org.jruby.runtime.Arity;
|
13
19
|
import org.jruby.runtime.Block;
|
14
|
-
import org.jruby.runtime.
|
20
|
+
import org.jruby.runtime.ObjectAllocator;
|
21
|
+
import org.jruby.runtime.ThreadContext;
|
15
22
|
import org.jruby.runtime.builtin.IRubyObject;
|
23
|
+
import org.jruby.runtime.callback.Callback;
|
16
24
|
import org.jruby.exceptions.RaiseException;
|
17
25
|
import org.jruby.runtime.load.BasicLibraryService;
|
26
|
+
import org.jruby.util.ByteList;
|
18
27
|
|
19
28
|
public class HpricotScanService implements BasicLibraryService {
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
29
|
+
public static byte[] realloc(byte[] input, int size) {
|
30
|
+
byte[] newArray = new byte[size];
|
31
|
+
System.arraycopy(input, 0, newArray, 0, input.length);
|
32
|
+
return newArray;
|
33
|
+
}
|
34
|
+
|
35
|
+
// hpricot_state
|
36
|
+
public static class State {
|
37
|
+
public IRubyObject doc;
|
38
|
+
public IRubyObject focus;
|
39
|
+
public IRubyObject last;
|
40
|
+
public IRubyObject EC;
|
41
|
+
public boolean xml, strict, fixup;
|
42
|
+
}
|
43
|
+
|
44
|
+
static boolean OPT(IRubyObject opts, String key) {
|
45
|
+
Ruby runtime = opts.getRuntime();
|
46
|
+
return !opts.isNil() && ((RubyHash)opts).op_aref(runtime.getCurrentContext(), runtime.newSymbol(key)).isTrue();
|
47
|
+
}
|
48
|
+
|
49
|
+
// H_PROP(name, H_ELE_TAG)
|
50
|
+
public static IRubyObject hpricot_ele_set_name(IRubyObject self, IRubyObject x) {
|
51
|
+
H_ELE_SET(self, H_ELE_TAG, x);
|
52
|
+
return self;
|
53
|
+
}
|
54
|
+
|
55
|
+
public static IRubyObject hpricot_ele_clear_name(IRubyObject self) {
|
56
|
+
H_ELE_SET(self, H_ELE_TAG, self.getRuntime().getNil());
|
57
|
+
return self.getRuntime().getTrue();
|
58
|
+
}
|
59
|
+
|
60
|
+
public static IRubyObject hpricot_ele_get_name(IRubyObject self) {
|
61
|
+
return H_ELE_GET(self, H_ELE_TAG);
|
62
|
+
}
|
63
|
+
|
64
|
+
// H_PROP(raw, H_ELE_RAW)
|
65
|
+
public static IRubyObject hpricot_ele_set_raw(IRubyObject self, IRubyObject x) {
|
66
|
+
H_ELE_SET(self, H_ELE_RAW, x);
|
67
|
+
return self;
|
68
|
+
}
|
69
|
+
|
70
|
+
public static IRubyObject hpricot_ele_clear_raw(IRubyObject self) {
|
71
|
+
H_ELE_SET(self, H_ELE_RAW, self.getRuntime().getNil());
|
72
|
+
return self.getRuntime().getTrue();
|
73
|
+
}
|
74
|
+
|
75
|
+
public static IRubyObject hpricot_ele_get_raw(IRubyObject self) {
|
76
|
+
return H_ELE_GET(self, H_ELE_RAW);
|
77
|
+
}
|
78
|
+
|
79
|
+
// H_PROP(parent, H_ELE_PARENT)
|
80
|
+
public static IRubyObject hpricot_ele_set_parent(IRubyObject self, IRubyObject x) {
|
81
|
+
H_ELE_SET(self, H_ELE_PARENT, x);
|
82
|
+
return self;
|
83
|
+
}
|
84
|
+
|
85
|
+
public static IRubyObject hpricot_ele_clear_parent(IRubyObject self) {
|
86
|
+
H_ELE_SET(self, H_ELE_PARENT, self.getRuntime().getNil());
|
87
|
+
return self.getRuntime().getTrue();
|
88
|
+
}
|
89
|
+
|
90
|
+
public static IRubyObject hpricot_ele_get_parent(IRubyObject self) {
|
91
|
+
return H_ELE_GET(self, H_ELE_PARENT);
|
92
|
+
}
|
93
|
+
|
94
|
+
// H_PROP(attr, H_ELE_ATTR)
|
95
|
+
public static IRubyObject hpricot_ele_set_attr(IRubyObject self, IRubyObject x) {
|
96
|
+
H_ELE_SET(self, H_ELE_ATTR, x);
|
97
|
+
return self;
|
98
|
+
}
|
99
|
+
|
100
|
+
public static IRubyObject hpricot_ele_clear_attr(IRubyObject self) {
|
101
|
+
H_ELE_SET(self, H_ELE_ATTR, self.getRuntime().getNil());
|
102
|
+
return self.getRuntime().getTrue();
|
103
|
+
}
|
104
|
+
|
105
|
+
public static IRubyObject hpricot_ele_get_attr(IRubyObject self) {
|
106
|
+
return H_ELE_GET(self, H_ELE_ATTR);
|
107
|
+
}
|
108
|
+
|
109
|
+
// H_PROP(etag, H_ELE_ETAG)
|
110
|
+
public static IRubyObject hpricot_ele_set_etag(IRubyObject self, IRubyObject x) {
|
111
|
+
H_ELE_SET(self, H_ELE_ETAG, x);
|
112
|
+
return self;
|
113
|
+
}
|
114
|
+
|
115
|
+
public static IRubyObject hpricot_ele_clear_etag(IRubyObject self) {
|
116
|
+
H_ELE_SET(self, H_ELE_ETAG, self.getRuntime().getNil());
|
117
|
+
return self.getRuntime().getTrue();
|
118
|
+
}
|
119
|
+
|
120
|
+
public static IRubyObject hpricot_ele_get_etag(IRubyObject self) {
|
121
|
+
return H_ELE_GET(self, H_ELE_ETAG);
|
122
|
+
}
|
123
|
+
|
124
|
+
// H_PROP(children, H_ELE_CHILDREN)
|
125
|
+
public static IRubyObject hpricot_ele_set_children(IRubyObject self, IRubyObject x) {
|
126
|
+
H_ELE_SET(self, H_ELE_CHILDREN, x);
|
127
|
+
return self;
|
128
|
+
}
|
129
|
+
|
130
|
+
public static IRubyObject hpricot_ele_clear_children(IRubyObject self) {
|
131
|
+
H_ELE_SET(self, H_ELE_CHILDREN, self.getRuntime().getNil());
|
132
|
+
return self.getRuntime().getTrue();
|
133
|
+
}
|
134
|
+
|
135
|
+
public static IRubyObject hpricot_ele_get_children(IRubyObject self) {
|
136
|
+
return H_ELE_GET(self, H_ELE_CHILDREN);
|
137
|
+
}
|
138
|
+
|
139
|
+
// H_ATTR(target)
|
140
|
+
public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
|
141
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("target"), x);
|
142
|
+
return self;
|
143
|
+
}
|
144
|
+
|
145
|
+
public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
|
146
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
|
147
|
+
}
|
148
|
+
|
149
|
+
// H_ATTR(encoding)
|
150
|
+
public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
|
151
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
|
152
|
+
return self;
|
153
|
+
}
|
154
|
+
|
155
|
+
public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
|
156
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
|
157
|
+
}
|
158
|
+
|
159
|
+
// H_ATTR(version)
|
160
|
+
public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
|
161
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
|
162
|
+
return self;
|
163
|
+
}
|
164
|
+
|
165
|
+
public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
|
166
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
|
167
|
+
}
|
168
|
+
|
169
|
+
// H_ATTR(standalone)
|
170
|
+
public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
|
171
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
|
172
|
+
return self;
|
173
|
+
}
|
174
|
+
|
175
|
+
public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
|
176
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
|
177
|
+
}
|
178
|
+
|
179
|
+
// H_ATTR(system_id)
|
180
|
+
public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
|
181
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
|
182
|
+
return self;
|
183
|
+
}
|
184
|
+
|
185
|
+
public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
|
186
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
|
187
|
+
}
|
188
|
+
|
189
|
+
// H_ATTR(public_id)
|
190
|
+
public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
|
191
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
|
192
|
+
return self;
|
193
|
+
}
|
194
|
+
|
195
|
+
public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
|
196
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
|
197
|
+
}
|
198
|
+
|
199
|
+
public static class Scanner {
|
200
|
+
public IRubyObject SET(int mark, int E, IRubyObject org) {
|
201
|
+
if(mark == -1 || E == mark) {
|
202
|
+
return runtime.newString("");
|
203
|
+
} else if(E > mark) {
|
204
|
+
return RubyString.newString(runtime, data, mark, E-mark);
|
205
|
+
} else {
|
206
|
+
return org;
|
207
|
+
}
|
208
|
+
}
|
209
|
+
|
210
|
+
public int SLIDE(int N) {
|
211
|
+
if(N > ts) {
|
212
|
+
return N - ts;
|
213
|
+
} else {
|
214
|
+
return N;
|
215
|
+
}
|
216
|
+
}
|
217
|
+
|
218
|
+
public IRubyObject CAT(IRubyObject N, int mark, int E) {
|
219
|
+
if(N.isNil()) {
|
220
|
+
return SET(mark, E, N);
|
221
|
+
} else {
|
222
|
+
((RubyString)N).cat(data, mark, E-mark);
|
223
|
+
return N;
|
224
|
+
}
|
225
|
+
}
|
226
|
+
|
227
|
+
public void ATTR(IRubyObject K, IRubyObject V) {
|
228
|
+
if(!K.isNil()) {
|
229
|
+
if(attr.isNil()) {
|
230
|
+
attr = RubyHash.newHash(runtime);
|
231
|
+
}
|
232
|
+
((RubyHash)attr).fastASet(K, V);
|
233
|
+
}
|
234
|
+
}
|
235
|
+
|
236
|
+
public void TEXT_PASS() {
|
237
|
+
if(!text) {
|
238
|
+
if(ele_open) {
|
239
|
+
ele_open = false;
|
240
|
+
if(ts != -1) {
|
241
|
+
mark_tag = ts;
|
242
|
+
}
|
243
|
+
} else {
|
244
|
+
mark_tag = p;
|
245
|
+
}
|
246
|
+
attr = runtime.getNil();
|
247
|
+
tag = runtime.getNil();
|
248
|
+
text = true;
|
249
|
+
}
|
250
|
+
}
|
251
|
+
|
252
|
+
public void ELE(IRubyObject N) {
|
253
|
+
if(te > ts || text) {
|
254
|
+
int raw = -1;
|
255
|
+
int rawlen = 0;
|
256
|
+
ele_open = false;
|
257
|
+
text = false;
|
258
|
+
|
259
|
+
if(ts != -1 && N != x.sym_cdata && N != x.sym_text && N != x.sym_procins && N != x.sym_comment) {
|
260
|
+
raw = ts;
|
261
|
+
rawlen = te - ts;
|
262
|
+
}
|
263
|
+
|
264
|
+
if(block.isGiven()) {
|
265
|
+
IRubyObject raw_string = runtime.getNil();
|
266
|
+
if(raw != -1) {
|
267
|
+
raw_string = RubyString.newString(runtime, data, raw, rawlen);
|
268
|
+
}
|
269
|
+
yieldTokens(N, tag, attr, runtime.getNil(), taint);
|
270
|
+
} else {
|
271
|
+
hpricotToken(S, N, tag, attr, raw, rawlen, taint);
|
272
|
+
}
|
273
|
+
}
|
274
|
+
}
|
275
|
+
|
276
|
+
|
277
|
+
public void EBLK(IRubyObject N, int T) {
|
278
|
+
tag = CAT(tag, mark_tag, p - T + 1);
|
279
|
+
ELE(N);
|
280
|
+
}
|
281
|
+
|
282
|
+
public void hpricotAdd(IRubyObject focus, IRubyObject ele) {
|
283
|
+
IRubyObject children = H_ELE_GET(focus, H_ELE_CHILDREN);
|
284
|
+
if(children.isNil()) {
|
285
|
+
H_ELE_SET(focus, H_ELE_CHILDREN, children = RubyArray.newArray(runtime, 1));
|
286
|
+
}
|
287
|
+
((RubyArray)children).append(ele);
|
288
|
+
H_ELE_SET(ele, H_ELE_PARENT, focus);
|
289
|
+
}
|
290
|
+
|
291
|
+
private static class TokenInfo {
|
292
|
+
public IRubyObject sym;
|
293
|
+
public IRubyObject tag;
|
294
|
+
public IRubyObject attr;
|
295
|
+
public int raw;
|
296
|
+
public int rawlen;
|
297
|
+
public IRubyObject ec;
|
298
|
+
public IRubyObject ele;
|
299
|
+
public Extra x;
|
300
|
+
public Ruby runtime;
|
301
|
+
public Scanner scanner;
|
302
|
+
public State S;
|
303
|
+
|
304
|
+
public void H_ELE(RubyClass klass) {
|
305
|
+
ele = klass.allocate();
|
306
|
+
if(klass == x.cElem) {
|
307
|
+
H_ELE_SET(ele, H_ELE_TAG, tag);
|
308
|
+
H_ELE_SET(ele, H_ELE_ATTR, attr);
|
309
|
+
H_ELE_SET(ele, H_ELE_EC, ec);
|
310
|
+
if(raw != -1 && (sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_doctype)) {
|
311
|
+
H_ELE_SET(ele, H_ELE_RAW, RubyString.newString(runtime, scanner.data, raw, rawlen));
|
312
|
+
}
|
313
|
+
} else if(klass == x.cDocType || klass == x.cProcIns || klass == x.cXMLDecl || klass == x.cBogusETag) {
|
314
|
+
if(klass == x.cBogusETag) {
|
315
|
+
H_ELE_SET(ele, H_ELE_TAG, tag);
|
316
|
+
if(raw != -1) {
|
317
|
+
H_ELE_SET(ele, H_ELE_ATTR, RubyString.newString(runtime, scanner.data, raw, rawlen));
|
318
|
+
}
|
319
|
+
} else {
|
320
|
+
if(klass == x.cDocType) {
|
321
|
+
scanner.ATTR(runtime.newSymbol("target"), tag);
|
322
|
+
}
|
323
|
+
H_ELE_SET(ele, H_ELE_ATTR, attr);
|
324
|
+
if(klass != x.cProcIns) {
|
325
|
+
tag = runtime.getNil();
|
326
|
+
if(raw != -1) {
|
327
|
+
tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
|
328
|
+
}
|
329
|
+
}
|
330
|
+
H_ELE_SET(ele, H_ELE_TAG, tag);
|
331
|
+
}
|
332
|
+
} else {
|
333
|
+
H_ELE_SET(ele, H_ELE_TAG, tag);
|
334
|
+
}
|
335
|
+
S.last = ele;
|
336
|
+
}
|
337
|
+
|
338
|
+
public void hpricotToken(boolean taint) {
|
339
|
+
//
|
340
|
+
// in html mode, fix up start tags incorrectly formed as empty tags
|
341
|
+
//
|
342
|
+
if(!S.xml) {
|
343
|
+
if(sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_etag) {
|
344
|
+
ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
|
345
|
+
if(ec.isNil()) {
|
346
|
+
tag = tag.callMethod(scanner.ctx, "downcase");
|
347
|
+
ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
|
348
|
+
}
|
349
|
+
}
|
350
|
+
|
351
|
+
if(H_ELE_GET(S.focus, H_ELE_EC) == x.sym_CDATA &&
|
352
|
+
(sym != x.sym_procins && sym != x.sym_comment && sym != x.sym_cdata && sym != x.sym_text) &&
|
353
|
+
!(sym == x.sym_etag && runtime.newFixnum(tag.hashCode()).equals(H_ELE_GET(S.focus, H_ELE_HASH)))) {
|
354
|
+
sym = x.sym_text;
|
355
|
+
tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
|
356
|
+
}
|
357
|
+
|
358
|
+
if(!ec.isNil()) {
|
359
|
+
if(sym == x.sym_emptytag) {
|
360
|
+
if(ec != x.sym_EMPTY) {
|
361
|
+
sym = x.sym_stag;
|
362
|
+
}
|
363
|
+
} else if(sym == x.sym_stag) {
|
364
|
+
if(ec == x.sym_EMPTY) {
|
365
|
+
sym = x.sym_emptytag;
|
366
|
+
}
|
367
|
+
}
|
368
|
+
}
|
369
|
+
}
|
370
|
+
|
371
|
+
if(sym == x.sym_emptytag || sym == x.sym_stag) {
|
372
|
+
IRubyObject name = runtime.newFixnum(tag.hashCode());
|
373
|
+
H_ELE(x.cElem);
|
374
|
+
H_ELE_SET(ele, H_ELE_HASH, name);
|
375
|
+
|
376
|
+
if(!S.xml) {
|
377
|
+
IRubyObject match = runtime.getNil(), e = S.focus;
|
378
|
+
while(e != S.doc) {
|
379
|
+
IRubyObject hEC = H_ELE_GET(e, H_ELE_EC);
|
380
|
+
if(hEC instanceof RubyHash) {
|
381
|
+
IRubyObject has = ((RubyHash)hEC).op_aref(scanner.ctx, name);
|
382
|
+
if(!has.isNil()) {
|
383
|
+
if(has == runtime.getTrue()) {
|
384
|
+
if(match.isNil()) {
|
385
|
+
match = e;
|
386
|
+
}
|
387
|
+
} else if(has == x.symAllow) {
|
388
|
+
match = S.focus;
|
389
|
+
} else if(has == x.symDeny) {
|
390
|
+
match = runtime.getNil();
|
391
|
+
}
|
392
|
+
}
|
393
|
+
}
|
394
|
+
e = H_ELE_GET(e, H_ELE_PARENT);
|
395
|
+
}
|
396
|
+
|
397
|
+
if(match.isNil()) {
|
398
|
+
match = S.focus;
|
399
|
+
}
|
400
|
+
S.focus = match;
|
401
|
+
}
|
402
|
+
|
403
|
+
scanner.hpricotAdd(S.focus, ele);
|
404
|
+
|
405
|
+
//
|
406
|
+
// in the case of a start tag that should be empty, just
|
407
|
+
// skip the step that focuses the element. focusing moves
|
408
|
+
// us deeper into the document.
|
409
|
+
//
|
410
|
+
if(sym == x.sym_stag) {
|
411
|
+
if(S.xml || ec != x.sym_EMPTY) {
|
412
|
+
S.focus = ele;
|
413
|
+
S.last = runtime.getNil();
|
414
|
+
}
|
415
|
+
}
|
416
|
+
} else if(sym == x.sym_etag) {
|
417
|
+
IRubyObject name, match = runtime.getNil(), e = S.focus;
|
418
|
+
if(S.strict) {
|
419
|
+
if(((RubyHash)S.EC).op_aref(scanner.ctx, tag).isNil()) {
|
420
|
+
tag = runtime.newString("div");
|
421
|
+
}
|
422
|
+
}
|
423
|
+
|
424
|
+
name = runtime.newFixnum(tag.hashCode());
|
425
|
+
while(e != S.doc) {
|
426
|
+
if(H_ELE_GET(e, H_ELE_HASH).equals(name)) {
|
427
|
+
match = e;
|
428
|
+
break;
|
429
|
+
}
|
430
|
+
e = H_ELE_GET(e, H_ELE_PARENT);
|
431
|
+
|
432
|
+
}
|
433
|
+
if(match.isNil()) {
|
434
|
+
H_ELE(x.cBogusETag);
|
435
|
+
scanner.hpricotAdd(S.focus, ele);
|
436
|
+
} else {
|
437
|
+
ele = runtime.getNil();
|
438
|
+
if(raw != -1) {
|
439
|
+
ele = RubyString.newString(runtime, scanner.data, raw, rawlen);
|
440
|
+
}
|
441
|
+
H_ELE_SET(match, H_ELE_ETAG, ele);
|
442
|
+
S.focus = H_ELE_GET(match, H_ELE_PARENT);
|
443
|
+
S.last = runtime.getNil();
|
444
|
+
|
445
|
+
}
|
446
|
+
} else if(sym == x.sym_cdata) {
|
447
|
+
H_ELE(x.cCData);
|
448
|
+
scanner.hpricotAdd(S.focus, ele);
|
449
|
+
} else if(sym == x.sym_comment) {
|
450
|
+
H_ELE(x.cComment);
|
451
|
+
scanner.hpricotAdd(S.focus, ele);
|
452
|
+
} else if(sym == x.sym_doctype) {
|
453
|
+
H_ELE(x.cDocType);
|
454
|
+
if(S.strict) {
|
455
|
+
RubyHash h = (RubyHash)attr;
|
456
|
+
h.fastASet(runtime.newSymbol("system_id"), runtime.newString("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
|
457
|
+
h.fastASet(runtime.newSymbol("public_id"), runtime.newString("-//W3C//DTD XHTML 1.0 Strict//EN"));
|
458
|
+
}
|
459
|
+
scanner.hpricotAdd(S.focus, ele);
|
460
|
+
} else if(sym == x.sym_procins) {
|
461
|
+
IRubyObject match = tag.callMethod(scanner.ctx, "match", x.reProcInsParse);
|
462
|
+
tag = RubyRegexp.nth_match(1, match);
|
463
|
+
attr = RubyRegexp.nth_match(2, match);
|
464
|
+
H_ELE(x.cProcIns);
|
465
|
+
scanner.hpricotAdd(S.focus, ele);
|
466
|
+
} else if(sym == x.sym_text) {
|
467
|
+
if(!S.last.isNil() && S.last.getType() == x.cText) {
|
468
|
+
((RubyString)H_ELE_GET(S.last, H_ELE_TAG)).append(tag);
|
469
|
+
} else {
|
470
|
+
H_ELE(x.cText);
|
471
|
+
scanner.hpricotAdd(S.focus, ele);
|
472
|
+
}
|
473
|
+
} else if(sym == x.sym_xmldecl) {
|
474
|
+
H_ELE(x.cXMLDecl);
|
475
|
+
scanner.hpricotAdd(S.focus, ele);
|
476
|
+
}
|
477
|
+
}
|
478
|
+
}
|
479
|
+
|
480
|
+
public void hpricotToken(State S, IRubyObject _sym, IRubyObject _tag, IRubyObject _attr, int _raw, int _rawlen, boolean taint) {
|
481
|
+
TokenInfo t = new TokenInfo();
|
482
|
+
t.sym = _sym;
|
483
|
+
t.tag = _tag;
|
484
|
+
t.attr = _attr;
|
485
|
+
t.raw = _raw;
|
486
|
+
t.rawlen = _rawlen;
|
487
|
+
t.ec = runtime.getNil();
|
488
|
+
t.ele = runtime.getNil();
|
489
|
+
t.x = x;
|
490
|
+
t.runtime = runtime;
|
491
|
+
t.scanner = this;
|
492
|
+
t.S = S;
|
493
|
+
|
494
|
+
t.hpricotToken(taint);
|
495
|
+
}
|
496
|
+
|
497
|
+
public void yieldTokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
|
498
|
+
if(sym == x.sym_text) {
|
499
|
+
raw = tag;
|
500
|
+
}
|
501
|
+
IRubyObject ary = RubyArray.newArrayNoCopy(runtime, new IRubyObject[]{sym, tag, attr, raw});
|
502
|
+
if(taint) {
|
503
|
+
ary.setTaint(true);
|
504
|
+
tag.setTaint(true);
|
505
|
+
attr.setTaint(true);
|
506
|
+
raw.setTaint(true);
|
507
|
+
}
|
508
|
+
|
509
|
+
block.yield(ctx, ary);
|
510
|
+
}
|
511
|
+
|
512
|
+
// line 561 "hpricot_scan.java.rl"
|
513
|
+
|
514
|
+
|
515
|
+
|
516
|
+
// line 517 "HpricotScanService.java"
|
150
517
|
private static byte[] init__hpricot_scan_actions_0()
|
151
518
|
{
|
152
519
|
return new byte [] {
|
@@ -568,7 +935,7 @@ private static short[] init__hpricot_scan_indicies_0()
|
|
568
935
|
private static final short _hpricot_scan_indicies[] = init__hpricot_scan_indicies_0();
|
569
936
|
|
570
937
|
|
571
|
-
private static short[]
|
938
|
+
private static short[] init__hpricot_scan_trans_targs_wi_0()
|
572
939
|
{
|
573
940
|
return new short [] {
|
574
941
|
204, 1, 2, 53, 204, 3, 4, 5, 6, 7, 8, 9,
|
@@ -611,10 +978,10 @@ private static short[] init__hpricot_scan_trans_targs_0()
|
|
611
978
|
};
|
612
979
|
}
|
613
980
|
|
614
|
-
private static final short
|
981
|
+
private static final short _hpricot_scan_trans_targs_wi[] = init__hpricot_scan_trans_targs_wi_0();
|
615
982
|
|
616
983
|
|
617
|
-
private static short[]
|
984
|
+
private static short[] init__hpricot_scan_trans_actions_wi_0()
|
618
985
|
{
|
619
986
|
return new short [] {
|
620
987
|
73, 0, 0, 0, 59, 0, 0, 0, 0, 0, 0, 0,
|
@@ -657,7 +1024,7 @@ private static short[] init__hpricot_scan_trans_actions_0()
|
|
657
1024
|
};
|
658
1025
|
}
|
659
1026
|
|
660
|
-
private static final short
|
1027
|
+
private static final short _hpricot_scan_trans_actions_wi[] = init__hpricot_scan_trans_actions_wi_0();
|
661
1028
|
|
662
1029
|
|
663
1030
|
private static short[] init__hpricot_scan_to_state_actions_0()
|
@@ -752,121 +1119,166 @@ static final int hpricot_scan_en_html_cdata = 216;
|
|
752
1119
|
static final int hpricot_scan_en_html_procins = 218;
|
753
1120
|
static final int hpricot_scan_en_main = 204;
|
754
1121
|
|
755
|
-
// line
|
1122
|
+
// line 564 "hpricot_scan.java.rl"
|
756
1123
|
|
757
|
-
public final static int BUFSIZE=16384;
|
1124
|
+
public final static int BUFSIZE = 16384;
|
758
1125
|
|
759
|
-
private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
|
760
|
-
IRubyObject ary;
|
761
|
-
if (sym == runtime.newSymbol("text")) {
|
762
|
-
raw = tag;
|
763
|
-
}
|
764
|
-
ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw});
|
765
|
-
if (taint) {
|
766
|
-
ary.setTaint(true);
|
767
|
-
tag.setTaint(true);
|
768
|
-
attr.setTaint(true);
|
769
|
-
raw.setTaint(true);
|
770
|
-
}
|
771
|
-
block.yield(runtime.getCurrentContext(), ary, null, null, false);
|
772
|
-
}
|
773
1126
|
|
1127
|
+
private int cs, act, have = 0, nread = 0, curline = 1;
|
1128
|
+
private int ts = 0, te = 0, eof = -1, p = -1, pe = -1, buf = 0;
|
1129
|
+
private byte[] data;
|
1130
|
+
private State S = null;
|
1131
|
+
private IRubyObject port, opts, attr, tag, akey, aval, bufsize;
|
1132
|
+
private int mark_tag = -1, mark_akey = -1, mark_aval = -1;
|
1133
|
+
private boolean done = false, ele_open = false, taint = false, io = false, text = false;
|
1134
|
+
private int buffer_size = 0;
|
774
1135
|
|
775
|
-
|
776
|
-
boolean text = false;
|
777
|
-
int ts=-1, te;
|
778
|
-
int eof=-1;
|
779
|
-
char[] buf;
|
780
|
-
Ruby runtime;
|
781
|
-
IRubyObject attr, bufsize;
|
782
|
-
IRubyObject[] tag, akey, aval;
|
783
|
-
int mark_tag, mark_akey, mark_aval;
|
784
|
-
boolean done = false, ele_open = false;
|
785
|
-
int buffer_size = 0;
|
786
|
-
boolean taint = false;
|
787
|
-
Block block = null;
|
788
|
-
|
789
|
-
|
790
|
-
IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment,
|
791
|
-
cdata, sym_text;
|
792
|
-
|
793
|
-
IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) {
|
794
|
-
attr = bufsize = runtime.getNil();
|
795
|
-
tag = new IRubyObject[]{runtime.getNil()};
|
796
|
-
akey = new IRubyObject[]{runtime.getNil()};
|
797
|
-
aval = new IRubyObject[]{runtime.getNil()};
|
798
|
-
|
799
|
-
RubyClass rb_eHpricotParseError = runtime.getModule("Hpricot").getClass("ParseError");
|
800
|
-
|
801
|
-
taint = port.isTaint();
|
802
|
-
if ( !port.respondsTo("read")) {
|
803
|
-
if ( port.respondsTo("to_str")) {
|
804
|
-
port = port.callMethod(runtime.getCurrentContext(),"to_str");
|
805
|
-
} else {
|
806
|
-
throw runtime.newArgumentError("bad Hpricot argument, String or IO only please.");
|
807
|
-
}
|
808
|
-
}
|
1136
|
+
private Extra x;
|
809
1137
|
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
1138
|
+
private IRubyObject self;
|
1139
|
+
private Ruby runtime;
|
1140
|
+
private ThreadContext ctx;
|
1141
|
+
private Block block;
|
1142
|
+
|
1143
|
+
private IRubyObject xmldecl, doctype, stag, etag, emptytag, comment, cdata, procins;
|
1144
|
+
|
1145
|
+
private RaiseException newRaiseException(RubyClass exceptionClass, String message) {
|
1146
|
+
return new RaiseException(runtime, exceptionClass, message, true);
|
1147
|
+
}
|
818
1148
|
|
819
|
-
|
820
|
-
|
1149
|
+
public Scanner(IRubyObject self, IRubyObject[] args, Block block) {
|
1150
|
+
this.self = self;
|
1151
|
+
this.runtime = self.getRuntime();
|
1152
|
+
this.ctx = runtime.getCurrentContext();
|
1153
|
+
this.block = block;
|
1154
|
+
attr = runtime.getNil();
|
1155
|
+
tag = runtime.getNil();
|
1156
|
+
akey = runtime.getNil();
|
1157
|
+
aval = runtime.getNil();
|
1158
|
+
bufsize = runtime.getNil();
|
1159
|
+
|
1160
|
+
this.x = (Extra)this.runtime.getModule("Hpricot").dataGetStruct();
|
1161
|
+
|
1162
|
+
this.xmldecl = x.sym_xmldecl;
|
1163
|
+
this.doctype = x.sym_doctype;
|
1164
|
+
this.stag = x.sym_stag;
|
1165
|
+
this.etag = x.sym_etag;
|
1166
|
+
this.emptytag = x.sym_emptytag;
|
1167
|
+
this.comment = x.sym_comment;
|
1168
|
+
this.cdata = x.sym_cdata;
|
1169
|
+
this.procins = x.sym_procins;
|
1170
|
+
|
1171
|
+
port = args[0];
|
1172
|
+
if(args.length == 2) {
|
1173
|
+
opts = args[1];
|
1174
|
+
} else {
|
1175
|
+
opts = runtime.getNil();
|
1176
|
+
}
|
1177
|
+
|
1178
|
+
taint = port.isTaint();
|
1179
|
+
io = port.respondsTo("read");
|
1180
|
+
if(!io) {
|
1181
|
+
if(port.respondsTo("to_str")) {
|
1182
|
+
port = port.callMethod(ctx, "to_str");
|
1183
|
+
port = port.convertToString();
|
1184
|
+
} else {
|
1185
|
+
throw runtime.newArgumentError("an Hpricot document must be built from an input source (a String or IO object.)");
|
1186
|
+
}
|
1187
|
+
}
|
1188
|
+
|
1189
|
+
if(!(opts instanceof RubyHash)) {
|
1190
|
+
opts = runtime.getNil();
|
1191
|
+
}
|
1192
|
+
|
1193
|
+
if(!block.isGiven()) {
|
1194
|
+
S = new State();
|
1195
|
+
S.doc = x.cDoc.allocate();
|
1196
|
+
S.focus = S.doc;
|
1197
|
+
S.last = runtime.getNil();
|
1198
|
+
S.xml = OPT(opts, "xml");
|
1199
|
+
S.strict = OPT(opts, "xhtml_strict");
|
1200
|
+
S.fixup = OPT(opts, "fixup_tags");
|
1201
|
+
if(S.strict) {
|
1202
|
+
S.fixup = true;
|
1203
|
+
}
|
1204
|
+
S.doc.getInstanceVariables().fastSetInstanceVariable("@options", opts);
|
1205
|
+
S.EC = x.mHpricot.getConstant("ElementContent");
|
1206
|
+
}
|
1207
|
+
|
1208
|
+
buffer_size = BUFSIZE;
|
1209
|
+
if(self.getInstanceVariables().fastHasInstanceVariable("@buffer_size")) {
|
1210
|
+
bufsize = self.getInstanceVariables().fastGetInstanceVariable("@buffer_size");
|
1211
|
+
if(!bufsize.isNil()) {
|
1212
|
+
buffer_size = RubyNumeric.fix2int(bufsize);
|
1213
|
+
}
|
1214
|
+
}
|
1215
|
+
|
1216
|
+
if(io) {
|
1217
|
+
buf = 0;
|
1218
|
+
data = new byte[buffer_size];
|
1219
|
+
}
|
1220
|
+
}
|
1221
|
+
|
1222
|
+
private int len, space;
|
1223
|
+
// hpricot_scan
|
1224
|
+
public IRubyObject scan() {
|
1225
|
+
|
1226
|
+
// line 1227 "HpricotScanService.java"
|
821
1227
|
{
|
822
1228
|
cs = hpricot_scan_start;
|
823
1229
|
ts = -1;
|
824
1230
|
te = -1;
|
825
1231
|
act = 0;
|
826
1232
|
}
|
827
|
-
// line
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
1233
|
+
// line 667 "hpricot_scan.java.rl"
|
1234
|
+
while(!done) {
|
1235
|
+
p = pe = len = buf;
|
1236
|
+
space = buffer_size - have;
|
1237
|
+
|
1238
|
+
if(io) {
|
1239
|
+
if(space == 0) {
|
1240
|
+
/* We've used up the entire buffer storing an already-parsed token
|
1241
|
+
* prefix that must be preserved. Likely caused by super-long attributes.
|
1242
|
+
* Increase buffer size and continue */
|
1243
|
+
buffer_size += BUFSIZE;
|
1244
|
+
data = realloc(data, buffer_size);
|
1245
|
+
space = buffer_size - have;
|
1246
|
+
}
|
1247
|
+
|
1248
|
+
p = have;
|
1249
|
+
IRubyObject str = port.callMethod(ctx, "read", runtime.newFixnum(space));
|
1250
|
+
ByteList bl = str.convertToString().getByteList();
|
1251
|
+
len = bl.realSize;
|
1252
|
+
System.arraycopy(bl.bytes, bl.begin, data, p, len);
|
1253
|
+
} else {
|
1254
|
+
ByteList bl = port.convertToString().getByteList();
|
1255
|
+
data = bl.bytes;
|
1256
|
+
buf = bl.begin;
|
1257
|
+
p = bl.begin;
|
1258
|
+
len = bl.realSize + 1;
|
1259
|
+
if(p + len >= data.length) {
|
1260
|
+
data = new byte[len];
|
1261
|
+
System.arraycopy(bl.bytes, bl.begin, data, 0, bl.realSize);
|
1262
|
+
p = 0;
|
1263
|
+
buf = 0;
|
1264
|
+
}
|
1265
|
+
done = true;
|
1266
|
+
eof = p + len;
|
1267
|
+
}
|
1268
|
+
|
1269
|
+
nread += len;
|
1270
|
+
|
1271
|
+
/* If this is the last buffer, tack on an EOF. */
|
1272
|
+
if(io && len < space) {
|
1273
|
+
data[p + len++] = 0;
|
1274
|
+
eof = p + len;
|
1275
|
+
done = true;
|
1276
|
+
}
|
1277
|
+
|
1278
|
+
pe = p + len;
|
1279
|
+
|
1280
|
+
|
1281
|
+
// line 1282 "HpricotScanService.java"
|
870
1282
|
{
|
871
1283
|
int _klen;
|
872
1284
|
int _trans = 0;
|
@@ -888,10 +1300,10 @@ case 1:
|
|
888
1300
|
while ( _nacts-- > 0 ) {
|
889
1301
|
switch ( _hpricot_scan_actions[_acts++] ) {
|
890
1302
|
case 21:
|
891
|
-
// line 1 "
|
1303
|
+
// line 1 "hpricot_scan.java.rl"
|
892
1304
|
{ts = p;}
|
893
1305
|
break;
|
894
|
-
// line
|
1306
|
+
// line 1307 "HpricotScanService.java"
|
895
1307
|
}
|
896
1308
|
}
|
897
1309
|
|
@@ -946,233 +1358,239 @@ case 1:
|
|
946
1358
|
|
947
1359
|
_trans = _hpricot_scan_indicies[_trans];
|
948
1360
|
case 3:
|
949
|
-
cs =
|
1361
|
+
cs = _hpricot_scan_trans_targs_wi[_trans];
|
950
1362
|
|
951
|
-
if (
|
952
|
-
_acts =
|
1363
|
+
if ( _hpricot_scan_trans_actions_wi[_trans] != 0 ) {
|
1364
|
+
_acts = _hpricot_scan_trans_actions_wi[_trans];
|
953
1365
|
_nacts = (int) _hpricot_scan_actions[_acts++];
|
954
1366
|
while ( _nacts-- > 0 )
|
955
1367
|
{
|
956
1368
|
switch ( _hpricot_scan_actions[_acts++] )
|
957
1369
|
{
|
958
1370
|
case 0:
|
959
|
-
// line
|
1371
|
+
// line 514 "hpricot_scan.java.rl"
|
960
1372
|
{
|
961
|
-
if
|
962
|
-
|
963
|
-
|
964
|
-
|
1373
|
+
if(text) {
|
1374
|
+
tag = CAT(tag, mark_tag, p);
|
1375
|
+
ELE(x.sym_text);
|
1376
|
+
text = false;
|
965
1377
|
}
|
966
1378
|
attr = runtime.getNil();
|
967
|
-
tag
|
1379
|
+
tag = runtime.getNil();
|
968
1380
|
mark_tag = -1;
|
969
1381
|
ele_open = true;
|
970
1382
|
}
|
971
1383
|
break;
|
972
1384
|
case 1:
|
973
|
-
// line
|
1385
|
+
// line 526 "hpricot_scan.java.rl"
|
974
1386
|
{ mark_tag = p; }
|
975
1387
|
break;
|
976
1388
|
case 2:
|
977
|
-
// line
|
1389
|
+
// line 527 "hpricot_scan.java.rl"
|
978
1390
|
{ mark_aval = p; }
|
979
1391
|
break;
|
980
1392
|
case 3:
|
981
|
-
// line
|
1393
|
+
// line 528 "hpricot_scan.java.rl"
|
982
1394
|
{ mark_akey = p; }
|
983
1395
|
break;
|
984
1396
|
case 4:
|
985
|
-
// line
|
986
|
-
{ SET(
|
1397
|
+
// line 529 "hpricot_scan.java.rl"
|
1398
|
+
{ tag = SET(mark_tag, p, tag); }
|
987
1399
|
break;
|
988
1400
|
case 5:
|
989
|
-
// line
|
990
|
-
{ SET(
|
1401
|
+
// line 531 "hpricot_scan.java.rl"
|
1402
|
+
{ aval = SET(mark_aval, p, aval); }
|
991
1403
|
break;
|
992
1404
|
case 6:
|
993
|
-
// line
|
994
|
-
{
|
995
|
-
|
996
|
-
|
1405
|
+
// line 532 "hpricot_scan.java.rl"
|
1406
|
+
{
|
1407
|
+
if(data[p-1] == '"' || data[p-1] == '\'') {
|
1408
|
+
aval = SET(mark_aval, p-1, aval);
|
1409
|
+
} else {
|
1410
|
+
aval = SET(mark_aval, p, aval);
|
1411
|
+
}
|
997
1412
|
}
|
998
1413
|
break;
|
999
1414
|
case 7:
|
1000
|
-
// line
|
1001
|
-
{ SET(
|
1415
|
+
// line 539 "hpricot_scan.java.rl"
|
1416
|
+
{ akey = SET(mark_akey, p, akey); }
|
1002
1417
|
break;
|
1003
1418
|
case 8:
|
1004
|
-
// line
|
1005
|
-
{ SET(
|
1419
|
+
// line 540 "hpricot_scan.java.rl"
|
1420
|
+
{ aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("version"), aval); }
|
1006
1421
|
break;
|
1007
1422
|
case 9:
|
1008
|
-
// line
|
1009
|
-
{ SET(
|
1423
|
+
// line 541 "hpricot_scan.java.rl"
|
1424
|
+
{ aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("encoding"), aval); }
|
1010
1425
|
break;
|
1011
1426
|
case 10:
|
1012
|
-
// line
|
1013
|
-
{ SET(
|
1427
|
+
// line 542 "hpricot_scan.java.rl"
|
1428
|
+
{ aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("standalone"), aval); }
|
1014
1429
|
break;
|
1015
1430
|
case 11:
|
1016
|
-
// line
|
1017
|
-
{ SET(
|
1431
|
+
// line 543 "hpricot_scan.java.rl"
|
1432
|
+
{ aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("public_id"), aval); }
|
1018
1433
|
break;
|
1019
1434
|
case 12:
|
1020
|
-
// line
|
1021
|
-
{ SET(
|
1435
|
+
// line 544 "hpricot_scan.java.rl"
|
1436
|
+
{ aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("system_id"), aval); }
|
1022
1437
|
break;
|
1023
1438
|
case 13:
|
1024
|
-
// line
|
1025
|
-
{
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
1439
|
+
// line 546 "hpricot_scan.java.rl"
|
1440
|
+
{
|
1441
|
+
akey = runtime.getNil();
|
1442
|
+
aval = runtime.getNil();
|
1443
|
+
mark_akey = -1;
|
1444
|
+
mark_aval = -1;
|
1030
1445
|
}
|
1031
1446
|
break;
|
1032
1447
|
case 14:
|
1033
|
-
// line
|
1034
|
-
{
|
1035
|
-
|
1448
|
+
// line 553 "hpricot_scan.java.rl"
|
1449
|
+
{
|
1450
|
+
if(!S.xml) {
|
1451
|
+
akey = akey.callMethod(runtime.getCurrentContext(), "downcase");
|
1452
|
+
}
|
1453
|
+
ATTR(akey, aval);
|
1036
1454
|
}
|
1037
1455
|
break;
|
1038
1456
|
case 15:
|
1039
|
-
// line 9 "
|
1457
|
+
// line 9 "hpricot_scan.java.rl"
|
1040
1458
|
{curline += 1;}
|
1041
1459
|
break;
|
1042
1460
|
case 16:
|
1043
|
-
// line 46 "
|
1461
|
+
// line 46 "hpricot_scan.java.rl"
|
1044
1462
|
{ TEXT_PASS(); }
|
1045
1463
|
break;
|
1046
1464
|
case 17:
|
1047
|
-
// line 50 "
|
1465
|
+
// line 50 "hpricot_scan.java.rl"
|
1048
1466
|
{ EBLK(comment, 3); {cs = 204; _goto_targ = 2; if (true) continue _goto;} }
|
1049
1467
|
break;
|
1050
1468
|
case 18:
|
1051
|
-
// line 55 "
|
1469
|
+
// line 55 "hpricot_scan.java.rl"
|
1052
1470
|
{ EBLK(cdata, 3); {cs = 204; _goto_targ = 2; if (true) continue _goto;} }
|
1053
1471
|
break;
|
1054
1472
|
case 19:
|
1055
|
-
// line 60 "
|
1473
|
+
// line 60 "hpricot_scan.java.rl"
|
1056
1474
|
{ EBLK(procins, 2); {cs = 204; _goto_targ = 2; if (true) continue _goto;} }
|
1057
1475
|
break;
|
1058
1476
|
case 22:
|
1059
|
-
// line 1 "
|
1477
|
+
// line 1 "hpricot_scan.java.rl"
|
1060
1478
|
{te = p+1;}
|
1061
1479
|
break;
|
1062
1480
|
case 23:
|
1063
|
-
// line 50 "
|
1481
|
+
// line 50 "hpricot_scan.java.rl"
|
1064
1482
|
{te = p+1;}
|
1065
1483
|
break;
|
1066
1484
|
case 24:
|
1067
|
-
// line 51 "
|
1485
|
+
// line 51 "hpricot_scan.java.rl"
|
1068
1486
|
{te = p+1;{ TEXT_PASS(); }}
|
1069
1487
|
break;
|
1070
1488
|
case 25:
|
1071
|
-
// line 51 "
|
1489
|
+
// line 51 "hpricot_scan.java.rl"
|
1072
1490
|
{te = p;p--;{ TEXT_PASS(); }}
|
1073
1491
|
break;
|
1074
1492
|
case 26:
|
1075
|
-
// line 51 "
|
1493
|
+
// line 51 "hpricot_scan.java.rl"
|
1076
1494
|
{{p = ((te))-1;}{ TEXT_PASS(); }}
|
1077
1495
|
break;
|
1078
1496
|
case 27:
|
1079
|
-
// line 55 "
|
1497
|
+
// line 55 "hpricot_scan.java.rl"
|
1080
1498
|
{te = p+1;}
|
1081
1499
|
break;
|
1082
1500
|
case 28:
|
1083
|
-
// line 56 "
|
1501
|
+
// line 56 "hpricot_scan.java.rl"
|
1084
1502
|
{te = p+1;{ TEXT_PASS(); }}
|
1085
1503
|
break;
|
1086
1504
|
case 29:
|
1087
|
-
// line 56 "
|
1505
|
+
// line 56 "hpricot_scan.java.rl"
|
1088
1506
|
{te = p;p--;{ TEXT_PASS(); }}
|
1089
1507
|
break;
|
1090
1508
|
case 30:
|
1091
|
-
// line 56 "
|
1509
|
+
// line 56 "hpricot_scan.java.rl"
|
1092
1510
|
{{p = ((te))-1;}{ TEXT_PASS(); }}
|
1093
1511
|
break;
|
1094
1512
|
case 31:
|
1095
|
-
// line 60 "
|
1513
|
+
// line 60 "hpricot_scan.java.rl"
|
1096
1514
|
{te = p+1;}
|
1097
1515
|
break;
|
1098
1516
|
case 32:
|
1099
|
-
// line 61 "
|
1517
|
+
// line 61 "hpricot_scan.java.rl"
|
1100
1518
|
{te = p+1;{ TEXT_PASS(); }}
|
1101
1519
|
break;
|
1102
1520
|
case 33:
|
1103
|
-
// line 61 "
|
1521
|
+
// line 61 "hpricot_scan.java.rl"
|
1104
1522
|
{te = p;p--;{ TEXT_PASS(); }}
|
1105
1523
|
break;
|
1106
1524
|
case 34:
|
1107
|
-
// line 66 "
|
1525
|
+
// line 66 "hpricot_scan.java.rl"
|
1108
1526
|
{act = 8;}
|
1109
1527
|
break;
|
1110
1528
|
case 35:
|
1111
|
-
// line 68 "
|
1529
|
+
// line 68 "hpricot_scan.java.rl"
|
1112
1530
|
{act = 10;}
|
1113
1531
|
break;
|
1114
1532
|
case 36:
|
1115
|
-
// line 70 "
|
1533
|
+
// line 70 "hpricot_scan.java.rl"
|
1116
1534
|
{act = 12;}
|
1117
1535
|
break;
|
1118
1536
|
case 37:
|
1119
|
-
// line 73 "
|
1537
|
+
// line 73 "hpricot_scan.java.rl"
|
1120
1538
|
{act = 15;}
|
1121
1539
|
break;
|
1122
1540
|
case 38:
|
1123
|
-
// line 65 "
|
1541
|
+
// line 65 "hpricot_scan.java.rl"
|
1124
1542
|
{te = p+1;{ ELE(xmldecl); }}
|
1125
1543
|
break;
|
1126
1544
|
case 39:
|
1127
|
-
// line 66 "
|
1545
|
+
// line 66 "hpricot_scan.java.rl"
|
1128
1546
|
{te = p+1;{ ELE(doctype); }}
|
1129
1547
|
break;
|
1130
1548
|
case 40:
|
1131
|
-
// line 68 "
|
1549
|
+
// line 68 "hpricot_scan.java.rl"
|
1132
1550
|
{te = p+1;{ ELE(stag); }}
|
1133
1551
|
break;
|
1134
1552
|
case 41:
|
1135
|
-
// line 69 "
|
1553
|
+
// line 69 "hpricot_scan.java.rl"
|
1136
1554
|
{te = p+1;{ ELE(etag); }}
|
1137
1555
|
break;
|
1138
1556
|
case 42:
|
1139
|
-
// line 70 "
|
1557
|
+
// line 70 "hpricot_scan.java.rl"
|
1140
1558
|
{te = p+1;{ ELE(emptytag); }}
|
1141
1559
|
break;
|
1142
1560
|
case 43:
|
1143
|
-
// line 71 "
|
1561
|
+
// line 71 "hpricot_scan.java.rl"
|
1144
1562
|
{te = p+1;{ {cs = 214; _goto_targ = 2; if (true) continue _goto;} }}
|
1145
1563
|
break;
|
1146
1564
|
case 44:
|
1147
|
-
// line 72 "
|
1565
|
+
// line 72 "hpricot_scan.java.rl"
|
1148
1566
|
{te = p+1;{ {cs = 216; _goto_targ = 2; if (true) continue _goto;} }}
|
1149
1567
|
break;
|
1150
1568
|
case 45:
|
1151
|
-
// line 73 "
|
1569
|
+
// line 73 "hpricot_scan.java.rl"
|
1152
1570
|
{te = p+1;{ TEXT_PASS(); }}
|
1153
1571
|
break;
|
1154
1572
|
case 46:
|
1155
|
-
// line 66 "
|
1573
|
+
// line 66 "hpricot_scan.java.rl"
|
1156
1574
|
{te = p;p--;{ ELE(doctype); }}
|
1157
1575
|
break;
|
1158
1576
|
case 47:
|
1159
|
-
// line 67 "
|
1577
|
+
// line 67 "hpricot_scan.java.rl"
|
1160
1578
|
{te = p;p--;{ {cs = 218; _goto_targ = 2; if (true) continue _goto;} }}
|
1161
1579
|
break;
|
1162
1580
|
case 48:
|
1163
|
-
// line 73 "
|
1581
|
+
// line 73 "hpricot_scan.java.rl"
|
1164
1582
|
{te = p;p--;{ TEXT_PASS(); }}
|
1165
1583
|
break;
|
1166
1584
|
case 49:
|
1167
|
-
// line 67 "
|
1585
|
+
// line 67 "hpricot_scan.java.rl"
|
1168
1586
|
{{p = ((te))-1;}{ {cs = 218; _goto_targ = 2; if (true) continue _goto;} }}
|
1169
1587
|
break;
|
1170
1588
|
case 50:
|
1171
|
-
// line 73 "
|
1589
|
+
// line 73 "hpricot_scan.java.rl"
|
1172
1590
|
{{p = ((te))-1;}{ TEXT_PASS(); }}
|
1173
1591
|
break;
|
1174
1592
|
case 51:
|
1175
|
-
// line 1 "
|
1593
|
+
// line 1 "hpricot_scan.java.rl"
|
1176
1594
|
{ switch( act ) {
|
1177
1595
|
case 8:
|
1178
1596
|
{{p = ((te))-1;} ELE(doctype); }
|
@@ -1186,10 +1604,11 @@ case 3:
|
|
1186
1604
|
case 15:
|
1187
1605
|
{{p = ((te))-1;} TEXT_PASS(); }
|
1188
1606
|
break;
|
1607
|
+
default: break;
|
1189
1608
|
}
|
1190
1609
|
}
|
1191
1610
|
break;
|
1192
|
-
// line
|
1611
|
+
// line 1612 "HpricotScanService.java"
|
1193
1612
|
}
|
1194
1613
|
}
|
1195
1614
|
}
|
@@ -1200,10 +1619,10 @@ case 2:
|
|
1200
1619
|
while ( _nacts-- > 0 ) {
|
1201
1620
|
switch ( _hpricot_scan_actions[_acts++] ) {
|
1202
1621
|
case 20:
|
1203
|
-
// line 1 "
|
1622
|
+
// line 1 "hpricot_scan.java.rl"
|
1204
1623
|
{ts = -1;}
|
1205
1624
|
break;
|
1206
|
-
// line
|
1625
|
+
// line 1626 "HpricotScanService.java"
|
1207
1626
|
}
|
1208
1627
|
}
|
1209
1628
|
|
@@ -1225,81 +1644,443 @@ case 5:
|
|
1225
1644
|
}
|
1226
1645
|
break; }
|
1227
1646
|
}
|
1228
|
-
// line
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1647
|
+
// line 714 "hpricot_scan.java.rl"
|
1648
|
+
|
1649
|
+
if(cs == hpricot_scan_error) {
|
1650
|
+
if(!tag.isNil()) {
|
1651
|
+
throw newRaiseException(x.rb_eHpricotParseError, "parse error on element <" + tag + ">, starting on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
|
1652
|
+
} else {
|
1653
|
+
throw newRaiseException(x.rb_eHpricotParseError, "parse error on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
|
1654
|
+
}
|
1655
|
+
}
|
1656
|
+
|
1657
|
+
if(done && ele_open) {
|
1658
|
+
ele_open = false;
|
1659
|
+
if(ts > 0) {
|
1660
|
+
mark_tag = ts;
|
1661
|
+
ts = 0;
|
1662
|
+
text = true;
|
1663
|
+
}
|
1664
|
+
}
|
1665
|
+
|
1666
|
+
if(ts == -1) {
|
1667
|
+
have = 0;
|
1668
|
+
if(mark_tag != -1 && text) {
|
1669
|
+
if(done) {
|
1670
|
+
if(mark_tag < p - 1) {
|
1671
|
+
tag = CAT(tag, mark_tag, p-1);
|
1672
|
+
ELE(x.sym_text);
|
1673
|
+
}
|
1674
|
+
} else {
|
1675
|
+
tag = CAT(tag, mark_tag, p);
|
1676
|
+
}
|
1677
|
+
}
|
1678
|
+
if(io) {
|
1679
|
+
mark_tag = 0;
|
1680
|
+
} else {
|
1681
|
+
mark_tag = ((RubyString)port).getByteList().begin;
|
1682
|
+
}
|
1683
|
+
} else if(io) {
|
1684
|
+
have = pe - ts;
|
1685
|
+
System.arraycopy(data, ts, data, buf, have);
|
1686
|
+
mark_tag = SLIDE(mark_tag);
|
1687
|
+
mark_akey = SLIDE(mark_akey);
|
1688
|
+
mark_aval = SLIDE(mark_aval);
|
1689
|
+
te -= ts;
|
1690
|
+
ts = 0;
|
1691
|
+
}
|
1692
|
+
}
|
1693
|
+
|
1694
|
+
if(S != null) {
|
1695
|
+
return S.doc;
|
1696
|
+
}
|
1697
|
+
|
1698
|
+
return runtime.getNil();
|
1699
|
+
}
|
1700
|
+
}
|
1701
|
+
|
1702
|
+
public static class HpricotModule {
|
1703
|
+
// hpricot_scan
|
1704
|
+
@JRubyMethod(module = true, optional = 1, required = 1, frame = true)
|
1705
|
+
public static IRubyObject scan(IRubyObject self, IRubyObject[] args, Block block) {
|
1706
|
+
return new Scanner(self, args, block).scan();
|
1707
|
+
}
|
1708
|
+
|
1709
|
+
// hpricot_css
|
1710
|
+
@JRubyMethod(module = true)
|
1711
|
+
public static IRubyObject css(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
|
1712
|
+
return new HpricotCss(self, mod, str, node).scan();
|
1713
|
+
}
|
1714
|
+
}
|
1715
|
+
|
1716
|
+
public static class CData {
|
1717
|
+
@JRubyMethod
|
1718
|
+
public static IRubyObject content(IRubyObject self) {
|
1719
|
+
return hpricot_ele_get_name(self);
|
1720
|
+
}
|
1721
|
+
|
1722
|
+
@JRubyMethod(name = "content=")
|
1723
|
+
public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
|
1724
|
+
return hpricot_ele_set_name(self, value);
|
1725
|
+
}
|
1726
|
+
}
|
1727
|
+
|
1728
|
+
public static class Comment {
|
1729
|
+
@JRubyMethod
|
1730
|
+
public static IRubyObject content(IRubyObject self) {
|
1731
|
+
return hpricot_ele_get_name(self);
|
1732
|
+
}
|
1733
|
+
|
1734
|
+
@JRubyMethod(name = "content=")
|
1735
|
+
public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
|
1736
|
+
return hpricot_ele_set_name(self, value);
|
1737
|
+
}
|
1738
|
+
}
|
1739
|
+
|
1740
|
+
public static class DocType {
|
1741
|
+
@JRubyMethod
|
1742
|
+
public static IRubyObject raw_string(IRubyObject self) {
|
1743
|
+
return hpricot_ele_get_name(self);
|
1744
|
+
}
|
1745
|
+
|
1746
|
+
@JRubyMethod
|
1747
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
1748
|
+
return hpricot_ele_clear_name(self);
|
1749
|
+
}
|
1750
|
+
|
1751
|
+
@JRubyMethod
|
1752
|
+
public static IRubyObject target(IRubyObject self) {
|
1753
|
+
return hpricot_ele_get_target(self);
|
1754
|
+
}
|
1755
|
+
|
1756
|
+
@JRubyMethod(name = "target=")
|
1757
|
+
public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
|
1758
|
+
return hpricot_ele_set_target(self, value);
|
1759
|
+
}
|
1760
|
+
|
1761
|
+
@JRubyMethod
|
1762
|
+
public static IRubyObject public_id(IRubyObject self) {
|
1763
|
+
return hpricot_ele_get_public_id(self);
|
1764
|
+
}
|
1765
|
+
|
1766
|
+
@JRubyMethod(name = "public_id=")
|
1767
|
+
public static IRubyObject public_id_set(IRubyObject self, IRubyObject value) {
|
1768
|
+
return hpricot_ele_set_public_id(self, value);
|
1769
|
+
}
|
1770
|
+
|
1771
|
+
@JRubyMethod
|
1772
|
+
public static IRubyObject system_id(IRubyObject self) {
|
1773
|
+
return hpricot_ele_get_system_id(self);
|
1774
|
+
}
|
1775
|
+
|
1776
|
+
@JRubyMethod(name = "system_id=")
|
1777
|
+
public static IRubyObject system_id_set(IRubyObject self, IRubyObject value) {
|
1778
|
+
return hpricot_ele_set_system_id(self, value);
|
1779
|
+
}
|
1780
|
+
}
|
1781
|
+
|
1782
|
+
public static class Elem {
|
1783
|
+
@JRubyMethod
|
1784
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
1785
|
+
return hpricot_ele_clear_raw(self);
|
1786
|
+
}
|
1787
|
+
}
|
1788
|
+
|
1789
|
+
public static class BogusETag {
|
1790
|
+
@JRubyMethod
|
1791
|
+
public static IRubyObject raw_string(IRubyObject self) {
|
1792
|
+
return hpricot_ele_get_attr(self);
|
1793
|
+
}
|
1794
|
+
|
1795
|
+
@JRubyMethod
|
1796
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
1797
|
+
return hpricot_ele_clear_attr(self);
|
1798
|
+
}
|
1799
|
+
}
|
1800
|
+
|
1801
|
+
public static class Text {
|
1802
|
+
@JRubyMethod
|
1803
|
+
public static IRubyObject raw_string(IRubyObject self) {
|
1804
|
+
return hpricot_ele_get_name(self);
|
1805
|
+
}
|
1806
|
+
|
1807
|
+
@JRubyMethod
|
1808
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
1809
|
+
return hpricot_ele_clear_name(self);
|
1810
|
+
}
|
1811
|
+
|
1812
|
+
@JRubyMethod
|
1813
|
+
public static IRubyObject content(IRubyObject self) {
|
1814
|
+
return hpricot_ele_get_name(self);
|
1815
|
+
}
|
1816
|
+
|
1817
|
+
@JRubyMethod(name = "content=")
|
1818
|
+
public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
|
1819
|
+
return hpricot_ele_set_name(self, value);
|
1820
|
+
}
|
1821
|
+
}
|
1822
|
+
|
1823
|
+
public static class XMLDecl {
|
1824
|
+
@JRubyMethod
|
1825
|
+
public static IRubyObject raw_string(IRubyObject self) {
|
1826
|
+
return hpricot_ele_get_name(self);
|
1827
|
+
}
|
1828
|
+
|
1829
|
+
@JRubyMethod
|
1830
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
1831
|
+
return hpricot_ele_clear_name(self);
|
1832
|
+
}
|
1833
|
+
|
1834
|
+
@JRubyMethod
|
1835
|
+
public static IRubyObject encoding(IRubyObject self) {
|
1836
|
+
return hpricot_ele_get_encoding(self);
|
1837
|
+
}
|
1838
|
+
|
1839
|
+
@JRubyMethod(name = "encoding=")
|
1840
|
+
public static IRubyObject encoding_set(IRubyObject self, IRubyObject value) {
|
1841
|
+
return hpricot_ele_set_encoding(self, value);
|
1842
|
+
}
|
1843
|
+
|
1844
|
+
@JRubyMethod
|
1845
|
+
public static IRubyObject standalone(IRubyObject self) {
|
1846
|
+
return hpricot_ele_get_standalone(self);
|
1847
|
+
}
|
1848
|
+
|
1849
|
+
@JRubyMethod(name = "standalone=")
|
1850
|
+
public static IRubyObject standalone_set(IRubyObject self, IRubyObject value) {
|
1851
|
+
return hpricot_ele_set_standalone(self, value);
|
1852
|
+
}
|
1853
|
+
|
1854
|
+
@JRubyMethod
|
1855
|
+
public static IRubyObject version(IRubyObject self) {
|
1856
|
+
return hpricot_ele_get_version(self);
|
1857
|
+
}
|
1858
|
+
|
1859
|
+
@JRubyMethod(name = "version=")
|
1860
|
+
public static IRubyObject version_set(IRubyObject self, IRubyObject value) {
|
1861
|
+
return hpricot_ele_set_version(self, value);
|
1862
|
+
}
|
1863
|
+
}
|
1864
|
+
|
1865
|
+
public static class ProcIns {
|
1866
|
+
@JRubyMethod
|
1867
|
+
public static IRubyObject target(IRubyObject self) {
|
1868
|
+
return hpricot_ele_get_name(self);
|
1869
|
+
}
|
1870
|
+
|
1871
|
+
@JRubyMethod(name = "target=")
|
1872
|
+
public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
|
1873
|
+
return hpricot_ele_set_name(self, value);
|
1874
|
+
}
|
1875
|
+
|
1876
|
+
@JRubyMethod
|
1877
|
+
public static IRubyObject content(IRubyObject self) {
|
1878
|
+
return hpricot_ele_get_attr(self);
|
1879
|
+
}
|
1880
|
+
|
1881
|
+
@JRubyMethod(name = "content=")
|
1882
|
+
public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
|
1883
|
+
return hpricot_ele_set_attr(self, value);
|
1884
|
+
}
|
1885
|
+
}
|
1886
|
+
|
1887
|
+
public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
|
1888
|
+
|
1889
|
+
public final static int H_ELE_TAG = 0;
|
1890
|
+
public final static int H_ELE_PARENT = 1;
|
1891
|
+
public final static int H_ELE_ATTR = 2;
|
1892
|
+
public final static int H_ELE_ETAG = 3;
|
1893
|
+
public final static int H_ELE_RAW = 4;
|
1894
|
+
public final static int H_ELE_EC = 5;
|
1895
|
+
public final static int H_ELE_HASH = 6;
|
1896
|
+
public final static int H_ELE_CHILDREN = 7;
|
1897
|
+
|
1898
|
+
public static IRubyObject H_ELE_GET(IRubyObject recv, int n) {
|
1899
|
+
return ((IRubyObject[])recv.dataGetStruct())[n];
|
1900
|
+
}
|
1901
|
+
|
1902
|
+
public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
|
1903
|
+
((IRubyObject[])recv.dataGetStruct())[n] = value;
|
1904
|
+
return value;
|
1905
|
+
}
|
1906
|
+
|
1907
|
+
private static class RefCallback implements Callback {
|
1908
|
+
private final int n;
|
1909
|
+
public RefCallback(int n) { this.n = n; }
|
1910
|
+
|
1911
|
+
public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
|
1912
|
+
return H_ELE_GET(recv, n);
|
1913
|
+
}
|
1914
|
+
|
1915
|
+
public Arity getArity() {
|
1916
|
+
return Arity.NO_ARGUMENTS;
|
1917
|
+
}
|
1918
|
+
}
|
1919
|
+
|
1920
|
+
private static class SetCallback implements Callback {
|
1921
|
+
private final int n;
|
1922
|
+
public SetCallback(int n) { this.n = n; }
|
1923
|
+
|
1924
|
+
public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
|
1925
|
+
return H_ELE_SET(recv, n, args[0]);
|
1926
|
+
}
|
1927
|
+
|
1928
|
+
public Arity getArity() {
|
1929
|
+
return Arity.ONE_ARGUMENT;
|
1930
|
+
}
|
1236
1931
|
}
|
1932
|
+
|
1933
|
+
private final static Callback[] ref_func = new Callback[]{
|
1934
|
+
new RefCallback(0),
|
1935
|
+
new RefCallback(1),
|
1936
|
+
new RefCallback(2),
|
1937
|
+
new RefCallback(3),
|
1938
|
+
new RefCallback(4),
|
1939
|
+
new RefCallback(5),
|
1940
|
+
new RefCallback(6),
|
1941
|
+
new RefCallback(7),
|
1942
|
+
new RefCallback(8),
|
1943
|
+
new RefCallback(9)};
|
1944
|
+
|
1945
|
+
private final static Callback[] set_func = new Callback[]{
|
1946
|
+
new SetCallback(0),
|
1947
|
+
new SetCallback(1),
|
1948
|
+
new SetCallback(2),
|
1949
|
+
new SetCallback(3),
|
1950
|
+
new SetCallback(4),
|
1951
|
+
new SetCallback(5),
|
1952
|
+
new SetCallback(6),
|
1953
|
+
new SetCallback(7),
|
1954
|
+
new SetCallback(8),
|
1955
|
+
new SetCallback(9)};
|
1956
|
+
|
1957
|
+
public final static ObjectAllocator alloc_hpricot_struct = new ObjectAllocator() {
|
1958
|
+
// alloc_hpricot_struct
|
1959
|
+
public IRubyObject allocate(Ruby runtime, RubyClass klass) {
|
1960
|
+
RubyClass kurrent = klass;
|
1961
|
+
Object sz = kurrent.fastGetInternalVariable("__size__");
|
1962
|
+
while(sz == null && kurrent != null) {
|
1963
|
+
kurrent = kurrent.getSuperClass();
|
1964
|
+
sz = kurrent.fastGetInternalVariable("__size__");
|
1965
|
+
}
|
1966
|
+
int size = RubyNumeric.fix2int((RubyObject)sz);
|
1967
|
+
RubyObject obj = new RubyObject(runtime, klass);
|
1968
|
+
IRubyObject[] all = new IRubyObject[size];
|
1969
|
+
java.util.Arrays.fill(all, runtime.getNil());
|
1970
|
+
obj.dataWrapStruct(all);
|
1971
|
+
return obj;
|
1972
|
+
}
|
1973
|
+
};
|
1974
|
+
|
1975
|
+
public static RubyClass makeHpricotStruct(Ruby runtime, IRubyObject[] members) {
|
1976
|
+
RubyClass klass = RubyClass.newClass(runtime, runtime.getObject());
|
1977
|
+
klass.fastSetInternalVariable("__size__", runtime.newFixnum(members.length));
|
1978
|
+
klass.setAllocator(alloc_hpricot_struct);
|
1979
|
+
|
1980
|
+
for(int i = 0; i < members.length; i++) {
|
1981
|
+
String id = members[i].toString();
|
1982
|
+
klass.defineMethod(id, ref_func[i]);
|
1983
|
+
klass.defineMethod(id + "=", set_func[i]);
|
1984
|
+
}
|
1237
1985
|
|
1238
|
-
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
}
|
1986
|
+
return klass;
|
1987
|
+
}
|
1988
|
+
|
1989
|
+
public boolean basicLoad(final Ruby runtime) throws IOException {
|
1990
|
+
Init_hpricot_scan(runtime);
|
1991
|
+
return true;
|
1245
1992
|
}
|
1246
1993
|
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1257
|
-
|
1994
|
+
public static class Extra {
|
1995
|
+
IRubyObject symAllow, symDeny, sym_xmldecl, sym_doctype,
|
1996
|
+
sym_procins, sym_stag, sym_etag, sym_emptytag,
|
1997
|
+
sym_allowed, sym_children, sym_comment,
|
1998
|
+
sym_cdata, sym_name, sym_parent,
|
1999
|
+
sym_raw_attributes, sym_raw_string, sym_tagno,
|
2000
|
+
sym_text, sym_EMPTY, sym_CDATA;
|
2001
|
+
|
2002
|
+
public RubyModule mHpricot;
|
2003
|
+
public RubyClass structElem;
|
2004
|
+
public RubyClass structAttr;
|
2005
|
+
public RubyClass structBasic;
|
2006
|
+
public RubyClass cDoc;
|
2007
|
+
public RubyClass cCData;
|
2008
|
+
public RubyClass cComment;
|
2009
|
+
public RubyClass cDocType;
|
2010
|
+
public RubyClass cElem;
|
2011
|
+
public RubyClass cBogusETag;
|
2012
|
+
public RubyClass cText;
|
2013
|
+
public RubyClass cXMLDecl;
|
2014
|
+
public RubyClass cProcIns;
|
2015
|
+
public RubyClass rb_eHpricotParseError;
|
2016
|
+
public IRubyObject reProcInsParse;
|
2017
|
+
|
2018
|
+
public Extra(Ruby runtime) {
|
2019
|
+
symAllow = runtime.newSymbol("allow");
|
2020
|
+
symDeny = runtime.newSymbol("deny");
|
2021
|
+
sym_xmldecl = runtime.newSymbol("xmldecl");
|
2022
|
+
sym_doctype = runtime.newSymbol("doctype");
|
2023
|
+
sym_procins = runtime.newSymbol("procins");
|
2024
|
+
sym_stag = runtime.newSymbol("stag");
|
2025
|
+
sym_etag = runtime.newSymbol("etag");
|
2026
|
+
sym_emptytag = runtime.newSymbol("emptytag");
|
2027
|
+
sym_allowed = runtime.newSymbol("allowed");
|
2028
|
+
sym_children = runtime.newSymbol("children");
|
2029
|
+
sym_comment = runtime.newSymbol("comment");
|
2030
|
+
sym_cdata = runtime.newSymbol("cdata");
|
2031
|
+
sym_name = runtime.newSymbol("name");
|
2032
|
+
sym_parent = runtime.newSymbol("parent");
|
2033
|
+
sym_raw_attributes = runtime.newSymbol("raw_attributes");
|
2034
|
+
sym_raw_string = runtime.newSymbol("raw_string");
|
2035
|
+
sym_tagno = runtime.newSymbol("tagno");
|
2036
|
+
sym_text = runtime.newSymbol("text");
|
2037
|
+
sym_EMPTY = runtime.newSymbol("EMPTY");
|
2038
|
+
sym_CDATA = runtime.newSymbol("CDATA");
|
1258
2039
|
}
|
1259
|
-
}
|
1260
|
-
mark_tag = 0;
|
1261
|
-
} else {
|
1262
|
-
have = pe - ts;
|
1263
|
-
System.arraycopy(buf,ts,buf,0,have);
|
1264
|
-
SLIDE(tag);
|
1265
|
-
SLIDE(akey);
|
1266
|
-
SLIDE(aval);
|
1267
|
-
te = (te - ts);
|
1268
|
-
ts = 0;
|
1269
2040
|
}
|
1270
|
-
}
|
1271
|
-
return runtime.getNil();
|
1272
|
-
}
|
1273
2041
|
|
1274
|
-
public static
|
1275
|
-
|
1276
|
-
HpricotScanService service = new HpricotScanService();
|
1277
|
-
service.runtime = runtime;
|
1278
|
-
service.xmldecl = runtime.newSymbol("xmldecl");
|
1279
|
-
service.doctype = runtime.newSymbol("doctype");
|
1280
|
-
service.procins = runtime.newSymbol("procins");
|
1281
|
-
service.stag = runtime.newSymbol("stag");
|
1282
|
-
service.etag = runtime.newSymbol("etag");
|
1283
|
-
service.emptytag = runtime.newSymbol("emptytag");
|
1284
|
-
service.comment = runtime.newSymbol("comment");
|
1285
|
-
service.cdata = runtime.newSymbol("cdata");
|
1286
|
-
service.sym_text = runtime.newSymbol("text");
|
1287
|
-
service.block = block;
|
1288
|
-
return service.hpricot_scan(recv, port);
|
1289
|
-
}
|
2042
|
+
public static void Init_hpricot_scan(Ruby runtime) {
|
2043
|
+
Extra x = new Extra(runtime);
|
1290
2044
|
|
2045
|
+
x.mHpricot = runtime.defineModule("Hpricot");
|
2046
|
+
x.mHpricot.dataWrapStruct(x);
|
1291
2047
|
|
1292
|
-
|
1293
|
-
|
1294
|
-
return true;
|
1295
|
-
}
|
2048
|
+
x.mHpricot.getSingletonClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
|
2049
|
+
x.mHpricot.defineAnnotatedMethods(HpricotModule.class);
|
1296
2050
|
|
1297
|
-
|
1298
|
-
|
1299
|
-
|
1300
|
-
|
1301
|
-
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
2051
|
+
x.rb_eHpricotParseError = x.mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
|
2052
|
+
|
2053
|
+
x.structElem = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes, x.sym_etag, x.sym_raw_string, x.sym_allowed, x.sym_tagno, x.sym_children});
|
2054
|
+
x.structAttr = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes});
|
2055
|
+
x.structBasic= makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent});
|
2056
|
+
|
2057
|
+
x.cDoc = x.mHpricot.defineClassUnder("Doc", x.structElem, x.structElem.getAllocator());
|
2058
|
+
|
2059
|
+
x.cCData = x.mHpricot.defineClassUnder("CData", x.structBasic, x.structBasic.getAllocator());
|
2060
|
+
x.cCData.defineAnnotatedMethods(CData.class);
|
2061
|
+
|
2062
|
+
x.cComment = x.mHpricot.defineClassUnder("Comment", x.structBasic, x.structBasic.getAllocator());
|
2063
|
+
x.cComment.defineAnnotatedMethods(Comment.class);
|
2064
|
+
|
2065
|
+
x.cDocType = x.mHpricot.defineClassUnder("DocType", x.structAttr, x.structAttr.getAllocator());
|
2066
|
+
x.cDocType.defineAnnotatedMethods(DocType.class);
|
2067
|
+
|
2068
|
+
x.cElem = x.mHpricot.defineClassUnder("Elem", x.structElem, x.structElem.getAllocator());
|
2069
|
+
x.cElem.defineAnnotatedMethods(Elem.class);
|
2070
|
+
|
2071
|
+
x.cBogusETag = x.mHpricot.defineClassUnder("BogusETag", x.structAttr, x.structAttr.getAllocator());
|
2072
|
+
x.cBogusETag.defineAnnotatedMethods(BogusETag.class);
|
2073
|
+
|
2074
|
+
x.cText = x.mHpricot.defineClassUnder("Text", x.structBasic, x.structBasic.getAllocator());
|
2075
|
+
x.cText.defineAnnotatedMethods(Text.class);
|
2076
|
+
|
2077
|
+
x.cXMLDecl = x.mHpricot.defineClassUnder("XMLDecl", x.structAttr, x.structAttr.getAllocator());
|
2078
|
+
x.cXMLDecl.defineAnnotatedMethods(XMLDecl.class);
|
2079
|
+
|
2080
|
+
x.cProcIns = x.mHpricot.defineClassUnder("ProcIns", x.structAttr, x.structAttr.getAllocator());
|
2081
|
+
x.cProcIns.defineAnnotatedMethods(ProcIns.class);
|
2082
|
+
|
2083
|
+
x.reProcInsParse = runtime.evalScriptlet("/\\A<\\?(\\S+)\\s+(.+)/m");
|
2084
|
+
x.mHpricot.setConstant("ProcInsParse", x.reProcInsParse);
|
2085
|
+
}
|
1305
2086
|
}
|