hpricot 0.8.1-x86-mswin32 → 0.8.2-x86-mswin32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +13 -0
- data/README +6 -15
- data/Rakefile +40 -28
- data/ext/fast_xs/FastXsService.java +13 -1
- data/ext/hpricot_scan/HpricotCss.java +831 -0
- data/ext/hpricot_scan/HpricotScanService.java +1168 -387
- data/ext/hpricot_scan/hpricot_css.c +101 -100
- data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
- data/ext/hpricot_scan/hpricot_scan.c +287 -128
- data/ext/hpricot_scan/hpricot_scan.java.rl +1078 -299
- data/ext/hpricot_scan/hpricot_scan.rl +2 -0
- data/lib/fast_xs.so +0 -0
- data/lib/hpricot/tag.rb +31 -12
- data/lib/hpricot/traverse.rb +1 -0
- data/lib/hpricot_scan.so +0 -0
- data/test/test_alter.rb +21 -2
- data/test/test_parser.rb +8 -0
- data/test/test_preserved.rb +18 -0
- metadata +30 -29
- data/ext/hpricot_scan/test.rb +0 -4
@@ -2,372 +2,1151 @@
|
|
2
2
|
import java.io.IOException;
|
3
3
|
|
4
4
|
import org.jruby.Ruby;
|
5
|
+
import org.jruby.RubyArray;
|
5
6
|
import org.jruby.RubyClass;
|
6
7
|
import org.jruby.RubyHash;
|
7
8
|
import org.jruby.RubyModule;
|
8
9
|
import org.jruby.RubyNumeric;
|
10
|
+
import org.jruby.RubyObject;
|
9
11
|
import org.jruby.RubyObjectAdapter;
|
12
|
+
import org.jruby.RubyRegexp;
|
10
13
|
import org.jruby.RubyString;
|
14
|
+
import org.jruby.anno.JRubyMethod;
|
15
|
+
import org.jruby.exceptions.RaiseException;
|
11
16
|
import org.jruby.javasupport.JavaEmbedUtils;
|
17
|
+
import org.jruby.runtime.Arity;
|
12
18
|
import org.jruby.runtime.Block;
|
13
|
-
import org.jruby.runtime.
|
19
|
+
import org.jruby.runtime.ObjectAllocator;
|
20
|
+
import org.jruby.runtime.ThreadContext;
|
14
21
|
import org.jruby.runtime.builtin.IRubyObject;
|
22
|
+
import org.jruby.runtime.callback.Callback;
|
15
23
|
import org.jruby.exceptions.RaiseException;
|
16
24
|
import org.jruby.runtime.load.BasicLibraryService;
|
25
|
+
import org.jruby.util.ByteList;
|
17
26
|
|
18
27
|
public class HpricotScanService implements BasicLibraryService {
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
28
|
+
public static byte[] realloc(byte[] input, int size) {
|
29
|
+
byte[] newArray = new byte[size];
|
30
|
+
System.arraycopy(input, 0, newArray, 0, input.length);
|
31
|
+
return newArray;
|
32
|
+
}
|
33
|
+
|
34
|
+
// hpricot_state
|
35
|
+
public static class State {
|
36
|
+
public IRubyObject doc;
|
37
|
+
public IRubyObject focus;
|
38
|
+
public IRubyObject last;
|
39
|
+
public IRubyObject EC;
|
40
|
+
public boolean xml, strict, fixup;
|
41
|
+
}
|
42
|
+
|
43
|
+
static boolean OPT(IRubyObject opts, String key) {
|
44
|
+
Ruby runtime = opts.getRuntime();
|
45
|
+
return !opts.isNil() && ((RubyHash)opts).op_aref(runtime.getCurrentContext(), runtime.newSymbol(key)).isTrue();
|
46
|
+
}
|
47
|
+
|
48
|
+
// H_PROP(name, H_ELE_TAG)
|
49
|
+
public static IRubyObject hpricot_ele_set_name(IRubyObject self, IRubyObject x) {
|
50
|
+
H_ELE_SET(self, H_ELE_TAG, x);
|
51
|
+
return self;
|
52
|
+
}
|
53
|
+
|
54
|
+
public static IRubyObject hpricot_ele_clear_name(IRubyObject self) {
|
55
|
+
H_ELE_SET(self, H_ELE_TAG, self.getRuntime().getNil());
|
56
|
+
return self.getRuntime().getTrue();
|
57
|
+
}
|
58
|
+
|
59
|
+
public static IRubyObject hpricot_ele_get_name(IRubyObject self) {
|
60
|
+
return H_ELE_GET(self, H_ELE_TAG);
|
61
|
+
}
|
62
|
+
|
63
|
+
// H_PROP(raw, H_ELE_RAW)
|
64
|
+
public static IRubyObject hpricot_ele_set_raw(IRubyObject self, IRubyObject x) {
|
65
|
+
H_ELE_SET(self, H_ELE_RAW, x);
|
66
|
+
return self;
|
67
|
+
}
|
68
|
+
|
69
|
+
public static IRubyObject hpricot_ele_clear_raw(IRubyObject self) {
|
70
|
+
H_ELE_SET(self, H_ELE_RAW, self.getRuntime().getNil());
|
71
|
+
return self.getRuntime().getTrue();
|
72
|
+
}
|
73
|
+
|
74
|
+
public static IRubyObject hpricot_ele_get_raw(IRubyObject self) {
|
75
|
+
return H_ELE_GET(self, H_ELE_RAW);
|
76
|
+
}
|
77
|
+
|
78
|
+
// H_PROP(parent, H_ELE_PARENT)
|
79
|
+
public static IRubyObject hpricot_ele_set_parent(IRubyObject self, IRubyObject x) {
|
80
|
+
H_ELE_SET(self, H_ELE_PARENT, x);
|
81
|
+
return self;
|
82
|
+
}
|
83
|
+
|
84
|
+
public static IRubyObject hpricot_ele_clear_parent(IRubyObject self) {
|
85
|
+
H_ELE_SET(self, H_ELE_PARENT, self.getRuntime().getNil());
|
86
|
+
return self.getRuntime().getTrue();
|
87
|
+
}
|
88
|
+
|
89
|
+
public static IRubyObject hpricot_ele_get_parent(IRubyObject self) {
|
90
|
+
return H_ELE_GET(self, H_ELE_PARENT);
|
91
|
+
}
|
92
|
+
|
93
|
+
// H_PROP(attr, H_ELE_ATTR)
|
94
|
+
public static IRubyObject hpricot_ele_set_attr(IRubyObject self, IRubyObject x) {
|
95
|
+
H_ELE_SET(self, H_ELE_ATTR, x);
|
96
|
+
return self;
|
97
|
+
}
|
98
|
+
|
99
|
+
public static IRubyObject hpricot_ele_clear_attr(IRubyObject self) {
|
100
|
+
H_ELE_SET(self, H_ELE_ATTR, self.getRuntime().getNil());
|
101
|
+
return self.getRuntime().getTrue();
|
102
|
+
}
|
103
|
+
|
104
|
+
public static IRubyObject hpricot_ele_get_attr(IRubyObject self) {
|
105
|
+
return H_ELE_GET(self, H_ELE_ATTR);
|
106
|
+
}
|
107
|
+
|
108
|
+
// H_PROP(etag, H_ELE_ETAG)
|
109
|
+
public static IRubyObject hpricot_ele_set_etag(IRubyObject self, IRubyObject x) {
|
110
|
+
H_ELE_SET(self, H_ELE_ETAG, x);
|
111
|
+
return self;
|
112
|
+
}
|
113
|
+
|
114
|
+
public static IRubyObject hpricot_ele_clear_etag(IRubyObject self) {
|
115
|
+
H_ELE_SET(self, H_ELE_ETAG, self.getRuntime().getNil());
|
116
|
+
return self.getRuntime().getTrue();
|
117
|
+
}
|
118
|
+
|
119
|
+
public static IRubyObject hpricot_ele_get_etag(IRubyObject self) {
|
120
|
+
return H_ELE_GET(self, H_ELE_ETAG);
|
121
|
+
}
|
122
|
+
|
123
|
+
// H_PROP(children, H_ELE_CHILDREN)
|
124
|
+
public static IRubyObject hpricot_ele_set_children(IRubyObject self, IRubyObject x) {
|
125
|
+
H_ELE_SET(self, H_ELE_CHILDREN, x);
|
126
|
+
return self;
|
127
|
+
}
|
128
|
+
|
129
|
+
public static IRubyObject hpricot_ele_clear_children(IRubyObject self) {
|
130
|
+
H_ELE_SET(self, H_ELE_CHILDREN, self.getRuntime().getNil());
|
131
|
+
return self.getRuntime().getTrue();
|
132
|
+
}
|
133
|
+
|
134
|
+
public static IRubyObject hpricot_ele_get_children(IRubyObject self) {
|
135
|
+
return H_ELE_GET(self, H_ELE_CHILDREN);
|
136
|
+
}
|
137
|
+
|
138
|
+
// H_ATTR(target)
|
139
|
+
public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
|
140
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("target"), x);
|
141
|
+
return self;
|
142
|
+
}
|
143
|
+
|
144
|
+
public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
|
145
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
|
146
|
+
}
|
147
|
+
|
148
|
+
// H_ATTR(encoding)
|
149
|
+
public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
|
150
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
|
151
|
+
return self;
|
152
|
+
}
|
153
|
+
|
154
|
+
public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
|
155
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
|
156
|
+
}
|
157
|
+
|
158
|
+
// H_ATTR(version)
|
159
|
+
public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
|
160
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
|
161
|
+
return self;
|
162
|
+
}
|
163
|
+
|
164
|
+
public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
|
165
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
|
166
|
+
}
|
167
|
+
|
168
|
+
// H_ATTR(standalone)
|
169
|
+
public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
|
170
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
|
171
|
+
return self;
|
172
|
+
}
|
173
|
+
|
174
|
+
public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
|
175
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
|
176
|
+
}
|
177
|
+
|
178
|
+
// H_ATTR(system_id)
|
179
|
+
public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
|
180
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
|
181
|
+
return self;
|
182
|
+
}
|
183
|
+
|
184
|
+
public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
|
185
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
|
186
|
+
}
|
187
|
+
|
188
|
+
// H_ATTR(public_id)
|
189
|
+
public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
|
190
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
|
191
|
+
return self;
|
192
|
+
}
|
193
|
+
|
194
|
+
public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
|
195
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
|
196
|
+
}
|
197
|
+
|
198
|
+
public static class Scanner {
|
199
|
+
public IRubyObject SET(int mark, int E, IRubyObject org) {
|
200
|
+
if(mark == -1 || E == mark) {
|
201
|
+
return runtime.newString("");
|
202
|
+
} else if(E > mark) {
|
203
|
+
return RubyString.newString(runtime, data, mark, E-mark);
|
204
|
+
} else {
|
205
|
+
return org;
|
206
|
+
}
|
207
|
+
}
|
208
|
+
|
209
|
+
public int SLIDE(int N) {
|
210
|
+
if(N > ts) {
|
211
|
+
return N - ts;
|
212
|
+
} else {
|
213
|
+
return N;
|
214
|
+
}
|
215
|
+
}
|
216
|
+
|
217
|
+
public IRubyObject CAT(IRubyObject N, int mark, int E) {
|
218
|
+
if(N.isNil()) {
|
219
|
+
return SET(mark, E, N);
|
220
|
+
} else {
|
221
|
+
((RubyString)N).cat(data, mark, E-mark);
|
222
|
+
return N;
|
223
|
+
}
|
224
|
+
}
|
225
|
+
|
226
|
+
public void ATTR(IRubyObject K, IRubyObject V) {
|
227
|
+
if(!K.isNil()) {
|
228
|
+
if(attr.isNil()) {
|
229
|
+
attr = RubyHash.newHash(runtime);
|
230
|
+
}
|
231
|
+
((RubyHash)attr).fastASet(K, V);
|
232
|
+
}
|
233
|
+
}
|
234
|
+
|
235
|
+
public void TEXT_PASS() {
|
236
|
+
if(!text) {
|
237
|
+
if(ele_open) {
|
238
|
+
ele_open = false;
|
239
|
+
if(ts != -1) {
|
240
|
+
mark_tag = ts;
|
241
|
+
}
|
242
|
+
} else {
|
243
|
+
mark_tag = p;
|
244
|
+
}
|
245
|
+
attr = runtime.getNil();
|
246
|
+
tag = runtime.getNil();
|
247
|
+
text = true;
|
248
|
+
}
|
249
|
+
}
|
250
|
+
|
251
|
+
public void ELE(IRubyObject N) {
|
252
|
+
if(te > ts || text) {
|
253
|
+
int raw = -1;
|
254
|
+
int rawlen = 0;
|
255
|
+
ele_open = false;
|
256
|
+
text = false;
|
257
|
+
|
258
|
+
if(ts != -1 && N != x.sym_cdata && N != x.sym_text && N != x.sym_procins && N != x.sym_comment) {
|
259
|
+
raw = ts;
|
260
|
+
rawlen = te - ts;
|
261
|
+
}
|
262
|
+
|
263
|
+
if(block.isGiven()) {
|
264
|
+
IRubyObject raw_string = runtime.getNil();
|
265
|
+
if(raw != -1) {
|
266
|
+
raw_string = RubyString.newString(runtime, data, raw, rawlen);
|
267
|
+
}
|
268
|
+
yieldTokens(N, tag, attr, runtime.getNil(), taint);
|
269
|
+
} else {
|
270
|
+
hpricotToken(S, N, tag, attr, raw, rawlen, taint);
|
271
|
+
}
|
272
|
+
}
|
273
|
+
}
|
274
|
+
|
275
|
+
|
276
|
+
public void EBLK(IRubyObject N, int T) {
|
277
|
+
tag = CAT(tag, mark_tag, p - T + 1);
|
278
|
+
ELE(N);
|
279
|
+
}
|
280
|
+
|
281
|
+
public void hpricotAdd(IRubyObject focus, IRubyObject ele) {
|
282
|
+
IRubyObject children = H_ELE_GET(focus, H_ELE_CHILDREN);
|
283
|
+
if(children.isNil()) {
|
284
|
+
H_ELE_SET(focus, H_ELE_CHILDREN, children = RubyArray.newArray(runtime, 1));
|
285
|
+
}
|
286
|
+
((RubyArray)children).append(ele);
|
287
|
+
H_ELE_SET(ele, H_ELE_PARENT, focus);
|
288
|
+
}
|
289
|
+
|
290
|
+
private static class TokenInfo {
|
291
|
+
public IRubyObject sym;
|
292
|
+
public IRubyObject tag;
|
293
|
+
public IRubyObject attr;
|
294
|
+
public int raw;
|
295
|
+
public int rawlen;
|
296
|
+
public IRubyObject ec;
|
297
|
+
public IRubyObject ele;
|
298
|
+
public Extra x;
|
299
|
+
public Ruby runtime;
|
300
|
+
public Scanner scanner;
|
301
|
+
public State S;
|
302
|
+
|
303
|
+
public void H_ELE(RubyClass klass) {
|
304
|
+
ele = klass.allocate();
|
305
|
+
if(klass == x.cElem) {
|
306
|
+
H_ELE_SET(ele, H_ELE_TAG, tag);
|
307
|
+
H_ELE_SET(ele, H_ELE_ATTR, attr);
|
308
|
+
H_ELE_SET(ele, H_ELE_EC, ec);
|
309
|
+
if(raw != -1 && (sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_doctype)) {
|
310
|
+
H_ELE_SET(ele, H_ELE_RAW, RubyString.newString(runtime, scanner.data, raw, rawlen));
|
311
|
+
}
|
312
|
+
} else if(klass == x.cDocType || klass == x.cProcIns || klass == x.cXMLDecl || klass == x.cBogusETag) {
|
313
|
+
if(klass == x.cBogusETag) {
|
314
|
+
H_ELE_SET(ele, H_ELE_TAG, tag);
|
315
|
+
if(raw != -1) {
|
316
|
+
H_ELE_SET(ele, H_ELE_ATTR, RubyString.newString(runtime, scanner.data, raw, rawlen));
|
317
|
+
}
|
318
|
+
} else {
|
319
|
+
if(klass == x.cDocType) {
|
320
|
+
scanner.ATTR(runtime.newSymbol("target"), tag);
|
321
|
+
}
|
322
|
+
H_ELE_SET(ele, H_ELE_ATTR, attr);
|
323
|
+
if(klass != x.cProcIns) {
|
324
|
+
tag = runtime.getNil();
|
325
|
+
if(raw != -1) {
|
326
|
+
tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
|
327
|
+
}
|
328
|
+
}
|
329
|
+
H_ELE_SET(ele, H_ELE_TAG, tag);
|
330
|
+
}
|
331
|
+
} else {
|
332
|
+
H_ELE_SET(ele, H_ELE_TAG, tag);
|
333
|
+
}
|
334
|
+
S.last = ele;
|
335
|
+
}
|
336
|
+
|
337
|
+
/**
 * Applies this token to the document tree held in {@code S}.
 *
 * <p>In non-XML mode the token is first normalised: the element-content entry
 * for the tag is looked up (retrying with the downcased tag), markup seen while
 * the focused element has CDATA content is turned into text, and emptytag/stag
 * are swapped according to whether the element content is EMPTY. The token is
 * then dispatched on its kind to create a node, attach it to the focus and
 * move the focus as needed.
 *
 * @param taint not read inside this method
 */
public void hpricotToken(boolean taint) {
  //
  // in html mode, fix up start tags incorrectly formed as empty tags
  //
  if(!S.xml) {
    if(sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_etag) {
      ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
      if(ec.isNil()) {
        // No entry under the tag as written: retry with the downcased tag.
        tag = tag.callMethod(scanner.ctx, "downcase");
        ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
      }
    }

    // While the focused element has CDATA content, any markup token other than
    // the end tag whose hash matches the focus is re-labelled as text made of
    // its raw source slice.
    if(H_ELE_GET(S.focus, H_ELE_EC) == x.sym_CDATA &&
       (sym != x.sym_procins && sym != x.sym_comment && sym != x.sym_cdata && sym != x.sym_text) &&
       !(sym == x.sym_etag && runtime.newFixnum(tag.hashCode()).equals(H_ELE_GET(S.focus, H_ELE_HASH)))) {
      sym = x.sym_text;
      tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
    }

    if(!ec.isNil()) {
      if(sym == x.sym_emptytag) {
        if(ec != x.sym_EMPTY) {
          sym = x.sym_stag;
        }
      } else if(sym == x.sym_stag) {
        if(ec == x.sym_EMPTY) {
          sym = x.sym_emptytag;
        }
      }
    }
  }

  if(sym == x.sym_emptytag || sym == x.sym_stag) {
    // The tag's hash code is stored on the element and used below as the lookup key.
    IRubyObject name = runtime.newFixnum(tag.hashCode());
    H_ELE(x.cElem);
    H_ELE_SET(ele, H_ELE_HASH, name);

    if(!S.xml) {
      // Walk from the focus up to the document, consulting each ancestor whose
      // EC slot is a hash, to choose the element the new node is attached to.
      IRubyObject match = runtime.getNil(), e = S.focus;
      while(e != S.doc) {
        IRubyObject hEC = H_ELE_GET(e, H_ELE_EC);
        if(hEC instanceof RubyHash) {
          IRubyObject has = ((RubyHash)hEC).op_aref(scanner.ctx, name);
          if(!has.isNil()) {
            if(has == runtime.getTrue()) {
              // Keep the first (innermost) ancestor that maps this tag to true.
              if(match.isNil()) {
                match = e;
              }
            } else if(has == x.symAllow) {
              match = S.focus;
            } else if(has == x.symDeny) {
              match = runtime.getNil();
            }
          }
        }
        e = H_ELE_GET(e, H_ELE_PARENT);
      }

      // No ancestor decided: stay on the current focus.
      if(match.isNil()) {
        match = S.focus;
      }
      S.focus = match;
    }

    scanner.hpricotAdd(S.focus, ele);

    //
    // in the case of a start tag that should be empty, just
    // skip the step that focuses the element.  focusing moves
    // us deeper into the document.
    //
    if(sym == x.sym_stag) {
      if(S.xml || ec != x.sym_EMPTY) {
        S.focus = ele;
        S.last = runtime.getNil();
      }
    }
  } else if(sym == x.sym_etag) {
    IRubyObject name, match = runtime.getNil(), e = S.focus;
    if(S.strict) {
      // In strict mode an end tag with no element-content entry is treated as "div".
      if(((RubyHash)S.EC).op_aref(scanner.ctx, tag).isNil()) {
        tag = runtime.newString("div");
      }
    }

    // Find the nearest ancestor (starting at the focus) whose stored hash
    // equals this end tag's hash.
    name = runtime.newFixnum(tag.hashCode());
    while(e != S.doc) {
      if(H_ELE_GET(e, H_ELE_HASH).equals(name)) {
        match = e;
        break;
      }
      e = H_ELE_GET(e, H_ELE_PARENT);

    }
    if(match.isNil()) {
      // Nothing to close: keep the end tag in the tree as a bogus end tag node.
      H_ELE(x.cBogusETag);
      scanner.hpricotAdd(S.focus, ele);
    } else {
      // Record the raw end tag (or nil) on the matched element and move the
      // focus back up to its parent.
      ele = runtime.getNil();
      if(raw != -1) {
        ele = RubyString.newString(runtime, scanner.data, raw, rawlen);
      }
      H_ELE_SET(match, H_ELE_ETAG, ele);
      S.focus = H_ELE_GET(match, H_ELE_PARENT);
      S.last = runtime.getNil();

    }
  } else if(sym == x.sym_cdata) {
    H_ELE(x.cCData);
    scanner.hpricotAdd(S.focus, ele);
  } else if(sym == x.sym_comment) {
    H_ELE(x.cComment);
    scanner.hpricotAdd(S.focus, ele);
  } else if(sym == x.sym_doctype) {
    H_ELE(x.cDocType);
    if(S.strict) {
      // Strict mode overwrites the doctype identifiers with the XHTML 1.0 Strict ones.
      // NOTE(review): this cast assumes attr is a hash at this point — confirm for
      // doctypes that carry no identifiers.
      RubyHash h = (RubyHash)attr;
      h.fastASet(runtime.newSymbol("system_id"), runtime.newString("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
      h.fastASet(runtime.newSymbol("public_id"), runtime.newString("-//W3C//DTD XHTML 1.0 Strict//EN"));
    }
    scanner.hpricotAdd(S.focus, ele);
  } else if(sym == x.sym_procins) {
    // Split the processing instruction text with x.reProcInsParse: group 1
    // becomes the tag, group 2 the attr value.
    IRubyObject match = tag.callMethod(scanner.ctx, "match", x.reProcInsParse);
    tag = RubyRegexp.nth_match(1, match);
    attr = RubyRegexp.nth_match(2, match);
    H_ELE(x.cProcIns);
    scanner.hpricotAdd(S.focus, ele);
  } else if(sym == x.sym_text) {
    // Merge consecutive text tokens into the previous text node.
    if(!S.last.isNil() && S.last.getType() == x.cText) {
      ((RubyString)H_ELE_GET(S.last, H_ELE_TAG)).append(tag);
    } else {
      H_ELE(x.cText);
      scanner.hpricotAdd(S.focus, ele);
    }
  } else if(sym == x.sym_xmldecl) {
    H_ELE(x.cXMLDecl);
    scanner.hpricotAdd(S.focus, ele);
  }
}
|
477
|
+
}
|
478
|
+
|
479
|
+
public void hpricotToken(State S, IRubyObject _sym, IRubyObject _tag, IRubyObject _attr, int _raw, int _rawlen, boolean taint) {
|
480
|
+
TokenInfo t = new TokenInfo();
|
481
|
+
t.sym = _sym;
|
482
|
+
t.tag = _tag;
|
483
|
+
t.attr = _attr;
|
484
|
+
t.raw = _raw;
|
485
|
+
t.rawlen = _rawlen;
|
486
|
+
t.ec = runtime.getNil();
|
487
|
+
t.ele = runtime.getNil();
|
488
|
+
t.x = x;
|
489
|
+
t.runtime = runtime;
|
490
|
+
t.scanner = this;
|
491
|
+
t.S = S;
|
492
|
+
|
493
|
+
t.hpricotToken(taint);
|
494
|
+
}
|
495
|
+
|
496
|
+
public void yieldTokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
|
497
|
+
if(sym == x.sym_text) {
|
498
|
+
raw = tag;
|
499
|
+
}
|
500
|
+
IRubyObject ary = RubyArray.newArrayNoCopy(runtime, new IRubyObject[]{sym, tag, attr, raw});
|
501
|
+
if(taint) {
|
502
|
+
ary.setTaint(true);
|
503
|
+
tag.setTaint(true);
|
504
|
+
attr.setTaint(true);
|
505
|
+
raw.setTaint(true);
|
506
|
+
}
|
507
|
+
|
508
|
+
block.yield(ctx, ary);
|
509
|
+
}
|
143
510
|
|
144
511
|
%%{
|
145
512
|
machine hpricot_scan;
|
146
513
|
|
147
514
|
action newEle {
|
148
|
-
if
|
149
|
-
|
150
|
-
|
151
|
-
|
515
|
+
if(text) {
|
516
|
+
tag = CAT(tag, mark_tag, p);
|
517
|
+
ELE(x.sym_text);
|
518
|
+
text = false;
|
152
519
|
}
|
153
520
|
attr = runtime.getNil();
|
154
|
-
tag
|
521
|
+
tag = runtime.getNil();
|
155
522
|
mark_tag = -1;
|
156
523
|
ele_open = true;
|
157
524
|
}
|
158
525
|
|
159
|
-
action _tag
|
526
|
+
action _tag { mark_tag = p; }
|
160
527
|
action _aval { mark_aval = p; }
|
161
528
|
action _akey { mark_akey = p; }
|
162
|
-
action tag
|
163
|
-
action tagc
|
164
|
-
action aval
|
165
|
-
action aunq {
|
166
|
-
|
167
|
-
|
529
|
+
action tag { tag = SET(mark_tag, p, tag); }
|
530
|
+
action tagc { tag = SET(mark_tag, p-1, tag); }
|
531
|
+
action aval { aval = SET(mark_aval, p, aval); }
|
532
|
+
action aunq {
|
533
|
+
if(data[p-1] == '"' || data[p-1] == '\'') {
|
534
|
+
aval = SET(mark_aval, p-1, aval);
|
535
|
+
} else {
|
536
|
+
aval = SET(mark_aval, p, aval);
|
537
|
+
}
|
168
538
|
}
|
169
|
-
action akey { SET(
|
170
|
-
action xmlver { SET(
|
171
|
-
action xmlenc { SET(
|
172
|
-
action xmlsd { SET(
|
173
|
-
action pubid { SET(
|
174
|
-
action sysid { SET(
|
175
|
-
|
176
|
-
action new_attr {
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
539
|
+
action akey { akey = SET(mark_akey, p, akey); }
|
540
|
+
action xmlver { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("version"), aval); }
|
541
|
+
action xmlenc { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("encoding"), aval); }
|
542
|
+
action xmlsd { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("standalone"), aval); }
|
543
|
+
action pubid { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("public_id"), aval); }
|
544
|
+
action sysid { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("system_id"), aval); }
|
545
|
+
|
546
|
+
action new_attr {
|
547
|
+
akey = runtime.getNil();
|
548
|
+
aval = runtime.getNil();
|
549
|
+
mark_akey = -1;
|
550
|
+
mark_aval = -1;
|
181
551
|
}
|
182
552
|
|
183
|
-
action save_attr {
|
184
|
-
|
553
|
+
action save_attr {
|
554
|
+
if(!S.xml) {
|
555
|
+
akey = akey.callMethod(runtime.getCurrentContext(), "downcase");
|
556
|
+
}
|
557
|
+
ATTR(akey, aval);
|
185
558
|
}
|
186
559
|
|
187
560
|
include hpricot_common "hpricot_common.rl";
|
188
|
-
|
189
561
|
}%%
|
190
562
|
|
191
563
|
%% write data nofinal;
|
192
564
|
|
193
|
-
public final static int BUFSIZE=16384;
|
565
|
+
public final static int BUFSIZE = 16384;
|
194
566
|
|
195
|
-
private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
|
196
|
-
IRubyObject ary;
|
197
|
-
if (sym == runtime.newSymbol("text")) {
|
198
|
-
raw = tag;
|
199
|
-
}
|
200
|
-
ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw});
|
201
|
-
if (taint) {
|
202
|
-
ary.setTaint(true);
|
203
|
-
tag.setTaint(true);
|
204
|
-
attr.setTaint(true);
|
205
|
-
raw.setTaint(true);
|
206
|
-
}
|
207
|
-
block.yield(runtime.getCurrentContext(), ary, null, null, false);
|
208
|
-
}
|
209
567
|
|
568
|
+
private int cs, act, have = 0, nread = 0, curline = 1;
|
569
|
+
private int ts = 0, te = 0, eof = -1, p = -1, pe = -1, buf = 0;
|
570
|
+
private byte[] data;
|
571
|
+
private State S = null;
|
572
|
+
private IRubyObject port, opts, attr, tag, akey, aval, bufsize;
|
573
|
+
private int mark_tag = -1, mark_akey = -1, mark_aval = -1;
|
574
|
+
private boolean done = false, ele_open = false, taint = false, io = false, text = false;
|
575
|
+
private int buffer_size = 0;
|
210
576
|
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
IRubyObject
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
Block block = null;
|
577
|
+
private Extra x;
|
578
|
+
|
579
|
+
private IRubyObject self;
|
580
|
+
private Ruby runtime;
|
581
|
+
private ThreadContext ctx;
|
582
|
+
private Block block;
|
583
|
+
|
584
|
+
private IRubyObject xmldecl, doctype, stag, etag, emptytag, comment, cdata, procins;
|
585
|
+
|
586
|
+
private RaiseException newRaiseException(RubyClass exceptionClass, String message) {
|
587
|
+
return new RaiseException(runtime, exceptionClass, message, true);
|
588
|
+
}
|
224
589
|
|
590
|
+
public Scanner(IRubyObject self, IRubyObject[] args, Block block) {
|
591
|
+
this.self = self;
|
592
|
+
this.runtime = self.getRuntime();
|
593
|
+
this.ctx = runtime.getCurrentContext();
|
594
|
+
this.block = block;
|
595
|
+
attr = runtime.getNil();
|
596
|
+
tag = runtime.getNil();
|
597
|
+
akey = runtime.getNil();
|
598
|
+
aval = runtime.getNil();
|
599
|
+
bufsize = runtime.getNil();
|
225
600
|
|
226
|
-
|
227
|
-
cdata, sym_text;
|
601
|
+
this.x = (Extra)this.runtime.getModule("Hpricot").dataGetStruct();
|
228
602
|
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
603
|
+
this.xmldecl = x.sym_xmldecl;
|
604
|
+
this.doctype = x.sym_doctype;
|
605
|
+
this.stag = x.sym_stag;
|
606
|
+
this.etag = x.sym_etag;
|
607
|
+
this.emptytag = x.sym_emptytag;
|
608
|
+
this.comment = x.sym_comment;
|
609
|
+
this.cdata = x.sym_cdata;
|
610
|
+
this.procins = x.sym_procins;
|
611
|
+
|
612
|
+
port = args[0];
|
613
|
+
if(args.length == 2) {
|
614
|
+
opts = args[1];
|
615
|
+
} else {
|
616
|
+
opts = runtime.getNil();
|
617
|
+
}
|
618
|
+
|
619
|
+
taint = port.isTaint();
|
620
|
+
io = port.respondsTo("read");
|
621
|
+
if(!io) {
|
622
|
+
if(port.respondsTo("to_str")) {
|
623
|
+
port = port.callMethod(ctx, "to_str");
|
624
|
+
port = port.convertToString();
|
625
|
+
} else {
|
626
|
+
throw runtime.newArgumentError("an Hpricot document must be built from an input source (a String or IO object.)");
|
627
|
+
}
|
628
|
+
}
|
629
|
+
|
630
|
+
if(!(opts instanceof RubyHash)) {
|
631
|
+
opts = runtime.getNil();
|
632
|
+
}
|
633
|
+
|
634
|
+
if(!block.isGiven()) {
|
635
|
+
S = new State();
|
636
|
+
S.doc = x.cDoc.allocate();
|
637
|
+
S.focus = S.doc;
|
638
|
+
S.last = runtime.getNil();
|
639
|
+
S.xml = OPT(opts, "xml");
|
640
|
+
S.strict = OPT(opts, "xhtml_strict");
|
641
|
+
S.fixup = OPT(opts, "fixup_tags");
|
642
|
+
if(S.strict) {
|
643
|
+
S.fixup = true;
|
644
|
+
}
|
645
|
+
S.doc.getInstanceVariables().fastSetInstanceVariable("@options", opts);
|
646
|
+
S.EC = x.mHpricot.getConstant("ElementContent");
|
647
|
+
}
|
648
|
+
|
649
|
+
buffer_size = BUFSIZE;
|
650
|
+
if(self.getInstanceVariables().fastHasInstanceVariable("@buffer_size")) {
|
651
|
+
bufsize = self.getInstanceVariables().fastGetInstanceVariable("@buffer_size");
|
652
|
+
if(!bufsize.isNil()) {
|
653
|
+
buffer_size = RubyNumeric.fix2int(bufsize);
|
654
|
+
}
|
655
|
+
}
|
656
|
+
|
657
|
+
if(io) {
|
658
|
+
buf = 0;
|
659
|
+
data = new byte[buffer_size];
|
660
|
+
}
|
661
|
+
}
|
662
|
+
|
663
|
+
private int len, space;
|
664
|
+
// hpricot_scan
|
665
|
+
/**
 * Runs the generated state machine over the whole input.
 *
 * <p>IO input is read in chunks into {@code data}; the unconsumed tail of a
 * token is moved to the front of the buffer between chunks, and the buffer
 * grows by {@code BUFSIZE} when a single token fills it. String input is
 * scanned in one pass over the string's bytes.
 *
 * @return the document ({@code S.doc}) when a tree was built, otherwise nil
 * @throws RaiseException of class {@code x.rb_eHpricotParseError} when the machine enters its error state
 */
public IRubyObject scan() {
  %% write init;
  while(!done) {
    p = pe = len = buf;
    space = buffer_size - have;

    if(io) {
      if(space == 0) {
        /* We've used up the entire buffer storing an already-parsed token
         * prefix that must be preserved.  Likely caused by super-long attributes.
         * Increase buffer size and continue  */
        buffer_size += BUFSIZE;
        data = realloc(data, buffer_size);
        space = buffer_size - have;
      }

      p = have;
      IRubyObject str = port.callMethod(ctx, "read", runtime.newFixnum(space));
      if(str.isNil()) {
        /* read(n) answers nil once the source is exhausted (for instance when
         * the previous chunk ended exactly at end of input).  Treat that as a
         * zero-length read so the EOF handling below finishes the scan. */
        len = 0;
      } else {
        ByteList bl = str.convertToString().getByteList();
        len = bl.realSize;
        System.arraycopy(bl.bytes, bl.begin, data, p, len);
      }
    } else {
      ByteList bl = port.convertToString().getByteList();
      data = bl.bytes;
      buf = bl.begin;
      p = bl.begin;
      len = bl.realSize + 1;
      /* One byte past the content is scanned as well; copy into a private
       * array when the backing array has no room for it. */
      if(p + len >= data.length) {
        data = new byte[len];
        System.arraycopy(bl.bytes, bl.begin, data, 0, bl.realSize);
        p = 0;
        buf = 0;
      }
      done = true;
      eof = p + len;
    }

    nread += len;

    /* If this is the last buffer, tack on an EOF. */
    if(io && len < space) {
      data[p + len++] = 0;
      eof = p + len;
      done = true;
    }

    pe = p + len;

    %% write exec;

    if(cs == hpricot_scan_error) {
      if(!tag.isNil()) {
        throw newRaiseException(x.rb_eHpricotParseError, "parse error on element <" + tag + ">, starting on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
      } else {
        throw newRaiseException(x.rb_eHpricotParseError, "parse error on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
      }
    }

    /* Input ended inside an open element: what was scanned of it becomes text. */
    if(done && ele_open) {
      ele_open = false;
      if(ts > 0) {
        mark_tag = ts;
        ts = 0;
        text = true;
      }
    }

    if(ts == -1) {
      /* No token in progress: nothing has to be carried over. */
      have = 0;
      if(mark_tag != -1 && text) {
        if(done) {
          /* Flush the trailing text, leaving out the final byte. */
          if(mark_tag < p - 1) {
            tag = CAT(tag, mark_tag, p-1);
            ELE(x.sym_text);
          }
        } else {
          tag = CAT(tag, mark_tag, p);
        }
      }
      if(io) {
        mark_tag = 0;
      } else {
        mark_tag = ((RubyString)port).getByteList().begin;
      }
    } else if(io) {
      /* A token is in progress: move its bytes to the front of the buffer and
       * rebase the marks and token offsets accordingly. */
      have = pe - ts;
      System.arraycopy(data, ts, data, buf, have);
      mark_tag = SLIDE(mark_tag);
      mark_akey = SLIDE(mark_akey);
      mark_aval = SLIDE(mark_aval);
      te -= ts;
      ts = 0;
    }
  }

  if(S != null) {
    return S.doc;
  }

  return runtime.getNil();
}
|
243
766
|
}
|
244
|
-
}
|
245
767
|
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
768
|
+
public static class HpricotModule {
|
769
|
+
// hpricot_scan
|
770
|
+
@JRubyMethod(module = true, optional = 1, required = 1, frame = true)
|
771
|
+
public static IRubyObject scan(IRubyObject self, IRubyObject[] args, Block block) {
|
772
|
+
return new Scanner(self, args, block).scan();
|
773
|
+
}
|
774
|
+
|
775
|
+
// hpricot_css
|
776
|
+
@JRubyMethod(module = true)
|
777
|
+
public static IRubyObject css(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
|
778
|
+
return new HpricotCss(self, mod, str, node).scan();
|
779
|
+
}
|
251
780
|
}
|
252
|
-
}
|
253
|
-
buf = new char[buffer_size];
|
254
781
|
|
255
|
-
|
782
|
+
public static class CData {
|
783
|
+
@JRubyMethod
|
784
|
+
public static IRubyObject content(IRubyObject self) {
|
785
|
+
return hpricot_ele_get_name(self);
|
786
|
+
}
|
256
787
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
788
|
+
@JRubyMethod(name = "content=")
|
789
|
+
public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
|
790
|
+
return hpricot_ele_set_name(self, value);
|
791
|
+
}
|
792
|
+
}
|
793
|
+
|
794
|
+
public static class Comment {
|
795
|
+
@JRubyMethod
|
796
|
+
public static IRubyObject content(IRubyObject self) {
|
797
|
+
return hpricot_ele_get_name(self);
|
798
|
+
}
|
262
799
|
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
buffer_size += BUFSIZE;
|
268
|
-
char[] new_buf = new char[buffer_size];
|
269
|
-
System.arraycopy(buf, 0, new_buf, 0, buf.length);
|
270
|
-
buf = new_buf;
|
271
|
-
space = buffer_size - have;
|
800
|
+
@JRubyMethod(name = "content=")
|
801
|
+
public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
|
802
|
+
return hpricot_ele_set_name(self, value);
|
803
|
+
}
|
272
804
|
}
|
273
805
|
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
806
|
+
public static class DocType {
|
807
|
+
@JRubyMethod
|
808
|
+
public static IRubyObject raw_string(IRubyObject self) {
|
809
|
+
return hpricot_ele_get_name(self);
|
810
|
+
}
|
811
|
+
|
812
|
+
@JRubyMethod
|
813
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
814
|
+
return hpricot_ele_clear_name(self);
|
815
|
+
}
|
816
|
+
|
817
|
+
@JRubyMethod
|
818
|
+
public static IRubyObject target(IRubyObject self) {
|
819
|
+
return hpricot_ele_get_target(self);
|
820
|
+
}
|
821
|
+
|
822
|
+
@JRubyMethod(name = "target=")
|
823
|
+
public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
|
824
|
+
return hpricot_ele_set_target(self, value);
|
825
|
+
}
|
826
|
+
|
827
|
+
@JRubyMethod
|
828
|
+
public static IRubyObject public_id(IRubyObject self) {
|
829
|
+
return hpricot_ele_get_public_id(self);
|
830
|
+
}
|
831
|
+
|
832
|
+
@JRubyMethod(name = "public_id=")
|
833
|
+
public static IRubyObject public_id_set(IRubyObject self, IRubyObject value) {
|
834
|
+
return hpricot_ele_set_public_id(self, value);
|
835
|
+
}
|
836
|
+
|
837
|
+
@JRubyMethod
|
838
|
+
public static IRubyObject system_id(IRubyObject self) {
|
839
|
+
return hpricot_ele_get_system_id(self);
|
840
|
+
}
|
841
|
+
|
842
|
+
@JRubyMethod(name = "system_id=")
|
843
|
+
public static IRubyObject system_id_set(IRubyObject self, IRubyObject value) {
|
844
|
+
return hpricot_ele_set_system_id(self, value);
|
845
|
+
}
|
278
846
|
}
|
279
847
|
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
848
|
+
public static class Elem {
|
849
|
+
@JRubyMethod
|
850
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
851
|
+
return hpricot_ele_clear_raw(self);
|
852
|
+
}
|
853
|
+
}
|
284
854
|
|
285
|
-
|
286
|
-
|
855
|
+
public static class BogusETag {
|
856
|
+
@JRubyMethod
|
857
|
+
public static IRubyObject raw_string(IRubyObject self) {
|
858
|
+
return hpricot_ele_get_attr(self);
|
859
|
+
}
|
287
860
|
|
288
|
-
|
289
|
-
|
290
|
-
|
861
|
+
@JRubyMethod
|
862
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
863
|
+
return hpricot_ele_clear_attr(self);
|
864
|
+
}
|
291
865
|
}
|
292
866
|
|
293
|
-
|
294
|
-
|
867
|
+
public static class Text {
|
868
|
+
@JRubyMethod
|
869
|
+
public static IRubyObject raw_string(IRubyObject self) {
|
870
|
+
return hpricot_ele_get_name(self);
|
871
|
+
}
|
295
872
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
873
|
+
@JRubyMethod
|
874
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
875
|
+
return hpricot_ele_clear_name(self);
|
876
|
+
}
|
877
|
+
|
878
|
+
@JRubyMethod
|
879
|
+
public static IRubyObject content(IRubyObject self) {
|
880
|
+
return hpricot_ele_get_name(self);
|
881
|
+
}
|
882
|
+
|
883
|
+
@JRubyMethod(name = "content=")
|
884
|
+
public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
|
885
|
+
return hpricot_ele_set_name(self, value);
|
886
|
+
}
|
304
887
|
}
|
888
|
+
|
889
|
+
public static class XMLDecl {
|
890
|
+
@JRubyMethod
|
891
|
+
public static IRubyObject raw_string(IRubyObject self) {
|
892
|
+
return hpricot_ele_get_name(self);
|
893
|
+
}
|
894
|
+
|
895
|
+
@JRubyMethod
|
896
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
897
|
+
return hpricot_ele_clear_name(self);
|
898
|
+
}
|
899
|
+
|
900
|
+
@JRubyMethod
|
901
|
+
public static IRubyObject encoding(IRubyObject self) {
|
902
|
+
return hpricot_ele_get_encoding(self);
|
903
|
+
}
|
904
|
+
|
905
|
+
@JRubyMethod(name = "encoding=")
|
906
|
+
public static IRubyObject encoding_set(IRubyObject self, IRubyObject value) {
|
907
|
+
return hpricot_ele_set_encoding(self, value);
|
908
|
+
}
|
909
|
+
|
910
|
+
@JRubyMethod
|
911
|
+
public static IRubyObject standalone(IRubyObject self) {
|
912
|
+
return hpricot_ele_get_standalone(self);
|
913
|
+
}
|
914
|
+
|
915
|
+
@JRubyMethod(name = "standalone=")
|
916
|
+
public static IRubyObject standalone_set(IRubyObject self, IRubyObject value) {
|
917
|
+
return hpricot_ele_set_standalone(self, value);
|
918
|
+
}
|
919
|
+
|
920
|
+
@JRubyMethod
|
921
|
+
public static IRubyObject version(IRubyObject self) {
|
922
|
+
return hpricot_ele_get_version(self);
|
923
|
+
}
|
924
|
+
|
925
|
+
@JRubyMethod(name = "version=")
|
926
|
+
public static IRubyObject version_set(IRubyObject self, IRubyObject value) {
|
927
|
+
return hpricot_ele_set_version(self, value);
|
928
|
+
}
|
929
|
+
}
|
930
|
+
|
931
|
+
public static class ProcIns {
|
932
|
+
@JRubyMethod
|
933
|
+
public static IRubyObject target(IRubyObject self) {
|
934
|
+
return hpricot_ele_get_name(self);
|
935
|
+
}
|
936
|
+
|
937
|
+
@JRubyMethod(name = "target=")
|
938
|
+
public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
|
939
|
+
return hpricot_ele_set_name(self, value);
|
940
|
+
}
|
941
|
+
|
942
|
+
@JRubyMethod
|
943
|
+
public static IRubyObject content(IRubyObject self) {
|
944
|
+
return hpricot_ele_get_attr(self);
|
945
|
+
}
|
946
|
+
|
947
|
+
@JRubyMethod(name = "content=")
|
948
|
+
public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
|
949
|
+
return hpricot_ele_set_attr(self, value);
|
950
|
+
}
|
951
|
+
}
|
952
|
+
|
953
|
+
public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
|
954
|
+
|
955
|
+
public final static int H_ELE_TAG = 0;
|
956
|
+
public final static int H_ELE_PARENT = 1;
|
957
|
+
public final static int H_ELE_ATTR = 2;
|
958
|
+
public final static int H_ELE_ETAG = 3;
|
959
|
+
public final static int H_ELE_RAW = 4;
|
960
|
+
public final static int H_ELE_EC = 5;
|
961
|
+
public final static int H_ELE_HASH = 6;
|
962
|
+
public final static int H_ELE_CHILDREN = 7;
|
963
|
+
|
964
|
+
public static IRubyObject H_ELE_GET(IRubyObject recv, int n) {
|
965
|
+
return ((IRubyObject[])recv.dataGetStruct())[n];
|
966
|
+
}
|
967
|
+
|
968
|
+
public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
|
969
|
+
((IRubyObject[])recv.dataGetStruct())[n] = value;
|
970
|
+
return value;
|
971
|
+
}
|
972
|
+
|
973
|
+
private static class RefCallback implements Callback {
|
974
|
+
private final int n;
|
975
|
+
public RefCallback(int n) { this.n = n; }
|
976
|
+
|
977
|
+
public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
|
978
|
+
return H_ELE_GET(recv, n);
|
979
|
+
}
|
980
|
+
|
981
|
+
public Arity getArity() {
|
982
|
+
return Arity.NO_ARGUMENTS;
|
983
|
+
}
|
984
|
+
}
|
985
|
+
|
986
|
+
private static class SetCallback implements Callback {
|
987
|
+
private final int n;
|
988
|
+
public SetCallback(int n) { this.n = n; }
|
989
|
+
|
990
|
+
public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
|
991
|
+
return H_ELE_SET(recv, n, args[0]);
|
992
|
+
}
|
993
|
+
|
994
|
+
public Arity getArity() {
|
995
|
+
return Arity.ONE_ARGUMENT;
|
996
|
+
}
|
997
|
+
}
|
998
|
+
|
999
|
+
private final static Callback[] ref_func = new Callback[]{
|
1000
|
+
new RefCallback(0),
|
1001
|
+
new RefCallback(1),
|
1002
|
+
new RefCallback(2),
|
1003
|
+
new RefCallback(3),
|
1004
|
+
new RefCallback(4),
|
1005
|
+
new RefCallback(5),
|
1006
|
+
new RefCallback(6),
|
1007
|
+
new RefCallback(7),
|
1008
|
+
new RefCallback(8),
|
1009
|
+
new RefCallback(9)};
|
1010
|
+
|
1011
|
+
private final static Callback[] set_func = new Callback[]{
|
1012
|
+
new SetCallback(0),
|
1013
|
+
new SetCallback(1),
|
1014
|
+
new SetCallback(2),
|
1015
|
+
new SetCallback(3),
|
1016
|
+
new SetCallback(4),
|
1017
|
+
new SetCallback(5),
|
1018
|
+
new SetCallback(6),
|
1019
|
+
new SetCallback(7),
|
1020
|
+
new SetCallback(8),
|
1021
|
+
new SetCallback(9)};
|
1022
|
+
|
1023
|
+
public final static ObjectAllocator alloc_hpricot_struct = new ObjectAllocator() {
|
1024
|
+
// alloc_hpricot_struct
|
1025
|
+
public IRubyObject allocate(Ruby runtime, RubyClass klass) {
|
1026
|
+
RubyClass kurrent = klass;
|
1027
|
+
Object sz = kurrent.fastGetInternalVariable("__size__");
|
1028
|
+
while(sz == null && kurrent != null) {
|
1029
|
+
kurrent = kurrent.getSuperClass();
|
1030
|
+
sz = kurrent.fastGetInternalVariable("__size__");
|
1031
|
+
}
|
1032
|
+
int size = RubyNumeric.fix2int((RubyObject)sz);
|
1033
|
+
RubyObject obj = new RubyObject(runtime, klass);
|
1034
|
+
IRubyObject[] all = new IRubyObject[size];
|
1035
|
+
java.util.Arrays.fill(all, runtime.getNil());
|
1036
|
+
obj.dataWrapStruct(all);
|
1037
|
+
return obj;
|
1038
|
+
}
|
1039
|
+
};
|
1040
|
+
|
1041
|
+
public static RubyClass makeHpricotStruct(Ruby runtime, IRubyObject[] members) {
|
1042
|
+
RubyClass klass = RubyClass.newClass(runtime, runtime.getObject());
|
1043
|
+
klass.fastSetInternalVariable("__size__", runtime.newFixnum(members.length));
|
1044
|
+
klass.setAllocator(alloc_hpricot_struct);
|
1045
|
+
|
1046
|
+
for(int i = 0; i < members.length; i++) {
|
1047
|
+
String id = members[i].toString();
|
1048
|
+
klass.defineMethod(id, ref_func[i]);
|
1049
|
+
klass.defineMethod(id + "=", set_func[i]);
|
1050
|
+
}
|
305
1051
|
|
306
|
-
|
307
|
-
ele_open = false;
|
308
|
-
if(ts > -1) {
|
309
|
-
mark_tag = ts;
|
310
|
-
ts = -1;
|
311
|
-
text = true;
|
312
|
-
}
|
1052
|
+
return klass;
|
313
1053
|
}
|
314
1054
|
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
1055
|
+
public boolean basicLoad(final Ruby runtime) throws IOException {
|
1056
|
+
Init_hpricot_scan(runtime);
|
1057
|
+
return true;
|
1058
|
+
}
|
1059
|
+
|
1060
|
+
public static class Extra {
|
1061
|
+
IRubyObject symAllow, symDeny, sym_xmldecl, sym_doctype,
|
1062
|
+
sym_procins, sym_stag, sym_etag, sym_emptytag,
|
1063
|
+
sym_allowed, sym_children, sym_comment,
|
1064
|
+
sym_cdata, sym_name, sym_parent,
|
1065
|
+
sym_raw_attributes, sym_raw_string, sym_tagno,
|
1066
|
+
sym_text, sym_EMPTY, sym_CDATA;
|
1067
|
+
|
1068
|
+
public RubyModule mHpricot;
|
1069
|
+
public RubyClass structElem;
|
1070
|
+
public RubyClass structAttr;
|
1071
|
+
public RubyClass structBasic;
|
1072
|
+
public RubyClass cDoc;
|
1073
|
+
public RubyClass cCData;
|
1074
|
+
public RubyClass cComment;
|
1075
|
+
public RubyClass cDocType;
|
1076
|
+
public RubyClass cElem;
|
1077
|
+
public RubyClass cBogusETag;
|
1078
|
+
public RubyClass cText;
|
1079
|
+
public RubyClass cXMLDecl;
|
1080
|
+
public RubyClass cProcIns;
|
1081
|
+
public RubyClass rb_eHpricotParseError;
|
1082
|
+
public IRubyObject reProcInsParse;
|
1083
|
+
|
1084
|
+
public Extra(Ruby runtime) {
|
1085
|
+
symAllow = runtime.newSymbol("allow");
|
1086
|
+
symDeny = runtime.newSymbol("deny");
|
1087
|
+
sym_xmldecl = runtime.newSymbol("xmldecl");
|
1088
|
+
sym_doctype = runtime.newSymbol("doctype");
|
1089
|
+
sym_procins = runtime.newSymbol("procins");
|
1090
|
+
sym_stag = runtime.newSymbol("stag");
|
1091
|
+
sym_etag = runtime.newSymbol("etag");
|
1092
|
+
sym_emptytag = runtime.newSymbol("emptytag");
|
1093
|
+
sym_allowed = runtime.newSymbol("allowed");
|
1094
|
+
sym_children = runtime.newSymbol("children");
|
1095
|
+
sym_comment = runtime.newSymbol("comment");
|
1096
|
+
sym_cdata = runtime.newSymbol("cdata");
|
1097
|
+
sym_name = runtime.newSymbol("name");
|
1098
|
+
sym_parent = runtime.newSymbol("parent");
|
1099
|
+
sym_raw_attributes = runtime.newSymbol("raw_attributes");
|
1100
|
+
sym_raw_string = runtime.newSymbol("raw_string");
|
1101
|
+
sym_tagno = runtime.newSymbol("tagno");
|
1102
|
+
sym_text = runtime.newSymbol("text");
|
1103
|
+
sym_EMPTY = runtime.newSymbol("EMPTY");
|
1104
|
+
sym_CDATA = runtime.newSymbol("CDATA");
|
326
1105
|
}
|
327
|
-
}
|
328
|
-
mark_tag = 0;
|
329
|
-
} else {
|
330
|
-
have = pe - ts;
|
331
|
-
System.arraycopy(buf,ts,buf,0,have);
|
332
|
-
SLIDE(tag);
|
333
|
-
SLIDE(akey);
|
334
|
-
SLIDE(aval);
|
335
|
-
te = (te - ts);
|
336
|
-
ts = 0;
|
337
1106
|
}
|
338
|
-
}
|
339
|
-
return runtime.getNil();
|
340
|
-
}
|
341
1107
|
|
342
|
-
public static
|
343
|
-
|
344
|
-
HpricotScanService service = new HpricotScanService();
|
345
|
-
service.runtime = runtime;
|
346
|
-
service.xmldecl = runtime.newSymbol("xmldecl");
|
347
|
-
service.doctype = runtime.newSymbol("doctype");
|
348
|
-
service.procins = runtime.newSymbol("procins");
|
349
|
-
service.stag = runtime.newSymbol("stag");
|
350
|
-
service.etag = runtime.newSymbol("etag");
|
351
|
-
service.emptytag = runtime.newSymbol("emptytag");
|
352
|
-
service.comment = runtime.newSymbol("comment");
|
353
|
-
service.cdata = runtime.newSymbol("cdata");
|
354
|
-
service.sym_text = runtime.newSymbol("text");
|
355
|
-
service.block = block;
|
356
|
-
return service.hpricot_scan(recv, port);
|
357
|
-
}
|
1108
|
+
public static void Init_hpricot_scan(Ruby runtime) {
|
1109
|
+
Extra x = new Extra(runtime);
|
358
1110
|
|
1111
|
+
x.mHpricot = runtime.defineModule("Hpricot");
|
1112
|
+
x.mHpricot.dataWrapStruct(x);
|
359
1113
|
|
360
|
-
|
361
|
-
|
362
|
-
return true;
|
363
|
-
}
|
1114
|
+
x.mHpricot.getSingletonClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
|
1115
|
+
x.mHpricot.defineAnnotatedMethods(HpricotModule.class);
|
364
1116
|
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
1117
|
+
x.rb_eHpricotParseError = x.mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
|
1118
|
+
|
1119
|
+
x.structElem = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes, x.sym_etag, x.sym_raw_string, x.sym_allowed, x.sym_tagno, x.sym_children});
|
1120
|
+
x.structAttr = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes});
|
1121
|
+
x.structBasic= makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent});
|
1122
|
+
|
1123
|
+
x.cDoc = x.mHpricot.defineClassUnder("Doc", x.structElem, x.structElem.getAllocator());
|
1124
|
+
|
1125
|
+
x.cCData = x.mHpricot.defineClassUnder("CData", x.structBasic, x.structBasic.getAllocator());
|
1126
|
+
x.cCData.defineAnnotatedMethods(CData.class);
|
1127
|
+
|
1128
|
+
x.cComment = x.mHpricot.defineClassUnder("Comment", x.structBasic, x.structBasic.getAllocator());
|
1129
|
+
x.cComment.defineAnnotatedMethods(Comment.class);
|
1130
|
+
|
1131
|
+
x.cDocType = x.mHpricot.defineClassUnder("DocType", x.structAttr, x.structAttr.getAllocator());
|
1132
|
+
x.cDocType.defineAnnotatedMethods(DocType.class);
|
1133
|
+
|
1134
|
+
x.cElem = x.mHpricot.defineClassUnder("Elem", x.structElem, x.structElem.getAllocator());
|
1135
|
+
x.cElem.defineAnnotatedMethods(Elem.class);
|
1136
|
+
|
1137
|
+
x.cBogusETag = x.mHpricot.defineClassUnder("BogusETag", x.structAttr, x.structAttr.getAllocator());
|
1138
|
+
x.cBogusETag.defineAnnotatedMethods(BogusETag.class);
|
1139
|
+
|
1140
|
+
x.cText = x.mHpricot.defineClassUnder("Text", x.structBasic, x.structBasic.getAllocator());
|
1141
|
+
x.cText.defineAnnotatedMethods(Text.class);
|
1142
|
+
|
1143
|
+
x.cXMLDecl = x.mHpricot.defineClassUnder("XMLDecl", x.structAttr, x.structAttr.getAllocator());
|
1144
|
+
x.cXMLDecl.defineAnnotatedMethods(XMLDecl.class);
|
1145
|
+
|
1146
|
+
x.cProcIns = x.mHpricot.defineClassUnder("ProcIns", x.structAttr, x.structAttr.getAllocator());
|
1147
|
+
x.cProcIns.defineAnnotatedMethods(ProcIns.class);
|
1148
|
+
|
1149
|
+
x.reProcInsParse = runtime.evalScriptlet("/\\A<\\?(\\S+)\\s+(.+)/m");
|
1150
|
+
x.mHpricot.setConstant("ProcInsParse", x.reProcInsParse);
|
1151
|
+
}
|
373
1152
|
}
|