why-hpricot 0.6.210 → 0.7.229
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- data/CHANGELOG +13 -0
- data/Rakefile +14 -9
- data/ext/fast_xs/fast_xs.c +2 -1
- data/ext/hpricot_scan/HpricotScanService.java +1122 -342
- data/ext/hpricot_scan/hpricot_css.c +2112 -2116
- data/ext/hpricot_scan/hpricot_scan.c +1169 -923
- data/ext/hpricot_scan/hpricot_scan.java.rl +1078 -299
- data/ext/hpricot_scan/hpricot_scan.rl +327 -237
- data/lib/hpricot/elements.rb +1 -1
- data/lib/hpricot/inspect.rb +2 -2
- data/lib/hpricot/modules.rb +2 -0
- data/lib/hpricot/tag.rb +43 -22
- data/lib/hpricot/traverse.rb +1 -0
- data/test/test_alter.rb +20 -2
- data/test/test_parser.rb +19 -0
- data/test/test_preserved.rb +9 -0
- metadata +6 -6
data/CHANGELOG
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
= 0.8
|
2
|
+
=== 31st March, 2009
|
3
|
+
* Saving memory and speed by using RStruct-based elements in the C extension.
|
4
|
+
* Bug in tag parsing, causing runaway <script> and <style> tags in HTML.
|
5
|
+
* Problem compiling under Ruby 1.9, due to our_rb_hash_lookup function meant for Ruby 1.8.
|
6
|
+
* CData was missing inner_text method.
|
7
|
+
|
8
|
+
= 0.7
|
9
|
+
=== 17th March, 2009
|
10
|
+
* Rewritten parser routine, much lighter on memory, quite a bit faster.
|
11
|
+
* Friendlier with Ruby 1.9.
|
12
|
+
* Fixes to nth-child and text() selectors.
|
13
|
+
|
1
14
|
= 0.6
|
2
15
|
=== 15th June, 2007
|
3
16
|
* Hpricot for JRuby -- nice work Ola Bini!
|
data/Rakefile
CHANGED
@@ -10,7 +10,7 @@ RbConfig = Config unless defined?(RbConfig)
|
|
10
10
|
|
11
11
|
NAME = "hpricot"
|
12
12
|
REV = (`#{ENV['GIT'] || "git"} rev-list HEAD`.split.length + 1).to_s
|
13
|
-
VERS = ENV['VERSION'] || "0.
|
13
|
+
VERS = ENV['VERSION'] || "0.8" + (REV ? ".#{REV}" : "")
|
14
14
|
PKG = "#{NAME}-#{VERS}"
|
15
15
|
BIN = "*.{bundle,jar,so,o,obj,pdb,lib,def,exp,class}"
|
16
16
|
CLEAN.include ["ext/hpricot_scan/#{BIN}", "ext/fast_xs/#{BIN}", "lib/**/#{BIN}",
|
@@ -53,7 +53,7 @@ SPEC =
|
|
53
53
|
end
|
54
54
|
|
55
55
|
Win32Spec = SPEC.dup
|
56
|
-
Win32Spec.platform = 'mswin32'
|
56
|
+
Win32Spec.platform = 'x86-mswin32'
|
57
57
|
Win32Spec.files = PKG_FILES + ["lib/hpricot_scan.so", "lib/fast_xs.so"]
|
58
58
|
Win32Spec.extensions = []
|
59
59
|
|
@@ -68,6 +68,7 @@ task :package => [:clean, :ragel]
|
|
68
68
|
desc "Releases packages for all Hpricot packages and platforms."
|
69
69
|
task :release => [:package, :package_win32, :package_jruby]
|
70
70
|
|
71
|
+
|
71
72
|
desc "Run all the tests"
|
72
73
|
Rake::TestTask.new do |t|
|
73
74
|
t.libs << "test"
|
@@ -75,6 +76,8 @@ Rake::TestTask.new do |t|
|
|
75
76
|
t.verbose = true
|
76
77
|
end
|
77
78
|
|
79
|
+
#task :test => [:hpricot_java] if defined?(JRUBY_VERSION)
|
80
|
+
|
78
81
|
Rake::RDocTask.new do |rdoc|
|
79
82
|
rdoc.rdoc_dir = 'doc/rdoc'
|
80
83
|
rdoc.options += RDOC_OPTS
|
@@ -108,7 +111,7 @@ end
|
|
108
111
|
|
109
112
|
file ext_so => ext_files do
|
110
113
|
Dir.chdir(ext) do
|
111
|
-
sh(RUBY_PLATFORM =~ /
|
114
|
+
sh(RUBY_PLATFORM =~ /mswin/ ? 'nmake' : 'make')
|
112
115
|
end
|
113
116
|
cp ext_so, "lib"
|
114
117
|
end
|
@@ -147,7 +150,8 @@ desc "Generates the C scanner code with Ragel."
|
|
147
150
|
task :ragel => [:ragel_version] do
|
148
151
|
if @ragel_v >= 6.1
|
149
152
|
@ragel_c_code_generation_style = RAGEL_C_CODE_GENERATION_STYLES[DEFAULT_RAGEL_C_CODE_GENERATION]
|
150
|
-
|
153
|
+
console_sep = (ENV['COMSPEC'] =~ /cmd\.exe/) ? '&' : ';'
|
154
|
+
sh %{cd ext/hpricot_scan #{console_sep} ragel hpricot_scan.rl -#{@ragel_c_code_generation_style} -o hpricot_scan.c && ragel hpricot_css.rl -#{@ragel_c_code_generation_style} -o hpricot_css.c}
|
151
155
|
else
|
152
156
|
STDERR.puts "Ragel 6.1 or greater is required."
|
153
157
|
exit(1)
|
@@ -160,6 +164,7 @@ desc "Generates the Java scanner code using the Ragel table-driven code generati
|
|
160
164
|
task :ragel_java => [:ragel_version] do
|
161
165
|
if @ragel_v >= 6.1
|
162
166
|
puts "compiling with ragel version #{@ragel_v}"
|
167
|
+
sh %{ragel -J -o ext/hpricot_scan/HpricotCss.java ext/hpricot_scan/hpricot_css.java.rl}
|
163
168
|
sh %{ragel -J -o ext/hpricot_scan/HpricotScanService.java ext/hpricot_scan/hpricot_scan.java.rl}
|
164
169
|
else
|
165
170
|
STDERR.puts "Ragel 6.1 or greater is required."
|
@@ -180,7 +185,7 @@ task :package_win32 => ["fast_xs_win32", "hpricot_scan_win32"] do
|
|
180
185
|
Dir.chdir("#{WIN32_PKG_DIR}") do
|
181
186
|
Gem::Builder.new(Win32Spec).build
|
182
187
|
verbose(true) {
|
183
|
-
mv Dir["*.gem"].first, "../pkg
|
188
|
+
mv Dir["*.gem"].first, "../pkg/"
|
184
189
|
}
|
185
190
|
end
|
186
191
|
end
|
@@ -201,20 +206,20 @@ def java_classpath_arg
|
|
201
206
|
classpath ? "-cp #{classpath}" : ""
|
202
207
|
end
|
203
208
|
|
204
|
-
def compile_java(
|
205
|
-
sh %{javac -source 1.
|
209
|
+
def compile_java(filenames, jarname)
|
210
|
+
sh %{javac -source 1.5 -target 1.5 #{java_classpath_arg} #{filenames.join(" ")}}
|
206
211
|
sh %{jar cf #{jarname} *.class}
|
207
212
|
end
|
208
213
|
|
209
214
|
task :hpricot_scan_java => [:ragel_java] do
|
210
215
|
Dir.chdir "ext/hpricot_scan" do
|
211
|
-
compile_java("HpricotScanService.java", "hpricot_scan.jar")
|
216
|
+
compile_java(["HpricotScanService.java", "HpricotCss.java"], "hpricot_scan.jar")
|
212
217
|
end
|
213
218
|
end
|
214
219
|
|
215
220
|
task :fast_xs_java do
|
216
221
|
Dir.chdir "ext/fast_xs" do
|
217
|
-
compile_java("FastXsService.java", "fast_xs.jar")
|
222
|
+
compile_java(["FastXsService.java"], "fast_xs.jar")
|
218
223
|
end
|
219
224
|
end
|
220
225
|
|
data/ext/fast_xs/fast_xs.c
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
|
7
7
|
#ifndef RARRAY_LEN
|
8
8
|
#define RARRAY_LEN(arr) RARRAY(arr)->len
|
9
|
+
#define RARRAY_PTR(arr) RARRAY(arr)->ptr
|
9
10
|
#define RSTRING_LEN(str) RSTRING(str)->len
|
10
11
|
#define RSTRING_PTR(str) RSTRING(str)->ptr
|
11
12
|
#endif
|
@@ -115,7 +116,7 @@ static long escape(char *buf, int n)
|
|
115
116
|
|
116
117
|
if (VALID_VALUE(n)) {
|
117
118
|
/* return snprintf(buf, sizeof(""), "&#%i;", n); */
|
118
|
-
|
119
|
+
RUBY_EXTERN const char ruby_digitmap[];
|
119
120
|
int rv = 3; /* &#; */
|
120
121
|
buf += bytes_for(n);
|
121
122
|
*--buf = ';';
|
@@ -3,150 +3,517 @@
|
|
3
3
|
import java.io.IOException;
|
4
4
|
|
5
5
|
import org.jruby.Ruby;
|
6
|
+
import org.jruby.RubyArray;
|
6
7
|
import org.jruby.RubyClass;
|
7
8
|
import org.jruby.RubyHash;
|
8
9
|
import org.jruby.RubyModule;
|
9
10
|
import org.jruby.RubyNumeric;
|
11
|
+
import org.jruby.RubyObject;
|
10
12
|
import org.jruby.RubyObjectAdapter;
|
13
|
+
import org.jruby.RubyRegexp;
|
11
14
|
import org.jruby.RubyString;
|
15
|
+
import org.jruby.anno.JRubyMethod;
|
16
|
+
import org.jruby.exceptions.RaiseException;
|
12
17
|
import org.jruby.javasupport.JavaEmbedUtils;
|
18
|
+
import org.jruby.runtime.Arity;
|
13
19
|
import org.jruby.runtime.Block;
|
14
|
-
import org.jruby.runtime.
|
20
|
+
import org.jruby.runtime.ObjectAllocator;
|
21
|
+
import org.jruby.runtime.ThreadContext;
|
15
22
|
import org.jruby.runtime.builtin.IRubyObject;
|
23
|
+
import org.jruby.runtime.callback.Callback;
|
16
24
|
import org.jruby.exceptions.RaiseException;
|
17
25
|
import org.jruby.runtime.load.BasicLibraryService;
|
26
|
+
import org.jruby.util.ByteList;
|
18
27
|
|
19
28
|
public class HpricotScanService implements BasicLibraryService {
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
29
|
+
public static byte[] realloc(byte[] input, int size) {
|
30
|
+
byte[] newArray = new byte[size];
|
31
|
+
System.arraycopy(input, 0, newArray, 0, input.length);
|
32
|
+
return newArray;
|
33
|
+
}
|
34
|
+
|
35
|
+
// hpricot_state
|
36
|
+
public static class State {
|
37
|
+
public IRubyObject doc;
|
38
|
+
public IRubyObject focus;
|
39
|
+
public IRubyObject last;
|
40
|
+
public IRubyObject EC;
|
41
|
+
public boolean xml, strict, fixup;
|
42
|
+
}
|
43
|
+
|
44
|
+
static boolean OPT(IRubyObject opts, String key) {
|
45
|
+
Ruby runtime = opts.getRuntime();
|
46
|
+
return !opts.isNil() && ((RubyHash)opts).op_aref(runtime.getCurrentContext(), runtime.newSymbol(key)).isTrue();
|
47
|
+
}
|
48
|
+
|
49
|
+
// H_PROP(name, H_ELE_TAG)
|
50
|
+
public static IRubyObject hpricot_ele_set_name(IRubyObject self, IRubyObject x) {
|
51
|
+
H_ELE_SET(self, H_ELE_TAG, x);
|
52
|
+
return self;
|
53
|
+
}
|
54
|
+
|
55
|
+
public static IRubyObject hpricot_ele_clear_name(IRubyObject self) {
|
56
|
+
H_ELE_SET(self, H_ELE_TAG, self.getRuntime().getNil());
|
57
|
+
return self.getRuntime().getTrue();
|
58
|
+
}
|
59
|
+
|
60
|
+
public static IRubyObject hpricot_ele_get_name(IRubyObject self) {
|
61
|
+
return H_ELE_GET(self, H_ELE_TAG);
|
62
|
+
}
|
63
|
+
|
64
|
+
// H_PROP(raw, H_ELE_RAW)
|
65
|
+
public static IRubyObject hpricot_ele_set_raw(IRubyObject self, IRubyObject x) {
|
66
|
+
H_ELE_SET(self, H_ELE_RAW, x);
|
67
|
+
return self;
|
68
|
+
}
|
69
|
+
|
70
|
+
public static IRubyObject hpricot_ele_clear_raw(IRubyObject self) {
|
71
|
+
H_ELE_SET(self, H_ELE_RAW, self.getRuntime().getNil());
|
72
|
+
return self.getRuntime().getTrue();
|
73
|
+
}
|
74
|
+
|
75
|
+
public static IRubyObject hpricot_ele_get_raw(IRubyObject self) {
|
76
|
+
return H_ELE_GET(self, H_ELE_RAW);
|
77
|
+
}
|
78
|
+
|
79
|
+
// H_PROP(parent, H_ELE_PARENT)
|
80
|
+
public static IRubyObject hpricot_ele_set_parent(IRubyObject self, IRubyObject x) {
|
81
|
+
H_ELE_SET(self, H_ELE_PARENT, x);
|
82
|
+
return self;
|
83
|
+
}
|
84
|
+
|
85
|
+
public static IRubyObject hpricot_ele_clear_parent(IRubyObject self) {
|
86
|
+
H_ELE_SET(self, H_ELE_PARENT, self.getRuntime().getNil());
|
87
|
+
return self.getRuntime().getTrue();
|
88
|
+
}
|
89
|
+
|
90
|
+
public static IRubyObject hpricot_ele_get_parent(IRubyObject self) {
|
91
|
+
return H_ELE_GET(self, H_ELE_PARENT);
|
92
|
+
}
|
93
|
+
|
94
|
+
// H_PROP(attr, H_ELE_ATTR)
|
95
|
+
public static IRubyObject hpricot_ele_set_attr(IRubyObject self, IRubyObject x) {
|
96
|
+
H_ELE_SET(self, H_ELE_ATTR, x);
|
97
|
+
return self;
|
98
|
+
}
|
99
|
+
|
100
|
+
public static IRubyObject hpricot_ele_clear_attr(IRubyObject self) {
|
101
|
+
H_ELE_SET(self, H_ELE_ATTR, self.getRuntime().getNil());
|
102
|
+
return self.getRuntime().getTrue();
|
103
|
+
}
|
104
|
+
|
105
|
+
public static IRubyObject hpricot_ele_get_attr(IRubyObject self) {
|
106
|
+
return H_ELE_GET(self, H_ELE_ATTR);
|
107
|
+
}
|
108
|
+
|
109
|
+
// H_PROP(etag, H_ELE_ETAG)
|
110
|
+
public static IRubyObject hpricot_ele_set_etag(IRubyObject self, IRubyObject x) {
|
111
|
+
H_ELE_SET(self, H_ELE_ETAG, x);
|
112
|
+
return self;
|
113
|
+
}
|
114
|
+
|
115
|
+
public static IRubyObject hpricot_ele_clear_etag(IRubyObject self) {
|
116
|
+
H_ELE_SET(self, H_ELE_ETAG, self.getRuntime().getNil());
|
117
|
+
return self.getRuntime().getTrue();
|
118
|
+
}
|
119
|
+
|
120
|
+
public static IRubyObject hpricot_ele_get_etag(IRubyObject self) {
|
121
|
+
return H_ELE_GET(self, H_ELE_ETAG);
|
122
|
+
}
|
123
|
+
|
124
|
+
// H_PROP(children, H_ELE_CHILDREN)
|
125
|
+
public static IRubyObject hpricot_ele_set_children(IRubyObject self, IRubyObject x) {
|
126
|
+
H_ELE_SET(self, H_ELE_CHILDREN, x);
|
127
|
+
return self;
|
128
|
+
}
|
129
|
+
|
130
|
+
public static IRubyObject hpricot_ele_clear_children(IRubyObject self) {
|
131
|
+
H_ELE_SET(self, H_ELE_CHILDREN, self.getRuntime().getNil());
|
132
|
+
return self.getRuntime().getTrue();
|
133
|
+
}
|
134
|
+
|
135
|
+
public static IRubyObject hpricot_ele_get_children(IRubyObject self) {
|
136
|
+
return H_ELE_GET(self, H_ELE_CHILDREN);
|
137
|
+
}
|
138
|
+
|
139
|
+
// H_ATTR(target)
|
140
|
+
public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
|
141
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("target"), x);
|
142
|
+
return self;
|
143
|
+
}
|
144
|
+
|
145
|
+
public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
|
146
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
|
147
|
+
}
|
148
|
+
|
149
|
+
// H_ATTR(encoding)
|
150
|
+
public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
|
151
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
|
152
|
+
return self;
|
153
|
+
}
|
154
|
+
|
155
|
+
public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
|
156
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
|
157
|
+
}
|
158
|
+
|
159
|
+
// H_ATTR(version)
|
160
|
+
public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
|
161
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
|
162
|
+
return self;
|
163
|
+
}
|
164
|
+
|
165
|
+
public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
|
166
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
|
167
|
+
}
|
168
|
+
|
169
|
+
// H_ATTR(standalone)
|
170
|
+
public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
|
171
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
|
172
|
+
return self;
|
173
|
+
}
|
174
|
+
|
175
|
+
public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
|
176
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
|
177
|
+
}
|
178
|
+
|
179
|
+
// H_ATTR(system_id)
|
180
|
+
public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
|
181
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
|
182
|
+
return self;
|
183
|
+
}
|
184
|
+
|
185
|
+
public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
|
186
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
|
187
|
+
}
|
188
|
+
|
189
|
+
// H_ATTR(public_id)
|
190
|
+
public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
|
191
|
+
((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
|
192
|
+
return self;
|
193
|
+
}
|
194
|
+
|
195
|
+
public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
|
196
|
+
return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
|
197
|
+
}
|
198
|
+
|
199
|
+
public static class Scanner {
|
200
|
+
public IRubyObject SET(int mark, int E, IRubyObject org) {
|
201
|
+
if(mark == -1 || E == mark) {
|
202
|
+
return runtime.newString("");
|
203
|
+
} else if(E > mark) {
|
204
|
+
return RubyString.newString(runtime, data, mark, E-mark);
|
205
|
+
} else {
|
206
|
+
return org;
|
207
|
+
}
|
208
|
+
}
|
209
|
+
|
210
|
+
public int SLIDE(int N) {
|
211
|
+
if(N > ts) {
|
212
|
+
return N - ts;
|
213
|
+
} else {
|
214
|
+
return N;
|
215
|
+
}
|
216
|
+
}
|
217
|
+
|
218
|
+
public IRubyObject CAT(IRubyObject N, int mark, int E) {
|
219
|
+
if(N.isNil()) {
|
220
|
+
return SET(mark, E, N);
|
221
|
+
} else {
|
222
|
+
((RubyString)N).cat(data, mark, E-mark);
|
223
|
+
return N;
|
224
|
+
}
|
225
|
+
}
|
226
|
+
|
227
|
+
public void ATTR(IRubyObject K, IRubyObject V) {
|
228
|
+
if(!K.isNil()) {
|
229
|
+
if(attr.isNil()) {
|
230
|
+
attr = RubyHash.newHash(runtime);
|
231
|
+
}
|
232
|
+
((RubyHash)attr).fastASet(K, V);
|
233
|
+
}
|
234
|
+
}
|
235
|
+
|
236
|
+
public void TEXT_PASS() {
|
237
|
+
if(!text) {
|
238
|
+
if(ele_open) {
|
239
|
+
ele_open = false;
|
240
|
+
if(ts != -1) {
|
241
|
+
mark_tag = ts;
|
242
|
+
}
|
243
|
+
} else {
|
244
|
+
mark_tag = p;
|
245
|
+
}
|
246
|
+
attr = runtime.getNil();
|
247
|
+
tag = runtime.getNil();
|
248
|
+
text = true;
|
249
|
+
}
|
250
|
+
}
|
251
|
+
|
252
|
+
public void ELE(IRubyObject N) {
|
253
|
+
if(te > ts || text) {
|
254
|
+
int raw = -1;
|
255
|
+
int rawlen = 0;
|
256
|
+
ele_open = false;
|
257
|
+
text = false;
|
258
|
+
|
259
|
+
if(ts != -1 && N != x.sym_cdata && N != x.sym_text && N != x.sym_procins && N != x.sym_comment) {
|
260
|
+
raw = ts;
|
261
|
+
rawlen = te - ts;
|
262
|
+
}
|
263
|
+
|
264
|
+
if(block.isGiven()) {
|
265
|
+
IRubyObject raw_string = runtime.getNil();
|
266
|
+
if(raw != -1) {
|
267
|
+
raw_string = RubyString.newString(runtime, data, raw, rawlen);
|
268
|
+
}
|
269
|
+
yieldTokens(N, tag, attr, runtime.getNil(), taint);
|
270
|
+
} else {
|
271
|
+
hpricotToken(S, N, tag, attr, raw, rawlen, taint);
|
272
|
+
}
|
273
|
+
}
|
274
|
+
}
|
275
|
+
|
276
|
+
|
277
|
+
public void EBLK(IRubyObject N, int T) {
|
278
|
+
tag = CAT(tag, mark_tag, p - T + 1);
|
279
|
+
ELE(N);
|
280
|
+
}
|
281
|
+
|
282
|
+
public void hpricotAdd(IRubyObject focus, IRubyObject ele) {
|
283
|
+
IRubyObject children = H_ELE_GET(focus, H_ELE_CHILDREN);
|
284
|
+
if(children.isNil()) {
|
285
|
+
H_ELE_SET(focus, H_ELE_CHILDREN, children = RubyArray.newArray(runtime, 1));
|
286
|
+
}
|
287
|
+
((RubyArray)children).append(ele);
|
288
|
+
H_ELE_SET(ele, H_ELE_PARENT, focus);
|
289
|
+
}
|
290
|
+
|
291
|
+
private static class TokenInfo {
|
292
|
+
public IRubyObject sym;
|
293
|
+
public IRubyObject tag;
|
294
|
+
public IRubyObject attr;
|
295
|
+
public int raw;
|
296
|
+
public int rawlen;
|
297
|
+
public IRubyObject ec;
|
298
|
+
public IRubyObject ele;
|
299
|
+
public Extra x;
|
300
|
+
public Ruby runtime;
|
301
|
+
public Scanner scanner;
|
302
|
+
public State S;
|
303
|
+
|
304
|
+
public void H_ELE(RubyClass klass) {
|
305
|
+
ele = klass.allocate();
|
306
|
+
if(klass == x.cElem) {
|
307
|
+
H_ELE_SET(ele, H_ELE_TAG, tag);
|
308
|
+
H_ELE_SET(ele, H_ELE_ATTR, attr);
|
309
|
+
H_ELE_SET(ele, H_ELE_EC, ec);
|
310
|
+
if(raw != -1 && (sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_doctype)) {
|
311
|
+
H_ELE_SET(ele, H_ELE_RAW, RubyString.newString(runtime, scanner.data, raw, rawlen));
|
312
|
+
}
|
313
|
+
} else if(klass == x.cDocType || klass == x.cProcIns || klass == x.cXMLDecl || klass == x.cBogusETag) {
|
314
|
+
if(klass == x.cBogusETag) {
|
315
|
+
H_ELE_SET(ele, H_ELE_TAG, tag);
|
316
|
+
if(raw != -1) {
|
317
|
+
H_ELE_SET(ele, H_ELE_ATTR, RubyString.newString(runtime, scanner.data, raw, rawlen));
|
318
|
+
}
|
319
|
+
} else {
|
320
|
+
if(klass == x.cDocType) {
|
321
|
+
scanner.ATTR(runtime.newSymbol("target"), tag);
|
322
|
+
}
|
323
|
+
H_ELE_SET(ele, H_ELE_ATTR, attr);
|
324
|
+
if(klass != x.cProcIns) {
|
325
|
+
tag = runtime.getNil();
|
326
|
+
if(raw != -1) {
|
327
|
+
tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
|
328
|
+
}
|
329
|
+
}
|
330
|
+
H_ELE_SET(ele, H_ELE_TAG, tag);
|
331
|
+
}
|
332
|
+
} else {
|
333
|
+
H_ELE_SET(ele, H_ELE_TAG, tag);
|
334
|
+
}
|
335
|
+
S.last = ele;
|
336
|
+
}
|
337
|
+
|
338
|
+
public void hpricotToken(boolean taint) {
|
339
|
+
//
|
340
|
+
// in html mode, fix up start tags incorrectly formed as empty tags
|
341
|
+
//
|
342
|
+
if(!S.xml) {
|
343
|
+
if(sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_etag) {
|
344
|
+
ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
|
345
|
+
if(ec.isNil()) {
|
346
|
+
tag = tag.callMethod(scanner.ctx, "downcase");
|
347
|
+
ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
|
348
|
+
}
|
349
|
+
}
|
350
|
+
|
351
|
+
if(H_ELE_GET(S.focus, H_ELE_EC) == x.sym_CDATA &&
|
352
|
+
(sym != x.sym_procins && sym != x.sym_comment && sym != x.sym_cdata && sym != x.sym_text) &&
|
353
|
+
!(sym == x.sym_etag && runtime.newFixnum(tag.hashCode()).equals(H_ELE_GET(S.focus, H_ELE_HASH)))) {
|
354
|
+
sym = x.sym_text;
|
355
|
+
tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
|
356
|
+
}
|
357
|
+
|
358
|
+
if(!ec.isNil()) {
|
359
|
+
if(sym == x.sym_emptytag) {
|
360
|
+
if(ec != x.sym_EMPTY) {
|
361
|
+
sym = x.sym_stag;
|
362
|
+
}
|
363
|
+
} else if(sym == x.sym_stag) {
|
364
|
+
if(ec == x.sym_EMPTY) {
|
365
|
+
sym = x.sym_emptytag;
|
366
|
+
}
|
367
|
+
}
|
368
|
+
}
|
369
|
+
}
|
370
|
+
|
371
|
+
if(sym == x.sym_emptytag || sym == x.sym_stag) {
|
372
|
+
IRubyObject name = runtime.newFixnum(tag.hashCode());
|
373
|
+
H_ELE(x.cElem);
|
374
|
+
H_ELE_SET(ele, H_ELE_HASH, name);
|
375
|
+
|
376
|
+
if(!S.xml) {
|
377
|
+
IRubyObject match = runtime.getNil(), e = S.focus;
|
378
|
+
while(e != S.doc) {
|
379
|
+
IRubyObject hEC = H_ELE_GET(e, H_ELE_EC);
|
380
|
+
if(hEC instanceof RubyHash) {
|
381
|
+
IRubyObject has = ((RubyHash)hEC).op_aref(scanner.ctx, name);
|
382
|
+
if(!has.isNil()) {
|
383
|
+
if(has == runtime.getTrue()) {
|
384
|
+
if(match.isNil()) {
|
385
|
+
match = e;
|
386
|
+
}
|
387
|
+
} else if(has == x.symAllow) {
|
388
|
+
match = S.focus;
|
389
|
+
} else if(has == x.symDeny) {
|
390
|
+
match = runtime.getNil();
|
391
|
+
}
|
392
|
+
}
|
393
|
+
}
|
394
|
+
e = H_ELE_GET(e, H_ELE_PARENT);
|
395
|
+
}
|
396
|
+
|
397
|
+
if(match.isNil()) {
|
398
|
+
match = S.focus;
|
399
|
+
}
|
400
|
+
S.focus = match;
|
401
|
+
}
|
402
|
+
|
403
|
+
scanner.hpricotAdd(S.focus, ele);
|
404
|
+
|
405
|
+
//
|
406
|
+
// in the case of a start tag that should be empty, just
|
407
|
+
// skip the step that focuses the element. focusing moves
|
408
|
+
// us deeper into the document.
|
409
|
+
//
|
410
|
+
if(sym == x.sym_stag) {
|
411
|
+
if(S.xml || ec != x.sym_EMPTY) {
|
412
|
+
S.focus = ele;
|
413
|
+
S.last = runtime.getNil();
|
414
|
+
}
|
415
|
+
}
|
416
|
+
} else if(sym == x.sym_etag) {
|
417
|
+
IRubyObject name, match = runtime.getNil(), e = S.focus;
|
418
|
+
if(S.strict) {
|
419
|
+
if(((RubyHash)S.EC).op_aref(scanner.ctx, tag).isNil()) {
|
420
|
+
tag = runtime.newString("div");
|
421
|
+
}
|
422
|
+
}
|
423
|
+
|
424
|
+
name = runtime.newFixnum(tag.hashCode());
|
425
|
+
while(e != S.doc) {
|
426
|
+
if(H_ELE_GET(e, H_ELE_HASH).equals(name)) {
|
427
|
+
match = e;
|
428
|
+
break;
|
429
|
+
}
|
430
|
+
e = H_ELE_GET(e, H_ELE_PARENT);
|
431
|
+
|
432
|
+
}
|
433
|
+
if(match.isNil()) {
|
434
|
+
H_ELE(x.cBogusETag);
|
435
|
+
scanner.hpricotAdd(S.focus, ele);
|
436
|
+
} else {
|
437
|
+
ele = runtime.getNil();
|
438
|
+
if(raw != -1) {
|
439
|
+
ele = RubyString.newString(runtime, scanner.data, raw, rawlen);
|
440
|
+
}
|
441
|
+
H_ELE_SET(match, H_ELE_ETAG, ele);
|
442
|
+
S.focus = H_ELE_GET(match, H_ELE_PARENT);
|
443
|
+
S.last = runtime.getNil();
|
444
|
+
|
445
|
+
}
|
446
|
+
} else if(sym == x.sym_cdata) {
|
447
|
+
H_ELE(x.cCData);
|
448
|
+
scanner.hpricotAdd(S.focus, ele);
|
449
|
+
} else if(sym == x.sym_comment) {
|
450
|
+
H_ELE(x.cComment);
|
451
|
+
scanner.hpricotAdd(S.focus, ele);
|
452
|
+
} else if(sym == x.sym_doctype) {
|
453
|
+
H_ELE(x.cDocType);
|
454
|
+
if(S.strict) {
|
455
|
+
RubyHash h = (RubyHash)attr;
|
456
|
+
h.fastASet(runtime.newSymbol("system_id"), runtime.newString("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
|
457
|
+
h.fastASet(runtime.newSymbol("public_id"), runtime.newString("-//W3C//DTD XHTML 1.0 Strict//EN"));
|
458
|
+
}
|
459
|
+
scanner.hpricotAdd(S.focus, ele);
|
460
|
+
} else if(sym == x.sym_procins) {
|
461
|
+
IRubyObject match = tag.callMethod(scanner.ctx, "match", x.reProcInsParse);
|
462
|
+
tag = RubyRegexp.nth_match(1, match);
|
463
|
+
attr = RubyRegexp.nth_match(2, match);
|
464
|
+
H_ELE(x.cProcIns);
|
465
|
+
scanner.hpricotAdd(S.focus, ele);
|
466
|
+
} else if(sym == x.sym_text) {
|
467
|
+
if(!S.last.isNil() && S.last.getType() == x.cText) {
|
468
|
+
((RubyString)H_ELE_GET(S.last, H_ELE_TAG)).append(tag);
|
469
|
+
} else {
|
470
|
+
H_ELE(x.cText);
|
471
|
+
scanner.hpricotAdd(S.focus, ele);
|
472
|
+
}
|
473
|
+
} else if(sym == x.sym_xmldecl) {
|
474
|
+
H_ELE(x.cXMLDecl);
|
475
|
+
scanner.hpricotAdd(S.focus, ele);
|
476
|
+
}
|
477
|
+
}
|
478
|
+
}
|
479
|
+
|
480
|
+
public void hpricotToken(State S, IRubyObject _sym, IRubyObject _tag, IRubyObject _attr, int _raw, int _rawlen, boolean taint) {
|
481
|
+
TokenInfo t = new TokenInfo();
|
482
|
+
t.sym = _sym;
|
483
|
+
t.tag = _tag;
|
484
|
+
t.attr = _attr;
|
485
|
+
t.raw = _raw;
|
486
|
+
t.rawlen = _rawlen;
|
487
|
+
t.ec = runtime.getNil();
|
488
|
+
t.ele = runtime.getNil();
|
489
|
+
t.x = x;
|
490
|
+
t.runtime = runtime;
|
491
|
+
t.scanner = this;
|
492
|
+
t.S = S;
|
493
|
+
|
494
|
+
t.hpricotToken(taint);
|
495
|
+
}
|
496
|
+
|
497
|
+
public void yieldTokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
|
498
|
+
if(sym == x.sym_text) {
|
499
|
+
raw = tag;
|
500
|
+
}
|
501
|
+
IRubyObject ary = RubyArray.newArrayNoCopy(runtime, new IRubyObject[]{sym, tag, attr, raw});
|
502
|
+
if(taint) {
|
503
|
+
ary.setTaint(true);
|
504
|
+
tag.setTaint(true);
|
505
|
+
attr.setTaint(true);
|
506
|
+
raw.setTaint(true);
|
507
|
+
}
|
508
|
+
|
509
|
+
block.yield(ctx, ary);
|
510
|
+
}
|
511
|
+
|
512
|
+
// line 561 "ext/hpricot_scan/hpricot_scan.java.rl"
|
513
|
+
|
514
|
+
|
515
|
+
|
516
|
+
// line 517 "ext/hpricot_scan/HpricotScanService.java"
|
150
517
|
private static byte[] init__hpricot_scan_actions_0()
|
151
518
|
{
|
152
519
|
return new byte [] {
|
@@ -752,121 +1119,166 @@ static final int hpricot_scan_en_html_cdata = 216;
|
|
752
1119
|
static final int hpricot_scan_en_html_procins = 218;
|
753
1120
|
static final int hpricot_scan_en_main = 204;
|
754
1121
|
|
755
|
-
// line
|
1122
|
+
// line 564 "ext/hpricot_scan/hpricot_scan.java.rl"
|
756
1123
|
|
757
|
-
public final static int BUFSIZE=16384;
|
1124
|
+
public final static int BUFSIZE = 16384;
|
758
1125
|
|
759
|
-
private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
|
760
|
-
IRubyObject ary;
|
761
|
-
if (sym == runtime.newSymbol("text")) {
|
762
|
-
raw = tag;
|
763
|
-
}
|
764
|
-
ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw});
|
765
|
-
if (taint) {
|
766
|
-
ary.setTaint(true);
|
767
|
-
tag.setTaint(true);
|
768
|
-
attr.setTaint(true);
|
769
|
-
raw.setTaint(true);
|
770
|
-
}
|
771
|
-
block.yield(runtime.getCurrentContext(), ary, null, null, false);
|
772
|
-
}
|
773
1126
|
|
1127
|
+
private int cs, act, have = 0, nread = 0, curline = 1;
|
1128
|
+
private int ts = 0, te = 0, eof = -1, p = -1, pe = -1, buf = 0;
|
1129
|
+
private byte[] data;
|
1130
|
+
private State S = null;
|
1131
|
+
private IRubyObject port, opts, attr, tag, akey, aval, bufsize;
|
1132
|
+
private int mark_tag = -1, mark_akey = -1, mark_aval = -1;
|
1133
|
+
private boolean done = false, ele_open = false, taint = false, io = false, text = false;
|
1134
|
+
private int buffer_size = 0;
|
774
1135
|
|
775
|
-
|
776
|
-
boolean text = false;
|
777
|
-
int ts=-1, te;
|
778
|
-
int eof=-1;
|
779
|
-
char[] buf;
|
780
|
-
Ruby runtime;
|
781
|
-
IRubyObject attr, bufsize;
|
782
|
-
IRubyObject[] tag, akey, aval;
|
783
|
-
int mark_tag, mark_akey, mark_aval;
|
784
|
-
boolean done = false, ele_open = false;
|
785
|
-
int buffer_size = 0;
|
786
|
-
boolean taint = false;
|
787
|
-
Block block = null;
|
788
|
-
|
789
|
-
|
790
|
-
IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment,
|
791
|
-
cdata, sym_text;
|
792
|
-
|
793
|
-
IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) {
|
794
|
-
attr = bufsize = runtime.getNil();
|
795
|
-
tag = new IRubyObject[]{runtime.getNil()};
|
796
|
-
akey = new IRubyObject[]{runtime.getNil()};
|
797
|
-
aval = new IRubyObject[]{runtime.getNil()};
|
798
|
-
|
799
|
-
RubyClass rb_eHpricotParseError = runtime.getModule("Hpricot").getClass("ParseError");
|
800
|
-
|
801
|
-
taint = port.isTaint();
|
802
|
-
if ( !port.respondsTo("read")) {
|
803
|
-
if ( port.respondsTo("to_str")) {
|
804
|
-
port = port.callMethod(runtime.getCurrentContext(),"to_str");
|
805
|
-
} else {
|
806
|
-
throw runtime.newArgumentError("bad Hpricot argument, String or IO only please.");
|
807
|
-
}
|
808
|
-
}
|
1136
|
+
private Extra x;
|
809
1137
|
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
1138
|
+
private IRubyObject self;
|
1139
|
+
private Ruby runtime;
|
1140
|
+
private ThreadContext ctx;
|
1141
|
+
private Block block;
|
1142
|
+
|
1143
|
+
private IRubyObject xmldecl, doctype, stag, etag, emptytag, comment, cdata, procins;
|
1144
|
+
|
1145
|
+
private RaiseException newRaiseException(RubyClass exceptionClass, String message) {
|
1146
|
+
return new RaiseException(runtime, exceptionClass, message, true);
|
1147
|
+
}
|
818
1148
|
|
819
|
-
|
820
|
-
|
1149
|
+
public Scanner(IRubyObject self, IRubyObject[] args, Block block) {
|
1150
|
+
this.self = self;
|
1151
|
+
this.runtime = self.getRuntime();
|
1152
|
+
this.ctx = runtime.getCurrentContext();
|
1153
|
+
this.block = block;
|
1154
|
+
attr = runtime.getNil();
|
1155
|
+
tag = runtime.getNil();
|
1156
|
+
akey = runtime.getNil();
|
1157
|
+
aval = runtime.getNil();
|
1158
|
+
bufsize = runtime.getNil();
|
1159
|
+
|
1160
|
+
this.x = (Extra)this.runtime.getModule("Hpricot").dataGetStruct();
|
1161
|
+
|
1162
|
+
this.xmldecl = x.sym_xmldecl;
|
1163
|
+
this.doctype = x.sym_doctype;
|
1164
|
+
this.stag = x.sym_stag;
|
1165
|
+
this.etag = x.sym_etag;
|
1166
|
+
this.emptytag = x.sym_emptytag;
|
1167
|
+
this.comment = x.sym_comment;
|
1168
|
+
this.cdata = x.sym_cdata;
|
1169
|
+
this.procins = x.sym_procins;
|
1170
|
+
|
1171
|
+
port = args[0];
|
1172
|
+
if(args.length == 2) {
|
1173
|
+
opts = args[1];
|
1174
|
+
} else {
|
1175
|
+
opts = runtime.getNil();
|
1176
|
+
}
|
1177
|
+
|
1178
|
+
taint = port.isTaint();
|
1179
|
+
io = port.respondsTo("read");
|
1180
|
+
if(!io) {
|
1181
|
+
if(port.respondsTo("to_str")) {
|
1182
|
+
port = port.callMethod(ctx, "to_str");
|
1183
|
+
port = port.convertToString();
|
1184
|
+
} else {
|
1185
|
+
throw runtime.newArgumentError("an Hpricot document must be built from an input source (a String or IO object.)");
|
1186
|
+
}
|
1187
|
+
}
|
1188
|
+
|
1189
|
+
if(!(opts instanceof RubyHash)) {
|
1190
|
+
opts = runtime.getNil();
|
1191
|
+
}
|
1192
|
+
|
1193
|
+
if(!block.isGiven()) {
|
1194
|
+
S = new State();
|
1195
|
+
S.doc = x.cDoc.allocate();
|
1196
|
+
S.focus = S.doc;
|
1197
|
+
S.last = runtime.getNil();
|
1198
|
+
S.xml = OPT(opts, "xml");
|
1199
|
+
S.strict = OPT(opts, "xhtml_strict");
|
1200
|
+
S.fixup = OPT(opts, "fixup_tags");
|
1201
|
+
if(S.strict) {
|
1202
|
+
S.fixup = true;
|
1203
|
+
}
|
1204
|
+
S.doc.getInstanceVariables().fastSetInstanceVariable("@options", opts);
|
1205
|
+
S.EC = x.mHpricot.getConstant("ElementContent");
|
1206
|
+
}
|
1207
|
+
|
1208
|
+
buffer_size = BUFSIZE;
|
1209
|
+
if(self.getInstanceVariables().fastHasInstanceVariable("@buffer_size")) {
|
1210
|
+
bufsize = self.getInstanceVariables().fastGetInstanceVariable("@buffer_size");
|
1211
|
+
if(!bufsize.isNil()) {
|
1212
|
+
buffer_size = RubyNumeric.fix2int(bufsize);
|
1213
|
+
}
|
1214
|
+
}
|
1215
|
+
|
1216
|
+
if(io) {
|
1217
|
+
buf = 0;
|
1218
|
+
data = new byte[buffer_size];
|
1219
|
+
}
|
1220
|
+
}
|
1221
|
+
|
1222
|
+
private int len, space;
|
1223
|
+
// hpricot_scan
|
1224
|
+
public IRubyObject scan() {
|
1225
|
+
|
1226
|
+
// line 1227 "ext/hpricot_scan/HpricotScanService.java"
|
821
1227
|
{
|
822
1228
|
cs = hpricot_scan_start;
|
823
1229
|
ts = -1;
|
824
1230
|
te = -1;
|
825
1231
|
act = 0;
|
826
1232
|
}
|
827
|
-
// line
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
1233
|
+
// line 667 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1234
|
+
while(!done) {
|
1235
|
+
p = pe = len = buf;
|
1236
|
+
space = buffer_size - have;
|
1237
|
+
|
1238
|
+
if(io) {
|
1239
|
+
if(space == 0) {
|
1240
|
+
/* We've used up the entire buffer storing an already-parsed token
|
1241
|
+
* prefix that must be preserved. Likely caused by super-long attributes.
|
1242
|
+
* Increase buffer size and continue */
|
1243
|
+
buffer_size += BUFSIZE;
|
1244
|
+
data = realloc(data, buffer_size);
|
1245
|
+
space = buffer_size - have;
|
1246
|
+
}
|
1247
|
+
|
1248
|
+
p = have;
|
1249
|
+
IRubyObject str = port.callMethod(ctx, "read", runtime.newFixnum(space));
|
1250
|
+
ByteList bl = str.convertToString().getByteList();
|
1251
|
+
len = bl.realSize;
|
1252
|
+
System.arraycopy(bl.bytes, bl.begin, data, p, len);
|
1253
|
+
} else {
|
1254
|
+
ByteList bl = port.convertToString().getByteList();
|
1255
|
+
data = bl.bytes;
|
1256
|
+
buf = bl.begin;
|
1257
|
+
p = bl.begin;
|
1258
|
+
len = bl.realSize + 1;
|
1259
|
+
if(p + len >= data.length) {
|
1260
|
+
data = new byte[len];
|
1261
|
+
System.arraycopy(bl.bytes, bl.begin, data, 0, bl.realSize);
|
1262
|
+
p = 0;
|
1263
|
+
buf = 0;
|
1264
|
+
}
|
1265
|
+
done = true;
|
1266
|
+
eof = p + len;
|
1267
|
+
}
|
1268
|
+
|
1269
|
+
nread += len;
|
1270
|
+
|
1271
|
+
/* If this is the last buffer, tack on an EOF. */
|
1272
|
+
if(io && len < space) {
|
1273
|
+
data[p + len++] = 0;
|
1274
|
+
eof = p + len;
|
1275
|
+
done = true;
|
1276
|
+
}
|
1277
|
+
|
1278
|
+
pe = p + len;
|
1279
|
+
|
1280
|
+
|
1281
|
+
// line 1282 "ext/hpricot_scan/HpricotScanService.java"
|
870
1282
|
{
|
871
1283
|
int _klen;
|
872
1284
|
int _trans = 0;
|
@@ -891,7 +1303,7 @@ case 1:
|
|
891
1303
|
// line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
|
892
1304
|
{ts = p;}
|
893
1305
|
break;
|
894
|
-
// line
|
1306
|
+
// line 1307 "ext/hpricot_scan/HpricotScanService.java"
|
895
1307
|
}
|
896
1308
|
}
|
897
1309
|
|
@@ -956,83 +1368,89 @@ case 3:
|
|
956
1368
|
switch ( _hpricot_scan_actions[_acts++] )
|
957
1369
|
{
|
958
1370
|
case 0:
|
959
|
-
// line
|
1371
|
+
// line 514 "ext/hpricot_scan/hpricot_scan.java.rl"
|
960
1372
|
{
|
961
|
-
if
|
962
|
-
|
963
|
-
|
964
|
-
|
1373
|
+
if(text) {
|
1374
|
+
tag = CAT(tag, mark_tag, p);
|
1375
|
+
ELE(x.sym_text);
|
1376
|
+
text = false;
|
965
1377
|
}
|
966
1378
|
attr = runtime.getNil();
|
967
|
-
tag
|
1379
|
+
tag = runtime.getNil();
|
968
1380
|
mark_tag = -1;
|
969
1381
|
ele_open = true;
|
970
1382
|
}
|
971
1383
|
break;
|
972
1384
|
case 1:
|
973
|
-
// line
|
1385
|
+
// line 526 "ext/hpricot_scan/hpricot_scan.java.rl"
|
974
1386
|
{ mark_tag = p; }
|
975
1387
|
break;
|
976
1388
|
case 2:
|
977
|
-
// line
|
1389
|
+
// line 527 "ext/hpricot_scan/hpricot_scan.java.rl"
|
978
1390
|
{ mark_aval = p; }
|
979
1391
|
break;
|
980
1392
|
case 3:
|
981
|
-
// line
|
1393
|
+
// line 528 "ext/hpricot_scan/hpricot_scan.java.rl"
|
982
1394
|
{ mark_akey = p; }
|
983
1395
|
break;
|
984
1396
|
case 4:
|
985
|
-
// line
|
986
|
-
{ SET(
|
1397
|
+
// line 529 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1398
|
+
{ tag = SET(mark_tag, p, tag); }
|
987
1399
|
break;
|
988
1400
|
case 5:
|
989
|
-
// line
|
990
|
-
{ SET(
|
1401
|
+
// line 531 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1402
|
+
{ aval = SET(mark_aval, p, aval); }
|
991
1403
|
break;
|
992
1404
|
case 6:
|
993
|
-
// line
|
994
|
-
{
|
995
|
-
|
996
|
-
|
1405
|
+
// line 532 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1406
|
+
{
|
1407
|
+
if(data[p-1] == '"' || data[p-1] == '\'') {
|
1408
|
+
aval = SET(mark_aval, p-1, aval);
|
1409
|
+
} else {
|
1410
|
+
aval = SET(mark_aval, p, aval);
|
1411
|
+
}
|
997
1412
|
}
|
998
1413
|
break;
|
999
1414
|
case 7:
|
1000
|
-
// line
|
1001
|
-
{ SET(
|
1415
|
+
// line 539 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1416
|
+
{ akey = SET(mark_akey, p, akey); }
|
1002
1417
|
break;
|
1003
1418
|
case 8:
|
1004
|
-
// line
|
1005
|
-
{ SET(
|
1419
|
+
// line 540 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1420
|
+
{ aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("version"), aval); }
|
1006
1421
|
break;
|
1007
1422
|
case 9:
|
1008
|
-
// line
|
1009
|
-
{ SET(
|
1423
|
+
// line 541 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1424
|
+
{ aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("encoding"), aval); }
|
1010
1425
|
break;
|
1011
1426
|
case 10:
|
1012
|
-
// line
|
1013
|
-
{ SET(
|
1427
|
+
// line 542 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1428
|
+
{ aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("standalone"), aval); }
|
1014
1429
|
break;
|
1015
1430
|
case 11:
|
1016
|
-
// line
|
1017
|
-
{ SET(
|
1431
|
+
// line 543 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1432
|
+
{ aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("public_id"), aval); }
|
1018
1433
|
break;
|
1019
1434
|
case 12:
|
1020
|
-
// line
|
1021
|
-
{ SET(
|
1435
|
+
// line 544 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1436
|
+
{ aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("system_id"), aval); }
|
1022
1437
|
break;
|
1023
1438
|
case 13:
|
1024
|
-
// line
|
1025
|
-
{
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
1439
|
+
// line 546 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1440
|
+
{
|
1441
|
+
akey = runtime.getNil();
|
1442
|
+
aval = runtime.getNil();
|
1443
|
+
mark_akey = -1;
|
1444
|
+
mark_aval = -1;
|
1030
1445
|
}
|
1031
1446
|
break;
|
1032
1447
|
case 14:
|
1033
|
-
// line
|
1034
|
-
{
|
1035
|
-
|
1448
|
+
// line 553 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1449
|
+
{
|
1450
|
+
if(!S.xml) {
|
1451
|
+
akey = akey.callMethod(runtime.getCurrentContext(), "downcase");
|
1452
|
+
}
|
1453
|
+
ATTR(akey, aval);
|
1036
1454
|
}
|
1037
1455
|
break;
|
1038
1456
|
case 15:
|
@@ -1189,7 +1607,7 @@ case 3:
|
|
1189
1607
|
}
|
1190
1608
|
}
|
1191
1609
|
break;
|
1192
|
-
// line
|
1610
|
+
// line 1611 "ext/hpricot_scan/HpricotScanService.java"
|
1193
1611
|
}
|
1194
1612
|
}
|
1195
1613
|
}
|
@@ -1203,7 +1621,7 @@ case 2:
|
|
1203
1621
|
// line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1204
1622
|
{ts = -1;}
|
1205
1623
|
break;
|
1206
|
-
// line
|
1624
|
+
// line 1625 "ext/hpricot_scan/HpricotScanService.java"
|
1207
1625
|
}
|
1208
1626
|
}
|
1209
1627
|
|
@@ -1225,81 +1643,443 @@ case 5:
|
|
1225
1643
|
}
|
1226
1644
|
break; }
|
1227
1645
|
}
|
1228
|
-
// line
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1646
|
+
// line 714 "ext/hpricot_scan/hpricot_scan.java.rl"
|
1647
|
+
|
1648
|
+
if(cs == hpricot_scan_error) {
|
1649
|
+
if(!tag.isNil()) {
|
1650
|
+
throw newRaiseException(x.rb_eHpricotParseError, "parse error on element <" + tag + ">, starting on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
|
1651
|
+
} else {
|
1652
|
+
throw newRaiseException(x.rb_eHpricotParseError, "parse error on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
|
1653
|
+
}
|
1654
|
+
}
|
1655
|
+
|
1656
|
+
if(done && ele_open) {
|
1657
|
+
ele_open = false;
|
1658
|
+
if(ts > 0) {
|
1659
|
+
mark_tag = ts;
|
1660
|
+
ts = 0;
|
1661
|
+
text = true;
|
1662
|
+
}
|
1663
|
+
}
|
1664
|
+
|
1665
|
+
if(ts == -1) {
|
1666
|
+
have = 0;
|
1667
|
+
if(mark_tag != -1 && text) {
|
1668
|
+
if(done) {
|
1669
|
+
if(mark_tag < p - 1) {
|
1670
|
+
tag = CAT(tag, mark_tag, p-1);
|
1671
|
+
ELE(x.sym_text);
|
1672
|
+
}
|
1673
|
+
} else {
|
1674
|
+
tag = CAT(tag, mark_tag, p);
|
1675
|
+
}
|
1676
|
+
}
|
1677
|
+
if(io) {
|
1678
|
+
mark_tag = 0;
|
1679
|
+
} else {
|
1680
|
+
mark_tag = ((RubyString)port).getByteList().begin;
|
1681
|
+
}
|
1682
|
+
} else if(io) {
|
1683
|
+
have = pe - ts;
|
1684
|
+
System.arraycopy(data, ts, data, buf, have);
|
1685
|
+
mark_tag = SLIDE(mark_tag);
|
1686
|
+
mark_akey = SLIDE(mark_akey);
|
1687
|
+
mark_aval = SLIDE(mark_aval);
|
1688
|
+
te -= ts;
|
1689
|
+
ts = 0;
|
1690
|
+
}
|
1691
|
+
}
|
1692
|
+
|
1693
|
+
if(S != null) {
|
1694
|
+
return S.doc;
|
1695
|
+
}
|
1696
|
+
|
1697
|
+
return runtime.getNil();
|
1698
|
+
}
|
1699
|
+
}
|
1700
|
+
|
1701
|
+
public static class HpricotModule {
|
1702
|
+
// hpricot_scan
|
1703
|
+
@JRubyMethod(module = true, optional = 1, required = 1, frame = true)
|
1704
|
+
public static IRubyObject scan(IRubyObject self, IRubyObject[] args, Block block) {
|
1705
|
+
return new Scanner(self, args, block).scan();
|
1706
|
+
}
|
1707
|
+
|
1708
|
+
// hpricot_css
|
1709
|
+
@JRubyMethod(module = true)
|
1710
|
+
public static IRubyObject css(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
|
1711
|
+
return new HpricotCss(self, mod, str, node).scan();
|
1712
|
+
}
|
1713
|
+
}
|
1714
|
+
|
1715
|
+
public static class CData {
|
1716
|
+
@JRubyMethod
|
1717
|
+
public static IRubyObject content(IRubyObject self) {
|
1718
|
+
return hpricot_ele_get_name(self);
|
1719
|
+
}
|
1720
|
+
|
1721
|
+
@JRubyMethod(name = "content=")
|
1722
|
+
public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
|
1723
|
+
return hpricot_ele_set_name(self, value);
|
1724
|
+
}
|
1725
|
+
}
|
1726
|
+
|
1727
|
+
public static class Comment {
|
1728
|
+
@JRubyMethod
|
1729
|
+
public static IRubyObject content(IRubyObject self) {
|
1730
|
+
return hpricot_ele_get_name(self);
|
1731
|
+
}
|
1732
|
+
|
1733
|
+
@JRubyMethod(name = "content=")
|
1734
|
+
public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
|
1735
|
+
return hpricot_ele_set_name(self, value);
|
1736
|
+
}
|
1737
|
+
}
|
1738
|
+
|
1739
|
+
public static class DocType {
|
1740
|
+
@JRubyMethod
|
1741
|
+
public static IRubyObject raw_string(IRubyObject self) {
|
1742
|
+
return hpricot_ele_get_name(self);
|
1743
|
+
}
|
1744
|
+
|
1745
|
+
@JRubyMethod
|
1746
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
1747
|
+
return hpricot_ele_clear_name(self);
|
1748
|
+
}
|
1749
|
+
|
1750
|
+
@JRubyMethod
|
1751
|
+
public static IRubyObject target(IRubyObject self) {
|
1752
|
+
return hpricot_ele_get_target(self);
|
1753
|
+
}
|
1754
|
+
|
1755
|
+
@JRubyMethod(name = "target=")
|
1756
|
+
public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
|
1757
|
+
return hpricot_ele_set_target(self, value);
|
1758
|
+
}
|
1759
|
+
|
1760
|
+
@JRubyMethod
|
1761
|
+
public static IRubyObject public_id(IRubyObject self) {
|
1762
|
+
return hpricot_ele_get_public_id(self);
|
1763
|
+
}
|
1764
|
+
|
1765
|
+
@JRubyMethod(name = "public_id=")
|
1766
|
+
public static IRubyObject public_id_set(IRubyObject self, IRubyObject value) {
|
1767
|
+
return hpricot_ele_set_public_id(self, value);
|
1768
|
+
}
|
1769
|
+
|
1770
|
+
@JRubyMethod
|
1771
|
+
public static IRubyObject system_id(IRubyObject self) {
|
1772
|
+
return hpricot_ele_get_system_id(self);
|
1773
|
+
}
|
1774
|
+
|
1775
|
+
@JRubyMethod(name = "system_id=")
|
1776
|
+
public static IRubyObject system_id_set(IRubyObject self, IRubyObject value) {
|
1777
|
+
return hpricot_ele_set_system_id(self, value);
|
1778
|
+
}
|
1779
|
+
}
|
1780
|
+
|
1781
|
+
public static class Elem {
|
1782
|
+
@JRubyMethod
|
1783
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
1784
|
+
return hpricot_ele_clear_raw(self);
|
1785
|
+
}
|
1786
|
+
}
|
1787
|
+
|
1788
|
+
public static class BogusETag {
|
1789
|
+
@JRubyMethod
|
1790
|
+
public static IRubyObject raw_string(IRubyObject self) {
|
1791
|
+
return hpricot_ele_get_attr(self);
|
1792
|
+
}
|
1793
|
+
|
1794
|
+
@JRubyMethod
|
1795
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
1796
|
+
return hpricot_ele_clear_attr(self);
|
1797
|
+
}
|
1798
|
+
}
|
1799
|
+
|
1800
|
+
public static class Text {
|
1801
|
+
@JRubyMethod
|
1802
|
+
public static IRubyObject raw_string(IRubyObject self) {
|
1803
|
+
return hpricot_ele_get_name(self);
|
1804
|
+
}
|
1805
|
+
|
1806
|
+
@JRubyMethod
|
1807
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
1808
|
+
return hpricot_ele_clear_name(self);
|
1809
|
+
}
|
1810
|
+
|
1811
|
+
@JRubyMethod
|
1812
|
+
public static IRubyObject content(IRubyObject self) {
|
1813
|
+
return hpricot_ele_get_name(self);
|
1814
|
+
}
|
1815
|
+
|
1816
|
+
@JRubyMethod(name = "content=")
|
1817
|
+
public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
|
1818
|
+
return hpricot_ele_set_name(self, value);
|
1819
|
+
}
|
1820
|
+
}
|
1821
|
+
|
1822
|
+
public static class XMLDecl {
|
1823
|
+
@JRubyMethod
|
1824
|
+
public static IRubyObject raw_string(IRubyObject self) {
|
1825
|
+
return hpricot_ele_get_name(self);
|
1826
|
+
}
|
1827
|
+
|
1828
|
+
@JRubyMethod
|
1829
|
+
public static IRubyObject clear_raw(IRubyObject self) {
|
1830
|
+
return hpricot_ele_clear_name(self);
|
1831
|
+
}
|
1832
|
+
|
1833
|
+
@JRubyMethod
|
1834
|
+
public static IRubyObject encoding(IRubyObject self) {
|
1835
|
+
return hpricot_ele_get_encoding(self);
|
1836
|
+
}
|
1837
|
+
|
1838
|
+
@JRubyMethod(name = "encoding=")
|
1839
|
+
public static IRubyObject encoding_set(IRubyObject self, IRubyObject value) {
|
1840
|
+
return hpricot_ele_set_encoding(self, value);
|
1841
|
+
}
|
1842
|
+
|
1843
|
+
@JRubyMethod
|
1844
|
+
public static IRubyObject standalone(IRubyObject self) {
|
1845
|
+
return hpricot_ele_get_standalone(self);
|
1846
|
+
}
|
1847
|
+
|
1848
|
+
@JRubyMethod(name = "standalone=")
|
1849
|
+
public static IRubyObject standalone_set(IRubyObject self, IRubyObject value) {
|
1850
|
+
return hpricot_ele_set_standalone(self, value);
|
1851
|
+
}
|
1852
|
+
|
1853
|
+
@JRubyMethod
|
1854
|
+
public static IRubyObject version(IRubyObject self) {
|
1855
|
+
return hpricot_ele_get_version(self);
|
1856
|
+
}
|
1857
|
+
|
1858
|
+
@JRubyMethod(name = "version=")
|
1859
|
+
public static IRubyObject version_set(IRubyObject self, IRubyObject value) {
|
1860
|
+
return hpricot_ele_set_version(self, value);
|
1861
|
+
}
|
1862
|
+
}
|
1863
|
+
|
1864
|
+
public static class ProcIns {
|
1865
|
+
@JRubyMethod
|
1866
|
+
public static IRubyObject target(IRubyObject self) {
|
1867
|
+
return hpricot_ele_get_name(self);
|
1868
|
+
}
|
1869
|
+
|
1870
|
+
@JRubyMethod(name = "target=")
|
1871
|
+
public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
|
1872
|
+
return hpricot_ele_set_name(self, value);
|
1873
|
+
}
|
1874
|
+
|
1875
|
+
@JRubyMethod
|
1876
|
+
public static IRubyObject content(IRubyObject self) {
|
1877
|
+
return hpricot_ele_get_attr(self);
|
1878
|
+
}
|
1879
|
+
|
1880
|
+
@JRubyMethod(name = "content=")
|
1881
|
+
public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
|
1882
|
+
return hpricot_ele_set_attr(self, value);
|
1883
|
+
}
|
1884
|
+
}
|
1885
|
+
|
1886
|
+
public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
|
1887
|
+
|
1888
|
+
public final static int H_ELE_TAG = 0;
|
1889
|
+
public final static int H_ELE_PARENT = 1;
|
1890
|
+
public final static int H_ELE_ATTR = 2;
|
1891
|
+
public final static int H_ELE_ETAG = 3;
|
1892
|
+
public final static int H_ELE_RAW = 4;
|
1893
|
+
public final static int H_ELE_EC = 5;
|
1894
|
+
public final static int H_ELE_HASH = 6;
|
1895
|
+
public final static int H_ELE_CHILDREN = 7;
|
1896
|
+
|
1897
|
+
public static IRubyObject H_ELE_GET(IRubyObject recv, int n) {
|
1898
|
+
return ((IRubyObject[])recv.dataGetStruct())[n];
|
1899
|
+
}
|
1900
|
+
|
1901
|
+
public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
|
1902
|
+
((IRubyObject[])recv.dataGetStruct())[n] = value;
|
1903
|
+
return value;
|
1904
|
+
}
|
1905
|
+
|
1906
|
+
private static class RefCallback implements Callback {
|
1907
|
+
private final int n;
|
1908
|
+
public RefCallback(int n) { this.n = n; }
|
1909
|
+
|
1910
|
+
public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
|
1911
|
+
return H_ELE_GET(recv, n);
|
1912
|
+
}
|
1913
|
+
|
1914
|
+
public Arity getArity() {
|
1915
|
+
return Arity.NO_ARGUMENTS;
|
1916
|
+
}
|
1917
|
+
}
|
1918
|
+
|
1919
|
+
private static class SetCallback implements Callback {
|
1920
|
+
private final int n;
|
1921
|
+
public SetCallback(int n) { this.n = n; }
|
1922
|
+
|
1923
|
+
public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
|
1924
|
+
return H_ELE_SET(recv, n, args[0]);
|
1925
|
+
}
|
1926
|
+
|
1927
|
+
public Arity getArity() {
|
1928
|
+
return Arity.ONE_ARGUMENT;
|
1929
|
+
}
|
1236
1930
|
}
|
1931
|
+
|
1932
|
+
private final static Callback[] ref_func = new Callback[]{
|
1933
|
+
new RefCallback(0),
|
1934
|
+
new RefCallback(1),
|
1935
|
+
new RefCallback(2),
|
1936
|
+
new RefCallback(3),
|
1937
|
+
new RefCallback(4),
|
1938
|
+
new RefCallback(5),
|
1939
|
+
new RefCallback(6),
|
1940
|
+
new RefCallback(7),
|
1941
|
+
new RefCallback(8),
|
1942
|
+
new RefCallback(9)};
|
1943
|
+
|
1944
|
+
private final static Callback[] set_func = new Callback[]{
|
1945
|
+
new SetCallback(0),
|
1946
|
+
new SetCallback(1),
|
1947
|
+
new SetCallback(2),
|
1948
|
+
new SetCallback(3),
|
1949
|
+
new SetCallback(4),
|
1950
|
+
new SetCallback(5),
|
1951
|
+
new SetCallback(6),
|
1952
|
+
new SetCallback(7),
|
1953
|
+
new SetCallback(8),
|
1954
|
+
new SetCallback(9)};
|
1955
|
+
|
1956
|
+
public final static ObjectAllocator alloc_hpricot_struct = new ObjectAllocator() {
|
1957
|
+
// alloc_hpricot_struct
|
1958
|
+
public IRubyObject allocate(Ruby runtime, RubyClass klass) {
|
1959
|
+
RubyClass kurrent = klass;
|
1960
|
+
Object sz = kurrent.fastGetInternalVariable("__size__");
|
1961
|
+
while(sz == null && kurrent != null) {
|
1962
|
+
kurrent = kurrent.getSuperClass();
|
1963
|
+
sz = kurrent.fastGetInternalVariable("__size__");
|
1964
|
+
}
|
1965
|
+
int size = RubyNumeric.fix2int((RubyObject)sz);
|
1966
|
+
RubyObject obj = new RubyObject(runtime, klass);
|
1967
|
+
IRubyObject[] all = new IRubyObject[size];
|
1968
|
+
java.util.Arrays.fill(all, runtime.getNil());
|
1969
|
+
obj.dataWrapStruct(all);
|
1970
|
+
return obj;
|
1971
|
+
}
|
1972
|
+
};
|
1973
|
+
|
1974
|
+
public static RubyClass makeHpricotStruct(Ruby runtime, IRubyObject[] members) {
|
1975
|
+
RubyClass klass = RubyClass.newClass(runtime, runtime.getObject());
|
1976
|
+
klass.fastSetInternalVariable("__size__", runtime.newFixnum(members.length));
|
1977
|
+
klass.setAllocator(alloc_hpricot_struct);
|
1978
|
+
|
1979
|
+
for(int i = 0; i < members.length; i++) {
|
1980
|
+
String id = members[i].toString();
|
1981
|
+
klass.defineMethod(id, ref_func[i]);
|
1982
|
+
klass.defineMethod(id + "=", set_func[i]);
|
1983
|
+
}
|
1237
1984
|
|
1238
|
-
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
}
|
1985
|
+
return klass;
|
1986
|
+
}
|
1987
|
+
|
1988
|
+
public boolean basicLoad(final Ruby runtime) throws IOException {
|
1989
|
+
Init_hpricot_scan(runtime);
|
1990
|
+
return true;
|
1245
1991
|
}
|
1246
1992
|
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1257
|
-
|
1993
|
+
public static class Extra {
|
1994
|
+
IRubyObject symAllow, symDeny, sym_xmldecl, sym_doctype,
|
1995
|
+
sym_procins, sym_stag, sym_etag, sym_emptytag,
|
1996
|
+
sym_allowed, sym_children, sym_comment,
|
1997
|
+
sym_cdata, sym_name, sym_parent,
|
1998
|
+
sym_raw_attributes, sym_raw_string, sym_tagno,
|
1999
|
+
sym_text, sym_EMPTY, sym_CDATA;
|
2000
|
+
|
2001
|
+
public RubyModule mHpricot;
|
2002
|
+
public RubyClass structElem;
|
2003
|
+
public RubyClass structAttr;
|
2004
|
+
public RubyClass structBasic;
|
2005
|
+
public RubyClass cDoc;
|
2006
|
+
public RubyClass cCData;
|
2007
|
+
public RubyClass cComment;
|
2008
|
+
public RubyClass cDocType;
|
2009
|
+
public RubyClass cElem;
|
2010
|
+
public RubyClass cBogusETag;
|
2011
|
+
public RubyClass cText;
|
2012
|
+
public RubyClass cXMLDecl;
|
2013
|
+
public RubyClass cProcIns;
|
2014
|
+
public RubyClass rb_eHpricotParseError;
|
2015
|
+
public IRubyObject reProcInsParse;
|
2016
|
+
|
2017
|
+
public Extra(Ruby runtime) {
|
2018
|
+
symAllow = runtime.newSymbol("allow");
|
2019
|
+
symDeny = runtime.newSymbol("deny");
|
2020
|
+
sym_xmldecl = runtime.newSymbol("xmldecl");
|
2021
|
+
sym_doctype = runtime.newSymbol("doctype");
|
2022
|
+
sym_procins = runtime.newSymbol("procins");
|
2023
|
+
sym_stag = runtime.newSymbol("stag");
|
2024
|
+
sym_etag = runtime.newSymbol("etag");
|
2025
|
+
sym_emptytag = runtime.newSymbol("emptytag");
|
2026
|
+
sym_allowed = runtime.newSymbol("allowed");
|
2027
|
+
sym_children = runtime.newSymbol("children");
|
2028
|
+
sym_comment = runtime.newSymbol("comment");
|
2029
|
+
sym_cdata = runtime.newSymbol("cdata");
|
2030
|
+
sym_name = runtime.newSymbol("name");
|
2031
|
+
sym_parent = runtime.newSymbol("parent");
|
2032
|
+
sym_raw_attributes = runtime.newSymbol("raw_attributes");
|
2033
|
+
sym_raw_string = runtime.newSymbol("raw_string");
|
2034
|
+
sym_tagno = runtime.newSymbol("tagno");
|
2035
|
+
sym_text = runtime.newSymbol("text");
|
2036
|
+
sym_EMPTY = runtime.newSymbol("EMPTY");
|
2037
|
+
sym_CDATA = runtime.newSymbol("CDATA");
|
1258
2038
|
}
|
1259
|
-
}
|
1260
|
-
mark_tag = 0;
|
1261
|
-
} else {
|
1262
|
-
have = pe - ts;
|
1263
|
-
System.arraycopy(buf,ts,buf,0,have);
|
1264
|
-
SLIDE(tag);
|
1265
|
-
SLIDE(akey);
|
1266
|
-
SLIDE(aval);
|
1267
|
-
te = (te - ts);
|
1268
|
-
ts = 0;
|
1269
2039
|
}
|
1270
|
-
}
|
1271
|
-
return runtime.getNil();
|
1272
|
-
}
|
1273
2040
|
|
1274
|
-
public static
|
1275
|
-
|
1276
|
-
HpricotScanService service = new HpricotScanService();
|
1277
|
-
service.runtime = runtime;
|
1278
|
-
service.xmldecl = runtime.newSymbol("xmldecl");
|
1279
|
-
service.doctype = runtime.newSymbol("doctype");
|
1280
|
-
service.procins = runtime.newSymbol("procins");
|
1281
|
-
service.stag = runtime.newSymbol("stag");
|
1282
|
-
service.etag = runtime.newSymbol("etag");
|
1283
|
-
service.emptytag = runtime.newSymbol("emptytag");
|
1284
|
-
service.comment = runtime.newSymbol("comment");
|
1285
|
-
service.cdata = runtime.newSymbol("cdata");
|
1286
|
-
service.sym_text = runtime.newSymbol("text");
|
1287
|
-
service.block = block;
|
1288
|
-
return service.hpricot_scan(recv, port);
|
1289
|
-
}
|
2041
|
+
public static void Init_hpricot_scan(Ruby runtime) {
|
2042
|
+
Extra x = new Extra(runtime);
|
1290
2043
|
|
2044
|
+
x.mHpricot = runtime.defineModule("Hpricot");
|
2045
|
+
x.mHpricot.dataWrapStruct(x);
|
1291
2046
|
|
1292
|
-
|
1293
|
-
|
1294
|
-
return true;
|
1295
|
-
}
|
2047
|
+
x.mHpricot.getSingletonClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
|
2048
|
+
x.mHpricot.defineAnnotatedMethods(HpricotModule.class);
|
1296
2049
|
|
1297
|
-
|
1298
|
-
|
1299
|
-
|
1300
|
-
|
1301
|
-
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
2050
|
+
x.rb_eHpricotParseError = x.mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
|
2051
|
+
|
2052
|
+
x.structElem = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes, x.sym_etag, x.sym_raw_string, x.sym_allowed, x.sym_tagno, x.sym_children});
|
2053
|
+
x.structAttr = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes});
|
2054
|
+
x.structBasic= makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent});
|
2055
|
+
|
2056
|
+
x.cDoc = x.mHpricot.defineClassUnder("Doc", x.structElem, x.structElem.getAllocator());
|
2057
|
+
|
2058
|
+
x.cCData = x.mHpricot.defineClassUnder("CData", x.structBasic, x.structBasic.getAllocator());
|
2059
|
+
x.cCData.defineAnnotatedMethods(CData.class);
|
2060
|
+
|
2061
|
+
x.cComment = x.mHpricot.defineClassUnder("Comment", x.structBasic, x.structBasic.getAllocator());
|
2062
|
+
x.cComment.defineAnnotatedMethods(Comment.class);
|
2063
|
+
|
2064
|
+
x.cDocType = x.mHpricot.defineClassUnder("DocType", x.structAttr, x.structAttr.getAllocator());
|
2065
|
+
x.cDocType.defineAnnotatedMethods(DocType.class);
|
2066
|
+
|
2067
|
+
x.cElem = x.mHpricot.defineClassUnder("Elem", x.structElem, x.structElem.getAllocator());
|
2068
|
+
x.cElem.defineAnnotatedMethods(Elem.class);
|
2069
|
+
|
2070
|
+
x.cBogusETag = x.mHpricot.defineClassUnder("BogusETag", x.structAttr, x.structAttr.getAllocator());
|
2071
|
+
x.cBogusETag.defineAnnotatedMethods(BogusETag.class);
|
2072
|
+
|
2073
|
+
x.cText = x.mHpricot.defineClassUnder("Text", x.structBasic, x.structBasic.getAllocator());
|
2074
|
+
x.cText.defineAnnotatedMethods(Text.class);
|
2075
|
+
|
2076
|
+
x.cXMLDecl = x.mHpricot.defineClassUnder("XMLDecl", x.structAttr, x.structAttr.getAllocator());
|
2077
|
+
x.cXMLDecl.defineAnnotatedMethods(XMLDecl.class);
|
2078
|
+
|
2079
|
+
x.cProcIns = x.mHpricot.defineClassUnder("ProcIns", x.structAttr, x.structAttr.getAllocator());
|
2080
|
+
x.cProcIns.defineAnnotatedMethods(ProcIns.class);
|
2081
|
+
|
2082
|
+
x.reProcInsParse = runtime.evalScriptlet("/\\A<\\?(\\S+)\\s+(.+)/m");
|
2083
|
+
x.mHpricot.setConstant("ProcInsParse", x.reProcInsParse);
|
2084
|
+
}
|
1305
2085
|
}
|