why-hpricot 0.6.210 → 0.7.229

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,16 @@
1
+ = 0.8
2
+ === 31st March, 2009
3
+ * Saving memory and speed by using RStruct-based elements in the C extension.
4
+ * Bug in tag parsing, causing runaway <script> and <style> tags in HTML.
5
+ * Problem compiling under Ruby 1.9, due to our_rb_hash_lookup function meant for Ruby 1.8.
6
+ * CData was missing inner_text method.
7
+
8
+ = 0.7
9
+ === 17th March, 2009
10
+ * Rewritten parser routine, much lighter on memory, quite a bit faster.
11
+ * Friendlier with Ruby 1.9.
12
+ * Fixes to nth-child and text() selectors.
13
+
1
14
  = 0.6
2
15
  === 15th June, 2007
3
16
  * Hpricot for JRuby -- nice work Ola Bini!
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ RbConfig = Config unless defined?(RbConfig)
10
10
 
11
11
  NAME = "hpricot"
12
12
  REV = (`#{ENV['GIT'] || "git"} rev-list HEAD`.split.length + 1).to_s
13
- VERS = ENV['VERSION'] || "0.6" + (REV ? ".#{REV}" : "")
13
+ VERS = ENV['VERSION'] || "0.8" + (REV ? ".#{REV}" : "")
14
14
  PKG = "#{NAME}-#{VERS}"
15
15
  BIN = "*.{bundle,jar,so,o,obj,pdb,lib,def,exp,class}"
16
16
  CLEAN.include ["ext/hpricot_scan/#{BIN}", "ext/fast_xs/#{BIN}", "lib/**/#{BIN}",
@@ -53,7 +53,7 @@ SPEC =
53
53
  end
54
54
 
55
55
  Win32Spec = SPEC.dup
56
- Win32Spec.platform = 'mswin32'
56
+ Win32Spec.platform = 'x86-mswin32'
57
57
  Win32Spec.files = PKG_FILES + ["lib/hpricot_scan.so", "lib/fast_xs.so"]
58
58
  Win32Spec.extensions = []
59
59
 
@@ -68,6 +68,7 @@ task :package => [:clean, :ragel]
68
68
  desc "Releases packages for all Hpricot packages and platforms."
69
69
  task :release => [:package, :package_win32, :package_jruby]
70
70
 
71
+
71
72
  desc "Run all the tests"
72
73
  Rake::TestTask.new do |t|
73
74
  t.libs << "test"
@@ -75,6 +76,8 @@ Rake::TestTask.new do |t|
75
76
  t.verbose = true
76
77
  end
77
78
 
79
+ #task :test => [:hpricot_java] if defined?(JRUBY_VERSION)
80
+
78
81
  Rake::RDocTask.new do |rdoc|
79
82
  rdoc.rdoc_dir = 'doc/rdoc'
80
83
  rdoc.options += RDOC_OPTS
@@ -108,7 +111,7 @@ end
108
111
 
109
112
  file ext_so => ext_files do
110
113
  Dir.chdir(ext) do
111
- sh(RUBY_PLATFORM =~ /win32/ ? 'nmake' : 'make')
114
+ sh(RUBY_PLATFORM =~ /mswin/ ? 'nmake' : 'make')
112
115
  end
113
116
  cp ext_so, "lib"
114
117
  end
@@ -147,7 +150,8 @@ desc "Generates the C scanner code with Ragel."
147
150
  task :ragel => [:ragel_version] do
148
151
  if @ragel_v >= 6.1
149
152
  @ragel_c_code_generation_style = RAGEL_C_CODE_GENERATION_STYLES[DEFAULT_RAGEL_C_CODE_GENERATION]
150
- sh %{cd ext/hpricot_scan; ragel hpricot_scan.rl -#{@ragel_c_code_generation_style} -o hpricot_scan.c && ragel hpricot_css.rl -#{@ragel_c_code_generation_style} -o hpricot_css.c}
153
+ console_sep = (ENV['COMSPEC'] =~ /cmd\.exe/) ? '&' : ';'
154
+ sh %{cd ext/hpricot_scan #{console_sep} ragel hpricot_scan.rl -#{@ragel_c_code_generation_style} -o hpricot_scan.c && ragel hpricot_css.rl -#{@ragel_c_code_generation_style} -o hpricot_css.c}
151
155
  else
152
156
  STDERR.puts "Ragel 6.1 or greater is required."
153
157
  exit(1)
@@ -160,6 +164,7 @@ desc "Generates the Java scanner code using the Ragel table-driven code generati
160
164
  task :ragel_java => [:ragel_version] do
161
165
  if @ragel_v >= 6.1
162
166
  puts "compiling with ragel version #{@ragel_v}"
167
+ sh %{ragel -J -o ext/hpricot_scan/HpricotCss.java ext/hpricot_scan/hpricot_css.java.rl}
163
168
  sh %{ragel -J -o ext/hpricot_scan/HpricotScanService.java ext/hpricot_scan/hpricot_scan.java.rl}
164
169
  else
165
170
  STDERR.puts "Ragel 6.1 or greater is required."
@@ -180,7 +185,7 @@ task :package_win32 => ["fast_xs_win32", "hpricot_scan_win32"] do
180
185
  Dir.chdir("#{WIN32_PKG_DIR}") do
181
186
  Gem::Builder.new(Win32Spec).build
182
187
  verbose(true) {
183
- mv Dir["*.gem"].first, "../pkg/#{WIN32_PKG_DIR}.gem"
188
+ mv Dir["*.gem"].first, "../pkg/"
184
189
  }
185
190
  end
186
191
  end
@@ -201,20 +206,20 @@ def java_classpath_arg
201
206
  classpath ? "-cp #{classpath}" : ""
202
207
  end
203
208
 
204
- def compile_java(filename, jarname)
205
- sh %{javac -source 1.4 -target 1.4 #{java_classpath_arg} #{filename}}
209
+ def compile_java(filenames, jarname)
210
+ sh %{javac -source 1.5 -target 1.5 #{java_classpath_arg} #{filenames.join(" ")}}
206
211
  sh %{jar cf #{jarname} *.class}
207
212
  end
208
213
 
209
214
  task :hpricot_scan_java => [:ragel_java] do
210
215
  Dir.chdir "ext/hpricot_scan" do
211
- compile_java("HpricotScanService.java", "hpricot_scan.jar")
216
+ compile_java(["HpricotScanService.java", "HpricotCss.java"], "hpricot_scan.jar")
212
217
  end
213
218
  end
214
219
 
215
220
  task :fast_xs_java do
216
221
  Dir.chdir "ext/fast_xs" do
217
- compile_java("FastXsService.java", "fast_xs.jar")
222
+ compile_java(["FastXsService.java"], "fast_xs.jar")
218
223
  end
219
224
  end
220
225
 
@@ -6,6 +6,7 @@
6
6
 
7
7
  #ifndef RARRAY_LEN
8
8
  #define RARRAY_LEN(arr) RARRAY(arr)->len
9
+ #define RARRAY_PTR(arr) RARRAY(arr)->ptr
9
10
  #define RSTRING_LEN(str) RSTRING(str)->len
10
11
  #define RSTRING_PTR(str) RSTRING(str)->ptr
11
12
  #endif
@@ -115,7 +116,7 @@ static long escape(char *buf, int n)
115
116
 
116
117
  if (VALID_VALUE(n)) {
117
118
  /* return snprintf(buf, sizeof("&#1114111;"), "&#%i;", n); */
118
- extern const char ruby_digitmap[];
119
+ RUBY_EXTERN const char ruby_digitmap[];
119
120
  int rv = 3; /* &#; */
120
121
  buf += bytes_for(n);
121
122
  *--buf = ';';
@@ -3,150 +3,517 @@
3
3
  import java.io.IOException;
4
4
 
5
5
  import org.jruby.Ruby;
6
+ import org.jruby.RubyArray;
6
7
  import org.jruby.RubyClass;
7
8
  import org.jruby.RubyHash;
8
9
  import org.jruby.RubyModule;
9
10
  import org.jruby.RubyNumeric;
11
+ import org.jruby.RubyObject;
10
12
  import org.jruby.RubyObjectAdapter;
13
+ import org.jruby.RubyRegexp;
11
14
  import org.jruby.RubyString;
15
+ import org.jruby.anno.JRubyMethod;
16
+ import org.jruby.exceptions.RaiseException;
12
17
  import org.jruby.javasupport.JavaEmbedUtils;
18
+ import org.jruby.runtime.Arity;
13
19
  import org.jruby.runtime.Block;
14
- import org.jruby.runtime.CallbackFactory;
20
+ import org.jruby.runtime.ObjectAllocator;
21
+ import org.jruby.runtime.ThreadContext;
15
22
  import org.jruby.runtime.builtin.IRubyObject;
23
+ import org.jruby.runtime.callback.Callback;
16
24
  import org.jruby.exceptions.RaiseException;
17
25
  import org.jruby.runtime.load.BasicLibraryService;
26
+ import org.jruby.util.ByteList;
18
27
 
19
28
  public class HpricotScanService implements BasicLibraryService {
20
- public static String NO_WAY_SERIOUSLY="*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
21
- private static RubyObjectAdapter rubyApi;
22
-
23
- public void ELE(IRubyObject N) {
24
- if (te > ts || text) {
25
- IRubyObject raw_string = runtime.getNil();
26
- ele_open = false; text = false;
27
- if (ts != -1 && N != cdata && N != sym_text && N != procins && N != comment) {
28
- raw_string = runtime.newString(new String(buf,ts,te-ts));
29
- }
30
- rb_yield_tokens(N, tag[0], attr, raw_string, taint);
31
- }
32
- }
33
-
34
- public void SET(IRubyObject[] N, int E) {
35
- int mark = 0;
36
- if(N == tag) {
37
- if(mark_tag == -1 || E == mark_tag) {
38
- tag[0] = runtime.newString("");
39
- } else if(E > mark_tag) {
40
- tag[0] = runtime.newString(new String(buf,mark_tag, E-mark_tag));
41
- }
42
- } else if(N == akey) {
43
- if(mark_akey == -1 || E == mark_akey) {
44
- akey[0] = runtime.newString("");
45
- } else if(E > mark_akey) {
46
- akey[0] = runtime.newString(new String(buf,mark_akey, E-mark_akey));
47
- }
48
- } else if(N == aval) {
49
- if(mark_aval == -1 || E == mark_aval) {
50
- aval[0] = runtime.newString("");
51
- } else if(E > mark_aval) {
52
- aval[0] = runtime.newString(new String(buf,mark_aval, E-mark_aval));
53
- }
54
- }
55
- }
56
-
57
- public void CAT(IRubyObject[] N, int E) {
58
- if(N[0].isNil()) {
59
- SET(N,E);
60
- } else {
61
- int mark = 0;
62
- if(N == tag) {
63
- mark = mark_tag;
64
- } else if(N == akey) {
65
- mark = mark_akey;
66
- } else if(N == aval) {
67
- mark = mark_aval;
68
- }
69
- ((RubyString)(N[0])).append(runtime.newString(new String(buf, mark, E-mark)));
70
- }
71
- }
72
-
73
- public void SLIDE(Object N) {
74
- int mark = 0;
75
- if(N == tag) {
76
- mark = mark_tag;
77
- } else if(N == akey) {
78
- mark = mark_akey;
79
- } else if(N == aval) {
80
- mark = mark_aval;
81
- }
82
- if(mark > ts) {
83
- if(N == tag) {
84
- mark_tag -= ts;
85
- } else if(N == akey) {
86
- mark_akey -= ts;
87
- } else if(N == aval) {
88
- mark_aval -= ts;
89
- }
90
- }
91
- }
92
-
93
- public void ATTR(IRubyObject K, IRubyObject V) {
94
- if(!K.isNil()) {
95
- if(attr.isNil()) {
96
- attr = RubyHash.newHash(runtime);
97
- }
98
- ((RubyHash)attr).op_aset(runtime.getCurrentContext(),K,V);
99
- // ((RubyHash)attr).aset(K,V);
100
- }
101
- }
102
-
103
- public void ATTR(IRubyObject[] K, IRubyObject V) {
104
- ATTR(K[0],V);
105
- }
106
-
107
- public void ATTR(IRubyObject K, IRubyObject[] V) {
108
- ATTR(K,V[0]);
109
- }
110
-
111
- public void ATTR(IRubyObject[] K, IRubyObject[] V) {
112
- ATTR(K[0],V[0]);
113
- }
114
-
115
- public void TEXT_PASS() {
116
- if(!text) {
117
- if(ele_open) {
118
- ele_open = false;
119
- if(ts > -1) {
120
- mark_tag = ts;
121
- }
122
- } else {
123
- mark_tag = p;
124
- }
125
- attr = runtime.getNil();
126
- tag[0] = runtime.getNil();
127
- text = true;
128
- }
129
- }
130
-
131
- public void EBLK(IRubyObject N, int T) {
132
- CAT(tag, p - T + 1);
133
- ELE(N);
134
- }
135
-
136
-
137
- public void rb_raise(RubyClass error, String message) {
138
- throw new RaiseException(runtime, error, message, true);
139
- }
140
-
141
- public IRubyObject rb_str_new2(String s) {
142
- return runtime.newString(s);
143
- }
144
-
145
- // line 189 "ext/hpricot_scan/hpricot_scan.java.rl"
146
-
147
-
148
-
149
- // line 150 "ext/hpricot_scan/HpricotScanService.java"
29
+ public static byte[] realloc(byte[] input, int size) {
30
+ byte[] newArray = new byte[size];
31
+ System.arraycopy(input, 0, newArray, 0, input.length);
32
+ return newArray;
33
+ }
34
+
35
+ // hpricot_state
36
+ public static class State {
37
+ public IRubyObject doc;
38
+ public IRubyObject focus;
39
+ public IRubyObject last;
40
+ public IRubyObject EC;
41
+ public boolean xml, strict, fixup;
42
+ }
43
+
44
+ static boolean OPT(IRubyObject opts, String key) {
45
+ Ruby runtime = opts.getRuntime();
46
+ return !opts.isNil() && ((RubyHash)opts).op_aref(runtime.getCurrentContext(), runtime.newSymbol(key)).isTrue();
47
+ }
48
+
49
+ // H_PROP(name, H_ELE_TAG)
50
+ public static IRubyObject hpricot_ele_set_name(IRubyObject self, IRubyObject x) {
51
+ H_ELE_SET(self, H_ELE_TAG, x);
52
+ return self;
53
+ }
54
+
55
+ public static IRubyObject hpricot_ele_clear_name(IRubyObject self) {
56
+ H_ELE_SET(self, H_ELE_TAG, self.getRuntime().getNil());
57
+ return self.getRuntime().getTrue();
58
+ }
59
+
60
+ public static IRubyObject hpricot_ele_get_name(IRubyObject self) {
61
+ return H_ELE_GET(self, H_ELE_TAG);
62
+ }
63
+
64
+ // H_PROP(raw, H_ELE_RAW)
65
+ public static IRubyObject hpricot_ele_set_raw(IRubyObject self, IRubyObject x) {
66
+ H_ELE_SET(self, H_ELE_RAW, x);
67
+ return self;
68
+ }
69
+
70
+ public static IRubyObject hpricot_ele_clear_raw(IRubyObject self) {
71
+ H_ELE_SET(self, H_ELE_RAW, self.getRuntime().getNil());
72
+ return self.getRuntime().getTrue();
73
+ }
74
+
75
+ public static IRubyObject hpricot_ele_get_raw(IRubyObject self) {
76
+ return H_ELE_GET(self, H_ELE_RAW);
77
+ }
78
+
79
+ // H_PROP(parent, H_ELE_PARENT)
80
+ public static IRubyObject hpricot_ele_set_parent(IRubyObject self, IRubyObject x) {
81
+ H_ELE_SET(self, H_ELE_PARENT, x);
82
+ return self;
83
+ }
84
+
85
+ public static IRubyObject hpricot_ele_clear_parent(IRubyObject self) {
86
+ H_ELE_SET(self, H_ELE_PARENT, self.getRuntime().getNil());
87
+ return self.getRuntime().getTrue();
88
+ }
89
+
90
+ public static IRubyObject hpricot_ele_get_parent(IRubyObject self) {
91
+ return H_ELE_GET(self, H_ELE_PARENT);
92
+ }
93
+
94
+ // H_PROP(attr, H_ELE_ATTR)
95
+ public static IRubyObject hpricot_ele_set_attr(IRubyObject self, IRubyObject x) {
96
+ H_ELE_SET(self, H_ELE_ATTR, x);
97
+ return self;
98
+ }
99
+
100
+ public static IRubyObject hpricot_ele_clear_attr(IRubyObject self) {
101
+ H_ELE_SET(self, H_ELE_ATTR, self.getRuntime().getNil());
102
+ return self.getRuntime().getTrue();
103
+ }
104
+
105
+ public static IRubyObject hpricot_ele_get_attr(IRubyObject self) {
106
+ return H_ELE_GET(self, H_ELE_ATTR);
107
+ }
108
+
109
+ // H_PROP(etag, H_ELE_ETAG)
110
+ public static IRubyObject hpricot_ele_set_etag(IRubyObject self, IRubyObject x) {
111
+ H_ELE_SET(self, H_ELE_ETAG, x);
112
+ return self;
113
+ }
114
+
115
+ public static IRubyObject hpricot_ele_clear_etag(IRubyObject self) {
116
+ H_ELE_SET(self, H_ELE_ETAG, self.getRuntime().getNil());
117
+ return self.getRuntime().getTrue();
118
+ }
119
+
120
+ public static IRubyObject hpricot_ele_get_etag(IRubyObject self) {
121
+ return H_ELE_GET(self, H_ELE_ETAG);
122
+ }
123
+
124
+ // H_PROP(children, H_ELE_CHILDREN)
125
+ public static IRubyObject hpricot_ele_set_children(IRubyObject self, IRubyObject x) {
126
+ H_ELE_SET(self, H_ELE_CHILDREN, x);
127
+ return self;
128
+ }
129
+
130
+ public static IRubyObject hpricot_ele_clear_children(IRubyObject self) {
131
+ H_ELE_SET(self, H_ELE_CHILDREN, self.getRuntime().getNil());
132
+ return self.getRuntime().getTrue();
133
+ }
134
+
135
+ public static IRubyObject hpricot_ele_get_children(IRubyObject self) {
136
+ return H_ELE_GET(self, H_ELE_CHILDREN);
137
+ }
138
+
139
+ // H_ATTR(target)
140
+ public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
141
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("target"), x);
142
+ return self;
143
+ }
144
+
145
+ public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
146
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
147
+ }
148
+
149
+ // H_ATTR(encoding)
150
+ public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
151
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
152
+ return self;
153
+ }
154
+
155
+ public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
156
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
157
+ }
158
+
159
+ // H_ATTR(version)
160
+ public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
161
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
162
+ return self;
163
+ }
164
+
165
+ public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
166
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
167
+ }
168
+
169
+ // H_ATTR(standalone)
170
+ public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
171
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
172
+ return self;
173
+ }
174
+
175
+ public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
176
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
177
+ }
178
+
179
+ // H_ATTR(system_id)
180
+ public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
181
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
182
+ return self;
183
+ }
184
+
185
+ public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
186
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
187
+ }
188
+
189
+ // H_ATTR(public_id)
190
+ public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
191
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
192
+ return self;
193
+ }
194
+
195
+ public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
196
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
197
+ }
198
+
199
+ public static class Scanner {
200
+ public IRubyObject SET(int mark, int E, IRubyObject org) {
201
+ if(mark == -1 || E == mark) {
202
+ return runtime.newString("");
203
+ } else if(E > mark) {
204
+ return RubyString.newString(runtime, data, mark, E-mark);
205
+ } else {
206
+ return org;
207
+ }
208
+ }
209
+
210
+ public int SLIDE(int N) {
211
+ if(N > ts) {
212
+ return N - ts;
213
+ } else {
214
+ return N;
215
+ }
216
+ }
217
+
218
+ public IRubyObject CAT(IRubyObject N, int mark, int E) {
219
+ if(N.isNil()) {
220
+ return SET(mark, E, N);
221
+ } else {
222
+ ((RubyString)N).cat(data, mark, E-mark);
223
+ return N;
224
+ }
225
+ }
226
+
227
+ public void ATTR(IRubyObject K, IRubyObject V) {
228
+ if(!K.isNil()) {
229
+ if(attr.isNil()) {
230
+ attr = RubyHash.newHash(runtime);
231
+ }
232
+ ((RubyHash)attr).fastASet(K, V);
233
+ }
234
+ }
235
+
236
+ public void TEXT_PASS() {
237
+ if(!text) {
238
+ if(ele_open) {
239
+ ele_open = false;
240
+ if(ts != -1) {
241
+ mark_tag = ts;
242
+ }
243
+ } else {
244
+ mark_tag = p;
245
+ }
246
+ attr = runtime.getNil();
247
+ tag = runtime.getNil();
248
+ text = true;
249
+ }
250
+ }
251
+
252
+ public void ELE(IRubyObject N) {
253
+ if(te > ts || text) {
254
+ int raw = -1;
255
+ int rawlen = 0;
256
+ ele_open = false;
257
+ text = false;
258
+
259
+ if(ts != -1 && N != x.sym_cdata && N != x.sym_text && N != x.sym_procins && N != x.sym_comment) {
260
+ raw = ts;
261
+ rawlen = te - ts;
262
+ }
263
+
264
+ if(block.isGiven()) {
265
+ IRubyObject raw_string = runtime.getNil();
266
+ if(raw != -1) {
267
+ raw_string = RubyString.newString(runtime, data, raw, rawlen);
268
+ }
269
+ yieldTokens(N, tag, attr, runtime.getNil(), taint);
270
+ } else {
271
+ hpricotToken(S, N, tag, attr, raw, rawlen, taint);
272
+ }
273
+ }
274
+ }
275
+
276
+
277
+ public void EBLK(IRubyObject N, int T) {
278
+ tag = CAT(tag, mark_tag, p - T + 1);
279
+ ELE(N);
280
+ }
281
+
282
+ public void hpricotAdd(IRubyObject focus, IRubyObject ele) {
283
+ IRubyObject children = H_ELE_GET(focus, H_ELE_CHILDREN);
284
+ if(children.isNil()) {
285
+ H_ELE_SET(focus, H_ELE_CHILDREN, children = RubyArray.newArray(runtime, 1));
286
+ }
287
+ ((RubyArray)children).append(ele);
288
+ H_ELE_SET(ele, H_ELE_PARENT, focus);
289
+ }
290
+
291
+ private static class TokenInfo {
292
+ public IRubyObject sym;
293
+ public IRubyObject tag;
294
+ public IRubyObject attr;
295
+ public int raw;
296
+ public int rawlen;
297
+ public IRubyObject ec;
298
+ public IRubyObject ele;
299
+ public Extra x;
300
+ public Ruby runtime;
301
+ public Scanner scanner;
302
+ public State S;
303
+
304
+ public void H_ELE(RubyClass klass) {
305
+ ele = klass.allocate();
306
+ if(klass == x.cElem) {
307
+ H_ELE_SET(ele, H_ELE_TAG, tag);
308
+ H_ELE_SET(ele, H_ELE_ATTR, attr);
309
+ H_ELE_SET(ele, H_ELE_EC, ec);
310
+ if(raw != -1 && (sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_doctype)) {
311
+ H_ELE_SET(ele, H_ELE_RAW, RubyString.newString(runtime, scanner.data, raw, rawlen));
312
+ }
313
+ } else if(klass == x.cDocType || klass == x.cProcIns || klass == x.cXMLDecl || klass == x.cBogusETag) {
314
+ if(klass == x.cBogusETag) {
315
+ H_ELE_SET(ele, H_ELE_TAG, tag);
316
+ if(raw != -1) {
317
+ H_ELE_SET(ele, H_ELE_ATTR, RubyString.newString(runtime, scanner.data, raw, rawlen));
318
+ }
319
+ } else {
320
+ if(klass == x.cDocType) {
321
+ scanner.ATTR(runtime.newSymbol("target"), tag);
322
+ }
323
+ H_ELE_SET(ele, H_ELE_ATTR, attr);
324
+ if(klass != x.cProcIns) {
325
+ tag = runtime.getNil();
326
+ if(raw != -1) {
327
+ tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
328
+ }
329
+ }
330
+ H_ELE_SET(ele, H_ELE_TAG, tag);
331
+ }
332
+ } else {
333
+ H_ELE_SET(ele, H_ELE_TAG, tag);
334
+ }
335
+ S.last = ele;
336
+ }
337
+
338
+ public void hpricotToken(boolean taint) {
339
+ //
340
+ // in html mode, fix up start tags incorrectly formed as empty tags
341
+ //
342
+ if(!S.xml) {
343
+ if(sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_etag) {
344
+ ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
345
+ if(ec.isNil()) {
346
+ tag = tag.callMethod(scanner.ctx, "downcase");
347
+ ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
348
+ }
349
+ }
350
+
351
+ if(H_ELE_GET(S.focus, H_ELE_EC) == x.sym_CDATA &&
352
+ (sym != x.sym_procins && sym != x.sym_comment && sym != x.sym_cdata && sym != x.sym_text) &&
353
+ !(sym == x.sym_etag && runtime.newFixnum(tag.hashCode()).equals(H_ELE_GET(S.focus, H_ELE_HASH)))) {
354
+ sym = x.sym_text;
355
+ tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
356
+ }
357
+
358
+ if(!ec.isNil()) {
359
+ if(sym == x.sym_emptytag) {
360
+ if(ec != x.sym_EMPTY) {
361
+ sym = x.sym_stag;
362
+ }
363
+ } else if(sym == x.sym_stag) {
364
+ if(ec == x.sym_EMPTY) {
365
+ sym = x.sym_emptytag;
366
+ }
367
+ }
368
+ }
369
+ }
370
+
371
+ if(sym == x.sym_emptytag || sym == x.sym_stag) {
372
+ IRubyObject name = runtime.newFixnum(tag.hashCode());
373
+ H_ELE(x.cElem);
374
+ H_ELE_SET(ele, H_ELE_HASH, name);
375
+
376
+ if(!S.xml) {
377
+ IRubyObject match = runtime.getNil(), e = S.focus;
378
+ while(e != S.doc) {
379
+ IRubyObject hEC = H_ELE_GET(e, H_ELE_EC);
380
+ if(hEC instanceof RubyHash) {
381
+ IRubyObject has = ((RubyHash)hEC).op_aref(scanner.ctx, name);
382
+ if(!has.isNil()) {
383
+ if(has == runtime.getTrue()) {
384
+ if(match.isNil()) {
385
+ match = e;
386
+ }
387
+ } else if(has == x.symAllow) {
388
+ match = S.focus;
389
+ } else if(has == x.symDeny) {
390
+ match = runtime.getNil();
391
+ }
392
+ }
393
+ }
394
+ e = H_ELE_GET(e, H_ELE_PARENT);
395
+ }
396
+
397
+ if(match.isNil()) {
398
+ match = S.focus;
399
+ }
400
+ S.focus = match;
401
+ }
402
+
403
+ scanner.hpricotAdd(S.focus, ele);
404
+
405
+ //
406
+ // in the case of a start tag that should be empty, just
407
+ // skip the step that focuses the element. focusing moves
408
+ // us deeper into the document.
409
+ //
410
+ if(sym == x.sym_stag) {
411
+ if(S.xml || ec != x.sym_EMPTY) {
412
+ S.focus = ele;
413
+ S.last = runtime.getNil();
414
+ }
415
+ }
416
+ } else if(sym == x.sym_etag) {
417
+ IRubyObject name, match = runtime.getNil(), e = S.focus;
418
+ if(S.strict) {
419
+ if(((RubyHash)S.EC).op_aref(scanner.ctx, tag).isNil()) {
420
+ tag = runtime.newString("div");
421
+ }
422
+ }
423
+
424
+ name = runtime.newFixnum(tag.hashCode());
425
+ while(e != S.doc) {
426
+ if(H_ELE_GET(e, H_ELE_HASH).equals(name)) {
427
+ match = e;
428
+ break;
429
+ }
430
+ e = H_ELE_GET(e, H_ELE_PARENT);
431
+
432
+ }
433
+ if(match.isNil()) {
434
+ H_ELE(x.cBogusETag);
435
+ scanner.hpricotAdd(S.focus, ele);
436
+ } else {
437
+ ele = runtime.getNil();
438
+ if(raw != -1) {
439
+ ele = RubyString.newString(runtime, scanner.data, raw, rawlen);
440
+ }
441
+ H_ELE_SET(match, H_ELE_ETAG, ele);
442
+ S.focus = H_ELE_GET(match, H_ELE_PARENT);
443
+ S.last = runtime.getNil();
444
+
445
+ }
446
+ } else if(sym == x.sym_cdata) {
447
+ H_ELE(x.cCData);
448
+ scanner.hpricotAdd(S.focus, ele);
449
+ } else if(sym == x.sym_comment) {
450
+ H_ELE(x.cComment);
451
+ scanner.hpricotAdd(S.focus, ele);
452
+ } else if(sym == x.sym_doctype) {
453
+ H_ELE(x.cDocType);
454
+ if(S.strict) {
455
+ RubyHash h = (RubyHash)attr;
456
+ h.fastASet(runtime.newSymbol("system_id"), runtime.newString("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
457
+ h.fastASet(runtime.newSymbol("public_id"), runtime.newString("-//W3C//DTD XHTML 1.0 Strict//EN"));
458
+ }
459
+ scanner.hpricotAdd(S.focus, ele);
460
+ } else if(sym == x.sym_procins) {
461
+ IRubyObject match = tag.callMethod(scanner.ctx, "match", x.reProcInsParse);
462
+ tag = RubyRegexp.nth_match(1, match);
463
+ attr = RubyRegexp.nth_match(2, match);
464
+ H_ELE(x.cProcIns);
465
+ scanner.hpricotAdd(S.focus, ele);
466
+ } else if(sym == x.sym_text) {
467
+ if(!S.last.isNil() && S.last.getType() == x.cText) {
468
+ ((RubyString)H_ELE_GET(S.last, H_ELE_TAG)).append(tag);
469
+ } else {
470
+ H_ELE(x.cText);
471
+ scanner.hpricotAdd(S.focus, ele);
472
+ }
473
+ } else if(sym == x.sym_xmldecl) {
474
+ H_ELE(x.cXMLDecl);
475
+ scanner.hpricotAdd(S.focus, ele);
476
+ }
477
+ }
478
+ }
479
+
480
+ public void hpricotToken(State S, IRubyObject _sym, IRubyObject _tag, IRubyObject _attr, int _raw, int _rawlen, boolean taint) {
481
+ TokenInfo t = new TokenInfo();
482
+ t.sym = _sym;
483
+ t.tag = _tag;
484
+ t.attr = _attr;
485
+ t.raw = _raw;
486
+ t.rawlen = _rawlen;
487
+ t.ec = runtime.getNil();
488
+ t.ele = runtime.getNil();
489
+ t.x = x;
490
+ t.runtime = runtime;
491
+ t.scanner = this;
492
+ t.S = S;
493
+
494
+ t.hpricotToken(taint);
495
+ }
496
+
497
+ public void yieldTokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
498
+ if(sym == x.sym_text) {
499
+ raw = tag;
500
+ }
501
+ IRubyObject ary = RubyArray.newArrayNoCopy(runtime, new IRubyObject[]{sym, tag, attr, raw});
502
+ if(taint) {
503
+ ary.setTaint(true);
504
+ tag.setTaint(true);
505
+ attr.setTaint(true);
506
+ raw.setTaint(true);
507
+ }
508
+
509
+ block.yield(ctx, ary);
510
+ }
511
+
512
+ // line 561 "ext/hpricot_scan/hpricot_scan.java.rl"
513
+
514
+
515
+
516
+ // line 517 "ext/hpricot_scan/HpricotScanService.java"
150
517
  private static byte[] init__hpricot_scan_actions_0()
151
518
  {
152
519
  return new byte [] {
@@ -752,121 +1119,166 @@ static final int hpricot_scan_en_html_cdata = 216;
752
1119
  static final int hpricot_scan_en_html_procins = 218;
753
1120
  static final int hpricot_scan_en_main = 204;
754
1121
 
755
- // line 192 "ext/hpricot_scan/hpricot_scan.java.rl"
1122
+ // line 564 "ext/hpricot_scan/hpricot_scan.java.rl"
756
1123
 
757
- public final static int BUFSIZE=16384;
1124
+ public final static int BUFSIZE = 16384;
758
1125
 
759
- private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
760
- IRubyObject ary;
761
- if (sym == runtime.newSymbol("text")) {
762
- raw = tag;
763
- }
764
- ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw});
765
- if (taint) {
766
- ary.setTaint(true);
767
- tag.setTaint(true);
768
- attr.setTaint(true);
769
- raw.setTaint(true);
770
- }
771
- block.yield(runtime.getCurrentContext(), ary, null, null, false);
772
- }
773
1126
 
1127
+ private int cs, act, have = 0, nread = 0, curline = 1;
1128
+ private int ts = 0, te = 0, eof = -1, p = -1, pe = -1, buf = 0;
1129
+ private byte[] data;
1130
+ private State S = null;
1131
+ private IRubyObject port, opts, attr, tag, akey, aval, bufsize;
1132
+ private int mark_tag = -1, mark_akey = -1, mark_aval = -1;
1133
+ private boolean done = false, ele_open = false, taint = false, io = false, text = false;
1134
+ private int buffer_size = 0;
774
1135
 
775
- int cs, act, have = 0, nread = 0, curline = 1, p=-1;
776
- boolean text = false;
777
- int ts=-1, te;
778
- int eof=-1;
779
- char[] buf;
780
- Ruby runtime;
781
- IRubyObject attr, bufsize;
782
- IRubyObject[] tag, akey, aval;
783
- int mark_tag, mark_akey, mark_aval;
784
- boolean done = false, ele_open = false;
785
- int buffer_size = 0;
786
- boolean taint = false;
787
- Block block = null;
788
-
789
-
790
- IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment,
791
- cdata, sym_text;
792
-
793
- IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) {
794
- attr = bufsize = runtime.getNil();
795
- tag = new IRubyObject[]{runtime.getNil()};
796
- akey = new IRubyObject[]{runtime.getNil()};
797
- aval = new IRubyObject[]{runtime.getNil()};
798
-
799
- RubyClass rb_eHpricotParseError = runtime.getModule("Hpricot").getClass("ParseError");
800
-
801
- taint = port.isTaint();
802
- if ( !port.respondsTo("read")) {
803
- if ( port.respondsTo("to_str")) {
804
- port = port.callMethod(runtime.getCurrentContext(),"to_str");
805
- } else {
806
- throw runtime.newArgumentError("bad Hpricot argument, String or IO only please.");
807
- }
808
- }
1136
+ private Extra x;
809
1137
 
810
- buffer_size = BUFSIZE;
811
- if (rubyApi.getInstanceVariable(recv, "@buffer_size") != null) {
812
- bufsize = rubyApi.getInstanceVariable(recv, "@buffer_size");
813
- if (!bufsize.isNil()) {
814
- buffer_size = RubyNumeric.fix2int(bufsize);
815
- }
816
- }
817
- buf = new char[buffer_size];
1138
+ private IRubyObject self;
1139
+ private Ruby runtime;
1140
+ private ThreadContext ctx;
1141
+ private Block block;
1142
+
1143
+ private IRubyObject xmldecl, doctype, stag, etag, emptytag, comment, cdata, procins;
1144
+
1145
+ private RaiseException newRaiseException(RubyClass exceptionClass, String message) {
1146
+ return new RaiseException(runtime, exceptionClass, message, true);
1147
+ }
818
1148
 
819
-
820
- // line 821 "ext/hpricot_scan/HpricotScanService.java"
1149
+ public Scanner(IRubyObject self, IRubyObject[] args, Block block) {
1150
+ this.self = self;
1151
+ this.runtime = self.getRuntime();
1152
+ this.ctx = runtime.getCurrentContext();
1153
+ this.block = block;
1154
+ attr = runtime.getNil();
1155
+ tag = runtime.getNil();
1156
+ akey = runtime.getNil();
1157
+ aval = runtime.getNil();
1158
+ bufsize = runtime.getNil();
1159
+
1160
+ this.x = (Extra)this.runtime.getModule("Hpricot").dataGetStruct();
1161
+
1162
+ this.xmldecl = x.sym_xmldecl;
1163
+ this.doctype = x.sym_doctype;
1164
+ this.stag = x.sym_stag;
1165
+ this.etag = x.sym_etag;
1166
+ this.emptytag = x.sym_emptytag;
1167
+ this.comment = x.sym_comment;
1168
+ this.cdata = x.sym_cdata;
1169
+ this.procins = x.sym_procins;
1170
+
1171
+ port = args[0];
1172
+ if(args.length == 2) {
1173
+ opts = args[1];
1174
+ } else {
1175
+ opts = runtime.getNil();
1176
+ }
1177
+
1178
+ taint = port.isTaint();
1179
+ io = port.respondsTo("read");
1180
+ if(!io) {
1181
+ if(port.respondsTo("to_str")) {
1182
+ port = port.callMethod(ctx, "to_str");
1183
+ port = port.convertToString();
1184
+ } else {
1185
+ throw runtime.newArgumentError("an Hpricot document must be built from an input source (a String or IO object.)");
1186
+ }
1187
+ }
1188
+
1189
+ if(!(opts instanceof RubyHash)) {
1190
+ opts = runtime.getNil();
1191
+ }
1192
+
1193
+ if(!block.isGiven()) {
1194
+ S = new State();
1195
+ S.doc = x.cDoc.allocate();
1196
+ S.focus = S.doc;
1197
+ S.last = runtime.getNil();
1198
+ S.xml = OPT(opts, "xml");
1199
+ S.strict = OPT(opts, "xhtml_strict");
1200
+ S.fixup = OPT(opts, "fixup_tags");
1201
+ if(S.strict) {
1202
+ S.fixup = true;
1203
+ }
1204
+ S.doc.getInstanceVariables().fastSetInstanceVariable("@options", opts);
1205
+ S.EC = x.mHpricot.getConstant("ElementContent");
1206
+ }
1207
+
1208
+ buffer_size = BUFSIZE;
1209
+ if(self.getInstanceVariables().fastHasInstanceVariable("@buffer_size")) {
1210
+ bufsize = self.getInstanceVariables().fastGetInstanceVariable("@buffer_size");
1211
+ if(!bufsize.isNil()) {
1212
+ buffer_size = RubyNumeric.fix2int(bufsize);
1213
+ }
1214
+ }
1215
+
1216
+ if(io) {
1217
+ buf = 0;
1218
+ data = new byte[buffer_size];
1219
+ }
1220
+ }
1221
+
1222
+ private int len, space;
1223
+ // hpricot_scan
1224
+ public IRubyObject scan() {
1225
+
1226
+ // line 1227 "ext/hpricot_scan/HpricotScanService.java"
821
1227
  {
822
1228
  cs = hpricot_scan_start;
823
1229
  ts = -1;
824
1230
  te = -1;
825
1231
  act = 0;
826
1232
  }
827
- // line 256 "ext/hpricot_scan/hpricot_scan.java.rl"
828
-
829
- while( !done ) {
830
- IRubyObject str;
831
- p = have;
832
- int pe;
833
- int len, space = buffer_size - have;
834
-
835
- if ( space == 0 ) {
836
- /* We've used up the entire buffer storing an already-parsed token
837
- * prefix that must be preserved. Likely caused by super-long attributes.
838
- * See ticket #13. */
839
- buffer_size += BUFSIZE;
840
- char[] new_buf = new char[buffer_size];
841
- System.arraycopy(buf, 0, new_buf, 0, buf.length);
842
- buf = new_buf;
843
- space = buffer_size - have;
844
- }
845
-
846
- if (port.respondsTo("read")) {
847
- str = port.callMethod(runtime.getCurrentContext(),"read",runtime.newFixnum(space));
848
- } else {
849
- str = ((RubyString)port).substr(nread,space);
850
- }
851
-
852
- str = str.convertToString();
853
- String sss = str.toString();
854
- char[] chars = sss.toCharArray();
855
- System.arraycopy(chars,0,buf,p,chars.length);
856
-
857
- len = sss.length();
858
- nread += len;
859
-
860
- if ( len < space ) {
861
- len++;
862
- done = true;
863
- }
864
-
865
- pe = p + len;
866
- char[] data = buf;
867
-
868
-
869
- // line 870 "ext/hpricot_scan/HpricotScanService.java"
1233
+ // line 667 "ext/hpricot_scan/hpricot_scan.java.rl"
1234
+ while(!done) {
1235
+ p = pe = len = buf;
1236
+ space = buffer_size - have;
1237
+
1238
+ if(io) {
1239
+ if(space == 0) {
1240
+ /* We've used up the entire buffer storing an already-parsed token
1241
+ * prefix that must be preserved. Likely caused by super-long attributes.
1242
+ * Increase buffer size and continue */
1243
+ buffer_size += BUFSIZE;
1244
+ data = realloc(data, buffer_size);
1245
+ space = buffer_size - have;
1246
+ }
1247
+
1248
+ p = have;
1249
+ IRubyObject str = port.callMethod(ctx, "read", runtime.newFixnum(space));
1250
+ ByteList bl = str.convertToString().getByteList();
1251
+ len = bl.realSize;
1252
+ System.arraycopy(bl.bytes, bl.begin, data, p, len);
1253
+ } else {
1254
+ ByteList bl = port.convertToString().getByteList();
1255
+ data = bl.bytes;
1256
+ buf = bl.begin;
1257
+ p = bl.begin;
1258
+ len = bl.realSize + 1;
1259
+ if(p + len >= data.length) {
1260
+ data = new byte[len];
1261
+ System.arraycopy(bl.bytes, bl.begin, data, 0, bl.realSize);
1262
+ p = 0;
1263
+ buf = 0;
1264
+ }
1265
+ done = true;
1266
+ eof = p + len;
1267
+ }
1268
+
1269
+ nread += len;
1270
+
1271
+ /* If this is the last buffer, tack on an EOF. */
1272
+ if(io && len < space) {
1273
+ data[p + len++] = 0;
1274
+ eof = p + len;
1275
+ done = true;
1276
+ }
1277
+
1278
+ pe = p + len;
1279
+
1280
+
1281
+ // line 1282 "ext/hpricot_scan/HpricotScanService.java"
870
1282
  {
871
1283
  int _klen;
872
1284
  int _trans = 0;
@@ -891,7 +1303,7 @@ case 1:
891
1303
  // line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
892
1304
  {ts = p;}
893
1305
  break;
894
- // line 895 "ext/hpricot_scan/HpricotScanService.java"
1306
+ // line 1307 "ext/hpricot_scan/HpricotScanService.java"
895
1307
  }
896
1308
  }
897
1309
 
@@ -956,83 +1368,89 @@ case 3:
956
1368
  switch ( _hpricot_scan_actions[_acts++] )
957
1369
  {
958
1370
  case 0:
959
- // line 147 "ext/hpricot_scan/hpricot_scan.java.rl"
1371
+ // line 514 "ext/hpricot_scan/hpricot_scan.java.rl"
960
1372
  {
961
- if (text) {
962
- CAT(tag, p);
963
- ELE(sym_text);
964
- text = false;
1373
+ if(text) {
1374
+ tag = CAT(tag, mark_tag, p);
1375
+ ELE(x.sym_text);
1376
+ text = false;
965
1377
  }
966
1378
  attr = runtime.getNil();
967
- tag[0] = runtime.getNil();
1379
+ tag = runtime.getNil();
968
1380
  mark_tag = -1;
969
1381
  ele_open = true;
970
1382
  }
971
1383
  break;
972
1384
  case 1:
973
- // line 159 "ext/hpricot_scan/hpricot_scan.java.rl"
1385
+ // line 526 "ext/hpricot_scan/hpricot_scan.java.rl"
974
1386
  { mark_tag = p; }
975
1387
  break;
976
1388
  case 2:
977
- // line 160 "ext/hpricot_scan/hpricot_scan.java.rl"
1389
+ // line 527 "ext/hpricot_scan/hpricot_scan.java.rl"
978
1390
  { mark_aval = p; }
979
1391
  break;
980
1392
  case 3:
981
- // line 161 "ext/hpricot_scan/hpricot_scan.java.rl"
1393
+ // line 528 "ext/hpricot_scan/hpricot_scan.java.rl"
982
1394
  { mark_akey = p; }
983
1395
  break;
984
1396
  case 4:
985
- // line 162 "ext/hpricot_scan/hpricot_scan.java.rl"
986
- { SET(tag, p); }
1397
+ // line 529 "ext/hpricot_scan/hpricot_scan.java.rl"
1398
+ { tag = SET(mark_tag, p, tag); }
987
1399
  break;
988
1400
  case 5:
989
- // line 164 "ext/hpricot_scan/hpricot_scan.java.rl"
990
- { SET(aval, p); }
1401
+ // line 531 "ext/hpricot_scan/hpricot_scan.java.rl"
1402
+ { aval = SET(mark_aval, p, aval); }
991
1403
  break;
992
1404
  case 6:
993
- // line 165 "ext/hpricot_scan/hpricot_scan.java.rl"
994
- {
995
- if (buf[p-1] == '"' || buf[p-1] == '\'') { SET(aval, p-1); }
996
- else { SET(aval, p); }
1405
+ // line 532 "ext/hpricot_scan/hpricot_scan.java.rl"
1406
+ {
1407
+ if(data[p-1] == '"' || data[p-1] == '\'') {
1408
+ aval = SET(mark_aval, p-1, aval);
1409
+ } else {
1410
+ aval = SET(mark_aval, p, aval);
1411
+ }
997
1412
  }
998
1413
  break;
999
1414
  case 7:
1000
- // line 169 "ext/hpricot_scan/hpricot_scan.java.rl"
1001
- { SET(akey, p); }
1415
+ // line 539 "ext/hpricot_scan/hpricot_scan.java.rl"
1416
+ { akey = SET(mark_akey, p, akey); }
1002
1417
  break;
1003
1418
  case 8:
1004
- // line 170 "ext/hpricot_scan/hpricot_scan.java.rl"
1005
- { SET(aval, p); ATTR(rb_str_new2("version"), aval); }
1419
+ // line 540 "ext/hpricot_scan/hpricot_scan.java.rl"
1420
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("version"), aval); }
1006
1421
  break;
1007
1422
  case 9:
1008
- // line 171 "ext/hpricot_scan/hpricot_scan.java.rl"
1009
- { SET(aval, p); ATTR(rb_str_new2("encoding"), aval); }
1423
+ // line 541 "ext/hpricot_scan/hpricot_scan.java.rl"
1424
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("encoding"), aval); }
1010
1425
  break;
1011
1426
  case 10:
1012
- // line 172 "ext/hpricot_scan/hpricot_scan.java.rl"
1013
- { SET(aval, p); ATTR(rb_str_new2("standalone"), aval); }
1427
+ // line 542 "ext/hpricot_scan/hpricot_scan.java.rl"
1428
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("standalone"), aval); }
1014
1429
  break;
1015
1430
  case 11:
1016
- // line 173 "ext/hpricot_scan/hpricot_scan.java.rl"
1017
- { SET(aval, p); ATTR(rb_str_new2("public_id"), aval); }
1431
+ // line 543 "ext/hpricot_scan/hpricot_scan.java.rl"
1432
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("public_id"), aval); }
1018
1433
  break;
1019
1434
  case 12:
1020
- // line 174 "ext/hpricot_scan/hpricot_scan.java.rl"
1021
- { SET(aval, p); ATTR(rb_str_new2("system_id"), aval); }
1435
+ // line 544 "ext/hpricot_scan/hpricot_scan.java.rl"
1436
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("system_id"), aval); }
1022
1437
  break;
1023
1438
  case 13:
1024
- // line 176 "ext/hpricot_scan/hpricot_scan.java.rl"
1025
- {
1026
- akey[0] = runtime.getNil();
1027
- aval[0] = runtime.getNil();
1028
- mark_akey = -1;
1029
- mark_aval = -1;
1439
+ // line 546 "ext/hpricot_scan/hpricot_scan.java.rl"
1440
+ {
1441
+ akey = runtime.getNil();
1442
+ aval = runtime.getNil();
1443
+ mark_akey = -1;
1444
+ mark_aval = -1;
1030
1445
  }
1031
1446
  break;
1032
1447
  case 14:
1033
- // line 183 "ext/hpricot_scan/hpricot_scan.java.rl"
1034
- {
1035
- ATTR(akey, aval);
1448
+ // line 553 "ext/hpricot_scan/hpricot_scan.java.rl"
1449
+ {
1450
+ if(!S.xml) {
1451
+ akey = akey.callMethod(runtime.getCurrentContext(), "downcase");
1452
+ }
1453
+ ATTR(akey, aval);
1036
1454
  }
1037
1455
  break;
1038
1456
  case 15:
@@ -1189,7 +1607,7 @@ case 3:
1189
1607
  }
1190
1608
  }
1191
1609
  break;
1192
- // line 1193 "ext/hpricot_scan/HpricotScanService.java"
1610
+ // line 1611 "ext/hpricot_scan/HpricotScanService.java"
1193
1611
  }
1194
1612
  }
1195
1613
  }
@@ -1203,7 +1621,7 @@ case 2:
1203
1621
  // line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
1204
1622
  {ts = -1;}
1205
1623
  break;
1206
- // line 1207 "ext/hpricot_scan/HpricotScanService.java"
1624
+ // line 1625 "ext/hpricot_scan/HpricotScanService.java"
1207
1625
  }
1208
1626
  }
1209
1627
 
@@ -1225,81 +1643,443 @@ case 5:
1225
1643
  }
1226
1644
  break; }
1227
1645
  }
1228
- // line 297 "ext/hpricot_scan/hpricot_scan.java.rl"
1229
-
1230
- if ( cs == hpricot_scan_error ) {
1231
- if(!tag[0].isNil()) {
1232
- rb_raise(rb_eHpricotParseError, "parse error on element <"+tag.toString()+">, starting on line "+curline+".\n" + NO_WAY_SERIOUSLY);
1233
- } else {
1234
- rb_raise(rb_eHpricotParseError, "parse error on line "+curline+".\n" + NO_WAY_SERIOUSLY);
1235
- }
1646
+ // line 714 "ext/hpricot_scan/hpricot_scan.java.rl"
1647
+
1648
+ if(cs == hpricot_scan_error) {
1649
+ if(!tag.isNil()) {
1650
+ throw newRaiseException(x.rb_eHpricotParseError, "parse error on element <" + tag + ">, starting on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
1651
+ } else {
1652
+ throw newRaiseException(x.rb_eHpricotParseError, "parse error on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
1653
+ }
1654
+ }
1655
+
1656
+ if(done && ele_open) {
1657
+ ele_open = false;
1658
+ if(ts > 0) {
1659
+ mark_tag = ts;
1660
+ ts = 0;
1661
+ text = true;
1662
+ }
1663
+ }
1664
+
1665
+ if(ts == -1) {
1666
+ have = 0;
1667
+ if(mark_tag != -1 && text) {
1668
+ if(done) {
1669
+ if(mark_tag < p - 1) {
1670
+ tag = CAT(tag, mark_tag, p-1);
1671
+ ELE(x.sym_text);
1672
+ }
1673
+ } else {
1674
+ tag = CAT(tag, mark_tag, p);
1675
+ }
1676
+ }
1677
+ if(io) {
1678
+ mark_tag = 0;
1679
+ } else {
1680
+ mark_tag = ((RubyString)port).getByteList().begin;
1681
+ }
1682
+ } else if(io) {
1683
+ have = pe - ts;
1684
+ System.arraycopy(data, ts, data, buf, have);
1685
+ mark_tag = SLIDE(mark_tag);
1686
+ mark_akey = SLIDE(mark_akey);
1687
+ mark_aval = SLIDE(mark_aval);
1688
+ te -= ts;
1689
+ ts = 0;
1690
+ }
1691
+ }
1692
+
1693
+ if(S != null) {
1694
+ return S.doc;
1695
+ }
1696
+
1697
+ return runtime.getNil();
1698
+ }
1699
+ }
1700
+
1701
+ public static class HpricotModule {
1702
+ // hpricot_scan
1703
+ @JRubyMethod(module = true, optional = 1, required = 1, frame = true)
1704
+ public static IRubyObject scan(IRubyObject self, IRubyObject[] args, Block block) {
1705
+ return new Scanner(self, args, block).scan();
1706
+ }
1707
+
1708
+ // hpricot_css
1709
+ @JRubyMethod(module = true)
1710
+ public static IRubyObject css(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
1711
+ return new HpricotCss(self, mod, str, node).scan();
1712
+ }
1713
+ }
1714
+
1715
+ public static class CData {
1716
+ @JRubyMethod
1717
+ public static IRubyObject content(IRubyObject self) {
1718
+ return hpricot_ele_get_name(self);
1719
+ }
1720
+
1721
+ @JRubyMethod(name = "content=")
1722
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
1723
+ return hpricot_ele_set_name(self, value);
1724
+ }
1725
+ }
1726
+
1727
+ public static class Comment {
1728
+ @JRubyMethod
1729
+ public static IRubyObject content(IRubyObject self) {
1730
+ return hpricot_ele_get_name(self);
1731
+ }
1732
+
1733
+ @JRubyMethod(name = "content=")
1734
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
1735
+ return hpricot_ele_set_name(self, value);
1736
+ }
1737
+ }
1738
+
1739
+ public static class DocType {
1740
+ @JRubyMethod
1741
+ public static IRubyObject raw_string(IRubyObject self) {
1742
+ return hpricot_ele_get_name(self);
1743
+ }
1744
+
1745
+ @JRubyMethod
1746
+ public static IRubyObject clear_raw(IRubyObject self) {
1747
+ return hpricot_ele_clear_name(self);
1748
+ }
1749
+
1750
+ @JRubyMethod
1751
+ public static IRubyObject target(IRubyObject self) {
1752
+ return hpricot_ele_get_target(self);
1753
+ }
1754
+
1755
+ @JRubyMethod(name = "target=")
1756
+ public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
1757
+ return hpricot_ele_set_target(self, value);
1758
+ }
1759
+
1760
+ @JRubyMethod
1761
+ public static IRubyObject public_id(IRubyObject self) {
1762
+ return hpricot_ele_get_public_id(self);
1763
+ }
1764
+
1765
+ @JRubyMethod(name = "public_id=")
1766
+ public static IRubyObject public_id_set(IRubyObject self, IRubyObject value) {
1767
+ return hpricot_ele_set_public_id(self, value);
1768
+ }
1769
+
1770
+ @JRubyMethod
1771
+ public static IRubyObject system_id(IRubyObject self) {
1772
+ return hpricot_ele_get_system_id(self);
1773
+ }
1774
+
1775
+ @JRubyMethod(name = "system_id=")
1776
+ public static IRubyObject system_id_set(IRubyObject self, IRubyObject value) {
1777
+ return hpricot_ele_set_system_id(self, value);
1778
+ }
1779
+ }
1780
+
1781
+ public static class Elem {
1782
+ @JRubyMethod
1783
+ public static IRubyObject clear_raw(IRubyObject self) {
1784
+ return hpricot_ele_clear_raw(self);
1785
+ }
1786
+ }
1787
+
1788
+ public static class BogusETag {
1789
+ @JRubyMethod
1790
+ public static IRubyObject raw_string(IRubyObject self) {
1791
+ return hpricot_ele_get_attr(self);
1792
+ }
1793
+
1794
+ @JRubyMethod
1795
+ public static IRubyObject clear_raw(IRubyObject self) {
1796
+ return hpricot_ele_clear_attr(self);
1797
+ }
1798
+ }
1799
+
1800
+ public static class Text {
1801
+ @JRubyMethod
1802
+ public static IRubyObject raw_string(IRubyObject self) {
1803
+ return hpricot_ele_get_name(self);
1804
+ }
1805
+
1806
+ @JRubyMethod
1807
+ public static IRubyObject clear_raw(IRubyObject self) {
1808
+ return hpricot_ele_clear_name(self);
1809
+ }
1810
+
1811
+ @JRubyMethod
1812
+ public static IRubyObject content(IRubyObject self) {
1813
+ return hpricot_ele_get_name(self);
1814
+ }
1815
+
1816
+ @JRubyMethod(name = "content=")
1817
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
1818
+ return hpricot_ele_set_name(self, value);
1819
+ }
1820
+ }
1821
+
1822
+ public static class XMLDecl {
1823
+ @JRubyMethod
1824
+ public static IRubyObject raw_string(IRubyObject self) {
1825
+ return hpricot_ele_get_name(self);
1826
+ }
1827
+
1828
+ @JRubyMethod
1829
+ public static IRubyObject clear_raw(IRubyObject self) {
1830
+ return hpricot_ele_clear_name(self);
1831
+ }
1832
+
1833
+ @JRubyMethod
1834
+ public static IRubyObject encoding(IRubyObject self) {
1835
+ return hpricot_ele_get_encoding(self);
1836
+ }
1837
+
1838
+ @JRubyMethod(name = "encoding=")
1839
+ public static IRubyObject encoding_set(IRubyObject self, IRubyObject value) {
1840
+ return hpricot_ele_set_encoding(self, value);
1841
+ }
1842
+
1843
+ @JRubyMethod
1844
+ public static IRubyObject standalone(IRubyObject self) {
1845
+ return hpricot_ele_get_standalone(self);
1846
+ }
1847
+
1848
+ @JRubyMethod(name = "standalone=")
1849
+ public static IRubyObject standalone_set(IRubyObject self, IRubyObject value) {
1850
+ return hpricot_ele_set_standalone(self, value);
1851
+ }
1852
+
1853
+ @JRubyMethod
1854
+ public static IRubyObject version(IRubyObject self) {
1855
+ return hpricot_ele_get_version(self);
1856
+ }
1857
+
1858
+ @JRubyMethod(name = "version=")
1859
+ public static IRubyObject version_set(IRubyObject self, IRubyObject value) {
1860
+ return hpricot_ele_set_version(self, value);
1861
+ }
1862
+ }
1863
+
1864
+ public static class ProcIns {
1865
+ @JRubyMethod
1866
+ public static IRubyObject target(IRubyObject self) {
1867
+ return hpricot_ele_get_name(self);
1868
+ }
1869
+
1870
+ @JRubyMethod(name = "target=")
1871
+ public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
1872
+ return hpricot_ele_set_name(self, value);
1873
+ }
1874
+
1875
+ @JRubyMethod
1876
+ public static IRubyObject content(IRubyObject self) {
1877
+ return hpricot_ele_get_attr(self);
1878
+ }
1879
+
1880
+ @JRubyMethod(name = "content=")
1881
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
1882
+ return hpricot_ele_set_attr(self, value);
1883
+ }
1884
+ }
1885
+
1886
+ public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
1887
+
1888
+ public final static int H_ELE_TAG = 0;
1889
+ public final static int H_ELE_PARENT = 1;
1890
+ public final static int H_ELE_ATTR = 2;
1891
+ public final static int H_ELE_ETAG = 3;
1892
+ public final static int H_ELE_RAW = 4;
1893
+ public final static int H_ELE_EC = 5;
1894
+ public final static int H_ELE_HASH = 6;
1895
+ public final static int H_ELE_CHILDREN = 7;
1896
+
1897
+ public static IRubyObject H_ELE_GET(IRubyObject recv, int n) {
1898
+ return ((IRubyObject[])recv.dataGetStruct())[n];
1899
+ }
1900
+
1901
+ public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
1902
+ ((IRubyObject[])recv.dataGetStruct())[n] = value;
1903
+ return value;
1904
+ }
1905
+
1906
+ private static class RefCallback implements Callback {
1907
+ private final int n;
1908
+ public RefCallback(int n) { this.n = n; }
1909
+
1910
+ public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
1911
+ return H_ELE_GET(recv, n);
1912
+ }
1913
+
1914
+ public Arity getArity() {
1915
+ return Arity.NO_ARGUMENTS;
1916
+ }
1917
+ }
1918
+
1919
+ private static class SetCallback implements Callback {
1920
+ private final int n;
1921
+ public SetCallback(int n) { this.n = n; }
1922
+
1923
+ public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
1924
+ return H_ELE_SET(recv, n, args[0]);
1925
+ }
1926
+
1927
+ public Arity getArity() {
1928
+ return Arity.ONE_ARGUMENT;
1929
+ }
1236
1930
  }
1931
+
1932
+ private final static Callback[] ref_func = new Callback[]{
1933
+ new RefCallback(0),
1934
+ new RefCallback(1),
1935
+ new RefCallback(2),
1936
+ new RefCallback(3),
1937
+ new RefCallback(4),
1938
+ new RefCallback(5),
1939
+ new RefCallback(6),
1940
+ new RefCallback(7),
1941
+ new RefCallback(8),
1942
+ new RefCallback(9)};
1943
+
1944
+ private final static Callback[] set_func = new Callback[]{
1945
+ new SetCallback(0),
1946
+ new SetCallback(1),
1947
+ new SetCallback(2),
1948
+ new SetCallback(3),
1949
+ new SetCallback(4),
1950
+ new SetCallback(5),
1951
+ new SetCallback(6),
1952
+ new SetCallback(7),
1953
+ new SetCallback(8),
1954
+ new SetCallback(9)};
1955
+
1956
+ public final static ObjectAllocator alloc_hpricot_struct = new ObjectAllocator() {
1957
+ // alloc_hpricot_struct
1958
+ public IRubyObject allocate(Ruby runtime, RubyClass klass) {
1959
+ RubyClass kurrent = klass;
1960
+ Object sz = kurrent.fastGetInternalVariable("__size__");
1961
+ while(sz == null && kurrent != null) {
1962
+ kurrent = kurrent.getSuperClass();
1963
+ sz = kurrent.fastGetInternalVariable("__size__");
1964
+ }
1965
+ int size = RubyNumeric.fix2int((RubyObject)sz);
1966
+ RubyObject obj = new RubyObject(runtime, klass);
1967
+ IRubyObject[] all = new IRubyObject[size];
1968
+ java.util.Arrays.fill(all, runtime.getNil());
1969
+ obj.dataWrapStruct(all);
1970
+ return obj;
1971
+ }
1972
+ };
1973
+
1974
+ public static RubyClass makeHpricotStruct(Ruby runtime, IRubyObject[] members) {
1975
+ RubyClass klass = RubyClass.newClass(runtime, runtime.getObject());
1976
+ klass.fastSetInternalVariable("__size__", runtime.newFixnum(members.length));
1977
+ klass.setAllocator(alloc_hpricot_struct);
1978
+
1979
+ for(int i = 0; i < members.length; i++) {
1980
+ String id = members[i].toString();
1981
+ klass.defineMethod(id, ref_func[i]);
1982
+ klass.defineMethod(id + "=", set_func[i]);
1983
+ }
1237
1984
 
1238
- if ( done && ele_open ) {
1239
- ele_open = false;
1240
- if(ts > -1) {
1241
- mark_tag = ts;
1242
- ts = -1;
1243
- text = true;
1244
- }
1985
+ return klass;
1986
+ }
1987
+
1988
+ public boolean basicLoad(final Ruby runtime) throws IOException {
1989
+ Init_hpricot_scan(runtime);
1990
+ return true;
1245
1991
  }
1246
1992
 
1247
- if(ts == -1) {
1248
- have = 0;
1249
- /* text nodes have no ts because each byte is parsed alone */
1250
- if(mark_tag != -1 && text) {
1251
- if (done) {
1252
- if(mark_tag < p-1) {
1253
- CAT(tag, p-1);
1254
- ELE(sym_text);
1255
- }
1256
- } else {
1257
- CAT(tag, p);
1993
+ public static class Extra {
1994
+ IRubyObject symAllow, symDeny, sym_xmldecl, sym_doctype,
1995
+ sym_procins, sym_stag, sym_etag, sym_emptytag,
1996
+ sym_allowed, sym_children, sym_comment,
1997
+ sym_cdata, sym_name, sym_parent,
1998
+ sym_raw_attributes, sym_raw_string, sym_tagno,
1999
+ sym_text, sym_EMPTY, sym_CDATA;
2000
+
2001
+ public RubyModule mHpricot;
2002
+ public RubyClass structElem;
2003
+ public RubyClass structAttr;
2004
+ public RubyClass structBasic;
2005
+ public RubyClass cDoc;
2006
+ public RubyClass cCData;
2007
+ public RubyClass cComment;
2008
+ public RubyClass cDocType;
2009
+ public RubyClass cElem;
2010
+ public RubyClass cBogusETag;
2011
+ public RubyClass cText;
2012
+ public RubyClass cXMLDecl;
2013
+ public RubyClass cProcIns;
2014
+ public RubyClass rb_eHpricotParseError;
2015
+ public IRubyObject reProcInsParse;
2016
+
2017
+ public Extra(Ruby runtime) {
2018
+ symAllow = runtime.newSymbol("allow");
2019
+ symDeny = runtime.newSymbol("deny");
2020
+ sym_xmldecl = runtime.newSymbol("xmldecl");
2021
+ sym_doctype = runtime.newSymbol("doctype");
2022
+ sym_procins = runtime.newSymbol("procins");
2023
+ sym_stag = runtime.newSymbol("stag");
2024
+ sym_etag = runtime.newSymbol("etag");
2025
+ sym_emptytag = runtime.newSymbol("emptytag");
2026
+ sym_allowed = runtime.newSymbol("allowed");
2027
+ sym_children = runtime.newSymbol("children");
2028
+ sym_comment = runtime.newSymbol("comment");
2029
+ sym_cdata = runtime.newSymbol("cdata");
2030
+ sym_name = runtime.newSymbol("name");
2031
+ sym_parent = runtime.newSymbol("parent");
2032
+ sym_raw_attributes = runtime.newSymbol("raw_attributes");
2033
+ sym_raw_string = runtime.newSymbol("raw_string");
2034
+ sym_tagno = runtime.newSymbol("tagno");
2035
+ sym_text = runtime.newSymbol("text");
2036
+ sym_EMPTY = runtime.newSymbol("EMPTY");
2037
+ sym_CDATA = runtime.newSymbol("CDATA");
1258
2038
  }
1259
- }
1260
- mark_tag = 0;
1261
- } else {
1262
- have = pe - ts;
1263
- System.arraycopy(buf,ts,buf,0,have);
1264
- SLIDE(tag);
1265
- SLIDE(akey);
1266
- SLIDE(aval);
1267
- te = (te - ts);
1268
- ts = 0;
1269
2039
  }
1270
- }
1271
- return runtime.getNil();
1272
- }
1273
2040
 
1274
- public static IRubyObject __hpricot_scan(IRubyObject recv, IRubyObject port, Block block) {
1275
- Ruby runtime = recv.getRuntime();
1276
- HpricotScanService service = new HpricotScanService();
1277
- service.runtime = runtime;
1278
- service.xmldecl = runtime.newSymbol("xmldecl");
1279
- service.doctype = runtime.newSymbol("doctype");
1280
- service.procins = runtime.newSymbol("procins");
1281
- service.stag = runtime.newSymbol("stag");
1282
- service.etag = runtime.newSymbol("etag");
1283
- service.emptytag = runtime.newSymbol("emptytag");
1284
- service.comment = runtime.newSymbol("comment");
1285
- service.cdata = runtime.newSymbol("cdata");
1286
- service.sym_text = runtime.newSymbol("text");
1287
- service.block = block;
1288
- return service.hpricot_scan(recv, port);
1289
- }
2041
+ public static void Init_hpricot_scan(Ruby runtime) {
2042
+ Extra x = new Extra(runtime);
1290
2043
 
2044
+ x.mHpricot = runtime.defineModule("Hpricot");
2045
+ x.mHpricot.dataWrapStruct(x);
1291
2046
 
1292
- public boolean basicLoad(final Ruby runtime) throws IOException {
1293
- Init_hpricot_scan(runtime);
1294
- return true;
1295
- }
2047
+ x.mHpricot.getSingletonClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
2048
+ x.mHpricot.defineAnnotatedMethods(HpricotModule.class);
1296
2049
 
1297
- public static void Init_hpricot_scan(Ruby runtime) {
1298
- RubyModule mHpricot = runtime.defineModule("Hpricot");
1299
- mHpricot.getMetaClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
1300
- CallbackFactory fact = runtime.callbackFactory(HpricotScanService.class);
1301
- mHpricot.getMetaClass().defineMethod("scan",fact.getSingletonMethod("__hpricot_scan",IRubyObject.class));
1302
- mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
1303
- rubyApi = JavaEmbedUtils.newObjectAdapter();
1304
- }
2050
+ x.rb_eHpricotParseError = x.mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
2051
+
2052
+ x.structElem = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes, x.sym_etag, x.sym_raw_string, x.sym_allowed, x.sym_tagno, x.sym_children});
2053
+ x.structAttr = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes});
2054
+ x.structBasic= makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent});
2055
+
2056
+ x.cDoc = x.mHpricot.defineClassUnder("Doc", x.structElem, x.structElem.getAllocator());
2057
+
2058
+ x.cCData = x.mHpricot.defineClassUnder("CData", x.structBasic, x.structBasic.getAllocator());
2059
+ x.cCData.defineAnnotatedMethods(CData.class);
2060
+
2061
+ x.cComment = x.mHpricot.defineClassUnder("Comment", x.structBasic, x.structBasic.getAllocator());
2062
+ x.cComment.defineAnnotatedMethods(Comment.class);
2063
+
2064
+ x.cDocType = x.mHpricot.defineClassUnder("DocType", x.structAttr, x.structAttr.getAllocator());
2065
+ x.cDocType.defineAnnotatedMethods(DocType.class);
2066
+
2067
+ x.cElem = x.mHpricot.defineClassUnder("Elem", x.structElem, x.structElem.getAllocator());
2068
+ x.cElem.defineAnnotatedMethods(Elem.class);
2069
+
2070
+ x.cBogusETag = x.mHpricot.defineClassUnder("BogusETag", x.structAttr, x.structAttr.getAllocator());
2071
+ x.cBogusETag.defineAnnotatedMethods(BogusETag.class);
2072
+
2073
+ x.cText = x.mHpricot.defineClassUnder("Text", x.structBasic, x.structBasic.getAllocator());
2074
+ x.cText.defineAnnotatedMethods(Text.class);
2075
+
2076
+ x.cXMLDecl = x.mHpricot.defineClassUnder("XMLDecl", x.structAttr, x.structAttr.getAllocator());
2077
+ x.cXMLDecl.defineAnnotatedMethods(XMLDecl.class);
2078
+
2079
+ x.cProcIns = x.mHpricot.defineClassUnder("ProcIns", x.structAttr, x.structAttr.getAllocator());
2080
+ x.cProcIns.defineAnnotatedMethods(ProcIns.class);
2081
+
2082
+ x.reProcInsParse = runtime.evalScriptlet("/\\A<\\?(\\S+)\\s+(.+)/m");
2083
+ x.mHpricot.setConstant("ProcInsParse", x.reProcInsParse);
2084
+ }
1305
2085
  }