hpricot 0.5 → 0.6

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,5 +1,17 @@
1
+ = 0.6
2
+ === 15th June, 2007
3
+ * Hpricot for JRuby -- nice work Ola Bini!
4
+ * Inline Markaby for Hpricot documents.
5
+ * XML tags and attributes are no longer downcased like HTML is.
6
+ * New syntax for grabbing everything between two elements using a Range in the search method: (doc/("font".."font/br")) or in nodes_at like so: (doc/"font").nodes_at("*".."br"). This only works with either a pair of siblings or a set of a parent and a sibling.
7
+ * Ignore self-closing endings on tags (such as form) which are containers. Treat them like open parent tags. Reported by Jonathan Nichols on the hpricot list.
8
+ * Escaping of attributes, yanked from Jim Weirich and Sam Ruby's work in Builder.
9
+ * Element#raw_attributes gives unescaped data. Element#attributes gives escaped.
10
+ * Added: Elements#attr, Elements#remove_attr, Elements#remove_class.
11
+ * Added: Traverse#preceding, Traverse#following, Traverse#previous, Traverse#next.
12
+
1
13
  = 0.5
2
- === 31rd January, 2006
14
+ === 31st January, 2007
3
15
 
4
16
  * support for a[text()="Click Me!"] and h3[text()*="space"] and the like.
5
17
  * Hpricot.buffer_size accessor for increasing Hpricot's buffer if you're encountering huge ASP.NET viewstate attribs.
data/README CHANGED
@@ -257,7 +257,7 @@ So, let's go beyond just trying to fix the hierarchy. The
257
257
 
258
258
  What measures does <tt>:xhtml_strict</tt> take?
259
259
 
260
- 1. Shift elements into their proper containers just like <tt>:fixup_tags</tt>.
260
+ 1. Shift elements into their proper containers just like :fixup_tags.
261
261
  2. Remove unknown elements.
262
262
  3. Remove unknown attributes.
263
263
  4. Remove illegal content.
@@ -270,6 +270,9 @@ on the standard mode. The main difference is that :xml mode won't try to output
270
270
  tags which are friendlier for browsers. For example, if an opening and closing
271
271
  <tt>br</tt> tag is found, XML mode won't try to turn that into an empty element.
272
272
 
273
+ XML mode also doesn't downcase the tags and attributes for you. So pay attention
274
+ to case, friends.
275
+
273
276
  The primary way to use Hpricot's XML mode is to call the Hpricot.XML method:
274
277
 
275
278
  doc = open("http://redhanded.hobix.com/index.xml") do |f|
data/Rakefile CHANGED
@@ -8,35 +8,48 @@ include FileUtils
8
8
 
9
9
  NAME = "hpricot"
10
10
  REV = `svn info`[/Revision: (\d+)/, 1] rescue nil
11
- VERS = ENV['VERSION'] || "0.4" + (REV ? ".#{REV}" : "")
12
- CLEAN.include ['ext/hpricot_scan/*.{bundle,so,obj,pdb,lib,def,exp}', 'ext/hpricot_scan/Makefile',
11
+ VERS = ENV['VERSION'] || "0.6" + (REV ? ".#{REV}" : "")
12
+ PKG = "#{NAME}-#{VERS}"
13
+ BIN = "*.{bundle,jar,so,obj,pdb,lib,def,exp}"
14
+ ARCHLIB = "lib/#{::Config::CONFIG['arch']}"
15
+ CLEAN.include ["ext/hpricot_scan/#{BIN}", "lib/**/#{BIN}", 'ext/hpricot_scan/Makefile',
13
16
  '**/.*.sw?', '*.gem', '.config']
14
17
  RDOC_OPTS = ['--quiet', '--title', 'The Hpricot Reference', '--main', 'README', '--inline-source']
18
+ PKG_FILES = %w(CHANGELOG COPYING README Rakefile) +
19
+ Dir.glob("{bin,doc,test,lib,extras}/**/*") +
20
+ Dir.glob("ext/**/*.{h,java,c,rb,rl}") +
21
+ %w[ext/hpricot_scan/hpricot_scan.c] # needed because it's generated later
22
+ SPEC =
23
+ Gem::Specification.new do |s|
24
+ s.name = NAME
25
+ s.version = VERS
26
+ s.platform = Gem::Platform::RUBY
27
+ s.has_rdoc = true
28
+ s.rdoc_options += RDOC_OPTS
29
+ s.extra_rdoc_files = ["README", "CHANGELOG", "COPYING"]
30
+ s.summary = "a swift, liberal HTML parser with a fantastic library"
31
+ s.description = s.summary
32
+ s.author = "why the lucky stiff"
33
+ s.email = 'why@ruby-lang.org'
34
+ s.homepage = 'http://code.whytheluckystiff.net/hpricot/'
35
+ s.files = PKG_FILES
36
+ s.require_paths = [ARCHLIB, "lib"]
37
+ s.extensions = FileList["ext/**/extconf.rb"].to_a
38
+ s.bindir = "bin"
39
+ end
15
40
 
16
41
  desc "Does a full compile, test run"
17
42
  task :default => [:compile, :test]
18
43
 
19
- desc "Compiles all extensions"
20
- task :compile => [:hpricot_scan] do
21
- if Dir.glob(File.join("lib","hpricot_scan.*")).length == 0
22
- STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
23
- STDERR.puts "Gem actually failed to build. Your system is"
24
- STDERR.puts "NOT configured properly to build hpricot."
25
- STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
26
- exit(1)
27
- end
28
- end
29
- task :hpricot_scan => [:ragel]
30
-
31
44
  desc "Packages up Hpricot."
32
45
  task :package => [:clean, :ragel]
33
46
 
34
47
  desc "Releases packages for all Hpricot packages and platforms."
35
- task :release => [:package, :rubygems_win32]
48
+ task :release => [:package, :package_win32, :package_jruby]
36
49
 
37
50
  desc "Run all the tests"
38
51
  Rake::TestTask.new do |t|
39
- t.libs << "test"
52
+ t.libs << "test" << ARCHLIB
40
53
  t.test_files = FileList['test/test_*.rb']
41
54
  t.verbose = true
42
55
  end
@@ -48,34 +61,9 @@ Rake::RDocTask.new do |rdoc|
48
61
  rdoc.rdoc_files.add ['README', 'CHANGELOG', 'COPYING', 'lib/**/*.rb']
49
62
  end
50
63
 
51
- spec =
52
- Gem::Specification.new do |s|
53
- s.name = NAME
54
- s.version = VERS
55
- s.platform = Gem::Platform::RUBY
56
- s.has_rdoc = true
57
- s.rdoc_options += RDOC_OPTS
58
- s.extra_rdoc_files = ["README", "CHANGELOG", "COPYING"]
59
- s.summary = "a swift, liberal HTML parser with a fantastic library"
60
- s.description = s.summary
61
- s.author = "why the lucky stiff"
62
- s.email = 'why@ruby-lang.org'
63
- s.homepage = 'http://code.whytheluckystiff.net/hpricot/'
64
-
65
- s.files = %w(COPYING README Rakefile) +
66
- Dir.glob("{bin,doc,test,lib,extras}/**/*") +
67
- Dir.glob("ext/**/*.{h,c,rb,rl}") +
68
- %w[ext/hpricot_scan/hpricot_scan.c] # needed because it's generated later
69
-
70
- s.require_path = "lib"
71
- #s.autorequire = "hpricot" # no no no this is tHe 3v1l
72
- s.extensions = FileList["ext/**/extconf.rb"].to_a
73
- s.bindir = "bin"
74
- end
75
-
76
- Rake::GemPackageTask.new(spec) do |p|
64
+ Rake::GemPackageTask.new(SPEC) do |p|
77
65
  p.need_tar = true
78
- p.gem_spec = spec
66
+ p.gem_spec = SPEC
79
67
  end
80
68
 
81
69
  extension = "hpricot_scan"
@@ -94,6 +82,18 @@ task "lib" do
94
82
  directory "lib"
95
83
  end
96
84
 
85
+ desc "Compiles the Ruby extension"
86
+ task :compile => [:hpricot_scan] do
87
+ if Dir.glob(File.join(ARCHLIB,"hpricot_scan.*")).length == 0
88
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
89
+ STDERR.puts "Gem actually failed to build. Your system is"
90
+ STDERR.puts "NOT configured properly to build hpricot."
91
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
92
+ exit(1)
93
+ end
94
+ end
95
+ task :hpricot_scan => [:ragel]
96
+
97
97
  desc "Builds just the #{extension} extension"
98
98
  task extension.to_sym => ["#{ext}/Makefile", ext_so ]
99
99
 
@@ -105,66 +105,102 @@ file ext_so => ext_files do
105
105
  Dir.chdir(ext) do
106
106
  sh(PLATFORM =~ /win32/ ? 'nmake' : 'make')
107
107
  end
108
- cp ext_so, "lib"
108
+ mkdir_p ARCHLIB
109
+ cp ext_so, ARCHLIB
109
110
  end
110
111
 
111
- desc "Generates the scanner code with Ragel."
112
- task :ragel do
113
- sh %{ragel ext/hpricot_scan/hpricot_scan.rl | rlcodegen -G2 -o ext/hpricot_scan/hpricot_scan.c}
112
+ desc "returns the ragel version"
113
+ task :ragel_version do
114
+ @ragel_v = `ragel -v`[/(version )(\S*)/,2].to_f
114
115
  end
115
116
 
116
- PKG_FILES = FileList[
117
- "test/**/*.{rb,html,xhtml}",
118
- "lib/**/*.rb",
119
- "ext/**/*.{c,rb,h,rl}",
120
- "CHANGELOG", "README", "Rakefile", "COPYING",
121
- "extras/**/*", "lib/hpricot_scan.so"]
117
+ desc "Generates the C scanner code with Ragel."
118
+ task :ragel => [:ragel_version] do
119
+ sh %{ragel ext/hpricot_scan/hpricot_scan.rl | #{@ragel_v >= 5.18 ? 'rlgen-cd' : 'rlcodegen'} -G2 -o ext/hpricot_scan/hpricot_scan.c}
120
+ end
122
121
 
123
- Win32Spec = Gem::Specification.new do |s|
124
- s.name = NAME
125
- s.version = VERS
126
- s.platform = Gem::Platform::WIN32
127
- s.has_rdoc = false
128
- s.extra_rdoc_files = ["README", "CHANGELOG", "COPYING"]
129
- s.summary = "a swift, liberal HTML parser with a fantastic library"
130
- s.description = s.summary
131
- s.author = "why the lucky stiff"
132
- s.email = 'why@ruby-lang.org'
133
- s.homepage = 'http://code.whytheluckystiff.net/hpricot/'
122
+ desc "Generates the Java scanner code with Ragel."
123
+ task :ragel_java => [:ragel_version] do
124
+ sh %{ragel -J ext/hpricot_scan/hpricot_scan.java.rl | #{@ragel_v >= 5.18 ? 'rlgen-java' : 'rlcodegen'} -o ext/hpricot_scan/HpricotScanService.java}
125
+ end
134
126
 
135
- s.files = PKG_FILES
127
+ ### Win32 Packages ###
136
128
 
137
- s.require_path = "lib"
138
- #s.autorequire = "hpricot" # no no no this is tHe 3v1l
139
- s.extensions = []
140
- s.bindir = "bin"
141
- end
129
+ Win32Spec = SPEC.dup
130
+ Win32Spec.platform = Gem::Platform::WIN32
131
+ Win32Spec.files = PKG_FILES + ["#{ARCHLIB}/hpricot_scan.so"]
132
+ Win32Spec.extensions = []
142
133
 
143
- WIN32_PKG_DIR = "hpricot-" + VERS
134
+ WIN32_PKG_DIR = "#{PKG}-mswin32"
144
135
 
136
+ desc "Package up the Win32 distribution."
145
137
  file WIN32_PKG_DIR => [:package] do
146
- sh "tar zxf pkg/#{WIN32_PKG_DIR}.tgz"
138
+ sh "tar zxf pkg/#{PKG}.tgz"
139
+ mv PKG, WIN32_PKG_DIR
147
140
  end
148
141
 
149
142
  desc "Cross-compile the hpricot_scan extension for win32"
150
143
  file "hpricot_scan_win32" => [WIN32_PKG_DIR] do
151
144
  cp "extras/mingw-rbconfig.rb", "#{WIN32_PKG_DIR}/ext/hpricot_scan/rbconfig.rb"
152
145
  sh "cd #{WIN32_PKG_DIR}/ext/hpricot_scan/ && ruby -I. extconf.rb && make"
153
- mv "#{WIN32_PKG_DIR}/ext/hpricot_scan/hpricot_scan.so", "#{WIN32_PKG_DIR}/lib"
146
+ mv "#{WIN32_PKG_DIR}/ext/hpricot_scan/hpricot_scan.so", "#{WIN32_PKG_DIR}/#{ARCHLIB}"
154
147
  end
155
148
 
156
149
  desc "Build the binary RubyGems package for win32"
157
- task :rubygems_win32 => ["hpricot_scan_win32"] do
150
+ task :package_win32 => ["hpricot_scan_win32"] do
158
151
  Dir.chdir("#{WIN32_PKG_DIR}") do
159
152
  Gem::Builder.new(Win32Spec).build
160
153
  verbose(true) {
161
- mv Dir["*.gem"].first, "../pkg/hpricot-#{VERS}-mswin32.gem"
154
+ mv Dir["*.gem"].first, "../pkg/#{WIN32_PKG_DIR}.gem"
162
155
  }
163
156
  end
164
157
  end
165
158
 
166
159
  CLEAN.include WIN32_PKG_DIR
167
160
 
161
+ ### JRuby Packages ###
162
+
163
+ compile_java = proc do
164
+ sh %{javac -source 1.4 -target 1.4 -classpath $JRUBY_HOME/lib/jruby.jar HpricotScanService.java}
165
+ sh %{jar cf hpricot_scan.jar HpricotScanService.class}
166
+ end
167
+
168
+ desc "Compiles the JRuby extension"
169
+ task :hpricot_scan_java => [:ragel_java] do
170
+ Dir.chdir("ext/hpricot_scan", &compile_java)
171
+ end
172
+
173
+ JRubySpec = SPEC.dup
174
+ JRubySpec.platform = 'jruby'
175
+ JRubySpec.files = PKG_FILES + ["#{ARCHLIB}/hpricot_scan.jar"]
176
+ JRubySpec.extensions = []
177
+
178
+ JRUBY_PKG_DIR = "#{PKG}-jruby"
179
+
180
+ desc "Package up the JRuby distribution."
181
+ file JRUBY_PKG_DIR => [:ragel_java, :package] do
182
+ sh "tar zxf pkg/#{PKG}.tgz"
183
+ mv PKG, JRUBY_PKG_DIR
184
+ end
185
+
186
+ desc "Cross-compile the hpricot_scan extension for JRuby"
187
+ file "hpricot_scan_jruby" => [JRUBY_PKG_DIR] do
188
+ Dir.chdir("#{JRUBY_PKG_DIR}/ext/hpricot_scan", &compile_java)
189
+ mv "#{JRUBY_PKG_DIR}/ext/hpricot_scan/hpricot_scan.jar", "#{JRUBY_PKG_DIR}/#{ARCHLIB}"
190
+ end
191
+
192
+ desc "Build the RubyGems package for JRuby"
193
+ task :package_jruby => ["hpricot_scan_jruby"] do
194
+ Dir.chdir("#{JRUBY_PKG_DIR}") do
195
+ Gem::Builder.new(JRubySpec).build
196
+ verbose(true) {
197
+ mv Dir["*.gem"].first, "../pkg/#{JRUBY_PKG_DIR}.gem"
198
+ }
199
+ end
200
+ end
201
+
202
+ CLEAN.include JRUBY_PKG_DIR
203
+
168
204
  task :install do
169
205
  sh %{rake package}
170
206
  sh %{sudo gem install pkg/#{NAME}-#{VERS}}
@@ -0,0 +1,1340 @@
1
+
2
+ import java.io.IOException;
3
+
4
+ import org.jruby.Ruby;
5
+ import org.jruby.RubyClass;
6
+ import org.jruby.RubyHash;
7
+ import org.jruby.RubyModule;
8
+ import org.jruby.RubyNumeric;
9
+ import org.jruby.RubyString;
10
+ import org.jruby.runtime.Block;
11
+ import org.jruby.runtime.CallbackFactory;
12
+ import org.jruby.runtime.builtin.IRubyObject;
13
+ import org.jruby.exceptions.RaiseException;
14
+ import org.jruby.runtime.load.BasicLibraryService;
15
+
16
+ public class HpricotScanService implements BasicLibraryService {
17
+ public static String NO_WAY_SERIOUSLY="*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
18
+
19
+ public void ELE(IRubyObject N) {
20
+ if (tokend > tokstart || text) {
21
+ IRubyObject raw_string = runtime.getNil();
22
+ ele_open = false; text = false;
23
+ if (tokstart != -1 && N != cdata && N != sym_text && N != procins && N != comment) {
24
+ raw_string = runtime.newString(new String(buf,tokstart,tokend-tokstart));
25
+ }
26
+ rb_yield_tokens(N, tag[0], attr, raw_string, taint);
27
+ }
28
+ }
29
+
30
+ public void SET(IRubyObject[] N, int E) {
31
+ int mark = 0;
32
+ if(N == tag) {
33
+ if(mark_tag == -1 || E == mark_tag) {
34
+ tag[0] = runtime.newString("");
35
+ } else if(E > mark_tag) {
36
+ tag[0] = runtime.newString(new String(buf,mark_tag, E-mark_tag));
37
+ }
38
+ } else if(N == akey) {
39
+ if(mark_akey == -1 || E == mark_akey) {
40
+ akey[0] = runtime.newString("");
41
+ } else if(E > mark_akey) {
42
+ akey[0] = runtime.newString(new String(buf,mark_akey, E-mark_akey));
43
+ }
44
+ } else if(N == aval) {
45
+ if(mark_aval == -1 || E == mark_aval) {
46
+ aval[0] = runtime.newString("");
47
+ } else if(E > mark_aval) {
48
+ aval[0] = runtime.newString(new String(buf,mark_aval, E-mark_aval));
49
+ }
50
+ }
51
+ }
52
+
53
+ public void CAT(IRubyObject[] N, int E) {
54
+ if(N[0].isNil()) {
55
+ SET(N,E);
56
+ } else {
57
+ int mark = 0;
58
+ if(N == tag) {
59
+ mark = mark_tag;
60
+ } else if(N == akey) {
61
+ mark = mark_akey;
62
+ } else if(N == aval) {
63
+ mark = mark_aval;
64
+ }
65
+ ((RubyString)(N[0])).append(runtime.newString(new String(buf, mark, E-mark)));
66
+ }
67
+ }
68
+
69
+ public void SLIDE(Object N) {
70
+ int mark = 0;
71
+ if(N == tag) {
72
+ mark = mark_tag;
73
+ } else if(N == akey) {
74
+ mark = mark_akey;
75
+ } else if(N == aval) {
76
+ mark = mark_aval;
77
+ }
78
+ if(mark > tokstart) {
79
+ if(N == tag) {
80
+ mark_tag -= tokstart;
81
+ } else if(N == akey) {
82
+ mark_akey -= tokstart;
83
+ } else if(N == aval) {
84
+ mark_aval -= tokstart;
85
+ }
86
+ }
87
+ }
88
+
89
+ public void ATTR(IRubyObject K, IRubyObject V) {
90
+ if(!K.isNil()) {
91
+ if(attr.isNil()) {
92
+ attr = RubyHash.newHash(runtime);
93
+ }
94
+ ((RubyHash)attr).aset(K,V);
95
+ }
96
+ }
97
+
98
+ public void ATTR(IRubyObject[] K, IRubyObject V) {
99
+ ATTR(K[0],V);
100
+ }
101
+
102
+ public void ATTR(IRubyObject K, IRubyObject[] V) {
103
+ ATTR(K,V[0]);
104
+ }
105
+
106
+ public void ATTR(IRubyObject[] K, IRubyObject[] V) {
107
+ ATTR(K[0],V[0]);
108
+ }
109
+
110
+ public void TEXT_PASS() {
111
+ if(!text) {
112
+ if(ele_open) {
113
+ ele_open = false;
114
+ if(tokstart > -1) {
115
+ mark_tag = tokstart;
116
+ }
117
+ } else {
118
+ mark_tag = p;
119
+ }
120
+ attr = runtime.getNil();
121
+ tag[0] = runtime.getNil();
122
+ text = true;
123
+ }
124
+ }
125
+
126
+ public void EBLK(IRubyObject N, int T) {
127
+ CAT(tag, p - T + 1);
128
+ ELE(N);
129
+ }
130
+
131
+
132
+ public void rb_raise(RubyClass error, String message) {
133
+ throw new RaiseException(runtime, error, message, true);
134
+ }
135
+
136
+ public IRubyObject rb_str_new2(String s) {
137
+ return runtime.newString(s);
138
+ }
139
+
140
+
141
+
142
+
143
+ static final byte[] _hpricot_scan_actions = {
144
+ 0, 1, 1, 1, 2, 1, 4, 1,
145
+ 5, 1, 6, 1, 7, 1, 8, 1,
146
+ 9, 1, 10, 1, 11, 1, 12, 1,
147
+ 14, 1, 16, 1, 20, 1, 21, 1,
148
+ 22, 1, 24, 1, 25, 1, 26, 1,
149
+ 28, 1, 29, 1, 30, 1, 32, 1,
150
+ 33, 1, 38, 1, 39, 1, 40, 1,
151
+ 41, 1, 42, 1, 43, 1, 44, 1,
152
+ 45, 1, 46, 1, 47, 1, 48, 1,
153
+ 49, 1, 50, 2, 2, 5, 2, 2,
154
+ 6, 2, 2, 11, 2, 2, 12, 2,
155
+ 2, 14, 2, 4, 39, 2, 4, 40,
156
+ 2, 4, 41, 2, 5, 2, 2, 6,
157
+ 14, 2, 7, 6, 2, 7, 14, 2,
158
+ 11, 12, 2, 13, 3, 2, 14, 6,
159
+ 2, 14, 40, 2, 15, 24, 2, 15,
160
+ 28, 2, 15, 32, 2, 15, 45, 2,
161
+ 17, 23, 2, 18, 27, 2, 19, 31,
162
+ 2, 22, 34, 2, 22, 36, 3, 2,
163
+ 6, 14, 3, 2, 14, 6, 3, 6,
164
+ 7, 14, 3, 6, 14, 40, 3, 7,
165
+ 14, 40, 3, 14, 6, 40, 3, 14,
166
+ 13, 3, 3, 22, 0, 37, 3, 22,
167
+ 2, 34, 3, 22, 14, 35, 4, 2,
168
+ 14, 13, 3, 4, 6, 7, 14, 40,
169
+ 4, 22, 2, 14, 35, 4, 22, 6,
170
+ 14, 35, 4, 22, 7, 14, 35, 4,
171
+ 22, 14, 6, 35, 5, 22, 2, 6,
172
+ 14, 35, 5, 22, 2, 14, 6, 35,
173
+ 5, 22, 6, 7, 14, 35
174
+ };
175
+
176
+ static final short[] _hpricot_scan_key_offsets = {
177
+ 0, 3, 4, 5, 6, 7, 8, 9,
178
+ 10, 13, 22, 37, 44, 45, 46, 47,
179
+ 48, 49, 52, 57, 69, 81, 86, 93,
180
+ 94, 95, 100, 101, 105, 106, 107, 121,
181
+ 135, 152, 169, 186, 203, 210, 212, 214,
182
+ 220, 222, 227, 232, 238, 240, 245, 251,
183
+ 265, 266, 267, 268, 269, 270, 271, 272,
184
+ 273, 274, 275, 276, 282, 296, 300, 313,
185
+ 326, 340, 354, 355, 366, 375, 388, 405,
186
+ 423, 441, 450, 461, 480, 499, 510, 521,
187
+ 536, 538, 540, 556, 572, 575, 587, 599,
188
+ 619, 639, 658, 677, 697, 717, 728, 739,
189
+ 751, 763, 775, 791, 794, 809, 811, 813,
190
+ 829, 845, 848, 860, 871, 890, 910, 930,
191
+ 941, 952, 964, 984, 1004, 1016, 1036, 1057,
192
+ 1074, 1091, 1095, 1098, 1110, 1122, 1142, 1162,
193
+ 1182, 1194, 1206, 1226, 1242, 1258, 1270, 1291,
194
+ 1310, 1313, 1328, 1340, 1355, 1358, 1369, 1371,
195
+ 1373, 1384, 1391, 1404, 1418, 1432, 1445, 1446,
196
+ 1447, 1448, 1449, 1450, 1451, 1455, 1460, 1469,
197
+ 1479, 1484, 1491, 1492, 1493, 1494, 1495, 1496,
198
+ 1497, 1498, 1499, 1503, 1508, 1512, 1522, 1527,
199
+ 1533, 1534, 1535, 1536, 1537, 1538, 1539, 1540,
200
+ 1541, 1542, 1546, 1551, 1553, 1554, 1555, 1560,
201
+ 1561, 1562, 1564, 1565, 1566, 1567, 1568, 1572,
202
+ 1582, 1591, 1601, 1602, 1603, 1605, 1614, 1615,
203
+ 1616, 1617, 1619, 1621, 1624, 1627, 1631, 1633,
204
+ 1634, 1636, 1637, 1640
205
+ };
206
+
207
+ static final char[] _hpricot_scan_trans_keys = {
208
+ 45, 68, 91, 45, 79, 67, 84, 89,
209
+ 80, 69, 32, 9, 13, 32, 58, 95,
210
+ 9, 13, 65, 90, 97, 122, 32, 62,
211
+ 63, 91, 95, 9, 13, 45, 46, 48,
212
+ 58, 65, 90, 97, 122, 32, 62, 80,
213
+ 83, 91, 9, 13, 85, 66, 76, 73,
214
+ 67, 32, 9, 13, 32, 34, 39, 9,
215
+ 13, 9, 34, 61, 95, 32, 37, 39,
216
+ 59, 63, 90, 97, 122, 9, 34, 61,
217
+ 95, 32, 37, 39, 59, 63, 90, 97,
218
+ 122, 32, 62, 91, 9, 13, 32, 34,
219
+ 39, 62, 91, 9, 13, 34, 34, 32,
220
+ 62, 91, 9, 13, 93, 32, 62, 9,
221
+ 13, 39, 39, 9, 39, 61, 95, 32,
222
+ 33, 35, 37, 40, 59, 63, 90, 97,
223
+ 122, 9, 39, 61, 95, 32, 33, 35,
224
+ 37, 40, 59, 63, 90, 97, 122, 9,
225
+ 32, 33, 39, 62, 91, 95, 10, 13,
226
+ 35, 37, 40, 59, 61, 90, 97, 122,
227
+ 9, 32, 34, 39, 62, 91, 95, 10,
228
+ 13, 33, 37, 40, 59, 61, 90, 97,
229
+ 122, 9, 32, 33, 39, 62, 91, 95,
230
+ 10, 13, 35, 37, 40, 59, 61, 90,
231
+ 97, 122, 9, 32, 34, 39, 62, 91,
232
+ 95, 10, 13, 33, 37, 40, 59, 61,
233
+ 90, 97, 122, 32, 34, 39, 62, 91,
234
+ 9, 13, 34, 39, 34, 39, 32, 39,
235
+ 62, 91, 9, 13, 39, 93, 32, 62,
236
+ 93, 9, 13, 32, 39, 62, 9, 13,
237
+ 32, 34, 62, 91, 9, 13, 34, 93,
238
+ 32, 34, 62, 9, 13, 32, 39, 62,
239
+ 91, 9, 13, 9, 39, 61, 95, 32,
240
+ 33, 35, 37, 40, 59, 63, 90, 97,
241
+ 122, 89, 83, 84, 69, 77, 67, 68,
242
+ 65, 84, 65, 91, 58, 95, 65, 90,
243
+ 97, 122, 32, 62, 63, 95, 9, 13,
244
+ 45, 46, 48, 58, 65, 90, 97, 122,
245
+ 32, 62, 9, 13, 32, 47, 62, 63,
246
+ 95, 9, 13, 45, 58, 65, 90, 97,
247
+ 122, 32, 47, 62, 63, 95, 9, 13,
248
+ 45, 58, 65, 90, 97, 122, 32, 47,
249
+ 61, 62, 63, 95, 9, 13, 45, 58,
250
+ 65, 90, 97, 122, 32, 47, 61, 62,
251
+ 63, 95, 9, 13, 45, 58, 65, 90,
252
+ 97, 122, 62, 13, 32, 34, 39, 47,
253
+ 60, 62, 9, 10, 11, 12, 13, 32,
254
+ 47, 60, 62, 9, 10, 11, 12, 32,
255
+ 47, 62, 63, 95, 9, 13, 45, 58,
256
+ 65, 90, 97, 122, 13, 32, 47, 60,
257
+ 62, 63, 95, 9, 10, 11, 12, 45,
258
+ 58, 65, 90, 97, 122, 13, 32, 47,
259
+ 60, 61, 62, 63, 95, 9, 10, 11,
260
+ 12, 45, 58, 65, 90, 97, 122, 13,
261
+ 32, 47, 60, 61, 62, 63, 95, 9,
262
+ 10, 11, 12, 45, 58, 65, 90, 97,
263
+ 122, 13, 32, 47, 60, 62, 9, 10,
264
+ 11, 12, 13, 32, 34, 39, 47, 60,
265
+ 62, 9, 10, 11, 12, 13, 32, 34,
266
+ 39, 47, 60, 62, 63, 95, 9, 10,
267
+ 11, 12, 45, 58, 65, 90, 97, 122,
268
+ 13, 32, 34, 39, 47, 60, 62, 63,
269
+ 95, 9, 10, 11, 12, 45, 58, 65,
270
+ 90, 97, 122, 13, 32, 34, 47, 60,
271
+ 62, 92, 9, 10, 11, 12, 13, 32,
272
+ 34, 47, 60, 62, 92, 9, 10, 11,
273
+ 12, 32, 34, 47, 62, 63, 92, 95,
274
+ 9, 13, 45, 58, 65, 90, 97, 122,
275
+ 34, 92, 34, 92, 32, 34, 47, 61,
276
+ 62, 63, 92, 95, 9, 13, 45, 58,
277
+ 65, 90, 97, 122, 32, 34, 47, 61,
278
+ 62, 63, 92, 95, 9, 13, 45, 58,
279
+ 65, 90, 97, 122, 34, 62, 92, 13,
280
+ 32, 34, 39, 47, 60, 62, 92, 9,
281
+ 10, 11, 12, 13, 32, 34, 39, 47,
282
+ 60, 62, 92, 9, 10, 11, 12, 13,
283
+ 32, 34, 39, 47, 60, 62, 63, 92,
284
+ 95, 9, 10, 11, 12, 45, 58, 65,
285
+ 90, 97, 122, 13, 32, 34, 39, 47,
286
+ 60, 62, 63, 92, 95, 9, 10, 11,
287
+ 12, 45, 58, 65, 90, 97, 122, 13,
288
+ 32, 34, 47, 60, 62, 63, 92, 95,
289
+ 9, 10, 11, 12, 45, 58, 65, 90,
290
+ 97, 122, 13, 32, 34, 47, 60, 62,
291
+ 63, 92, 95, 9, 10, 11, 12, 45,
292
+ 58, 65, 90, 97, 122, 13, 32, 34,
293
+ 47, 60, 61, 62, 63, 92, 95, 9,
294
+ 10, 11, 12, 45, 58, 65, 90, 97,
295
+ 122, 13, 32, 34, 47, 60, 61, 62,
296
+ 63, 92, 95, 9, 10, 11, 12, 45,
297
+ 58, 65, 90, 97, 122, 13, 32, 34,
298
+ 47, 60, 62, 92, 9, 10, 11, 12,
299
+ 13, 32, 34, 47, 60, 62, 92, 9,
300
+ 10, 11, 12, 13, 32, 34, 39, 47,
301
+ 60, 62, 92, 9, 10, 11, 12, 13,
302
+ 32, 34, 39, 47, 60, 62, 92, 9,
303
+ 10, 11, 12, 13, 32, 34, 39, 47,
304
+ 60, 62, 92, 9, 10, 11, 12, 32,
305
+ 34, 39, 47, 62, 63, 92, 95, 9,
306
+ 13, 45, 58, 65, 90, 97, 122, 34,
307
+ 39, 92, 32, 39, 47, 62, 63, 92,
308
+ 95, 9, 13, 45, 58, 65, 90, 97,
309
+ 122, 39, 92, 39, 92, 32, 39, 47,
310
+ 61, 62, 63, 92, 95, 9, 13, 45,
311
+ 58, 65, 90, 97, 122, 32, 39, 47,
312
+ 61, 62, 63, 92, 95, 9, 13, 45,
313
+ 58, 65, 90, 97, 122, 39, 62, 92,
314
+ 13, 32, 34, 39, 47, 60, 62, 92,
315
+ 9, 10, 11, 12, 13, 32, 39, 47,
316
+ 60, 62, 92, 9, 10, 11, 12, 13,
317
+ 32, 39, 47, 60, 62, 63, 92, 95,
318
+ 9, 10, 11, 12, 45, 58, 65, 90,
319
+ 97, 122, 13, 32, 39, 47, 60, 61,
320
+ 62, 63, 92, 95, 9, 10, 11, 12,
321
+ 45, 58, 65, 90, 97, 122, 13, 32,
322
+ 39, 47, 60, 61, 62, 63, 92, 95,
323
+ 9, 10, 11, 12, 45, 58, 65, 90,
324
+ 97, 122, 13, 32, 39, 47, 60, 62,
325
+ 92, 9, 10, 11, 12, 13, 32, 39,
326
+ 47, 60, 62, 92, 9, 10, 11, 12,
327
+ 13, 32, 34, 39, 47, 60, 62, 92,
328
+ 9, 10, 11, 12, 13, 32, 34, 39,
329
+ 47, 60, 62, 63, 92, 95, 9, 10,
330
+ 11, 12, 45, 58, 65, 90, 97, 122,
331
+ 13, 32, 34, 39, 47, 60, 62, 63,
332
+ 92, 95, 9, 10, 11, 12, 45, 58,
333
+ 65, 90, 97, 122, 13, 32, 34, 39,
334
+ 47, 60, 62, 92, 9, 10, 11, 12,
335
+ 13, 32, 34, 39, 47, 60, 62, 63,
336
+ 92, 95, 9, 10, 11, 12, 45, 58,
337
+ 65, 90, 97, 122, 13, 32, 34, 39,
338
+ 47, 60, 61, 62, 63, 92, 95, 9,
339
+ 10, 11, 12, 45, 58, 65, 90, 97,
340
+ 122, 32, 34, 39, 47, 61, 62, 63,
341
+ 92, 95, 9, 13, 45, 58, 65, 90,
342
+ 97, 122, 32, 34, 39, 47, 61, 62,
343
+ 63, 92, 95, 9, 13, 45, 58, 65,
344
+ 90, 97, 122, 34, 39, 62, 92, 34,
345
+ 39, 92, 13, 32, 34, 39, 47, 60,
346
+ 62, 92, 9, 10, 11, 12, 13, 32,
347
+ 34, 39, 47, 60, 62, 92, 9, 10,
348
+ 11, 12, 13, 32, 34, 39, 47, 60,
349
+ 62, 63, 92, 95, 9, 10, 11, 12,
350
+ 45, 58, 65, 90, 97, 122, 13, 32,
351
+ 34, 39, 47, 60, 62, 63, 92, 95,
352
+ 9, 10, 11, 12, 45, 58, 65, 90,
353
+ 97, 122, 13, 32, 34, 39, 47, 60,
354
+ 62, 63, 92, 95, 9, 10, 11, 12,
355
+ 45, 58, 65, 90, 97, 122, 13, 32,
356
+ 34, 39, 47, 60, 62, 92, 9, 10,
357
+ 11, 12, 13, 32, 34, 39, 47, 60,
358
+ 62, 92, 9, 10, 11, 12, 13, 32,
359
+ 34, 39, 47, 60, 62, 63, 92, 95,
360
+ 9, 10, 11, 12, 45, 58, 65, 90,
361
+ 97, 122, 32, 34, 39, 47, 62, 63,
362
+ 92, 95, 9, 13, 45, 58, 65, 90,
363
+ 97, 122, 32, 34, 39, 47, 62, 63,
364
+ 92, 95, 9, 13, 45, 58, 65, 90,
365
+ 97, 122, 13, 32, 34, 39, 47, 60,
366
+ 62, 92, 9, 10, 11, 12, 13, 32,
367
+ 34, 39, 47, 60, 61, 62, 63, 92,
368
+ 95, 9, 10, 11, 12, 45, 58, 65,
369
+ 90, 97, 122, 13, 32, 39, 47, 60,
370
+ 62, 63, 92, 95, 9, 10, 11, 12,
371
+ 45, 58, 65, 90, 97, 122, 34, 39,
372
+ 92, 32, 39, 47, 62, 63, 92, 95,
373
+ 9, 13, 45, 58, 65, 90, 97, 122,
374
+ 13, 32, 34, 39, 47, 60, 62, 92,
375
+ 9, 10, 11, 12, 32, 34, 47, 62,
376
+ 63, 92, 95, 9, 13, 45, 58, 65,
377
+ 90, 97, 122, 34, 39, 92, 13, 32,
378
+ 39, 47, 60, 62, 92, 9, 10, 11,
379
+ 12, 34, 92, 39, 92, 13, 32, 34,
380
+ 39, 47, 60, 62, 9, 10, 11, 12,
381
+ 58, 95, 120, 65, 90, 97, 122, 32,
382
+ 63, 95, 9, 13, 45, 46, 48, 58,
383
+ 65, 90, 97, 122, 32, 63, 95, 109,
384
+ 9, 13, 45, 46, 48, 58, 65, 90,
385
+ 97, 122, 32, 63, 95, 108, 9, 13,
386
+ 45, 46, 48, 58, 65, 90, 97, 122,
387
+ 32, 63, 95, 9, 13, 45, 46, 48,
388
+ 58, 65, 90, 97, 122, 101, 114, 115,
389
+ 105, 111, 110, 32, 61, 9, 13, 32,
390
+ 34, 39, 9, 13, 95, 45, 46, 48,
391
+ 58, 65, 90, 97, 122, 34, 95, 45,
392
+ 46, 48, 58, 65, 90, 97, 122, 32,
393
+ 62, 63, 9, 13, 32, 62, 63, 101,
394
+ 115, 9, 13, 62, 110, 99, 111, 100,
395
+ 105, 110, 103, 32, 61, 9, 13, 32,
396
+ 34, 39, 9, 13, 65, 90, 97, 122,
397
+ 34, 95, 45, 46, 48, 57, 65, 90,
398
+ 97, 122, 32, 62, 63, 9, 13, 32,
399
+ 62, 63, 115, 9, 13, 116, 97, 110,
400
+ 100, 97, 108, 111, 110, 101, 32, 61,
401
+ 9, 13, 32, 34, 39, 9, 13, 110,
402
+ 121, 111, 34, 32, 62, 63, 9, 13,
403
+ 101, 115, 110, 121, 111, 39, 101, 115,
404
+ 65, 90, 97, 122, 39, 95, 45, 46,
405
+ 48, 57, 65, 90, 97, 122, 95, 45,
406
+ 46, 48, 58, 65, 90, 97, 122, 39,
407
+ 95, 45, 46, 48, 58, 65, 90, 97,
408
+ 122, 62, 62, 10, 60, 33, 47, 58,
409
+ 63, 95, 65, 90, 97, 122, 39, 93,
410
+ 34, 34, 92, 39, 92, 34, 39, 92,
411
+ 32, 9, 13, 32, 118, 9, 13, 10,
412
+ 45, 45, 10, 93, 93, 10, 62, 63,
413
+ 62, 0
414
+ };
415
+
416
+ static final byte[] _hpricot_scan_single_lengths = {
417
+ 3, 1, 1, 1, 1, 1, 1, 1,
418
+ 1, 3, 5, 5, 1, 1, 1, 1,
419
+ 1, 1, 3, 4, 4, 3, 5, 1,
420
+ 1, 3, 1, 2, 1, 1, 4, 4,
421
+ 7, 7, 7, 7, 5, 2, 2, 4,
422
+ 2, 3, 3, 4, 2, 3, 4, 4,
423
+ 1, 1, 1, 1, 1, 1, 1, 1,
424
+ 1, 1, 1, 2, 4, 2, 5, 5,
425
+ 6, 6, 1, 7, 5, 5, 7, 8,
426
+ 8, 5, 7, 9, 9, 7, 7, 7,
427
+ 2, 2, 8, 8, 3, 8, 8, 10,
428
+ 10, 9, 9, 10, 10, 7, 7, 8,
429
+ 8, 8, 8, 3, 7, 2, 2, 8,
430
+ 8, 3, 8, 7, 9, 10, 10, 7,
431
+ 7, 8, 10, 10, 8, 10, 11, 9,
432
+ 9, 4, 3, 8, 8, 10, 10, 10,
433
+ 8, 8, 10, 8, 8, 8, 11, 9,
434
+ 3, 7, 8, 7, 3, 7, 2, 2,
435
+ 7, 3, 3, 4, 4, 3, 1, 1,
436
+ 1, 1, 1, 1, 2, 3, 1, 2,
437
+ 3, 5, 1, 1, 1, 1, 1, 1,
438
+ 1, 1, 2, 3, 0, 2, 3, 4,
439
+ 1, 1, 1, 1, 1, 1, 1, 1,
440
+ 1, 2, 3, 2, 1, 1, 3, 1,
441
+ 1, 2, 1, 1, 1, 1, 0, 2,
442
+ 1, 2, 1, 1, 2, 5, 1, 1,
443
+ 1, 2, 2, 3, 1, 2, 2, 1,
444
+ 2, 1, 3, 1
445
+ };
446
+
447
+ static final byte[] _hpricot_scan_range_lengths = {
448
+ 0, 0, 0, 0, 0, 0, 0, 0,
449
+ 1, 3, 5, 1, 0, 0, 0, 0,
450
+ 0, 1, 1, 4, 4, 1, 1, 0,
451
+ 0, 1, 0, 1, 0, 0, 5, 5,
452
+ 5, 5, 5, 5, 1, 0, 0, 1,
453
+ 0, 1, 1, 1, 0, 1, 1, 5,
454
+ 0, 0, 0, 0, 0, 0, 0, 0,
455
+ 0, 0, 0, 2, 5, 1, 4, 4,
456
+ 4, 4, 0, 2, 2, 4, 5, 5,
457
+ 5, 2, 2, 5, 5, 2, 2, 4,
458
+ 0, 0, 4, 4, 0, 2, 2, 5,
459
+ 5, 5, 5, 5, 5, 2, 2, 2,
460
+ 2, 2, 4, 0, 4, 0, 0, 4,
461
+ 4, 0, 2, 2, 5, 5, 5, 2,
462
+ 2, 2, 5, 5, 2, 5, 5, 4,
463
+ 4, 0, 0, 2, 2, 5, 5, 5,
464
+ 2, 2, 5, 4, 4, 2, 5, 5,
465
+ 0, 4, 2, 4, 0, 2, 0, 0,
466
+ 2, 2, 5, 5, 5, 5, 0, 0,
467
+ 0, 0, 0, 0, 1, 1, 4, 4,
468
+ 1, 1, 0, 0, 0, 0, 0, 0,
469
+ 0, 0, 1, 1, 2, 4, 1, 1,
470
+ 0, 0, 0, 0, 0, 0, 0, 0,
471
+ 0, 1, 1, 0, 0, 0, 1, 0,
472
+ 0, 0, 0, 0, 0, 0, 2, 4,
473
+ 4, 4, 0, 0, 0, 2, 0, 0,
474
+ 0, 0, 0, 0, 1, 1, 0, 0,
475
+ 0, 0, 0, 0
476
+ };
477
+
478
+ static final short[] _hpricot_scan_index_offsets = {
479
+ 0, 4, 6, 8, 10, 12, 14, 16,
480
+ 18, 21, 28, 39, 46, 48, 50, 52,
481
+ 54, 56, 59, 64, 73, 82, 87, 94,
482
+ 96, 98, 103, 105, 109, 111, 113, 123,
483
+ 133, 146, 159, 172, 185, 192, 195, 198,
484
+ 204, 207, 212, 217, 223, 226, 231, 237,
485
+ 247, 249, 251, 253, 255, 257, 259, 261,
486
+ 263, 265, 267, 269, 274, 284, 288, 298,
487
+ 308, 319, 330, 332, 342, 350, 360, 373,
488
+ 387, 401, 409, 419, 434, 449, 459, 469,
489
+ 481, 484, 487, 500, 513, 517, 528, 539,
490
+ 555, 571, 586, 601, 617, 633, 643, 653,
491
+ 664, 675, 686, 699, 703, 715, 718, 721,
492
+ 734, 747, 751, 762, 772, 787, 803, 819,
493
+ 829, 839, 850, 866, 882, 893, 909, 926,
494
+ 940, 954, 959, 963, 974, 985, 1001, 1017,
495
+ 1033, 1044, 1055, 1071, 1084, 1097, 1108, 1125,
496
+ 1140, 1144, 1156, 1167, 1179, 1183, 1193, 1196,
497
+ 1199, 1209, 1215, 1224, 1234, 1244, 1253, 1255,
498
+ 1257, 1259, 1261, 1263, 1265, 1269, 1274, 1280,
499
+ 1287, 1292, 1299, 1301, 1303, 1305, 1307, 1309,
500
+ 1311, 1313, 1315, 1319, 1324, 1327, 1334, 1339,
501
+ 1345, 1347, 1349, 1351, 1353, 1355, 1357, 1359,
502
+ 1361, 1363, 1367, 1372, 1375, 1377, 1379, 1384,
503
+ 1386, 1388, 1391, 1393, 1395, 1397, 1399, 1402,
504
+ 1409, 1415, 1422, 1424, 1426, 1429, 1437, 1439,
505
+ 1441, 1443, 1446, 1449, 1453, 1456, 1460, 1463,
506
+ 1465, 1468, 1470, 1474
507
+ };
508
+
509
+ static final short[] _hpricot_scan_indicies = {
510
+ 335, 336, 337, 296, 356, 296, 349, 296,
511
+ 399, 296, 401, 296, 354, 296, 350, 296,
512
+ 400, 296, 308, 308, 296, 308, 309, 309,
513
+ 308, 309, 309, 296, 328, 330, 329, 331,
514
+ 329, 328, 329, 329, 329, 329, 296, 310,
515
+ 302, 311, 312, 0, 310, 296, 353, 296,
516
+ 342, 296, 347, 296, 346, 296, 343, 296,
517
+ 304, 304, 296, 304, 305, 306, 304, 296,
518
+ 321, 320, 321, 321, 321, 321, 321, 321,
519
+ 296, 319, 320, 319, 319, 319, 319, 319,
520
+ 319, 296, 298, 302, 0, 298, 296, 298,
521
+ 300, 307, 302, 0, 298, 296, 6, 222,
522
+ 6, 13, 358, 302, 0, 358, 69, 1,
523
+ 0, 1, 302, 1, 69, 6, 182, 6,
524
+ 5, 322, 323, 322, 322, 322, 322, 322,
525
+ 322, 322, 296, 299, 303, 299, 299, 299,
526
+ 299, 299, 299, 299, 296, 297, 297, 299,
527
+ 303, 302, 0, 299, 298, 299, 299, 299,
528
+ 299, 296, 297, 297, 300, 301, 302, 0,
529
+ 299, 298, 299, 299, 299, 299, 296, 186,
530
+ 186, 188, 42, 184, 185, 188, 187, 188,
531
+ 188, 188, 188, 182, 43, 43, 38, 44,
532
+ 40, 34, 41, 37, 41, 41, 41, 41,
533
+ 5, 37, 38, 39, 40, 34, 37, 5,
534
+ 63, 224, 223, 63, 64, 62, 371, 6,
535
+ 40, 34, 371, 5, 35, 36, 34, 26,
536
+ 27, 1, 26, 0, 36, 6, 40, 36,
537
+ 5, 60, 6, 61, 58, 60, 13, 35,
538
+ 59, 58, 59, 6, 61, 59, 13, 183,
539
+ 6, 184, 185, 183, 182, 41, 42, 41,
540
+ 41, 41, 41, 41, 41, 41, 5, 403,
541
+ 296, 351, 296, 352, 296, 345, 296, 348,
542
+ 296, 398, 296, 344, 296, 341, 296, 402,
543
+ 296, 397, 296, 355, 296, 338, 338, 338,
544
+ 338, 296, 332, 334, 333, 333, 332, 333,
545
+ 333, 333, 333, 296, 313, 314, 313, 296,
546
+ 324, 326, 327, 325, 325, 324, 325, 325,
547
+ 325, 296, 315, 317, 318, 316, 316, 315,
548
+ 316, 316, 316, 296, 364, 366, 367, 368,
549
+ 365, 365, 364, 365, 365, 365, 69, 359,
550
+ 361, 362, 162, 360, 360, 359, 360, 360,
551
+ 360, 69, 369, 69, 157, 157, 159, 160,
552
+ 161, 69, 162, 157, 158, 156, 66, 66,
553
+ 68, 69, 70, 66, 67, 65, 363, 361,
554
+ 162, 360, 360, 363, 360, 360, 360, 69,
555
+ 66, 66, 74, 69, 76, 73, 73, 66,
556
+ 67, 73, 73, 73, 65, 132, 132, 135,
557
+ 69, 136, 137, 134, 134, 132, 133, 134,
558
+ 134, 134, 65, 71, 71, 74, 69, 75,
559
+ 76, 73, 73, 71, 72, 73, 73, 73,
560
+ 65, 66, 66, 68, 69, 70, 66, 67,
561
+ 65, 226, 226, 228, 229, 230, 69, 70,
562
+ 226, 227, 156, 163, 163, 159, 160, 161,
563
+ 69, 162, 165, 165, 163, 164, 165, 165,
564
+ 165, 156, 226, 226, 228, 229, 231, 69,
565
+ 76, 165, 165, 226, 227, 165, 165, 165,
566
+ 156, 248, 248, 84, 246, 199, 250, 195,
567
+ 248, 249, 189, 92, 92, 84, 95, 7,
568
+ 96, 97, 92, 93, 91, 372, 3, 48,
569
+ 50, 47, 8, 47, 372, 47, 47, 47,
570
+ 7, 3, 8, 7, 11, 8, 7, 122,
571
+ 3, 124, 125, 126, 123, 8, 123, 122,
572
+ 123, 123, 123, 7, 46, 3, 48, 49,
573
+ 50, 47, 8, 47, 46, 47, 47, 47,
574
+ 7, 3, 45, 8, 7, 190, 190, 192,
575
+ 193, 194, 7, 50, 195, 190, 191, 189,
576
+ 196, 196, 192, 193, 194, 7, 50, 195,
577
+ 196, 197, 189, 196, 196, 192, 193, 194,
578
+ 7, 50, 198, 195, 198, 196, 197, 198,
579
+ 198, 198, 189, 242, 242, 244, 245, 247,
580
+ 7, 103, 198, 195, 198, 242, 243, 198,
581
+ 198, 198, 189, 248, 248, 84, 247, 199,
582
+ 251, 198, 195, 198, 248, 249, 198, 198,
583
+ 198, 189, 92, 92, 84, 101, 7, 103,
584
+ 100, 97, 100, 92, 93, 100, 100, 100,
585
+ 91, 144, 144, 84, 147, 7, 148, 149,
586
+ 146, 97, 146, 144, 145, 146, 146, 146,
587
+ 91, 98, 98, 84, 101, 7, 102, 103,
588
+ 100, 97, 100, 98, 99, 100, 100, 100,
589
+ 91, 92, 92, 84, 95, 7, 96, 97,
590
+ 92, 93, 91, 92, 92, 94, 95, 7,
591
+ 96, 97, 92, 93, 91, 242, 242, 244,
592
+ 245, 246, 7, 96, 195, 242, 243, 189,
593
+ 258, 258, 263, 94, 256, 215, 261, 211,
594
+ 258, 259, 205, 105, 105, 80, 94, 108,
595
+ 9, 109, 110, 105, 106, 104, 373, 10,
596
+ 11, 55, 57, 54, 12, 54, 373, 54,
597
+ 54, 54, 9, 10, 11, 12, 9, 370,
598
+ 3, 31, 33, 30, 4, 30, 370, 30,
599
+ 30, 30, 2, 3, 4, 2, 10, 4,
600
+ 2, 117, 3, 119, 120, 121, 118, 4,
601
+ 118, 117, 118, 118, 118, 2, 29, 3,
602
+ 31, 32, 33, 30, 4, 30, 29, 30,
603
+ 30, 30, 2, 3, 28, 4, 2, 167,
604
+ 167, 169, 170, 171, 2, 33, 172, 167,
605
+ 168, 166, 78, 78, 84, 81, 2, 82,
606
+ 83, 78, 79, 77, 78, 78, 84, 88,
607
+ 2, 90, 87, 83, 87, 78, 79, 87,
608
+ 87, 87, 77, 138, 138, 84, 141, 2,
609
+ 142, 143, 140, 83, 140, 138, 139, 140,
610
+ 140, 140, 77, 85, 85, 84, 88, 2,
611
+ 89, 90, 87, 83, 87, 85, 86, 87,
612
+ 87, 87, 77, 78, 78, 84, 81, 2,
613
+ 82, 83, 78, 79, 77, 78, 78, 80,
614
+ 81, 2, 82, 83, 78, 79, 77, 232,
615
+ 232, 234, 235, 236, 2, 82, 172, 232,
616
+ 233, 166, 173, 173, 169, 170, 171, 2,
617
+ 33, 175, 172, 175, 173, 174, 175, 175,
618
+ 175, 166, 232, 232, 234, 235, 237, 2,
619
+ 90, 175, 172, 175, 232, 233, 175, 175,
620
+ 175, 166, 258, 258, 80, 260, 256, 215,
621
+ 261, 211, 258, 259, 205, 105, 105, 80,
622
+ 94, 114, 9, 116, 113, 110, 113, 105,
623
+ 106, 113, 113, 113, 104, 150, 150, 80,
624
+ 94, 153, 9, 154, 155, 152, 110, 152,
625
+ 150, 151, 152, 152, 152, 104, 53, 10,
626
+ 11, 55, 56, 57, 54, 12, 54, 53,
627
+ 54, 54, 54, 9, 127, 10, 11, 129,
628
+ 130, 131, 128, 12, 128, 127, 128, 128,
629
+ 128, 9, 10, 11, 52, 12, 9, 51,
630
+ 51, 12, 9, 206, 206, 208, 209, 210,
631
+ 9, 57, 211, 206, 207, 205, 212, 212,
632
+ 208, 209, 210, 9, 57, 211, 212, 213,
633
+ 205, 212, 212, 208, 209, 210, 9, 57,
634
+ 214, 211, 214, 212, 213, 214, 214, 214,
635
+ 205, 252, 252, 254, 255, 257, 9, 116,
636
+ 214, 211, 214, 252, 253, 214, 214, 214,
637
+ 205, 258, 258, 80, 260, 257, 215, 262,
638
+ 214, 211, 214, 258, 259, 214, 214, 214,
639
+ 205, 105, 105, 80, 94, 108, 9, 109,
640
+ 110, 105, 106, 104, 105, 105, 107, 107,
641
+ 108, 9, 109, 110, 105, 106, 104, 258,
642
+ 258, 263, 94, 257, 215, 262, 214, 211,
643
+ 214, 258, 259, 214, 214, 214, 205, 218,
644
+ 10, 216, 220, 221, 219, 217, 219, 218,
645
+ 219, 219, 219, 215, 218, 225, 11, 220,
646
+ 221, 219, 217, 219, 218, 219, 219, 219,
647
+ 215, 252, 252, 254, 255, 256, 9, 109,
648
+ 211, 252, 253, 205, 111, 111, 80, 94,
649
+ 114, 9, 115, 116, 113, 110, 113, 111,
650
+ 112, 113, 113, 113, 104, 238, 238, 84,
651
+ 237, 176, 241, 175, 172, 175, 238, 239,
652
+ 175, 175, 175, 166, 10, 216, 217, 215,
653
+ 178, 3, 180, 181, 179, 177, 179, 178,
654
+ 179, 179, 179, 176, 173, 173, 169, 170,
655
+ 171, 2, 33, 172, 173, 174, 166, 201,
656
+ 3, 203, 204, 202, 200, 202, 201, 202,
657
+ 202, 202, 199, 225, 11, 217, 215, 238,
658
+ 238, 84, 236, 176, 240, 172, 238, 239,
659
+ 166, 3, 200, 199, 3, 177, 176, 163,
660
+ 163, 159, 160, 161, 69, 162, 163, 164,
661
+ 156, 339, 339, 340, 339, 339, 296, 15,
662
+ 357, 357, 15, 357, 357, 357, 357, 296,
663
+ 15, 357, 357, 408, 15, 357, 357, 357,
664
+ 357, 296, 15, 357, 357, 404, 15, 357,
665
+ 357, 357, 357, 296, 16, 357, 357, 16,
666
+ 357, 357, 357, 357, 296, 287, 264, 294,
667
+ 264, 396, 264, 387, 264, 393, 264, 268,
668
+ 264, 268, 265, 268, 264, 265, 266, 267,
669
+ 265, 264, 282, 282, 282, 282, 282, 264,
670
+ 275, 276, 276, 276, 276, 276, 264, 269,
671
+ 270, 271, 269, 264, 269, 270, 271, 272,
672
+ 273, 269, 264, 270, 264, 388, 264, 285,
673
+ 264, 394, 264, 385, 264, 289, 264, 390,
674
+ 264, 288, 264, 288, 374, 288, 264, 374,
675
+ 375, 376, 374, 264, 283, 283, 264, 277,
676
+ 278, 278, 278, 278, 278, 264, 274, 270,
677
+ 271, 274, 264, 274, 270, 271, 273, 274,
678
+ 264, 295, 264, 384, 264, 389, 264, 286,
679
+ 264, 284, 264, 290, 264, 395, 264, 391,
680
+ 264, 380, 264, 380, 377, 380, 264, 377,
681
+ 378, 379, 377, 264, 291, 292, 264, 293,
682
+ 264, 279, 264, 381, 270, 271, 381, 264,
683
+ 386, 264, 293, 264, 405, 406, 264, 392,
684
+ 264, 279, 264, 407, 264, 392, 264, 383,
685
+ 383, 264, 277, 281, 281, 281, 281, 281,
686
+ 264, 382, 382, 382, 382, 382, 264, 275,
687
+ 280, 280, 280, 280, 280, 264, 415, 414,
688
+ 422, 421, 24, 25, 23, 19, 20, 21,
689
+ 22, 21, 21, 21, 18, 6, 5, 1,
690
+ 0, 6, 13, 3, 8, 7, 3, 4,
691
+ 2, 10, 11, 12, 9, 15, 15, 14,
692
+ 16, 17, 16, 14, 412, 413, 411, 410,
693
+ 409, 419, 420, 418, 417, 416, 426, 424,
694
+ 427, 425, 424, 423, 0
695
+ };
696
+
697
+ static final short[] _hpricot_scan_trans_targs_wi = {
698
+ 26, 27, 101, 69, 102, 29, 25, 80,
699
+ 81, 99, 100, 79, 122, 24, 204, 212,
700
+ 213, 150, 204, 0, 59, 62, 145, 204,
701
+ 204, 205, 41, 207, 210, 104, 103, 105,
702
+ 106, 210, 40, 41, 42, 36, 37, 46,
703
+ 206, 47, 32, 35, 34, 209, 83, 82,
704
+ 84, 85, 209, 98, 211, 119, 120, 121,
705
+ 123, 211, 44, 45, 43, 208, 38, 39,
706
+ 43, 68, 69, 70, 73, 204, 204, 65,
707
+ 72, 71, 73, 74, 204, 107, 100, 108,
708
+ 108, 111, 210, 112, 70, 104, 110, 109,
709
+ 111, 113, 210, 78, 79, 90, 90, 93,
710
+ 209, 94, 83, 92, 91, 93, 95, 209,
711
+ 97, 98, 117, 117, 128, 211, 129, 119,
712
+ 134, 118, 128, 133, 211, 104, 103, 105,
713
+ 106, 210, 83, 82, 84, 85, 209, 119,
714
+ 120, 121, 123, 211, 65, 72, 71, 73,
715
+ 74, 204, 104, 110, 109, 111, 113, 210,
716
+ 83, 92, 91, 93, 95, 209, 119, 134,
717
+ 118, 128, 133, 211, 68, 144, 74, 142,
718
+ 143, 73, 204, 75, 76, 71, 107, 138,
719
+ 113, 136, 137, 111, 112, 114, 115, 109,
720
+ 101, 102, 100, 103, 105, 210, 29, 39,
721
+ 206, 40, 35, 36, 47, 78, 86, 95,
722
+ 139, 140, 93, 94, 87, 88, 91, 80,
723
+ 81, 79, 82, 84, 209, 97, 124, 133,
724
+ 131, 132, 128, 129, 125, 126, 118, 99,
725
+ 79, 122, 98, 120, 121, 211, 24, 38,
726
+ 43, 100, 75, 76, 77, 141, 73, 73,
727
+ 114, 115, 116, 135, 111, 111, 100, 108,
728
+ 210, 210, 87, 88, 89, 96, 93, 93,
729
+ 79, 90, 209, 209, 125, 126, 127, 130,
730
+ 128, 128, 98, 117, 90, 211, 211, 108,
731
+ 204, 157, 158, 200, 156, 161, 204, 162,
732
+ 163, 176, 175, 160, 159, 174, 173, 190,
733
+ 201, 199, 159, 173, 181, 165, 180, 151,
734
+ 170, 168, 182, 188, 191, 189, 152, 177,
735
+ 204, 33, 22, 31, 23, 34, 204, 32,
736
+ 18, 19, 30, 28, 9, 10, 11, 12,
737
+ 48, 61, 204, 63, 64, 66, 204, 20,
738
+ 21, 20, 31, 32, 63, 62, 66, 204,
739
+ 11, 10, 204, 26, 61, 60, 204, 1,
740
+ 2, 53, 60, 146, 147, 56, 14, 17,
741
+ 55, 52, 16, 15, 21, 3, 7, 50,
742
+ 51, 13, 6, 204, 204, 146, 25, 65,
743
+ 64, 66, 67, 69, 65, 64, 66, 67,
744
+ 204, 204, 100, 39, 79, 98, 171, 172,
745
+ 198, 186, 187, 193, 185, 190, 201, 199,
746
+ 178, 167, 192, 154, 164, 179, 169, 184,
747
+ 195, 155, 166, 183, 153, 58, 54, 4,
748
+ 8, 5, 57, 49, 149, 194, 196, 197,
749
+ 148, 214, 202, 214, 214, 215, 214, 214,
750
+ 216, 203, 216, 216, 217, 216, 216, 218,
751
+ 218, 218, 218, 219
752
+ };
753
+
754
+ static final short[] _hpricot_scan_trans_actions_wi = {
755
+ 0, 0, 0, 7, 0, 0, 21, 0,
756
+ 0, 0, 7, 7, 0, 0, 65, 0,
757
+ 31, 0, 67, 0, 0, 1, 0, 63,
758
+ 132, 178, 0, 144, 147, 0, 174, 23,
759
+ 0, 186, 0, 21, 0, 0, 0, 21,
760
+ 144, 0, 111, 0, 111, 147, 0, 174,
761
+ 23, 0, 186, 7, 147, 0, 174, 23,
762
+ 0, 186, 0, 0, 0, 144, 0, 21,
763
+ 21, 0, 9, 9, 102, 73, 162, 9,
764
+ 9, 174, 117, 0, 170, 0, 9, 9,
765
+ 7, 102, 205, 0, 7, 9, 9, 174,
766
+ 117, 0, 215, 0, 9, 9, 7, 102,
767
+ 205, 0, 9, 9, 174, 117, 0, 215,
768
+ 0, 9, 9, 7, 102, 205, 0, 9,
769
+ 9, 174, 117, 0, 215, 11, 0, 108,
770
+ 11, 210, 11, 0, 108, 11, 210, 11,
771
+ 0, 108, 11, 210, 105, 105, 0, 158,
772
+ 11, 195, 105, 105, 0, 158, 11, 232,
773
+ 105, 105, 0, 158, 11, 232, 105, 105,
774
+ 0, 158, 11, 232, 3, 3, 3, 0,
775
+ 0, 87, 120, 3, 3, 190, 3, 3,
776
+ 3, 0, 7, 87, 3, 3, 3, 190,
777
+ 3, 3, 3, 190, 87, 200, 3, 3,
778
+ 182, 3, 3, 3, 3, 3, 3, 3,
779
+ 7, 0, 87, 3, 3, 3, 190, 3,
780
+ 3, 3, 190, 87, 200, 3, 3, 3,
781
+ 7, 7, 87, 3, 3, 3, 190, 3,
782
+ 75, 3, 3, 190, 87, 200, 3, 3,
783
+ 84, 99, 78, 78, 0, 0, 150, 154,
784
+ 78, 78, 0, 7, 150, 154, 78, 78,
785
+ 220, 226, 78, 78, 7, 0, 150, 154,
786
+ 78, 78, 220, 226, 78, 78, 7, 7,
787
+ 150, 154, 78, 78, 75, 220, 226, 99,
788
+ 69, 0, 0, 0, 0, 0, 49, 0,
789
+ 0, 0, 0, 13, 0, 15, 0, 17,
790
+ 0, 0, 3, 3, 0, 0, 0, 0,
791
+ 0, 0, 0, 3, 3, 0, 0, 0,
792
+ 71, 0, 0, 0, 0, 19, 51, 19,
793
+ 0, 0, 0, 0, 0, 1, 0, 0,
794
+ 0, 0, 55, 0, 114, 0, 53, 0,
795
+ 19, 3, 3, 81, 5, 0, 5, 93,
796
+ 5, 0, 90, 5, 5, 0, 96, 0,
797
+ 0, 0, 1, 25, 25, 0, 0, 0,
798
+ 0, 0, 0, 0, 0, 0, 0, 0,
799
+ 0, 0, 0, 61, 59, 0, 0, 0,
800
+ 174, 23, 0, 0, 11, 0, 108, 11,
801
+ 166, 57, 0, 0, 0, 0, 0, 0,
802
+ 0, 0, 0, 0, 0, 0, 3, 3,
803
+ 0, 0, 0, 0, 0, 0, 0, 0,
804
+ 0, 0, 0, 0, 0, 0, 0, 0,
805
+ 0, 0, 0, 0, 0, 3, 3, 0,
806
+ 0, 35, 0, 33, 123, 31, 37, 135,
807
+ 41, 0, 39, 126, 31, 43, 138, 47,
808
+ 141, 45, 129, 0
809
+ };
810
+
811
+ static final short[] _hpricot_scan_to_state_actions = {
812
+ 0, 0, 0, 0, 0, 0, 0, 0,
813
+ 0, 0, 0, 0, 0, 0, 0, 0,
814
+ 0, 0, 0, 0, 0, 0, 0, 0,
815
+ 0, 0, 0, 0, 0, 0, 0, 0,
816
+ 0, 0, 0, 0, 0, 0, 0, 0,
817
+ 0, 0, 0, 0, 0, 0, 0, 0,
818
+ 0, 0, 0, 0, 0, 0, 0, 0,
819
+ 0, 0, 0, 0, 0, 0, 0, 0,
820
+ 0, 0, 0, 0, 0, 0, 0, 0,
821
+ 0, 0, 0, 0, 0, 0, 0, 0,
822
+ 0, 0, 0, 0, 0, 0, 0, 0,
823
+ 0, 0, 0, 0, 0, 0, 0, 0,
824
+ 0, 0, 0, 0, 0, 0, 0, 0,
825
+ 0, 0, 0, 0, 0, 0, 0, 0,
826
+ 0, 0, 0, 0, 0, 0, 0, 0,
827
+ 0, 0, 0, 0, 0, 0, 0, 0,
828
+ 0, 0, 0, 0, 0, 0, 0, 0,
829
+ 0, 0, 0, 0, 0, 0, 0, 0,
830
+ 0, 0, 0, 0, 0, 0, 0, 0,
831
+ 0, 0, 0, 0, 0, 0, 0, 0,
832
+ 0, 0, 0, 0, 0, 0, 0, 0,
833
+ 0, 0, 0, 0, 0, 0, 0, 0,
834
+ 0, 0, 0, 0, 0, 0, 0, 0,
835
+ 0, 0, 0, 0, 0, 0, 0, 0,
836
+ 0, 0, 0, 0, 0, 0, 0, 0,
837
+ 0, 0, 0, 0, 27, 0, 0, 0,
838
+ 0, 0, 0, 0, 0, 0, 27, 0,
839
+ 27, 0, 27, 0
840
+ };
841
+
842
+ static final short[] _hpricot_scan_from_state_actions = {
843
+ 0, 0, 0, 0, 0, 0, 0, 0,
844
+ 0, 0, 0, 0, 0, 0, 0, 0,
845
+ 0, 0, 0, 0, 0, 0, 0, 0,
846
+ 0, 0, 0, 0, 0, 0, 0, 0,
847
+ 0, 0, 0, 0, 0, 0, 0, 0,
848
+ 0, 0, 0, 0, 0, 0, 0, 0,
849
+ 0, 0, 0, 0, 0, 0, 0, 0,
850
+ 0, 0, 0, 0, 0, 0, 0, 0,
851
+ 0, 0, 0, 0, 0, 0, 0, 0,
852
+ 0, 0, 0, 0, 0, 0, 0, 0,
853
+ 0, 0, 0, 0, 0, 0, 0, 0,
854
+ 0, 0, 0, 0, 0, 0, 0, 0,
855
+ 0, 0, 0, 0, 0, 0, 0, 0,
856
+ 0, 0, 0, 0, 0, 0, 0, 0,
857
+ 0, 0, 0, 0, 0, 0, 0, 0,
858
+ 0, 0, 0, 0, 0, 0, 0, 0,
859
+ 0, 0, 0, 0, 0, 0, 0, 0,
860
+ 0, 0, 0, 0, 0, 0, 0, 0,
861
+ 0, 0, 0, 0, 0, 0, 0, 0,
862
+ 0, 0, 0, 0, 0, 0, 0, 0,
863
+ 0, 0, 0, 0, 0, 0, 0, 0,
864
+ 0, 0, 0, 0, 0, 0, 0, 0,
865
+ 0, 0, 0, 0, 0, 0, 0, 0,
866
+ 0, 0, 0, 0, 0, 0, 0, 0,
867
+ 0, 0, 0, 0, 0, 0, 0, 0,
868
+ 0, 0, 0, 0, 29, 0, 0, 0,
869
+ 0, 0, 0, 0, 0, 0, 29, 0,
870
+ 29, 0, 29, 0
871
+ };
872
+
873
+ static final int hpricot_scan_start = 204;
874
+
875
+ static final int hpricot_scan_error = -1;
876
+
877
+
878
+ public final static int BUFSIZE=16384;
879
+
880
+ private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
881
+ IRubyObject ary;
882
+ if (sym == runtime.newSymbol("text")) {
883
+ raw = tag;
884
+ }
885
+ ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw});
886
+ if (taint) {
887
+ ary.setTaint(true);
888
+ tag.setTaint(true);
889
+ attr.setTaint(true);
890
+ raw.setTaint(true);
891
+ }
892
+ block.yield(runtime.getCurrentContext(), ary, null, null, false);
893
+ }
894
+
895
+
896
+ int cs, act, have = 0, nread = 0, curline = 1, p=-1;
897
+ boolean text = false;
898
+ int tokstart=-1, tokend;
899
+ char[] buf;
900
+ Ruby runtime;
901
+ IRubyObject attr, bufsize;
902
+ IRubyObject[] tag, akey, aval;
903
+ int mark_tag, mark_akey, mark_aval;
904
+ boolean done = false, ele_open = false;
905
+ int buffer_size = 0;
906
+ boolean taint = false;
907
+ Block block = null;
908
+
909
+
910
+ IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment,
911
+ cdata, sym_text;
912
+
913
+ IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) {
914
+ attr = bufsize = runtime.getNil();
915
+ tag = new IRubyObject[]{runtime.getNil()};
916
+ akey = new IRubyObject[]{runtime.getNil()};
917
+ aval = new IRubyObject[]{runtime.getNil()};
918
+
919
+ RubyClass rb_eHpricotParseError = runtime.getModule("Hpricot").getClass("ParseError");
920
+
921
+ taint = port.isTaint();
922
+ if ( !port.respondsTo("read")) {
923
+ if ( port.respondsTo("to_str")) {
924
+ port = port.callMethod(runtime.getCurrentContext(),"to_str");
925
+ } else {
926
+ throw runtime.newArgumentError("bad Hpricot argument, String or IO only please.");
927
+ }
928
+ }
929
+
930
+ buffer_size = BUFSIZE;
931
+ if (recv.getInstanceVariable("@buffer_size") != null) {
932
+ bufsize = recv.getInstanceVariable("@buffer_size");
933
+ if (!bufsize.isNil()) {
934
+ buffer_size = RubyNumeric.fix2int(bufsize);
935
+ }
936
+ }
937
+ buf = new char[buffer_size];
938
+
939
+
940
+ {
941
+ cs = hpricot_scan_start;
942
+ tokstart = -1;
943
+ tokend = -1;
944
+ act = 0;
945
+ }
946
+
947
+ while( !done ) {
948
+ IRubyObject str;
949
+ p = have;
950
+ int pe;
951
+ int len, space = buffer_size - have;
952
+
953
+ if ( space == 0 ) {
954
+ /* We've used up the entire buffer storing an already-parsed token
955
+ * prefix that must be preserved. Likely caused by super-long attributes.
956
+ * See ticket #13. */
957
+ rb_raise(rb_eHpricotParseError, "ran out of buffer space on element <" + tag.toString() + ">, starting on line "+curline+".");
958
+ }
959
+
960
+ if (port.respondsTo("read")) {
961
+ str = port.callMethod(runtime.getCurrentContext(),"read",runtime.newFixnum(space));
962
+ } else {
963
+ str = ((RubyString)port).substr(nread,space);
964
+ }
965
+
966
+ str = str.convertToString();
967
+ String sss = str.toString();
968
+ char[] chars = sss.toCharArray();
969
+ System.arraycopy(chars,0,buf,p,chars.length);
970
+
971
+ len = sss.length();
972
+ nread += len;
973
+
974
+ if ( len < space ) {
975
+ len++;
976
+ done = true;
977
+ }
978
+
979
+ pe = p + len;
980
+ char[] data = buf;
981
+
982
+
983
+ {
984
+ int _klen;
985
+ int _trans;
986
+ int _acts;
987
+ int _nacts;
988
+ int _keys;
989
+
990
+ if ( p != pe ) {
991
+ _resume: while ( true ) {
992
+ _again: do {
993
+ _acts = _hpricot_scan_from_state_actions[cs];
994
+ _nacts = (int) _hpricot_scan_actions[_acts++];
995
+ while ( _nacts-- > 0 ) {
996
+ switch ( _hpricot_scan_actions[_acts++] ) {
997
+ case 21:
998
+ {tokstart = p;}
999
+ break;
1000
+ }
1001
+ }
1002
+
1003
+ _match: do {
1004
+ _keys = _hpricot_scan_key_offsets[cs];
1005
+ _trans = _hpricot_scan_index_offsets[cs];
1006
+ _klen = _hpricot_scan_single_lengths[cs];
1007
+ if ( _klen > 0 ) {
1008
+ int _lower = _keys;
1009
+ int _mid;
1010
+ int _upper = _keys + _klen - 1;
1011
+ while (true) {
1012
+ if ( _upper < _lower )
1013
+ break;
1014
+
1015
+ _mid = _lower + ((_upper-_lower) >> 1);
1016
+ if ( data[p] < _hpricot_scan_trans_keys[_mid] )
1017
+ _upper = _mid - 1;
1018
+ else if ( data[p] > _hpricot_scan_trans_keys[_mid] )
1019
+ _lower = _mid + 1;
1020
+ else {
1021
+ _trans += (_mid - _keys);
1022
+ break _match;
1023
+ }
1024
+ }
1025
+ _keys += _klen;
1026
+ _trans += _klen;
1027
+ }
1028
+
1029
+ _klen = _hpricot_scan_range_lengths[cs];
1030
+ if ( _klen > 0 ) {
1031
+ int _lower = _keys;
1032
+ int _mid;
1033
+ int _upper = _keys + (_klen<<1) - 2;
1034
+ while (true) {
1035
+ if ( _upper < _lower )
1036
+ break;
1037
+
1038
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
1039
+ if ( data[p] < _hpricot_scan_trans_keys[_mid] )
1040
+ _upper = _mid - 2;
1041
+ else if ( data[p] > _hpricot_scan_trans_keys[_mid+1] )
1042
+ _lower = _mid + 2;
1043
+ else {
1044
+ _trans += ((_mid - _keys)>>1);
1045
+ break _match;
1046
+ }
1047
+ }
1048
+ _trans += _klen;
1049
+ }
1050
+ } while (false);
1051
+
1052
+ _trans = _hpricot_scan_indicies[_trans];
1053
+ cs = _hpricot_scan_trans_targs_wi[_trans];
1054
+
1055
+ if ( _hpricot_scan_trans_actions_wi[_trans] == 0 )
1056
+ break _again;
1057
+
1058
+ _acts = _hpricot_scan_trans_actions_wi[_trans];
1059
+ _nacts = (int) _hpricot_scan_actions[_acts++];
1060
+ while ( _nacts-- > 0 )
1061
+ {
1062
+ switch ( _hpricot_scan_actions[_acts++] )
1063
+ {
1064
+ case 0:
1065
+ {
1066
+ if (text) {
1067
+ CAT(tag, p);
1068
+ ELE(sym_text);
1069
+ text = false;
1070
+ }
1071
+ attr = runtime.getNil();
1072
+ tag[0] = runtime.getNil();
1073
+ mark_tag = -1;
1074
+ ele_open = true;
1075
+ }
1076
+ break;
1077
+ case 1:
1078
+ { mark_tag = p; }
1079
+ break;
1080
+ case 2:
1081
+ { mark_aval = p; }
1082
+ break;
1083
+ case 3:
1084
+ { mark_akey = p; }
1085
+ break;
1086
+ case 4:
1087
+ { SET(tag, p); }
1088
+ break;
1089
+ case 5:
1090
+ { SET(aval, p); }
1091
+ break;
1092
+ case 6:
1093
+ {
1094
+ if (buf[p-1] == '"' || buf[p-1] == '\'') { SET(aval, p-1); }
1095
+ else { SET(aval, p); }
1096
+ }
1097
+ break;
1098
+ case 7:
1099
+ { SET(akey, p); }
1100
+ break;
1101
+ case 8:
1102
+ { SET(aval, p); ATTR(rb_str_new2("version"), aval); }
1103
+ break;
1104
+ case 9:
1105
+ { SET(aval, p); ATTR(rb_str_new2("encoding"), aval); }
1106
+ break;
1107
+ case 10:
1108
+ { SET(aval, p); ATTR(rb_str_new2("standalone"), aval); }
1109
+ break;
1110
+ case 11:
1111
+ { SET(aval, p); ATTR(rb_str_new2("public_id"), aval); }
1112
+ break;
1113
+ case 12:
1114
+ { SET(aval, p); ATTR(rb_str_new2("system_id"), aval); }
1115
+ break;
1116
+ case 13:
1117
+ {
1118
+ akey[0] = runtime.getNil();
1119
+ aval[0] = runtime.getNil();
1120
+ mark_akey = -1;
1121
+ mark_aval = -1;
1122
+ }
1123
+ break;
1124
+ case 14:
1125
+ {
1126
+ ATTR(akey, aval);
1127
+ }
1128
+ break;
1129
+ case 15:
1130
+ {curline += 1;}
1131
+ break;
1132
+ case 16:
1133
+ { TEXT_PASS(); }
1134
+ break;
1135
+ case 17:
1136
+ { EBLK(comment, 3); {cs = 204; if (true) break _again;} }
1137
+ break;
1138
+ case 18:
1139
+ { EBLK(cdata, 3); {cs = 204; if (true) break _again;} }
1140
+ break;
1141
+ case 19:
1142
+ { EBLK(procins, 2); {cs = 204; if (true) break _again;} }
1143
+ break;
1144
+ case 22:
1145
+ {tokend = p+1;}
1146
+ break;
1147
+ case 23:
1148
+ {tokend = p+1;{p = ((tokend))-1;}}
1149
+ break;
1150
+ case 24:
1151
+ {tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1152
+ break;
1153
+ case 25:
1154
+ {tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1155
+ break;
1156
+ case 26:
1157
+ {{ TEXT_PASS(); }{p = ((tokend))-1;}}
1158
+ break;
1159
+ case 27:
1160
+ {tokend = p+1;{p = ((tokend))-1;}}
1161
+ break;
1162
+ case 28:
1163
+ {tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1164
+ break;
1165
+ case 29:
1166
+ {tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1167
+ break;
1168
+ case 30:
1169
+ {{ TEXT_PASS(); }{p = ((tokend))-1;}}
1170
+ break;
1171
+ case 31:
1172
+ {tokend = p+1;{p = ((tokend))-1;}}
1173
+ break;
1174
+ case 32:
1175
+ {tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1176
+ break;
1177
+ case 33:
1178
+ {tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1179
+ break;
1180
+ case 34:
1181
+ {act = 8;}
1182
+ break;
1183
+ case 35:
1184
+ {act = 10;}
1185
+ break;
1186
+ case 36:
1187
+ {act = 12;}
1188
+ break;
1189
+ case 37:
1190
+ {act = 15;}
1191
+ break;
1192
+ case 38:
1193
+ {tokend = p+1;{ ELE(xmldecl); }{p = ((tokend))-1;}}
1194
+ break;
1195
+ case 39:
1196
+ {tokend = p+1;{ ELE(doctype); }{p = ((tokend))-1;}}
1197
+ break;
1198
+ case 40:
1199
+ {tokend = p+1;{ ELE(stag); }{p = ((tokend))-1;}}
1200
+ break;
1201
+ case 41:
1202
+ {tokend = p+1;{ ELE(etag); }{p = ((tokend))-1;}}
1203
+ break;
1204
+ case 42:
1205
+ {tokend = p+1;{ ELE(emptytag); }{p = ((tokend))-1;}}
1206
+ break;
1207
+ case 43:
1208
+ {tokend = p+1;{ {{p = ((tokend))-1;}{cs = 214; if (true) break _again;}} }{p = ((tokend))-1;}}
1209
+ break;
1210
+ case 44:
1211
+ {tokend = p+1;{ {{p = ((tokend))-1;}{cs = 216; if (true) break _again;}} }{p = ((tokend))-1;}}
1212
+ break;
1213
+ case 45:
1214
+ {tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1215
+ break;
1216
+ case 46:
1217
+ {tokend = p;{ {{p = ((tokend))-1;}{cs = 218; if (true) break _again;}} }{p = ((tokend))-1;}}
1218
+ break;
1219
+ case 47:
1220
+ {tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1221
+ break;
1222
+ case 48:
1223
+ {{ {{p = ((tokend))-1;}{cs = 218; if (true) break _again;}} }{p = ((tokend))-1;}}
1224
+ break;
1225
+ case 49:
1226
+ {{ TEXT_PASS(); }{p = ((tokend))-1;}}
1227
+ break;
1228
+ case 50:
1229
+ { switch( act ) {
1230
+ case 8:
1231
+ { ELE(doctype); }
1232
+ break;
1233
+ case 10:
1234
+ { ELE(stag); }
1235
+ break;
1236
+ case 12:
1237
+ { ELE(emptytag); }
1238
+ break;
1239
+ case 15:
1240
+ { TEXT_PASS(); }
1241
+ break;
1242
+ default: break;
1243
+ }
1244
+ {p = ((tokend))-1;}}
1245
+ break;
1246
+ }
1247
+ }
1248
+
1249
+ } while (false);
1250
+ _acts = _hpricot_scan_to_state_actions[cs];
1251
+ _nacts = (int) _hpricot_scan_actions[_acts++];
1252
+ while ( _nacts-- > 0 ) {
1253
+ switch ( _hpricot_scan_actions[_acts++] ) {
1254
+ case 20:
1255
+ {tokstart = -1;}
1256
+ break;
1257
+ }
1258
+ }
1259
+
1260
+ if ( ++p == pe )
1261
+ break _resume;
1262
+ }
1263
+ }
1264
+ }
1265
+
1266
+ if ( cs == hpricot_scan_error ) {
1267
+ if(!tag[0].isNil()) {
1268
+ rb_raise(rb_eHpricotParseError, "parse error on element <"+tag.toString()+">, starting on line "+curline+".\n" + NO_WAY_SERIOUSLY);
1269
+ } else {
1270
+ rb_raise(rb_eHpricotParseError, "parse error on line "+curline+".\n" + NO_WAY_SERIOUSLY);
1271
+ }
1272
+ }
1273
+
1274
+ if ( done && ele_open ) {
1275
+ ele_open = false;
1276
+ if(tokstart > -1) {
1277
+ mark_tag = tokstart;
1278
+ tokstart = -1;
1279
+ text = true;
1280
+ }
1281
+ }
1282
+
1283
+ if(tokstart == -1) {
1284
+ have = 0;
1285
+ /* text nodes have no tokstart because each byte is parsed alone */
1286
+ if(mark_tag != -1 && text) {
1287
+ if (done) {
1288
+ if(mark_tag < p-1) {
1289
+ CAT(tag, p-1);
1290
+ ELE(sym_text);
1291
+ }
1292
+ } else {
1293
+ CAT(tag, p);
1294
+ }
1295
+ }
1296
+ mark_tag = 0;
1297
+ } else {
1298
+ have = pe - tokstart;
1299
+ System.arraycopy(buf,tokstart,buf,0,have);
1300
+ SLIDE(tag);
1301
+ SLIDE(akey);
1302
+ SLIDE(aval);
1303
+ tokend = (tokend - tokstart);
1304
+ tokstart = 0;
1305
+ }
1306
+ }
1307
+ return runtime.getNil();
1308
+ }
1309
+
1310
+ public static IRubyObject __hpricot_scan(IRubyObject recv, IRubyObject port, Block block) {
1311
+ Ruby runtime = recv.getRuntime();
1312
+ HpricotScanService service = new HpricotScanService();
1313
+ service.runtime = runtime;
1314
+ service.xmldecl = runtime.newSymbol("xmldecl");
1315
+ service.doctype = runtime.newSymbol("doctype");
1316
+ service.procins = runtime.newSymbol("procins");
1317
+ service.stag = runtime.newSymbol("stag");
1318
+ service.etag = runtime.newSymbol("etag");
1319
+ service.emptytag = runtime.newSymbol("emptytag");
1320
+ service.comment = runtime.newSymbol("comment");
1321
+ service.cdata = runtime.newSymbol("cdata");
1322
+ service.sym_text = runtime.newSymbol("text");
1323
+ service.block = block;
1324
+ return service.hpricot_scan(recv, port);
1325
+ }
1326
+
1327
+
1328
+ public boolean basicLoad(final Ruby runtime) throws IOException {
1329
+ Init_hpricot_scan(runtime);
1330
+ return true;
1331
+ }
1332
+
1333
+ public static void Init_hpricot_scan(Ruby runtime) {
1334
+ RubyModule mHpricot = runtime.defineModule("Hpricot");
1335
+ mHpricot.getMetaClass().attr_accessor(new IRubyObject[]{runtime.newSymbol("buffer_size")});
1336
+ CallbackFactory fact = runtime.callbackFactory(HpricotScanService.class);
1337
+ mHpricot.getMetaClass().defineMethod("scan",fact.getSingletonMethod("__hpricot_scan",IRubyObject.class));
1338
+ mHpricot.defineClassUnder("ParseError",runtime.getClass("Exception"),runtime.getClass("Exception").getAllocator());
1339
+ }
1340
+ }