hpricot 0.5-mswin32 → 0.6-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,5 +1,17 @@
1
+ = 0.6
2
+ === 15th June, 2007
3
+ * Hpricot for JRuby -- nice work Ola Bini!
4
+ * Inline Markaby for Hpricot documents.
5
+ * XML tags and attributes are no longer downcased like HTML is.
6
+ * new syntax for grabbing everything between two elements using a Range in the search method: (doc/("font".."font/br")) or in nodes_at like so: (doc/"font").nodes_at("*".."br"). Only works with either a pair of siblings or a set of a parent and a sibling.
7
+ * Ignore self-closing endings on tags (such as form) which are containers. Treat them like open parent tags. Reported by Jonathan Nichols on the hpricot list.
8
+ * Escaping of attributes, yanked from Jim Weirich and Sam Ruby's work in Builder.
9
+ * Element#raw_attributes gives unescaped data. Element#attributes gives escaped.
10
+ * Added: Elements#attr, Elements#remove_attr, Elements#remove_class.
11
+ * Added: Traverse#preceding, Traverse#following, Traverse#previous, Traverse#next.
12
+
1
13
  = 0.5
2
- === 31st January, 2006
14
+ === 31st January, 2007
3
15
 
4
16
  * support for a[text()="Click Me!"] and h3[text()*="space"] and the like.
5
17
  * Hpricot.buffer_size accessor for increasing Hpricot's buffer if you're encountering huge ASP.NET viewstate attribs.
data/README CHANGED
@@ -257,7 +257,7 @@ So, let's go beyond just trying to fix the hierarchy. The
257
257
 
258
258
  What measures does <tt>:xhtml_strict</tt> take?
259
259
 
260
- 1. Shift elements into their proper containers just like <tt>:fixup_tags</tt>.
260
+ 1. Shift elements into their proper containers just like :fixup_tags.
261
261
  2. Remove unknown elements.
262
262
  3. Remove unknown attributes.
263
263
  4. Remove illegal content.
@@ -270,6 +270,9 @@ on the standard mode. The main difference is that :xml mode won't try to output
270
270
  tags which are friendlier for browsers. For example, if an opening and closing
271
271
  <tt>br</tt> tag is found, XML mode won't try to turn that into an empty element.
272
272
 
273
+ XML mode also doesn't downcase the tags and attributes for you. So pay attention
274
+ to case, friends.
275
+
273
276
  The primary way to use Hpricot's XML mode is to call the Hpricot.XML method:
274
277
 
275
278
  doc = open("http://redhanded.hobix.com/index.xml") do |f|
data/Rakefile CHANGED
@@ -8,35 +8,48 @@ include FileUtils
8
8
 
9
9
  NAME = "hpricot"
10
10
  REV = `svn info`[/Revision: (\d+)/, 1] rescue nil
11
- VERS = ENV['VERSION'] || "0.4" + (REV ? ".#{REV}" : "")
12
- CLEAN.include ['ext/hpricot_scan/*.{bundle,so,obj,pdb,lib,def,exp}', 'ext/hpricot_scan/Makefile',
11
+ VERS = ENV['VERSION'] || "0.6" + (REV ? ".#{REV}" : "")
12
+ PKG = "#{NAME}-#{VERS}"
13
+ BIN = "*.{bundle,jar,so,obj,pdb,lib,def,exp}"
14
+ ARCHLIB = "lib/#{::Config::CONFIG['arch']}"
15
+ CLEAN.include ["ext/hpricot_scan/#{BIN}", "lib/**/#{BIN}", 'ext/hpricot_scan/Makefile',
13
16
  '**/.*.sw?', '*.gem', '.config']
14
17
  RDOC_OPTS = ['--quiet', '--title', 'The Hpricot Reference', '--main', 'README', '--inline-source']
18
+ PKG_FILES = %w(CHANGELOG COPYING README Rakefile) +
19
+ Dir.glob("{bin,doc,test,lib,extras}/**/*") +
20
+ Dir.glob("ext/**/*.{h,java,c,rb,rl}") +
21
+ %w[ext/hpricot_scan/hpricot_scan.c] # needed because it's generated later
22
+ SPEC =
23
+ Gem::Specification.new do |s|
24
+ s.name = NAME
25
+ s.version = VERS
26
+ s.platform = Gem::Platform::RUBY
27
+ s.has_rdoc = true
28
+ s.rdoc_options += RDOC_OPTS
29
+ s.extra_rdoc_files = ["README", "CHANGELOG", "COPYING"]
30
+ s.summary = "a swift, liberal HTML parser with a fantastic library"
31
+ s.description = s.summary
32
+ s.author = "why the lucky stiff"
33
+ s.email = 'why@ruby-lang.org'
34
+ s.homepage = 'http://code.whytheluckystiff.net/hpricot/'
35
+ s.files = PKG_FILES
36
+ s.require_paths = [ARCHLIB, "lib"]
37
+ s.extensions = FileList["ext/**/extconf.rb"].to_a
38
+ s.bindir = "bin"
39
+ end
15
40
 
16
41
  desc "Does a full compile, test run"
17
42
  task :default => [:compile, :test]
18
43
 
19
- desc "Compiles all extensions"
20
- task :compile => [:hpricot_scan] do
21
- if Dir.glob(File.join("lib","hpricot_scan.*")).length == 0
22
- STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
23
- STDERR.puts "Gem actually failed to build. Your system is"
24
- STDERR.puts "NOT configured properly to build hpricot."
25
- STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
26
- exit(1)
27
- end
28
- end
29
- task :hpricot_scan => [:ragel]
30
-
31
44
  desc "Packages up Hpricot."
32
45
  task :package => [:clean, :ragel]
33
46
 
34
47
  desc "Releases packages for all Hpricot packages and platforms."
35
- task :release => [:package, :rubygems_win32]
48
+ task :release => [:package, :package_win32, :package_jruby]
36
49
 
37
50
  desc "Run all the tests"
38
51
  Rake::TestTask.new do |t|
39
- t.libs << "test"
52
+ t.libs << "test" << ARCHLIB
40
53
  t.test_files = FileList['test/test_*.rb']
41
54
  t.verbose = true
42
55
  end
@@ -48,34 +61,9 @@ Rake::RDocTask.new do |rdoc|
48
61
  rdoc.rdoc_files.add ['README', 'CHANGELOG', 'COPYING', 'lib/**/*.rb']
49
62
  end
50
63
 
51
- spec =
52
- Gem::Specification.new do |s|
53
- s.name = NAME
54
- s.version = VERS
55
- s.platform = Gem::Platform::RUBY
56
- s.has_rdoc = true
57
- s.rdoc_options += RDOC_OPTS
58
- s.extra_rdoc_files = ["README", "CHANGELOG", "COPYING"]
59
- s.summary = "a swift, liberal HTML parser with a fantastic library"
60
- s.description = s.summary
61
- s.author = "why the lucky stiff"
62
- s.email = 'why@ruby-lang.org'
63
- s.homepage = 'http://code.whytheluckystiff.net/hpricot/'
64
-
65
- s.files = %w(COPYING README Rakefile) +
66
- Dir.glob("{bin,doc,test,lib,extras}/**/*") +
67
- Dir.glob("ext/**/*.{h,c,rb,rl}") +
68
- %w[ext/hpricot_scan/hpricot_scan.c] # needed because it's generated later
69
-
70
- s.require_path = "lib"
71
- #s.autorequire = "hpricot" # no no no this is tHe 3v1l
72
- s.extensions = FileList["ext/**/extconf.rb"].to_a
73
- s.bindir = "bin"
74
- end
75
-
76
- Rake::GemPackageTask.new(spec) do |p|
64
+ Rake::GemPackageTask.new(SPEC) do |p|
77
65
  p.need_tar = true
78
- p.gem_spec = spec
66
+ p.gem_spec = SPEC
79
67
  end
80
68
 
81
69
  extension = "hpricot_scan"
@@ -94,6 +82,18 @@ task "lib" do
94
82
  directory "lib"
95
83
  end
96
84
 
85
+ desc "Compiles the Ruby extension"
86
+ task :compile => [:hpricot_scan] do
87
+ if Dir.glob(File.join(ARCHLIB,"hpricot_scan.*")).length == 0
88
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
89
+ STDERR.puts "Gem actually failed to build. Your system is"
90
+ STDERR.puts "NOT configured properly to build hpricot."
91
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
92
+ exit(1)
93
+ end
94
+ end
95
+ task :hpricot_scan => [:ragel]
96
+
97
97
  desc "Builds just the #{extension} extension"
98
98
  task extension.to_sym => ["#{ext}/Makefile", ext_so ]
99
99
 
@@ -105,66 +105,102 @@ file ext_so => ext_files do
105
105
  Dir.chdir(ext) do
106
106
  sh(PLATFORM =~ /win32/ ? 'nmake' : 'make')
107
107
  end
108
- cp ext_so, "lib"
108
+ mkdir_p ARCHLIB
109
+ cp ext_so, ARCHLIB
109
110
  end
110
111
 
111
- desc "Generates the scanner code with Ragel."
112
- task :ragel do
113
- sh %{ragel ext/hpricot_scan/hpricot_scan.rl | rlcodegen -G2 -o ext/hpricot_scan/hpricot_scan.c}
112
+ desc "returns the ragel version"
113
+ task :ragel_version do
114
+ @ragel_v = `ragel -v`[/(version )(\S*)/,2].to_f
114
115
  end
115
116
 
116
- PKG_FILES = FileList[
117
- "test/**/*.{rb,html,xhtml}",
118
- "lib/**/*.rb",
119
- "ext/**/*.{c,rb,h,rl}",
120
- "CHANGELOG", "README", "Rakefile", "COPYING",
121
- "extras/**/*", "lib/hpricot_scan.so"]
117
+ desc "Generates the C scanner code with Ragel."
118
+ task :ragel => [:ragel_version] do
119
+ sh %{ragel ext/hpricot_scan/hpricot_scan.rl | #{@ragel_v >= 5.18 ? 'rlgen-cd' : 'rlcodegen'} -G2 -o ext/hpricot_scan/hpricot_scan.c}
120
+ end
122
121
 
123
- Win32Spec = Gem::Specification.new do |s|
124
- s.name = NAME
125
- s.version = VERS
126
- s.platform = Gem::Platform::WIN32
127
- s.has_rdoc = false
128
- s.extra_rdoc_files = ["README", "CHANGELOG", "COPYING"]
129
- s.summary = "a swift, liberal HTML parser with a fantastic library"
130
- s.description = s.summary
131
- s.author = "why the lucky stiff"
132
- s.email = 'why@ruby-lang.org'
133
- s.homepage = 'http://code.whytheluckystiff.net/hpricot/'
122
+ desc "Generates the Java scanner code with Ragel."
123
+ task :ragel_java => [:ragel_version] do
124
+ sh %{ragel -J ext/hpricot_scan/hpricot_scan.java.rl | #{@ragel_v >= 5.18 ? 'rlgen-java' : 'rlcodegen'} -o ext/hpricot_scan/HpricotScanService.java}
125
+ end
134
126
 
135
- s.files = PKG_FILES
127
+ ### Win32 Packages ###
136
128
 
137
- s.require_path = "lib"
138
- #s.autorequire = "hpricot" # no no no this is tHe 3v1l
139
- s.extensions = []
140
- s.bindir = "bin"
141
- end
129
+ Win32Spec = SPEC.dup
130
+ Win32Spec.platform = Gem::Platform::WIN32
131
+ Win32Spec.files = PKG_FILES + ["#{ARCHLIB}/hpricot_scan.so"]
132
+ Win32Spec.extensions = []
142
133
 
143
- WIN32_PKG_DIR = "hpricot-" + VERS
134
+ WIN32_PKG_DIR = "#{PKG}-mswin32"
144
135
 
136
+ desc "Package up the Win32 distribution."
145
137
  file WIN32_PKG_DIR => [:package] do
146
- sh "tar zxf pkg/#{WIN32_PKG_DIR}.tgz"
138
+ sh "tar zxf pkg/#{PKG}.tgz"
139
+ mv PKG, WIN32_PKG_DIR
147
140
  end
148
141
 
149
142
  desc "Cross-compile the hpricot_scan extension for win32"
150
143
  file "hpricot_scan_win32" => [WIN32_PKG_DIR] do
151
144
  cp "extras/mingw-rbconfig.rb", "#{WIN32_PKG_DIR}/ext/hpricot_scan/rbconfig.rb"
152
145
  sh "cd #{WIN32_PKG_DIR}/ext/hpricot_scan/ && ruby -I. extconf.rb && make"
153
- mv "#{WIN32_PKG_DIR}/ext/hpricot_scan/hpricot_scan.so", "#{WIN32_PKG_DIR}/lib"
146
+ mv "#{WIN32_PKG_DIR}/ext/hpricot_scan/hpricot_scan.so", "#{WIN32_PKG_DIR}/#{ARCHLIB}"
154
147
  end
155
148
 
156
149
  desc "Build the binary RubyGems package for win32"
157
- task :rubygems_win32 => ["hpricot_scan_win32"] do
150
+ task :package_win32 => ["hpricot_scan_win32"] do
158
151
  Dir.chdir("#{WIN32_PKG_DIR}") do
159
152
  Gem::Builder.new(Win32Spec).build
160
153
  verbose(true) {
161
- mv Dir["*.gem"].first, "../pkg/hpricot-#{VERS}-mswin32.gem"
154
+ mv Dir["*.gem"].first, "../pkg/#{WIN32_PKG_DIR}.gem"
162
155
  }
163
156
  end
164
157
  end
165
158
 
166
159
  CLEAN.include WIN32_PKG_DIR
167
160
 
161
+ ### JRuby Packages ###
162
+
163
+ compile_java = proc do
164
+ sh %{javac -source 1.4 -target 1.4 -classpath $JRUBY_HOME/lib/jruby.jar HpricotScanService.java}
165
+ sh %{jar cf hpricot_scan.jar HpricotScanService.class}
166
+ end
167
+
168
+ desc "Compiles the JRuby extension"
169
+ task :hpricot_scan_java => [:ragel_java] do
170
+ Dir.chdir("ext/hpricot_scan", &compile_java)
171
+ end
172
+
173
+ JRubySpec = SPEC.dup
174
+ JRubySpec.platform = 'jruby'
175
+ JRubySpec.files = PKG_FILES + ["#{ARCHLIB}/hpricot_scan.jar"]
176
+ JRubySpec.extensions = []
177
+
178
+ JRUBY_PKG_DIR = "#{PKG}-jruby"
179
+
180
+ desc "Package up the JRuby distribution."
181
+ file JRUBY_PKG_DIR => [:ragel_java, :package] do
182
+ sh "tar zxf pkg/#{PKG}.tgz"
183
+ mv PKG, JRUBY_PKG_DIR
184
+ end
185
+
186
+ desc "Cross-compile the hpricot_scan extension for JRuby"
187
+ file "hpricot_scan_jruby" => [JRUBY_PKG_DIR] do
188
+ Dir.chdir("#{JRUBY_PKG_DIR}/ext/hpricot_scan", &compile_java)
189
+ mv "#{JRUBY_PKG_DIR}/ext/hpricot_scan/hpricot_scan.jar", "#{JRUBY_PKG_DIR}/#{ARCHLIB}"
190
+ end
191
+
192
+ desc "Build the RubyGems package for JRuby"
193
+ task :package_jruby => ["hpricot_scan_jruby"] do
194
+ Dir.chdir("#{JRUBY_PKG_DIR}") do
195
+ Gem::Builder.new(JRubySpec).build
196
+ verbose(true) {
197
+ mv Dir["*.gem"].first, "../pkg/#{JRUBY_PKG_DIR}.gem"
198
+ }
199
+ end
200
+ end
201
+
202
+ CLEAN.include JRUBY_PKG_DIR
203
+
168
204
  task :install do
169
205
  sh %{rake package}
170
206
  sh %{sudo gem install pkg/#{NAME}-#{VERS}}
@@ -0,0 +1,1340 @@
1
+
2
+ import java.io.IOException;
3
+
4
+ import org.jruby.Ruby;
5
+ import org.jruby.RubyClass;
6
+ import org.jruby.RubyHash;
7
+ import org.jruby.RubyModule;
8
+ import org.jruby.RubyNumeric;
9
+ import org.jruby.RubyString;
10
+ import org.jruby.runtime.Block;
11
+ import org.jruby.runtime.CallbackFactory;
12
+ import org.jruby.runtime.builtin.IRubyObject;
13
+ import org.jruby.exceptions.RaiseException;
14
+ import org.jruby.runtime.load.BasicLibraryService;
15
+
16
+ public class HpricotScanService implements BasicLibraryService {
17
+ public static String NO_WAY_SERIOUSLY="*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
18
+
19
+ public void ELE(IRubyObject N) {
20
+ if (tokend > tokstart || text) {
21
+ IRubyObject raw_string = runtime.getNil();
22
+ ele_open = false; text = false;
23
+ if (tokstart != -1 && N != cdata && N != sym_text && N != procins && N != comment) {
24
+ raw_string = runtime.newString(new String(buf,tokstart,tokend-tokstart));
25
+ }
26
+ rb_yield_tokens(N, tag[0], attr, raw_string, taint);
27
+ }
28
+ }
29
+
30
+ public void SET(IRubyObject[] N, int E) {
31
+ int mark = 0;
32
+ if(N == tag) {
33
+ if(mark_tag == -1 || E == mark_tag) {
34
+ tag[0] = runtime.newString("");
35
+ } else if(E > mark_tag) {
36
+ tag[0] = runtime.newString(new String(buf,mark_tag, E-mark_tag));
37
+ }
38
+ } else if(N == akey) {
39
+ if(mark_akey == -1 || E == mark_akey) {
40
+ akey[0] = runtime.newString("");
41
+ } else if(E > mark_akey) {
42
+ akey[0] = runtime.newString(new String(buf,mark_akey, E-mark_akey));
43
+ }
44
+ } else if(N == aval) {
45
+ if(mark_aval == -1 || E == mark_aval) {
46
+ aval[0] = runtime.newString("");
47
+ } else if(E > mark_aval) {
48
+ aval[0] = runtime.newString(new String(buf,mark_aval, E-mark_aval));
49
+ }
50
+ }
51
+ }
52
+
53
+ public void CAT(IRubyObject[] N, int E) {
54
+ if(N[0].isNil()) {
55
+ SET(N,E);
56
+ } else {
57
+ int mark = 0;
58
+ if(N == tag) {
59
+ mark = mark_tag;
60
+ } else if(N == akey) {
61
+ mark = mark_akey;
62
+ } else if(N == aval) {
63
+ mark = mark_aval;
64
+ }
65
+ ((RubyString)(N[0])).append(runtime.newString(new String(buf, mark, E-mark)));
66
+ }
67
+ }
68
+
69
+ public void SLIDE(Object N) {
70
+ int mark = 0;
71
+ if(N == tag) {
72
+ mark = mark_tag;
73
+ } else if(N == akey) {
74
+ mark = mark_akey;
75
+ } else if(N == aval) {
76
+ mark = mark_aval;
77
+ }
78
+ if(mark > tokstart) {
79
+ if(N == tag) {
80
+ mark_tag -= tokstart;
81
+ } else if(N == akey) {
82
+ mark_akey -= tokstart;
83
+ } else if(N == aval) {
84
+ mark_aval -= tokstart;
85
+ }
86
+ }
87
+ }
88
+
89
+ public void ATTR(IRubyObject K, IRubyObject V) {
90
+ if(!K.isNil()) {
91
+ if(attr.isNil()) {
92
+ attr = RubyHash.newHash(runtime);
93
+ }
94
+ ((RubyHash)attr).aset(K,V);
95
+ }
96
+ }
97
+
98
+ public void ATTR(IRubyObject[] K, IRubyObject V) {
99
+ ATTR(K[0],V);
100
+ }
101
+
102
+ public void ATTR(IRubyObject K, IRubyObject[] V) {
103
+ ATTR(K,V[0]);
104
+ }
105
+
106
+ public void ATTR(IRubyObject[] K, IRubyObject[] V) {
107
+ ATTR(K[0],V[0]);
108
+ }
109
+
110
+ public void TEXT_PASS() {
111
+ if(!text) {
112
+ if(ele_open) {
113
+ ele_open = false;
114
+ if(tokstart > -1) {
115
+ mark_tag = tokstart;
116
+ }
117
+ } else {
118
+ mark_tag = p;
119
+ }
120
+ attr = runtime.getNil();
121
+ tag[0] = runtime.getNil();
122
+ text = true;
123
+ }
124
+ }
125
+
126
+ public void EBLK(IRubyObject N, int T) {
127
+ CAT(tag, p - T + 1);
128
+ ELE(N);
129
+ }
130
+
131
+
132
+ public void rb_raise(RubyClass error, String message) {
133
+ throw new RaiseException(runtime, error, message, true);
134
+ }
135
+
136
+ public IRubyObject rb_str_new2(String s) {
137
+ return runtime.newString(s);
138
+ }
139
+
140
+
141
+
142
+
143
+ static final byte[] _hpricot_scan_actions = {
144
+ 0, 1, 1, 1, 2, 1, 4, 1,
145
+ 5, 1, 6, 1, 7, 1, 8, 1,
146
+ 9, 1, 10, 1, 11, 1, 12, 1,
147
+ 14, 1, 16, 1, 20, 1, 21, 1,
148
+ 22, 1, 24, 1, 25, 1, 26, 1,
149
+ 28, 1, 29, 1, 30, 1, 32, 1,
150
+ 33, 1, 38, 1, 39, 1, 40, 1,
151
+ 41, 1, 42, 1, 43, 1, 44, 1,
152
+ 45, 1, 46, 1, 47, 1, 48, 1,
153
+ 49, 1, 50, 2, 2, 5, 2, 2,
154
+ 6, 2, 2, 11, 2, 2, 12, 2,
155
+ 2, 14, 2, 4, 39, 2, 4, 40,
156
+ 2, 4, 41, 2, 5, 2, 2, 6,
157
+ 14, 2, 7, 6, 2, 7, 14, 2,
158
+ 11, 12, 2, 13, 3, 2, 14, 6,
159
+ 2, 14, 40, 2, 15, 24, 2, 15,
160
+ 28, 2, 15, 32, 2, 15, 45, 2,
161
+ 17, 23, 2, 18, 27, 2, 19, 31,
162
+ 2, 22, 34, 2, 22, 36, 3, 2,
163
+ 6, 14, 3, 2, 14, 6, 3, 6,
164
+ 7, 14, 3, 6, 14, 40, 3, 7,
165
+ 14, 40, 3, 14, 6, 40, 3, 14,
166
+ 13, 3, 3, 22, 0, 37, 3, 22,
167
+ 2, 34, 3, 22, 14, 35, 4, 2,
168
+ 14, 13, 3, 4, 6, 7, 14, 40,
169
+ 4, 22, 2, 14, 35, 4, 22, 6,
170
+ 14, 35, 4, 22, 7, 14, 35, 4,
171
+ 22, 14, 6, 35, 5, 22, 2, 6,
172
+ 14, 35, 5, 22, 2, 14, 6, 35,
173
+ 5, 22, 6, 7, 14, 35
174
+ };
175
+
176
+ static final short[] _hpricot_scan_key_offsets = {
177
+ 0, 3, 4, 5, 6, 7, 8, 9,
178
+ 10, 13, 22, 37, 44, 45, 46, 47,
179
+ 48, 49, 52, 57, 69, 81, 86, 93,
180
+ 94, 95, 100, 101, 105, 106, 107, 121,
181
+ 135, 152, 169, 186, 203, 210, 212, 214,
182
+ 220, 222, 227, 232, 238, 240, 245, 251,
183
+ 265, 266, 267, 268, 269, 270, 271, 272,
184
+ 273, 274, 275, 276, 282, 296, 300, 313,
185
+ 326, 340, 354, 355, 366, 375, 388, 405,
186
+ 423, 441, 450, 461, 480, 499, 510, 521,
187
+ 536, 538, 540, 556, 572, 575, 587, 599,
188
+ 619, 639, 658, 677, 697, 717, 728, 739,
189
+ 751, 763, 775, 791, 794, 809, 811, 813,
190
+ 829, 845, 848, 860, 871, 890, 910, 930,
191
+ 941, 952, 964, 984, 1004, 1016, 1036, 1057,
192
+ 1074, 1091, 1095, 1098, 1110, 1122, 1142, 1162,
193
+ 1182, 1194, 1206, 1226, 1242, 1258, 1270, 1291,
194
+ 1310, 1313, 1328, 1340, 1355, 1358, 1369, 1371,
195
+ 1373, 1384, 1391, 1404, 1418, 1432, 1445, 1446,
196
+ 1447, 1448, 1449, 1450, 1451, 1455, 1460, 1469,
197
+ 1479, 1484, 1491, 1492, 1493, 1494, 1495, 1496,
198
+ 1497, 1498, 1499, 1503, 1508, 1512, 1522, 1527,
199
+ 1533, 1534, 1535, 1536, 1537, 1538, 1539, 1540,
200
+ 1541, 1542, 1546, 1551, 1553, 1554, 1555, 1560,
201
+ 1561, 1562, 1564, 1565, 1566, 1567, 1568, 1572,
202
+ 1582, 1591, 1601, 1602, 1603, 1605, 1614, 1615,
203
+ 1616, 1617, 1619, 1621, 1624, 1627, 1631, 1633,
204
+ 1634, 1636, 1637, 1640
205
+ };
206
+
207
+ static final char[] _hpricot_scan_trans_keys = {
208
+ 45, 68, 91, 45, 79, 67, 84, 89,
209
+ 80, 69, 32, 9, 13, 32, 58, 95,
210
+ 9, 13, 65, 90, 97, 122, 32, 62,
211
+ 63, 91, 95, 9, 13, 45, 46, 48,
212
+ 58, 65, 90, 97, 122, 32, 62, 80,
213
+ 83, 91, 9, 13, 85, 66, 76, 73,
214
+ 67, 32, 9, 13, 32, 34, 39, 9,
215
+ 13, 9, 34, 61, 95, 32, 37, 39,
216
+ 59, 63, 90, 97, 122, 9, 34, 61,
217
+ 95, 32, 37, 39, 59, 63, 90, 97,
218
+ 122, 32, 62, 91, 9, 13, 32, 34,
219
+ 39, 62, 91, 9, 13, 34, 34, 32,
220
+ 62, 91, 9, 13, 93, 32, 62, 9,
221
+ 13, 39, 39, 9, 39, 61, 95, 32,
222
+ 33, 35, 37, 40, 59, 63, 90, 97,
223
+ 122, 9, 39, 61, 95, 32, 33, 35,
224
+ 37, 40, 59, 63, 90, 97, 122, 9,
225
+ 32, 33, 39, 62, 91, 95, 10, 13,
226
+ 35, 37, 40, 59, 61, 90, 97, 122,
227
+ 9, 32, 34, 39, 62, 91, 95, 10,
228
+ 13, 33, 37, 40, 59, 61, 90, 97,
229
+ 122, 9, 32, 33, 39, 62, 91, 95,
230
+ 10, 13, 35, 37, 40, 59, 61, 90,
231
+ 97, 122, 9, 32, 34, 39, 62, 91,
232
+ 95, 10, 13, 33, 37, 40, 59, 61,
233
+ 90, 97, 122, 32, 34, 39, 62, 91,
234
+ 9, 13, 34, 39, 34, 39, 32, 39,
235
+ 62, 91, 9, 13, 39, 93, 32, 62,
236
+ 93, 9, 13, 32, 39, 62, 9, 13,
237
+ 32, 34, 62, 91, 9, 13, 34, 93,
238
+ 32, 34, 62, 9, 13, 32, 39, 62,
239
+ 91, 9, 13, 9, 39, 61, 95, 32,
240
+ 33, 35, 37, 40, 59, 63, 90, 97,
241
+ 122, 89, 83, 84, 69, 77, 67, 68,
242
+ 65, 84, 65, 91, 58, 95, 65, 90,
243
+ 97, 122, 32, 62, 63, 95, 9, 13,
244
+ 45, 46, 48, 58, 65, 90, 97, 122,
245
+ 32, 62, 9, 13, 32, 47, 62, 63,
246
+ 95, 9, 13, 45, 58, 65, 90, 97,
247
+ 122, 32, 47, 62, 63, 95, 9, 13,
248
+ 45, 58, 65, 90, 97, 122, 32, 47,
249
+ 61, 62, 63, 95, 9, 13, 45, 58,
250
+ 65, 90, 97, 122, 32, 47, 61, 62,
251
+ 63, 95, 9, 13, 45, 58, 65, 90,
252
+ 97, 122, 62, 13, 32, 34, 39, 47,
253
+ 60, 62, 9, 10, 11, 12, 13, 32,
254
+ 47, 60, 62, 9, 10, 11, 12, 32,
255
+ 47, 62, 63, 95, 9, 13, 45, 58,
256
+ 65, 90, 97, 122, 13, 32, 47, 60,
257
+ 62, 63, 95, 9, 10, 11, 12, 45,
258
+ 58, 65, 90, 97, 122, 13, 32, 47,
259
+ 60, 61, 62, 63, 95, 9, 10, 11,
260
+ 12, 45, 58, 65, 90, 97, 122, 13,
261
+ 32, 47, 60, 61, 62, 63, 95, 9,
262
+ 10, 11, 12, 45, 58, 65, 90, 97,
263
+ 122, 13, 32, 47, 60, 62, 9, 10,
264
+ 11, 12, 13, 32, 34, 39, 47, 60,
265
+ 62, 9, 10, 11, 12, 13, 32, 34,
266
+ 39, 47, 60, 62, 63, 95, 9, 10,
267
+ 11, 12, 45, 58, 65, 90, 97, 122,
268
+ 13, 32, 34, 39, 47, 60, 62, 63,
269
+ 95, 9, 10, 11, 12, 45, 58, 65,
270
+ 90, 97, 122, 13, 32, 34, 47, 60,
271
+ 62, 92, 9, 10, 11, 12, 13, 32,
272
+ 34, 47, 60, 62, 92, 9, 10, 11,
273
+ 12, 32, 34, 47, 62, 63, 92, 95,
274
+ 9, 13, 45, 58, 65, 90, 97, 122,
275
+ 34, 92, 34, 92, 32, 34, 47, 61,
276
+ 62, 63, 92, 95, 9, 13, 45, 58,
277
+ 65, 90, 97, 122, 32, 34, 47, 61,
278
+ 62, 63, 92, 95, 9, 13, 45, 58,
279
+ 65, 90, 97, 122, 34, 62, 92, 13,
280
+ 32, 34, 39, 47, 60, 62, 92, 9,
281
+ 10, 11, 12, 13, 32, 34, 39, 47,
282
+ 60, 62, 92, 9, 10, 11, 12, 13,
283
+ 32, 34, 39, 47, 60, 62, 63, 92,
284
+ 95, 9, 10, 11, 12, 45, 58, 65,
285
+ 90, 97, 122, 13, 32, 34, 39, 47,
286
+ 60, 62, 63, 92, 95, 9, 10, 11,
287
+ 12, 45, 58, 65, 90, 97, 122, 13,
288
+ 32, 34, 47, 60, 62, 63, 92, 95,
289
+ 9, 10, 11, 12, 45, 58, 65, 90,
290
+ 97, 122, 13, 32, 34, 47, 60, 62,
291
+ 63, 92, 95, 9, 10, 11, 12, 45,
292
+ 58, 65, 90, 97, 122, 13, 32, 34,
293
+ 47, 60, 61, 62, 63, 92, 95, 9,
294
+ 10, 11, 12, 45, 58, 65, 90, 97,
295
+ 122, 13, 32, 34, 47, 60, 61, 62,
296
+ 63, 92, 95, 9, 10, 11, 12, 45,
297
+ 58, 65, 90, 97, 122, 13, 32, 34,
298
+ 47, 60, 62, 92, 9, 10, 11, 12,
299
+ 13, 32, 34, 47, 60, 62, 92, 9,
300
+ 10, 11, 12, 13, 32, 34, 39, 47,
301
+ 60, 62, 92, 9, 10, 11, 12, 13,
302
+ 32, 34, 39, 47, 60, 62, 92, 9,
303
+ 10, 11, 12, 13, 32, 34, 39, 47,
304
+ 60, 62, 92, 9, 10, 11, 12, 32,
305
+ 34, 39, 47, 62, 63, 92, 95, 9,
306
+ 13, 45, 58, 65, 90, 97, 122, 34,
307
+ 39, 92, 32, 39, 47, 62, 63, 92,
308
+ 95, 9, 13, 45, 58, 65, 90, 97,
309
+ 122, 39, 92, 39, 92, 32, 39, 47,
310
+ 61, 62, 63, 92, 95, 9, 13, 45,
311
+ 58, 65, 90, 97, 122, 32, 39, 47,
312
+ 61, 62, 63, 92, 95, 9, 13, 45,
313
+ 58, 65, 90, 97, 122, 39, 62, 92,
314
+ 13, 32, 34, 39, 47, 60, 62, 92,
315
+ 9, 10, 11, 12, 13, 32, 39, 47,
316
+ 60, 62, 92, 9, 10, 11, 12, 13,
317
+ 32, 39, 47, 60, 62, 63, 92, 95,
318
+ 9, 10, 11, 12, 45, 58, 65, 90,
319
+ 97, 122, 13, 32, 39, 47, 60, 61,
320
+ 62, 63, 92, 95, 9, 10, 11, 12,
321
+ 45, 58, 65, 90, 97, 122, 13, 32,
322
+ 39, 47, 60, 61, 62, 63, 92, 95,
323
+ 9, 10, 11, 12, 45, 58, 65, 90,
324
+ 97, 122, 13, 32, 39, 47, 60, 62,
325
+ 92, 9, 10, 11, 12, 13, 32, 39,
326
+ 47, 60, 62, 92, 9, 10, 11, 12,
327
+ 13, 32, 34, 39, 47, 60, 62, 92,
328
+ 9, 10, 11, 12, 13, 32, 34, 39,
329
+ 47, 60, 62, 63, 92, 95, 9, 10,
330
+ 11, 12, 45, 58, 65, 90, 97, 122,
331
+ 13, 32, 34, 39, 47, 60, 62, 63,
332
+ 92, 95, 9, 10, 11, 12, 45, 58,
333
+ 65, 90, 97, 122, 13, 32, 34, 39,
334
+ 47, 60, 62, 92, 9, 10, 11, 12,
335
+ 13, 32, 34, 39, 47, 60, 62, 63,
336
+ 92, 95, 9, 10, 11, 12, 45, 58,
337
+ 65, 90, 97, 122, 13, 32, 34, 39,
338
+ 47, 60, 61, 62, 63, 92, 95, 9,
339
+ 10, 11, 12, 45, 58, 65, 90, 97,
340
+ 122, 32, 34, 39, 47, 61, 62, 63,
341
+ 92, 95, 9, 13, 45, 58, 65, 90,
342
+ 97, 122, 32, 34, 39, 47, 61, 62,
343
+ 63, 92, 95, 9, 13, 45, 58, 65,
344
+ 90, 97, 122, 34, 39, 62, 92, 34,
345
+ 39, 92, 13, 32, 34, 39, 47, 60,
346
+ 62, 92, 9, 10, 11, 12, 13, 32,
347
+ 34, 39, 47, 60, 62, 92, 9, 10,
348
+ 11, 12, 13, 32, 34, 39, 47, 60,
349
+ 62, 63, 92, 95, 9, 10, 11, 12,
350
+ 45, 58, 65, 90, 97, 122, 13, 32,
351
+ 34, 39, 47, 60, 62, 63, 92, 95,
352
+ 9, 10, 11, 12, 45, 58, 65, 90,
353
+ 97, 122, 13, 32, 34, 39, 47, 60,
354
+ 62, 63, 92, 95, 9, 10, 11, 12,
355
+ 45, 58, 65, 90, 97, 122, 13, 32,
356
+ 34, 39, 47, 60, 62, 92, 9, 10,
357
+ 11, 12, 13, 32, 34, 39, 47, 60,
358
+ 62, 92, 9, 10, 11, 12, 13, 32,
359
+ 34, 39, 47, 60, 62, 63, 92, 95,
360
+ 9, 10, 11, 12, 45, 58, 65, 90,
361
+ 97, 122, 32, 34, 39, 47, 62, 63,
362
+ 92, 95, 9, 13, 45, 58, 65, 90,
363
+ 97, 122, 32, 34, 39, 47, 62, 63,
364
+ 92, 95, 9, 13, 45, 58, 65, 90,
365
+ 97, 122, 13, 32, 34, 39, 47, 60,
366
+ 62, 92, 9, 10, 11, 12, 13, 32,
367
+ 34, 39, 47, 60, 61, 62, 63, 92,
368
+ 95, 9, 10, 11, 12, 45, 58, 65,
369
+ 90, 97, 122, 13, 32, 39, 47, 60,
370
+ 62, 63, 92, 95, 9, 10, 11, 12,
371
+ 45, 58, 65, 90, 97, 122, 34, 39,
372
+ 92, 32, 39, 47, 62, 63, 92, 95,
373
+ 9, 13, 45, 58, 65, 90, 97, 122,
374
+ 13, 32, 34, 39, 47, 60, 62, 92,
375
+ 9, 10, 11, 12, 32, 34, 47, 62,
376
+ 63, 92, 95, 9, 13, 45, 58, 65,
377
+ 90, 97, 122, 34, 39, 92, 13, 32,
378
+ 39, 47, 60, 62, 92, 9, 10, 11,
379
+ 12, 34, 92, 39, 92, 13, 32, 34,
380
+ 39, 47, 60, 62, 9, 10, 11, 12,
381
+ 58, 95, 120, 65, 90, 97, 122, 32,
382
+ 63, 95, 9, 13, 45, 46, 48, 58,
383
+ 65, 90, 97, 122, 32, 63, 95, 109,
384
+ 9, 13, 45, 46, 48, 58, 65, 90,
385
+ 97, 122, 32, 63, 95, 108, 9, 13,
386
+ 45, 46, 48, 58, 65, 90, 97, 122,
387
+ 32, 63, 95, 9, 13, 45, 46, 48,
388
+ 58, 65, 90, 97, 122, 101, 114, 115,
389
+ 105, 111, 110, 32, 61, 9, 13, 32,
390
+ 34, 39, 9, 13, 95, 45, 46, 48,
391
+ 58, 65, 90, 97, 122, 34, 95, 45,
392
+ 46, 48, 58, 65, 90, 97, 122, 32,
393
+ 62, 63, 9, 13, 32, 62, 63, 101,
394
+ 115, 9, 13, 62, 110, 99, 111, 100,
395
+ 105, 110, 103, 32, 61, 9, 13, 32,
396
+ 34, 39, 9, 13, 65, 90, 97, 122,
397
+ 34, 95, 45, 46, 48, 57, 65, 90,
398
+ 97, 122, 32, 62, 63, 9, 13, 32,
399
+ 62, 63, 115, 9, 13, 116, 97, 110,
400
+ 100, 97, 108, 111, 110, 101, 32, 61,
401
+ 9, 13, 32, 34, 39, 9, 13, 110,
402
+ 121, 111, 34, 32, 62, 63, 9, 13,
403
+ 101, 115, 110, 121, 111, 39, 101, 115,
404
+ 65, 90, 97, 122, 39, 95, 45, 46,
405
+ 48, 57, 65, 90, 97, 122, 95, 45,
406
+ 46, 48, 58, 65, 90, 97, 122, 39,
407
+ 95, 45, 46, 48, 58, 65, 90, 97,
408
+ 122, 62, 62, 10, 60, 33, 47, 58,
409
+ 63, 95, 65, 90, 97, 122, 39, 93,
410
+ 34, 34, 92, 39, 92, 34, 39, 92,
411
+ 32, 9, 13, 32, 118, 9, 13, 10,
412
+ 45, 45, 10, 93, 93, 10, 62, 63,
413
+ 62, 0
414
+ };
415
+
416
+ static final byte[] _hpricot_scan_single_lengths = {
417
+ 3, 1, 1, 1, 1, 1, 1, 1,
418
+ 1, 3, 5, 5, 1, 1, 1, 1,
419
+ 1, 1, 3, 4, 4, 3, 5, 1,
420
+ 1, 3, 1, 2, 1, 1, 4, 4,
421
+ 7, 7, 7, 7, 5, 2, 2, 4,
422
+ 2, 3, 3, 4, 2, 3, 4, 4,
423
+ 1, 1, 1, 1, 1, 1, 1, 1,
424
+ 1, 1, 1, 2, 4, 2, 5, 5,
425
+ 6, 6, 1, 7, 5, 5, 7, 8,
426
+ 8, 5, 7, 9, 9, 7, 7, 7,
427
+ 2, 2, 8, 8, 3, 8, 8, 10,
428
+ 10, 9, 9, 10, 10, 7, 7, 8,
429
+ 8, 8, 8, 3, 7, 2, 2, 8,
430
+ 8, 3, 8, 7, 9, 10, 10, 7,
431
+ 7, 8, 10, 10, 8, 10, 11, 9,
432
+ 9, 4, 3, 8, 8, 10, 10, 10,
433
+ 8, 8, 10, 8, 8, 8, 11, 9,
434
+ 3, 7, 8, 7, 3, 7, 2, 2,
435
+ 7, 3, 3, 4, 4, 3, 1, 1,
436
+ 1, 1, 1, 1, 2, 3, 1, 2,
437
+ 3, 5, 1, 1, 1, 1, 1, 1,
438
+ 1, 1, 2, 3, 0, 2, 3, 4,
439
+ 1, 1, 1, 1, 1, 1, 1, 1,
440
+ 1, 2, 3, 2, 1, 1, 3, 1,
441
+ 1, 2, 1, 1, 1, 1, 0, 2,
442
+ 1, 2, 1, 1, 2, 5, 1, 1,
443
+ 1, 2, 2, 3, 1, 2, 2, 1,
444
+ 2, 1, 3, 1
445
+ };
446
+
447
+ static final byte[] _hpricot_scan_range_lengths = {
448
+ 0, 0, 0, 0, 0, 0, 0, 0,
449
+ 1, 3, 5, 1, 0, 0, 0, 0,
450
+ 0, 1, 1, 4, 4, 1, 1, 0,
451
+ 0, 1, 0, 1, 0, 0, 5, 5,
452
+ 5, 5, 5, 5, 1, 0, 0, 1,
453
+ 0, 1, 1, 1, 0, 1, 1, 5,
454
+ 0, 0, 0, 0, 0, 0, 0, 0,
455
+ 0, 0, 0, 2, 5, 1, 4, 4,
456
+ 4, 4, 0, 2, 2, 4, 5, 5,
457
+ 5, 2, 2, 5, 5, 2, 2, 4,
458
+ 0, 0, 4, 4, 0, 2, 2, 5,
459
+ 5, 5, 5, 5, 5, 2, 2, 2,
460
+ 2, 2, 4, 0, 4, 0, 0, 4,
461
+ 4, 0, 2, 2, 5, 5, 5, 2,
462
+ 2, 2, 5, 5, 2, 5, 5, 4,
463
+ 4, 0, 0, 2, 2, 5, 5, 5,
464
+ 2, 2, 5, 4, 4, 2, 5, 5,
465
+ 0, 4, 2, 4, 0, 2, 0, 0,
466
+ 2, 2, 5, 5, 5, 5, 0, 0,
467
+ 0, 0, 0, 0, 1, 1, 4, 4,
468
+ 1, 1, 0, 0, 0, 0, 0, 0,
469
+ 0, 0, 1, 1, 2, 4, 1, 1,
470
+ 0, 0, 0, 0, 0, 0, 0, 0,
471
+ 0, 1, 1, 0, 0, 0, 1, 0,
472
+ 0, 0, 0, 0, 0, 0, 2, 4,
473
+ 4, 4, 0, 0, 0, 2, 0, 0,
474
+ 0, 0, 0, 0, 1, 1, 0, 0,
475
+ 0, 0, 0, 0
476
+ };
477
+
478
+ static final short[] _hpricot_scan_index_offsets = {
479
+ 0, 4, 6, 8, 10, 12, 14, 16,
480
+ 18, 21, 28, 39, 46, 48, 50, 52,
481
+ 54, 56, 59, 64, 73, 82, 87, 94,
482
+ 96, 98, 103, 105, 109, 111, 113, 123,
483
+ 133, 146, 159, 172, 185, 192, 195, 198,
484
+ 204, 207, 212, 217, 223, 226, 231, 237,
485
+ 247, 249, 251, 253, 255, 257, 259, 261,
486
+ 263, 265, 267, 269, 274, 284, 288, 298,
487
+ 308, 319, 330, 332, 342, 350, 360, 373,
488
+ 387, 401, 409, 419, 434, 449, 459, 469,
489
+ 481, 484, 487, 500, 513, 517, 528, 539,
490
+ 555, 571, 586, 601, 617, 633, 643, 653,
491
+ 664, 675, 686, 699, 703, 715, 718, 721,
492
+ 734, 747, 751, 762, 772, 787, 803, 819,
493
+ 829, 839, 850, 866, 882, 893, 909, 926,
494
+ 940, 954, 959, 963, 974, 985, 1001, 1017,
495
+ 1033, 1044, 1055, 1071, 1084, 1097, 1108, 1125,
496
+ 1140, 1144, 1156, 1167, 1179, 1183, 1193, 1196,
497
+ 1199, 1209, 1215, 1224, 1234, 1244, 1253, 1255,
498
+ 1257, 1259, 1261, 1263, 1265, 1269, 1274, 1280,
499
+ 1287, 1292, 1299, 1301, 1303, 1305, 1307, 1309,
500
+ 1311, 1313, 1315, 1319, 1324, 1327, 1334, 1339,
501
+ 1345, 1347, 1349, 1351, 1353, 1355, 1357, 1359,
502
+ 1361, 1363, 1367, 1372, 1375, 1377, 1379, 1384,
503
+ 1386, 1388, 1391, 1393, 1395, 1397, 1399, 1402,
504
+ 1409, 1415, 1422, 1424, 1426, 1429, 1437, 1439,
505
+ 1441, 1443, 1446, 1449, 1453, 1456, 1460, 1463,
506
+ 1465, 1468, 1470, 1474
507
+ };
508
+
509
+ static final short[] _hpricot_scan_indicies = {
510
+ 335, 336, 337, 296, 356, 296, 349, 296,
511
+ 399, 296, 401, 296, 354, 296, 350, 296,
512
+ 400, 296, 308, 308, 296, 308, 309, 309,
513
+ 308, 309, 309, 296, 328, 330, 329, 331,
514
+ 329, 328, 329, 329, 329, 329, 296, 310,
515
+ 302, 311, 312, 0, 310, 296, 353, 296,
516
+ 342, 296, 347, 296, 346, 296, 343, 296,
517
+ 304, 304, 296, 304, 305, 306, 304, 296,
518
+ 321, 320, 321, 321, 321, 321, 321, 321,
519
+ 296, 319, 320, 319, 319, 319, 319, 319,
520
+ 319, 296, 298, 302, 0, 298, 296, 298,
521
+ 300, 307, 302, 0, 298, 296, 6, 222,
522
+ 6, 13, 358, 302, 0, 358, 69, 1,
523
+ 0, 1, 302, 1, 69, 6, 182, 6,
524
+ 5, 322, 323, 322, 322, 322, 322, 322,
525
+ 322, 322, 296, 299, 303, 299, 299, 299,
526
+ 299, 299, 299, 299, 296, 297, 297, 299,
527
+ 303, 302, 0, 299, 298, 299, 299, 299,
528
+ 299, 296, 297, 297, 300, 301, 302, 0,
529
+ 299, 298, 299, 299, 299, 299, 296, 186,
530
+ 186, 188, 42, 184, 185, 188, 187, 188,
531
+ 188, 188, 188, 182, 43, 43, 38, 44,
532
+ 40, 34, 41, 37, 41, 41, 41, 41,
533
+ 5, 37, 38, 39, 40, 34, 37, 5,
534
+ 63, 224, 223, 63, 64, 62, 371, 6,
535
+ 40, 34, 371, 5, 35, 36, 34, 26,
536
+ 27, 1, 26, 0, 36, 6, 40, 36,
537
+ 5, 60, 6, 61, 58, 60, 13, 35,
538
+ 59, 58, 59, 6, 61, 59, 13, 183,
539
+ 6, 184, 185, 183, 182, 41, 42, 41,
540
+ 41, 41, 41, 41, 41, 41, 5, 403,
541
+ 296, 351, 296, 352, 296, 345, 296, 348,
542
+ 296, 398, 296, 344, 296, 341, 296, 402,
543
+ 296, 397, 296, 355, 296, 338, 338, 338,
544
+ 338, 296, 332, 334, 333, 333, 332, 333,
545
+ 333, 333, 333, 296, 313, 314, 313, 296,
546
+ 324, 326, 327, 325, 325, 324, 325, 325,
547
+ 325, 296, 315, 317, 318, 316, 316, 315,
548
+ 316, 316, 316, 296, 364, 366, 367, 368,
549
+ 365, 365, 364, 365, 365, 365, 69, 359,
550
+ 361, 362, 162, 360, 360, 359, 360, 360,
551
+ 360, 69, 369, 69, 157, 157, 159, 160,
552
+ 161, 69, 162, 157, 158, 156, 66, 66,
553
+ 68, 69, 70, 66, 67, 65, 363, 361,
554
+ 162, 360, 360, 363, 360, 360, 360, 69,
555
+ 66, 66, 74, 69, 76, 73, 73, 66,
556
+ 67, 73, 73, 73, 65, 132, 132, 135,
557
+ 69, 136, 137, 134, 134, 132, 133, 134,
558
+ 134, 134, 65, 71, 71, 74, 69, 75,
559
+ 76, 73, 73, 71, 72, 73, 73, 73,
560
+ 65, 66, 66, 68, 69, 70, 66, 67,
561
+ 65, 226, 226, 228, 229, 230, 69, 70,
562
+ 226, 227, 156, 163, 163, 159, 160, 161,
563
+ 69, 162, 165, 165, 163, 164, 165, 165,
564
+ 165, 156, 226, 226, 228, 229, 231, 69,
565
+ 76, 165, 165, 226, 227, 165, 165, 165,
566
+ 156, 248, 248, 84, 246, 199, 250, 195,
567
+ 248, 249, 189, 92, 92, 84, 95, 7,
568
+ 96, 97, 92, 93, 91, 372, 3, 48,
569
+ 50, 47, 8, 47, 372, 47, 47, 47,
570
+ 7, 3, 8, 7, 11, 8, 7, 122,
571
+ 3, 124, 125, 126, 123, 8, 123, 122,
572
+ 123, 123, 123, 7, 46, 3, 48, 49,
573
+ 50, 47, 8, 47, 46, 47, 47, 47,
574
+ 7, 3, 45, 8, 7, 190, 190, 192,
575
+ 193, 194, 7, 50, 195, 190, 191, 189,
576
+ 196, 196, 192, 193, 194, 7, 50, 195,
577
+ 196, 197, 189, 196, 196, 192, 193, 194,
578
+ 7, 50, 198, 195, 198, 196, 197, 198,
579
+ 198, 198, 189, 242, 242, 244, 245, 247,
580
+ 7, 103, 198, 195, 198, 242, 243, 198,
581
+ 198, 198, 189, 248, 248, 84, 247, 199,
582
+ 251, 198, 195, 198, 248, 249, 198, 198,
583
+ 198, 189, 92, 92, 84, 101, 7, 103,
584
+ 100, 97, 100, 92, 93, 100, 100, 100,
585
+ 91, 144, 144, 84, 147, 7, 148, 149,
586
+ 146, 97, 146, 144, 145, 146, 146, 146,
587
+ 91, 98, 98, 84, 101, 7, 102, 103,
588
+ 100, 97, 100, 98, 99, 100, 100, 100,
589
+ 91, 92, 92, 84, 95, 7, 96, 97,
590
+ 92, 93, 91, 92, 92, 94, 95, 7,
591
+ 96, 97, 92, 93, 91, 242, 242, 244,
592
+ 245, 246, 7, 96, 195, 242, 243, 189,
593
+ 258, 258, 263, 94, 256, 215, 261, 211,
594
+ 258, 259, 205, 105, 105, 80, 94, 108,
595
+ 9, 109, 110, 105, 106, 104, 373, 10,
596
+ 11, 55, 57, 54, 12, 54, 373, 54,
597
+ 54, 54, 9, 10, 11, 12, 9, 370,
598
+ 3, 31, 33, 30, 4, 30, 370, 30,
599
+ 30, 30, 2, 3, 4, 2, 10, 4,
600
+ 2, 117, 3, 119, 120, 121, 118, 4,
601
+ 118, 117, 118, 118, 118, 2, 29, 3,
602
+ 31, 32, 33, 30, 4, 30, 29, 30,
603
+ 30, 30, 2, 3, 28, 4, 2, 167,
604
+ 167, 169, 170, 171, 2, 33, 172, 167,
605
+ 168, 166, 78, 78, 84, 81, 2, 82,
606
+ 83, 78, 79, 77, 78, 78, 84, 88,
607
+ 2, 90, 87, 83, 87, 78, 79, 87,
608
+ 87, 87, 77, 138, 138, 84, 141, 2,
609
+ 142, 143, 140, 83, 140, 138, 139, 140,
610
+ 140, 140, 77, 85, 85, 84, 88, 2,
611
+ 89, 90, 87, 83, 87, 85, 86, 87,
612
+ 87, 87, 77, 78, 78, 84, 81, 2,
613
+ 82, 83, 78, 79, 77, 78, 78, 80,
614
+ 81, 2, 82, 83, 78, 79, 77, 232,
615
+ 232, 234, 235, 236, 2, 82, 172, 232,
616
+ 233, 166, 173, 173, 169, 170, 171, 2,
617
+ 33, 175, 172, 175, 173, 174, 175, 175,
618
+ 175, 166, 232, 232, 234, 235, 237, 2,
619
+ 90, 175, 172, 175, 232, 233, 175, 175,
620
+ 175, 166, 258, 258, 80, 260, 256, 215,
621
+ 261, 211, 258, 259, 205, 105, 105, 80,
622
+ 94, 114, 9, 116, 113, 110, 113, 105,
623
+ 106, 113, 113, 113, 104, 150, 150, 80,
624
+ 94, 153, 9, 154, 155, 152, 110, 152,
625
+ 150, 151, 152, 152, 152, 104, 53, 10,
626
+ 11, 55, 56, 57, 54, 12, 54, 53,
627
+ 54, 54, 54, 9, 127, 10, 11, 129,
628
+ 130, 131, 128, 12, 128, 127, 128, 128,
629
+ 128, 9, 10, 11, 52, 12, 9, 51,
630
+ 51, 12, 9, 206, 206, 208, 209, 210,
631
+ 9, 57, 211, 206, 207, 205, 212, 212,
632
+ 208, 209, 210, 9, 57, 211, 212, 213,
633
+ 205, 212, 212, 208, 209, 210, 9, 57,
634
+ 214, 211, 214, 212, 213, 214, 214, 214,
635
+ 205, 252, 252, 254, 255, 257, 9, 116,
636
+ 214, 211, 214, 252, 253, 214, 214, 214,
637
+ 205, 258, 258, 80, 260, 257, 215, 262,
638
+ 214, 211, 214, 258, 259, 214, 214, 214,
639
+ 205, 105, 105, 80, 94, 108, 9, 109,
640
+ 110, 105, 106, 104, 105, 105, 107, 107,
641
+ 108, 9, 109, 110, 105, 106, 104, 258,
642
+ 258, 263, 94, 257, 215, 262, 214, 211,
643
+ 214, 258, 259, 214, 214, 214, 205, 218,
644
+ 10, 216, 220, 221, 219, 217, 219, 218,
645
+ 219, 219, 219, 215, 218, 225, 11, 220,
646
+ 221, 219, 217, 219, 218, 219, 219, 219,
647
+ 215, 252, 252, 254, 255, 256, 9, 109,
648
+ 211, 252, 253, 205, 111, 111, 80, 94,
649
+ 114, 9, 115, 116, 113, 110, 113, 111,
650
+ 112, 113, 113, 113, 104, 238, 238, 84,
651
+ 237, 176, 241, 175, 172, 175, 238, 239,
652
+ 175, 175, 175, 166, 10, 216, 217, 215,
653
+ 178, 3, 180, 181, 179, 177, 179, 178,
654
+ 179, 179, 179, 176, 173, 173, 169, 170,
655
+ 171, 2, 33, 172, 173, 174, 166, 201,
656
+ 3, 203, 204, 202, 200, 202, 201, 202,
657
+ 202, 202, 199, 225, 11, 217, 215, 238,
658
+ 238, 84, 236, 176, 240, 172, 238, 239,
659
+ 166, 3, 200, 199, 3, 177, 176, 163,
660
+ 163, 159, 160, 161, 69, 162, 163, 164,
661
+ 156, 339, 339, 340, 339, 339, 296, 15,
662
+ 357, 357, 15, 357, 357, 357, 357, 296,
663
+ 15, 357, 357, 408, 15, 357, 357, 357,
664
+ 357, 296, 15, 357, 357, 404, 15, 357,
665
+ 357, 357, 357, 296, 16, 357, 357, 16,
666
+ 357, 357, 357, 357, 296, 287, 264, 294,
667
+ 264, 396, 264, 387, 264, 393, 264, 268,
668
+ 264, 268, 265, 268, 264, 265, 266, 267,
669
+ 265, 264, 282, 282, 282, 282, 282, 264,
670
+ 275, 276, 276, 276, 276, 276, 264, 269,
671
+ 270, 271, 269, 264, 269, 270, 271, 272,
672
+ 273, 269, 264, 270, 264, 388, 264, 285,
673
+ 264, 394, 264, 385, 264, 289, 264, 390,
674
+ 264, 288, 264, 288, 374, 288, 264, 374,
675
+ 375, 376, 374, 264, 283, 283, 264, 277,
676
+ 278, 278, 278, 278, 278, 264, 274, 270,
677
+ 271, 274, 264, 274, 270, 271, 273, 274,
678
+ 264, 295, 264, 384, 264, 389, 264, 286,
679
+ 264, 284, 264, 290, 264, 395, 264, 391,
680
+ 264, 380, 264, 380, 377, 380, 264, 377,
681
+ 378, 379, 377, 264, 291, 292, 264, 293,
682
+ 264, 279, 264, 381, 270, 271, 381, 264,
683
+ 386, 264, 293, 264, 405, 406, 264, 392,
684
+ 264, 279, 264, 407, 264, 392, 264, 383,
685
+ 383, 264, 277, 281, 281, 281, 281, 281,
686
+ 264, 382, 382, 382, 382, 382, 264, 275,
687
+ 280, 280, 280, 280, 280, 264, 415, 414,
688
+ 422, 421, 24, 25, 23, 19, 20, 21,
689
+ 22, 21, 21, 21, 18, 6, 5, 1,
690
+ 0, 6, 13, 3, 8, 7, 3, 4,
691
+ 2, 10, 11, 12, 9, 15, 15, 14,
692
+ 16, 17, 16, 14, 412, 413, 411, 410,
693
+ 409, 419, 420, 418, 417, 416, 426, 424,
694
+ 427, 425, 424, 423, 0
695
+ };
696
+
697
+ static final short[] _hpricot_scan_trans_targs_wi = {
698
+ 26, 27, 101, 69, 102, 29, 25, 80,
699
+ 81, 99, 100, 79, 122, 24, 204, 212,
700
+ 213, 150, 204, 0, 59, 62, 145, 204,
701
+ 204, 205, 41, 207, 210, 104, 103, 105,
702
+ 106, 210, 40, 41, 42, 36, 37, 46,
703
+ 206, 47, 32, 35, 34, 209, 83, 82,
704
+ 84, 85, 209, 98, 211, 119, 120, 121,
705
+ 123, 211, 44, 45, 43, 208, 38, 39,
706
+ 43, 68, 69, 70, 73, 204, 204, 65,
707
+ 72, 71, 73, 74, 204, 107, 100, 108,
708
+ 108, 111, 210, 112, 70, 104, 110, 109,
709
+ 111, 113, 210, 78, 79, 90, 90, 93,
710
+ 209, 94, 83, 92, 91, 93, 95, 209,
711
+ 97, 98, 117, 117, 128, 211, 129, 119,
712
+ 134, 118, 128, 133, 211, 104, 103, 105,
713
+ 106, 210, 83, 82, 84, 85, 209, 119,
714
+ 120, 121, 123, 211, 65, 72, 71, 73,
715
+ 74, 204, 104, 110, 109, 111, 113, 210,
716
+ 83, 92, 91, 93, 95, 209, 119, 134,
717
+ 118, 128, 133, 211, 68, 144, 74, 142,
718
+ 143, 73, 204, 75, 76, 71, 107, 138,
719
+ 113, 136, 137, 111, 112, 114, 115, 109,
720
+ 101, 102, 100, 103, 105, 210, 29, 39,
721
+ 206, 40, 35, 36, 47, 78, 86, 95,
722
+ 139, 140, 93, 94, 87, 88, 91, 80,
723
+ 81, 79, 82, 84, 209, 97, 124, 133,
724
+ 131, 132, 128, 129, 125, 126, 118, 99,
725
+ 79, 122, 98, 120, 121, 211, 24, 38,
726
+ 43, 100, 75, 76, 77, 141, 73, 73,
727
+ 114, 115, 116, 135, 111, 111, 100, 108,
728
+ 210, 210, 87, 88, 89, 96, 93, 93,
729
+ 79, 90, 209, 209, 125, 126, 127, 130,
730
+ 128, 128, 98, 117, 90, 211, 211, 108,
731
+ 204, 157, 158, 200, 156, 161, 204, 162,
732
+ 163, 176, 175, 160, 159, 174, 173, 190,
733
+ 201, 199, 159, 173, 181, 165, 180, 151,
734
+ 170, 168, 182, 188, 191, 189, 152, 177,
735
+ 204, 33, 22, 31, 23, 34, 204, 32,
736
+ 18, 19, 30, 28, 9, 10, 11, 12,
737
+ 48, 61, 204, 63, 64, 66, 204, 20,
738
+ 21, 20, 31, 32, 63, 62, 66, 204,
739
+ 11, 10, 204, 26, 61, 60, 204, 1,
740
+ 2, 53, 60, 146, 147, 56, 14, 17,
741
+ 55, 52, 16, 15, 21, 3, 7, 50,
742
+ 51, 13, 6, 204, 204, 146, 25, 65,
743
+ 64, 66, 67, 69, 65, 64, 66, 67,
744
+ 204, 204, 100, 39, 79, 98, 171, 172,
745
+ 198, 186, 187, 193, 185, 190, 201, 199,
746
+ 178, 167, 192, 154, 164, 179, 169, 184,
747
+ 195, 155, 166, 183, 153, 58, 54, 4,
748
+ 8, 5, 57, 49, 149, 194, 196, 197,
749
+ 148, 214, 202, 214, 214, 215, 214, 214,
750
+ 216, 203, 216, 216, 217, 216, 216, 218,
751
+ 218, 218, 218, 219
752
+ };
753
+
754
+ static final short[] _hpricot_scan_trans_actions_wi = {
755
+ 0, 0, 0, 7, 0, 0, 21, 0,
756
+ 0, 0, 7, 7, 0, 0, 65, 0,
757
+ 31, 0, 67, 0, 0, 1, 0, 63,
758
+ 132, 178, 0, 144, 147, 0, 174, 23,
759
+ 0, 186, 0, 21, 0, 0, 0, 21,
760
+ 144, 0, 111, 0, 111, 147, 0, 174,
761
+ 23, 0, 186, 7, 147, 0, 174, 23,
762
+ 0, 186, 0, 0, 0, 144, 0, 21,
763
+ 21, 0, 9, 9, 102, 73, 162, 9,
764
+ 9, 174, 117, 0, 170, 0, 9, 9,
765
+ 7, 102, 205, 0, 7, 9, 9, 174,
766
+ 117, 0, 215, 0, 9, 9, 7, 102,
767
+ 205, 0, 9, 9, 174, 117, 0, 215,
768
+ 0, 9, 9, 7, 102, 205, 0, 9,
769
+ 9, 174, 117, 0, 215, 11, 0, 108,
770
+ 11, 210, 11, 0, 108, 11, 210, 11,
771
+ 0, 108, 11, 210, 105, 105, 0, 158,
772
+ 11, 195, 105, 105, 0, 158, 11, 232,
773
+ 105, 105, 0, 158, 11, 232, 105, 105,
774
+ 0, 158, 11, 232, 3, 3, 3, 0,
775
+ 0, 87, 120, 3, 3, 190, 3, 3,
776
+ 3, 0, 7, 87, 3, 3, 3, 190,
777
+ 3, 3, 3, 190, 87, 200, 3, 3,
778
+ 182, 3, 3, 3, 3, 3, 3, 3,
779
+ 7, 0, 87, 3, 3, 3, 190, 3,
780
+ 3, 3, 190, 87, 200, 3, 3, 3,
781
+ 7, 7, 87, 3, 3, 3, 190, 3,
782
+ 75, 3, 3, 190, 87, 200, 3, 3,
783
+ 84, 99, 78, 78, 0, 0, 150, 154,
784
+ 78, 78, 0, 7, 150, 154, 78, 78,
785
+ 220, 226, 78, 78, 7, 0, 150, 154,
786
+ 78, 78, 220, 226, 78, 78, 7, 7,
787
+ 150, 154, 78, 78, 75, 220, 226, 99,
788
+ 69, 0, 0, 0, 0, 0, 49, 0,
789
+ 0, 0, 0, 13, 0, 15, 0, 17,
790
+ 0, 0, 3, 3, 0, 0, 0, 0,
791
+ 0, 0, 0, 3, 3, 0, 0, 0,
792
+ 71, 0, 0, 0, 0, 19, 51, 19,
793
+ 0, 0, 0, 0, 0, 1, 0, 0,
794
+ 0, 0, 55, 0, 114, 0, 53, 0,
795
+ 19, 3, 3, 81, 5, 0, 5, 93,
796
+ 5, 0, 90, 5, 5, 0, 96, 0,
797
+ 0, 0, 1, 25, 25, 0, 0, 0,
798
+ 0, 0, 0, 0, 0, 0, 0, 0,
799
+ 0, 0, 0, 61, 59, 0, 0, 0,
800
+ 174, 23, 0, 0, 11, 0, 108, 11,
801
+ 166, 57, 0, 0, 0, 0, 0, 0,
802
+ 0, 0, 0, 0, 0, 0, 3, 3,
803
+ 0, 0, 0, 0, 0, 0, 0, 0,
804
+ 0, 0, 0, 0, 0, 0, 0, 0,
805
+ 0, 0, 0, 0, 0, 3, 3, 0,
806
+ 0, 35, 0, 33, 123, 31, 37, 135,
807
+ 41, 0, 39, 126, 31, 43, 138, 47,
808
+ 141, 45, 129, 0
809
+ };
810
+
811
+ static final short[] _hpricot_scan_to_state_actions = {
812
+ 0, 0, 0, 0, 0, 0, 0, 0,
813
+ 0, 0, 0, 0, 0, 0, 0, 0,
814
+ 0, 0, 0, 0, 0, 0, 0, 0,
815
+ 0, 0, 0, 0, 0, 0, 0, 0,
816
+ 0, 0, 0, 0, 0, 0, 0, 0,
817
+ 0, 0, 0, 0, 0, 0, 0, 0,
818
+ 0, 0, 0, 0, 0, 0, 0, 0,
819
+ 0, 0, 0, 0, 0, 0, 0, 0,
820
+ 0, 0, 0, 0, 0, 0, 0, 0,
821
+ 0, 0, 0, 0, 0, 0, 0, 0,
822
+ 0, 0, 0, 0, 0, 0, 0, 0,
823
+ 0, 0, 0, 0, 0, 0, 0, 0,
824
+ 0, 0, 0, 0, 0, 0, 0, 0,
825
+ 0, 0, 0, 0, 0, 0, 0, 0,
826
+ 0, 0, 0, 0, 0, 0, 0, 0,
827
+ 0, 0, 0, 0, 0, 0, 0, 0,
828
+ 0, 0, 0, 0, 0, 0, 0, 0,
829
+ 0, 0, 0, 0, 0, 0, 0, 0,
830
+ 0, 0, 0, 0, 0, 0, 0, 0,
831
+ 0, 0, 0, 0, 0, 0, 0, 0,
832
+ 0, 0, 0, 0, 0, 0, 0, 0,
833
+ 0, 0, 0, 0, 0, 0, 0, 0,
834
+ 0, 0, 0, 0, 0, 0, 0, 0,
835
+ 0, 0, 0, 0, 0, 0, 0, 0,
836
+ 0, 0, 0, 0, 0, 0, 0, 0,
837
+ 0, 0, 0, 0, 27, 0, 0, 0,
838
+ 0, 0, 0, 0, 0, 0, 27, 0,
839
+ 27, 0, 27, 0
840
+ };
841
+
842
+ static final short[] _hpricot_scan_from_state_actions = {
843
+ 0, 0, 0, 0, 0, 0, 0, 0,
844
+ 0, 0, 0, 0, 0, 0, 0, 0,
845
+ 0, 0, 0, 0, 0, 0, 0, 0,
846
+ 0, 0, 0, 0, 0, 0, 0, 0,
847
+ 0, 0, 0, 0, 0, 0, 0, 0,
848
+ 0, 0, 0, 0, 0, 0, 0, 0,
849
+ 0, 0, 0, 0, 0, 0, 0, 0,
850
+ 0, 0, 0, 0, 0, 0, 0, 0,
851
+ 0, 0, 0, 0, 0, 0, 0, 0,
852
+ 0, 0, 0, 0, 0, 0, 0, 0,
853
+ 0, 0, 0, 0, 0, 0, 0, 0,
854
+ 0, 0, 0, 0, 0, 0, 0, 0,
855
+ 0, 0, 0, 0, 0, 0, 0, 0,
856
+ 0, 0, 0, 0, 0, 0, 0, 0,
857
+ 0, 0, 0, 0, 0, 0, 0, 0,
858
+ 0, 0, 0, 0, 0, 0, 0, 0,
859
+ 0, 0, 0, 0, 0, 0, 0, 0,
860
+ 0, 0, 0, 0, 0, 0, 0, 0,
861
+ 0, 0, 0, 0, 0, 0, 0, 0,
862
+ 0, 0, 0, 0, 0, 0, 0, 0,
863
+ 0, 0, 0, 0, 0, 0, 0, 0,
864
+ 0, 0, 0, 0, 0, 0, 0, 0,
865
+ 0, 0, 0, 0, 0, 0, 0, 0,
866
+ 0, 0, 0, 0, 0, 0, 0, 0,
867
+ 0, 0, 0, 0, 0, 0, 0, 0,
868
+ 0, 0, 0, 0, 29, 0, 0, 0,
869
+ 0, 0, 0, 0, 0, 0, 29, 0,
870
+ 29, 0, 29, 0
871
+ };
872
+
873
+ static final int hpricot_scan_start = 204; // initial machine state; hpricot_scan assigns it to cs before scanning
874
+
875
+ static final int hpricot_scan_error = -1; // cs value that hpricot_scan reports as a parse error
876
+
877
+
878
+ public final static int BUFSIZE=16384; // default scan-buffer length in chars, used unless @buffer_size supplies another value
879
+
880
+ private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
881
+ IRubyObject ary;
882
+ if (sym == runtime.newSymbol("text")) {
883
+ raw = tag;
884
+ }
885
+ ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw});
886
+ if (taint) {
887
+ ary.setTaint(true);
888
+ tag.setTaint(true);
889
+ attr.setTaint(true);
890
+ raw.setTaint(true);
891
+ }
892
+ block.yield(runtime.getCurrentContext(), ary, null, null, false);
893
+ }
894
+
895
+
896
+ int cs, act, have = 0, nread = 0, curline = 1, p=-1; // machine state, pending-action id, chars carried over at the front of buf, total chars read, current line (starts at 1), scan index into buf
897
+ boolean text = false; // true while accumulated text is still waiting to be emitted as a text token
898
+ int tokstart=-1, tokend; // buf indices of the token in progress; tokstart == -1 means no token is open
899
+ char[] buf; // scan buffer, allocated with buffer_size chars in hpricot_scan
900
+ Ruby runtime; // JRuby runtime, assigned by __hpricot_scan
901
+ IRubyObject attr, bufsize; // attr: reset to nil by scanner action 0 (presumably filled by ATTR -- confirm); bufsize: value of recv's @buffer_size
902
+ IRubyObject[] tag, akey, aval; // one-element holders for the current tag, attribute key and attribute value
903
+ int mark_tag, mark_akey, mark_aval; // buf indices where the current tag / key / value began; -1 when unset
904
+ boolean done = false, ele_open = false; // done: the final chunk has been read; ele_open: set by scanner action 0, cleared once input is done
905
+ int buffer_size = 0; // length of buf; chosen in hpricot_scan
906
+ boolean taint = false; // taint flag copied from the input port
907
+ Block block = null; // block that rb_yield_tokens yields to
908
+
909
+
910
+ IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment, // token-type symbols, assigned in __hpricot_scan
911
+ cdata, sym_text;
912
+
913
+ IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) { // Runs the scanner state machine over port chunk by chunk; always returns nil.
914
+ attr = bufsize = runtime.getNil();
915
+ tag = new IRubyObject[]{runtime.getNil()};
916
+ akey = new IRubyObject[]{runtime.getNil()};
917
+ aval = new IRubyObject[]{runtime.getNil()};
918
+ // Error class passed to rb_raise on buffer exhaustion or when the machine reaches the error state.
919
+ RubyClass rb_eHpricotParseError = runtime.getModule("Hpricot").getClass("ParseError");
920
+ // port must answer read (IO-like) or to_str (String-like); anything else is an ArgumentError.
921
+ taint = port.isTaint();
922
+ if ( !port.respondsTo("read")) {
923
+ if ( port.respondsTo("to_str")) {
924
+ port = port.callMethod(runtime.getCurrentContext(),"to_str");
925
+ } else {
926
+ throw runtime.newArgumentError("bad Hpricot argument, String or IO only please.");
927
+ }
928
+ }
929
+ // Buffer capacity: BUFSIZE unless recv's @buffer_size instance variable holds a non-nil value.
930
+ buffer_size = BUFSIZE;
931
+ if (recv.getInstanceVariable("@buffer_size") != null) {
932
+ bufsize = recv.getInstanceVariable("@buffer_size");
933
+ if (!bufsize.isNil()) {
934
+ buffer_size = RubyNumeric.fix2int(bufsize);
935
+ }
936
+ }
937
+ buf = new char[buffer_size];
938
+
939
+ // Put the machine in its start state with no token in progress.
940
+ {
941
+ cs = hpricot_scan_start;
942
+ tokstart = -1;
943
+ tokend = -1;
944
+ act = 0;
945
+ }
946
+ // One iteration per chunk: fill the free tail of buf, run the machine over it, then compact buf.
947
+ while( !done ) {
948
+ IRubyObject str;
949
+ p = have;
950
+ int pe;
951
+ int len, space = buffer_size - have;
952
+
953
+ if ( space == 0 ) {
954
+ /* We've used up the entire buffer storing an already-parsed token
955
+ * prefix that must be preserved. Likely caused by super-long attributes.
956
+ * See ticket #13. */
957
+ rb_raise(rb_eHpricotParseError, "ran out of buffer space on element <" + tag[0].toString() + ">, starting on line "+curline+".");
958
+ }
959
+ // Fetch at most `space` characters: through read for IO-like ports, through substr otherwise.
960
+ if (port.respondsTo("read")) {
961
+ str = port.callMethod(runtime.getCurrentContext(),"read",runtime.newFixnum(space));
962
+ } else {
963
+ str = ((RubyString)port).substr(nread,space);
964
+ }
965
+
966
+ // read(length) yields nil once the port is at end of input; treat that as an empty final chunk.
967
+ String sss = str.isNil() ? "" : str.convertToString().toString();
968
+ char[] chars = sss.toCharArray();
969
+ System.arraycopy(chars,0,buf,p,chars.length);
970
+
971
+ len = sss.length();
972
+ nread += len;
973
+ // A short read means this is the last chunk; one extra sentinel slot is scanned after the data.
974
+ if ( len < space ) {
975
+ buf[p + len++] = 0; // write the sentinel explicitly: buf is reused, so this slot may hold a stale character
976
+ done = true;
977
+ }
978
+
979
+ pe = p + len;
980
+ char[] data = buf;
981
+
982
+ // Table-driven execution loop (appears to be Ragel-generated); reproduced exactly as found.
983
+ {
984
+ int _klen;
985
+ int _trans;
986
+ int _acts;
987
+ int _nacts;
988
+ int _keys;
989
+
990
+ if ( p != pe ) {
991
+ _resume: while ( true ) {
992
+ _again: do {
993
+ _acts = _hpricot_scan_from_state_actions[cs];
994
+ _nacts = (int) _hpricot_scan_actions[_acts++];
995
+ while ( _nacts-- > 0 ) {
996
+ switch ( _hpricot_scan_actions[_acts++] ) {
997
+ case 21:
998
+ {tokstart = p;}
999
+ break;
1000
+ }
1001
+ }
1002
+
1003
+ _match: do {
1004
+ _keys = _hpricot_scan_key_offsets[cs];
1005
+ _trans = _hpricot_scan_index_offsets[cs];
1006
+ _klen = _hpricot_scan_single_lengths[cs];
1007
+ if ( _klen > 0 ) {
1008
+ int _lower = _keys;
1009
+ int _mid;
1010
+ int _upper = _keys + _klen - 1;
1011
+ while (true) {
1012
+ if ( _upper < _lower )
1013
+ break;
1014
+
1015
+ _mid = _lower + ((_upper-_lower) >> 1);
1016
+ if ( data[p] < _hpricot_scan_trans_keys[_mid] )
1017
+ _upper = _mid - 1;
1018
+ else if ( data[p] > _hpricot_scan_trans_keys[_mid] )
1019
+ _lower = _mid + 1;
1020
+ else {
1021
+ _trans += (_mid - _keys);
1022
+ break _match;
1023
+ }
1024
+ }
1025
+ _keys += _klen;
1026
+ _trans += _klen;
1027
+ }
1028
+
1029
+ _klen = _hpricot_scan_range_lengths[cs];
1030
+ if ( _klen > 0 ) {
1031
+ int _lower = _keys;
1032
+ int _mid;
1033
+ int _upper = _keys + (_klen<<1) - 2;
1034
+ while (true) {
1035
+ if ( _upper < _lower )
1036
+ break;
1037
+
1038
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
1039
+ if ( data[p] < _hpricot_scan_trans_keys[_mid] )
1040
+ _upper = _mid - 2;
1041
+ else if ( data[p] > _hpricot_scan_trans_keys[_mid+1] )
1042
+ _lower = _mid + 2;
1043
+ else {
1044
+ _trans += ((_mid - _keys)>>1);
1045
+ break _match;
1046
+ }
1047
+ }
1048
+ _trans += _klen;
1049
+ }
1050
+ } while (false);
1051
+
1052
+ _trans = _hpricot_scan_indicies[_trans];
1053
+ cs = _hpricot_scan_trans_targs_wi[_trans];
1054
+
1055
+ if ( _hpricot_scan_trans_actions_wi[_trans] == 0 )
1056
+ break _again;
1057
+
1058
+ _acts = _hpricot_scan_trans_actions_wi[_trans];
1059
+ _nacts = (int) _hpricot_scan_actions[_acts++];
1060
+ while ( _nacts-- > 0 )
1061
+ {
1062
+ switch ( _hpricot_scan_actions[_acts++] )
1063
+ {
1064
+ case 0:
1065
+ {
1066
+ if (text) {
1067
+ CAT(tag, p);
1068
+ ELE(sym_text);
1069
+ text = false;
1070
+ }
1071
+ attr = runtime.getNil();
1072
+ tag[0] = runtime.getNil();
1073
+ mark_tag = -1;
1074
+ ele_open = true;
1075
+ }
1076
+ break;
1077
+ case 1:
1078
+ { mark_tag = p; }
1079
+ break;
1080
+ case 2:
1081
+ { mark_aval = p; }
1082
+ break;
1083
+ case 3:
1084
+ { mark_akey = p; }
1085
+ break;
1086
+ case 4:
1087
+ { SET(tag, p); }
1088
+ break;
1089
+ case 5:
1090
+ { SET(aval, p); }
1091
+ break;
1092
+ case 6:
1093
+ {
1094
+ if (buf[p-1] == '"' || buf[p-1] == '\'') { SET(aval, p-1); }
1095
+ else { SET(aval, p); }
1096
+ }
1097
+ break;
1098
+ case 7:
1099
+ { SET(akey, p); }
1100
+ break;
1101
+ case 8:
1102
+ { SET(aval, p); ATTR(rb_str_new2("version"), aval); }
1103
+ break;
1104
+ case 9:
1105
+ { SET(aval, p); ATTR(rb_str_new2("encoding"), aval); }
1106
+ break;
1107
+ case 10:
1108
+ { SET(aval, p); ATTR(rb_str_new2("standalone"), aval); }
1109
+ break;
1110
+ case 11:
1111
+ { SET(aval, p); ATTR(rb_str_new2("public_id"), aval); }
1112
+ break;
1113
+ case 12:
1114
+ { SET(aval, p); ATTR(rb_str_new2("system_id"), aval); }
1115
+ break;
1116
+ case 13:
1117
+ {
1118
+ akey[0] = runtime.getNil();
1119
+ aval[0] = runtime.getNil();
1120
+ mark_akey = -1;
1121
+ mark_aval = -1;
1122
+ }
1123
+ break;
1124
+ case 14:
1125
+ {
1126
+ ATTR(akey, aval);
1127
+ }
1128
+ break;
1129
+ case 15:
1130
+ {curline += 1;}
1131
+ break;
1132
+ case 16:
1133
+ { TEXT_PASS(); }
1134
+ break;
1135
+ case 17:
1136
+ { EBLK(comment, 3); {cs = 204; if (true) break _again;} }
1137
+ break;
1138
+ case 18:
1139
+ { EBLK(cdata, 3); {cs = 204; if (true) break _again;} }
1140
+ break;
1141
+ case 19:
1142
+ { EBLK(procins, 2); {cs = 204; if (true) break _again;} }
1143
+ break;
1144
+ case 22:
1145
+ {tokend = p+1;}
1146
+ break;
1147
+ case 23:
1148
+ {tokend = p+1;{p = ((tokend))-1;}}
1149
+ break;
1150
+ case 24:
1151
+ {tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1152
+ break;
1153
+ case 25:
1154
+ {tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1155
+ break;
1156
+ case 26:
1157
+ {{ TEXT_PASS(); }{p = ((tokend))-1;}}
1158
+ break;
1159
+ case 27:
1160
+ {tokend = p+1;{p = ((tokend))-1;}}
1161
+ break;
1162
+ case 28:
1163
+ {tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1164
+ break;
1165
+ case 29:
1166
+ {tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1167
+ break;
1168
+ case 30:
1169
+ {{ TEXT_PASS(); }{p = ((tokend))-1;}}
1170
+ break;
1171
+ case 31:
1172
+ {tokend = p+1;{p = ((tokend))-1;}}
1173
+ break;
1174
+ case 32:
1175
+ {tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1176
+ break;
1177
+ case 33:
1178
+ {tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1179
+ break;
1180
+ case 34:
1181
+ {act = 8;}
1182
+ break;
1183
+ case 35:
1184
+ {act = 10;}
1185
+ break;
1186
+ case 36:
1187
+ {act = 12;}
1188
+ break;
1189
+ case 37:
1190
+ {act = 15;}
1191
+ break;
1192
+ case 38:
1193
+ {tokend = p+1;{ ELE(xmldecl); }{p = ((tokend))-1;}}
1194
+ break;
1195
+ case 39:
1196
+ {tokend = p+1;{ ELE(doctype); }{p = ((tokend))-1;}}
1197
+ break;
1198
+ case 40:
1199
+ {tokend = p+1;{ ELE(stag); }{p = ((tokend))-1;}}
1200
+ break;
1201
+ case 41:
1202
+ {tokend = p+1;{ ELE(etag); }{p = ((tokend))-1;}}
1203
+ break;
1204
+ case 42:
1205
+ {tokend = p+1;{ ELE(emptytag); }{p = ((tokend))-1;}}
1206
+ break;
1207
+ case 43:
1208
+ {tokend = p+1;{ {{p = ((tokend))-1;}{cs = 214; if (true) break _again;}} }{p = ((tokend))-1;}}
1209
+ break;
1210
+ case 44:
1211
+ {tokend = p+1;{ {{p = ((tokend))-1;}{cs = 216; if (true) break _again;}} }{p = ((tokend))-1;}}
1212
+ break;
1213
+ case 45:
1214
+ {tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1215
+ break;
1216
+ case 46:
1217
+ {tokend = p;{ {{p = ((tokend))-1;}{cs = 218; if (true) break _again;}} }{p = ((tokend))-1;}}
1218
+ break;
1219
+ case 47:
1220
+ {tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
1221
+ break;
1222
+ case 48:
1223
+ {{ {{p = ((tokend))-1;}{cs = 218; if (true) break _again;}} }{p = ((tokend))-1;}}
1224
+ break;
1225
+ case 49:
1226
+ {{ TEXT_PASS(); }{p = ((tokend))-1;}}
1227
+ break;
1228
+ case 50:
1229
+ { switch( act ) {
1230
+ case 8:
1231
+ { ELE(doctype); }
1232
+ break;
1233
+ case 10:
1234
+ { ELE(stag); }
1235
+ break;
1236
+ case 12:
1237
+ { ELE(emptytag); }
1238
+ break;
1239
+ case 15:
1240
+ { TEXT_PASS(); }
1241
+ break;
1242
+ default: break;
1243
+ }
1244
+ {p = ((tokend))-1;}}
1245
+ break;
1246
+ }
1247
+ }
1248
+
1249
+ } while (false);
1250
+ _acts = _hpricot_scan_to_state_actions[cs];
1251
+ _nacts = (int) _hpricot_scan_actions[_acts++];
1252
+ while ( _nacts-- > 0 ) {
1253
+ switch ( _hpricot_scan_actions[_acts++] ) {
1254
+ case 20:
1255
+ {tokstart = -1;}
1256
+ break;
1257
+ }
1258
+ }
1259
+
1260
+ if ( ++p == pe )
1261
+ break _resume;
1262
+ }
1263
+ }
1264
+ }
1265
+
1266
+ if ( cs == hpricot_scan_error ) {
1267
+ if(!tag[0].isNil()) {
1268
+ rb_raise(rb_eHpricotParseError, "parse error on element <"+tag[0].toString()+">, starting on line "+curline+".\n" + NO_WAY_SERIOUSLY);
1269
+ } else {
1270
+ rb_raise(rb_eHpricotParseError, "parse error on line "+curline+".\n" + NO_WAY_SERIOUSLY);
1271
+ }
1272
+ }
1273
+ // Input is finished but an element is still open: treat the pending token as text starting at tokstart.
1274
+ if ( done && ele_open ) {
1275
+ ele_open = false;
1276
+ if(tokstart > -1) {
1277
+ mark_tag = tokstart;
1278
+ tokstart = -1;
1279
+ text = true;
1280
+ }
1281
+ }
1282
+ // No token in progress: refill from index 0. Otherwise slide the partial token to the front of buf.
1283
+ if(tokstart == -1) {
1284
+ have = 0;
1285
+ /* text nodes have no tokstart because each byte is parsed alone */
1286
+ if(mark_tag != -1 && text) {
1287
+ if (done) {
1288
+ if(mark_tag < p-1) {
1289
+ CAT(tag, p-1);
1290
+ ELE(sym_text);
1291
+ }
1292
+ } else {
1293
+ CAT(tag, p);
1294
+ }
1295
+ }
1296
+ mark_tag = 0;
1297
+ } else {
1298
+ have = pe - tokstart;
1299
+ System.arraycopy(buf,tokstart,buf,0,have);
1300
+ SLIDE(tag);
1301
+ SLIDE(akey);
1302
+ SLIDE(aval);
1303
+ tokend = (tokend - tokstart);
1304
+ tokstart = 0;
1305
+ }
1306
+ }
1307
+ return runtime.getNil();
1308
+ }
1309
+
1310
+ public static IRubyObject __hpricot_scan(IRubyObject recv, IRubyObject port, Block block) { // static entry point: builds one scanner per call
1311
+ final Ruby rt = recv.getRuntime();
1312
+ final HpricotScanService scanner = new HpricotScanService();
1313
+ scanner.runtime = rt;
1314
+ scanner.xmldecl = rt.newSymbol("xmldecl"); // token-type symbols, one per scanner field of the same name
1315
+ scanner.doctype = rt.newSymbol("doctype");
1316
+ scanner.procins = rt.newSymbol("procins");
1317
+ scanner.stag = rt.newSymbol("stag");
1318
+ scanner.etag = rt.newSymbol("etag");
1319
+ scanner.emptytag = rt.newSymbol("emptytag");
1320
+ scanner.comment = rt.newSymbol("comment");
1321
+ scanner.cdata = rt.newSymbol("cdata");
1322
+ scanner.sym_text = rt.newSymbol("text"); // the only field whose name differs from its symbol
1323
+ scanner.block = block; // block that rb_yield_tokens yields each token to
1324
+ return scanner.hpricot_scan(recv, port); // result of the instance scan is passed straight through
1325
+ }
1326
+
1327
+
1328
+ public boolean basicLoad(final Ruby runtime) throws IOException { // load hook: registers the scanner with the given runtime
1329
+ HpricotScanService.Init_hpricot_scan(runtime);
1330
+ return true; // unconditional; a failure can only surface as an exception from the call above
1331
+ }
1332
+
1333
+ public static void Init_hpricot_scan(Ruby runtime) { // defines the Hpricot module, its buffer_size accessor, its scan method and ParseError
1334
+ final RubyModule hpricot = runtime.defineModule("Hpricot");
1335
+ final CallbackFactory callbacks = runtime.callbackFactory(HpricotScanService.class);
1336
+ hpricot.getMetaClass().attr_accessor(new IRubyObject[]{runtime.newSymbol("buffer_size")}); // accessor lives on the module's metaclass
1337
+ hpricot.getMetaClass().defineMethod("scan",callbacks.getSingletonMethod("__hpricot_scan",IRubyObject.class)); // scan dispatches to __hpricot_scan
1338
+ hpricot.defineClassUnder("ParseError",runtime.getClass("Exception"),runtime.getClass("Exception").getAllocator()); // ParseError subclasses Exception and reuses its allocator
1339
+ }
1340
+ }