webtranslateit-hpricot 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/CHANGELOG +122 -0
  4. data/COPYING +18 -0
  5. data/README.md +295 -0
  6. data/Rakefile +237 -0
  7. data/ext/fast_xs/FastXsService.java +1123 -0
  8. data/ext/fast_xs/extconf.rb +4 -0
  9. data/ext/fast_xs/fast_xs.c +210 -0
  10. data/ext/hpricot_scan/HpricotCss.java +850 -0
  11. data/ext/hpricot_scan/HpricotScanService.java +2085 -0
  12. data/ext/hpricot_scan/MANIFEST +0 -0
  13. data/ext/hpricot_scan/extconf.rb +9 -0
  14. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  15. data/ext/hpricot_scan/hpricot_css.c +3511 -0
  16. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  17. data/ext/hpricot_scan/hpricot_css.rl +120 -0
  18. data/ext/hpricot_scan/hpricot_scan.c +6848 -0
  19. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  20. data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
  21. data/ext/hpricot_scan/hpricot_scan.rl +911 -0
  22. data/extras/hpricot.png +0 -0
  23. data/hpricot.gemspec +18 -0
  24. data/lib/hpricot/blankslate.rb +63 -0
  25. data/lib/hpricot/builder.rb +217 -0
  26. data/lib/hpricot/elements.rb +514 -0
  27. data/lib/hpricot/htmlinfo.rb +691 -0
  28. data/lib/hpricot/inspect.rb +103 -0
  29. data/lib/hpricot/modules.rb +40 -0
  30. data/lib/hpricot/parse.rb +38 -0
  31. data/lib/hpricot/tag.rb +219 -0
  32. data/lib/hpricot/tags.rb +164 -0
  33. data/lib/hpricot/traverse.rb +839 -0
  34. data/lib/hpricot/xchar.rb +95 -0
  35. data/lib/hpricot.rb +26 -0
  36. data/setup.rb +1585 -0
  37. data/test/files/basic.xhtml +17 -0
  38. data/test/files/boingboing.html +2266 -0
  39. data/test/files/cy0.html +3653 -0
  40. data/test/files/immob.html +400 -0
  41. data/test/files/pace_application.html +1320 -0
  42. data/test/files/tenderlove.html +16 -0
  43. data/test/files/uswebgen.html +220 -0
  44. data/test/files/utf8.html +1054 -0
  45. data/test/files/week9.html +1723 -0
  46. data/test/files/why.xml +19 -0
  47. data/test/load_files.rb +7 -0
  48. data/test/nokogiri-bench.rb +64 -0
  49. data/test/test_alter.rb +96 -0
  50. data/test/test_builder.rb +37 -0
  51. data/test/test_parser.rb +496 -0
  52. data/test/test_paths.rb +25 -0
  53. data/test/test_preserved.rb +88 -0
  54. data/test/test_xml.rb +28 -0
  55. metadata +106 -0
data/Rakefile ADDED
@@ -0,0 +1,237 @@
1
+ require 'bundler/setup'
2
+ ENV.delete('RUBYOPT') # Don't propagate RUBYOPT/Bundler to subprocesses
3
+ require 'rake/clean'
4
+ require 'rubygems/package_task'
5
+ require 'rdoc/task'
6
+ require 'rake/testtask'
7
+ begin
8
+ require 'rake/extensiontask'
9
+ rescue LoadError
10
+ abort "To build, please first gem install rake-compiler"
11
+ end
12
+
# Very old Rubies only expose the build configuration as ::Config.
RbConfig = Config unless defined?(RbConfig)

# Package identity. The revision is the number of commits reachable from HEAD
# plus one, and is appended to the 0.8 series unless VERSION overrides it.
NAME = "hpricot"
REV = `#{ENV['GIT'] || "git"} rev-list HEAD`.split.length.succ.to_s
VERS = ENV['VERSION'] || "0.8.#{REV}"
PKG = "#{NAME}-#{VERS}"

# Build artefacts removed by `rake clean`.
BIN = "*.{bundle,jar,so,o,obj,pdb,lib,def,exp,class,rbc}"
CLEAN.include [
  BIN.to_s, "ext/**/#{BIN}", "lib/**/#{BIN}", "test/**/#{BIN}",
  'ext/fast_xs/Makefile', 'ext/hpricot_scan/Makefile',
  '**/.*.sw?', '*.gem', '.config', 'pkg',
  'lib/hpricot_scan.rb', 'lib/fast_xs.rb'
]

RDOC_OPTS = ['--quiet', '--title', 'The Hpricot Reference', '--main', 'README.md', '--inline-source']

# Files shipped in every gem flavour. The generated fat-binary shims under
# lib/ are excluded here; the platform-specific specs add what they need.
PKG_FILES =
  %w(CHANGELOG COPYING README.md Rakefile) +
  Dir.glob("{bin,doc,test,extras}/**/*") +
  (Dir.glob("lib/**/*.rb") - %w(lib/hpricot_scan.rb lib/fast_xs.rb)) +
  Dir.glob("ext/**/*.{h,java,c,rb,rl}") +
  # needed because they are generated later
  %w[
    ext/hpricot_scan/hpricot_scan.c
    ext/hpricot_scan/hpricot_css.c
    ext/hpricot_scan/HpricotScanService.java
  ]

# Maps a readable name to the Ragel command-line switch (without the dash)
# selecting that C code generation style.
RAGEL_C_CODE_GENERATION_STYLES = {
  "table_driven"             => 'T0',
  "faster_table_driven"      => 'T1',
  "flat_table_driven"        => 'F0',
  "faster_flat_table_driven" => 'F1',
  "goto_driven"              => 'G0',
  "faster_goto_driven"       => 'G1',
  "really_fast goto_driven"  => 'G2'
  # "n_way_split_really_fast_goto_driven" => 'P<N>'
}
DEFAULT_RAGEL_C_CODE_GENERATION = "really_fast goto_driven"
# Base gem specification; the Win32 and JRuby variants start as copies of it.
SPEC = Gem::Specification.new do |spec|
  spec.name = NAME
  spec.version = VERS
  spec.platform = Gem::Platform::RUBY

  spec.summary = "a swift, liberal HTML parser with a fantastic library"
  spec.description = spec.summary
  spec.author = "why the lucky stiff"
  spec.email = 'why@ruby-lang.org'
  spec.homepage = 'http://code.whytheluckystiff.net/hpricot/'
  spec.rubyforge_project = 'hobix'

  spec.has_rdoc = true
  spec.rdoc_options += RDOC_OPTS
  spec.extra_rdoc_files = ["README.md", "CHANGELOG", "COPYING"]

  spec.files = PKG_FILES
  spec.require_paths = ["lib"]
  spec.extensions = FileList["ext/**/extconf.rb"].to_a
  spec.bindir = "bin"
end

# Dup the spec before any of its calculated ivars are set (e.g., #cache_file)
Win32Spec, JRubySpec = SPEC.dup, SPEC.dup
62
+
# FAT cross-compile
# Pass RUBY_CC_VERSION=1.8.7:1.9.2 when packaging for 1.8+1.9 mswin32 binaries
%w(hpricot_scan fast_xs).each do |target|
  Rake::ExtensionTask.new(target, SPEC) do |ext_task|
    # Fat builds keep one binary per Ruby version under lib/<target>/.
    ext_task.lib_dir = File.join('lib', target) if ENV['RUBY_CC_VERSION']
    ext_task.cross_compile = true # enable cross compilation (requires cross compile toolchain)
    ext_task.cross_platform = 'i386-mswin32' # forces the Windows platform instead of the default one
  end

  # HACK around 1.9.2 cross .def file creation
  def_file = "tmp/i386-mswin32/#{target}/1.9.2/#{target}-i386-mingw32.def"
  def_dir = File.dirname(def_file)

  directory def_dir
  file def_file => def_dir do |t|
    File.open(t.name, "w") { |out| out << "EXPORTS\nInit_#{target}\n" }
  end

  task File.join(def_dir, "Makefile") => def_file
  # END HACK

  # Shim that requires the binary matching the running Ruby's major.minor.
  file "lib/#{target}.rb" do |t|
    File.open(t.name, "w") do |out|
      out.puts %{require "#{target}/\#{RUBY_VERSION.sub(/\\.\\d+$/, '')}/#{target}"}
    end
  end
end

# The C scanner sources must be generated before the extension is configured.
file 'ext/hpricot_scan/extconf.rb' => :ragel
90
+
desc "set environment variables to build and/or test with debug options"
task :debug do
  # An unset CFLAGS interpolates as the empty string.
  ENV['CFLAGS'] = "#{ENV['CFLAGS']} -g -DDEBUG"
end

# Under JRuby the Java extensions are built instead of the C ones.
compile_step = defined?(JRUBY_VERSION) ? :compile_java : :compile

desc "Does a full compile, test run"
task :default => [compile_step, :clean_fat_rb, :test]

# Removes the generated fat-binary shims.
task :clean_fat_rb do
  %w(lib/hpricot_scan.rb lib/fast_xs.rb).each { |shim| rm_f shim }
end

desc "Packages up Hpricot for all platforms."
task :package => [:clean]

desc "Run all the tests"
Rake::TestTask.new do |test_task|
  test_task.libs.push("test")
  test_task.test_files = FileList['test/test_*.rb']
  test_task.verbose = true
end
118
+
# API documentation, written to doc/rdoc.
Rake::RDocTask.new do |doc_task|
  doc_task.main = "README.md"
  doc_task.rdoc_dir = 'doc/rdoc'
  doc_task.options += RDOC_OPTS
  doc_task.rdoc_files.add ['README.md', 'CHANGELOG', 'COPYING', 'lib/**/*.rb']
end

# Source gem plus tarball.
Gem::PackageTask.new(SPEC) do |pkg|
  pkg.gem_spec = SPEC
  pkg.need_tar = true
end

### Win32 Packages ###
# For each extension: the shim plus one prebuilt binary per Ruby series.
# Without RUBY_CC_VERSION a file task is registered for the 1.8 binary that
# aborts with instructions on how to re-run the packaging.
win32_binaries = %w(hpricot_scan fast_xs).map do |ext_name|
  unless ENV['RUBY_CC_VERSION']
    file "lib/#{ext_name}/1.8/#{ext_name}.so" do
      abort "ERROR while packaging: re-run for fat win32 gems:\nrake #{ARGV.join(' ')} RUBY_CC_VERSION=1.8.7:1.9.2"
    end
  end
  ["lib/#{ext_name}.rb", "lib/#{ext_name}/1.8/#{ext_name}.so", "lib/#{ext_name}/1.9/#{ext_name}.so"]
end

Win32Spec.platform = 'i386-mswin32'
Win32Spec.files = PKG_FILES + win32_binaries.flatten
Win32Spec.extensions = []

Gem::PackageTask.new(Win32Spec) do |pkg|
  pkg.gem_spec = Win32Spec
  pkg.need_tar = false
end

# JRuby gem: ships the prebuilt jars and builds no native extensions.
JRubySpec.platform = 'java'
JRubySpec.files = PKG_FILES + %w(lib/hpricot_scan.jar lib/fast_xs.jar)
JRubySpec.extensions = []

Gem::PackageTask.new(JRubySpec) do |pkg|
  pkg.gem_spec = JRubySpec
  pkg.need_tar = false
end
156
+
# Oldest Ragel release the scanner grammars can be generated with.
MINIMUM_RAGEL_VERSION = Gem::Version.new("6.1")

# Extracts the version from the text printed by `ragel -v`.
#
# The version is kept as a Gem::Version rather than a Float so that "6.10"
# is neither displayed as 6.1 nor ordered below "6.9", and so that
# multi-part versions are not truncated.
#
# banner - String output of `ragel -v` (nil is tolerated).
#
# Returns a Gem::Version; version 0 when nothing usable is found, which
# makes the minimum-version check fail just as an unparsable banner did
# before.
def parse_ragel_version(banner)
  raw = banner.to_s[/version (\S+)/, 1]
  usable = raw && Gem::Version.correct?(raw)
  Gem::Version.new(usable ? raw : "0")
end

# Stops the build with exit status 1 unless the Ragel detected by the
# :ragel_version task satisfies MINIMUM_RAGEL_VERSION.
def ensure_supported_ragel
  return if @ragel_v >= MINIMUM_RAGEL_VERSION

  STDERR.puts "Ragel 6.1 or greater is required."
  exit(1)
end

desc "Determines the Ragel version and displays it on the console along with the location of the Ragel binary."
task :ragel_version do
  @ragel_v = parse_ragel_version(`ragel -v`)
  puts "Using ragel version: #{@ragel_v}, location: #{`which ragel`}"
  @ragel_v
end

desc "Generates the C scanner code with Ragel."
task :ragel => [:ragel_version] do
  ensure_supported_ragel

  @ragel_c_code_generation_style = RAGEL_C_CODE_GENERATION_STYLES[DEFAULT_RAGEL_C_CODE_GENERATION]
  Dir.chdir("ext/hpricot_scan") do
    sh %{ragel hpricot_scan.rl -#{@ragel_c_code_generation_style} -o hpricot_scan.c}
    sh %{ragel hpricot_css.rl -#{@ragel_c_code_generation_style} -o hpricot_css.c}
  end
end

# Java only supports the table-driven code
# generation style at this point.
desc "Generates the Java scanner code using the Ragel table-driven code generation style."
task :ragel_java => [:ragel_version] do
  ensure_supported_ragel

  puts "compiling with ragel version #{@ragel_v}"
  Dir.chdir("ext/hpricot_scan") do
    sh %{ragel -J -o HpricotCss.java hpricot_css.java.rl}
    sh %{ragel -J -o HpricotScanService.java hpricot_scan.java.rl}
  end
end
193
+
194
+ ### JRuby Compile ###
195
+
# Builds the classpath option handed to javac when compiling the Java
# extensions.
#
# The JRuby classpath is looked up, in order, from:
#   1. the running JVM's java.class.path and sun.boot.class.path properties
#      (only reachable when this Rakefile itself runs under JRuby),
#   2. the JRUBY_PARENT_CLASSPATH environment variable,
#   3. every jar directly under $JRUBY_HOME/lib.
# If none of these yields a value but CLASSPATH already mentions jruby, no
# option is emitted at all so that javac picks CLASSPATH up by itself.
#
# Returns a String: either `-cp "<classpath>"` or "".
# Aborts the build when no JRuby classpath can be located.
def java_classpath_arg # myriad of ways to discover JRuby classpath
  separator = File::PATH_SEPARATOR

  jruby_cpath =
    begin
      jvm = Java::java.lang.System
      # sun.boot.class.path is absent on Java 9+; to_s turns the nil into an
      # empty entry list instead of discarding java.class.path as well.
      user_entries = jvm.getProperty('java.class.path').to_s.split(separator)
      boot_entries = jvm.getProperty('sun.boot.class.path').to_s.split(separator)
      joined = (user_entries + boot_entries).join(separator)
      joined.empty? ? nil : joined
    rescue StandardError
      # Not running on a JVM (or no Java integration): try the environment.
      nil
    end

  jruby_cpath ||= ENV['JRUBY_PARENT_CLASSPATH']
  if jruby_cpath.nil? && ENV['JRUBY_HOME']
    jruby_cpath = FileList["#{ENV['JRUBY_HOME']}/lib/*.jar"].join(separator)
  end

  return "-cp \"#{jruby_cpath}\"" if jruby_cpath

  # An explicit -cp overrides CLASSPATH, so passing an empty one would hide
  # the very setting that makes this configuration acceptable.
  return "" if ENV['CLASSPATH'] =~ /jruby/

  abort "WARNING: No JRuby classpath has been set up.\n" \
        "Define JRUBY_HOME=/path/to/jruby on the command line or in the environment"
end
213
+
# Compiles the given Java sources in the current directory and bundles every
# resulting class file into a jar.
#
# filenames - Array of .java file names, relative to the current directory.
# jarname   - name of the jar archive to create.
def compile_java(filenames, jarname)
  sources = filenames.join(" ")
  sh "javac -source 1.5 -target 1.5 #{java_classpath_arg} #{sources}"
  sh "jar cf #{jarname} *.class"
end
218
+
# Builds hpricot_scan.jar inside ext/hpricot_scan from the Ragel-generated
# Java sources.
task :hpricot_scan_java => [:ragel_java] do
  Dir.chdir("ext/hpricot_scan") do
    compile_java(%w(HpricotScanService.java HpricotCss.java), "hpricot_scan.jar")
  end
end

# Builds fast_xs.jar inside ext/fast_xs.
task :fast_xs_java do
  Dir.chdir("ext/fast_xs") { compile_java(%w(FastXsService.java), "fast_xs.jar") }
end

# Each jar is built in its ext directory, then moved into lib/;
# :compile_java depends on both.
%w(hpricot_scan fast_xs).each do |ext_name|
  jar_in_lib = "lib/#{ext_name}.jar"

  file jar_in_lib => "#{ext_name}_java" do |_t|
    mv "ext/#{ext_name}/#{ext_name}.jar", "lib"
  end
  task :compile_java => jar_in_lib
end
237
+