webtranslateit-hpricot 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/CHANGELOG +122 -0
  4. data/COPYING +18 -0
  5. data/README.md +295 -0
  6. data/Rakefile +237 -0
  7. data/ext/fast_xs/FastXsService.java +1123 -0
  8. data/ext/fast_xs/extconf.rb +4 -0
  9. data/ext/fast_xs/fast_xs.c +210 -0
  10. data/ext/hpricot_scan/HpricotCss.java +850 -0
  11. data/ext/hpricot_scan/HpricotScanService.java +2085 -0
  12. data/ext/hpricot_scan/MANIFEST +0 -0
  13. data/ext/hpricot_scan/extconf.rb +9 -0
  14. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  15. data/ext/hpricot_scan/hpricot_css.c +3511 -0
  16. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  17. data/ext/hpricot_scan/hpricot_css.rl +120 -0
  18. data/ext/hpricot_scan/hpricot_scan.c +6848 -0
  19. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  20. data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
  21. data/ext/hpricot_scan/hpricot_scan.rl +911 -0
  22. data/extras/hpricot.png +0 -0
  23. data/hpricot.gemspec +18 -0
  24. data/lib/hpricot/blankslate.rb +63 -0
  25. data/lib/hpricot/builder.rb +217 -0
  26. data/lib/hpricot/elements.rb +514 -0
  27. data/lib/hpricot/htmlinfo.rb +691 -0
  28. data/lib/hpricot/inspect.rb +103 -0
  29. data/lib/hpricot/modules.rb +40 -0
  30. data/lib/hpricot/parse.rb +38 -0
  31. data/lib/hpricot/tag.rb +219 -0
  32. data/lib/hpricot/tags.rb +164 -0
  33. data/lib/hpricot/traverse.rb +839 -0
  34. data/lib/hpricot/xchar.rb +95 -0
  35. data/lib/hpricot.rb +26 -0
  36. data/setup.rb +1585 -0
  37. data/test/files/basic.xhtml +17 -0
  38. data/test/files/boingboing.html +2266 -0
  39. data/test/files/cy0.html +3653 -0
  40. data/test/files/immob.html +400 -0
  41. data/test/files/pace_application.html +1320 -0
  42. data/test/files/tenderlove.html +16 -0
  43. data/test/files/uswebgen.html +220 -0
  44. data/test/files/utf8.html +1054 -0
  45. data/test/files/week9.html +1723 -0
  46. data/test/files/why.xml +19 -0
  47. data/test/load_files.rb +7 -0
  48. data/test/nokogiri-bench.rb +64 -0
  49. data/test/test_alter.rb +96 -0
  50. data/test/test_builder.rb +37 -0
  51. data/test/test_parser.rb +496 -0
  52. data/test/test_paths.rb +25 -0
  53. data/test/test_preserved.rb +88 -0
  54. data/test/test_xml.rb +28 -0
  55. metadata +106 -0
data/Rakefile ADDED
@@ -0,0 +1,237 @@
1
+ require 'bundler/setup'
2
+ ENV.delete('RUBYOPT') # Don't propagate RUBYOPT/Bundler to subprocesses
3
+ require 'rake/clean'
4
+ require 'rubygems/package_task'
5
+ require 'rdoc/task'
6
+ require 'rake/testtask'
7
+ begin
8
+ require 'rake/extensiontask'
9
+ rescue LoadError
10
+ abort "To build, please first gem install rake-compiler"
11
+ end
12
+
13
+ RbConfig = Config unless defined?(RbConfig)
14
+
15
# Gem identity. REV is derived from the number of commits reachable from HEAD
# (plus one); the git executable can be overridden through ENV['GIT'].
NAME = "hpricot"
git_command = ENV['GIT'] || "git"
REV = (`#{git_command} rev-list HEAD`.split.length + 1).to_s
# An explicit VERSION in the environment wins over the computed "0.8.<REV>".
VERS = ENV['VERSION'] || "0.8.#{REV}"
PKG = "#{NAME}-#{VERS}"

# Glob matching every kind of build artifact the extension builds leave behind.
BIN = "*.{bundle,jar,so,o,obj,pdb,lib,def,exp,class,rbc}"
binary_globs = ["", "ext/**/", "lib/**/", "test/**/"].map { |prefix| "#{prefix}#{BIN}" }
CLEAN.include(
  binary_globs + [
    'ext/fast_xs/Makefile', 'ext/hpricot_scan/Makefile',
    '**/.*.sw?', '*.gem', '.config', 'pkg', 'lib/hpricot_scan.rb', 'lib/fast_xs.rb'
  ]
)

RDOC_OPTS = ['--quiet', '--title', 'The Hpricot Reference', '--main', 'README.md', '--inline-source']

# Files shipped in every flavour of the gem. The fat-binary loader stubs
# (lib/hpricot_scan.rb, lib/fast_xs.rb) are excluded from the lib glob.
PKG_FILES = [
  %w(CHANGELOG COPYING README.md Rakefile),
  Dir.glob("{bin,doc,test,extras}/**/*"),
  Dir.glob("lib/**/*.rb") - %w(lib/hpricot_scan.rb lib/fast_xs.rb),
  Dir.glob("ext/**/*.{h,java,c,rb,rl}"),
  # needed because they are generated later
  %w[ext/hpricot_scan/hpricot_scan.c ext/hpricot_scan/hpricot_css.c ext/hpricot_scan/HpricotScanService.java]
].flatten
29
# Maps a descriptive name for each Ragel C code generation style to the
# command-line flag suffix passed to ragel (e.g. 'G2' becomes `-G2`).
# All keys use underscores only, so they can be typed without quoting.
RAGEL_C_CODE_GENERATION_STYLES = {
  "table_driven" => 'T0',
  "faster_table_driven" => 'T1',
  "flat_table_driven" => 'F0',
  "faster_flat_table_driven" => 'F1',
  "goto_driven" => 'G0',
  "faster_goto_driven" => 'G1',
  "really_fast_goto_driven" => 'G2'
  # "n_way_split_really_fast_goto_driven" => 'P<N>'
}.freeze
# Key into RAGEL_C_CODE_GENERATION_STYLES used when generating the C scanners.
DEFAULT_RAGEL_C_CODE_GENERATION = "really_fast_goto_driven".freeze
40
# Base gem specification for the plain-Ruby (compile on install) gem.
SPEC = Gem::Specification.new do |spec|
  # Identity
  spec.name = NAME
  spec.version = VERS
  spec.platform = Gem::Platform::RUBY

  # Description and authorship
  spec.summary = "a swift, liberal HTML parser with a fantastic library"
  spec.description = spec.summary
  spec.author = "why the lucky stiff"
  spec.email = 'why@ruby-lang.org'
  spec.homepage = 'http://code.whytheluckystiff.net/hpricot/'
  spec.rubyforge_project = 'hobix'

  # Documentation
  spec.has_rdoc = true
  spec.rdoc_options += RDOC_OPTS
  spec.extra_rdoc_files = %w(README.md CHANGELOG COPYING)

  # Contents
  spec.files = PKG_FILES
  spec.require_paths = %w(lib)
  spec.extensions = FileList["ext/**/extconf.rb"].to_a
  spec.bindir = "bin"
end

# Dup the spec before any of its calculated ivars are set (e.g., #cache_file)
Win32Spec = SPEC.dup
JRubySpec = SPEC.dup
62
+
63
# FAT cross-compile
# Pass RUBY_CC_VERSION=1.8.7:1.9.2 when packaging for 1.8+1.9 mswin32 binaries
%w(hpricot_scan fast_xs).each do |target|
  Rake::ExtensionTask.new(target, SPEC) do |ext|
    # Fat gems keep one binary per Ruby version under lib/<target>/.
    ext.lib_dir = File.join('lib', target) if ENV['RUBY_CC_VERSION']
    # enable cross compilation (requires cross compile toolchain)
    ext.cross_compile = true
    # forces the Windows platform instead of the default one
    ext.cross_platform = 'i386-mswin32'
  end

  # HACK around 1.9.2 cross .def file creation
  def_file = "tmp/i386-mswin32/#{target}/1.9.2/#{target}-i386-mingw32.def"
  def_dir = File.dirname(def_file)

  directory def_dir
  file def_file => def_dir do |t|
    File.open(t.name, "w") { |f| f << "EXPORTS\nInit_#{target}\n" }
  end

  task File.join(def_dir, "Makefile") => def_file
  # END HACK

  # Loader stub that requires the binary matching the running Ruby's
  # major.minor version.
  file "lib/#{target}.rb" do |t|
    File.open(t.name, "w") do |f|
      f.puts %{require "#{target}/\#{RUBY_VERSION.sub(/\\.\\d+$/, '')}/#{target}"}
    end
  end
end

# The C scanner sources must be generated by Ragel before extconf runs.
file 'ext/hpricot_scan/extconf.rb' => :ragel
90
+
91
desc "set environment variables to build and/or test with debug options"
task :debug do
  # Append the debug flags to whatever CFLAGS is already set (possibly nothing).
  ENV['CFLAGS'] = "#{ENV['CFLAGS']} -g -DDEBUG"
end

desc "Does a full compile, test run"
# Under JRuby the Java extensions are built instead of the C ones.
compile_step = defined?(JRUBY_VERSION) ? :compile_java : :compile
task :default => [compile_step, :clean_fat_rb, :test]

# Removes the fat-binary loader stubs so a local build loads its own binaries.
task :clean_fat_rb do
  %w(lib/hpricot_scan.rb lib/fast_xs.rb).each { |stub| rm_f stub }
end
108
+
109
desc "Packages up Hpricot for all platforms."
task :package => :clean

desc "Run all the tests"
Rake::TestTask.new do |test|
  test.libs.push("test")
  test.test_files = FileList['test/test_*.rb']
  test.verbose = true
end

# API documentation, generated into doc/rdoc.
Rake::RDocTask.new do |doc|
  doc.rdoc_dir = 'doc/rdoc'
  doc.options += RDOC_OPTS
  doc.main = "README.md"
  doc.rdoc_files.add %w(README.md CHANGELOG COPYING lib/**/*.rb)
end

# Source gem (plus tarball) for the plain Ruby platform.
Gem::PackageTask.new(SPEC) do |pkg|
  pkg.need_tar = true
  pkg.gem_spec = SPEC
end
130
+
131
### Win32 Packages ###
Win32Spec.platform = 'i386-mswin32'
# Each extension contributes a loader stub plus one prebuilt binary per
# supported Ruby series.
win32_binaries = %w(hpricot_scan fast_xs).map do |t|
  unless ENV['RUBY_CC_VERSION']
    # Without a fat cross-compile the 1.8 binary cannot be built; fail with
    # instructions as soon as packaging asks for it.
    file "lib/#{t}/1.8/#{t}.so" do
      abort "ERROR while packaging: re-run for fat win32 gems:\nrake #{ARGV.join(' ')} RUBY_CC_VERSION=1.8.7:1.9.2"
    end
  end
  ["lib/#{t}.rb", "lib/#{t}/1.8/#{t}.so", "lib/#{t}/1.9/#{t}.so"]
end
Win32Spec.files = PKG_FILES + win32_binaries.flatten
# Binaries are prebuilt, so nothing is compiled at install time.
Win32Spec.extensions = []

Gem::PackageTask.new(Win32Spec) do |pkg|
  pkg.need_tar = false
  pkg.gem_spec = Win32Spec
end

### JRuby Packages ###
JRubySpec.platform = 'java'
JRubySpec.files = PKG_FILES + %w(lib/hpricot_scan.jar lib/fast_xs.jar)
JRubySpec.extensions = []

Gem::PackageTask.new(JRubySpec) do |pkg|
  pkg.need_tar = false
  pkg.gem_spec = JRubySpec
end
156
+
157
desc "Determines the Ragel version and displays it on the console along with the location of the Ragel binary."
task :ragel_version do
  # Take the token following "version " in ragel's banner and keep it as a
  # Float in @ragel_v for the tasks that depend on this one.
  version_token = `ragel -v`[/(version )(\S*)/, 2]
  @ragel_v = version_token.to_f
  ragel_location = `which ragel`
  puts "Using ragel version: #{@ragel_v}, location: #{ragel_location}"
  @ragel_v
end
163
+
164
desc "Generates the C scanner code with Ragel."
task :ragel => [:ragel_version] do
  # @ragel_v is set by the :ragel_version prerequisite.
  unless @ragel_v >= 6.1
    STDERR.puts "Ragel 6.1 or greater is required."
    exit(1)
  end

  @ragel_c_code_generation_style = RAGEL_C_CODE_GENERATION_STYLES[DEFAULT_RAGEL_C_CODE_GENERATION]
  Dir.chdir("ext/hpricot_scan") do
    # One generated .c file per .rl grammar, in this order.
    %w(hpricot_scan hpricot_css).each do |grammar|
      sh %{ragel #{grammar}.rl -#{@ragel_c_code_generation_style} -o #{grammar}.c}
    end
  end
end
177
+
178
# Java only supports the table-driven code
# generation style at this point.
desc "Generates the Java scanner code using the Ragel table-driven code generation style."
task :ragel_java => [:ragel_version] do
  # @ragel_v is set by the :ragel_version prerequisite.
  unless @ragel_v >= 6.1
    STDERR.puts "Ragel 6.1 or greater is required."
    exit(1)
  end

  puts "compiling with ragel version #{@ragel_v}"
  Dir.chdir("ext/hpricot_scan") do
    # [generated Java source, Ragel grammar] pairs, generated in this order.
    [
      ["HpricotCss.java", "hpricot_css.java.rl"],
      ["HpricotScanService.java", "hpricot_scan.java.rl"]
    ].each do |java_file, grammar|
      sh %{ragel -J -o #{java_file} #{grammar}}
    end
  end
end
193
+
194
+ ### JRuby Compile ###
195
+
196
# Builds the classpath argument handed to javac when compiling the JRuby
# extensions. Sources are tried in this order:
#
# 1. the running JVM's system properties (only available under JRuby),
# 2. ENV['JRUBY_PARENT_CLASSPATH'],
# 3. every jar under ENV['JRUBY_HOME']/lib.
#
# Returns `-cp "<path>"` when one of those yields a non-empty classpath.
# Returns an empty string when none does but ENV['CLASSPATH'] mentions jruby,
# so that javac falls back to CLASSPATH instead of having it overridden by an
# empty -cp. Aborts the build when no JRuby classpath can be found at all.
def java_classpath_arg # myriad of ways to discover JRuby classpath
  jruby_cpath =
    begin
      # sun.boot.class.path is absent on newer JVMs; to_s keeps a missing
      # property from discarding the entries found in java.class.path.
      entries = %w(java.class.path sun.boot.class.path).map { |property|
        Java::java.lang.System.getProperty(property).to_s.split(File::PATH_SEPARATOR)
      }.flatten
      entries.empty? ? nil : entries.join(File::PATH_SEPARATOR)
    rescue StandardError
      nil # the Java bridge is unavailable: not running under JRuby
    end

  jruby_cpath = ENV['JRUBY_PARENT_CLASSPATH'] if jruby_cpath.nil? || jruby_cpath.empty?

  if (jruby_cpath.nil? || jruby_cpath.empty?) && ENV['JRUBY_HOME']
    jars = Dir.glob("#{ENV['JRUBY_HOME']}/lib/*.jar").sort
    jruby_cpath = jars.join(File::PATH_SEPARATOR)
  end

  return "-cp \"#{jruby_cpath}\"" unless jruby_cpath.nil? || jruby_cpath.empty?

  unless ENV['CLASSPATH'] =~ /jruby/
    abort %{WARNING: No JRuby classpath has been set up.
Define JRUBY_HOME=/path/to/jruby on the command line or in the environment}
  end

  "" # no explicit -cp: javac picks up CLASSPATH from the environment
end
213
+
214
# Compiles the given Java sources and packs the resulting classes into a jar.
# Must be called from the directory holding the sources, because the jar step
# picks up every *.class file in the current directory.
#
# filenames    - Array of .java file names to compile.
# jarname      - name of the jar file to create.
# java_version - value passed to javac's -source and -target options.
#                Defaults to ENV['JAVA_TARGET_VERSION'], or "1.5" when that is
#                unset, so builds on JDKs that no longer accept 1.5 can pick a
#                supported level without editing this file.
def compile_java(filenames, jarname, java_version = ENV['JAVA_TARGET_VERSION'] || "1.5")
  sh %{javac -source #{java_version} -target #{java_version} #{java_classpath_arg} #{filenames.join(" ")}}
  sh %{jar cf #{jarname} *.class}
end
218
+
219
# Builds hpricot_scan.jar inside ext/hpricot_scan once Ragel has generated
# the Java scanner sources.
task :hpricot_scan_java => [:ragel_java] do
  Dir.chdir("ext/hpricot_scan") do
    compile_java(%w(HpricotScanService.java HpricotCss.java), "hpricot_scan.jar")
  end
end

# Builds fast_xs.jar inside ext/fast_xs.
task :fast_xs_java do
  Dir.chdir("ext/fast_xs") do
    compile_java(%w(FastXsService.java), "fast_xs.jar")
  end
end

# Each jar is built in its ext/ directory and then moved into lib/;
# :compile_java depends on both jars being present there.
%w(hpricot_scan fast_xs).each do |ext|
  jar_in_lib = "lib/#{ext}.jar"

  file jar_in_lib => "#{ext}_java" do
    mv "ext/#{ext}/#{ext}.jar", "lib"
  end

  task :compile_java => jar_in_lib
end
237
+