webtranslateit-hpricot 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/CHANGELOG +122 -0
- data/COPYING +18 -0
- data/README.md +295 -0
- data/Rakefile +237 -0
- data/ext/fast_xs/FastXsService.java +1123 -0
- data/ext/fast_xs/extconf.rb +4 -0
- data/ext/fast_xs/fast_xs.c +210 -0
- data/ext/hpricot_scan/HpricotCss.java +850 -0
- data/ext/hpricot_scan/HpricotScanService.java +2085 -0
- data/ext/hpricot_scan/MANIFEST +0 -0
- data/ext/hpricot_scan/extconf.rb +9 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_css.c +3511 -0
- data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
- data/ext/hpricot_scan/hpricot_css.rl +120 -0
- data/ext/hpricot_scan/hpricot_scan.c +6848 -0
- data/ext/hpricot_scan/hpricot_scan.h +79 -0
- data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
- data/ext/hpricot_scan/hpricot_scan.rl +911 -0
- data/extras/hpricot.png +0 -0
- data/hpricot.gemspec +18 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +217 -0
- data/lib/hpricot/elements.rb +514 -0
- data/lib/hpricot/htmlinfo.rb +691 -0
- data/lib/hpricot/inspect.rb +103 -0
- data/lib/hpricot/modules.rb +40 -0
- data/lib/hpricot/parse.rb +38 -0
- data/lib/hpricot/tag.rb +219 -0
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +839 -0
- data/lib/hpricot/xchar.rb +95 -0
- data/lib/hpricot.rb +26 -0
- data/setup.rb +1585 -0
- data/test/files/basic.xhtml +17 -0
- data/test/files/boingboing.html +2266 -0
- data/test/files/cy0.html +3653 -0
- data/test/files/immob.html +400 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/uswebgen.html +220 -0
- data/test/files/utf8.html +1054 -0
- data/test/files/week9.html +1723 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +7 -0
- data/test/nokogiri-bench.rb +64 -0
- data/test/test_alter.rb +96 -0
- data/test/test_builder.rb +37 -0
- data/test/test_parser.rb +496 -0
- data/test/test_paths.rb +25 -0
- data/test/test_preserved.rb +88 -0
- data/test/test_xml.rb +28 -0
- metadata +106 -0
data/Rakefile
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
require 'bundler/setup'
|
|
2
|
+
ENV.delete('RUBYOPT') # Don't propagate RUBYOPT/Bundler to subprocesses
|
|
3
|
+
require 'rake/clean'
|
|
4
|
+
require 'rubygems/package_task'
|
|
5
|
+
require 'rdoc/task'
|
|
6
|
+
require 'rake/testtask'
|
|
7
|
+
begin
|
|
8
|
+
require 'rake/extensiontask'
|
|
9
|
+
rescue LoadError
|
|
10
|
+
abort "To build, please first gem install rake-compiler"
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
RbConfig = Config unless defined?(RbConfig)
|
|
14
|
+
|
|
15
|
+
# Gem name; also used as the prefix of the package name below.
NAME = "hpricot"
# Build revision: the number of commits listed by `git rev-list HEAD`, plus
# one. The git executable can be overridden through the GIT environment
# variable. If the executable cannot be spawned at all (e.g. git is not
# installed) fall back to "1" -- the same value an empty listing produces --
# so that loading the Rakefile does not crash.
REV = begin
  (`#{ENV['GIT'] || "git"} rev-list HEAD`.split.length + 1).to_s
rescue SystemCallError
  "1"
end
# Gem version: VERSION from the environment wins, otherwise "0.8.<REV>".
# (REV is always a String, so no nil check is needed.)
VERS = ENV['VERSION'] || "0.8.#{REV}"
# Package base name, "<name>-<version>".
PKG = "#{NAME}-#{VERS}"
|
|
19
|
+
# Glob matching every kind of build product (native libraries, jars, object
# files, compiled classes, ...).
BIN = "*.{bundle,jar,so,o,obj,pdb,lib,def,exp,class,rbc}"
# Everything `rake clean` removes: build products anywhere in the tree, the
# generated Makefiles, editor swap files, built gems and the fat-binary stubs.
CLEAN.include(
  BIN, "ext/**/#{BIN}", "lib/**/#{BIN}", "test/**/#{BIN}",
  'ext/fast_xs/Makefile', 'ext/hpricot_scan/Makefile',
  '**/.*.sw?', '*.gem', '.config', 'pkg', 'lib/hpricot_scan.rb', 'lib/fast_xs.rb'
)
|
|
23
|
+
# Options handed to RDoc, both by the rdoc task and by the gem specification.
RDOC_OPTS = ['--quiet', '--title', 'The Hpricot Reference', '--main', 'README.md', '--inline-source']
# Files shipped in the gem, concatenated in this order: top-level documents,
# everything under bin/doc/test/extras, the Ruby library (minus the generated
# fat-binary stubs), the extension sources, and finally the Ragel outputs.
PKG_FILES = [
  %w(CHANGELOG COPYING README.md Rakefile),
  Dir.glob("{bin,doc,test,extras}/**/*"),
  Dir.glob("lib/**/*.rb") - %w(lib/hpricot_scan.rb lib/fast_xs.rb),
  Dir.glob("ext/**/*.{h,java,c,rb,rl}"),
  # needed because they are generated later
  %w[ext/hpricot_scan/hpricot_scan.c ext/hpricot_scan/hpricot_css.c ext/hpricot_scan/HpricotScanService.java]
].inject(:+)
|
|
29
|
+
# Maps a readable name for each Ragel C code-generation style to the suffix of
# the ragel switch that selects it; the :ragel task passes it as "-<value>".
# All keys are snake_case (the last key previously contained a space).
RAGEL_C_CODE_GENERATION_STYLES = {
  "table_driven" => 'T0',
  "faster_table_driven" => 'T1',
  "flat_table_driven" => 'F0',
  "faster_flat_table_driven" => 'F1',
  "goto_driven" => 'G0',
  "faster_goto_driven" => 'G1',
  "really_fast_goto_driven" => 'G2'
  # "n_way_split_really_fast_goto_driven" => 'P<N>'
}.freeze
# Style used by the :ragel task; must be a key of the table above.
DEFAULT_RAGEL_C_CODE_GENERATION = "really_fast_goto_driven".freeze
|
|
40
|
+
# Base gem specification for the plain Ruby platform; the native extensions
# are listed through their extconf.rb files.
SPEC =
  Gem::Specification.new do |s|
    s.name = NAME
    s.version = VERS
    s.platform = Gem::Platform::RUBY
    # has_rdoc= and rubyforge_project= are deprecated RubyGems setters; only
    # call them where the running RubyGems still provides them, so the
    # Rakefile keeps loading on releases that drop them.
    s.has_rdoc = true if s.respond_to?(:has_rdoc=)
    s.rdoc_options += RDOC_OPTS
    s.extra_rdoc_files = ["README.md", "CHANGELOG", "COPYING"]
    s.summary = "a swift, liberal HTML parser with a fantastic library"
    s.description = s.summary
    s.author = "why the lucky stiff"
    s.email = 'why@ruby-lang.org'
    s.homepage = 'http://code.whytheluckystiff.net/hpricot/'
    s.rubyforge_project = 'hobix' if s.respond_to?(:rubyforge_project=)
    s.files = PKG_FILES
    s.require_paths = ["lib"]
    s.extensions = FileList["ext/**/extconf.rb"].to_a
    s.bindir = "bin"
  end
# Dup the spec before any of its calculated ivars are set (e.g., #cache_file)
Win32Spec = SPEC.dup
JRubySpec = SPEC.dup
|
|
62
|
+
|
|
63
|
+
# FAT cross-compile
|
|
64
|
+
# Pass RUBY_CC_VERSION=1.8.7:1.9.2 when packaging for 1.8+1.9 mswin32 binaries
|
|
65
|
+
# For each native extension: define the rake-compiler task, work around the
# 1.9.2 cross-compile .def problem, and define the "fat" loader stub.
%w(hpricot_scan fast_xs).each do |target|
  Rake::ExtensionTask.new(target, SPEC) do |cfg|
    # Fat builds keep one binary per Ruby version under lib/<target>/.
    cfg.lib_dir = File.join('lib', target) if ENV['RUBY_CC_VERSION']
    cfg.cross_compile = true # enable cross compilation (requires cross compile toolchain)
    cfg.cross_platform = 'i386-mswin32' # forces the Windows platform instead of the default one
  end

  # HACK around 1.9.2 cross .def file creation
  exports_def = "tmp/i386-mswin32/#{target}/1.9.2/#{target}-i386-mingw32.def"
  exports_dir = File.dirname(exports_def)
  directory exports_dir
  file exports_def => exports_dir do |t|
    File.open(t.name, "w") { |io| io << "EXPORTS\nInit_#{target}\n" }
  end

  # The Makefile in that directory must not be generated before the .def file.
  task File.join(exports_dir, "Makefile") => exports_def
  # END HACK

  # Loader stub that requires the binary matching the running Ruby's
  # major.minor version (the interpolation is escaped so it is evaluated when
  # the stub is loaded, not when it is written).
  file "lib/#{target}.rb" do |t|
    File.open(t.name, "w") do |io|
      io.puts %{require "#{target}/\#{RUBY_VERSION.sub(/\\.\\d+$/, '')}/#{target}"}
    end
  end
end

# The scanner sources are generated by Ragel, so run :ragel before extconf.
file 'ext/hpricot_scan/extconf.rb' => :ragel
|
|
90
|
+
|
|
91
|
+
desc "set environment variables to build and/or test with debug options"
task :debug do
  # Append the debug flags to whatever CFLAGS already holds (an unset
  # variable interpolates as the empty string).
  ENV['CFLAGS'] = "#{ENV['CFLAGS']} -g -DDEBUG"
end
|
|
96
|
+
|
|
97
|
+
desc "Does a full compile, test run"
# Under JRuby the Java extensions are compiled instead of the C ones.
task :default => [defined?(JRUBY_VERSION) ? :compile_java : :compile, :clean_fat_rb, :test]

# Removes the generated fat-binary loader stubs.
task :clean_fat_rb do
  %w(lib/hpricot_scan.rb lib/fast_xs.rb).each { |stub| rm_f stub }
end
|
|
108
|
+
|
|
109
|
+
desc "Packages up Hpricot for all platforms."
task :package => :clean

desc "Run all the tests"
Rake::TestTask.new do |tests|
  tests.libs.push("test")
  tests.test_files = FileList.new('test/test_*.rb')
  tests.verbose = true
end
|
|
118
|
+
|
|
119
|
+
# API documentation, written to doc/rdoc.
Rake::RDocTask.new do |doc|
  doc.rdoc_dir = 'doc/rdoc'
  doc.options += RDOC_OPTS
  doc.main = "README.md"
  doc.rdoc_files.include('README.md', 'CHANGELOG', 'COPYING', 'lib/**/*.rb')
end

# Source gem (plus tarball) built from the base specification.
Gem::PackageTask.new(SPEC) do |pkg|
  pkg.need_tar = true
  pkg.gem_spec = SPEC
end
|
|
130
|
+
|
|
131
|
+
### Win32 Packages ###
|
|
132
|
+
# Binary gem for Windows: ships the loader stubs and the precompiled 1.8 and
# 1.9 shared objects instead of building the extensions at install time.
Win32Spec.platform = 'i386-mswin32'
Win32Spec.files = PKG_FILES + (%w(hpricot_scan fast_xs).flat_map do |lib|
  so_18 = "lib/#{lib}/1.8/#{lib}.so"
  unless ENV.key?('RUBY_CC_VERSION')
    # Without a fat cross-compile request the binaries cannot be produced;
    # fail with instructions as soon as rake tries to build one.
    file so_18 do
      abort "ERROR while packaging: re-run for fat win32 gems:\nrake #{ARGV.join(' ')} RUBY_CC_VERSION=1.8.7:1.9.2"
    end
  end
  ["lib/#{lib}.rb", so_18, "lib/#{lib}/1.9/#{lib}.so"]
end)
Win32Spec.extensions = []

Gem::PackageTask.new(Win32Spec) do |pkg|
  pkg.need_tar = false
  pkg.gem_spec = Win32Spec
end
|
|
147
|
+
|
|
148
|
+
# Binary gem for JRuby: ships the prebuilt jars and has no native extensions.
JRubySpec.platform = 'java'
JRubySpec.files = PKG_FILES + %w[lib/hpricot_scan.jar lib/fast_xs.jar]
JRubySpec.extensions = []

Gem::PackageTask.new(JRubySpec) do |pkg|
  pkg.need_tar = false
  pkg.gem_spec = JRubySpec
end
|
|
156
|
+
|
|
157
|
+
desc "Determines the Ragel version and displays it on the console along with the location of the Ragel binary."
task :ragel_version do
  # Run a command and return its output; an executable that cannot be spawned
  # (ragel or `which` not installed) yields "" instead of raising, so the
  # dependent tasks can report "Ragel 6.1 or greater is required." themselves.
  capture = lambda do |command|
    begin
      `#{command}`
    rescue SystemCallError
      ""
    end
  end

  # Kept as a Float because the :ragel and :ragel_java tasks compare it with
  # 6.1; unparsable or missing output becomes 0.0.
  @ragel_v = capture.call("ragel -v")[/(version )(\S*)/, 2].to_f
  puts "Using ragel version: #{@ragel_v}, location: #{capture.call('which ragel')}"
  @ragel_v
end
|
|
163
|
+
|
|
164
|
+
desc "Generates the C scanner code with Ragel."
task :ragel => [:ragel_version] do
  # Bail out early when the installed Ragel is too old.
  unless @ragel_v >= 6.1
    STDERR.puts "Ragel 6.1 or greater is required."
    exit(1)
  end

  @ragel_c_code_generation_style = RAGEL_C_CODE_GENERATION_STYLES[DEFAULT_RAGEL_C_CODE_GENERATION]
  Dir.chdir("ext/hpricot_scan") do
    # One C file per state machine: the HTML scanner, then the CSS parser.
    %w(hpricot_scan hpricot_css).each do |machine|
      sh %{ragel #{machine}.rl -#{@ragel_c_code_generation_style} -o #{machine}.c}
    end
  end
end
|
|
177
|
+
|
|
178
|
+
# Java only supports the table-driven code
|
|
179
|
+
# generation style at this point.
|
|
180
|
+
desc "Generates the Java scanner code using the Ragel table-driven code generation style."
task :ragel_java => [:ragel_version] do
  # Bail out early when the installed Ragel is too old.
  unless @ragel_v >= 6.1
    STDERR.puts "Ragel 6.1 or greater is required."
    exit(1)
  end

  puts "compiling with ragel version #{@ragel_v}"
  Dir.chdir("ext/hpricot_scan") do
    # [generated Java file, Ragel source], in generation order.
    [["HpricotCss.java", "hpricot_css.java.rl"],
     ["HpricotScanService.java", "hpricot_scan.java.rl"]].each do |java_file, ragel_file|
      sh %{ragel -J -o #{java_file} #{ragel_file}}
    end
  end
end
|
|
193
|
+
|
|
194
|
+
### JRuby Compile ###
|
|
195
|
+
|
|
196
|
+
# Builds the class path argument for the javac command line.
#
# The JRuby class path is looked up, in order, from:
# 1. the running JVM's java.class.path and sun.boot.class.path properties
#    (only available when this Rakefile runs under JRuby),
# 2. the JRUBY_PARENT_CLASSPATH environment variable,
# 3. every jar under $JRUBY_HOME/lib.
#
# Returns %{-cp "<path>"} when a class path was found, or "" when none was
# found but the CLASSPATH environment variable already mentions jruby (javac
# then reads CLASSPATH itself; passing an empty -cp would override it).
# Aborts the rake run when no JRuby class path can be determined at all.
def java_classpath_arg # myriad of ways to discover JRuby classpath
  jruby_cpath =
    begin
      # A property may be missing (sun.boot.class.path is nil on Java 9+);
      # nil.to_s contributes no entries instead of discarding the other one.
      entries = %w[java.class.path sun.boot.class.path].flat_map do |property|
        Java::java.lang.System.getProperty(property).to_s.split(File::PATH_SEPARATOR)
      end
      entries.join(File::PATH_SEPARATOR)
    rescue StandardError
      # Not running under JRuby (the Java constant is undefined), or the JVM
      # refused the lookup: fall through to the environment-based sources.
      nil
    end
  jruby_cpath = nil if jruby_cpath && jruby_cpath.empty?

  jruby_cpath ||= ENV['JRUBY_PARENT_CLASSPATH']
  if jruby_cpath.nil? && ENV['JRUBY_HOME']
    jruby_cpath = FileList["#{ENV['JRUBY_HOME']}/lib/*.jar"].join(File::PATH_SEPARATOR)
  end
  # An empty result (e.g. no jars under JRUBY_HOME/lib) counts as "not found".
  jruby_cpath = nil if jruby_cpath && jruby_cpath.empty?

  return "-cp \"#{jruby_cpath}\"" if jruby_cpath
  return "" if ENV['CLASSPATH'] =~ /jruby/

  abort "WARNING: No JRuby classpath has been set up.\n" \
        "Define JRUBY_HOME=/path/to/jruby on the command line or in the environment"
end
|
|
213
|
+
|
|
214
|
+
# Compiles the given Java sources against the JRuby class path and packs
# every .class file in the current directory into +jarname+.
#
# filenames - Array of .java file names, relative to the current directory.
# jarname   - name of the jar file to create.
def compile_java(filenames, jarname)
  sources = filenames.join(" ")
  sh "javac -source 1.5 -target 1.5 #{java_classpath_arg} #{sources}"
  sh "jar cf #{jarname} *.class"
end
|
|
218
|
+
|
|
219
|
+
# Builds hpricot_scan.jar inside ext/hpricot_scan from the Ragel-generated
# Java sources.
task :hpricot_scan_java => [:ragel_java] do
  Dir.chdir("ext/hpricot_scan") do
    compile_java(%w[HpricotScanService.java HpricotCss.java], "hpricot_scan.jar")
  end
end
|
|
224
|
+
|
|
225
|
+
# Builds fast_xs.jar inside ext/fast_xs.
task :fast_xs_java do
  Dir.chdir("ext/fast_xs") do
    compile_java(%w[FastXsService.java], "fast_xs.jar")
  end
end
|
|
230
|
+
|
|
231
|
+
# For each extension, lib/<name>.jar is produced by running the matching
# <name>_java task and moving the jar it built into lib/; :compile_java
# depends on both jars.
%w(hpricot_scan fast_xs).each do |name|
  jar = "lib/#{name}.jar"
  file jar => "#{name}_java" do
    mv "ext/#{name}/#{name}.jar", "lib"
  end
  task :compile_java => jar
end
|
|
237
|
+
|