webtranslateit-hpricot 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/CHANGELOG +122 -0
- data/COPYING +18 -0
- data/README.md +295 -0
- data/Rakefile +237 -0
- data/ext/fast_xs/FastXsService.java +1123 -0
- data/ext/fast_xs/extconf.rb +4 -0
- data/ext/fast_xs/fast_xs.c +210 -0
- data/ext/hpricot_scan/HpricotCss.java +850 -0
- data/ext/hpricot_scan/HpricotScanService.java +2085 -0
- data/ext/hpricot_scan/MANIFEST +0 -0
- data/ext/hpricot_scan/extconf.rb +9 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_css.c +3511 -0
- data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
- data/ext/hpricot_scan/hpricot_css.rl +120 -0
- data/ext/hpricot_scan/hpricot_scan.c +6848 -0
- data/ext/hpricot_scan/hpricot_scan.h +79 -0
- data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
- data/ext/hpricot_scan/hpricot_scan.rl +911 -0
- data/extras/hpricot.png +0 -0
- data/hpricot.gemspec +18 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +217 -0
- data/lib/hpricot/elements.rb +514 -0
- data/lib/hpricot/htmlinfo.rb +691 -0
- data/lib/hpricot/inspect.rb +103 -0
- data/lib/hpricot/modules.rb +40 -0
- data/lib/hpricot/parse.rb +38 -0
- data/lib/hpricot/tag.rb +219 -0
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +839 -0
- data/lib/hpricot/xchar.rb +95 -0
- data/lib/hpricot.rb +26 -0
- data/setup.rb +1585 -0
- data/test/files/basic.xhtml +17 -0
- data/test/files/boingboing.html +2266 -0
- data/test/files/cy0.html +3653 -0
- data/test/files/immob.html +400 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/uswebgen.html +220 -0
- data/test/files/utf8.html +1054 -0
- data/test/files/week9.html +1723 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +7 -0
- data/test/nokogiri-bench.rb +64 -0
- data/test/test_alter.rb +96 -0
- data/test/test_builder.rb +37 -0
- data/test/test_parser.rb +496 -0
- data/test/test_paths.rb +25 -0
- data/test/test_preserved.rb +88 -0
- data/test/test_xml.rb +28 -0
- metadata +106 -0
data/extras/hpricot.png
ADDED
Binary file
|
data/hpricot.gemspec
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Gem specification for the webtranslateit-hpricot packaging of Hpricot.
# Declares two native extensions (fast_xs and hpricot_scan) through their
# extconf.rb files and ships the sources, library, and test fixtures below.
Gem::Specification.new do |s|
  s.name = %q{webtranslateit-hpricot}
  s.version = "0.9.0"

  s.authors = ["why the lucky stiff"]
  s.date = %q{2012-01-17}
  s.description = %q{a swift, liberal HTML parser with a fantastic library}
  s.email = %q{why@ruby-lang.org}
  s.license = "MIT"
  s.extensions = ["ext/fast_xs/extconf.rb", "ext/hpricot_scan/extconf.rb"]
  s.extra_rdoc_files = ["README.md", "CHANGELOG", "COPYING"]
  s.files = %w(
    .gitignore CHANGELOG COPYING README.md Rakefile
    ext/fast_xs/FastXsService.java
    ext/fast_xs/extconf.rb
    ext/fast_xs/fast_xs.c
    ext/hpricot_scan/HpricotCss.java
    ext/hpricot_scan/HpricotScanService.java
    ext/hpricot_scan/MANIFEST
    ext/hpricot_scan/extconf.rb
    ext/hpricot_scan/hpricot_common.rl
    ext/hpricot_scan/hpricot_css.c
    ext/hpricot_scan/hpricot_css.java.rl
    ext/hpricot_scan/hpricot_css.rl
    ext/hpricot_scan/hpricot_scan.c
    ext/hpricot_scan/hpricot_scan.h
    ext/hpricot_scan/hpricot_scan.java.rl
    ext/hpricot_scan/hpricot_scan.rl
    extras/hpricot.png
    hpricot.gemspec
    lib/hpricot.rb
    lib/hpricot/blankslate.rb
    lib/hpricot/builder.rb
    lib/hpricot/elements.rb
    lib/hpricot/htmlinfo.rb
    lib/hpricot/inspect.rb
    lib/hpricot/modules.rb
    lib/hpricot/parse.rb
    lib/hpricot/tag.rb
    lib/hpricot/tags.rb
    lib/hpricot/traverse.rb
    lib/hpricot/xchar.rb
    setup.rb
    test/files/basic.xhtml
    test/files/boingboing.html
    test/files/cy0.html
    test/files/immob.html
    test/files/pace_application.html
    test/files/tenderlove.html
    test/files/uswebgen.html
    test/files/utf8.html
    test/files/week9.html
    test/files/why.xml
    test/load_files.rb
    test/nokogiri-bench.rb
    test/test_alter.rb
    test/test_builder.rb
    test/test_parser.rb
    test/test_paths.rb
    test/test_preserved.rb
    test/test_xml.rb
  )
  # `s.has_rdoc = true` was removed here: RubyGems deprecated the setter with
  # no replacement, and assigning it only produces a deprecation notice.
  s.homepage = %q{http://wiki.github.com/hpricot/hpricot}
  s.rdoc_options = ["--quiet", "--title", "The Hpricot Reference", "--main", "README", "--inline-source"]
  s.require_paths = ["lib"]
  s.summary = %q{a swift, liberal HTML parser with a fantastic library}
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#--
|
3
|
+
# Copyright 2004 by Jim Weirich (jim@weirichhouse.org).
|
4
|
+
# All rights reserved.
|
5
|
+
|
6
|
+
# Permission is granted for use, copying, modification, distribution,
|
7
|
+
# and distribution of modified versions of this work as long as the
|
8
|
+
# above copyright notice is included.
|
9
|
+
#++
|
10
|
+
|
11
|
+
module Hpricot
|
12
|
+
|
13
|
+
# BlankSlate provides an abstract base class with no predefined
# methods (except for <tt>\_\_send__</tt> and <tt>\_\_id__</tt>).
# BlankSlate is useful as a base class when writing classes that
# depend upon <tt>method_missing</tt> (e.g. dynamic proxies).
class BlankSlate
  class << self

    # Hide the method named +name+ in the BlankSlate class. Don't
    # hide +instance_eval+ or any method beginning with "__".
    #
    # +name+ may be a String or a Symbol. The lookup goes through
    # +method_defined?+ rather than comparing against +instance_methods+,
    # because that list holds Symbols on Ruby 1.9+ and Strings on 1.8;
    # comparing it with <tt>name.to_s</tt> silently hid nothing on 1.9+.
    #
    # Does nothing when no public or protected instance method called
    # +name+ is currently defined.
    def hide(name)
      return unless method_defined?(name)
      return if name.to_s =~ /^(__|instance_eval)/

      # Undefining methods such as +object_id+ makes Ruby emit a warning;
      # mute warnings only for the duration of the undef.
      saved_verbose = $VERBOSE
      $VERBOSE = nil
      begin
        undef_method name
      ensure
        $VERBOSE = saved_verbose
      end
    end
  end

  # Strip every inherited public/protected instance method at load time.
  instance_methods.each { |m| hide(m) }
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Since Ruby is very dynamic, methods added to the ancestors of
|
34
|
+
# BlankSlate <em>after BlankSlate is defined</em> will show up in the
|
35
|
+
# list of available BlankSlate methods. We handle this by defining a
|
36
|
+
# hook in the Object and Kernel classes that will hide any newly defined methods from BlankSlate.
|
37
|
+
module Kernel
  class << self
    # Keep the previous hook reachable under a private alias so it still runs.
    alias_method :hpricot_slate_method_added, :method_added

    # Called by Ruby each time an instance method is defined. Runs the
    # previous hook first, then, when the receiver is Kernel itself, asks
    # Hpricot::BlankSlate to hide the new method.
    def method_added(name)
      hpricot_slate_method_added(name)
      Hpricot::BlankSlate.hide(name) if self == Kernel
    end
  end
end
|
50
|
+
|
51
|
+
class Object
  class << self
    # Keep the previous hook reachable under a private alias so it still runs.
    alias_method :hpricot_slate_method_added, :method_added

    # Called by Ruby each time an instance method is defined on Object or on
    # any class that inherits this singleton method. Runs the previous hook
    # first; only methods added to Object itself are hidden in
    # Hpricot::BlankSlate.
    def method_added(name)
      hpricot_slate_method_added(name)
      Hpricot::BlankSlate.hide(name) if self == Object
    end
  end
end
|
@@ -0,0 +1,217 @@
|
|
1
|
+
require 'hpricot/tags'
|
2
|
+
require 'fast_xs'
|
3
|
+
require 'hpricot/blankslate'
|
4
|
+
require 'hpricot/htmlinfo'
|
5
|
+
|
6
|
+
module Hpricot
|
7
|
+
# XML unescape.
#
# Converts +str+ (via +to_s+) by replacing character references with the
# characters they denote:
#
# * <tt>&name;</tt>  - looked up in NamedCharacters; unknown names become "?"
# * <tt>&#NNN;</tt>  - decimal code point
# * <tt>&#xHH;</tt>  - hexadecimal code point
#
# All three forms are handled in a single pass, so text produced by one
# replacement is never unescaped a second time (e.g. <tt>&amp;#65;</tt>
# yields <tt>&#65;</tt>, not <tt>A</tt>).
def self.uxs(str)
  str.to_s.gsub(/&(?:(\w+)|#(\d+)|#x([0-9a-fA-F]+));/) do
    codepoint =
      if $1
        NamedCharacters[$1] || 63 # 63 = "?" (placeholder for unknown names)
      elsif $2
        $2.to_i
      else
        $3.to_i(16)
      end
    [codepoint].pack("U*")
  end
end
|
14
|
+
|
15
|
+
# Turns +ele+ into a builder and evaluates the block against it.
#
# +ele+ is extended with Builder, each key/value pair of +assigns+ is stored
# on it as an instance variable named after the key, and +blk+ is then run
# with +ele+ as +self+. Returns +ele+.
def self.build(ele = Doc.new, assigns = {}, &blk)
  ele.extend(Builder)
  assigns.each { |name, value| ele.instance_variable_set("@#{name}", value) }
  ele.instance_eval(&blk)
  ele
end
|
23
|
+
|
24
|
+
module Builder
|
25
|
+
|
26
|
+
# Module-wide builder settings. In the code visible here only :tagset and
# :root_attributes are read back (by xhtml_html).
@@default = {
  :indent => 0,
  :output_helpers => true,
  :output_xml_instruction => true,
  :output_meta_tag => true,
  :auto_validation => true,
  :tagset => Hpricot::XHTMLTransitional,
  :root_attributes => {
    :xmlns => 'http://www.w3.org/1999/xhtml',
    :'xml:lang' => 'en',
    :lang => 'en'
  }
}

# Replaces the module-wide setting +option+ with +value+ and returns +value+.
def self.set(option, value)
  @@default.store(option, value)
end
|
41
|
+
|
42
|
+
# Appends +ele+ to this node's children (creating the list on first use),
# points +ele+'s parent at this node, and returns +ele+.
def add_child(ele)
  ele.parent = self
  self.children = [] unless self.children
  self.children.push(ele)
  ele
end
|
48
|
+
|
49
|
+
# Escapes +string+ with +fast_xs+ and appends it to the HTML stream as a
# Text node. Returns whatever +add_child+ returns.
def text!(string)
  escaped = string.fast_xs
  add_child(Text.new(escaped))
end
|
53
|
+
|
54
|
+
# Appends +string+ to the HTML stream as a Text node exactly as given
# (no escaping is applied). Always returns nil.
def text(string)
  node = Text.new(string)
  add_child(node)
  nil
end
|
59
|
+
alias_method :<<, :text
|
60
|
+
alias_method :concat, :text
|
61
|
+
|
62
|
+
# Create a tag named +tag+. Other than the first argument which is the tag name,
# the arguments are the same as the tags implemented via method_missing.
#
# Hash arguments become attributes (values escaped with +fast_xs+, nil values
# skipped); every other argument becomes a child: objects responding to
# +to_html+ are parsed with Hpricot.make, other truthy values become escaped
# Text nodes, and nil/false arguments are dropped. A block, when given, is
# evaluated against the new element to build further children.
#
# When both @auto_validation and @tagset are set, the tag name, attribute
# names and id uniqueness are checked first and InvalidXhtmlError is raised
# on a violation. Returns the new element after adding it as a child.
def tag!(tag, *args, &block)
  ele_id = nil
  if @auto_validation && @tagset
    if !@tagset.tagset.has_key?(tag)
      # Use the ivar: there is no +tagset+ method in this module, so the
      # bare name raised NameError instead of the intended error.
      raise InvalidXhtmlError, "no element `#{tag}' for #{@tagset.doctype}"
    elsif args.last.respond_to?(:to_hash)
      attrs = args.last.to_hash

      # Form controls get a name matching their id unless one was supplied.
      if @tagset.forms.include?(tag) && attrs[:id]
        attrs[:name] ||= attrs[:id]
      end

      attrs.each do |k, v|
        atname = k.to_s.downcase.intern
        # Namespaced attributes (containing ":") are always allowed.
        unless k.to_s.include?(":") || @tagset.tagset[tag].include?(atname)
          raise InvalidXhtmlError, "no attribute `#{k}' on #{tag} elements"
        end
        next unless atname == :id

        ele_id = v.to_s
        @elements ||= {} # registry may not have been supplied by the caller
        if @elements.has_key?(ele_id)
          raise InvalidXhtmlError, "id `#{ele_id}' already used (id's must be unique)."
        end
      end
    end
  end

  # turn arguments into children or attributes
  attrs = args.grep(Hash)
  childs = (args - attrs).flatten.map do |x|
    if x.respond_to?(:to_html)
      Hpricot.make(x.to_html)
    elsif x
      Text.new(x.fast_xs)
    end
  end.flatten.compact # compact: nil/false arguments must not become children
  attrs = attrs.inject({}) do |hsh, ath|
    ath.each do |k, v|
      hsh[k] = v.to_s.fast_xs if v
    end
    hsh
  end

  # create the element itself
  tag = tag.to_s
  f = Elem.new(tag, attrs, childs, ETag.new(tag))

  # build children from the block
  build(f, &block) if block

  # Remember a validated id so a later duplicate is actually detected.
  @elements[ele_id] = f if ele_id

  add_child f
  f
end
|
120
|
+
|
121
|
+
# Instance-level shortcut: forwards all arguments and the block unchanged
# to Hpricot.build and returns its result.
def build(*args, &block)
  Hpricot.build(*args, &block)
end
|
124
|
+
|
125
|
+
# Every HTML tag method goes through an html_tag call. So, calling <tt>div</tt> is equivalent
# to calling <tt>html_tag(:div)</tt>. All HTML tags in Hpricot's list are given generated wrappers
# for this method.
#
# If the @auto_validation setting is on (and a @tagset is available), a block
# passed to a self-closing element raises InvalidXhtmlError.
#
# With no arguments and no block a CssProxy for +sym+ is returned so that
# classes/ids can be chained; otherwise the call is forwarded to tag!.
def html_tag(sym, *args, &block)
  # Guard on @tagset as tag! does: validation without a tagset is skipped
  # rather than failing on nil.
  if @auto_validation && @tagset && @tagset.self_closing.include?(sym) && block
    raise InvalidXhtmlError, "the `#{sym}' element is self-closing, please remove the block"
  elsif args.empty? && block.nil?
    CssProxy.new(self, sym)
  else
    tag!(sym, *args, &block)
  end
end
|
140
|
+
|
141
|
+
# Generate one wrapper method per tag name listed in XHTMLTransitional.tags;
# each wrapper simply forwards its arguments and block to html_tag.
XHTMLTransitional.tags.each do |tag_name|
  wrapper_source = "def #{tag_name}(*args, &block)\n" \
                   "  html_tag(#{tag_name.inspect}, *args, &block)\n" \
                   "end\n"
  class_eval wrapper_source
end
|
148
|
+
|
149
|
+
# Adds a DocType node built from +target+, +pub+ and +sys+ as a child and
# returns whatever +add_child+ returns.
def doctype(target, pub, sys)
  declaration = DocType.new(target, pub, sys)
  add_child(declaration)
end
|
152
|
+
|
153
|
+
remove_method :head
|
154
|
+
|
155
|
+
# Builds a head tag. Adds a <tt>meta</tt> tag inside with Content-Type
# set to <tt>text/html; charset=utf-8</tt> when @output_meta_tag is set in
# the context the inner block runs in.
#
# The block is optional: without one only the (optional) meta tag is
# produced. Previously a missing block raised ArgumentError from
# +instance_eval+.
def head(*args, &block)
  tag!(:head, *args) do
    tag!(:meta, "http-equiv" => "Content-Type", "content" => "text/html; charset=utf-8") if @output_meta_tag
    instance_eval(&block) if block
  end
end
|
163
|
+
|
164
|
+
# Builds an html tag by delegating to xhtml_html, which prepends the XML
# instruction (when @output_xml_instruction is set) and the doctype of the
# configured default tagset, and merges +attrs+ over the default root
# attributes (<tt>:xmlns => "http://www.w3.org/1999/xhtml"</tt>,
# <tt>:lang => "en"</tt>, ...).
#
# NOTE(review): selecting Hpricot::XHTMLTransitional explicitly was
# commented out in the original; the tagset is whatever the default is.
def xhtml_transitional(attrs = {}, &block)
  xhtml_html(attrs, &block)
end
|
171
|
+
|
172
|
+
# Intended to build an html tag with the XHTML 1.0 Strict doctype.
#
# NOTE(review): selecting Hpricot::XHTMLStrict was commented out in the
# original, so this currently delegates to xhtml_html exactly like
# xhtml_transitional and emits the doctype of the configured default tagset.
def xhtml_strict(attrs = {}, &block)
  xhtml_html(attrs, &block)
end
|
177
|
+
|
178
|
+
private
|
179
|
+
|
180
|
+
# Shared implementation behind xhtml_transitional and xhtml_strict.
#
# Calls +instruct!+ when @output_xml_instruction is set, emits a doctype
# from the default tagset, then builds the <html> element with the default
# root attributes overridden by +attrs+. Returns the result of tag!.
#
# NOTE(review): +instruct!+ is not defined in the visible part of this
# module - confirm it is provided elsewhere.
def xhtml_html(attrs = {}, &block)
  instruct! if @output_xml_instruction
  default_tagset = @@default[:tagset]
  doctype(:html, *default_tagset.doctype)
  root_attributes = @@default[:root_attributes].merge(attrs)
  tag!(:html, root_attributes, &block)
end
|
185
|
+
|
186
|
+
end
|
187
|
+
|
188
|
+
# Proxy returned by Builder#html_tag when a tag method is called with no
# arguments and no block. Methods called against the CssProxy object are
# collected as element classes or an ID until a call finally supplies
# content, at which point the tag is created on the builder.
#
# See the README for examples.
class CssProxy < BlankSlate

  # Creates a CssProxy for the tag +sym+ that will be built on +builder+.
  def initialize(builder, sym)
    @builder = builder
    @sym = sym
    @attrs = {}
  end

  # Adds attributes to an element. A method name ending in "!" sets the :id
  # attribute (to the name without the bang); any other name is appended to
  # the space-separated :class attribute.
  #
  # When a block or any truthy argument is given, the collected attributes
  # are appended to +args+ and the tag is created through the builder's
  # tag!; otherwise the proxy itself is returned for further chaining.
  def method_missing(id_or_class, *args, &block)
    label = id_or_class.to_s
    if label =~ /!$/
      @attrs[:id] = $~.pre_match
    else
      current = @attrs[:class]
      if current.nil?
        @attrs[:class] = label
      else
        @attrs[:class] = "#{current} #{label}".strip
      end
    end

    return self unless block || args.any?

    args.push(@attrs)
    @builder.tag!(@sym, *args, &block)
  end

end
|
217
|
+
end
|