hpricot 0.5-mswin32 → 0.6-mswin32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +13 -1
- data/README +4 -1
- data/Rakefile +111 -75
- data/ext/hpricot_scan/HpricotScanService.java +1340 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_scan.c +2435 -2181
- data/ext/hpricot_scan/hpricot_scan.java.rl +363 -0
- data/ext/hpricot_scan/hpricot_scan.rl +3 -70
- data/lib/hpricot.rb +1 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +200 -0
- data/lib/hpricot/elements.rb +143 -32
- data/lib/hpricot/inspect.rb +7 -3
- data/lib/hpricot/parse.rb +128 -101
- data/lib/hpricot/tag.rb +23 -15
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +93 -33
- data/lib/hpricot/xchar.rb +94 -0
- data/lib/i686-linux/hpricot_scan.so +0 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +1 -1
- data/test/test_alter.rb +65 -0
- data/test/test_builder.rb +24 -0
- data/test/test_parser.rb +92 -4
- data/test/test_preserved.rb +20 -0
- data/test/test_xml.rb +13 -0
- metadata +34 -14
- data/lib/hpricot/text.rb +0 -115
- data/lib/hpricot_scan.so +0 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#--
|
3
|
+
# Copyright 2004 by Jim Weirich (jim@weirichhouse.org).
|
4
|
+
# All rights reserved.
|
5
|
+
|
6
|
+
# Permission is granted for use, copying, modification, distribution,
|
7
|
+
# and distribution of modified versions of this work as long as the
|
8
|
+
# above copyright notice is included.
|
9
|
+
#++
|
10
|
+
|
11
|
+
module Hpricot

  # BlankSlate provides an abstract base class with no predefined
  # methods (except for <tt>\_\_send__</tt> and <tt>\_\_id__</tt>).
  # BlankSlate is useful as a base class when writing classes that
  # depend upon <tt>method_missing</tt> (e.g. dynamic proxies).
  class BlankSlate
    class << self

      # Hide the method named +name+ in the BlankSlate class.  Don't
      # hide +instance_eval+ or any method beginning with "__".
      #
      # +name+ may be a String or a Symbol.  Names that are not currently
      # public/protected instance methods of BlankSlate are ignored, so
      # calling this twice for the same name is harmless.
      def hide(name)
        # Compare as a String: +name+ is a Symbol on Ruby 1.9+ and a
        # String on 1.8.
        return if name.to_s =~ /^(__|instance_eval)/

        # method_defined? accepts both Strings and Symbols, unlike
        # instance_methods.include?, whose element type differs between
        # Ruby versions (Strings on 1.8, Symbols afterwards).
        undef_method(name) if method_defined?(name)
      end
    end

    # Strip everything inherited at definition time.
    instance_methods.each { |m| hide(m) }
  end
end
|
32
|
+
|
33
|
+
# Since Ruby is very dynamic, methods added to the ancestors of
|
34
|
+
# BlankSlate <em>after BlankSlate is defined</em> will show up in the
|
35
|
+
# list of available BlankSlate methods. We handle this by defining a
|
36
|
+
# hook in the Object and Kernel classes that will hide any methods defined there later.
|
37
|
+
module Kernel
  class << self
    # Keep the previous hook reachable so it still runs first.
    alias_method :hpricot_slate_method_added, :method_added

    # Called by Ruby whenever an instance method is added.  After
    # chaining to the previous hook, strips the new method from
    # Hpricot::BlankSlate, but only when the receiver is Kernel itself.
    def method_added(name)
      hpricot_slate_method_added(name)
      Hpricot::BlankSlate.hide(name) if self == Kernel
    end
  end
end
|
50
|
+
|
51
|
+
class Object
  class << self
    # Keep the previous hook reachable so it still runs first.
    alias_method :hpricot_slate_method_added, :method_added

    # Called by Ruby whenever an instance method is added.  Every class
    # inherits this hook, so after chaining to the previous hook the new
    # method is stripped from Hpricot::BlankSlate only when the receiver
    # is Object itself.
    def method_added(name)
      hpricot_slate_method_added(name)
      Hpricot::BlankSlate.hide(name) if self == Object
    end
  end
end
|
@@ -0,0 +1,200 @@
|
|
1
|
+
require 'hpricot/tags'
|
2
|
+
require 'hpricot/xchar'
|
3
|
+
require 'hpricot/blankslate'
|
4
|
+
|
5
|
+
module Hpricot
|
6
|
+
# Mixes Builder into +ele+ and evaluates +blk+ in its context.
#
# ele::     the node to build into; a new Doc when omitted.
# assigns:: name => value pairs copied onto +ele+ as instance variables
#           (<tt>@name</tt>) before the block runs.
#
# Returns +ele+.
def self.build(ele = Doc.new, assigns = {}, &blk)
  ele.extend(Builder)
  assigns.each { |ivar, val| ele.instance_variable_set("@#{ivar}", val) }
  ele.instance_eval(&blk)
  ele
end
|
14
|
+
|
15
|
+
module Builder
|
16
|
+
|
17
|
+
@@default = {
|
18
|
+
:indent => 0,
|
19
|
+
:output_helpers => true,
|
20
|
+
:output_xml_instruction => true,
|
21
|
+
:output_meta_tag => true,
|
22
|
+
:auto_validation => true,
|
23
|
+
:tagset => Hpricot::XHTMLTransitional,
|
24
|
+
:root_attributes => {
|
25
|
+
:xmlns => 'http://www.w3.org/1999/xhtml', :'xml:lang' => 'en', :lang => 'en'
|
26
|
+
}
|
27
|
+
}
|
28
|
+
|
29
|
+
def self.set(option, value)
|
30
|
+
@@default[option] = value
|
31
|
+
end
|
32
|
+
|
33
|
+
# Write a +string+ to the HTML stream, making sure to escape it.
|
34
|
+
# Appends +string+ to this node's children as a Text node, passing it
# through Hpricot.xs first.  Returns the result of the append.
def text!(string)
  escaped = Hpricot.xs(string)
  @children << Text.new(escaped)
end
|
37
|
+
|
38
|
+
# Write a +string+ to the HTML stream without escaping it.
|
39
|
+
# Appends +string+ to this node's children as a Text node exactly as
# given (no escaping).  Always returns +nil+.
def text(string)
  raw = Text.new(string)
  @children << raw
  nil
end
|
43
|
+
alias_method :<<, :text
|
44
|
+
alias_method :concat, :text
|
45
|
+
|
46
|
+
# Create a tag named +tag+. Other than the first argument which is the tag name,
|
47
|
+
# the arguments are the same as the tags implemented via method_missing.
|
48
|
+
# Create a tag named +tag+. Other than the first argument which is the tag name,
# the arguments are the same as the tags implemented via method_missing.
#
# Hash arguments become attributes; every other argument becomes a child
# (objects responding to +to_html+ are parsed with Hpricot.make, anything
# else truthy is escaped into a Text node, +nil+/+false+ are dropped).
# A block, if given, is evaluated against the new element via #build.
#
# When both @auto_validation and @tagset are set, raises InvalidXhtmlError
# for an unknown element, an unknown attribute, or an id already used by
# an element created through this builder.
#
# Returns the new Elem, which has also been appended to @children.
def tag!(tag, *args, &block)
  ele_id = nil
  if @auto_validation and @tagset
    if !@tagset.tagset.has_key?(tag)
      raise InvalidXhtmlError, "no element `#{tag}' for #{@tagset.doctype}"
    elsif args.last.respond_to?(:to_hash)
      attrs = args.last.to_hash

      # form controls default their name to their id
      if @tagset.forms.include?(tag) and attrs[:id]
        attrs[:name] ||= attrs[:id]
      end

      attrs.each do |k, v|
        atname = k.to_s.downcase.intern
        # namespaced attributes (containing ":") are never validated;
        # match on the String form so Symbol keys behave the same everywhere
        unless k.to_s =~ /:/ or @tagset.tagset[tag].include? atname
          raise InvalidXhtmlError, "no attribute `#{k}' on #{tag} elements"
        end
        if atname == :id
          ele_id = v.to_s
          # @elements is created on first use; nothing else initialises it
          if (@elements ||= {}).has_key? ele_id
            raise InvalidXhtmlError, "id `#{ele_id}' already used (id's must be unique)."
          end
        end
      end
    end
  end

  # turn arguments into children or attributes
  attrs = args.grep(Hash)
  childs = (args - attrs).map do |x|
    if x.respond_to? :to_html
      Hpricot.make(x.to_html)
    elsif x
      Text.new(Hpricot.xs(x))
    end
  end.flatten.compact # compact: falsy arguments must not become nil children

  attrs = attrs.inject({}) do |hsh, ath|
    ath.each do |k, v|
      hsh[k] = Hpricot.xs(v.to_s) if v
    end
    hsh
  end

  # create the element itself
  f = Elem.new(STag.new(tag, attrs), childs, ETag.new(tag))

  # build children from the block
  build(f, &block) if block

  # remember the id so a later duplicate is detected
  # (ele_id is only ever set on the validation path above)
  @elements[ele_id] = f if ele_id

  @children << f
  f
end
|
103
|
+
|
104
|
+
def build(*a, &b)
|
105
|
+
Hpricot.build(*a, &b)
|
106
|
+
end
|
107
|
+
|
108
|
+
# Every HTML tag method goes through an html_tag call. So, calling <tt>div</tt> is equivalent
|
109
|
+
# to calling <tt>html_tag(:div)</tt>. All HTML tags in Hpricot's list are given generated wrappers
|
110
|
+
# for this method.
|
111
|
+
#
|
112
|
+
# If the @auto_validation setting is on, this method will check for many common mistakes which
|
113
|
+
# could lead to invalid XHTML.
|
114
|
+
# Common entry point for the generated per-tag methods.
#
# * With no arguments and no block, returns a CssProxy for +sym+ so that
#   classes/ids can be chained on before the tag is emitted.
# * Otherwise delegates to #tag!.
#
# When @auto_validation is on and a @tagset is available, giving a block
# to a self-closing element raises InvalidXhtmlError.
def html_tag(sym, *args, &block)
  # Guard on @tagset as tag! does: validation is skipped when no tagset
  # has been assigned rather than failing on nil.
  if @auto_validation and @tagset and @tagset.self_closing.include?(sym) and block
    raise InvalidXhtmlError, "the `#{sym}' element is self-closing, please remove the block"
  elsif args.empty? and block.nil?
    CssProxy.new(self, sym)
  else
    tag!(sym, *args, &block)
  end
end
|
123
|
+
|
124
|
+
XHTMLTransitional.tags.each do |k|
|
125
|
+
class_eval %{
|
126
|
+
def #{k}(*args, &block)
|
127
|
+
html_tag(#{k.inspect}, *args, &block)
|
128
|
+
end
|
129
|
+
}
|
130
|
+
end
|
131
|
+
|
132
|
+
def doctype(target, pub, sys)
|
133
|
+
@children << DocType.new(target, pub, sys)
|
134
|
+
end
|
135
|
+
|
136
|
+
remove_method :head
|
137
|
+
|
138
|
+
# Builds a head tag. Adds a <tt>meta</tt> tag inside with Content-Type
|
139
|
+
# set to <tt>text/html; charset=utf-8</tt>.
|
140
|
+
# Builds a head tag. When @output_meta_tag is set on this builder, adds a
# <tt>meta</tt> tag inside with Content-Type set to
# <tt>text/html; charset=utf-8</tt>.  The optional block supplies the rest
# of the head's content.
def head(*args, &block)
  # Read the setting here: the block below is evaluated against the new
  # head element, whose own instance variables are unset.
  emit_meta = @output_meta_tag
  tag!(:head, *args) do
    tag!(:meta, "http-equiv" => "Content-Type", "content" => "text/html; charset=utf-8") if emit_meta
    # instance_eval raises when handed no block, so an empty head is allowed
    instance_eval(&block) if block
  end
end
|
146
|
+
|
147
|
+
# Builds an html tag. An XML 1.0 instruction and an XHTML 1.0 Transitional doctype
|
148
|
+
# are prepended. Also assumes <tt>:xmlns => "http://www.w3.org/1999/xhtml",
|
149
|
+
# :lang => "en"</tt>.
|
150
|
+
def xhtml_transitional(attrs = {}, &block)
|
151
|
+
# self.tagset = Hpricot::XHTMLTransitional
|
152
|
+
xhtml_html(attrs, &block)
|
153
|
+
end
|
154
|
+
|
155
|
+
# Builds an html tag with XHTML 1.0 Strict doctype instead.
|
156
|
+
def xhtml_strict(attrs = {}, &block)
|
157
|
+
# self.tagset = Hpricot::XHTMLStrict
|
158
|
+
xhtml_html(attrs, &block)
|
159
|
+
end
|
160
|
+
|
161
|
+
private
|
162
|
+
|
163
|
+
def xhtml_html(attrs = {}, &block)
|
164
|
+
instruct! if @output_xml_instruction
|
165
|
+
doctype(:html, *@@default[:tagset].doctype)
|
166
|
+
tag!(:html, @@default[:root_attributes].merge(attrs), &block)
|
167
|
+
end
|
168
|
+
|
169
|
+
end
|
170
|
+
|
171
|
+
# Class used by Hpricot::Builder to store element options. Methods called
|
172
|
+
# against the CssProxy object are added as element classes or IDs.
|
173
|
+
#
|
174
|
+
# See the README for examples.
|
175
|
+
class CssProxy < BlankSlate

  # Remembers the +builder+ that will eventually emit the tag, the tag
  # name +sym+, and starts with no attributes collected.
  def initialize(builder, sym)
    @builder = builder
    @sym = sym
    @attrs = {}
  end

  # Every unknown method call contributes an attribute:
  #
  # * a name ending in "!" sets :id to the name without the bang;
  # * any other name is appended to :class (space separated).
  #
  # If arguments or a block are supplied, the collected attributes are
  # appended to the arguments and the tag is emitted through the
  # builder's tag!, whose result is returned.  Otherwise the proxy
  # itself is returned so further calls can be chained.
  def method_missing(id_or_class, *args, &block)
    idc = id_or_class.to_s
    if idc =~ /!$/
      @attrs[:id] = $`
    elsif @attrs[:class].nil?
      @attrs[:class] = idc
    else
      @attrs[:class] = "#{@attrs[:class]} #{idc}".strip
    end

    return self unless block || args.any?

    args.push(@attrs)
    @builder.tag!(@sym, *args, &block)
  end

end
|
200
|
+
end
|
data/lib/hpricot/elements.rb
CHANGED
@@ -50,6 +50,7 @@ module Hpricot
|
|
50
50
|
# Most of the useful element methods are in the mixins Hpricot::Traverse
|
51
51
|
# and Hpricot::Container::Trav.
|
52
52
|
class Elements < Array
|
53
|
+
|
53
54
|
# Searches this list for any elements (or children of these elements) matching
|
54
55
|
# the CSS or XPath expression +expr+. Root is assumed to be the element scanned.
|
55
56
|
#
|
@@ -128,26 +129,26 @@ module Hpricot
|
|
128
129
|
|
129
130
|
# Add to the end of the contents inside each element in this list.
|
130
131
|
# Pass in an HTML +str+, which is turned into Hpricot elements.
|
131
|
-
def append(str)
|
132
|
-
each { |x| x.
|
132
|
+
def append(str = nil, &blk)
|
133
|
+
each { |x| x.html(x.children + Hpricot.make(str, &blk)) }
|
133
134
|
end
|
134
135
|
|
135
136
|
# Add to the start of the contents inside each element in this list.
|
136
137
|
# Pass in an HTML +str+, which is turned into Hpricot elements.
|
137
|
-
def prepend(str)
|
138
|
-
each { |x| x.
|
138
|
+
def prepend(str = nil, &blk)
|
139
|
+
each { |x| x.html(Hpricot.make(str, &blk) + x.children) }
|
139
140
|
end
|
140
141
|
|
141
142
|
# Add some HTML just previous to each element in this list.
|
142
143
|
# Pass in an HTML +str+, which is turned into Hpricot elements.
|
143
|
-
def before(str)
|
144
|
-
each { |x| x.parent.insert_before Hpricot.make(str), x }
|
144
|
+
def before(str = nil, &blk)
|
145
|
+
each { |x| x.parent.insert_before Hpricot.make(str, &blk), x }
|
145
146
|
end
|
146
147
|
|
147
148
|
# Just after each element in this list, add some HTML.
|
148
149
|
# Pass in an HTML +str+, which is turned into Hpricot elements.
|
149
|
-
def after(str)
|
150
|
-
each { |x| x.parent.insert_after Hpricot.make(str), x }
|
150
|
+
def after(str = nil, &blk)
|
151
|
+
each { |x| x.parent.insert_after Hpricot.make(str, &blk), x }
|
151
152
|
end
|
152
153
|
|
153
154
|
# Wraps each element in the list inside the element created by HTML +str+.
|
@@ -158,49 +159,117 @@ module Hpricot
|
|
158
159
|
# wrap(%{<div class="link"><div class="link_inner"></div></div>})
|
159
160
|
#
|
160
161
|
# This code wraps every link on the page inside a +div.link+ and a +div.link_inner+ nest.
|
161
|
-
def wrap(str)
|
162
|
+
def wrap(str = nil, &blk)
|
162
163
|
each do |x|
|
163
|
-
wrap = Hpricot.make(str)
|
164
|
+
wrap = Hpricot.make(str, &blk)
|
164
165
|
nest = wrap.detect { |w| w.respond_to? :children }
|
165
166
|
unless nest
|
166
167
|
raise Exception, "No wrapping element found."
|
167
168
|
end
|
168
169
|
x.parent.replace_child(x, wrap)
|
169
170
|
nest = nest.children.first until nest.empty?
|
170
|
-
nest.children
|
171
|
+
nest.html(nest.children + [x])
|
171
172
|
end
|
172
173
|
end
|
173
174
|
|
174
|
-
#
|
175
|
-
# a simple pair (<em>attribute name</em>, <em>attribute value</em>):
|
175
|
+
# Gets and sets attributes on all matched elements.
|
176
176
|
#
|
177
|
-
#
|
177
|
+
# Pass in a +key+ on its own and this method will return the string value
|
178
|
+
# assigned to that attribute for the first element. Or +nil+ if the
|
179
|
+
# attribute isn't found.
|
178
180
|
#
|
179
|
-
#
|
181
|
+
# doc.search("a").attr("href")
|
182
|
+
# #=> "http://hacketyhack.net/"
|
180
183
|
#
|
181
|
-
#
|
184
|
+
# Or, pass in a +key+ and +value+. This will set an attribute for all
|
185
|
+
# matched elements.
|
182
186
|
#
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
187
|
+
# doc.search("p").attr("class", "basic")
|
188
|
+
#
|
189
|
+
# You may also use a Hash to set a series of attributes:
|
190
|
+
#
|
191
|
+
# (doc/"a").attr(:class => "basic", :href => "http://hackety.org/")
|
192
|
+
#
|
193
|
+
# Lastly, a block can be used to rewrite an attribute based on the element
|
194
|
+
# it belongs to. The block will pass in an element. Return from the block
|
195
|
+
# the new value of the attribute.
|
196
|
+
#
|
197
|
+
# records.attr("href") { |e| e['href'] + "#top" }
|
198
|
+
#
|
199
|
+
# This example adds a <tt>#top</tt> anchor to each link.
|
200
|
+
#
|
201
|
+
# Reads or writes attributes on the matched elements.
#
# * <tt>attr(key, value)</tt> or <tt>attr(key) { |el| ... }</tt> sets
#   +key+ on every element (the block's result is used when +value+ is
#   not given) and returns +self+.
# * <tt>attr(hash)</tt> sets each key/value pair and returns +self+.
# * <tt>attr(key)</tt> returns the attribute of the first element, or
#   +nil+ when this list is empty.
def attr key, value = nil, &blk
  if value or blk
    each { |el| el.set_attribute(key, value || blk[el]) }
    return self
  end

  if key.is_a? Hash
    key.each { |k, v| attr(k, v) }
    return self
  end

  # An empty list has no first element to ask; answer nil rather than
  # failing on nil.
  first_el = self[0]
  first_el.get_attribute(key) if first_el
end
|
215
|
+
alias_method :set, :attr
|
216
|
+
|
217
|
+
# Adds the class to all matched elements.
|
218
|
+
#
|
219
|
+
# (doc/"p").add_class("bacon")
|
220
|
+
#
|
221
|
+
# Now all paragraphs will have class="bacon".
|
222
|
+
# Appends +class_name+ to the space-separated <tt>class</tt> attribute of
# every element that supports attributes (entries without
# +get_attribute+ are skipped).  Duplicates are collapsed.
# Returns +self+.
def add_class class_name
  each do |node|
    if node.respond_to? :get_attribute
      current = node.get_attribute('class').to_s.split(" ")
      merged = (current << class_name).uniq
      node.set_attribute('class', merged.join(" "))
    end
  end
  self
end
|
230
|
+
|
231
|
+
# Remove an attribute from each of the matched elements.
|
232
|
+
#
|
233
|
+
# (doc/"input").remove_attr("disabled")
|
234
|
+
#
|
235
|
+
# Deletes the attribute +name+ from every element that supports
# +remove_attribute+; other entries are skipped.  Returns +self+.
def remove_attr name
  each { |node| node.remove_attribute(name) if node.respond_to? :remove_attribute }
  self
end
|
242
|
+
|
243
|
+
# Removes a class from all matched elements.
|
244
|
+
#
|
245
|
+
# (doc/"span").remove_class("lightgrey")
|
246
|
+
#
|
247
|
+
# Or, to remove all classes:
|
248
|
+
#
|
249
|
+
# (doc/"span").remove_class
|
250
|
+
#
|
251
|
+
# With +name+, strips that class from the space-separated <tt>class</tt>
# attribute of every element; without it, removes the <tt>class</tt>
# attribute entirely.  Entries without +get_attribute+ are skipped.
# Returns +self+.
def remove_class name = nil
  each do |node|
    next unless node.respond_to? :get_attribute
    unless name
      node.remove_attribute("class")
      next
    end
    remaining = node.get_attribute('class').to_s.split(" ") - [name]
    node.set_attribute('class', remaining.uniq.join(" "))
  end
  self
end
|
195
263
|
|
196
|
-
ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([
|
197
|
-
BRACK_RE = %r!(\[) *([^\]]*) *\]
|
264
|
+
ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([^\]'"]*)'?"? *\]!i
|
265
|
+
BRACK_RE = %r!(\[) *([^\]]*) *\]+!i
|
198
266
|
FUNC_RE = %r!(:)?([a-zA-Z0-9\*_-]*)\( *[\"']?([^ \)]*?)['\"]? *\)!
|
267
|
+
CUST_RE = %r!(:)([a-zA-Z0-9\*_-]*)()!
|
199
268
|
CATCH_RE = %r!([:\.#]*)([a-zA-Z0-9\*_-]+)!
|
200
269
|
|
201
270
|
def self.filter(nodes, expr, truth = true)
|
202
271
|
until expr.empty?
|
203
|
-
_, *m = *expr.match(/^(?:#{ATTR_RE}|#{BRACK_RE}|#{FUNC_RE}|#{CATCH_RE})/)
|
272
|
+
_, *m = *expr.match(/^(?:#{ATTR_RE}|#{BRACK_RE}|#{FUNC_RE}|#{CUST_RE}|#{CATCH_RE})/)
|
204
273
|
break unless _
|
205
274
|
|
206
275
|
expr = $'
|
@@ -215,9 +284,15 @@ module Hpricot
|
|
215
284
|
|
216
285
|
if m[0] == ":" && m[1] == "not"
|
217
286
|
nodes, = Elements.filter(nodes, m[2], false)
|
287
|
+
elsif "#{m[0]}#{m[1]}" =~ /^(:even|:odd)$/
|
288
|
+
new_nodes = []
|
289
|
+
nodes.each_with_index {|n,i| new_nodes.push(n) if (i % 2 == (m[1] == "even" ? 0 : 1)) }
|
290
|
+
nodes = new_nodes
|
291
|
+
elsif "#{m[0]}#{m[1]}" =~ /^(:first|:last)$/
|
292
|
+
nodes = [nodes.send(m[1])]
|
218
293
|
else
|
219
|
-
meth = "filter[#{m[0]}#{m[1]}]"
|
220
|
-
if Traverse.method_defined? meth
|
294
|
+
meth = "filter[#{m[0]}#{m[1]}]" unless m[0].empty?
|
295
|
+
if meth and Traverse.method_defined? meth
|
221
296
|
args = m[2..-1]
|
222
297
|
else
|
223
298
|
meth = "filter[#{m[0]}]"
|
@@ -235,6 +310,40 @@ module Hpricot
|
|
235
310
|
[nodes, expr]
|
236
311
|
end
|
237
312
|
|
313
|
+
# Given two elements, attempt to gather an Elements array of everything between
|
314
|
+
# (and including) those two elements.
|
315
|
+
# Collects the run of nodes from +ele1+ to +ele2+ into an Elements list.
#
# * If either argument is nil the result is empty.
# * If both share a parent, the result is that parent's children from
#   +ele1+'s position through +ele2+'s position.
# * Otherwise, if one argument is an ancestor of the other, the result is
#   the ancestor's children from the first up to the index given by the
#   descendant's +node_position+.
# * In every other case the result is empty.
#
# When +excl+ is true the upper bound is reduced by one.
def self.expand(ele1, ele2, excl=false)
  return Elements[] unless ele1 and ele2

  shift = excl ? -1 : 0

  # siblings: a plain slice of the shared parent's children
  if ele1.parent == ele2.parent
    span = ele1.parent.children[ele1.node_position..(ele2.node_position + shift)]
    return Elements[*span]
  end

  # root-first ancestor chain ending in the node itself
  lineage = lambda do |node|
    chain = [node]
    cursor = node
    while cursor.respond_to?(:parent) and (cursor = cursor.parent)
      chain.unshift cursor
    end
    chain
  end

  # deepest node at which both chains still agree
  chain1 = lineage[ele1]
  chain2 = lineage[ele2]
  shared = chain1.zip(chain2).select { |left, right| left == right }.flatten.last

  inner =
    if ele1 == shared
      ele2
    elsif ele2 == shared
      ele1
    end

  span = inner ? shared.children[0..(inner.node_position + shift)] : []
  Elements[*span]
end
|
346
|
+
|
238
347
|
def filter(expr)
|
239
348
|
nodes, = Elements.filter(self, expr)
|
240
349
|
nodes
|
@@ -311,9 +420,9 @@ module Hpricot
|
|
311
420
|
|
312
421
|
# Matches an element by its position among its parent's containers.
# Positions are 1-based, as in CSS: <tt>:nth-child(1)</tt> is the first
# child, +odd+ selects the 1st, 3rd, ... and +even+ the 2nd, 4th, ...
filter ':nth-child' do |arg,i|
  case arg
  when 'even'; (parent.containers.index(self) + 1) % 2 == 0
  when 'odd';  (parent.containers.index(self) + 1) % 2 == 1
  else
    # Convert the 1-based CSS position to a 0-based array index.
    # Positions below 1 never match; without the check, index -1 would
    # select the last container.
    pos = arg.to_i
    pos > 0 && self == parent.containers[pos - 1]
  end
end
|
319
428
|
|
@@ -357,9 +466,11 @@ module Hpricot
|
|
357
466
|
self.comment?
|
358
467
|
end
|
359
468
|
|
360
|
-
filter :contains do |arg
|
469
|
+
filter :contains do |arg, ignore|
|
361
470
|
html.include? arg
|
362
471
|
end
|
472
|
+
|
473
|
+
|
363
474
|
|
364
475
|
pred_procs =
|
365
476
|
{'text()' => proc { |ele, *_| ele.inner_text.strip },
|