hpricot 0.5 → 0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+ #--
3
+ # Copyright 2004 by Jim Weirich (jim@weirichhouse.org).
4
+ # All rights reserved.
5
+
6
+ # Permission is granted for use, copying, modification, distribution,
7
+ # and distribution of modified versions of this work as long as the
8
+ # above copyright notice is included.
9
+ #++
10
+
11
+ module Hpricot
12
+
13
+ # BlankSlate provides an abstract base class with no predefined
14
+ # methods (except for <tt>\_\_send__</tt> and <tt>\_\_id__</tt>).
15
+ # BlankSlate is useful as a base class when writing classes that
16
+ # depend upon <tt>method_missing</tt> (e.g. dynamic proxies).
17
+ class BlankSlate
18
+ class << self
19
+
20
+ # Hide the method named +name+ in the BlankSlate class. Don't
21
+ # hide +instance_eval+ or any method beginning with "__".
22
+ def hide(name)
23
+ undef_method name if
24
+ instance_methods.include?(name.to_s) and
25
+ name !~ /^(__|instance_eval)/
26
+ end
27
+ end
28
+
29
+ instance_methods.each { |m| hide(m) }
30
+ end
31
+ end
32
+
33
+ # Since Ruby is very dynamic, methods added to the ancestors of
34
+ # BlankSlate <em>after BlankSlate is defined</em> will show up in the
35
+ # list of available BlankSlate methods. We handle this by defining a
36
+ # hook in the Object and Kernel classes that will hide any defined
37
+ module Kernel
38
+ class << self
39
+ alias_method :hpricot_slate_method_added, :method_added
40
+
41
+ # Detect method additions to Kernel and remove them in the
42
+ # BlankSlate class.
43
+ def method_added(name)
44
+ hpricot_slate_method_added(name)
45
+ return if self != Kernel
46
+ Hpricot::BlankSlate.hide(name)
47
+ end
48
+ end
49
+ end
50
+
51
+ class Object
52
+ class << self
53
+ alias_method :hpricot_slate_method_added, :method_added
54
+
55
+ # Detect method additions to Object and remove them in the
56
+ # BlankSlate class.
57
+ def method_added(name)
58
+ hpricot_slate_method_added(name)
59
+ return if self != Object
60
+ Hpricot::BlankSlate.hide(name)
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,200 @@
1
+ require 'hpricot/tags'
2
+ require 'hpricot/xchar'
3
+ require 'hpricot/blankslate'
4
+
5
+ module Hpricot
6
+ def self.build(ele = Doc.new, assigns = {}, &blk)
7
+ ele.extend Builder
8
+ assigns.each do |k, v|
9
+ ele.instance_variable_set("@#{k}", v)
10
+ end
11
+ ele.instance_eval &blk
12
+ ele
13
+ end
14
+
15
+ module Builder
16
+
17
+ @@default = {
18
+ :indent => 0,
19
+ :output_helpers => true,
20
+ :output_xml_instruction => true,
21
+ :output_meta_tag => true,
22
+ :auto_validation => true,
23
+ :tagset => Hpricot::XHTMLTransitional,
24
+ :root_attributes => {
25
+ :xmlns => 'http://www.w3.org/1999/xhtml', :'xml:lang' => 'en', :lang => 'en'
26
+ }
27
+ }
28
+
29
+ def self.set(option, value)
30
+ @@default[option] = value
31
+ end
32
+
33
+ # Write a +string+ to the HTML stream, making sure to escape it.
34
+ def text!(string)
35
+ @children << Text.new(Hpricot.xs(string))
36
+ end
37
+
38
+ # Write a +string+ to the HTML stream without escaping it.
39
+ def text(string)
40
+ @children << Text.new(string)
41
+ nil
42
+ end
43
+ alias_method :<<, :text
44
+ alias_method :concat, :text
45
+
46
+ # Create a tag named +tag+. Other than the first argument which is the tag name,
47
+ # the arguments are the same as the tags implemented via method_missing.
48
+ def tag!(tag, *args, &block)
49
+ ele_id = nil
50
+ if @auto_validation and @tagset
51
+ if !@tagset.tagset.has_key?(tag)
52
+ raise InvalidXhtmlError, "no element `#{tag}' for #{tagset.doctype}"
53
+ elsif args.last.respond_to?(:to_hash)
54
+ attrs = args.last.to_hash
55
+
56
+ if @tagset.forms.include?(tag) and attrs[:id]
57
+ attrs[:name] ||= attrs[:id]
58
+ end
59
+
60
+ attrs.each do |k, v|
61
+ atname = k.to_s.downcase.intern
62
+ unless k =~ /:/ or @tagset.tagset[tag].include? atname
63
+ raise InvalidXhtmlError, "no attribute `#{k}' on #{tag} elements"
64
+ end
65
+ if atname == :id
66
+ ele_id = v.to_s
67
+ if @elements.has_key? ele_id
68
+ raise InvalidXhtmlError, "id `#{ele_id}' already used (id's must be unique)."
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
74
+
75
+ # turn arguments into children or attributes
76
+ childs = []
77
+ attrs = args.grep(Hash)
78
+ childs.concat((args - attrs).map do |x|
79
+ if x.respond_to? :to_html
80
+ Hpricot.make(x.to_html)
81
+ elsif x
82
+ Text.new(Hpricot.xs(x))
83
+ end
84
+ end.flatten)
85
+ attrs = attrs.inject({}) do |hsh, ath|
86
+ ath.each do |k, v|
87
+ hsh[k] = Hpricot.xs(v.to_s) if v
88
+ end
89
+ hsh
90
+ end
91
+
92
+ # create the element itself
93
+ f = Elem.new(STag.new(tag, attrs), childs, ETag.new(tag))
94
+
95
+ # build children from the block
96
+ if block
97
+ build(f, &block)
98
+ end
99
+
100
+ @children << f
101
+ f
102
+ end
103
+
104
+ def build(*a, &b)
105
+ Hpricot.build(*a, &b)
106
+ end
107
+
108
+ # Every HTML tag method goes through an html_tag call. So, calling <tt>div</tt> is equivalent
109
+ # to calling <tt>html_tag(:div)</tt>. All HTML tags in Hpricot's list are given generated wrappers
110
+ # for this method.
111
+ #
112
+ # If the @auto_validation setting is on, this method will check for many common mistakes which
113
+ # could lead to invalid XHTML.
114
+ def html_tag(sym, *args, &block)
115
+ if @auto_validation and @tagset.self_closing.include?(sym) and block
116
+ raise InvalidXhtmlError, "the `#{sym}' element is self-closing, please remove the block"
117
+ elsif args.empty? and block.nil?
118
+ CssProxy.new(self, sym)
119
+ else
120
+ tag!(sym, *args, &block)
121
+ end
122
+ end
123
+
124
+ XHTMLTransitional.tags.each do |k|
125
+ class_eval %{
126
+ def #{k}(*args, &block)
127
+ html_tag(#{k.inspect}, *args, &block)
128
+ end
129
+ }
130
+ end
131
+
132
+ def doctype(target, pub, sys)
133
+ @children << DocType.new(target, pub, sys)
134
+ end
135
+
136
+ remove_method :head
137
+
138
+ # Builds a head tag. Adds a <tt>meta</tt> tag inside with Content-Type
139
+ # set to <tt>text/html; charset=utf-8</tt>.
140
+ def head(*args, &block)
141
+ tag!(:head, *args) do
142
+ tag!(:meta, "http-equiv" => "Content-Type", "content" => "text/html; charset=utf-8") if @output_meta_tag
143
+ instance_eval(&block)
144
+ end
145
+ end
146
+
147
+ # Builds an html tag. An XML 1.0 instruction and an XHTML 1.0 Transitional doctype
148
+ # are prepended. Also assumes <tt>:xmlns => "http://www.w3.org/1999/xhtml",
149
+ # :lang => "en"</tt>.
150
+ def xhtml_transitional(attrs = {}, &block)
151
+ # self.tagset = Hpricot::XHTMLTransitional
152
+ xhtml_html(attrs, &block)
153
+ end
154
+
155
+ # Builds an html tag with XHTML 1.0 Strict doctype instead.
156
+ def xhtml_strict(attrs = {}, &block)
157
+ # self.tagset = Hpricot::XHTMLStrict
158
+ xhtml_html(attrs, &block)
159
+ end
160
+
161
+ private
162
+
163
+ def xhtml_html(attrs = {}, &block)
164
+ instruct! if @output_xml_instruction
165
+ doctype(:html, *@@default[:tagset].doctype)
166
+ tag!(:html, @@default[:root_attributes].merge(attrs), &block)
167
+ end
168
+
169
+ end
170
+
171
+ # Class used by Markaby::Builder to store element options. Methods called
172
+ # against the CssProxy object are added as element classes or IDs.
173
+ #
174
+ # See the README for examples.
175
+ class CssProxy < BlankSlate
176
+
177
+ # Creates a CssProxy object.
178
+ def initialize(builder, sym)
179
+ @builder, @sym, @attrs = builder, sym, {}
180
+ end
181
+
182
+ # Adds attributes to an element. Bang methods set the :id attribute.
183
+ # Other methods add to the :class attribute.
184
+ def method_missing(id_or_class, *args, &block)
185
+ if (idc = id_or_class.to_s) =~ /!$/
186
+ @attrs[:id] = $`
187
+ else
188
+ @attrs[:class] = @attrs[:class].nil? ? idc : "#{@attrs[:class]} #{idc}".strip
189
+ end
190
+
191
+ if block or args.any?
192
+ args.push(@attrs)
193
+ return @builder.tag!(@sym, *args, &block)
194
+ end
195
+
196
+ return self
197
+ end
198
+
199
+ end
200
+ end
@@ -50,6 +50,7 @@ module Hpricot
50
50
  # Most of the useful element methods are in the mixins Hpricot::Traverse
51
51
  # and Hpricot::Container::Trav.
52
52
  class Elements < Array
53
+
53
54
  # Searches this list for any elements (or children of these elements) matching
54
55
  # the CSS or XPath expression +expr+. Root is assumed to be the element scanned.
55
56
  #
@@ -128,26 +129,26 @@ module Hpricot
128
129
 
129
130
  # Add to the end of the contents inside each element in this list.
130
131
  # Pass in an HTML +str+, which is turned into Hpricot elements.
131
- def append(str)
132
- each { |x| x.inner_html += str }
132
+ def append(str = nil, &blk)
133
+ each { |x| x.html(x.children + Hpricot.make(str, &blk)) }
133
134
  end
134
135
 
135
136
  # Add to the start of the contents inside each element in this list.
136
137
  # Pass in an HTML +str+, which is turned into Hpricot elements.
137
- def prepend(str)
138
- each { |x| x.inner_html = str + x.inner_html }
138
+ def prepend(str = nil, &blk)
139
+ each { |x| x.html(Hpricot.make(str, &blk) + x.children) }
139
140
  end
140
141
 
141
142
  # Add some HTML just previous to each element in this list.
142
143
  # Pass in an HTML +str+, which is turned into Hpricot elements.
143
- def before(str)
144
- each { |x| x.parent.insert_before Hpricot.make(str), x }
144
+ def before(str = nil, &blk)
145
+ each { |x| x.parent.insert_before Hpricot.make(str, &blk), x }
145
146
  end
146
147
 
147
148
  # Just after each element in this list, add some HTML.
148
149
  # Pass in an HTML +str+, which is turned into Hpricot elements.
149
- def after(str)
150
- each { |x| x.parent.insert_after Hpricot.make(str), x }
150
+ def after(str = nil, &blk)
151
+ each { |x| x.parent.insert_after Hpricot.make(str, &blk), x }
151
152
  end
152
153
 
153
154
  # Wraps each element in the list inside the element created by HTML +str+.
@@ -158,49 +159,117 @@ module Hpricot
158
159
  # wrap(%{<div class="link"><div class="link_inner"></div></div>})
159
160
  #
160
161
  # This code wraps every link on the page inside a +div.link+ and a +div.link_inner+ nest.
161
- def wrap(str)
162
+ def wrap(str = nil, &blk)
162
163
  each do |x|
163
- wrap = Hpricot.make(str)
164
+ wrap = Hpricot.make(str, &blk)
164
165
  nest = wrap.detect { |w| w.respond_to? :children }
165
166
  unless nest
166
167
  raise Exception, "No wrapping element found."
167
168
  end
168
169
  x.parent.replace_child(x, wrap)
169
170
  nest = nest.children.first until nest.empty?
170
- nest.children << x
171
+ nest.html(nest.children + [x])
171
172
  end
172
173
  end
173
174
 
174
- # Sets an attribute for all elements in this list. You may use
175
- # a simple pair (<em>attribute name</em>, <em>attribute value</em>):
175
+ # Gets and sets attributes on all matched elements.
176
176
  #
177
- # doc.search('p').set(:class, 'outline')
177
+ # Pass in a +key+ on its own and this method will return the string value
178
+ # assigned to that attribute for the first elements. Or +nil+ if the
179
+ # attribute isn't found.
178
180
  #
179
- # Or, use a hash of pairs:
181
+ # doc.search("a").attr("href")
182
+ # #=> "http://hacketyhack.net/"
180
183
  #
181
- # doc.search('div#sidebar').set(:class => 'outline', :id => 'topbar')
184
+ # Or, pass in a +key+ and +value+. This will set an attribute for all
185
+ # matched elements.
182
186
  #
183
- def set(k, v = nil)
184
- case k
185
- when Hash
186
- each do |node|
187
- k.each { |a,b| node.set_attribute(a, b) }
187
+ # doc.search("p").attr("class", "basic")
188
+ #
189
+ # You may also use a Hash to set a series of attributes:
190
+ #
191
+ # (doc/"a").attr(:class => "basic", :href => "http://hackety.org/")
192
+ #
193
+ # Lastly, a block can be used to rewrite an attribute based on the element
194
+ # it belongs to. The block will pass in an element. Return from the block
195
+ # the new value of the attribute.
196
+ #
197
+ # records.attr("href") { |e| e['href'] + "#top" }
198
+ #
199
+ # This example adds a <tt>#top</tt> anchor to each link.
200
+ #
201
+ def attr key, value = nil, &blk
202
+ if value or blk
203
+ each do |el|
204
+ el.set_attribute(key, value || blk[el])
188
205
  end
206
+ return self
207
+ end
208
+ if key.is_a? Hash
209
+ key.each { |k,v| self.attr(k,v) }
210
+ return self
189
211
  else
190
- each do |node|
191
- node.set_attribute(k, v)
212
+ return self[0].get_attribute(key)
213
+ end
214
+ end
215
+ alias_method :set, :attr
216
+
217
+ # Adds the class to all matched elements.
218
+ #
219
+ # (doc/"p").add_class("bacon")
220
+ #
221
+ # Now all paragraphs will have class="bacon".
222
+ def add_class class_name
223
+ each do |el|
224
+ next unless el.respond_to? :get_attribute
225
+ classes = el.get_attribute('class').to_s.split(" ")
226
+ el.set_attribute('class', classes.push(class_name).uniq.join(" "))
227
+ end
228
+ self
229
+ end
230
+
231
+ # Remove an attribute from each of the matched elements.
232
+ #
233
+ # (doc/"input").remove_attr("disabled")
234
+ #
235
+ def remove_attr name
236
+ each do |el|
237
+ next unless el.respond_to? :remove_attribute
238
+ el.remove_attribute(name)
239
+ end
240
+ self
241
+ end
242
+
243
+ # Removes a class from all matched elements.
244
+ #
245
+ # (doc/"span").remove_class("lightgrey")
246
+ #
247
+ # Or, to remove all classes:
248
+ #
249
+ # (doc/"span").remove_class
250
+ #
251
+ def remove_class name = nil
252
+ each do |el|
253
+ next unless el.respond_to? :get_attribute
254
+ if name
255
+ classes = el.get_attribute('class').to_s.split(" ")
256
+ el.set_attribute('class', (classes - [name]).uniq.join(" "))
257
+ else
258
+ el.remove_attribute("class")
192
259
  end
193
260
  end
261
+ self
194
262
  end
195
263
 
196
- ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([^'"]*)'?"? *\]!i
197
- BRACK_RE = %r!(\[) *([^\]]*) *\]!i
264
+ ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([^\]'"]*)'?"? *\]!i
265
+ BRACK_RE = %r!(\[) *([^\]]*) *\]+!i
198
266
  FUNC_RE = %r!(:)?([a-zA-Z0-9\*_-]*)\( *[\"']?([^ \)]*?)['\"]? *\)!
267
+ CUST_RE = %r!(:)([a-zA-Z0-9\*_-]*)()!
199
268
  CATCH_RE = %r!([:\.#]*)([a-zA-Z0-9\*_-]+)!
200
269
 
201
270
  def self.filter(nodes, expr, truth = true)
202
271
  until expr.empty?
203
- _, *m = *expr.match(/^(?:#{ATTR_RE}|#{BRACK_RE}|#{FUNC_RE}|#{CATCH_RE})/)
272
+ _, *m = *expr.match(/^(?:#{ATTR_RE}|#{BRACK_RE}|#{FUNC_RE}|#{CUST_RE}|#{CATCH_RE})/)
204
273
  break unless _
205
274
 
206
275
  expr = $'
@@ -215,9 +284,15 @@ module Hpricot
215
284
 
216
285
  if m[0] == ":" && m[1] == "not"
217
286
  nodes, = Elements.filter(nodes, m[2], false)
287
+ elsif "#{m[0]}#{m[1]}" =~ /^(:even|:odd)$/
288
+ new_nodes = []
289
+ nodes.each_with_index {|n,i| new_nodes.push(n) if (i % 2 == (m[1] == "even" ? 0 : 1)) }
290
+ nodes = new_nodes
291
+ elsif "#{m[0]}#{m[1]}" =~ /^(:first|:last)$/
292
+ nodes = [nodes.send(m[1])]
218
293
  else
219
- meth = "filter[#{m[0]}#{m[1]}]"
220
- if Traverse.method_defined? meth
294
+ meth = "filter[#{m[0]}#{m[1]}]" unless m[0].empty?
295
+ if meth and Traverse.method_defined? meth
221
296
  args = m[2..-1]
222
297
  else
223
298
  meth = "filter[#{m[0]}]"
@@ -235,6 +310,40 @@ module Hpricot
235
310
  [nodes, expr]
236
311
  end
237
312
 
313
+ # Given two elements, attempt to gather an Elements array of everything between
314
+ # (and including) those two elements.
315
+ def self.expand(ele1, ele2, excl=false)
316
+ ary = []
317
+ offset = excl ? -1 : 0
318
+
319
+ if ele1 and ele2
320
+ # let's quickly take care of siblings
321
+ if ele1.parent == ele2.parent
322
+ ary = ele1.parent.children[ele1.node_position..(ele2.node_position+offset)]
323
+ else
324
+ # find common parent
325
+ p, ele1_p = ele1, [ele1]
326
+ ele1_p.unshift p while p.respond_to?(:parent) and p = p.parent
327
+ p, ele2_p = ele2, [ele2]
328
+ ele2_p.unshift p while p.respond_to?(:parent) and p = p.parent
329
+ common_parent = ele1_p.zip(ele2_p).select { |p1, p2| p1 == p2 }.flatten.last
330
+
331
+ child = nil
332
+ if ele1 == common_parent
333
+ child = ele2
334
+ elsif ele2 == common_parent
335
+ child = ele1
336
+ end
337
+
338
+ if child
339
+ ary = common_parent.children[0..(child.node_position+offset)]
340
+ end
341
+ end
342
+ end
343
+
344
+ return Elements[*ary]
345
+ end
346
+
238
347
  def filter(expr)
239
348
  nodes, = Elements.filter(self, expr)
240
349
  nodes
@@ -311,9 +420,9 @@ module Hpricot
311
420
 
312
421
  filter ':nth-child' do |arg,i|
313
422
  case arg
314
- when 'even'; parent.containers.index(self) % 2 == 0
315
- when 'odd'; parent.containers.index(self) % 2 == 1
316
- else self == parent.containers[arg.to_i]
423
+ when 'even'; (parent.containers.index(self) + 1) % 2 == 0
424
+ when 'odd'; (parent.containers.index(self) + 1) % 2 == 1
425
+ else self == (parent.containers[arg.to_i + 1])
317
426
  end
318
427
  end
319
428
 
@@ -357,9 +466,11 @@ module Hpricot
357
466
  self.comment?
358
467
  end
359
468
 
360
- filter :contains do |arg,|
469
+ filter :contains do |arg, ignore|
361
470
  html.include? arg
362
471
  end
472
+
473
+
363
474
 
364
475
  pred_procs =
365
476
  {'text()' => proc { |ele, *_| ele.inner_text.strip },