hpricot 0.5-mswin32 → 0.6-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+ #--
3
+ # Copyright 2004 by Jim Weirich (jim@weirichhouse.org).
4
+ # All rights reserved.
5
+
6
+ # Permission is granted for use, copying, modification, distribution,
7
+ # and distribution of modified versions of this work as long as the
8
+ # above copyright notice is included.
9
+ #++
10
+
11
+ module Hpricot
12
+
13
+ # BlankSlate provides an abstract base class with no predefined
14
+ # methods (except for <tt>\_\_send__</tt> and <tt>\_\_id__</tt>).
15
+ # BlankSlate is useful as a base class when writing classes that
16
+ # depend upon <tt>method_missing</tt> (e.g. dynamic proxies).
17
+ class BlankSlate
18
+ class << self
19
+
20
+ # Hide the method named +name+ in the BlankSlate class. Don't
21
+ # hide +instance_eval+ or any method beginning with "__".
22
+ def hide(name)
23
+ undef_method name if
24
+ instance_methods.include?(name.to_s) and
25
+ name !~ /^(__|instance_eval)/
26
+ end
27
+ end
28
+
29
+ instance_methods.each { |m| hide(m) }
30
+ end
31
+ end
32
+
33
+ # Since Ruby is very dynamic, methods added to the ancestors of
34
+ # BlankSlate <em>after BlankSlate is defined</em> will show up in the
35
+ # list of available BlankSlate methods. We handle this by defining a
36
+ # hook in the Object and Kernel classes that will hide any newly defined method.
37
+ module Kernel
38
+ class << self
39
+ alias_method :hpricot_slate_method_added, :method_added
40
+
41
+ # Detect method additions to Kernel and remove them in the
42
+ # BlankSlate class.
43
+ def method_added(name)
44
+ hpricot_slate_method_added(name)
45
+ return if self != Kernel
46
+ Hpricot::BlankSlate.hide(name)
47
+ end
48
+ end
49
+ end
50
+
51
+ class Object
52
+ class << self
53
+ alias_method :hpricot_slate_method_added, :method_added
54
+
55
+ # Detect method additions to Object and remove them in the
56
+ # BlankSlate class.
57
+ def method_added(name)
58
+ hpricot_slate_method_added(name)
59
+ return if self != Object
60
+ Hpricot::BlankSlate.hide(name)
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,200 @@
1
+ require 'hpricot/tags'
2
+ require 'hpricot/xchar'
3
+ require 'hpricot/blankslate'
4
+
5
+ module Hpricot
6
+ def self.build(ele = Doc.new, assigns = {}, &blk)
7
+ ele.extend Builder
8
+ assigns.each do |k, v|
9
+ ele.instance_variable_set("@#{k}", v)
10
+ end
11
+ ele.instance_eval &blk
12
+ ele
13
+ end
14
+
15
+ module Builder
16
+
17
+ @@default = {
18
+ :indent => 0,
19
+ :output_helpers => true,
20
+ :output_xml_instruction => true,
21
+ :output_meta_tag => true,
22
+ :auto_validation => true,
23
+ :tagset => Hpricot::XHTMLTransitional,
24
+ :root_attributes => {
25
+ :xmlns => 'http://www.w3.org/1999/xhtml', :'xml:lang' => 'en', :lang => 'en'
26
+ }
27
+ }
28
+
29
+ def self.set(option, value)
30
+ @@default[option] = value
31
+ end
32
+
33
+ # Write a +string+ to the HTML stream, making sure to escape it.
34
+ def text!(string)
35
+ @children << Text.new(Hpricot.xs(string))
36
+ end
37
+
38
+ # Write a +string+ to the HTML stream without escaping it.
39
+ def text(string)
40
+ @children << Text.new(string)
41
+ nil
42
+ end
43
+ alias_method :<<, :text
44
+ alias_method :concat, :text
45
+
46
+ # Create a tag named +tag+. Other than the first argument which is the tag name,
47
+ # the arguments are the same as for the generated tag methods (see +html_tag+).
48
+ def tag!(tag, *args, &block)
49
+ ele_id = nil
50
+ if @auto_validation and @tagset
51
+ if !@tagset.tagset.has_key?(tag)
52
+ raise InvalidXhtmlError, "no element `#{tag}' for #{tagset.doctype}"
53
+ elsif args.last.respond_to?(:to_hash)
54
+ attrs = args.last.to_hash
55
+
56
+ if @tagset.forms.include?(tag) and attrs[:id]
57
+ attrs[:name] ||= attrs[:id]
58
+ end
59
+
60
+ attrs.each do |k, v|
61
+ atname = k.to_s.downcase.intern
62
+ unless k =~ /:/ or @tagset.tagset[tag].include? atname
63
+ raise InvalidXhtmlError, "no attribute `#{k}' on #{tag} elements"
64
+ end
65
+ if atname == :id
66
+ ele_id = v.to_s
67
+ if @elements.has_key? ele_id
68
+ raise InvalidXhtmlError, "id `#{ele_id}' already used (id's must be unique)."
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
74
+
75
+ # turn arguments into children or attributes
76
+ childs = []
77
+ attrs = args.grep(Hash)
78
+ childs.concat((args - attrs).map do |x|
79
+ if x.respond_to? :to_html
80
+ Hpricot.make(x.to_html)
81
+ elsif x
82
+ Text.new(Hpricot.xs(x))
83
+ end
84
+ end.flatten)
85
+ attrs = attrs.inject({}) do |hsh, ath|
86
+ ath.each do |k, v|
87
+ hsh[k] = Hpricot.xs(v.to_s) if v
88
+ end
89
+ hsh
90
+ end
91
+
92
+ # create the element itself
93
+ f = Elem.new(STag.new(tag, attrs), childs, ETag.new(tag))
94
+
95
+ # build children from the block
96
+ if block
97
+ build(f, &block)
98
+ end
99
+
100
+ @children << f
101
+ f
102
+ end
103
+
104
+ def build(*a, &b)
105
+ Hpricot.build(*a, &b)
106
+ end
107
+
108
+ # Every HTML tag method goes through an html_tag call. So, calling <tt>div</tt> is equivalent
109
+ # to calling <tt>html_tag(:div)</tt>. All HTML tags in Hpricot's list are given generated wrappers
110
+ # for this method.
111
+ #
112
+ # If the @auto_validation setting is on, this method will check for many common mistakes which
113
+ # could lead to invalid XHTML.
114
+ def html_tag(sym, *args, &block)
115
+ if @auto_validation and @tagset.self_closing.include?(sym) and block
116
+ raise InvalidXhtmlError, "the `#{sym}' element is self-closing, please remove the block"
117
+ elsif args.empty? and block.nil?
118
+ CssProxy.new(self, sym)
119
+ else
120
+ tag!(sym, *args, &block)
121
+ end
122
+ end
123
+
124
+ XHTMLTransitional.tags.each do |k|
125
+ class_eval %{
126
+ def #{k}(*args, &block)
127
+ html_tag(#{k.inspect}, *args, &block)
128
+ end
129
+ }
130
+ end
131
+
132
+ def doctype(target, pub, sys)
133
+ @children << DocType.new(target, pub, sys)
134
+ end
135
+
136
+ remove_method :head
137
+
138
+ # Builds a head tag. Adds a <tt>meta</tt> tag inside with Content-Type
139
+ # set to <tt>text/html; charset=utf-8</tt>.
140
+ def head(*args, &block)
141
+ tag!(:head, *args) do
142
+ tag!(:meta, "http-equiv" => "Content-Type", "content" => "text/html; charset=utf-8") if @output_meta_tag
143
+ instance_eval(&block)
144
+ end
145
+ end
146
+
147
+ # Builds an html tag. An XML 1.0 instruction and an XHTML 1.0 Transitional doctype
148
+ # are prepended. Also assumes <tt>:xmlns => "http://www.w3.org/1999/xhtml",
149
+ # :lang => "en"</tt>.
150
+ def xhtml_transitional(attrs = {}, &block)
151
+ # self.tagset = Hpricot::XHTMLTransitional
152
+ xhtml_html(attrs, &block)
153
+ end
154
+
155
+ # Builds an html tag with XHTML 1.0 Strict doctype instead.
156
+ def xhtml_strict(attrs = {}, &block)
157
+ # self.tagset = Hpricot::XHTMLStrict
158
+ xhtml_html(attrs, &block)
159
+ end
160
+
161
+ private
162
+
163
+ def xhtml_html(attrs = {}, &block)
164
+ instruct! if @output_xml_instruction
165
+ doctype(:html, *@@default[:tagset].doctype)
166
+ tag!(:html, @@default[:root_attributes].merge(attrs), &block)
167
+ end
168
+
169
+ end
170
+
171
+ # Class used by Hpricot::Builder to store element options. Methods called
172
+ # against the CssProxy object are added as element classes or IDs.
173
+ #
174
+ # See the README for examples.
175
+ class CssProxy < BlankSlate
176
+
177
+ # Creates a CssProxy object.
178
+ def initialize(builder, sym)
179
+ @builder, @sym, @attrs = builder, sym, {}
180
+ end
181
+
182
+ # Adds attributes to an element. Bang methods set the :id attribute.
183
+ # Other methods add to the :class attribute.
184
+ def method_missing(id_or_class, *args, &block)
185
+ if (idc = id_or_class.to_s) =~ /!$/
186
+ @attrs[:id] = $`
187
+ else
188
+ @attrs[:class] = @attrs[:class].nil? ? idc : "#{@attrs[:class]} #{idc}".strip
189
+ end
190
+
191
+ if block or args.any?
192
+ args.push(@attrs)
193
+ return @builder.tag!(@sym, *args, &block)
194
+ end
195
+
196
+ return self
197
+ end
198
+
199
+ end
200
+ end
@@ -50,6 +50,7 @@ module Hpricot
50
50
  # Most of the useful element methods are in the mixins Hpricot::Traverse
51
51
  # and Hpricot::Container::Trav.
52
52
  class Elements < Array
53
+
53
54
  # Searches this list for any elements (or children of these elements) matching
54
55
  # the CSS or XPath expression +expr+. Root is assumed to be the element scanned.
55
56
  #
@@ -128,26 +129,26 @@ module Hpricot
128
129
 
129
130
  # Add to the end of the contents inside each element in this list.
130
131
  # Pass in an HTML +str+, which is turned into Hpricot elements.
131
- def append(str)
132
- each { |x| x.inner_html += str }
132
+ def append(str = nil, &blk)
133
+ each { |x| x.html(x.children + Hpricot.make(str, &blk)) }
133
134
  end
134
135
 
135
136
  # Add to the start of the contents inside each element in this list.
136
137
  # Pass in an HTML +str+, which is turned into Hpricot elements.
137
- def prepend(str)
138
- each { |x| x.inner_html = str + x.inner_html }
138
+ def prepend(str = nil, &blk)
139
+ each { |x| x.html(Hpricot.make(str, &blk) + x.children) }
139
140
  end
140
141
 
141
142
  # Add some HTML just previous to each element in this list.
142
143
  # Pass in an HTML +str+, which is turned into Hpricot elements.
143
- def before(str)
144
- each { |x| x.parent.insert_before Hpricot.make(str), x }
144
+ def before(str = nil, &blk)
145
+ each { |x| x.parent.insert_before Hpricot.make(str, &blk), x }
145
146
  end
146
147
 
147
148
  # Just after each element in this list, add some HTML.
148
149
  # Pass in an HTML +str+, which is turned into Hpricot elements.
149
- def after(str)
150
- each { |x| x.parent.insert_after Hpricot.make(str), x }
150
+ def after(str = nil, &blk)
151
+ each { |x| x.parent.insert_after Hpricot.make(str, &blk), x }
151
152
  end
152
153
 
153
154
  # Wraps each element in the list inside the element created by HTML +str+.
@@ -158,49 +159,117 @@ module Hpricot
158
159
  # wrap(%{<div class="link"><div class="link_inner"></div></div>})
159
160
  #
160
161
  # This code wraps every link on the page inside a +div.link+ and a +div.link_inner+ nest.
161
- def wrap(str)
162
+ def wrap(str = nil, &blk)
162
163
  each do |x|
163
- wrap = Hpricot.make(str)
164
+ wrap = Hpricot.make(str, &blk)
164
165
  nest = wrap.detect { |w| w.respond_to? :children }
165
166
  unless nest
166
167
  raise Exception, "No wrapping element found."
167
168
  end
168
169
  x.parent.replace_child(x, wrap)
169
170
  nest = nest.children.first until nest.empty?
170
- nest.children << x
171
+ nest.html(nest.children + [x])
171
172
  end
172
173
  end
173
174
 
174
- # Sets an attribute for all elements in this list. You may use
175
- # a simple pair (<em>attribute name</em>, <em>attribute value</em>):
175
+ # Gets and sets attributes on all matched elements.
176
176
  #
177
- # doc.search('p').set(:class, 'outline')
177
+ # Pass in a +key+ on its own and this method will return the string value
178
+ # assigned to that attribute for the first element, or +nil+ if the
179
+ # attribute isn't found.
178
180
  #
179
- # Or, use a hash of pairs:
181
+ # doc.search("a").attr("href")
182
+ # #=> "http://hacketyhack.net/"
180
183
  #
181
- # doc.search('div#sidebar').set(:class => 'outline', :id => 'topbar')
184
+ # Or, pass in a +key+ and +value+. This will set an attribute for all
185
+ # matched elements.
182
186
  #
183
- def set(k, v = nil)
184
- case k
185
- when Hash
186
- each do |node|
187
- k.each { |a,b| node.set_attribute(a, b) }
187
+ # doc.search("p").attr("class", "basic")
188
+ #
189
+ # You may also use a Hash to set a series of attributes:
190
+ #
191
+ # (doc/"a").attr(:class => "basic", :href => "http://hackety.org/")
192
+ #
193
+ # Lastly, a block can be used to rewrite an attribute based on the element
194
+ # it belongs to. The block will pass in an element. Return from the block
195
+ # the new value of the attribute.
196
+ #
197
+ # records.attr("href") { |e| e['href'] + "#top" }
198
+ #
199
+ # This example adds a <tt>#top</tt> anchor to each link.
200
+ #
201
+ def attr key, value = nil, &blk
202
+ if value or blk
203
+ each do |el|
204
+ el.set_attribute(key, value || blk[el])
188
205
  end
206
+ return self
207
+ end
208
+ if key.is_a? Hash
209
+ key.each { |k,v| self.attr(k,v) }
210
+ return self
189
211
  else
190
- each do |node|
191
- node.set_attribute(k, v)
212
+ return self[0].get_attribute(key)
213
+ end
214
+ end
215
+ alias_method :set, :attr
216
+
217
+ # Adds the class to all matched elements.
218
+ #
219
+ # (doc/"p").add_class("bacon")
220
+ #
221
+ # Now all paragraphs will have class="bacon".
222
+ def add_class class_name
223
+ each do |el|
224
+ next unless el.respond_to? :get_attribute
225
+ classes = el.get_attribute('class').to_s.split(" ")
226
+ el.set_attribute('class', classes.push(class_name).uniq.join(" "))
227
+ end
228
+ self
229
+ end
230
+
231
+ # Remove an attribute from each of the matched elements.
232
+ #
233
+ # (doc/"input").remove_attr("disabled")
234
+ #
235
+ def remove_attr name
236
+ each do |el|
237
+ next unless el.respond_to? :remove_attribute
238
+ el.remove_attribute(name)
239
+ end
240
+ self
241
+ end
242
+
243
+ # Removes a class from all matched elements.
244
+ #
245
+ # (doc/"span").remove_class("lightgrey")
246
+ #
247
+ # Or, to remove all classes:
248
+ #
249
+ # (doc/"span").remove_class
250
+ #
251
+ def remove_class name = nil
252
+ each do |el|
253
+ next unless el.respond_to? :get_attribute
254
+ if name
255
+ classes = el.get_attribute('class').to_s.split(" ")
256
+ el.set_attribute('class', (classes - [name]).uniq.join(" "))
257
+ else
258
+ el.remove_attribute("class")
192
259
  end
193
260
  end
261
+ self
194
262
  end
195
263
 
196
- ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([^'"]*)'?"? *\]!i
197
- BRACK_RE = %r!(\[) *([^\]]*) *\]!i
264
+ ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([^\]'"]*)'?"? *\]!i
265
+ BRACK_RE = %r!(\[) *([^\]]*) *\]+!i
198
266
  FUNC_RE = %r!(:)?([a-zA-Z0-9\*_-]*)\( *[\"']?([^ \)]*?)['\"]? *\)!
267
+ CUST_RE = %r!(:)([a-zA-Z0-9\*_-]*)()!
199
268
  CATCH_RE = %r!([:\.#]*)([a-zA-Z0-9\*_-]+)!
200
269
 
201
270
  def self.filter(nodes, expr, truth = true)
202
271
  until expr.empty?
203
- _, *m = *expr.match(/^(?:#{ATTR_RE}|#{BRACK_RE}|#{FUNC_RE}|#{CATCH_RE})/)
272
+ _, *m = *expr.match(/^(?:#{ATTR_RE}|#{BRACK_RE}|#{FUNC_RE}|#{CUST_RE}|#{CATCH_RE})/)
204
273
  break unless _
205
274
 
206
275
  expr = $'
@@ -215,9 +284,15 @@ module Hpricot
215
284
 
216
285
  if m[0] == ":" && m[1] == "not"
217
286
  nodes, = Elements.filter(nodes, m[2], false)
287
+ elsif "#{m[0]}#{m[1]}" =~ /^(:even|:odd)$/
288
+ new_nodes = []
289
+ nodes.each_with_index {|n,i| new_nodes.push(n) if (i % 2 == (m[1] == "even" ? 0 : 1)) }
290
+ nodes = new_nodes
291
+ elsif "#{m[0]}#{m[1]}" =~ /^(:first|:last)$/
292
+ nodes = [nodes.send(m[1])]
218
293
  else
219
- meth = "filter[#{m[0]}#{m[1]}]"
220
- if Traverse.method_defined? meth
294
+ meth = "filter[#{m[0]}#{m[1]}]" unless m[0].empty?
295
+ if meth and Traverse.method_defined? meth
221
296
  args = m[2..-1]
222
297
  else
223
298
  meth = "filter[#{m[0]}]"
@@ -235,6 +310,40 @@ module Hpricot
235
310
  [nodes, expr]
236
311
  end
237
312
 
313
+ # Given two elements, attempt to gather an Elements array of everything between
314
+ # (and including) those two elements.
315
+ def self.expand(ele1, ele2, excl=false)
316
+ ary = []
317
+ offset = excl ? -1 : 0
318
+
319
+ if ele1 and ele2
320
+ # let's quickly take care of siblings
321
+ if ele1.parent == ele2.parent
322
+ ary = ele1.parent.children[ele1.node_position..(ele2.node_position+offset)]
323
+ else
324
+ # find common parent
325
+ p, ele1_p = ele1, [ele1]
326
+ ele1_p.unshift p while p.respond_to?(:parent) and p = p.parent
327
+ p, ele2_p = ele2, [ele2]
328
+ ele2_p.unshift p while p.respond_to?(:parent) and p = p.parent
329
+ common_parent = ele1_p.zip(ele2_p).select { |p1, p2| p1 == p2 }.flatten.last
330
+
331
+ child = nil
332
+ if ele1 == common_parent
333
+ child = ele2
334
+ elsif ele2 == common_parent
335
+ child = ele1
336
+ end
337
+
338
+ if child
339
+ ary = common_parent.children[0..(child.node_position+offset)]
340
+ end
341
+ end
342
+ end
343
+
344
+ return Elements[*ary]
345
+ end
346
+
238
347
  def filter(expr)
239
348
  nodes, = Elements.filter(self, expr)
240
349
  nodes
@@ -311,9 +420,9 @@ module Hpricot
311
420
 
312
421
  filter ':nth-child' do |arg,i|
313
422
  case arg
314
- when 'even'; parent.containers.index(self) % 2 == 0
315
- when 'odd'; parent.containers.index(self) % 2 == 1
316
- else self == parent.containers[arg.to_i]
423
+ when 'even'; (parent.containers.index(self) + 1) % 2 == 0
424
+ when 'odd'; (parent.containers.index(self) + 1) % 2 == 1
425
+ else self == (parent.containers[arg.to_i + 1])
317
426
  end
318
427
  end
319
428
 
@@ -357,9 +466,11 @@ module Hpricot
357
466
  self.comment?
358
467
  end
359
468
 
360
- filter :contains do |arg,|
469
+ filter :contains do |arg, ignore|
361
470
  html.include? arg
362
471
  end
472
+
473
+
363
474
 
364
475
  pred_procs =
365
476
  {'text()' => proc { |ele, *_| ele.inner_text.strip },