hpricot 0.4-mswin32 → 0.5-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,20 +3,93 @@ require 'uri'
3
3
 
4
4
  module Hpricot
5
5
  module Traverse
6
+ # Is this object the enclosing HTML or XML document?
6
7
  def doc?() Doc::Trav === self end
8
+ # Is this object an HTML or XML element?
7
9
  def elem?() Elem::Trav === self end
10
+ # Is this object an HTML text node?
8
11
  def text?() Text::Trav === self end
12
+ # Is this object an XML declaration?
9
13
  def xmldecl?() XMLDecl::Trav === self end
14
+ # Is this object a doctype tag?
10
15
  def doctype?() DocType::Trav === self end
16
+ # Is this object an XML processing instruction?
11
17
  def procins?() ProcIns::Trav === self end
18
+ # Is this object a comment?
12
19
  def comment?() Comment::Trav === self end
20
+ # Is this object a stranded end tag?
13
21
  def bogusetag?() BogusETag::Trav === self end
14
22
 
23
+ # Builds an HTML string from this node and its contents.
24
+ # If you need to write to a stream, try calling <tt>output(io)</tt>
25
+ # as a method on this object.
15
26
  def to_html
16
27
  output("")
17
28
  end
18
29
  alias_method :to_s, :to_html
19
30
 
31
+ # Attempts to preserve the original HTML of the document, only
32
+ # outputting new tags for elements which have changed.
33
+ def to_original_html
34
+ output("", :preserve => true)
35
+ end
36
+
37
+ # Puts together an array of neighboring nodes based on their proximity
38
+ # to this node. So, for example, to get the next node, you could use
39
+ # <tt>nodes_at(1)</tt>. Or, to get the previous node, use <tt>nodes_at(-1)</tt>.
40
+ #
41
+ # This method also accepts ranges and sets of numbers.
42
+ #
43
+ # ele.nodes_at(-3..-1, 1..3) # gets three nodes before and three after
44
+ # ele.nodes_at(1, 5, 7) # gets three nodes at offsets below the current node
45
+ # ele.nodes_at(0, 5..6) # the current node and two others
46
+ def nodes_at(*pos)
47
+ sib = parent.children
48
+ i, si = 0, sib.index(self)
49
+ Elements[*
50
+ sib.select do |x|
51
+ sel = case i - si when *pos
52
+ true
53
+ end
54
+ i += 1
55
+ sel
56
+ end
57
+ ]
58
+ end
59
+
60
+ # Returns the node neighboring this node to the south: just below it.
61
+ # This method includes text nodes and comments and such.
62
+ def next_node
63
+ sib = parent.children
64
+ sib[sib.index(self) + 1] if parent
65
+ end
66
+
67
+ # Returns to node neighboring this node to the north: just above it.
68
+ # This method includes text nodes and comments and such.
69
+ def previous_node
70
+ sib = parent.children
71
+ x = sib.index(self) - 1
72
+ sib[x] if sib and x >= 0
73
+ end
74
+
75
+ # Adds elements immediately after this element, contained in the +html+ string.
76
+ def after(html)
77
+ parent.insert_after(Hpricot.make(html), self)
78
+ end
79
+
80
+ # Adds elements immediately before this element, contained in the +html+ string.
81
+ def before(html)
82
+ parent.insert_after(Hpricot.make(html), self)
83
+ end
84
+
85
+
86
+ # Replace this element and its contents with the nodes contained
87
+ # in the +html+ string.
88
+ def swap(html)
89
+ parent.altered!
90
+ parent.replace_child(self, Hpricot.make(html))
91
+ end
92
+
20
93
  def get_subnode(*indexes)
21
94
  n = self
22
95
  indexes.each {|index|
@@ -24,37 +97,36 @@ module Hpricot
24
97
  }
25
98
  n
26
99
  end
27
- end
28
100
 
29
- module Container::Trav
30
- def containers
31
- children.grep(Container::Trav)
32
- end
33
- def replace_child(old, new)
34
- children[children.index(old), 1] = [*new]
35
- end
36
- def insert_before(nodes, ele)
37
- case nodes
38
- when Array
39
- nodes.each { |n| insert_before(n, ele) }
40
- else
41
- children[children.index(ele) || 0, 0] = nodes
101
+ # Builds a string from the text contained in this node. All
102
+ # HTML elements are removed.
103
+ def to_plain_text
104
+ if respond_to? :children
105
+ children.map { |x| x.to_plain_text }.join.strip.gsub(/\n{2,}/, "\n\n")
42
106
  end
43
107
  end
44
- def insert_after(nodes, ele)
45
- case nodes
46
- when Array
47
- nodes.each { |n| insert_after(n, ele) }
48
- else
49
- idx = children.index(ele)
50
- children[idx ? idx + 1 : children.length, 0] = nodes
108
+
109
+ # Builds a string from the text contained in this node. All
110
+ # HTML elements are removed.
111
+ def inner_text
112
+ if respond_to? :children
113
+ children.map { |x| x.inner_text }.join
51
114
  end
52
115
  end
116
+ alias_method :innerText, :inner_text
117
+
118
+ # Builds an HTML string from the contents of this node.
53
119
  def inner_html
54
- children.map { |x| x.output("") }.join
120
+ if respond_to? :children
121
+ children.map { |x| x.output("") }.join
122
+ end
55
123
  end
56
124
  alias_method :innerHTML, :inner_html
125
+
126
+ # Inserts new contents into the current node, based on
127
+ # the HTML contained in string +inner+.
57
128
  def inner_html=(inner)
129
+ altered!
58
130
  case inner
59
131
  when String, IO
60
132
  self.children = Hpricot.parse(inner).children
@@ -63,63 +135,137 @@ module Hpricot
63
135
  when nil
64
136
  self.children = []
65
137
  end
138
+ reparent self.children
66
139
  end
67
140
  alias_method :innerHTML=, :inner_html=
141
+
142
+ def reparent(nodes)
143
+ altered!
144
+ [*nodes].each { |e| e.parent = self }
145
+ end
146
+ private :reparent
147
+
148
+ def clean_path(path)
149
+ path.gsub(/^\s+|\s+$/, '')
150
+ end
151
+
152
+ # Builds a unique XPath string for this node, from the
153
+ # root of the document containing it.
154
+ def xpath
155
+ if elem? and has_attribute? 'id'
156
+ "//#{self.name}[@id='#{get_attribute('id')}']"
157
+ else
158
+ sim, id = 0, 0, 0
159
+ parent.children.each do |e|
160
+ id = sim if e == self
161
+ sim += 1 if e.pathname == self.pathname
162
+ end
163
+ p = File.join(parent.xpath, self.pathname)
164
+ p += "[#{id+1}]" if sim >= 2
165
+ p
166
+ end
167
+ end
168
+
169
+ # Builds a unique CSS string for this node, from the
170
+ # root of the document containing it.
171
+ def css_path
172
+ if elem? and has_attribute? 'id'
173
+ "##{get_attribute('id')}"
174
+ else
175
+ sim, i, id = 0, 0, 0
176
+ parent.children.each do |e|
177
+ id = sim if e == self
178
+ sim += 1 if e.pathname == self.pathname
179
+ end
180
+ p = parent.css_path
181
+ p = p ? "#{p} > #{self.pathname}" : self.pathname
182
+ p += ":nth(#{id})" if sim >= 2
183
+ p
184
+ end
185
+ end
186
+
187
+ def node_position
188
+ parent.children.index(self)
189
+ end
190
+
191
+ def position
192
+ parent.children_of_type(self.pathname).index(self)
193
+ end
194
+
195
+ # Searches this node for all elements matching
196
+ # the CSS or XPath +expr+. Returns an Elements array
197
+ # containing the matching nodes. If +blk+ is given, it
198
+ # is used to iterate through the matching set.
68
199
  def search(expr, &blk)
69
200
  last = nil
70
201
  nodes = [self]
71
202
  done = []
72
203
  expr = expr.to_s
204
+ hist = []
73
205
  until expr.empty?
74
206
  expr = clean_path(expr)
75
207
  expr.gsub!(%r!^//!, '')
76
208
 
77
209
  case expr
78
210
  when %r!^/?\.\.!
79
- expr = $'
211
+ last = expr = $'
80
212
  nodes.map! { |node| node.parent }
81
213
  when %r!^[>/]!
82
- expr = $'
83
- nodes = Elements[*nodes.map { |node| node.containers }.flatten]
214
+ last = expr = $'
215
+ nodes = Elements[*nodes.map { |node| node.children if node.respond_to? :children }.flatten.compact]
84
216
  when %r!^\+!
85
- expr = $'
217
+ last = expr = $'
86
218
  nodes.map! do |node|
87
- siblings = node.parent.containers
219
+ siblings = node.parent.children
88
220
  siblings[siblings.index(node)+1]
89
221
  end
90
222
  nodes.compact!
91
223
  when %r!^~!
92
- expr = $'
224
+ last = expr = $'
93
225
  nodes.map! do |node|
94
- siblings = node.parent.containers
226
+ siblings = node.parent.children
95
227
  siblings[(siblings.index(node)+1)..-1]
96
228
  end
97
229
  nodes.flatten!
98
230
  when %r!^[|,]!
99
- expr = " #$'"
231
+ last = expr = " #$'"
100
232
  nodes.shift if nodes.first == self
101
233
  done += nodes
102
234
  nodes = [self]
103
235
  else
104
- m = expr.match %r!^([#.]?)([a-z0-9\\*_-]*)!i
105
- expr = $'
236
+ m = expr.match(%r!^([#.]?)([a-z0-9\\*_-]*)!i).to_a
237
+ after = $'
238
+ mt = after[%r!:[a-z0-9\\*_-]+!i, 0]
239
+ oop = false
240
+ if mt and not (mt == ":not" or Traverse.method_defined? "filter[#{mt}]")
241
+ after = $'
242
+ m[2] += mt
243
+ expr = after
244
+ end
106
245
  if m[1] == '#'
107
246
  oid = get_element_by_id(m[2])
108
247
  nodes = oid ? [oid] : []
248
+ expr = after
109
249
  else
110
- m[2] = "*" if m[2] == "" || m[1] == "."
250
+ m[2] = "*" if after =~ /^\(\)/ || m[2] == "" || m[1] == "."
111
251
  ret = []
112
252
  nodes.each do |node|
113
253
  case m[2]
114
254
  when '*'
255
+ node.traverse_element { |n| ret << n }
115
256
  else
116
- ret += [*node.get_elements_by_tag_name(m[2])]
257
+ if node.respond_to? :get_elements_by_tag_name
258
+ ret += [*node.get_elements_by_tag_name(m[2])] - [*(node unless last)]
259
+ end
117
260
  end
118
261
  end
119
262
  nodes = ret
120
263
  end
264
+ last = nil
121
265
  end
122
266
 
267
+ hist << expr
268
+ break if hist[-1] == hist[-2]
123
269
  nodes, expr = Elements.filter(nodes, expr)
124
270
  end
125
271
  nodes = done + nodes.flatten.uniq
@@ -132,35 +278,13 @@ module Hpricot
132
278
  end
133
279
  alias_method :/, :search
134
280
 
135
- def at(expr, &blk)
136
- search(expr, &blk).first
281
+ # Find the first matching node for the CSS or XPath
282
+ # +expr+ string.
283
+ def at(expr)
284
+ search(expr).first
137
285
  end
138
286
  alias_method :%, :at
139
287
 
140
- def clean_path(path)
141
- path.gsub(/^\s+|\s+$/, '')
142
- end
143
-
144
- # +each_child+ iterates over each child.
145
- def each_child(&block) # :yields: child_node
146
- children.each(&block)
147
- nil
148
- end
149
-
150
- # +each_child_with_index+ iterates over each child.
151
- def each_child_with_index(&block) # :yields: child_node, index
152
- children.each_with_index(&block)
153
- nil
154
- end
155
-
156
- # +find_element+ searches an element which universal name is specified by
157
- # the arguments.
158
- # It returns nil if not found.
159
- def find_element(*names)
160
- traverse_element(*names) {|e| return e }
161
- nil
162
- end
163
-
164
288
  # +traverse_element+ traverses elements in the tree.
165
289
  # It yields elements in depth first order.
166
290
  #
@@ -200,13 +324,130 @@ module Hpricot
200
324
  nil
201
325
  end
202
326
 
327
+ # Find children of a given +tag_name+.
328
+ #
329
+ # ele.children_of_type('p')
330
+ # #=> [...array of paragraphs...]
331
+ #
332
+ def children_of_type(tag_name)
333
+ if respond_to? :children
334
+ children.find_all do |x|
335
+ x.respond_to?(:pathname) && x.pathname == tag_name
336
+ end
337
+ end
338
+ end
339
+
340
+ end
341
+
342
+ module Container::Trav
343
+ # Return all children of this node which can contain other
344
+ # nodes. This is a good way to get all HTML elements which
345
+ # aren't text, comment, doctype or processing instruction nodes.
346
+ def containers
347
+ children.grep(Container::Trav)
348
+ end
349
+
350
+ # Returns the container node neighboring this node to the south: just below it.
351
+ # By "container" node, I mean: this method does not find text nodes or comments or cdata or any of that.
352
+ # See Hpricot::Traverse#next_node if you need to hunt out all kinds of nodes.
353
+ def next_sibling
354
+ sib = parent.containers
355
+ sib[sib.index(self) + 1] if parent
356
+ end
357
+
358
+ # Returns the container node neighboring this node to the north: just above it.
359
+ # By "container" node, I mean: this method does not find text nodes or comments or cdata or any of that.
360
+ # See Hpricot::Traverse#previous_node if you need to hunt out all kinds of nodes.
361
+ def previous_sibling
362
+ sib = parent.containers
363
+ x = sib.index(self) - 1
364
+ sib[x] if sib and x >= 0
365
+ end
366
+
367
+ # Puts together an array of neighboring sibling elements based on their proximity
368
+ # to this element.
369
+ #
370
+ # This method accepts ranges and sets of numbers.
371
+ #
372
+ # ele.siblings_at(-3..-1, 1..3) # gets three elements before and three after
373
+ # ele.siblings_at(1, 5, 7) # gets three elements at offsets below the current element
374
+ # ele.siblings_at(0, 5..6) # the current element and two others
375
+ #
376
+ # Like the other "sibling" methods, this doesn't find text and comment nodes.
377
+ # Use nodes_at to include those nodes.
378
+ def siblings_at(*pos)
379
+ sib = parent.containers
380
+ i, si = 0, sib.index(self)
381
+ Elements[*
382
+ sib.select do |x|
383
+ sel = case i - si when *pos
384
+ true
385
+ end
386
+ i += 1
387
+ sel
388
+ end
389
+ ]
390
+ end
391
+
392
+ # Replace +old+, a child of the current node, with +new+ node.
393
+ def replace_child(old, new)
394
+ reparent new
395
+ children[children.index(old), 1] = [*new]
396
+ end
397
+
398
+ # Insert +nodes+, an array of HTML elements or a single element,
399
+ # before the node +ele+, a child of the current node.
400
+ def insert_before(nodes, ele)
401
+ case nodes
402
+ when Array
403
+ nodes.each { |n| insert_before(n, ele) }
404
+ else
405
+ reparent nodes
406
+ children[children.index(ele) || 0, 0] = nodes
407
+ end
408
+ end
409
+
410
+ # Insert +nodes+, an array of HTML elements or a single element,
411
+ # after the node +ele+, a child of the current node.
412
+ def insert_after(nodes, ele)
413
+ case nodes
414
+ when Array
415
+ nodes.each { |n| insert_after(n, ele) }
416
+ else
417
+ reparent nodes
418
+ idx = children.index(ele)
419
+ children[idx ? idx + 1 : children.length, 0] = nodes
420
+ end
421
+ end
422
+
423
+ # +each_child+ iterates over each child.
424
+ def each_child(&block) # :yields: child_node
425
+ children.each(&block)
426
+ nil
427
+ end
428
+
429
+ # +each_child_with_index+ iterates over each child.
430
+ def each_child_with_index(&block) # :yields: child_node, index
431
+ children.each_with_index(&block)
432
+ nil
433
+ end
434
+
435
+ # +find_element+ searches an element which universal name is specified by
436
+ # the arguments.
437
+ # It returns nil if not found.
438
+ def find_element(*names)
439
+ traverse_element(*names) {|e| return e }
440
+ nil
441
+ end
442
+
443
+ # Returns a list of CSS classes to which this element belongs.
203
444
  def classes
204
445
  get_attribute('class').to_s.strip.split(/\s+/)
205
446
  end
206
447
 
207
448
  def get_element_by_id(id)
208
449
  traverse_all_element do |ele|
209
- if eid = ele.get_attribute('id')
450
+ if ele.elem? and eid = ele.get_attribute('id')
210
451
  return ele if eid.to_s == id
211
452
  end
212
453
  end
@@ -319,6 +560,12 @@ module Hpricot
319
560
  def traverse_all_element(&block)
320
561
  children.each {|c| c.traverse_all_element(&block) }
321
562
  end
563
+ def xpath
564
+ "/"
565
+ end
566
+ def css_path
567
+ nil
568
+ end
322
569
  end
323
570
 
324
571
  module Elem::Trav
@@ -330,6 +577,7 @@ module Hpricot
330
577
 
331
578
  module Leaf::Trav
332
579
  def traverse_all_element
580
+ yield self
333
581
  end
334
582
  end
335
583
 
@@ -497,12 +745,14 @@ module Hpricot
497
745
  end
498
746
  alias_method :[], :get_attribute
499
747
  def set_attribute(name, val)
748
+ altered!
500
749
  self.attributes ||= {}
501
750
  self.attributes[name.to_s] = val
502
751
  end
503
752
  alias_method :[]=, :set_attribute
504
753
  def remove_attribute(name)
505
754
  if has_attribute? name
755
+ altered!
506
756
  self.attributes.delete(name)
507
757
  end
508
758
  end