hpricot 0.8.2 → 0.8.3

Sign up to get free protection for your applications and to get access to all the features.
Binary file
@@ -7,7 +7,7 @@ module Hpricot
7
7
  # XML unescape
8
8
  def self.uxs(str)
9
9
  str.to_s.
10
- gsub(/\&(\w+);/) { [NamedCharacters[$1] || ??].pack("U*") }.
10
+ gsub(/\&(\w+);/) { [NamedCharacters[$1] || 63].pack("U*") }. # 63 = ?? (query char)
11
11
  gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
12
12
  end
13
13
 
@@ -9,7 +9,7 @@ module Hpricot
9
9
  # Hpricot::Elements[ele1, ele2, ele3]
10
10
  #
11
11
  # Assuming that ele1, ele2 and ele3 contain element objects (Hpricot::Elem,
12
- # Hpricot::Doc, etc.)
12
+ # Hpricot::Doc, etc.)
13
13
  #
14
14
  # == Continuing Searches
15
15
  #
@@ -29,7 +29,7 @@ module Hpricot
29
29
  # doc = Hpricot("That's my <b>spoon</b>, Tyler.")
30
30
  # doc.at("b").swap("<i>fork</i>")
31
31
  # doc.to_html
32
- # #=> "That's my <i>fork</i>, Tyler."
32
+ # #=> "That's my <i>fork</i>, Tyler."
33
33
  #
34
34
  # == Getting More Detailed
35
35
  #
@@ -50,7 +50,7 @@ module Hpricot
50
50
  # Most of the useful element methods are in the mixins Hpricot::Traverse
51
51
  # and Hpricot::Container::Trav.
52
52
  class Elements < Array
53
-
53
+
54
54
  # Searches this list for any elements (or children of these elements) matching
55
55
  # the CSS or XPath expression +expr+. Root is assumed to be the element scanned.
56
56
  #
@@ -65,7 +65,11 @@ module Hpricot
65
65
  #
66
66
  # See Hpricot::Container::Trav.at for more.
67
67
  def at(expr, &blk)
68
- search(expr, &blk).first
68
+ if expr.kind_of? Fixnum
69
+ super
70
+ else
71
+ search(expr, &blk)[0]
72
+ end
69
73
  end
70
74
  alias_method :%, :at
71
75
 
@@ -138,7 +142,7 @@ module Hpricot
138
142
  def prepend(str = nil, &blk)
139
143
  each { |x| x.html(x.make(str, &blk) + x.children) }
140
144
  end
141
-
145
+
142
146
  # Add some HTML just previous to each element in this list.
143
147
  # Pass in an HTML +str+, which is turned into Hpricot elements.
144
148
  def before(str = nil, &blk)
@@ -151,7 +155,7 @@ module Hpricot
151
155
  each { |x| x.parent.insert_after x.make(str, &blk), x }
152
156
  end
153
157
 
154
- # Wraps each element in the list inside the element created by HTML +str+.
158
+ # Wraps each element in the list inside the element created by HTML +str+.
155
159
  # If more than one element is found in the string, Hpricot locates the
156
160
  # deepest spot inside the first element.
157
161
  #
@@ -175,7 +179,7 @@ module Hpricot
175
179
  # Gets and sets attributes on all matched elements.
176
180
  #
177
181
  # Pass in a +key+ on its own and this method will return the string value
178
- # assigned to that attribute for the first elements. Or +nil+ if the
182
+ # assigned to that attribute for the first elements. Or +nil+ if the
179
183
  # attribute isn't found.
180
184
  #
181
185
  # doc.search("a").attr("href")
@@ -185,11 +189,11 @@ module Hpricot
185
189
  # matched elements.
186
190
  #
187
191
  # doc.search("p").attr("class", "basic")
188
- #
192
+ #
189
193
  # You may also use a Hash to set a series of attributes:
190
194
  #
191
195
  # (doc/"a").attr(:class => "basic", :href => "http://hackety.org/")
192
- #
196
+ #
193
197
  # Lastly, a block can be used to rewrite an attribute based on the element
194
198
  # it belongs to. The block will pass in an element. Return from the block
195
199
  # the new value of the attribute.
@@ -203,8 +207,8 @@ module Hpricot
203
207
  each do |el|
204
208
  el.set_attribute(key, value || blk[el])
205
209
  end
206
- return self
207
- end
210
+ return self
211
+ end
208
212
  if key.is_a? Hash
209
213
  key.each { |k,v| self.attr(k,v) }
210
214
  return self
@@ -213,7 +217,7 @@ module Hpricot
213
217
  end
214
218
  end
215
219
  alias_method :set, :attr
216
-
220
+
217
221
  # Adds the class to all matched elements.
218
222
  #
219
223
  # (doc/"p").add_class("bacon")
@@ -237,7 +241,7 @@ module Hpricot
237
241
  next unless el.respond_to? :remove_attribute
238
242
  el.remove_attribute(name)
239
243
  end
240
- self
244
+ self
241
245
  end
242
246
 
243
247
  # Removes a class from all matched elements.
@@ -247,7 +251,7 @@ module Hpricot
247
251
  # Or, to remove all classes:
248
252
  #
249
253
  # (doc/"span").remove_class
250
- #
254
+ #
251
255
  def remove_class name = nil
252
256
  each do |el|
253
257
  next unless el.respond_to? :get_attribute
@@ -258,10 +262,10 @@ module Hpricot
258
262
  el.remove_attribute("class")
259
263
  end
260
264
  end
261
- self
265
+ self
262
266
  end
263
267
 
264
- ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([^'"]*)'?"? *\]!i
268
+ ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([^'"]*)'?"? *\]!i # " (for emacs)
265
269
  BRACK_RE = %r!(\[) *([^\]]*) *\]+!i
266
270
  FUNC_RE = %r!(:)?([a-zA-Z0-9\*_-]*)\( *[\"']?([^ \)]*?)['\"]? *\)!
267
271
  CUST_RE = %r!(:)([a-zA-Z0-9\*_-]*)()!
@@ -301,7 +305,7 @@ module Hpricot
301
305
  end
302
306
  end
303
307
  args << -1
304
- nodes = Elements[*nodes.find_all do |x|
308
+ nodes = Elements[*nodes.find_all do |x|
305
309
  args[-1] += 1
306
310
  x.send(meth, *args) ? truth : !truth
307
311
  end]
@@ -419,7 +423,7 @@ module Hpricot
419
423
  end
420
424
 
421
425
  filter ':nth-child' do |arg,i|
422
- case arg
426
+ case arg
423
427
  when 'even'; (parent.containers.index(self) + 1) % 2 == 0
424
428
  when 'odd'; (parent.containers.index(self) + 1) % 2 == 1
425
429
  else self == (parent.containers[arg.to_i - 1])
@@ -429,7 +433,7 @@ module Hpricot
429
433
  filter ":last-child" do |i|
430
434
  self == parent.containers.last
431
435
  end
432
-
436
+
433
437
  filter ":nth-last-child" do |arg,i|
434
438
  self == parent.containers[-1-arg.to_i]
435
439
  end
@@ -451,13 +455,13 @@ module Hpricot
451
455
  end
452
456
 
453
457
  filter :empty do |*a|
454
- containers.length == 0
458
+ elem? && inner_html.length == 0
455
459
  end
456
460
 
457
461
  filter :root do |*a|
458
462
  self.is_a? Hpricot::Doc
459
463
  end
460
-
464
+
461
465
  filter 'text' do |*a|
462
466
  self.text?
463
467
  end
@@ -469,13 +473,13 @@ module Hpricot
469
473
  filter :contains do |arg, ignore|
470
474
  html.include? arg
471
475
  end
472
-
473
-
476
+
477
+
474
478
 
475
479
  pred_procs =
476
480
  {'text()' => proc { |ele, *_| ele.inner_text.strip },
477
481
  '@' => proc { |ele, attr, *_| ele.get_attribute(attr).to_s if ele.elem? }}
478
-
482
+
479
483
  oper_procs =
480
484
  {'=' => proc { |a,b| a == b },
481
485
  '!=' => proc { |a,b| a != b },
@@ -484,7 +484,7 @@ module Hpricot
484
484
  end
485
485
 
486
486
  ElementInclusions =
487
- {"head"=>["link", "meta", "object", "script", "style"], "body"=>["del", "ins"]}
487
+ {"head"=>["link", "meta", "object", "script", "style" , "noscript"], "body"=>["del", "ins"]}
488
488
  ElementInclusions.each do |k, v|
489
489
  v.each do |name|
490
490
  ElementContent[k][name.hash] = :allow
@@ -43,7 +43,7 @@ module Hpricot
43
43
  def initialize e
44
44
  @element = e
45
45
  end
46
- def [] k
46
+ def [] k
47
47
  Hpricot.uxs((@element.raw_attributes || {})[k])
48
48
  end
49
49
  def []= k, v
@@ -195,7 +195,7 @@ module Hpricot
195
195
  def pathname; "procins()" end
196
196
  def raw_string; output("") end
197
197
  def output(out, opts = {})
198
- out <<
198
+ out <<
199
199
  if_output(opts) do
200
200
  "<?#{target}" +
201
201
  (content ? " #{content}" : "") +
@@ -6,7 +6,7 @@ module Hpricot
6
6
  # Common sets of attributes.
7
7
  AttrCore = [:id, :class, :style, :title]
8
8
  AttrI18n = [:lang, 'xml:lang'.intern, :dir]
9
- AttrEvents = [:onclick, :ondblclick, :onmousedown, :onmouseup, :onmouseover, :onmousemove,
9
+ AttrEvents = [:onclick, :ondblclick, :onmousedown, :onmouseup, :onmouseover, :onmousemove,
10
10
  :onmouseout, :onkeypress, :onkeydown, :onkeyup]
11
11
  AttrFocus = [:accesskey, :tabindex, :onfocus, :onblur]
12
12
  AttrHAlign = [:align, :char, :charoff]
@@ -113,11 +113,11 @@ module Hpricot
113
113
  @tagset = XHTMLStrict.tagset.merge \
114
114
  :strike => Attrs,
115
115
  :center => Attrs,
116
- :dir => Attrs + [:compact],
116
+ :dir => Attrs + [:compact],
117
117
  :noframes => Attrs,
118
- :basefont => [:id, :size, :color, :face],
118
+ :basefont => [:id, :size, :color, :face],
119
119
  :u => Attrs,
120
- :menu => Attrs + [:compact],
120
+ :menu => Attrs + [:compact],
121
121
  :iframe => AttrCore + [:longdesc, :name, :src, :frameborder, :marginwidth, :marginheight, :scrolling, :align, :height, :width],
122
122
  :font => AttrCore + AttrI18n + [:size, :color, :face],
123
123
  :s => Attrs,
@@ -106,16 +106,16 @@ module Hpricot
106
106
  # Find all preceding nodes.
107
107
  def preceding
108
108
  sibs = parent.children
109
- si = sibs.index(self)
110
- return Elements[*sibs[0...si]]
111
- end
112
-
109
+ si = sibs.index(self)
110
+ return Elements[*sibs[0...si]]
111
+ end
112
+
113
113
  # Find all nodes which follow the current one.
114
114
  def following
115
- sibs = parent.children
116
- si = sibs.index(self) + 1
117
- return Elements[*sibs[si...sibs.length]]
118
- end
115
+ sibs = parent.children
116
+ si = sibs.index(self) + 1
117
+ return Elements[*sibs[si...sibs.length]]
118
+ end
119
119
 
120
120
  # Adds elements immediately after this element, contained in the +html+ string.
121
121
  def after(html = nil, &blk)
@@ -296,7 +296,7 @@ module Hpricot
296
296
  mt = after[%r!:[a-z0-9\\*_-]+!i, 0]
297
297
  oop = false
298
298
  if mt and not (mt == ":not" or Traverse.method_defined? "filter[#{mt}]")
299
- after = $'
299
+ after = $'
300
300
  m[2] += mt
301
301
  expr = after
302
302
  end
@@ -348,10 +348,10 @@ module Hpricot
348
348
  #
349
349
  # If _names_ are empty, it yields all elements.
350
350
  # If non-empty _names_ are given, it should be list of universal names.
351
- #
351
+ #
352
352
  # A nested element is yielded in depth first order as follows.
353
353
  #
354
- # t = Hpricot('<a id=0><b><a id=1 /></b><c id=2 /></a>')
354
+ # t = Hpricot('<a id=0><b><a id=1 /></b><c id=2 /></a>')
355
355
  # t.traverse_element("a", "c") {|e| p e}
356
356
  # # =>
357
357
  # {elem <a id="0"> {elem <b> {emptyelem <a id="1">} </b>} {emptyelem <c id="2">} </a>}
@@ -363,7 +363,7 @@ module Hpricot
363
363
  # t = Hpricot(<<'End')
364
364
  # <html>
365
365
  # <meta name="robots" content="index,nofollow">
366
- # <meta name="author" content="Who am I?">
366
+ # <meta name="author" content="Who am I?">
367
367
  # </html>
368
368
  # End
369
369
  # t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e}
@@ -424,19 +424,19 @@ module Hpricot
424
424
 
425
425
  # Find all preceding sibling elements. Like the other "sibling" methods, this weeds
426
426
  # out text and comment nodes.
427
- def preceding_siblings()
428
- sibs = parent.containers
429
- si = sibs.index(self)
430
- return Elements[*sibs[0...si]]
431
- end
432
-
427
+ def preceding_siblings()
428
+ sibs = parent.containers
429
+ si = sibs.index(self)
430
+ return Elements[*sibs[0...si]]
431
+ end
432
+
433
433
  # Find sibling elements which follow the current one. Like the other "sibling" methods, this weeds
434
434
  # out text and comment nodes.
435
- def following_siblings()
436
- sibs = parent.containers
437
- si = sibs.index(self) + 1
438
- return Elements[*sibs[si...sibs.length]]
439
- end
435
+ def following_siblings()
436
+ sibs = parent.containers
437
+ si = sibs.index(self) + 1
438
+ return Elements[*sibs[si...sibs.length]]
439
+ end
440
440
 
441
441
  # Puts together an array of neighboring sibling elements based on their proximity
442
442
  # to this element.
@@ -507,7 +507,7 @@ module Hpricot
507
507
  end
508
508
 
509
509
  # +find_element+ searches an element which universal name is specified by
510
- # the arguments.
510
+ # the arguments.
511
511
  # It returns nil if not found.
512
512
  def find_element(*names)
513
513
  traverse_element(*names) {|e| return e }
@@ -777,7 +777,7 @@ module Hpricot
777
777
  return author if !author.empty?
778
778
  rescue IndexError
779
779
  end
780
- }
780
+ }
781
781
 
782
782
  if channel = find_element('{http://purl.org/rss/1.0/}channel')
783
783
  channel.traverse_element('{http://purl.org/dc/elements/1.1/}creator') {|e|
@@ -10,7 +10,7 @@ module Hpricot
10
10
 
11
11
  ####################################################################
12
12
  # XML Character converter, from Sam Ruby:
13
- # (see http://intertwingly.net/stories/2005/09/28/xchar.rb).
13
+ # (see http://intertwingly.net/stories/2005/09/28/xchar.rb).
14
14
  #
15
15
  module XChar # :nodoc:
16
16
 
@@ -59,7 +59,7 @@ module Hpricot
59
59
  # See http://www.w3.org/TR/REC-xml/#charsets for details.
60
60
  VALID = [
61
61
  0x9, 0xA, 0xD,
62
- (0x20..0xD7FF),
62
+ (0x20..0xD7FF),
63
63
  (0xE000..0xFFFD),
64
64
  (0x10000..0x10FFFF)
65
65
  ]
@@ -86,7 +86,7 @@ module Hpricot
86
86
  # XML unescape
87
87
  def uxs(str)
88
88
  str.to_s.
89
- gsub(/\&\w+;/) { |x| (XChar::PREDEFINED_U[x] || ??).chr }.
89
+ gsub(/\&\w+;/) { |x| (XChar::PREDEFINED_U[x] || 63).chr }. # 63 = ?? (query char)
90
90
  gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
91
91
  end
92
92
  end
@@ -9,7 +9,7 @@
9
9
  </head>
10
10
  <body id='body1'>
11
11
  <p>Sample XHTML for <a id="link1" href="http://code.whytheluckystiff.net/mouseHole/">MouseHole 2</a>.</p>
12
- <p class='ohmy'>Please filter <a id="link2" href="http://hobix.com/">me</a>!</p>
12
+ <p class='ohmy'>Please filter <span></span><a id="link2" href="http://hobix.com/">me</a>!</p>
13
13
  <p>The third paragraph</p>
14
14
  <p class="last final"><b>THE FINAL PARAGRAPH</b></p>
15
15
  </body>
@@ -354,6 +354,16 @@ class TestParser < Test::Unit::TestCase
354
354
  assert_equal "HAI", doc.at("body").inner_text
355
355
  end
356
356
 
357
+ # http://github.com/hpricot/hpricot/issues#issue/28
358
+ def test_invalid_inner_text
359
+ assert_equal "A", Hpricot('A&B;').inner_text[0...1]
360
+ end
361
+
362
+ # http://github.com/hpricot/hpricot/issues#issue/25
363
+ def test_encoding_compatibility_error
364
+ Hpricot("<p>\xC3\x9Cber</p><p>M&sup3;</p>").inner_text
365
+ end
366
+
357
367
  # Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
358
368
  def test_self_closed_form
359
369
  doc = Hpricot(<<-edoc)
@@ -370,8 +380,8 @@ class TestParser < Test::Unit::TestCase
370
380
  @basic = Hpricot.parse(TestFiles::BASIC)
371
381
  assert_equal 0, (@basic/"title:parent").size
372
382
  assert_equal 3, (@basic/"p:parent").size
373
- assert_equal 1, (@basic/"title:empty").size
374
- assert_equal 1, (@basic/"p:empty").size
383
+ assert_equal 3, (@basic/"link:empty").size
384
+ assert_equal 1, (@basic/"span:empty").size
375
385
  end
376
386
 
377
387
  def test_keep_cdata
@@ -425,4 +435,23 @@ class TestParser < Test::Unit::TestCase
425
435
  assert_equal "This is STDOUT", (xml/:peon/:stdout).inner_text
426
436
  assert_equal "This is STDERR", (xml/:peon/:stderr).inner_text
427
437
  end
438
+
439
+ def test_parsing_html_with_noscript
440
+ doc = Hpricot(<<-edoc)
441
+ <html>
442
+ <head>
443
+ <noscript>
444
+ <meta http-equiv="refresh" content="0; url=http://www.yoursite.com/noscripts.html"/>
445
+ </noscript>
446
+ <meta name="verification" content="7ff5e90iormq5niy6x98j75" />
447
+ </head>
448
+ <body>
449
+ <h1>Testing</h1>
450
+ </body>
451
+ </html>
452
+
453
+ edoc
454
+ assert_equal "7ff5e90iormq5niy6x98j75", doc.at("/html/head/meta[@name='verification']")['content']
455
+ end
456
+
428
457
  end
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hpricot
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.2
4
+ hash: 57
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 8
9
+ - 3
10
+ version: 0.8.3
5
11
  platform: ruby
6
12
  authors:
7
13
  - why the lucky stiff
@@ -9,7 +15,7 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2009-11-05 00:00:00 -06:00
18
+ date: 2010-11-03 00:00:00 -05:00
13
19
  default_executable:
14
20
  dependencies: []
15
21
 
@@ -21,13 +27,13 @@ extensions:
21
27
  - ext/fast_xs/extconf.rb
22
28
  - ext/hpricot_scan/extconf.rb
23
29
  extra_rdoc_files:
24
- - README
30
+ - README.md
25
31
  - CHANGELOG
26
32
  - COPYING
27
33
  files:
28
34
  - CHANGELOG
29
35
  - COPYING
30
- - README
36
+ - README.md
31
37
  - Rakefile
32
38
  - test/files/basic.xhtml
33
39
  - test/files/boingboing.html
@@ -47,6 +53,7 @@ files:
47
53
  - test/test_paths.rb
48
54
  - test/test_preserved.rb
49
55
  - test/test_xml.rb
56
+ - extras/hpricot.png
50
57
  - lib/hpricot/blankslate.rb
51
58
  - lib/hpricot/builder.rb
52
59
  - lib/hpricot/elements.rb
@@ -59,7 +66,6 @@ files:
59
66
  - lib/hpricot/traverse.rb
60
67
  - lib/hpricot/xchar.rb
61
68
  - lib/hpricot.rb
62
- - extras/mingw-rbconfig.rb
63
69
  - ext/hpricot_scan/hpricot_scan.h
64
70
  - ext/fast_xs/FastXsService.java
65
71
  - ext/hpricot_scan/HpricotCss.java
@@ -84,26 +90,32 @@ rdoc_options:
84
90
  - --title
85
91
  - The Hpricot Reference
86
92
  - --main
87
- - README
93
+ - README.md
88
94
  - --inline-source
89
95
  require_paths:
90
96
  - lib
91
97
  required_ruby_version: !ruby/object:Gem::Requirement
98
+ none: false
92
99
  requirements:
93
100
  - - ">="
94
101
  - !ruby/object:Gem::Version
102
+ hash: 3
103
+ segments:
104
+ - 0
95
105
  version: "0"
96
- version:
97
106
  required_rubygems_version: !ruby/object:Gem::Requirement
107
+ none: false
98
108
  requirements:
99
109
  - - ">="
100
110
  - !ruby/object:Gem::Version
111
+ hash: 3
112
+ segments:
113
+ - 0
101
114
  version: "0"
102
- version:
103
115
  requirements: []
104
116
 
105
117
  rubyforge_project: hobix
106
- rubygems_version: 1.3.5
118
+ rubygems_version: 1.3.7
107
119
  signing_key:
108
120
  specification_version: 3
109
121
  summary: a swift, liberal HTML parser with a fantastic library