hpricot 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
@@ -7,7 +7,7 @@ module Hpricot
7
7
  # XML unescape
8
8
  def self.uxs(str)
9
9
  str.to_s.
10
- gsub(/\&(\w+);/) { [NamedCharacters[$1] || ??].pack("U*") }.
10
+ gsub(/\&(\w+);/) { [NamedCharacters[$1] || 63].pack("U*") }. # 63 = ?? (query char)
11
11
  gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
12
12
  end
13
13
 
@@ -9,7 +9,7 @@ module Hpricot
9
9
  # Hpricot::Elements[ele1, ele2, ele3]
10
10
  #
11
11
  # Assuming that ele1, ele2 and ele3 contain element objects (Hpricot::Elem,
12
- # Hpricot::Doc, etc.)
12
+ # Hpricot::Doc, etc.)
13
13
  #
14
14
  # == Continuing Searches
15
15
  #
@@ -29,7 +29,7 @@ module Hpricot
29
29
  # doc = Hpricot("That's my <b>spoon</b>, Tyler.")
30
30
  # doc.at("b").swap("<i>fork</i>")
31
31
  # doc.to_html
32
- # #=> "That's my <i>fork</i>, Tyler."
32
+ # #=> "That's my <i>fork</i>, Tyler."
33
33
  #
34
34
  # == Getting More Detailed
35
35
  #
@@ -50,7 +50,7 @@ module Hpricot
50
50
  # Most of the useful element methods are in the mixins Hpricot::Traverse
51
51
  # and Hpricot::Container::Trav.
52
52
  class Elements < Array
53
-
53
+
54
54
  # Searches this list for any elements (or children of these elements) matching
55
55
  # the CSS or XPath expression +expr+. Root is assumed to be the element scanned.
56
56
  #
@@ -65,7 +65,11 @@ module Hpricot
65
65
  #
66
66
  # See Hpricot::Container::Trav.at for more.
67
67
  def at(expr, &blk)
68
- search(expr, &blk).first
68
+ if expr.kind_of? Fixnum
69
+ super
70
+ else
71
+ search(expr, &blk)[0]
72
+ end
69
73
  end
70
74
  alias_method :%, :at
71
75
 
@@ -138,7 +142,7 @@ module Hpricot
138
142
  def prepend(str = nil, &blk)
139
143
  each { |x| x.html(x.make(str, &blk) + x.children) }
140
144
  end
141
-
145
+
142
146
  # Add some HTML just previous to each element in this list.
143
147
  # Pass in an HTML +str+, which is turned into Hpricot elements.
144
148
  def before(str = nil, &blk)
@@ -151,7 +155,7 @@ module Hpricot
151
155
  each { |x| x.parent.insert_after x.make(str, &blk), x }
152
156
  end
153
157
 
154
- # Wraps each element in the list inside the element created by HTML +str+.
158
+ # Wraps each element in the list inside the element created by HTML +str+.
155
159
  # If more than one element is found in the string, Hpricot locates the
156
160
  # deepest spot inside the first element.
157
161
  #
@@ -175,7 +179,7 @@ module Hpricot
175
179
  # Gets and sets attributes on all matched elements.
176
180
  #
177
181
  # Pass in a +key+ on its own and this method will return the string value
178
- # assigned to that attribute for the first elements. Or +nil+ if the
182
+ # assigned to that attribute for the first elements. Or +nil+ if the
179
183
  # attribute isn't found.
180
184
  #
181
185
  # doc.search("a").attr("href")
@@ -185,11 +189,11 @@ module Hpricot
185
189
  # matched elements.
186
190
  #
187
191
  # doc.search("p").attr("class", "basic")
188
- #
192
+ #
189
193
  # You may also use a Hash to set a series of attributes:
190
194
  #
191
195
  # (doc/"a").attr(:class => "basic", :href => "http://hackety.org/")
192
- #
196
+ #
193
197
  # Lastly, a block can be used to rewrite an attribute based on the element
194
198
  # it belongs to. The block will pass in an element. Return from the block
195
199
  # the new value of the attribute.
@@ -203,8 +207,8 @@ module Hpricot
203
207
  each do |el|
204
208
  el.set_attribute(key, value || blk[el])
205
209
  end
206
- return self
207
- end
210
+ return self
211
+ end
208
212
  if key.is_a? Hash
209
213
  key.each { |k,v| self.attr(k,v) }
210
214
  return self
@@ -213,7 +217,7 @@ module Hpricot
213
217
  end
214
218
  end
215
219
  alias_method :set, :attr
216
-
220
+
217
221
  # Adds the class to all matched elements.
218
222
  #
219
223
  # (doc/"p").add_class("bacon")
@@ -237,7 +241,7 @@ module Hpricot
237
241
  next unless el.respond_to? :remove_attribute
238
242
  el.remove_attribute(name)
239
243
  end
240
- self
244
+ self
241
245
  end
242
246
 
243
247
  # Removes a class from all matched elements.
@@ -247,7 +251,7 @@ module Hpricot
247
251
  # Or, to remove all classes:
248
252
  #
249
253
  # (doc/"span").remove_class
250
- #
254
+ #
251
255
  def remove_class name = nil
252
256
  each do |el|
253
257
  next unless el.respond_to? :get_attribute
@@ -258,10 +262,10 @@ module Hpricot
258
262
  el.remove_attribute("class")
259
263
  end
260
264
  end
261
- self
265
+ self
262
266
  end
263
267
 
264
- ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([^'"]*)'?"? *\]!i
268
+ ATTR_RE = %r!\[ *(?:(@)([\w\(\)-]+)|([\w\(\)-]+\(\))) *([~\!\|\*$\^=]*) *'?"?([^'"]*)'?"? *\]!i # " (for emacs)
265
269
  BRACK_RE = %r!(\[) *([^\]]*) *\]+!i
266
270
  FUNC_RE = %r!(:)?([a-zA-Z0-9\*_-]*)\( *[\"']?([^ \)]*?)['\"]? *\)!
267
271
  CUST_RE = %r!(:)([a-zA-Z0-9\*_-]*)()!
@@ -301,7 +305,7 @@ module Hpricot
301
305
  end
302
306
  end
303
307
  args << -1
304
- nodes = Elements[*nodes.find_all do |x|
308
+ nodes = Elements[*nodes.find_all do |x|
305
309
  args[-1] += 1
306
310
  x.send(meth, *args) ? truth : !truth
307
311
  end]
@@ -419,7 +423,7 @@ module Hpricot
419
423
  end
420
424
 
421
425
  filter ':nth-child' do |arg,i|
422
- case arg
426
+ case arg
423
427
  when 'even'; (parent.containers.index(self) + 1) % 2 == 0
424
428
  when 'odd'; (parent.containers.index(self) + 1) % 2 == 1
425
429
  else self == (parent.containers[arg.to_i - 1])
@@ -429,7 +433,7 @@ module Hpricot
429
433
  filter ":last-child" do |i|
430
434
  self == parent.containers.last
431
435
  end
432
-
436
+
433
437
  filter ":nth-last-child" do |arg,i|
434
438
  self == parent.containers[-1-arg.to_i]
435
439
  end
@@ -451,13 +455,13 @@ module Hpricot
451
455
  end
452
456
 
453
457
  filter :empty do |*a|
454
- containers.length == 0
458
+ elem? && inner_html.length == 0
455
459
  end
456
460
 
457
461
  filter :root do |*a|
458
462
  self.is_a? Hpricot::Doc
459
463
  end
460
-
464
+
461
465
  filter 'text' do |*a|
462
466
  self.text?
463
467
  end
@@ -469,13 +473,13 @@ module Hpricot
469
473
  filter :contains do |arg, ignore|
470
474
  html.include? arg
471
475
  end
472
-
473
-
476
+
477
+
474
478
 
475
479
  pred_procs =
476
480
  {'text()' => proc { |ele, *_| ele.inner_text.strip },
477
481
  '@' => proc { |ele, attr, *_| ele.get_attribute(attr).to_s if ele.elem? }}
478
-
482
+
479
483
  oper_procs =
480
484
  {'=' => proc { |a,b| a == b },
481
485
  '!=' => proc { |a,b| a != b },
@@ -484,7 +484,7 @@ module Hpricot
484
484
  end
485
485
 
486
486
  ElementInclusions =
487
- {"head"=>["link", "meta", "object", "script", "style"], "body"=>["del", "ins"]}
487
+ {"head"=>["link", "meta", "object", "script", "style" , "noscript"], "body"=>["del", "ins"]}
488
488
  ElementInclusions.each do |k, v|
489
489
  v.each do |name|
490
490
  ElementContent[k][name.hash] = :allow
@@ -43,7 +43,7 @@ module Hpricot
43
43
  def initialize e
44
44
  @element = e
45
45
  end
46
- def [] k
46
+ def [] k
47
47
  Hpricot.uxs((@element.raw_attributes || {})[k])
48
48
  end
49
49
  def []= k, v
@@ -195,7 +195,7 @@ module Hpricot
195
195
  def pathname; "procins()" end
196
196
  def raw_string; output("") end
197
197
  def output(out, opts = {})
198
- out <<
198
+ out <<
199
199
  if_output(opts) do
200
200
  "<?#{target}" +
201
201
  (content ? " #{content}" : "") +
@@ -6,7 +6,7 @@ module Hpricot
6
6
  # Common sets of attributes.
7
7
  AttrCore = [:id, :class, :style, :title]
8
8
  AttrI18n = [:lang, 'xml:lang'.intern, :dir]
9
- AttrEvents = [:onclick, :ondblclick, :onmousedown, :onmouseup, :onmouseover, :onmousemove,
9
+ AttrEvents = [:onclick, :ondblclick, :onmousedown, :onmouseup, :onmouseover, :onmousemove,
10
10
  :onmouseout, :onkeypress, :onkeydown, :onkeyup]
11
11
  AttrFocus = [:accesskey, :tabindex, :onfocus, :onblur]
12
12
  AttrHAlign = [:align, :char, :charoff]
@@ -113,11 +113,11 @@ module Hpricot
113
113
  @tagset = XHTMLStrict.tagset.merge \
114
114
  :strike => Attrs,
115
115
  :center => Attrs,
116
- :dir => Attrs + [:compact],
116
+ :dir => Attrs + [:compact],
117
117
  :noframes => Attrs,
118
- :basefont => [:id, :size, :color, :face],
118
+ :basefont => [:id, :size, :color, :face],
119
119
  :u => Attrs,
120
- :menu => Attrs + [:compact],
120
+ :menu => Attrs + [:compact],
121
121
  :iframe => AttrCore + [:longdesc, :name, :src, :frameborder, :marginwidth, :marginheight, :scrolling, :align, :height, :width],
122
122
  :font => AttrCore + AttrI18n + [:size, :color, :face],
123
123
  :s => Attrs,
@@ -106,16 +106,16 @@ module Hpricot
106
106
  # Find all preceding nodes.
107
107
  def preceding
108
108
  sibs = parent.children
109
- si = sibs.index(self)
110
- return Elements[*sibs[0...si]]
111
- end
112
-
109
+ si = sibs.index(self)
110
+ return Elements[*sibs[0...si]]
111
+ end
112
+
113
113
  # Find all nodes which follow the current one.
114
114
  def following
115
- sibs = parent.children
116
- si = sibs.index(self) + 1
117
- return Elements[*sibs[si...sibs.length]]
118
- end
115
+ sibs = parent.children
116
+ si = sibs.index(self) + 1
117
+ return Elements[*sibs[si...sibs.length]]
118
+ end
119
119
 
120
120
  # Adds elements immediately after this element, contained in the +html+ string.
121
121
  def after(html = nil, &blk)
@@ -296,7 +296,7 @@ module Hpricot
296
296
  mt = after[%r!:[a-z0-9\\*_-]+!i, 0]
297
297
  oop = false
298
298
  if mt and not (mt == ":not" or Traverse.method_defined? "filter[#{mt}]")
299
- after = $'
299
+ after = $'
300
300
  m[2] += mt
301
301
  expr = after
302
302
  end
@@ -348,10 +348,10 @@ module Hpricot
348
348
  #
349
349
  # If _names_ are empty, it yields all elements.
350
350
  # If non-empty _names_ are given, it should be list of universal names.
351
- #
351
+ #
352
352
  # A nested element is yielded in depth first order as follows.
353
353
  #
354
- # t = Hpricot('<a id=0><b><a id=1 /></b><c id=2 /></a>')
354
+ # t = Hpricot('<a id=0><b><a id=1 /></b><c id=2 /></a>')
355
355
  # t.traverse_element("a", "c") {|e| p e}
356
356
  # # =>
357
357
  # {elem <a id="0"> {elem <b> {emptyelem <a id="1">} </b>} {emptyelem <c id="2">} </a>}
@@ -363,7 +363,7 @@ module Hpricot
363
363
  # t = Hpricot(<<'End')
364
364
  # <html>
365
365
  # <meta name="robots" content="index,nofollow">
366
- # <meta name="author" content="Who am I?">
366
+ # <meta name="author" content="Who am I?">
367
367
  # </html>
368
368
  # End
369
369
  # t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e}
@@ -424,19 +424,19 @@ module Hpricot
424
424
 
425
425
  # Find all preceding sibling elements. Like the other "sibling" methods, this weeds
426
426
  # out text and comment nodes.
427
- def preceding_siblings()
428
- sibs = parent.containers
429
- si = sibs.index(self)
430
- return Elements[*sibs[0...si]]
431
- end
432
-
427
+ def preceding_siblings()
428
+ sibs = parent.containers
429
+ si = sibs.index(self)
430
+ return Elements[*sibs[0...si]]
431
+ end
432
+
433
433
  # Find sibling elements which follow the current one. Like the other "sibling" methods, this weeds
434
434
  # out text and comment nodes.
435
- def following_siblings()
436
- sibs = parent.containers
437
- si = sibs.index(self) + 1
438
- return Elements[*sibs[si...sibs.length]]
439
- end
435
+ def following_siblings()
436
+ sibs = parent.containers
437
+ si = sibs.index(self) + 1
438
+ return Elements[*sibs[si...sibs.length]]
439
+ end
440
440
 
441
441
  # Puts together an array of neighboring sibling elements based on their proximity
442
442
  # to this element.
@@ -507,7 +507,7 @@ module Hpricot
507
507
  end
508
508
 
509
509
  # +find_element+ searches an element which universal name is specified by
510
- # the arguments.
510
+ # the arguments.
511
511
  # It returns nil if not found.
512
512
  def find_element(*names)
513
513
  traverse_element(*names) {|e| return e }
@@ -777,7 +777,7 @@ module Hpricot
777
777
  return author if !author.empty?
778
778
  rescue IndexError
779
779
  end
780
- }
780
+ }
781
781
 
782
782
  if channel = find_element('{http://purl.org/rss/1.0/}channel')
783
783
  channel.traverse_element('{http://purl.org/dc/elements/1.1/}creator') {|e|
@@ -10,7 +10,7 @@ module Hpricot
10
10
 
11
11
  ####################################################################
12
12
  # XML Character converter, from Sam Ruby:
13
- # (see http://intertwingly.net/stories/2005/09/28/xchar.rb).
13
+ # (see http://intertwingly.net/stories/2005/09/28/xchar.rb).
14
14
  #
15
15
  module XChar # :nodoc:
16
16
 
@@ -59,7 +59,7 @@ module Hpricot
59
59
  # See http://www.w3.org/TR/REC-xml/#charsets for details.
60
60
  VALID = [
61
61
  0x9, 0xA, 0xD,
62
- (0x20..0xD7FF),
62
+ (0x20..0xD7FF),
63
63
  (0xE000..0xFFFD),
64
64
  (0x10000..0x10FFFF)
65
65
  ]
@@ -86,7 +86,7 @@ module Hpricot
86
86
  # XML unescape
87
87
  def uxs(str)
88
88
  str.to_s.
89
- gsub(/\&\w+;/) { |x| (XChar::PREDEFINED_U[x] || ??).chr }.
89
+ gsub(/\&\w+;/) { |x| (XChar::PREDEFINED_U[x] || 63).chr }. # 63 = ?? (query char)
90
90
  gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
91
91
  end
92
92
  end
@@ -9,7 +9,7 @@
9
9
  </head>
10
10
  <body id='body1'>
11
11
  <p>Sample XHTML for <a id="link1" href="http://code.whytheluckystiff.net/mouseHole/">MouseHole 2</a>.</p>
12
- <p class='ohmy'>Please filter <a id="link2" href="http://hobix.com/">me</a>!</p>
12
+ <p class='ohmy'>Please filter <span></span><a id="link2" href="http://hobix.com/">me</a>!</p>
13
13
  <p>The third paragraph</p>
14
14
  <p class="last final"><b>THE FINAL PARAGRAPH</b></p>
15
15
  </body>
@@ -354,6 +354,16 @@ class TestParser < Test::Unit::TestCase
354
354
  assert_equal "HAI", doc.at("body").inner_text
355
355
  end
356
356
 
357
+ # http://github.com/hpricot/hpricot/issues#issue/28
358
+ def test_invalid_inner_text
359
+ assert_equal "A", Hpricot('A&B;').inner_text[0...1]
360
+ end
361
+
362
+ # http://github.com/hpricot/hpricot/issues#issue/25
363
+ def test_encoding_compatibility_error
364
+ Hpricot("<p>\xC3\x9Cber</p><p>M&sup3;</p>").inner_text
365
+ end
366
+
357
367
  # Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
358
368
  def test_self_closed_form
359
369
  doc = Hpricot(<<-edoc)
@@ -370,8 +380,8 @@ class TestParser < Test::Unit::TestCase
370
380
  @basic = Hpricot.parse(TestFiles::BASIC)
371
381
  assert_equal 0, (@basic/"title:parent").size
372
382
  assert_equal 3, (@basic/"p:parent").size
373
- assert_equal 1, (@basic/"title:empty").size
374
- assert_equal 1, (@basic/"p:empty").size
383
+ assert_equal 3, (@basic/"link:empty").size
384
+ assert_equal 1, (@basic/"span:empty").size
375
385
  end
376
386
 
377
387
  def test_keep_cdata
@@ -425,4 +435,23 @@ class TestParser < Test::Unit::TestCase
425
435
  assert_equal "This is STDOUT", (xml/:peon/:stdout).inner_text
426
436
  assert_equal "This is STDERR", (xml/:peon/:stderr).inner_text
427
437
  end
438
+
439
+ def test_parsing_html_with_noscript
440
+ doc = Hpricot(<<-edoc)
441
+ <html>
442
+ <head>
443
+ <noscript>
444
+ <meta http-equiv="refresh" content="0; url=http://www.yoursite.com/noscripts.html"/>
445
+ </noscript>
446
+ <meta name="verification" content="7ff5e90iormq5niy6x98j75" />
447
+ </head>
448
+ <body>
449
+ <h1>Testing</h1>
450
+ </body>
451
+ </html>
452
+
453
+ edoc
454
+ assert_equal "7ff5e90iormq5niy6x98j75", doc.at("/html/head/meta[@name='verification']")['content']
455
+ end
456
+
428
457
  end
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hpricot
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.2
4
+ hash: 57
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 8
9
+ - 3
10
+ version: 0.8.3
5
11
  platform: ruby
6
12
  authors:
7
13
  - why the lucky stiff
@@ -9,7 +15,7 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2009-11-05 00:00:00 -06:00
18
+ date: 2010-11-03 00:00:00 -05:00
13
19
  default_executable:
14
20
  dependencies: []
15
21
 
@@ -21,13 +27,13 @@ extensions:
21
27
  - ext/fast_xs/extconf.rb
22
28
  - ext/hpricot_scan/extconf.rb
23
29
  extra_rdoc_files:
24
- - README
30
+ - README.md
25
31
  - CHANGELOG
26
32
  - COPYING
27
33
  files:
28
34
  - CHANGELOG
29
35
  - COPYING
30
- - README
36
+ - README.md
31
37
  - Rakefile
32
38
  - test/files/basic.xhtml
33
39
  - test/files/boingboing.html
@@ -47,6 +53,7 @@ files:
47
53
  - test/test_paths.rb
48
54
  - test/test_preserved.rb
49
55
  - test/test_xml.rb
56
+ - extras/hpricot.png
50
57
  - lib/hpricot/blankslate.rb
51
58
  - lib/hpricot/builder.rb
52
59
  - lib/hpricot/elements.rb
@@ -59,7 +66,6 @@ files:
59
66
  - lib/hpricot/traverse.rb
60
67
  - lib/hpricot/xchar.rb
61
68
  - lib/hpricot.rb
62
- - extras/mingw-rbconfig.rb
63
69
  - ext/hpricot_scan/hpricot_scan.h
64
70
  - ext/fast_xs/FastXsService.java
65
71
  - ext/hpricot_scan/HpricotCss.java
@@ -84,26 +90,32 @@ rdoc_options:
84
90
  - --title
85
91
  - The Hpricot Reference
86
92
  - --main
87
- - README
93
+ - README.md
88
94
  - --inline-source
89
95
  require_paths:
90
96
  - lib
91
97
  required_ruby_version: !ruby/object:Gem::Requirement
98
+ none: false
92
99
  requirements:
93
100
  - - ">="
94
101
  - !ruby/object:Gem::Version
102
+ hash: 3
103
+ segments:
104
+ - 0
95
105
  version: "0"
96
- version:
97
106
  required_rubygems_version: !ruby/object:Gem::Requirement
107
+ none: false
98
108
  requirements:
99
109
  - - ">="
100
110
  - !ruby/object:Gem::Version
111
+ hash: 3
112
+ segments:
113
+ - 0
101
114
  version: "0"
102
- version:
103
115
  requirements: []
104
116
 
105
117
  rubyforge_project: hobix
106
- rubygems_version: 1.3.5
118
+ rubygems_version: 1.3.7
107
119
  signing_key:
108
120
  specification_version: 3
109
121
  summary: a swift, liberal HTML parser with a fantastic library