hpricot 0.6.164 → 0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,66 +2,55 @@ module Hpricot
2
2
  # :stopdoc:
3
3
 
4
4
  class Doc
5
- attr_accessor :children
6
- def initialize(children = [], options = {})
7
- @children = children ? children.each { |c| c.parent = self } : []
8
- @options = options
9
- end
10
5
  def output(out, opts = {})
11
- @children.each do |n|
6
+ children.each do |n|
12
7
  n.output(out, opts)
13
- end
8
+ end if children
14
9
  out
15
10
  end
16
11
  def make(input = nil, &blk)
17
- Hpricot.make(input, @options, &blk)
12
+ Hpricot.make(input, @options, &blk).children
18
13
  end
19
14
  def altered!; end
15
+ def inspect_tree
16
+ children.map { |x| x.inspect_tree }.join if children
17
+ end
20
18
  end
21
19
 
22
20
  class BaseEle
23
- attr_accessor :raw_string, :parent
24
21
  def html_quote(str)
25
22
  "\"" + str.gsub('"', '\\"') + "\""
26
23
  end
27
24
  def if_output(opts)
28
- if opts[:preserve] and not @raw_string.nil?
29
- @raw_string
25
+ if opts[:preserve] and not raw_string.nil?
26
+ raw_string
30
27
  else
31
28
  yield opts
32
29
  end
33
30
  end
34
31
  def pathname; self.name end
35
32
  def altered!
36
- @raw_string = nil
37
- end
38
- def self.alterable(*fields)
39
- attr_accessor(*fields)
40
- fields.each do |f|
41
- define_method("#{f}=") do |v|
42
- altered!
43
- instance_variable_set("@#{f}", v)
44
- end
45
- end
33
+ clear_raw
34
+ end
35
+ def inspect_tree(depth = 0)
36
+ %{#{" " * depth}} + self.class.name.split(/::/).last.downcase + "\n"
46
37
  end
47
38
  end
48
39
 
49
40
  class Elem
50
- attr_accessor :stag, :etag, :children
51
- def initialize(stag, children=nil, etag=nil)
52
- @stag, @etag = stag, etag
53
- @children = children ? children.each { |c| c.parent = self } : []
54
- end
55
- def empty?; @children.empty? end
56
- [:name, :raw_attributes, :parent, :altered!].each do |m|
57
- [m, "#{m}="].each { |m2| define_method(m2) { |*a| [@etag, @stag].inject { |_,t| t.send(m2, *a) if t and t.respond_to?(m2) } } }
41
+ def initialize tag, attrs = nil, children = nil, etag = nil
42
+ self.name, self.raw_attributes, self.children, self.etag =
43
+ tag, attrs, children, etag
58
44
  end
45
+ def empty?; children.nil? or children.empty? end
59
46
  def attributes
60
47
  if raw_attributes
61
48
  raw_attributes.inject({}) do |hsh, (k, v)|
62
49
  hsh[k] = Hpricot.uxs(v)
63
50
  hsh
64
51
  end
52
+ else
53
+ {}
65
54
  end
66
55
  end
67
56
  def to_plain_text
@@ -79,151 +68,128 @@ module Hpricot
79
68
  end
80
69
  def pathname; self.name end
81
70
  def output(out, opts = {})
82
- if empty? and ElementContent[@stag.name] == :EMPTY
83
- @stag.output(out, opts.merge(:style => :empty))
84
- else
85
- @stag.output(out, opts)
86
- @children.each { |n| n.output(out, opts) }
87
- if @etag
88
- @etag.output(out, opts)
89
- elsif !opts[:preserve]
90
- ETag.new(@stag.name).output(out, opts)
71
+ out <<
72
+ if_output(opts) do
73
+ "<#{name}#{attributes_as_html}" +
74
+ ((empty? and not etag) ? " /" : "") +
75
+ ">"
91
76
  end
77
+ if children
78
+ children.each { |n| n.output(out, opts) }
79
+ end
80
+ if etag
81
+ etag.output(out, opts)
82
+ elsif !opts[:preserve] && !empty?
83
+ out <<
84
+ if_output(opts) do
85
+ "</#{name}>"
86
+ end
92
87
  end
93
88
  out
94
89
  end
95
- end
96
-
97
- class STag < BaseEle
98
- def initialize(name, attributes=nil)
99
- @name = name.to_s
100
- @raw_attributes = attributes || {}
101
- end
102
- alterable :name, :raw_attributes
103
90
  def attributes_as_html
104
- if @raw_attributes
105
- @raw_attributes.map do |aname, aval|
91
+ if raw_attributes
92
+ raw_attributes.map do |aname, aval|
106
93
  " #{aname}" +
107
94
  (aval ? "=#{html_quote aval}" : "")
108
95
  end.join
109
96
  end
110
97
  end
111
- def output(out, opts = {})
112
- out <<
113
- if_output(opts) do
114
- "<#{@name}#{attributes_as_html}" +
115
- (opts[:style] == :empty ? " /" : "") +
116
- ">"
117
- end
98
+ def inspect_tree(depth = 0)
99
+ %{#{" " * depth}} + name + "\n" +
100
+ (children ? children.map { |x| x.inspect_tree(depth + 1) }.join : "")
118
101
  end
119
102
  end
120
103
 
121
- class ETag < BaseEle
122
- def initialize(qualified_name)
123
- @name = qualified_name.to_s
124
- end
125
- alterable :name
104
+ class ETag
105
+ def initialize name; self.name = name end
126
106
  def output(out, opts = {})
127
107
  out <<
128
108
  if_output(opts) do
129
- "</#{@name}>"
109
+ "</#{name}>"
130
110
  end
131
111
  end
132
112
  end
133
113
 
134
- class BogusETag < ETag
114
+ class BogusETag
135
115
  def output(out, opts = {}); out << if_output(opts) { '' }; end
136
116
  end
137
117
 
138
- class Text < BaseEle
139
- def initialize(text)
140
- @content = text
141
- end
142
- alterable :content
118
+ class Text
119
+ def initialize content; self.content = content end
143
120
  def pathname; "text()" end
144
121
  def to_s
145
- Hpricot.uxs(@content)
122
+ Hpricot.uxs(content)
146
123
  end
147
124
  alias_method :inner_text, :to_s
148
125
  alias_method :to_plain_text, :to_s
126
+ def << str; self.content << str end
149
127
  def output(out, opts = {})
150
128
  out <<
151
129
  if_output(opts) do
152
- @content
130
+ content.to_s
153
131
  end
154
132
  end
155
133
  end
156
134
 
157
- class CData < Text
135
+ class CData
136
+ def initialize content; self.content = content end
158
137
  alias_method :to_s, :content
159
138
  alias_method :to_plain_text, :content
160
139
  def output(out, opts = {})
161
140
  out <<
162
141
  if_output(opts) do
163
- "<![CDATA[#@content]]>"
142
+ "<![CDATA[#{content}]]>"
164
143
  end
165
144
  end
166
145
  end
167
146
 
168
- class XMLDecl < BaseEle
169
- def initialize(version, encoding, standalone)
170
- @version, @encoding, @standalone = version, encoding, standalone
171
- end
172
- alterable :version, :encoding, :standalone
147
+ class XMLDecl
173
148
  def pathname; "xmldecl()" end
174
149
  def output(out, opts = {})
175
150
  out <<
176
151
  if_output(opts) do
177
- "<?xml version=\"#{@version}\"" +
178
- (@encoding ? " encoding=\"#{encoding}\"" : "") +
179
- (@standalone != nil ? " standalone=\"#{standalone ? 'yes' : 'no'}\"" : "") +
152
+ "<?xml version=\"#{version}\"" +
153
+ (encoding ? " encoding=\"#{encoding}\"" : "") +
154
+ (standalone != nil ? " standalone=\"#{standalone ? 'yes' : 'no'}\"" : "") +
180
155
  "?>"
181
156
  end
182
157
  end
183
158
  end
184
159
 
185
- class DocType < BaseEle
186
- def initialize(target, pubid, sysid)
187
- @target, @public_id, @system_id = target, pubid, sysid
160
+ class DocType
161
+ def initialize target, pub, sys
162
+ self.target, self.public_id, self.system_id = target, pub, sys
188
163
  end
189
- alterable :target, :public_id, :system_id
190
164
  def pathname; "doctype()" end
191
165
  def output(out, opts = {})
192
166
  out <<
193
167
  if_output(opts) do
194
- "<!DOCTYPE #{@target} " +
195
- (@public_id ? "PUBLIC \"#{@public_id}\"" : "SYSTEM") +
196
- (@system_id ? " #{html_quote(@system_id)}" : "") + ">"
168
+ "<!DOCTYPE #{target} " +
169
+ (public_id ? "PUBLIC \"#{public_id}\"" : "SYSTEM") +
170
+ (system_id ? " #{html_quote(system_id)}" : "") + ">"
197
171
  end
198
172
  end
199
173
  end
200
174
 
201
- class ProcIns < BaseEle
202
- def initialize(target, content)
203
- @target, @content = target, content
204
- end
175
+ class ProcIns
205
176
  def pathname; "procins()" end
206
- alterable :target, :content
207
177
  def output(out, opts = {})
208
178
  out <<
209
179
  if_output(opts) do
210
- "<?#{@target}" +
211
- (@content ? " #{@content}" : "") +
180
+ "<?#{target}" +
181
+ (content ? " #{content}" : "") +
212
182
  "?>"
213
183
  end
214
184
  end
215
185
  end
216
186
 
217
- class Comment < BaseEle
218
- def initialize(content)
219
- @content = content
220
- end
187
+ class Comment
221
188
  def pathname; "comment()" end
222
- alterable :content
223
189
  def output(out, opts = {})
224
190
  out <<
225
191
  if_output(opts) do
226
- "<!--#{@content}-->"
192
+ "<!--#{content}-->"
227
193
  end
228
194
  end
229
195
  end
@@ -26,7 +26,7 @@ module Hpricot
26
26
  if parent and parent.respond_to? :make
27
27
  parent.make(input, &blk)
28
28
  else
29
- Hpricot.make(input, &blk)
29
+ Hpricot.make(input, &blk).children
30
30
  end
31
31
  end
32
32
 
@@ -51,7 +51,7 @@ module Hpricot
51
51
  return i if (x.respond_to?(:name) and name == x.name) or
52
52
  (x.text? and name == "text()")
53
53
  i += 1
54
- end
54
+ end if children
55
55
  -1
56
56
  end
57
57
 
@@ -146,16 +146,20 @@ module Hpricot
146
146
  # Builds a string from the text contained in this node. All
147
147
  # HTML elements are removed.
148
148
  def to_plain_text
149
- if respond_to? :children
149
+ if respond_to?(:children) and children
150
150
  children.map { |x| x.to_plain_text }.join.strip.gsub(/\n{2,}/, "\n\n")
151
+ else
152
+ ""
151
153
  end
152
154
  end
153
155
 
154
156
  # Builds a string from the text contained in this node. All
155
157
  # HTML elements are removed.
156
158
  def inner_text
157
- if respond_to? :children
159
+ if respond_to?(:children) and children
158
160
  children.map { |x| x.inner_text }.join
161
+ else
162
+ ""
159
163
  end
160
164
  end
161
165
  alias_method :innerText, :inner_text
@@ -172,8 +176,10 @@ module Hpricot
172
176
  end
173
177
  reparent self.children
174
178
  else
175
- if respond_to? :children
179
+ if respond_to?(:children) and children
176
180
  children.map { |x| x.output("") }.join
181
+ else
182
+ ""
177
183
  end
178
184
  end
179
185
  end
@@ -207,7 +213,7 @@ module Hpricot
207
213
  parent.children.each do |e|
208
214
  id = sim if e == self
209
215
  sim += 1 if e.pathname == self.pathname
210
- end
216
+ end if parent.children
211
217
  p = File.join(parent.xpath, self.pathname)
212
218
  p += "[#{id+1}]" if sim >= 2
213
219
  p
@@ -224,7 +230,7 @@ module Hpricot
224
230
  parent.children.each do |e|
225
231
  id = sim if e == self
226
232
  sim += 1 if e.pathname == self.pathname
227
- end
233
+ end if parent.children
228
234
  p = parent.css_path
229
235
  p = p ? "#{p} > #{self.pathname}" : self.pathname
230
236
  p += ":nth(#{id})" if sim >= 2
@@ -489,13 +495,13 @@ module Hpricot
489
495
 
490
496
  # +each_child+ iterates over each child.
491
497
  def each_child(&block) # :yields: child_node
492
- children.each(&block)
498
+ children.each(&block) if children
493
499
  nil
494
500
  end
495
501
 
496
502
  # +each_child_with_index+ iterates over each child.
497
503
  def each_child_with_index(&block) # :yields: child_node, index
498
- children.each_with_index(&block)
504
+ children.each_with_index(&block) if children
499
505
  nil
500
506
  end
501
507
 
@@ -626,7 +632,7 @@ module Hpricot
626
632
  # :stopdoc:
627
633
  module Doc::Trav
628
634
  def traverse_all_element(&block)
629
- children.each {|c| c.traverse_all_element(&block) }
635
+ children.each {|c| c.traverse_all_element(&block) } if children
630
636
  end
631
637
  def xpath
632
638
  "/"
@@ -639,7 +645,7 @@ module Hpricot
639
645
  module Elem::Trav
640
646
  def traverse_all_element(&block)
641
647
  yield self
642
- children.each {|c| c.traverse_all_element(&block) }
648
+ children.each {|c| c.traverse_all_element(&block) } if children
643
649
  end
644
650
  end
645
651
 
@@ -651,14 +657,14 @@ module Hpricot
651
657
 
652
658
  module Doc::Trav
653
659
  def traverse_some_element(name_set, &block)
654
- children.each {|c| c.traverse_some_element(name_set, &block) }
660
+ children.each {|c| c.traverse_some_element(name_set, &block) } if children
655
661
  end
656
662
  end
657
663
 
658
664
  module Elem::Trav
659
665
  def traverse_some_element(name_set, &block)
660
666
  yield self if name_set.include? self.name
661
- children.each {|c| c.traverse_some_element(name_set, &block) }
667
+ children.each {|c| c.traverse_some_element(name_set, &block) } if children
662
668
  end
663
669
  end
664
670
 
@@ -797,7 +803,7 @@ module Hpricot
797
803
  module Doc::Trav
798
804
  def root
799
805
  es = []
800
- children.each {|c| es << c if c.elem? }
806
+ children.each {|c| es << c if c.elem? } if children
801
807
  raise Hpricot::Error, "no element" if es.empty?
802
808
  raise Hpricot::Error, "multiple top elements" if 1 < es.length
803
809
  es[0]
@@ -2263,4 +2263,4 @@ Why are so many drawings from earlier centuries so deliciously weird? Here are a
2263
2263
  -->
2264
2264
  </div>
2265
2265
  </body>
2266
- </html>
2266
+ </html>
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'open-uri'
4
+ require 'hpricot'
5
+ require 'nokogiri'
6
+ require 'benchmark'
7
+
8
+ content = File.read("test/files/boingboing.html")
9
+
10
+ N = 100
11
+
12
+ unless Gem.loaded_specs['hpricot'].version > Gem::Version.new('0.6.161')
13
+ abort "** Use higher than Hpricot 0.6.161!"
14
+ end
15
+
16
+ puts "Hpricot #{Gem.loaded_specs['hpricot'].version} vs. Nokogiri #{Gem.loaded_specs['nokogiri'].version}"
17
+ hdoc = Hpricot(content)
18
+ ndoc = Nokogiri.Hpricot(content)
19
+
20
+ Benchmark.bm do |x|
21
+ x.report('hpricot:doc') do
22
+ N.times do
23
+ Hpricot(content)
24
+ end
25
+ end
26
+
27
+ x.report('nokogiri:doc') do
28
+ N.times do
29
+ Nokogiri.Hpricot(content)
30
+ end
31
+ end
32
+ end
33
+
34
+ Benchmark.bm do |x|
35
+ x.report('hpricot:xpath') do
36
+ N.times do
37
+ info = hdoc.search("//a[@name='027906']").first.inner_text
38
+ url = hdoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
39
+ end
40
+ end
41
+
42
+ x.report('nokogiri:xpath') do
43
+ N.times do
44
+ info = ndoc.search("//a[@name='027906']").first.inner_text
45
+ url = ndoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
46
+ end
47
+ end
48
+ end
49
+
50
+ Benchmark.bm do |x|
51
+ x.report('hpricot:css') do
52
+ N.times do
53
+ info = hdoc.search('form input[@checked]').first
54
+ url = hdoc.search('td spacer').first.inner_text
55
+ end
56
+ end
57
+
58
+ x.report('nokogiri:css') do
59
+ N.times do
60
+ info = ndoc.search('form input[@checked]').first
61
+ url = ndoc.search('td spacer').first.inner_text
62
+ end
63
+ end
64
+ end