hpricot 0.5-mswin32 → 0.6-mswin32

Sign up to get free protection for your applications and to get access to all the features.
@@ -60,10 +60,14 @@ module Hpricot
60
60
  q.group(1, '<', '>') {
61
61
  q.text @name
62
62
 
63
- if @attributes
64
- @attributes.each {|n, t|
63
+ if @raw_attributes
64
+ @raw_attributes.each {|n, t|
65
65
  q.breakable
66
- q.text "#{n}=\"#{t}\""
66
+ if t
67
+ q.text "#{n}=\"#{Hpricot.uxs(t)}\""
68
+ else
69
+ q.text n
70
+ end
67
71
  }
68
72
  end
69
73
  }
@@ -1,7 +1,7 @@
1
1
  require 'hpricot/htmlinfo'
2
2
 
3
- def Hpricot(input, opts = {})
4
- Hpricot.parse(input, opts)
3
+ def Hpricot(input = nil, opts = {}, &blk)
4
+ Hpricot.parse(input, opts, &blk)
5
5
  end
6
6
 
7
7
  module Hpricot
@@ -11,8 +11,8 @@ module Hpricot
11
11
 
12
12
  # Hpricot.parse parses <i>input</i> and returns a document tree
13
13
  # represented by Hpricot::Doc.
14
- def Hpricot.parse(input, opts = {})
15
- Doc.new(make(input, opts))
14
+ def Hpricot.parse(input = nil, opts = {}, &blk)
15
+ Doc.new(make(input, opts, &blk))
16
16
  end
17
17
 
18
18
  # Hpricot::XML parses <i>input</i>, disregarding all the HTML rules
@@ -23,125 +23,152 @@ module Hpricot
23
23
 
24
24
  # :stopdoc:
25
25
 
26
- def Hpricot.make(input, opts = {})
26
+ def Hpricot.make(input = nil, opts = {}, &blk)
27
27
  opts = {:fixup_tags => false}.merge(opts)
28
-
29
- case opts[:encoding]
30
- when nil
31
- when 'utf-8'
32
- unless defined? Encoding::Character::UTF8
33
- raise EncodingError, "The ruby-character-encodings library could not be found for utf-8 mode."
34
- end
35
- else
36
- raise EncodingError, "No encoding option `#{opts[:encoding]}' is available."
28
+ unless input or blk
29
+ raise ArgumentError, "An Hpricot document must be built from an input source (a String) or a block."
37
30
  end
38
31
 
39
- if opts[:xhtml_strict]
40
- opts[:fixup_tags] = true
41
- end
32
+ conv = opts[:xml] ? :to_s : :downcase
42
33
 
43
- stack = [[nil, nil, [], [], [], []]]
44
- Hpricot.scan(input) do |token|
45
- if stack.last[5] == :CDATA and ![:procins, :comment, :cdata].include?(token[0]) and
46
- !(token[0] == :etag and token[1].downcase == stack.last[0])
47
- token[0] = :text
48
- token[1] = token[3] if token[3]
34
+ fragment =
35
+ if input
36
+ case opts[:encoding]
37
+ when nil
38
+ when 'utf-8'
39
+ unless defined? Encoding::Character::UTF8
40
+ raise EncodingError, "The ruby-character-encodings library could not be found for utf-8 mode."
41
+ end
42
+ else
43
+ raise EncodingError, "No encoding option `#{opts[:encoding]}' is available."
44
+ end
45
+
46
+ if opts[:xhtml_strict]
47
+ opts[:fixup_tags] = true
49
48
  end
50
49
 
51
- case token[0]
52
- when :stag
53
- case opts[:encoding] when 'utf-8'
54
- token.map! { |str| u(str) if str.is_a? String }
50
+ stack = [[nil, nil, [], [], [], []]]
51
+ Hpricot.scan(input) do |token|
52
+ if stack.last[5] == :CDATA and ![:procins, :comment, :cdata].include?(token[0]) and
53
+ !(token[0] == :etag and token[1].casecmp(stack.last[0]).zero?)
54
+ token[0] = :text
55
+ token[1] = token[3] if token[3]
55
56
  end
56
57
 
57
- stagname = token[0] = token[1].downcase
58
- if ElementContent[stagname] == :EMPTY and !opts[:xml]
59
- token[0] = :emptytag
60
- stack.last[2] << token
61
- else
62
- unless opts[:xml]
63
- if opts[:fixup_tags]
64
- # obey the tag rules set up by the current element
65
- if ElementContent.has_key? stagname
66
- trans = nil
67
- (stack.length-1).downto(0) do |i|
68
- untags = stack[i][5]
69
- break unless untags.include? stagname
70
- # puts "** ILLEGAL #{stagname} IN #{stack[i][0]}"
71
- trans = i
58
+ if !opts[:xml] and token[0] == :emptytag
59
+ token[1] = token[1].send(conv)
60
+ if ElementContent[token[1].downcase] != :EMPTY
61
+ token[0] = :stag
62
+ end
63
+ end
64
+
65
+ # TODO: downcase instead when parsing attributes?
66
+ if !opts[:xml] and token[2].is_a?(Hash)
67
+ token[2] = token[2].inject({}) { |hsh,(k,v)| hsh[k.downcase] = v; hsh }
68
+ end
69
+
70
+ case token[0]
71
+ when :stag
72
+ case opts[:encoding] when 'utf-8'
73
+ token.map! { |str| u(str) if str.is_a? String }
74
+ end
75
+
76
+ stagname = token[0] = token[1] = token[1].send(conv)
77
+ if ElementContent[stagname] == :EMPTY and !opts[:xml]
78
+ token[0] = :emptytag
79
+ stack.last[2] << token
80
+ else
81
+ unless opts[:xml]
82
+ if opts[:fixup_tags]
83
+ # obey the tag rules set up by the current element
84
+ if ElementContent.has_key? stagname
85
+ trans = nil
86
+ (stack.length-1).downto(0) do |i|
87
+ untags = stack[i][5]
88
+ break unless untags.include? stagname
89
+ # puts "** ILLEGAL #{stagname} IN #{stack[i][0]}"
90
+ trans = i
91
+ end
92
+ if trans.to_i > 1
93
+ eles = stack.slice!(trans..-1)
94
+ stack.last[2] += eles
95
+ # puts "** TRANSPLANTED #{stagname} TO #{stack.last[0]}"
96
+ end
97
+ elsif opts[:xhtml_strict]
98
+ token[2] = {'class' => stagname}
99
+ stagname = token[0] = "div"
72
100
  end
73
- if trans.to_i > 1
74
- eles = stack.slice!(trans..-1)
75
- stack.last[2] += eles
76
- # puts "** TRANSPLANTED #{stagname} TO #{stack.last[0]}"
101
+ end
102
+
103
+ # setup tag rules for inside this element
104
+ if ElementContent[stagname] == :CDATA
105
+ uncontainable_tags = :CDATA
106
+ elsif opts[:fixup_tags]
107
+ possible_tags = ElementContent[stagname]
108
+ excluded_tags, included_tags = stack.last[3..4]
109
+ if possible_tags
110
+ excluded_tags = excluded_tags | (ElementExclusions[stagname] || [])
111
+ included_tags = included_tags | (ElementInclusions[stagname] || [])
112
+ containable_tags = (possible_tags | included_tags) - excluded_tags
113
+ uncontainable_tags = ElementContent.keys - containable_tags
114
+ else
115
+ # If the tagname is unknown, it is assumed that any element
116
+ # except excluded can be contained.
117
+ uncontainable_tags = excluded_tags
77
118
  end
78
- elsif opts[:xhtml_strict]
79
- token[2] = {'class' => stagname}
80
- stagname = token[0] = "div"
81
119
  end
82
120
  end
83
-
84
- # setup tag rules for inside this element
85
- if ElementContent[stagname] == :CDATA
86
- uncontainable_tags = :CDATA
87
- elsif opts[:fixup_tags]
88
- possible_tags = ElementContent[stagname]
89
- excluded_tags, included_tags = stack.last[3..4]
90
- if possible_tags
91
- excluded_tags = excluded_tags | (ElementExclusions[stagname] || [])
92
- included_tags = included_tags | (ElementInclusions[stagname] || [])
93
- containable_tags = (possible_tags | included_tags) - excluded_tags
94
- uncontainable_tags = ElementContent.keys - containable_tags
95
- else
96
- # If the tagname is unknown, it is assumed that any element
97
- # except excluded can be contained.
98
- uncontainable_tags = excluded_tags
121
+ unless opts[:xml]
122
+ case token[2] when Hash
123
+ token[2] = token[2].inject({}) { |hsh,(k,v)| hsh[k.downcase] = v; hsh }
99
124
  end
100
125
  end
126
+ stack << [stagname, token, [], excluded_tags, included_tags, uncontainable_tags]
101
127
  end
102
- stack << [stagname, token, [], excluded_tags, included_tags, uncontainable_tags]
103
- end
104
- when :etag
105
- etagname = token[0] = token[1].downcase
106
- if opts[:xhtml_strict] and not ElementContent.has_key? etagname
107
- etagname = token[0] = "div"
108
- end
109
- matched_elem = nil
110
- (stack.length-1).downto(0) do |i|
111
- stagname, = stack[i]
112
- if stagname == etagname
113
- matched_elem = stack[i]
114
- stack[i][1] += token
115
- eles = stack.slice!((i+1)..-1)
116
- stack.last[2] += eles
117
- break
128
+ when :etag
129
+ etagname = token[0] = token[1].send(conv)
130
+ if opts[:xhtml_strict] and not ElementContent.has_key? etagname
131
+ etagname = token[0] = "div"
132
+ end
133
+ matched_elem = nil
134
+ (stack.length-1).downto(0) do |i|
135
+ stagname, = stack[i]
136
+ if stagname == etagname
137
+ matched_elem = stack[i]
138
+ stack[i][1] += token
139
+ eles = stack.slice!((i+1)..-1)
140
+ stack.last[2] += eles
141
+ break
142
+ end
143
+ end
144
+ unless matched_elem
145
+ stack.last[2] << [:bogus_etag, token.first, token.last]
146
+ else
147
+ ele = stack.pop
148
+ stack.last[2] << ele
149
+ end
150
+ when :text
151
+ l = stack.last[2].last
152
+ if l and l[0] == :text
153
+ l[1] += token[1]
154
+ else
155
+ stack.last[2] << token
118
156
  end
119
- end
120
- unless matched_elem
121
- stack.last[2] << [:bogus_etag, token.first, token.last]
122
- else
123
- ele = stack.pop
124
- stack.last[2] << ele
125
- end
126
- when :text
127
- l = stack.last[2].last
128
- if l and l[0] == :text
129
- l[1] += token[1]
130
157
  else
131
158
  stack.last[2] << token
132
159
  end
133
- else
134
- stack.last[2] << token
135
160
  end
136
- end
137
161
 
138
- while 1 < stack.length
139
- ele = stack.pop
140
- stack.last[2] << ele
141
- end
162
+ while 1 < stack.length
163
+ ele = stack.pop
164
+ stack.last[2] << ele
165
+ end
142
166
 
143
- structure_list = stack[0][2]
144
- structure_list.map {|s| build_node(s, opts) }
167
+ structure_list = stack[0][2]
168
+ structure_list.map {|s| build_node(s, opts) }
169
+ elsif blk
170
+ Hpricot.build(&blk).children
171
+ end
145
172
  end
146
173
 
147
174
  def Hpricot.build_node(structure, opts = {})
@@ -3,7 +3,7 @@ module Hpricot
3
3
 
4
4
  class Doc
5
5
  attr_accessor :children
6
- def initialize(children)
6
+ def initialize(children = [])
7
7
  @children = children ? children.each { |c| c.parent = self } : []
8
8
  end
9
9
  def output(out, opts = {})
@@ -49,9 +49,17 @@ module Hpricot
49
49
  @children = children ? children.each { |c| c.parent = self } : []
50
50
  end
51
51
  def empty?; @children.empty? end
52
- [:name, :attributes, :parent, :altered!].each do |m|
52
+ [:name, :raw_attributes, :parent, :altered!].each do |m|
53
53
  [m, "#{m}="].each { |m2| define_method(m2) { |*a| [@etag, @stag].inject { |_,t| t.send(m2, *a) if t and t.respond_to?(m2) } } }
54
54
  end
55
+ def attributes
56
+ if raw_attributes
57
+ raw_attributes.inject({}) do |hsh, (k, v)|
58
+ hsh[k] = Hpricot.uxs(v)
59
+ hsh
60
+ end
61
+ end
62
+ end
55
63
  def to_plain_text
56
64
  if self.name == 'br'
57
65
  "\n"
@@ -84,18 +92,15 @@ module Hpricot
84
92
 
85
93
  class STag < BaseEle
86
94
  def initialize(name, attributes=nil)
87
- @name = name.downcase
88
- @attributes = {}
89
- if attributes
90
- @attributes = attributes.inject({}) { |hsh,(k,v)| hsh[k.downcase] = v; hsh }
91
- end
95
+ @name = name.to_s
96
+ @raw_attributes = attributes || {}
92
97
  end
93
- alterable :name, :attributes
98
+ alterable :name, :raw_attributes
94
99
  def attributes_as_html
95
- if @attributes
96
- @attributes.map do |aname, aval|
100
+ if @raw_attributes
101
+ @raw_attributes.map do |aname, aval|
97
102
  " #{aname}" +
98
- (aval ? "=#{html_quote(aval)}" : "")
103
+ (aval ? "=\"#{aval}\"" : "")
99
104
  end.join
100
105
  end
101
106
  end
@@ -111,7 +116,7 @@ module Hpricot
111
116
 
112
117
  class ETag < BaseEle
113
118
  def initialize(qualified_name)
114
- @name = qualified_name
119
+ @name = qualified_name.to_s
115
120
  end
116
121
  alterable :name
117
122
  def output(out, opts = {})
@@ -132,8 +137,11 @@ module Hpricot
132
137
  end
133
138
  alterable :content
134
139
  def pathname; "text()" end
135
- alias_method :inner_text, :content
136
- alias_method :to_plain_text, :content
140
+ def to_s
141
+ Hpricot.uxs(@content)
142
+ end
143
+ alias_method :inner_text, :to_s
144
+ alias_method :to_plain_text, :to_s
137
145
  def output(out, opts = {})
138
146
  out <<
139
147
  if_output(opts) do
@@ -143,7 +151,7 @@ module Hpricot
143
151
  end
144
152
 
145
153
  class CData < Text
146
- alias_method :inner_text, :content
154
+ alias_method :to_s, :content
147
155
  alias_method :to_plain_text, :content
148
156
  def output(out, opts = {})
149
157
  out <<
@@ -0,0 +1,164 @@
1
+ module Hpricot
2
+
3
+ FORM_TAGS = [ :form, :input, :select, :textarea ]
4
+ SELF_CLOSING_TAGS = [ :base, :meta, :link, :hr, :br, :param, :img, :area, :input, :col ]
5
+
6
+ # Common sets of attributes.
7
+ AttrCore = [:id, :class, :style, :title]
8
+ AttrI18n = [:lang, 'xml:lang'.intern, :dir]
9
+ AttrEvents = [:onclick, :ondblclick, :onmousedown, :onmouseup, :onmouseover, :onmousemove,
10
+ :onmouseout, :onkeypress, :onkeydown, :onkeyup]
11
+ AttrFocus = [:accesskey, :tabindex, :onfocus, :onblur]
12
+ AttrHAlign = [:align, :char, :charoff]
13
+ AttrVAlign = [:valign]
14
+ Attrs = AttrCore + AttrI18n + AttrEvents
15
+
16
+ # All the tags and attributes from XHTML 1.0 Strict
17
+ class XHTMLStrict
18
+ class << self
19
+ attr_accessor :tags, :tagset, :forms, :self_closing, :doctype
20
+ end
21
+ @doctype = ["-//W3C//DTD XHTML 1.0 Strict//EN", "DTD/xhtml1-strict.dtd"]
22
+ @tagset = {
23
+ :html => AttrI18n + [:id, :xmlns],
24
+ :head => AttrI18n + [:id, :profile],
25
+ :title => AttrI18n + [:id],
26
+ :base => [:href, :id],
27
+ :meta => AttrI18n + [:id, :http, :name, :content, :scheme, 'http-equiv'.intern],
28
+ :link => Attrs + [:charset, :href, :hreflang, :type, :rel, :rev, :media],
29
+ :style => AttrI18n + [:id, :type, :media, :title, 'xml:space'.intern],
30
+ :script => [:id, :charset, :type, :src, :defer, 'xml:space'.intern],
31
+ :noscript => Attrs,
32
+ :body => Attrs + [:onload, :onunload],
33
+ :div => Attrs,
34
+ :p => Attrs,
35
+ :ul => Attrs,
36
+ :ol => Attrs,
37
+ :li => Attrs,
38
+ :dl => Attrs,
39
+ :dt => Attrs,
40
+ :dd => Attrs,
41
+ :address => Attrs,
42
+ :hr => Attrs,
43
+ :pre => Attrs + ['xml:space'.intern],
44
+ :blockquote => Attrs + [:cite],
45
+ :ins => Attrs + [:cite, :datetime],
46
+ :del => Attrs + [:cite, :datetime],
47
+ :a => Attrs + AttrFocus + [:charset, :type, :name, :href, :hreflang, :rel, :rev, :shape, :coords],
48
+ :span => Attrs,
49
+ :bdo => AttrCore + AttrEvents + [:lang, 'xml:lang'.intern, :dir],
50
+ :br => AttrCore,
51
+ :em => Attrs,
52
+ :strong => Attrs,
53
+ :dfn => Attrs,
54
+ :code => Attrs,
55
+ :samp => Attrs,
56
+ :kbd => Attrs,
57
+ :var => Attrs,
58
+ :cite => Attrs,
59
+ :abbr => Attrs,
60
+ :acronym => Attrs,
61
+ :q => Attrs + [:cite],
62
+ :sub => Attrs,
63
+ :sup => Attrs,
64
+ :tt => Attrs,
65
+ :i => Attrs,
66
+ :b => Attrs,
67
+ :big => Attrs,
68
+ :small => Attrs,
69
+ :object => Attrs + [:declare, :classid, :codebase, :data, :type, :codetype, :archive, :standby, :height, :width, :usemap, :name, :tabindex],
70
+ :param => [:id, :name, :value, :valuetype, :type],
71
+ :img => Attrs + [:src, :alt, :longdesc, :height, :width, :usemap, :ismap],
72
+ :map => AttrI18n + AttrEvents + [:id, :class, :style, :title, :name],
73
+ :area => Attrs + AttrFocus + [:shape, :coords, :href, :nohref, :alt],
74
+ :form => Attrs + [:action, :method, :enctype, :onsubmit, :onreset, :accept, :accept],
75
+ :label => Attrs + [:for, :accesskey, :onfocus, :onblur],
76
+ :input => Attrs + AttrFocus + [:type, :name, :value, :checked, :disabled, :readonly, :size, :maxlength, :src, :alt, :usemap, :onselect, :onchange, :accept],
77
+ :select => Attrs + [:name, :size, :multiple, :disabled, :tabindex, :onfocus, :onblur, :onchange],
78
+ :optgroup => Attrs + [:disabled, :label],
79
+ :option => Attrs + [:selected, :disabled, :label, :value],
80
+ :textarea => Attrs + AttrFocus + [:name, :rows, :cols, :disabled, :readonly, :onselect, :onchange],
81
+ :fieldset => Attrs,
82
+ :legend => Attrs + [:accesskey],
83
+ :button => Attrs + AttrFocus + [:name, :value, :type, :disabled],
84
+ :table => Attrs + [:summary, :width, :border, :frame, :rules, :cellspacing, :cellpadding],
85
+ :caption => Attrs,
86
+ :colgroup => Attrs + AttrHAlign + AttrVAlign + [:span, :width],
87
+ :col => Attrs + AttrHAlign + AttrVAlign + [:span, :width],
88
+ :thead => Attrs + AttrHAlign + AttrVAlign,
89
+ :tfoot => Attrs + AttrHAlign + AttrVAlign,
90
+ :tbody => Attrs + AttrHAlign + AttrVAlign,
91
+ :tr => Attrs + AttrHAlign + AttrVAlign,
92
+ :th => Attrs + AttrHAlign + AttrVAlign + [:abbr, :axis, :headers, :scope, :rowspan, :colspan],
93
+ :td => Attrs + AttrHAlign + AttrVAlign + [:abbr, :axis, :headers, :scope, :rowspan, :colspan],
94
+ :h1 => Attrs,
95
+ :h2 => Attrs,
96
+ :h3 => Attrs,
97
+ :h4 => Attrs,
98
+ :h5 => Attrs,
99
+ :h6 => Attrs
100
+ }
101
+
102
+ @tags = @tagset.keys
103
+ @forms = @tags & FORM_TAGS
104
+ @self_closing = @tags & SELF_CLOSING_TAGS
105
+ end
106
+
107
+ # Additional tags found in XHTML 1.0 Transitional
108
+ class XHTMLTransitional
109
+ class << self
110
+ attr_accessor :tags, :tagset, :forms, :self_closing, :doctype
111
+ end
112
+ @doctype = ["-//W3C//DTD XHTML 1.0 Transitional//EN", "DTD/xhtml1-transitional.dtd"]
113
+ @tagset = XHTMLStrict.tagset.merge \
114
+ :strike => Attrs,
115
+ :center => Attrs,
116
+ :dir => Attrs + [:compact],
117
+ :noframes => Attrs,
118
+ :basefont => [:id, :size, :color, :face],
119
+ :u => Attrs,
120
+ :menu => Attrs + [:compact],
121
+ :iframe => AttrCore + [:longdesc, :name, :src, :frameborder, :marginwidth, :marginheight, :scrolling, :align, :height, :width],
122
+ :font => AttrCore + AttrI18n + [:size, :color, :face],
123
+ :s => Attrs,
124
+ :applet => AttrCore + [:codebase, :archive, :code, :object, :alt, :name, :width, :height, :align, :hspace, :vspace],
125
+ :isindex => AttrCore + AttrI18n + [:prompt]
126
+
127
+ # Additional attributes found in XHTML 1.0 Transitional
128
+ { :script => [:language],
129
+ :a => [:target],
130
+ :td => [:bgcolor, :nowrap, :width, :height],
131
+ :p => [:align],
132
+ :h5 => [:align],
133
+ :h3 => [:align],
134
+ :li => [:type, :value],
135
+ :div => [:align],
136
+ :pre => [:width],
137
+ :body => [:background, :bgcolor, :text, :link, :vlink, :alink],
138
+ :ol => [:type, :compact, :start],
139
+ :h4 => [:align],
140
+ :h2 => [:align],
141
+ :object => [:align, :border, :hspace, :vspace],
142
+ :img => [:name, :align, :border, :hspace, :vspace],
143
+ :link => [:target],
144
+ :legend => [:align],
145
+ :dl => [:compact],
146
+ :input => [:align],
147
+ :h6 => [:align],
148
+ :hr => [:align, :noshade, :size, :width],
149
+ :base => [:target],
150
+ :ul => [:type, :compact],
151
+ :br => [:clear],
152
+ :form => [:name, :target],
153
+ :area => [:target],
154
+ :h1 => [:align]
155
+ }.each do |k, v|
156
+ @tagset[k] += v
157
+ end
158
+
159
+ @tags = @tagset.keys
160
+ @forms = @tags & FORM_TAGS
161
+ @self_closing = @tags & SELF_CLOSING_TAGS
162
+ end
163
+
164
+ end