hpricot 0.4-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ require 'pp'
2
+
3
+ module Hpricot
4
+ # :stopdoc:
5
# Pretty-printing support for a whole document.
class Doc
  # Renders the document as a pp object group whose members are the
  # pretty-printed children, each preceded by a breakable separator.
  #
  # q:: the pretty-printer the output is written to.
  def pretty_print(q)
    q.object_group(self) do
      @children.each do |child|
        q.breakable
        q.pp(child)
      end
    end
  end

  # #inspect yields the same text as the pretty-printer.
  alias inspect pretty_print_inspect
end
11
+
12
# Pretty-printing support for elements.
class Elem
  # Prints <tt>{emptyelem STAG}</tt> for an element without children and
  # <tt>{elem STAG CHILDREN... ETAG}</tt> otherwise. The children and the
  # end tag are only printed when present.
  #
  # q:: the pretty-printer the output is written to.
  def pretty_print(q)
    childless = empty?
    opener = childless ? '{emptyelem' : "{elem"
    q.group(1, opener, '}') do
      q.breakable
      q.pp(@stag)
      unless childless
        if @children
          @children.each do |child|
            q.breakable
            q.pp(child)
          end
        end
        if @etag
          q.breakable
          q.pp(@etag)
        end
      end
    end
  end

  # #inspect yields the same text as the pretty-printer.
  alias inspect pretty_print_inspect
end
32
+
33
# Pretty-printing support shared by all leaf nodes.
module Leaf
  # Prints <tt>{kind ...}</tt> where +kind+ is the downcased class name
  # without its namespace. When a raw string is stored it is printed line by
  # line (each line inspected, terminator included); otherwise the node's
  # #to_s is printed verbatim.
  #
  # q:: the pretty-printer the output is written to.
  def pretty_print(q)
    q.group(1, '{', '}') do
      q.text self.class.name.sub(/.*::/,'').downcase
      raw = @raw_string
      if raw
        # Split into lines, keeping \r\n, \r or \n terminators attached.
        raw.scan(/[^\r\n]*(?:\r\n?|\n|[^\r\n]\z)/).each do |line|
          q.breakable
          q.pp(line)
        end
      elsif self.respond_to?(:to_s)
        q.breakable
        q.text(self.to_s)
      end
    end
  end

  # #inspect yields the same text as the pretty-printer.
  alias inspect pretty_print_inspect
end
50
+
51
# Pretty-printing support for start tags.
class STag
  # Prints <tt><name attr="value" ...></tt>, with a breakable separator
  # before every attribute. Attributes are skipped when none are stored.
  #
  # q:: the pretty-printer the output is written to.
  def pretty_print(q)
    q.group(1, '<', '>') do
      q.text(@name)

      attrs = @attributes
      if attrs
        attrs.each do |key, value|
          q.breakable
          q.text("#{key}=\"#{value}\"")
        end
      end
    end
  end

  # #inspect yields the same text as the pretty-printer.
  alias inspect pretty_print_inspect
end
66
+
67
# Pretty-printing support for end tags.
class ETag
  # Prints the tag name wrapped as <tt></name></tt>.
  #
  # q:: the pretty-printer the output is written to.
  def pretty_print(q)
    q.group(1, '</', '>') do
      q.text(@name)
    end
  end

  # #inspect yields the same text as the pretty-printer.
  alias inspect pretty_print_inspect
end
75
+
76
# Pretty-printing support for end tags that matched no open element.
class BogusETag
  # Prints <tt>{bogusetag RAW}</tt>, where RAW is the stored raw string or,
  # when none is stored, an end tag synthesized from the name.
  #
  # The separator is emitted in both cases; previously the synthesized form
  # ran straight into the class label (<tt>{bogusetag</p>}</tt>).
  #
  # q:: the pretty-printer the output is written to.
  def pretty_print(q)
    q.group(1, '{', '}') {
      q.text self.class.name.sub(/.*::/,'').downcase
      q.breakable
      q.text(@raw_string || "</#{@name}>")
    }
  end

  # #inspect yields the same text as the pretty-printer, as in the sibling
  # classes in this file.
  alias inspect pretty_print_inspect
end
89
+ # :startdoc:
90
+ end
@@ -0,0 +1,37 @@
1
# Declares the mixin hierarchy of the Hpricot node classes. Only module
# inclusion is set up here; no behaviour is defined in this block.
module Hpricot
  class Name
    include Hpricot
  end

  class Context
    include Hpricot
  end

  # :stopdoc:
  module Tag
    include Hpricot
  end

  class STag
    include Tag
  end

  class ETag
    include Tag
  end
  # :startdoc:

  # Node is the root mixin; Container and Leaf are its two branches.
  module Node
    include Hpricot
  end

  module Container
    include Node
  end

  class Doc
    include Container
  end

  class Elem
    include Container
  end

  module Leaf
    include Node
  end

  class Text
    include Leaf
  end

  class XMLDecl
    include Leaf
  end

  class DocType
    include Leaf
  end

  class ProcIns
    include Leaf
  end

  class Comment
    include Leaf
  end

  class BogusETag
    include Leaf
  end

  # Each node class gets its own Trav module, which in turn includes the
  # Trav module of its branch (Container or Leaf).
  module Traverse
  end

  module Container::Trav
    include Traverse
  end

  module Leaf::Trav
    include Traverse
  end

  class Doc
    module Trav
      include Container::Trav
    end
    include Trav
  end

  class Elem
    module Trav
      include Container::Trav
    end
    include Trav
  end

  class Text
    module Trav
      include Leaf::Trav
    end
    include Trav
  end

  class XMLDecl
    module Trav
      include Leaf::Trav
    end
    include Trav
  end

  class DocType
    module Trav
      include Leaf::Trav
    end
    include Trav
  end

  class ProcIns
    module Trav
      include Leaf::Trav
    end
    include Trav
  end

  class Comment
    module Trav
      include Leaf::Trav
    end
    include Trav
  end

  class BogusETag
    module Trav
      include Leaf::Trav
    end
    include Trav
  end

  class Error < StandardError; end
end
37
+
@@ -0,0 +1,286 @@
1
+ require 'hpricot/htmlinfo'
2
+
3
# Shorthand for Hpricot.parse: <tt>Hpricot(input, opts)</tt> forwards both
# arguments unchanged and returns whatever Hpricot.parse returns.
def Hpricot(input, opts = {})
  Hpricot.parse(input, opts)
end
6
+
7
+ module Hpricot
8
+ # Hpricot.parse parses <i>input</i> and returns a document tree
9
+ # represented by Hpricot::Doc.
10
# Builds the node list for +input+ with Hpricot.make and wraps it in a Doc.
#
# input:: handed to Hpricot.make unchanged.
# opts::  handed to Hpricot.make unchanged.
def Hpricot.parse(input, opts = {})
  nodes = make(input, opts)
  Doc.new(nodes)
end
13
+
14
+ # :stopdoc:
15
+
16
# Scans +input+ with Hpricot.scan and assembles the tokens into nodes.
#
# A stack of open elements is maintained. Every frame has the shape
#   [tagname, token, children, excluded_tags, included_tags, uncontainable_tags]
# and frame 0 is a sentinel that collects the top-level structures.
#
# opts[:fixup_tags] (default false) enables the containment rules from
# ElementContent / ElementExclusions / ElementInclusions: a start tag that
# the open elements may not contain can cause those elements to be closed.
#
# Returns an array with the result of build_node for each top-level
# structure.
def Hpricot.make(input, opts = {})
  opts = {:fixup_tags => false}.merge(opts)
  stack = [[nil, nil, [], [], [], []]]
  Hpricot.scan(input) do |token|
    # Inside a CDATA-content element every token except that element's own
    # end tag is treated as text (using the raw string when there is one).
    if stack.last[5] == :CDATA && !(token[0] == :etag && token[1].downcase == stack.last[0])
      token[0] = :text
      token[1] = token[3] if token[3]
    end

    case token[0]
    when :stag
      stagname = token[0] = token[1].downcase
      if ElementContent[stagname] == :EMPTY
        token[0] = :emptytag
        stack.last[2] << token
      else
        # obey the tag rules set up by the current element
        if opts[:fixup_tags] && ElementContent.has_key?(stagname)
          trans = nil
          (stack.length-1).downto(0) do |i|
            untags = stack[i][5]
            break unless untags.include? stagname
            # puts "** ILLEGAL #{stagname} IN #{stack[i][0]}"
            trans = i
          end
          if trans.to_i > 1
            # Close the offending frames by moving them into their parent.
            eles = stack.slice!(trans..-1)
            stack.last[2] += eles
            # puts "** TRANSPLANTED #{stagname} TO #{stack.last[0]}"
          end
        end

        # setup tag rules for inside this element
        if ElementContent[stagname] == :CDATA
          uncontainable_tags = :CDATA
        elsif opts[:fixup_tags]
          possible_tags = ElementContent[stagname]
          excluded_tags, included_tags = stack.last[3..4]
          if possible_tags
            excluded_tags = excluded_tags | (ElementExclusions[stagname] || [])
            included_tags = included_tags | (ElementInclusions[stagname] || [])
            containable_tags = (possible_tags | included_tags) - excluded_tags
            uncontainable_tags = ElementContent.keys - containable_tags
          else
            # If the tagname is unknown, it is assumed that any element
            # except excluded can be contained.
            uncontainable_tags = excluded_tags
          end
        end
        stack << [stagname, token, [], excluded_tags, included_tags, uncontainable_tags]
      end
    when :etag
      etagname = token[0] = token[1].downcase
      matched_elem = nil
      (stack.length-1).downto(0) do |i|
        stagname, = stack[i]
        if stagname == etagname
          matched_elem = stack[i]
          # Append the end-tag token to the start-tag token so build_node
          # can read both from one array.
          stack[i][1] += token
          eles = stack.slice!((i+1)..-1)
          stack.last[2] += eles
          break
        end
      end
      if matched_elem
        ele = stack.pop
        stack.last[2] << ele
      else
        # build_node passes structure[1] and structure[2] to
        # BogusETag.parse(qname, raw_string), so store the name and the raw
        # string separately rather than nesting the whole token.
        stack.last[2] << [:bogus_etag, token[0], token[3]]
      end
    when :text
      l = stack.last[2].last
      if l && l[0] == :text
        # Merge adjacent text tokens.
        l[1] += token[1]
      else
        stack.last[2] << token
      end
    else
      stack.last[2] << token
    end
  end

  # Close whatever is still open at end of input.
  while 1 < stack.length
    ele = stack.pop
    stack.last[2] << ele
  end

  structure_list = stack[0][2]
  structure_list.map {|s| build_node(s) }
end
108
+
109
# Re-parents the children of +elem+ according to the containment rules.
#
# elem::          a structure [tagname, token, children, ...] as built by make.
# excluded_tags:: tag names inherited as not containable.
# included_tags:: tag names inherited as additionally containable.
#
# Returns a pair [elem, rest]: +elem+ with its children replaced by the
# ones it may keep, and +rest+ with the structures that have to move up to
# the caller.
#
# NOTE(review): the branch for elements with an explicit end tag calls
# fix_structure_list, which is not defined in the code visible here.
def Hpricot.fix_element(elem, excluded_tags, included_tags)
  tagname, _, _, _, _, _, _, eraw = elem[1]
  children = elem[2]

  if eraw
    elem[2] = fix_structure_list(children)
    return elem, []
  end

  if ElementContent[tagname] == :EMPTY
    # An empty element keeps nothing; all children go back to the caller.
    elem[2] = []
    return elem, children
  end

  if ElementContent[tagname] == :CDATA
    possible_tags = []
  else
    possible_tags = ElementContent[tagname]
  end
  if possible_tags
    excluded_tags2 = ElementExclusions[tagname]
    included_tags2 = ElementInclusions[tagname]
    excluded_tags |= excluded_tags2 if excluded_tags2
    included_tags |= included_tags2 if included_tags2
    containable_tags = (possible_tags | included_tags) - excluded_tags
    uncontainable_tags = ElementContent.keys - containable_tags
  else
    # If the tagname is unknown, it is assumed that any element
    # except excluded can be contained.
    uncontainable_tags = excluded_tags
  end

  fixed_children = []
  rest = children
  until rest.empty?
    if String === rest[0][0]
      # A dedicated local is used for the child: reusing +elem+ here made the
      # method update and return the last child instead of the element.
      child = rest.shift
      child_tagname = child[0].downcase
      if uncontainable_tags.include? child_tagname
        rest.unshift child
        break
      else
        fixed_child, rest2 = fix_element(child, excluded_tags, included_tags)
        fixed_children << fixed_child
        rest = rest2 + rest
      end
    else
      fixed_children << rest.shift
    end
  end
  elem[2] = fixed_children
  return elem, rest
end
161
+
162
# Converts one structure produced by make into a node object.
#
# structure[0] selects the node type: a String (the tag name) denotes an
# element whose token is structure[1] and whose child structures are
# structure[2]; a Symbol denotes one of the leaf/empty-tag kinds below.
#
# Raises Exception for an unknown kind.
def Hpricot.build_node(structure)
  kind = structure[0]
  case kind
  when String
    tagname, _, attrs, sraw, _, _, _, eraw = structure[1]
    kids = structure[2]
    etag = eraw && ETag.parse(tagname, eraw)
    stag = STag.parse(tagname, attrs, sraw, true)
    if kids.empty? && !etag
      Elem.new(stag)
    else
      Elem.new(stag,
               kids.map { |kid| build_node(kid) },
               etag)
    end
  when :text          then Text.parse_pcdata(structure[1])
  when :emptytag      then Elem.new(STag.parse(structure[1], structure[2], structure[3], false))
  when :bogus_etag    then BogusETag.parse(structure[1], structure[2])
  when :xmldecl       then XMLDecl.parse(structure[2], structure[3])
  when :doctype       then DocType.parse(structure[1], structure[2], structure[3])
  when :procins       then ProcIns.parse(structure[1], structure[2], structure[3])
  when :comment       then Comment.parse(structure[1])
  when :cdata_content then Text.parse_cdata_content(structure[1])
  when :cdata         then Text.parse_cdata_section(structure[1])
  else
    raise Exception, "[bug] unknown structure: #{structure.inspect}"
  end
end
198
+
199
# Creates an STag for +qname+ / +attrs+ and records +raw_string+ on it.
# +is_stag+ is accepted but not used by this method.
def STag.parse(qname, attrs, raw_string, is_stag)
  stag = STag.new(qname, attrs)
  stag.raw_string = raw_string
  stag
end
204
+
205
# Creates an instance of the receiver for +qname+ and records +raw_string+
# on it.
def ETag.parse(qname, raw_string)
  etag = new(qname)
  etag.raw_string = raw_string
  etag
end
210
+
211
# Creates an instance of the receiver for +qname+ and records +raw_string+
# on it.
def BogusETag.parse(qname, raw_string)
  bogus = new(qname)
  bogus.raw_string = raw_string
  bogus
end
216
+
217
# Creates a Text node whose content and raw string are both +raw_string+.
def Text.parse_pcdata(raw_string)
  text = Text.new(raw_string)
  text.raw_string = raw_string
  text
end
222
+
223
# Creates a Text node whose content and raw string are both +raw_string+
# and flags it by setting its @cdata instance variable to true.
def Text.parse_cdata_content(raw_string)
  text = Text.new(raw_string)
  text.raw_string = raw_string
  text.instance_variable_set(:@cdata, true)
  text
end
229
+
230
# Creates a Text node for the content of a CDATA section. The raw string is
# rebuilt by wrapping +content+ in the CDATA section delimiters.
def Text.parse_cdata_section(content)
  text = Text.new(content)
  wrapped = "<![CDATA[" + content + "]]>"
  text.raw_string = wrapped
  text
end
235
+
236
# Creates an XMLDecl from the declaration's attributes.
#
# attrs::      looked up with the keys 'version', 'encoding' and
#              'standalone'; nil is treated as no attributes.
# raw_string:: recorded on the result.
#
# 'standalone' maps 'yes' to true and 'no' to false; anything else is nil.
def XMLDecl.parse(attrs, raw_string)
  attrs ||= {}
  version = attrs['version']
  encoding = attrs['encoding']
  standalone =
    case attrs['standalone']
    when 'yes' then true
    when 'no'  then false
    end

  decl = XMLDecl.new(version, encoding, standalone)
  decl.raw_string = raw_string
  decl
end
253
+
254
# Creates a DocType from a doctype declaration.
#
# root_element_name:: downcased before it is stored.
# attrs::             when given, supplies 'public_id' and 'system_id';
#                     both identifiers are nil otherwise.
# raw_string::        recorded on the result.
def DocType.parse(root_element_name, attrs, raw_string)
  public_identifier = system_identifier = nil
  if attrs
    public_identifier = attrs['public_id']
    system_identifier = attrs['system_id']
  end

  doctype = DocType.new(root_element_name.downcase, public_identifier, system_identifier)
  doctype.raw_string = raw_string
  doctype
end
266
+
267
# Creates a ProcIns for +target+ / +content+ and records +raw_string+ on it.
def ProcIns.parse(target, content, raw_string)
  instruction = ProcIns.new(target, content)
  instruction.raw_string = raw_string
  instruction
end
272
+
273
# Creates a Comment for +content+. The raw string is rebuilt by wrapping
# +content+ in the comment delimiters.
def Comment.parse(content)
  comment = Comment.new(content)
  wrapped = "<!--" + content + "-->"
  comment.raw_string = wrapped
  comment
end
278
+
279
# Regular-expression fragments for names and name tokens.
module Pat
  # One character that may appear inside a name.
  NameChar = /[-A-Za-z0-9._:]/

  # A name: a letter, underscore or colon followed by any name characters.
  Name = /[A-Za-z_:]#{NameChar}*/

  # A name token: one or more name characters.
  Nmtoken = /#{NameChar}+/
end
284
+
285
+ # :startdoc:
286
+ end
@@ -0,0 +1,146 @@
1
# Node classes of the document tree together with their serialization.
# Every #output method appends markup to +out+ using <tt><<</tt>.
module Hpricot
  # :stopdoc:

  # Root of a parsed document; holds the top-level nodes.
  class Doc
    attr_accessor :children

    # children:: the top-level nodes, or nil for an empty document. Every
    #            child gets this document assigned as its parent.
    def initialize(children)
      @children = children ? children.each { |c| c.parent = self } : []
    end

    # Appends the markup of all children to +out+ and returns +out+.
    def output(out)
      @children.each { |n| n.output(out) }
      out
    end
  end

  # Shared state and helpers for tags and leaf nodes.
  class BaseEle
    attr_accessor :raw_string, :parent

    # Returns +str+ wrapped in double quotes for use as an attribute value.
    # Embedded double quotes are written as <tt>&quot;</tt>; HTML has no
    # backslash escapes, so <tt>\"</tt> would end the value early.
    def html_quote(str)
      "\"" + str.gsub('"', '&quot;') + "\""
    end
  end

  # An element: a start tag, its children and an optional end tag.
  class Elem
    attr_accessor :stag, :etag, :children

    # stag::     the start tag.
    # children:: child nodes or nil; each child gets this element as parent.
    # etag::     the end tag or nil.
    def initialize(stag, children=nil, etag=nil)
      @stag, @etag = stag, etag
      @children = children ? children.each { |c| c.parent = self } : []
    end

    def empty?; @children.empty? end

    # name, attributes and parent (readers and writers) are forwarded to
    # the start tag.
    [:name, :attributes, :parent].each do |m|
      [m, "#{m}="].each { |m2| define_method(m2) { |*a| @stag.send(m2, *a) } }
    end

    # Appends the element's markup to +out+ and returns +out+. A childless
    # element that ElementContent marks as :EMPTY is written as a single
    # self-closing tag; everything else gets a start and an end tag.
    def output(out)
      if empty? and ElementContent[@stag.name] == :EMPTY
        @stag.output(out, :style => :empty)
      else
        @stag.output(out)
        @children.each { |n| n.output(out) }
        @stag.output(out, :style => :end)
      end
      out
    end
  end

  # A start tag. The name and the attribute names are stored downcased.
  class STag < BaseEle
    def initialize(name, attributes=nil)
      @name = name.downcase
      if attributes
        @attributes = attributes.inject({}) { |hsh,(k,v)| hsh[k.downcase] = v; hsh }
      end
    end
    attr_accessor :name, :attributes

    # Returns the attributes as markup, each one preceded by a space, or nil
    # when the tag has no attributes. An attribute whose value is nil is
    # written as a bare name.
    def attributes_as_html
      if @attributes
        @attributes.map do |aname, aval|
          " #{aname}" +
            (aval ? "=#{html_quote(aval)}" : "")
        end.join
      end
    end

    # Appends the tag to +out+. opts[:style] selects the form: :end writes
    # the end tag, :empty a self-closing tag, anything else the start tag.
    def output(out, opts = {})
      out <<
        case opts[:style]
        when :end
          "</#{@name}>"
        else
          "<#{@name}#{attributes_as_html}" +
            (opts[:style] == :empty ? " /" : "") +
            ">"
        end
    end
  end

  # An end tag.
  class ETag < BaseEle
    def initialize(qualified_name)
      @name = qualified_name
    end
    attr_reader :name
  end

  # An end tag that matched no open element; it produces no output.
  class BogusETag < ETag
    def output(out); end
  end

  # Character data.
  class Text < BaseEle
    def initialize(text)
      @content = text
    end
    attr_reader :content

    # Appends the content to +out+ unchanged.
    def output(out)
      out << @content
    end
  end

  # An XML declaration.
  class XMLDecl < BaseEle
    def initialize(version, encoding, standalone)
      @version, @encoding, @standalone = version, encoding, standalone
    end
    attr_reader :version, :encoding, :standalone

    # Appends the declaration to +out+. The encoding is written when set;
    # standalone is written as yes/no unless it is nil.
    def output(out)
      out <<
        "<?xml version=\"#{@version}\"" +
          (@encoding ? " encoding=\"#{encoding}\"" : "") +
          (@standalone != nil ? " standalone=\"#{standalone ? 'yes' : 'no'}\"" : "") +
          "?>"
    end
  end

  # A document type declaration.
  class DocType < BaseEle
    def initialize(name, pubid, sysid)
      @name, @public_id, @system_id = name, pubid, sysid
    end
    attr_reader :name, :public_id, :system_id

    # Appends the declaration to +out+:
    #   <!DOCTYPE name PUBLIC "pubid" "sysid">   (public id set)
    #   <!DOCTYPE name SYSTEM "sysid">            (only system id set)
    #   <!DOCTYPE name>                           (no identifiers)
    # The SYSTEM keyword is only written when a system id follows it.
    def output(out)
      external_id =
        if @public_id
          " PUBLIC \"#{@public_id}\""
        elsif @system_id
          " SYSTEM"
        else
          ""
        end
      out <<
        "<!DOCTYPE #{@name}" +
          external_id +
          (@system_id ? " #{html_quote(@system_id)}" : "") + ">"
    end
  end

  # A processing instruction.
  class ProcIns < BaseEle
    def initialize(target, content)
      @target, @content = target, content
    end
    attr_reader :target, :content

    # Appends the instruction to +out+; the content is separated from the
    # target by a space and omitted when nil.
    def output(out)
      out << "<?#{@target}" +
        (@content ? " #{@content}" : "") +
        "?>"
    end
  end

  # A comment.
  class Comment < BaseEle
    def initialize(content)
      @content = content
    end
    attr_reader :content

    # Appends the comment, wrapped in its delimiters, to +out+.
    def output(out)
      out << "<!--#{@content}-->"
    end
  end

  # :startdoc:
end