htree 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. data.tar.gz.sig +4 -0
  2. data/Makefile +20 -0
  3. data/Manifest +58 -0
  4. data/README +61 -0
  5. data/Rakefile +37 -0
  6. data/htree.gemspec +32 -0
  7. data/init.rb +1 -0
  8. data/install.rb +112 -0
  9. data/lib/htree.rb +97 -0
  10. data/lib/htree/container.rb +8 -0
  11. data/lib/htree/context.rb +69 -0
  12. data/lib/htree/display.rb +46 -0
  13. data/lib/htree/doc.rb +149 -0
  14. data/lib/htree/elem.rb +262 -0
  15. data/lib/htree/encoder.rb +217 -0
  16. data/lib/htree/equality.rb +219 -0
  17. data/lib/htree/extract_text.rb +37 -0
  18. data/lib/htree/fstr.rb +32 -0
  19. data/lib/htree/gencode.rb +193 -0
  20. data/lib/htree/htmlinfo.rb +672 -0
  21. data/lib/htree/inspect.rb +108 -0
  22. data/lib/htree/leaf.rb +92 -0
  23. data/lib/htree/loc.rb +369 -0
  24. data/lib/htree/modules.rb +49 -0
  25. data/lib/htree/name.rb +122 -0
  26. data/lib/htree/output.rb +212 -0
  27. data/lib/htree/parse.rb +410 -0
  28. data/lib/htree/raw_string.rb +127 -0
  29. data/lib/htree/regexp-util.rb +19 -0
  30. data/lib/htree/rexml.rb +131 -0
  31. data/lib/htree/scan.rb +176 -0
  32. data/lib/htree/tag.rb +113 -0
  33. data/lib/htree/template.rb +961 -0
  34. data/lib/htree/text.rb +115 -0
  35. data/lib/htree/traverse.rb +497 -0
  36. data/test-all.rb +5 -0
  37. data/test/assign.html +1 -0
  38. data/test/template.html +4 -0
  39. data/test/test-attr.rb +67 -0
  40. data/test/test-charset.rb +79 -0
  41. data/test/test-context.rb +29 -0
  42. data/test/test-display_xml.rb +45 -0
  43. data/test/test-elem-new.rb +101 -0
  44. data/test/test-encoder.rb +53 -0
  45. data/test/test-equality.rb +55 -0
  46. data/test/test-extract_text.rb +18 -0
  47. data/test/test-gencode.rb +27 -0
  48. data/test/test-leaf.rb +25 -0
  49. data/test/test-loc.rb +60 -0
  50. data/test/test-namespace.rb +147 -0
  51. data/test/test-output.rb +133 -0
  52. data/test/test-parse.rb +115 -0
  53. data/test/test-raw_string.rb +17 -0
  54. data/test/test-rexml.rb +70 -0
  55. data/test/test-scan.rb +153 -0
  56. data/test/test-security.rb +37 -0
  57. data/test/test-subnode.rb +142 -0
  58. data/test/test-template.rb +313 -0
  59. data/test/test-text.rb +43 -0
  60. data/test/test-traverse.rb +69 -0
  61. metadata +166 -0
  62. metadata.gz.sig +1 -0
@@ -0,0 +1,108 @@
1
+ require 'pp'
2
+ require 'htree/doc'
3
+ require 'htree/elem'
4
+ require 'htree/leaf'
5
+ require 'htree/tag'
6
+ require 'htree/output'
7
+ require 'htree/raw_string'
8
+
9
+ module HTree
10
+ # :stopdoc:
11
+ class Doc
12
+ def pretty_print(q)
13
+ q.object_group(self) { @children.each {|elt| q.breakable; q.pp elt } }
14
+ end
15
+ alias inspect pretty_print_inspect
16
+ end
17
+
18
+ class Elem
19
+ def pretty_print(q)
20
+ if @empty
21
+ q.group(1, '{emptyelem', '}') {
22
+ q.breakable; q.pp @stag
23
+ }
24
+ else
25
+ q.group(1, "{elem", "}") {
26
+ q.breakable; q.pp @stag
27
+ @children.each {|elt| q.breakable; q.pp elt }
28
+ if @etag
29
+ q.breakable; q.pp @etag
30
+ end
31
+ }
32
+ end
33
+ end
34
+ alias inspect pretty_print_inspect
35
+ end
36
+
37
+ module Leaf
38
+ def pretty_print(q)
39
+ q.group(1, '{', '}') {
40
+ q.text self.class.name.sub(/.*::/,'').downcase
41
+ if rs = @raw_string
42
+ rs.scan(/[^\r\n]*(?:\r\n?|\n|[^\r\n]\z)/) {|line|
43
+ q.breakable
44
+ q.pp line
45
+ }
46
+ elsif self.respond_to? :display_xml
47
+ q.breakable
48
+ q.text self.display_xml('')
49
+ end
50
+ }
51
+ end
52
+ alias inspect pretty_print_inspect
53
+ end
54
+
55
+ class Name
56
+ def inspect
57
+ if xmlns?
58
+ @local_name ? "xmlns:#{@local_name}" : "xmlns"
59
+ elsif !@namespace_uri || @namespace_uri.empty?
60
+ @local_name
61
+ elsif @namespace_prefix
62
+ "#{@namespace_prefix}{#{@namespace_uri}}#{@local_name}"
63
+ elsif @namespace_prefix == false
64
+ "-{#{@namespace_uri}}#{@local_name}"
65
+ else
66
+ "{#{@namespace_uri}}#{@local_name}"
67
+ end
68
+ end
69
+ end
70
+
71
+ class STag
72
+ def pretty_print(q)
73
+ q.group(1, '<', '>') {
74
+ q.text @name.inspect
75
+
76
+ @attributes.each {|n, t|
77
+ q.breakable
78
+ q.text "#{n.inspect}=\"#{t.to_attvalue_content}\""
79
+ }
80
+ }
81
+ end
82
+ alias inspect pretty_print_inspect
83
+ end
84
+
85
+ class ETag
86
+ def pretty_print(q)
87
+ q.group(1, '</', '>') {
88
+ q.text @qualified_name
89
+ }
90
+ end
91
+ alias inspect pretty_print_inspect
92
+ end
93
+
94
+ class BogusETag
95
+ def pretty_print(q)
96
+ q.group(1, '{', '}') {
97
+ q.text self.class.name.sub(/.*::/,'').downcase
98
+ if rs = @raw_string
99
+ q.breakable
100
+ q.text rs
101
+ else
102
+ q.text "</#{@qualified_name}>"
103
+ end
104
+ }
105
+ end
106
+ end
107
+ # :startdoc:
108
+ end
@@ -0,0 +1,92 @@
1
+ require 'htree/modules'
2
+ require 'htree/raw_string'
3
+
4
+ module HTree
5
+ class XMLDecl
6
+ def initialize(version, encoding=nil, standalone=nil)
7
+ init_raw_string
8
+ if /\A[a-zA-Z0-9_.:-]+\z/ !~ version
9
+ raise HTree::Error, "invalid version in XML declaration: #{version.inspect}"
10
+ end
11
+ if encoding && /\A[A-Za-z][A-Za-z0-9._-]*\z/ !~ encoding
12
+ raise HTree::Error, "invalid encoding in XML declaration: #{encoding.inspect}"
13
+ end
14
+ unless standalone == nil || standalone == true || standalone == false
15
+ raise HTree::Error, "invalid standalone document declaration in XML declaration: #{standalone.inspect}"
16
+ end
17
+ @version = version
18
+ @encoding = encoding
19
+ @standalone = standalone
20
+ end
21
+ attr_reader :version, :encoding, :standalone
22
+ end
23
+
24
+ class DocType
25
+ def initialize(root_element_name, public_identifier=nil, system_identifier=nil)
26
+ init_raw_string
27
+ if public_identifier && /\A[ \x0d\x0aa-zA-Z0-9\-'()+,.\/:=?;!*\#@$_%]*\z/ !~ public_identifier
28
+ raise HTree::Error, "invalid public identifier in document type declaration: #{public_identifier.inspect}"
29
+ end
30
+ if system_identifier && /"/ =~ system_identifier && /'/ =~ system_identifier
31
+ raise HTree::Error, "invalid system identifier in document type declaration: #{system_identifier.inspect}"
32
+ end
33
+
34
+ @root_element_name = root_element_name
35
+ @public_identifier = public_identifier
36
+ @system_identifier = system_identifier
37
+ end
38
+ attr_reader :root_element_name, :public_identifier, :system_identifier
39
+ end
40
+
41
+ class ProcIns
42
+ # :stopdoc:
43
+ class << self
44
+ alias new! new
45
+ end
46
+ # :startdoc:
47
+
48
+ def ProcIns.new(target, content)
49
+ content = content.gsub(/\?>/, '? >') if content
50
+ new! target, content
51
+ end
52
+
53
+ def initialize(target, content) # :notnew:
54
+ init_raw_string
55
+ if content && /\?>/ =~ content
56
+ raise HTree::Error, "invalid processing instruction content: #{content.inspect}"
57
+ end
58
+ @target = target
59
+ @content = content
60
+ end
61
+ attr_reader :target, :content
62
+ end
63
+
64
+ class Comment
65
+ # :stopdoc:
66
+ class << self
67
+ alias new! new
68
+ end
69
+ # :startdoc:
70
+
71
+ def Comment.new(content)
72
+ content = content.gsub(/-(-+)/) { '-' + ' -' * $1.length }.sub(/-\z/, '- ')
73
+ new! content
74
+ end
75
+
76
+ def initialize(content) # :notnew:
77
+ init_raw_string
78
+ if /--/ =~ content || /-\z/ =~ content
79
+ raise HTree::Error, "invalid comment content: #{content.inspect}"
80
+ end
81
+ @content = content
82
+ end
83
+ attr_reader :content
84
+ end
85
+
86
+ class BogusETag
87
+ def initialize(qualified_name)
88
+ init_raw_string
89
+ @etag = ETag.new(qualified_name)
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,369 @@
1
+ require 'htree/modules'
2
+ require 'htree/elem'
3
+ require 'htree/inspect'
4
+
5
+ module HTree
6
+ module Node
7
+ # creates a location object which points to self.
8
+ def make_loc
9
+ self.class::Loc.new(nil, nil, self)
10
+ end
11
+
12
+ # return self.
13
+ def to_node
14
+ self
15
+ end
16
+
17
+ # +subst+ substitutes several subtrees at once.
18
+ #
19
+ # t = HTree('<r><x/><y/><z/></r>')
20
+ # l = t.make_loc
21
+ # t2 = t.subst({
22
+ # l.get_subnode(0, 'k') => 'v',
23
+ # l.get_subnode(0, -1) => HTree('<a/>'),
24
+ # l.get_subnode(0, 1) => nil,
25
+ # l.get_subnode(0, 2, 0) => HTree('<b/>'),
26
+ # })
27
+ # pp t2
28
+ # # =>
29
+ # #<HTree::Doc
30
+ # {elem <r k="v"> {emptyelem <a>} {emptyelem <x>} {elem <z> {emptyelem <b>}}}>
31
+ def subst(pairs)
32
+ pairs = pairs.map {|key, val|
33
+ key = key.index_list(self)
34
+ unless Array === val
35
+ val = [val]
36
+ end
37
+ [key, val]
38
+ }
39
+
40
+ pairs_empty_key, pairs_nonempty_key =
41
+ pairs.partition {|key, val| key.empty? }
42
+ if !pairs_empty_key.empty?
43
+ if !pairs_nonempty_key.empty?
44
+ raise ArgumentError, "cannot substitute a node under substituting tree."
45
+ end
46
+ result = []
47
+ pairs_empty_key.each {|key, val| result.concat val }
48
+ result.compact!
49
+ if result.length == 1
50
+ return result[0]
51
+ else
52
+ raise ArgumentError, "cannot substitute top node by multiple nodes: #{nodes.inspect}"
53
+ end
54
+ end
55
+ if pairs_nonempty_key.empty?
56
+ return self
57
+ end
58
+
59
+ subst_internal(pairs)
60
+ end
61
+
62
+ def subst_internal(pairs) # :nodoc:
63
+ subnode_pairs = {}
64
+ pairs.each {|key, val|
65
+ k = key.pop
66
+ (subnode_pairs[k] ||= []) << [key, val]
67
+ }
68
+ subnode_pairs = subnode_pairs.map {|k, subpairs|
69
+ s = get_subnode(k)
70
+ subpairs_empty_key, subpairs_nonempty_key =
71
+ subpairs.partition {|key, val| key.empty? }
72
+ if !subpairs_empty_key.empty?
73
+ if !subpairs_nonempty_key.empty?
74
+ raise ArgumentError, "cannot substitute a node under substituting tree."
75
+ end
76
+ r = []
77
+ subpairs_empty_key.each {|key, val| r.concat val }
78
+ [k, r.compact]
79
+ elsif subpairs_nonempty_key.empty?
80
+ [k, s]
81
+ else
82
+ [k, s.subst_internal(subpairs)]
83
+ end
84
+ }
85
+ subst_subnode(subnode_pairs)
86
+ end
87
+ end
88
+
89
+ # :stopdoc:
90
+ class Doc; def node_test_string() 'doc()' end end
91
+ class Elem; def node_test_string() @stag.element_name.qualified_name end end
92
+ class Text; def node_test_string() 'text()' end end
93
+ class BogusETag; def node_test_string() 'bogus-etag()' end end
94
+ class XMLDecl; def node_test_string() 'xml-declaration()' end end
95
+ class DocType; def node_test_string() 'doctype()' end end
96
+ class ProcIns; def node_test_string() 'processing-instruction()' end end
97
+ class Comment; def node_test_string() 'comment()' end end
98
+
99
+ module Container
100
+ def find_loc_step(index)
101
+ if index < 0 || @children.length <= index
102
+ return "*[#{index}]"
103
+ end
104
+
105
+ return @loc_step_children[index].dup if defined? @loc_step_children
106
+
107
+ count = {}
108
+ count.default = 0
109
+
110
+ steps = []
111
+
112
+ @children.each {|c|
113
+ node_test = c.node_test_string
114
+ count[node_test] += 1
115
+ steps << [node_test, count[node_test]]
116
+ }
117
+
118
+ @loc_step_children = []
119
+ steps.each {|node_test, i|
120
+ if count[node_test] == 1
121
+ @loc_step_children << node_test
122
+ else
123
+ @loc_step_children << "#{node_test}[#{i}]"
124
+ end
125
+ }
126
+
127
+ return @loc_step_children[index].dup
128
+ end
129
+ end
130
+
131
+ class Elem
132
+ def find_loc_step(index)
133
+ return super if Integer === index
134
+ if String === index
135
+ index = Name.parse_attribute_name(index, DefaultContext)
136
+ end
137
+ unless Name === index
138
+ raise TypeError, "invalid index: #{index.inspect}"
139
+ end
140
+ "@#{index.qualified_name}"
141
+ end
142
+ end
143
+ # :startdoc:
144
+ end
145
+
146
+ class HTree::Location
147
+ def initialize(parent, index, node) # :nodoc:
148
+ if parent
149
+ @parent = parent
150
+ @index = index
151
+ @node = parent.node.get_subnode(index)
152
+ if !@node.equal?(node)
153
+ raise ArgumentError, "unexpected node"
154
+ end
155
+ else
156
+ @parent = nil
157
+ @index = nil
158
+ @node = node
159
+ end
160
+ if @node && self.class != @node.class::Loc
161
+ raise ArgumentError, "invalid location class: #{self.class} should be #{node.class::Loc}"
162
+ end
163
+ @subloc = {}
164
+ end
165
+ attr_reader :parent, :index, :node
166
+ alias to_node node
167
+
168
+ # return self.
169
+ def make_loc
170
+ self
171
+ end
172
+
173
+ # +top+ returns the originator location.
174
+ #
175
+ # t = HTree('<a><b><c><d>')
176
+ # l = t.make_loc.get_subnode(0, 0, 0, 0)
177
+ # p l, l.top
178
+ # # =>
179
+ # #<HTree::Location: doc()/a/b/c/d>
180
+ # #<HTree::Location: doc()>
181
+ def top
182
+ result = self
183
+ while result.parent
184
+ result = result.parent
185
+ end
186
+ result
187
+ end
188
+
189
+ # +subst_itself+ substitutes the node pointed by the location.
190
+ # It returns the location of substituted node.
191
+ #
192
+ # t1 = HTree('<a><b><c><d>')
193
+ # p t1
194
+ # l1 = t1.make_loc.get_subnode(0, 0, 0, 0)
195
+ # p l1
196
+ # l2 = l1.subst_itself(HTree('<z/>'))
197
+ # p l2
198
+ # t2 = l2.top.to_node
199
+ # p t2
200
+ # # =>
201
+ # #<HTree::Doc {elem <a> {elem <b> {elem <c> {emptyelem <d>}}}}>
202
+ # #<HTree::Location: doc()/a/b/c/d>
203
+ # #<HTree::Location: doc()/a/b/c/z>
204
+ # #<HTree::Doc {elem <a> {elem <b> {elem <c> {emptyelem <z>}}}}>
205
+ #
206
+ def subst_itself(node)
207
+ if @parent
208
+ new_index = @index
209
+ if !@node
210
+ if Integer === @index
211
+ if @index < 0
212
+ new_index = 0
213
+ elsif @parent.to_node.children.length < @index
214
+ new_index = @parent.to_node.children.length
215
+ end
216
+ end
217
+ end
218
+ @parent.subst_itself(@parent.to_node.subst_subnode({@index=>node})).get_subnode(new_index)
219
+ else
220
+ node.make_loc
221
+ end
222
+ end
223
+
224
+ # +subst+ substitutes several subtrees at once.
225
+ #
226
+ # t = HTree('<r><x/><y/><z/></r>')
227
+ # l = t.make_loc
228
+ # l2 = l.subst({
229
+ # l.root.get_subnode('k') => 'v',
230
+ # l.root.get_subnode(-1) => HTree('<a/>'),
231
+ # l.find_element('y') => nil,
232
+ # l.find_element('z').get_subnode(0) => HTree('<b/>'),
233
+ # })
234
+ # pp l2, l2.to_node
235
+ # # =>
236
+ # #<HTree::Doc::Loc: doc()>
237
+ # #<HTree::Doc
238
+ # {elem <r k="v"> {emptyelem <a>} {emptyelem <x>} {elem <z> {emptyelem <b>}}}>
239
+ def subst(pairs)
240
+ subst_itself(@node.subst(pairs))
241
+ end
242
+
243
+ # +loc_list+ returns an array containing from location's root to itself.
244
+ #
245
+ # t = HTree('<a><b><c>')
246
+ # l = t.make_loc.get_subnode(0, 0, 0)
247
+ # pp l, l.loc_list
248
+ # # =>
249
+ # #<HTree::Location: doc()/a/b/c>
250
+ # [#<HTree::Location: doc()>,
251
+ # #<HTree::Location: doc()/a>,
252
+ # #<HTree::Location: doc()/a/b>,
253
+ # #<HTree::Location: doc()/a/b/c>]
254
+ #
255
+ def loc_list
256
+ loc = self
257
+ result = [self]
258
+ while loc = loc.parent
259
+ result << loc
260
+ end
261
+ result.reverse!
262
+ result
263
+ end
264
+
265
+ # +path+ returns the path of the location.
266
+ #
267
+ # l = HTree.parse("<a><b>x</b><b/><a/>").make_loc
268
+ # l = l.get_subnode(0, 0, 0)
269
+ # p l.path # => "doc()/a/b[1]/text()"
270
+ def path
271
+ result = ''
272
+ loc_list.each {|loc|
273
+ if parent = loc.parent
274
+ result << '/' << parent.node.find_loc_step(loc.index)
275
+ else
276
+ result << loc.node.node_test_string
277
+ end
278
+ }
279
+ result
280
+ end
281
+
282
+ def index_list(node) # :nodoc:
283
+ result = []
284
+ loc = self
285
+ while parent = loc.parent
286
+ return result if loc.to_node.equal? node
287
+ result << loc.index
288
+ loc = parent
289
+ end
290
+ return result if loc.to_node.equal? node
291
+ raise ArgumentError, "the location is not under the node: #{self.path}"
292
+ end
293
+
294
+ # :stopdoc:
295
+ def pretty_print(q)
296
+ q.group(1, "#<#{self.class.name}", '>') {
297
+ q.text ':'
298
+ q.breakable
299
+ loc_list.each {|loc|
300
+ if parent = loc.parent
301
+ q.text '/'
302
+ q.group { q.breakable '' }
303
+ q.text parent.node.find_loc_step(loc.index)
304
+ else
305
+ q.text loc.node.node_test_string
306
+ end
307
+ }
308
+ }
309
+ end
310
+ alias inspect pretty_print_inspect
311
+ # :startdoc:
312
+ end
313
+
314
+ module HTree::Container::Loc
315
+ # +get_subnode+ returns a location object which points to a subnode
316
+ # indexed by _index_.
317
+ def get_subnode_internal(index) # :nodoc:
318
+ return @subloc[index] if @subloc.include? index
319
+ node = @node.get_subnode(index)
320
+ if node
321
+ @subloc[index] = node.class::Loc.new(self, index, node)
322
+ else
323
+ @subloc[index] = HTree::Location.new(self, index, node)
324
+ end
325
+ end
326
+
327
+ # +subst_subnode+ returns the location which refers the substituted tree.
328
+ # loc.subst_subnode(pairs) -> loc
329
+ #
330
+ # t = HTree('<a><b><c>')
331
+ # l = t.make_loc.get_subnode(0, 0)
332
+ # l = l.subst_subnode({0=>HTree('<z/>')})
333
+ # pp t, l.top.to_node
334
+ # # =>
335
+ # #<HTree::Doc {elem <a> {elem <b> {emptyelem <c>}}}>
336
+ # #<HTree::Doc {elem <a> {elem <b> {emptyelem <z>}}}>
337
+ #
338
+ def subst_subnode(pairs)
339
+ self.subst_itself(@node.subst_subnode(pairs))
340
+ end
341
+
342
+ # +children+ returns an array of child locations.
343
+ def children
344
+ (0...@node.children.length).map {|i| get_subnode(i) }
345
+ end
346
+ end
347
+
348
+ class HTree::Elem::Loc
349
+ def context() @node.context end
350
+
351
+ # +element_name+ returns the name of the element name as a Name object.
352
+ def element_name() @node.element_name end
353
+
354
+ def empty_element?() @node.empty_element? end
355
+
356
+ # +each_attribute+ iterates over each attributes.
357
+ def each_attribute
358
+ @node.each_attribute {|attr_name, attr_text|
359
+ attr_loc = get_subnode(attr_name)
360
+ yield attr_name, attr_loc
361
+ }
362
+ end
363
+ end
364
+
365
+ class HTree::Text::Loc
366
+ def to_s() @node.to_s end
367
+ def strip() @node.strip end
368
+ def empty?() @node.empty? end
369
+ end