htree 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. data.tar.gz.sig +4 -0
  2. data/Makefile +20 -0
  3. data/Manifest +58 -0
  4. data/README +61 -0
  5. data/Rakefile +37 -0
  6. data/htree.gemspec +32 -0
  7. data/init.rb +1 -0
  8. data/install.rb +112 -0
  9. data/lib/htree.rb +97 -0
  10. data/lib/htree/container.rb +8 -0
  11. data/lib/htree/context.rb +69 -0
  12. data/lib/htree/display.rb +46 -0
  13. data/lib/htree/doc.rb +149 -0
  14. data/lib/htree/elem.rb +262 -0
  15. data/lib/htree/encoder.rb +217 -0
  16. data/lib/htree/equality.rb +219 -0
  17. data/lib/htree/extract_text.rb +37 -0
  18. data/lib/htree/fstr.rb +32 -0
  19. data/lib/htree/gencode.rb +193 -0
  20. data/lib/htree/htmlinfo.rb +672 -0
  21. data/lib/htree/inspect.rb +108 -0
  22. data/lib/htree/leaf.rb +92 -0
  23. data/lib/htree/loc.rb +369 -0
  24. data/lib/htree/modules.rb +49 -0
  25. data/lib/htree/name.rb +122 -0
  26. data/lib/htree/output.rb +212 -0
  27. data/lib/htree/parse.rb +410 -0
  28. data/lib/htree/raw_string.rb +127 -0
  29. data/lib/htree/regexp-util.rb +19 -0
  30. data/lib/htree/rexml.rb +131 -0
  31. data/lib/htree/scan.rb +176 -0
  32. data/lib/htree/tag.rb +113 -0
  33. data/lib/htree/template.rb +961 -0
  34. data/lib/htree/text.rb +115 -0
  35. data/lib/htree/traverse.rb +497 -0
  36. data/test-all.rb +5 -0
  37. data/test/assign.html +1 -0
  38. data/test/template.html +4 -0
  39. data/test/test-attr.rb +67 -0
  40. data/test/test-charset.rb +79 -0
  41. data/test/test-context.rb +29 -0
  42. data/test/test-display_xml.rb +45 -0
  43. data/test/test-elem-new.rb +101 -0
  44. data/test/test-encoder.rb +53 -0
  45. data/test/test-equality.rb +55 -0
  46. data/test/test-extract_text.rb +18 -0
  47. data/test/test-gencode.rb +27 -0
  48. data/test/test-leaf.rb +25 -0
  49. data/test/test-loc.rb +60 -0
  50. data/test/test-namespace.rb +147 -0
  51. data/test/test-output.rb +133 -0
  52. data/test/test-parse.rb +115 -0
  53. data/test/test-raw_string.rb +17 -0
  54. data/test/test-rexml.rb +70 -0
  55. data/test/test-scan.rb +153 -0
  56. data/test/test-security.rb +37 -0
  57. data/test/test-subnode.rb +142 -0
  58. data/test/test-template.rb +313 -0
  59. data/test/test-text.rb +43 -0
  60. data/test/test-traverse.rb +69 -0
  61. metadata +166 -0
  62. metadata.gz.sig +1 -0
@@ -0,0 +1,108 @@
1
+ require 'pp'
2
+ require 'htree/doc'
3
+ require 'htree/elem'
4
+ require 'htree/leaf'
5
+ require 'htree/tag'
6
+ require 'htree/output'
7
+ require 'htree/raw_string'
8
+
9
+ module HTree
10
+ # :stopdoc:
11
# Pretty-printer hooks for Doc.
class Doc
  # Prints every entry of @children, each preceded by a breakable
  # separator, inside the object group that _q_ opens for self.
  #
  # q:: the pretty-printer object handed in by +pp+.
  def pretty_print(q)
    q.object_group(self) do
      @children.each do |child|
        q.breakable
        q.pp child
      end
    end
  end

  # +inspect+ reuses the pretty-printed form.
  alias inspect pretty_print_inspect
end
17
+
18
# Pretty-printer hooks for Elem.
class Elem
  # Prints the element as "{emptyelem STAG}" when @empty is set, otherwise
  # as "{elem STAG CHILD... ETAG}".  The end tag is printed only when
  # @etag is set.
  #
  # q:: the pretty-printer object handed in by +pp+.
  def pretty_print(q)
    opener = @empty ? '{emptyelem' : "{elem"
    q.group(1, opener, '}') do
      q.breakable
      q.pp @stag
      unless @empty
        @children.each do |child|
          q.breakable
          q.pp child
        end
        if @etag
          q.breakable
          q.pp @etag
        end
      end
    end
  end

  # +inspect+ reuses the pretty-printed form.
  alias inspect pretty_print_inspect
end
36
+
37
# Pretty-printer hooks shared by leaf nodes.
module Leaf
  # Prints "{name ...}" where name is the downcased class name with any
  # namespace prefix removed.
  #
  # When @raw_string is set, it is split into lines (each keeping its line
  # terminator) and every line is pretty-printed.  Otherwise, if the object
  # responds to +display_xml+, the result of <tt>display_xml('')</tt> is
  # printed as plain text.
  #
  # q:: the pretty-printer object handed in by +pp+.
  def pretty_print(q)
    q.group(1, '{', '}') do
      q.text self.class.name.sub(/.*::/,'').downcase
      raw = @raw_string
      if raw
        raw.scan(/[^\r\n]*(?:\r\n?|\n|[^\r\n]\z)/) do |line|
          q.breakable
          q.pp line
        end
      elsif respond_to?(:display_xml)
        q.breakable
        q.text self.display_xml('')
      end
    end
  end

  # +inspect+ reuses the pretty-printed form.
  alias inspect pretty_print_inspect
end
54
+
55
# Human-readable rendering of Name.
class Name
  # Returns a compact description of the name:
  #
  # * "xmlns" or "xmlns:LOCAL" when +xmlns?+ is true,
  # * the local name itself when there is no (or an empty) namespace URI,
  # * "PREFIX{URI}LOCAL" when a namespace prefix is set,
  # * "-{URI}LOCAL" when the namespace prefix is exactly +false+,
  # * "{URI}LOCAL" otherwise.
  def inspect
    if xmlns?
      return @local_name ? "xmlns:#{@local_name}" : "xmlns"
    end
    return @local_name if !@namespace_uri || @namespace_uri.empty?

    if @namespace_prefix
      "#{@namespace_prefix}{#{@namespace_uri}}#{@local_name}"
    elsif @namespace_prefix == false
      "-{#{@namespace_uri}}#{@local_name}"
    else
      "{#{@namespace_uri}}#{@local_name}"
    end
  end
end
70
+
71
# Pretty-printer hooks for STag.
class STag
  # Prints the start tag as "<NAME ATTR="VALUE" ...>".  The tag name and the
  # attribute names are rendered with +inspect+; each attribute value is
  # rendered with its +to_attvalue_content+.
  #
  # q:: the pretty-printer object handed in by +pp+.
  def pretty_print(q)
    q.group(1, '<', '>') do
      q.text @name.inspect

      @attributes.each do |attr_name, attr_text|
        q.breakable
        q.text "#{attr_name.inspect}=\"#{attr_text.to_attvalue_content}\""
      end
    end
  end

  # +inspect+ reuses the pretty-printed form.
  alias inspect pretty_print_inspect
end
84
+
85
# Pretty-printer hooks for ETag.
class ETag
  # Prints the end tag as "</QUALIFIED_NAME>", using @qualified_name as
  # plain text.
  #
  # q:: the pretty-printer object handed in by +pp+.
  def pretty_print(q)
    q.group(1, '</', '>') do
      q.text @qualified_name
    end
  end

  # +inspect+ reuses the pretty-printed form.
  alias inspect pretty_print_inspect
end
93
+
94
# Pretty-printer hooks for BogusETag.
class BogusETag
  # Prints "{name RAW}" when @raw_string is set, and "{name</QNAME>}"
  # (no separator before the tag) otherwise.  name is the downcased class
  # name with any namespace prefix removed.
  #
  # NOTE(review): the fallback branch reads @qualified_name; confirm that
  # BogusETag actually sets that instance variable somewhere.
  #
  # q:: the pretty-printer object handed in by +pp+.
  def pretty_print(q)
    q.group(1, '{', '}') do
      q.text self.class.name.sub(/.*::/,'').downcase
      raw = @raw_string
      if raw
        q.breakable
        q.text raw
      else
        q.text "</#{@qualified_name}>"
      end
    end
  end
end
107
+ # :startdoc:
108
+ end
@@ -0,0 +1,92 @@
1
+ require 'htree/modules'
2
+ require 'htree/raw_string'
3
+
4
+ module HTree
5
# An XML declaration leaf node.
class XMLDecl
  # Builds an XML declaration after validating its parts.
  #
  # version::    must consist only of letters, digits and the characters
  #              "_", ".", ":" and "-" (at least one character).
  # encoding::   optional; when given it must start with a letter and
  #              continue with letters, digits, ".", "_" or "-".
  # standalone:: must be +nil+, +true+ or +false+.
  #
  # Raises HTree::Error when any of the three checks fails.
  def initialize(version, encoding=nil, standalone=nil)
    init_raw_string
    unless /\A[a-zA-Z0-9_.:-]+\z/ =~ version
      raise HTree::Error, "invalid version in XML declaration: #{version.inspect}"
    end
    if encoding
      unless /\A[A-Za-z][A-Za-z0-9._-]*\z/ =~ encoding
        raise HTree::Error, "invalid encoding in XML declaration: #{encoding.inspect}"
      end
    end
    if standalone != nil && standalone != true && standalone != false
      raise HTree::Error, "invalid standalone document declaration in XML declaration: #{standalone.inspect}"
    end
    @version, @encoding, @standalone = version, encoding, standalone
  end
  attr_reader :version, :encoding, :standalone
end
23
+
24
# A document type declaration leaf node.
class DocType
  # Builds a document type declaration.
  #
  # root_element_name:: stored as given (not validated here).
  # public_identifier:: optional; when given it may contain only the
  #                     characters accepted by the pattern below.
  # system_identifier:: optional; rejected only when it contains both a
  #                     double quote and a single quote.
  #
  # Raises HTree::Error when an identifier is rejected.
  def initialize(root_element_name, public_identifier=nil, system_identifier=nil)
    init_raw_string
    if public_identifier
      unless /\A[ \x0d\x0aa-zA-Z0-9\-'()+,.\/:=?;!*\#@$_%]*\z/ =~ public_identifier
        raise HTree::Error, "invalid public identifier in document type declaration: #{public_identifier.inspect}"
      end
    end
    if system_identifier
      if /"/ =~ system_identifier && /'/ =~ system_identifier
        raise HTree::Error, "invalid system identifier in document type declaration: #{system_identifier.inspect}"
      end
    end

    @root_element_name, @public_identifier, @system_identifier =
      root_element_name, public_identifier, system_identifier
  end
  attr_reader :root_element_name, :public_identifier, :system_identifier
end
40
+
41
# A processing instruction leaf node.
class ProcIns
  # :stopdoc:
  class << self
    # Keep the raw constructor reachable as +new!+; it rejects invalid content.
    alias new! new
  end
  # :startdoc:

  # Creates a processing instruction, first rewriting every "?>" inside
  # _content_ to "? >" so that the content always passes validation.
  # A +nil+ (or false) _content_ is passed through untouched.
  def self.new(target, content)
    safe_content = content ? content.gsub(/\?>/, '? >') : content
    new!(target, safe_content)
  end

  # Stores _target_ and _content_.
  #
  # Raises HTree::Error when _content_ contains "?>".
  def initialize(target, content) # :notnew:
    init_raw_string
    if content
      raise HTree::Error, "invalid processing instruction content: #{content.inspect}" if /\?>/ =~ content
    end
    @target = target
    @content = content
  end
  attr_reader :target, :content
end
63
+
64
# A comment leaf node.
class Comment
  # :stopdoc:
  class << self
    # Keep the raw constructor reachable as +new!+; it rejects invalid content.
    alias new! new
  end
  # :startdoc:

  # Creates a comment, first rewriting _content_ so that it passes
  # validation: every run of two or more hyphens gets a space inserted
  # between consecutive hyphens, and a trailing hyphen gets a space
  # appended.
  def self.new(content)
    spaced = content.gsub(/-(-+)/) { '-' + ' -' * Regexp.last_match(1).length }
    new!(spaced.sub(/-\z/, '- '))
  end

  # Stores _content_.
  #
  # Raises HTree::Error when _content_ contains "--" or ends with "-".
  def initialize(content) # :notnew:
    init_raw_string
    invalid = /--/ =~ content || /-\z/ =~ content
    raise HTree::Error, "invalid comment content: #{content.inspect}" if invalid
    @content = content
  end
  attr_reader :content
end
85
+
86
# An end tag that has no matching start tag.
class BogusETag
  # Initializes the raw-string state, then wraps _qualified_name_ in a new
  # ETag which is kept in @etag.
  def initialize(qualified_name)
    init_raw_string()
    @etag = ETag.new(qualified_name)
  end
end
92
+ end
@@ -0,0 +1,369 @@
1
+ require 'htree/modules'
2
+ require 'htree/elem'
3
+ require 'htree/inspect'
4
+
5
+ module HTree
6
# Location and substitution helpers mixed into tree nodes.
module Node
  # Creates a location object which points to self.
  #
  # The location class is looked up as +Loc+ under the node's own class;
  # the new location has neither a parent nor an index.
  def make_loc
    self.class::Loc.new(nil, nil, self)
  end

  # Returns self.
  def to_node
    self
  end

  # +subst+ substitutes several subtrees at once.
  #
  # _pairs_ maps locations (anything responding to +index_list+) to their
  # replacements.  A replacement that is not an Array is wrapped in one;
  # +nil+ entries are dropped, so mapping a location to +nil+ removes it.
  #
  #   t = HTree('<r><x/><y/><z/></r>')
  #   l = t.make_loc
  #   t2 = t.subst({
  #     l.get_subnode(0, 'k') => 'v',
  #     l.get_subnode(0, -1) => HTree('<a/>'),
  #     l.get_subnode(0, 1) => nil,
  #     l.get_subnode(0, 2, 0) => HTree('<b/>'),
  #   })
  #   pp t2
  #   # =>
  #   #<HTree::Doc
  #    {elem <r k="v"> {emptyelem <a>} {emptyelem <x>} {elem <z> {emptyelem <b>}}}>
  #
  # Returns self when _pairs_ is empty, and the single replacement when the
  # only substituted location is self.
  #
  # Raises ArgumentError when self is substituted together with one of its
  # descendants, or when self would be replaced by anything other than
  # exactly one node.
  def subst(pairs)
    pairs = pairs.map {|key, val|
      key = key.index_list(self)
      val = [val] unless Array === val
      [key, val]
    }

    # An empty index list designates self.
    pairs_empty_key, pairs_nonempty_key =
      pairs.partition {|key, val| key.empty? }
    if !pairs_empty_key.empty?
      if !pairs_nonempty_key.empty?
        raise ArgumentError, "cannot substitute a node under substituting tree."
      end
      result = []
      pairs_empty_key.each {|key, val| result.concat val }
      result.compact!
      if result.length == 1
        return result[0]
      else
        # The message reports +result+; it used to name an undefined
        # variable, which raised NameError instead of ArgumentError.
        raise ArgumentError, "cannot substitute top node by multiple nodes: #{result.inspect}"
      end
    end
    return self if pairs_nonempty_key.empty?

    subst_internal(pairs)
  end

  # Groups _pairs_ by the last entry of each index list (removed with
  # +pop+, so the lists are modified in place), computes the replacement
  # for every affected direct subnode and hands the resulting list of
  # <tt>[index, replacement]</tt> pairs to +subst_subnode+.
  #
  # Raises ArgumentError when a subnode is substituted together with one of
  # its own descendants.
  def subst_internal(pairs) # :nodoc:
    subnode_pairs = {}
    pairs.each {|key, val|
      k = key.pop
      (subnode_pairs[k] ||= []) << [key, val]
    }
    subnode_pairs = subnode_pairs.map {|k, subpairs|
      s = get_subnode(k)
      subpairs_empty_key, subpairs_nonempty_key =
        subpairs.partition {|key, val| key.empty? }
      if !subpairs_empty_key.empty?
        if !subpairs_nonempty_key.empty?
          raise ArgumentError, "cannot substitute a node under substituting tree."
        end
        # The subnode itself is replaced.
        r = []
        subpairs_empty_key.each {|key, val| r.concat val }
        [k, r.compact]
      elsif subpairs_nonempty_key.empty?
        [k, s]
      else
        # Only deeper nodes change: recurse into the subnode.
        [k, s.subst_internal(subpairs)]
      end
    }
    subst_subnode(subnode_pairs)
  end
end
88
+
89
+ # :stopdoc:
90
# +node_test_string+ gives the node test used for a node when a location
# path is rendered.

class Doc
  def node_test_string
    'doc()'
  end
end

class Elem
  # An element is identified by the qualified name of its start tag.
  def node_test_string
    @stag.element_name.qualified_name
  end
end

class Text
  def node_test_string
    'text()'
  end
end

class BogusETag
  def node_test_string
    'bogus-etag()'
  end
end

class XMLDecl
  def node_test_string
    'xml-declaration()'
  end
end

class DocType
  def node_test_string
    'doctype()'
  end
end

class ProcIns
  def node_test_string
    'processing-instruction()'
  end
end

class Comment
  def node_test_string
    'comment()'
  end
end
98
+
99
# Location-path support for nodes that hold children.
module Container
  # Returns the location step naming the child at _index_.
  #
  # An index outside of @children yields "*[INDEX]".  Otherwise the step is
  # the child's +node_test_string+, followed by "[N]" (the 1-based rank
  # among the children sharing that node test) whenever more than one child
  # shares it.  The steps for all children are computed on the first call
  # and cached in @loc_step_children; a copy of the cached string is
  # returned.
  def find_loc_step(index)
    return "*[#{index}]" if index < 0 || @children.length <= index

    unless defined? @loc_step_children
      totals = Hash.new(0)
      ranked = @children.map do |child|
        test = child.node_test_string
        totals[test] += 1
        [test, totals[test]]
      end

      @loc_step_children = ranked.map do |test, rank|
        totals[test] == 1 ? test : "#{test}[#{rank}]"
      end
    end

    @loc_step_children[index].dup
  end
end
130
+
131
# Location-path support for attributes of an element.
class Elem
  # Returns the location step for _index_.
  #
  # An Integer index is delegated to the inherited implementation.  A String
  # is first parsed as an attribute name in DefaultContext; the step for an
  # attribute name is "@" followed by its qualified name.
  #
  # Raises TypeError when _index_ is neither an Integer, a String nor a Name.
  def find_loc_step(index)
    return super if Integer === index

    name = String === index ? Name.parse_attribute_name(index, DefaultContext) : index
    raise TypeError, "invalid index: #{name.inspect}" unless Name === name

    "@#{name.qualified_name}"
  end
end
143
+ # :startdoc:
144
+ end
145
+
146
# A location: a node together with the chain of parent locations and
# indexes that leads to it.
class HTree::Location
  # parent:: the parent location, or a false value for a top location.
  # index::  the index of the node under _parent_ (ignored without parent).
  # node::   the node pointed to.  With a parent it must be the very object
  #          that the parent's node returns for _index_.
  #
  # Raises ArgumentError when _node_ is not the parent's subnode, or when
  # the node's class declares a different +Loc+ class than the receiver's.
  def initialize(parent, index, node) # :nodoc:
    @parent = parent || nil
    @index = parent ? index : nil
    @node = parent ? parent.node.get_subnode(index) : node
    raise ArgumentError, "unexpected node" if parent && !@node.equal?(node)

    if @node
      unless self.class == @node.class::Loc
        raise ArgumentError, "invalid location class: #{self.class} should be #{node.class::Loc}"
      end
    end
    @subloc = {}
  end
  attr_reader :parent, :index, :node
  alias to_node node

  # Returns self.
  def make_loc
    self
  end

  # +top+ returns the originator location, i.e. the ancestor location
  # that has no parent.
  #
  #   t = HTree('<a><b><c><d>')
  #   l = t.make_loc.get_subnode(0, 0, 0, 0)
  #   p l, l.top
  #   # =>
  #   #<HTree::Location: doc()/a/b/c/d>
  #   #<HTree::Location: doc()>
  def top
    loc = self
    loc = loc.parent while loc.parent
    loc
  end

  # +subst_itself+ substitutes the node pointed by the location.
  # It returns the location of the substituted node.
  #
  #   t1 = HTree('<a><b><c><d>')
  #   p t1
  #   l1 = t1.make_loc.get_subnode(0, 0, 0, 0)
  #   p l1
  #   l2 = l1.subst_itself(HTree('<z/>'))
  #   p l2
  #   t2 = l2.top.to_node
  #   p t2
  #   # =>
  #   #<HTree::Doc {elem <a> {elem <b> {elem <c> {emptyelem <d>}}}}>
  #   #<HTree::Location: doc()/a/b/c/d>
  #   #<HTree::Location: doc()/a/b/c/z>
  #   #<HTree::Doc {elem <a> {elem <b> {elem <c> {emptyelem <z>}}}}>
  #
  def subst_itself(node)
    return node.make_loc unless @parent

    new_index = @index
    # A location without a node and with an out-of-range Integer index is
    # looked up again at the nearest end of the parent's children.
    if !@node && Integer === @index
      if @index < 0
        new_index = 0
      elsif @parent.to_node.children.length < @index
        new_index = @parent.to_node.children.length
      end
    end
    new_parent_node = @parent.to_node.subst_subnode({@index=>node})
    @parent.subst_itself(new_parent_node).get_subnode(new_index)
  end

  # +subst+ substitutes several subtrees at once and returns the location
  # of the resulting node.
  #
  #   t = HTree('<r><x/><y/><z/></r>')
  #   l = t.make_loc
  #   l2 = l.subst({
  #     l.root.get_subnode('k') => 'v',
  #     l.root.get_subnode(-1) => HTree('<a/>'),
  #     l.find_element('y') => nil,
  #     l.find_element('z').get_subnode(0) => HTree('<b/>'),
  #   })
  #   pp l2, l2.to_node
  #   # =>
  #   #<HTree::Doc::Loc: doc()>
  #   #<HTree::Doc
  #    {elem <r k="v"> {emptyelem <a>} {emptyelem <x>} {elem <z> {emptyelem <b>}}}>
  def subst(pairs)
    subst_itself(@node.subst(pairs))
  end

  # +loc_list+ returns an array containing the locations from the top
  # location down to the receiver itself.
  #
  #   t = HTree('<a><b><c>')
  #   l = t.make_loc.get_subnode(0, 0, 0)
  #   pp l, l.loc_list
  #   # =>
  #   #<HTree::Location: doc()/a/b/c>
  #   [#<HTree::Location: doc()>,
  #    #<HTree::Location: doc()/a>,
  #    #<HTree::Location: doc()/a/b>,
  #    #<HTree::Location: doc()/a/b/c>]
  #
  def loc_list
    chain = []
    loc = self
    while loc
      chain.unshift(loc)
      loc = loc.parent
    end
    chain
  end

  # +path+ returns the path of the location.
  #
  #   l = HTree.parse("<a><b>x</b><b/><a/>").make_loc
  #   l = l.get_subnode(0, 0, 0)
  #   p l.path # => "doc()/a/b[1]/text()"
  def path
    loc_list.each_with_object('') do |loc, result|
      parent = loc.parent
      if parent
        result << '/' << parent.node.find_loc_step(loc.index)
      else
        result << loc.node.node_test_string
      end
    end
  end

  # Returns the indexes leading from _node_ down to this location, deepest
  # index first.
  #
  # Raises ArgumentError when _node_ is neither the node of this location
  # nor the node of one of its ancestors.
  def index_list(node) # :nodoc:
    indexes = []
    loc = self
    until loc.to_node.equal?(node)
      parent = loc.parent
      raise ArgumentError, "the location is not under the node: #{self.path}" unless parent
      indexes << loc.index
      loc = parent
    end
    indexes
  end

  # :stopdoc:
  # Prints "#<CLASS: PATH>", allowing a line break after each "/" of the path.
  def pretty_print(q)
    q.group(1, "#<#{self.class.name}", '>') do
      q.text ':'
      q.breakable
      loc_list.each do |loc|
        parent = loc.parent
        if parent
          q.text '/'
          q.group { q.breakable '' }
          q.text parent.node.find_loc_step(loc.index)
        else
          q.text loc.node.node_test_string
        end
      end
    end
  end
  alias inspect pretty_print_inspect
  # :startdoc:
end
313
+
314
# Location behaviour for nodes that have subnodes.
module HTree::Container::Loc
  # +get_subnode_internal+ returns a location object which points to the
  # subnode indexed by _index_.  Locations are cached per index in @subloc.
  # When the node has no subnode at _index_, a plain HTree::Location with a
  # nil node is created instead of a node-specific +Loc+.
  def get_subnode_internal(index) # :nodoc:
    return @subloc[index] if @subloc.include? index

    node = @node.get_subnode(index)
    loc_class = node ? node.class::Loc : HTree::Location
    @subloc[index] = loc_class.new(self, index, node)
  end

  # +subst_subnode+ returns the location which refers the substituted tree.
  #   loc.subst_subnode(pairs) -> loc
  #
  #   t = HTree('<a><b><c>')
  #   l = t.make_loc.get_subnode(0, 0)
  #   l = l.subst_subnode({0=>HTree('<z/>')})
  #   pp t, l.top.to_node
  #   # =>
  #   #<HTree::Doc {elem <a> {elem <b> {emptyelem <c>}}}>
  #   #<HTree::Doc {elem <a> {elem <b> {emptyelem <z>}}}>
  #
  def subst_subnode(pairs)
    replaced = @node.subst_subnode(pairs)
    self.subst_itself(replaced)
  end

  # +children+ returns an array of child locations, one per child of the
  # node, in order.
  def children
    Array.new(@node.children.length) {|i| get_subnode(i) }
  end
end
347
+
348
# Location of an element; most queries are forwarded to the element node.
class HTree::Elem::Loc
  # Returns the context of the element node.
  def context
    @node.context
  end

  # +element_name+ returns the name of the element, as reported by the
  # node (a Name object according to the original documentation).
  def element_name
    @node.element_name
  end

  # Tells whether the node reports itself as an empty element.
  def empty_element?
    @node.empty_element?
  end

  # +each_attribute+ iterates over the attributes of the element, yielding
  # each attribute name together with the location of that attribute
  # (not with the attribute text).
  def each_attribute
    @node.each_attribute do |attr_name, _attr_text|
      yield attr_name, get_subnode(attr_name)
    end
  end
end
364
+
365
# Location of a text node; string-like queries are forwarded to the node.
class HTree::Text::Loc
  # Returns the node's +to_s+.
  def to_s
    @node.to_s
  end

  # Returns the node's +strip+.
  def strip
    @node.strip
  end

  # Returns the node's +empty?+.
  def empty?
    @node.empty?
  end
end