htree 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. data.tar.gz.sig +4 -0
  2. data/Makefile +20 -0
  3. data/Manifest +58 -0
  4. data/README +61 -0
  5. data/Rakefile +37 -0
  6. data/htree.gemspec +32 -0
  7. data/init.rb +1 -0
  8. data/install.rb +112 -0
  9. data/lib/htree.rb +97 -0
  10. data/lib/htree/container.rb +8 -0
  11. data/lib/htree/context.rb +69 -0
  12. data/lib/htree/display.rb +46 -0
  13. data/lib/htree/doc.rb +149 -0
  14. data/lib/htree/elem.rb +262 -0
  15. data/lib/htree/encoder.rb +217 -0
  16. data/lib/htree/equality.rb +219 -0
  17. data/lib/htree/extract_text.rb +37 -0
  18. data/lib/htree/fstr.rb +32 -0
  19. data/lib/htree/gencode.rb +193 -0
  20. data/lib/htree/htmlinfo.rb +672 -0
  21. data/lib/htree/inspect.rb +108 -0
  22. data/lib/htree/leaf.rb +92 -0
  23. data/lib/htree/loc.rb +369 -0
  24. data/lib/htree/modules.rb +49 -0
  25. data/lib/htree/name.rb +122 -0
  26. data/lib/htree/output.rb +212 -0
  27. data/lib/htree/parse.rb +410 -0
  28. data/lib/htree/raw_string.rb +127 -0
  29. data/lib/htree/regexp-util.rb +19 -0
  30. data/lib/htree/rexml.rb +131 -0
  31. data/lib/htree/scan.rb +176 -0
  32. data/lib/htree/tag.rb +113 -0
  33. data/lib/htree/template.rb +961 -0
  34. data/lib/htree/text.rb +115 -0
  35. data/lib/htree/traverse.rb +497 -0
  36. data/test-all.rb +5 -0
  37. data/test/assign.html +1 -0
  38. data/test/template.html +4 -0
  39. data/test/test-attr.rb +67 -0
  40. data/test/test-charset.rb +79 -0
  41. data/test/test-context.rb +29 -0
  42. data/test/test-display_xml.rb +45 -0
  43. data/test/test-elem-new.rb +101 -0
  44. data/test/test-encoder.rb +53 -0
  45. data/test/test-equality.rb +55 -0
  46. data/test/test-extract_text.rb +18 -0
  47. data/test/test-gencode.rb +27 -0
  48. data/test/test-leaf.rb +25 -0
  49. data/test/test-loc.rb +60 -0
  50. data/test/test-namespace.rb +147 -0
  51. data/test/test-output.rb +133 -0
  52. data/test/test-parse.rb +115 -0
  53. data/test/test-raw_string.rb +17 -0
  54. data/test/test-rexml.rb +70 -0
  55. data/test/test-scan.rb +153 -0
  56. data/test/test-security.rb +37 -0
  57. data/test/test-subnode.rb +142 -0
  58. data/test/test-template.rb +313 -0
  59. data/test/test-text.rb +43 -0
  60. data/test/test-traverse.rb +69 -0
  61. metadata +166 -0
  62. metadata.gz.sig +1 -0
@@ -0,0 +1,217 @@
1
+ require 'iconv'
2
+
3
+ module HTree
4
+ class Encoder
5
# HTree::Encoder.internal_charset returns the name of the charset used for
# strings inside the program.
#
# When the ::Encoding constant is defined, the result is
# Encoding.default_external.name.
#
# Otherwise the result is the MIME charset corresponding to $KCODE,
# looked up in KcodeCharset:
#
# - 'ISO-8859-1' when $KCODE=='NONE'
# - 'UTF-8' when $KCODE=='UTF8'
# - 'EUC-JP' when $KCODE=='EUC'
# - 'Shift_JIS' when $KCODE=='SJIS'
#
# The $KCODE mapping ignores EUC-KR and various single byte charsets other
# than ISO-8859-1 at least.
def Encoder.internal_charset
  if defined?(::Encoding)
    Encoding.default_external.name
  else
    KcodeCharset[$KCODE]
  end
end
21
+
22
# Creates an encoder converting from +internal_encoding+ to
# +output_encoding+ through Iconv.
#
# In addition to the main converter, one Iconv converter is created for
# each charset that SubCharset lists for +output_encoding+ (an empty list
# when the lookup yields nil).  HTML output mode starts switched off and
# the output buffer starts empty.
def initialize(output_encoding, internal_encoding=HTree::Encoder.internal_charset)
  @buf = ''
  @html_output = false
  @internal_encoding = internal_encoding
  @output_encoding = output_encoding
  @ic = Iconv.new(output_encoding, @internal_encoding)
  @charpat = FirstCharPattern[internal_encoding]
  @subcharset_list = SubCharset[output_encoding] || []
  @subcharset_ic = {}
  @subcharset_list.each do |charset_name|
    @subcharset_ic[charset_name] = Iconv.new(charset_name, @internal_encoding)
  end
end
35
+
36
+ # :stopdoc:
37
# Returns the current HTML output flag (the value last stored with
# html_output=).
def html_output?
  @html_output
end

# Stores +flag+ as the HTML output flag.  The other output_* methods
# consult it to choose between their HTML and non-HTML behaviour.
def html_output=(flag)
  @html_output = flag
end
44
+
45
# Runs +body+, surrounded by +pre+ and +post+ when HTML output is enabled.
#
# out::  passed to +post+ and returned unchanged.
# pre::  callable invoked without arguments before +body+ (HTML mode only).
# body:: callable invoked without arguments in both modes.
# post:: callable invoked with +out+ after +body+ (HTML mode only).
def output_cdata_content_do(out, pre, body, post)
  # Read the flag once so that pre/post are either both run or both skipped.
  html_mode = @html_output
  pre.call if html_mode
  body.call
  post.call(out) if html_mode
  out
end
55
+
56
# Outputs a '/' through output_string unless HTML output is enabled.
def output_slash_if_xml
  output_string('/') unless @html_output
end
61
+
62
# Outputs the nodes in +content+.
#
# With HTML output enabled, only the HTree::Text nodes of +content+ are
# used: they are concatenated with HTree::Text.concat and written with
# output_cdata.  Otherwise every node is asked to output itself to this
# encoder with +context+.
def output_cdata_content(content, context)
  unless @html_output
    return content.each {|node| node.output(self, context) }
  end
  # xxx: should raise an error for non-text node?
  text_nodes = content.grep(HTree::Text)
  HTree::Text.concat(*text_nodes).output_cdata(self)
end
72
+
73
# Joins +args+ into one string and writes it with output_string.
#
# Raises ArgumentError when the joined string contains '</'; the check is
# made on the joined string, so a '</' split across two arguments is
# detected as well.
def output_cdata_for_html(*args)
  str = args.join('')
  # A plain substring test is enough here and leaves $~ untouched.
  if str.include?('</')
    raise ArgumentError, "cdata contains '</' : #{str.inspect}"
  end
  output_string str
end
80
+
81
# Appends +external_str+ to the output buffer and returns nil.
#
# internal_str:: the text in the internal encoding.
# external_str:: the same text already converted; defaults to converting
#                +internal_str+ with the main converter.
#
# Every sub-charset converter that fails on +internal_str+, or converts it
# to something different from +external_str+, is dropped from
# @subcharset_ic.
def output_string(internal_str, external_str=@ic.iconv(internal_str))
  # xxx: should be fixed Ruby itself
  if @buf.empty? && @buf.respond_to?(:force_encoding)
    @buf.force_encoding(external_str.encoding)
  end
  @buf << external_str
  @subcharset_ic.delete_if do |_charset_name, converter|
    begin
      converter.iconv(internal_str) != external_str
    rescue Iconv::Failure
      true
    end
  end
  nil
end
93
+
94
# Converts +string+ and writes it, replacing what cannot be converted.
#
# When Iconv reports an illegal sequence or invalid character, the part
# converted so far is written and the first character of the failed
# remainder is replaced: by a numeric character reference "&#N;" built
# from its UTF-8 code point, or by '?' when that code point cannot be
# obtained.  Conversion is then retried on the rest of the string.
def output_text(string)
  begin
    output_string string, @ic.iconv(string)
  rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => e
    converted = e.success
    unconverted = e.failed
    output_string string[0, string.length - unconverted.length], converted
    first_char = /\A./m.match(unconverted)
    if first_char.nil?
      # xxx: should be configurable?
      #raise ArgumentError, "cannot extract first character: #{e.failed.dump}"
      # No first character could be extracted: skip one element of the
      # failed part, emit '?' in its place and try again.
      string = unconverted[1, unconverted.length-1]
      output_string '?'
      retry
    end
    remainder = first_char.post_match
    begin
      ucode = Iconv.conv("UTF-8", @internal_encoding, first_char[0]).unpack("U")[0]
      replacement = "&##{ucode};"
    rescue Iconv::IllegalSequence, Iconv::InvalidCharacter
      # xxx: should be configurable?
      replacement = '?'
    end
    output_string replacement
    string = remainder
    retry
  end
end
121
+
122
# Replacement text for the characters escaped by output_dynamic_text and
# output_dynamic_attvalue.
ChRef = {
  '&' => '&amp;',
  '<' => '&lt;',
  '>' => '&gt;',
  '"' => '&quot;',
}

# Escapes +string+ for use as element content and writes it with
# output_text.
#
# An object responding to +rcdata+ contributes its rcdata with only '<'
# and '>' replaced; any other object is converted with to_s and has '&',
# '<' and '>' replaced.
def output_dynamic_text(string)
  escaped =
    if string.respond_to? :rcdata
      string.rcdata.gsub(/[<>]/) {|ch| ChRef[ch] }
    else
      string.to_s.gsub(/[&<>]/) {|ch| ChRef[ch] }
    end
  output_text(escaped)
end

# Escapes +string+ for use as an attribute value and writes it with
# output_text.
#
# Works like output_dynamic_text, and additionally replaces '"' in both
# cases.
def output_dynamic_attvalue(string)
  escaped =
    if string.respond_to? :rcdata
      string.rcdata.gsub(/[<>"]/) {|ch| ChRef[ch] }
    else
      string.to_s.gsub(/[&<>"]/) {|ch| ChRef[ch] }
    end
  output_text(escaped)
end
144
+
145
+ # :startdoc:
146
+
147
# Closes the main converter, appends whatever it returns on close to the
# output buffer, and returns the buffer.
#
# Sub-charset converters are closed too; each one that fails on close, or
# returns something different from the main converter's closing output,
# is dropped from @subcharset_ic.
def finish
  closing_output = @ic.close
  @buf << closing_output
  @subcharset_ic.delete_if do |_charset_name, converter|
    begin
      converter.close != closing_output
    rescue Iconv::Failure
      true
    end
  end
  @buf
end
159
+
160
# Finishes the output and returns it prefixed with an XML declaration.
#
# The declaration names minimal_charset as the encoding and is itself
# converted from US-ASCII to the output encoding.  finish is called
# before minimal_charset is read.
def finish_with_xmldecl
  body = finish
  declaration = "<?xml version=\"1.0\" encoding=\"#{minimal_charset}\"?>"
  Iconv.conv(@output_encoding, 'US-ASCII', declaration) + body
end
166
+
167
# Returns the first charset of @subcharset_list whose converter is still
# present in @subcharset_ic, or the output encoding when there is none.
def minimal_charset
  @subcharset_list.find {|charset_name| @subcharset_ic.include?(charset_name) } ||
    @output_encoding
end
175
+
176
+ # :stopdoc:
177
+
178
# $KCODE value => MIME charset name (used by Encoder.internal_charset when
# ::Encoding is not defined).
KcodeCharset = {
  'EUC' => 'EUC-JP',
  'SJIS' => 'Shift_JIS',
  'UTF8' => 'UTF-8',
  'NONE' => 'ISO-8859-1',
}

# Charset name => byte-oriented pattern matching one character at the
# start of a string in that charset.
FirstCharPattern = {
  'EUC-JP' => /\A(?:
      [\x00-\x7f]
     |[\xa1-\xfe][\xa1-\xfe]
     |\x8e[\xa1-\xfe]
     |\x8f[\xa1-\xfe][\xa1-\xfe])/nx,
  'Shift_JIS' => /\A(?:
      [\x00-\x7f]
     |[\x81-\x9f][\x40-\x7e\x80-\xfc]
     |[\xa1-\xdf]
     |[\xe0-\xfc][\x40-\x7e\x80-\xfc])/nx,
  'UTF-8' => /\A(?:
      [\x00-\x7f]
     |[\xc0-\xdf][\x80-\xbf]
     |[\xe0-\xef][\x80-\xbf][\x80-\xbf]
     |[\xf0-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]
     |[\xf8-\xfb][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf]
     |[\xfc-\xfd][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf][\x80-\xbf])/nx,
  'ISO-8859-1' => /\A[\x00-\xff]/n
}

# Output charset => charsets tried, in order, as a smaller name for the
# output (see minimal_charset).  Charsets not listed here fall back to
# ['US-ASCII'] through the hash default.
SubCharset = Hash.new(['US-ASCII'])
SubCharset.update(
  'ISO-2022-JP-2' => ['US-ASCII', 'ISO-2022-JP'],
  'ISO-2022-JP-3' => ['US-ASCII', 'ISO-2022-JP'],
  'UTF-16BE' => [],
  'UTF-16LE' => [],
  'UTF-16' => []
)
214
+
215
+ # :startdoc:
216
+ end
217
+ end
@@ -0,0 +1,219 @@
1
+ require 'htree/doc'
2
+ require 'htree/elem'
3
+ require 'htree/leaf'
4
+ require 'htree/tag'
5
+ require 'htree/raw_string'
6
+ require 'htree/context'
7
+
8
+ module HTree
9
# compare tree structures.
#
# Two objects are equal when check_equality succeeds using their
# usual_equal_object representations.
def ==(other)
  check_equality(self, other, :usual_equal_object)
end
alias eql? ==

# hash value for the tree structure.
#
# Derived from usual_equal_object and computed only once per object.
def hash
  @hash_code = usual_equal_object.hash unless defined?(@hash_code)
  @hash_code
end
20
+
21
+ # :stopdoc:
22
+
23
# Returns the object used for the usual comparison (==).  It is built by
# make_usual_equal_object on first use and cached afterwards, even when
# the cached value is nil or false.
def usual_equal_object
  @usual_equal_object = make_usual_equal_object unless defined?(@usual_equal_object)
  @usual_equal_object
end

# Builds the object returned by usual_equal_object.  This default
# implementation always raises NotImplementedError.
def make_usual_equal_object
  raise NotImplementedError
end

# Returns the object used for the exact comparison (exact_equal?).  It is
# built by make_exact_equal_object on first use and cached afterwards.
def exact_equal_object
  @exact_equal_object = make_exact_equal_object unless defined?(@exact_equal_object)
  @exact_equal_object
end

# Builds the object returned by exact_equal_object.  This default
# implementation always raises NotImplementedError.
def make_exact_equal_object
  raise NotImplementedError
end

# Compares self with +other+ using the exact_equal_object
# representations.
def exact_equal?(other)
  check_equality(self, other, :exact_equal_object)
end
44
+
45
# Compares +obj1+ and +obj2+ structurally.
#
# Objects of different classes are never equal.  Arrays are equal when
# they have the same length and all elements at the same position are
# equal by this method.  An object responding to +equal_object_method+ is
# compared through the values that method returns for both objects.
# Anything else is compared with ==.
def check_equality(obj1, obj2, equal_object_method)
  return false unless obj1.class == obj2.class
  if obj1.instance_of?(Array)
    return false unless obj1.length == obj2.length
    pairs = obj1.zip(obj2)
    # Cheap pass first: reject on any class mismatch before comparing deeply.
    return false unless pairs.all? {|left, right| left.class == right.class }
    pairs.all? {|left, right| check_equality(left, right, equal_object_method) }
  elsif obj1.respond_to? equal_object_method
    check_equality(obj1.send(equal_object_method),
                   obj2.send(equal_object_method),
                   equal_object_method)
  else
    obj1 == obj2
  end
end
64
+
65
class Doc
  # Both the exact and the usual comparison of a document use the value
  # returned by +children+.
  alias_method :exact_equal_object, :children
  alias_method :usual_equal_object, :children
end
69
+
70
class Elem
  # Exact comparison covers the start tag, the children, the empty flag
  # and the end tag.
  def make_exact_equal_object
    [
      @stag,
      @children,
      @empty,
      @etag
    ]
  end

  # Usual comparison covers the start tag and the children only.
  def make_usual_equal_object
    [
      @stag,
      @children
    ]
  end
end
79
+
80
class Name
  # Exact comparison covers the prefix, the namespace URI and the local
  # name.
  def make_exact_equal_object
    [@namespace_prefix, @namespace_uri, @local_name]
  end

  # Usual comparison ignores the prefix.  When xmlns? is true only the
  # local name is used; otherwise the namespace URI and the local name.
  def make_usual_equal_object
    return @local_name if xmlns?
    [@namespace_uri, @local_name]
  end
end
89
+
90
module Util
  module_function

  # Three-way comparison that tolerates nil.
  #
  # nil compares equal to nil and sorts before every non-nil value; two
  # non-nil values are compared with <=>.
  #
  # Returns 0, -1 or 1 when either argument is nil, otherwise the result
  # of <tt>a <=> b</tt>.
  def cmp_with_nil(a, b)
    # nil? is used rather than "== nil" so that a user-defined == on the
    # arguments is never invoked just to detect nil.
    if a.nil?
      b.nil? ? 0 : -1
    elsif b.nil?
      1
    else
      a <=> b
    end
  end
end
108
+
109
class Context
  # Returns the entries of @namespaces as [prefix, value] pairs, ordered
  # by prefix with Util.cmp_with_nil.
  def make_exact_equal_object
    ordered_prefixes = @namespaces.keys.sort {|left, right|
      Util.cmp_with_nil(left, right)
    }
    ordered_prefixes.map {|prefix| [prefix, @namespaces[prefix]] }
  end

  # make_usual_equal_object is not used through STag#make_usual_equal_object
  # NotImplementedError is suitable?
  alias_method :make_usual_equal_object, :make_exact_equal_object
end
120
+
121
class STag
  # Exact comparison covers the raw string, the name, all attributes in a
  # canonical order, and the inherited context.
  def make_exact_equal_object
    [@raw_string,
     @name,
     sort_attributes_for_equality(@attributes),
     @inherited_context
    ]
  end

  # Usual comparison covers the name and the attributes whose name is not
  # xmlns?, in a canonical order.
  def make_usual_equal_object
    [@name,
     sort_attributes_for_equality(@attributes.reject {|name, _text| name.xmlns? })
    ]
  end

  private

  # Sorts [name, text] attribute pairs by the name's namespace prefix,
  # then namespace URI, then local name; nil sorts first at each level
  # (Util.cmp_with_nil).
  def sort_attributes_for_equality(attributes)
    attributes.sort {|(n1, _t1), (n2, _t2)|
      Util.cmp_with_nil(n1.namespace_prefix, n2.namespace_prefix).nonzero? ||
      Util.cmp_with_nil(n1.namespace_uri, n2.namespace_uri).nonzero? ||
      Util.cmp_with_nil(n1.local_name, n2.local_name)
    }
  end
end
145
+
146
class ETag
  # Exact comparison covers the raw string and the qualified name.
  def make_exact_equal_object
    [@raw_string, @qualified_name]
  end

  # Usual comparison uses the value returned by +qualified_name+.
  alias_method :usual_equal_object, :qualified_name
end
153
+
154
class Text
  # Exact comparison covers the raw string and the rcdata.
  def make_exact_equal_object
    [
      @raw_string,
      @rcdata
    ]
  end

  # Usual comparison uses the normalized rcdata only.
  def make_usual_equal_object
    @normalized_rcdata
  end
end
163
+
164
class XMLDecl
  # Exact comparison covers the raw string in addition to the fields used
  # for the usual comparison.
  def make_exact_equal_object
    [
      @raw_string,
      @version,
      @encoding,
      @standalone
    ]
  end

  # Usual comparison covers version, encoding and standalone.
  def make_usual_equal_object
    [
      @version,
      @encoding,
      @standalone
    ]
  end
end
173
+
174
class DocType
  # Exact comparison covers the raw string in addition to the fields used
  # for the usual comparison.
  def make_exact_equal_object
    [
      @raw_string,
      @root_element_name,
      @system_identifier,
      @public_identifier
    ]
  end

  # Usual comparison covers the root element name, the system identifier
  # and the public identifier.
  def make_usual_equal_object
    [
      @root_element_name,
      @system_identifier,
      @public_identifier
    ]
  end
end
183
+
184
class ProcIns
  # Exact comparison covers the raw string, the target and the content.
  def make_exact_equal_object
    [
      @raw_string,
      @target,
      @content
    ]
  end

  # Usual comparison covers the target and the content.
  def make_usual_equal_object
    [
      @target,
      @content
    ]
  end
end
193
+
194
class Comment
  # Exact comparison covers the raw string and the content.
  def make_exact_equal_object
    [@raw_string, @content]
  end

  # Usual comparison uses the value returned by +content+.
  alias_method :usual_equal_object, :content
end
201
+
202
class BogusETag
  # Exact comparison wraps the end tag in a one-element array.
  def make_exact_equal_object
    [@etag]
  end

  # The usual comparison calls make_exact_equal_object directly, so its
  # result is rebuilt on every call rather than cached.
  alias_method :usual_equal_object, :make_exact_equal_object
end
209
+
210
class Location
  # Exact comparison covers the parent, the index and the node.
  def make_exact_equal_object
    [
      @parent,
      @index,
      @node
    ]
  end

  # The usual comparison calls make_exact_equal_object directly, so its
  # result is rebuilt on every call rather than cached.
  alias_method :usual_equal_object, :make_exact_equal_object
end
217
+
218
+ # :startdoc:
219
+ end
@@ -0,0 +1,37 @@
1
+ require 'htree/text'
2
+ require 'htree/doc'
3
+ require 'htree/elem'
4
+
5
module HTree
  module Node
    # Returns the text of the node.  This default implementation always
    # raises NotImplementedError; Container, Leaf and Text below provide
    # their own versions.
    def extract_text
      raise NotImplementedError
    end
  end

  class Location
    # Extracts the text of the node this location refers to (+to_node+).
    def extract_text
      node = to_node
      node.extract_text
    end
  end

  # :stopdoc:
  module Container
    # Concatenates, with Text.concat, the text extracted from every child.
    def extract_text
      pieces = @children.map {|child| child.extract_text }
      Text.concat(*pieces)
    end
  end

  module Leaf
    # A leaf contributes an empty Text.
    def extract_text
      Text.new('')
    end
  end

  class Text
    # A text node is its own extracted text.
    def extract_text
      self
    end
  end
  # :startdoc:
end