rubysl-rexml 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (179) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.travis.yml +8 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE +25 -0
  6. data/README.md +29 -0
  7. data/Rakefile +1 -0
  8. data/lib/rexml/attlistdecl.rb +62 -0
  9. data/lib/rexml/attribute.rb +185 -0
  10. data/lib/rexml/cdata.rb +67 -0
  11. data/lib/rexml/child.rb +96 -0
  12. data/lib/rexml/comment.rb +80 -0
  13. data/lib/rexml/doctype.rb +271 -0
  14. data/lib/rexml/document.rb +230 -0
  15. data/lib/rexml/dtd/attlistdecl.rb +10 -0
  16. data/lib/rexml/dtd/dtd.rb +51 -0
  17. data/lib/rexml/dtd/elementdecl.rb +17 -0
  18. data/lib/rexml/dtd/entitydecl.rb +56 -0
  19. data/lib/rexml/dtd/notationdecl.rb +39 -0
  20. data/lib/rexml/element.rb +1227 -0
  21. data/lib/rexml/encoding.rb +71 -0
  22. data/lib/rexml/encodings/CP-1252.rb +103 -0
  23. data/lib/rexml/encodings/EUC-JP.rb +35 -0
  24. data/lib/rexml/encodings/ICONV.rb +22 -0
  25. data/lib/rexml/encodings/ISO-8859-1.rb +7 -0
  26. data/lib/rexml/encodings/ISO-8859-15.rb +72 -0
  27. data/lib/rexml/encodings/SHIFT-JIS.rb +37 -0
  28. data/lib/rexml/encodings/SHIFT_JIS.rb +1 -0
  29. data/lib/rexml/encodings/UNILE.rb +34 -0
  30. data/lib/rexml/encodings/US-ASCII.rb +30 -0
  31. data/lib/rexml/encodings/UTF-16.rb +35 -0
  32. data/lib/rexml/encodings/UTF-8.rb +18 -0
  33. data/lib/rexml/entity.rb +166 -0
  34. data/lib/rexml/formatters/default.rb +109 -0
  35. data/lib/rexml/formatters/pretty.rb +138 -0
  36. data/lib/rexml/formatters/transitive.rb +56 -0
  37. data/lib/rexml/functions.rb +382 -0
  38. data/lib/rexml/instruction.rb +70 -0
  39. data/lib/rexml/light/node.rb +196 -0
  40. data/lib/rexml/namespace.rb +47 -0
  41. data/lib/rexml/node.rb +75 -0
  42. data/lib/rexml/output.rb +24 -0
  43. data/lib/rexml/parent.rb +166 -0
  44. data/lib/rexml/parseexception.rb +51 -0
  45. data/lib/rexml/parsers/baseparser.rb +503 -0
  46. data/lib/rexml/parsers/lightparser.rb +60 -0
  47. data/lib/rexml/parsers/pullparser.rb +196 -0
  48. data/lib/rexml/parsers/sax2parser.rb +238 -0
  49. data/lib/rexml/parsers/streamparser.rb +46 -0
  50. data/lib/rexml/parsers/treeparser.rb +97 -0
  51. data/lib/rexml/parsers/ultralightparser.rb +56 -0
  52. data/lib/rexml/parsers/xpathparser.rb +698 -0
  53. data/lib/rexml/quickpath.rb +266 -0
  54. data/lib/rexml/rexml.rb +32 -0
  55. data/lib/rexml/sax2listener.rb +97 -0
  56. data/lib/rexml/source.rb +251 -0
  57. data/lib/rexml/streamlistener.rb +92 -0
  58. data/lib/rexml/syncenumerator.rb +33 -0
  59. data/lib/rexml/text.rb +344 -0
  60. data/lib/rexml/undefinednamespaceexception.rb +8 -0
  61. data/lib/rexml/validation/relaxng.rb +559 -0
  62. data/lib/rexml/validation/validation.rb +155 -0
  63. data/lib/rexml/validation/validationexception.rb +9 -0
  64. data/lib/rexml/xmldecl.rb +119 -0
  65. data/lib/rexml/xmltokens.rb +18 -0
  66. data/lib/rexml/xpath.rb +66 -0
  67. data/lib/rexml/xpath_parser.rb +792 -0
  68. data/lib/rubysl/rexml.rb +1 -0
  69. data/lib/rubysl/rexml/version.rb +5 -0
  70. data/rubysl-rexml.gemspec +23 -0
  71. data/spec/attribute/clone_spec.rb +10 -0
  72. data/spec/attribute/element_spec.rb +22 -0
  73. data/spec/attribute/equal_value_spec.rb +17 -0
  74. data/spec/attribute/hash_spec.rb +12 -0
  75. data/spec/attribute/initialize_spec.rb +28 -0
  76. data/spec/attribute/inspect_spec.rb +19 -0
  77. data/spec/attribute/namespace_spec.rb +23 -0
  78. data/spec/attribute/node_type_spec.rb +9 -0
  79. data/spec/attribute/prefix_spec.rb +17 -0
  80. data/spec/attribute/remove_spec.rb +19 -0
  81. data/spec/attribute/to_s_spec.rb +13 -0
  82. data/spec/attribute/to_string_spec.rb +14 -0
  83. data/spec/attribute/value_spec.rb +14 -0
  84. data/spec/attribute/write_spec.rb +22 -0
  85. data/spec/attribute/xpath_spec.rb +19 -0
  86. data/spec/attributes/add_spec.rb +6 -0
  87. data/spec/attributes/append_spec.rb +6 -0
  88. data/spec/attributes/delete_all_spec.rb +30 -0
  89. data/spec/attributes/delete_spec.rb +26 -0
  90. data/spec/attributes/each_attribute_spec.rb +24 -0
  91. data/spec/attributes/each_spec.rb +24 -0
  92. data/spec/attributes/element_reference_spec.rb +18 -0
  93. data/spec/attributes/element_set_spec.rb +25 -0
  94. data/spec/attributes/get_attribute_ns_spec.rb +13 -0
  95. data/spec/attributes/get_attribute_spec.rb +28 -0
  96. data/spec/attributes/initialize_spec.rb +18 -0
  97. data/spec/attributes/length_spec.rb +6 -0
  98. data/spec/attributes/namespaces_spec.rb +5 -0
  99. data/spec/attributes/prefixes_spec.rb +23 -0
  100. data/spec/attributes/shared/add.rb +17 -0
  101. data/spec/attributes/shared/length.rb +12 -0
  102. data/spec/attributes/size_spec.rb +6 -0
  103. data/spec/attributes/to_a_spec.rb +20 -0
  104. data/spec/cdata/clone_spec.rb +9 -0
  105. data/spec/cdata/initialize_spec.rb +24 -0
  106. data/spec/cdata/shared/to_s.rb +11 -0
  107. data/spec/cdata/to_s_spec.rb +6 -0
  108. data/spec/cdata/value_spec.rb +6 -0
  109. data/spec/document/add_element_spec.rb +30 -0
  110. data/spec/document/add_spec.rb +60 -0
  111. data/spec/document/clone_spec.rb +19 -0
  112. data/spec/document/doctype_spec.rb +14 -0
  113. data/spec/document/encoding_spec.rb +21 -0
  114. data/spec/document/expanded_name_spec.rb +15 -0
  115. data/spec/document/new_spec.rb +37 -0
  116. data/spec/document/node_type_spec.rb +7 -0
  117. data/spec/document/root_spec.rb +11 -0
  118. data/spec/document/stand_alone_spec.rb +18 -0
  119. data/spec/document/version_spec.rb +13 -0
  120. data/spec/document/write_spec.rb +38 -0
  121. data/spec/document/xml_decl_spec.rb +14 -0
  122. data/spec/element/add_attribute_spec.rb +40 -0
  123. data/spec/element/add_attributes_spec.rb +21 -0
  124. data/spec/element/add_element_spec.rb +38 -0
  125. data/spec/element/add_namespace_spec.rb +23 -0
  126. data/spec/element/add_text_spec.rb +23 -0
  127. data/spec/element/attribute_spec.rb +16 -0
  128. data/spec/element/attributes_spec.rb +18 -0
  129. data/spec/element/cdatas_spec.rb +23 -0
  130. data/spec/element/clone_spec.rb +28 -0
  131. data/spec/element/comments_spec.rb +20 -0
  132. data/spec/element/delete_attribute_spec.rb +38 -0
  133. data/spec/element/delete_element_spec.rb +50 -0
  134. data/spec/element/delete_namespace_spec.rb +24 -0
  135. data/spec/element/document_spec.rb +17 -0
  136. data/spec/element/each_element_with_attribute_spec.rb +34 -0
  137. data/spec/element/each_element_with_text_spec.rb +30 -0
  138. data/spec/element/get_text_spec.rb +17 -0
  139. data/spec/element/has_attributes_spec.rb +16 -0
  140. data/spec/element/has_elements_spec.rb +17 -0
  141. data/spec/element/has_text_spec.rb +15 -0
  142. data/spec/element/inspect_spec.rb +26 -0
  143. data/spec/element/instructions_spec.rb +20 -0
  144. data/spec/element/namespace_spec.rb +26 -0
  145. data/spec/element/namespaces_spec.rb +31 -0
  146. data/spec/element/new_spec.rb +34 -0
  147. data/spec/element/next_element_spec.rb +18 -0
  148. data/spec/element/node_type_spec.rb +7 -0
  149. data/spec/element/prefixes_spec.rb +22 -0
  150. data/spec/element/previous_element_spec.rb +19 -0
  151. data/spec/element/raw_spec.rb +23 -0
  152. data/spec/element/root_spec.rb +27 -0
  153. data/spec/element/text_spec.rb +45 -0
  154. data/spec/element/texts_spec.rb +15 -0
  155. data/spec/element/whitespace_spec.rb +22 -0
  156. data/spec/node/each_recursive_spec.rb +20 -0
  157. data/spec/node/find_first_recursive_spec.rb +24 -0
  158. data/spec/node/index_in_parent_spec.rb +14 -0
  159. data/spec/node/next_sibling_node_spec.rb +20 -0
  160. data/spec/node/parent_spec.rb +20 -0
  161. data/spec/node/previous_sibling_node_spec.rb +20 -0
  162. data/spec/shared/each_element.rb +35 -0
  163. data/spec/shared/elements_to_a.rb +35 -0
  164. data/spec/text/append_spec.rb +9 -0
  165. data/spec/text/clone_spec.rb +9 -0
  166. data/spec/text/comparison_spec.rb +24 -0
  167. data/spec/text/empty_spec.rb +11 -0
  168. data/spec/text/indent_text_spec.rb +23 -0
  169. data/spec/text/inspect_spec.rb +7 -0
  170. data/spec/text/new_spec.rb +48 -0
  171. data/spec/text/node_type_spec.rb +7 -0
  172. data/spec/text/normalize_spec.rb +7 -0
  173. data/spec/text/read_with_substitution_spec.rb +12 -0
  174. data/spec/text/to_s_spec.rb +17 -0
  175. data/spec/text/unnormalize_spec.rb +7 -0
  176. data/spec/text/value_spec.rb +36 -0
  177. data/spec/text/wrap_spec.rb +20 -0
  178. data/spec/text/write_with_substitution_spec.rb +32 -0
  179. metadata +385 -0
@@ -0,0 +1,266 @@
1
+ require 'rexml/functions'
2
+ require 'rexml/xmltokens'
3
+
4
+ module REXML
5
+ class QuickPath
6
+ include Functions
7
+ include XMLTokens
8
+
9
+ EMPTY_HASH = {}
10
+
11
# Returns the first node that +path+ selects relative to +element+, or nil
# when nothing is selected. +namespaces+ is passed through to
# QuickPath::match unchanged.
def QuickPath::first(element, path, namespaces = EMPTY_HASH)
  match(element, path, namespaces).first
end
14
+
15
# Passes every node selected by +path+ (relative to +element+) to the given
# block. A nil or false +path+ is replaced by "*".
def QuickPath::each(element, path, namespaces = EMPTY_HASH, &block)
  match(element, path || "*", namespaces).each(&block)
end
19
+
20
# Evaluates +path+ against +element+ and returns the matching nodes.
#
# +namespaces+ is stored in Functions::namespace_context before the path is
# evaluated. The shape of the start of +path+ decides which nodes are
# handed to QuickPath::filter:
# * a single leading "/" -- the root element (a bare "/" returns the
#   root's parent directly);
# * an axis specifier ("name::") -- +element+ itself;
# * a leading "*", "[", "!", ":" or word character -- the children of
#   +element+;
# * anything else -- +element+ itself.
#
# Raises a RuntimeError when +path+ is nil.
def QuickPath::match(element, path, namespaces = EMPTY_HASH)
  raise "nil is not a valid xpath" unless path
  Functions::namespace_context = namespaces
  case path
  when /^\/([^\/]|$)/u
    # match on root
    remainder = path[1..-1]
    return [element.root.parent] if remainder == ''
    filter([element.root], remainder)
  when /^[-\w]*::/u
    filter([element], path)
  when /^\*/u, /^[\[!\w:]/u
    # match on child
    filter(element.to_a, path)
  else
    filter([element], path)
  end
end
44
+
45
# Given an array of nodes, returns the nodes selected by applying +path+ to
# them. The leading token of +path+ picks one step (descendant, axis,
# child, function, element name, predicate, ancestor, parent, self or
# wildcard) and the remainder of the path is applied recursively.
#
# Note that the element-name step removes non-matching entries from
# +elements+ in place (Array#delete_if), so the array passed in may be
# modified. An empty or nil +path+, or an empty +elements+, returns
# +elements+ untouched; a +path+ that matches none of the steps yields [].
def QuickPath::filter(elements, path)
  return elements if path.nil? or path == '' or elements.size == 0
  case path
  when /^\/\//u                                   # Descendant
    axe(elements, "descendant-or-self", $')
  when /^\/?\b(\w[-\w]*)\b::/u                    # Axe
    axe(elements, $1, $')
  when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u # Child
    remainder = $'
    elements.inject([]) do |found, node|
      found | filter(node.to_a, remainder)
    end
  when /^\/?(\w[-\w]*)\(/u                        # / Function
    function(elements, $1, $')
  when Namespace::NAMESPLIT                       # Element name
    ns, name, remainder = $1, $2, $'
    elements.delete_if do |node|
      next true unless node.kind_of? Element
      next false if node.expanded_name == name
      !(node.name == name and
        node.namespace == Functions.namespace_context[ns])
    end
    filter(elements, remainder)
  when /^\/\[/u
    bracketed = path[1..-1]
    elements.inject([]) do |found, node|
      node.kind_of?(Element) ? found | predicate(node.to_a, bracketed) : found
    end
  when /^\[/u                                     # Predicate
    predicate(elements, path)
  when /^\/?\.\.\./u                              # Ancestor
    axe(elements, "ancestor", $')
  when /^\/?\.\./u                                # Parent
    remainder = $'
    filter(elements.collect { |node| node.parent }, remainder)
  when /^\/?\./u                                  # Self
    filter(elements, $')
  when /^\*/u                                     # Any
    remainder = $'
    elements.inject([]) do |found, node|
      node.kind_of?(Element) ? found | filter([node], remainder) : found
    end
  else
    []
  end
end
105
+
106
# Applies the axis named +axe_name+ to +elements+ and filters the nodes it
# reaches with +rest+. Returns the result with duplicates removed.
#
# For any "...-or-self" axis the starting nodes themselves are filtered
# first (on a copy of +elements+). An axis name that is not handled below
# contributes nothing further.
def QuickPath::axe(elements, axe_name, rest)
  found = (axe_name =~ /-or-self$/u) ? filter(elements.dup, rest) : []
  case axe_name
  when /^descendant/u
    elements.each do |node|
      next unless node.kind_of? Element
      found |= filter(node.to_a, "descendant-or-self::#{rest}")
    end
  when /^ancestor/u
    # Collect the whole parent chain of every node, then filter it.
    elements.each do |node|
      ancestor = node.parent
      while ancestor
        found << ancestor
        ancestor = ancestor.parent
      end
    end
    found = filter(found, rest)
  when "self"
    found = filter(elements, rest)
  when "child"
    elements.each do |node|
      next unless node.kind_of? Element
      found |= filter(node.to_a, rest)
    end
  when "attribute"
    # Here +rest+ is used directly as the attribute name.
    elements.each do |node|
      found << node.attributes[rest] if node.kind_of? Element
    end
  when "parent"
    found = filter(elements.collect { |node| node.parent }.uniq, rest)
  when "following-sibling"
    found = filter(elements.collect { |node| node.next_sibling }.uniq, rest)
  when "previous-sibling"
    found = filter(elements.collect { |node| node.previous_sibling }.uniq, rest)
  end
  found.uniq
end
143
+
144
+ # A predicate filters a node-set with respect to an axis to produce a
145
+ # new node-set. For each node in the node-set to be filtered, the
146
+ # PredicateExpr is evaluated with that node as the context node, with
147
+ # the number of nodes in the node-set as the context size, and with the
148
+ # proximity position of the node in the node-set with respect to the
149
+ # axis as the context position; if PredicateExpr evaluates to true for
150
+ # that node, the node is included in the new node-set; otherwise, it is
151
+ # not included.
152
+ #
153
+ # A PredicateExpr is evaluated by evaluating the Expr and converting
154
+ # the result to a boolean. If the result is a number, the result will
155
+ # be converted to true if the number is equal to the context position
156
+ # and will be converted to false otherwise; if the result is not a
157
+ # number, then the result will be converted as if by a call to the
158
+ # boolean function. Thus a location path para[3] is equivalent to
159
+ # para[position()=3].
160
+ def QuickPath::predicate( elements, path )
161
+ ind = 1
162
+ bcount = 1
163
+ while bcount > 0
164
+ bcount += 1 if path[ind] == ?[
165
+ bcount -= 1 if path[ind] == ?]
166
+ ind += 1
167
+ end
168
+ ind -= 1
169
+ predicate = path[1..ind-1]
170
+ rest = path[ind+1..-1]
171
+
172
+ # have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
173
+ predicate.gsub!( /([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)/u ) {
174
+ "#$1 #$2 #$3 and #$3 #$4 #$5"
175
+ }
176
+ # Let's do some Ruby trickery to avoid some work:
177
+ predicate.gsub!( /&/u, "&&" )
178
+ predicate.gsub!( /=/u, "==" )
179
+ predicate.gsub!( /@(\w[-\w.]*)/u ) {
180
+ "attribute(\"#$1\")"
181
+ }
182
+ predicate.gsub!( /\bmod\b/u, "%" )
183
+ predicate.gsub!( /\b(\w[-\w.]*\()/u ) {
184
+ fname = $1
185
+ fname.gsub( /-/u, "_" )
186
+ }
187
+
188
+ Functions.pair = [ 0, elements.size ]
189
+ results = []
190
+ elements.each do |element|
191
+ Functions.pair[0] += 1
192
+ Functions.node = element
193
+ res = eval( predicate )
194
+ case res
195
+ when true
196
+ results << element
197
+ when Fixnum
198
+ results << element if Functions.pair[0] == res
199
+ when String
200
+ results << element
201
+ end
202
+ end
203
+ return filter( results, rest )
204
+ end
205
+
206
# Looks up attribute +name+ on the current context node (Functions.node).
# Returns nil when the context node is not an Element.
def QuickPath::attribute(name)
  context = Functions.node
  context.attributes[name] if context.kind_of? Element
end
209
+
210
# Name of the current context node (Functions.node), or nil when the
# context node is not an Element.
def QuickPath::name
  context = Functions.node
  context.name if context.kind_of? Element
end
213
+
214
# Forwards any unknown class-level call to the Functions module, so that
# function names appearing in an eval'd predicate resolve there.
#
# A StandardError raised by the forwarded call is re-raised as a
# RuntimeError whose message names the method and its arguments. Only
# StandardError is rescued: Interrupt, SystemExit and similar must
# propagate untouched instead of being rewrapped.
def QuickPath::method_missing(id, *args)
  begin
    Functions.send(id.id2name, *args)
  rescue StandardError
    raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{$!.message}"
  end
end
221
+
222
# Calls the function +fname+ (a method of Functions) once per node in
# +elements+ and returns the nodes it selects.
#
# The arguments are parsed from +rest+ with parse_args. For every node,
# Functions.node is set to that node and Functions.pair to
# [position, size] before the call. A node is kept when the call returns
# +true+, or an Integer equal to the node's position; every other result
# drops the node.
def QuickPath::function(elements, fname, rest)
  args = parse_args(elements, rest)
  Functions.pair = [0, elements.size]
  results = []
  elements.each do |element|
    Functions.pair[0] += 1
    Functions.node = element
    res = Functions.send(fname, *args)
    case res
    when true
      results << element
    when Integer
      # Integer replaces Fixnum, which no longer exists on Ruby >= 3.2.
      results << element if Functions.pair[0] == res
    end
  end
  results
end
239
+
240
# Consumes +string+ character by character (mutating it in place) up to and
# including the first ")" and returns the arguments collected so far.
# Returns "" when +string+ runs out before a ")" is seen.
#
# NOTE(review): only text followed by "," is turned into an argument (via
# +evaluate+, which is not defined in this class and so goes through
# method_missing); the text before ")" is not added, and the buffer is not
# cleared after a ",". This looks unfinished -- the behaviour is kept
# as-is, confirm the intent before relying on it.
def QuickPath::parse_args(element, string)
  # /.*?(?:\)|,)/
  arguments = []
  buffer = ""
  while string and string != ""
    c = string[0]
    # Drop exactly the first character. "\A" with the /m flag also matches
    # a leading newline; the previous /^./ skipped it and either removed
    # the wrong character or never shortened the string (endless loop).
    string.sub!(/\A./mu, "")
    case c
    when ?,
      # if depth = 1, then we start a new argument
      arguments << evaluate( buffer )
    when ?(
      # start a new method call
      function( element, buffer, string )
      buffer = ""
    when ?)
      # close the method call and return arguments
      return arguments
    else
      buffer << c
    end
  end
  ""
end
265
+ end
266
+ end
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+ # REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
3
+ #
4
+ # REXML is a _pure_ Ruby, XML 1.0 conforming,
5
+ # non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
6
+ # toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
7
+ # tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
8
+ # and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
9
+ # includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
10
+ # Ruby 1.8, REXML is included in the standard Ruby distribution.
11
+ #
12
+ # Main page:: http://www.germane-software.com/software/rexml
13
+ # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
14
+ # Version:: 3.1.7.3
15
+ # Date:: 2007/275
16
+ # Revision:: $Revision$
17
+ #
18
+ # This API documentation can be downloaded from the REXML home page, or can
19
+ # be accessed online[http://www.germane-software.com/software/rexml_doc]
20
+ #
21
+ # A tutorial is available in the REXML distribution in docs/tutorial.html,
22
+ # or can be accessed
23
+ # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
24
module REXML
  # Copyright notice; also reachable under the older CamelCase name.
  COPYRIGHT = "Copyright \xC2\xA9 2001-2006 Sean Russell <ser@germane-software.com>"
  Copyright = COPYRIGHT

  # Release number; also reachable under the older CamelCase name.
  VERSION = "3.1.7.3"
  Version = VERSION

  # Release date, written as "year/day-of-year".
  DATE = "2007/275"

  # Version-control keyword with the "$Revision:" / "$" markers stripped.
  REVISION = "$Revision$".gsub(/\$Revision:|\$/, '').strip
end
@@ -0,0 +1,97 @@
1
+ module REXML
2
+ # A template for stream parser listeners.
3
+ # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
4
+ # processed; REXML doesn't yet handle doctype entity declarations, so you
5
+ # have to parse them out yourself.
6
+ # === Missing methods from SAX2
7
+ # ignorable_whitespace
8
+ # === Methods extending SAX2
9
+ # +WARNING+
10
+ # These methods are certainly going to change, until DTDs are fully
11
+ # supported. Be aware of this.
12
+ # start_document
13
+ # end_document
14
+ # doctype
15
+ # elementdecl
16
+ # attlistdecl
17
+ # entitydecl
18
+ # notationdecl
19
+ # cdata
20
+ # xmldecl
21
+ # comment
22
module SAX2Listener
  # Every callback here is an empty default that returns nil. Include the
  # module and override only the events you care about.

  def start_document()
  end

  def end_document()
  end

  def start_prefix_mapping(prefix, uri)
  end

  def end_prefix_mapping(prefix)
  end

  def start_element(uri, localname, qname, attributes)
  end

  def end_element(uri, localname, qname)
  end

  def characters(text)
  end

  def processing_instruction(target, data)
  end

  # Handles a doctype declaration. Any attributes of the doctype which are
  # not supplied will be nil. EG, <!DOCTYPE me PUBLIC "foo" "bar">
  # @p name the name of the doctype; EG, "me"
  # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
  # @p long_name the supplied long name, or nil. EG, "foo"
  # @p uri the uri of the doctype, or nil. EG, "bar"
  def doctype(name, pub_sys, long_name, uri)
  end

  # If a doctype includes an ATTLIST declaration, it will cause this
  # method to be called. The content is the declaration itself, unparsed.
  # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
  # attr CDATA #REQUIRED". This is the same for all of the .*decl
  # methods.
  # NOTE(review): the signature takes three arguments (element, pairs,
  # contents); presumably +contents+ is the unparsed text described above
  # -- confirm against the parser that invokes this callback.
  def attlistdecl(element, pairs, contents)
  end

  # <!ELEMENT ...>
  def elementdecl(content)
  end

  # <!ENTITY ...>
  # The entity declaration is described below as an array. It can be in a
  # number of formats, but in general it looks like (example, result):
  #  <!ENTITY % YN '"Yes"'>
  #  ["%", "YN", "'\"Yes\"'", "\""]
  #  <!ENTITY % YN 'Yes'>
  #  ["%", "YN", "'Yes'", "s"]
  #  <!ENTITY WhatHeSaid "He said %YN;">
  #  ["WhatHeSaid", "\"He said %YN;\"", "YN"]
  #  <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
  #  ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
  #  <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
  #  ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
  #  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
  #  ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
  # NOTE(review): the examples show one array, while the signature takes
  # +name+ and +decl+ -- confirm how the parser splits the declaration.
  def entitydecl(name, decl)
  end

  # <!NOTATION ...>
  def notationdecl(content)
  end

  # Called when <![CDATA[ ... ]]> is encountered in a document.
  # @p content "..."
  def cdata(content)
  end

  # Called when an XML PI is encountered in the document.
  # EG: <?xml version="1.0" encoding="utf"?>
  # @p version the version attribute value. EG, "1.0"
  # @p encoding the encoding attribute value, or nil. EG, "utf"
  # @p standalone the standalone attribute value, or nil. EG, nil
  def xmldecl(version, encoding, standalone)
  end

  # Called when a comment is encountered.
  # @p comment The content of the comment
  def comment(comment)
  end

  def progress(position)
  end
end
97
+ end
@@ -0,0 +1,251 @@
1
+ require 'rexml/encoding'
2
+
3
+ module REXML
4
# Generates Source-s. USE THIS CLASS.
class SourceFactory
  # Builds the Source that fits +arg+.
  # @param arg Either a String, an IO-like object, or a Source
  # @return Source.new(arg) for a String; IOSource.new(arg) for anything
  #   responding to read, readline, nil? and eof?; +arg+ itself when it is
  #   already a Source
  # @raise RuntimeError when +arg+ is none of the above
  def SourceFactory::create_from(arg)
    return Source.new(arg) if arg.kind_of? String

    io_like = [:read, :readline, :nil?, :eof?].all? { |msg| arg.respond_to? msg }
    return IOSource.new(arg) if io_like

    return arg if arg.kind_of? Source

    raise "#{arg.class} is not a valid input stream. It must walk \n"+
      "like either a String, an IO, or a Source."
  end
end
25
+
26
+ # A Source can be searched for patterns, and wraps buffers and other
27
+ # objects and provides consumption of text
28
+ class Source
29
+ include Encoding
30
+ # The current buffer (what we're going to read next)
31
+ attr_reader :buffer
32
+ # The line number of the last consumed text
33
+ attr_reader :line
34
+ attr_reader :encoding
35
+
36
# Constructor
# @param arg must be a String, and should be a valid XML document
# @param encoding if non-null, sets the encoding of the source to this
#   value, overriding all encoding detection; otherwise the encoding is
#   taken from check_encoding on the buffer
def initialize(arg, encoding=nil)
  @orig = @buffer = arg
  self.encoding = encoding || check_encoding( @buffer )
  @line = 0
end
49
+
50
+
51
# Inherited from Encoding
# Overridden to support optimized en/decoding: after the inherited setter
# succeeds, the encoded form of '>' is cached in @line_break, and for any
# encoding other than UTF_8 the buffer is decoded and @to_utf is switched
# on. Does nothing further when the inherited setter returns a false value.
def encoding=(enc)
  return unless super
  @line_break = encode( '>' )
  needs_decoding = (enc != UTF_8)
  @buffer = decode(@buffer) if needs_decoding
  @to_utf = needs_decoding
end
63
+
64
# Scans the source for a given pattern. Note, that this is not your
# usual scan() method. For one thing, the pattern argument has some
# requirements; for another, the source can be consumed. You can easily
# confuse this method. Originally, the patterns were easier
# to construct and this method more robust, because this method
# generated search regexes on the fly; however, this was
# computationally expensive and slowed down the entire REXML package
# considerably, since this is by far the most commonly called method.
# @param pattern must be a Regexp, and must be in the form of
#   /^\s*(#{your pattern, with no groups})(.*)/. The first group
#   will be returned; the second group is used if the consume flag is
#   set.
# @param cons if true, the pattern returned will be consumed, leaving
#   everything after the last match in the Source.
# @return the result of String#scan on the buffer, or nil if the buffer
#   is nil. An empty result leaves the buffer untouched.
def scan(pattern, cons=false)
  return nil if @buffer.nil?
  found = @buffer.scan(pattern)
  # $' is the text following the last match made by String#scan above.
  @buffer = $' if cons && !found.empty?
  found
end
86
+
87
# No-op for a plain Source; returns nil.
def read()
end
89
+
90
# Drops everything up to and including the first match of +pattern+ from
# the buffer. The buffer is left alone when +pattern+ does not match.
# Returns the new buffer, or nil when there was no match.
def consume( pattern )
  md = pattern.match( @buffer )
  @buffer = md.post_match if md
end
93
+
94
# Matches +pattern+ against the buffer without consuming anything.
# +char+ is not used by this implementation.
# @return the MatchData, or nil when +pattern+ does not match
def match_to( char, pattern )
  pattern.match(@buffer)
end
97
+
98
# Matches +pattern+ against the buffer and replaces the buffer with the
# text after the match. +char+ is not used by this implementation.
# NOTE(review): when +pattern+ does not match, the buffer becomes nil
# (not left unchanged, unlike #match) -- confirm callers expect this.
# @return the MatchData, or nil when +pattern+ does not match
def match_to_consume( char, pattern )
  md = pattern.match(@buffer)
  @buffer = md ? md.post_match : nil
  md
end
103
+
104
# Matches +pattern+ against the buffer.
# @param cons when true and the pattern matches, the buffer is replaced
#   by the text after the match
# @return the MatchData, or nil when +pattern+ does not match
def match(pattern, cons=false)
  md = pattern.match(@buffer)
  @buffer = md.post_match if cons && md
  md
end
109
+
110
# @return true if the Source is exhausted, i.e. the buffer equals the
#   empty string (a nil buffer does not count as empty)
def empty?()
  @buffer == ""
end
114
+
115
# Index at which the current buffer is first found inside the original
# text, or nil when it is not found there.
def position()
  @orig.index( @buffer )
end
118
+
119
# @return the current line in the source
# NOTE(review): as written, the original text is split on whitespace
# (String#split with no argument) and the pieces are compared for equality
# with the first 31 characters of the buffer; the result is the index of
# the last such piece, or nil if none is equal. Confirm this is the
# intended notion of "line".
def current_line()
  pieces = @orig.split
  hit = pieces.grep( @buffer[0..30] ).last
  pieces.index( hit ) if hit
end
126
+ end
127
+
128
+ # A Source that wraps an IO. See the Source class for method
129
+ # documentation
130
+ class IOSource < Source
131
+ #attr_reader :block_size
132
+
133
# block_size has been deprecated
#
# Wraps the IO-like +arg+. The first bytes are inspected for a byte-order
# mark to choose the record separator used by later reads, then everything
# up to the first '>' is read and handed to Source#initialize.
# @param arg the stream to read from (must support read and readline)
# @param block_size ignored; kept for backward compatibility
# @param encoding if given, it is applied via #encoding= instead of the
#   byte-order-mark inspection
def initialize(arg, block_size=500, encoding=nil)
  @er_source = @source = arg
  @to_utf = false

  # Determining the encoding is a deceptively difficult issue to resolve.
  # First, we check the first two bytes for UTF-16. Then we
  # assume that the encoding is at least ASCII enough for the '>', and
  # we read until we get one of those. This gives us the XML declaration,
  # if there is one. If there isn't one, the file MUST be UTF-8, as per
  # the XML spec. If there is one, we can determine the encoding from
  # it.
  @buffer = ""
  # read returns nil on an empty stream; treat that as "no bytes".
  str = @source.read( 2 ) || ""
  # Compare numeric byte values: str[0] is an Integer only on Ruby 1.8 and
  # a String afterwards, so direct comparison with 0xfe etc. never matched.
  lead = str.unpack( 'C*' )
  if encoding
    self.encoding = encoding
  elsif lead[0] == 0xfe && lead[1] == 0xff
    @line_break = "\000>"
  elsif lead[0] == 0xff && lead[1] == 0xfe
    @line_break = ">\000"
  elsif lead[0] == 0xef && lead[1] == 0xbb
    third = @source.read(1)
    str += third if third
    # EF BB BF is the UTF-8 byte-order mark: drop it from the text.
    str = '' if str.unpack( 'C*' )[2] == 0xbf
    @line_break = ">"
  else
    @line_break = ">"
  end
  super( str+@source.readline( @line_break ) )
end
162
+
163
# Like Source#scan, but when nothing is found in the current buffer more
# text is pulled from the stream (one @line_break-terminated chunk at a
# time) until +pattern+ matches the buffer or the stream is given up.
# Any StandardError while reading (including end of file) ends reading by
# setting @source to nil; an Iconv::IllegalSequence is re-raised.
def scan(pattern, cons=false)
  rv = super
  # You'll notice that this next section is very similar to the same
  # section in match(), but just a liiittle different. This is
  # because it is a touch faster to do it this way with scan()
  # than the way match() does it; enough faster to warrant duplicating
  # some code
  if rv.size == 0
    until @buffer =~ pattern or @source.nil?
      begin
        # READLINE OPT
        str = @source.readline(@line_break)
        str = decode(str) if @to_utf and str
        @buffer << str
      rescue => err
        # Naming Iconv::IllegalSequence directly in a rescue clause raises
        # NameError whenever Iconv is not loaded (it is absent from
        # Ruby >= 2.0), so look it up defensively.
        raise if defined?(Iconv::IllegalSequence) and
                 err.kind_of?(Iconv::IllegalSequence)
        @source = nil
      end
    end
    rv = super
  end
  # Object#taint no longer exists on Ruby >= 3.2.
  rv.taint if rv.respond_to?(:taint)
  rv
end
189
+
190
# Appends the next @line_break-terminated chunk of the stream to the
# buffer, decoding it first when @to_utf is set. Any StandardError
# (including end of file) ends reading by setting @source to nil.
# Interrupt, SystemExit and other non-StandardError exceptions are no
# longer swallowed.
def read
  begin
    str = @source.readline(@line_break)
    str = decode(str) if @to_utf and str
    @buffer << str
  rescue StandardError
    @source = nil
  end
end
199
+
200
# Consumes +pattern+ from the source: shorthand for #match with the
# consume flag set.
def consume( pattern )
  consuming = true
  match( pattern, consuming )
end
203
+
204
# Matches +pattern+ against the buffer, pulling further
# @line_break-terminated chunks from the stream while there is no match
# and the stream is still available. Any StandardError while reading
# (including end of file) ends reading by setting @source to nil.
# @param cons when true and the pattern matches, the buffer is replaced
#   by the text after the match
# @return the MatchData, or nil when +pattern+ never matched
def match( pattern, cons=false )
  rv = pattern.match(@buffer)
  @buffer = $' if cons and rv
  while !rv and @source
    begin
      str = @source.readline(@line_break)
      str = decode(str) if @to_utf and str
      @buffer << str
      rv = pattern.match(@buffer)
      @buffer = $' if cons and rv
    rescue
      @source = nil
    end
  end
  # Object#taint no longer exists on Ruby >= 3.2.
  rv.taint if rv.respond_to?(:taint)
  rv
end
221
+
222
# True when the buffer is empty (per Source#empty?) and the stream has
# either been given up (@source is nil) or reports end of file.
def empty?()
  return false unless super
  @source.nil? || @source.eof?
end
225
+
226
# Current byte position of the underlying stream; 0 when the stream is a
# pipe. Streams without a +stat+ method (e.g. StringIO, which
# SourceFactory accepts) previously raised NoMethodError here; they now
# simply report their +pos+.
def position
  stream = @er_source
  return 0 if stream.respond_to?(:stat) && stream.stat.pipe?
  stream.pos
end
229
+
230
# @return the current line in the source, as [pos, lineno, line]:
#   pos    -- byte position of the stream when called,
#   lineno -- the stream's own lineno when called,
#   line   -- number of readline calls needed, after rewinding, to get
#             back to (or past) pos.
# The stream is rewound and re-read, so it is left wherever that re-read
# stops. An IOError from the stream yields pos and line of -1; errors
# during the re-read itself are ignored.
def current_line
  begin
    pos = @er_source.pos        # The byte position in the source
    lineno = @er_source.lineno  # The XML < position in the source
    @er_source.rewind
    line = 0                    # The \r\n position in the source
    begin
      until @er_source.pos >= pos
        @er_source.readline
        line += 1
      end
    rescue
      # best effort: keep whatever count was reached
    end
  rescue IOError
    pos = line = -1
  end
  [pos, lineno, line]
end
250
+ end
251
+ end