rubysl-rexml 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.travis.yml +8 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE +25 -0
  6. data/README.md +29 -0
  7. data/Rakefile +1 -0
  8. data/lib/rexml/attlistdecl.rb +62 -0
  9. data/lib/rexml/attribute.rb +185 -0
  10. data/lib/rexml/cdata.rb +67 -0
  11. data/lib/rexml/child.rb +96 -0
  12. data/lib/rexml/comment.rb +80 -0
  13. data/lib/rexml/doctype.rb +271 -0
  14. data/lib/rexml/document.rb +230 -0
  15. data/lib/rexml/dtd/attlistdecl.rb +10 -0
  16. data/lib/rexml/dtd/dtd.rb +51 -0
  17. data/lib/rexml/dtd/elementdecl.rb +17 -0
  18. data/lib/rexml/dtd/entitydecl.rb +56 -0
  19. data/lib/rexml/dtd/notationdecl.rb +39 -0
  20. data/lib/rexml/element.rb +1227 -0
  21. data/lib/rexml/encoding.rb +71 -0
  22. data/lib/rexml/encodings/CP-1252.rb +103 -0
  23. data/lib/rexml/encodings/EUC-JP.rb +35 -0
  24. data/lib/rexml/encodings/ICONV.rb +22 -0
  25. data/lib/rexml/encodings/ISO-8859-1.rb +7 -0
  26. data/lib/rexml/encodings/ISO-8859-15.rb +72 -0
  27. data/lib/rexml/encodings/SHIFT-JIS.rb +37 -0
  28. data/lib/rexml/encodings/SHIFT_JIS.rb +1 -0
  29. data/lib/rexml/encodings/UNILE.rb +34 -0
  30. data/lib/rexml/encodings/US-ASCII.rb +30 -0
  31. data/lib/rexml/encodings/UTF-16.rb +35 -0
  32. data/lib/rexml/encodings/UTF-8.rb +18 -0
  33. data/lib/rexml/entity.rb +166 -0
  34. data/lib/rexml/formatters/default.rb +109 -0
  35. data/lib/rexml/formatters/pretty.rb +138 -0
  36. data/lib/rexml/formatters/transitive.rb +56 -0
  37. data/lib/rexml/functions.rb +382 -0
  38. data/lib/rexml/instruction.rb +70 -0
  39. data/lib/rexml/light/node.rb +196 -0
  40. data/lib/rexml/namespace.rb +47 -0
  41. data/lib/rexml/node.rb +75 -0
  42. data/lib/rexml/output.rb +24 -0
  43. data/lib/rexml/parent.rb +166 -0
  44. data/lib/rexml/parseexception.rb +51 -0
  45. data/lib/rexml/parsers/baseparser.rb +503 -0
  46. data/lib/rexml/parsers/lightparser.rb +60 -0
  47. data/lib/rexml/parsers/pullparser.rb +196 -0
  48. data/lib/rexml/parsers/sax2parser.rb +238 -0
  49. data/lib/rexml/parsers/streamparser.rb +46 -0
  50. data/lib/rexml/parsers/treeparser.rb +97 -0
  51. data/lib/rexml/parsers/ultralightparser.rb +56 -0
  52. data/lib/rexml/parsers/xpathparser.rb +698 -0
  53. data/lib/rexml/quickpath.rb +266 -0
  54. data/lib/rexml/rexml.rb +32 -0
  55. data/lib/rexml/sax2listener.rb +97 -0
  56. data/lib/rexml/source.rb +251 -0
  57. data/lib/rexml/streamlistener.rb +92 -0
  58. data/lib/rexml/syncenumerator.rb +33 -0
  59. data/lib/rexml/text.rb +344 -0
  60. data/lib/rexml/undefinednamespaceexception.rb +8 -0
  61. data/lib/rexml/validation/relaxng.rb +559 -0
  62. data/lib/rexml/validation/validation.rb +155 -0
  63. data/lib/rexml/validation/validationexception.rb +9 -0
  64. data/lib/rexml/xmldecl.rb +119 -0
  65. data/lib/rexml/xmltokens.rb +18 -0
  66. data/lib/rexml/xpath.rb +66 -0
  67. data/lib/rexml/xpath_parser.rb +792 -0
  68. data/lib/rubysl/rexml.rb +1 -0
  69. data/lib/rubysl/rexml/version.rb +5 -0
  70. data/rubysl-rexml.gemspec +23 -0
  71. data/spec/attribute/clone_spec.rb +10 -0
  72. data/spec/attribute/element_spec.rb +22 -0
  73. data/spec/attribute/equal_value_spec.rb +17 -0
  74. data/spec/attribute/hash_spec.rb +12 -0
  75. data/spec/attribute/initialize_spec.rb +28 -0
  76. data/spec/attribute/inspect_spec.rb +19 -0
  77. data/spec/attribute/namespace_spec.rb +23 -0
  78. data/spec/attribute/node_type_spec.rb +9 -0
  79. data/spec/attribute/prefix_spec.rb +17 -0
  80. data/spec/attribute/remove_spec.rb +19 -0
  81. data/spec/attribute/to_s_spec.rb +13 -0
  82. data/spec/attribute/to_string_spec.rb +14 -0
  83. data/spec/attribute/value_spec.rb +14 -0
  84. data/spec/attribute/write_spec.rb +22 -0
  85. data/spec/attribute/xpath_spec.rb +19 -0
  86. data/spec/attributes/add_spec.rb +6 -0
  87. data/spec/attributes/append_spec.rb +6 -0
  88. data/spec/attributes/delete_all_spec.rb +30 -0
  89. data/spec/attributes/delete_spec.rb +26 -0
  90. data/spec/attributes/each_attribute_spec.rb +24 -0
  91. data/spec/attributes/each_spec.rb +24 -0
  92. data/spec/attributes/element_reference_spec.rb +18 -0
  93. data/spec/attributes/element_set_spec.rb +25 -0
  94. data/spec/attributes/get_attribute_ns_spec.rb +13 -0
  95. data/spec/attributes/get_attribute_spec.rb +28 -0
  96. data/spec/attributes/initialize_spec.rb +18 -0
  97. data/spec/attributes/length_spec.rb +6 -0
  98. data/spec/attributes/namespaces_spec.rb +5 -0
  99. data/spec/attributes/prefixes_spec.rb +23 -0
  100. data/spec/attributes/shared/add.rb +17 -0
  101. data/spec/attributes/shared/length.rb +12 -0
  102. data/spec/attributes/size_spec.rb +6 -0
  103. data/spec/attributes/to_a_spec.rb +20 -0
  104. data/spec/cdata/clone_spec.rb +9 -0
  105. data/spec/cdata/initialize_spec.rb +24 -0
  106. data/spec/cdata/shared/to_s.rb +11 -0
  107. data/spec/cdata/to_s_spec.rb +6 -0
  108. data/spec/cdata/value_spec.rb +6 -0
  109. data/spec/document/add_element_spec.rb +30 -0
  110. data/spec/document/add_spec.rb +60 -0
  111. data/spec/document/clone_spec.rb +19 -0
  112. data/spec/document/doctype_spec.rb +14 -0
  113. data/spec/document/encoding_spec.rb +21 -0
  114. data/spec/document/expanded_name_spec.rb +15 -0
  115. data/spec/document/new_spec.rb +37 -0
  116. data/spec/document/node_type_spec.rb +7 -0
  117. data/spec/document/root_spec.rb +11 -0
  118. data/spec/document/stand_alone_spec.rb +18 -0
  119. data/spec/document/version_spec.rb +13 -0
  120. data/spec/document/write_spec.rb +38 -0
  121. data/spec/document/xml_decl_spec.rb +14 -0
  122. data/spec/element/add_attribute_spec.rb +40 -0
  123. data/spec/element/add_attributes_spec.rb +21 -0
  124. data/spec/element/add_element_spec.rb +38 -0
  125. data/spec/element/add_namespace_spec.rb +23 -0
  126. data/spec/element/add_text_spec.rb +23 -0
  127. data/spec/element/attribute_spec.rb +16 -0
  128. data/spec/element/attributes_spec.rb +18 -0
  129. data/spec/element/cdatas_spec.rb +23 -0
  130. data/spec/element/clone_spec.rb +28 -0
  131. data/spec/element/comments_spec.rb +20 -0
  132. data/spec/element/delete_attribute_spec.rb +38 -0
  133. data/spec/element/delete_element_spec.rb +50 -0
  134. data/spec/element/delete_namespace_spec.rb +24 -0
  135. data/spec/element/document_spec.rb +17 -0
  136. data/spec/element/each_element_with_attribute_spec.rb +34 -0
  137. data/spec/element/each_element_with_text_spec.rb +30 -0
  138. data/spec/element/get_text_spec.rb +17 -0
  139. data/spec/element/has_attributes_spec.rb +16 -0
  140. data/spec/element/has_elements_spec.rb +17 -0
  141. data/spec/element/has_text_spec.rb +15 -0
  142. data/spec/element/inspect_spec.rb +26 -0
  143. data/spec/element/instructions_spec.rb +20 -0
  144. data/spec/element/namespace_spec.rb +26 -0
  145. data/spec/element/namespaces_spec.rb +31 -0
  146. data/spec/element/new_spec.rb +34 -0
  147. data/spec/element/next_element_spec.rb +18 -0
  148. data/spec/element/node_type_spec.rb +7 -0
  149. data/spec/element/prefixes_spec.rb +22 -0
  150. data/spec/element/previous_element_spec.rb +19 -0
  151. data/spec/element/raw_spec.rb +23 -0
  152. data/spec/element/root_spec.rb +27 -0
  153. data/spec/element/text_spec.rb +45 -0
  154. data/spec/element/texts_spec.rb +15 -0
  155. data/spec/element/whitespace_spec.rb +22 -0
  156. data/spec/node/each_recursive_spec.rb +20 -0
  157. data/spec/node/find_first_recursive_spec.rb +24 -0
  158. data/spec/node/index_in_parent_spec.rb +14 -0
  159. data/spec/node/next_sibling_node_spec.rb +20 -0
  160. data/spec/node/parent_spec.rb +20 -0
  161. data/spec/node/previous_sibling_node_spec.rb +20 -0
  162. data/spec/shared/each_element.rb +35 -0
  163. data/spec/shared/elements_to_a.rb +35 -0
  164. data/spec/text/append_spec.rb +9 -0
  165. data/spec/text/clone_spec.rb +9 -0
  166. data/spec/text/comparison_spec.rb +24 -0
  167. data/spec/text/empty_spec.rb +11 -0
  168. data/spec/text/indent_text_spec.rb +23 -0
  169. data/spec/text/inspect_spec.rb +7 -0
  170. data/spec/text/new_spec.rb +48 -0
  171. data/spec/text/node_type_spec.rb +7 -0
  172. data/spec/text/normalize_spec.rb +7 -0
  173. data/spec/text/read_with_substitution_spec.rb +12 -0
  174. data/spec/text/to_s_spec.rb +17 -0
  175. data/spec/text/unnormalize_spec.rb +7 -0
  176. data/spec/text/value_spec.rb +36 -0
  177. data/spec/text/wrap_spec.rb +20 -0
  178. data/spec/text/write_with_substitution_spec.rb +32 -0
  179. metadata +385 -0
@@ -0,0 +1,266 @@
1
+ require 'rexml/functions'
2
+ require 'rexml/xmltokens'
3
+
4
+ module REXML
5
+ class QuickPath
6
+ include Functions
7
+ include XMLTokens
8
+
9
+ EMPTY_HASH = {}
10
+
11
# Returns the first node selected by +path+ relative to +element+, or nil
# when the path selects nothing.
# @param element the context node the path is evaluated against
# @param path the path expression (must not be nil; see QuickPath::match)
# @param namespaces prefix => namespace mapping handed on to QuickPath::match
def QuickPath::first(element, path, namespaces=EMPTY_HASH)
  selected = match(element, path, namespaces)
  selected.first
end
14
+
15
# Yields every node selected by +path+ relative to +element+ to the block.
# A nil (or false) path is treated as "*".
# @param element the context node the path is evaluated against
# @param path the path expression, or nil for "*"
# @param namespaces prefix => namespace mapping handed on to QuickPath::match
def QuickPath::each(element, path, namespaces=EMPTY_HASH, &block)
  path ||= "*"
  selected = match(element, path, namespaces)
  selected.each( &block )
end
19
+
20
# Evaluates +path+ against +element+ and returns an array of the selected
# nodes.
#
# The first characters of the path decide which nodes the evaluation starts
# from; the rest of the work is done by QuickPath::filter.
# @param element the context node
# @param path the path expression
# @param namespaces stored in Functions.namespace_context for the duration
# of the evaluation
# @raise RuntimeError if path is nil
def QuickPath::match(element, path, namespaces=EMPTY_HASH)
  raise "nil is not a valid xpath" unless path
  Functions::namespace_context = namespaces
  case path
  when /^\/([^\/]|$)/u
    # A single leading slash: start from the root, with the slash removed.
    remainder = path[1..-1]
    # "/" on its own selects the parent of the root element.
    return [element.root.parent] if remainder == ''
    start_nodes, expression = [element.root], remainder
  when /^[-\w]*::/u
    # An explicit axis is applied to the context node itself.
    start_nodes, expression = [element], path
  when /^\*/u, /^[\[!\w:]/u
    # A wildcard, name or predicate is matched against the children.
    start_nodes, expression = element.to_a, path
  else
    start_nodes, expression = [element], path
  end
  filter(start_nodes, expression)
end
44
+
45
# Narrows +elements+ down to the nodes selected by +path+ and returns the
# resulting array.
#
# The leading token of +path+ picks the step to apply; the step then recurses
# with whatever is left of the path. A nil or empty path, or an empty node
# array, returns +elements+ untouched. A path that starts with none of the
# recognised tokens returns [].
#
# Note that the element-name step removes non-matching entries from the
# array it was given (delete_if), so the caller's array is modified.
def QuickPath::filter(elements, path)
  return elements if path.nil? or path == '' or elements.size == 0
  case path
  when /^\/\//u                                   # Descendant
    axe( elements, "descendant-or-self", $' )
  when /^\/?\b(\w[-\w]*)\b::/u                    # Axe
    axe( elements, $1, $' )
  when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u   # Child
    # The lookahead consumes only the slash, so the remainder is the step
    # itself, applied to the children of every node.
    remainder = $'
    elements.inject([]) { |found, node|
      found | filter( node.to_a, remainder )
    }
  when /^\/?(\w[-\w]*)\(/u                        # / Function
    function( elements, $1, $' )
  when Namespace::NAMESPLIT                       # Element name
    prefix, local_name, remainder = $1, $2, $'
    elements.delete_if do |node|
      # Drop anything that is not an element ...
      next true unless node.kind_of? Element
      # ... keep an element whose expanded name is the requested name ...
      next false if node.expanded_name == local_name
      # ... otherwise keep it only if the local name matches and its
      # namespace is the one registered for the prefix.
      !(node.name == local_name and
        node.namespace == Functions.namespace_context[prefix])
    end
    filter( elements, remainder )
  when /^\/\[/u
    # "/[...]": the predicate (path minus the slash) is applied to the
    # children of each element.
    elements.inject([]) { |found, node|
      if node.kind_of? Element
        found | predicate( node.to_a, path[1..-1] )
      else
        found
      end
    }
  when /^\[/u                                     # Predicate
    predicate( elements, path )
  when /^\/?\.\.\./u                              # Ancestor
    axe( elements, "ancestor", $' )
  when /^\/?\.\./u                                # Parent
    parents = elements.collect { |node| node.parent }
    filter( parents, $' )
  when /^\/?\./u                                  # Self
    filter( elements, $' )
  when /^\*/u                                     # Any
    remainder = $'
    elements.inject([]) { |found, node|
      if node.kind_of? Element
        found | filter( [node], remainder )
      else
        found
      end
    }
  else
    []
  end
end
105
+
106
# Applies the axis named +axe_name+ to +elements+ and filters the nodes it
# reaches with +rest+ (the part of the path after "axis::").
#
# Recognised axes: descendant(-or-self), ancestor(-or-self), self, child,
# attribute, parent, following-sibling and previous-sibling. An unrecognised
# axis name yields [] unless it ends in "-or-self", in which case only the
# filtered context nodes are returned.
#
# For the attribute axis +rest+ is used as the attribute name and the result
# holds whatever Element#attributes[] returns for it.
#
# Nodes that do not exist (a missing attribute, the parent of a parentless
# node, a sibling past either end) are left out of the result instead of
# being reported as nil entries.
# @return a duplicate-free array of nodes
def QuickPath::axe( elements, axe_name, rest )
  matches = []
  # The "-or-self" variants also consider the context nodes themselves. A
  # copy is filtered because filter may delete entries from its argument.
  matches = filter( elements.dup, rest ) if axe_name =~ /-or-self$/u
  case axe_name
  when /^descendant/u
    elements.each do |element|
      matches |= filter( element.to_a, "descendant-or-self::#{rest}" ) if element.kind_of? Element
    end
  when /^ancestor/u
    elements.each do |element|
      # Walk up with a separate cursor rather than reassigning the block
      # parameter.
      node = element
      while node.parent
        matches << node.parent
        node = node.parent
      end
    end
    matches = filter( matches, rest )
  when "self"
    matches = filter( elements, rest )
  when "child"
    elements.each do |element|
      matches |= filter( element.to_a, rest ) if element.kind_of? Element
    end
  when "attribute"
    elements.each do |element|
      next unless element.kind_of? Element
      value = element.attributes[ rest ]
      # An element without the attribute contributes nothing.
      matches << value unless value.nil?
    end
  when "parent"
    parents = elements.collect{|element| element.parent}.compact.uniq
    matches = filter( parents, rest )
  when "following-sibling"
    # Only the immediately following sibling of each node is considered.
    siblings = elements.collect{|element| element.next_sibling}.compact.uniq
    matches = filter( siblings, rest )
  when "previous-sibling"
    # Only the immediately preceding sibling of each node is considered.
    siblings = elements.collect{|element| element.previous_sibling}.compact.uniq
    matches = filter( siblings, rest )
  end
  return matches.uniq
end
143
+
144
+ # A predicate filters a node-set with respect to an axis to produce a
145
+ # new node-set. For each node in the node-set to be filtered, the
146
+ # PredicateExpr is evaluated with that node as the context node, with
147
+ # the number of nodes in the node-set as the context size, and with the
148
+ # proximity position of the node in the node-set with respect to the
149
+ # axis as the context position; if PredicateExpr evaluates to true for
150
+ # that node, the node is included in the new node-set; otherwise, it is
151
+ # not included.
152
+ #
153
+ # A PredicateExpr is evaluated by evaluating the Expr and converting
154
+ # the result to a boolean. If the result is a number, the result will
155
+ # be converted to true if the number is equal to the context position
156
+ # and will be converted to false otherwise; if the result is not a
157
+ # number, then the result will be converted as if by a call to the
158
+ # boolean function. Thus a location path para[3] is equivalent to
159
+ # para[position()=3].
160
# Applies the bracketed predicate at the start of +path+ to +elements+ and
# filters the surviving nodes with the rest of the path.
#
# The predicate text is rewritten into a Ruby expression and evaluated once
# per node, with Functions.node set to the node and Functions.pair set to
# [position, size]. A node is kept when the result is true, is an Integer
# equal to the node's position, or is a String.
#
# SECURITY: the rewritten predicate is run through eval, so a path must never
# come from an untrusted source.
#
# NOTE: the local variable names of this method are visible to the evaluated
# expression; they are deliberately left unchanged.
#
# @param elements the nodes to test
# @param path a path beginning with "["
# @raise RuntimeError if the opening bracket is never closed
def QuickPath::predicate( elements, path )
  # Find the "]" that closes the bracket at path[0], allowing for nesting.
  ind = 1
  bcount = 1
  while bcount > 0
    # Without this check an unbalanced "[" would spin forever, because
    # indexing past the end of the string keeps returning nil.
    raise "unterminated predicate in xpath '#{path}'" if ind >= path.length
    bcount += 1 if path[ind] == ?[
    bcount -= 1 if path[ind] == ?]
    ind += 1
  end
  ind -= 1
  predicate = path[1..ind-1]
  rest = path[ind+1..-1]

  # have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
  predicate.gsub!( /([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)/u ) {
    "#$1 #$2 #$3 and #$3 #$4 #$5"
  }
  # Let's do some Ruby trickery to avoid some work:
  predicate.gsub!( /&/u, "&&" )
  # A bare "=" becomes "=="; "!=", "<=" and ">=" are already valid Ruby and
  # must be left alone (doubling their "=" produces a syntax error).
  predicate.gsub!( /([!<>]?)=/u ) {
    $1 == "" ? "==" : "#$1="
  }
  predicate.gsub!( /@(\w[-\w.]*)/u ) {
    "attribute(\"#$1\")"
  }
  predicate.gsub!( /\bmod\b/u, "%" )
  # XPath function names use "-", Ruby method names use "_".
  predicate.gsub!( /\b(\w[-\w.]*\()/u ) {
    fname = $1
    fname.gsub( /-/u, "_" )
  }

  Functions.pair = [ 0, elements.size ]
  results = []
  elements.each do |element|
    Functions.pair[0] += 1
    Functions.node = element
    res = eval( predicate )
    case res
    when true
      results << element
    when Integer
      # A numeric predicate selects by position. Integer covers the Fixnum
      # the original tested for and still exists on current Rubies.
      results << element if Functions.pair[0] == res
    when String
      results << element
    end
  end
  return filter( results, rest )
end
205
+
206
# Looks up attribute +name+ on the node currently held in Functions.node.
# Returns nil when that node is not an Element.
def QuickPath::attribute( name )
  if Functions.node.kind_of? Element
    Functions.node.attributes[name]
  end
end
209
+
210
# Returns the name of the node currently held in Functions.node, or nil when
# that node is not an Element.
def QuickPath::name()
  if Functions.node.kind_of? Element
    Functions.node.name
  end
end
213
+
214
# Forwards any method QuickPath does not define to the Functions module, so
# that function calls inside an evaluated predicate end up there.
#
# Failures are re-raised as a RuntimeError naming the method and its
# arguments. Only StandardError is wrapped: Interrupt, SystemExit and other
# non-standard exceptions pass through untouched instead of being turned
# into a RuntimeError.
# @raise RuntimeError when the forwarded call fails
def QuickPath::method_missing( id, *args )
  begin
    Functions.send( id.id2name, *args )
  rescue StandardError
    raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{$!.message}"
  end
end
221
+
222
# Calls the Functions method +fname+ once per node and returns the nodes for
# which the result is true or is an Integer equal to the node's position.
#
# Functions.node and Functions.pair ([position, size]) are set before each
# call. The arguments are parsed once, from +rest+, by parse_args.
#
# NOTE(review): fname is taken straight from the path text and passed to
# Functions.send, so paths should not come from untrusted input.
# NOTE(review): whatever follows the function call in the path is not
# applied to the result here - confirm whether that is intended.
def QuickPath::function( elements, fname, rest )
  args = parse_args( elements, rest )
  Functions.pair = [0, elements.size]
  results = []
  elements.each do |element|
    Functions.pair[0] += 1
    Functions.node = element
    res = Functions.send( fname, *args )
    case res
    when true
      results << element
    when Integer
      # Integer covers the Fixnum the original tested for and still exists
      # on current Rubies, where Fixnum has been removed.
      results << element if Functions.pair[0] == res
    end
  end
  return results
end
239
+
240
# Reads the argument list of a function call from +string+, one character at
# a time, and returns the collected arguments once ")" is reached. Returns ""
# if the string runs out before a ")" is seen.
#
# +string+ is consumed in place (characters are removed from the front).
#
# NOTE(review): the buffer is not cleared after a "," and the text pending
# when ")" arrives is not added to the result; "evaluate" is not defined in
# this class, so it is dispatched through method_missing. Left as found -
# confirm the intended behaviour before relying on multi-argument calls.
def QuickPath::parse_args( element, string )
  # /.*?(?:\)|,)/
  arguments = []
  buffer = ""
  while string and string != ""
    c = string[0]
    # Drop exactly the first character. The pattern is anchored to the start
    # of the string and lets "." match a newline: with /^./ a leading newline
    # was never removed, so the loop could not terminate.
    string.sub!(/\A./mu, "")
    case c
    when ?,
      # if depth = 1, then we start a new argument
      arguments << evaluate( buffer )
      #arguments << evaluate( string[0..count] )
    when ?(
      # start a new method call
      function( element, buffer, string )
      buffer = ""
    when ?)
      # close the method call and return arguments
      return arguments
    else
      buffer << c
    end
  end
  ""
end
265
+ end
266
+ end
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+ # REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
3
+ #
4
+ # REXML is a _pure_ Ruby, XML 1.0 conforming,
5
+ # non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
6
+ # toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
7
+ # tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
8
+ # and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
9
+ # includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
10
+ # Ruby 1.8, REXML is included in the standard Ruby distribution.
11
+ #
12
+ # Main page:: http://www.germane-software.com/software/rexml
13
+ # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
14
+ # Version:: 3.1.7.3
15
+ # Date:: 2007/275
16
+ # Revision:: $Revision$
17
+ #
18
+ # This API documentation can be downloaded from the REXML home page, or can
19
+ # be accessed online[http://www.germane-software.com/software/rexml_doc]
20
+ #
21
+ # A tutorial is available in the REXML distribution in docs/tutorial.html,
22
+ # or can be accessed
23
+ # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
24
module REXML
  # Release number and release date of this REXML version.
  VERSION = "3.1.7.3"
  DATE = "2007/275"

  # Copyright notice; the escaped bytes are the UTF-8 encoding of the
  # copyright sign.
  COPYRIGHT = "Copyright \xC2\xA9 2001-2006 Sean Russell <ser@germane-software.com>"

  # The version-control keyword with its "$Revision:" / "$" markers and
  # surrounding whitespace removed.
  REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip

  # Mixed-case aliases of the constants above.
  Version = VERSION
  Copyright = COPYRIGHT
end
@@ -0,0 +1,97 @@
1
module REXML
  # A template for stream parser listeners: every callback is defined here
  # with an empty body.
  #
  # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
  # processed; REXML doesn't yet handle doctype entity declarations, so you
  # have to parse them out yourself.
  #
  # === Missing methods from SAX2
  #  ignorable_whitespace
  # === Methods extending SAX2
  # +WARNING+
  # These methods are certainly going to change, until DTDs are fully
  # supported.  Be aware of this.
  #  start_document
  #  end_document
  #  doctype
  #  elementdecl
  #  attlistdecl
  #  entitydecl
  #  notationdecl
  #  cdata
  #  xmldecl
  #  comment
  module SAX2Listener
    def start_document; end

    def end_document; end

    def start_prefix_mapping(prefix, uri); end

    def end_prefix_mapping(prefix); end

    def start_element(uri, localname, qname, attributes); end

    def end_element(uri, localname, qname); end

    def characters(text); end

    def processing_instruction(target, data); end

    # Handles a doctype declaration. Any attributes of the doctype which are
    # not supplied will be nil.  EG, <!DOCTYPE me PUBLIC "foo" "bar">
    # @p name the name of the doctype; EG, "me"
    # @p pub_sys "PUBLIC", "SYSTEM", or nil.  EG, "PUBLIC"
    # @p long_name the supplied long name, or nil.  EG, "foo"
    # @p uri the uri of the doctype, or nil.  EG, "bar"
    def doctype(name, pub_sys, long_name, uri); end

    # If a doctype includes an ATTLIST declaration, it will cause this
    # method to be called.  The content is the declaration itself, unparsed.
    # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
    # attr CDATA #REQUIRED".  This is the same for all of the .*decl
    # methods.
    def attlistdecl(element, pairs, contents); end

    # <!ELEMENT ...>
    def elementdecl(content); end

    # <!ENTITY ...>
    # The entity declaration can arrive in a number of formats, but in
    # general it looks like this (example, result):
    #  <!ENTITY % YN '"Yes"'>
    #  ["%", "YN", "'\"Yes\"'", "\""]
    #  <!ENTITY % YN 'Yes'>
    #  ["%", "YN", "'Yes'", "s"]
    #  <!ENTITY WhatHeSaid "He said %YN;">
    #  ["WhatHeSaid", "\"He said %YN;\"", "YN"]
    #  <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
    #  ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
    # NOTE(review): the examples describe a single array, while this method
    # takes exactly two arguments - confirm how the parser calls it.
    def entitydecl(name, decl); end

    # <!NOTATION ...>
    def notationdecl(content); end

    # Called when <![CDATA[ ... ]]> is encountered in a document.
    # @p content "..."
    def cdata(content); end

    # Called when an XML PI is encountered in the document.
    # EG: <?xml version="1.0" encoding="utf"?>
    # @p version the version attribute value.  EG, "1.0"
    # @p encoding the encoding attribute value, or nil.  EG, "utf"
    # @p standalone the standalone attribute value, or nil.  EG, nil
    # NOTE(review): earlier documentation also listed a "spaced" flag; the
    # method accepts only the three arguments above.
    def xmldecl(version, encoding, standalone); end

    # Called when a comment is encountered.
    # @p comment The content of the comment
    def comment(comment); end

    def progress(position); end
  end
end
@@ -0,0 +1,251 @@
1
+ require 'rexml/encoding'
2
+
3
+ module REXML
4
# Generates Source-s.  USE THIS CLASS.
class SourceFactory
  # Wraps +arg+ in the kind of Source that suits it.
  # @param arg a String, an object that behaves like an IO (responds to
  # read, readline, nil? and eof?), or an existing Source
  # @return a Source for a String, an IOSource for an IO-like object, or
  # +arg+ itself when it already is a Source
  # @raise RuntimeError when +arg+ is none of the above
  def SourceFactory::create_from(arg)
    return Source.new(arg) if arg.kind_of?(String)

    io_like = [:read, :readline, :nil?, :eof?].all? { |message|
      arg.respond_to?(message)
    }
    return IOSource.new(arg) if io_like

    return arg if arg.kind_of?(Source)

    raise "#{arg.class} is not a valid input stream. It must walk \n"+
      "like either a String, an IO, or a Source."
  end
end
25
+
26
# A Source can be searched for patterns, and wraps buffers and other
# objects and provides consumption of text
class Source
  include Encoding
  # The current buffer (what we're going to read next)
  attr_reader :buffer
  # The line number of the last consumed text. Set to 0 by the constructor;
  # nothing else in this class assigns it.
  attr_reader :line
  attr_reader :encoding

  # Constructor
  # @param arg must be a String, and should be a valid XML document
  # @param encoding if non-null, sets the encoding of the source to this
  # value, overriding all encoding detection
  def initialize(arg, encoding=nil)
    @orig = @buffer = arg
    # Without an explicit encoding, detect one from the text itself.
    self.encoding = encoding || check_encoding( @buffer )
    @line = 0
  end


  # Inherited from Encoding
  # Overridden to support optimized en/decoding: when the inherited setter
  # reports a change, the encoded '>' is cached and, for anything other than
  # UTF-8, the buffer is decoded and flagged for conversion.
  def encoding=(enc)
    return unless super
    @line_break = encode( '>' )
    needs_decoding = (enc != UTF_8)
    @buffer = decode(@buffer) if needs_decoding
    @to_utf = needs_decoding
  end

  # Scans the source for a given pattern.  Note, that this is not your
  # usual scan() method.  For one thing, the pattern argument has some
  # requirements; for another, the source can be consumed.  You can easily
  # confuse this method.  Originally, the patterns were easier
  # to construct and this method more robust, because this method
  # generated search regexes on the fly; however, this was
  # computationally expensive and slowed down the entire REXML package
  # considerably, since this is by far the most commonly called method.
  # @param pattern must be a Regexp, and must be in the form of
  # /^\s*(#{your pattern, with no groups})(.*)/.  The first group
  # will be returned; the second group is used if the consume flag is
  # set.
  # @param cons if true, the pattern returned will be consumed, leaving
  # everything after it in the Source.
  # @return the pattern, if found, or nil if the Source is empty or the
  # pattern is not found.
  def scan(pattern, cons=false)
    return nil if @buffer.nil?
    found = @buffer.scan(pattern)
    # $' is the text after the last match made by the scan above.
    @buffer = $' if cons and !found.empty?
    found
  end

  # Does nothing here; IOSource overrides it to pull in more text.
  def read
  end

  # Drops everything up to and including the first match of +pattern+ from
  # the buffer. The buffer is left alone when nothing matches.
  def consume( pattern )
    md = pattern.match( @buffer )
    @buffer = md.post_match if md
  end

  # Matches +pattern+ against the buffer without consuming anything; +char+
  # is not used.
  def match_to( char, pattern )
    pattern.match(@buffer)
  end

  # Matches +pattern+ against the buffer and keeps only the text after the
  # match; +char+ is not used.
  # NOTE(review): when nothing matches, the buffer becomes nil.
  def match_to_consume( char, pattern )
    md = pattern.match(@buffer)
    @buffer = md ? md.post_match : nil
    md
  end

  # Matches +pattern+ against the buffer and returns the match data (nil if
  # there is none). With +cons+ set, a successful match is consumed.
  def match(pattern, cons=false)
    md = pattern.match(@buffer)
    @buffer = md.post_match if cons and md
    md
  end

  # @return true if the Source is exhausted
  def empty?
    @buffer == ""
  end

  # @return the index at which the remaining buffer occurs in the original
  # text
  def position
    @orig.index( @buffer )
  end

  # @return the current line in the source
  # NOTE(review): as written this splits the original text on whitespace and
  # returns the index of the last piece that equals the first 31 characters
  # of the buffer (nil if there is none) - confirm this is what callers
  # expect.
  def current_line
    pieces = @orig.split
    hits = pieces.grep( @buffer[0..30] )
    last_hit = hits.kind_of?(Array) ? hits[-1] : hits
    pieces.index( last_hit ) if last_hit
  end
end
127
+
128
# A Source that wraps an IO.  See the Source class for method
# documentation
class IOSource < Source
  #attr_reader :block_size

  # block_size has been deprecated
  # @param arg the IO-like object to read from
  # @param block_size ignored
  # @param encoding if given, applied before the first chunk is read
  # NOTE(review): the superclass constructor is called without the encoding,
  # so it runs its own detection on the first chunk afterwards - confirm that
  # an explicit encoding is meant to survive that.
  def initialize(arg, block_size=500, encoding=nil)
    @er_source = @source = arg
    @to_utf = false

    # Determining the encoding is a deceptively difficult issue to resolve.
    # First, we check the first two bytes for UTF-16.  Then we
    # assume that the encoding is at least ASCII enough for the '>', and
    # we read until we get one of those.  This gives us the XML declaration,
    # if there is one.  If there isn't one, the file MUST be UTF-8, as per
    # the XML spec.  If there is one, we can determine the encoding from
    # it.
    @buffer = ""
    # read returns nil on an empty stream
    str = @source.read( 2 ) || ''
    # Compare byte values: indexing a String yields an Integer only on
    # Ruby 1.8, whereas unpack yields Integers everywhere.
    lead = str.unpack( 'C*' )
    if encoding
      self.encoding = encoding
    elsif 0xfe == lead[0] && 0xff == lead[1]
      @line_break = "\000>"
    elsif 0xff == lead[0] && 0xfe == lead[1]
      @line_break = ">\000"
    elsif 0xef == lead[0] && 0xbb == lead[1]
      third = @source.read(1) || ''
      str += third
      # A complete EF BB BF marker is dropped from the text.
      str = '' if 0xbf == third.unpack( 'C*' )[0]
      @line_break = ">"
    else
      @line_break = ">"
    end
    # readline raises at end of input, so only call it when there is more.
    super( @source.eof? ? str : str+@source.readline( @line_break ) )
  end

  def scan(pattern, cons=false)
    rv = super
    # You'll notice that this next section is very similar to the same
    # section in match(), but just a liiittle different.  This is
    # because it is a touch faster to do it this way with scan()
    # than the way match() does it; enough faster to warrant duplicating
    # some code
    if rv.size == 0
      until @buffer =~ pattern or @source.nil?
        begin
          # READLINE OPT
          #str = @source.read(@block_size)
          str = @source.readline(@line_break)
          str = decode(str) if @to_utf and str
          @buffer << str
        rescue StandardError => e
          # Illegal byte sequences are still reported to the caller. The
          # constant is only consulted when Iconv is actually loaded, so its
          # absence no longer turns end-of-input into a NameError.
          raise if defined?(Iconv::IllegalSequence) and e.kind_of?(Iconv::IllegalSequence)
          # Anything else (typically end of input) ends reading.
          @source = nil
        end
      end
      rv = super
    end
    # taint does not exist on every Ruby version
    rv.taint if rv.respond_to?(:taint)
    rv
  end

  # Appends the next chunk (up to the line break marker) to the buffer.
  # Any ordinary error, including end of input, ends reading; exceptions
  # outside StandardError (Interrupt, SystemExit, ...) are no longer
  # swallowed.
  def read
    begin
      str = @source.readline(@line_break)
      str = decode(str) if @to_utf and str
      @buffer << str
    rescue StandardError
      @source = nil
    end
  end

  def consume( pattern )
    match( pattern, true )
  end

  # Like Source#match, but keeps reading from the IO until the pattern
  # matches or the input is exhausted.
  def match( pattern, cons=false )
    rv = pattern.match(@buffer)
    @buffer = $' if cons and rv
    while !rv and @source
      begin
        str = @source.readline(@line_break)
        str = decode(str) if @to_utf and str
        @buffer << str
        rv = pattern.match(@buffer)
        @buffer = $' if cons and rv
      rescue
        @source = nil
      end
    end
    # taint does not exist on every Ruby version
    rv.taint if rv.respond_to?(:taint)
    rv
  end

  def empty?
    super and ( @source.nil? || @source.eof? )
  end

  # @return the position in the underlying IO, or 0 for a pipe or for an
  # object that cannot report a position
  def position
    # IO-like objects such as StringIO have no stat.
    return 0 if @er_source.respond_to?(:stat) and @er_source.stat.pipe?
    @er_source.respond_to?(:pos) ? @er_source.pos : 0
  end

  # @return [pos, lineno, line]: the position in the IO, the IO's own lineno
  # counter and the number of lines read from the start to reach pos.  pos
  # and line are -1 when the IO cannot be repositioned.
  def current_line
    begin
      pos = @er_source.pos        # The byte position in the source
      lineno = @er_source.lineno  # The XML < position in the source
      @er_source.rewind
      line = 0                    # The \r\n position in the source
      begin
        while @er_source.pos < pos
          @er_source.readline
          line += 1
        end
      rescue
      end
      # Counting may stop beyond pos; go back to where reading left off.
      @er_source.seek( pos )
    rescue IOError, SystemCallError
      # SystemCallError covers the Errno errors raised by streams that
      # cannot seek.
      pos = -1
      line = -1
    end
    [pos, lineno, line]
  end
end
251
+ end