rubysl-rexml 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +8 -0
- data/Gemfile +4 -0
- data/LICENSE +25 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/lib/rexml/attlistdecl.rb +62 -0
- data/lib/rexml/attribute.rb +185 -0
- data/lib/rexml/cdata.rb +67 -0
- data/lib/rexml/child.rb +96 -0
- data/lib/rexml/comment.rb +80 -0
- data/lib/rexml/doctype.rb +271 -0
- data/lib/rexml/document.rb +230 -0
- data/lib/rexml/dtd/attlistdecl.rb +10 -0
- data/lib/rexml/dtd/dtd.rb +51 -0
- data/lib/rexml/dtd/elementdecl.rb +17 -0
- data/lib/rexml/dtd/entitydecl.rb +56 -0
- data/lib/rexml/dtd/notationdecl.rb +39 -0
- data/lib/rexml/element.rb +1227 -0
- data/lib/rexml/encoding.rb +71 -0
- data/lib/rexml/encodings/CP-1252.rb +103 -0
- data/lib/rexml/encodings/EUC-JP.rb +35 -0
- data/lib/rexml/encodings/ICONV.rb +22 -0
- data/lib/rexml/encodings/ISO-8859-1.rb +7 -0
- data/lib/rexml/encodings/ISO-8859-15.rb +72 -0
- data/lib/rexml/encodings/SHIFT-JIS.rb +37 -0
- data/lib/rexml/encodings/SHIFT_JIS.rb +1 -0
- data/lib/rexml/encodings/UNILE.rb +34 -0
- data/lib/rexml/encodings/US-ASCII.rb +30 -0
- data/lib/rexml/encodings/UTF-16.rb +35 -0
- data/lib/rexml/encodings/UTF-8.rb +18 -0
- data/lib/rexml/entity.rb +166 -0
- data/lib/rexml/formatters/default.rb +109 -0
- data/lib/rexml/formatters/pretty.rb +138 -0
- data/lib/rexml/formatters/transitive.rb +56 -0
- data/lib/rexml/functions.rb +382 -0
- data/lib/rexml/instruction.rb +70 -0
- data/lib/rexml/light/node.rb +196 -0
- data/lib/rexml/namespace.rb +47 -0
- data/lib/rexml/node.rb +75 -0
- data/lib/rexml/output.rb +24 -0
- data/lib/rexml/parent.rb +166 -0
- data/lib/rexml/parseexception.rb +51 -0
- data/lib/rexml/parsers/baseparser.rb +503 -0
- data/lib/rexml/parsers/lightparser.rb +60 -0
- data/lib/rexml/parsers/pullparser.rb +196 -0
- data/lib/rexml/parsers/sax2parser.rb +238 -0
- data/lib/rexml/parsers/streamparser.rb +46 -0
- data/lib/rexml/parsers/treeparser.rb +97 -0
- data/lib/rexml/parsers/ultralightparser.rb +56 -0
- data/lib/rexml/parsers/xpathparser.rb +698 -0
- data/lib/rexml/quickpath.rb +266 -0
- data/lib/rexml/rexml.rb +32 -0
- data/lib/rexml/sax2listener.rb +97 -0
- data/lib/rexml/source.rb +251 -0
- data/lib/rexml/streamlistener.rb +92 -0
- data/lib/rexml/syncenumerator.rb +33 -0
- data/lib/rexml/text.rb +344 -0
- data/lib/rexml/undefinednamespaceexception.rb +8 -0
- data/lib/rexml/validation/relaxng.rb +559 -0
- data/lib/rexml/validation/validation.rb +155 -0
- data/lib/rexml/validation/validationexception.rb +9 -0
- data/lib/rexml/xmldecl.rb +119 -0
- data/lib/rexml/xmltokens.rb +18 -0
- data/lib/rexml/xpath.rb +66 -0
- data/lib/rexml/xpath_parser.rb +792 -0
- data/lib/rubysl/rexml.rb +1 -0
- data/lib/rubysl/rexml/version.rb +5 -0
- data/rubysl-rexml.gemspec +23 -0
- data/spec/attribute/clone_spec.rb +10 -0
- data/spec/attribute/element_spec.rb +22 -0
- data/spec/attribute/equal_value_spec.rb +17 -0
- data/spec/attribute/hash_spec.rb +12 -0
- data/spec/attribute/initialize_spec.rb +28 -0
- data/spec/attribute/inspect_spec.rb +19 -0
- data/spec/attribute/namespace_spec.rb +23 -0
- data/spec/attribute/node_type_spec.rb +9 -0
- data/spec/attribute/prefix_spec.rb +17 -0
- data/spec/attribute/remove_spec.rb +19 -0
- data/spec/attribute/to_s_spec.rb +13 -0
- data/spec/attribute/to_string_spec.rb +14 -0
- data/spec/attribute/value_spec.rb +14 -0
- data/spec/attribute/write_spec.rb +22 -0
- data/spec/attribute/xpath_spec.rb +19 -0
- data/spec/attributes/add_spec.rb +6 -0
- data/spec/attributes/append_spec.rb +6 -0
- data/spec/attributes/delete_all_spec.rb +30 -0
- data/spec/attributes/delete_spec.rb +26 -0
- data/spec/attributes/each_attribute_spec.rb +24 -0
- data/spec/attributes/each_spec.rb +24 -0
- data/spec/attributes/element_reference_spec.rb +18 -0
- data/spec/attributes/element_set_spec.rb +25 -0
- data/spec/attributes/get_attribute_ns_spec.rb +13 -0
- data/spec/attributes/get_attribute_spec.rb +28 -0
- data/spec/attributes/initialize_spec.rb +18 -0
- data/spec/attributes/length_spec.rb +6 -0
- data/spec/attributes/namespaces_spec.rb +5 -0
- data/spec/attributes/prefixes_spec.rb +23 -0
- data/spec/attributes/shared/add.rb +17 -0
- data/spec/attributes/shared/length.rb +12 -0
- data/spec/attributes/size_spec.rb +6 -0
- data/spec/attributes/to_a_spec.rb +20 -0
- data/spec/cdata/clone_spec.rb +9 -0
- data/spec/cdata/initialize_spec.rb +24 -0
- data/spec/cdata/shared/to_s.rb +11 -0
- data/spec/cdata/to_s_spec.rb +6 -0
- data/spec/cdata/value_spec.rb +6 -0
- data/spec/document/add_element_spec.rb +30 -0
- data/spec/document/add_spec.rb +60 -0
- data/spec/document/clone_spec.rb +19 -0
- data/spec/document/doctype_spec.rb +14 -0
- data/spec/document/encoding_spec.rb +21 -0
- data/spec/document/expanded_name_spec.rb +15 -0
- data/spec/document/new_spec.rb +37 -0
- data/spec/document/node_type_spec.rb +7 -0
- data/spec/document/root_spec.rb +11 -0
- data/spec/document/stand_alone_spec.rb +18 -0
- data/spec/document/version_spec.rb +13 -0
- data/spec/document/write_spec.rb +38 -0
- data/spec/document/xml_decl_spec.rb +14 -0
- data/spec/element/add_attribute_spec.rb +40 -0
- data/spec/element/add_attributes_spec.rb +21 -0
- data/spec/element/add_element_spec.rb +38 -0
- data/spec/element/add_namespace_spec.rb +23 -0
- data/spec/element/add_text_spec.rb +23 -0
- data/spec/element/attribute_spec.rb +16 -0
- data/spec/element/attributes_spec.rb +18 -0
- data/spec/element/cdatas_spec.rb +23 -0
- data/spec/element/clone_spec.rb +28 -0
- data/spec/element/comments_spec.rb +20 -0
- data/spec/element/delete_attribute_spec.rb +38 -0
- data/spec/element/delete_element_spec.rb +50 -0
- data/spec/element/delete_namespace_spec.rb +24 -0
- data/spec/element/document_spec.rb +17 -0
- data/spec/element/each_element_with_attribute_spec.rb +34 -0
- data/spec/element/each_element_with_text_spec.rb +30 -0
- data/spec/element/get_text_spec.rb +17 -0
- data/spec/element/has_attributes_spec.rb +16 -0
- data/spec/element/has_elements_spec.rb +17 -0
- data/spec/element/has_text_spec.rb +15 -0
- data/spec/element/inspect_spec.rb +26 -0
- data/spec/element/instructions_spec.rb +20 -0
- data/spec/element/namespace_spec.rb +26 -0
- data/spec/element/namespaces_spec.rb +31 -0
- data/spec/element/new_spec.rb +34 -0
- data/spec/element/next_element_spec.rb +18 -0
- data/spec/element/node_type_spec.rb +7 -0
- data/spec/element/prefixes_spec.rb +22 -0
- data/spec/element/previous_element_spec.rb +19 -0
- data/spec/element/raw_spec.rb +23 -0
- data/spec/element/root_spec.rb +27 -0
- data/spec/element/text_spec.rb +45 -0
- data/spec/element/texts_spec.rb +15 -0
- data/spec/element/whitespace_spec.rb +22 -0
- data/spec/node/each_recursive_spec.rb +20 -0
- data/spec/node/find_first_recursive_spec.rb +24 -0
- data/spec/node/index_in_parent_spec.rb +14 -0
- data/spec/node/next_sibling_node_spec.rb +20 -0
- data/spec/node/parent_spec.rb +20 -0
- data/spec/node/previous_sibling_node_spec.rb +20 -0
- data/spec/shared/each_element.rb +35 -0
- data/spec/shared/elements_to_a.rb +35 -0
- data/spec/text/append_spec.rb +9 -0
- data/spec/text/clone_spec.rb +9 -0
- data/spec/text/comparison_spec.rb +24 -0
- data/spec/text/empty_spec.rb +11 -0
- data/spec/text/indent_text_spec.rb +23 -0
- data/spec/text/inspect_spec.rb +7 -0
- data/spec/text/new_spec.rb +48 -0
- data/spec/text/node_type_spec.rb +7 -0
- data/spec/text/normalize_spec.rb +7 -0
- data/spec/text/read_with_substitution_spec.rb +12 -0
- data/spec/text/to_s_spec.rb +17 -0
- data/spec/text/unnormalize_spec.rb +7 -0
- data/spec/text/value_spec.rb +36 -0
- data/spec/text/wrap_spec.rb +20 -0
- data/spec/text/write_with_substitution_spec.rb +32 -0
- metadata +385 -0
@@ -0,0 +1,266 @@
|
|
1
|
+
require 'rexml/functions'
|
2
|
+
require 'rexml/xmltokens'
|
3
|
+
|
4
|
+
module REXML
|
5
|
+
class QuickPath
|
6
|
+
include Functions
|
7
|
+
include XMLTokens
|
8
|
+
|
9
|
+
EMPTY_HASH = {}
|
10
|
+
|
11
|
+
# Returns element 0 of the node list produced by QuickPath::match for
# +path+ evaluated against +element+ (nil when that list has no element 0).
#
# element::    the node the path is evaluated against
# path::       the path expression; match raises if it is nil
# namespaces:: prefix => namespace mapping handed on to match
def QuickPath::first(element, path, namespaces=EMPTY_HASH)
  found = match(element, path, namespaces)
  found[0]
end
|
14
|
+
|
15
|
+
# Runs QuickPath::match and hands every node of its result to the given
# block. A nil (or false) +path+ is replaced by "*" before matching.
#
# element::    the node the path is evaluated against
# path::       the path expression, or nil for "*"
# namespaces:: prefix => namespace mapping handed on to match
def QuickPath::each(element, path, namespaces=EMPTY_HASH, &block)
  path ||= "*"
  hits = match(element, path, namespaces)
  hits.each(&block)
end
|
19
|
+
|
20
|
+
# Entry point of the path evaluation: picks the initial node list from the
# shape of +path+ and passes it to QuickPath::filter.
#
# * "/..." (single leading slash) starts at element.root; a bare "/"
#   returns [element.root.parent] directly.
# * "axis::..." and anything not recognised starts at [element].
# * "*..." and paths starting with "[", "!", ":" or a word character start
#   at element.to_a.
#
# As a side effect the given +namespaces+ mapping is stored in
# Functions::namespace_context. Raises a RuntimeError when +path+ is nil.
def QuickPath::match(element, path, namespaces=EMPTY_HASH)
  raise "nil is not a valid xpath" unless path
  Functions::namespace_context = namespaces

  case path
  when /^\/([^\/]|$)/u
    # match on root
    below_root = path[1..-1]
    return [element.root.parent] if below_root == ''
    filter([element.root], below_root)
  when /^[-\w]*::/u
    filter([element], path)
  when /^\*/u, /^[\[!\w:]/u
    # match on child
    filter(element.to_a, path)
  else
    filter([element], path)
  end
end
|
44
|
+
|
45
|
+
# Given an array of nodes, narrows it according to +path+ and returns the
# resulting array of nodes.
#
# The leading step of +path+ is recognised with the regular expressions
# below, applied (axis, child, function, name test, predicate, "...", "..",
# ".", "*"), and the remainder of the path is processed recursively. A nil
# or empty +path+, or an empty +elements+, returns +elements+ unchanged; a
# path whose leading step is not recognised yields [].
#
# Note that the name-test branch removes non-matching entries from the
# +elements+ array it was given (delete_if), so callers must pass an array
# they are willing to have modified.
def QuickPath::filter elements, path
  return elements if path.nil? or path == '' or elements.size == 0
  case path
  when /^\/\//u                   # Descendant
    return axe( elements, "descendant-or-self", $' )
  when /^\/?\b(\w[-\w]*)\b::/u              # Axe
    return axe( elements, $1, $' )
  when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u  # Child
    rest = $'
    results = []
    elements.each do |element|
      results |= filter( element.to_a, rest )
    end
    return results
  when /^\/?(\w[-\w]*)\(/u              # / Function
    return function( elements, $1, $' )
  when Namespace::NAMESPLIT   # Element name
    name = $2
    ns = $1
    rest = $'
    # Keep only Elements whose expanded name equals the name, or whose
    # local name matches and whose namespace is the one registered for the
    # prefix in Functions.namespace_context.
    elements.delete_if do |element|
      !(element.kind_of? Element and
        (element.expanded_name == name or
         (element.name == name and
          element.namespace == Functions.namespace_context[ns])))
    end
    return filter( elements, rest )
  when /^\/\[/u
    # "/[...]": apply the predicate to the children of each Element.
    children_predicate = path[1..-1]
    matches = []
    elements.each do |element|
      matches |= predicate( element.to_a, children_predicate ) if element.kind_of? Element
    end
    return matches
  when /^\[/u                        # Predicate
    return predicate( elements, path )
  when /^\/?\.\.\./u                    # Ancestor
    return axe( elements, "ancestor", $' )
  when /^\/?\.\./u                      # Parent
    rest = $'
    # A node without a parent contributes nothing; without compact a nil
    # would be carried along in the node list.
    return filter( elements.collect{|e|e.parent}.compact, rest )
  when /^\/?\./u                        # Self
    return filter( elements, $' )
  when /^\*/u                          # Any
    rest = $'
    results = []
    elements.each do |element|
      results |= filter( [element], rest ) if element.kind_of? Element
    end
    return results
  end
  return []
end
|
105
|
+
|
106
|
+
# Applies the axis named +axe_name+ to every node in +elements+, filters the
# nodes reached that way with +rest+ (the remainder of the path) and returns
# them without duplicates.
#
# elements:: array of context nodes
# axe_name:: axis name; names ending in "-or-self" additionally include the
#            context nodes that pass +rest+. Recognised: names starting
#            with "descendant" or "ancestor", "self", "child", "attribute",
#            "parent", "following-sibling" and "previous-sibling". Any
#            other name contributes no nodes.
# rest::     path remainder; for the attribute axis it is used as the
#            attribute name to look up
#
# Nodes that have no parent / sibling / such attribute contribute nothing;
# nil is never placed into the returned array.
def QuickPath::axe( elements, axe_name, rest )
  matches = []
  matches = filter( elements.dup, rest ) if axe_name =~ /-or-self$/u
  case axe_name
  when /^descendant/u
    elements.each do |element|
      matches |= filter( element.to_a, "descendant-or-self::#{rest}" ) if element.kind_of? Element
    end
  when /^ancestor/u
    # Walk up the parent chain of every context node.
    elements.each do |element|
      while element.parent
        matches << element.parent
        element = element.parent
      end
    end
    matches = filter( matches, rest )
  when "self"
    matches = filter( elements, rest )
  when "child"
    elements.each do |element|
      matches |= filter( element.to_a, rest ) if element.kind_of? Element
    end
  when "attribute"
    elements.each do |element|
      next unless element.kind_of? Element
      found = element.attributes[ rest ]
      # Skip elements that do not carry the attribute.
      matches << found unless found.nil?
    end
  when "parent"
    parents = elements.collect{|element| element.parent}.compact.uniq
    matches = filter( parents, rest )
  when "following-sibling"
    followers = elements.collect{|element| element.next_sibling}.compact.uniq
    matches = filter( followers, rest )
  when "previous-sibling"
    predecessors = elements.collect{|element| element.previous_sibling}.compact.uniq
    matches = filter( predecessors, rest )
  end
  return matches.uniq
end
|
143
|
+
|
144
|
+
# A predicate filters a node-set with respect to an axis to produce a
# new node-set. For each node in the node-set to be filtered, the
# PredicateExpr is evaluated with that node as the context node, with
# the number of nodes in the node-set as the context size, and with the
# proximity position of the node in the node-set with respect to the
# axis as the context position; if PredicateExpr evaluates to true for
# that node, the node is included in the new node-set; otherwise, it is
# not included.
#
# A PredicateExpr is evaluated by evaluating the Expr and converting
# the result to a boolean. If the result is a number, the result will
# be converted to true if the number is equal to the context position
# and will be converted to false otherwise; if the result is not a
# number, then the result will be converted as if by a call to the
# boolean function. Thus a location path para[3] is equivalent to
# para[position()=3].
#
# elements:: array of nodes to test
# path::     path text beginning with the "[" that opens the predicate
#
# Returns the nodes that pass, further narrowed by QuickPath::filter with
# whatever follows the closing "]". Raises a RuntimeError when the opening
# "[" is never closed.
#
# The predicate text is rewritten into a Ruby expression and run through
# eval once per node, with Functions.node / Functions.pair set to the node
# and [position, size]. The rewrites are purely textual, so they also touch
# quoted literals inside the predicate.
#
# SECURITY NOTE(review): because of the eval, a path string coming from an
# untrusted source can execute arbitrary Ruby. Only pass trusted paths.
def QuickPath::predicate( elements, path )
  # Find the index of the "]" matching the leading "[", honouring nesting.
  ind = 1
  bcount = 1
  while bcount > 0
    # Running off the end means the bracket is never closed; without this
    # check the loop would never terminate.
    raise "unbalanced '[' in predicate: #{path}" if ind >= path.size
    bcount += 1 if path[ind] == ?[
    bcount -= 1 if path[ind] == ?]
    ind += 1
  end
  ind -= 1
  predicate = path[1..ind-1]
  rest = path[ind+1..-1]

  # have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
  predicate.gsub!( /([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)/u ) {
    "#$1 #$2 #$3 and #$3 #$4 #$5"
  }
  # Let's do some Ruby trickery to avoid some work:
  predicate.gsub!( /&/u, "&&" )
  # A lone "=" becomes Ruby's "==", but "!=", "<=", ">=" and "==" must be
  # left alone (doubling their "=" would produce invalid Ruby).
  predicate.gsub!( /([!<>=]?)=/u ) {
    $1.empty? ? "==" : "#$1="
  }
  predicate.gsub!( /@(\w[-\w.]*)/u ) {
    "attribute(\"#$1\")"
  }
  predicate.gsub!( /\bmod\b/u, "%" )
  # Function names may contain "-", which is not valid in a Ruby method
  # name; map it to "_".
  predicate.gsub!( /\b(\w[-\w.]*\()/u ) {
    fname = $1
    fname.gsub( /-/u, "_" )
  }

  Functions.pair = [ 0, elements.size ]
  results = []
  elements.each do |element|
    Functions.pair[0] += 1
    Functions.node = element
    res = eval( predicate )
    case res
    when true, String
      results << element
    when Integer
      # A numeric result selects the node at that position. Integer also
      # covers the former Fixnum constant, which no longer exists in
      # current Ruby versions.
      results << element if Functions.pair[0] == res
    end
  end
  return filter( results, rest )
end
|
205
|
+
|
206
|
+
# Looks up attribute +name+ on the node currently stored in Functions.node.
# Returns nil when that node is not an Element.
def QuickPath::attribute(name)
  if Functions.node.kind_of?(Element)
    Functions.node.attributes[name]
  end
end
|
209
|
+
|
210
|
+
# Returns the name of the node currently stored in Functions.node, or nil
# when that node is not an Element.
def QuickPath::name
  if Functions.node.kind_of?(Element)
    Functions.node.name
  end
end
|
213
|
+
|
214
|
+
# Forwards any class-level call QuickPath does not define itself to the
# method of the same name on Functions, passing the arguments through.
#
# Any StandardError raised by that call is re-raised as a RuntimeError whose
# message names the method and its arguments, followed by the original
# message. Non-StandardError exceptions (interrupts, exit requests, out of
# memory, ...) are deliberately not intercepted and propagate unchanged.
def QuickPath::method_missing( id, *args )
  begin
    Functions.send( id.id2name, *args )
  rescue StandardError => error
    raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{error.message}"
  end
end
|
221
|
+
|
222
|
+
# Evaluates the function step +fname+ for every node in +elements+ and
# returns the nodes it selects.
#
# elements:: array of context nodes
# fname::    name of the method to invoke on Functions
# rest::     path text following the "(" of the call; handed to parse_args
#            to obtain the argument list
#
# For each node, Functions.node and Functions.pair ([position, size]) are
# set before the call. A node is kept when the call returns true, or when
# it returns an integer equal to the node's 1-based position.
def QuickPath::function( elements, fname, rest )
  args = parse_args( elements, rest )
  Functions.pair = [0, elements.size]
  results = []
  elements.each do |element|
    Functions.pair[0] += 1
    Functions.node = element
    res = Functions.send( fname, *args )
    case res
    when true
      results << element
    when Integer
      # Integer also covers the former Fixnum constant, which no longer
      # exists in current Ruby versions.
      results << element if Functions.pair[0] == res
    end
  end
  return results
end
|
239
|
+
|
240
|
+
# Consumes +string+ (destructively, one character at a time) up to the ")"
# that closes a function call and returns the arguments collected so far.
#
# element:: the node list the enclosing function step works on; passed on
#           when a nested "(" is met
# string::  the text following the "(" of the call; it is modified in place
#
# Returns the arguments array when a ")" is reached, or "" when the text
# runs out first.
#
# NOTE(review): text buffered when ")" is reached is not added to the
# arguments, and the buffer is not cleared after a ",". `evaluate` is not
# defined in this class, so it presumably goes through method_missing to
# Functions - confirm the intended behaviour before relying on this method.
def QuickPath::parse_args( element, string )
  # /.*?(?:\)|,)/
  arguments = []
  buffer = ""
  while string and string != ""
    c = string[0]
    # \A with /m always removes exactly the leading character. The former
    # /^./ could not consume a leading newline ("." does not match it and
    # "^" also matches after it), so the loop never terminated on such
    # input.
    string.sub!(/\A./mu, "")
    case c
    when ?,
      # if depth = 1, then we start a new argument
      arguments << evaluate( buffer )
      #arguments << evaluate( string[0..count] )
    when ?(
      # start a new method call
      function( element, buffer, string )
      buffer = ""
    when ?)
      # close the method call and return arguments
      return arguments
    else
      buffer << c
    end
  end
  ""
end
|
265
|
+
end
|
266
|
+
end
|
data/lib/rexml/rexml.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
# REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
|
3
|
+
#
|
4
|
+
# REXML is a _pure_ Ruby, XML 1.0 conforming,
|
5
|
+
# non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
|
6
|
+
# toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
|
7
|
+
# tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
|
8
|
+
# and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
|
9
|
+
# includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
|
10
|
+
# Ruby 1.8, REXML is included in the standard Ruby distribution.
|
11
|
+
#
|
12
|
+
# Main page:: http://www.germane-software.com/software/rexml
|
13
|
+
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
|
14
|
+
# Version:: 3.1.7.3
|
15
|
+
# Date:: 2007/275
|
16
|
+
# Revision:: $Revision$
|
17
|
+
#
|
18
|
+
# This API documentation can be downloaded from the REXML home page, or can
|
19
|
+
# be accessed online[http://www.germane-software.com/software/rexml_doc]
|
20
|
+
#
|
21
|
+
# A tutorial is available in the REXML distribution in docs/tutorial.html,
|
22
|
+
# or can be accessed
|
23
|
+
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
|
24
|
+
module REXML
  # Release number and date of this REXML distribution.
  VERSION = "3.1.7.3"
  DATE = "2007/275"

  # Copyright notice; \xC2\xA9 is the UTF-8 byte pair of the copyright sign.
  COPYRIGHT = "Copyright \xC2\xA9 2001-2006 Sean Russell <ser@germane-software.com>"

  # The "$Revision$" keyword text with a "$Revision:" prefix and any "$"
  # characters removed, then stripped of surrounding whitespace.
  REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip

  # Mixed-case aliases of the constants above.
  Version = VERSION
  Copyright = COPYRIGHT
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module REXML
  # A template for SAX2 listeners: every callback is defined with an empty
  # body, so a listener can mix this module in and override only the events
  # it cares about.
  #
  # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
  # processed; REXML doesn't yet handle doctype entity declarations, so you
  # have to parse them out yourself.
  #
  # === Missing methods from SAX2
  #  ignorable_whitespace
  # === Methods extending SAX2
  # +WARNING+
  # These methods are certainly going to change, until DTDs are fully
  # supported.  Be aware of this.
  #  start_document
  #  end_document
  #  doctype
  #  elementdecl
  #  attlistdecl
  #  entitydecl
  #  notationdecl
  #  cdata
  #  xmldecl
  #  comment
  module SAX2Listener
    def start_document; end

    def end_document; end

    def start_prefix_mapping(prefix, uri); end

    def end_prefix_mapping(prefix); end

    def start_element(uri, localname, qname, attributes); end

    def end_element(uri, localname, qname); end

    def characters(text); end

    def processing_instruction(target, data); end

    # Handles a doctype declaration. Any attributes of the doctype which are
    # not supplied will be nil.  EG, <!DOCTYPE me PUBLIC "foo" "bar">
    # @p name the name of the doctype; EG, "me"
    # @p pub_sys "PUBLIC", "SYSTEM", or nil.  EG, "PUBLIC"
    # @p long_name the supplied long name, or nil.  EG, "foo"
    # @p uri the uri of the doctype, or nil.  EG, "bar"
    def doctype(name, pub_sys, long_name, uri); end

    # Called for an ATTLIST declaration inside a doctype. The content is the
    # declaration itself, unparsed: <!ATTLIST el attr CDATA #REQUIRED> arrives
    # as "el attr CDATA #REQUIRED". The same holds for all of the .*decl
    # methods.
    def attlistdecl(element, pairs, contents); end

    # <!ELEMENT ...>
    def elementdecl(content); end

    # <!ENTITY ...>
    # The entity declaration is delivered split into its parts. It can come
    # in a number of formats, but in general (example, result):
    #  <!ENTITY % YN '"Yes"'>
    #  ["%", "YN", "'\"Yes\"'", "\""]
    #  <!ENTITY % YN 'Yes'>
    #  ["%", "YN", "'Yes'", "s"]
    #  <!ENTITY WhatHeSaid "He said %YN;">
    #  ["WhatHeSaid", "\"He said %YN;\"", "YN"]
    #  <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
    #  ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
    # NOTE(review): how these parts are distributed over +name+ and +decl+
    # is decided by the caller and is not visible here - confirm.
    def entitydecl(name, decl); end

    # <!NOTATION ...>
    def notationdecl(content); end

    # Called when <![CDATA[ ... ]]> is encountered in a document.
    # @p content "..."
    def cdata(content); end

    # Called when an XML PI is encountered in the document.
    # EG: <?xml version="1.0" encoding="utf"?>
    # @p version the version attribute value.  EG, "1.0"
    # @p encoding the encoding attribute value, or nil.  EG, "utf"
    # @p standalone the standalone attribute value, or nil.  EG, nil
    def xmldecl(version, encoding, standalone); end

    # Called when a comment is encountered.
    # @p comment The content of the comment
    def comment(comment); end

    def progress(position); end
  end
end
|
data/lib/rexml/source.rb
ADDED
@@ -0,0 +1,251 @@
|
|
1
|
+
require 'rexml/encoding'
|
2
|
+
|
3
|
+
module REXML
|
4
|
+
# Generates Source-s. USE THIS CLASS.
|
5
|
+
class SourceFactory
|
6
|
+
# Generates a Source object
|
7
|
+
# @param arg Either a String, or an IO
|
8
|
+
# @return a Source, or nil if a bad argument was given
|
9
|
+
def SourceFactory::create_from(arg)
|
10
|
+
if arg.kind_of? String
|
11
|
+
Source.new(arg)
|
12
|
+
elsif arg.respond_to? :read and
|
13
|
+
arg.respond_to? :readline and
|
14
|
+
arg.respond_to? :nil? and
|
15
|
+
arg.respond_to? :eof?
|
16
|
+
IOSource.new(arg)
|
17
|
+
elsif arg.kind_of? Source
|
18
|
+
arg
|
19
|
+
else
|
20
|
+
raise "#{arg.class} is not a valid input stream. It must walk \n"+
|
21
|
+
"like either a String, an IO, or a Source."
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# A Source can be searched for patterns, and wraps buffers and other
# objects and provides consumption of text
class Source
  include Encoding

  # buffer:: the text that has not been consumed yet (what is read next)
  # line::   the line number of the last consumed text
  attr_reader :buffer, :line, :encoding

  # Constructor
  # @param arg must be a String, and should be a valid XML document
  # @param encoding if non-null, sets the encoding of the source to this
  # value, overriding all encoding detection
  def initialize(arg, encoding=nil)
    @orig = @buffer = arg
    self.encoding = encoding || check_encoding( @buffer )
    @line = 0
  end

  # Inherited from Encoding
  # Overridden to support optimized en/decoding: after the inherited setter
  # accepted +enc+, the encoded form of ">" is cached in @line_break and,
  # for anything other than UTF_8, the buffer is run through decode.
  def encoding=(enc)
    return unless super
    @line_break = encode( '>' )
    needs_decoding = enc != UTF_8
    @buffer = decode(@buffer) if needs_decoding
    @to_utf = needs_decoding
  end

  # Scans the source for a given pattern.  Note, that this is not your
  # usual scan() method.  For one thing, the pattern argument has some
  # requirements; for another, the source can be consumed.  You can easily
  # confuse this method.  Originally, the patterns were easier
  # to construct and this method more robust, because this method
  # generated search regexes on the fly; however, this was
  # computationally expensive and slowed down the entire REXML package
  # considerably, since this is by far the most commonly called method.
  # @param pattern must be a Regexp, and must be in the form of
  # /^\s*(#{your pattern, with no groups})(.*)/.  The first group
  # will be returned; the second group is used if the consume flag is
  # set.
  # @param consume if true, the pattern returned will be consumed, leaving
  # everything after it in the Source.
  # @return the result of String#scan on the buffer, or nil when there is
  # no buffer.
  def scan(pattern, cons=false)
    return nil if @buffer.nil?
    found = @buffer.scan(pattern)
    # $' is the text following the last match made by the scan above.
    @buffer = $' if cons and found.size > 0
    found
  end

  # No-op here; there is nothing further to read for a String source.
  def read
  end

  # Drops everything up to and including the first match of +pattern+ from
  # the buffer; the buffer is left alone when there is no match.
  def consume( pattern )
    found = pattern.match( @buffer )
    @buffer = found.post_match if found
  end

  # Matches +pattern+ against the buffer without consuming anything.
  # +char+ is not used by this implementation.
  def match_to( char, pattern )
    pattern.match(@buffer)
  end

  # Matches +pattern+ against the buffer and replaces the buffer with the
  # text after the match. When nothing matches, the buffer becomes nil.
  # +char+ is not used by this implementation.
  def match_to_consume( char, pattern )
    found = pattern.match(@buffer)
    @buffer = found ? found.post_match : nil
    found
  end

  # Matches +pattern+ against the buffer and returns the MatchData (or nil).
  # With +cons+ set, a successful match is removed from the buffer together
  # with everything before it.
  def match(pattern, cons=false)
    found = pattern.match(@buffer)
    @buffer = found.post_match if cons and found
    found
  end

  # @return true if the Source is exhausted
  def empty?
    @buffer == ""
  end

  # @return the index of the remaining buffer within the original text
  def position
    @orig.index( @buffer )
  end

  # @return the current line in the source
  # The original text is cut up with String#split (no arguments) and the
  # first 31 characters of the buffer are looked up among the pieces with
  # grep; the index of the last hit is returned, or nil without a hit.
  def current_line
    pieces = @orig.split
    last_hit = pieces.grep(@buffer[0..30]).last
    pieces.index( last_hit ) if last_hit
  end
end
|
127
|
+
|
128
|
+
# A Source that wraps an IO.  See the Source class for method
# documentation
class IOSource < Source
  #attr_reader :block_size

  # block_size has been deprecated
  #
  # @param arg the IO (or IO-like object) to read from
  # @param block_size unused
  # @param encoding if given, applied through encoding= before the first
  #   chunk is read, instead of looking for a byte order mark
  def initialize(arg, block_size=500, encoding=nil)
    @er_source = @source = arg
    @to_utf = false

    # Determining the encoding is a deceptively difficult issue to resolve.
    # First, we check the first two bytes for UTF-16.  Then we
    # assume that the encoding is at least ASCII enough for the '>', and
    # we read until we get one of those.  This gives us the XML declaration,
    # if there is one.  If there isn't one, the file MUST be UTF-8, as per
    # the XML spec.  If there is one, we can determine the encoding from
    # it.
    @buffer = ""
    # read returns nil at end of input; treat that as "no bytes".
    str = @source.read( 2 ) || ''
    # Compare numeric byte values: indexing a String yields a one-character
    # String on Ruby >= 1.9, which never equals an Integer.
    lead = str.unpack('C*')
    if encoding
      self.encoding = encoding
    elsif lead[0] == 0xfe && lead[1] == 0xff
      @line_break = "\000>"
    elsif lead[0] == 0xff && lead[1] == 0xfe
      @line_break = ">\000"
    elsif lead[0] == 0xef && lead[1] == 0xbb
      third = @source.read(1)
      str += third if third
      # EF BB BF is the UTF-8 byte order mark; drop it.
      str = '' if str.unpack('C*')[2] == 0xbf
      @line_break = ">"
    else
      @line_break = ">"
    end
    super str+@source.readline( @line_break )
  end

  # Like Source#scan, but when nothing is found, more input is read from
  # the IO (one @line_break-terminated chunk at a time) until the pattern
  # matches the buffer or the IO can no longer be read.
  def scan(pattern, cons=false)
    rv = super
    # You'll notice that this next section is very similar to the same
    # section in match(), but just a liiittle different.  This is
    # because it is a touch faster to do it this way with scan()
    # than the way match() does it; enough faster to warrant duplicating
    # some code
    if rv.size == 0
      until @buffer =~ pattern or @source.nil?
        begin
          # READLINE OPT
          #str = @source.read(@block_size)
          str = @source.readline(@line_break)
          str = decode(str) if @to_utf and str
          @buffer << str
        rescue => error
          # Conversion failures must surface. The constant is only looked
          # up when Iconv is actually loaded, so a missing Iconv cannot turn
          # an ordinary end-of-input into a NameError.
          raise if defined?(Iconv::IllegalSequence) && error.kind_of?(Iconv::IllegalSequence)
          # Anything else (typically end of input) ends reading.
          @source = nil
        end
      end
      rv = super
    end
    # taint no longer exists in current Ruby versions.
    rv.taint if rv.respond_to?(:taint)
    rv
  end

  # Appends the next @line_break-terminated chunk of the IO to the buffer.
  # When that fails (for instance at end of input) the IO is dropped by
  # setting @source to nil.
  def read
    begin
      str = @source.readline(@line_break)
      str = decode(str) if @to_utf and str
      @buffer << str
    rescue StandardError
      @source = nil
    end
  end

  # Consumes the first match of +pattern+, reading more input if needed.
  def consume( pattern )
    match( pattern, true )
  end

  # Like Source#match, but keeps reading from the IO until the pattern
  # matches or the IO can no longer be read.
  def match( pattern, cons=false )
    rv = pattern.match(@buffer)
    @buffer = rv.post_match if cons and rv
    while !rv and @source
      begin
        str = @source.readline(@line_break)
        str = decode(str) if @to_utf and str
        @buffer << str
        rv = pattern.match(@buffer)
        @buffer = rv.post_match if cons and rv
      rescue
        @source = nil
      end
    end
    # taint no longer exists in current Ruby versions.
    rv.taint if rv.respond_to?(:taint)
    rv
  end

  # @return true when the buffer is empty and the IO is gone or at eof
  def empty?
    super and ( @source.nil? || @source.eof? )
  end

  # @return 0 for a pipe, otherwise the current position of the IO.
  # IO-like objects without #stat (which the factory does not require)
  # are asked for their position directly.
  def position
    return 0 if @er_source.respond_to?(:stat) && @er_source.stat.pipe?
    @er_source.pos
  end

  # @return the current line in the source, as [pos, lineno, line]; pos and
  # line are -1 when the IO raises an IOError.
  # Note that this rewinds the IO and reads it line by line again until the
  # previous position is reached or passed.
  def current_line
    begin
      pos = @er_source.pos        # The byte position in the source
      lineno = @er_source.lineno  # The XML < position in the source
      @er_source.rewind
      line = 0                    # The \r\n position in the source
      begin
        while @er_source.pos < pos
          @er_source.readline
          line += 1
        end
      rescue
      end
    rescue IOError
      pos = -1
      line = -1
    end
    [pos, lineno, line]
  end
end
|
251
|
+
end
|