rubysl-rexml 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +8 -0
- data/Gemfile +4 -0
- data/LICENSE +25 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/lib/rexml/attlistdecl.rb +62 -0
- data/lib/rexml/attribute.rb +185 -0
- data/lib/rexml/cdata.rb +67 -0
- data/lib/rexml/child.rb +96 -0
- data/lib/rexml/comment.rb +80 -0
- data/lib/rexml/doctype.rb +271 -0
- data/lib/rexml/document.rb +230 -0
- data/lib/rexml/dtd/attlistdecl.rb +10 -0
- data/lib/rexml/dtd/dtd.rb +51 -0
- data/lib/rexml/dtd/elementdecl.rb +17 -0
- data/lib/rexml/dtd/entitydecl.rb +56 -0
- data/lib/rexml/dtd/notationdecl.rb +39 -0
- data/lib/rexml/element.rb +1227 -0
- data/lib/rexml/encoding.rb +71 -0
- data/lib/rexml/encodings/CP-1252.rb +103 -0
- data/lib/rexml/encodings/EUC-JP.rb +35 -0
- data/lib/rexml/encodings/ICONV.rb +22 -0
- data/lib/rexml/encodings/ISO-8859-1.rb +7 -0
- data/lib/rexml/encodings/ISO-8859-15.rb +72 -0
- data/lib/rexml/encodings/SHIFT-JIS.rb +37 -0
- data/lib/rexml/encodings/SHIFT_JIS.rb +1 -0
- data/lib/rexml/encodings/UNILE.rb +34 -0
- data/lib/rexml/encodings/US-ASCII.rb +30 -0
- data/lib/rexml/encodings/UTF-16.rb +35 -0
- data/lib/rexml/encodings/UTF-8.rb +18 -0
- data/lib/rexml/entity.rb +166 -0
- data/lib/rexml/formatters/default.rb +109 -0
- data/lib/rexml/formatters/pretty.rb +138 -0
- data/lib/rexml/formatters/transitive.rb +56 -0
- data/lib/rexml/functions.rb +382 -0
- data/lib/rexml/instruction.rb +70 -0
- data/lib/rexml/light/node.rb +196 -0
- data/lib/rexml/namespace.rb +47 -0
- data/lib/rexml/node.rb +75 -0
- data/lib/rexml/output.rb +24 -0
- data/lib/rexml/parent.rb +166 -0
- data/lib/rexml/parseexception.rb +51 -0
- data/lib/rexml/parsers/baseparser.rb +503 -0
- data/lib/rexml/parsers/lightparser.rb +60 -0
- data/lib/rexml/parsers/pullparser.rb +196 -0
- data/lib/rexml/parsers/sax2parser.rb +238 -0
- data/lib/rexml/parsers/streamparser.rb +46 -0
- data/lib/rexml/parsers/treeparser.rb +97 -0
- data/lib/rexml/parsers/ultralightparser.rb +56 -0
- data/lib/rexml/parsers/xpathparser.rb +698 -0
- data/lib/rexml/quickpath.rb +266 -0
- data/lib/rexml/rexml.rb +32 -0
- data/lib/rexml/sax2listener.rb +97 -0
- data/lib/rexml/source.rb +251 -0
- data/lib/rexml/streamlistener.rb +92 -0
- data/lib/rexml/syncenumerator.rb +33 -0
- data/lib/rexml/text.rb +344 -0
- data/lib/rexml/undefinednamespaceexception.rb +8 -0
- data/lib/rexml/validation/relaxng.rb +559 -0
- data/lib/rexml/validation/validation.rb +155 -0
- data/lib/rexml/validation/validationexception.rb +9 -0
- data/lib/rexml/xmldecl.rb +119 -0
- data/lib/rexml/xmltokens.rb +18 -0
- data/lib/rexml/xpath.rb +66 -0
- data/lib/rexml/xpath_parser.rb +792 -0
- data/lib/rubysl/rexml.rb +1 -0
- data/lib/rubysl/rexml/version.rb +5 -0
- data/rubysl-rexml.gemspec +23 -0
- data/spec/attribute/clone_spec.rb +10 -0
- data/spec/attribute/element_spec.rb +22 -0
- data/spec/attribute/equal_value_spec.rb +17 -0
- data/spec/attribute/hash_spec.rb +12 -0
- data/spec/attribute/initialize_spec.rb +28 -0
- data/spec/attribute/inspect_spec.rb +19 -0
- data/spec/attribute/namespace_spec.rb +23 -0
- data/spec/attribute/node_type_spec.rb +9 -0
- data/spec/attribute/prefix_spec.rb +17 -0
- data/spec/attribute/remove_spec.rb +19 -0
- data/spec/attribute/to_s_spec.rb +13 -0
- data/spec/attribute/to_string_spec.rb +14 -0
- data/spec/attribute/value_spec.rb +14 -0
- data/spec/attribute/write_spec.rb +22 -0
- data/spec/attribute/xpath_spec.rb +19 -0
- data/spec/attributes/add_spec.rb +6 -0
- data/spec/attributes/append_spec.rb +6 -0
- data/spec/attributes/delete_all_spec.rb +30 -0
- data/spec/attributes/delete_spec.rb +26 -0
- data/spec/attributes/each_attribute_spec.rb +24 -0
- data/spec/attributes/each_spec.rb +24 -0
- data/spec/attributes/element_reference_spec.rb +18 -0
- data/spec/attributes/element_set_spec.rb +25 -0
- data/spec/attributes/get_attribute_ns_spec.rb +13 -0
- data/spec/attributes/get_attribute_spec.rb +28 -0
- data/spec/attributes/initialize_spec.rb +18 -0
- data/spec/attributes/length_spec.rb +6 -0
- data/spec/attributes/namespaces_spec.rb +5 -0
- data/spec/attributes/prefixes_spec.rb +23 -0
- data/spec/attributes/shared/add.rb +17 -0
- data/spec/attributes/shared/length.rb +12 -0
- data/spec/attributes/size_spec.rb +6 -0
- data/spec/attributes/to_a_spec.rb +20 -0
- data/spec/cdata/clone_spec.rb +9 -0
- data/spec/cdata/initialize_spec.rb +24 -0
- data/spec/cdata/shared/to_s.rb +11 -0
- data/spec/cdata/to_s_spec.rb +6 -0
- data/spec/cdata/value_spec.rb +6 -0
- data/spec/document/add_element_spec.rb +30 -0
- data/spec/document/add_spec.rb +60 -0
- data/spec/document/clone_spec.rb +19 -0
- data/spec/document/doctype_spec.rb +14 -0
- data/spec/document/encoding_spec.rb +21 -0
- data/spec/document/expanded_name_spec.rb +15 -0
- data/spec/document/new_spec.rb +37 -0
- data/spec/document/node_type_spec.rb +7 -0
- data/spec/document/root_spec.rb +11 -0
- data/spec/document/stand_alone_spec.rb +18 -0
- data/spec/document/version_spec.rb +13 -0
- data/spec/document/write_spec.rb +38 -0
- data/spec/document/xml_decl_spec.rb +14 -0
- data/spec/element/add_attribute_spec.rb +40 -0
- data/spec/element/add_attributes_spec.rb +21 -0
- data/spec/element/add_element_spec.rb +38 -0
- data/spec/element/add_namespace_spec.rb +23 -0
- data/spec/element/add_text_spec.rb +23 -0
- data/spec/element/attribute_spec.rb +16 -0
- data/spec/element/attributes_spec.rb +18 -0
- data/spec/element/cdatas_spec.rb +23 -0
- data/spec/element/clone_spec.rb +28 -0
- data/spec/element/comments_spec.rb +20 -0
- data/spec/element/delete_attribute_spec.rb +38 -0
- data/spec/element/delete_element_spec.rb +50 -0
- data/spec/element/delete_namespace_spec.rb +24 -0
- data/spec/element/document_spec.rb +17 -0
- data/spec/element/each_element_with_attribute_spec.rb +34 -0
- data/spec/element/each_element_with_text_spec.rb +30 -0
- data/spec/element/get_text_spec.rb +17 -0
- data/spec/element/has_attributes_spec.rb +16 -0
- data/spec/element/has_elements_spec.rb +17 -0
- data/spec/element/has_text_spec.rb +15 -0
- data/spec/element/inspect_spec.rb +26 -0
- data/spec/element/instructions_spec.rb +20 -0
- data/spec/element/namespace_spec.rb +26 -0
- data/spec/element/namespaces_spec.rb +31 -0
- data/spec/element/new_spec.rb +34 -0
- data/spec/element/next_element_spec.rb +18 -0
- data/spec/element/node_type_spec.rb +7 -0
- data/spec/element/prefixes_spec.rb +22 -0
- data/spec/element/previous_element_spec.rb +19 -0
- data/spec/element/raw_spec.rb +23 -0
- data/spec/element/root_spec.rb +27 -0
- data/spec/element/text_spec.rb +45 -0
- data/spec/element/texts_spec.rb +15 -0
- data/spec/element/whitespace_spec.rb +22 -0
- data/spec/node/each_recursive_spec.rb +20 -0
- data/spec/node/find_first_recursive_spec.rb +24 -0
- data/spec/node/index_in_parent_spec.rb +14 -0
- data/spec/node/next_sibling_node_spec.rb +20 -0
- data/spec/node/parent_spec.rb +20 -0
- data/spec/node/previous_sibling_node_spec.rb +20 -0
- data/spec/shared/each_element.rb +35 -0
- data/spec/shared/elements_to_a.rb +35 -0
- data/spec/text/append_spec.rb +9 -0
- data/spec/text/clone_spec.rb +9 -0
- data/spec/text/comparison_spec.rb +24 -0
- data/spec/text/empty_spec.rb +11 -0
- data/spec/text/indent_text_spec.rb +23 -0
- data/spec/text/inspect_spec.rb +7 -0
- data/spec/text/new_spec.rb +48 -0
- data/spec/text/node_type_spec.rb +7 -0
- data/spec/text/normalize_spec.rb +7 -0
- data/spec/text/read_with_substitution_spec.rb +12 -0
- data/spec/text/to_s_spec.rb +17 -0
- data/spec/text/unnormalize_spec.rb +7 -0
- data/spec/text/value_spec.rb +36 -0
- data/spec/text/wrap_spec.rb +20 -0
- data/spec/text/write_with_substitution_spec.rb +32 -0
- metadata +385 -0
@@ -0,0 +1,266 @@
|
|
1
|
+
require 'rexml/functions'
|
2
|
+
require 'rexml/xmltokens'
|
3
|
+
|
4
|
+
module REXML
|
5
|
+
class QuickPath
|
6
|
+
include Functions
|
7
|
+
include XMLTokens
|
8
|
+
|
9
|
+
EMPTY_HASH = {}
|
10
|
+
|
11
|
+
# Convenience wrapper around QuickPath::match that keeps only the first hit.
#
# element::    context node, handed unchanged to QuickPath::match
# path::       path expression, handed unchanged to QuickPath::match
# namespaces:: namespace mapping; defaults to EMPTY_HASH
#
# Returns entry 0 of the match result, i.e. nil when nothing matched.
def QuickPath::first(element, path, namespaces=EMPTY_HASH)
  hits = match(element, path, namespaces)
  hits[0]
end
|
14
|
+
|
15
|
+
# Runs QuickPath::match and passes every matched node to the given block.
#
# element::    context node, handed unchanged to QuickPath::match
# path::       path expression; a nil/false path is replaced by "*"
# namespaces:: namespace mapping; defaults to EMPTY_HASH
#
# Returns whatever #each on the match result returns.
def QuickPath::each(element, path, namespaces=EMPTY_HASH, &block)
  path ||= "*"
  found = match(element, path, namespaces)
  found.each( &block )
end
|
19
|
+
|
20
|
+
# Entry point of the path evaluation: chooses the initial node list from
# the way +path+ begins and then lets QuickPath::filter do the real work.
#
# element::    context node
# path::       path expression; must not be nil
# namespaces:: stored in Functions::namespace_context before filtering
#
# Raises a RuntimeError when +path+ is nil.
# Returns the result of QuickPath::filter, except for the bare path "/",
# which yields a one-element array holding the parent of element.root.
def QuickPath::match(element, path, namespaces=EMPTY_HASH)
  raise "nil is not a valid xpath" unless path
  Functions::namespace_context = namespaces

  start_nodes =
    case path
    when /^\/([^\/]|$)/u
      # match on root: drop the leading slash and start from the root
      path = path[1..-1]
      return [element.root.parent] if path == ''
      [element.root]
    when /^[-\w]*::/u
      # explicit axis: the axis handling starts from the element itself
      [element]
    when /^\*/u, /^[\[!\w:]/u
      # wildcard, predicate or name: match on the children
      element.to_a
    else
      [element]
    end

  filter(start_nodes, path)
end
|
44
|
+
|
45
|
+
# Given an array of nodes it filters the array based on the path. The
|
46
|
+
# result is that when this method returns, the array will contain elements
|
47
|
+
# which match the path
|
48
|
+
# Reduces +elements+ to the nodes selected by +path+. The first token of
# +path+ decides which step is applied; the remainder of the path is handed
# on to a recursive call (directly, or through QuickPath::axe,
# QuickPath::function or QuickPath::predicate).
#
# Be aware that the element-name step prunes +elements+ in place.
#
# Returns +elements+ itself when +path+ is nil or empty or when +elements+
# is empty, and an empty array when +path+ starts with nothing this method
# recognises.
def QuickPath::filter(elements, path)
  return elements if path.nil? or path == '' or elements.size == 0

  case path
  when /^\/\//u                                   # Descendant
    axe( elements, "descendant-or-self", $' )
  when /^\/?\b(\w[-\w]*)\b::/u                    # Axe
    axe( elements, $1, $' )
  when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u # Child
    tail = $'
    elements.inject([]) do |found, node|
      found | filter( node.to_a, tail )
    end
  when /^\/?(\w[-\w]*)\(/u                        # / Function
    function( elements, $1, $' )
  when Namespace::NAMESPLIT                       # Element name
    prefix, local_name, tail = $1, $2, $'
    # Keep a node only if it is an Element whose expanded name equals the
    # requested name, or whose plain name matches and whose namespace is
    # the one registered for the prefix.
    elements.delete_if do |node|
      next true unless node.kind_of? Element
      next false if node.expanded_name == local_name
      !(node.name == local_name and
        node.namespace == Functions.namespace_context[prefix])
    end
    filter( elements, tail )
  when /^\/\[/u
    # "/[...]": apply the predicate to the children of every Element
    predicate_path = path[1..-1]
    elements.inject([]) do |found, node|
      if node.kind_of? Element
        found | predicate( node.to_a, predicate_path )
      else
        found
      end
    end
  when /^\[/u                                     # Predicate
    predicate( elements, path )
  when /^\/?\.\.\./u                              # Ancestor
    axe( elements, "ancestor", $' )
  when /^\/?\.\./u                                # Parent
    tail = $'
    filter( elements.collect { |node| node.parent }, tail )
  when /^\/?\./u                                  # Self
    filter( elements, $' )
  when /^\*/u                                     # Any
    tail = $'
    elements.inject([]) do |found, node|
      if node.kind_of? Element
        found | filter( [node], tail )
      else
        found
      end
    end
  else
    []
  end
end
|
105
|
+
|
106
|
+
# Evaluates one axis step.
#
# elements:: the current node list
# axe_name:: axis name as captured from the path ("child", "ancestor",
#            "descendant-or-self", ...)
# rest::     the path text following the "axis::" prefix
#
# For every "...-or-self" axis the current nodes themselves are filtered
# with +rest+ first. An axis name that is not handled below yields only
# that (possibly empty) initial result.
#
# The XPath 1.0 axis is called "preceding-sibling"; the historical spelling
# "previous-sibling" is still accepted so existing callers keep working.
#
# Returns the matches with duplicates removed.
def QuickPath::axe( elements, axe_name, rest )
  matches = []
  # elements.dup: QuickPath::filter may prune the array it is given
  matches = filter( elements.dup, rest ) if axe_name =~ /-or-self$/u
  case axe_name
  when /^descendant/u
    # Recurse through the children with descendant-or-self so every level
    # below the current nodes is visited.
    elements.each do |element|
      matches |= filter( element.to_a, "descendant-or-self::#{rest}" ) if element.kind_of? Element
    end
  when /^ancestor/u
    # Collect the whole parent chain of every node, then filter it.
    elements.each do |element|
      while element.parent
        matches << element.parent
        element = element.parent
      end
    end
    matches = filter( matches, rest )
  when "self"
    matches = filter( elements, rest )
  when "child"
    elements.each do |element|
      matches |= filter( element.to_a, rest ) if element.kind_of? Element
    end
  when "attribute"
    # Here +rest+ is used verbatim as the attribute name.
    elements.each do |element|
      matches << element.attributes[ rest ] if element.kind_of? Element
    end
  when "parent"
    matches = filter(elements.collect{|element| element.parent}.uniq, rest)
  when "following-sibling"
    matches = filter(elements.collect{|element| element.next_sibling}.uniq,
      rest)
  when "previous-sibling", "preceding-sibling"
    matches = filter(elements.collect{|element|
      element.previous_sibling}.uniq, rest )
  end
  return matches.uniq
end
|
143
|
+
|
144
|
+
# A predicate filters a node-set with respect to an axis to produce a
|
145
|
+
# new node-set. For each node in the node-set to be filtered, the
|
146
|
+
# PredicateExpr is evaluated with that node as the context node, with
|
147
|
+
# the number of nodes in the node-set as the context size, and with the
|
148
|
+
# proximity position of the node in the node-set with respect to the
|
149
|
+
# axis as the context position; if PredicateExpr evaluates to true for
|
150
|
+
# that node, the node is included in the new node-set; otherwise, it is
|
151
|
+
# not included.
|
152
|
+
#
|
153
|
+
# A PredicateExpr is evaluated by evaluating the Expr and converting
|
154
|
+
# the result to a boolean. If the result is a number, the result will
|
155
|
+
# be converted to true if the number is equal to the context position
|
156
|
+
# and will be converted to false otherwise; if the result is not a
|
157
|
+
# number, then the result will be converted as if by a call to the
|
158
|
+
# boolean function. Thus a location path para[3] is equivalent to
|
159
|
+
# para[position()=3].
|
160
|
+
# Applies the leading "[...]" predicate of +path+ to +elements+ and then
# filters the survivors with whatever follows the closing bracket.
#
# elements:: node list to test
# path::     path text whose first character is the opening '['
#
# The predicate text is rewritten into a Ruby expression and evaluated once
# per node, with Functions.node set to the node and Functions.pair set to
# [position, size]. A node is kept when the expression yields true or a
# String, or an Integer equal to the node's 1-based position.
#
# Raises a RuntimeError when the opening '[' is never closed.
# Returns the result of QuickPath::filter on the kept nodes.
def QuickPath::predicate( elements, path )
  # Locate the ']' that balances the '[' at index 0.
  ind = 1
  bcount = 1
  while bcount > 0
    # Running off the end means the brackets are unbalanced; without this
    # check the scan would never terminate.
    raise "Unbalanced '[' in predicate: #{path}" if ind >= path.size
    bcount += 1 if path[ind] == ?[
    bcount -= 1 if path[ind] == ?]
    ind += 1
  end
  ind -= 1
  predicate = path[1..ind-1]
  rest = path[ind+1..-1]

  # have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
  predicate.gsub!( /([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)/u ) {
    "#$1 #$2 #$3 and #$3 #$4 #$5"
  }
  # Let's do some Ruby trickery to avoid some work:
  predicate.gsub!( /&/u, "&&" )
  predicate.gsub!( /=/u, "==" )
  # @name becomes a call to QuickPath::attribute
  predicate.gsub!( /@(\w[-\w.]*)/u ) {
    "attribute(\"#$1\")"
  }
  predicate.gsub!( /\bmod\b/u, "%" )
  # function-name( -> function_name(, so it is a valid Ruby method call
  predicate.gsub!( /\b(\w[-\w.]*\()/u ) {
    fname = $1
    fname.gsub( /-/u, "_" )
  }

  Functions.pair = [ 0, elements.size ]
  results = []
  elements.each do |element|
    Functions.pair[0] += 1
    Functions.node = element
    # SECURITY NOTE(review): the rewritten predicate is executed with eval,
    # so a path taken from untrusted input can run arbitrary Ruby code.
    res = eval( predicate )
    case res
    when true, String
      results << element
    when Integer
      # Integer instead of Fixnum: Fixnum is gone from current Rubies and
      # Integer matches the same values on old ones.
      results << element if Functions.pair[0] == res
    end
  end
  return filter( results, rest )
end
|
205
|
+
|
206
|
+
# Looks up attribute +name+ on the node currently stored in Functions.node.
#
# Returns the value of attributes[name] when that node is an Element, and
# nil for any other kind of node.
def QuickPath::attribute(name)
  if Functions.node.kind_of? Element
    Functions.node.attributes[name]
  end
end
|
209
|
+
|
210
|
+
# Name of the node currently stored in Functions.node.
#
# Returns node.name when that node is an Element, and nil otherwise.
def QuickPath::name
  if Functions.node.kind_of? Element
    Functions.node.name
  end
end
|
213
|
+
|
214
|
+
# Forwards any method QuickPath does not define itself to the Functions
# module, using the same name and arguments.
#
# id::   Symbol of the missing method
# args:: arguments of the original call
#
# Returns whatever Functions returns. When the forwarded call fails with a
# StandardError, a RuntimeError naming the method and its arguments is
# raised instead. Only StandardError is rescued so that Interrupt,
# SystemExit and similar are not swallowed and disguised as a RuntimeError.
def QuickPath::method_missing( id, *args )
  Functions.send( id.id2name, *args )
rescue StandardError => e
  raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{e.message}"
end
|
221
|
+
|
222
|
+
# Calls the Functions method +fname+ once per node and keeps the nodes for
# which it answers true or an Integer equal to the node's 1-based position.
#
# elements:: node list to test
# fname::    function name as captured from the path
# rest::     path text following the opening parenthesis; it is consumed by
#            QuickPath::parse_args to obtain the call arguments
#
# Before each call Functions.node is set to the node and Functions.pair to
# [position, size].
#
# Returns the array of kept nodes.
def QuickPath::function( elements, fname, rest )
  args = parse_args( elements, rest )
  Functions.pair = [0, elements.size]
  results = []
  elements.each do |element|
    Functions.pair[0] += 1
    Functions.node = element
    # SECURITY NOTE(review): fname comes from the path text and send also
    # reaches private methods, so untrusted paths should not get here.
    res = Functions.send( fname, *args )
    case res
    when true
      results << element
    when Integer
      # Integer instead of Fixnum: Fixnum is gone from current Rubies and
      # Integer matches the same values on old ones.
      results << element if Functions.pair[0] == res
    end
  end
  return results
end
|
239
|
+
|
240
|
+
# Consumes +string+ one character at a time, up to and including the ')'
# that closes the current function call, and collects the call arguments.
#
# element:: passed on to QuickPath::function for nested calls
# string::  path text after the opening parenthesis; it is modified in
#           place as characters are consumed
#
# Returns the argument array when a ')' is found, and "" when the text runs
# out (or is nil/empty) without one.
#
# NOTE(review): the buffer is not cleared after a ',' and the text before
# the closing ')' is never added as an argument - confirm whether that is
# intended before relying on multi-argument calls.
def QuickPath::parse_args( element, string )
  # /.*?(?:\)|,)/
  arguments = []
  buffer = ""
  while string and string != ""
    c = string[0]
    # \A with the m flag so a leading newline is consumed as well; a plain
    # /^./ cannot match it, which left the string untouched and the loop
    # spinning forever.
    string.sub!(/\A./mu, "")
    case c
    when ?,
      # if depth = 1, then we start a new argument
      arguments << evaluate( buffer )
      #arguments << evaluate( string[0..count] )
    when ?(
      # start a new method call
      function( element, buffer, string )
      buffer = ""
    when ?)
      # close the method call and return arguments
      return arguments
    else
      buffer << c
    end
  end
  ""
end
|
265
|
+
end
|
266
|
+
end
|
data/lib/rexml/rexml.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
# REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
|
3
|
+
#
|
4
|
+
# REXML is a _pure_ Ruby, XML 1.0 conforming,
|
5
|
+
# non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
|
6
|
+
# toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
|
7
|
+
# tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
|
8
|
+
# and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
|
9
|
+
# includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
|
10
|
+
# Ruby 1.8, REXML is included in the standard Ruby distribution.
|
11
|
+
#
|
12
|
+
# Main page:: http://www.germane-software.com/software/rexml
|
13
|
+
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
|
14
|
+
# Version:: 3.1.7.3
|
15
|
+
# Date:: 2007/275
|
16
|
+
# Revision:: $Revision$
|
17
|
+
#
|
18
|
+
# This API documentation can be downloaded from the REXML home page, or can
|
19
|
+
# be accessed online[http://www.germane-software.com/software/rexml_doc]
|
20
|
+
#
|
21
|
+
# A tutorial is available in the REXML distribution in docs/tutorial.html,
|
22
|
+
# or can be accessed
|
23
|
+
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
|
24
|
+
module REXML
  # Library metadata. The strings are frozen so the shared constants cannot
  # be modified by accident.

  # "\xC2\xA9" is the UTF-8 byte sequence of the copyright sign.
  COPYRIGHT = "Copyright \xC2\xA9 2001-2006 Sean Russell <ser@germane-software.com>".freeze
  VERSION = "3.1.7.3".freeze
  DATE = "2007/275".freeze
  # The version-control keyword with its "$Revision:" / "$" decoration and
  # surrounding whitespace removed.
  REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip.freeze

  # Mixed-case aliases that refer to the very same objects.
  Copyright = COPYRIGHT
  Version = VERSION
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module REXML
  # Template for SAX2-style listeners: every callback is defined with an
  # empty body, so a listener only needs to override the events it cares
  # about.
  #
  # The declaration callbacks (attlistdecl, elementdecl, etc) receive
  # trivially processed data; REXML doesn't yet handle doctype entity
  # declarations, so you have to parse them out yourself.
  #
  # === Missing methods from SAX2
  #  ignorable_whitespace
  # === Methods extending SAX2
  # +WARNING+
  # These methods are certainly going to change, until DTDs are fully
  # supported.  Be aware of this.
  #  start_document
  #  end_document
  #  doctype
  #  elementdecl
  #  attlistdecl
  #  entitydecl
  #  notationdecl
  #  cdata
  #  xmldecl
  #  comment
  module SAX2Listener
    def start_document; end

    def end_document; end

    def start_prefix_mapping(prefix, uri); end

    def end_prefix_mapping(prefix); end

    def start_element(uri, localname, qname, attributes); end

    def end_element(uri, localname, qname); end

    def characters(text); end

    def processing_instruction(target, data); end

    # Doctype declaration. Parts that were not supplied are nil.
    # EG, <!DOCTYPE me PUBLIC "foo" "bar">
    # @p name the name of the doctype; EG, "me"
    # @p pub_sys "PUBLIC", "SYSTEM", or nil.  EG, "PUBLIC"
    # @p long_name the supplied long name, or nil.  EG, "foo"
    # @p uri the uri of the doctype, or nil.  EG, "bar"
    def doctype(name, pub_sys, long_name, uri); end

    # Called for an ATTLIST declaration inside a doctype. The content is
    # the declaration itself, unparsed: <!ATTLIST el attr CDATA #REQUIRED>
    # arrives as "el attr CDATA #REQUIRED". The same holds for all of the
    # .*decl methods.
    def attlistdecl(element, pairs, contents); end

    # <!ELEMENT ...>
    def elementdecl(content); end

    # <!ENTITY ...>
    # The entity declaration is delivered in array form. It can take a
    # number of shapes, but in general it looks like this
    # (example, result):
    #  <!ENTITY % YN '"Yes"'>
    #  ["%", "YN", "'\"Yes\"'", "\""]
    #  <!ENTITY % YN 'Yes'>
    #  ["%", "YN", "'Yes'", "s"]
    #  <!ENTITY WhatHeSaid "He said %YN;">
    #  ["WhatHeSaid", "\"He said %YN;\"", "YN"]
    #  <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
    #  ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
    # NOTE(review): the examples show a single array while the method takes
    # +name+ and +decl+ - confirm the split against the calling parser.
    def entitydecl(name, decl); end

    # <!NOTATION ...>
    def notationdecl(content); end

    # Called when <![CDATA[ ... ]]> is encountered in a document.
    # @p content "..."
    def cdata(content); end

    # Called when an XML PI is encountered in the document.
    # EG: <?xml version="1.0" encoding="utf"?>
    # @p version the version attribute value.  EG, "1.0"
    # @p encoding the encoding attribute value, or nil.  EG, "utf"
    # @p standalone the standalone attribute value, or nil.  EG, nil
    def xmldecl(version, encoding, standalone); end

    # Called when a comment is encountered.
    # @p comment The content of the comment
    def comment(comment); end

    def progress(position); end
  end
end
|
data/lib/rexml/source.rb
ADDED
@@ -0,0 +1,251 @@
|
|
1
|
+
require 'rexml/encoding'
|
2
|
+
|
3
|
+
module REXML
|
4
|
+
# Generates Source-s. USE THIS CLASS.
|
5
|
+
class SourceFactory
  # Wraps +arg+ in the matching kind of Source.
  # @param arg a String, an IO-like object (anything answering read,
  #   readline, nil? and eof?), or an existing Source
  # @return a new Source for a String, a new IOSource for an IO-like
  #   object, or +arg+ itself when it already is a Source
  # @raise RuntimeError if +arg+ is none of the above
  def self.create_from(arg)
    return Source.new(arg) if arg.kind_of? String

    io_like = [:read, :readline, :nil?, :eof?].all? do |message|
      arg.respond_to? message
    end
    return IOSource.new(arg) if io_like

    return arg if arg.kind_of? Source

    raise "#{arg.class} is not a valid input stream. It must walk \n"+
      "like either a String, an IO, or a Source."
  end
end
|
25
|
+
|
26
|
+
# A Source can be searched for patterns, and wraps buffers and other
|
27
|
+
# objects and provides consumption of text
|
28
|
+
class Source
  include Encoding

  # The current buffer (what we're going to read next)
  attr_reader :buffer
  # Initialised to 0 by the constructor; nothing in this class updates it.
  attr_reader :line
  attr_reader :encoding

  # Constructor
  # @param arg must be a String, and should be a valid XML document
  # @param encoding if non-null, sets the encoding of the source to this
  # value, overriding all encoding detection
  def initialize(arg, encoding=nil)
    @orig = @buffer = arg
    self.encoding = encoding || check_encoding( @buffer )
    @line = 0
  end

  # Inherited from Encoding
  # Overridden to support optimized en/decoding: unless the inherited
  # setter answers nil/false, the encoded form of '>' is remembered as the
  # line break, and the buffer is decoded when +enc+ is not UTF_8.
  def encoding=(enc)
    return unless super
    @line_break = encode( '>' )
    needs_decoding = (enc != UTF_8)
    @buffer = decode(@buffer) if needs_decoding
    @to_utf = needs_decoding
  end

  # Scans the buffer for a given pattern.  Note that this is not your
  # usual scan() method: the pattern argument has some requirements, and
  # the source can be consumed.
  # @param pattern must be a Regexp, and must be in the form of
  # /^\s*(#{your pattern, with no groups})(.*)/.  The first group
  # will be returned; the second group is used if the consume flag is
  # set.
  # @param cons if true and something was found, the buffer is replaced by
  # the text following the last match.
  # @return the result of String#scan on the buffer, or nil if the buffer
  # is nil.
  def scan(pattern, cons=false)
    return nil if @buffer.nil?
    found = @buffer.scan(pattern)
    @buffer = Regexp.last_match.post_match if cons and !found.empty?
    found
  end

  # Intentionally empty here; IOSource overrides it.
  def read
  end

  # Drops everything up to and including the first match of +pattern+
  # from the buffer; the buffer is left alone when nothing matches.
  def consume( pattern )
    md = pattern.match( @buffer )
    @buffer = md.post_match if md
  end

  # @return the result of matching +pattern+ against the buffer; +char+
  # is not used.
  def match_to( char, pattern )
    pattern.match(@buffer)
  end

  # Like match_to, but the buffer is always replaced by the text after the
  # match, which makes it nil when +pattern+ does not match.
  def match_to_consume( char, pattern )
    md = pattern.match(@buffer)
    @buffer = md ? md.post_match : nil
    md
  end

  # @return the result of matching +pattern+ against the buffer. With
  # +cons+ set and a successful match, the buffer is cut down to the text
  # after the match.
  def match(pattern, cons=false)
    md = pattern.match(@buffer)
    @buffer = md.post_match if cons and md
    md
  end

  # @return true if the Source is exhausted
  def empty?
    @buffer == ""
  end

  # @return the index of the current buffer within the original text
  def position
    @orig.index( @buffer )
  end

  # @return the current line in the source
  # NOTE(review): the original text is split on whitespace, so the index
  # returned counts whitespace-separated tokens - confirm that this is the
  # intended meaning of "line".
  def current_line
    tokens = @orig.split
    hit = tokens.grep( @buffer[0..30] )
    hit = hit[-1] if hit.kind_of? Array
    tokens.index( hit ) if hit
  end
end
|
127
|
+
|
128
|
+
# A Source that wraps an IO. See the Source class for method
|
129
|
+
# documentation
|
130
|
+
class IOSource < Source
  #attr_reader :block_size

  # block_size has been deprecated
  #
  # @param arg the IO-like object to read from
  # @param block_size ignored
  # @param encoding if given, passed to encoding= before anything else is
  #   read
  def initialize(arg, block_size=500, encoding=nil)
    @er_source = @source = arg
    @to_utf = false

    # Determining the encoding is a deceptively difficult issue to resolve.
    # First, we check the first two bytes for UTF-16.  Then we
    # assume that the encoding is at least ASCII enough for the '>', and
    # we read until we get one of those.  This gives us the XML declaration,
    # if there is one.  If there isn't one, the file MUST be UTF-8, as per
    # the XML spec.  If there is one, we can determine the encoding from
    # it.
    @buffer = ""
    # read returns nil at end of input; fall back to an empty string
    str = @source.read( 2 ) || ""
    # Compare byte values. Indexing the String yields one-character Strings
    # on Ruby 1.9+, which never equal an Integer, so the byte order marks
    # would go undetected there.
    first_byte, second_byte = str.unpack( 'C2' )
    if encoding
      self.encoding = encoding
    elsif 0xfe == first_byte && 0xff == second_byte
      @line_break = "\000>"
    elsif 0xff == first_byte && 0xfe == second_byte
      @line_break = ">\000"
    elsif 0xef == first_byte && 0xbb == second_byte
      str += @source.read(1).to_s
      # a complete EF BB BF mark is dropped from the text
      str = '' if 0xbf == str.unpack( 'C3' )[2]
      @line_break = ">"
    else
      @line_break = ">"
    end
    super str+@source.readline( @line_break )
  end

  # See Source#scan. When the buffer holds no match, more text is read
  # from the IO, up to the next line break each time, until the pattern
  # matches or the IO cannot deliver any more.
  def scan(pattern, cons=false)
    rv = super
    # You'll notice that this next section is very similar to the same
    # section in match(), but just a liiittle different.  This is
    # because it is a touch faster to do it this way with scan()
    # than the way match() does it; enough faster to warrant duplicating
    # some code
    if rv.size == 0
      until @buffer =~ pattern or @source.nil?
        begin
          # READLINE OPT
          #str = @source.read(@block_size)
          str = @source.readline(@line_break)
          str = decode(str) if @to_utf and str
          @buffer << str
        rescue => e
          # Conversion errors still propagate. Naming Iconv directly in the
          # rescue clause raised a NameError whenever Iconv was not loaded,
          # hence the defined? guard.
          raise if defined?(Iconv::IllegalSequence) and e.kind_of?(Iconv::IllegalSequence)
          # any other failure (end of input included) closes the source
          @source = nil
        end
      end
      rv = super
    end
    # taint no longer exists on current Rubies
    rv.taint if rv.respond_to?( :taint )
    rv
  end

  # Appends the text up to the next line break to the buffer. On any
  # StandardError (end of input included) the source is dropped instead.
  def read
    begin
      str = @source.readline(@line_break)
      str = decode(str) if @to_utf and str
      @buffer << str
    rescue StandardError
      # StandardError rather than Exception so that Interrupt, SystemExit
      # and the like are not swallowed here
      @source = nil
    end
  end

  def consume( pattern )
    match( pattern, true )
  end

  # See Source#match. Keeps reading from the IO until the pattern matches
  # or the IO cannot deliver any more.
  def match( pattern, cons=false )
    rv = pattern.match(@buffer)
    @buffer = $' if cons and rv
    while !rv and @source
      begin
        str = @source.readline(@line_break)
        str = decode(str) if @to_utf and str
        @buffer << str
        rv = pattern.match(@buffer)
        @buffer = $' if cons and rv
      rescue
        @source = nil
      end
    end
    # taint no longer exists on current Rubies
    rv.taint if rv.respond_to?( :taint )
    rv
  end

  # @return true if the buffer is used up and the IO is gone or at its end
  def empty?
    super and ( @source.nil? || @source.eof? )
  end

  # @return 0 for a pipe, otherwise the position reported by the IO
  def position
    # IO-like objects such as StringIO have no stat
    return 0 if @er_source.respond_to?( :stat ) and @er_source.stat.pipe?
    @er_source.pos
  end

  # @return the current line in the source, as [pos, lineno, line];
  # pos and line are -1 when the IO raises an IOError
  # NOTE(review): the IO is rewound and re-read up to the old position and
  # is not moved back afterwards - confirm callers only use this for error
  # reporting.
  def current_line
    begin
      pos = @er_source.pos        # The byte position in the source
      lineno = @er_source.lineno  # The XML < position in the source
      @er_source.rewind
      line = 0                    # The \r\n position in the source
      begin
        while @er_source.pos < pos
          @er_source.readline
          line += 1
        end
      rescue
        # best effort: stop counting when reading fails
      end
    rescue IOError
      pos = -1
      line = -1
    end
    [pos, lineno, line]
  end
end
|
251
|
+
end
|