rubysl-rexml 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +8 -0
- data/Gemfile +4 -0
- data/LICENSE +25 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/lib/rexml/attlistdecl.rb +62 -0
- data/lib/rexml/attribute.rb +185 -0
- data/lib/rexml/cdata.rb +67 -0
- data/lib/rexml/child.rb +96 -0
- data/lib/rexml/comment.rb +80 -0
- data/lib/rexml/doctype.rb +271 -0
- data/lib/rexml/document.rb +230 -0
- data/lib/rexml/dtd/attlistdecl.rb +10 -0
- data/lib/rexml/dtd/dtd.rb +51 -0
- data/lib/rexml/dtd/elementdecl.rb +17 -0
- data/lib/rexml/dtd/entitydecl.rb +56 -0
- data/lib/rexml/dtd/notationdecl.rb +39 -0
- data/lib/rexml/element.rb +1227 -0
- data/lib/rexml/encoding.rb +71 -0
- data/lib/rexml/encodings/CP-1252.rb +103 -0
- data/lib/rexml/encodings/EUC-JP.rb +35 -0
- data/lib/rexml/encodings/ICONV.rb +22 -0
- data/lib/rexml/encodings/ISO-8859-1.rb +7 -0
- data/lib/rexml/encodings/ISO-8859-15.rb +72 -0
- data/lib/rexml/encodings/SHIFT-JIS.rb +37 -0
- data/lib/rexml/encodings/SHIFT_JIS.rb +1 -0
- data/lib/rexml/encodings/UNILE.rb +34 -0
- data/lib/rexml/encodings/US-ASCII.rb +30 -0
- data/lib/rexml/encodings/UTF-16.rb +35 -0
- data/lib/rexml/encodings/UTF-8.rb +18 -0
- data/lib/rexml/entity.rb +166 -0
- data/lib/rexml/formatters/default.rb +109 -0
- data/lib/rexml/formatters/pretty.rb +138 -0
- data/lib/rexml/formatters/transitive.rb +56 -0
- data/lib/rexml/functions.rb +382 -0
- data/lib/rexml/instruction.rb +70 -0
- data/lib/rexml/light/node.rb +196 -0
- data/lib/rexml/namespace.rb +47 -0
- data/lib/rexml/node.rb +75 -0
- data/lib/rexml/output.rb +24 -0
- data/lib/rexml/parent.rb +166 -0
- data/lib/rexml/parseexception.rb +51 -0
- data/lib/rexml/parsers/baseparser.rb +503 -0
- data/lib/rexml/parsers/lightparser.rb +60 -0
- data/lib/rexml/parsers/pullparser.rb +196 -0
- data/lib/rexml/parsers/sax2parser.rb +238 -0
- data/lib/rexml/parsers/streamparser.rb +46 -0
- data/lib/rexml/parsers/treeparser.rb +97 -0
- data/lib/rexml/parsers/ultralightparser.rb +56 -0
- data/lib/rexml/parsers/xpathparser.rb +698 -0
- data/lib/rexml/quickpath.rb +266 -0
- data/lib/rexml/rexml.rb +32 -0
- data/lib/rexml/sax2listener.rb +97 -0
- data/lib/rexml/source.rb +251 -0
- data/lib/rexml/streamlistener.rb +92 -0
- data/lib/rexml/syncenumerator.rb +33 -0
- data/lib/rexml/text.rb +344 -0
- data/lib/rexml/undefinednamespaceexception.rb +8 -0
- data/lib/rexml/validation/relaxng.rb +559 -0
- data/lib/rexml/validation/validation.rb +155 -0
- data/lib/rexml/validation/validationexception.rb +9 -0
- data/lib/rexml/xmldecl.rb +119 -0
- data/lib/rexml/xmltokens.rb +18 -0
- data/lib/rexml/xpath.rb +66 -0
- data/lib/rexml/xpath_parser.rb +792 -0
- data/lib/rubysl/rexml.rb +1 -0
- data/lib/rubysl/rexml/version.rb +5 -0
- data/rubysl-rexml.gemspec +23 -0
- data/spec/attribute/clone_spec.rb +10 -0
- data/spec/attribute/element_spec.rb +22 -0
- data/spec/attribute/equal_value_spec.rb +17 -0
- data/spec/attribute/hash_spec.rb +12 -0
- data/spec/attribute/initialize_spec.rb +28 -0
- data/spec/attribute/inspect_spec.rb +19 -0
- data/spec/attribute/namespace_spec.rb +23 -0
- data/spec/attribute/node_type_spec.rb +9 -0
- data/spec/attribute/prefix_spec.rb +17 -0
- data/spec/attribute/remove_spec.rb +19 -0
- data/spec/attribute/to_s_spec.rb +13 -0
- data/spec/attribute/to_string_spec.rb +14 -0
- data/spec/attribute/value_spec.rb +14 -0
- data/spec/attribute/write_spec.rb +22 -0
- data/spec/attribute/xpath_spec.rb +19 -0
- data/spec/attributes/add_spec.rb +6 -0
- data/spec/attributes/append_spec.rb +6 -0
- data/spec/attributes/delete_all_spec.rb +30 -0
- data/spec/attributes/delete_spec.rb +26 -0
- data/spec/attributes/each_attribute_spec.rb +24 -0
- data/spec/attributes/each_spec.rb +24 -0
- data/spec/attributes/element_reference_spec.rb +18 -0
- data/spec/attributes/element_set_spec.rb +25 -0
- data/spec/attributes/get_attribute_ns_spec.rb +13 -0
- data/spec/attributes/get_attribute_spec.rb +28 -0
- data/spec/attributes/initialize_spec.rb +18 -0
- data/spec/attributes/length_spec.rb +6 -0
- data/spec/attributes/namespaces_spec.rb +5 -0
- data/spec/attributes/prefixes_spec.rb +23 -0
- data/spec/attributes/shared/add.rb +17 -0
- data/spec/attributes/shared/length.rb +12 -0
- data/spec/attributes/size_spec.rb +6 -0
- data/spec/attributes/to_a_spec.rb +20 -0
- data/spec/cdata/clone_spec.rb +9 -0
- data/spec/cdata/initialize_spec.rb +24 -0
- data/spec/cdata/shared/to_s.rb +11 -0
- data/spec/cdata/to_s_spec.rb +6 -0
- data/spec/cdata/value_spec.rb +6 -0
- data/spec/document/add_element_spec.rb +30 -0
- data/spec/document/add_spec.rb +60 -0
- data/spec/document/clone_spec.rb +19 -0
- data/spec/document/doctype_spec.rb +14 -0
- data/spec/document/encoding_spec.rb +21 -0
- data/spec/document/expanded_name_spec.rb +15 -0
- data/spec/document/new_spec.rb +37 -0
- data/spec/document/node_type_spec.rb +7 -0
- data/spec/document/root_spec.rb +11 -0
- data/spec/document/stand_alone_spec.rb +18 -0
- data/spec/document/version_spec.rb +13 -0
- data/spec/document/write_spec.rb +38 -0
- data/spec/document/xml_decl_spec.rb +14 -0
- data/spec/element/add_attribute_spec.rb +40 -0
- data/spec/element/add_attributes_spec.rb +21 -0
- data/spec/element/add_element_spec.rb +38 -0
- data/spec/element/add_namespace_spec.rb +23 -0
- data/spec/element/add_text_spec.rb +23 -0
- data/spec/element/attribute_spec.rb +16 -0
- data/spec/element/attributes_spec.rb +18 -0
- data/spec/element/cdatas_spec.rb +23 -0
- data/spec/element/clone_spec.rb +28 -0
- data/spec/element/comments_spec.rb +20 -0
- data/spec/element/delete_attribute_spec.rb +38 -0
- data/spec/element/delete_element_spec.rb +50 -0
- data/spec/element/delete_namespace_spec.rb +24 -0
- data/spec/element/document_spec.rb +17 -0
- data/spec/element/each_element_with_attribute_spec.rb +34 -0
- data/spec/element/each_element_with_text_spec.rb +30 -0
- data/spec/element/get_text_spec.rb +17 -0
- data/spec/element/has_attributes_spec.rb +16 -0
- data/spec/element/has_elements_spec.rb +17 -0
- data/spec/element/has_text_spec.rb +15 -0
- data/spec/element/inspect_spec.rb +26 -0
- data/spec/element/instructions_spec.rb +20 -0
- data/spec/element/namespace_spec.rb +26 -0
- data/spec/element/namespaces_spec.rb +31 -0
- data/spec/element/new_spec.rb +34 -0
- data/spec/element/next_element_spec.rb +18 -0
- data/spec/element/node_type_spec.rb +7 -0
- data/spec/element/prefixes_spec.rb +22 -0
- data/spec/element/previous_element_spec.rb +19 -0
- data/spec/element/raw_spec.rb +23 -0
- data/spec/element/root_spec.rb +27 -0
- data/spec/element/text_spec.rb +45 -0
- data/spec/element/texts_spec.rb +15 -0
- data/spec/element/whitespace_spec.rb +22 -0
- data/spec/node/each_recursive_spec.rb +20 -0
- data/spec/node/find_first_recursive_spec.rb +24 -0
- data/spec/node/index_in_parent_spec.rb +14 -0
- data/spec/node/next_sibling_node_spec.rb +20 -0
- data/spec/node/parent_spec.rb +20 -0
- data/spec/node/previous_sibling_node_spec.rb +20 -0
- data/spec/shared/each_element.rb +35 -0
- data/spec/shared/elements_to_a.rb +35 -0
- data/spec/text/append_spec.rb +9 -0
- data/spec/text/clone_spec.rb +9 -0
- data/spec/text/comparison_spec.rb +24 -0
- data/spec/text/empty_spec.rb +11 -0
- data/spec/text/indent_text_spec.rb +23 -0
- data/spec/text/inspect_spec.rb +7 -0
- data/spec/text/new_spec.rb +48 -0
- data/spec/text/node_type_spec.rb +7 -0
- data/spec/text/normalize_spec.rb +7 -0
- data/spec/text/read_with_substitution_spec.rb +12 -0
- data/spec/text/to_s_spec.rb +17 -0
- data/spec/text/unnormalize_spec.rb +7 -0
- data/spec/text/value_spec.rb +36 -0
- data/spec/text/wrap_spec.rb +20 -0
- data/spec/text/write_with_substitution_spec.rb +32 -0
- metadata +385 -0
@@ -0,0 +1,80 @@
|
|
1
|
+
require "rexml/child"
|
2
|
+
|
3
|
+
module REXML
|
4
|
+
##
|
5
|
+
# Represents an XML comment; that is, text between \<!-- ... -->
|
6
|
+
class Comment < Child
|
7
|
+
include Comparable
|
8
|
+
START = "<!--"
|
9
|
+
STOP = "-->"
|
10
|
+
|
11
|
+
# The content text
|
12
|
+
|
13
|
+
attr_accessor :string
|
14
|
+
|
15
|
+
##
|
16
|
+
# Constructor. The first argument can be one of three types:
|
17
|
+
# @param first If String, the contents of this comment are set to the
|
18
|
+
# argument. If Comment, the argument is duplicated. If
|
19
|
+
# Source, the argument is scanned for a comment.
|
20
|
+
# @param second If the first argument is a Source, this argument
|
21
|
+
# should be nil, not supplied, or a Parent to be set as the parent
|
22
|
+
# of this object
|
23
|
+
def initialize( first, second = nil )
  super(second)
  # Only String and Comment arguments populate the text. Any other kind
  # of +first+ (including a Source) leaves @string unset in this
  # implementation.
  case first
  when String
    @string = first
  when Comment
    # The copy shares the same String object as the original comment.
    @string = first.string
  end
end
|
32
|
+
|
33
|
+
# Builds a new Comment by passing this comment to the constructor.
def clone
  Comment.new( self )
end
|
36
|
+
|
37
|
+
# == DEPRECATED
|
38
|
+
# See REXML::Formatters
|
39
|
+
#
|
40
|
+
# output::
|
41
|
+
# Where to write the string
|
42
|
+
# indent::
|
43
|
+
# An integer. If -1, no indenting will be used; otherwise, the
|
44
|
+
# indentation will be this number of spaces, and children will be
|
45
|
+
# indented an additional amount.
|
46
|
+
# transitive::
|
47
|
+
# Ignored by this class. The contents of comments are never modified.
|
48
|
+
# ie_hack::
|
49
|
+
# Needed for conformity to the child API, but not used by this class.
|
50
|
+
def write( output, indent=-1, transitive=false, ie_hack=false )
  Kernel.warn("Comment.write is deprecated. See REXML::Formatters")
  indent( output, indent )
  # Each piece is appended with its own << call; the pieces are not
  # chained because << is not guaranteed to return +output+ itself.
  [ START, @string ].each { |piece| output << piece }
  output << STOP
end
|
57
|
+
|
58
|
+
alias :to_s :string
|
59
|
+
|
60
|
+
##
|
61
|
+
# Compares this Comment to another; the contents of the comment are used
|
62
|
+
# in the comparison.
|
63
|
+
def <=>(other)
  # The receiver's text goes on the left so that Comparable's <, >,
  # sort, etc. order comments by their own content. The previous
  # operand order (other first) inverted every non-equal comparison.
  @string <=> other.to_s
end
|
66
|
+
|
67
|
+
##
|
68
|
+
# Compares this Comment to another; the contents of the comment are used
|
69
|
+
# in the comparison.
|
70
|
+
def ==( other )
  # Anything that is not a Comment is never equal to one.
  return false unless other.kind_of?( Comment )
  (other <=> self) == 0
end
|
74
|
+
|
75
|
+
# Identifies this node kind as the symbol :comment.
def node_type
  :comment
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
#vim:ts=2 sw=2 noexpandtab:
|
@@ -0,0 +1,271 @@
|
|
1
|
+
require "rexml/parent"
|
2
|
+
require "rexml/parseexception"
|
3
|
+
require "rexml/namespace"
|
4
|
+
require 'rexml/entity'
|
5
|
+
require 'rexml/attlistdecl'
|
6
|
+
require 'rexml/xmltokens'
|
7
|
+
|
8
|
+
module REXML
|
9
|
+
# Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
|
10
|
+
# ... >. DOCTYPES can be used to declare the DTD of a document, as well as
|
11
|
+
# being used to declare entities used in the document.
|
12
|
+
class DocType < Parent
|
13
|
+
include XMLTokens
|
14
|
+
START = "<!DOCTYPE"
|
15
|
+
STOP = ">"
|
16
|
+
SYSTEM = "SYSTEM"
|
17
|
+
PUBLIC = "PUBLIC"
|
18
|
+
DEFAULT_ENTITIES = {
|
19
|
+
'gt'=>EntityConst::GT,
|
20
|
+
'lt'=>EntityConst::LT,
|
21
|
+
'quot'=>EntityConst::QUOT,
|
22
|
+
"apos"=>EntityConst::APOS
|
23
|
+
}
|
24
|
+
|
25
|
+
# name is the name of the doctype
|
26
|
+
# external_id is the referenced DTD, if given
|
27
|
+
attr_reader :name, :external_id, :entities, :namespaces
|
28
|
+
|
29
|
+
# Constructor
|
30
|
+
#
|
31
|
+
# dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
|
32
|
+
# # <!DOCTYPE foo '-//I/Hate/External/IDs'>
|
33
|
+
# dt = DocType.new( doctype_to_clone )
|
34
|
+
# # Incomplete. Shallow clone of doctype
|
35
|
+
#
|
36
|
+
# +Note+ that the constructor:
|
37
|
+
#
|
38
|
+
# Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
|
39
|
+
#
|
40
|
+
# is _deprecated_. Do not use it. It will probably disappear.
|
41
|
+
def initialize( first, parent=nil )
  # Starts out pointing at the shared default table.
  @entities = DEFAULT_ENTITIES
  @long_name = @uri = nil
  case first
  when String
    super()
    @name = first
    # With a String name the second argument is the external ID, not a
    # parent node.
    @external_id = parent
  when DocType
    super( parent )
    @name = first.name
    @external_id = first.external_id
    # Carry the identifiers over as well; without them a copied DocType
    # would lose its public/system identifiers.
    @long_name = first.instance_variable_get( :@long_name )
    @uri = first.instance_variable_get( :@uri )
  when Array
    super( parent )
    # Positional form: [name, external_id, long_name, uri]
    @name = first[0]
    @external_id = first[1]
    @long_name = first[2]
    @uri = first[3]
  when Source
    super( parent )
    parser = Parsers::BaseParser.new( first )
    event = parser.pull
    # Only a :start_doctype event supplies the fields; any other first
    # event leaves them unset.
    if event[0] == :start_doctype
      @name, @external_id, @long_name, @uri, = event[1..-1]
    end
  else
    super()
  end
end
|
69
|
+
|
70
|
+
# Identifies this node kind as the symbol :doctype.
def node_type
  :doctype
end
|
73
|
+
|
74
|
+
# Returns an Array holding one Attribute per key/value pair yielded by
# every AttlistDecl child whose element_name equals +element+.
def attributes_of element
  collected = []
  each do |decl|
    next unless decl.kind_of?( AttlistDecl ) && decl.element_name == element
    decl.each { |key, val| collected << Attribute.new( key, val ) }
  end
  collected
end
|
83
|
+
|
84
|
+
# Looks up +attribute+ in the first AttlistDecl child that is declared
# for +element+ and includes that attribute. Returns nil when no such
# declaration exists.
def attribute_of element, attribute
  matching = find { |child|
    child.kind_of?( AttlistDecl ) &&
      child.element_name == element &&
      child.include?( attribute )
  }
  matching ? matching[attribute] : nil
end
|
93
|
+
|
94
|
+
# Builds a new DocType by passing this one to the constructor.
def clone
  DocType.new( self )
end
|
97
|
+
|
98
|
+
# output::
|
99
|
+
# Where to write the string
|
100
|
+
# indent::
|
101
|
+
# An integer. If -1, no indentation will be used; otherwise, the
|
102
|
+
# indentation will be this number of spaces, and children will be
|
103
|
+
# indented an additional amount.
|
104
|
+
# transitive::
|
105
|
+
# Ignored
|
106
|
+
# ie_hack::
|
107
|
+
# Ignored
|
108
|
+
def write( output, indent=0, transitive=false, ie_hack=false )
  f = REXML::Formatters::Default.new
  indent( output, indent )
  output << START
  output << ' '
  output << @name
  # The external ID keyword is written bare; the identifiers that
  # follow it are written via #inspect, i.e. wrapped in double quotes.
  output << " #@external_id" if @external_id
  output << " #{@long_name.inspect}" if @long_name
  output << " #{@uri.inspect}" if @uri
  unless @children.empty?
    # Internal subset: each child on its own line between '[' and ']'.
    output << ' ['
    @children.each do |child|
      output << "\n"
      f.write( child, output )
    end
    output << "\n]"
  end
  output << STOP
end
|
129
|
+
|
130
|
+
# Returns the parent's context, or nil when this DocType has no parent
# (previously a parentless DocType raised NoMethodError here).
def context
  @parent ? @parent.context : nil
end
|
133
|
+
|
134
|
+
# Returns the unnormalized value of the entity registered under +name+,
# or nil when no such entity is registered.
def entity( name )
  registered = @entities[name]
  registered.unnormalized if registered
end
|
137
|
+
|
138
|
+
# Adds +child+ via the superclass and, when it is an Entity, registers
# it under its name in this DocType's entity table.
def add child
  super(child)
  # Swap the default table for a private copy before it can be written to.
  if @entities == DEFAULT_ENTITIES
    @entities = DEFAULT_ENTITIES.clone
  end
  if child.kind_of? Entity
    @entities[ child.name ] = child
  end
end
|
143
|
+
|
144
|
+
# This method retrieves the public identifier identifying the document's
|
145
|
+
# DTD.
|
146
|
+
#
|
147
|
+
# Method contributed by Henrik Martensson
|
148
|
+
def public
  # Only a PUBLIC external ID carries a public identifier; SYSTEM and
  # every other value yield nil.
  "PUBLIC" == @external_id ? strip_quotes(@long_name) : nil
end
|
156
|
+
|
157
|
+
# This method retrieves the system identifier identifying the document's DTD
|
158
|
+
#
|
159
|
+
# Method contributed by Henrik Martensson
|
160
|
+
def system
  if "SYSTEM" == @external_id
    # For SYSTEM the single identifier is held in @long_name.
    strip_quotes(@long_name)
  elsif "PUBLIC" == @external_id
    # For PUBLIC the system identifier, when present, is held in @uri.
    @uri.kind_of?(String) ? strip_quotes(@uri) : nil
  end
end
|
168
|
+
|
169
|
+
# This method returns a list of notations that have been declared in the
|
170
|
+
# _internal_ DTD subset. Notations in the external DTD subset are not
|
171
|
+
# listed.
|
172
|
+
#
|
173
|
+
# Method contributed by Henrik Martensson
|
174
|
+
def notations
  # grep matches with Class#===, i.e. the same kind-of check as before.
  children.grep( REXML::NotationDecl )
end
|
177
|
+
|
178
|
+
# Retrieves a named notation. Only notations declared in the internal
|
179
|
+
# DTD subset can be retrieved.
|
180
|
+
#
|
181
|
+
# Method contributed by Henrik Martensson
|
182
|
+
def notation(name)
  # First declared notation whose name matches, or nil.
  notations.detect { |decl| decl.name == name }
end
|
187
|
+
|
188
|
+
private
|
189
|
+
|
190
|
+
# Method contributed by Henrik Martensson
|
191
|
+
# Removes one leading and one trailing quote character (' or ") from
# +quoted_string+ when both are present; otherwise returns the argument
# unchanged. Non-String arguments (e.g. nil) are returned as-is.
def strip_quotes(quoted_string)
  return quoted_string unless quoted_string.kind_of?(String)
  # \A and \z anchor the whole string (^ and $ only anchor a line), and
  # the closing class now holds a real apostrophe rather than an acute
  # accent, so single-quoted values are stripped too.
  if quoted_string =~ /\A['"].*['"]\z/m
    quoted_string[1, quoted_string.length-2]
  else
    quoted_string
  end
end
|
196
|
+
end
|
197
|
+
|
198
|
+
# We don't really handle any of these since we're not a validating
|
199
|
+
# parser, so we can be pretty dumb about them. All we need to be able
|
200
|
+
# to do is spew them back out on a write()
|
201
|
+
|
202
|
+
# This is an abstract class. You never use this directly; it serves as a
|
203
|
+
# parent class for the specific declarations.
|
204
|
+
class Declaration < Child
  # Keeps the raw declaration text in @string; the Child constructor is
  # invoked without arguments.
  def initialize( src )
    super()
    @string = src
  end

  # The stored text followed by a closing '>'.
  def to_s
    @string + ">"
  end

  # == DEPRECATED
  # See REXML::Formatters
  #
  # Appends #to_s to +output+; +indent+ is accepted but not used.
  def write( output, indent )
    output << to_s
  end
end
|
221
|
+
|
222
|
+
public
|
223
|
+
# Element declaration; behaves exactly like Declaration.
class ElementDecl < Declaration
  def initialize( src )
    super( src )
  end
end
|
228
|
+
|
229
|
+
# Holds the text of an external entity and echoes it back on output.
class ExternalEntity < Child
  def initialize( src )
    super()
    @entity = src
  end

  # The stored entity text, unchanged.
  def to_s
    @entity
  end

  # Appends the stored entity text to +output+; +indent+ is not used.
  def write( output, indent )
    output << @entity
  end
end
|
241
|
+
|
242
|
+
# A <!NOTATION ...> declaration.
class NotationDecl < Child
  attr_accessor :public, :system

  # This method retrieves the name of the notation.
  #
  # Method contributed by Henrik Martensson
  attr_reader :name

  def initialize( name, middle, pub, sys )
    super(nil)
    @name = name
    @middle = middle
    @public = pub
    @system = sys
  end

  # Renders the declaration; the public and system parts are emitted
  # (via #inspect) only when they are set.
  def to_s
    public_part = @public ? ' ' + public.inspect : ''
    system_part = @system ? ' ' + @system.inspect : ''
    "<!NOTATION #@name #@middle#{public_part}#{system_part}>"
  end

  # Appends #to_s to +output+; +indent+ is not used.
  def write( output, indent=-1 )
    output << to_s
  end
end
|
271
|
+
end
|
@@ -0,0 +1,230 @@
|
|
1
|
+
require "rexml/element"
|
2
|
+
require "rexml/xmldecl"
|
3
|
+
require "rexml/source"
|
4
|
+
require "rexml/comment"
|
5
|
+
require "rexml/doctype"
|
6
|
+
require "rexml/instruction"
|
7
|
+
require "rexml/rexml"
|
8
|
+
require "rexml/parseexception"
|
9
|
+
require "rexml/output"
|
10
|
+
require "rexml/parsers/baseparser"
|
11
|
+
require "rexml/parsers/streamparser"
|
12
|
+
require "rexml/parsers/treeparser"
|
13
|
+
|
14
|
+
module REXML
|
15
|
+
# Represents a full XML document, including PIs, a doctype, etc. A
|
16
|
+
# Document has a single child that can be accessed by root().
|
17
|
+
# Note that if you want to have an XML declaration written for a document
|
18
|
+
# you create, you must add one; REXML documents do not write a default
|
19
|
+
# declaration for you. See |DECLARATION| and |write|.
|
20
|
+
class Document < Element
|
21
|
+
# A convenient default XML declaration. If you want an XML declaration,
|
22
|
+
# the easiest way to add one is mydoc << Document::DECLARATION
|
23
|
+
# +DEPRECATED+
|
24
|
+
# Use: mydoc << XMLDecl.default
|
25
|
+
DECLARATION = XMLDecl.default
|
26
|
+
|
27
|
+
# Constructor
|
28
|
+
# @param source if supplied, must be a Document, String, or IO.
|
29
|
+
# Documents have their context and Element attributes cloned.
|
30
|
+
# Strings are expected to be valid XML documents. IOs are expected
|
31
|
+
# to be sources of valid XML documents.
|
32
|
+
# @param context if supplied, contains the context of the document;
|
33
|
+
# this should be a Hash.
|
34
|
+
def initialize( source = nil, context = {} )
  @entity_expansion_count = 0
  super()
  @context = context
  case source
  when nil
    # No source given: the document stays empty.
  when Document
    # Copying another document: adopt its context, then let the
    # superclass constructor copy from it.
    @context = source.context
    super source
  else
    build( source )
  end
end
|
46
|
+
|
47
|
+
# Identifies this node kind as the symbol :document.
def node_type
  :document
end
|
50
|
+
|
51
|
+
# Should be obvious
|
52
|
+
def clone
  # Delegates to the constructor's Document-copy branch.
  Document.new( self )
end
|
55
|
+
|
56
|
+
# According to the XML spec, a root node has no expanded name
|
57
|
+
def expanded_name
  # Always the empty string; the doctype name is deliberately not used.
  ''
end
|
62
|
+
|
63
|
+
alias :name :expanded_name
|
64
|
+
|
65
|
+
# We override this, because XMLDecls and DocTypes must go at the start
|
66
|
+
# of the document
|
67
|
+
def add( child )
  if child.kind_of? XMLDecl
    # An XML declaration always becomes the first child.
    @children.unshift child
    child.parent = self
  elsif child.kind_of? DocType
    # Find the first Element or DocType child:
    #   * none found      -> append the DocType at the end
    #   * it is a DocType -> replace it
    #   * it is an Element -> insert the DocType right before it
    anchor = @children.index { |node|
      node.kind_of?(Element) || node.kind_of?(DocType)
    }
    if anchor.nil?
      @children << child
    elsif @children[ anchor ].kind_of? DocType
      @children[ anchor ] = child
    else
      @children.insert( anchor, child )
    end
    child.parent = self
  else
    rv = super
    raise "attempted adding second root element to document" if @elements.size > 1
    rv
  end
end
|
96
|
+
alias :<< :add
|
97
|
+
|
98
|
+
# Adds an element via the superclass, then raises if the document has
# ended up with more than one element.
def add_element(arg=nil, arg2=nil)
  added = super
  if @elements.size > 1
    raise "attempted adding second root element to document"
  end
  added
end
|
103
|
+
|
104
|
+
# @return the root Element of the document, or nil if this document
|
105
|
+
# has no children.
|
106
|
+
def root
  # Index 1 into the elements collection.
  elements[1]
end
|
111
|
+
|
112
|
+
# @return the DocType child of the document, if one exists,
|
113
|
+
# and nil otherwise.
|
114
|
+
def doctype
  # First child that is a DocType, or nil.
  @children.detect { |node| node.kind_of?( DocType ) }
end
|
117
|
+
|
118
|
+
# @return the XMLDecl of this document; if no XMLDecl has been
|
119
|
+
# set, the default declaration is returned.
|
120
|
+
def xml_decl
  leading = @children[0]
  return leading if leading.kind_of?( XMLDecl )
  # No declaration yet: XMLDecl.default is inserted as the first child
  # and returned, so this reader modifies the child list.
  @children.unshift( XMLDecl.default )
  @children[0]
end
|
125
|
+
|
126
|
+
# @return the XMLDecl version of this document as a String.
|
127
|
+
# If no XMLDecl has been set, returns the default version.
|
128
|
+
def version
  # Delegates to the document's XML declaration.
  xml_decl.version
end
|
131
|
+
|
132
|
+
# @return the XMLDecl encoding of this document as a String.
|
133
|
+
# If no XMLDecl has been set, returns the default encoding.
|
134
|
+
def encoding
  # Delegates to the document's XML declaration.
  xml_decl.encoding
end
|
137
|
+
|
138
|
+
# @return the XMLDecl standalone value of this document as a String.
|
139
|
+
# If no XMLDecl has been set, returns the default setting.
|
140
|
+
def stand_alone?
  # Delegates to the document's XML declaration.
  xml_decl.stand_alone?
end
|
143
|
+
|
144
|
+
# Write the XML tree out, optionally with indent. This writes out the
|
145
|
+
# entire XML document, including XML declarations, doctype declarations,
|
146
|
+
# and processing instructions (if any are given).
|
147
|
+
#
|
148
|
+
# A controversial point is whether Document should always write the XML
|
149
|
+
# declaration (<?xml version='1.0'?>) whether or not one is given by the
|
150
|
+
# user (or source document). REXML does not write one if one was not
|
151
|
+
# specified, because it adds unnecessary bandwidth to applications such
|
152
|
+
# as XML-RPC.
|
153
|
+
#
|
154
|
+
# See also the classes in the rexml/formatters package for the proper way
|
155
|
+
# to change the default formatting of XML output
|
156
|
+
#
|
157
|
+
# _Examples_
|
158
|
+
# Document.new("<a><b/></a>").serialize
|
159
|
+
#
|
160
|
+
# output_string = ""
|
161
|
+
# tr = Transitive.new( output_string )
|
162
|
+
# Document.new("<a><b/></a>").serialize( tr )
|
163
|
+
#
|
164
|
+
# output::
|
165
|
+
# output an object which supports '<< string'; this is where the
|
166
|
+
# document will be written.
|
167
|
+
# indent::
|
168
|
+
# An integer. If -1, no indenting will be used; otherwise, the
|
169
|
+
# indentation will be twice this number of spaces, and children will be
|
170
|
+
# indented an additional amount. For a value of 3, every item will be
|
171
|
+
# indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
|
172
|
+
# trans::
|
173
|
+
# If transitive is true and indent is >= 0, then the output will be
|
174
|
+
# pretty-printed in such a way that the added whitespace does not affect
|
175
|
+
# the absolute *value* of the document -- that is, it leaves the value
|
176
|
+
# and number of Text nodes in the document unchanged.
|
177
|
+
# ie_hack::
|
178
|
+
# Internet Explorer is the worst piece of crap to have ever been
|
179
|
+
# written, with the possible exception of Windows itself. Since IE is
|
180
|
+
# unable to parse proper XML, we have to provide a hack to generate XML
|
181
|
+
# that IE's limited abilities can handle. This hack inserts a space
|
182
|
+
# before the /> on empty tags. Defaults to false
|
183
|
+
def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
  # Wrap the destination in an Output for the declared encoding unless
  # the encoding is UTF-8 or the destination is already an Output.
  if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
    output = Output.new( output, xml_decl.encoding )
  end
  formatter =
    if indent > -1
      # Indented output: transitive or pretty, depending on +trans+.
      indenting = trans ? REXML::Formatters::Transitive : REXML::Formatters::Pretty
      indenting.new( indent, ie_hack )
    else
      REXML::Formatters::Default.new( ie_hack )
    end
  formatter.write( self, output )
end
|
198
|
+
|
199
|
+
|
200
|
+
# Runs a StreamParser over +source+, passing +listener+ to it.
def self.parse_stream( source, listener )
  stream_parser = Parsers::StreamParser.new( source, listener )
  stream_parser.parse
end
|
203
|
+
|
204
|
+
# Class-wide ceiling on entity expansions per document.
@@entity_expansion_limit = 10_000

# Set the entity expansion limit. By default the limit is set to 10000.
def self.entity_expansion_limit=( val )
  @@entity_expansion_limit = val
end

# Get the entity expansion limit. By default the limit is set to 10000.
def self.entity_expansion_limit
  @@entity_expansion_limit
end
|
215
|
+
|
216
|
+
attr_reader :entity_expansion_count
|
217
|
+
|
218
|
+
# Counts one more entity expansion for this document and raises once
# the count exceeds the class-wide limit.
def record_entity_expansion
  @entity_expansion_count += 1
  return unless @entity_expansion_count > @@entity_expansion_limit
  raise "number of entity expansions exceeded, processing aborted."
end
|
224
|
+
|
225
|
+
private
|
226
|
+
# Parses +source+ with a TreeParser that is given this document.
def build( source )
  tree_parser = Parsers::TreeParser.new( source, self )
  tree_parser.parse
end
|
229
|
+
end
|
230
|
+
end
|