rubysl-rexml 1.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -2
- data/lib/rexml/attlistdecl.rb +56 -56
- data/lib/rexml/attribute.rb +155 -149
- data/lib/rexml/cdata.rb +48 -48
- data/lib/rexml/child.rb +82 -82
- data/lib/rexml/comment.rb +59 -59
- data/lib/rexml/doctype.rb +22 -24
- data/lib/rexml/document.rb +185 -129
- data/lib/rexml/dtd/attlistdecl.rb +7 -7
- data/lib/rexml/dtd/dtd.rb +41 -41
- data/lib/rexml/dtd/elementdecl.rb +13 -13
- data/lib/rexml/dtd/entitydecl.rb +49 -49
- data/lib/rexml/dtd/notationdecl.rb +32 -32
- data/lib/rexml/element.rb +122 -107
- data/lib/rexml/encoding.rb +37 -58
- data/lib/rexml/entity.rb +144 -144
- data/lib/rexml/formatters/default.rb +6 -4
- data/lib/rexml/formatters/pretty.rb +11 -8
- data/lib/rexml/formatters/transitive.rb +4 -3
- data/lib/rexml/functions.rb +33 -21
- data/lib/rexml/instruction.rb +49 -49
- data/lib/rexml/light/node.rb +190 -191
- data/lib/rexml/namespace.rb +39 -39
- data/lib/rexml/node.rb +38 -38
- data/lib/rexml/output.rb +17 -12
- data/lib/rexml/parent.rb +26 -25
- data/lib/rexml/parseexception.rb +4 -4
- data/lib/rexml/parsers/baseparser.rb +90 -61
- data/lib/rexml/parsers/lightparser.rb +41 -43
- data/lib/rexml/parsers/pullparser.rb +1 -1
- data/lib/rexml/parsers/sax2parser.rb +233 -198
- data/lib/rexml/parsers/streamparser.rb +6 -2
- data/lib/rexml/parsers/treeparser.rb +9 -6
- data/lib/rexml/parsers/ultralightparser.rb +40 -40
- data/lib/rexml/parsers/xpathparser.rb +51 -52
- data/lib/rexml/quickpath.rb +247 -248
- data/lib/rexml/rexml.rb +9 -10
- data/lib/rexml/sax2listener.rb +92 -92
- data/lib/rexml/security.rb +27 -0
- data/lib/rexml/source.rb +95 -50
- data/lib/rexml/streamlistener.rb +90 -90
- data/lib/rexml/syncenumerator.rb +3 -4
- data/lib/rexml/text.rb +157 -76
- data/lib/rexml/validation/relaxng.rb +18 -18
- data/lib/rexml/validation/validation.rb +5 -5
- data/lib/rexml/xmldecl.rb +59 -63
- data/lib/rexml/xmltokens.rb +14 -14
- data/lib/rexml/xpath.rb +67 -53
- data/lib/rexml/xpath_parser.rb +49 -38
- data/lib/rubysl/rexml.rb +1 -0
- data/lib/rubysl/rexml/version.rb +1 -1
- data/rubysl-rexml.gemspec +3 -1
- metadata +19 -28
- data/lib/rexml/encodings/CP-1252.rb +0 -103
- data/lib/rexml/encodings/EUC-JP.rb +0 -35
- data/lib/rexml/encodings/ICONV.rb +0 -22
- data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
- data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
- data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
- data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
- data/lib/rexml/encodings/UNILE.rb +0 -34
- data/lib/rexml/encodings/US-ASCII.rb +0 -30
- data/lib/rexml/encodings/UTF-16.rb +0 -35
- data/lib/rexml/encodings/UTF-8.rb +0 -18
data/lib/rexml/namespace.rb
CHANGED
@@ -1,47 +1,47 @@
|
|
1
1
|
require 'rexml/xmltokens'
|
2
2
|
|
3
3
|
module REXML
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
4
|
+
# Adds named attributes to an object.
|
5
|
+
module Namespace
|
6
|
+
# The name of the object, valid if set
|
7
|
+
attr_reader :name, :expanded_name
|
8
|
+
# The expanded name of the object, valid if name is set
|
9
|
+
attr_accessor :prefix
|
10
|
+
include XMLTokens
|
11
|
+
NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
13
|
+
# Sets the name and the expanded name
|
14
|
+
def name=( name )
|
15
|
+
@expanded_name = name
|
16
|
+
name =~ NAMESPLIT
|
17
|
+
if $1
|
18
|
+
@prefix = $1
|
19
|
+
else
|
20
|
+
@prefix = ""
|
21
|
+
@namespace = ""
|
22
|
+
end
|
23
|
+
@name = $2
|
24
|
+
end
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
26
|
+
# Compares names optionally WITH namespaces
|
27
|
+
def has_name?( other, ns=nil )
|
28
|
+
if ns
|
29
|
+
return (namespace() == ns and name() == other)
|
30
|
+
elsif other.include? ":"
|
31
|
+
return fully_expanded_name == other
|
32
|
+
else
|
33
|
+
return name == other
|
34
|
+
end
|
35
|
+
end
|
36
36
|
|
37
|
-
|
37
|
+
alias :local_name :name
|
38
38
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
39
|
+
# Fully expand the name, even if the prefix wasn't specified in the
|
40
|
+
# source file.
|
41
|
+
def fully_expanded_name
|
42
|
+
ns = prefix
|
43
|
+
return "#{ns}:#@name" if ns.size > 0
|
44
|
+
return @name
|
45
|
+
end
|
46
|
+
end
|
47
47
|
end
|
data/lib/rexml/node.rb
CHANGED
@@ -3,27 +3,27 @@ require "rexml/formatters/pretty"
|
|
3
3
|
require "rexml/formatters/default"
|
4
4
|
|
5
5
|
module REXML
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
6
|
+
# Represents a node in the tree. Nodes are never encountered except as
|
7
|
+
# superclasses of other objects. Nodes have siblings.
|
8
|
+
module Node
|
9
|
+
# @return the next sibling (nil if unset)
|
10
|
+
def next_sibling_node
|
11
|
+
return nil if @parent.nil?
|
12
|
+
@parent[ @parent.index(self) + 1 ]
|
13
|
+
end
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
15
|
+
# @return the previous sibling (nil if unset)
|
16
|
+
def previous_sibling_node
|
17
|
+
return nil if @parent.nil?
|
18
|
+
ind = @parent.index(self)
|
19
|
+
return nil if ind == 0
|
20
|
+
@parent[ ind - 1 ]
|
21
|
+
end
|
22
22
|
|
23
23
|
# indent::
|
24
24
|
# *DEPRECATED* This parameter is now ignored. See the formatters in the
|
25
25
|
# REXML::Formatters package for changing the output style.
|
26
|
-
|
26
|
+
def to_s indent=nil
|
27
27
|
unless indent.nil?
|
28
28
|
Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
|
29
29
|
f = REXML::Formatters::Pretty.new( indent )
|
@@ -33,33 +33,33 @@ module REXML
|
|
33
33
|
f.write( self, rv = "" )
|
34
34
|
end
|
35
35
|
return rv
|
36
|
-
|
36
|
+
end
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
38
|
+
def indent to, ind
|
39
|
+
if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
|
40
|
+
indentstyle = @parent.context[:indentstyle]
|
41
|
+
else
|
42
|
+
indentstyle = ' '
|
43
|
+
end
|
44
|
+
to << indentstyle*ind unless ind<1
|
45
|
+
end
|
46
46
|
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
def parent?
|
48
|
+
false;
|
49
|
+
end
|
50
50
|
|
51
51
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
52
|
+
# Visit all subnodes of +self+ recursively
|
53
|
+
def each_recursive(&block) # :yields: node
|
54
|
+
self.elements.each {|node|
|
55
|
+
block.call(node)
|
56
|
+
node.each_recursive(&block)
|
57
|
+
}
|
58
|
+
end
|
59
59
|
|
60
|
-
|
60
|
+
# Find (and return) first subnode (recursively) for which the block
|
61
61
|
# evaluates to true. Returns +nil+ if none was found.
|
62
|
-
|
62
|
+
def find_first_recursive(&block) # :yields: node
|
63
63
|
each_recursive {|node|
|
64
64
|
return node if block.call(node)
|
65
65
|
}
|
@@ -71,5 +71,5 @@ module REXML
|
|
71
71
|
def index_in_parent
|
72
72
|
parent.index(self)+1
|
73
73
|
end
|
74
|
-
|
74
|
+
end
|
75
75
|
end
|
data/lib/rexml/output.rb
CHANGED
@@ -1,24 +1,29 @@
|
|
1
1
|
require 'rexml/encoding'
|
2
2
|
|
3
3
|
module REXML
|
4
|
-
|
5
|
-
|
6
|
-
|
4
|
+
class Output
|
5
|
+
include Encoding
|
6
|
+
|
7
7
|
attr_reader :encoding
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
def initialize real_IO, encd="iso-8859-1"
|
10
|
+
@output = real_IO
|
11
|
+
self.encoding = encd
|
12
|
+
|
13
|
+
@to_utf = encoding != 'UTF-8'
|
12
14
|
|
13
|
-
|
14
|
-
|
15
|
+
if encoding == "UTF-16"
|
16
|
+
@output << "\ufeff".encode("UTF-16BE")
|
17
|
+
self.encoding = "UTF-16BE"
|
18
|
+
end
|
19
|
+
end
|
15
20
|
|
16
|
-
|
17
|
-
|
18
|
-
|
21
|
+
def <<( content )
|
22
|
+
@output << (@to_utf ? self.encode(content) : content)
|
23
|
+
end
|
19
24
|
|
20
25
|
def to_s
|
21
26
|
"Output[#{encoding}]"
|
22
27
|
end
|
23
|
-
|
28
|
+
end
|
24
29
|
end
|
data/lib/rexml/parent.rb
CHANGED
@@ -6,14 +6,14 @@ module REXML
|
|
6
6
|
# object.
|
7
7
|
class Parent < Child
|
8
8
|
include Enumerable
|
9
|
-
|
9
|
+
|
10
10
|
# Constructor
|
11
11
|
# @param parent if supplied, will be set as the parent of this object
|
12
12
|
def initialize parent=nil
|
13
13
|
super(parent)
|
14
14
|
@children = []
|
15
15
|
end
|
16
|
-
|
16
|
+
|
17
17
|
def add( object )
|
18
18
|
#puts "PARENT GOTS #{size} CHILDREN"
|
19
19
|
object.parent = self
|
@@ -21,47 +21,48 @@ module REXML
|
|
21
21
|
#puts "PARENT NOW GOTS #{size} CHILDREN"
|
22
22
|
object
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
alias :push :add
|
26
26
|
alias :<< :push
|
27
|
-
|
27
|
+
|
28
28
|
def unshift( object )
|
29
29
|
object.parent = self
|
30
30
|
@children.unshift object
|
31
31
|
end
|
32
|
-
|
32
|
+
|
33
33
|
def delete( object )
|
34
34
|
found = false
|
35
35
|
@children.delete_if {|c| c.equal?(object) and found = true }
|
36
36
|
object.parent = nil if found
|
37
|
+
found ? object : nil
|
37
38
|
end
|
38
|
-
|
39
|
+
|
39
40
|
def each(&block)
|
40
41
|
@children.each(&block)
|
41
42
|
end
|
42
|
-
|
43
|
+
|
43
44
|
def delete_if( &block )
|
44
45
|
@children.delete_if(&block)
|
45
46
|
end
|
46
|
-
|
47
|
+
|
47
48
|
def delete_at( index )
|
48
49
|
@children.delete_at index
|
49
50
|
end
|
50
|
-
|
51
|
+
|
51
52
|
def each_index( &block )
|
52
53
|
@children.each_index(&block)
|
53
54
|
end
|
54
|
-
|
55
|
+
|
55
56
|
# Fetches a child at a given index
|
56
57
|
# @param index the Integer index of the child to fetch
|
57
58
|
def []( index )
|
58
59
|
@children[index]
|
59
60
|
end
|
60
|
-
|
61
|
+
|
61
62
|
alias :each_child :each
|
62
|
-
|
63
|
-
|
64
|
-
|
63
|
+
|
64
|
+
|
65
|
+
|
65
66
|
# Set an index entry. See Array.[]=
|
66
67
|
# @param index the index of the element to set
|
67
68
|
# @param opt either the object to set, or an Integer length
|
@@ -71,7 +72,7 @@ module REXML
|
|
71
72
|
args[-1].parent = self
|
72
73
|
@children[*args[0..-2]] = args[-1]
|
73
74
|
end
|
74
|
-
|
75
|
+
|
75
76
|
# Inserts an child before another child
|
76
77
|
# @param child1 this is either an xpath or an Element. If an Element,
|
77
78
|
# child2 will be inserted before child1 in the child list of the parent.
|
@@ -91,7 +92,7 @@ module REXML
|
|
91
92
|
end
|
92
93
|
self
|
93
94
|
end
|
94
|
-
|
95
|
+
|
95
96
|
# Inserts an child after another child
|
96
97
|
# @param child1 this is either an xpath or an Element. If an Element,
|
97
98
|
# child2 will be inserted after child1 in the child list of the parent.
|
@@ -111,11 +112,11 @@ module REXML
|
|
111
112
|
end
|
112
113
|
self
|
113
114
|
end
|
114
|
-
|
115
|
+
|
115
116
|
def to_a
|
116
117
|
@children.dup
|
117
118
|
end
|
118
|
-
|
119
|
+
|
119
120
|
# Fetches the index of a given child
|
120
121
|
# @param child the child to get the index of
|
121
122
|
# @return the index of the child, or nil if the object is not a child
|
@@ -125,24 +126,24 @@ module REXML
|
|
125
126
|
@children.find { |i| count += 1 ; i.hash == child.hash }
|
126
127
|
count
|
127
128
|
end
|
128
|
-
|
129
|
+
|
129
130
|
# @return the number of children of this parent
|
130
131
|
def size
|
131
132
|
@children.size
|
132
133
|
end
|
133
|
-
|
134
|
+
|
134
135
|
alias :length :size
|
135
|
-
|
136
|
+
|
136
137
|
# Replaces one child with another, making sure the nodelist is correct
|
137
138
|
# @param to_replace the child to replace (must be a Child)
|
138
|
-
# @param replacement the child to insert into the nodelist (must be a
|
139
|
+
# @param replacement the child to insert into the nodelist (must be a
|
139
140
|
# Child)
|
140
141
|
def replace_child( to_replace, replacement )
|
141
142
|
@children.map! {|c| c.equal?( to_replace ) ? replacement : c }
|
142
143
|
to_replace.parent = nil
|
143
144
|
replacement.parent = self
|
144
145
|
end
|
145
|
-
|
146
|
+
|
146
147
|
# Deeply clones this object. This creates a complete duplicate of this
|
147
148
|
# Parent, including all descendants.
|
148
149
|
def deep_clone
|
@@ -156,9 +157,9 @@ module REXML
|
|
156
157
|
end
|
157
158
|
cl
|
158
159
|
end
|
159
|
-
|
160
|
+
|
160
161
|
alias :children :to_a
|
161
|
-
|
162
|
+
|
162
163
|
def parent?
|
163
164
|
true
|
164
165
|
end
|
data/lib/rexml/parseexception.rb
CHANGED
@@ -28,9 +28,9 @@ module REXML
|
|
28
28
|
err << "\nLine: #{line}\n"
|
29
29
|
err << "Position: #{position}\n"
|
30
30
|
err << "Last 80 unconsumed characters:\n"
|
31
|
-
err << @source.buffer[0..80].gsub(/\n/, ' ')
|
31
|
+
err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
err
|
35
35
|
end
|
36
36
|
|
@@ -40,12 +40,12 @@ module REXML
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def line
|
43
|
-
@source.current_line[2] if @source and defined? @source.current_line and
|
43
|
+
@source.current_line[2] if @source and defined? @source.current_line and
|
44
44
|
@source.current_line
|
45
45
|
end
|
46
46
|
|
47
47
|
def context
|
48
48
|
@source.current_line
|
49
49
|
end
|
50
|
-
end
|
50
|
+
end
|
51
51
|
end
|
data/lib/rexml/parsers/baseparser.rb
CHANGED
@@ -25,24 +25,31 @@ module REXML
|
|
25
25
|
#
|
26
26
|
# Nat Price gave me some good ideas for the API.
|
27
27
|
class BaseParser
|
28
|
-
|
28
|
+
LETTER = '[:alpha:]'
|
29
|
+
DIGIT = '[:digit:]'
|
30
|
+
|
31
|
+
COMBININGCHAR = '' # TODO
|
32
|
+
EXTENDER = '' # TODO
|
33
|
+
|
34
|
+
NCNAME_STR= "[#{LETTER}_:][-[:alnum:]._:#{COMBININGCHAR}#{EXTENDER}]*"
|
29
35
|
NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
|
30
36
|
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
31
37
|
|
32
|
-
NAMECHAR = '[\-\w
|
38
|
+
NAMECHAR = '[\-\w\.:]'
|
33
39
|
NAME = "([\\w:]#{NAMECHAR}*)"
|
34
40
|
NMTOKEN = "(?:#{NAMECHAR})+"
|
35
41
|
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
|
36
|
-
REFERENCE = "(
|
42
|
+
REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
|
37
43
|
REFERENCE_RE = /#{REFERENCE}/
|
38
44
|
|
39
45
|
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
46
|
+
DOCTYPE_END = /\A\s*\]\s*>/um
|
40
47
|
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
|
41
48
|
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
|
42
49
|
COMMENT_START = /\A<!--/u
|
43
50
|
COMMENT_PATTERN = /<!--(.*?)-->/um
|
44
51
|
CDATA_START = /\A<!\[CDATA\[/u
|
45
|
-
CDATA_END =
|
52
|
+
CDATA_END = /\A\s*\]\s*>/um
|
46
53
|
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
|
47
54
|
XMLDECL_START = /\A<\?xml\s/u;
|
48
55
|
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
@@ -53,13 +60,13 @@ module REXML
|
|
53
60
|
|
54
61
|
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
55
62
|
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
|
56
|
-
STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
|
63
|
+
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
|
57
64
|
|
58
|
-
ENTITY_START =
|
65
|
+
ENTITY_START = /\A\s*<!ENTITY/
|
59
66
|
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
|
60
|
-
ELEMENTDECL_START =
|
61
|
-
ELEMENTDECL_PATTERN =
|
62
|
-
SYSTEMENTITY =
|
67
|
+
ELEMENTDECL_START = /\A\s*<!ELEMENT/um
|
68
|
+
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
|
69
|
+
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
|
63
70
|
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
|
64
71
|
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
|
65
72
|
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
|
@@ -68,11 +75,11 @@ module REXML
|
|
68
75
|
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
|
69
76
|
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
|
70
77
|
ATTDEF_RE = /#{ATTDEF}/
|
71
|
-
ATTLISTDECL_START =
|
72
|
-
ATTLISTDECL_PATTERN =
|
73
|
-
NOTATIONDECL_START =
|
74
|
-
PUBLIC =
|
75
|
-
SYSTEM =
|
78
|
+
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
|
79
|
+
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
80
|
+
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
81
|
+
PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
|
82
|
+
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
|
76
83
|
|
77
84
|
TEXT_PATTERN = /\A([^<]*)/um
|
78
85
|
|
@@ -92,11 +99,11 @@ module REXML
|
|
92
99
|
|
93
100
|
EREFERENCE = /&(?!#{NAME};)/
|
94
101
|
|
95
|
-
DEFAULT_ENTITIES = {
|
96
|
-
'gt' => [/&gt;/, '&gt;', '>', />/],
|
97
|
-
'lt' => [/&lt;/, '&lt;', '<', /</],
|
98
|
-
'quot' => [/&quot;/, '&quot;', '"', /"/],
|
99
|
-
"apos" => [/&apos;/, "&apos;", "'", /'/]
|
102
|
+
DEFAULT_ENTITIES = {
|
103
|
+
'gt' => [/&gt;/, '&gt;', '>', />/],
|
104
|
+
'lt' => [/&lt;/, '&lt;', '<', /</],
|
105
|
+
'quot' => [/&quot;/, '&quot;', '"', /"/],
|
106
|
+
"apos" => [/&apos;/, "&apos;", "'", /'/]
|
100
107
|
}
|
101
108
|
|
102
109
|
|
@@ -108,22 +115,10 @@ module REXML
|
|
108
115
|
|
109
116
|
def initialize( source )
|
110
117
|
self.stream = source
|
118
|
+
@listeners = []
|
111
119
|
end
|
112
120
|
|
113
121
|
def add_listener( listener )
|
114
|
-
if !defined?(@listeners) or !@listeners
|
115
|
-
@listeners = []
|
116
|
-
instance_eval <<-EOL
|
117
|
-
alias :_old_pull :pull
|
118
|
-
def pull
|
119
|
-
event = _old_pull
|
120
|
-
@listeners.each do |listener|
|
121
|
-
listener.receive event
|
122
|
-
end
|
123
|
-
event
|
124
|
-
end
|
125
|
-
EOL
|
126
|
-
end
|
127
122
|
@listeners << listener
|
128
123
|
end
|
129
124
|
|
@@ -167,9 +162,9 @@ module REXML
|
|
167
162
|
# Peek at the +depth+ event in the stack. The first element on the stack
|
168
163
|
# is at depth 0. If +depth+ is -1, will parse to the end of the input
|
169
164
|
# stream and return the last event, which is always :end_document.
|
170
|
-
# Be aware that this causes the stream to be parsed up to the +depth+
|
171
|
-
# event, so you can effectively pre-parse the entire document (pull the
|
172
|
-
# entire thing into memory) using this method.
|
165
|
+
# Be aware that this causes the stream to be parsed up to the +depth+
|
166
|
+
# event, so you can effectively pre-parse the entire document (pull the
|
167
|
+
# entire thing into memory) using this method.
|
173
168
|
def peek depth=0
|
174
169
|
raise %Q[Illegal argument "#{depth}"] if depth < -1
|
175
170
|
temp = []
|
@@ -186,6 +181,14 @@ module REXML
|
|
186
181
|
|
187
182
|
# Returns the next event. This is a +PullEvent+ object.
|
188
183
|
def pull
|
184
|
+
pull_event.tap do |event|
|
185
|
+
@listeners.each do |listener|
|
186
|
+
listener.receive event
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
def pull_event
|
189
192
|
if @closed
|
190
193
|
x, @closed = @closed, nil
|
191
194
|
return [ :end_element, x ]
|
@@ -210,7 +213,12 @@ module REXML
|
|
210
213
|
version = version[1] unless version.nil?
|
211
214
|
encoding = ENCODING.match(results)
|
212
215
|
encoding = encoding[1] unless encoding.nil?
|
213
|
-
|
216
|
+
if need_source_encoding_update?(encoding)
|
217
|
+
@source.encoding = encoding
|
218
|
+
end
|
219
|
+
if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
|
220
|
+
encoding = "UTF-16"
|
221
|
+
end
|
214
222
|
standalone = STANDALONE.match(results)
|
215
223
|
standalone = standalone[1] unless standalone.nil?
|
216
224
|
return [ :xmldecl, version, encoding, standalone ]
|
@@ -242,12 +250,15 @@ module REXML
|
|
242
250
|
@document_status = :after_doctype
|
243
251
|
@source.read if @source.buffer.size<2
|
244
252
|
md = @source.match(/\s*/um, true)
|
253
|
+
if @source.encoding == "UTF-8"
|
254
|
+
@source.buffer.force_encoding(::Encoding::UTF_8)
|
255
|
+
end
|
245
256
|
end
|
246
257
|
end
|
247
258
|
if @document_status == :in_doctype
|
248
259
|
md = @source.match(/\s*(.*?>)/um)
|
249
260
|
case md[1]
|
250
|
-
when SYSTEMENTITY
|
261
|
+
when SYSTEMENTITY
|
251
262
|
match = @source.match( SYSTEMENTITY, true )[1]
|
252
263
|
return [ :externalentity, match ]
|
253
264
|
|
@@ -272,7 +283,8 @@ module REXML
|
|
272
283
|
# External reference
|
273
284
|
match[3] = match[3][1..-2] # PUBID
|
274
285
|
match[4] = match[4][1..-2] # HREF
|
275
|
-
|
286
|
+
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
287
|
+
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
276
288
|
else
|
277
289
|
match[2] = match[2][1..-2]
|
278
290
|
match.pop if match.size == 4
|
@@ -312,9 +324,9 @@ module REXML
|
|
312
324
|
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
|
313
325
|
end
|
314
326
|
return [ :notationdecl, *vals ]
|
315
|
-
when
|
327
|
+
when DOCTYPE_END
|
316
328
|
@document_status = :after_doctype
|
317
|
-
@source.match(
|
329
|
+
@source.match( DOCTYPE_END, true )
|
318
330
|
return [ :end_doctype ]
|
319
331
|
end
|
320
332
|
end
|
@@ -326,7 +338,7 @@ module REXML
|
|
326
338
|
#md = @source.match_to_consume( '>', CLOSE_MATCH)
|
327
339
|
md = @source.match( CLOSE_MATCH, true )
|
328
340
|
raise REXML::ParseException.new( "Missing end tag for "+
|
329
|
-
"'#{last_tag}' (got \"#{md[1]}\")",
|
341
|
+
"'#{last_tag}' (got \"#{md[1]}\")",
|
330
342
|
@source) unless last_tag == md[1]
|
331
343
|
return [ :end_element, last_tag ]
|
332
344
|
elsif @source.buffer[1] == ?!
|
@@ -335,6 +347,12 @@ module REXML
|
|
335
347
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
336
348
|
if md[0][2] == ?-
|
337
349
|
md = @source.match( COMMENT_PATTERN, true )
|
350
|
+
|
351
|
+
case md[1]
|
352
|
+
when /--/, /-\z/
|
353
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
354
|
+
end
|
355
|
+
|
338
356
|
return [ :comment, md[1] ] if md
|
339
357
|
else
|
340
358
|
md = @source.match( CDATA_PATTERN, true )
|
@@ -353,7 +371,7 @@ module REXML
|
|
353
371
|
unless md
|
354
372
|
# Check for missing attribute quotes
|
355
373
|
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
|
356
|
-
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
374
|
+
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
357
375
|
end
|
358
376
|
attributes = {}
|
359
377
|
prefixes = Set.new
|
@@ -362,27 +380,33 @@ module REXML
|
|
362
380
|
if md[4].size > 0
|
363
381
|
attrs = md[4].scan( ATTRIBUTE_PATTERN )
|
364
382
|
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
|
365
|
-
attrs.each
|
366
|
-
if
|
367
|
-
if
|
368
|
-
if
|
383
|
+
attrs.each do |attr_name, prefix, local_part, quote, value|
|
384
|
+
if prefix == "xmlns"
|
385
|
+
if local_part == "xml"
|
386
|
+
if value != "http://www.w3.org/XML/1998/namespace"
|
369
387
|
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
370
388
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
371
389
|
raise REXML::ParseException.new( msg, @source, self )
|
372
390
|
end
|
373
|
-
elsif
|
391
|
+
elsif local_part == "xmlns"
|
374
392
|
msg = "The 'xmlns' prefix must not be declared "+
|
375
393
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
376
394
|
raise REXML::ParseException.new( msg, @source, self)
|
377
395
|
end
|
378
|
-
curr_ns <<
|
379
|
-
elsif
|
380
|
-
prefixes <<
|
396
|
+
curr_ns << local_part
|
397
|
+
elsif prefix
|
398
|
+
prefixes << prefix unless prefix == "xml"
|
399
|
+
end
|
400
|
+
|
401
|
+
if attributes.has_key?(attr_name)
|
402
|
+
msg = "Duplicate attribute #{attr_name.inspect}"
|
403
|
+
raise REXML::ParseException.new(msg, @source, self)
|
381
404
|
end
|
382
|
-
|
383
|
-
|
405
|
+
|
406
|
+
attributes[attr_name] = value
|
407
|
+
end
|
384
408
|
end
|
385
|
-
|
409
|
+
|
386
410
|
# Verify that all of the prefixes have been defined
|
387
411
|
for prefix in prefixes
|
388
412
|
unless @nsstack.find{|k| k.member?(prefix)}
|
@@ -419,6 +443,7 @@ module REXML
|
|
419
443
|
end
|
420
444
|
return [ :dummy ]
|
421
445
|
end
|
446
|
+
private :pull_event
|
422
447
|
|
423
448
|
def entity( reference, entities )
|
424
449
|
value = nil
|
@@ -436,7 +461,7 @@ module REXML
|
|
436
461
|
# Doing it like this rather than in a loop improves the speed
|
437
462
|
copy.gsub!( EREFERENCE, '&amp;' )
|
438
463
|
entities.each do |key, value|
|
439
|
-
copy.gsub!( value, "&#{key};" ) unless entity_filter and
|
464
|
+
copy.gsub!( value, "&#{key};" ) unless entity_filter and
|
440
465
|
entity_filter.include?(entity)
|
441
466
|
end if entities
|
442
467
|
copy.gsub!( EREFERENCE, '&amp;' )
|
@@ -452,7 +477,7 @@ module REXML
|
|
452
477
|
rv.gsub!( /\r\n?/, "\n" )
|
453
478
|
matches = rv.scan( REFERENCE_RE )
|
454
479
|
return rv if matches.size == 0
|
455
|
-
rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
|
480
|
+
rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
|
456
481
|
m=$1
|
457
482
|
m = "0#{m}" if m[0] == ?x
|
458
483
|
[Integer(m)].pack('U*')
|
@@ -465,19 +490,23 @@ module REXML
|
|
465
490
|
if entity_value
|
466
491
|
re = /&#{entity_reference};/
|
467
492
|
rv.gsub!( re, entity_value )
|
493
|
+
else
|
494
|
+
er = DEFAULT_ENTITIES[entity_reference]
|
495
|
+
rv.gsub!( er[0], er[2] ) if er
|
468
496
|
end
|
469
497
|
end
|
470
498
|
end
|
471
|
-
matches.each do |entity_reference|
|
472
|
-
unless filter and filter.include?(entity_reference)
|
473
|
-
er = DEFAULT_ENTITIES[entity_reference]
|
474
|
-
rv.gsub!( er[0], er[2] ) if er
|
475
|
-
end
|
476
|
-
end
|
477
499
|
rv.gsub!( /&amp;/, '&' )
|
478
500
|
end
|
479
501
|
rv
|
480
502
|
end
|
503
|
+
|
504
|
+
private
|
505
|
+
def need_source_encoding_update?(xml_declaration_encoding)
|
506
|
+
return false if xml_declaration_encoding.nil?
|
507
|
+
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
508
|
+
true
|
509
|
+
end
|
481
510
|
end
|
482
511
|
end
|
483
512
|
end
|