rubysl-rexml 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -2
- data/lib/rexml/attlistdecl.rb +56 -56
- data/lib/rexml/attribute.rb +155 -149
- data/lib/rexml/cdata.rb +48 -48
- data/lib/rexml/child.rb +82 -82
- data/lib/rexml/comment.rb +59 -59
- data/lib/rexml/doctype.rb +22 -24
- data/lib/rexml/document.rb +185 -129
- data/lib/rexml/dtd/attlistdecl.rb +7 -7
- data/lib/rexml/dtd/dtd.rb +41 -41
- data/lib/rexml/dtd/elementdecl.rb +13 -13
- data/lib/rexml/dtd/entitydecl.rb +49 -49
- data/lib/rexml/dtd/notationdecl.rb +32 -32
- data/lib/rexml/element.rb +122 -107
- data/lib/rexml/encoding.rb +37 -58
- data/lib/rexml/entity.rb +144 -144
- data/lib/rexml/formatters/default.rb +6 -4
- data/lib/rexml/formatters/pretty.rb +11 -8
- data/lib/rexml/formatters/transitive.rb +4 -3
- data/lib/rexml/functions.rb +33 -21
- data/lib/rexml/instruction.rb +49 -49
- data/lib/rexml/light/node.rb +190 -191
- data/lib/rexml/namespace.rb +39 -39
- data/lib/rexml/node.rb +38 -38
- data/lib/rexml/output.rb +17 -12
- data/lib/rexml/parent.rb +26 -25
- data/lib/rexml/parseexception.rb +4 -4
- data/lib/rexml/parsers/baseparser.rb +90 -61
- data/lib/rexml/parsers/lightparser.rb +41 -43
- data/lib/rexml/parsers/pullparser.rb +1 -1
- data/lib/rexml/parsers/sax2parser.rb +233 -198
- data/lib/rexml/parsers/streamparser.rb +6 -2
- data/lib/rexml/parsers/treeparser.rb +9 -6
- data/lib/rexml/parsers/ultralightparser.rb +40 -40
- data/lib/rexml/parsers/xpathparser.rb +51 -52
- data/lib/rexml/quickpath.rb +247 -248
- data/lib/rexml/rexml.rb +9 -10
- data/lib/rexml/sax2listener.rb +92 -92
- data/lib/rexml/security.rb +27 -0
- data/lib/rexml/source.rb +95 -50
- data/lib/rexml/streamlistener.rb +90 -90
- data/lib/rexml/syncenumerator.rb +3 -4
- data/lib/rexml/text.rb +157 -76
- data/lib/rexml/validation/relaxng.rb +18 -18
- data/lib/rexml/validation/validation.rb +5 -5
- data/lib/rexml/xmldecl.rb +59 -63
- data/lib/rexml/xmltokens.rb +14 -14
- data/lib/rexml/xpath.rb +67 -53
- data/lib/rexml/xpath_parser.rb +49 -38
- data/lib/rubysl/rexml.rb +1 -0
- data/lib/rubysl/rexml/version.rb +1 -1
- data/rubysl-rexml.gemspec +3 -1
- metadata +19 -28
- data/lib/rexml/encodings/CP-1252.rb +0 -103
- data/lib/rexml/encodings/EUC-JP.rb +0 -35
- data/lib/rexml/encodings/ICONV.rb +0 -22
- data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
- data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
- data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
- data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
- data/lib/rexml/encodings/UNILE.rb +0 -34
- data/lib/rexml/encodings/US-ASCII.rb +0 -30
- data/lib/rexml/encodings/UTF-16.rb +0 -35
- data/lib/rexml/encodings/UTF-8.rb +0 -18
data/lib/rexml/namespace.rb
CHANGED
@@ -1,47 +1,47 @@
|
|
1
1
|
require 'rexml/xmltokens'
|
2
2
|
|
3
3
|
module REXML
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
4
|
+
# Adds named attributes to an object.
|
5
|
+
module Namespace
|
6
|
+
# The name of the object, valid if set
|
7
|
+
attr_reader :name, :expanded_name
|
8
|
+
# The expanded name of the object, valid if name is set
|
9
|
+
attr_accessor :prefix
|
10
|
+
include XMLTokens
|
11
|
+
NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
13
|
+
# Sets the name and the expanded name
|
14
|
+
def name=( name )
|
15
|
+
@expanded_name = name
|
16
|
+
name =~ NAMESPLIT
|
17
|
+
if $1
|
18
|
+
@prefix = $1
|
19
|
+
else
|
20
|
+
@prefix = ""
|
21
|
+
@namespace = ""
|
22
|
+
end
|
23
|
+
@name = $2
|
24
|
+
end
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
26
|
+
# Compares names optionally WITH namespaces
|
27
|
+
def has_name?( other, ns=nil )
|
28
|
+
if ns
|
29
|
+
return (namespace() == ns and name() == other)
|
30
|
+
elsif other.include? ":"
|
31
|
+
return fully_expanded_name == other
|
32
|
+
else
|
33
|
+
return name == other
|
34
|
+
end
|
35
|
+
end
|
36
36
|
|
37
|
-
|
37
|
+
alias :local_name :name
|
38
38
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
39
|
+
# Fully expand the name, even if the prefix wasn't specified in the
|
40
|
+
# source file.
|
41
|
+
def fully_expanded_name
|
42
|
+
ns = prefix
|
43
|
+
return "#{ns}:#@name" if ns.size > 0
|
44
|
+
return @name
|
45
|
+
end
|
46
|
+
end
|
47
47
|
end
|
data/lib/rexml/node.rb
CHANGED
@@ -3,27 +3,27 @@ require "rexml/formatters/pretty"
|
|
3
3
|
require "rexml/formatters/default"
|
4
4
|
|
5
5
|
module REXML
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
6
|
+
# Represents a node in the tree. Nodes are never encountered except as
|
7
|
+
# superclasses of other objects. Nodes have siblings.
|
8
|
+
module Node
|
9
|
+
# @return the next sibling (nil if unset)
|
10
|
+
def next_sibling_node
|
11
|
+
return nil if @parent.nil?
|
12
|
+
@parent[ @parent.index(self) + 1 ]
|
13
|
+
end
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
15
|
+
# @return the previous sibling (nil if unset)
|
16
|
+
def previous_sibling_node
|
17
|
+
return nil if @parent.nil?
|
18
|
+
ind = @parent.index(self)
|
19
|
+
return nil if ind == 0
|
20
|
+
@parent[ ind - 1 ]
|
21
|
+
end
|
22
22
|
|
23
23
|
# indent::
|
24
24
|
# *DEPRECATED* This parameter is now ignored. See the formatters in the
|
25
25
|
# REXML::Formatters package for changing the output style.
|
26
|
-
|
26
|
+
def to_s indent=nil
|
27
27
|
unless indent.nil?
|
28
28
|
Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
|
29
29
|
f = REXML::Formatters::Pretty.new( indent )
|
@@ -33,33 +33,33 @@ module REXML
|
|
33
33
|
f.write( self, rv = "" )
|
34
34
|
end
|
35
35
|
return rv
|
36
|
-
|
36
|
+
end
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
38
|
+
def indent to, ind
|
39
|
+
if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
|
40
|
+
indentstyle = @parent.context[:indentstyle]
|
41
|
+
else
|
42
|
+
indentstyle = ' '
|
43
|
+
end
|
44
|
+
to << indentstyle*ind unless ind<1
|
45
|
+
end
|
46
46
|
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
def parent?
|
48
|
+
false;
|
49
|
+
end
|
50
50
|
|
51
51
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
52
|
+
# Visit all subnodes of +self+ recursively
|
53
|
+
def each_recursive(&block) # :yields: node
|
54
|
+
self.elements.each {|node|
|
55
|
+
block.call(node)
|
56
|
+
node.each_recursive(&block)
|
57
|
+
}
|
58
|
+
end
|
59
59
|
|
60
|
-
|
60
|
+
# Find (and return) first subnode (recursively) for which the block
|
61
61
|
# evaluates to true. Returns +nil+ if none was found.
|
62
|
-
|
62
|
+
def find_first_recursive(&block) # :yields: node
|
63
63
|
each_recursive {|node|
|
64
64
|
return node if block.call(node)
|
65
65
|
}
|
@@ -71,5 +71,5 @@ module REXML
|
|
71
71
|
def index_in_parent
|
72
72
|
parent.index(self)+1
|
73
73
|
end
|
74
|
-
|
74
|
+
end
|
75
75
|
end
|
data/lib/rexml/output.rb
CHANGED
@@ -1,24 +1,29 @@
|
|
1
1
|
require 'rexml/encoding'
|
2
2
|
|
3
3
|
module REXML
|
4
|
-
|
5
|
-
|
6
|
-
|
4
|
+
class Output
|
5
|
+
include Encoding
|
6
|
+
|
7
7
|
attr_reader :encoding
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
def initialize real_IO, encd="iso-8859-1"
|
10
|
+
@output = real_IO
|
11
|
+
self.encoding = encd
|
12
|
+
|
13
|
+
@to_utf = encoding != 'UTF-8'
|
12
14
|
|
13
|
-
|
14
|
-
|
15
|
+
if encoding == "UTF-16"
|
16
|
+
@output << "\ufeff".encode("UTF-16BE")
|
17
|
+
self.encoding = "UTF-16BE"
|
18
|
+
end
|
19
|
+
end
|
15
20
|
|
16
|
-
|
17
|
-
|
18
|
-
|
21
|
+
def <<( content )
|
22
|
+
@output << (@to_utf ? self.encode(content) : content)
|
23
|
+
end
|
19
24
|
|
20
25
|
def to_s
|
21
26
|
"Output[#{encoding}]"
|
22
27
|
end
|
23
|
-
|
28
|
+
end
|
24
29
|
end
|
data/lib/rexml/parent.rb
CHANGED
@@ -6,14 +6,14 @@ module REXML
|
|
6
6
|
# object.
|
7
7
|
class Parent < Child
|
8
8
|
include Enumerable
|
9
|
-
|
9
|
+
|
10
10
|
# Constructor
|
11
11
|
# @param parent if supplied, will be set as the parent of this object
|
12
12
|
def initialize parent=nil
|
13
13
|
super(parent)
|
14
14
|
@children = []
|
15
15
|
end
|
16
|
-
|
16
|
+
|
17
17
|
def add( object )
|
18
18
|
#puts "PARENT GOTS #{size} CHILDREN"
|
19
19
|
object.parent = self
|
@@ -21,47 +21,48 @@ module REXML
|
|
21
21
|
#puts "PARENT NOW GOTS #{size} CHILDREN"
|
22
22
|
object
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
alias :push :add
|
26
26
|
alias :<< :push
|
27
|
-
|
27
|
+
|
28
28
|
def unshift( object )
|
29
29
|
object.parent = self
|
30
30
|
@children.unshift object
|
31
31
|
end
|
32
|
-
|
32
|
+
|
33
33
|
def delete( object )
|
34
34
|
found = false
|
35
35
|
@children.delete_if {|c| c.equal?(object) and found = true }
|
36
36
|
object.parent = nil if found
|
37
|
+
found ? object : nil
|
37
38
|
end
|
38
|
-
|
39
|
+
|
39
40
|
def each(&block)
|
40
41
|
@children.each(&block)
|
41
42
|
end
|
42
|
-
|
43
|
+
|
43
44
|
def delete_if( &block )
|
44
45
|
@children.delete_if(&block)
|
45
46
|
end
|
46
|
-
|
47
|
+
|
47
48
|
def delete_at( index )
|
48
49
|
@children.delete_at index
|
49
50
|
end
|
50
|
-
|
51
|
+
|
51
52
|
def each_index( &block )
|
52
53
|
@children.each_index(&block)
|
53
54
|
end
|
54
|
-
|
55
|
+
|
55
56
|
# Fetches a child at a given index
|
56
57
|
# @param index the Integer index of the child to fetch
|
57
58
|
def []( index )
|
58
59
|
@children[index]
|
59
60
|
end
|
60
|
-
|
61
|
+
|
61
62
|
alias :each_child :each
|
62
|
-
|
63
|
-
|
64
|
-
|
63
|
+
|
64
|
+
|
65
|
+
|
65
66
|
# Set an index entry. See Array.[]=
|
66
67
|
# @param index the index of the element to set
|
67
68
|
# @param opt either the object to set, or an Integer length
|
@@ -71,7 +72,7 @@ module REXML
|
|
71
72
|
args[-1].parent = self
|
72
73
|
@children[*args[0..-2]] = args[-1]
|
73
74
|
end
|
74
|
-
|
75
|
+
|
75
76
|
# Inserts an child before another child
|
76
77
|
# @param child1 this is either an xpath or an Element. If an Element,
|
77
78
|
# child2 will be inserted before child1 in the child list of the parent.
|
@@ -91,7 +92,7 @@ module REXML
|
|
91
92
|
end
|
92
93
|
self
|
93
94
|
end
|
94
|
-
|
95
|
+
|
95
96
|
# Inserts an child after another child
|
96
97
|
# @param child1 this is either an xpath or an Element. If an Element,
|
97
98
|
# child2 will be inserted after child1 in the child list of the parent.
|
@@ -111,11 +112,11 @@ module REXML
|
|
111
112
|
end
|
112
113
|
self
|
113
114
|
end
|
114
|
-
|
115
|
+
|
115
116
|
def to_a
|
116
117
|
@children.dup
|
117
118
|
end
|
118
|
-
|
119
|
+
|
119
120
|
# Fetches the index of a given child
|
120
121
|
# @param child the child to get the index of
|
121
122
|
# @return the index of the child, or nil if the object is not a child
|
@@ -125,24 +126,24 @@ module REXML
|
|
125
126
|
@children.find { |i| count += 1 ; i.hash == child.hash }
|
126
127
|
count
|
127
128
|
end
|
128
|
-
|
129
|
+
|
129
130
|
# @return the number of children of this parent
|
130
131
|
def size
|
131
132
|
@children.size
|
132
133
|
end
|
133
|
-
|
134
|
+
|
134
135
|
alias :length :size
|
135
|
-
|
136
|
+
|
136
137
|
# Replaces one child with another, making sure the nodelist is correct
|
137
138
|
# @param to_replace the child to replace (must be a Child)
|
138
|
-
# @param replacement the child to insert into the nodelist (must be a
|
139
|
+
# @param replacement the child to insert into the nodelist (must be a
|
139
140
|
# Child)
|
140
141
|
def replace_child( to_replace, replacement )
|
141
142
|
@children.map! {|c| c.equal?( to_replace ) ? replacement : c }
|
142
143
|
to_replace.parent = nil
|
143
144
|
replacement.parent = self
|
144
145
|
end
|
145
|
-
|
146
|
+
|
146
147
|
# Deeply clones this object. This creates a complete duplicate of this
|
147
148
|
# Parent, including all descendants.
|
148
149
|
def deep_clone
|
@@ -156,9 +157,9 @@ module REXML
|
|
156
157
|
end
|
157
158
|
cl
|
158
159
|
end
|
159
|
-
|
160
|
+
|
160
161
|
alias :children :to_a
|
161
|
-
|
162
|
+
|
162
163
|
def parent?
|
163
164
|
true
|
164
165
|
end
|
data/lib/rexml/parseexception.rb
CHANGED
@@ -28,9 +28,9 @@ module REXML
|
|
28
28
|
err << "\nLine: #{line}\n"
|
29
29
|
err << "Position: #{position}\n"
|
30
30
|
err << "Last 80 unconsumed characters:\n"
|
31
|
-
err << @source.buffer[0..80].gsub(/\n/, ' ')
|
31
|
+
err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
err
|
35
35
|
end
|
36
36
|
|
@@ -40,12 +40,12 @@ module REXML
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def line
|
43
|
-
@source.current_line[2] if @source and defined? @source.current_line and
|
43
|
+
@source.current_line[2] if @source and defined? @source.current_line and
|
44
44
|
@source.current_line
|
45
45
|
end
|
46
46
|
|
47
47
|
def context
|
48
48
|
@source.current_line
|
49
49
|
end
|
50
|
-
end
|
50
|
+
end
|
51
51
|
end
|
@@ -25,24 +25,31 @@ module REXML
|
|
25
25
|
#
|
26
26
|
# Nat Price gave me some good ideas for the API.
|
27
27
|
class BaseParser
|
28
|
-
|
28
|
+
LETTER = '[:alpha:]'
|
29
|
+
DIGIT = '[:digit:]'
|
30
|
+
|
31
|
+
COMBININGCHAR = '' # TODO
|
32
|
+
EXTENDER = '' # TODO
|
33
|
+
|
34
|
+
NCNAME_STR= "[#{LETTER}_:][-[:alnum:]._:#{COMBININGCHAR}#{EXTENDER}]*"
|
29
35
|
NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
|
30
36
|
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
31
37
|
|
32
|
-
NAMECHAR = '[\-\w
|
38
|
+
NAMECHAR = '[\-\w\.:]'
|
33
39
|
NAME = "([\\w:]#{NAMECHAR}*)"
|
34
40
|
NMTOKEN = "(?:#{NAMECHAR})+"
|
35
41
|
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
|
36
|
-
REFERENCE = "(
|
42
|
+
REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
|
37
43
|
REFERENCE_RE = /#{REFERENCE}/
|
38
44
|
|
39
45
|
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
46
|
+
DOCTYPE_END = /\A\s*\]\s*>/um
|
40
47
|
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
|
41
48
|
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
|
42
49
|
COMMENT_START = /\A<!--/u
|
43
50
|
COMMENT_PATTERN = /<!--(.*?)-->/um
|
44
51
|
CDATA_START = /\A<!\[CDATA\[/u
|
45
|
-
CDATA_END =
|
52
|
+
CDATA_END = /\A\s*\]\s*>/um
|
46
53
|
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
|
47
54
|
XMLDECL_START = /\A<\?xml\s/u;
|
48
55
|
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
@@ -53,13 +60,13 @@ module REXML
|
|
53
60
|
|
54
61
|
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
55
62
|
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
|
56
|
-
STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
|
63
|
+
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
|
57
64
|
|
58
|
-
ENTITY_START =
|
65
|
+
ENTITY_START = /\A\s*<!ENTITY/
|
59
66
|
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
|
60
|
-
ELEMENTDECL_START =
|
61
|
-
ELEMENTDECL_PATTERN =
|
62
|
-
SYSTEMENTITY =
|
67
|
+
ELEMENTDECL_START = /\A\s*<!ELEMENT/um
|
68
|
+
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
|
69
|
+
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
|
63
70
|
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
|
64
71
|
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
|
65
72
|
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
|
@@ -68,11 +75,11 @@ module REXML
|
|
68
75
|
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
|
69
76
|
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
|
70
77
|
ATTDEF_RE = /#{ATTDEF}/
|
71
|
-
ATTLISTDECL_START =
|
72
|
-
ATTLISTDECL_PATTERN =
|
73
|
-
NOTATIONDECL_START =
|
74
|
-
PUBLIC =
|
75
|
-
SYSTEM =
|
78
|
+
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
|
79
|
+
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
80
|
+
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
81
|
+
PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
|
82
|
+
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
|
76
83
|
|
77
84
|
TEXT_PATTERN = /\A([^<]*)/um
|
78
85
|
|
@@ -92,11 +99,11 @@ module REXML
|
|
92
99
|
|
93
100
|
EREFERENCE = /&(?!#{NAME};)/
|
94
101
|
|
95
|
-
DEFAULT_ENTITIES = {
|
96
|
-
'gt' => [/>/, '>', '>', />/],
|
97
|
-
'lt' => [/</, '<', '<', /</],
|
98
|
-
'quot' => [/"/, '"', '"', /"/],
|
99
|
-
"apos" => [/'/, "'", "'", /'/]
|
102
|
+
DEFAULT_ENTITIES = {
|
103
|
+
'gt' => [/>/, '>', '>', />/],
|
104
|
+
'lt' => [/</, '<', '<', /</],
|
105
|
+
'quot' => [/"/, '"', '"', /"/],
|
106
|
+
"apos" => [/'/, "'", "'", /'/]
|
100
107
|
}
|
101
108
|
|
102
109
|
|
@@ -108,22 +115,10 @@ module REXML
|
|
108
115
|
|
109
116
|
def initialize( source )
|
110
117
|
self.stream = source
|
118
|
+
@listeners = []
|
111
119
|
end
|
112
120
|
|
113
121
|
def add_listener( listener )
|
114
|
-
if !defined?(@listeners) or !@listeners
|
115
|
-
@listeners = []
|
116
|
-
instance_eval <<-EOL
|
117
|
-
alias :_old_pull :pull
|
118
|
-
def pull
|
119
|
-
event = _old_pull
|
120
|
-
@listeners.each do |listener|
|
121
|
-
listener.receive event
|
122
|
-
end
|
123
|
-
event
|
124
|
-
end
|
125
|
-
EOL
|
126
|
-
end
|
127
122
|
@listeners << listener
|
128
123
|
end
|
129
124
|
|
@@ -167,9 +162,9 @@ module REXML
|
|
167
162
|
# Peek at the +depth+ event in the stack. The first element on the stack
|
168
163
|
# is at depth 0. If +depth+ is -1, will parse to the end of the input
|
169
164
|
# stream and return the last event, which is always :end_document.
|
170
|
-
# Be aware that this causes the stream to be parsed up to the +depth+
|
171
|
-
# event, so you can effectively pre-parse the entire document (pull the
|
172
|
-
# entire thing into memory) using this method.
|
165
|
+
# Be aware that this causes the stream to be parsed up to the +depth+
|
166
|
+
# event, so you can effectively pre-parse the entire document (pull the
|
167
|
+
# entire thing into memory) using this method.
|
173
168
|
def peek depth=0
|
174
169
|
raise %Q[Illegal argument "#{depth}"] if depth < -1
|
175
170
|
temp = []
|
@@ -186,6 +181,14 @@ module REXML
|
|
186
181
|
|
187
182
|
# Returns the next event. This is a +PullEvent+ object.
|
188
183
|
def pull
|
184
|
+
pull_event.tap do |event|
|
185
|
+
@listeners.each do |listener|
|
186
|
+
listener.receive event
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
def pull_event
|
189
192
|
if @closed
|
190
193
|
x, @closed = @closed, nil
|
191
194
|
return [ :end_element, x ]
|
@@ -210,7 +213,12 @@ module REXML
|
|
210
213
|
version = version[1] unless version.nil?
|
211
214
|
encoding = ENCODING.match(results)
|
212
215
|
encoding = encoding[1] unless encoding.nil?
|
213
|
-
|
216
|
+
if need_source_encoding_update?(encoding)
|
217
|
+
@source.encoding = encoding
|
218
|
+
end
|
219
|
+
if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
|
220
|
+
encoding = "UTF-16"
|
221
|
+
end
|
214
222
|
standalone = STANDALONE.match(results)
|
215
223
|
standalone = standalone[1] unless standalone.nil?
|
216
224
|
return [ :xmldecl, version, encoding, standalone ]
|
@@ -242,12 +250,15 @@ module REXML
|
|
242
250
|
@document_status = :after_doctype
|
243
251
|
@source.read if @source.buffer.size<2
|
244
252
|
md = @source.match(/\s*/um, true)
|
253
|
+
if @source.encoding == "UTF-8"
|
254
|
+
@source.buffer.force_encoding(::Encoding::UTF_8)
|
255
|
+
end
|
245
256
|
end
|
246
257
|
end
|
247
258
|
if @document_status == :in_doctype
|
248
259
|
md = @source.match(/\s*(.*?>)/um)
|
249
260
|
case md[1]
|
250
|
-
when SYSTEMENTITY
|
261
|
+
when SYSTEMENTITY
|
251
262
|
match = @source.match( SYSTEMENTITY, true )[1]
|
252
263
|
return [ :externalentity, match ]
|
253
264
|
|
@@ -272,7 +283,8 @@ module REXML
|
|
272
283
|
# External reference
|
273
284
|
match[3] = match[3][1..-2] # PUBID
|
274
285
|
match[4] = match[4][1..-2] # HREF
|
275
|
-
|
286
|
+
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
287
|
+
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
276
288
|
else
|
277
289
|
match[2] = match[2][1..-2]
|
278
290
|
match.pop if match.size == 4
|
@@ -312,9 +324,9 @@ module REXML
|
|
312
324
|
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
|
313
325
|
end
|
314
326
|
return [ :notationdecl, *vals ]
|
315
|
-
when
|
327
|
+
when DOCTYPE_END
|
316
328
|
@document_status = :after_doctype
|
317
|
-
@source.match(
|
329
|
+
@source.match( DOCTYPE_END, true )
|
318
330
|
return [ :end_doctype ]
|
319
331
|
end
|
320
332
|
end
|
@@ -326,7 +338,7 @@ module REXML
|
|
326
338
|
#md = @source.match_to_consume( '>', CLOSE_MATCH)
|
327
339
|
md = @source.match( CLOSE_MATCH, true )
|
328
340
|
raise REXML::ParseException.new( "Missing end tag for "+
|
329
|
-
"'#{last_tag}' (got \"#{md[1]}\")",
|
341
|
+
"'#{last_tag}' (got \"#{md[1]}\")",
|
330
342
|
@source) unless last_tag == md[1]
|
331
343
|
return [ :end_element, last_tag ]
|
332
344
|
elsif @source.buffer[1] == ?!
|
@@ -335,6 +347,12 @@ module REXML
|
|
335
347
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
336
348
|
if md[0][2] == ?-
|
337
349
|
md = @source.match( COMMENT_PATTERN, true )
|
350
|
+
|
351
|
+
case md[1]
|
352
|
+
when /--/, /-\z/
|
353
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
354
|
+
end
|
355
|
+
|
338
356
|
return [ :comment, md[1] ] if md
|
339
357
|
else
|
340
358
|
md = @source.match( CDATA_PATTERN, true )
|
@@ -353,7 +371,7 @@ module REXML
|
|
353
371
|
unless md
|
354
372
|
# Check for missing attribute quotes
|
355
373
|
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
|
356
|
-
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
374
|
+
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
357
375
|
end
|
358
376
|
attributes = {}
|
359
377
|
prefixes = Set.new
|
@@ -362,27 +380,33 @@ module REXML
|
|
362
380
|
if md[4].size > 0
|
363
381
|
attrs = md[4].scan( ATTRIBUTE_PATTERN )
|
364
382
|
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
|
365
|
-
attrs.each
|
366
|
-
if
|
367
|
-
if
|
368
|
-
if
|
383
|
+
attrs.each do |attr_name, prefix, local_part, quote, value|
|
384
|
+
if prefix == "xmlns"
|
385
|
+
if local_part == "xml"
|
386
|
+
if value != "http://www.w3.org/XML/1998/namespace"
|
369
387
|
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
370
388
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
371
389
|
raise REXML::ParseException.new( msg, @source, self )
|
372
390
|
end
|
373
|
-
elsif
|
391
|
+
elsif local_part == "xmlns"
|
374
392
|
msg = "The 'xmlns' prefix must not be declared "+
|
375
393
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
376
394
|
raise REXML::ParseException.new( msg, @source, self)
|
377
395
|
end
|
378
|
-
curr_ns <<
|
379
|
-
elsif
|
380
|
-
prefixes <<
|
396
|
+
curr_ns << local_part
|
397
|
+
elsif prefix
|
398
|
+
prefixes << prefix unless prefix == "xml"
|
399
|
+
end
|
400
|
+
|
401
|
+
if attributes.has_key?(attr_name)
|
402
|
+
msg = "Duplicate attribute #{attr_name.inspect}"
|
403
|
+
raise REXML::ParseException.new(msg, @source, self)
|
381
404
|
end
|
382
|
-
|
383
|
-
|
405
|
+
|
406
|
+
attributes[attr_name] = value
|
407
|
+
end
|
384
408
|
end
|
385
|
-
|
409
|
+
|
386
410
|
# Verify that all of the prefixes have been defined
|
387
411
|
for prefix in prefixes
|
388
412
|
unless @nsstack.find{|k| k.member?(prefix)}
|
@@ -419,6 +443,7 @@ module REXML
|
|
419
443
|
end
|
420
444
|
return [ :dummy ]
|
421
445
|
end
|
446
|
+
private :pull_event
|
422
447
|
|
423
448
|
def entity( reference, entities )
|
424
449
|
value = nil
|
@@ -436,7 +461,7 @@ module REXML
|
|
436
461
|
# Doing it like this rather than in a loop improves the speed
|
437
462
|
copy.gsub!( EREFERENCE, '&' )
|
438
463
|
entities.each do |key, value|
|
439
|
-
copy.gsub!( value, "&#{key};" ) unless entity_filter and
|
464
|
+
copy.gsub!( value, "&#{key};" ) unless entity_filter and
|
440
465
|
entity_filter.include?(entity)
|
441
466
|
end if entities
|
442
467
|
copy.gsub!( EREFERENCE, '&' )
|
@@ -452,7 +477,7 @@ module REXML
|
|
452
477
|
rv.gsub!( /\r\n?/, "\n" )
|
453
478
|
matches = rv.scan( REFERENCE_RE )
|
454
479
|
return rv if matches.size == 0
|
455
|
-
rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
|
480
|
+
rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
|
456
481
|
m=$1
|
457
482
|
m = "0#{m}" if m[0] == ?x
|
458
483
|
[Integer(m)].pack('U*')
|
@@ -465,19 +490,23 @@ module REXML
|
|
465
490
|
if entity_value
|
466
491
|
re = /&#{entity_reference};/
|
467
492
|
rv.gsub!( re, entity_value )
|
493
|
+
else
|
494
|
+
er = DEFAULT_ENTITIES[entity_reference]
|
495
|
+
rv.gsub!( er[0], er[2] ) if er
|
468
496
|
end
|
469
497
|
end
|
470
498
|
end
|
471
|
-
matches.each do |entity_reference|
|
472
|
-
unless filter and filter.include?(entity_reference)
|
473
|
-
er = DEFAULT_ENTITIES[entity_reference]
|
474
|
-
rv.gsub!( er[0], er[2] ) if er
|
475
|
-
end
|
476
|
-
end
|
477
499
|
rv.gsub!( /&/, '&' )
|
478
500
|
end
|
479
501
|
rv
|
480
502
|
end
|
503
|
+
|
504
|
+
private
|
505
|
+
def need_source_encoding_update?(xml_declaration_encoding)
|
506
|
+
return false if xml_declaration_encoding.nil?
|
507
|
+
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
508
|
+
true
|
509
|
+
end
|
481
510
|
end
|
482
511
|
end
|
483
512
|
end
|