rubysl-rexml 1.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +3 -2
  3. data/lib/rexml/attlistdecl.rb +56 -56
  4. data/lib/rexml/attribute.rb +155 -149
  5. data/lib/rexml/cdata.rb +48 -48
  6. data/lib/rexml/child.rb +82 -82
  7. data/lib/rexml/comment.rb +59 -59
  8. data/lib/rexml/doctype.rb +22 -24
  9. data/lib/rexml/document.rb +185 -129
  10. data/lib/rexml/dtd/attlistdecl.rb +7 -7
  11. data/lib/rexml/dtd/dtd.rb +41 -41
  12. data/lib/rexml/dtd/elementdecl.rb +13 -13
  13. data/lib/rexml/dtd/entitydecl.rb +49 -49
  14. data/lib/rexml/dtd/notationdecl.rb +32 -32
  15. data/lib/rexml/element.rb +122 -107
  16. data/lib/rexml/encoding.rb +37 -58
  17. data/lib/rexml/entity.rb +144 -144
  18. data/lib/rexml/formatters/default.rb +6 -4
  19. data/lib/rexml/formatters/pretty.rb +11 -8
  20. data/lib/rexml/formatters/transitive.rb +4 -3
  21. data/lib/rexml/functions.rb +33 -21
  22. data/lib/rexml/instruction.rb +49 -49
  23. data/lib/rexml/light/node.rb +190 -191
  24. data/lib/rexml/namespace.rb +39 -39
  25. data/lib/rexml/node.rb +38 -38
  26. data/lib/rexml/output.rb +17 -12
  27. data/lib/rexml/parent.rb +26 -25
  28. data/lib/rexml/parseexception.rb +4 -4
  29. data/lib/rexml/parsers/baseparser.rb +90 -61
  30. data/lib/rexml/parsers/lightparser.rb +41 -43
  31. data/lib/rexml/parsers/pullparser.rb +1 -1
  32. data/lib/rexml/parsers/sax2parser.rb +233 -198
  33. data/lib/rexml/parsers/streamparser.rb +6 -2
  34. data/lib/rexml/parsers/treeparser.rb +9 -6
  35. data/lib/rexml/parsers/ultralightparser.rb +40 -40
  36. data/lib/rexml/parsers/xpathparser.rb +51 -52
  37. data/lib/rexml/quickpath.rb +247 -248
  38. data/lib/rexml/rexml.rb +9 -10
  39. data/lib/rexml/sax2listener.rb +92 -92
  40. data/lib/rexml/security.rb +27 -0
  41. data/lib/rexml/source.rb +95 -50
  42. data/lib/rexml/streamlistener.rb +90 -90
  43. data/lib/rexml/syncenumerator.rb +3 -4
  44. data/lib/rexml/text.rb +157 -76
  45. data/lib/rexml/validation/relaxng.rb +18 -18
  46. data/lib/rexml/validation/validation.rb +5 -5
  47. data/lib/rexml/xmldecl.rb +59 -63
  48. data/lib/rexml/xmltokens.rb +14 -14
  49. data/lib/rexml/xpath.rb +67 -53
  50. data/lib/rexml/xpath_parser.rb +49 -38
  51. data/lib/rubysl/rexml.rb +1 -0
  52. data/lib/rubysl/rexml/version.rb +1 -1
  53. data/rubysl-rexml.gemspec +3 -1
  54. metadata +19 -28
  55. data/lib/rexml/encodings/CP-1252.rb +0 -103
  56. data/lib/rexml/encodings/EUC-JP.rb +0 -35
  57. data/lib/rexml/encodings/ICONV.rb +0 -22
  58. data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
  59. data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
  60. data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
  61. data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
  62. data/lib/rexml/encodings/UNILE.rb +0 -34
  63. data/lib/rexml/encodings/US-ASCII.rb +0 -30
  64. data/lib/rexml/encodings/UTF-16.rb +0 -35
  65. data/lib/rexml/encodings/UTF-8.rb +0 -18
@@ -1,47 +1,47 @@
1
1
  require 'rexml/xmltokens'
2
2
 
3
3
  module REXML
4
- # Adds named attributes to an object.
5
- module Namespace
6
- # The name of the object, valid if set
7
- attr_reader :name, :expanded_name
8
- # The expanded name of the object, valid if name is set
9
- attr_accessor :prefix
10
- include XMLTokens
11
- NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
4
+ # Adds named attributes to an object.
5
+ module Namespace
6
+ # The name of the object, valid if set
7
+ attr_reader :name, :expanded_name
8
+ # The expanded name of the object, valid if name is set
9
+ attr_accessor :prefix
10
+ include XMLTokens
11
+ NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
12
12
 
13
- # Sets the name and the expanded name
14
- def name=( name )
15
- @expanded_name = name
16
- name =~ NAMESPLIT
17
- if $1
18
- @prefix = $1
19
- else
20
- @prefix = ""
21
- @namespace = ""
22
- end
23
- @name = $2
24
- end
13
+ # Sets the name and the expanded name
14
+ def name=( name )
15
+ @expanded_name = name
16
+ name =~ NAMESPLIT
17
+ if $1
18
+ @prefix = $1
19
+ else
20
+ @prefix = ""
21
+ @namespace = ""
22
+ end
23
+ @name = $2
24
+ end
25
25
 
26
- # Compares names optionally WITH namespaces
27
- def has_name?( other, ns=nil )
28
- if ns
29
- return (namespace() == ns and name() == other)
30
- elsif other.include? ":"
31
- return fully_expanded_name == other
32
- else
33
- return name == other
34
- end
35
- end
26
+ # Compares names optionally WITH namespaces
27
+ def has_name?( other, ns=nil )
28
+ if ns
29
+ return (namespace() == ns and name() == other)
30
+ elsif other.include? ":"
31
+ return fully_expanded_name == other
32
+ else
33
+ return name == other
34
+ end
35
+ end
36
36
 
37
- alias :local_name :name
37
+ alias :local_name :name
38
38
 
39
- # Fully expand the name, even if the prefix wasn't specified in the
40
- # source file.
41
- def fully_expanded_name
42
- ns = prefix
43
- return "#{ns}:#@name" if ns.size > 0
44
- return @name
45
- end
46
- end
39
+ # Fully expand the name, even if the prefix wasn't specified in the
40
+ # source file.
41
+ def fully_expanded_name
42
+ ns = prefix
43
+ return "#{ns}:#@name" if ns.size > 0
44
+ return @name
45
+ end
46
+ end
47
47
  end
data/lib/rexml/node.rb CHANGED
@@ -3,27 +3,27 @@ require "rexml/formatters/pretty"
3
3
  require "rexml/formatters/default"
4
4
 
5
5
  module REXML
6
- # Represents a node in the tree. Nodes are never encountered except as
7
- # superclasses of other objects. Nodes have siblings.
8
- module Node
9
- # @return the next sibling (nil if unset)
10
- def next_sibling_node
11
- return nil if @parent.nil?
12
- @parent[ @parent.index(self) + 1 ]
13
- end
6
+ # Represents a node in the tree. Nodes are never encountered except as
7
+ # superclasses of other objects. Nodes have siblings.
8
+ module Node
9
+ # @return the next sibling (nil if unset)
10
+ def next_sibling_node
11
+ return nil if @parent.nil?
12
+ @parent[ @parent.index(self) + 1 ]
13
+ end
14
14
 
15
- # @return the previous sibling (nil if unset)
16
- def previous_sibling_node
17
- return nil if @parent.nil?
18
- ind = @parent.index(self)
19
- return nil if ind == 0
20
- @parent[ ind - 1 ]
21
- end
15
+ # @return the previous sibling (nil if unset)
16
+ def previous_sibling_node
17
+ return nil if @parent.nil?
18
+ ind = @parent.index(self)
19
+ return nil if ind == 0
20
+ @parent[ ind - 1 ]
21
+ end
22
22
 
23
23
  # indent::
24
24
  # *DEPRECATED* This parameter is now ignored. See the formatters in the
25
25
  # REXML::Formatters package for changing the output style.
26
- def to_s indent=nil
26
+ def to_s indent=nil
27
27
  unless indent.nil?
28
28
  Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
29
29
  f = REXML::Formatters::Pretty.new( indent )
@@ -33,33 +33,33 @@ module REXML
33
33
  f.write( self, rv = "" )
34
34
  end
35
35
  return rv
36
- end
36
+ end
37
37
 
38
- def indent to, ind
39
- if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
40
- indentstyle = @parent.context[:indentstyle]
41
- else
42
- indentstyle = ' '
43
- end
44
- to << indentstyle*ind unless ind<1
45
- end
38
+ def indent to, ind
39
+ if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
40
+ indentstyle = @parent.context[:indentstyle]
41
+ else
42
+ indentstyle = ' '
43
+ end
44
+ to << indentstyle*ind unless ind<1
45
+ end
46
46
 
47
- def parent?
48
- false;
49
- end
47
+ def parent?
48
+ false;
49
+ end
50
50
 
51
51
 
52
- # Visit all subnodes of +self+ recursively
53
- def each_recursive(&block) # :yields: node
54
- self.elements.each {|node|
55
- block.call(node)
56
- node.each_recursive(&block)
57
- }
58
- end
52
+ # Visit all subnodes of +self+ recursively
53
+ def each_recursive(&block) # :yields: node
54
+ self.elements.each {|node|
55
+ block.call(node)
56
+ node.each_recursive(&block)
57
+ }
58
+ end
59
59
 
60
- # Find (and return) first subnode (recursively) for which the block
60
+ # Find (and return) first subnode (recursively) for which the block
61
61
  # evaluates to true. Returns +nil+ if none was found.
62
- def find_first_recursive(&block) # :yields: node
62
+ def find_first_recursive(&block) # :yields: node
63
63
  each_recursive {|node|
64
64
  return node if block.call(node)
65
65
  }
@@ -71,5 +71,5 @@ module REXML
71
71
  def index_in_parent
72
72
  parent.index(self)+1
73
73
  end
74
- end
74
+ end
75
75
  end
data/lib/rexml/output.rb CHANGED
@@ -1,24 +1,29 @@
1
1
  require 'rexml/encoding'
2
2
 
3
3
  module REXML
4
- class Output
5
- include Encoding
6
-
4
+ class Output
5
+ include Encoding
6
+
7
7
  attr_reader :encoding
8
8
 
9
- def initialize real_IO, encd="iso-8859-1"
10
- @output = real_IO
11
- self.encoding = encd
9
+ def initialize real_IO, encd="iso-8859-1"
10
+ @output = real_IO
11
+ self.encoding = encd
12
+
13
+ @to_utf = encoding != 'UTF-8'
12
14
 
13
- @to_utf = encd == UTF_8 ? false : true
14
- end
15
+ if encoding == "UTF-16"
16
+ @output << "\ufeff".encode("UTF-16BE")
17
+ self.encoding = "UTF-16BE"
18
+ end
19
+ end
15
20
 
16
- def <<( content )
17
- @output << (@to_utf ? self.encode(content) : content)
18
- end
21
+ def <<( content )
22
+ @output << (@to_utf ? self.encode(content) : content)
23
+ end
19
24
 
20
25
  def to_s
21
26
  "Output[#{encoding}]"
22
27
  end
23
- end
28
+ end
24
29
  end
data/lib/rexml/parent.rb CHANGED
@@ -6,14 +6,14 @@ module REXML
6
6
  # object.
7
7
  class Parent < Child
8
8
  include Enumerable
9
-
9
+
10
10
  # Constructor
11
11
  # @param parent if supplied, will be set as the parent of this object
12
12
  def initialize parent=nil
13
13
  super(parent)
14
14
  @children = []
15
15
  end
16
-
16
+
17
17
  def add( object )
18
18
  #puts "PARENT GOTS #{size} CHILDREN"
19
19
  object.parent = self
@@ -21,47 +21,48 @@ module REXML
21
21
  #puts "PARENT NOW GOTS #{size} CHILDREN"
22
22
  object
23
23
  end
24
-
24
+
25
25
  alias :push :add
26
26
  alias :<< :push
27
-
27
+
28
28
  def unshift( object )
29
29
  object.parent = self
30
30
  @children.unshift object
31
31
  end
32
-
32
+
33
33
  def delete( object )
34
34
  found = false
35
35
  @children.delete_if {|c| c.equal?(object) and found = true }
36
36
  object.parent = nil if found
37
+ found ? object : nil
37
38
  end
38
-
39
+
39
40
  def each(&block)
40
41
  @children.each(&block)
41
42
  end
42
-
43
+
43
44
  def delete_if( &block )
44
45
  @children.delete_if(&block)
45
46
  end
46
-
47
+
47
48
  def delete_at( index )
48
49
  @children.delete_at index
49
50
  end
50
-
51
+
51
52
  def each_index( &block )
52
53
  @children.each_index(&block)
53
54
  end
54
-
55
+
55
56
  # Fetches a child at a given index
56
57
  # @param index the Integer index of the child to fetch
57
58
  def []( index )
58
59
  @children[index]
59
60
  end
60
-
61
+
61
62
  alias :each_child :each
62
-
63
-
64
-
63
+
64
+
65
+
65
66
  # Set an index entry. See Array.[]=
66
67
  # @param index the index of the element to set
67
68
  # @param opt either the object to set, or an Integer length
@@ -71,7 +72,7 @@ module REXML
71
72
  args[-1].parent = self
72
73
  @children[*args[0..-2]] = args[-1]
73
74
  end
74
-
75
+
75
76
  # Inserts a child before another child
76
77
  # @param child1 this is either an xpath or an Element. If an Element,
77
78
  # child2 will be inserted before child1 in the child list of the parent.
@@ -91,7 +92,7 @@ module REXML
91
92
  end
92
93
  self
93
94
  end
94
-
95
+
95
96
  # Inserts a child after another child
96
97
  # @param child1 this is either an xpath or an Element. If an Element,
97
98
  # child2 will be inserted after child1 in the child list of the parent.
@@ -111,11 +112,11 @@ module REXML
111
112
  end
112
113
  self
113
114
  end
114
-
115
+
115
116
  def to_a
116
117
  @children.dup
117
118
  end
118
-
119
+
119
120
  # Fetches the index of a given child
120
121
  # @param child the child to get the index of
121
122
  # @return the index of the child, or nil if the object is not a child
@@ -125,24 +126,24 @@ module REXML
125
126
  @children.find { |i| count += 1 ; i.hash == child.hash }
126
127
  count
127
128
  end
128
-
129
+
129
130
  # @return the number of children of this parent
130
131
  def size
131
132
  @children.size
132
133
  end
133
-
134
+
134
135
  alias :length :size
135
-
136
+
136
137
  # Replaces one child with another, making sure the nodelist is correct
137
138
  # @param to_replace the child to replace (must be a Child)
138
- # @param replacement the child to insert into the nodelist (must be a
139
+ # @param replacement the child to insert into the nodelist (must be a
139
140
  # Child)
140
141
  def replace_child( to_replace, replacement )
141
142
  @children.map! {|c| c.equal?( to_replace ) ? replacement : c }
142
143
  to_replace.parent = nil
143
144
  replacement.parent = self
144
145
  end
145
-
146
+
146
147
  # Deeply clones this object. This creates a complete duplicate of this
147
148
  # Parent, including all descendants.
148
149
  def deep_clone
@@ -156,9 +157,9 @@ module REXML
156
157
  end
157
158
  cl
158
159
  end
159
-
160
+
160
161
  alias :children :to_a
161
-
162
+
162
163
  def parent?
163
164
  true
164
165
  end
@@ -28,9 +28,9 @@ module REXML
28
28
  err << "\nLine: #{line}\n"
29
29
  err << "Position: #{position}\n"
30
30
  err << "Last 80 unconsumed characters:\n"
31
- err << @source.buffer[0..80].gsub(/\n/, ' ')
31
+ err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
32
32
  end
33
-
33
+
34
34
  err
35
35
  end
36
36
 
@@ -40,12 +40,12 @@ module REXML
40
40
  end
41
41
 
42
42
  def line
43
- @source.current_line[2] if @source and defined? @source.current_line and
43
+ @source.current_line[2] if @source and defined? @source.current_line and
44
44
  @source.current_line
45
45
  end
46
46
 
47
47
  def context
48
48
  @source.current_line
49
49
  end
50
- end
50
+ end
51
51
  end
@@ -25,24 +25,31 @@ module REXML
25
25
  #
26
26
  # Nat Price gave me some good ideas for the API.
27
27
  class BaseParser
28
- NCNAME_STR= '[\w:][\-\w\d.]*'
28
+ LETTER = '[:alpha:]'
29
+ DIGIT = '[:digit:]'
30
+
31
+ COMBININGCHAR = '' # TODO
32
+ EXTENDER = '' # TODO
33
+
34
+ NCNAME_STR= "[#{LETTER}_:][-[:alnum:]._:#{COMBININGCHAR}#{EXTENDER}]*"
29
35
  NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
30
36
  UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
31
37
 
32
- NAMECHAR = '[\-\w\d\.:]'
38
+ NAMECHAR = '[\-\w\.:]'
33
39
  NAME = "([\\w:]#{NAMECHAR}*)"
34
40
  NMTOKEN = "(?:#{NAMECHAR})+"
35
41
  NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
36
- REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
42
+ REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
37
43
  REFERENCE_RE = /#{REFERENCE}/
38
44
 
39
45
  DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
46
+ DOCTYPE_END = /\A\s*\]\s*>/um
40
47
  DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
41
48
  ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
42
49
  COMMENT_START = /\A<!--/u
43
50
  COMMENT_PATTERN = /<!--(.*?)-->/um
44
51
  CDATA_START = /\A<!\[CDATA\[/u
45
- CDATA_END = /^\s*\]\s*>/um
52
+ CDATA_END = /\A\s*\]\s*>/um
46
53
  CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
47
54
  XMLDECL_START = /\A<\?xml\s/u;
48
55
  XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
@@ -53,13 +60,13 @@ module REXML
53
60
 
54
61
  VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
55
62
  ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
56
- STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
63
+ STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
57
64
 
58
- ENTITY_START = /^\s*<!ENTITY/
65
+ ENTITY_START = /\A\s*<!ENTITY/
59
66
  IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
60
- ELEMENTDECL_START = /^\s*<!ELEMENT/um
61
- ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
62
- SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
67
+ ELEMENTDECL_START = /\A\s*<!ELEMENT/um
68
+ ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
69
+ SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
63
70
  ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
64
71
  NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
65
72
  ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
@@ -68,11 +75,11 @@ module REXML
68
75
  DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
69
76
  ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
70
77
  ATTDEF_RE = /#{ATTDEF}/
71
- ATTLISTDECL_START = /^\s*<!ATTLIST/um
72
- ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
73
- NOTATIONDECL_START = /^\s*<!NOTATION/um
74
- PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
75
- SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
78
+ ATTLISTDECL_START = /\A\s*<!ATTLIST/um
79
+ ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
80
+ NOTATIONDECL_START = /\A\s*<!NOTATION/um
81
+ PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
82
+ SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
76
83
 
77
84
  TEXT_PATTERN = /\A([^<]*)/um
78
85
 
@@ -92,11 +99,11 @@ module REXML
92
99
 
93
100
  EREFERENCE = /&(?!#{NAME};)/
94
101
 
95
- DEFAULT_ENTITIES = {
96
- 'gt' => [/&gt;/, '&gt;', '>', />/],
97
- 'lt' => [/&lt;/, '&lt;', '<', /</],
98
- 'quot' => [/&quot;/, '&quot;', '"', /"/],
99
- "apos" => [/&apos;/, "&apos;", "'", /'/]
102
+ DEFAULT_ENTITIES = {
103
+ 'gt' => [/&gt;/, '&gt;', '>', />/],
104
+ 'lt' => [/&lt;/, '&lt;', '<', /</],
105
+ 'quot' => [/&quot;/, '&quot;', '"', /"/],
106
+ "apos" => [/&apos;/, "&apos;", "'", /'/]
100
107
  }
101
108
 
102
109
 
@@ -108,22 +115,10 @@ module REXML
108
115
 
109
116
  def initialize( source )
110
117
  self.stream = source
118
+ @listeners = []
111
119
  end
112
120
 
113
121
  def add_listener( listener )
114
- if !defined?(@listeners) or !@listeners
115
- @listeners = []
116
- instance_eval <<-EOL
117
- alias :_old_pull :pull
118
- def pull
119
- event = _old_pull
120
- @listeners.each do |listener|
121
- listener.receive event
122
- end
123
- event
124
- end
125
- EOL
126
- end
127
122
  @listeners << listener
128
123
  end
129
124
 
@@ -167,9 +162,9 @@ module REXML
167
162
  # Peek at the +depth+ event in the stack. The first element on the stack
168
163
  # is at depth 0. If +depth+ is -1, will parse to the end of the input
169
164
  # stream and return the last event, which is always :end_document.
170
- # Be aware that this causes the stream to be parsed up to the +depth+
171
- # event, so you can effectively pre-parse the entire document (pull the
172
- # entire thing into memory) using this method.
165
+ # Be aware that this causes the stream to be parsed up to the +depth+
166
+ # event, so you can effectively pre-parse the entire document (pull the
167
+ # entire thing into memory) using this method.
173
168
  def peek depth=0
174
169
  raise %Q[Illegal argument "#{depth}"] if depth < -1
175
170
  temp = []
@@ -186,6 +181,14 @@ module REXML
186
181
 
187
182
  # Returns the next event. This is a +PullEvent+ object.
188
183
  def pull
184
+ pull_event.tap do |event|
185
+ @listeners.each do |listener|
186
+ listener.receive event
187
+ end
188
+ end
189
+ end
190
+
191
+ def pull_event
189
192
  if @closed
190
193
  x, @closed = @closed, nil
191
194
  return [ :end_element, x ]
@@ -210,7 +213,12 @@ module REXML
210
213
  version = version[1] unless version.nil?
211
214
  encoding = ENCODING.match(results)
212
215
  encoding = encoding[1] unless encoding.nil?
213
- @source.encoding = encoding
216
+ if need_source_encoding_update?(encoding)
217
+ @source.encoding = encoding
218
+ end
219
+ if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
220
+ encoding = "UTF-16"
221
+ end
214
222
  standalone = STANDALONE.match(results)
215
223
  standalone = standalone[1] unless standalone.nil?
216
224
  return [ :xmldecl, version, encoding, standalone ]
@@ -242,12 +250,15 @@ module REXML
242
250
  @document_status = :after_doctype
243
251
  @source.read if @source.buffer.size<2
244
252
  md = @source.match(/\s*/um, true)
253
+ if @source.encoding == "UTF-8"
254
+ @source.buffer.force_encoding(::Encoding::UTF_8)
255
+ end
245
256
  end
246
257
  end
247
258
  if @document_status == :in_doctype
248
259
  md = @source.match(/\s*(.*?>)/um)
249
260
  case md[1]
250
- when SYSTEMENTITY
261
+ when SYSTEMENTITY
251
262
  match = @source.match( SYSTEMENTITY, true )[1]
252
263
  return [ :externalentity, match ]
253
264
 
@@ -272,7 +283,8 @@ module REXML
272
283
  # External reference
273
284
  match[3] = match[3][1..-2] # PUBID
274
285
  match[4] = match[4][1..-2] # HREF
275
- # match is [ :entity, name, PUBLIC, pubid, href ]
286
+ match.delete_at(5) if match.size > 5 # Chop out NDATA decl
287
+ # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
276
288
  else
277
289
  match[2] = match[2][1..-2]
278
290
  match.pop if match.size == 4
@@ -312,9 +324,9 @@ module REXML
312
324
  raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
313
325
  end
314
326
  return [ :notationdecl, *vals ]
315
- when CDATA_END
327
+ when DOCTYPE_END
316
328
  @document_status = :after_doctype
317
- @source.match( CDATA_END, true )
329
+ @source.match( DOCTYPE_END, true )
318
330
  return [ :end_doctype ]
319
331
  end
320
332
  end
@@ -326,7 +338,7 @@ module REXML
326
338
  #md = @source.match_to_consume( '>', CLOSE_MATCH)
327
339
  md = @source.match( CLOSE_MATCH, true )
328
340
  raise REXML::ParseException.new( "Missing end tag for "+
329
- "'#{last_tag}' (got \"#{md[1]}\")",
341
+ "'#{last_tag}' (got \"#{md[1]}\")",
330
342
  @source) unless last_tag == md[1]
331
343
  return [ :end_element, last_tag ]
332
344
  elsif @source.buffer[1] == ?!
@@ -335,6 +347,12 @@ module REXML
335
347
  raise REXML::ParseException.new("Malformed node", @source) unless md
336
348
  if md[0][2] == ?-
337
349
  md = @source.match( COMMENT_PATTERN, true )
350
+
351
+ case md[1]
352
+ when /--/, /-\z/
353
+ raise REXML::ParseException.new("Malformed comment", @source)
354
+ end
355
+
338
356
  return [ :comment, md[1] ] if md
339
357
  else
340
358
  md = @source.match( CDATA_PATTERN, true )
@@ -353,7 +371,7 @@ module REXML
353
371
  unless md
354
372
  # Check for missing attribute quotes
355
373
  raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
356
- raise REXML::ParseException.new("malformed XML: missing tag start", @source)
374
+ raise REXML::ParseException.new("malformed XML: missing tag start", @source)
357
375
  end
358
376
  attributes = {}
359
377
  prefixes = Set.new
@@ -362,27 +380,33 @@ module REXML
362
380
  if md[4].size > 0
363
381
  attrs = md[4].scan( ATTRIBUTE_PATTERN )
364
382
  raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
365
- attrs.each { |a,b,c,d,e|
366
- if b == "xmlns"
367
- if c == "xml"
368
- if d != "http://www.w3.org/XML/1998/namespace"
383
+ attrs.each do |attr_name, prefix, local_part, quote, value|
384
+ if prefix == "xmlns"
385
+ if local_part == "xml"
386
+ if value != "http://www.w3.org/XML/1998/namespace"
369
387
  msg = "The 'xml' prefix must not be bound to any other namespace "+
370
388
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
371
389
  raise REXML::ParseException.new( msg, @source, self )
372
390
  end
373
- elsif c == "xmlns"
391
+ elsif local_part == "xmlns"
374
392
  msg = "The 'xmlns' prefix must not be declared "+
375
393
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
376
394
  raise REXML::ParseException.new( msg, @source, self)
377
395
  end
378
- curr_ns << c
379
- elsif b
380
- prefixes << b unless b == "xml"
396
+ curr_ns << local_part
397
+ elsif prefix
398
+ prefixes << prefix unless prefix == "xml"
399
+ end
400
+
401
+ if attributes.has_key?(attr_name)
402
+ msg = "Duplicate attribute #{attr_name.inspect}"
403
+ raise REXML::ParseException.new(msg, @source, self)
381
404
  end
382
- attributes[a] = e
383
- }
405
+
406
+ attributes[attr_name] = value
407
+ end
384
408
  end
385
-
409
+
386
410
  # Verify that all of the prefixes have been defined
387
411
  for prefix in prefixes
388
412
  unless @nsstack.find{|k| k.member?(prefix)}
@@ -419,6 +443,7 @@ module REXML
419
443
  end
420
444
  return [ :dummy ]
421
445
  end
446
+ private :pull_event
422
447
 
423
448
  def entity( reference, entities )
424
449
  value = nil
@@ -436,7 +461,7 @@ module REXML
436
461
  # Doing it like this rather than in a loop improves the speed
437
462
  copy.gsub!( EREFERENCE, '&amp;' )
438
463
  entities.each do |key, value|
439
- copy.gsub!( value, "&#{key};" ) unless entity_filter and
464
+ copy.gsub!( value, "&#{key};" ) unless entity_filter and
440
465
  entity_filter.include?(entity)
441
466
  end if entities
442
467
  copy.gsub!( EREFERENCE, '&amp;' )
@@ -452,7 +477,7 @@ module REXML
452
477
  rv.gsub!( /\r\n?/, "\n" )
453
478
  matches = rv.scan( REFERENCE_RE )
454
479
  return rv if matches.size == 0
455
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
480
+ rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
456
481
  m=$1
457
482
  m = "0#{m}" if m[0] == ?x
458
483
  [Integer(m)].pack('U*')
@@ -465,19 +490,23 @@ module REXML
465
490
  if entity_value
466
491
  re = /&#{entity_reference};/
467
492
  rv.gsub!( re, entity_value )
493
+ else
494
+ er = DEFAULT_ENTITIES[entity_reference]
495
+ rv.gsub!( er[0], er[2] ) if er
468
496
  end
469
497
  end
470
498
  end
471
- matches.each do |entity_reference|
472
- unless filter and filter.include?(entity_reference)
473
- er = DEFAULT_ENTITIES[entity_reference]
474
- rv.gsub!( er[0], er[2] ) if er
475
- end
476
- end
477
499
  rv.gsub!( /&amp;/, '&' )
478
500
  end
479
501
  rv
480
502
  end
503
+
504
+ private
505
+ def need_source_encoding_update?(xml_declaration_encoding)
506
+ return false if xml_declaration_encoding.nil?
507
+ return false if /\AUTF-16\z/i =~ xml_declaration_encoding
508
+ true
509
+ end
481
510
  end
482
511
  end
483
512
  end