rubysl-rexml 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +3 -2
  3. data/lib/rexml/attlistdecl.rb +56 -56
  4. data/lib/rexml/attribute.rb +155 -149
  5. data/lib/rexml/cdata.rb +48 -48
  6. data/lib/rexml/child.rb +82 -82
  7. data/lib/rexml/comment.rb +59 -59
  8. data/lib/rexml/doctype.rb +22 -24
  9. data/lib/rexml/document.rb +185 -129
  10. data/lib/rexml/dtd/attlistdecl.rb +7 -7
  11. data/lib/rexml/dtd/dtd.rb +41 -41
  12. data/lib/rexml/dtd/elementdecl.rb +13 -13
  13. data/lib/rexml/dtd/entitydecl.rb +49 -49
  14. data/lib/rexml/dtd/notationdecl.rb +32 -32
  15. data/lib/rexml/element.rb +122 -107
  16. data/lib/rexml/encoding.rb +37 -58
  17. data/lib/rexml/entity.rb +144 -144
  18. data/lib/rexml/formatters/default.rb +6 -4
  19. data/lib/rexml/formatters/pretty.rb +11 -8
  20. data/lib/rexml/formatters/transitive.rb +4 -3
  21. data/lib/rexml/functions.rb +33 -21
  22. data/lib/rexml/instruction.rb +49 -49
  23. data/lib/rexml/light/node.rb +190 -191
  24. data/lib/rexml/namespace.rb +39 -39
  25. data/lib/rexml/node.rb +38 -38
  26. data/lib/rexml/output.rb +17 -12
  27. data/lib/rexml/parent.rb +26 -25
  28. data/lib/rexml/parseexception.rb +4 -4
  29. data/lib/rexml/parsers/baseparser.rb +90 -61
  30. data/lib/rexml/parsers/lightparser.rb +41 -43
  31. data/lib/rexml/parsers/pullparser.rb +1 -1
  32. data/lib/rexml/parsers/sax2parser.rb +233 -198
  33. data/lib/rexml/parsers/streamparser.rb +6 -2
  34. data/lib/rexml/parsers/treeparser.rb +9 -6
  35. data/lib/rexml/parsers/ultralightparser.rb +40 -40
  36. data/lib/rexml/parsers/xpathparser.rb +51 -52
  37. data/lib/rexml/quickpath.rb +247 -248
  38. data/lib/rexml/rexml.rb +9 -10
  39. data/lib/rexml/sax2listener.rb +92 -92
  40. data/lib/rexml/security.rb +27 -0
  41. data/lib/rexml/source.rb +95 -50
  42. data/lib/rexml/streamlistener.rb +90 -90
  43. data/lib/rexml/syncenumerator.rb +3 -4
  44. data/lib/rexml/text.rb +157 -76
  45. data/lib/rexml/validation/relaxng.rb +18 -18
  46. data/lib/rexml/validation/validation.rb +5 -5
  47. data/lib/rexml/xmldecl.rb +59 -63
  48. data/lib/rexml/xmltokens.rb +14 -14
  49. data/lib/rexml/xpath.rb +67 -53
  50. data/lib/rexml/xpath_parser.rb +49 -38
  51. data/lib/rubysl/rexml.rb +1 -0
  52. data/lib/rubysl/rexml/version.rb +1 -1
  53. data/rubysl-rexml.gemspec +3 -1
  54. metadata +19 -28
  55. data/lib/rexml/encodings/CP-1252.rb +0 -103
  56. data/lib/rexml/encodings/EUC-JP.rb +0 -35
  57. data/lib/rexml/encodings/ICONV.rb +0 -22
  58. data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
  59. data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
  60. data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
  61. data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
  62. data/lib/rexml/encodings/UNILE.rb +0 -34
  63. data/lib/rexml/encodings/US-ASCII.rb +0 -30
  64. data/lib/rexml/encodings/UTF-16.rb +0 -35
  65. data/lib/rexml/encodings/UTF-8.rb +0 -18
data/lib/rexml/namespace.rb CHANGED
@@ -1,47 +1,47 @@
1
1
  require 'rexml/xmltokens'
2
2
 
3
3
  module REXML
4
- # Adds named attributes to an object.
5
- module Namespace
6
- # The name of the object, valid if set
7
- attr_reader :name, :expanded_name
8
- # The expanded name of the object, valid if name is set
9
- attr_accessor :prefix
10
- include XMLTokens
11
- NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
4
+ # Adds named attributes to an object.
5
+ module Namespace
6
+ # The name of the object, valid if set
7
+ attr_reader :name, :expanded_name
8
+ # The expanded name of the object, valid if name is set
9
+ attr_accessor :prefix
10
+ include XMLTokens
11
+ NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
12
12
 
13
- # Sets the name and the expanded name
14
- def name=( name )
15
- @expanded_name = name
16
- name =~ NAMESPLIT
17
- if $1
18
- @prefix = $1
19
- else
20
- @prefix = ""
21
- @namespace = ""
22
- end
23
- @name = $2
24
- end
13
+ # Sets the name and the expanded name
14
+ def name=( name )
15
+ @expanded_name = name
16
+ name =~ NAMESPLIT
17
+ if $1
18
+ @prefix = $1
19
+ else
20
+ @prefix = ""
21
+ @namespace = ""
22
+ end
23
+ @name = $2
24
+ end
25
25
 
26
- # Compares names optionally WITH namespaces
27
- def has_name?( other, ns=nil )
28
- if ns
29
- return (namespace() == ns and name() == other)
30
- elsif other.include? ":"
31
- return fully_expanded_name == other
32
- else
33
- return name == other
34
- end
35
- end
26
+ # Compares names optionally WITH namespaces
27
+ def has_name?( other, ns=nil )
28
+ if ns
29
+ return (namespace() == ns and name() == other)
30
+ elsif other.include? ":"
31
+ return fully_expanded_name == other
32
+ else
33
+ return name == other
34
+ end
35
+ end
36
36
 
37
- alias :local_name :name
37
+ alias :local_name :name
38
38
 
39
- # Fully expand the name, even if the prefix wasn't specified in the
40
- # source file.
41
- def fully_expanded_name
42
- ns = prefix
43
- return "#{ns}:#@name" if ns.size > 0
44
- return @name
45
- end
46
- end
39
+ # Fully expand the name, even if the prefix wasn't specified in the
40
+ # source file.
41
+ def fully_expanded_name
42
+ ns = prefix
43
+ return "#{ns}:#@name" if ns.size > 0
44
+ return @name
45
+ end
46
+ end
47
47
  end
data/lib/rexml/node.rb CHANGED
@@ -3,27 +3,27 @@ require "rexml/formatters/pretty"
3
3
  require "rexml/formatters/default"
4
4
 
5
5
  module REXML
6
- # Represents a node in the tree. Nodes are never encountered except as
7
- # superclasses of other objects. Nodes have siblings.
8
- module Node
9
- # @return the next sibling (nil if unset)
10
- def next_sibling_node
11
- return nil if @parent.nil?
12
- @parent[ @parent.index(self) + 1 ]
13
- end
6
+ # Represents a node in the tree. Nodes are never encountered except as
7
+ # superclasses of other objects. Nodes have siblings.
8
+ module Node
9
+ # @return the next sibling (nil if unset)
10
+ def next_sibling_node
11
+ return nil if @parent.nil?
12
+ @parent[ @parent.index(self) + 1 ]
13
+ end
14
14
 
15
- # @return the previous sibling (nil if unset)
16
- def previous_sibling_node
17
- return nil if @parent.nil?
18
- ind = @parent.index(self)
19
- return nil if ind == 0
20
- @parent[ ind - 1 ]
21
- end
15
+ # @return the previous sibling (nil if unset)
16
+ def previous_sibling_node
17
+ return nil if @parent.nil?
18
+ ind = @parent.index(self)
19
+ return nil if ind == 0
20
+ @parent[ ind - 1 ]
21
+ end
22
22
 
23
23
  # indent::
24
24
  # *DEPRECATED* This parameter is now ignored. See the formatters in the
25
25
  # REXML::Formatters package for changing the output style.
26
- def to_s indent=nil
26
+ def to_s indent=nil
27
27
  unless indent.nil?
28
28
  Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
29
29
  f = REXML::Formatters::Pretty.new( indent )
@@ -33,33 +33,33 @@ module REXML
33
33
  f.write( self, rv = "" )
34
34
  end
35
35
  return rv
36
- end
36
+ end
37
37
 
38
- def indent to, ind
39
- if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
40
- indentstyle = @parent.context[:indentstyle]
41
- else
42
- indentstyle = ' '
43
- end
44
- to << indentstyle*ind unless ind<1
45
- end
38
+ def indent to, ind
39
+ if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
40
+ indentstyle = @parent.context[:indentstyle]
41
+ else
42
+ indentstyle = ' '
43
+ end
44
+ to << indentstyle*ind unless ind<1
45
+ end
46
46
 
47
- def parent?
48
- false;
49
- end
47
+ def parent?
48
+ false;
49
+ end
50
50
 
51
51
 
52
- # Visit all subnodes of +self+ recursively
53
- def each_recursive(&block) # :yields: node
54
- self.elements.each {|node|
55
- block.call(node)
56
- node.each_recursive(&block)
57
- }
58
- end
52
+ # Visit all subnodes of +self+ recursively
53
+ def each_recursive(&block) # :yields: node
54
+ self.elements.each {|node|
55
+ block.call(node)
56
+ node.each_recursive(&block)
57
+ }
58
+ end
59
59
 
60
- # Find (and return) first subnode (recursively) for which the block
60
+ # Find (and return) first subnode (recursively) for which the block
61
61
  # evaluates to true. Returns +nil+ if none was found.
62
- def find_first_recursive(&block) # :yields: node
62
+ def find_first_recursive(&block) # :yields: node
63
63
  each_recursive {|node|
64
64
  return node if block.call(node)
65
65
  }
@@ -71,5 +71,5 @@ module REXML
71
71
  def index_in_parent
72
72
  parent.index(self)+1
73
73
  end
74
- end
74
+ end
75
75
  end
data/lib/rexml/output.rb CHANGED
@@ -1,24 +1,29 @@
1
1
  require 'rexml/encoding'
2
2
 
3
3
  module REXML
4
- class Output
5
- include Encoding
6
-
4
+ class Output
5
+ include Encoding
6
+
7
7
  attr_reader :encoding
8
8
 
9
- def initialize real_IO, encd="iso-8859-1"
10
- @output = real_IO
11
- self.encoding = encd
9
+ def initialize real_IO, encd="iso-8859-1"
10
+ @output = real_IO
11
+ self.encoding = encd
12
+
13
+ @to_utf = encoding != 'UTF-8'
12
14
 
13
- @to_utf = encd == UTF_8 ? false : true
14
- end
15
+ if encoding == "UTF-16"
16
+ @output << "\ufeff".encode("UTF-16BE")
17
+ self.encoding = "UTF-16BE"
18
+ end
19
+ end
15
20
 
16
- def <<( content )
17
- @output << (@to_utf ? self.encode(content) : content)
18
- end
21
+ def <<( content )
22
+ @output << (@to_utf ? self.encode(content) : content)
23
+ end
19
24
 
20
25
  def to_s
21
26
  "Output[#{encoding}]"
22
27
  end
23
- end
28
+ end
24
29
  end
data/lib/rexml/parent.rb CHANGED
@@ -6,14 +6,14 @@ module REXML
6
6
  # object.
7
7
  class Parent < Child
8
8
  include Enumerable
9
-
9
+
10
10
  # Constructor
11
11
  # @param parent if supplied, will be set as the parent of this object
12
12
  def initialize parent=nil
13
13
  super(parent)
14
14
  @children = []
15
15
  end
16
-
16
+
17
17
  def add( object )
18
18
  #puts "PARENT GOTS #{size} CHILDREN"
19
19
  object.parent = self
@@ -21,47 +21,48 @@ module REXML
21
21
  #puts "PARENT NOW GOTS #{size} CHILDREN"
22
22
  object
23
23
  end
24
-
24
+
25
25
  alias :push :add
26
26
  alias :<< :push
27
-
27
+
28
28
  def unshift( object )
29
29
  object.parent = self
30
30
  @children.unshift object
31
31
  end
32
-
32
+
33
33
  def delete( object )
34
34
  found = false
35
35
  @children.delete_if {|c| c.equal?(object) and found = true }
36
36
  object.parent = nil if found
37
+ found ? object : nil
37
38
  end
38
-
39
+
39
40
  def each(&block)
40
41
  @children.each(&block)
41
42
  end
42
-
43
+
43
44
  def delete_if( &block )
44
45
  @children.delete_if(&block)
45
46
  end
46
-
47
+
47
48
  def delete_at( index )
48
49
  @children.delete_at index
49
50
  end
50
-
51
+
51
52
  def each_index( &block )
52
53
  @children.each_index(&block)
53
54
  end
54
-
55
+
55
56
  # Fetches a child at a given index
56
57
  # @param index the Integer index of the child to fetch
57
58
  def []( index )
58
59
  @children[index]
59
60
  end
60
-
61
+
61
62
  alias :each_child :each
62
-
63
-
64
-
63
+
64
+
65
+
65
66
  # Set an index entry. See Array.[]=
66
67
  # @param index the index of the element to set
67
68
  # @param opt either the object to set, or an Integer length
@@ -71,7 +72,7 @@ module REXML
71
72
  args[-1].parent = self
72
73
  @children[*args[0..-2]] = args[-1]
73
74
  end
74
-
75
+
75
76
  # Inserts an child before another child
76
77
  # @param child1 this is either an xpath or an Element. If an Element,
77
78
  # child2 will be inserted before child1 in the child list of the parent.
@@ -91,7 +92,7 @@ module REXML
91
92
  end
92
93
  self
93
94
  end
94
-
95
+
95
96
  # Inserts an child after another child
96
97
  # @param child1 this is either an xpath or an Element. If an Element,
97
98
  # child2 will be inserted after child1 in the child list of the parent.
@@ -111,11 +112,11 @@ module REXML
111
112
  end
112
113
  self
113
114
  end
114
-
115
+
115
116
  def to_a
116
117
  @children.dup
117
118
  end
118
-
119
+
119
120
  # Fetches the index of a given child
120
121
  # @param child the child to get the index of
121
122
  # @return the index of the child, or nil if the object is not a child
@@ -125,24 +126,24 @@ module REXML
125
126
  @children.find { |i| count += 1 ; i.hash == child.hash }
126
127
  count
127
128
  end
128
-
129
+
129
130
  # @return the number of children of this parent
130
131
  def size
131
132
  @children.size
132
133
  end
133
-
134
+
134
135
  alias :length :size
135
-
136
+
136
137
  # Replaces one child with another, making sure the nodelist is correct
137
138
  # @param to_replace the child to replace (must be a Child)
138
- # @param replacement the child to insert into the nodelist (must be a
139
+ # @param replacement the child to insert into the nodelist (must be a
139
140
  # Child)
140
141
  def replace_child( to_replace, replacement )
141
142
  @children.map! {|c| c.equal?( to_replace ) ? replacement : c }
142
143
  to_replace.parent = nil
143
144
  replacement.parent = self
144
145
  end
145
-
146
+
146
147
  # Deeply clones this object. This creates a complete duplicate of this
147
148
  # Parent, including all descendants.
148
149
  def deep_clone
@@ -156,9 +157,9 @@ module REXML
156
157
  end
157
158
  cl
158
159
  end
159
-
160
+
160
161
  alias :children :to_a
161
-
162
+
162
163
  def parent?
163
164
  true
164
165
  end
data/lib/rexml/parseexception.rb CHANGED
@@ -28,9 +28,9 @@ module REXML
28
28
  err << "\nLine: #{line}\n"
29
29
  err << "Position: #{position}\n"
30
30
  err << "Last 80 unconsumed characters:\n"
31
- err << @source.buffer[0..80].gsub(/\n/, ' ')
31
+ err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
32
32
  end
33
-
33
+
34
34
  err
35
35
  end
36
36
 
@@ -40,12 +40,12 @@ module REXML
40
40
  end
41
41
 
42
42
  def line
43
- @source.current_line[2] if @source and defined? @source.current_line and
43
+ @source.current_line[2] if @source and defined? @source.current_line and
44
44
  @source.current_line
45
45
  end
46
46
 
47
47
  def context
48
48
  @source.current_line
49
49
  end
50
- end
50
+ end
51
51
  end
data/lib/rexml/parsers/baseparser.rb CHANGED
@@ -25,24 +25,31 @@ module REXML
25
25
  #
26
26
  # Nat Price gave me some good ideas for the API.
27
27
  class BaseParser
28
- NCNAME_STR= '[\w:][\-\w\d.]*'
28
+ LETTER = '[:alpha:]'
29
+ DIGIT = '[:digit:]'
30
+
31
+ COMBININGCHAR = '' # TODO
32
+ EXTENDER = '' # TODO
33
+
34
+ NCNAME_STR= "[#{LETTER}_:][-[:alnum:]._:#{COMBININGCHAR}#{EXTENDER}]*"
29
35
  NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
30
36
  UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
31
37
 
32
- NAMECHAR = '[\-\w\d\.:]'
38
+ NAMECHAR = '[\-\w\.:]'
33
39
  NAME = "([\\w:]#{NAMECHAR}*)"
34
40
  NMTOKEN = "(?:#{NAMECHAR})+"
35
41
  NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
36
- REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
42
+ REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
37
43
  REFERENCE_RE = /#{REFERENCE}/
38
44
 
39
45
  DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
46
+ DOCTYPE_END = /\A\s*\]\s*>/um
40
47
  DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
41
48
  ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
42
49
  COMMENT_START = /\A<!--/u
43
50
  COMMENT_PATTERN = /<!--(.*?)-->/um
44
51
  CDATA_START = /\A<!\[CDATA\[/u
45
- CDATA_END = /^\s*\]\s*>/um
52
+ CDATA_END = /\A\s*\]\s*>/um
46
53
  CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
47
54
  XMLDECL_START = /\A<\?xml\s/u;
48
55
  XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
@@ -53,13 +60,13 @@ module REXML
53
60
 
54
61
  VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
55
62
  ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
56
- STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
63
+ STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
57
64
 
58
- ENTITY_START = /^\s*<!ENTITY/
65
+ ENTITY_START = /\A\s*<!ENTITY/
59
66
  IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
60
- ELEMENTDECL_START = /^\s*<!ELEMENT/um
61
- ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
62
- SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
67
+ ELEMENTDECL_START = /\A\s*<!ELEMENT/um
68
+ ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
69
+ SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
63
70
  ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
64
71
  NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
65
72
  ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
@@ -68,11 +75,11 @@ module REXML
68
75
  DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
69
76
  ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
70
77
  ATTDEF_RE = /#{ATTDEF}/
71
- ATTLISTDECL_START = /^\s*<!ATTLIST/um
72
- ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
73
- NOTATIONDECL_START = /^\s*<!NOTATION/um
74
- PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
75
- SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
78
+ ATTLISTDECL_START = /\A\s*<!ATTLIST/um
79
+ ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
80
+ NOTATIONDECL_START = /\A\s*<!NOTATION/um
81
+ PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
82
+ SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
76
83
 
77
84
  TEXT_PATTERN = /\A([^<]*)/um
78
85
 
@@ -92,11 +99,11 @@ module REXML
92
99
 
93
100
  EREFERENCE = /&(?!#{NAME};)/
94
101
 
95
- DEFAULT_ENTITIES = {
96
- 'gt' => [/&gt;/, '&gt;', '>', />/],
97
- 'lt' => [/&lt;/, '&lt;', '<', /</],
98
- 'quot' => [/&quot;/, '&quot;', '"', /"/],
99
- "apos" => [/&apos;/, "&apos;", "'", /'/]
102
+ DEFAULT_ENTITIES = {
103
+ 'gt' => [/&gt;/, '&gt;', '>', />/],
104
+ 'lt' => [/&lt;/, '&lt;', '<', /</],
105
+ 'quot' => [/&quot;/, '&quot;', '"', /"/],
106
+ "apos" => [/&apos;/, "&apos;", "'", /'/]
100
107
  }
101
108
 
102
109
 
@@ -108,22 +115,10 @@ module REXML
108
115
 
109
116
  def initialize( source )
110
117
  self.stream = source
118
+ @listeners = []
111
119
  end
112
120
 
113
121
  def add_listener( listener )
114
- if !defined?(@listeners) or !@listeners
115
- @listeners = []
116
- instance_eval <<-EOL
117
- alias :_old_pull :pull
118
- def pull
119
- event = _old_pull
120
- @listeners.each do |listener|
121
- listener.receive event
122
- end
123
- event
124
- end
125
- EOL
126
- end
127
122
  @listeners << listener
128
123
  end
129
124
 
@@ -167,9 +162,9 @@ module REXML
167
162
  # Peek at the +depth+ event in the stack. The first element on the stack
168
163
  # is at depth 0. If +depth+ is -1, will parse to the end of the input
169
164
  # stream and return the last event, which is always :end_document.
170
- # Be aware that this causes the stream to be parsed up to the +depth+
171
- # event, so you can effectively pre-parse the entire document (pull the
172
- # entire thing into memory) using this method.
165
+ # Be aware that this causes the stream to be parsed up to the +depth+
166
+ # event, so you can effectively pre-parse the entire document (pull the
167
+ # entire thing into memory) using this method.
173
168
  def peek depth=0
174
169
  raise %Q[Illegal argument "#{depth}"] if depth < -1
175
170
  temp = []
@@ -186,6 +181,14 @@ module REXML
186
181
 
187
182
  # Returns the next event. This is a +PullEvent+ object.
188
183
  def pull
184
+ pull_event.tap do |event|
185
+ @listeners.each do |listener|
186
+ listener.receive event
187
+ end
188
+ end
189
+ end
190
+
191
+ def pull_event
189
192
  if @closed
190
193
  x, @closed = @closed, nil
191
194
  return [ :end_element, x ]
@@ -210,7 +213,12 @@ module REXML
210
213
  version = version[1] unless version.nil?
211
214
  encoding = ENCODING.match(results)
212
215
  encoding = encoding[1] unless encoding.nil?
213
- @source.encoding = encoding
216
+ if need_source_encoding_update?(encoding)
217
+ @source.encoding = encoding
218
+ end
219
+ if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
220
+ encoding = "UTF-16"
221
+ end
214
222
  standalone = STANDALONE.match(results)
215
223
  standalone = standalone[1] unless standalone.nil?
216
224
  return [ :xmldecl, version, encoding, standalone ]
@@ -242,12 +250,15 @@ module REXML
242
250
  @document_status = :after_doctype
243
251
  @source.read if @source.buffer.size<2
244
252
  md = @source.match(/\s*/um, true)
253
+ if @source.encoding == "UTF-8"
254
+ @source.buffer.force_encoding(::Encoding::UTF_8)
255
+ end
245
256
  end
246
257
  end
247
258
  if @document_status == :in_doctype
248
259
  md = @source.match(/\s*(.*?>)/um)
249
260
  case md[1]
250
- when SYSTEMENTITY
261
+ when SYSTEMENTITY
251
262
  match = @source.match( SYSTEMENTITY, true )[1]
252
263
  return [ :externalentity, match ]
253
264
 
@@ -272,7 +283,8 @@ module REXML
272
283
  # External reference
273
284
  match[3] = match[3][1..-2] # PUBID
274
285
  match[4] = match[4][1..-2] # HREF
275
- # match is [ :entity, name, PUBLIC, pubid, href ]
286
+ match.delete_at(5) if match.size > 5 # Chop out NDATA decl
287
+ # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
276
288
  else
277
289
  match[2] = match[2][1..-2]
278
290
  match.pop if match.size == 4
@@ -312,9 +324,9 @@ module REXML
312
324
  raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
313
325
  end
314
326
  return [ :notationdecl, *vals ]
315
- when CDATA_END
327
+ when DOCTYPE_END
316
328
  @document_status = :after_doctype
317
- @source.match( CDATA_END, true )
329
+ @source.match( DOCTYPE_END, true )
318
330
  return [ :end_doctype ]
319
331
  end
320
332
  end
@@ -326,7 +338,7 @@ module REXML
326
338
  #md = @source.match_to_consume( '>', CLOSE_MATCH)
327
339
  md = @source.match( CLOSE_MATCH, true )
328
340
  raise REXML::ParseException.new( "Missing end tag for "+
329
- "'#{last_tag}' (got \"#{md[1]}\")",
341
+ "'#{last_tag}' (got \"#{md[1]}\")",
330
342
  @source) unless last_tag == md[1]
331
343
  return [ :end_element, last_tag ]
332
344
  elsif @source.buffer[1] == ?!
@@ -335,6 +347,12 @@ module REXML
335
347
  raise REXML::ParseException.new("Malformed node", @source) unless md
336
348
  if md[0][2] == ?-
337
349
  md = @source.match( COMMENT_PATTERN, true )
350
+
351
+ case md[1]
352
+ when /--/, /-\z/
353
+ raise REXML::ParseException.new("Malformed comment", @source)
354
+ end
355
+
338
356
  return [ :comment, md[1] ] if md
339
357
  else
340
358
  md = @source.match( CDATA_PATTERN, true )
@@ -353,7 +371,7 @@ module REXML
353
371
  unless md
354
372
  # Check for missing attribute quotes
355
373
  raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
356
- raise REXML::ParseException.new("malformed XML: missing tag start", @source)
374
+ raise REXML::ParseException.new("malformed XML: missing tag start", @source)
357
375
  end
358
376
  attributes = {}
359
377
  prefixes = Set.new
@@ -362,27 +380,33 @@ module REXML
362
380
  if md[4].size > 0
363
381
  attrs = md[4].scan( ATTRIBUTE_PATTERN )
364
382
  raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
365
- attrs.each { |a,b,c,d,e|
366
- if b == "xmlns"
367
- if c == "xml"
368
- if d != "http://www.w3.org/XML/1998/namespace"
383
+ attrs.each do |attr_name, prefix, local_part, quote, value|
384
+ if prefix == "xmlns"
385
+ if local_part == "xml"
386
+ if value != "http://www.w3.org/XML/1998/namespace"
369
387
  msg = "The 'xml' prefix must not be bound to any other namespace "+
370
388
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
371
389
  raise REXML::ParseException.new( msg, @source, self )
372
390
  end
373
- elsif c == "xmlns"
391
+ elsif local_part == "xmlns"
374
392
  msg = "The 'xmlns' prefix must not be declared "+
375
393
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
376
394
  raise REXML::ParseException.new( msg, @source, self)
377
395
  end
378
- curr_ns << c
379
- elsif b
380
- prefixes << b unless b == "xml"
396
+ curr_ns << local_part
397
+ elsif prefix
398
+ prefixes << prefix unless prefix == "xml"
399
+ end
400
+
401
+ if attributes.has_key?(attr_name)
402
+ msg = "Duplicate attribute #{attr_name.inspect}"
403
+ raise REXML::ParseException.new(msg, @source, self)
381
404
  end
382
- attributes[a] = e
383
- }
405
+
406
+ attributes[attr_name] = value
407
+ end
384
408
  end
385
-
409
+
386
410
  # Verify that all of the prefixes have been defined
387
411
  for prefix in prefixes
388
412
  unless @nsstack.find{|k| k.member?(prefix)}
@@ -419,6 +443,7 @@ module REXML
419
443
  end
420
444
  return [ :dummy ]
421
445
  end
446
+ private :pull_event
422
447
 
423
448
  def entity( reference, entities )
424
449
  value = nil
@@ -436,7 +461,7 @@ module REXML
436
461
  # Doing it like this rather than in a loop improves the speed
437
462
  copy.gsub!( EREFERENCE, '&amp;' )
438
463
  entities.each do |key, value|
439
- copy.gsub!( value, "&#{key};" ) unless entity_filter and
464
+ copy.gsub!( value, "&#{key};" ) unless entity_filter and
440
465
  entity_filter.include?(entity)
441
466
  end if entities
442
467
  copy.gsub!( EREFERENCE, '&amp;' )
@@ -452,7 +477,7 @@ module REXML
452
477
  rv.gsub!( /\r\n?/, "\n" )
453
478
  matches = rv.scan( REFERENCE_RE )
454
479
  return rv if matches.size == 0
455
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
480
+ rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
456
481
  m=$1
457
482
  m = "0#{m}" if m[0] == ?x
458
483
  [Integer(m)].pack('U*')
@@ -465,19 +490,23 @@ module REXML
465
490
  if entity_value
466
491
  re = /&#{entity_reference};/
467
492
  rv.gsub!( re, entity_value )
493
+ else
494
+ er = DEFAULT_ENTITIES[entity_reference]
495
+ rv.gsub!( er[0], er[2] ) if er
468
496
  end
469
497
  end
470
498
  end
471
- matches.each do |entity_reference|
472
- unless filter and filter.include?(entity_reference)
473
- er = DEFAULT_ENTITIES[entity_reference]
474
- rv.gsub!( er[0], er[2] ) if er
475
- end
476
- end
477
499
  rv.gsub!( /&amp;/, '&' )
478
500
  end
479
501
  rv
480
502
  end
503
+
504
+ private
505
+ def need_source_encoding_update?(xml_declaration_encoding)
506
+ return false if xml_declaration_encoding.nil?
507
+ return false if /\AUTF-16\z/i =~ xml_declaration_encoding
508
+ true
509
+ end
481
510
  end
482
511
  end
483
512
  end