rubysl-rexml 1.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +3 -2
  3. data/lib/rexml/attlistdecl.rb +56 -56
  4. data/lib/rexml/attribute.rb +155 -149
  5. data/lib/rexml/cdata.rb +48 -48
  6. data/lib/rexml/child.rb +82 -82
  7. data/lib/rexml/comment.rb +59 -59
  8. data/lib/rexml/doctype.rb +22 -24
  9. data/lib/rexml/document.rb +185 -129
  10. data/lib/rexml/dtd/attlistdecl.rb +7 -7
  11. data/lib/rexml/dtd/dtd.rb +41 -41
  12. data/lib/rexml/dtd/elementdecl.rb +13 -13
  13. data/lib/rexml/dtd/entitydecl.rb +49 -49
  14. data/lib/rexml/dtd/notationdecl.rb +32 -32
  15. data/lib/rexml/element.rb +122 -107
  16. data/lib/rexml/encoding.rb +37 -58
  17. data/lib/rexml/entity.rb +144 -144
  18. data/lib/rexml/formatters/default.rb +6 -4
  19. data/lib/rexml/formatters/pretty.rb +11 -8
  20. data/lib/rexml/formatters/transitive.rb +4 -3
  21. data/lib/rexml/functions.rb +33 -21
  22. data/lib/rexml/instruction.rb +49 -49
  23. data/lib/rexml/light/node.rb +190 -191
  24. data/lib/rexml/namespace.rb +39 -39
  25. data/lib/rexml/node.rb +38 -38
  26. data/lib/rexml/output.rb +17 -12
  27. data/lib/rexml/parent.rb +26 -25
  28. data/lib/rexml/parseexception.rb +4 -4
  29. data/lib/rexml/parsers/baseparser.rb +90 -61
  30. data/lib/rexml/parsers/lightparser.rb +41 -43
  31. data/lib/rexml/parsers/pullparser.rb +1 -1
  32. data/lib/rexml/parsers/sax2parser.rb +233 -198
  33. data/lib/rexml/parsers/streamparser.rb +6 -2
  34. data/lib/rexml/parsers/treeparser.rb +9 -6
  35. data/lib/rexml/parsers/ultralightparser.rb +40 -40
  36. data/lib/rexml/parsers/xpathparser.rb +51 -52
  37. data/lib/rexml/quickpath.rb +247 -248
  38. data/lib/rexml/rexml.rb +9 -10
  39. data/lib/rexml/sax2listener.rb +92 -92
  40. data/lib/rexml/security.rb +27 -0
  41. data/lib/rexml/source.rb +95 -50
  42. data/lib/rexml/streamlistener.rb +90 -90
  43. data/lib/rexml/syncenumerator.rb +3 -4
  44. data/lib/rexml/text.rb +157 -76
  45. data/lib/rexml/validation/relaxng.rb +18 -18
  46. data/lib/rexml/validation/validation.rb +5 -5
  47. data/lib/rexml/xmldecl.rb +59 -63
  48. data/lib/rexml/xmltokens.rb +14 -14
  49. data/lib/rexml/xpath.rb +67 -53
  50. data/lib/rexml/xpath_parser.rb +49 -38
  51. data/lib/rubysl/rexml.rb +1 -0
  52. data/lib/rubysl/rexml/version.rb +1 -1
  53. data/rubysl-rexml.gemspec +3 -1
  54. metadata +19 -28
  55. data/lib/rexml/encodings/CP-1252.rb +0 -103
  56. data/lib/rexml/encodings/EUC-JP.rb +0 -35
  57. data/lib/rexml/encodings/ICONV.rb +0 -22
  58. data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
  59. data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
  60. data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
  61. data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
  62. data/lib/rexml/encodings/UNILE.rb +0 -34
  63. data/lib/rexml/encodings/US-ASCII.rb +0 -30
  64. data/lib/rexml/encodings/UTF-16.rb +0 -35
  65. data/lib/rexml/encodings/UTF-8.rb +0 -18
data/lib/rexml/rexml.rb CHANGED
@@ -5,27 +5,26 @@
5
5
  # non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
6
6
  # toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
7
7
  # tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
8
- # and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
9
- # includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
8
+ # and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
9
+ # includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
10
10
  # Ruby 1.8, REXML is included in the standard Ruby distribution.
11
11
  #
12
12
  # Main page:: http://www.germane-software.com/software/rexml
13
13
  # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
14
- # Version:: 3.1.7.2
15
- # Date:: 2007/275
16
- # Revision:: $Revision$
17
- #
14
+ # Date:: 2008/019
15
+ # Version:: 3.1.7.3
16
+ #
18
17
  # This API documentation can be downloaded from the REXML home page, or can
19
18
  # be accessed online[http://www.germane-software.com/software/rexml_doc]
20
19
  #
21
20
  # A tutorial is available in the REXML distribution in docs/tutorial.html,
22
- # or can be accessed
21
+ # or can be accessed
23
22
  # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
24
23
  module REXML
25
- COPYRIGHT = "Copyright \xC2\xA9 2001-2006 Sean Russell <ser@germane-software.com>"
24
+ COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
25
+ DATE = "2008/019"
26
26
  VERSION = "3.1.7.3"
27
- DATE = "2007/275"
28
- REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip
27
+ REVISION = %w$Revision: 39528 $[1] || ''
29
28
 
30
29
  Copyright = COPYRIGHT
31
30
  Version = VERSION
data/lib/rexml/sax2listener.rb CHANGED
@@ -1,97 +1,97 @@
1
1
  module REXML
2
- # A template for stream parser listeners.
3
- # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
4
- # processed; REXML doesn't yet handle doctype entity declarations, so you
5
- # have to parse them out yourself.
6
- # === Missing methods from SAX2
7
- # ignorable_whitespace
8
- # === Methods extending SAX2
9
- # +WARNING+
10
- # These methods are certainly going to change, until DTDs are fully
11
- # supported. Be aware of this.
12
- # start_document
13
- # end_document
14
- # doctype
15
- # elementdecl
16
- # attlistdecl
17
- # entitydecl
18
- # notationdecl
19
- # cdata
20
- # xmldecl
21
- # comment
22
- module SAX2Listener
23
- def start_document
24
- end
25
- def end_document
26
- end
27
- def start_prefix_mapping prefix, uri
28
- end
29
- def end_prefix_mapping prefix
30
- end
31
- def start_element uri, localname, qname, attributes
32
- end
33
- def end_element uri, localname, qname
34
- end
35
- def characters text
36
- end
37
- def processing_instruction target, data
38
- end
39
- # Handles a doctype declaration. Any attributes of the doctype which are
40
- # not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
41
- # @p name the name of the doctype; EG, "me"
42
- # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
43
- # @p long_name the supplied long name, or nil. EG, "foo"
44
- # @p uri the uri of the doctype, or nil. EG, "bar"
45
- def doctype name, pub_sys, long_name, uri
46
- end
47
- # If a doctype includes an ATTLIST declaration, it will cause this
48
- # method to be called. The content is the declaration itself, unparsed.
49
- # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
50
- # attr CDATA #REQUIRED". This is the same for all of the .*decl
51
- # methods.
52
- def attlistdecl(element, pairs, contents)
53
- end
54
- # <!ELEMENT ...>
55
- def elementdecl content
56
- end
57
- # <!ENTITY ...>
58
- # The argument passed to this method is an array of the entity
59
- # declaration. It can be in a number of formats, but in general it
60
- # returns (example, result):
61
- # <!ENTITY % YN '"Yes"'>
62
- # ["%", "YN", "'\"Yes\"'", "\""]
63
- # <!ENTITY % YN 'Yes'>
64
- # ["%", "YN", "'Yes'", "s"]
65
- # <!ENTITY WhatHeSaid "He said %YN;">
66
- # ["WhatHeSaid", "\"He said %YN;\"", "YN"]
67
- # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
68
- # ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
69
- # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
70
- # ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
71
- # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
72
- # ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
73
- def entitydecl name, decl
74
- end
75
- # <!NOTATION ...>
76
- def notationdecl content
77
- end
78
- # Called when <![CDATA[ ... ]]> is encountered in a document.
79
- # @p content "..."
80
- def cdata content
81
- end
82
- # Called when an XML PI is encountered in the document.
83
- # EG: <?xml version="1.0" encoding="utf"?>
84
- # @p version the version attribute value. EG, "1.0"
85
- # @p encoding the encoding attribute value, or nil. EG, "utf"
86
- # @p standalone the standalone attribute value, or nil. EG, nil
2
+ # A template for stream parser listeners.
3
+ # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
4
+ # processed; REXML doesn't yet handle doctype entity declarations, so you
5
+ # have to parse them out yourself.
6
+ # === Missing methods from SAX2
7
+ # ignorable_whitespace
8
+ # === Methods extending SAX2
9
+ # +WARNING+
10
+ # These methods are certainly going to change, until DTDs are fully
11
+ # supported. Be aware of this.
12
+ # start_document
13
+ # end_document
14
+ # doctype
15
+ # elementdecl
16
+ # attlistdecl
17
+ # entitydecl
18
+ # notationdecl
19
+ # cdata
20
+ # xmldecl
21
+ # comment
22
+ module SAX2Listener
23
+ def start_document
24
+ end
25
+ def end_document
26
+ end
27
+ def start_prefix_mapping prefix, uri
28
+ end
29
+ def end_prefix_mapping prefix
30
+ end
31
+ def start_element uri, localname, qname, attributes
32
+ end
33
+ def end_element uri, localname, qname
34
+ end
35
+ def characters text
36
+ end
37
+ def processing_instruction target, data
38
+ end
39
+ # Handles a doctype declaration. Any attributes of the doctype which are
40
+ # not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
41
+ # @p name the name of the doctype; EG, "me"
42
+ # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
43
+ # @p long_name the supplied long name, or nil. EG, "foo"
44
+ # @p uri the uri of the doctype, or nil. EG, "bar"
45
+ def doctype name, pub_sys, long_name, uri
46
+ end
47
+ # If a doctype includes an ATTLIST declaration, it will cause this
48
+ # method to be called. The content is the declaration itself, unparsed.
49
+ # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
50
+ # attr CDATA #REQUIRED". This is the same for all of the .*decl
51
+ # methods.
52
+ def attlistdecl(element, pairs, contents)
53
+ end
54
+ # <!ELEMENT ...>
55
+ def elementdecl content
56
+ end
57
+ # <!ENTITY ...>
58
+ # The argument passed to this method is an array of the entity
59
+ # declaration. It can be in a number of formats, but in general it
60
+ # returns (example, result):
61
+ # <!ENTITY % YN '"Yes"'>
62
+ # ["%", "YN", "\"Yes\""]
63
+ # <!ENTITY % YN 'Yes'>
64
+ # ["%", "YN", "Yes"]
65
+ # <!ENTITY WhatHeSaid "He said %YN;">
66
+ # ["WhatHeSaid", "He said %YN;"]
67
+ # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
68
+ # ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
69
+ # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
70
+ # ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
71
+ # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
72
+ # ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "NDATA", "gif"]
73
+ def entitydecl declaration
74
+ end
75
+ # <!NOTATION ...>
76
+ def notationdecl name, public_or_system, public_id, system_id
77
+ end
78
+ # Called when <![CDATA[ ... ]]> is encountered in a document.
79
+ # @p content "..."
80
+ def cdata content
81
+ end
82
+ # Called when an XML PI is encountered in the document.
83
+ # EG: <?xml version="1.0" encoding="utf"?>
84
+ # @p version the version attribute value. EG, "1.0"
85
+ # @p encoding the encoding attribute value, or nil. EG, "utf"
86
+ # @p standalone the standalone attribute value, or nil. EG, nil
87
87
  # @p spaced the declaration is followed by a line break
88
- def xmldecl version, encoding, standalone
89
- end
90
- # Called when a comment is encountered.
91
- # @p comment The content of the comment
92
- def comment comment
93
- end
88
+ def xmldecl version, encoding, standalone
89
+ end
90
+ # Called when a comment is encountered.
91
+ # @p comment The content of the comment
92
+ def comment comment
93
+ end
94
94
  def progress position
95
95
  end
96
- end
96
+ end
97
97
  end
data/lib/rexml/security.rb ADDED
@@ -0,0 +1,27 @@
1
+ module REXML
2
+ module Security
3
+ @@entity_expansion_limit = 10_000
4
+
5
+ # Set the entity expansion limit. By default the limit is set to 10000.
6
+ def self.entity_expansion_limit=( val )
7
+ @@entity_expansion_limit = val
8
+ end
9
+
10
+ # Get the entity expansion limit. By default the limit is set to 10000.
11
+ def self.entity_expansion_limit
12
+ return @@entity_expansion_limit
13
+ end
14
+
15
+ @@entity_expansion_text_limit = 10_240
16
+
17
+ # Set the entity expansion text limit. By default the limit is set to 10240.
18
+ def self.entity_expansion_text_limit=( val )
19
+ @@entity_expansion_text_limit = val
20
+ end
21
+
22
+ # Get the entity expansion text limit. By default the limit is set to 10240.
23
+ def self.entity_expansion_text_limit
24
+ return @@entity_expansion_text_limit
25
+ end
26
+ end
27
+ end
data/lib/rexml/source.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # coding: US-ASCII
1
2
  require 'rexml/encoding'
2
3
 
3
4
  module REXML
@@ -7,13 +8,14 @@ module REXML
7
8
  # @param arg Either a String, or an IO
8
9
  # @return a Source, or nil if a bad argument was given
9
10
  def SourceFactory::create_from(arg)
10
- if arg.kind_of? String
11
- Source.new(arg)
12
- elsif arg.respond_to? :read and
13
- arg.respond_to? :readline and
14
- arg.respond_to? :nil? and
15
- arg.respond_to? :eof?
11
+ if arg.respond_to? :read and
12
+ arg.respond_to? :readline and
13
+ arg.respond_to? :nil? and
14
+ arg.respond_to? :eof?
16
15
  IOSource.new(arg)
16
+ elsif arg.respond_to? :to_str
17
+ require 'stringio'
18
+ IOSource.new(StringIO.new(arg))
17
19
  elsif arg.kind_of? Source
18
20
  arg
19
21
  else
@@ -42,7 +44,7 @@ module REXML
42
44
  if encoding
43
45
  self.encoding = encoding
44
46
  else
45
- self.encoding = check_encoding( @buffer )
47
+ detect_encoding
46
48
  end
47
49
  @line = 0
48
50
  end
@@ -52,22 +54,16 @@ module REXML
52
54
  # Overridden to support optimized en/decoding
53
55
  def encoding=(enc)
54
56
  return unless super
55
- @line_break = encode( '>' )
56
- if enc != UTF_8
57
- @buffer = decode(@buffer)
58
- @to_utf = true
59
- else
60
- @to_utf = false
61
- end
57
+ encoding_updated
62
58
  end
63
59
 
64
60
  # Scans the source for a given pattern. Note, that this is not your
65
61
  # usual scan() method. For one thing, the pattern argument has some
66
62
  # requirements; for another, the source can be consumed. You can easily
67
63
  # confuse this method. Originally, the patterns were easier
68
- # to construct and this method more robust, because this method
69
- # generated search regexes on the fly; however, this was
70
- # computationally expensive and slowed down the entire REXML package
64
+ # to construct and this method more robust, because this method
65
+ # generated search regexes on the fly; however, this was
66
+ # computationally expensive and slowed down the entire REXML package
71
67
  # considerably, since this is by far the most commonly called method.
72
68
  # @param pattern must be a Regexp, and must be in the form of
73
69
  # /^\s*(#{your pattern, with no groups})(.*)/. The first group
@@ -123,6 +119,38 @@ module REXML
123
119
  res = res[-1] if res.kind_of? Array
124
120
  lines.index( res ) if res
125
121
  end
122
+
123
+ private
124
+ def detect_encoding
125
+ buffer_encoding = @buffer.encoding
126
+ detected_encoding = "UTF-8"
127
+ begin
128
+ @buffer.force_encoding("ASCII-8BIT")
129
+ if @buffer[0, 2] == "\xfe\xff"
130
+ @buffer[0, 2] = ""
131
+ detected_encoding = "UTF-16BE"
132
+ elsif @buffer[0, 2] == "\xff\xfe"
133
+ @buffer[0, 2] = ""
134
+ detected_encoding = "UTF-16LE"
135
+ elsif @buffer[0, 3] == "\xef\xbb\xbf"
136
+ @buffer[0, 3] = ""
137
+ detected_encoding = "UTF-8"
138
+ end
139
+ ensure
140
+ @buffer.force_encoding(buffer_encoding)
141
+ end
142
+ self.encoding = detected_encoding
143
+ end
144
+
145
+ def encoding_updated
146
+ if @encoding != 'UTF-8'
147
+ @buffer = decode(@buffer)
148
+ @to_utf = true
149
+ else
150
+ @to_utf = false
151
+ @buffer.force_encoding ::Encoding::UTF_8
152
+ end
153
+ end
126
154
  end
127
155
 
128
156
  # A Source that wraps an IO. See the Source class for method
@@ -134,30 +162,22 @@ module REXML
134
162
  def initialize(arg, block_size=500, encoding=nil)
135
163
  @er_source = @source = arg
136
164
  @to_utf = false
165
+ @pending_buffer = nil
137
166
 
138
- # Determining the encoding is a deceptively difficult issue to resolve.
139
- # First, we check the first two bytes for UTF-16. Then we
140
- # assume that the encoding is at least ASCII enough for the '>', and
141
- # we read until we get one of those. This gives us the XML declaration,
142
- # if there is one. If there isn't one, the file MUST be UTF-8, as per
143
- # the XML spec. If there is one, we can determine the encoding from
144
- # it.
145
- @buffer = ""
146
- str = @source.read( 2 )
147
167
  if encoding
148
- self.encoding = encoding
149
- elsif 0xfe == str[0] && 0xff == str[1]
150
- @line_break = "\000>"
151
- elsif 0xff == str[0] && 0xfe == str[1]
152
- @line_break = ">\000"
153
- elsif 0xef == str[0] && 0xbb == str[1]
154
- str += @source.read(1)
155
- str = '' if (0xbf == str[2])
156
- @line_break = ">"
168
+ super("", encoding)
169
+ else
170
+ super(@source.read(3) || "")
171
+ end
172
+
173
+ if !@to_utf and
174
+ @buffer.respond_to?(:force_encoding) and
175
+ @source.respond_to?(:external_encoding) and
176
+ @source.external_encoding != ::Encoding::UTF_8
177
+ @force_utf8 = true
157
178
  else
158
- @line_break = ">"
179
+ @force_utf8 = false
159
180
  end
160
- super str+@source.readline( @line_break )
161
181
  end
162
182
 
163
183
  def scan(pattern, cons=false)
@@ -170,11 +190,7 @@ module REXML
170
190
  if rv.size == 0
171
191
  until @buffer =~ pattern or @source.nil?
172
192
  begin
173
- # READLINE OPT
174
- #str = @source.read(@block_size)
175
- str = @source.readline(@line_break)
176
- str = decode(str) if @to_utf and str
177
- @buffer << str
193
+ @buffer << readline
178
194
  rescue Iconv::IllegalSequence
179
195
  raise
180
196
  rescue
@@ -189,9 +205,7 @@ module REXML
189
205
 
190
206
  def read
191
207
  begin
192
- str = @source.readline(@line_break)
193
- str = decode(str) if @to_utf and str
194
- @buffer << str
208
+ @buffer << readline
195
209
  rescue Exception, NameError
196
210
  @source = nil
197
211
  end
@@ -206,9 +220,7 @@ module REXML
206
220
  @buffer = $' if cons and rv
207
221
  while !rv and @source
208
222
  begin
209
- str = @source.readline(@line_break)
210
- str = decode(str) if @to_utf and str
211
- @buffer << str
223
+ @buffer << readline
212
224
  rv = pattern.match(@buffer)
213
225
  @buffer = $' if cons and rv
214
226
  rescue
@@ -218,13 +230,13 @@ module REXML
218
230
  rv.taint
219
231
  rv
220
232
  end
221
-
233
+
222
234
  def empty?
223
235
  super and ( @source.nil? || @source.eof? )
224
236
  end
225
237
 
226
238
  def position
227
- @er_source.stat.pipe? ? 0 : @er_source.pos
239
+ @er_source.pos rescue 0
228
240
  end
229
241
 
230
242
  # @return the current line in the source
@@ -247,5 +259,38 @@ module REXML
247
259
  end
248
260
  [pos, lineno, line]
249
261
  end
262
+
263
+ private
264
+ def readline
265
+ str = @source.readline(@line_break)
266
+ if @pending_buffer
267
+ if str.nil?
268
+ str = @pending_buffer
269
+ else
270
+ str = @pending_buffer + str
271
+ end
272
+ @pending_buffer = nil
273
+ end
274
+ return nil if str.nil?
275
+
276
+ if @to_utf
277
+ decode(str)
278
+ else
279
+ str.force_encoding(::Encoding::UTF_8) if @force_utf8
280
+ str
281
+ end
282
+ end
283
+
284
+ def encoding_updated
285
+ case @encoding
286
+ when "UTF-16BE", "UTF-16LE"
287
+ @source.binmode
288
+ @source.set_encoding(@encoding)
289
+ end
290
+ @line_break = encode(">")
291
+ @pending_buffer, @buffer = @buffer, ""
292
+ @pending_buffer.force_encoding(@encoding)
293
+ super
294
+ end
250
295
  end
251
296
  end