rexml 3.1.7.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.travis.yml +10 -0
  4. data/Gemfile +6 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +60 -0
  7. data/Rakefile +10 -0
  8. data/bin/console +14 -0
  9. data/bin/setup +8 -0
  10. data/lib/rexml/attlistdecl.rb +63 -0
  11. data/lib/rexml/attribute.rb +192 -0
  12. data/lib/rexml/cdata.rb +68 -0
  13. data/lib/rexml/child.rb +97 -0
  14. data/lib/rexml/comment.rb +80 -0
  15. data/lib/rexml/doctype.rb +270 -0
  16. data/lib/rexml/document.rb +291 -0
  17. data/lib/rexml/dtd/attlistdecl.rb +11 -0
  18. data/lib/rexml/dtd/dtd.rb +47 -0
  19. data/lib/rexml/dtd/elementdecl.rb +18 -0
  20. data/lib/rexml/dtd/entitydecl.rb +57 -0
  21. data/lib/rexml/dtd/notationdecl.rb +40 -0
  22. data/lib/rexml/element.rb +1267 -0
  23. data/lib/rexml/encoding.rb +51 -0
  24. data/lib/rexml/entity.rb +171 -0
  25. data/lib/rexml/formatters/default.rb +112 -0
  26. data/lib/rexml/formatters/pretty.rb +142 -0
  27. data/lib/rexml/formatters/transitive.rb +58 -0
  28. data/lib/rexml/functions.rb +447 -0
  29. data/lib/rexml/instruction.rb +71 -0
  30. data/lib/rexml/light/node.rb +196 -0
  31. data/lib/rexml/namespace.rb +48 -0
  32. data/lib/rexml/node.rb +76 -0
  33. data/lib/rexml/output.rb +30 -0
  34. data/lib/rexml/parent.rb +166 -0
  35. data/lib/rexml/parseexception.rb +52 -0
  36. data/lib/rexml/parsers/baseparser.rb +586 -0
  37. data/lib/rexml/parsers/lightparser.rb +59 -0
  38. data/lib/rexml/parsers/pullparser.rb +197 -0
  39. data/lib/rexml/parsers/sax2parser.rb +273 -0
  40. data/lib/rexml/parsers/streamparser.rb +61 -0
  41. data/lib/rexml/parsers/treeparser.rb +101 -0
  42. data/lib/rexml/parsers/ultralightparser.rb +57 -0
  43. data/lib/rexml/parsers/xpathparser.rb +675 -0
  44. data/lib/rexml/quickpath.rb +266 -0
  45. data/lib/rexml/rexml.rb +32 -0
  46. data/lib/rexml/sax2listener.rb +98 -0
  47. data/lib/rexml/security.rb +28 -0
  48. data/lib/rexml/source.rb +298 -0
  49. data/lib/rexml/streamlistener.rb +93 -0
  50. data/lib/rexml/syncenumerator.rb +33 -0
  51. data/lib/rexml/text.rb +424 -0
  52. data/lib/rexml/undefinednamespaceexception.rb +9 -0
  53. data/lib/rexml/validation/relaxng.rb +539 -0
  54. data/lib/rexml/validation/validation.rb +144 -0
  55. data/lib/rexml/validation/validationexception.rb +10 -0
  56. data/lib/rexml/xmldecl.rb +116 -0
  57. data/lib/rexml/xmltokens.rb +85 -0
  58. data/lib/rexml/xpath.rb +81 -0
  59. data/lib/rexml/xpath_parser.rb +934 -0
  60. data/rexml.gemspec +42 -0
  61. metadata +131 -0
@@ -0,0 +1,266 @@
1
+ # frozen_string_literal: false
2
+ require_relative 'functions'
3
+ require_relative 'xmltokens'
4
+
5
module REXML
  # A small, regular-expression driven path matcher. Every entry point is a
  # class-level method; the path string is consumed left to right, one step
  # at a time, and each step narrows (or replaces) the current node array.
  #
  # Function names that are not defined here are forwarded to Functions via
  # method_missing, which is how predicate expressions reach them.
  class QuickPath
    include Functions
    include XMLTokens

    # A base Hash object to be used when initializing a
    # default empty namespaces set.
    EMPTY_HASH = {}

    # Returns the first node matching +path+ below +element+, or nil when
    # nothing matches.
    def self.first(element, path, namespaces = EMPTY_HASH)
      match(element, path, namespaces)[0]
    end

    # Yields every node matching +path+; a nil path is treated as "*".
    def self.each(element, path, namespaces = EMPTY_HASH, &block)
      path = "*" unless path
      match(element, path, namespaces).each(&block)
    end

    # Returns the array of nodes matching +path+, evaluated relative to
    # +element+. Raises a RuntimeError when +path+ is nil.
    #
    # The namespace mapping is stored on Functions (shared state) before the
    # path is evaluated.
    def self.match(element, path, namespaces = EMPTY_HASH)
      raise "nil is not a valid xpath" unless path
      Functions.namespace_context = namespaces
      case path
      when /^\/([^\/]|$)/u
        # match on root
        path = path[1..-1]
        return [element.root.parent] if path == ''
        filter([element.root], path)
      when /^[-\w]*::/u
        # explicit axis: evaluate against the element itself
        filter([element], path)
      when /^\*/u, /^[\[!\w:]/u
        # wildcard, predicate or name: match on children
        filter(element.to_a, path)
      else
        filter([element], path)
      end
    end

    # Given an array of nodes it filters the array based on the path. The
    # result is that when this method returns, the array will contain elements
    # which match the path.
    #
    # Note that the element-name step removes non-matching entries from
    # +elements+ in place.
    def self.filter(elements, path)
      return elements if path.nil? or path == '' or elements.size == 0
      case path
      when /^\/\//u                                       # Descendant
        return axe(elements, "descendant-or-self", $')
      when /^\/?\b(\w[-\w]*)\b::/u                        # Axe
        return axe(elements, $1, $')
      when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u # Child
        rest = $'
        results = []
        elements.each { |element| results |= filter(element.to_a, rest) }
        return results
      when /^\/?(\w[-\w]*)\(/u                            # / Function
        return function(elements, $1, $')
      when Namespace::NAMESPLIT                           # Element name
        name = $2
        ns = $1
        rest = $'
        elements.delete_if do |element|
          !(element.kind_of? Element and
            (element.expanded_name == name or
             (element.name == name and
              element.namespace == Functions.namespace_context[ns])))
        end
        return filter(elements, rest)
      when /^\/\[/u
        # predicate applied to the children of each element
        matches = []
        elements.each do |element|
          matches |= predicate(element.to_a, path[1..-1]) if element.kind_of? Element
        end
        return matches
      when /^\[/u                                         # Predicate
        return predicate(elements, path)
      when /^\/?\.\.\./u                                  # Ancestor
        return axe(elements, "ancestor", $')
      when /^\/?\.\./u                                    # Parent
        return filter(elements.collect { |e| e.parent }, $')
      when /^\/?\./u                                      # Self
        return filter(elements, $')
      when /^\*/u                                         # Any
        rest = $'
        results = []
        elements.each do |element|
          results |= filter([element], rest) if element.kind_of? Element
        end
        return results
      end
      []
    end

    # Applies the axis named +axe_name+ to +elements+ and filters the nodes
    # it selects with the remaining path +rest+. Unknown axis names yield an
    # empty array. The result never contains duplicates.
    def self.axe(elements, axe_name, rest)
      matches = []
      # "-or-self" axes start from the elements themselves (on a copy, since
      # filter may delete entries in place).
      matches = filter(elements.dup, rest) if axe_name =~ /-or-self$/u
      case axe_name
      when /^descendant/u
        elements.each do |element|
          matches |= filter(element.to_a, "descendant-or-self::#{rest}") if element.kind_of? Element
        end
      when /^ancestor/u
        elements.each do |element|
          while element.parent
            matches << element.parent
            element = element.parent
          end
        end
        matches = filter(matches, rest)
      when "self"
        matches = filter(elements, rest)
      when "child"
        elements.each do |element|
          matches |= filter(element.to_a, rest) if element.kind_of? Element
        end
      when "attribute"
        elements.each do |element|
          matches << element.attributes[rest] if element.kind_of? Element
        end
      when "parent"
        matches = filter(elements.collect { |element| element.parent }.uniq, rest)
      when "following-sibling"
        matches = filter(elements.collect { |element| element.next_sibling }.uniq,
                         rest)
      when "previous-sibling"
        matches = filter(elements.collect { |element|
          element.previous_sibling }.uniq, rest)
      end
      matches.uniq
    end

    OPERAND_ = '((?=(?:(?!and|or).)*[^\s<>=])[^\s<>=]+)'

    # A predicate filters a node-set with respect to an axis to produce a
    # new node-set. For each node in the node-set to be filtered, the
    # PredicateExpr is evaluated with that node as the context node, with
    # the number of nodes in the node-set as the context size, and with the
    # proximity position of the node in the node-set with respect to the
    # axis as the context position; if PredicateExpr evaluates to true for
    # that node, the node is included in the new node-set; otherwise, it is
    # not included.
    #
    # A PredicateExpr is evaluated by evaluating the Expr and converting
    # the result to a boolean. If the result is a number, the result will
    # be converted to true if the number is equal to the context position
    # and will be converted to false otherwise; if the result is not a
    # number, then the result will be converted as if by a call to the
    # boolean function. Thus a location path para[3] is equivalent to
    # para[position()=3].
    #
    # +path+ must start with "[". A RuntimeError is raised when the opening
    # bracket is never closed (previously this looped forever).
    def self.predicate(elements, path)
      # Locate the "]" that balances the leading "[".
      ind = 1
      bcount = 1
      while bcount > 0
        raise "unterminated predicate in xpath: #{path}" if ind >= path.length
        case path[ind]
        when "[" then bcount += 1
        when "]" then bcount -= 1
        end
        ind += 1
      end
      ind -= 1
      predicate = path[1..ind-1]
      rest = path[ind+1..-1]

      # have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
      #
      predicate.gsub!(
        /#{OPERAND_}\s*([<>=])\s*#{OPERAND_}\s*([<>=])\s*#{OPERAND_}/u,
        '\1 \2 \3 and \3 \4 \5')
      # Let's do some Ruby trickery to avoid some work:
      predicate.gsub!(/&/u, "&&")
      predicate.gsub!(/=/u, "==")
      predicate.gsub!(/@(\w[-\w.]*)/u, 'attribute("\1")')
      predicate.gsub!(/\bmod\b/u, "%")
      predicate.gsub!(/\b(\w[-\w.]*\()/u) {
        fname = $1
        fname.gsub(/-/u, "_")
      }

      Functions.pair = [0, elements.size]
      results = []
      elements.each do |element|
        Functions.pair[0] += 1
        Functions.node = element
        # SECURITY NOTE(review): the rewritten predicate text is executed as
        # Ruby code. Never pass a path that comes from an untrusted source.
        res = eval(predicate)
        case res
        when true
          results << element
        when Integer
          results << element if Functions.pair[0] == res
        when String
          results << element
        end
      end
      filter(results, rest)
    end

    # Predicate helper: the named attribute of the current context node, or
    # nil when the context node is not an Element.
    def self.attribute(name)
      return Functions.node.attributes[name] if Functions.node.kind_of? Element
    end

    # Predicate helper: the name of the current context node, or nil when the
    # context node is not an Element.
    def self.name
      return Functions.node.name if Functions.node.kind_of? Element
    end

    # Forwards unknown class-level calls to Functions. Failures are re-raised
    # as a RuntimeError naming the attempted call. Only StandardError is
    # wrapped, so interrupts and exit requests pass through untouched.
    def self.method_missing(id, *args)
      begin
        Functions.send(id.id2name, *args)
      rescue StandardError
        raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{$!.message}"
      end
    end

    # Keeps respond_to? consistent with the forwarding done by method_missing.
    def self.respond_to_missing?(id, include_private = false)
      Functions.respond_to?(id, include_private) || super
    end

    # Calls the Functions method +fname+ once per element (with that element
    # as the context node) and keeps the elements for which it returns true,
    # or an Integer equal to the element's 1-based position.
    def self.function(elements, fname, rest)
      args = parse_args(elements, rest)
      Functions.pair = [0, elements.size]
      results = []
      elements.each do |element|
        Functions.pair[0] += 1
        Functions.node = element
        res = Functions.send(fname, *args)
        case res
        when true
          results << element
        when Integer
          results << element if Functions.pair[0] == res
        end
      end
      results
    end

    # Consumes +string+ (destructively) up to the closing ")" and returns the
    # collected arguments.
    #
    # NOTE(review): this parser looks unfinished. `evaluate` is not defined in
    # this class, so it is dispatched through method_missing; the buffer is
    # not cleared after a ","; and an input with no ")" yields "" rather than
    # the argument array. Those behaviours are kept as they were.
    def self.parse_args(element, string)
      arguments = []
      buffer = String.new
      while string and string != ""
        # slice! always removes exactly the first character; the previous
        # sub!(/^./) could not remove a leading newline.
        c = string.slice!(0)
        case c
        when ","
          # if depth = 1, then we start a new argument
          arguments << evaluate(buffer)
        when "("
          # start a new method call
          function(element, buffer, string)
          buffer = String.new
        when ")"
          # close the method call and return arguments
          return arguments
        else
          buffer << c
        end
      end
      ""
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+ # frozen_string_literal: false
3
+ # REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
4
+ #
5
+ # REXML is a _pure_ Ruby, XML 1.0 conforming,
6
+ # non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
7
+ # toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
8
+ # tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
9
+ # and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
10
+ # includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
11
+ # Ruby 1.8, REXML is included in the standard Ruby distribution.
12
+ #
13
+ # Main page:: http://www.germane-software.com/software/rexml
14
+ # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
15
+ # Date:: 2008/019
16
+ # Version:: 3.1.7.3
17
+ #
18
+ # This API documentation can be downloaded from the REXML home page, or can
19
+ # be accessed online[http://www.germane-software.com/software/rexml_doc]
20
+ #
21
+ # A tutorial is available in the REXML distribution in docs/tutorial.html,
22
+ # or can be accessed
23
+ # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
24
module REXML
  # Release metadata. The strings are frozen so that no caller can modify
  # the shared constant objects in place.
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>".freeze
  DATE = "2008/019".freeze
  VERSION = "3.1.7.3".freeze
  REVISION = "".freeze

  # Mixed-case aliases; each refers to the same object as its upper-case
  # counterpart.
  Copyright = COPYRIGHT
  Version = VERSION
end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: false
2
module REXML
  # A template for stream parser listeners: every callback is defined with an
  # empty body, so including classes override only what they need.
  #
  # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
  # processed; REXML doesn't yet handle doctype entity declarations, so you
  # have to parse them out yourself.
  #
  # === Missing methods from SAX2
  #  ignorable_whitespace
  # === Methods extending SAX2
  # +WARNING+
  # These methods are certainly going to change, until DTDs are fully
  # supported.  Be aware of this.
  #  start_document
  #  end_document
  #  doctype
  #  elementdecl
  #  attlistdecl
  #  entitydecl
  #  notationdecl
  #  cdata
  #  xmldecl
  #  comment
  module SAX2Listener
    def start_document; end

    def end_document; end

    def start_prefix_mapping(prefix, uri); end

    def end_prefix_mapping(prefix); end

    def start_element(uri, localname, qname, attributes); end

    def end_element(uri, localname, qname); end

    def characters(text); end

    def processing_instruction(target, data); end

    # Doctype declaration callback. Parts of the declaration that were not
    # supplied arrive as nil. Example: <!DOCTYPE me PUBLIC "foo" "bar">
    # @p name the name of the doctype; e.g. "me"
    # @p pub_sys "PUBLIC", "SYSTEM", or nil; e.g. "PUBLIC"
    # @p long_name the supplied long name, or nil; e.g. "foo"
    # @p uri the uri of the doctype, or nil; e.g. "bar"
    def doctype(name, pub_sys, long_name, uri); end

    # Called for each ATTLIST declaration inside a doctype. The declaration
    # text is delivered unparsed: <!ATTLIST el attr CDATA #REQUIRED> arrives
    # as "el attr CDATA #REQUIRED". The other *decl callbacks receive their
    # declarations the same way.
    def attlistdecl(element, pairs, contents); end

    # <!ELEMENT ...>
    def elementdecl(content); end

    # <!ENTITY ...>
    # The argument is the entity declaration split into an array. Several
    # layouts are possible; in general (declaration, then resulting array):
    #  <!ENTITY % YN '"Yes"'>
    #  ["%", "YN", "\"Yes\""]
    #  <!ENTITY % YN 'Yes'>
    #  ["%", "YN", "Yes"]
    #  <!ENTITY WhatHeSaid "He said %YN;">
    #  ["WhatHeSaid", "He said %YN;"]
    #  <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
    #  <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
    #  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
    #  ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "NDATA", "gif"]
    def entitydecl(declaration); end

    # <!NOTATION ...>
    def notationdecl(name, public_or_system, public_id, system_id); end

    # Called when <![CDATA[ ... ]]> is encountered in a document.
    # @p content "..."
    def cdata(content); end

    # Called when an XML declaration is encountered in the document.
    # Example: <?xml version="1.0" encoding="utf"?>
    # @p version the version attribute value; e.g. "1.0"
    # @p encoding the encoding attribute value, or nil; e.g. "utf"
    # @p standalone the standalone attribute value, or nil; e.g. nil
    def xmldecl(version, encoding, standalone); end

    # Called when a comment is encountered.
    # @p comment The content of the comment
    def comment(comment); end

    def progress(position); end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: false
2
module REXML
  # Process-wide limits applied to entity expansion.
  module Security
    @@entity_expansion_limit = 10_000
    @@entity_expansion_text_limit = 10_240

    # Set the entity expansion limit. By default the limit is set to 10000.
    def self.entity_expansion_limit=(val)
      @@entity_expansion_limit = val
    end

    # Get the entity expansion limit. By default the limit is set to 10000.
    def self.entity_expansion_limit
      @@entity_expansion_limit
    end

    # Set the entity expansion text limit. By default the limit is set to
    # 10240.
    def self.entity_expansion_text_limit=(val)
      @@entity_expansion_text_limit = val
    end

    # Get the entity expansion text limit. By default the limit is set to
    # 10240.
    def self.entity_expansion_text_limit
      @@entity_expansion_text_limit
    end
  end
end
@@ -0,0 +1,298 @@
1
+ # coding: US-ASCII
2
+ # frozen_string_literal: false
3
+ require_relative 'encoding'
4
+
5
+ module REXML
6
# Generates Source-s.  USE THIS CLASS.
class SourceFactory
  # Generates a Source object
  # @param arg Either a String (anything responding to to_str), an IO-like
  #   object (responding to read, readline and eof?), or a Source
  # @return an IOSource wrapping +arg+, or +arg+ itself when it already is
  #   a Source
  # @raise RuntimeError when +arg+ is none of the above
  def SourceFactory::create_from(arg)
    if arg.respond_to?(:read) && arg.respond_to?(:readline) && arg.respond_to?(:eof?)
      IOSource.new(arg)
    elsif arg.respond_to?(:to_str)
      # StringIO is only needed for String input, so it is loaded on demand.
      require 'stringio'
      IOSource.new(StringIO.new(arg))
    elsif arg.kind_of?(Source)
      arg
    else
      raise "#{arg.class} is not a valid input stream. It must walk \n" +
            "like either a String, an IO, or a Source."
    end
  end
end
28
+
29
# A Source can be searched for patterns, and wraps buffers and other
# objects and provides consumption of text
class Source
  include Encoding
  # The current buffer (what we're going to read next)
  attr_reader :buffer
  # The line number of the last consumed text
  attr_reader :line
  attr_reader :encoding

  # Constructor
  # @param arg must be a String, and should be a valid XML document
  # @param encoding if non-null, sets the encoding of the source to this
  # value, overriding all encoding detection
  def initialize(arg, encoding=nil)
    @orig = @buffer = arg
    if encoding
      self.encoding = encoding
    else
      detect_encoding
    end
    @line = 0
  end

  # Inherited from Encoding
  # Overridden to support optimized en/decoding: the buffer is only
  # re-processed when the inherited setter returns a truthy value.
  def encoding=(enc)
    return unless super
    encoding_updated
  end

  # Scans the source for a given pattern.  Note, that this is not your
  # usual scan() method.  For one thing, the pattern argument has some
  # requirements; for another, the source can be consumed.  You can easily
  # confuse this method.  Originally, the patterns were easier
  # to construct and this method more robust, because this method
  # generated search regexps on the fly; however, this was
  # computationally expensive and slowed down the entire REXML package
  # considerably, since this is by far the most commonly called method.
  # @param pattern must be a Regexp, and must be in the form of
  # /^\s*(#{your pattern, with no groups})(.*)/.  The first group
  # will be returned; the second group is used if the consume flag is
  # set.
  # @param cons if true, the pattern returned will be consumed, leaving
  # everything after it in the Source.
  # @return the pattern, if found, or nil if the Source is empty or the
  # pattern is not found.
  def scan(pattern, cons=false)
    return nil if @buffer.nil?
    rv = @buffer.scan(pattern)
    @buffer = $' if cons and rv.size>0
    rv
  end

  # No-op here; subclasses that can fetch more input override it.
  def read
  end

  # Drops everything up to and including the first match of +pattern+.
  # The buffer is left alone when the pattern does not match.
  def consume( pattern )
    md = pattern.match( @buffer )
    @buffer = md.post_match if md
  end

  # @return the MatchData for +pattern+ against the buffer (+char+ is
  # unused here)
  def match_to( char, pattern )
    pattern.match(@buffer)
  end

  # Like match_to, but also consumes the matched text.  The buffer is only
  # replaced on a successful match; previously a failed match overwrote it
  # with nil.
  def match_to_consume( char, pattern )
    md = pattern.match(@buffer)
    @buffer = md.post_match if md
    md
  end

  # @return the MatchData for +pattern+, consuming the match when +cons+
  # is true
  def match(pattern, cons=false)
    md = pattern.match(@buffer)
    @buffer = md.post_match if cons and md
    md
  end

  # @return true if the Source is exhausted
  def empty?
    @buffer == ""
  end

  # @return the index at which the remaining buffer occurs in the
  # original input
  def position
    @orig.index( @buffer )
  end

  # @return the current line in the source
  def current_line
    lines = @orig.split
    res = lines.grep @buffer[0..30]
    res = res[-1] if res.kind_of? Array
    lines.index( res ) if res
  end

  private

  # Strips a leading byte-order mark from the buffer and sets the encoding
  # it implies (UTF-8 when there is none).  The BOM is compared byte by
  # byte so that the check does not depend on the encoding this source
  # file is read in.
  def detect_encoding
    buffer_encoding = @buffer.encoding
    detected_encoding = "UTF-8"
    begin
      @buffer.force_encoding("ASCII-8BIT")
      bom = @buffer[0, 3].bytes.to_a
      if bom[0, 2] == [0xfe, 0xff]
        @buffer[0, 2] = ""
        detected_encoding = "UTF-16BE"
      elsif bom[0, 2] == [0xff, 0xfe]
        @buffer[0, 2] = ""
        detected_encoding = "UTF-16LE"
      elsif bom == [0xef, 0xbb, 0xbf]
        @buffer[0, 3] = ""
        detected_encoding = "UTF-8"
      end
    ensure
      # Always restore the caller-visible encoding of the buffer.
      @buffer.force_encoding(buffer_encoding)
    end
    self.encoding = detected_encoding
  end

  # Re-processes the buffer after the encoding changed: non-UTF-8 input is
  # decoded, UTF-8 input is merely tagged as such.
  def encoding_updated
    if @encoding != 'UTF-8'
      @buffer = decode(@buffer)
      @to_utf = true
    else
      @to_utf = false
      @buffer.force_encoding ::Encoding::UTF_8
    end
  end
end
156
+
157
# A Source that wraps an IO.  See the Source class for method
# documentation
class IOSource < Source
  #attr_reader :block_size

  # block_size has been deprecated
  #
  # Without an explicit +encoding+, the first three bytes of the stream are
  # read up front so that a byte-order mark can be detected.
  def initialize(arg, block_size=500, encoding=nil)
    @er_source = @source = arg
    @to_utf = false
    @pending_buffer = nil

    if encoding
      super("", encoding)
    else
      super(@source.read(3) || "")
    end

    # Strings read later are re-tagged as UTF-8 when no decoding happens
    # but the stream itself reports a different external encoding.
    @force_utf8 = !@to_utf &&
                  @buffer.respond_to?(:force_encoding) &&
                  @source.respond_to?(:external_encoding) &&
                  @source.external_encoding != ::Encoding::UTF_8
  end

  # Like Source#scan, but pulls more input from the stream until the
  # pattern matches or the stream is exhausted.
  def scan(pattern, cons=false)
    rv = super
    # You'll notice that this next section is very similar to the same
    # section in match(), but just a liiittle different.  This is
    # because it is a touch faster to do it this way with scan()
    # than the way match() does it; enough faster to warrant duplicating
    # some code
    if rv.size == 0
      until @buffer =~ pattern or @source.nil?
        begin
          @buffer << readline
        rescue
          # Any read failure (including end of stream) ends the input.
          # The former `rescue Iconv::IllegalSequence` clause is gone: Iconv
          # is no longer part of Ruby, so merely evaluating that constant
          # raised NameError here.
          @source = nil
        end
      end
      rv = super
    end
    # Object#taint no longer exists in current Ruby, so results are
    # returned untainted.
    rv
  end

  # Appends the next chunk of the stream to the buffer; on failure the
  # stream is dropped.
  def read
    begin
      @buffer << readline
    rescue StandardError
      @source = nil
    end
  end

  def consume( pattern )
    match( pattern, true )
  end

  # Like Source#match, but keeps reading from the stream until the pattern
  # matches or the stream is exhausted.
  def match( pattern, cons=false )
    rv = pattern.match(@buffer)
    @buffer = rv.post_match if cons and rv
    while !rv and @source
      begin
        @buffer << readline
        rv = pattern.match(@buffer)
        @buffer = rv.post_match if cons and rv
      rescue
        @source = nil
      end
    end
    rv
  end

  def empty?
    super and ( @source.nil? || @source.eof? )
  end

  # @return the position reported by the underlying stream, or 0 when it
  # cannot report one
  def position
    @er_source.pos rescue 0
  end

  # @return the current line in the source, as [pos, lineno, line]
  def current_line
    begin
      pos = @er_source.pos        # The byte position in the source
      lineno = @er_source.lineno  # The XML < position in the source
      @er_source.rewind
      line = 0                    # The \r\n position in the source
      begin
        while @er_source.pos < pos
          @er_source.readline
          line += 1
        end
      rescue
      end
      @er_source.seek(pos)
    rescue IOError
      pos = -1
      line = -1
    end
    [pos, lineno, line]
  end

  private

  # Reads up to and including the next @line_break from the stream,
  # prefixed by any bytes held back in @pending_buffer.
  #
  # When the stream is already at its end but bytes are still pending,
  # those bytes are returned instead of being lost to the EOFError.
  def readline
    begin
      str = @source.readline(@line_break)
    rescue EOFError
      raise unless @pending_buffer
      str = nil
    end
    if @pending_buffer
      str = str.nil? ? @pending_buffer : @pending_buffer + str
      @pending_buffer = nil
    end
    return nil if str.nil?

    if @to_utf
      decode(str)
    else
      str.force_encoding(::Encoding::UTF_8) if @force_utf8
      str
    end
  end

  # Reconfigures the stream for the new encoding and moves the bytes read
  # so far into @pending_buffer, so that the next readline prepends them.
  def encoding_updated
    case @encoding
    when "UTF-16BE", "UTF-16LE"
      @source.binmode
      @source.set_encoding(@encoding, @encoding)
    end
    @line_break = encode(">")
    @pending_buffer, @buffer = @buffer, ""
    @pending_buffer.force_encoding(@encoding)
    super
  end
end
298
+ end