rubysl-rexml 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +3 -2
  3. data/lib/rexml/attlistdecl.rb +56 -56
  4. data/lib/rexml/attribute.rb +155 -149
  5. data/lib/rexml/cdata.rb +48 -48
  6. data/lib/rexml/child.rb +82 -82
  7. data/lib/rexml/comment.rb +59 -59
  8. data/lib/rexml/doctype.rb +22 -24
  9. data/lib/rexml/document.rb +185 -129
  10. data/lib/rexml/dtd/attlistdecl.rb +7 -7
  11. data/lib/rexml/dtd/dtd.rb +41 -41
  12. data/lib/rexml/dtd/elementdecl.rb +13 -13
  13. data/lib/rexml/dtd/entitydecl.rb +49 -49
  14. data/lib/rexml/dtd/notationdecl.rb +32 -32
  15. data/lib/rexml/element.rb +122 -107
  16. data/lib/rexml/encoding.rb +37 -58
  17. data/lib/rexml/entity.rb +144 -144
  18. data/lib/rexml/formatters/default.rb +6 -4
  19. data/lib/rexml/formatters/pretty.rb +11 -8
  20. data/lib/rexml/formatters/transitive.rb +4 -3
  21. data/lib/rexml/functions.rb +33 -21
  22. data/lib/rexml/instruction.rb +49 -49
  23. data/lib/rexml/light/node.rb +190 -191
  24. data/lib/rexml/namespace.rb +39 -39
  25. data/lib/rexml/node.rb +38 -38
  26. data/lib/rexml/output.rb +17 -12
  27. data/lib/rexml/parent.rb +26 -25
  28. data/lib/rexml/parseexception.rb +4 -4
  29. data/lib/rexml/parsers/baseparser.rb +90 -61
  30. data/lib/rexml/parsers/lightparser.rb +41 -43
  31. data/lib/rexml/parsers/pullparser.rb +1 -1
  32. data/lib/rexml/parsers/sax2parser.rb +233 -198
  33. data/lib/rexml/parsers/streamparser.rb +6 -2
  34. data/lib/rexml/parsers/treeparser.rb +9 -6
  35. data/lib/rexml/parsers/ultralightparser.rb +40 -40
  36. data/lib/rexml/parsers/xpathparser.rb +51 -52
  37. data/lib/rexml/quickpath.rb +247 -248
  38. data/lib/rexml/rexml.rb +9 -10
  39. data/lib/rexml/sax2listener.rb +92 -92
  40. data/lib/rexml/security.rb +27 -0
  41. data/lib/rexml/source.rb +95 -50
  42. data/lib/rexml/streamlistener.rb +90 -90
  43. data/lib/rexml/syncenumerator.rb +3 -4
  44. data/lib/rexml/text.rb +157 -76
  45. data/lib/rexml/validation/relaxng.rb +18 -18
  46. data/lib/rexml/validation/validation.rb +5 -5
  47. data/lib/rexml/xmldecl.rb +59 -63
  48. data/lib/rexml/xmltokens.rb +14 -14
  49. data/lib/rexml/xpath.rb +67 -53
  50. data/lib/rexml/xpath_parser.rb +49 -38
  51. data/lib/rubysl/rexml.rb +1 -0
  52. data/lib/rubysl/rexml/version.rb +1 -1
  53. data/rubysl-rexml.gemspec +3 -1
  54. metadata +19 -28
  55. data/lib/rexml/encodings/CP-1252.rb +0 -103
  56. data/lib/rexml/encodings/EUC-JP.rb +0 -35
  57. data/lib/rexml/encodings/ICONV.rb +0 -22
  58. data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
  59. data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
  60. data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
  61. data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
  62. data/lib/rexml/encodings/UNILE.rb +0 -34
  63. data/lib/rexml/encodings/US-ASCII.rb +0 -30
  64. data/lib/rexml/encodings/UTF-16.rb +0 -35
  65. data/lib/rexml/encodings/UTF-8.rb +0 -18
data/lib/rexml/encoding.rb CHANGED
@@ -1,71 +1,50 @@
1
- # -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
1
+ # coding: US-ASCII
2
2
  module REXML
3
3
  module Encoding
4
- @encoding_methods = {}
5
- def self.register(enc, &block)
6
- @encoding_methods[enc] = block
7
- end
8
- def self.apply(obj, enc)
9
- @encoding_methods[enc][obj]
10
- end
11
- def self.encoding_method(enc)
12
- @encoding_methods[enc]
13
- end
14
-
15
- # Native, default format is UTF-8, so it is declared here rather than in
16
- # an encodings/ definition.
17
- UTF_8 = 'UTF-8'
18
- UTF_16 = 'UTF-16'
19
- UNILE = 'UNILE'
20
-
21
4
  # ID ---> Encoding name
22
5
  attr_reader :encoding
23
- def encoding=( enc )
24
- old_verbosity = $VERBOSE
25
- begin
26
- $VERBOSE = false
27
- enc = enc.nil? ? nil : enc.upcase
28
- return false if defined? @encoding and enc == @encoding
29
- if enc and enc != UTF_8
30
- @encoding = enc
31
- raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
32
- @encoding.untaint
33
- begin
34
- require 'rexml/encodings/ICONV.rb'
35
- Encoding.apply(self, "ICONV")
36
- rescue LoadError, Exception
37
- begin
38
- enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
39
- require enc_file
40
- Encoding.apply(self, @encoding)
41
- rescue LoadError => err
42
- puts err.message
43
- raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
44
- end
45
- end
46
- else
47
- @encoding = UTF_8
48
- require 'rexml/encodings/UTF-8.rb'
49
- Encoding.apply(self, @encoding)
6
+ def encoding=(encoding)
7
+ encoding = encoding.name if encoding.is_a?(Encoding)
8
+ if encoding.is_a?(String)
9
+ original_encoding = encoding
10
+ encoding = find_encoding(encoding)
11
+ unless encoding
12
+ raise ArgumentError, "Bad encoding name #{original_encoding}"
50
13
  end
51
- ensure
52
- $VERBOSE = old_verbosity
14
+ end
15
+ return false if defined?(@encoding) and encoding == @encoding
16
+ if encoding
17
+ @encoding = encoding.upcase
18
+ else
19
+ @encoding = 'UTF-8'
53
20
  end
54
21
  true
55
22
  end
56
23
 
57
- def check_encoding str
58
- # We have to recognize UTF-16, LSB UTF-16, and UTF-8
59
- if str[0] == 0xfe && str[1] == 0xff
60
- str[0,2] = ""
61
- return UTF_16
62
- elsif str[0] == 0xff && str[1] == 0xfe
63
- str[0,2] = ""
64
- return UNILE
24
+ def encode(string)
25
+ string.encode(@encoding)
26
+ end
27
+
28
+ def decode(string)
29
+ string.encode(::Encoding::UTF_8, @encoding)
30
+ end
31
+
32
+ private
33
+ def find_encoding(name)
34
+ case name
35
+ when /\Ashift-jis\z/i
36
+ return "SHIFT_JIS"
37
+ when /\ACP-(\d+)\z/
38
+ name = "CP#{$1}"
39
+ when /\AUTF-8\z/i
40
+ return name
41
+ end
42
+ begin
43
+ ::Encoding::Converter.search_convpath(name, 'UTF-8')
44
+ rescue ::Encoding::ConverterNotFoundError
45
+ return nil
65
46
  end
66
- str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
67
- return $3.upcase if $3
68
- return UTF_8
47
+ name
69
48
  end
70
49
  end
71
50
  end
data/lib/rexml/entity.rb CHANGED
@@ -3,164 +3,164 @@ require 'rexml/source'
3
3
  require 'rexml/xmltokens'
4
4
 
5
5
  module REXML
6
- # God, I hate DTDs. I really do. Why this idiot standard still
7
- # plagues us is beyond me.
8
- class Entity < Child
9
- include XMLTokens
10
- PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
11
- SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
12
- PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
13
- EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
14
- NDATADECL = "\\s+NDATA\\s+#{NAME}"
15
- PEREFERENCE = "%#{NAME};"
16
- ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
17
- PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
18
- ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
19
- PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
20
- GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
21
- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
6
+ # God, I hate DTDs. I really do. Why this idiot standard still
7
+ # plagues us is beyond me.
8
+ class Entity < Child
9
+ include XMLTokens
10
+ PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
11
+ SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
12
+ PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
13
+ EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
14
+ NDATADECL = "\\s+NDATA\\s+#{NAME}"
15
+ PEREFERENCE = "%#{NAME};"
16
+ ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
17
+ PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
18
+ ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
19
+ PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
20
+ GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
21
+ ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
22
22
 
23
- attr_reader :name, :external, :ref, :ndata, :pubid
23
+ attr_reader :name, :external, :ref, :ndata, :pubid
24
24
 
25
- # Create a new entity. Simple entities can be constructed by passing a
26
- # name, value to the constructor; this creates a generic, plain entity
27
- # reference. For anything more complicated, you have to pass a Source to
28
- # the constructor with the entity definiton, or use the accessor methods.
29
- # +WARNING+: There is no validation of entity state except when the entity
30
- # is read from a stream. If you start poking around with the accessors,
31
- # you can easily create a non-conformant Entity. The best thing to do is
32
- # dump the stupid DTDs and use XMLSchema instead.
33
- #
34
- # e = Entity.new( 'amp', '&' )
35
- def initialize stream, value=nil, parent=nil, reference=false
36
- super(parent)
37
- @ndata = @pubid = @value = @external = nil
38
- if stream.kind_of? Array
39
- @name = stream[1]
40
- if stream[-1] == '%'
41
- @reference = true
42
- stream.pop
43
- else
44
- @reference = false
45
- end
46
- if stream[2] =~ /SYSTEM|PUBLIC/
47
- @external = stream[2]
48
- if @external == 'SYSTEM'
49
- @ref = stream[3]
50
- @ndata = stream[4] if stream.size == 5
51
- else
52
- @pubid = stream[3]
53
- @ref = stream[4]
54
- end
55
- else
56
- @value = stream[2]
57
- end
58
- else
59
- @reference = reference
60
- @external = nil
61
- @name = stream
62
- @value = value
63
- end
64
- end
25
+ # Create a new entity. Simple entities can be constructed by passing a
26
+ # name, value to the constructor; this creates a generic, plain entity
27
+ # reference. For anything more complicated, you have to pass a Source to
28
+ # the constructor with the entity definition, or use the accessor methods.
29
+ # +WARNING+: There is no validation of entity state except when the entity
30
+ # is read from a stream. If you start poking around with the accessors,
31
+ # you can easily create a non-conformant Entity. The best thing to do is
32
+ # dump the stupid DTDs and use XMLSchema instead.
33
+ #
34
+ # e = Entity.new( 'amp', '&' )
35
+ def initialize stream, value=nil, parent=nil, reference=false
36
+ super(parent)
37
+ @ndata = @pubid = @value = @external = nil
38
+ if stream.kind_of? Array
39
+ @name = stream[1]
40
+ if stream[-1] == '%'
41
+ @reference = true
42
+ stream.pop
43
+ else
44
+ @reference = false
45
+ end
46
+ if stream[2] =~ /SYSTEM|PUBLIC/
47
+ @external = stream[2]
48
+ if @external == 'SYSTEM'
49
+ @ref = stream[3]
50
+ @ndata = stream[4] if stream.size == 5
51
+ else
52
+ @pubid = stream[3]
53
+ @ref = stream[4]
54
+ end
55
+ else
56
+ @value = stream[2]
57
+ end
58
+ else
59
+ @reference = reference
60
+ @external = nil
61
+ @name = stream
62
+ @value = value
63
+ end
64
+ end
65
65
 
66
- # Evaluates whether the given string matchs an entity definition,
67
- # returning true if so, and false otherwise.
68
- def Entity::matches? string
69
- (ENTITYDECL =~ string) == 0
70
- end
66
+ # Evaluates whether the given string matchs an entity definition,
67
+ # returning true if so, and false otherwise.
68
+ def Entity::matches? string
69
+ (ENTITYDECL =~ string) == 0
70
+ end
71
71
 
72
- # Evaluates to the unnormalized value of this entity; that is, replacing
73
- # all entities -- both %ent; and &ent; entities. This differs from
74
- # +value()+ in that +value+ only replaces %ent; entities.
75
- def unnormalized
76
- document.record_entity_expansion unless document.nil?
77
- v = value()
78
- return nil if v.nil?
79
- @unnormalized = Text::unnormalize(v, parent)
80
- @unnormalized
81
- end
72
+ # Evaluates to the unnormalized value of this entity; that is, replacing
73
+ # all entities -- both %ent; and &ent; entities. This differs from
74
+ # +value()+ in that +value+ only replaces %ent; entities.
75
+ def unnormalized
76
+ document.record_entity_expansion unless document.nil?
77
+ v = value()
78
+ return nil if v.nil?
79
+ @unnormalized = Text::unnormalize(v, parent)
80
+ @unnormalized
81
+ end
82
82
 
83
- #once :unnormalized
83
+ #once :unnormalized
84
84
 
85
- # Returns the value of this entity unprocessed -- raw. This is the
86
- # normalized value; that is, with all %ent; and &ent; entities intact
87
- def normalized
88
- @value
89
- end
85
+ # Returns the value of this entity unprocessed -- raw. This is the
86
+ # normalized value; that is, with all %ent; and &ent; entities intact
87
+ def normalized
88
+ @value
89
+ end
90
90
 
91
- # Write out a fully formed, correct entity definition (assuming the Entity
92
- # object itself is valid.)
91
+ # Write out a fully formed, correct entity definition (assuming the Entity
92
+ # object itself is valid.)
93
93
  #
94
94
  # out::
95
95
  # An object implementing <TT>&lt;&lt;<TT> to which the entity will be
96
96
  # output
97
97
  # indent::
98
98
  # *DEPRECATED* and ignored
99
- def write out, indent=-1
100
- out << '<!ENTITY '
101
- out << '% ' if @reference
102
- out << @name
103
- out << ' '
104
- if @external
105
- out << @external << ' '
106
- if @pubid
107
- q = @pubid.include?('"')?"'":'"'
108
- out << q << @pubid << q << ' '
109
- end
110
- q = @ref.include?('"')?"'":'"'
111
- out << q << @ref << q
112
- out << ' NDATA ' << @ndata if @ndata
113
- else
114
- q = @value.include?('"')?"'":'"'
115
- out << q << @value << q
116
- end
117
- out << '>'
118
- end
99
+ def write out, indent=-1
100
+ out << '<!ENTITY '
101
+ out << '% ' if @reference
102
+ out << @name
103
+ out << ' '
104
+ if @external
105
+ out << @external << ' '
106
+ if @pubid
107
+ q = @pubid.include?('"')?"'":'"'
108
+ out << q << @pubid << q << ' '
109
+ end
110
+ q = @ref.include?('"')?"'":'"'
111
+ out << q << @ref << q
112
+ out << ' NDATA ' << @ndata if @ndata
113
+ else
114
+ q = @value.include?('"')?"'":'"'
115
+ out << q << @value << q
116
+ end
117
+ out << '>'
118
+ end
119
119
 
120
- # Returns this entity as a string. See write().
121
- def to_s
122
- rv = ''
123
- write rv
124
- rv
125
- end
120
+ # Returns this entity as a string. See write().
121
+ def to_s
122
+ rv = ''
123
+ write rv
124
+ rv
125
+ end
126
126
 
127
- PEREFERENCE_RE = /#{PEREFERENCE}/um
128
- # Returns the value of this entity. At the moment, only internal entities
129
- # are processed. If the value contains internal references (IE,
130
- # %blah;), those are replaced with their values. IE, if the doctype
131
- # contains:
132
- # <!ENTITY % foo "bar">
133
- # <!ENTITY yada "nanoo %foo; nanoo>
134
- # then:
135
- # doctype.entity('yada').value #-> "nanoo bar nanoo"
136
- def value
137
- if @value
138
- matches = @value.scan(PEREFERENCE_RE)
139
- rv = @value.clone
140
- if @parent
141
- matches.each do |entity_reference|
142
- entity_value = @parent.entity( entity_reference[0] )
143
- rv.gsub!( /%#{entity_reference};/um, entity_value )
144
- end
145
- end
146
- return rv
147
- end
148
- nil
149
- end
150
- end
127
+ PEREFERENCE_RE = /#{PEREFERENCE}/um
128
+ # Returns the value of this entity. At the moment, only internal entities
129
+ # are processed. If the value contains internal references (IE,
130
+ # %blah;), those are replaced with their values. IE, if the doctype
131
+ # contains:
132
+ # <!ENTITY % foo "bar">
133
+ # <!ENTITY yada "nanoo %foo; nanoo>
134
+ # then:
135
+ # doctype.entity('yada').value #-> "nanoo bar nanoo"
136
+ def value
137
+ if @value
138
+ matches = @value.scan(PEREFERENCE_RE)
139
+ rv = @value.clone
140
+ if @parent
141
+ matches.each do |entity_reference|
142
+ entity_value = @parent.entity( entity_reference[0] )
143
+ rv.gsub!( /%#{entity_reference.join};/um, entity_value )
144
+ end
145
+ end
146
+ return rv
147
+ end
148
+ nil
149
+ end
150
+ end
151
151
 
152
- # This is a set of entity constants -- the ones defined in the XML
153
- # specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
154
- module EntityConst
155
- # +>+
156
- GT = Entity.new( 'gt', '>' )
157
- # +<+
158
- LT = Entity.new( 'lt', '<' )
159
- # +&+
160
- AMP = Entity.new( 'amp', '&' )
161
- # +"+
162
- QUOT = Entity.new( 'quot', '"' )
163
- # +'+
164
- APOS = Entity.new( 'apos', "'" )
165
- end
152
+ # This is a set of entity constants -- the ones defined in the XML
153
+ # specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
154
+ module EntityConst
155
+ # +>+
156
+ GT = Entity.new( 'gt', '>' )
157
+ # +<+
158
+ LT = Entity.new( 'lt', '<' )
159
+ # +&+
160
+ AMP = Entity.new( 'amp', '&' )
161
+ # +"+
162
+ QUOT = Entity.new( 'quot', '"' )
163
+ # +'+
164
+ APOS = Entity.new( 'apos', "'" )
165
+ end
166
166
  end
data/lib/rexml/formatters/default.rb CHANGED
@@ -21,8 +21,8 @@ module REXML
21
21
  def write( node, output )
22
22
  case node
23
23
 
24
- when Document
25
- if node.xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
24
+ when Document
25
+ if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
26
26
  output = Output.new( output, node.xml_decl.encoding )
27
27
  end
28
28
  write_document( node, output )
@@ -63,14 +63,16 @@ module REXML
63
63
  def write_element( node, output )
64
64
  output << "<#{node.expanded_name}"
65
65
 
66
- node.attributes.each_attribute do |attr|
66
+ node.attributes.to_a.map { |a|
67
+ Hash === a ? a.values : a
68
+ }.flatten.sort_by {|attr| attr.name}.each do |attr|
67
69
  output << " "
68
70
  attr.write( output )
69
71
  end unless node.attributes.empty?
70
72
 
71
73
  if node.children.empty?
72
74
  output << " " if @ie_hack
73
- output << "/"
75
+ output << "/"
74
76
  else
75
77
  output << ">"
76
78
  node.children.each { |child|
data/lib/rexml/formatters/pretty.rb CHANGED
@@ -24,13 +24,14 @@ module REXML
24
24
  # is undefined. Defaults to 2.
25
25
  # ie_hack::
26
26
  # If true, the printer will insert whitespace before closing empty
27
- # tags, thereby allowing Internet Explorer's feeble XML parser to
27
+ # tags, thereby allowing Internet Explorer's XML parser to
28
28
  # function. Defaults to false.
29
29
  def initialize( indentation=2, ie_hack=false )
30
30
  @indentation = indentation
31
31
  @level = 0
32
32
  @ie_hack = ie_hack
33
33
  @width = 80
34
+ @compact = false
34
35
  end
35
36
 
36
37
  protected
@@ -47,7 +48,7 @@ module REXML
47
48
  if @ie_hack
48
49
  output << " "
49
50
  end
50
- output << "/"
51
+ output << "/"
51
52
  else
52
53
  output << ">"
53
54
  # If compact and all children are text, and if the formatted output
@@ -87,7 +88,7 @@ module REXML
87
88
  s = node.to_s()
88
89
  s.gsub!(/\s/,' ')
89
90
  s.squeeze!(" ")
90
- s = wrap(s, 80-@level)
91
+ s = wrap(s, @width - @level)
91
92
  s = indent_text(s, @level, " ", true)
92
93
  output << (' '*@level + s)
93
94
  end
@@ -125,11 +126,13 @@ module REXML
125
126
  end
126
127
 
127
128
  def wrap(string, width)
128
- # Recursively wrap string at width.
129
- return string if string.length <= width
130
- place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
131
- return string if place.nil?
132
- return string[0,place] + "\n" + wrap(string[place+1..-1], width)
129
+ parts = []
130
+ while string.length > width and place = string.rindex(' ', width)
131
+ parts << string[0...place]
132
+ string = string[place+1..-1]
133
+ end
134
+ parts << string
135
+ parts.join("\n")
133
136
  end
134
137
 
135
138
  end