rubysl-rexml 1.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +3 -2
  3. data/lib/rexml/attlistdecl.rb +56 -56
  4. data/lib/rexml/attribute.rb +155 -149
  5. data/lib/rexml/cdata.rb +48 -48
  6. data/lib/rexml/child.rb +82 -82
  7. data/lib/rexml/comment.rb +59 -59
  8. data/lib/rexml/doctype.rb +22 -24
  9. data/lib/rexml/document.rb +185 -129
  10. data/lib/rexml/dtd/attlistdecl.rb +7 -7
  11. data/lib/rexml/dtd/dtd.rb +41 -41
  12. data/lib/rexml/dtd/elementdecl.rb +13 -13
  13. data/lib/rexml/dtd/entitydecl.rb +49 -49
  14. data/lib/rexml/dtd/notationdecl.rb +32 -32
  15. data/lib/rexml/element.rb +122 -107
  16. data/lib/rexml/encoding.rb +37 -58
  17. data/lib/rexml/entity.rb +144 -144
  18. data/lib/rexml/formatters/default.rb +6 -4
  19. data/lib/rexml/formatters/pretty.rb +11 -8
  20. data/lib/rexml/formatters/transitive.rb +4 -3
  21. data/lib/rexml/functions.rb +33 -21
  22. data/lib/rexml/instruction.rb +49 -49
  23. data/lib/rexml/light/node.rb +190 -191
  24. data/lib/rexml/namespace.rb +39 -39
  25. data/lib/rexml/node.rb +38 -38
  26. data/lib/rexml/output.rb +17 -12
  27. data/lib/rexml/parent.rb +26 -25
  28. data/lib/rexml/parseexception.rb +4 -4
  29. data/lib/rexml/parsers/baseparser.rb +90 -61
  30. data/lib/rexml/parsers/lightparser.rb +41 -43
  31. data/lib/rexml/parsers/pullparser.rb +1 -1
  32. data/lib/rexml/parsers/sax2parser.rb +233 -198
  33. data/lib/rexml/parsers/streamparser.rb +6 -2
  34. data/lib/rexml/parsers/treeparser.rb +9 -6
  35. data/lib/rexml/parsers/ultralightparser.rb +40 -40
  36. data/lib/rexml/parsers/xpathparser.rb +51 -52
  37. data/lib/rexml/quickpath.rb +247 -248
  38. data/lib/rexml/rexml.rb +9 -10
  39. data/lib/rexml/sax2listener.rb +92 -92
  40. data/lib/rexml/security.rb +27 -0
  41. data/lib/rexml/source.rb +95 -50
  42. data/lib/rexml/streamlistener.rb +90 -90
  43. data/lib/rexml/syncenumerator.rb +3 -4
  44. data/lib/rexml/text.rb +157 -76
  45. data/lib/rexml/validation/relaxng.rb +18 -18
  46. data/lib/rexml/validation/validation.rb +5 -5
  47. data/lib/rexml/xmldecl.rb +59 -63
  48. data/lib/rexml/xmltokens.rb +14 -14
  49. data/lib/rexml/xpath.rb +67 -53
  50. data/lib/rexml/xpath_parser.rb +49 -38
  51. data/lib/rubysl/rexml.rb +1 -0
  52. data/lib/rubysl/rexml/version.rb +1 -1
  53. data/rubysl-rexml.gemspec +3 -1
  54. metadata +19 -28
  55. data/lib/rexml/encodings/CP-1252.rb +0 -103
  56. data/lib/rexml/encodings/EUC-JP.rb +0 -35
  57. data/lib/rexml/encodings/ICONV.rb +0 -22
  58. data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
  59. data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
  60. data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
  61. data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
  62. data/lib/rexml/encodings/UNILE.rb +0 -34
  63. data/lib/rexml/encodings/US-ASCII.rb +0 -30
  64. data/lib/rexml/encodings/UTF-16.rb +0 -35
  65. data/lib/rexml/encodings/UTF-8.rb +0 -18
@@ -1,71 +1,50 @@
1
- # -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
1
+ # coding: US-ASCII
2
2
  module REXML
3
3
  module Encoding
4
- @encoding_methods = {}
5
- def self.register(enc, &block)
6
- @encoding_methods[enc] = block
7
- end
8
- def self.apply(obj, enc)
9
- @encoding_methods[enc][obj]
10
- end
11
- def self.encoding_method(enc)
12
- @encoding_methods[enc]
13
- end
14
-
15
- # Native, default format is UTF-8, so it is declared here rather than in
16
- # an encodings/ definition.
17
- UTF_8 = 'UTF-8'
18
- UTF_16 = 'UTF-16'
19
- UNILE = 'UNILE'
20
-
21
4
  # ID ---> Encoding name
22
5
  attr_reader :encoding
23
- def encoding=( enc )
24
- old_verbosity = $VERBOSE
25
- begin
26
- $VERBOSE = false
27
- enc = enc.nil? ? nil : enc.upcase
28
- return false if defined? @encoding and enc == @encoding
29
- if enc and enc != UTF_8
30
- @encoding = enc
31
- raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
32
- @encoding.untaint
33
- begin
34
- require 'rexml/encodings/ICONV.rb'
35
- Encoding.apply(self, "ICONV")
36
- rescue LoadError, Exception
37
- begin
38
- enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
39
- require enc_file
40
- Encoding.apply(self, @encoding)
41
- rescue LoadError => err
42
- puts err.message
43
- raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
44
- end
45
- end
46
- else
47
- @encoding = UTF_8
48
- require 'rexml/encodings/UTF-8.rb'
49
- Encoding.apply(self, @encoding)
6
+ def encoding=(encoding)
7
+ encoding = encoding.name if encoding.is_a?(Encoding)
8
+ if encoding.is_a?(String)
9
+ original_encoding = encoding
10
+ encoding = find_encoding(encoding)
11
+ unless encoding
12
+ raise ArgumentError, "Bad encoding name #{original_encoding}"
50
13
  end
51
- ensure
52
- $VERBOSE = old_verbosity
14
+ end
15
+ return false if defined?(@encoding) and encoding == @encoding
16
+ if encoding
17
+ @encoding = encoding.upcase
18
+ else
19
+ @encoding = 'UTF-8'
53
20
  end
54
21
  true
55
22
  end
56
23
 
57
- def check_encoding str
58
- # We have to recognize UTF-16, LSB UTF-16, and UTF-8
59
- if str[0] == 0xfe && str[1] == 0xff
60
- str[0,2] = ""
61
- return UTF_16
62
- elsif str[0] == 0xff && str[1] == 0xfe
63
- str[0,2] = ""
64
- return UNILE
24
+ def encode(string)
25
+ string.encode(@encoding)
26
+ end
27
+
28
+ def decode(string)
29
+ string.encode(::Encoding::UTF_8, @encoding)
30
+ end
31
+
32
+ private
33
+ def find_encoding(name)
34
+ case name
35
+ when /\Ashift-jis\z/i
36
+ return "SHIFT_JIS"
37
+ when /\ACP-(\d+)\z/
38
+ name = "CP#{$1}"
39
+ when /\AUTF-8\z/i
40
+ return name
41
+ end
42
+ begin
43
+ ::Encoding::Converter.search_convpath(name, 'UTF-8')
44
+ rescue ::Encoding::ConverterNotFoundError
45
+ return nil
65
46
  end
66
- str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
67
- return $3.upcase if $3
68
- return UTF_8
47
+ name
69
48
  end
70
49
  end
71
50
  end
data/lib/rexml/entity.rb CHANGED
@@ -3,164 +3,164 @@ require 'rexml/source'
3
3
  require 'rexml/xmltokens'
4
4
 
5
5
  module REXML
6
- # God, I hate DTDs. I really do. Why this idiot standard still
7
- # plagues us is beyond me.
8
- class Entity < Child
9
- include XMLTokens
10
- PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
11
- SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
12
- PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
13
- EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
14
- NDATADECL = "\\s+NDATA\\s+#{NAME}"
15
- PEREFERENCE = "%#{NAME};"
16
- ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
17
- PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
18
- ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
19
- PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
20
- GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
21
- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
6
+ # God, I hate DTDs. I really do. Why this idiot standard still
7
+ # plagues us is beyond me.
8
+ class Entity < Child
9
+ include XMLTokens
10
+ PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
11
+ SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
12
+ PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
13
+ EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
14
+ NDATADECL = "\\s+NDATA\\s+#{NAME}"
15
+ PEREFERENCE = "%#{NAME};"
16
+ ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
17
+ PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
18
+ ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
19
+ PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
20
+ GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
21
+ ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
22
22
 
23
- attr_reader :name, :external, :ref, :ndata, :pubid
23
+ attr_reader :name, :external, :ref, :ndata, :pubid
24
24
 
25
- # Create a new entity. Simple entities can be constructed by passing a
26
- # name, value to the constructor; this creates a generic, plain entity
27
- # reference. For anything more complicated, you have to pass a Source to
28
- # the constructor with the entity definiton, or use the accessor methods.
29
- # +WARNING+: There is no validation of entity state except when the entity
30
- # is read from a stream. If you start poking around with the accessors,
31
- # you can easily create a non-conformant Entity. The best thing to do is
32
- # dump the stupid DTDs and use XMLSchema instead.
33
- #
34
- # e = Entity.new( 'amp', '&' )
35
- def initialize stream, value=nil, parent=nil, reference=false
36
- super(parent)
37
- @ndata = @pubid = @value = @external = nil
38
- if stream.kind_of? Array
39
- @name = stream[1]
40
- if stream[-1] == '%'
41
- @reference = true
42
- stream.pop
43
- else
44
- @reference = false
45
- end
46
- if stream[2] =~ /SYSTEM|PUBLIC/
47
- @external = stream[2]
48
- if @external == 'SYSTEM'
49
- @ref = stream[3]
50
- @ndata = stream[4] if stream.size == 5
51
- else
52
- @pubid = stream[3]
53
- @ref = stream[4]
54
- end
55
- else
56
- @value = stream[2]
57
- end
58
- else
59
- @reference = reference
60
- @external = nil
61
- @name = stream
62
- @value = value
63
- end
64
- end
25
+ # Create a new entity. Simple entities can be constructed by passing a
26
+ # name, value to the constructor; this creates a generic, plain entity
27
+ # reference. For anything more complicated, you have to pass a Source to
28
+ # the constructor with the entity definition, or use the accessor methods.
29
+ # +WARNING+: There is no validation of entity state except when the entity
30
+ # is read from a stream. If you start poking around with the accessors,
31
+ # you can easily create a non-conformant Entity. The best thing to do is
32
+ # dump the stupid DTDs and use XMLSchema instead.
33
+ #
34
+ # e = Entity.new( 'amp', '&' )
35
+ def initialize stream, value=nil, parent=nil, reference=false
36
+ super(parent)
37
+ @ndata = @pubid = @value = @external = nil
38
+ if stream.kind_of? Array
39
+ @name = stream[1]
40
+ if stream[-1] == '%'
41
+ @reference = true
42
+ stream.pop
43
+ else
44
+ @reference = false
45
+ end
46
+ if stream[2] =~ /SYSTEM|PUBLIC/
47
+ @external = stream[2]
48
+ if @external == 'SYSTEM'
49
+ @ref = stream[3]
50
+ @ndata = stream[4] if stream.size == 5
51
+ else
52
+ @pubid = stream[3]
53
+ @ref = stream[4]
54
+ end
55
+ else
56
+ @value = stream[2]
57
+ end
58
+ else
59
+ @reference = reference
60
+ @external = nil
61
+ @name = stream
62
+ @value = value
63
+ end
64
+ end
65
65
 
66
- # Evaluates whether the given string matchs an entity definition,
67
- # returning true if so, and false otherwise.
68
- def Entity::matches? string
69
- (ENTITYDECL =~ string) == 0
70
- end
66
+ # Evaluates whether the given string matches an entity definition,
67
+ # returning true if so, and false otherwise.
68
+ def Entity::matches? string
69
+ (ENTITYDECL =~ string) == 0
70
+ end
71
71
 
72
- # Evaluates to the unnormalized value of this entity; that is, replacing
73
- # all entities -- both %ent; and &ent; entities. This differs from
74
- # +value()+ in that +value+ only replaces %ent; entities.
75
- def unnormalized
76
- document.record_entity_expansion unless document.nil?
77
- v = value()
78
- return nil if v.nil?
79
- @unnormalized = Text::unnormalize(v, parent)
80
- @unnormalized
81
- end
72
+ # Evaluates to the unnormalized value of this entity; that is, replacing
73
+ # all entities -- both %ent; and &ent; entities. This differs from
74
+ # +value()+ in that +value+ only replaces %ent; entities.
75
+ def unnormalized
76
+ document.record_entity_expansion unless document.nil?
77
+ v = value()
78
+ return nil if v.nil?
79
+ @unnormalized = Text::unnormalize(v, parent)
80
+ @unnormalized
81
+ end
82
82
 
83
- #once :unnormalized
83
+ #once :unnormalized
84
84
 
85
- # Returns the value of this entity unprocessed -- raw. This is the
86
- # normalized value; that is, with all %ent; and &ent; entities intact
87
- def normalized
88
- @value
89
- end
85
+ # Returns the value of this entity unprocessed -- raw. This is the
86
+ # normalized value; that is, with all %ent; and &ent; entities intact
87
+ def normalized
88
+ @value
89
+ end
90
90
 
91
- # Write out a fully formed, correct entity definition (assuming the Entity
92
- # object itself is valid.)
91
+ # Write out a fully formed, correct entity definition (assuming the Entity
92
+ # object itself is valid.)
93
93
  #
94
94
  # out::
95
95
  # An object implementing <TT>&lt;&lt;</TT> to which the entity will be
96
96
  # output
97
97
  # indent::
98
98
  # *DEPRECATED* and ignored
99
- def write out, indent=-1
100
- out << '<!ENTITY '
101
- out << '% ' if @reference
102
- out << @name
103
- out << ' '
104
- if @external
105
- out << @external << ' '
106
- if @pubid
107
- q = @pubid.include?('"')?"'":'"'
108
- out << q << @pubid << q << ' '
109
- end
110
- q = @ref.include?('"')?"'":'"'
111
- out << q << @ref << q
112
- out << ' NDATA ' << @ndata if @ndata
113
- else
114
- q = @value.include?('"')?"'":'"'
115
- out << q << @value << q
116
- end
117
- out << '>'
118
- end
99
+ def write out, indent=-1
100
+ out << '<!ENTITY '
101
+ out << '% ' if @reference
102
+ out << @name
103
+ out << ' '
104
+ if @external
105
+ out << @external << ' '
106
+ if @pubid
107
+ q = @pubid.include?('"')?"'":'"'
108
+ out << q << @pubid << q << ' '
109
+ end
110
+ q = @ref.include?('"')?"'":'"'
111
+ out << q << @ref << q
112
+ out << ' NDATA ' << @ndata if @ndata
113
+ else
114
+ q = @value.include?('"')?"'":'"'
115
+ out << q << @value << q
116
+ end
117
+ out << '>'
118
+ end
119
119
 
120
- # Returns this entity as a string. See write().
121
- def to_s
122
- rv = ''
123
- write rv
124
- rv
125
- end
120
+ # Returns this entity as a string. See write().
121
+ def to_s
122
+ rv = ''
123
+ write rv
124
+ rv
125
+ end
126
126
 
127
- PEREFERENCE_RE = /#{PEREFERENCE}/um
128
- # Returns the value of this entity. At the moment, only internal entities
129
- # are processed. If the value contains internal references (IE,
130
- # %blah;), those are replaced with their values. IE, if the doctype
131
- # contains:
132
- # <!ENTITY % foo "bar">
133
- # <!ENTITY yada "nanoo %foo; nanoo>
134
- # then:
135
- # doctype.entity('yada').value #-> "nanoo bar nanoo"
136
- def value
137
- if @value
138
- matches = @value.scan(PEREFERENCE_RE)
139
- rv = @value.clone
140
- if @parent
141
- matches.each do |entity_reference|
142
- entity_value = @parent.entity( entity_reference[0] )
143
- rv.gsub!( /%#{entity_reference};/um, entity_value )
144
- end
145
- end
146
- return rv
147
- end
148
- nil
149
- end
150
- end
127
+ PEREFERENCE_RE = /#{PEREFERENCE}/um
128
+ # Returns the value of this entity. At the moment, only internal entities
129
+ # are processed. If the value contains internal references (IE,
130
+ # %blah;), those are replaced with their values. IE, if the doctype
131
+ # contains:
132
+ # <!ENTITY % foo "bar">
133
+ # <!ENTITY yada "nanoo %foo; nanoo">
134
+ # then:
135
+ # doctype.entity('yada').value #-> "nanoo bar nanoo"
136
+ def value
137
+ if @value
138
+ matches = @value.scan(PEREFERENCE_RE)
139
+ rv = @value.clone
140
+ if @parent
141
+ matches.each do |entity_reference|
142
+ entity_value = @parent.entity( entity_reference[0] )
143
+ rv.gsub!( /%#{entity_reference.join};/um, entity_value )
144
+ end
145
+ end
146
+ return rv
147
+ end
148
+ nil
149
+ end
150
+ end
151
151
 
152
- # This is a set of entity constants -- the ones defined in the XML
153
- # specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
154
- module EntityConst
155
- # +>+
156
- GT = Entity.new( 'gt', '>' )
157
- # +<+
158
- LT = Entity.new( 'lt', '<' )
159
- # +&+
160
- AMP = Entity.new( 'amp', '&' )
161
- # +"+
162
- QUOT = Entity.new( 'quot', '"' )
163
- # +'+
164
- APOS = Entity.new( 'apos', "'" )
165
- end
152
+ # This is a set of entity constants -- the ones defined in the XML
153
+ # specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
154
+ module EntityConst
155
+ # +>+
156
+ GT = Entity.new( 'gt', '>' )
157
+ # +<+
158
+ LT = Entity.new( 'lt', '<' )
159
+ # +&+
160
+ AMP = Entity.new( 'amp', '&' )
161
+ # +"+
162
+ QUOT = Entity.new( 'quot', '"' )
163
+ # +'+
164
+ APOS = Entity.new( 'apos', "'" )
165
+ end
166
166
  end
@@ -21,8 +21,8 @@ module REXML
21
21
  def write( node, output )
22
22
  case node
23
23
 
24
- when Document
25
- if node.xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
24
+ when Document
25
+ if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
26
26
  output = Output.new( output, node.xml_decl.encoding )
27
27
  end
28
28
  write_document( node, output )
@@ -63,14 +63,16 @@ module REXML
63
63
  def write_element( node, output )
64
64
  output << "<#{node.expanded_name}"
65
65
 
66
- node.attributes.each_attribute do |attr|
66
+ node.attributes.to_a.map { |a|
67
+ Hash === a ? a.values : a
68
+ }.flatten.sort_by {|attr| attr.name}.each do |attr|
67
69
  output << " "
68
70
  attr.write( output )
69
71
  end unless node.attributes.empty?
70
72
 
71
73
  if node.children.empty?
72
74
  output << " " if @ie_hack
73
- output << "/"
75
+ output << "/"
74
76
  else
75
77
  output << ">"
76
78
  node.children.each { |child|
@@ -24,13 +24,14 @@ module REXML
24
24
  # is undefined. Defaults to 2.
25
25
  # ie_hack::
26
26
  # If true, the printer will insert whitespace before closing empty
27
- # tags, thereby allowing Internet Explorer's feeble XML parser to
27
+ # tags, thereby allowing Internet Explorer's XML parser to
28
28
  # function. Defaults to false.
29
29
  def initialize( indentation=2, ie_hack=false )
30
30
  @indentation = indentation
31
31
  @level = 0
32
32
  @ie_hack = ie_hack
33
33
  @width = 80
34
+ @compact = false
34
35
  end
35
36
 
36
37
  protected
@@ -47,7 +48,7 @@ module REXML
47
48
  if @ie_hack
48
49
  output << " "
49
50
  end
50
- output << "/"
51
+ output << "/"
51
52
  else
52
53
  output << ">"
53
54
  # If compact and all children are text, and if the formatted output
@@ -87,7 +88,7 @@ module REXML
87
88
  s = node.to_s()
88
89
  s.gsub!(/\s/,' ')
89
90
  s.squeeze!(" ")
90
- s = wrap(s, 80-@level)
91
+ s = wrap(s, @width - @level)
91
92
  s = indent_text(s, @level, " ", true)
92
93
  output << (' '*@level + s)
93
94
  end
@@ -125,11 +126,13 @@ module REXML
125
126
  end
126
127
 
127
128
  def wrap(string, width)
128
- # Recursively wrap string at width.
129
- return string if string.length <= width
130
- place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
131
- return string if place.nil?
132
- return string[0,place] + "\n" + wrap(string[place+1..-1], width)
129
+ parts = []
130
+ while string.length > width and place = string.rindex(' ', width)
131
+ parts << string[0...place]
132
+ string = string[place+1..-1]
133
+ end
134
+ parts << string
135
+ parts.join("\n")
133
136
  end
134
137
 
135
138
  end