rexml 3.1.7.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.travis.yml +10 -0
  4. data/Gemfile +6 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +60 -0
  7. data/Rakefile +10 -0
  8. data/bin/console +14 -0
  9. data/bin/setup +8 -0
  10. data/lib/rexml/attlistdecl.rb +63 -0
  11. data/lib/rexml/attribute.rb +192 -0
  12. data/lib/rexml/cdata.rb +68 -0
  13. data/lib/rexml/child.rb +97 -0
  14. data/lib/rexml/comment.rb +80 -0
  15. data/lib/rexml/doctype.rb +270 -0
  16. data/lib/rexml/document.rb +291 -0
  17. data/lib/rexml/dtd/attlistdecl.rb +11 -0
  18. data/lib/rexml/dtd/dtd.rb +47 -0
  19. data/lib/rexml/dtd/elementdecl.rb +18 -0
  20. data/lib/rexml/dtd/entitydecl.rb +57 -0
  21. data/lib/rexml/dtd/notationdecl.rb +40 -0
  22. data/lib/rexml/element.rb +1267 -0
  23. data/lib/rexml/encoding.rb +51 -0
  24. data/lib/rexml/entity.rb +171 -0
  25. data/lib/rexml/formatters/default.rb +112 -0
  26. data/lib/rexml/formatters/pretty.rb +142 -0
  27. data/lib/rexml/formatters/transitive.rb +58 -0
  28. data/lib/rexml/functions.rb +447 -0
  29. data/lib/rexml/instruction.rb +71 -0
  30. data/lib/rexml/light/node.rb +196 -0
  31. data/lib/rexml/namespace.rb +48 -0
  32. data/lib/rexml/node.rb +76 -0
  33. data/lib/rexml/output.rb +30 -0
  34. data/lib/rexml/parent.rb +166 -0
  35. data/lib/rexml/parseexception.rb +52 -0
  36. data/lib/rexml/parsers/baseparser.rb +586 -0
  37. data/lib/rexml/parsers/lightparser.rb +59 -0
  38. data/lib/rexml/parsers/pullparser.rb +197 -0
  39. data/lib/rexml/parsers/sax2parser.rb +273 -0
  40. data/lib/rexml/parsers/streamparser.rb +61 -0
  41. data/lib/rexml/parsers/treeparser.rb +101 -0
  42. data/lib/rexml/parsers/ultralightparser.rb +57 -0
  43. data/lib/rexml/parsers/xpathparser.rb +675 -0
  44. data/lib/rexml/quickpath.rb +266 -0
  45. data/lib/rexml/rexml.rb +32 -0
  46. data/lib/rexml/sax2listener.rb +98 -0
  47. data/lib/rexml/security.rb +28 -0
  48. data/lib/rexml/source.rb +298 -0
  49. data/lib/rexml/streamlistener.rb +93 -0
  50. data/lib/rexml/syncenumerator.rb +33 -0
  51. data/lib/rexml/text.rb +424 -0
  52. data/lib/rexml/undefinednamespaceexception.rb +9 -0
  53. data/lib/rexml/validation/relaxng.rb +539 -0
  54. data/lib/rexml/validation/validation.rb +144 -0
  55. data/lib/rexml/validation/validationexception.rb +10 -0
  56. data/lib/rexml/xmldecl.rb +116 -0
  57. data/lib/rexml/xmltokens.rb +85 -0
  58. data/lib/rexml/xpath.rb +81 -0
  59. data/lib/rexml/xpath_parser.rb +934 -0
  60. data/rexml.gemspec +42 -0
  61. metadata +131 -0
@@ -0,0 +1,51 @@
# coding: US-ASCII
# frozen_string_literal: false
module REXML
  # Mixin that remembers the character encoding an object works in and
  # converts strings between that encoding and UTF-8.
  module Encoding
    # ID ---> Encoding name
    attr_reader :encoding

    # Sets the encoding used by #encode and #decode.
    #
    # encoding::
    #   A core ::Encoding object, an encoding name (String), or +nil+.
    #   +nil+ selects 'UTF-8'. Names are stored upcased.
    #
    # Returns +false+ when the resolved value equals the one already stored,
    # +true+ otherwise. Raises ArgumentError when a name is given for which
    # no conversion path to UTF-8 exists.
    def encoding=(encoding)
      # Within this module a bare +Encoding+ resolves to REXML::Encoding, so
      # the core class has to be referenced from the top-level namespace.
      encoding = encoding.name if encoding.is_a?(::Encoding)
      if encoding.is_a?(String)
        original_encoding = encoding
        encoding = find_encoding(encoding)
        unless encoding
          raise ArgumentError, "Bad encoding name #{original_encoding}"
        end
      end
      return false if instance_variable_defined?(:@encoding) && encoding == @encoding
      @encoding = encoding ? encoding.upcase : 'UTF-8'
      true
    end

    # Returns +string+ transcoded into the current encoding.
    def encode(string)
      string.encode(@encoding)
    end

    # Returns +string+, interpreted in the current encoding, transcoded to
    # UTF-8.
    def decode(string)
      string.encode(::Encoding::UTF_8, @encoding)
    end

    private

    # Resolves +name+ to an encoding name that can be converted to UTF-8.
    # "shift-jis" (any case) becomes "SHIFT_JIS", "CP-<digits>" becomes
    # "CP<digits>", and "UTF-8" (any case) is returned unchanged. Returns
    # +nil+ when no converter to UTF-8 is found.
    def find_encoding(name)
      case name
      when /\Ashift-jis\z/i
        return "SHIFT_JIS"
      when /\ACP-(\d+)\z/
        name = "CP#{$1}"
      when /\AUTF-8\z/i
        return name
      end
      begin
        # Called only to probe for a conversion path; the path is discarded.
        ::Encoding::Converter.search_convpath(name, 'UTF-8')
      rescue ::Encoding::ConverterNotFoundError
        return nil
      end
      name
    end
  end
end
@@ -0,0 +1,171 @@
# frozen_string_literal: false
require_relative 'child'
require_relative 'source'
require_relative 'xmltokens'

module REXML
  # Models an <!ENTITY ...> declaration: either an internal entity carrying a
  # literal value, or an external one identified by a SYSTEM or PUBLIC id.
  class Entity < Child
    include XMLTokens
    PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
    SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
    PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
    EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
    NDATADECL = "\\s+NDATA\\s+#{NAME}"
    PEREFERENCE = "%#{NAME};"
    ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
    PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
    ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
    PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
    GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
    # The optional leading whitespace is repeated in both alternatives;
    # otherwise it would bind to the general-entity branch only and a
    # parameter-entity declaration preceded by whitespace would not match at
    # offset 0.
    ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um

    attr_reader :name, :external, :ref, :ndata, :pubid

    # Create a new entity.  Simple entities can be constructed by passing a
    # name, value to the constructor; this creates a generic, plain entity
    # reference. For anything more complicated, you have to pass a Source to
    # the constructor with the entity definition, or use the accessor methods.
    # +WARNING+: There is no validation of entity state except when the entity
    # is read from a stream.  If you start poking around with the accessors,
    # you can easily create a non-conformant Entity.
    #
    #  e = Entity.new( 'amp', '&' )
    #
    # stream::
    #   Either the entity name, or an Array laid out as
    #   [_, name, value] for an internal entity,
    #   [_, name, 'SYSTEM', ref(, ndata)] or [_, name, 'PUBLIC', pubid, ref]
    #   for an external one. A trailing '%' element marks a parameter entity
    #   and is popped off the array (the caller's array is modified).
    # value::
    #   The entity value; only used when +stream+ is not an Array.
    # parent::
    #   Passed to the superclass constructor.
    # reference::
    #   Whether this is a parameter entity; only used when +stream+ is not
    #   an Array.
    def initialize(stream, value=nil, parent=nil, reference=false)
      super(parent)
      @ndata = @pubid = @value = @external = nil
      if stream.kind_of? Array
        @name = stream[1]
        if stream[-1] == '%'
          @reference = true
          stream.pop
        else
          @reference = false
        end
        # Compare the whole token: an internal value that merely contains the
        # word SYSTEM or PUBLIC must not be mistaken for an external id.
        if stream[2] == 'SYSTEM' || stream[2] == 'PUBLIC'
          @external = stream[2]
          if @external == 'SYSTEM'
            @ref = stream[3]
            @ndata = stream[4] if stream.size == 5
          else
            @pubid = stream[3]
            @ref = stream[4]
          end
        else
          @value = stream[2]
        end
      else
        @reference = reference
        @external = nil
        @name = stream
        @value = value
      end
    end

    # Evaluates whether the given string matches an entity definition,
    # returning true if so, and false otherwise.
    def Entity::matches?(string)
      (ENTITYDECL =~ string) == 0
    end

    # Evaluates to the unnormalized value of this entity; that is, replacing
    # all entities -- both %ent; and &ent; entities.  This differs from
    # +value()+ in that +value+ only replaces %ent; entities.
    # Returns +nil+ when the entity has no value.
    def unnormalized
      document.record_entity_expansion unless document.nil?
      v = value()
      return nil if v.nil?
      @unnormalized = Text::unnormalize(v, parent)
      @unnormalized
    end

    #once :unnormalized

    # Returns the value of this entity unprocessed -- raw.  This is the
    # normalized value; that is, with all %ent; and &ent; entities intact
    def normalized
      @value
    end

    # Write out a fully formed, correct entity definition (assuming the Entity
    # object itself is valid.)
    #
    # out::
    #   An object implementing <TT>&lt;&lt;</TT> to which the entity will be
    #   output
    # indent::
    #   *DEPRECATED* and ignored
    def write(out, indent=-1)
      out << '<!ENTITY '
      out << '% ' if @reference
      out << @name
      out << ' '
      if @external
        out << @external << ' '
        if @pubid
          # Quote with whichever quote character the literal does not contain.
          q = @pubid.include?('"') ? "'" : '"'
          out << q << @pubid << q << ' '
        end
        q = @ref.include?('"') ? "'" : '"'
        out << q << @ref << q
        out << ' NDATA ' << @ndata if @ndata
      else
        q = @value.include?('"') ? "'" : '"'
        out << q << @value << q
      end
      out << '>'
    end

    # Returns this entity as a string.  See write().
    def to_s
      rv = ''
      write rv
      rv
    end

    PEREFERENCE_RE = /#{PEREFERENCE}/um
    # Returns the value of this entity.  At the moment, only internal entities
    # are processed.  If the value contains internal references (IE,
    # %blah;), those are replaced with their values.  IE, if the doctype
    # contains:
    #  <!ENTITY % foo "bar">
    #  <!ENTITY yada "nanoo %foo; nanoo">
    # then:
    #  doctype.entity('yada').value   #-> "nanoo bar nanoo"
    #
    # Returns +nil+ when the entity has no value. Raises RuntimeError when the
    # accumulated size of the substituted values exceeds
    # Security.entity_expansion_text_limit.
    def value
      return nil unless @value
      matches = @value.scan(PEREFERENCE_RE)
      rv = @value.clone
      if @parent
        sum = 0
        matches.each do |entity_reference|
          entity_value = @parent.entity( entity_reference[0] )
          if sum + entity_value.bytesize > Security.entity_expansion_text_limit
            raise "entity expansion has grown too large"
          else
            sum += entity_value.bytesize
          end
          # Literal pattern plus block replacement: neither regexp
          # metacharacters in the name nor backslash sequences in the value
          # are interpreted.
          rv.gsub!("%#{entity_reference.join};") { entity_value }
        end
      end
      rv
    end
  end

  # This is a set of entity constants -- the ones defined in the XML
  # specification.  These are +gt+, +lt+, +amp+, +quot+ and +apos+.
  # CAUTION: these entities do not have a parent or a document
  module EntityConst
    # +>+
    GT = Entity.new( 'gt', '>' )
    # +<+
    LT = Entity.new( 'lt', '<' )
    # +&+
    AMP = Entity.new( 'amp', '&' )
    # +"+
    QUOT = Entity.new( 'quot', '"' )
    # +'+
    APOS = Entity.new( 'apos', "'" )
  end
end
@@ -0,0 +1,112 @@
# frozen_string_literal: false
module REXML
  module Formatters
    # Serializes nodes without adding any formatting of its own, apart from
    # the optional ie_hack space inside empty-element tags.
    class Default
      # ie_hack::
      #   When true, a space is emitted before the "/" that closes an empty
      #   tag, so that IE's bad XML parser doesn't choke.
      def initialize( ie_hack=false )
        @ie_hack = ie_hack
      end

      # Writes +node+ to +output+, dispatching on the node's class.
      #
      # node::
      #   The node to write
      # output::
      #   A class implementing <TT>&lt;&lt;</TT>.  Pass in an Output object to
      #   change the output encoding.
      #
      # Raises Exception when the node's class is not one handled here.
      def write( node, output )
        case node
        when Document
          # Wrap plain outputs so a non-UTF-8 declaration is honoured.
          unless node.xml_decl.encoding == 'UTF-8' || output.kind_of?(Output)
            output = Output.new( output, node.xml_decl.encoding )
          end
          write_document( node, output )
        when Element
          write_element( node, output )
        when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
             Attribute, AttlistDecl
          node.write( output,-1 )
        when Instruction
          write_instruction( node, output )
        when DocType, XMLDecl
          node.write( output )
        when Comment
          write_comment( node, output )
        when CData
          write_cdata( node, output )
        when Text
          write_text( node, output )
        else
          raise Exception.new("XML FORMATTING ERROR")
        end
      end

      protected

      # Writes every child of the document in order.
      def write_document( node, output )
        node.children.each do |kid|
          write( kid, output )
        end
      end

      # Writes the start tag, the attributes ordered by name, the children,
      # and the end tag (or a self-closing tag when there are no children).
      def write_element( node, output )
        output << "<#{node.expanded_name}"

        unless node.attributes.empty?
          entries = node.attributes.to_a.map do |item|
            Hash === item ? item.values : item
          end
          ordered = entries.flatten.sort_by { |attribute| attribute.name || '' }
          ordered.each do |attribute|
            output << " "
            attribute.write( output )
          end
        end

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          node.children.each do |kid|
            write( kid, output )
          end
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Writes the text node's string form.
      def write_text( node, output )
        output << node.to_s()
      end

      # Writes the comment body between the comment delimiters.
      def write_comment( node, output )
        output << Comment::START
        output << node.to_s
        output << Comment::STOP
      end

      # Writes the CDATA body between the CDATA delimiters.
      def write_cdata( node, output )
        output << CData::START
        output << node.to_s
        output << CData::STOP
      end

      # Writes a processing instruction as start, target, a space, content,
      # stop. The first backslash is stripped from each delimiter constant.
      def write_instruction( node, output )
        opening = Instruction::START.sub(/\\/u, '')
        closing = Instruction::STOP.sub(/\\/u, '')
        output << opening
        output << (node.target || '')
        output << ' '
        output << (node.content || '')
        output << closing
      end
    end
  end
end
@@ -0,0 +1,142 @@
# frozen_string_literal: false
require_relative 'default'

module REXML
  module Formatters
    # Pretty-prints an XML document.  This destroys whitespace in text nodes
    # and will insert carriage returns and indentations.
    #
    # TODO: Add an option to print attributes on new lines
    class Pretty < Default

      # If compact is set to true, then the formatter will attempt to use as
      # little space as possible
      attr_accessor :compact
      # The width of a page.  Used for formatting text
      attr_accessor :width

      # Create a new pretty printer.
      #
      # indentation::
      #   An integer greater than 0.  The indentation of each level will be
      #   this number of spaces.  If this is < 1, the behavior of this object
      #   is undefined.  Defaults to 2.
      # ie_hack::
      #   If true, the printer will insert whitespace before closing empty
      #   tags, thereby allowing Internet Explorer's XML parser to
      #   function. Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
        @width = 80
        @compact = false
      end

      protected

      # Writes an element indented to the current level, with each child on
      # its own line one level deeper. In compact mode, an element whose
      # children are all text is kept on one line when that text is shorter
      # than the page width.
      def write_element(node, output)
        output << ' '*@level
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        if node.children.empty?
          if @ie_hack
            output << " "
          end
          output << "/"
        else
          output << ">"
          # If compact and all children are text, and if the formatted output
          # is less than the specified width, then try to print everything on
          # one line
          skip = false
          if compact
            if node.children.all? { |c| c.kind_of?(Text) }
              string = ""
              old_level = @level
              # Render the children unindented to measure their length.
              @level = 0
              node.children.each { |child| write( child, string ) }
              @level = old_level
              if string.length < @width
                output << string
                skip = true
              end
            end
          end
          unless skip
            output << "\n"
            @level += @indentation
            node.children.each { |child|
              # Whitespace-only text nodes are dropped.
              next if child.kind_of?(Text) and child.to_s.strip.length == 0
              write( child, output )
              output << "\n"
            }
            @level -= @indentation
            output << ' '*@level
          end
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Writes a text node with every whitespace character turned into a
      # space, runs of spaces collapsed, and the result wrapped and indented
      # to the current level.
      def write_text( node, output )
        # Work on copies: the string returned by node.to_s belongs to the
        # node and must not be altered by formatting it.
        s = node.to_s.gsub(/\s/, ' ').squeeze(' ')
        s = wrap(s, @width - @level)
        s = indent_text(s, @level, " ", true)
        output << (' '*@level + s)
      end

      # Writes a comment indented to the current level.
      def write_comment( node, output)
        output << ' ' * @level
        super
      end

      # Writes a CDATA section indented to the current level.
      def write_cdata( node, output)
        output << ' ' * @level
        super
      end

      # Writes the document's children separated by newlines.
      def write_document( node, output )
        # Ok, this is a bit odd.  All XML documents have an XML declaration,
        # but it may not write itself if the user didn't specifically add it,
        # either through the API or in the input document.  If it doesn't write
        # itself, then we don't need a carriage return... which makes this
        # logic more complex.
        children = node.children
        first = children[0]
        children.each { |child|
          # A trailing text node is not written.
          next if child == children[-1] && child.instance_of?(Text)
          # Only ask the first child whether it writes itself if it actually
          # exposes +writethis+; other first children (a comment, say) always
          # count as written.
          unless child == children[0] || child.instance_of?(Text) ||
                 (child == children[1] && first.respond_to?(:writethis) && !first.writethis)
            output << "\n"
          end
          write( child, output )
        }
      end

      private

      # Returns +string+ with +style+ repeated +level+ times inserted after
      # every newline; returns +string+ itself when +level+ is negative.
      # +indentfirstline+ is accepted but not used.
      def indent_text(string, level=1, style="\t", indentfirstline=true)
        return string if level < 0
        string.gsub(/\n/, "\n#{style*level}")
      end

      # Breaks +string+ into lines at the last space found at or before
      # index +width+, for as long as the remainder is longer than +width+
      # and contains such a space. The lines are joined with "\n".
      def wrap(string, width)
        parts = []
        while string.length > width && (place = string.rindex(' ', width))
          parts << string[0...place]
          string = string[place+1..-1]
        end
        parts << string
        parts.join("\n")
      end

    end
  end
end
