rubysl-rexml 1.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -2
- data/lib/rexml/attlistdecl.rb +56 -56
- data/lib/rexml/attribute.rb +155 -149
- data/lib/rexml/cdata.rb +48 -48
- data/lib/rexml/child.rb +82 -82
- data/lib/rexml/comment.rb +59 -59
- data/lib/rexml/doctype.rb +22 -24
- data/lib/rexml/document.rb +185 -129
- data/lib/rexml/dtd/attlistdecl.rb +7 -7
- data/lib/rexml/dtd/dtd.rb +41 -41
- data/lib/rexml/dtd/elementdecl.rb +13 -13
- data/lib/rexml/dtd/entitydecl.rb +49 -49
- data/lib/rexml/dtd/notationdecl.rb +32 -32
- data/lib/rexml/element.rb +122 -107
- data/lib/rexml/encoding.rb +37 -58
- data/lib/rexml/entity.rb +144 -144
- data/lib/rexml/formatters/default.rb +6 -4
- data/lib/rexml/formatters/pretty.rb +11 -8
- data/lib/rexml/formatters/transitive.rb +4 -3
- data/lib/rexml/functions.rb +33 -21
- data/lib/rexml/instruction.rb +49 -49
- data/lib/rexml/light/node.rb +190 -191
- data/lib/rexml/namespace.rb +39 -39
- data/lib/rexml/node.rb +38 -38
- data/lib/rexml/output.rb +17 -12
- data/lib/rexml/parent.rb +26 -25
- data/lib/rexml/parseexception.rb +4 -4
- data/lib/rexml/parsers/baseparser.rb +90 -61
- data/lib/rexml/parsers/lightparser.rb +41 -43
- data/lib/rexml/parsers/pullparser.rb +1 -1
- data/lib/rexml/parsers/sax2parser.rb +233 -198
- data/lib/rexml/parsers/streamparser.rb +6 -2
- data/lib/rexml/parsers/treeparser.rb +9 -6
- data/lib/rexml/parsers/ultralightparser.rb +40 -40
- data/lib/rexml/parsers/xpathparser.rb +51 -52
- data/lib/rexml/quickpath.rb +247 -248
- data/lib/rexml/rexml.rb +9 -10
- data/lib/rexml/sax2listener.rb +92 -92
- data/lib/rexml/security.rb +27 -0
- data/lib/rexml/source.rb +95 -50
- data/lib/rexml/streamlistener.rb +90 -90
- data/lib/rexml/syncenumerator.rb +3 -4
- data/lib/rexml/text.rb +157 -76
- data/lib/rexml/validation/relaxng.rb +18 -18
- data/lib/rexml/validation/validation.rb +5 -5
- data/lib/rexml/xmldecl.rb +59 -63
- data/lib/rexml/xmltokens.rb +14 -14
- data/lib/rexml/xpath.rb +67 -53
- data/lib/rexml/xpath_parser.rb +49 -38
- data/lib/rubysl/rexml.rb +1 -0
- data/lib/rubysl/rexml/version.rb +1 -1
- data/rubysl-rexml.gemspec +3 -1
- metadata +19 -28
- data/lib/rexml/encodings/CP-1252.rb +0 -103
- data/lib/rexml/encodings/EUC-JP.rb +0 -35
- data/lib/rexml/encodings/ICONV.rb +0 -22
- data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
- data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
- data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
- data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
- data/lib/rexml/encodings/UNILE.rb +0 -34
- data/lib/rexml/encodings/US-ASCII.rb +0 -30
- data/lib/rexml/encodings/UTF-16.rb +0 -35
- data/lib/rexml/encodings/UTF-8.rb +0 -18
data/lib/rexml/encoding.rb
CHANGED
@@ -1,71 +1,50 @@
|
|
1
|
-
#
|
1
|
+
# coding: US-ASCII
|
2
2
|
module REXML
|
3
3
|
module Encoding
|
4
|
-
@encoding_methods = {}
|
5
|
-
def self.register(enc, &block)
|
6
|
-
@encoding_methods[enc] = block
|
7
|
-
end
|
8
|
-
def self.apply(obj, enc)
|
9
|
-
@encoding_methods[enc][obj]
|
10
|
-
end
|
11
|
-
def self.encoding_method(enc)
|
12
|
-
@encoding_methods[enc]
|
13
|
-
end
|
14
|
-
|
15
|
-
# Native, default format is UTF-8, so it is declared here rather than in
|
16
|
-
# an encodings/ definition.
|
17
|
-
UTF_8 = 'UTF-8'
|
18
|
-
UTF_16 = 'UTF-16'
|
19
|
-
UNILE = 'UNILE'
|
20
|
-
|
21
4
|
# ID ---> Encoding name
|
22
5
|
attr_reader :encoding
|
23
|
-
def encoding=(
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
@encoding = enc
|
31
|
-
raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
|
32
|
-
@encoding.untaint
|
33
|
-
begin
|
34
|
-
require 'rexml/encodings/ICONV.rb'
|
35
|
-
Encoding.apply(self, "ICONV")
|
36
|
-
rescue LoadError, Exception
|
37
|
-
begin
|
38
|
-
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
|
39
|
-
require enc_file
|
40
|
-
Encoding.apply(self, @encoding)
|
41
|
-
rescue LoadError => err
|
42
|
-
puts err.message
|
43
|
-
raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
|
44
|
-
end
|
45
|
-
end
|
46
|
-
else
|
47
|
-
@encoding = UTF_8
|
48
|
-
require 'rexml/encodings/UTF-8.rb'
|
49
|
-
Encoding.apply(self, @encoding)
|
6
|
+
def encoding=(encoding)
|
7
|
+
encoding = encoding.name if encoding.is_a?(Encoding)
|
8
|
+
if encoding.is_a?(String)
|
9
|
+
original_encoding = encoding
|
10
|
+
encoding = find_encoding(encoding)
|
11
|
+
unless encoding
|
12
|
+
raise ArgumentError, "Bad encoding name #{original_encoding}"
|
50
13
|
end
|
51
|
-
|
52
|
-
|
14
|
+
end
|
15
|
+
return false if defined?(@encoding) and encoding == @encoding
|
16
|
+
if encoding
|
17
|
+
@encoding = encoding.upcase
|
18
|
+
else
|
19
|
+
@encoding = 'UTF-8'
|
53
20
|
end
|
54
21
|
true
|
55
22
|
end
|
56
23
|
|
57
|
-
def
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
24
|
+
def encode(string)
|
25
|
+
string.encode(@encoding)
|
26
|
+
end
|
27
|
+
|
28
|
+
def decode(string)
|
29
|
+
string.encode(::Encoding::UTF_8, @encoding)
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
def find_encoding(name)
|
34
|
+
case name
|
35
|
+
when /\Ashift-jis\z/i
|
36
|
+
return "SHIFT_JIS"
|
37
|
+
when /\ACP-(\d+)\z/
|
38
|
+
name = "CP#{$1}"
|
39
|
+
when /\AUTF-8\z/i
|
40
|
+
return name
|
41
|
+
end
|
42
|
+
begin
|
43
|
+
::Encoding::Converter.search_convpath(name, 'UTF-8')
|
44
|
+
rescue ::Encoding::ConverterNotFoundError
|
45
|
+
return nil
|
65
46
|
end
|
66
|
-
|
67
|
-
return $3.upcase if $3
|
68
|
-
return UTF_8
|
47
|
+
name
|
69
48
|
end
|
70
49
|
end
|
71
50
|
end
|
data/lib/rexml/entity.rb
CHANGED
@@ -3,164 +3,164 @@ require 'rexml/source'
|
|
3
3
|
require 'rexml/xmltokens'
|
4
4
|
|
5
5
|
module REXML
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
6
|
+
# God, I hate DTDs. I really do. Why this idiot standard still
|
7
|
+
# plagues us is beyond me.
|
8
|
+
class Entity < Child
|
9
|
+
include XMLTokens
|
10
|
+
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
|
11
|
+
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
|
12
|
+
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
|
13
|
+
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
14
|
+
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
15
|
+
PEREFERENCE = "%#{NAME};"
|
16
|
+
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
17
|
+
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
18
|
+
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
19
|
+
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
20
|
+
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
21
|
+
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
22
22
|
|
23
|
-
|
23
|
+
attr_reader :name, :external, :ref, :ndata, :pubid
|
24
24
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
25
|
+
# Create a new entity. Simple entities can be constructed by passing a
|
26
|
+
# name, value to the constructor; this creates a generic, plain entity
|
27
|
+
# reference. For anything more complicated, you have to pass a Source to
|
28
|
+
# the constructor with the entity definition, or use the accessor methods.
|
29
|
+
# +WARNING+: There is no validation of entity state except when the entity
|
30
|
+
# is read from a stream. If you start poking around with the accessors,
|
31
|
+
# you can easily create a non-conformant Entity. The best thing to do is
|
32
|
+
# dump the stupid DTDs and use XMLSchema instead.
|
33
|
+
#
|
34
|
+
# e = Entity.new( 'amp', '&' )
|
35
|
+
def initialize stream, value=nil, parent=nil, reference=false
|
36
|
+
super(parent)
|
37
|
+
@ndata = @pubid = @value = @external = nil
|
38
|
+
if stream.kind_of? Array
|
39
|
+
@name = stream[1]
|
40
|
+
if stream[-1] == '%'
|
41
|
+
@reference = true
|
42
|
+
stream.pop
|
43
|
+
else
|
44
|
+
@reference = false
|
45
|
+
end
|
46
|
+
if stream[2] =~ /SYSTEM|PUBLIC/
|
47
|
+
@external = stream[2]
|
48
|
+
if @external == 'SYSTEM'
|
49
|
+
@ref = stream[3]
|
50
|
+
@ndata = stream[4] if stream.size == 5
|
51
|
+
else
|
52
|
+
@pubid = stream[3]
|
53
|
+
@ref = stream[4]
|
54
|
+
end
|
55
|
+
else
|
56
|
+
@value = stream[2]
|
57
|
+
end
|
58
|
+
else
|
59
|
+
@reference = reference
|
60
|
+
@external = nil
|
61
|
+
@name = stream
|
62
|
+
@value = value
|
63
|
+
end
|
64
|
+
end
|
65
65
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
66
|
+
# Evaluates whether the given string matches an entity definition,
|
67
|
+
# returning true if so, and false otherwise.
|
68
|
+
def Entity::matches? string
|
69
|
+
(ENTITYDECL =~ string) == 0
|
70
|
+
end
|
71
71
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
72
|
+
# Evaluates to the unnormalized value of this entity; that is, replacing
|
73
|
+
# all entities -- both %ent; and &ent; entities. This differs from
|
74
|
+
# +value()+ in that +value+ only replaces %ent; entities.
|
75
|
+
def unnormalized
|
76
|
+
document.record_entity_expansion unless document.nil?
|
77
|
+
v = value()
|
78
|
+
return nil if v.nil?
|
79
|
+
@unnormalized = Text::unnormalize(v, parent)
|
80
|
+
@unnormalized
|
81
|
+
end
|
82
82
|
|
83
|
-
|
83
|
+
#once :unnormalized
|
84
84
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
85
|
+
# Returns the value of this entity unprocessed -- raw. This is the
|
86
|
+
# normalized value; that is, with all %ent; and &ent; entities intact
|
87
|
+
def normalized
|
88
|
+
@value
|
89
|
+
end
|
90
90
|
|
91
|
-
|
92
|
-
|
91
|
+
# Write out a fully formed, correct entity definition (assuming the Entity
|
92
|
+
# object itself is valid.)
|
93
93
|
#
|
94
94
|
# out::
|
95
95
|
# An object implementing <TT>&lt;&lt;</TT> to which the entity will be
|
96
96
|
# output
|
97
97
|
# indent::
|
98
98
|
# *DEPRECATED* and ignored
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
99
|
+
def write out, indent=-1
|
100
|
+
out << '<!ENTITY '
|
101
|
+
out << '% ' if @reference
|
102
|
+
out << @name
|
103
|
+
out << ' '
|
104
|
+
if @external
|
105
|
+
out << @external << ' '
|
106
|
+
if @pubid
|
107
|
+
q = @pubid.include?('"')?"'":'"'
|
108
|
+
out << q << @pubid << q << ' '
|
109
|
+
end
|
110
|
+
q = @ref.include?('"')?"'":'"'
|
111
|
+
out << q << @ref << q
|
112
|
+
out << ' NDATA ' << @ndata if @ndata
|
113
|
+
else
|
114
|
+
q = @value.include?('"')?"'":'"'
|
115
|
+
out << q << @value << q
|
116
|
+
end
|
117
|
+
out << '>'
|
118
|
+
end
|
119
119
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
120
|
+
# Returns this entity as a string. See write().
|
121
|
+
def to_s
|
122
|
+
rv = ''
|
123
|
+
write rv
|
124
|
+
rv
|
125
|
+
end
|
126
126
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
127
|
+
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
128
|
+
# Returns the value of this entity. At the moment, only internal entities
|
129
|
+
# are processed. If the value contains internal references (i.e.,
|
130
|
+
# %blah;), those are replaced with their values. For example, if the doctype
|
131
|
+
# contains:
|
132
|
+
# <!ENTITY % foo "bar">
|
133
|
+
# <!ENTITY yada "nanoo %foo; nanoo">
|
134
|
+
# then:
|
135
|
+
# doctype.entity('yada').value #-> "nanoo bar nanoo"
|
136
|
+
def value
|
137
|
+
if @value
|
138
|
+
matches = @value.scan(PEREFERENCE_RE)
|
139
|
+
rv = @value.clone
|
140
|
+
if @parent
|
141
|
+
matches.each do |entity_reference|
|
142
|
+
entity_value = @parent.entity( entity_reference[0] )
|
143
|
+
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
|
144
|
+
end
|
145
|
+
end
|
146
|
+
return rv
|
147
|
+
end
|
148
|
+
nil
|
149
|
+
end
|
150
|
+
end
|
151
151
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
152
|
+
# This is a set of entity constants -- the ones defined in the XML
|
153
|
+
# specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
|
154
|
+
module EntityConst
|
155
|
+
# +>+
|
156
|
+
GT = Entity.new( 'gt', '>' )
|
157
|
+
# +<+
|
158
|
+
LT = Entity.new( 'lt', '<' )
|
159
|
+
# +&+
|
160
|
+
AMP = Entity.new( 'amp', '&' )
|
161
|
+
# +"+
|
162
|
+
QUOT = Entity.new( 'quot', '"' )
|
163
|
+
# +'+
|
164
|
+
APOS = Entity.new( 'apos', "'" )
|
165
|
+
end
|
166
166
|
end
|
@@ -21,8 +21,8 @@ module REXML
|
|
21
21
|
def write( node, output )
|
22
22
|
case node
|
23
23
|
|
24
|
-
when Document
|
25
|
-
if node.xml_decl.encoding !=
|
24
|
+
when Document
|
25
|
+
if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
|
26
26
|
output = Output.new( output, node.xml_decl.encoding )
|
27
27
|
end
|
28
28
|
write_document( node, output )
|
@@ -63,14 +63,16 @@ module REXML
|
|
63
63
|
def write_element( node, output )
|
64
64
|
output << "<#{node.expanded_name}"
|
65
65
|
|
66
|
-
node.attributes.
|
66
|
+
node.attributes.to_a.map { |a|
|
67
|
+
Hash === a ? a.values : a
|
68
|
+
}.flatten.sort_by {|attr| attr.name}.each do |attr|
|
67
69
|
output << " "
|
68
70
|
attr.write( output )
|
69
71
|
end unless node.attributes.empty?
|
70
72
|
|
71
73
|
if node.children.empty?
|
72
74
|
output << " " if @ie_hack
|
73
|
-
output << "/"
|
75
|
+
output << "/"
|
74
76
|
else
|
75
77
|
output << ">"
|
76
78
|
node.children.each { |child|
|
@@ -24,13 +24,14 @@ module REXML
|
|
24
24
|
# is undefined. Defaults to 2.
|
25
25
|
# ie_hack::
|
26
26
|
# If true, the printer will insert whitespace before closing empty
|
27
|
-
# tags, thereby allowing Internet Explorer's
|
27
|
+
# tags, thereby allowing Internet Explorer's XML parser to
|
28
28
|
# function. Defaults to false.
|
29
29
|
def initialize( indentation=2, ie_hack=false )
|
30
30
|
@indentation = indentation
|
31
31
|
@level = 0
|
32
32
|
@ie_hack = ie_hack
|
33
33
|
@width = 80
|
34
|
+
@compact = false
|
34
35
|
end
|
35
36
|
|
36
37
|
protected
|
@@ -47,7 +48,7 @@ module REXML
|
|
47
48
|
if @ie_hack
|
48
49
|
output << " "
|
49
50
|
end
|
50
|
-
output << "/"
|
51
|
+
output << "/"
|
51
52
|
else
|
52
53
|
output << ">"
|
53
54
|
# If compact and all children are text, and if the formatted output
|
@@ -87,7 +88,7 @@ module REXML
|
|
87
88
|
s = node.to_s()
|
88
89
|
s.gsub!(/\s/,' ')
|
89
90
|
s.squeeze!(" ")
|
90
|
-
s = wrap(s,
|
91
|
+
s = wrap(s, @width - @level)
|
91
92
|
s = indent_text(s, @level, " ", true)
|
92
93
|
output << (' '*@level + s)
|
93
94
|
end
|
@@ -125,11 +126,13 @@ module REXML
|
|
125
126
|
end
|
126
127
|
|
127
128
|
def wrap(string, width)
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
129
|
+
parts = []
|
130
|
+
while string.length > width and place = string.rindex(' ', width)
|
131
|
+
parts << string[0...place]
|
132
|
+
string = string[place+1..-1]
|
133
|
+
end
|
134
|
+
parts << string
|
135
|
+
parts.join("\n")
|
133
136
|
end
|
134
137
|
|
135
138
|
end
|