rubysl-rexml 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -2
- data/lib/rexml/attlistdecl.rb +56 -56
- data/lib/rexml/attribute.rb +155 -149
- data/lib/rexml/cdata.rb +48 -48
- data/lib/rexml/child.rb +82 -82
- data/lib/rexml/comment.rb +59 -59
- data/lib/rexml/doctype.rb +22 -24
- data/lib/rexml/document.rb +185 -129
- data/lib/rexml/dtd/attlistdecl.rb +7 -7
- data/lib/rexml/dtd/dtd.rb +41 -41
- data/lib/rexml/dtd/elementdecl.rb +13 -13
- data/lib/rexml/dtd/entitydecl.rb +49 -49
- data/lib/rexml/dtd/notationdecl.rb +32 -32
- data/lib/rexml/element.rb +122 -107
- data/lib/rexml/encoding.rb +37 -58
- data/lib/rexml/entity.rb +144 -144
- data/lib/rexml/formatters/default.rb +6 -4
- data/lib/rexml/formatters/pretty.rb +11 -8
- data/lib/rexml/formatters/transitive.rb +4 -3
- data/lib/rexml/functions.rb +33 -21
- data/lib/rexml/instruction.rb +49 -49
- data/lib/rexml/light/node.rb +190 -191
- data/lib/rexml/namespace.rb +39 -39
- data/lib/rexml/node.rb +38 -38
- data/lib/rexml/output.rb +17 -12
- data/lib/rexml/parent.rb +26 -25
- data/lib/rexml/parseexception.rb +4 -4
- data/lib/rexml/parsers/baseparser.rb +90 -61
- data/lib/rexml/parsers/lightparser.rb +41 -43
- data/lib/rexml/parsers/pullparser.rb +1 -1
- data/lib/rexml/parsers/sax2parser.rb +233 -198
- data/lib/rexml/parsers/streamparser.rb +6 -2
- data/lib/rexml/parsers/treeparser.rb +9 -6
- data/lib/rexml/parsers/ultralightparser.rb +40 -40
- data/lib/rexml/parsers/xpathparser.rb +51 -52
- data/lib/rexml/quickpath.rb +247 -248
- data/lib/rexml/rexml.rb +9 -10
- data/lib/rexml/sax2listener.rb +92 -92
- data/lib/rexml/security.rb +27 -0
- data/lib/rexml/source.rb +95 -50
- data/lib/rexml/streamlistener.rb +90 -90
- data/lib/rexml/syncenumerator.rb +3 -4
- data/lib/rexml/text.rb +157 -76
- data/lib/rexml/validation/relaxng.rb +18 -18
- data/lib/rexml/validation/validation.rb +5 -5
- data/lib/rexml/xmldecl.rb +59 -63
- data/lib/rexml/xmltokens.rb +14 -14
- data/lib/rexml/xpath.rb +67 -53
- data/lib/rexml/xpath_parser.rb +49 -38
- data/lib/rubysl/rexml.rb +1 -0
- data/lib/rubysl/rexml/version.rb +1 -1
- data/rubysl-rexml.gemspec +3 -1
- metadata +19 -28
- data/lib/rexml/encodings/CP-1252.rb +0 -103
- data/lib/rexml/encodings/EUC-JP.rb +0 -35
- data/lib/rexml/encodings/ICONV.rb +0 -22
- data/lib/rexml/encodings/ISO-8859-1.rb +0 -7
- data/lib/rexml/encodings/ISO-8859-15.rb +0 -72
- data/lib/rexml/encodings/SHIFT-JIS.rb +0 -37
- data/lib/rexml/encodings/SHIFT_JIS.rb +0 -1
- data/lib/rexml/encodings/UNILE.rb +0 -34
- data/lib/rexml/encodings/US-ASCII.rb +0 -30
- data/lib/rexml/encodings/UTF-16.rb +0 -35
- data/lib/rexml/encodings/UTF-8.rb +0 -18
data/lib/rexml/encoding.rb
CHANGED
@@ -1,71 +1,50 @@
|
|
1
|
-
#
|
1
|
+
# coding: US-ASCII
|
2
2
|
module REXML
|
3
3
|
module Encoding
|
4
|
-
@encoding_methods = {}
|
5
|
-
def self.register(enc, &block)
|
6
|
-
@encoding_methods[enc] = block
|
7
|
-
end
|
8
|
-
def self.apply(obj, enc)
|
9
|
-
@encoding_methods[enc][obj]
|
10
|
-
end
|
11
|
-
def self.encoding_method(enc)
|
12
|
-
@encoding_methods[enc]
|
13
|
-
end
|
14
|
-
|
15
|
-
# Native, default format is UTF-8, so it is declared here rather than in
|
16
|
-
# an encodings/ definition.
|
17
|
-
UTF_8 = 'UTF-8'
|
18
|
-
UTF_16 = 'UTF-16'
|
19
|
-
UNILE = 'UNILE'
|
20
|
-
|
21
4
|
# ID ---> Encoding name
|
22
5
|
attr_reader :encoding
|
23
|
-
def encoding=(
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
@encoding = enc
|
31
|
-
raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
|
32
|
-
@encoding.untaint
|
33
|
-
begin
|
34
|
-
require 'rexml/encodings/ICONV.rb'
|
35
|
-
Encoding.apply(self, "ICONV")
|
36
|
-
rescue LoadError, Exception
|
37
|
-
begin
|
38
|
-
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
|
39
|
-
require enc_file
|
40
|
-
Encoding.apply(self, @encoding)
|
41
|
-
rescue LoadError => err
|
42
|
-
puts err.message
|
43
|
-
raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
|
44
|
-
end
|
45
|
-
end
|
46
|
-
else
|
47
|
-
@encoding = UTF_8
|
48
|
-
require 'rexml/encodings/UTF-8.rb'
|
49
|
-
Encoding.apply(self, @encoding)
|
6
|
+
def encoding=(encoding)
|
7
|
+
encoding = encoding.name if encoding.is_a?(Encoding)
|
8
|
+
if encoding.is_a?(String)
|
9
|
+
original_encoding = encoding
|
10
|
+
encoding = find_encoding(encoding)
|
11
|
+
unless encoding
|
12
|
+
raise ArgumentError, "Bad encoding name #{original_encoding}"
|
50
13
|
end
|
51
|
-
|
52
|
-
|
14
|
+
end
|
15
|
+
return false if defined?(@encoding) and encoding == @encoding
|
16
|
+
if encoding
|
17
|
+
@encoding = encoding.upcase
|
18
|
+
else
|
19
|
+
@encoding = 'UTF-8'
|
53
20
|
end
|
54
21
|
true
|
55
22
|
end
|
56
23
|
|
57
|
-
def
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
24
|
+
def encode(string)
|
25
|
+
string.encode(@encoding)
|
26
|
+
end
|
27
|
+
|
28
|
+
def decode(string)
|
29
|
+
string.encode(::Encoding::UTF_8, @encoding)
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
def find_encoding(name)
|
34
|
+
case name
|
35
|
+
when /\Ashift-jis\z/i
|
36
|
+
return "SHIFT_JIS"
|
37
|
+
when /\ACP-(\d+)\z/
|
38
|
+
name = "CP#{$1}"
|
39
|
+
when /\AUTF-8\z/i
|
40
|
+
return name
|
41
|
+
end
|
42
|
+
begin
|
43
|
+
::Encoding::Converter.search_convpath(name, 'UTF-8')
|
44
|
+
rescue ::Encoding::ConverterNotFoundError
|
45
|
+
return nil
|
65
46
|
end
|
66
|
-
|
67
|
-
return $3.upcase if $3
|
68
|
-
return UTF_8
|
47
|
+
name
|
69
48
|
end
|
70
49
|
end
|
71
50
|
end
|
data/lib/rexml/entity.rb
CHANGED
@@ -3,164 +3,164 @@ require 'rexml/source'
|
|
3
3
|
require 'rexml/xmltokens'
|
4
4
|
|
5
5
|
module REXML
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
6
|
+
# God, I hate DTDs. I really do. Why this idiot standard still
|
7
|
+
# plagues us is beyond me.
|
8
|
+
class Entity < Child
|
9
|
+
include XMLTokens
|
10
|
+
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
|
11
|
+
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
|
12
|
+
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
|
13
|
+
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
14
|
+
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
15
|
+
PEREFERENCE = "%#{NAME};"
|
16
|
+
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
17
|
+
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
18
|
+
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
19
|
+
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
20
|
+
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
21
|
+
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
22
22
|
|
23
|
-
|
23
|
+
attr_reader :name, :external, :ref, :ndata, :pubid
|
24
24
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
25
|
+
# Create a new entity. Simple entities can be constructed by passing a
|
26
|
+
# name, value to the constructor; this creates a generic, plain entity
|
27
|
+
# reference. For anything more complicated, you have to pass a Source to
|
28
|
+
# the constructor with the entity definition, or use the accessor methods.
|
29
|
+
# +WARNING+: There is no validation of entity state except when the entity
|
30
|
+
# is read from a stream. If you start poking around with the accessors,
|
31
|
+
# you can easily create a non-conformant Entity. The best thing to do is
|
32
|
+
# dump the stupid DTDs and use XMLSchema instead.
|
33
|
+
#
|
34
|
+
# e = Entity.new( 'amp', '&' )
|
35
|
+
def initialize stream, value=nil, parent=nil, reference=false
|
36
|
+
super(parent)
|
37
|
+
@ndata = @pubid = @value = @external = nil
|
38
|
+
if stream.kind_of? Array
|
39
|
+
@name = stream[1]
|
40
|
+
if stream[-1] == '%'
|
41
|
+
@reference = true
|
42
|
+
stream.pop
|
43
|
+
else
|
44
|
+
@reference = false
|
45
|
+
end
|
46
|
+
if stream[2] =~ /SYSTEM|PUBLIC/
|
47
|
+
@external = stream[2]
|
48
|
+
if @external == 'SYSTEM'
|
49
|
+
@ref = stream[3]
|
50
|
+
@ndata = stream[4] if stream.size == 5
|
51
|
+
else
|
52
|
+
@pubid = stream[3]
|
53
|
+
@ref = stream[4]
|
54
|
+
end
|
55
|
+
else
|
56
|
+
@value = stream[2]
|
57
|
+
end
|
58
|
+
else
|
59
|
+
@reference = reference
|
60
|
+
@external = nil
|
61
|
+
@name = stream
|
62
|
+
@value = value
|
63
|
+
end
|
64
|
+
end
|
65
65
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
66
|
+
# Evaluates whether the given string matches an entity definition,
|
67
|
+
# returning true if so, and false otherwise.
|
68
|
+
def Entity::matches? string
|
69
|
+
(ENTITYDECL =~ string) == 0
|
70
|
+
end
|
71
71
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
72
|
+
# Evaluates to the unnormalized value of this entity; that is, replacing
|
73
|
+
# all entities -- both %ent; and &ent; entities. This differs from
|
74
|
+
# +value()+ in that +value+ only replaces %ent; entities.
|
75
|
+
def unnormalized
|
76
|
+
document.record_entity_expansion unless document.nil?
|
77
|
+
v = value()
|
78
|
+
return nil if v.nil?
|
79
|
+
@unnormalized = Text::unnormalize(v, parent)
|
80
|
+
@unnormalized
|
81
|
+
end
|
82
82
|
|
83
|
-
|
83
|
+
#once :unnormalized
|
84
84
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
85
|
+
# Returns the value of this entity unprocessed -- raw. This is the
|
86
|
+
# normalized value; that is, with all %ent; and &ent; entities intact
|
87
|
+
def normalized
|
88
|
+
@value
|
89
|
+
end
|
90
90
|
|
91
|
-
|
92
|
-
|
91
|
+
# Write out a fully formed, correct entity definition (assuming the Entity
|
92
|
+
# object itself is valid.)
|
93
93
|
#
|
94
94
|
# out::
|
95
95
|
# An object implementing <TT><<</TT> to which the entity will be
|
96
96
|
# output
|
97
97
|
# indent::
|
98
98
|
# *DEPRECATED* and ignored
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
99
|
+
def write out, indent=-1
|
100
|
+
out << '<!ENTITY '
|
101
|
+
out << '% ' if @reference
|
102
|
+
out << @name
|
103
|
+
out << ' '
|
104
|
+
if @external
|
105
|
+
out << @external << ' '
|
106
|
+
if @pubid
|
107
|
+
q = @pubid.include?('"')?"'":'"'
|
108
|
+
out << q << @pubid << q << ' '
|
109
|
+
end
|
110
|
+
q = @ref.include?('"')?"'":'"'
|
111
|
+
out << q << @ref << q
|
112
|
+
out << ' NDATA ' << @ndata if @ndata
|
113
|
+
else
|
114
|
+
q = @value.include?('"')?"'":'"'
|
115
|
+
out << q << @value << q
|
116
|
+
end
|
117
|
+
out << '>'
|
118
|
+
end
|
119
119
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
120
|
+
# Returns this entity as a string. See write().
|
121
|
+
def to_s
|
122
|
+
rv = ''
|
123
|
+
write rv
|
124
|
+
rv
|
125
|
+
end
|
126
126
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
127
|
+
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
128
|
+
# Returns the value of this entity. At the moment, only internal entities
|
129
|
+
# are processed. If the value contains internal references (e.g.,
|
130
|
+
# %blah;), those are replaced with their values. For example, if the doctype
|
131
|
+
# contains:
|
132
|
+
# <!ENTITY % foo "bar">
|
133
|
+
# <!ENTITY yada "nanoo %foo; nanoo">
|
134
|
+
# then:
|
135
|
+
# doctype.entity('yada').value #-> "nanoo bar nanoo"
|
136
|
+
def value
|
137
|
+
if @value
|
138
|
+
matches = @value.scan(PEREFERENCE_RE)
|
139
|
+
rv = @value.clone
|
140
|
+
if @parent
|
141
|
+
matches.each do |entity_reference|
|
142
|
+
entity_value = @parent.entity( entity_reference[0] )
|
143
|
+
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
|
144
|
+
end
|
145
|
+
end
|
146
|
+
return rv
|
147
|
+
end
|
148
|
+
nil
|
149
|
+
end
|
150
|
+
end
|
151
151
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
152
|
+
# This is a set of entity constants -- the ones defined in the XML
|
153
|
+
# specification. These are +gt+, +lt+, +amp+, +quot+ and +apos+.
|
154
|
+
module EntityConst
|
155
|
+
# +>+
|
156
|
+
GT = Entity.new( 'gt', '>' )
|
157
|
+
# +<+
|
158
|
+
LT = Entity.new( 'lt', '<' )
|
159
|
+
# +&+
|
160
|
+
AMP = Entity.new( 'amp', '&' )
|
161
|
+
# +"+
|
162
|
+
QUOT = Entity.new( 'quot', '"' )
|
163
|
+
# +'+
|
164
|
+
APOS = Entity.new( 'apos', "'" )
|
165
|
+
end
|
166
166
|
end
|
@@ -21,8 +21,8 @@ module REXML
|
|
21
21
|
def write( node, output )
|
22
22
|
case node
|
23
23
|
|
24
|
-
when Document
|
25
|
-
if node.xml_decl.encoding !=
|
24
|
+
when Document
|
25
|
+
if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
|
26
26
|
output = Output.new( output, node.xml_decl.encoding )
|
27
27
|
end
|
28
28
|
write_document( node, output )
|
@@ -63,14 +63,16 @@ module REXML
|
|
63
63
|
def write_element( node, output )
|
64
64
|
output << "<#{node.expanded_name}"
|
65
65
|
|
66
|
-
node.attributes.
|
66
|
+
node.attributes.to_a.map { |a|
|
67
|
+
Hash === a ? a.values : a
|
68
|
+
}.flatten.sort_by {|attr| attr.name}.each do |attr|
|
67
69
|
output << " "
|
68
70
|
attr.write( output )
|
69
71
|
end unless node.attributes.empty?
|
70
72
|
|
71
73
|
if node.children.empty?
|
72
74
|
output << " " if @ie_hack
|
73
|
-
output << "/"
|
75
|
+
output << "/"
|
74
76
|
else
|
75
77
|
output << ">"
|
76
78
|
node.children.each { |child|
|
@@ -24,13 +24,14 @@ module REXML
|
|
24
24
|
# is undefined. Defaults to 2.
|
25
25
|
# ie_hack::
|
26
26
|
# If true, the printer will insert whitespace before closing empty
|
27
|
-
# tags, thereby allowing Internet Explorer's
|
27
|
+
# tags, thereby allowing Internet Explorer's XML parser to
|
28
28
|
# function. Defaults to false.
|
29
29
|
def initialize( indentation=2, ie_hack=false )
|
30
30
|
@indentation = indentation
|
31
31
|
@level = 0
|
32
32
|
@ie_hack = ie_hack
|
33
33
|
@width = 80
|
34
|
+
@compact = false
|
34
35
|
end
|
35
36
|
|
36
37
|
protected
|
@@ -47,7 +48,7 @@ module REXML
|
|
47
48
|
if @ie_hack
|
48
49
|
output << " "
|
49
50
|
end
|
50
|
-
output << "/"
|
51
|
+
output << "/"
|
51
52
|
else
|
52
53
|
output << ">"
|
53
54
|
# If compact and all children are text, and if the formatted output
|
@@ -87,7 +88,7 @@ module REXML
|
|
87
88
|
s = node.to_s()
|
88
89
|
s.gsub!(/\s/,' ')
|
89
90
|
s.squeeze!(" ")
|
90
|
-
s = wrap(s,
|
91
|
+
s = wrap(s, @width - @level)
|
91
92
|
s = indent_text(s, @level, " ", true)
|
92
93
|
output << (' '*@level + s)
|
93
94
|
end
|
@@ -125,11 +126,13 @@ module REXML
|
|
125
126
|
end
|
126
127
|
|
127
128
|
def wrap(string, width)
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
129
|
+
parts = []
|
130
|
+
while string.length > width and place = string.rindex(' ', width)
|
131
|
+
parts << string[0...place]
|
132
|
+
string = string[place+1..-1]
|
133
|
+
end
|
134
|
+
parts << string
|
135
|
+
parts.join("\n")
|
133
136
|
end
|
134
137
|
|
135
138
|
end
|