rubysl-rexml 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +8 -0
- data/Gemfile +4 -0
- data/LICENSE +25 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/lib/rexml/attlistdecl.rb +62 -0
- data/lib/rexml/attribute.rb +185 -0
- data/lib/rexml/cdata.rb +67 -0
- data/lib/rexml/child.rb +96 -0
- data/lib/rexml/comment.rb +80 -0
- data/lib/rexml/doctype.rb +271 -0
- data/lib/rexml/document.rb +230 -0
- data/lib/rexml/dtd/attlistdecl.rb +10 -0
- data/lib/rexml/dtd/dtd.rb +51 -0
- data/lib/rexml/dtd/elementdecl.rb +17 -0
- data/lib/rexml/dtd/entitydecl.rb +56 -0
- data/lib/rexml/dtd/notationdecl.rb +39 -0
- data/lib/rexml/element.rb +1227 -0
- data/lib/rexml/encoding.rb +71 -0
- data/lib/rexml/encodings/CP-1252.rb +103 -0
- data/lib/rexml/encodings/EUC-JP.rb +35 -0
- data/lib/rexml/encodings/ICONV.rb +22 -0
- data/lib/rexml/encodings/ISO-8859-1.rb +7 -0
- data/lib/rexml/encodings/ISO-8859-15.rb +72 -0
- data/lib/rexml/encodings/SHIFT-JIS.rb +37 -0
- data/lib/rexml/encodings/SHIFT_JIS.rb +1 -0
- data/lib/rexml/encodings/UNILE.rb +34 -0
- data/lib/rexml/encodings/US-ASCII.rb +30 -0
- data/lib/rexml/encodings/UTF-16.rb +35 -0
- data/lib/rexml/encodings/UTF-8.rb +18 -0
- data/lib/rexml/entity.rb +166 -0
- data/lib/rexml/formatters/default.rb +109 -0
- data/lib/rexml/formatters/pretty.rb +138 -0
- data/lib/rexml/formatters/transitive.rb +56 -0
- data/lib/rexml/functions.rb +382 -0
- data/lib/rexml/instruction.rb +70 -0
- data/lib/rexml/light/node.rb +196 -0
- data/lib/rexml/namespace.rb +47 -0
- data/lib/rexml/node.rb +75 -0
- data/lib/rexml/output.rb +24 -0
- data/lib/rexml/parent.rb +166 -0
- data/lib/rexml/parseexception.rb +51 -0
- data/lib/rexml/parsers/baseparser.rb +503 -0
- data/lib/rexml/parsers/lightparser.rb +60 -0
- data/lib/rexml/parsers/pullparser.rb +196 -0
- data/lib/rexml/parsers/sax2parser.rb +238 -0
- data/lib/rexml/parsers/streamparser.rb +46 -0
- data/lib/rexml/parsers/treeparser.rb +97 -0
- data/lib/rexml/parsers/ultralightparser.rb +56 -0
- data/lib/rexml/parsers/xpathparser.rb +698 -0
- data/lib/rexml/quickpath.rb +266 -0
- data/lib/rexml/rexml.rb +32 -0
- data/lib/rexml/sax2listener.rb +97 -0
- data/lib/rexml/source.rb +251 -0
- data/lib/rexml/streamlistener.rb +92 -0
- data/lib/rexml/syncenumerator.rb +33 -0
- data/lib/rexml/text.rb +344 -0
- data/lib/rexml/undefinednamespaceexception.rb +8 -0
- data/lib/rexml/validation/relaxng.rb +559 -0
- data/lib/rexml/validation/validation.rb +155 -0
- data/lib/rexml/validation/validationexception.rb +9 -0
- data/lib/rexml/xmldecl.rb +119 -0
- data/lib/rexml/xmltokens.rb +18 -0
- data/lib/rexml/xpath.rb +66 -0
- data/lib/rexml/xpath_parser.rb +792 -0
- data/lib/rubysl/rexml.rb +1 -0
- data/lib/rubysl/rexml/version.rb +5 -0
- data/rubysl-rexml.gemspec +23 -0
- data/spec/attribute/clone_spec.rb +10 -0
- data/spec/attribute/element_spec.rb +22 -0
- data/spec/attribute/equal_value_spec.rb +17 -0
- data/spec/attribute/hash_spec.rb +12 -0
- data/spec/attribute/initialize_spec.rb +28 -0
- data/spec/attribute/inspect_spec.rb +19 -0
- data/spec/attribute/namespace_spec.rb +23 -0
- data/spec/attribute/node_type_spec.rb +9 -0
- data/spec/attribute/prefix_spec.rb +17 -0
- data/spec/attribute/remove_spec.rb +19 -0
- data/spec/attribute/to_s_spec.rb +13 -0
- data/spec/attribute/to_string_spec.rb +14 -0
- data/spec/attribute/value_spec.rb +14 -0
- data/spec/attribute/write_spec.rb +22 -0
- data/spec/attribute/xpath_spec.rb +19 -0
- data/spec/attributes/add_spec.rb +6 -0
- data/spec/attributes/append_spec.rb +6 -0
- data/spec/attributes/delete_all_spec.rb +30 -0
- data/spec/attributes/delete_spec.rb +26 -0
- data/spec/attributes/each_attribute_spec.rb +24 -0
- data/spec/attributes/each_spec.rb +24 -0
- data/spec/attributes/element_reference_spec.rb +18 -0
- data/spec/attributes/element_set_spec.rb +25 -0
- data/spec/attributes/get_attribute_ns_spec.rb +13 -0
- data/spec/attributes/get_attribute_spec.rb +28 -0
- data/spec/attributes/initialize_spec.rb +18 -0
- data/spec/attributes/length_spec.rb +6 -0
- data/spec/attributes/namespaces_spec.rb +5 -0
- data/spec/attributes/prefixes_spec.rb +23 -0
- data/spec/attributes/shared/add.rb +17 -0
- data/spec/attributes/shared/length.rb +12 -0
- data/spec/attributes/size_spec.rb +6 -0
- data/spec/attributes/to_a_spec.rb +20 -0
- data/spec/cdata/clone_spec.rb +9 -0
- data/spec/cdata/initialize_spec.rb +24 -0
- data/spec/cdata/shared/to_s.rb +11 -0
- data/spec/cdata/to_s_spec.rb +6 -0
- data/spec/cdata/value_spec.rb +6 -0
- data/spec/document/add_element_spec.rb +30 -0
- data/spec/document/add_spec.rb +60 -0
- data/spec/document/clone_spec.rb +19 -0
- data/spec/document/doctype_spec.rb +14 -0
- data/spec/document/encoding_spec.rb +21 -0
- data/spec/document/expanded_name_spec.rb +15 -0
- data/spec/document/new_spec.rb +37 -0
- data/spec/document/node_type_spec.rb +7 -0
- data/spec/document/root_spec.rb +11 -0
- data/spec/document/stand_alone_spec.rb +18 -0
- data/spec/document/version_spec.rb +13 -0
- data/spec/document/write_spec.rb +38 -0
- data/spec/document/xml_decl_spec.rb +14 -0
- data/spec/element/add_attribute_spec.rb +40 -0
- data/spec/element/add_attributes_spec.rb +21 -0
- data/spec/element/add_element_spec.rb +38 -0
- data/spec/element/add_namespace_spec.rb +23 -0
- data/spec/element/add_text_spec.rb +23 -0
- data/spec/element/attribute_spec.rb +16 -0
- data/spec/element/attributes_spec.rb +18 -0
- data/spec/element/cdatas_spec.rb +23 -0
- data/spec/element/clone_spec.rb +28 -0
- data/spec/element/comments_spec.rb +20 -0
- data/spec/element/delete_attribute_spec.rb +38 -0
- data/spec/element/delete_element_spec.rb +50 -0
- data/spec/element/delete_namespace_spec.rb +24 -0
- data/spec/element/document_spec.rb +17 -0
- data/spec/element/each_element_with_attribute_spec.rb +34 -0
- data/spec/element/each_element_with_text_spec.rb +30 -0
- data/spec/element/get_text_spec.rb +17 -0
- data/spec/element/has_attributes_spec.rb +16 -0
- data/spec/element/has_elements_spec.rb +17 -0
- data/spec/element/has_text_spec.rb +15 -0
- data/spec/element/inspect_spec.rb +26 -0
- data/spec/element/instructions_spec.rb +20 -0
- data/spec/element/namespace_spec.rb +26 -0
- data/spec/element/namespaces_spec.rb +31 -0
- data/spec/element/new_spec.rb +34 -0
- data/spec/element/next_element_spec.rb +18 -0
- data/spec/element/node_type_spec.rb +7 -0
- data/spec/element/prefixes_spec.rb +22 -0
- data/spec/element/previous_element_spec.rb +19 -0
- data/spec/element/raw_spec.rb +23 -0
- data/spec/element/root_spec.rb +27 -0
- data/spec/element/text_spec.rb +45 -0
- data/spec/element/texts_spec.rb +15 -0
- data/spec/element/whitespace_spec.rb +22 -0
- data/spec/node/each_recursive_spec.rb +20 -0
- data/spec/node/find_first_recursive_spec.rb +24 -0
- data/spec/node/index_in_parent_spec.rb +14 -0
- data/spec/node/next_sibling_node_spec.rb +20 -0
- data/spec/node/parent_spec.rb +20 -0
- data/spec/node/previous_sibling_node_spec.rb +20 -0
- data/spec/shared/each_element.rb +35 -0
- data/spec/shared/elements_to_a.rb +35 -0
- data/spec/text/append_spec.rb +9 -0
- data/spec/text/clone_spec.rb +9 -0
- data/spec/text/comparison_spec.rb +24 -0
- data/spec/text/empty_spec.rb +11 -0
- data/spec/text/indent_text_spec.rb +23 -0
- data/spec/text/inspect_spec.rb +7 -0
- data/spec/text/new_spec.rb +48 -0
- data/spec/text/node_type_spec.rb +7 -0
- data/spec/text/normalize_spec.rb +7 -0
- data/spec/text/read_with_substitution_spec.rb +12 -0
- data/spec/text/to_s_spec.rb +17 -0
- data/spec/text/unnormalize_spec.rb +7 -0
- data/spec/text/value_spec.rb +36 -0
- data/spec/text/wrap_spec.rb +20 -0
- data/spec/text/write_with_substitution_spec.rb +32 -0
- metadata +385 -0
@@ -0,0 +1,92 @@
|
|
1
|
+
module REXML
  # A template for stream parser listeners.
  # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
  # processed; REXML doesn't yet handle doctype entity declarations, so you
  # have to parse them out yourself.
  #
  # Every callback defined here has an empty body, so it returns nil unless
  # it is overridden.
  module StreamListener
    # Called when a tag is encountered.
    # @p name the tag name
    # @p attrs an array of arrays of attribute/value pairs, suitable for
    # use with assoc or rassoc.  IE, <tag attr1="value1" attr2="value2">
    # will result in
    # tag_start( "tag", [["attr1","value1"],["attr2","value2"]])
    def tag_start(name, attrs)
    end

    # Called when the end tag is reached.  In the case of <tag/>, tag_end
    # will be called immediately after tag_start
    # @p name the name of the tag
    def tag_end(name)
    end

    # Called when text is encountered in the document
    # @p text the text content.
    def text(text)
    end

    # Called when an instruction is encountered.  EG: <?xsl sheet='foo'?>
    # @p name the instruction name; in the example, "xsl"
    # @p instruction the rest of the instruction.  In the example,
    # "sheet='foo'"
    def instruction(name, instruction)
    end

    # Called when a comment is encountered.
    # @p comment The content of the comment
    def comment(comment)
    end

    # Handles a doctype declaration. Any attributes of the doctype which are
    # not supplied will be nil.  EG, <!DOCTYPE me PUBLIC "foo" "bar">
    # @p name the name of the doctype; EG, "me"
    # @p pub_sys "PUBLIC", "SYSTEM", or nil.  EG, "PUBLIC"
    # @p long_name the supplied long name, or nil.  EG, "foo"
    # @p uri the uri of the doctype, or nil.  EG, "bar"
    def doctype(name, pub_sys, long_name, uri)
    end

    # Called when the doctype is done
    def doctype_end
    end

    # If a doctype includes an ATTLIST declaration, it will cause this
    # method to be called.  The content is the declaration itself, unparsed.
    # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
    # attr CDATA #REQUIRED".  This is the same for all of the .*decl
    # methods.
    # @p element_name presumably the element the list applies to ("el" in
    # the example) -- TODO confirm against the parser
    # @p attributes presumably the parsed attribute definitions -- TODO confirm
    # @p raw_content presumably the unparsed declaration text described
    # above -- TODO confirm
    def attlistdecl(element_name, attributes, raw_content)
    end

    # <!ELEMENT ...>
    def elementdecl(content)
    end

    # <!ENTITY ...>
    # The argument passed to this method is an array of the entity
    # declaration.  It can be in a number of formats, but in general it
    # returns (example, result):
    #  <!ENTITY % YN '"Yes"'>
    #  ["%", "YN", "'\"Yes\"'", "\""]
    #  <!ENTITY % YN 'Yes'>
    #  ["%", "YN", "'Yes'", "s"]
    #  <!ENTITY WhatHeSaid "He said %YN;">
    #  ["WhatHeSaid", "\"He said %YN;\"", "YN"]
    #  <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
    #  ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
    def entitydecl(content)
    end

    # <!NOTATION ...>
    def notationdecl(content)
    end

    # Called when %foo; is encountered in a doctype declaration.
    # @p content "foo"
    def entity(content)
    end

    # Called when <![CDATA[ ... ]]> is encountered in a document.
    # @p content "..."
    def cdata(content)
    end

    # Called when an XML PI is encountered in the document.
    # EG: <?xml version="1.0" encoding="utf"?>
    # @p version the version attribute value.  EG, "1.0"
    # @p encoding the encoding attribute value, or nil.  EG, "utf"
    # @p standalone the standalone attribute value, or nil.  EG, nil
    def xmldecl(version, encoding, standalone)
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module REXML
  # Walks several collections in parallel, yielding one "row" (the i-th
  # element of every collection) per step.
  class SyncEnumerator
    include Enumerable

    # Creates a new SyncEnumerator which enumerates rows of given
    # Enumerable objects.
    #
    # +enums+ the collections to walk; each must respond to +size+.  May be
    # empty, in which case #each yields nothing.
    def initialize(*enums)
      @gens = enums
      # Only the length of the longest collection matters: it drives the
      # zip in #each so that no element of any collection is dropped.
      # nil when no collections were given.
      @biggest = @gens.max_by { |gen| gen.size }
    end

    # Returns the number of enumerated Enumerable objects, i.e. the size
    # of each row.
    def size
      @gens.size
    end

    # Same as #size.
    alias length size

    # Enumerates rows of the Enumerable objects.  Collections shorter than
    # the longest one contribute nil once they are exhausted.
    #
    # Returns self when a block is given, otherwise an Enumerator over the
    # rows.
    def each
      return to_enum(:each) unless block_given?
      return self if @biggest.nil?

      @biggest.zip( *@gens ) { |row|
        # row[0] is the element of the driving collection, which appears
        # again at its own position among the remaining entries.
        yield(*row[1..-1])
      }
      self
    end
  end
end
|
data/lib/rexml/text.rb
ADDED
@@ -0,0 +1,344 @@
|
|
1
|
+
require 'rexml/entity'
|
2
|
+
require 'rexml/doctype'
|
3
|
+
require 'rexml/child'
|
4
|
+
require 'rexml/doctype'
|
5
|
+
require 'rexml/parseexception'
|
6
|
+
|
7
|
+
module REXML
|
8
|
+
# Represents text nodes in an XML document
|
9
|
+
class Text < Child
|
10
|
+
include Comparable
|
11
|
+
# The order in which the substitutions occur.
# SPECIALS[i] is replaced by SUBSTITUTES[i] when text is written out.  The
# bare ampersand (one that does not already start a reference) comes first
# so that the "&" of the references inserted afterwards is not escaped again.
SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
# Characters which are substituted in written strings.
# SETUTITSBUS[i] is replaced by SLAICEPS[i] when text is read back in.
SLAICEPS = [ '<', '>', '"', "'", '&' ]
SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
|
17
|
+
|
18
|
+
# If +raw+ is true, then REXML leaves the value alone
|
19
|
+
attr_accessor :raw
|
20
|
+
|
21
|
+
ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
|
22
|
+
# Matches a numeric character reference, decimal ("&#65;") or hexadecimal
# ("&#x41;").  Capture 1 is the number without the "&#" prefix and without
# leading zeros; a hexadecimal number keeps its leading "x".
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
23
|
+
|
24
|
+
# Constructor
# +arg+ if a String, the content is set to a copy of the String.  If a
# Text, the object is shallowly cloned.  Anything else raises.
#
# +respect_whitespace+ (boolean, false) if true, whitespace is
# respected; otherwise runs of the same space, newline or tab character
# are squeezed into a single one.
#
# +parent+ (nil) if this is a Parent object, the parent
# will be set to this.
#
# +raw+ (nil) This argument can be given three values.
# If true, then the value used to construct this object is expected to
# contain no unescaped XML markup, and REXML will not change the text. If
# this value is false, the string may contain any characters, and REXML will
# escape any and all defined entities whose values are contained in the
# text.  If this value is nil (the default), then the raw value of the
# parent will be used as the raw value for this node.  If there is no raw
# value for the parent, and no value is supplied, the default is false.
# Use this field if you have entities defined for some text, and you don't
# want REXML to escape that text in output.
#   Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
#   Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
#   Text.new( "<&", false, nil, true )  #-> raises (illegal character)
#   Text.new( "&lt;&amp;", false, nil, true )  #-> "&lt;&amp;"
#   # Assume that the entity "s" is defined to be "sean"
#   # and that the entity "r" is defined to be "russell"
#   Text.new( "sean russell" )          #-> "&s; &r;"
#   Text.new( "sean russell", false, nil, true ) #-> "sean russell"
#
# +entity_filter+ (nil) This can be an array of entities to match in the
# supplied text.  This argument is only useful if +raw+ is set to false.
#   Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
#   Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
# In the last example, the +entity_filter+ argument is ignored.
# NOTE(review): Text::normalize is written to skip entities that match the
# filter, which looks like the opposite of the first example -- confirm the
# intended semantics.
#
# +illegal+ INTERNAL USE ONLY (called +pattern+ in earlier comments).  A
# Regexp; when the node ends up raw and the string matches it, the
# constructor raises.
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
               entity_filter=nil, illegal=ILLEGAL )

  @raw = false

  if parent
    super( parent )
    @raw = parent.raw
  else
    @parent = nil
  end

  # An explicit +raw+ wins over the value inherited from the parent
  @raw = raw unless raw.nil?
  @entity_filter = entity_filter
  @normalized = @unnormalized = nil

  if arg.kind_of? String
    # dup rather than clone: the copy is mutated below, and clone would
    # carry over the frozen flag of a frozen argument
    @string = arg.dup
    @string.squeeze!(" \n\t") unless respect_whitespace
  elsif arg.kind_of? Text
    @string = arg.to_s
    @raw = arg.raw
  else
    raise "Illegal argument of type #{arg.class} for Text constructor (#{arg})"
  end

  # Normalize CR and CRLF line endings to LF
  @string.gsub!( /\r\n?/, "\n" )

  # check for illegal characters
  if @raw
    if @string =~ illegal
      raise "Illegal character '#{$1}' in raw string \"#{@string}\""
    end
  end
end
|
95
|
+
|
96
|
+
# Identifies this node as a text node; always the symbol :text.
def node_type
  :text
end
|
99
|
+
|
100
|
+
# True when the stored string has no characters at all.
def empty?
  @string.empty?
end
|
103
|
+
|
104
|
+
|
105
|
+
# Builds a new Text from this one by handing self to the constructor with
# every other argument left at its default.
def clone
  Text.new(self)
end
|
108
|
+
|
109
|
+
|
110
|
+
# Appends text to this text node.  The text is appended in the +raw+ mode
# of this text node.
#
# +to_append+ the String to add; its CR and CRLF line endings are converted
# to LF before it is appended.
#
# Returns the node's underlying string.
def <<( to_append )
  @string << to_append.gsub( /\r\n?/, "\n" )
  # The memoized escaped/unescaped forms were built from the old content;
  # drop them so the next to_s / value call recomputes from the new string.
  @normalized = @unnormalized = nil
  @string
end
|
115
|
+
|
116
|
+
|
117
|
+
# Orders this node against +other+ by comparing string forms.
# +other+ a String or a Text
# +returns+ the result of (to_s <=> arg.to_s)
def <=>( other )
  mine = to_s
  theirs = other.to_s
  mine <=> theirs
end
|
122
|
+
|
123
|
+
REFERENCE = /#{Entity::REFERENCE}/
|
124
|
+
# Returns the string value of this text node.  For a raw node this is the
# stored string, untouched.  Otherwise it is the stored string passed
# through Text::normalize together with the owning document's doctype (when
# the node has a parent that belongs to a document) and the entity filter
# given to the constructor.  The computed value is memoized.
#
#   # Assume that the entity "s" is defined to be "sean", and that the
#   # entity "r" is defined to be "russell"
#   t = Text.new( "< & sean russell", false, nil, false, ['s'] )
#   t.to_s   #-> "&lt; &amp; &s; russell"
#   t = Text.new( "< & &s; russell", false, nil, false )
#   t.to_s   #-> "&lt; &amp; &s; russell"
#   u = Text.new( "sean russell", false, nil, true )
#   u.to_s   #-> "sean russell"
def to_s
  return @string if @raw
  return @normalized if @normalized

  document = @parent ? @parent.document : nil
  doctype = document ? document.doctype : nil

  @normalized = Text.normalize( @string, doctype, @entity_filter )
end
|
149
|
+
|
150
|
+
# Debug representation: the inspect form of the stored string.
def inspect
  @string.inspect
end
|
153
|
+
|
154
|
+
# Returns the string value of this text.  This is the text without
# entities, as it might be used programmatically, or printed to the
# console.  This ignores the 'raw' attribute setting, and any
# entity_filter.
#
# The result comes from Text::unnormalize, which is given the stored string
# and the owning document's doctype (when the node has a parent that
# belongs to a document).
#
#   # Assume that the entity "s" is defined to be "sean", and that the
#   # entity "r" is defined to be "russell"
#   t = Text.new( "< & sean russell", false, nil, false, ['s'] )
#   t.value   #-> "< & sean russell"
#   t = Text.new( "< & &s; russell", false, nil, false )
#   t.value   #-> "< & sean russell"
#   u = Text.new( "sean russell", false, nil, true )
#   u.value   #-> "sean russell"
def value
  # NOTE(review): the original opened with the statement
  # "@unnormalized if @unnormalized", which has no effect -- it looks like a
  # missing "return".  The value is therefore recomputed on every call, and
  # that behaviour is kept here: turning it into an early return is only
  # safe once every mutator of @string also resets @unnormalized.
  document = @parent ? @parent.document : nil
  doctype = document ? document.doctype : nil
  @unnormalized = Text.unnormalize( @string, doctype )
end
|
176
|
+
|
177
|
+
# Sets the contents of this text node.  This expects the text to be
# unnormalized.  CR and CRLF line endings in +val+ are converted to LF, the
# memoized escaped/unescaped forms are discarded, and the node stops being
# raw.
#
#   e = Element.new( "a" )
#   e.add_text( "foo" )   # <a>foo</a>
#   e[0].value = "bar"    # <a>bar</a>
#   e[0].value = "<a>"    # <a>&lt;a&gt;</a>
def value=( val )
  @string = val.gsub( /\r\n?/, "\n" )
  @unnormalized = @normalized = nil
  @raw = false
end
|
190
|
+
|
191
|
+
# Recursively wraps +string+ so that lines break at a space.
#
# +string+ the text to wrap
# +width+ the position at (or before) which a line should be broken
# +addnewline+ (false) if true, a newline is also put in front of the
# first wrapped line.  It has no effect when no break is made.
#
# A string that is no longer than +width+ is returned unchanged.  When
# there is no space at or before +width+, the line is broken at the first
# space after it instead; when there is no space left at all the remaining
# text is returned unbroken.
def wrap(string, width, addnewline=false)
  return string if string.length <= width

  # Last space at or before the cutoff; failing that, the first one after
  # it (a word longer than +width+ cannot be split).
  place = string.rindex(' ', width) || string.index(' ', width)
  return string unless place

  wrapped = string[0, place] + "\n" + wrap(string[place + 1..-1], width)
  addnewline ? "\n" + wrapped : wrapped
end
|
201
|
+
|
202
|
+
# Prefixes every line of +string+ with indentation.
#
# +string+ the text to indent
# +level+ (1) how many copies of +style+ go in front of each line; a
# negative level returns +string+ untouched
# +style+ ("\t") the indentation unit
# +indentfirstline+ (true) if false, leading and trailing whitespace is
# stripped from the result, which removes the first line's indentation
#
# Trailing whitespace of each line, including the line terminator itself,
# is removed, so the indented lines are joined without a separator.
def indent_text(string, level=1, style="\t", indentfirstline=true)
  return string if level < 0

  indent_string = style * level
  # String#each no longer exists (Ruby 1.9+); each_line is the equivalent
  new_string = string.each_line.map { |line|
    (indent_string + line).sub(/[\s]+$/,'')
  }.join
  new_string.strip! unless indentfirstline
  new_string
end
|
213
|
+
|
214
|
+
# == DEPRECATED
# See REXML::Formatters
#
# Emits a deprecation warning, then writes this node to +writer+ through a
# formatter: REXML::Formatters::Pretty built with +indent+ when +indent+ is
# greater than -1, REXML::Formatters::Default otherwise.  +transitive+ and
# +ie_hack+ are accepted but not used.
def write( writer, indent=-1, transitive=false, ie_hack=false )
  Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
  formatter = indent > -1 ? REXML::Formatters::Pretty.new( indent ) : REXML::Formatters::Default.new
  formatter.write( self, writer )
end
|
226
|
+
|
227
|
+
# FIXME
# This probably won't work properly
#
# Returns the parent's xpath with "/text()" appended.  There is no guard
# for a node without a parent.
def xpath
  @parent.xpath + "/text()"
end
|
234
|
+
|
235
|
+
# Writes out text, substituting special characters beforehand.
# +out+ A String, IO, or any other object supporting <<( String )
# +input+ the text to substitute and then write out
#
# The substitutions are SPECIALS[0..5] => SUBSTITUTES[0..5], applied in that
# order to a copy of +input+; +input+ itself is not modified.
#
# NOTE(review): the original header also carried the sketch below, which
# turns code points of 0x100 and above into hexadecimal character
# references.  This method does not do that.
#
#   z=utf8.unpack("U*")
#   ascOut=""
#   z.each{|r|
#     if r < 0x100
#       ascOut.concat(r.chr)
#     else
#       ascOut.concat(sprintf("&#x%x;", r))
#     end
#   }
#   puts ascOut
def write_with_substitution out, input
  copy = input.clone
  0.upto(5) do |i|
    copy.gsub!( SPECIALS[i], SUBSTITUTES[i] )
  end
  out << copy
end
|
260
|
+
|
261
|
+
# Reads text, substituting entities
#
# +input+ the text to read; it is not modified
# +illegal+ (nil) optional Regexp; if it matches the text a ParseException
# is raised
#
# CR and CRLF line endings are converted to LF.  If the text contains an
# ampersand, SETUTITSBUS[0..4] are replaced by SLAICEPS[0..4] in order, and
# then numeric character references (decimal, or hexadecimal in either
# letter case) are replaced by the character they denote.
#
# Returns the substituted copy.
def self.read_with_substitution( input, illegal=nil )
  # dup rather than clone: the copy is mutated below, and clone would carry
  # over the frozen flag of a frozen argument
  copy = input.dup

  if illegal and copy =~ illegal
    raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
  end

  copy.gsub!( /\r\n?/, "\n" )
  if copy.include? ?&
    # Doing it like this rather than in a loop improves the speed
    copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
    copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
    copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
    copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
    copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
    copy.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
      m = $1
      # "x41" -> "0x41" so that Integer() parses it as hexadecimal
      m = "0#{m}" if m[0] == ?x
      [Integer(m)].pack('U*')
    }
  end
  copy
end
|
285
|
+
|
286
|
+
EREFERENCE = /&(?!#{Entity::NAME};)/
|
287
|
+
# Escapes all possible entities
|
288
|
+
def Text::normalize( input, doctype=nil, entity_filter=nil )
|
289
|
+
copy = input.to_s
|
290
|
+
# Doing it like this rather than in a loop improves the speed
|
291
|
+
#copy = copy.gsub( EREFERENCE, '&' )
|
292
|
+
copy = copy.gsub( "&", "&" )
|
293
|
+
if doctype
|
294
|
+
# Replace all ampersands that aren't part of an entity
|
295
|
+
doctype.entities.each_value do |entity|
|
296
|
+
copy = copy.gsub( entity.value,
|
297
|
+
"&#{entity.name};" ) if entity.value and
|
298
|
+
not( entity_filter and entity_filter.include?(entity) )
|
299
|
+
end
|
300
|
+
else
|
301
|
+
# Replace all ampersands that aren't part of an entity
|
302
|
+
DocType::DEFAULT_ENTITIES.each_value do |entity|
|
303
|
+
copy = copy.gsub(entity.value, "&#{entity.name};" )
|
304
|
+
end
|
305
|
+
end
|
306
|
+
copy
|
307
|
+
end
|
308
|
+
|
309
|
+
# Unescapes all possible entities
#
# +string+ the text to unescape; it is not modified
# +doctype+ (nil) if given, entity values are looked up with
# doctype.entity(name); otherwise DocType::DEFAULT_ENTITIES is consulted
# +filter+ (nil) names of entities that must be left as references
# +illegal+ (nil) accepted but not used
#
# CR and CRLF line endings are converted to LF.  If the text contains any
# reference, numeric character references are replaced by the character
# they denote, then named references with a known value are replaced by
# that value, and finally "&amp;" becomes "&".
#
# Returns the unescaped copy.
def self.unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  # dup rather than clone: the copy is mutated below, and clone would carry
  # over the frozen flag of a frozen argument
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE )
  return rv if matches.size == 0
  rv.gsub!( NUMERICENTITY ) {
    m = $1
    # "x41" -> "0x41" so that Integer() parses it as hexadecimal
    m = "0#{m}" if m[0] == ?x
    [Integer(m)].pack('U*')
  }
  # Keep the captured entity names only; numeric references capture nil
  matches.collect!{|x|x[0]}.compact!
  if matches.size > 0
    matches.each do |entity_reference|
      next if filter and filter.include?(entity_reference)
      entity_value =
        if doctype
          doctype.entity( entity_reference )
        else
          default = DocType::DEFAULT_ENTITIES[ entity_reference ]
          default && default.value
        end
      # Literal pattern and block replacement: neither regexp
      # metacharacters in the name nor backslashes in the value are
      # interpreted.
      rv.gsub!( "&#{entity_reference};" ) { entity_value } if entity_value
    end
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
|
343
|
+
end
|
344
|
+
end
|