rubysl-rexml 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (179) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.travis.yml +8 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE +25 -0
  6. data/README.md +29 -0
  7. data/Rakefile +1 -0
  8. data/lib/rexml/attlistdecl.rb +62 -0
  9. data/lib/rexml/attribute.rb +185 -0
  10. data/lib/rexml/cdata.rb +67 -0
  11. data/lib/rexml/child.rb +96 -0
  12. data/lib/rexml/comment.rb +80 -0
  13. data/lib/rexml/doctype.rb +271 -0
  14. data/lib/rexml/document.rb +230 -0
  15. data/lib/rexml/dtd/attlistdecl.rb +10 -0
  16. data/lib/rexml/dtd/dtd.rb +51 -0
  17. data/lib/rexml/dtd/elementdecl.rb +17 -0
  18. data/lib/rexml/dtd/entitydecl.rb +56 -0
  19. data/lib/rexml/dtd/notationdecl.rb +39 -0
  20. data/lib/rexml/element.rb +1227 -0
  21. data/lib/rexml/encoding.rb +71 -0
  22. data/lib/rexml/encodings/CP-1252.rb +103 -0
  23. data/lib/rexml/encodings/EUC-JP.rb +35 -0
  24. data/lib/rexml/encodings/ICONV.rb +22 -0
  25. data/lib/rexml/encodings/ISO-8859-1.rb +7 -0
  26. data/lib/rexml/encodings/ISO-8859-15.rb +72 -0
  27. data/lib/rexml/encodings/SHIFT-JIS.rb +37 -0
  28. data/lib/rexml/encodings/SHIFT_JIS.rb +1 -0
  29. data/lib/rexml/encodings/UNILE.rb +34 -0
  30. data/lib/rexml/encodings/US-ASCII.rb +30 -0
  31. data/lib/rexml/encodings/UTF-16.rb +35 -0
  32. data/lib/rexml/encodings/UTF-8.rb +18 -0
  33. data/lib/rexml/entity.rb +166 -0
  34. data/lib/rexml/formatters/default.rb +109 -0
  35. data/lib/rexml/formatters/pretty.rb +138 -0
  36. data/lib/rexml/formatters/transitive.rb +56 -0
  37. data/lib/rexml/functions.rb +382 -0
  38. data/lib/rexml/instruction.rb +70 -0
  39. data/lib/rexml/light/node.rb +196 -0
  40. data/lib/rexml/namespace.rb +47 -0
  41. data/lib/rexml/node.rb +75 -0
  42. data/lib/rexml/output.rb +24 -0
  43. data/lib/rexml/parent.rb +166 -0
  44. data/lib/rexml/parseexception.rb +51 -0
  45. data/lib/rexml/parsers/baseparser.rb +503 -0
  46. data/lib/rexml/parsers/lightparser.rb +60 -0
  47. data/lib/rexml/parsers/pullparser.rb +196 -0
  48. data/lib/rexml/parsers/sax2parser.rb +238 -0
  49. data/lib/rexml/parsers/streamparser.rb +46 -0
  50. data/lib/rexml/parsers/treeparser.rb +97 -0
  51. data/lib/rexml/parsers/ultralightparser.rb +56 -0
  52. data/lib/rexml/parsers/xpathparser.rb +698 -0
  53. data/lib/rexml/quickpath.rb +266 -0
  54. data/lib/rexml/rexml.rb +32 -0
  55. data/lib/rexml/sax2listener.rb +97 -0
  56. data/lib/rexml/source.rb +251 -0
  57. data/lib/rexml/streamlistener.rb +92 -0
  58. data/lib/rexml/syncenumerator.rb +33 -0
  59. data/lib/rexml/text.rb +344 -0
  60. data/lib/rexml/undefinednamespaceexception.rb +8 -0
  61. data/lib/rexml/validation/relaxng.rb +559 -0
  62. data/lib/rexml/validation/validation.rb +155 -0
  63. data/lib/rexml/validation/validationexception.rb +9 -0
  64. data/lib/rexml/xmldecl.rb +119 -0
  65. data/lib/rexml/xmltokens.rb +18 -0
  66. data/lib/rexml/xpath.rb +66 -0
  67. data/lib/rexml/xpath_parser.rb +792 -0
  68. data/lib/rubysl/rexml.rb +1 -0
  69. data/lib/rubysl/rexml/version.rb +5 -0
  70. data/rubysl-rexml.gemspec +23 -0
  71. data/spec/attribute/clone_spec.rb +10 -0
  72. data/spec/attribute/element_spec.rb +22 -0
  73. data/spec/attribute/equal_value_spec.rb +17 -0
  74. data/spec/attribute/hash_spec.rb +12 -0
  75. data/spec/attribute/initialize_spec.rb +28 -0
  76. data/spec/attribute/inspect_spec.rb +19 -0
  77. data/spec/attribute/namespace_spec.rb +23 -0
  78. data/spec/attribute/node_type_spec.rb +9 -0
  79. data/spec/attribute/prefix_spec.rb +17 -0
  80. data/spec/attribute/remove_spec.rb +19 -0
  81. data/spec/attribute/to_s_spec.rb +13 -0
  82. data/spec/attribute/to_string_spec.rb +14 -0
  83. data/spec/attribute/value_spec.rb +14 -0
  84. data/spec/attribute/write_spec.rb +22 -0
  85. data/spec/attribute/xpath_spec.rb +19 -0
  86. data/spec/attributes/add_spec.rb +6 -0
  87. data/spec/attributes/append_spec.rb +6 -0
  88. data/spec/attributes/delete_all_spec.rb +30 -0
  89. data/spec/attributes/delete_spec.rb +26 -0
  90. data/spec/attributes/each_attribute_spec.rb +24 -0
  91. data/spec/attributes/each_spec.rb +24 -0
  92. data/spec/attributes/element_reference_spec.rb +18 -0
  93. data/spec/attributes/element_set_spec.rb +25 -0
  94. data/spec/attributes/get_attribute_ns_spec.rb +13 -0
  95. data/spec/attributes/get_attribute_spec.rb +28 -0
  96. data/spec/attributes/initialize_spec.rb +18 -0
  97. data/spec/attributes/length_spec.rb +6 -0
  98. data/spec/attributes/namespaces_spec.rb +5 -0
  99. data/spec/attributes/prefixes_spec.rb +23 -0
  100. data/spec/attributes/shared/add.rb +17 -0
  101. data/spec/attributes/shared/length.rb +12 -0
  102. data/spec/attributes/size_spec.rb +6 -0
  103. data/spec/attributes/to_a_spec.rb +20 -0
  104. data/spec/cdata/clone_spec.rb +9 -0
  105. data/spec/cdata/initialize_spec.rb +24 -0
  106. data/spec/cdata/shared/to_s.rb +11 -0
  107. data/spec/cdata/to_s_spec.rb +6 -0
  108. data/spec/cdata/value_spec.rb +6 -0
  109. data/spec/document/add_element_spec.rb +30 -0
  110. data/spec/document/add_spec.rb +60 -0
  111. data/spec/document/clone_spec.rb +19 -0
  112. data/spec/document/doctype_spec.rb +14 -0
  113. data/spec/document/encoding_spec.rb +21 -0
  114. data/spec/document/expanded_name_spec.rb +15 -0
  115. data/spec/document/new_spec.rb +37 -0
  116. data/spec/document/node_type_spec.rb +7 -0
  117. data/spec/document/root_spec.rb +11 -0
  118. data/spec/document/stand_alone_spec.rb +18 -0
  119. data/spec/document/version_spec.rb +13 -0
  120. data/spec/document/write_spec.rb +38 -0
  121. data/spec/document/xml_decl_spec.rb +14 -0
  122. data/spec/element/add_attribute_spec.rb +40 -0
  123. data/spec/element/add_attributes_spec.rb +21 -0
  124. data/spec/element/add_element_spec.rb +38 -0
  125. data/spec/element/add_namespace_spec.rb +23 -0
  126. data/spec/element/add_text_spec.rb +23 -0
  127. data/spec/element/attribute_spec.rb +16 -0
  128. data/spec/element/attributes_spec.rb +18 -0
  129. data/spec/element/cdatas_spec.rb +23 -0
  130. data/spec/element/clone_spec.rb +28 -0
  131. data/spec/element/comments_spec.rb +20 -0
  132. data/spec/element/delete_attribute_spec.rb +38 -0
  133. data/spec/element/delete_element_spec.rb +50 -0
  134. data/spec/element/delete_namespace_spec.rb +24 -0
  135. data/spec/element/document_spec.rb +17 -0
  136. data/spec/element/each_element_with_attribute_spec.rb +34 -0
  137. data/spec/element/each_element_with_text_spec.rb +30 -0
  138. data/spec/element/get_text_spec.rb +17 -0
  139. data/spec/element/has_attributes_spec.rb +16 -0
  140. data/spec/element/has_elements_spec.rb +17 -0
  141. data/spec/element/has_text_spec.rb +15 -0
  142. data/spec/element/inspect_spec.rb +26 -0
  143. data/spec/element/instructions_spec.rb +20 -0
  144. data/spec/element/namespace_spec.rb +26 -0
  145. data/spec/element/namespaces_spec.rb +31 -0
  146. data/spec/element/new_spec.rb +34 -0
  147. data/spec/element/next_element_spec.rb +18 -0
  148. data/spec/element/node_type_spec.rb +7 -0
  149. data/spec/element/prefixes_spec.rb +22 -0
  150. data/spec/element/previous_element_spec.rb +19 -0
  151. data/spec/element/raw_spec.rb +23 -0
  152. data/spec/element/root_spec.rb +27 -0
  153. data/spec/element/text_spec.rb +45 -0
  154. data/spec/element/texts_spec.rb +15 -0
  155. data/spec/element/whitespace_spec.rb +22 -0
  156. data/spec/node/each_recursive_spec.rb +20 -0
  157. data/spec/node/find_first_recursive_spec.rb +24 -0
  158. data/spec/node/index_in_parent_spec.rb +14 -0
  159. data/spec/node/next_sibling_node_spec.rb +20 -0
  160. data/spec/node/parent_spec.rb +20 -0
  161. data/spec/node/previous_sibling_node_spec.rb +20 -0
  162. data/spec/shared/each_element.rb +35 -0
  163. data/spec/shared/elements_to_a.rb +35 -0
  164. data/spec/text/append_spec.rb +9 -0
  165. data/spec/text/clone_spec.rb +9 -0
  166. data/spec/text/comparison_spec.rb +24 -0
  167. data/spec/text/empty_spec.rb +11 -0
  168. data/spec/text/indent_text_spec.rb +23 -0
  169. data/spec/text/inspect_spec.rb +7 -0
  170. data/spec/text/new_spec.rb +48 -0
  171. data/spec/text/node_type_spec.rb +7 -0
  172. data/spec/text/normalize_spec.rb +7 -0
  173. data/spec/text/read_with_substitution_spec.rb +12 -0
  174. data/spec/text/to_s_spec.rb +17 -0
  175. data/spec/text/unnormalize_spec.rb +7 -0
  176. data/spec/text/value_spec.rb +36 -0
  177. data/spec/text/wrap_spec.rb +20 -0
  178. data/spec/text/write_with_substitution_spec.rb +32 -0
  179. metadata +385 -0
@@ -0,0 +1,92 @@
1
+ module REXML
2
+ # A template for stream parser listeners.
3
+ # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
4
+ # processed; REXML doesn't yet handle doctype entity declarations, so you
5
+ # have to parse them out yourself.
6
+ module StreamListener
7
+ # Called when a tag is encountered.
8
+ # @p name the tag name
9
+ # @p attrs an array of arrays of attribute/value pairs, suitable for
10
+ # use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
11
+ # will result in
12
+ # tag_start( "tag", [["attr1","value1"],["attr2","value2"]])
13
+ def tag_start name, attrs
14
+ end
15
+ # Called when the end tag is reached. In the case of <tag/>, tag_end
16
+ # will be called immediately after tag_start
17
+ # @p name the name of the tag
18
+ def tag_end name
19
+ end
20
+ # Called when text is encountered in the document
21
+ # @p text the text content.
22
+ def text text
23
+ end
24
+ # Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
25
+ # @p name the instruction name; in the example, "xsl"
26
+ # @p instruction the rest of the instruction. In the example,
27
+ # "sheet='foo'"
28
+ def instruction name, instruction
29
+ end
30
+ # Called when a comment is encountered.
31
+ # @p comment The content of the comment
32
+ def comment comment
33
+ end
34
+ # Handles a doctype declaration. Any attributes of the doctype which are
35
+ # not supplied will be nil. EG, <!DOCTYPE me PUBLIC "foo" "bar">
36
+ # @p name the name of the doctype; EG, "me"
37
+ # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
38
+ # @p long_name the supplied long name, or nil. EG, "foo"
39
+ # @p uri the uri of the doctype, or nil. EG, "bar"
40
+ def doctype name, pub_sys, long_name, uri
41
+ end
42
+ # Called when the doctype is done
43
+ def doctype_end
44
+ end
45
+ # If a doctype includes an ATTLIST declaration, it will cause this
46
+ # method to be called. The content is the declaration itself, unparsed.
47
+ # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
48
+ # attr CDATA #REQUIRED". This is the same for all of the .*decl
49
+ # methods. NOTE(review): confirm how element_name and attributes are populated by the parser.
50
+ def attlistdecl element_name, attributes, raw_content
51
+ end
52
+ # <!ELEMENT ...>
53
+ def elementdecl content
54
+ end
55
+ # <!ENTITY ...>
56
+ # The argument passed to this method is an array of the entity
57
+ # declaration. It can be in a number of formats, but in general it
58
+ # returns (example, result):
59
+ # <!ENTITY % YN '"Yes"'>
60
+ # ["%", "YN", "'\"Yes\"'", "\""]
61
+ # <!ENTITY % YN 'Yes'>
62
+ # ["%", "YN", "'Yes'", "s"]
63
+ # <!ENTITY WhatHeSaid "He said %YN;">
64
+ # ["WhatHeSaid", "\"He said %YN;\"", "YN"]
65
+ # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
66
+ # ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
67
+ # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
68
+ # ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
69
+ # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
70
+ # ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
71
+ def entitydecl content
72
+ end
73
+ # <!NOTATION ...>
74
+ def notationdecl content
75
+ end
76
+ # Called when %foo; is encountered in a doctype declaration.
77
+ # @p content "foo"
78
+ def entity content
79
+ end
80
+ # Called when <![CDATA[ ... ]]> is encountered in a document.
81
+ # @p content "..."
82
+ def cdata content
83
+ end
84
+ # Called when an XML PI is encountered in the document.
85
+ # EG: <?xml version="1.0" encoding="utf"?>
86
+ # @p version the version attribute value. EG, "1.0"
87
+ # @p encoding the encoding attribute value, or nil. EG, "utf"
88
+ # @p standalone the standalone attribute value, or nil. EG, nil
89
+ def xmldecl version, encoding, standalone
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,33 @@
1
module REXML
  # Walks several Enumerable objects in lock-step, yielding one "row" per
  # step. A row holds the n-th element of every enumerated object; objects
  # that are shorter than the longest one contribute nil once exhausted.
  class SyncEnumerator
    include Enumerable

    # Creates a new SyncEnumerator which enumerates rows of given
    # Enumerable objects. Every object must respond to +size+ and +zip+.
    # Calling it without arguments yields an enumerator with no rows.
    def initialize(*enums)
      @gens = enums
      # The longest object drives the iteration; nil when nothing was given.
      @biggest = @gens.max_by(&:size)
    end

    # Returns the number of enumerated Enumerable objects, i.e. the size
    # of each row.
    def size
      @gens.size
    end

    # Returns the number of enumerated Enumerable objects, i.e. the size
    # of each row.
    def length
      @gens.length
    end

    # Enumerates rows of the Enumerable objects, yielding the elements of
    # each row as separate block arguments. Returns self when a block is
    # given, or an Enumerator over the rows when it is not.
    def each
      return enum_for(:each) unless block_given?
      # Nothing to walk when the enumerator was built without arguments.
      return self if @biggest.nil?

      # @biggest is zipped with every object (itself included), so the
      # leading element of each tuple is a duplicate and is dropped.
      @biggest.zip( *@gens ) {|tuple|
        yield(*tuple[1..-1])
      }
      self
    end
  end
end
@@ -0,0 +1,344 @@
1
+ require 'rexml/entity'
2
+ require 'rexml/doctype'
3
+ require 'rexml/child'
4
+ require 'rexml/doctype'
5
+ require 'rexml/parseexception'
6
+
7
+ module REXML
8
+ # Represents text nodes in an XML document
9
+ class Text < Child
10
+ include Comparable
11
+ # The order in which the substitutions occur
12
+ SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
13
+ SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
14
+ # Characters which are substituted in written strings
15
+ SLAICEPS = [ '<', '>', '"', "'", '&' ]
16
+ SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
17
+
18
+ # If +raw+ is true, then REXML leaves the value alone
19
+ attr_accessor :raw
20
+
21
+ ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
22
+ NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
23
+
24
# Constructor
# +arg+ if a String, the content is set to a copy of the String. If a Text,
# the object is shallowly cloned: its stored string and its +raw+ flag are
# copied. Any other type raises a RuntimeError.
#
# +respect_whitespace+ (boolean, false) if true, whitespace is
# respected; otherwise runs of a repeated space, newline or tab are
# squeezed to a single character (String arguments only).
#
# +parent+ (nil) if this is a Parent object, the parent
# will be set to this.
#
# +raw+ (nil) This argument can be given three values.
# If true, then the value used to construct this object is expected to
# contain no unescaped XML markup, and REXML will not change the text. If
# this value is false, the string may contain any characters, and REXML will
# escape any and all defined entities whose values are contained in the
# text.  If this value is nil (the default), then the raw value of the
# parent will be used as the raw value for this node.  If there is no raw
# value for the parent, and no value is supplied, the default is false.
# Use this field if you have entities defined for some text, and you don't
# want REXML to escape that text in output.
#   Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
#   Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
#   Text.new( "<&", false, nil, true )  #-> raises (illegal character)
#   Text.new( "&lt;&amp;", false, nil, true )  #-> "&lt;&amp;"
#   # Assume that the entity "s" is defined to be "sean"
#   # and that the entity "r" is defined to be "russell"
#   Text.new( "sean russell" )          #-> "&s; &r;"
#   Text.new( "sean russell", false, nil, true ) #-> "sean russell"
#
# +entity_filter+ (nil) This can be an array of entities to match in the
# supplied text.  This argument is only useful if +raw+ is set to false;
# it is stored and handed to Text::normalize by to_s.
#   Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
#   Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
# In the last example, the +entity_filter+ argument is ignored.
# NOTE(review): confirm the first filter example against Text::normalize.
#
# +illegal+ INTERNAL USE ONLY. Pattern that a raw string must not match.
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
  entity_filter=nil, illegal=ILLEGAL )

  @raw = false

  if parent
    super( parent )
    @raw = parent.raw
  else
    @parent = nil
  end

  @raw = raw unless raw.nil?
  @entity_filter = entity_filter
  @normalized = @unnormalized = nil

  if arg.kind_of? String
    # dup (not clone) so that a frozen argument still yields a string we
    # are allowed to modify in place below.
    @string = arg.dup
    @string.squeeze!(" \n\t") unless respect_whitespace
  elsif arg.kind_of? Text
    # Copy the stored string itself: going through to_s would escape a
    # non-raw string a second time, and would share the very same String
    # object with a raw source node.
    @string = arg.instance_variable_get( :@string ).dup
    @raw = arg.raw
  else
    raise "Illegal argument of type #{arg.class} for Text constructor (#{arg})"
  end

  @string.gsub!( /\r\n?/, "\n" )

  # check for illegal characters
  if @raw
    if @string =~ illegal
      raise "Illegal character '#{$1}' in raw string \"#{@string}\""
    end
  end
end
95
+
96
+ def node_type # Identifies this node as a text node; always returns :text.
97
+ :text
98
+ end
99
+
100
+ def empty? # True when the stored string has zero length.
101
+ @string.size==0
102
+ end
103
+
104
+
105
+ def clone # Builds a new Text from this one by passing self to Text.new.
106
+ return Text.new(self)
107
+ end
108
+
109
+
110
# Appends text to this text node.  The text is appended in the +raw+ mode
# of this text node. CR and CRLF line endings in the appended text are
# converted to "\n".
#
# The cached results of to_s and value are discarded, because they were
# computed from the string as it was before the append.
#
# +to_append+ the String to add
# +returns+ the updated internal string
def <<( to_append )
  @normalized = @unnormalized = nil
  @string << to_append.gsub( /\r\n?/, "\n" )
end
115
+
116
+
117
+ # +other+ a String or a Text (its to_s is what gets compared)
118
+ # +returns+ the result of (to_s <=> other.to_s)
119
+ def <=>( other )
120
+ to_s() <=> other.to_s
121
+ end
122
+
123
+ REFERENCE = /#{Entity::REFERENCE}/
124
+ # Returns the string value of this text node. This string is always
125
+ # escaped, meaning that it is a valid XML text node string, and all
126
+ # entities that can be escaped, have been inserted. This method respects
127
+ # the entity filter set in the constructor. Raw nodes return their string as stored.
128
+ #
129
+ # # Assume that the entity "s" is defined to be "sean", and that the
130
+ # # entity "r" is defined to be "russell"
131
+ # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
132
+ # t.to_s #-> "&lt; &amp; &s; russell"
133
+ # t = Text.new( "< & &s; russell", false, nil, false )
134
+ # t.to_s #-> "&lt; &amp; &s; russell"
135
+ # u = Text.new( "sean russell", false, nil, true )
136
+ # u.to_s #-> "sean russell"
137
+ def to_s
138
+ return @string if @raw # raw text is handed back exactly as stored
139
+ return @normalized if @normalized # reuse the cached escaped form
140
+
141
+ doctype = nil
142
+ if @parent
143
+ doc = @parent.document
144
+ doctype = doc.doctype if doc # the doctype of the owning document, when there is one
145
+ end
146
+
147
+ @normalized = Text::normalize( @string, doctype, @entity_filter ) # cache, then return
148
+ end
149
+
150
+ def inspect # Delegates to the stored string's inspect.
151
+ @string.inspect
152
+ end
153
+
154
+ # Returns the string value of this text. This is the text without
155
+ # entities, as it might be used programmatically, or printed to the
156
+ # console. This ignores the 'raw' attribute setting, and any
157
+ # entity_filter.
158
+ #
159
+ # # Assume that the entity "s" is defined to be "sean", and that the
160
+ # # entity "r" is defined to be "russell"
161
+ # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
162
+ # t.value #-> "< & sean russell"
163
+ # t = Text.new( "< & &s; russell", false, nil, false )
164
+ # t.value #-> "< & sean russell"
165
+ # u = Text.new( "sean russell", false, nil, true )
166
+ # u.value #-> "sean russell"
167
+ def value
168
+ @unnormalized if @unnormalized # NOTE(review): no `return`, so this line has no effect and the cache is never reused
169
+ doctype = nil
170
+ if @parent
171
+ doc = @parent.document
172
+ doctype = doc.doctype if doc # the doctype of the owning document, when there is one
173
+ end
174
+ @unnormalized = Text::unnormalize( @string, doctype ) # recomputed on every call, then returned
175
+ end
176
+
177
+ # Sets the contents of this text node. This expects the text to be
178
+ # unnormalized. Clears both cached forms and resets +raw+ to false.
179
+ #
180
+ # e = Element.new( "a" )
181
+ # e.add_text( "foo" ) # <a>foo</a>
182
+ # e[0].value = "bar" # <a>bar</a>
183
+ # e[0].value = "<a>" # <a>&lt;a&gt;</a>
184
+ def value=( val )
185
+ @string = val.gsub( /\r\n?/, "\n" ) # CR and CRLF become "\n"; gsub returns a new string
186
+ @unnormalized = nil
187
+ @normalized = nil
188
+ @raw = false
189
+ end
190
+
191
# Recursively wraps +string+ so that lines break at spaces.
#
# +string+ the text to wrap
# +width+ the preferred maximum line length
# +addnewline+ (false) if true, a newline is also placed in front of the
# first wrapped line; it is not passed on to the recursive calls
#
# Each break is made at the last space at or before +width+. When a word
# is longer than +width+ there is no such space; the break is then made at
# the first space after it, and a string without any usable space is
# returned unchanged.
def wrap(string, width, addnewline=false)
  return string if string.length <= width

  # Prefer the last space before the cutoff; fall back to the next space
  # so that an over-long word does not abort the wrapping.
  place = string.rindex(' ', width) || string.index(' ', width)
  return string unless place

  head = string[0,place]
  tail = wrap(string[place+1..-1], width)
  if addnewline then
    "\n" + head + "\n" + tail
  else
    head + "\n" + tail
  end
end
201
+
202
# Prefixes every line of +string+ with an indentation.
#
# +string+ the text to indent
# +level+ (1) how many times +style+ is repeated; a negative level returns
# +string+ untouched
# +style+ ("\t") the indentation unit
# +indentfirstline+ (true) if false, leading and trailing whitespace is
# stripped from the result, which removes the first line's indentation
#
# Trailing whitespace, including the line terminator, is removed from
# each line before it is appended to the result.
def indent_text(string, level=1, style="\t", indentfirstline=true)
  return string if level < 0
  indent_string = style * level
  new_string = ''.dup
  # String#each no longer exists (removed in Ruby 1.9); each_line is the
  # line iterator.
  string.each_line { |line|
    new_string << (indent_string + line).sub(/[\s]+$/,'')
  }
  new_string.strip! unless indentfirstline
  return new_string
end
213
+
214
+ # == DEPRECATED
215
+ # See REXML::Formatters
216
+ # Warns, then writes self to +writer+ with Formatters::Pretty when +indent+ > -1, else Formatters::Default.
217
+ def write( writer, indent=-1, transitive=false, ie_hack=false )
218
+ Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
219
+ formatter = if indent > -1
220
+ REXML::Formatters::Pretty.new( indent )
221
+ else
222
+ REXML::Formatters::Default.new
223
+ end
224
+ formatter.write( self, writer ) # +transitive+ and +ie_hack+ are not used by this method
225
+ end
226
+
227
+ # FIXME
228
+ # This probably won't work properly. Returns the parent's xpath with "/text()" appended.
229
+ def xpath
230
+ path = @parent.xpath # NOTE(review): @parent is nil for a node built without a parent, which would fail here
231
+ path += "/text()"
232
+ return path
233
+ end
234
+
235
# Writes out text, substituting special characters beforehand.
# +out+ A String, IO, or any other object supporting <<( String )
# +input+ the text to substitute and then write out; it is not modified
#
# The substitutions are those of SPECIALS / SUBSTITUTES, applied in order.
#
# Example of turning UTF-8 into ASCII with character references:
#   z=utf8.unpack("U*")
#   ascOut=""
#   z.each{|r|
#     if r < 0x100
#       ascOut.concat(r.chr)
#     else
#       ascOut.concat(sprintf("&#x%x;", r))
#     end
#   }
#   puts ascOut
def write_with_substitution out, input
  # dup rather than clone: clone keeps the frozen state of +input+, and
  # the in-place substitutions below would then raise.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
  copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
  copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
  copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
  copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
  copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
  out << copy
end
260
+
261
# Reads text, substituting entities
#
# +input+ the text to read; it is not modified
# +illegal+ (nil) an optional pattern; if +input+ matches it, a
# ParseException is raised
# +returns+ a copy of +input+ with CR/CRLF converted to "\n", the five
# predefined entities of SETUTITSBUS replaced by their characters, and
# numeric character references replaced by the referenced character
def Text::read_with_substitution( input, illegal=nil )
  # dup rather than clone, so that a frozen +input+ can be processed.
  copy = input.dup

  if illegal and copy =~ illegal
    raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
  end

  copy.gsub!( /\r\n?/, "\n" )
  if copy.include? ?&
    copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
    copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
    copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
    copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
    copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
    # NUMERICENTITY accepts upper- and lower-case hexadecimal digits, the
    # same pattern Text::unnormalize uses.
    copy.gsub!( NUMERICENTITY ) {|m|
      m=$1
      # "x41" becomes "0x41" so that Integer() reads it as hexadecimal
      m = "0#{m}" if m[0] == ?x
      [Integer(m)].pack('U*')
    }
  end
  copy
end
285
+
286
+ EREFERENCE = /&(?!#{Entity::NAME};)/
287
# Escapes all possible entities
#
# +input+ the text to escape (converted with to_s)
# +doctype+ (nil) if given, the entities of this doctype are used;
# otherwise DocType::DEFAULT_ENTITIES are used
# +entity_filter+ (nil) only consulted together with +doctype+: entities
# whose name (or whose Entity object) is contained in it are not
# substituted
# +returns+ a new string in which every "&" has become "&amp;" and every
# occurrence of an entity's value has become a reference to that entity
def Text::normalize( input, doctype=nil, entity_filter=nil )
  copy = input.to_s
  # Doing it like this rather than in a loop improves the speed
  #copy = copy.gsub( EREFERENCE, '&amp;' )
  copy = copy.gsub( "&", "&amp;" )
  if doctype
    doctype.entities.each_value do |entity|
      next unless entity.value
      # The filter holds entity names, as it does in Text::unnormalize;
      # Entity objects are still accepted for backward compatibility.
      next if entity_filter and
        ( entity_filter.include?( entity.name ) or entity_filter.include?( entity ) )
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  else
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      copy = copy.gsub(entity.value, "&#{entity.name};" )
    end
  end
  copy
end
308
+
309
# Unescapes all possible entities
#
# +string+ the text to unescape; it is not modified
# +doctype+ (nil) if given, entity values are looked up with
# doctype.entity; otherwise DocType::DEFAULT_ENTITIES is used
# +filter+ (nil) entity names contained in it are left as references
# +illegal+ (nil) not used by this method
# +returns+ a new string with CR/CRLF converted to "\n", numeric character
# references replaced by their character and known entity references
# replaced by their value
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  # dup rather than clone, so that a frozen +string+ can be processed.
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE )
  return rv if matches.size == 0
  rv.gsub!( NUMERICENTITY ) {|m|
    m=$1
    # "x41" becomes "0x41" so that Integer() reads it as hexadecimal
    m = "0#{m}" if m[0] == ?x
    [Integer(m)].pack('U*')
  }
  # Keep the first capture of every match and drop the nil entries.
  matches.collect!{|x|x[0]}.compact!
  if matches.size > 0
    if doctype
      matches.each do |entity_reference|
        unless filter and filter.include?(entity_reference)
          entity_value = doctype.entity( entity_reference )
          # Escaped so that characters such as "." in a name are matched
          # literally.
          re = /&#{Regexp.escape( entity_reference )};/
          rv.gsub!( re, entity_value ) if entity_value
        end
      end
    else
      matches.each do |entity_reference|
        unless filter and filter.include?(entity_reference)
          entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
          re = /&#{Regexp.escape( entity_reference )};/
          rv.gsub!( re, entity_value.value ) if entity_value
        end
      end
    end
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
343
+ end
344
+ end