rubysl-rexml 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.travis.yml +8 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE +25 -0
  6. data/README.md +29 -0
  7. data/Rakefile +1 -0
  8. data/lib/rexml/attlistdecl.rb +62 -0
  9. data/lib/rexml/attribute.rb +185 -0
  10. data/lib/rexml/cdata.rb +67 -0
  11. data/lib/rexml/child.rb +96 -0
  12. data/lib/rexml/comment.rb +80 -0
  13. data/lib/rexml/doctype.rb +271 -0
  14. data/lib/rexml/document.rb +230 -0
  15. data/lib/rexml/dtd/attlistdecl.rb +10 -0
  16. data/lib/rexml/dtd/dtd.rb +51 -0
  17. data/lib/rexml/dtd/elementdecl.rb +17 -0
  18. data/lib/rexml/dtd/entitydecl.rb +56 -0
  19. data/lib/rexml/dtd/notationdecl.rb +39 -0
  20. data/lib/rexml/element.rb +1227 -0
  21. data/lib/rexml/encoding.rb +71 -0
  22. data/lib/rexml/encodings/CP-1252.rb +103 -0
  23. data/lib/rexml/encodings/EUC-JP.rb +35 -0
  24. data/lib/rexml/encodings/ICONV.rb +22 -0
  25. data/lib/rexml/encodings/ISO-8859-1.rb +7 -0
  26. data/lib/rexml/encodings/ISO-8859-15.rb +72 -0
  27. data/lib/rexml/encodings/SHIFT-JIS.rb +37 -0
  28. data/lib/rexml/encodings/SHIFT_JIS.rb +1 -0
  29. data/lib/rexml/encodings/UNILE.rb +34 -0
  30. data/lib/rexml/encodings/US-ASCII.rb +30 -0
  31. data/lib/rexml/encodings/UTF-16.rb +35 -0
  32. data/lib/rexml/encodings/UTF-8.rb +18 -0
  33. data/lib/rexml/entity.rb +166 -0
  34. data/lib/rexml/formatters/default.rb +109 -0
  35. data/lib/rexml/formatters/pretty.rb +138 -0
  36. data/lib/rexml/formatters/transitive.rb +56 -0
  37. data/lib/rexml/functions.rb +382 -0
  38. data/lib/rexml/instruction.rb +70 -0
  39. data/lib/rexml/light/node.rb +196 -0
  40. data/lib/rexml/namespace.rb +47 -0
  41. data/lib/rexml/node.rb +75 -0
  42. data/lib/rexml/output.rb +24 -0
  43. data/lib/rexml/parent.rb +166 -0
  44. data/lib/rexml/parseexception.rb +51 -0
  45. data/lib/rexml/parsers/baseparser.rb +503 -0
  46. data/lib/rexml/parsers/lightparser.rb +60 -0
  47. data/lib/rexml/parsers/pullparser.rb +196 -0
  48. data/lib/rexml/parsers/sax2parser.rb +238 -0
  49. data/lib/rexml/parsers/streamparser.rb +46 -0
  50. data/lib/rexml/parsers/treeparser.rb +97 -0
  51. data/lib/rexml/parsers/ultralightparser.rb +56 -0
  52. data/lib/rexml/parsers/xpathparser.rb +698 -0
  53. data/lib/rexml/quickpath.rb +266 -0
  54. data/lib/rexml/rexml.rb +32 -0
  55. data/lib/rexml/sax2listener.rb +97 -0
  56. data/lib/rexml/source.rb +251 -0
  57. data/lib/rexml/streamlistener.rb +92 -0
  58. data/lib/rexml/syncenumerator.rb +33 -0
  59. data/lib/rexml/text.rb +344 -0
  60. data/lib/rexml/undefinednamespaceexception.rb +8 -0
  61. data/lib/rexml/validation/relaxng.rb +559 -0
  62. data/lib/rexml/validation/validation.rb +155 -0
  63. data/lib/rexml/validation/validationexception.rb +9 -0
  64. data/lib/rexml/xmldecl.rb +119 -0
  65. data/lib/rexml/xmltokens.rb +18 -0
  66. data/lib/rexml/xpath.rb +66 -0
  67. data/lib/rexml/xpath_parser.rb +792 -0
  68. data/lib/rubysl/rexml.rb +1 -0
  69. data/lib/rubysl/rexml/version.rb +5 -0
  70. data/rubysl-rexml.gemspec +23 -0
  71. data/spec/attribute/clone_spec.rb +10 -0
  72. data/spec/attribute/element_spec.rb +22 -0
  73. data/spec/attribute/equal_value_spec.rb +17 -0
  74. data/spec/attribute/hash_spec.rb +12 -0
  75. data/spec/attribute/initialize_spec.rb +28 -0
  76. data/spec/attribute/inspect_spec.rb +19 -0
  77. data/spec/attribute/namespace_spec.rb +23 -0
  78. data/spec/attribute/node_type_spec.rb +9 -0
  79. data/spec/attribute/prefix_spec.rb +17 -0
  80. data/spec/attribute/remove_spec.rb +19 -0
  81. data/spec/attribute/to_s_spec.rb +13 -0
  82. data/spec/attribute/to_string_spec.rb +14 -0
  83. data/spec/attribute/value_spec.rb +14 -0
  84. data/spec/attribute/write_spec.rb +22 -0
  85. data/spec/attribute/xpath_spec.rb +19 -0
  86. data/spec/attributes/add_spec.rb +6 -0
  87. data/spec/attributes/append_spec.rb +6 -0
  88. data/spec/attributes/delete_all_spec.rb +30 -0
  89. data/spec/attributes/delete_spec.rb +26 -0
  90. data/spec/attributes/each_attribute_spec.rb +24 -0
  91. data/spec/attributes/each_spec.rb +24 -0
  92. data/spec/attributes/element_reference_spec.rb +18 -0
  93. data/spec/attributes/element_set_spec.rb +25 -0
  94. data/spec/attributes/get_attribute_ns_spec.rb +13 -0
  95. data/spec/attributes/get_attribute_spec.rb +28 -0
  96. data/spec/attributes/initialize_spec.rb +18 -0
  97. data/spec/attributes/length_spec.rb +6 -0
  98. data/spec/attributes/namespaces_spec.rb +5 -0
  99. data/spec/attributes/prefixes_spec.rb +23 -0
  100. data/spec/attributes/shared/add.rb +17 -0
  101. data/spec/attributes/shared/length.rb +12 -0
  102. data/spec/attributes/size_spec.rb +6 -0
  103. data/spec/attributes/to_a_spec.rb +20 -0
  104. data/spec/cdata/clone_spec.rb +9 -0
  105. data/spec/cdata/initialize_spec.rb +24 -0
  106. data/spec/cdata/shared/to_s.rb +11 -0
  107. data/spec/cdata/to_s_spec.rb +6 -0
  108. data/spec/cdata/value_spec.rb +6 -0
  109. data/spec/document/add_element_spec.rb +30 -0
  110. data/spec/document/add_spec.rb +60 -0
  111. data/spec/document/clone_spec.rb +19 -0
  112. data/spec/document/doctype_spec.rb +14 -0
  113. data/spec/document/encoding_spec.rb +21 -0
  114. data/spec/document/expanded_name_spec.rb +15 -0
  115. data/spec/document/new_spec.rb +37 -0
  116. data/spec/document/node_type_spec.rb +7 -0
  117. data/spec/document/root_spec.rb +11 -0
  118. data/spec/document/stand_alone_spec.rb +18 -0
  119. data/spec/document/version_spec.rb +13 -0
  120. data/spec/document/write_spec.rb +38 -0
  121. data/spec/document/xml_decl_spec.rb +14 -0
  122. data/spec/element/add_attribute_spec.rb +40 -0
  123. data/spec/element/add_attributes_spec.rb +21 -0
  124. data/spec/element/add_element_spec.rb +38 -0
  125. data/spec/element/add_namespace_spec.rb +23 -0
  126. data/spec/element/add_text_spec.rb +23 -0
  127. data/spec/element/attribute_spec.rb +16 -0
  128. data/spec/element/attributes_spec.rb +18 -0
  129. data/spec/element/cdatas_spec.rb +23 -0
  130. data/spec/element/clone_spec.rb +28 -0
  131. data/spec/element/comments_spec.rb +20 -0
  132. data/spec/element/delete_attribute_spec.rb +38 -0
  133. data/spec/element/delete_element_spec.rb +50 -0
  134. data/spec/element/delete_namespace_spec.rb +24 -0
  135. data/spec/element/document_spec.rb +17 -0
  136. data/spec/element/each_element_with_attribute_spec.rb +34 -0
  137. data/spec/element/each_element_with_text_spec.rb +30 -0
  138. data/spec/element/get_text_spec.rb +17 -0
  139. data/spec/element/has_attributes_spec.rb +16 -0
  140. data/spec/element/has_elements_spec.rb +17 -0
  141. data/spec/element/has_text_spec.rb +15 -0
  142. data/spec/element/inspect_spec.rb +26 -0
  143. data/spec/element/instructions_spec.rb +20 -0
  144. data/spec/element/namespace_spec.rb +26 -0
  145. data/spec/element/namespaces_spec.rb +31 -0
  146. data/spec/element/new_spec.rb +34 -0
  147. data/spec/element/next_element_spec.rb +18 -0
  148. data/spec/element/node_type_spec.rb +7 -0
  149. data/spec/element/prefixes_spec.rb +22 -0
  150. data/spec/element/previous_element_spec.rb +19 -0
  151. data/spec/element/raw_spec.rb +23 -0
  152. data/spec/element/root_spec.rb +27 -0
  153. data/spec/element/text_spec.rb +45 -0
  154. data/spec/element/texts_spec.rb +15 -0
  155. data/spec/element/whitespace_spec.rb +22 -0
  156. data/spec/node/each_recursive_spec.rb +20 -0
  157. data/spec/node/find_first_recursive_spec.rb +24 -0
  158. data/spec/node/index_in_parent_spec.rb +14 -0
  159. data/spec/node/next_sibling_node_spec.rb +20 -0
  160. data/spec/node/parent_spec.rb +20 -0
  161. data/spec/node/previous_sibling_node_spec.rb +20 -0
  162. data/spec/shared/each_element.rb +35 -0
  163. data/spec/shared/elements_to_a.rb +35 -0
  164. data/spec/text/append_spec.rb +9 -0
  165. data/spec/text/clone_spec.rb +9 -0
  166. data/spec/text/comparison_spec.rb +24 -0
  167. data/spec/text/empty_spec.rb +11 -0
  168. data/spec/text/indent_text_spec.rb +23 -0
  169. data/spec/text/inspect_spec.rb +7 -0
  170. data/spec/text/new_spec.rb +48 -0
  171. data/spec/text/node_type_spec.rb +7 -0
  172. data/spec/text/normalize_spec.rb +7 -0
  173. data/spec/text/read_with_substitution_spec.rb +12 -0
  174. data/spec/text/to_s_spec.rb +17 -0
  175. data/spec/text/unnormalize_spec.rb +7 -0
  176. data/spec/text/value_spec.rb +36 -0
  177. data/spec/text/wrap_spec.rb +20 -0
  178. data/spec/text/write_with_substitution_spec.rb +32 -0
  179. metadata +385 -0
@@ -0,0 +1,92 @@
1
module REXML
  # A template for stream parser listeners. Every callback defined here is an
  # empty method.
  #
  # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
  # processed; REXML doesn't yet handle doctype entity declarations, so you
  # have to parse them out yourself.
  module StreamListener
    # Called when a tag is encountered.
    # @p name the tag name
    # @p attrs an array of arrays of attribute/value pairs, suitable for
    # use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
    # will result in
    # tag_start( "tag", [["attr1","value1"],["attr2","value2"]])
    def tag_start(name, attrs)
    end

    # Called when the end tag is reached. In the case of <tag/>, tag_end
    # will be called immediately after tag_start
    # @p name the name of the tag
    def tag_end(name)
    end

    # Called when text is encountered in the document
    # @p text the text content.
    def text(text)
    end

    # Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
    # @p name the instruction name; in the example, "xsl"
    # @p instruction the rest of the instruction. In the example,
    # "sheet='foo'"
    def instruction(name, instruction)
    end

    # Called when a comment is encountered.
    # @p comment The content of the comment
    def comment(comment)
    end

    # Handles a doctype declaration. Any attributes of the doctype which are
    # not supplied will be nil. EG, <!DOCTYPE me PUBLIC "foo" "bar">
    # @p name the name of the doctype; EG, "me"
    # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
    # @p long_name the supplied long name, or nil. EG, "foo"
    # @p uri the uri of the doctype, or nil. EG, "bar"
    def doctype(name, pub_sys, long_name, uri)
    end

    # Called when the doctype is done
    def doctype_end
    end

    # If a doctype includes an ATTLIST declaration, it will cause this
    # method to be called. The content is the declaration itself, unparsed.
    # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
    # attr CDATA #REQUIRED". This is the same for all of the .*decl
    # methods.
    def attlistdecl(element_name, attributes, raw_content)
    end

    # <!ELEMENT ...>
    def elementdecl(content)
    end

    # <!ENTITY ...>
    # The argument passed to this method is an array of the entity
    # declaration. It can be in a number of formats, but in general it
    # returns (example, result):
    # <!ENTITY % YN '"Yes"'>
    # ["%", "YN", "'\"Yes\"'", "\""]
    # <!ENTITY % YN 'Yes'>
    # ["%", "YN", "'Yes'", "s"]
    # <!ENTITY WhatHeSaid "He said %YN;">
    # ["WhatHeSaid", "\"He said %YN;\"", "YN"]
    # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
    # ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
    # ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
    # ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
    def entitydecl(content)
    end

    # <!NOTATION ...>
    def notationdecl(content)
    end

    # Called when %foo; is encountered in a doctype declaration.
    # @p content "foo"
    def entity(content)
    end

    # Called when <![CDATA[ ... ]]> is encountered in a document.
    # @p content "..."
    def cdata(content)
    end

    # Called when an XML PI is encountered in the document.
    # EG: <?xml version="1.0" encoding="utf"?>
    # @p version the version attribute value. EG, "1.0"
    # @p encoding the encoding attribute value, or nil. EG, "utf"
    # @p standalone the standalone attribute value, or nil. EG, nil
    def xmldecl(version, encoding, standalone)
    end
  end
end
@@ -0,0 +1,33 @@
1
module REXML
  # Walks several Enumerable objects in parallel, yielding one "row" (the
  # n-th element of every enumerable) per iteration. Shorter enumerables are
  # padded with nil so that every element of the longest one is visited.
  class SyncEnumerator
    include Enumerable

    # Creates a new SyncEnumerator which enumerates rows of given
    # Enumerable objects. Each object must respond to +size+ whenever any
    # objects are given, because the largest one drives the iteration.
    def initialize(*enums)
      @gens = enums
      # nil when no enumerables were supplied; #each guards against that.
      @biggest = @gens.max_by { |gen| gen.size }
    end

    # Returns the number of enumerated Enumerable objects, i.e. the size
    # of each row.
    def size
      @gens.size
    end

    # Synonym for #size.
    def length
      @gens.length
    end

    # Enumerates rows of the Enumerable objects, yielding the elements of a
    # row as separate block arguments. Returns self, or an Enumerator when
    # no block is given. Yields nothing if no enumerables were supplied.
    def each
      return enum_for(:each) unless block_given?
      return self if @gens.empty?

      # Zipping the largest enumerable against all of them (itself included)
      # pads the shorter ones with nil; the leading element is the driver's
      # own copy and is dropped before yielding.
      @biggest.zip(*@gens) { |row| yield(*row[1..-1]) }
      self
    end
  end
end
@@ -0,0 +1,344 @@
1
+ require 'rexml/entity'
2
+ require 'rexml/doctype'
3
+ require 'rexml/child'
4
+ require 'rexml/doctype'
5
+ require 'rexml/parseexception'
6
+
7
module REXML
  # Represents text nodes in an XML document
  class Text < Child
    include Comparable

    # The order in which the substitutions occur
    SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
    SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
    # Characters which are substituted in written strings
    SLAICEPS = [ '<', '>', '"', "'", '&' ]
    SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]

    # If +raw+ is true, then REXML leaves the value alone
    attr_accessor :raw

    ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
    NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
    REFERENCE = /#{Entity::REFERENCE}/
    EREFERENCE = /&(?!#{Entity::NAME};)/

    # Constructor
    # +arg+ if a String, the content is set to a copy of the String (frozen
    # strings are accepted). If a Text, the object is shallowly cloned: its
    # underlying string and its +raw+ flag are copied.
    #
    # +respect_whitespace+ (boolean, false) if true, whitespace is
    # respected; otherwise runs of identical spaces, newlines or tabs in a
    # String argument are squeezed to one character.
    #
    # +parent+ (nil) if this is a Parent object, the parent
    # will be set to this.
    #
    # +raw+ (nil) This argument can be given three values.
    # If true, then the value used to construct this object is expected to
    # contain no unescaped XML markup, and REXML will not change the text. If
    # this value is false, the string may contain any characters, and REXML will
    # escape any and all defined entities whose values are contained in the
    # text. If this value is nil (the default), then the raw value of the
    # parent will be used as the raw value for this node. If there is no raw
    # value for the parent, and no value is supplied, the default is false.
    # Use this field if you have entities defined for some text, and you don't
    # want REXML to escape that text in output.
    #   Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
    #   Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
    #   Text.new( "<&", false, nil, true ) #-> RuntimeError (illegal character)
    #   Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;"
    #   # Assume that the entity "s" is defined to be "sean"
    #   # and that the entity "r" is defined to be "russell"
    #   Text.new( "sean russell" ) #-> "&s; &r;"
    #   Text.new( "sean russell", false, nil, true ) #-> "sean russell"
    #
    # +entity_filter+ (nil) This can be an array of entity names that must
    # be left alone, i.e. NOT substituted into the text when it is escaped.
    # This argument is only useful if +raw+ is set to false.
    #   Text.new( "sean russell", false, nil, false, ["s"] ) #-> "sean &r;"
    #   Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
    # In the last example, the +entity_filter+ argument is ignored.
    #
    # +illegal+ INTERNAL USE ONLY. The pattern a raw string must not match.
    #
    # Raises RuntimeError if +arg+ is neither a String nor a Text, or if the
    # node is raw and its content matches +illegal+.
    def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
                   entity_filter=nil, illegal=ILLEGAL )
      @raw = false

      if parent
        super( parent )
        @raw = parent.raw
      else
        @parent = nil
      end

      @raw = raw unless raw.nil?
      @entity_filter = entity_filter
      clear_cache

      if arg.kind_of? String
        # dup (unlike clone) never carries over the frozen flag, so the
        # in-place edits below are safe for frozen input.
        @string = arg.dup
        @string.squeeze!(" \n\t") unless respect_whitespace
      elsif arg.kind_of? Text
        # Copy the source's underlying string rather than its escaped to_s,
        # otherwise a non-raw copy would be escaped a second time.
        @string = arg.instance_variable_get( :@string ).dup
        @raw = arg.raw
      else
        raise "Illegal argument of type #{arg.class} for Text constructor (#{arg})"
      end

      @string.gsub!( /\r\n?/, "\n" )

      # check for illegal characters
      if @raw && @string =~ illegal
        raise "Illegal character '#{$1}' in raw string \"#{@string}\""
      end
    end

    # Returns the node type symbol, :text.
    def node_type
      :text
    end

    # Returns true if the underlying string has no characters.
    def empty?
      @string.empty?
    end

    # Returns a new Text built from this one.
    def clone
      Text.new(self)
    end

    # Appends text to this text node. The text is appended in the +raw+ mode
    # of this text node. Line endings in +to_append+ are converted to "\n"
    # and the cached #to_s / #value results are discarded.
    def <<( to_append )
      clear_cache
      @string << to_append.gsub( /\r\n?/, "\n" )
    end

    # +other+ a String or a Text
    # +returns+ the result of (to_s <=> arg.to_s)
    def <=>( other )
      to_s() <=> other.to_s
    end

    # Returns the string value of this text node. This string is always
    # escaped, meaning that it is a valid XML text node string, and all
    # entities that can be escaped, have been inserted. This method respects
    # the entity filter set in the constructor. A raw node returns its
    # string untouched. The escaped form is cached until the content changes.
    #
    #   # Assume that the entity "s" is defined to be "sean", and that the
    #   # entity "r" is defined to be "russell"
    #   t = Text.new( "< & sean russell", false, nil, false, ['s'] )
    #   t.to_s #-> "&lt; &amp; sean &r;"
    #   u = Text.new( "sean russell", false, nil, true )
    #   u.to_s #-> "sean russell"
    def to_s
      return @string if @raw
      @normalized ||= Text::normalize( @string, parent_doctype, @entity_filter )
    end

    # Returns the inspect form of the underlying string.
    def inspect
      @string.inspect
    end

    # Returns the string value of this text. This is the text without
    # entities, as it might be used programmatically, or printed to the
    # console. This ignores the 'raw' attribute setting, and any
    # entity_filter. The result is cached until the content changes.
    #
    #   # Assume that the entity "s" is defined to be "sean", and that the
    #   # entity "r" is defined to be "russell"
    #   t = Text.new( "< & sean russell", false, nil, false, ['s'] )
    #   t.value #-> "< & sean russell"
    #   t = Text.new( "< & &s; russell", false, nil, false )
    #   t.value #-> "< & sean russell"
    #   u = Text.new( "sean russell", false, nil, true )
    #   u.value #-> "sean russell"
    def value
      @unnormalized ||= Text::unnormalize( @string, parent_doctype )
    end

    # Sets the contents of this text node. This expects the text to be
    # unnormalized. The node stops being raw and its caches are discarded.
    #
    #   e = Element.new( "a" )
    #   e.add_text( "foo" ) # <a>foo</a>
    #   e[0].value = "bar" # <a>bar</a>
    #   e[0].value = "<a>" # <a>&lt;a&gt;</a>
    def value=( val )
      @string = val.gsub( /\r\n?/, "\n" )
      clear_cache
      @raw = false
    end

    # Recursively wraps +string+ at +width+ by replacing spaces with
    # newlines. A line breaks at the last space at or before +width+; when
    # a word is longer than +width+ the break moves to the next space
    # after it, and a string with no usable space is returned unbroken.
    # If +addnewline+ is true and the string had to be broken, the result
    # starts with a newline.
    def wrap(string, width, addnewline=false)
      return string if string.length <= width

      # Last space at or before the cutoff, else the first one after it.
      place = string.rindex(' ', width) || string.index(' ', width)
      return string unless place

      wrapped = string[0,place] + "\n" + wrap(string[place+1..-1], width)
      addnewline ? "\n" + wrapped : wrapped
    end

    # Prefixes every line of +string+ with +style+ repeated +level+ times
    # and removes trailing whitespace (including the line terminator) from
    # each line. Returns +string+ unchanged when +level+ is negative. When
    # +indentfirstline+ is false the result is stripped at both ends.
    def indent_text(string, level=1, style="\t", indentfirstline=true)
      return string if level < 0

      indent_string = style * level
      new_string = string.each_line.map { |line|
        (indent_string + line).sub(/[\s]+$/,'')
      }.join
      new_string.strip! unless indentfirstline
      new_string
    end

    # == DEPRECATED
    # See REXML::Formatters
    #
    # Writes this node to +writer+ using a Pretty formatter when +indent+ is
    # greater than -1 and a Default formatter otherwise. +transitive+ and
    # +ie_hack+ are accepted but not used.
    def write( writer, indent=-1, transitive=false, ie_hack=false )
      Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
      formatter = if indent > -1
                    REXML::Formatters::Pretty.new( indent )
                  else
                    REXML::Formatters::Default.new
                  end
      formatter.write( self, writer )
    end

    # FIXME
    # This probably won't work properly
    #
    # Returns the parent's xpath with "/text()" appended. Requires a parent.
    def xpath
      @parent.xpath + "/text()"
    end

    # Writes out text, substituting special characters beforehand.
    # +out+ A String, IO, or any other object supporting <<( String )
    # +input+ the text to substitute and then write out; it is not modified
    #
    #   z=utf8.unpack("U*")
    #   ascOut=""
    #   z.each{|r|
    #     if r < 0x100
    #       ascOut.concat(r.chr)
    #     else
    #       ascOut.concat(sprintf("&#x%x;", r))
    #     end
    #   }
    #   puts ascOut
    def write_with_substitution( out, input )
      # dup rather than clone so that frozen input can be edited in place.
      copy = input.dup
      # Doing it like this rather than in a loop improves the speed
      copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
      copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
      copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
      copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
      copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
      copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
      out << copy
    end

    # Reads text, substituting entities. Returns a new string in which line
    # endings are "\n", the five predefined entities are replaced by their
    # characters and numeric character references (decimal, or hexadecimal
    # in either letter case) are replaced by the referenced character.
    # Raises ParseException if +illegal+ is given and matches +input+.
    def self.read_with_substitution( input, illegal=nil )
      copy = input.dup

      if illegal && copy =~ illegal
        raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
      end

      copy.gsub!( /\r\n?/, "\n" )
      if copy.include? ?&
        copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
        copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
        copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
        copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
        copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
        copy.gsub!( NUMERICENTITY ) {
          code = $1
          # "x41" -> "0x41" so that Integer() parses it as hexadecimal
          code = "0#{code}" if code[0] == ?x
          [Integer(code)].pack('U*')
        }
      end
      copy
    end

    # Escapes all possible entities. Every "&" becomes "&amp;", then each
    # occurrence of an entity's value is replaced by a reference to that
    # entity. The entities come from +doctype+ when given, otherwise from
    # DocType::DEFAULT_ENTITIES. Entities listed in +entity_filter+ (by name,
    # or as the entity object itself) are not substituted; the filter is
    # only consulted when a +doctype+ is given.
    def self.normalize( input, doctype=nil, entity_filter=nil )
      copy = input.to_s
      # Doing it like this rather than in a loop improves the speed
      #copy = copy.gsub( EREFERENCE, '&amp;' )
      copy = copy.gsub( "&", "&amp;" )
      if doctype
        doctype.entities.each_value do |entity|
          next unless entity.value
          next if entity_filter &&
                  ( entity_filter.include?( entity.name ) ||
                    entity_filter.include?( entity ) )
          copy = copy.gsub( entity.value, "&#{entity.name};" )
        end
      else
        DocType::DEFAULT_ENTITIES.each_value do |entity|
          copy = copy.gsub(entity.value, "&#{entity.name};" )
        end
      end
      copy
    end

    # Unescapes all possible entities. Returns a new string in which line
    # endings are "\n", numeric character references are replaced by their
    # characters, named references are replaced by the value obtained from
    # +doctype+ (or from DocType::DEFAULT_ENTITIES when no doctype is given)
    # and "&amp;" is replaced by "&". Names listed in +filter+ are left as
    # references. +illegal+ is accepted but not used.
    def self.unnormalize( string, doctype=nil, filter=nil, illegal=nil )
      rv = string.dup
      rv.gsub!( /\r\n?/, "\n" )
      matches = rv.scan( REFERENCE )
      return rv if matches.size == 0

      rv.gsub!( NUMERICENTITY ) {
        code = $1
        code = "0#{code}" if code[0] == ?x
        [Integer(code)].pack('U*')
      }

      # Keep only the captured names; numeric references capture nothing.
      names = matches.collect { |match| match[0] }.compact
      if names.size > 0
        names.each do |entity_reference|
          next if filter && filter.include?( entity_reference )

          entity_value = if doctype
                           doctype.entity( entity_reference )
                         else
                           default = DocType::DEFAULT_ENTITIES[ entity_reference ]
                           default && default.value
                         end
          next unless entity_value

          # Escape the name (it may contain "."), and use the block form so
          # that backslashes in the value are inserted literally.
          rv.gsub!( /&#{Regexp.escape( entity_reference )};/ ) { entity_value }
        end
        rv.gsub!( /&amp;/, '&' )
      end
      rv
    end

    private

    # Forgets the memoized results of #to_s and #value.
    def clear_cache
      @normalized = nil
      @unnormalized = nil
    end

    # Returns the doctype of the document this node's parent belongs to, or
    # nil when there is no parent or no document.
    def parent_doctype
      return nil unless @parent
      doc = @parent.document
      doc ? doc.doctype : nil
    end
  end
end