rubysl-rexml 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +8 -0
- data/Gemfile +4 -0
- data/LICENSE +25 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/lib/rexml/attlistdecl.rb +62 -0
- data/lib/rexml/attribute.rb +185 -0
- data/lib/rexml/cdata.rb +67 -0
- data/lib/rexml/child.rb +96 -0
- data/lib/rexml/comment.rb +80 -0
- data/lib/rexml/doctype.rb +271 -0
- data/lib/rexml/document.rb +230 -0
- data/lib/rexml/dtd/attlistdecl.rb +10 -0
- data/lib/rexml/dtd/dtd.rb +51 -0
- data/lib/rexml/dtd/elementdecl.rb +17 -0
- data/lib/rexml/dtd/entitydecl.rb +56 -0
- data/lib/rexml/dtd/notationdecl.rb +39 -0
- data/lib/rexml/element.rb +1227 -0
- data/lib/rexml/encoding.rb +71 -0
- data/lib/rexml/encodings/CP-1252.rb +103 -0
- data/lib/rexml/encodings/EUC-JP.rb +35 -0
- data/lib/rexml/encodings/ICONV.rb +22 -0
- data/lib/rexml/encodings/ISO-8859-1.rb +7 -0
- data/lib/rexml/encodings/ISO-8859-15.rb +72 -0
- data/lib/rexml/encodings/SHIFT-JIS.rb +37 -0
- data/lib/rexml/encodings/SHIFT_JIS.rb +1 -0
- data/lib/rexml/encodings/UNILE.rb +34 -0
- data/lib/rexml/encodings/US-ASCII.rb +30 -0
- data/lib/rexml/encodings/UTF-16.rb +35 -0
- data/lib/rexml/encodings/UTF-8.rb +18 -0
- data/lib/rexml/entity.rb +166 -0
- data/lib/rexml/formatters/default.rb +109 -0
- data/lib/rexml/formatters/pretty.rb +138 -0
- data/lib/rexml/formatters/transitive.rb +56 -0
- data/lib/rexml/functions.rb +382 -0
- data/lib/rexml/instruction.rb +70 -0
- data/lib/rexml/light/node.rb +196 -0
- data/lib/rexml/namespace.rb +47 -0
- data/lib/rexml/node.rb +75 -0
- data/lib/rexml/output.rb +24 -0
- data/lib/rexml/parent.rb +166 -0
- data/lib/rexml/parseexception.rb +51 -0
- data/lib/rexml/parsers/baseparser.rb +503 -0
- data/lib/rexml/parsers/lightparser.rb +60 -0
- data/lib/rexml/parsers/pullparser.rb +196 -0
- data/lib/rexml/parsers/sax2parser.rb +238 -0
- data/lib/rexml/parsers/streamparser.rb +46 -0
- data/lib/rexml/parsers/treeparser.rb +97 -0
- data/lib/rexml/parsers/ultralightparser.rb +56 -0
- data/lib/rexml/parsers/xpathparser.rb +698 -0
- data/lib/rexml/quickpath.rb +266 -0
- data/lib/rexml/rexml.rb +32 -0
- data/lib/rexml/sax2listener.rb +97 -0
- data/lib/rexml/source.rb +251 -0
- data/lib/rexml/streamlistener.rb +92 -0
- data/lib/rexml/syncenumerator.rb +33 -0
- data/lib/rexml/text.rb +344 -0
- data/lib/rexml/undefinednamespaceexception.rb +8 -0
- data/lib/rexml/validation/relaxng.rb +559 -0
- data/lib/rexml/validation/validation.rb +155 -0
- data/lib/rexml/validation/validationexception.rb +9 -0
- data/lib/rexml/xmldecl.rb +119 -0
- data/lib/rexml/xmltokens.rb +18 -0
- data/lib/rexml/xpath.rb +66 -0
- data/lib/rexml/xpath_parser.rb +792 -0
- data/lib/rubysl/rexml.rb +1 -0
- data/lib/rubysl/rexml/version.rb +5 -0
- data/rubysl-rexml.gemspec +23 -0
- data/spec/attribute/clone_spec.rb +10 -0
- data/spec/attribute/element_spec.rb +22 -0
- data/spec/attribute/equal_value_spec.rb +17 -0
- data/spec/attribute/hash_spec.rb +12 -0
- data/spec/attribute/initialize_spec.rb +28 -0
- data/spec/attribute/inspect_spec.rb +19 -0
- data/spec/attribute/namespace_spec.rb +23 -0
- data/spec/attribute/node_type_spec.rb +9 -0
- data/spec/attribute/prefix_spec.rb +17 -0
- data/spec/attribute/remove_spec.rb +19 -0
- data/spec/attribute/to_s_spec.rb +13 -0
- data/spec/attribute/to_string_spec.rb +14 -0
- data/spec/attribute/value_spec.rb +14 -0
- data/spec/attribute/write_spec.rb +22 -0
- data/spec/attribute/xpath_spec.rb +19 -0
- data/spec/attributes/add_spec.rb +6 -0
- data/spec/attributes/append_spec.rb +6 -0
- data/spec/attributes/delete_all_spec.rb +30 -0
- data/spec/attributes/delete_spec.rb +26 -0
- data/spec/attributes/each_attribute_spec.rb +24 -0
- data/spec/attributes/each_spec.rb +24 -0
- data/spec/attributes/element_reference_spec.rb +18 -0
- data/spec/attributes/element_set_spec.rb +25 -0
- data/spec/attributes/get_attribute_ns_spec.rb +13 -0
- data/spec/attributes/get_attribute_spec.rb +28 -0
- data/spec/attributes/initialize_spec.rb +18 -0
- data/spec/attributes/length_spec.rb +6 -0
- data/spec/attributes/namespaces_spec.rb +5 -0
- data/spec/attributes/prefixes_spec.rb +23 -0
- data/spec/attributes/shared/add.rb +17 -0
- data/spec/attributes/shared/length.rb +12 -0
- data/spec/attributes/size_spec.rb +6 -0
- data/spec/attributes/to_a_spec.rb +20 -0
- data/spec/cdata/clone_spec.rb +9 -0
- data/spec/cdata/initialize_spec.rb +24 -0
- data/spec/cdata/shared/to_s.rb +11 -0
- data/spec/cdata/to_s_spec.rb +6 -0
- data/spec/cdata/value_spec.rb +6 -0
- data/spec/document/add_element_spec.rb +30 -0
- data/spec/document/add_spec.rb +60 -0
- data/spec/document/clone_spec.rb +19 -0
- data/spec/document/doctype_spec.rb +14 -0
- data/spec/document/encoding_spec.rb +21 -0
- data/spec/document/expanded_name_spec.rb +15 -0
- data/spec/document/new_spec.rb +37 -0
- data/spec/document/node_type_spec.rb +7 -0
- data/spec/document/root_spec.rb +11 -0
- data/spec/document/stand_alone_spec.rb +18 -0
- data/spec/document/version_spec.rb +13 -0
- data/spec/document/write_spec.rb +38 -0
- data/spec/document/xml_decl_spec.rb +14 -0
- data/spec/element/add_attribute_spec.rb +40 -0
- data/spec/element/add_attributes_spec.rb +21 -0
- data/spec/element/add_element_spec.rb +38 -0
- data/spec/element/add_namespace_spec.rb +23 -0
- data/spec/element/add_text_spec.rb +23 -0
- data/spec/element/attribute_spec.rb +16 -0
- data/spec/element/attributes_spec.rb +18 -0
- data/spec/element/cdatas_spec.rb +23 -0
- data/spec/element/clone_spec.rb +28 -0
- data/spec/element/comments_spec.rb +20 -0
- data/spec/element/delete_attribute_spec.rb +38 -0
- data/spec/element/delete_element_spec.rb +50 -0
- data/spec/element/delete_namespace_spec.rb +24 -0
- data/spec/element/document_spec.rb +17 -0
- data/spec/element/each_element_with_attribute_spec.rb +34 -0
- data/spec/element/each_element_with_text_spec.rb +30 -0
- data/spec/element/get_text_spec.rb +17 -0
- data/spec/element/has_attributes_spec.rb +16 -0
- data/spec/element/has_elements_spec.rb +17 -0
- data/spec/element/has_text_spec.rb +15 -0
- data/spec/element/inspect_spec.rb +26 -0
- data/spec/element/instructions_spec.rb +20 -0
- data/spec/element/namespace_spec.rb +26 -0
- data/spec/element/namespaces_spec.rb +31 -0
- data/spec/element/new_spec.rb +34 -0
- data/spec/element/next_element_spec.rb +18 -0
- data/spec/element/node_type_spec.rb +7 -0
- data/spec/element/prefixes_spec.rb +22 -0
- data/spec/element/previous_element_spec.rb +19 -0
- data/spec/element/raw_spec.rb +23 -0
- data/spec/element/root_spec.rb +27 -0
- data/spec/element/text_spec.rb +45 -0
- data/spec/element/texts_spec.rb +15 -0
- data/spec/element/whitespace_spec.rb +22 -0
- data/spec/node/each_recursive_spec.rb +20 -0
- data/spec/node/find_first_recursive_spec.rb +24 -0
- data/spec/node/index_in_parent_spec.rb +14 -0
- data/spec/node/next_sibling_node_spec.rb +20 -0
- data/spec/node/parent_spec.rb +20 -0
- data/spec/node/previous_sibling_node_spec.rb +20 -0
- data/spec/shared/each_element.rb +35 -0
- data/spec/shared/elements_to_a.rb +35 -0
- data/spec/text/append_spec.rb +9 -0
- data/spec/text/clone_spec.rb +9 -0
- data/spec/text/comparison_spec.rb +24 -0
- data/spec/text/empty_spec.rb +11 -0
- data/spec/text/indent_text_spec.rb +23 -0
- data/spec/text/inspect_spec.rb +7 -0
- data/spec/text/new_spec.rb +48 -0
- data/spec/text/node_type_spec.rb +7 -0
- data/spec/text/normalize_spec.rb +7 -0
- data/spec/text/read_with_substitution_spec.rb +12 -0
- data/spec/text/to_s_spec.rb +17 -0
- data/spec/text/unnormalize_spec.rb +7 -0
- data/spec/text/value_spec.rb +36 -0
- data/spec/text/wrap_spec.rb +20 -0
- data/spec/text/write_with_substitution_spec.rb +32 -0
- metadata +385 -0
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'rexml/parsers/streamparser'
|
2
|
+
require 'rexml/parsers/baseparser'
|
3
|
+
require 'rexml/light/node'
|
4
|
+
|
5
|
+
module REXML
|
6
|
+
module Parsers
|
7
|
+
# Builds a lightweight, array-based tree out of the events pulled from a
# BaseParser.
#
# Every node is the raw event array with its parent spliced in at index 1:
#
#   [ event_type, parent, *event_arguments, *children ]
#
# so an element is
#
#   [ :start_element, parent, tag_name, attributes, *children ]
#
# The root of the tree is <tt>[ :document, *children ]</tt>.
class LightParser
  # stream:: the XML source; it is handed unchanged to BaseParser.new and
  #          kept so that #rewind can re-attach it later.
  def initialize stream
    @stream = stream
    @parser = REXML::Parsers::BaseParser.new( stream )
  end

  # Registers +listener+ with the underlying BaseParser.
  def add_listener( listener )
    @parser.add_listener( listener )
  end

  # Rewinds the stream and hands it back to the underlying parser.
  def rewind
    @stream.rewind
    @parser.stream = @stream
  end

  # Pulls events until :end_document and returns the root node.
  #
  # :start_element and :start_doctype open a new context (later nodes
  # become their children); :end_element and :end_doctype return to the
  # parent context, which is stored at index 1 of the current node.  Every
  # other event is attached to the current context as a leaf.
  def parse
    root = context = [ :document ]
    while true
      event = @parser.pull
      case event[0]
      when :end_document
        break
      when :end_element, :end_doctype
        # index 1 of every node is its parent
        context = context[1]
      else
        context << event
        # splice the parent in right after the event type
        event[1,0] = [context]
        if event[0] == :start_element or event[0] == :start_doctype
          context = event
        end
      end
    end
    root
  end
end
|
47
|
+
|
48
|
+
# An element is an array. The array contains:
#  0      The event type (:start_element)
#  1      The parent element
#  2      The tag name
#  3      A hash of attributes
#  4..-1  The child elements
|
53
|
+
# An element is an array of size > 3
|
54
|
+
# Text is a String
|
55
|
+
# PIs are [ :processing_instruction, target, data ]
|
56
|
+
# Comments are [ :comment, data ]
|
57
|
+
# DocTypes are DocType structs
|
58
|
+
# The root is an array with XMLDecls, Text, DocType, Array, Text
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
require 'rexml/parseexception'
|
4
|
+
require 'rexml/parsers/baseparser'
|
5
|
+
require 'rexml/xmltokens'
|
6
|
+
|
7
|
+
module REXML
|
8
|
+
module Parsers
|
9
|
+
# = Using the Pull Parser
# <em>This API is experimental, and subject to change.</em>
#  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
#  while parser.has_next?
#    res = parser.pull
#    puts res[1]['att'] if res.start_element? and res[0] == 'b'
#  end
# See the PullEvent class for information on the content of the results.
# The data is identical to the arguments passed for the various events to
# the StreamListener API.
#
# Notice that:
#  parser = PullParser.new( "<a>BAD DOCUMENT" )
#  while parser.has_next?
#    res = parser.pull
#    raise res[1] if res.error?
#  end
#
# +has_next?+, +entity+, +empty?+ and +source+ are answered directly by the
# underlying BaseParser; they do not consult the look-ahead buffer filled
# by #peek and #unshift.
#
# Nat Price gave me some good ideas for the API.
class PullParser
  include XMLTokens
  extend Forwardable

  def_delegators( :@parser, :has_next? )
  def_delegators( :@parser, :entity )
  def_delegators( :@parser, :empty? )
  def_delegators( :@parser, :source )

  # stream:: the XML source, handed unchanged to BaseParser.new.
  def initialize stream
    @entities = {}
    @listeners = nil
    @parser = BaseParser.new( stream )
    @my_stack = []
  end

  # Remembers +listener+.  The list is only stored here; nothing in this
  # class notifies it.
  def add_listener( listener )
    @listeners = [] unless @listeners
    @listeners << listener
  end

  # Yields each event in turn for as long as the parser reports more.
  def each
    while has_next?
      yield self.pull
    end
  end

  # Returns the event +depth+ positions ahead without consuming it.
  #
  # Events read ahead are buffered and later handed out by #pull.  They go
  # through the same processing as events read by #pull, so a peeked :text
  # event carries its unnormalized text and a peeked :entitydecl is
  # registered for later text expansion.
  def peek depth=0
    @my_stack.push( read_event ) while @my_stack.length <= depth
    @my_stack[depth]
  end

  # Returns the next event, taking buffered (peeked or unshifted) entries
  # first.
  def pull
    return @my_stack.shift if @my_stack.length > 0

    read_event
  end

  # Pushes +token+ back so that it is the next thing #pull returns.
  def unshift token
    @my_stack.unshift token
  end

  private

  # Reads one raw event from the parser, records internal entity
  # declarations, appends the unnormalized text to :text events and wraps
  # the result in a PullEvent.
  def read_event
    event = @parser.pull
    case event[0]
    when :entitydecl
      # external (PUBLIC/SYSTEM) entities are not expanded in text
      @entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
    when :text
      event << @parser.unnormalize( event[1], @entities )
    end
    PullEvent.new( event )
  end
end
|
84
|
+
|
85
|
+
# A parsing event.  The contents of the event are accessed as an +Array+,
# and the type is given either by the ...? methods, or by calling
# #event_type.  The contents of this object vary from event to event,
# but are identical to the arguments passed to +StreamListener+s for each
# event.
class PullEvent
  # arg:: the raw event array; element 0 is the event type and the
  #       remaining elements are the event's contents.
  #
  # The predicates below recognise the types :start_element, :end_element,
  # :text, :processing_instruction, :comment, :start_doctype, :attlistdecl,
  # :elementdecl, :entitydecl, :notationdecl, :entity, :cdata, :xmldecl
  # and :error.
  def initialize(arg)
    @contents = arg
  end

  # Array-style access to the event's contents, i.e. everything after the
  # event type.  Accepts an index, an index plus a length, or a Range;
  # index 0 is the first content item and negative indices count from the
  # last content item.  The event type itself is never returned.
  #
  # Raises RuntimeError if +start+ is neither a Range nor a Numeric.
  def []( start, endd=nil)
    # Slice the contents without the type slot so that ranges and negative
    # indices behave exactly as they would on a plain Array.
    payload = @contents.drop( 1 )
    if start.kind_of? Range
      payload.slice( start )
    elsif start.kind_of? Numeric
      if endd.nil?
        payload.slice( start )
      else
        payload.slice( start, endd )
      end
    else
      raise "Illegal argument #{start.inspect} (#{start.class})"
    end
  end

  # The type of this event (element 0 of the raw event array).
  def event_type
    @contents[0]
  end

  # Content: [ String tag_name, Hash attributes ]
  def start_element?
    @contents[0] == :start_element
  end

  # Content: [ String tag_name ]
  def end_element?
    @contents[0] == :end_element
  end

  # Content: [ String raw_text, String unnormalized_text ]
  def text?
    @contents[0] == :text
  end

  # Content: [ String text ]
  def instruction?
    @contents[0] == :processing_instruction
  end

  # Content: [ String text ]
  def comment?
    @contents[0] == :comment
  end

  # Content: [ String name, String pub_sys, String long_name, String uri ]
  def doctype?
    @contents[0] == :start_doctype
  end

  # Content: [ String text ]
  def attlistdecl?
    @contents[0] == :attlistdecl
  end

  # Content: [ String text ]
  def elementdecl?
    @contents[0] == :elementdecl
  end

  # Due to the wonders of DTDs, an entity declaration can be just about
  # anything.  There's no way to normalize it; you'll have to interpret the
  # content yourself.  However, the following is true:
  #
  # * If the entity declaration is an internal entity:
  #   [ String name, String value ]
  # Content: [ String text ]
  def entitydecl?
    @contents[0] == :entitydecl
  end

  # Content: [ String text ]
  def notationdecl?
    @contents[0] == :notationdecl
  end

  # Content: [ String text ]
  def entity?
    @contents[0] == :entity
  end

  # Content: [ String text ]
  def cdata?
    @contents[0] == :cdata
  end

  # Content: [ String version, String encoding, String standalone ]
  def xmldecl?
    @contents[0] == :xmldecl
  end

  # True when the event type is :error.
  def error?
    @contents[0] == :error
  end

  # "<type>: <contents.inspect>"
  def inspect
    @contents[0].to_s + ": " + @contents[1..-1].inspect
  end
end
|
195
|
+
end
|
196
|
+
end
|
@@ -0,0 +1,238 @@
|
|
1
|
+
require 'rexml/parsers/baseparser'
|
2
|
+
require 'rexml/parseexception'
|
3
|
+
require 'rexml/namespace'
|
4
|
+
require 'rexml/text'
|
5
|
+
|
6
|
+
module REXML
|
7
|
+
module Parsers
|
8
|
+
# SAX2Parser
#
# Pulls events from a BaseParser and dispatches them, SAX2 style, to
# blocks and listener objects registered through #listen.
class SAX2Parser
  # source:: the XML source, handed unchanged to BaseParser.new.
  def initialize source
    @parser = BaseParser.new(source)
    @listeners = []
    @procs = []
    @namespace_stack = []
    @has_listeners = false
    @tag_stack = []
    @entities = {}
  end

  # The source of the underlying parser.
  def source
    @parser.source
  end

  # Registers +listener+ directly with the underlying BaseParser.
  def add_listener( listener )
    @parser.add_listener( listener )
  end

  # Listen arguments:
  #
  # Symbol, Array, Block
  #     Listen to Symbol events on Array elements
  # Symbol, Block
  #   Listen to Symbol events
  # Array, Listener
  #     Listen to all events on Array elements
  # Array, Block
  #     Listen to :start_element events on Array elements
  # Listener
  #     Listen to All events
  #
  # Symbol can be one of: :start_element, :end_element,
  # :start_prefix_mapping, :end_prefix_mapping, :characters,
  # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
  # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
  #
  # There is an additional symbol that can be listened for: :progress.
  # This will be called for every event generated, passing in the current
  # stream position.
  #
  # Array contains regular expressions or strings which will be matched
  # against fully qualified element names.
  #
  # Listener must implement the methods in SAX2Listener
  #
  # Block will be passed the same arguments as a SAX2Listener method would
  # be, where the method name is the same as the matched Symbol.
  # See the SAX2Listener for more information.
  def listen( *args, &blok )
    if args[0].kind_of? Symbol
      if args.size == 2
        args[1].each { |match| @procs << [args[0], match, blok] }
      else
        add( [args[0], nil, blok] )
      end
    elsif args[0].kind_of? Array
      if args.size == 2
        args[0].each { |match| add( [nil, match, args[1]] ) }
      else
        args[0].each { |match| add( [ :start_element, match, blok ] ) }
      end
    else
      add([nil, nil, args[0]])
    end
  end

  # Removes every registration of +listener+, or, when no listener is
  # given, every registration of the block +blok+.
  def deafen( listener=nil, &blok )
    if listener
      @listeners.delete_if {|item| item[-1] == listener }
      @has_listeners = false if @listeners.size == 0
    else
      @procs.delete_if {|item| item[-1] == blok }
    end
  end

  # Pulls events until :end_document, notifying the registered observers
  # of each one.  A :progress notification carrying the parser position
  # follows every event except :end_document.
  def parse
    @procs.each { |sym,_match,block| block.call if sym == :start_document }
    @listeners.each { |sym,_match,block|
      block.start_document if sym == :start_document or sym.nil?
    }
    while true
      event = @parser.pull
      case event[0]
      when :end_document
        handle( :end_document )
        break
      when :start_doctype
        handle( :doctype, *event[1..-1])
      when :start_element
        process_start_element( event )
      when :end_element
        process_end_element( event )
      when :text
        handle( :characters, expand_text( event[1] ) )
      when :entitydecl
        # only [ :entitydecl, name, value ] declarations are expanded in text
        @entities[ event[1] ] = event[2] if event.size == 3
        handle( *event )
      when :processing_instruction, :comment, :attlistdecl,
           :elementdecl, :cdata, :notationdecl, :xmldecl
        handle( *event )
      end
      handle( :progress, @parser.position )
    end
  end

  private

  # Handles a :start_element event: records the tag, opens a namespace
  # scope, announces any namespace declarations and then the element.
  def process_start_element( event )
    @tag_stack.push(event[1])
    # Always open a scope so that the pop in process_end_element stays
    # balanced, whoever happens to be registered at the time.
    scope = {}
    @namespace_stack.push( scope )
    # find the observers for namespaces
    procs = get_procs( :start_prefix_mapping, event[1] )
    listeners = get_listeners( :start_prefix_mapping, event[1] )
    if procs or listeners
      # break out the namespace declarations
      # The attributes live in event[2]
      event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
      nsdecl = event[2].find_all { |n, _value| n =~ /^xmlns(:|$)/ }
      # "xmlns:foo" -> "foo"; plain "xmlns" -> nil (the default namespace)
      nsdecl.collect! { |n, value| [ n[6..-1], value ] }
      nsdecl.each do |n,v|
        scope[n] = v
        # notify observers of namespaces
        procs.each { |ob| ob.call( n, v ) } if procs
        listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
      end
    end
    event[1] =~ Namespace::NAMESPLIT
    prefix = $1
    local = $2
    uri = get_namespace(prefix)
    # find the observers for start_element
    procs = get_procs( :start_element, event[1] )
    listeners = get_listeners( :start_element, event[1] )
    # notify observers
    procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
    listeners.each { |ob|
      ob.start_element( uri, local, event[1], event[2] )
    } if listeners
  end

  # Handles an :end_element event: announces the element, then closes its
  # namespace scope and announces the end of every mapping it declared.
  def process_end_element( event )
    @tag_stack.pop
    event[1] =~ Namespace::NAMESPLIT
    prefix = $1
    local = $2
    # resolved before the scope is popped below
    uri = get_namespace(prefix)
    # find the observers for end_element
    procs = get_procs( :end_element, event[1] )
    listeners = get_listeners( :end_element, event[1] )
    # notify observers
    procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
    listeners.each { |ob|
      ob.end_element( uri, local, event[1] )
    } if listeners

    namespace_mapping = @namespace_stack.pop
    return unless namespace_mapping
    # find the observers for namespaces
    procs = get_procs( :end_prefix_mapping, event[1] )
    listeners = get_listeners( :end_prefix_mapping, event[1] )
    if procs or listeners
      namespace_mapping.each do |ended_prefix, _ended_uri|
        # notify observers of namespaces
        procs.each { |ob| ob.call( ended_prefix ) } if procs
        listeners.each { |ob| ob.end_prefix_mapping(ended_prefix) } if listeners
      end
    end
  end

  # Returns a copy of +raw+ with declared entities and numeric character
  # references replaced by their values.
  def expand_text( raw )
    copy = raw.clone
    # Block form: the entity value is inserted literally, so backslashes
    # in it are not interpreted as back-references.
    @entities.each { |key, value| copy = copy.gsub("&#{key};") { value } }
    copy.gsub!( Text::NUMERICENTITY ) {
      code = $1
      # "x41" -> "0x41" so that Integer() reads it as hexadecimal
      code = "0#{code}" if code[0, 1] == 'x'
      [Integer(code)].pack('U*')
    }
    copy
  end

  # Notifies every observer of +symbol+ on the current tag.  Blocks are
  # called with +arguments+; listeners receive the method named +symbol+.
  def handle( symbol, *arguments )
    tag = @tag_stack[-1]
    procs = get_procs( symbol, tag )
    listeners = get_listeners( symbol, tag )
    # notify observers
    procs.each { |ob| ob.call( *arguments ) } if procs
    listeners.each { |l|
      l.send( symbol.to_s, *arguments )
    } if listeners
  end

  # Blocks registered for +symbol+ on element +name+, or nil when no
  # blocks are registered at all.
  def get_procs( symbol, name )
    return nil if @procs.size == 0
    observers_for( @procs, symbol, name )
  end

  # Listeners registered for +symbol+ on element +name+, or nil when no
  # listeners are registered at all.
  def get_listeners( symbol, name )
    return nil if @listeners.size == 0
    observers_for( @listeners, symbol, name )
  end

  # Selects from +registry+ (entries are [ symbol, match, observer ]) the
  # observers whose symbol is nil or equals +symbol+ and whose match is
  # nil, equals +name+, or is a Regexp matching +name+.
  def observers_for( registry, symbol, name )
    registry.find_all do |sym, match, _observer|
      (sym.nil? or symbol == sym) and
        (match.nil? or name == match or
          (match.kind_of?( Regexp ) and name =~ match))
    end.collect{|x| x[-1]}
  end

  # Files +pair+ ([ symbol, match, observer ]) under the blocks when the
  # observer responds to +call+, otherwise under the listeners.  Duplicate
  # registrations are ignored.
  def add( pair )
    if pair[-1].respond_to? :call
      @procs << pair unless @procs.include? pair
    else
      @listeners << pair unless @listeners.include? pair
      @has_listeners = true
    end
  end

  # The URI bound to +prefix+ in the innermost open scope that declares
  # it, or nil.  A nil prefix looks up the default namespace.
  def get_namespace( prefix )
    scope = @namespace_stack.reverse_each.find { |ns| not ns[prefix].nil? }
    scope[prefix] if scope
  end
end
|
237
|
+
end
|
238
|
+
end
|