xamplr 1.9.13 → 1.9.14

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,463 @@
1
+ # encoding utf-8
2
+
3
+ require 'libxml'
4
+
5
+ module Xampl
6
+
7
+ class FromXML
8
+
9
# Read-only accessors. @is_realising and @tokenise_content are assigned by
# setup_parse / setup_parse_string. Nothing visible in this file assigns
# @checkWellFormed, so that reader presumably always returns nil -- TODO confirm.
attr_reader :checkWellFormed
attr_reader :is_realising
attr_reader :tokenise_content

# NOTE(review): this statement runs in the class body, so it sets an instance
# variable on the class object itself, not on parser instances. Instances get
# their @reader from setup_parse / setup_parse_string.
@reader = nil

# Registries shared by all parsers: candidate classes keyed by bare tag name,
# and by the "{namespace}tag" form.
@@by_tag = {}
@@by_ns_tag = {}
17
+
18
# Creates a parser.
#
# recovering -- stored in @recovering; parse_element consults it when choosing
#               which object to return and whether to force_load a newly
#               created persisted element.
def initialize(recovering=false)
  @recovering = recovering

  # Attribute buffers, cleared and refilled by build_attribute_arrays for each
  # element. Until the first element is parsed each one holds 32 nils.
  @attribute_name = Array.new(32)
  @attribute_namespace = Array.new(32)
  @attribute_value = Array.new(32)

  # Bookkeeping used by next_reader_event / current_node_type to synthesise an
  # end-element event for empty elements.
  @insert_end_element = false
  @faking_an_end_element = false
  @just_opened_an_element = false
end
29
+
30
# Empties both class registries (by bare tag and by namespaced tag).
def self.reset_registry
  @@by_tag = {}
  @@by_ns_tag = {}
end
34
+
35
# Registers klass as the handler for an element.
#
# tag    -- bare tag name; several classes may share one, so the entry is a
#           list and klass is appended only if it is not already present.
# ns_tag -- namespaced key; any previous entry is replaced by [klass].
# klass  -- the class to register.
def self.register(tag, ns_tag, klass)
  @@by_ns_tag[ns_tag] = [klass]

  known = @@by_tag[tag]
  if known.nil?
    @@by_tag[tag] = [klass]
  else
    known << klass unless known.include?(klass)
  end
end
46
+
47
# Looks up the classes registered for name.
#
# The namespaced registry is consulted first, then the bare-tag registry.
# Returns the registered list, or an empty array when nothing matches.
def self.registered(name)
  @@by_ns_tag[name] || @@by_tag[name] || []
end
55
+
56
# Resolver hook; currently returns name unchanged.
def resolve(name)
  #TODO -- ???
  name
end
60
+
61
# Prepares the parser to read the XML file at filename.
#
# Records the tokenise_content / is_realising flags, makes this object its own
# resolver, and opens a LibXML reader on the file. Returns the reader.
#
# Parser options (per the libxml2 option documentation): NOENT substitutes
# entities, NONET forbids network access, NOCDATA delivers CDATA as text,
# DTDATTR applies default attributes from the DTD. COMPACT is deliberately
# left off.
# NOTE(review): NOENT expands entities; if untrusted XML can reach this
# parser, confirm that local-file entity expansion is acceptable.
def setup_parse(filename, tokenise_content=true, is_realising=false)
  @resolver = self

  @is_realising = is_realising
  @tokenise_content = tokenise_content

  parser_options = LibXML::XML::Parser::Options::NOENT |
                   LibXML::XML::Parser::Options::NONET |
                   LibXML::XML::Parser::Options::NOCDATA |
                   LibXML::XML::Parser::Options::DTDATTR

  #TODO CLOSE THIS THING!!
  @reader = LibXML::XML::Reader.file(filename, :options => parser_options)
end
76
+
77
# Prepares the parser to read XML held in string.
#
# Records the tokenise_content / is_realising flags, makes this object its own
# resolver, and opens a LibXML reader on the string. Returns the reader.
#
# Parser options (per the libxml2 option documentation): NOENT substitutes
# entities, NONET forbids network access, NOCDATA delivers CDATA as text,
# DTDATTR applies default attributes from the DTD. COMPACT is deliberately
# left off.
# NOTE(review): NOENT expands entities; if untrusted XML can reach this
# parser, confirm that local-file entity expansion is acceptable.
def setup_parse_string(string, tokenise_content=true, is_realising=false)
  @resolver = self

  @is_realising = is_realising
  @tokenise_content = tokenise_content

  parser_options = LibXML::XML::Parser::Options::NOENT |
                   LibXML::XML::Parser::Options::NONET |
                   LibXML::XML::Parser::Options::NOCDATA |
                   LibXML::XML::Parser::Options::DTDATTR

  #TODO CLOSE THIS THING!!
  @reader = LibXML::XML::Reader.string(string, :options => parser_options)
end
93
+
94
# Parses the XML file at filename and returns the object built for its first
# element (whatever parse_element yields as its first value).
#
# Any StandardError raised while setting up or parsing is re-raised as a
# RuntimeError naming the file, keeping the original backtrace.
def parse(filename, tokenise_content=true, is_realising=false)
  setup_parse(filename, tokenise_content, is_realising)
  # parse_element may answer [object, ignore_flag]; only the object matters here.
  element, _ignore_child = parse_element
  element
rescue => e
  raise RuntimeError, "trouble parsing file: '#{filename}' -- #{ e }", e.backtrace
end
103
+
104
# Same as parse_string, but always with is_realising set to true.
def realise_string(string, tokenise_content=true, target=nil)
  parse_string(string, tokenise_content, true, target)
end
107
+
108
# Parses XML held in string and returns the object built for its first element
# (whatever parse_element yields as its first value).
#
# target -- passed through to parse_element as the object to populate.
#
# Any StandardError raised while setting up or parsing is re-raised as a
# RuntimeError that quotes the whole input string, keeping the original
# backtrace.
def parse_string(string, tokenise_content=true, is_realising=false, target=nil)
  setup_parse_string(string, tokenise_content, is_realising)
  # parse_element may answer [object, ignore_flag]; only the object matters here.
  element, _ignore_child = parse_element(nil, target)
  element
rescue => e
  raise RuntimeError, "trouble parsing string: '#{string}' -- #{ e }", e.backtrace
end
117
+
118
# Normalises whitespace in str IN PLACE and returns it.
#
# Every run of spaces, newlines, carriage returns and tabs becomes a single
# space; with strip (the default) leading and trailing whitespace is removed
# first. Returns nil when str is nil.
#
# The receiver is mutated on purpose: parse_element calls this for its side
# effect and discards the return value.
def self.tokenise_string(str, strip=true)
  return nil unless str

  str.strip! if strip
  str.gsub!(/[ \n\r\t]+/, " ")
  str
end
124
+
125
# Debug counter; no live code in this file reads it.
$idiot = 0
126
+
127
# Builds the object for the element the reader is on (first advancing to the
# next start element if it is not on one) and recursively parses its content.
#
# parent -- object of the enclosing element when parsing a child, nil at the
#           root; only used to decide whether a newly created persisted
#           element is force_load-ed.
# target -- optional existing object to populate instead of looking one up or
#           instantiating the registered class.
#
# Returns:
#   * nil when no start element can be found;
#   * [nil, true] when the element is namespaced and no class is registered
#     for it (the element and its whole subtree are reported and skipped);
#   * otherwise the element object. When not recovering and an already-known
#     persisted object matched, that known object is returned.
#
# Raises XamplException when an un-namespaced tag is not registered, or when
# more than one class is registered for the tag.
def parse_element(parent=nil, target=nil)
  find_the_first_element
  return unless start_element?

  # Capture these before build_attribute_arrays moves the reader onto attributes.
  namespace = @reader.namespace_uri
  name = @reader.local_name

  existing_element = nil
  element = nil
  requires_caching = false

  build_attribute_arrays

  if namespace && 0 < namespace.size
    klass_name = "{#{namespace}}#{name}"
    klasses = FromXML.registered(klass_name)
    if klasses.empty?
      # The class has not been registered (either it was never generated, or it was never loaded)
      begin
        # discard this node and all children, but say something
        thing = @reader.expand
        puts "#{ ::File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] UNRECOGNISED CHILD ELEMENTS: class: #{ klass_name }\n#{ thing }"
        discard_current_element
        return nil, true
      rescue => e
        puts "#{ ::File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] CRAP! #{ e }"
        puts e.backtrace
        raise e
      end
    end
    if 1 < klasses.size
      raise XamplException.new("there is more than one '#{name}' tag in namespace '#{namespace}'\nplease report this error")
    end
  else
    klasses = FromXML.registered(name)
    if klasses.empty?
      raise XamplException.new("do not recognise tag '#{name}' (no namespace specified)")
    end
    if 1 < klasses.size
      raise XamplException.new("there is more than one '#{name}' tag (no namespace specified)")
    end
  end

  # tokenise_string normalises each value in place.
  unless @is_realising
    @attribute_value.each { |value| FromXML.tokenise_string value }
  end

  if target
    element = target
    target.load_needed = false
    element.init_attributes(@attribute_name, @attribute_namespace, @attribute_value)
    element.note_attributes_initialised(@is_realising)
  else
    klass = klasses[0]
    pid_attribute = klass.persisted?
    if pid_attribute
      # Look for the attribute carrying the persistent id.
      pid_index = @attribute_name.index(pid_attribute.to_s)
      if pid_index
        existing_element = Xampl.find_known(klass, @attribute_value[pid_index])
        if existing_element
          # So we've found the element ('existing element' means a
          # representation of this element already in memory). We can do
          # several reasonable things:
          #
          #   1) continue parsing into the found element
          #   2) simply return the found element
          #   3) replace the found element with the new element
          #
          # The first one is dubious, so we won't. The second and third
          # options both make complete sense. We are going to do the second.
          element = existing_element #TODO -- IS THIS RIGHT?
        else
          element = klass.new
          #TODO -- IS THIS RIGHT? (caching used to depend on @recovering)
          requires_caching = true
          element.force_load if parent && !@recovering
          element.note_created(@is_realising)
        end
      end
    end

    unless element
      element = klass.new
      element.note_created(@is_realising)
    end

    element.note_initialise_attributes_with(@attribute_name, @attribute_namespace, @attribute_value, @is_realising)
    element.init_attributes(@attribute_name, @attribute_namespace, @attribute_value)
    element.note_attributes_initialised(@is_realising)

    Xampl.cache(element) if requires_caching && element.persist_required
  end

  while next_reader_event
    case current_node_type
    # TODO -- can these ever happen?
    #   when START_DOCUMENT
    #     return element if @recovering
    #     return existing_element || element
    #   when END_DOCUMENT
    #     return element if @recovering
    #     return existing_element || element
    when LibXML::XML::Reader::TYPE_ELEMENT
      child, ignore_child = parse_element(element)

      unless ignore_child
        case child
        when XamplObject
          # Each hook may substitute (or drop) the child before it is appended.
          child = child.note_add_to_parent(element, @is_realising) if child
          child = element.note_add_child(child, @is_realising) if element
          child.append_to(element) if element && child
        when XMLText
          #TODO -- get rid of this puts
          puts "UNRECOGNISED Well-formed XML: #{child.to_s[0..25]}..."
        else
          #TODO -- get rid of this puts
          puts "WHAT IS THIS??? #{child.class.name}"
        end
      end
    when LibXML::XML::Reader::TYPE_END_ELEMENT
      element = element.note_closed(@is_realising)
      return element if @recovering
      return existing_element || element
    when LibXML::XML::Reader::TYPE_TEXT, LibXML::XML::Reader::TYPE_CDATA, LibXML::XML::Reader::TYPE_SIGNIFICANT_WHITESPACE, LibXML::XML::Reader::TYPE_ENTITY_REFERENCE
      text = @reader.read_string.force_encoding('utf-8')

      the_text = element.note_adding_text_content(text, @is_realising)
      if element.has_mixed_content
        element << the_text
      else
        element.add_content(the_text, false)
      end
    end
  end

  # The reader ran out of events before an end tag was seen.
  return element if @recovering
  existing_element || element
end

# Consumes the element the reader is on, together with its whole subtree, so
# that the caller's next read continues with whatever follows the element.
#
# A single @reader.read would only step onto the first child; the enclosing
# parse loop would then mistake the skipped element's end tag (or, for an
# empty element, the pending synthetic end event) for the end of the parent.
def discard_current_element
  # build_attribute_arrays may have left the reader on an attribute node.
  @reader.move_to_element

  if @reader.empty_element?
    # No end tag will arrive; cancel the synthetic end event queued for it.
    @insert_end_element = false
    return
  end

  start_depth = @reader.depth
  while @reader.read
    break if @reader.depth == start_depth && @reader.node_type == LibXML::XML::Reader::TYPE_END_ELEMENT
  end
end
private :discard_current_element
308
+
309
# Type of the current parse event: the synthetic TYPE_END_ELEMENT while an end
# element is being faked, otherwise whatever the reader reports.
def current_node_type
  return LibXML::XML::Reader::TYPE_END_ELEMENT if @faking_an_end_element

  @reader.node_type
end
316
+
317
=begin
Disabled debugging aid: prints a description of the node the reader is on.

def describe_current_element_type()
  case @reader.node_type
    when LibXML::XML::Reader::TYPE_ATTRIBUTE
      puts "ATTRIBUTE"
    when LibXML::XML::Reader::TYPE_DOCUMENT
      puts "DOCUMENT"
    when LibXML::XML::Reader::TYPE_ELEMENT
      attribute_count = @reader.attribute_count
      puts "ELEMENT #{ @reader.local_name }, ns: #{ @reader.namespace_uri }, #attributes: #{ attribute_count }, depth: #{ @reader.depth }"
      puts " FAKING END ELEMENT" if @faking_an_end_element
    when LibXML::XML::Reader::TYPE_END_ELEMENT
      puts "END ELEMENT"
    when LibXML::XML::Reader::TYPE_TEXT
      puts "TEXT [[#{ @reader.read_string }]]"
    when LibXML::XML::Reader::TYPE_CDATA
      puts "CDATA [[#{ @reader.read_string }]]"
    when LibXML::XML::Reader::TYPE_SIGNIFICANT_WHITESPACE
      puts "SIGNIFICANT white space [[#{ @reader.read_string }]]"
    when LibXML::XML::Reader::TYPE_ENTITY_REFERENCE
      puts "entity ref"
    when LibXML::XML::Reader::TYPE_WHITESPACE
      puts "whitespace"
    when LibXML::XML::Reader::TYPE_PROCESSING_INSTRUCTION
      puts "processing instruction"
    when LibXML::XML::Reader::TYPE_COMMENT
      puts "comment"
    when LibXML::XML::Reader::TYPE_DOCUMENT_TYPE
      puts "doc type"

    when LibXML::XML::Reader::TYPE_XML_DECLARATION
      puts "xml decl"
    when LibXML::XML::Reader::TYPE_NONE
      puts "NONE!!"
    when LibXML::XML::Reader::TYPE_NOTATION
      puts "notation"
    when LibXML::XML::Reader::TYPE_DOCUMENT_FRAGMENT
      puts "doc fragment"
    when LibXML::XML::Reader::TYPE_ENTITY
      puts "entity"
    when LibXML::XML::Reader::TYPE_END_ENTITY
      puts "end entity"
    else
      puts "UNKNOWN: #{@reader.node_type}"
  end
end
=end
364
+
365
# Advances to the next parse event. Returns a truthy value when there is an
# event to process and a falsy one when the reader has nothing more.
#
# Empty elements (<a/>) produce no end tag from the reader, so after one has
# been read a synthetic end-element event is queued in @insert_end_element and
# delivered by the following call; current_node_type then reports
# TYPE_END_ELEMENT for it. That call must answer true: answering nil would end
# the caller's `while next_reader_event` loop before the synthetic event is
# ever looked at, and the empty element would never be closed.
#
# Raises RuntimeError (with the original backtrace) if the reader fails.
def next_reader_event
  if @insert_end_element
    @faking_an_end_element = true
    @insert_end_element = false
    return true
  end

  @faking_an_end_element = false

  begin
    #TODO -- get rid of this??
    #TODO -- really?
    okay = @reader.read
  rescue => e
    raise RuntimeError, "WHAT?? -- #{ e }", e.backtrace
  end

  @just_opened_an_element = start_element?
  # Queue the synthetic end event if what was just read is an empty element.
  @insert_end_element = (@just_opened_an_element && @reader.empty_element?)

  okay
end
391
+
392
# True when the current parse event is the start of an element.
def start_element?
  LibXML::XML::Reader::TYPE_ELEMENT == current_node_type
end
395
+
396
# True when the current parse event is a (non-significant) whitespace node.
def whitespace?
  current_node_type == LibXML::XML::Reader::TYPE_WHITESPACE
end
399
+
400
# Advances the reader until it sits on a start element or runs out of events,
# then records in @just_opened_an_element whether a start element was found.
# Does not advance at all if the reader is already on a start element.
def find_the_first_element
  until start_element?
    break unless next_reader_event
  end
  @just_opened_an_element = start_element?
end
407
+
408
# Refills the three parallel attribute buffers (name, namespace, value) from
# the element the reader is on. Namespace declarations are skipped.
#
# The buffers are always emptied first; they stay empty when the current event
# is not a start element or the element has no attributes.
# NOTE(review): the reader is stepped through the attributes and is not moved
# back to the element afterwards.
def build_attribute_arrays
  @attribute_name.clear
  @attribute_namespace.clear
  @attribute_value.clear

  return unless LibXML::XML::Reader::TYPE_ELEMENT == current_node_type
  return unless @reader.has_attributes?

  attribute_count = @reader.attribute_count
  @reader.move_to_first_attribute
  attribute_count.times do
    unless @reader.namespace_declaration?
      @attribute_name << @reader.local_name
      @attribute_namespace << @reader.namespace_uri
      @attribute_value << @reader.value
    end

    @reader.move_to_next_attribute
  end
end
433
+
434
# Number of entries currently held in the attribute-name buffer.
def attributeCount
  @attribute_name.length
end
437
+
438
# Name stored at index i of the attribute-name buffer (nil when out of range).
def attributeName(i)
  @attribute_name[i]
end
441
+
442
# Namespace stored at index i of the attribute-namespace buffer (nil when out
# of range).
def attributeNamespace(i)
  @attribute_namespace[i]
end
445
+
446
# Value stored at index i of the attribute-value buffer (nil when out of range).
def attributeValue(i)
  @attribute_value[i]
end
449
+
450
# Depth reported by the underlying reader for its current node.
def depth
  @reader.depth
end
453
+
454
# Line number reported by the underlying reader.
def line
  @reader.line_number
end
457
+
458
# Column number reported by the underlying reader.
def column
  @reader.column_number
end
461
+ end
462
+
463
+ end