xamplr 1.9.13 → 1.9.14

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,437 @@
1
+ # encoding: utf-8
2
+
3
+ require 'nokogiri'
4
+
5
+ module Xampl
6
+
7
+ class FromXML
8
+
9
# Read-only accessors for the parser's mode flags.
# (The Ruby 1.8 form `attr :name, false` was already reduced to plain readers
# for 1.9.1; a single attr_reader declares the same three readers.)
attr_reader :checkWellFormed, :is_realising, :tokenise_content

# NOTE(review): this assigns an instance variable on the class object itself,
# not on parser instances; instances get their @reader in setup_parse_string.
@reader = nil

# Registries shared by every parser: tag name => [classes] and
# namespace-qualified tag name => [class].
@@by_tag = {}
@@by_ns_tag = {}
17
+
18
# Creates a parser.
#
# recovering - when true, parse_element returns the element it built even if
#              an already-known instance with the same id exists.
def initialize(recovering=false)
  @recovering = recovering

  # Per-element attribute buffers, refilled by build_attribute_arrays.
  # They start empty: Array.new(32) would pre-fill them with 32 nils, so
  # attributeCount would report 32 before anything had been parsed.
  @attribute_name = []
  @attribute_namespace = []
  @attribute_value = []

  # State used by next_reader_event to synthesise an end event for
  # self-closing elements.
  @insert_end_element = false
  @faking_an_end_element = false
  @just_opened_an_element = false
end
29
+
30
# Forgets every registered tag => class mapping by replacing both registries
# with fresh, empty hashes.
def self.reset_registry
  @@by_tag, @@by_ns_tag = {}, {}
end
34
+
35
# Registers klass as a handler for an XML tag.
#
# tag    - plain tag name; several classes may share one tag, each is kept once.
# ns_tag - namespace-qualified tag name; the latest registration replaces any
#          earlier one.
# klass  - the class to register.
def self.register(tag, ns_tag, klass)
  @@by_ns_tag[ns_tag] = [klass]

  known = (@@by_tag[tag] ||= [])
  known << klass unless known.include?(klass)
end
46
+
47
# Looks up the classes registered for name, trying the namespace-qualified
# registry first and the plain-tag registry second.
#
# Returns an Array of classes; empty when nothing is registered under name.
def self.registered(name)
  @@by_ns_tag[name] || @@by_tag[name] || []
end
53
+
54
# Entity-resolution hook; currently the identity function.
#
# TODO -- ??? don't seem to need it, this is for specific named entities
#
# Returns name unchanged.
def resolve(name)
  name
end
58
+
59
# Reads the whole file and prepares the reader over its content.
#
# filename         - path of the XML file to read.
# tokenise_content - forwarded to setup_parse_string.
# is_realising     - forwarded to setup_parse_string.
#
# Returns whatever setup_parse_string returns.
def setup_parse(filename, tokenise_content=true, is_realising=false)
  setup_parse_string(File.read(filename), tokenise_content, is_realising)
end
63
+
64
# Creates the Nokogiri pull parser (@reader) over an in-memory XML string and
# records the parsing mode on this instance.
#
# string           - the XML text. It is copied before being tagged as UTF-8,
#                    so the caller's string is neither mutated nor required to
#                    be unfrozen.
# tokenise_content - stored in @tokenise_content.
# is_realising     - stored in @is_realising.
#
# Returns the new reader.
def setup_parse_string(string, tokenise_content=true, is_realising=false)
  @resolver = self

  @is_realising = is_realising
  @tokenise_content = tokenise_content

  # Nokogiri::XML::ParseOptions bits, for reference:
  #
  #   STRICT       = 0        Strict parsing
  #   RECOVER      = 1 << 0   Recover from errors
  #   NOENT        = 1 << 1   Substitute entities
  #   DTDLOAD      = 1 << 2   Load external subsets
  #   DTDATTR      = 1 << 3   Default DTD attributes
  #   DTDVALID     = 1 << 4   Validate with the DTD
  #   NOERROR      = 1 << 5   Suppress error reports
  #   NOWARNING    = 1 << 6   Suppress warning reports
  #   PEDANTIC     = 1 << 7   Pedantic error reporting
  #   NOBLANKS     = 1 << 8   Remove blank nodes
  #   SAX1         = 1 << 9   Use the SAX1 interface internally
  #   XINCLUDE     = 1 << 10  Implement XInclude substitution
  #   NONET        = 1 << 11  Forbid network access
  #   NODICT       = 1 << 12  Do not reuse the context dictionary
  #   NSCLEAN      = 1 << 13  Remove redundant namespace declarations
  #   NOCDATA      = 1 << 14  Merge CDATA as text nodes
  #   NOXINCNODE   = 1 << 15  Do not generate XINCLUDE START/END nodes
  #   DEFAULT_XML  = RECOVER
  #   DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
  options = Nokogiri::XML::ParseOptions::RECOVER |
            Nokogiri::XML::ParseOptions::NOENT |
            Nokogiri::XML::ParseOptions::NONET |
            Nokogiri::XML::ParseOptions::NOCDATA |
            Nokogiri::XML::ParseOptions::DTDATTR

  # dup first: force_encoding works in place and would otherwise re-tag (or,
  # for a frozen string, fail on) the caller's object.
  utf8_string = string.dup.force_encoding('utf-8')
  url = nil
  encoding = nil

  # NOTE(review): debugging output -- dumps the whole document to stdout.
  puts "#{ ::File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] parse:\n#{ utf8_string }"

  @reader = Nokogiri::XML::Reader.from_memory(utf8_string, url, encoding, options)
end
102
+
103
# Parses the XML file at filename into its root element.
#
# filename         - path of the XML file.
# tokenise_content - forwarded to setup_parse.
# is_realising     - forwarded to setup_parse.
#
# Returns the first value produced by parse_element.
# Raises RuntimeError (carrying the original backtrace) when anything fails.
def parse(filename, tokenise_content=true, is_realising=false)
  setup_parse(filename, tokenise_content, is_realising)
  # parse_element may answer [element, ignore_flag]; only the element matters.
  element, _ignored = parse_element
  element
rescue => e
  raise RuntimeError, "trouble parsing file: '#{filename}' -- #{ e }", e.backtrace
end
112
+
113
# Parses string in "realising" mode: a thin wrapper around parse_string with
# is_realising fixed to true.
#
# string           - the XML text.
# tokenise_content - forwarded to parse_string.
# target           - optional existing object to load into.
#
# Returns whatever parse_string returns.
def realise_string(string, tokenise_content=true, target=nil)
  parse_string(string, tokenise_content, true, target)
end
116
+
117
# Parses an in-memory XML string into its root element.
#
# string           - the XML text.
# tokenise_content - forwarded to setup_parse_string.
# is_realising     - forwarded to setup_parse_string.
# target           - optional existing object handed to parse_element to be
#                    loaded instead of a newly created one.
#
# Returns the first value produced by parse_element.
# Raises RuntimeError (carrying the original backtrace) when anything fails;
# the message embeds the whole input string.
def parse_string(string, tokenise_content=true, is_realising=false, target=nil)
  setup_parse_string(string, tokenise_content, is_realising)
  # parse_element may answer [element, ignore_flag]; only the element matters.
  element, _ignored = parse_element(nil, target)
  element
rescue => e
  raise RuntimeError, "trouble parsing string: '#{string}' -- #{ e }", e.backtrace
end
126
+
127
# Skips the element the reader is positioned on, together with everything
# inside it, and returns its XML text.
#
# On return the reader is left on the last node belonging to the skipped
# element (its end tag, or the element itself when it is self-closing), so
# the caller's next @reader.read moves on to whatever follows it.
#
# Returns the skipped element's outer XML.
def chew
  xml = @reader.outer_xml

  # A self-closing element has no content and no end event; reading past it
  # would swallow the node that follows.
  return xml if @reader.empty_element?

  start_depth = @reader.depth
  # Stop on the matching end tag (same depth), or when the input runs out
  # instead of spinning forever on a truncated document.
  while @reader.read
    break if @reader.depth == start_depth
  end
  xml
end
136
+
137
+
138
# Builds the xampl object for the element the reader is on (advancing to the
# next start element first if necessary) and recursively parses its content.
#
# parent - the element the caller is populating, nil at the root. Only used
#          to decide whether a newly created persisted element is force_load-ed.
# target - optional existing object to load into instead of instantiating the
#          registered class.
#
# Returns
#   * nil when no start element can be found,
#   * [nil, true] when the namespaced tag has no registered class (the whole
#     subtree is skipped and reported on stdout),
#   * otherwise the parsed element; when not recovering and an instance with
#     the same persistent id was already known, that known instance is
#     returned instead.
# Raises XamplException for an unknown tag without namespace or for a tag
# with more than one registered class.
#
# NOTE(review): the puts calls are debugging output kept from the original.
def parse_element(parent=nil, target=nil)
  puts "#{ ::File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] ---------------------"
  puts caller(0)[0..5]

  find_the_first_element
  return unless start_element?

  namespace = @reader.namespace_uri
  name = @reader.local_name
  # Remember this now: a self-closing element never produces an end event, so
  # its content loop below must not run (it would consume following siblings
  # as if they were children).
  self_closing = @reader.empty_element?

  puts "#{ ::File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] name: #{ name } in #{ namespace }"

  existing_element = nil
  element = nil
  requires_caching = false

  build_attribute_arrays

  if namespace && !namespace.empty?
    klass_name = "{#{namespace}}#{name}"
    klasses = FromXML.registered(klass_name)
    if klasses.empty?
      # The class has not been registered (either it was never generated, or
      # it was never loaded): discard this node and all children, but say so.
      begin
        thing = chew
        puts "#{ ::File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] UNRECOGNISED CHILD ELEMENTS: class: #{ klass_name }\n#{ thing }"
        return nil, true
      rescue => e
        puts "#{ ::File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] CRAP! #{ e }"
        puts e.backtrace
        raise
      end
    end
    if klasses.size > 1
      raise XamplException.new("there is more than one '#{name}' tag in namespace '#{namespace}'\nplease report this error")
    end
  else
    klasses = FromXML.registered(name)
    if klasses.empty?
      raise XamplException.new("do not recognise tag '#{name}' (no namespace specified)")
    end
    if klasses.size > 1
      raise XamplException.new("there is more than one '#{name}' tag (no namespace specified)")
    end
  end

  # tokenise_string normalises each value in place.
  unless @is_realising
    @attribute_value.each { |value| FromXML.tokenise_string(value) }
  end

  if target
    element = target
    target.load_needed = false
    target = nil
    element.init_attributes(@attribute_name, @attribute_namespace, @attribute_value)
    element.note_attributes_initialised(@is_realising)
  else
    klass = klasses[0]

    if klass.persisted?
      # persisted? answers the name of the attribute holding the persistent id.
      id_index = @attribute_name.index(klass.persisted?.to_s)
      if id_index
        # 'existing element' means a representation of this element that is
        # already in memory. Of the reasonable choices (parse into it, return
        # it, replace it) the original author chose to return it.
        # TODO -- IS THIS RIGHT?
        existing_element = Xampl.find_known(klass, @attribute_value[id_index])
        element = existing_element

        unless element
          element = klass.new
          # TODO -- IS THIS RIGHT? (new persisted elements are always cached)
          requires_caching = true
          element.force_load if parent && !@recovering
          element.note_created(@is_realising)
        end
      end
    end

    unless element
      element = klass.new
      element.note_created(@is_realising)
    end

    element.note_initialise_attributes_with(@attribute_name, @attribute_namespace, @attribute_value, @is_realising)
    element.init_attributes(@attribute_name, @attribute_namespace, @attribute_value)
    element.note_attributes_initialised(@is_realising)

    Xampl.cache(element) if requires_caching && element.persist_required
  end

  # Shared "element is complete" step for real and implied end tags.
  finish = lambda do
    closed = element.note_closed(@is_realising)
    @recovering ? closed : (existing_element || closed)
  end

  return finish.call if self_closing

  while @reader.read
    if @reader.value?
      puts "TEXT(#{ @reader.depth }) name: #{ @reader.name }, value: [[#{ @reader.value }]]"
      #TODO -- is this forced UTF8 conversion really necessary with nokogiri??
      text = @reader.value.force_encoding('utf-8')
      the_text = element.note_adding_text_content(text, @is_realising)
      if element.has_mixed_content
        element << the_text
      else
        element.add_content(the_text, false)
      end
    elsif Nokogiri::XML::Node::ELEMENT_NODE == @reader.node_type
      if @reader.attributes?
        puts "OPEN(#{ @reader.depth }) name: #{ @reader.name }, #{ @reader.empty_element? }, attributes: #{ @reader.attributes.inspect }"
      else
        puts "OPEN(#{ @reader.depth }) name: #{ @reader.name }, #{ @reader.empty_element? }, attributes: NONE"
      end
      child, ignore_child = parse_element(element)

      unless ignore_child
        case child
        when XamplObject
          child = child.note_add_to_parent(element, @is_realising)
          child = element.note_add_child(child, @is_realising)
          puts "#{ ::File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] ELEMENT: #{ element }/#{ element.class }"
          puts "#{ ::File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] CHILD: #{ child }/#{ child.class }"
          # either hook may veto the child by answering nil
          child.append_to(element) if child
        when XMLText
          #TODO -- get rid of this puts
          puts "UNRECOGNISED Well-formed XML: #{child.to_s[0..25]}..."
        else
          #TODO -- get rid of this puts
          puts "WHAT IS THIS??? #{child.class.name}"
        end
      end
    elsif Nokogiri::XML::Node::ELEMENT_DECL == @reader.node_type
      # NOTE(review): ELEMENT_DECL appears to be used here because it shares
      # the numeric value of the reader's end-element node type -- confirm.
      puts "CLOSE(#{ @reader.depth }) name: #{ @reader.name }, #{ @reader.empty_element? }"
      return finish.call
    else
      puts "WTF??(#{ @reader.depth }) name: #{ @reader.name }, #{ say_node_type(@reader.node_type)}/#{ @reader.node_type }\n#{ @reader.outer_xml }"
    end
  end

  # Input ended without an end tag: hand back what was built, unclosed.
  return element if @recovering
  existing_element || element
end
317
+
318
# Normalises whitespace in str IN PLACE: optionally trims both ends, then
# collapses every run of spaces, newlines, carriage returns and tabs into a
# single space. Callers rely on the in-place change.
#
# str   - the String to normalise, or nil.
# strip - when true (the default) leading/trailing whitespace is removed first.
#
# Returns the same String object, or nil when str is nil.
def self.tokenise_string(str, strip=true)
  return nil unless str

  str.strip! if strip
  str.gsub!(/[ \n\r\t]+/, " ")
  str
end
324
+
325
# Answers the node type of the current reader event, taking the synthetic end
# event for self-closing elements into account.
#
# Returns the end-element type code while an end element is being faked,
# otherwise the reader's own node type.
def current_node_type
  if @faking_an_end_element
    # This used to name LibXML::XML::Reader::TYPE_END_ELEMENT, a constant of
    # a library this file does not load. Use the same Nokogiri constant that
    # parse_element compares against to detect an end tag.
    Nokogiri::XML::Node::ELEMENT_DECL
  else
    @reader.node_type
  end
end
332
+
333
# Advances to the next reader event, inserting a synthetic end event right
# after every self-closing start element.
#
# Returns nil when the event delivered is the synthetic end element (the
# reader itself is not advanced), otherwise the result of @reader.read.
# Raises RuntimeError (with the original backtrace) if the reader fails.
def next_reader_event
  if @insert_end_element
    # The previous event was a self-closing element: pretend it just closed.
    @faking_an_end_element = true
    @insert_end_element = false
    return
  end

  @faking_an_end_element = false

  begin
    okay = @reader.read
  rescue => e
    raise RuntimeError, "WHAT?? -- #{ e }", e.backtrace
  end

  @just_opened_an_element = start_element?
  @insert_end_element = (@just_opened_an_element && @reader.empty_element?)

  okay
end
353
+
354
# True when the current event (see current_node_type) is the start of an
# element.
def start_element?
  Nokogiri::XML::Node::ELEMENT_NODE == current_node_type
end
357
+
358
# True when the current node carries a value that contains no non-whitespace
# character. Nokogiri has no dedicated whitespace node type, so the value
# itself is inspected.
#
# Returns a falsy value when the node has no value at all.
def whitespace?
  @reader.value? && @reader.value !~ /\S/
end
363
+
364
# Advances the reader until it sits on a start element; does nothing if it
# already does. Gives up as soon as next_reader_event answers a falsy value.
# Afterwards @just_opened_an_element tells whether a start element was found.
def find_the_first_element
  until start_element?
    break unless next_reader_event
  end
  @just_opened_an_element = start_element?
end
371
+
372
# Refills the three parallel attribute buffers from the element the reader is
# positioned on. The buffers are emptied in place first, so they end up empty
# when the element has no attributes.
#
# Every attribute namespace is recorded as nil.
#
# NOTE(review): the previous LibXML-based implementation (walking the
# attributes with move_to_first_attribute / move_to_next_attribute) skipped
# namespace declarations and recorded each attribute's local name and
# namespace URI. Whether @reader.attributes also reports xmlns declarations
# should be confirmed against the Nokogiri documentation.
def build_attribute_arrays
  @attribute_name.clear
  @attribute_namespace.clear
  @attribute_value.clear

  return unless @reader.attributes?

  @reader.attributes.each do |attr_name, attr_value|
    @attribute_name << attr_name
    @attribute_namespace << nil
    @attribute_value << attr_value
  end
end
407
+
408
# Number of entries currently held in the attribute-name buffer.
def attributeCount
  @attribute_name.length
end
411
+
412
# Name of the i-th buffered attribute (nil when i is out of range).
def attributeName(i)
  @attribute_name[i]
end
415
+
416
# Namespace recorded for the i-th buffered attribute (nil when i is out of
# range).
def attributeNamespace(i)
  @attribute_namespace[i]
end
419
+
420
# Value of the i-th buffered attribute (nil when i is out of range).
def attributeValue(i)
  @attribute_value[i]
end
423
+
424
# Nesting depth reported by the reader for its current node.
def depth
  @reader.depth
end
427
+
428
# Line number reported by the reader for its current position.
#
# NOTE(review): line_number looks like a leftover of the LibXML reader API --
# confirm that the Nokogiri reader responds to it.
def line
  @reader.line_number
end
431
+
432
# Column number reported by the reader for its current position.
#
# NOTE(review): column_number looks like a leftover of the LibXML reader API
# -- confirm that the Nokogiri reader responds to it.
def column
  @reader.column_number
end
435
+ end
436
+
437
+ end
@@ -47,24 +47,12 @@ module Xampl
47
47
  =begin
48
48
 
49
49
  def attr_esc_fast(s)
50
- #NOTE -- there are known issues with using Ruby 1.9.1 and libxml-ruby, which this is using. Seems to mostly
51
- # be related to DOM and XPATH but...
52
- unless defined?(@@doc) then
53
- @@doc = LibXML::XML::Document.new()
54
- @@doc.root = LibXML::XML::Node.new('r')
55
- @@attr = LibXML::XML::Attr.new(@@doc.root, 'v', 'v')
56
- end
57
-
58
- @@attr.value = s.to_s
59
- (@@doc.root.to_s)[6..-4]
60
50
  end
61
51
 
62
52
  =end
63
53
 
64
54
  def attr_esc_slow(s)
65
- if (s.kind_of? XamplObject)
66
- return attr_esc(s.to_xml)
67
- end
55
+ return attr_esc(s.to_xml) if (s.kind_of? XamplObject)
68
56
 
69
57
  result = s.to_s.dup
70
58
 
@@ -77,7 +65,8 @@ module Xampl
77
65
  return "\"result\""
78
66
  end
79
67
 
80
- def attr_esc_encoding_safe(s)
68
+ def attr_esc(s)
69
+ # This depends on ruby 1.9
81
70
  return attr_esc(s.to_xml) if (s.kind_of? XamplObject)
82
71
 
83
72
  begin
@@ -88,22 +77,15 @@ module Xampl
88
77
  }
89
78
  result = s.to_s.dup.encode('UTF-8', 'UTF-8', options)
90
79
 
91
- # puts "#{ File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] IN: [[#{ s.to_s }]], OUT: [[#{ result }]]"
92
-
93
80
  return result
94
81
  rescue => e
95
82
  puts "Naughty Programmer! No! Bad!: #{ e } encoding in: #{ s.encoding }, out: #{ result.encoding }"
96
83
  puts e.backtrace
97
84
  end
98
85
 
99
-
100
86
  return ''
101
-
102
87
  end
103
88
 
104
- # alias attr_esc attr_esc_fast
105
- alias attr_esc attr_esc_encoding_safe
106
-
107
89
  def content_esc(s)
108
90
  return content_esc(s.to_s.dup) if (s.kind_of? XamplObject)
109
91