xamplr 1.9.13 → 1.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,437 @@
1
+ # encoding: utf-8
2
+
3
+ require 'nokogiri'
4
+
5
module Xampl

  # Builds xampl objects from XML text using a forward-only pull reader
  # (Nokogiri::XML::Reader).
  #
  # Generated classes announce themselves through FromXML.register; while
  # parsing, every start tag is looked up in that registry, an instance of the
  # registered class is created (or an already known persisted instance is
  # reused), its attributes are initialised, and text and child elements are
  # handed to it through the note_* / append_to callbacks.
  class FromXML

    # Node type numbers as reported by Nokogiri::XML::Reader#node_type. They
    # are libxml2's xmlReaderTypes values; spelling them out here avoids
    # borrowing look-alike constants from unrelated namespaces (the DOM
    # constant ELEMENT_DECL merely shares the value 15 with "end element").
    START_ELEMENT_TYPE = 1
    END_ELEMENT_TYPE = 15

    attr_reader :checkWellFormed, :is_realising, :tokenise_content

    # Registry of generated classes: plain tag name => [classes] and
    # "{namespace}tag" => [class].
    @@by_tag = {}
    @@by_ns_tag = {}

    # recovering:: when true, parse_element always returns the element it
    #              populated (never a previously known instance), and freshly
    #              created persisted elements are not force-loaded.
    def initialize(recovering=false)
      @recovering = recovering
      @reader = nil

      # Parallel arrays describing the attributes of the current start tag.
      @attribute_name = []
      @attribute_namespace = []
      @attribute_value = []

      @insert_end_element = false
      @faking_an_end_element = false
      @just_opened_an_element = false
    end

    # Forgets every registered class.
    def FromXML.reset_registry
      @@by_tag = {}
      @@by_ns_tag = {}
    end

    # Registers klass under its namespaced tag (replacing any previous entry)
    # and adds it to the list kept for the plain tag unless already present.
    def FromXML.register(tag, ns_tag, klass)
      @@by_ns_tag[ns_tag] = [klass]
      known = (@@by_tag[tag] ||= [])
      known << klass unless known.include?(klass)
    end

    # Returns the classes registered for name, trying the namespaced registry
    # first and the plain-tag registry second. Returns [] when nothing matches.
    def FromXML.registered(name)
      @@by_ns_tag[name] || @@by_tag[name] || []
    end

    # Entity-resolution hook; currently the identity function.
    def resolve(name)
      #TODO -- ??? don't seem to need it, this is for specific named entities
      name
    end

    # Reads filename and prepares the reader over its content.
    def setup_parse(filename, tokenise_content=true, is_realising=false)
      setup_parse_string(File.read(filename), tokenise_content, is_realising)
    end

    # Prepares a reader over string and resets the per-parse state.
    # Returns the reader.
    def setup_parse_string(string, tokenise_content=true, is_realising=false)
      @resolver = self

      @is_realising = is_realising
      @tokenise_content = tokenise_content

      # A parser may be reused; do not let a synthetic end-element left over
      # from the previous document leak into this one.
      @insert_end_element = false
      @faking_an_end_element = false
      @just_opened_an_element = false

      # Parse options used (see Nokogiri::XML::ParseOptions):
      #   RECOVER  recover from errors
      #   NOENT    substitute entities
      #   NONET    forbid network access
      #   NOCDATA  merge CDATA as text nodes
      #   DTDATTR  default DTD attributes
      # NOTE(review): NOENT asks the parser to substitute entities; confirm
      # that is acceptable if documents from untrusted sources are parsed here.
      options = Nokogiri::XML::ParseOptions::RECOVER |
                Nokogiri::XML::ParseOptions::NOENT |
                Nokogiri::XML::ParseOptions::NONET |
                Nokogiri::XML::ParseOptions::NOCDATA |
                Nokogiri::XML::ParseOptions::DTDATTR

      # force_encoding relabels its receiver in place: work on a copy so the
      # caller's string is left alone and frozen strings are accepted.
      utf8_string = if Encoding::UTF_8 == string.encoding
                      string
                    else
                      string.dup.force_encoding('utf-8')
                    end
      url = nil
      encoding = nil

      @reader = Nokogiri::XML::Reader.from_memory(utf8_string, url, encoding, options)
    end

    # Parses the file and returns the root element.
    # Any failure is re-raised as a RuntimeError naming the file and keeping
    # the original backtrace.
    def parse(filename, tokenise_content=true, is_realising=false)
      setup_parse(filename, tokenise_content, is_realising)
      element, _ignore = parse_element
      element
    rescue => e
      raise RuntimeError, "trouble parsing file: '#{filename}' -- #{ e }", e.backtrace
    end

    # Same as parse_string with is_realising set to true.
    def realise_string(string, tokenise_content=true, target=nil)
      parse_string(string, tokenise_content, true, target)
    end

    # Parses string and returns the root element. When target is given the
    # root element is loaded into it instead of into a new object.
    # Any failure is re-raised as a RuntimeError quoting the string and keeping
    # the original backtrace.
    def parse_string(string, tokenise_content=true, is_realising=false, target=nil)
      setup_parse_string(string, tokenise_content, is_realising)
      element, _ignore = parse_element(nil, target)
      element
    rescue => e
      raise RuntimeError, "trouble parsing string: '#{string}' -- #{ e }", e.backtrace
    end

    # Skips the element the reader is positioned on, children included, and
    # returns its outer XML. Afterwards the reader rests on the last event that
    # belongs to the skipped element, so the caller's next read moves past it.
    def chew
      xml = @reader.outer_xml

      # A self-closing element has no end event of its own; the reader is
      # already on its only event.
      return xml if @reader.empty_element?

      depth = @reader.depth
      loop do
        break unless @reader.read          # input ended early: stop, do not spin
        break if depth == @reader.depth    # back at our own depth: the end tag
      end
      xml
    end

    # Parses the element at (or the first element after) the reader's current
    # position, recursing into child elements.
    #
    # parent:: the element being populated by the caller, nil for the root
    # target:: an existing object to load the element into instead of
    #          instantiating the registered class
    #
    # Returns nil when no start tag is found, [nil, true] when a namespaced
    # element has no registered class (the element is skipped and reported on
    # stdout), and otherwise the element (the previously known instance when
    # one was found and the parser is not recovering).
    # Raises XamplException for unknown un-namespaced tags and for ambiguous
    # registrations.
    def parse_element(parent=nil, target=nil)
      find_the_first_element
      return unless start_element?

      namespace = @reader.namespace_uri
      name = @reader.local_name

      build_attribute_arrays

      namespaced = !(namespace.nil? || namespace.empty?)
      klass_name = namespaced ? "{#{namespace}}#{name}" : name
      klasses = FromXML.registered(klass_name)

      if klasses.empty?
        unless namespaced
          raise XamplException.new("do not recognise tag '#{name}' (no namespace specified)")
        end
        # The class has not been registered (either it was never generated, or
        # it was never loaded): discard this node and all children, but say
        # something.
        thing = chew
        puts "#{ ::File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] UNRECOGNISED CHILD ELEMENTS: class: #{ klass_name }\n#{ thing }"
        return nil, true
      end

      if 1 < klasses.size
        if namespaced
          raise XamplException.new("there is more than one '#{name}' tag in namespace '#{namespace}'\nplease report this error")
        end
        raise XamplException.new("there is more than one '#{name}' tag (no namespace specified)")
      end

      # tokenise_string normalises each value in place.
      @attribute_value.each { |value| FromXML.tokenise_string(value) } unless @is_realising

      existing_element = nil
      if target
        element = target
        element.load_needed = false
        element.init_attributes(@attribute_name, @attribute_namespace, @attribute_value)
        element.note_attributes_initialised(@is_realising)
      else
        element, existing_element = instantiate_element(klasses[0], parent)
      end

      # The reader emits no end event for a self-closing element. Reading on
      # would swallow the following siblings as children, so close it now.
      return close_element(element, existing_element) if @reader.empty_element?

      while @reader.read
        if @reader.value?
          #TODO -- is this forced UTF8 conversion really necessary with nokogiri??
          text = @reader.value.force_encoding('utf-8')
          the_text = element.note_adding_text_content(text, @is_realising)
          if element.has_mixed_content
            element << the_text
          else
            element.add_content(the_text, false)
          end
        else
          case @reader.node_type
          when START_ELEMENT_TYPE
            child, ignore_child = parse_element(element)
            attach_child(element, child) unless ignore_child
          when END_ELEMENT_TYPE
            return close_element(element, existing_element)
          end
          # Any other value-less node is skipped.
        end
      end

      # The input ended before this element's end tag was seen.
      return element if @recovering
      existing_element || element
    end

    # Collapses every run of spaces, newlines, carriage returns and tabs in str
    # to a single space, after trimming both ends unless strip is false.
    # str is modified in place and returned; nil is passed through.
    def FromXML.tokenise_string(str, strip=true)
      return nil unless str
      str.strip! if strip
      str.gsub!(/[ \n\r\t]+/, " ")
      str
    end

    # Node type of the current event, taking a synthetic end-element into
    # account.
    def current_node_type
      @faking_an_end_element ? END_ELEMENT_TYPE : @reader.node_type
    end

    # Advances to the next event. After a self-closing start tag was read, the
    # following call produces a synthetic end-element event instead of reading
    # (and returns nil); otherwise returns the result of the reader's read.
    # Reader failures are re-raised as RuntimeError.
    def next_reader_event
      if @insert_end_element
        @faking_an_end_element = true
        @insert_end_element = false
        return
      end

      @faking_an_end_element = false

      begin
        okay = @reader.read
      rescue => e
        raise RuntimeError, "WHAT?? -- #{ e }", e.backtrace
      end

      @just_opened_an_element = start_element?
      @insert_end_element = (@just_opened_an_element && @reader.empty_element?)

      okay
    end

    # True when the current event is a start tag.
    def start_element?
      START_ELEMENT_TYPE == current_node_type
    end

    # True when the current node carries a value made of whitespace only
    # (there is no whitespace type with nokogiri).
    def whitespace?
      @reader.value? && @reader.value !~ /\S/
    end

    # Advances until the reader is on a start tag or runs out of events.
    def find_the_first_element
      until start_element?
        break unless next_reader_event
      end
      @just_opened_an_element = start_element?
    end

    # Refills the parallel attribute arrays from the current start tag.
    # Namespace declarations (xmlns, xmlns:prefix) are not attributes of the
    # element and are skipped, as the earlier libxml-based code did.
    # The namespace of each attribute is recorded as nil.
    def build_attribute_arrays
      @attribute_name.clear
      @attribute_namespace.clear
      @attribute_value.clear

      return unless @reader.attributes?

      @reader.attributes.each do |name, value|
        next if 'xmlns' == name || name.start_with?('xmlns:')

        @attribute_name << name
        @attribute_namespace << nil
        @attribute_value << value
      end
    end

    # Number of attributes on the current start tag.
    def attributeCount
      @attribute_name.length
    end

    # Name of the i-th attribute.
    def attributeName(i)
      @attribute_name[i]
    end

    # Namespace of the i-th attribute.
    def attributeNamespace(i)
      @attribute_namespace[i]
    end

    # Value of the i-th attribute.
    def attributeValue(i)
      @attribute_value[i]
    end

    # Depth of the reader's current node.
    def depth
      @reader.depth
    end

    # NOTE(review): relies on the reader responding to line_number -- confirm
    # the Nokogiri reader in use provides it.
    def line
      @reader.line_number
    end

    # NOTE(review): relies on the reader responding to column_number -- confirm
    # the Nokogiri reader in use provides it.
    def column
      @reader.column_number
    end

    private

    # Produces the object for the current start tag: the already known
    # persisted instance when the persisted attribute identifies one, otherwise
    # a new instance of klass. Initialises its attributes and caches freshly
    # created persisted instances. Returns [element, existing_element].
    def instantiate_element(klass, parent)
      existing_element = nil
      element = nil
      requires_caching = false

      pid_attribute = klass.persisted?
      if pid_attribute
        index = @attribute_name.index(pid_attribute.to_s)
        if index
          # 'existing element' means a representation of this element already
          # in memory; when there is one it is what gets populated and returned.
          existing_element = Xampl.find_known(klass, @attribute_value[index])
          element = existing_element #TODO -- IS THIS RIGHT?
          unless element
            element = klass.new
            requires_caching = true
            element.force_load if parent && !@recovering
            element.note_created(@is_realising)
          end
        end
      end

      unless element
        element = klass.new
        element.note_created(@is_realising)
      end

      element.note_initialise_attributes_with(@attribute_name, @attribute_namespace, @attribute_value, @is_realising)
      element.init_attributes(@attribute_name, @attribute_namespace, @attribute_value)
      element.note_attributes_initialised(@is_realising)

      Xampl.cache(element) if requires_caching && element.persist_required

      [element, existing_element]
    end

    # Hands a parsed child to element through the note_* callbacks and appends
    # it; anything that is not a XamplObject is only reported.
    def attach_child(element, child)
      case child
      when XamplObject
        child = child.note_add_to_parent(element, @is_realising) if child
        child = element.note_add_child(child, @is_realising) if element
        child.append_to(element) if element && child
      when XMLText
        #TODO -- get rid of this puts
        puts "UNRECOGNISED Well-formed XML: #{child.to_s[0..25]}..."
      else
        #TODO -- get rid of this puts
        puts "WHAT IS THIS??? #{child.class.name}"
      end
    end

    # Notifies element that its end has been reached and picks the value
    # parse_element returns for it.
    def close_element(element, existing_element)
      closed = element.note_closed(@is_realising)
      return closed if @recovering
      existing_element || closed
    end
  end

end
@@ -47,24 +47,12 @@ module Xampl
47
47
  =begin
48
48
 
49
49
  def attr_esc_fast(s)
50
- #NOTE -- there are known issues with using Ruby 1.9.1 and libxml-ruby, which this is using. Seems to mostly
51
- # be related to DOM and XPATH but...
52
- unless defined?(@@doc) then
53
- @@doc = LibXML::XML::Document.new()
54
- @@doc.root = LibXML::XML::Node.new('r')
55
- @@attr = LibXML::XML::Attr.new(@@doc.root, 'v', 'v')
56
- end
57
-
58
- @@attr.value = s.to_s
59
- (@@doc.root.to_s)[6..-4]
60
50
  end
61
51
 
62
52
  =end
63
53
 
64
54
  def attr_esc_slow(s)
65
- if (s.kind_of? XamplObject)
66
- return attr_esc(s.to_xml)
67
- end
55
+ return attr_esc(s.to_xml) if (s.kind_of? XamplObject)
68
56
 
69
57
  result = s.to_s.dup
70
58
 
@@ -77,7 +65,8 @@ module Xampl
77
65
  return "\"result\""
78
66
  end
79
67
 
80
- def attr_esc_encoding_safe(s)
68
+ def attr_esc(s)
69
+ # This depends on ruby 1.9
81
70
  return attr_esc(s.to_xml) if (s.kind_of? XamplObject)
82
71
 
83
72
  begin
@@ -88,22 +77,15 @@ module Xampl
88
77
  }
89
78
  result = s.to_s.dup.encode('UTF-8', 'UTF-8', options)
90
79
 
91
- # puts "#{ File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] IN: [[#{ s.to_s }]], OUT: [[#{ result }]]"
92
-
93
80
  return result
94
81
  rescue => e
95
82
  puts "Naughty Programmer! No! Bad!: #{ e } encoding in: #{ s.encoding }, out: #{ result.encoding }"
96
83
  puts e.backtrace
97
84
  end
98
85
 
99
-
100
86
  return ''
101
-
102
87
  end
103
88
 
104
- # alias attr_esc attr_esc_fast
105
- alias attr_esc attr_esc_encoding_safe
106
-
107
89
  def content_esc(s)
108
90
  return content_esc(s.to_s.dup) if (s.kind_of? XamplObject)
109
91