xamplr 1.9.13 → 1.9.14

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -12,7 +12,7 @@ begin
12
12
  gem.authors = ["Bob Hutchison"]
13
13
 
14
14
  gem.add_dependency('xamplr-pp', '>=1.2.0')
15
- gem.add_dependency('libxml-ruby', '>=1.1.3')
15
+ gem.add_dependency('nokogiri', '>=1.4.3')
16
16
 
17
17
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
18
18
  end
data/VERSION.yml CHANGED
@@ -1,5 +1,5 @@
1
1
  ---
2
2
  :major: 1
3
3
  :minor: 9
4
- :patch: 13
4
+ :patch: 14
5
5
  :build:
data/lib/xamplr/TODO CHANGED
@@ -17,6 +17,6 @@
17
17
  level (below object level) and that this will allow sharing of the DB a
18
18
  little better -- still don't know about multiple writers to the same repo...
19
19
  can tyrant help here.
20
- -- mongo, and a couple of others might be interesting
20
+ -- redis, and a couple of others might be interesting (mongo didn't work out)
21
21
  -- what about SQL dbs? Maybe an SQL or DataMapper version??
22
22
 
@@ -13,6 +13,18 @@ module Xampl
13
13
  end
14
14
  end
15
15
 
16
+ class DuplicateXamplInPersister < Exception
17
+ attr_reader :msg
18
+
19
+ def initialize(xampl1, xampl2, persister)
20
+ @msg = "index: '#{ xampl1.get_the_index }', persister: #{ persister.name }, first: #{ xampl1 }, second: #{ xampl2 }"
21
+ end
22
+
23
+ def message
24
+ @msg
25
+ end
26
+ end
27
+
16
28
  class AlreadyKnownToPersister < Exception
17
29
  attr_reader :msg, :xampl
18
30
 
@@ -45,6 +57,40 @@ module Xampl
45
57
  end
46
58
  end
47
59
 
60
+ class NoAnonymousPersisters < Exception
61
+ def message
62
+ "All persisters must be named"
63
+ end
64
+ end
65
+
66
+ class NoPersisterNamed < Exception
67
+ attr_reader :name
68
+
69
+ def initialize(name=nil)
70
+ @name = name
71
+ end
72
+
73
+ def message
74
+ if name then
75
+ "there is no persister named: '#{ @name }'"
76
+ else
77
+ "no name was supplied"
78
+ end
79
+ end
80
+ end
81
+
82
+ class NotXamplPersistedObject < Exception
83
+ attr_reader :klass
84
+
85
+ def initialize(thing=nil)
86
+ @klass = thing.class.name
87
+ end
88
+
89
+ def message
90
+ "require a XamplPersistedObject, got a #{ klass }"
91
+ end
92
+ end
93
+
48
94
  class BlockedChange < Exception
49
95
  attr_reader :xampl
50
96
 
@@ -121,7 +167,7 @@ module Xampl
121
167
  attr_reader :msg
122
168
 
123
169
  def initialize(active, local)
124
- @msg = "mixed persisters:: active #{active.name}, local: #{local.name}"
170
+ @msg = "mixed persisters:: active #{active.name}/#{ active }, local: #{local.name}/#{ local }"
125
171
  end
126
172
 
127
173
  def message
@@ -1,6 +1,6 @@
1
1
  # encoding utf-8
2
2
 
3
- require 'libxml'
3
+ require 'nokogiri'
4
4
 
5
5
  module Xampl
6
6
 
@@ -10,31 +10,31 @@ module Xampl
10
10
  attr :is_realising #1.9.1 , false
11
11
  attr :tokenise_content #1.9.1 , false
12
12
 
13
- @reader = nil
13
+ @reader = nil
14
14
 
15
- @@by_tag = {}
15
+ @@by_tag = {}
16
16
  @@by_ns_tag = {}
17
17
 
18
18
  def initialize(recovering=false)
19
- @recovering = recovering
19
+ @recovering = recovering
20
20
 
21
- @attribute_name = Array.new(32)
22
- @attribute_namespace = Array.new(32)
23
- @attribute_value = Array.new(32)
21
+ @attribute_name = Array.new(32)
22
+ @attribute_namespace = Array.new(32)
23
+ @attribute_value = Array.new(32)
24
24
 
25
- @insert_end_element = false
26
- @faking_an_end_element = false
25
+ @insert_end_element = false
26
+ @faking_an_end_element = false
27
27
  @just_opened_an_element = false
28
28
  end
29
29
 
30
30
  def FromXML.reset_registry
31
- @@by_tag = {}
31
+ @@by_tag = {}
32
32
  @@by_ns_tag = {}
33
33
  end
34
34
 
35
35
  def FromXML.register(tag, ns_tag, klass)
36
36
  @@by_ns_tag[ns_tag] = [klass]
37
- a = @@by_tag[tag]
37
+ a = @@by_tag[tag]
38
38
  if (nil == a) then
39
39
  @@by_tag[tag] = [klass]
40
40
  else
@@ -45,50 +45,57 @@ module Xampl
45
45
  end
46
46
 
47
47
  def FromXML.registered(name)
48
- #puts "registered by ns tag: #{ @@by_ns_tag.keys.sort.inspect }"
49
48
  klass = @@by_ns_tag[name]
50
- #puts "registered by tag: #{ @@by_tag.keys.sort.inspect }"
51
49
  klass = @@by_tag[name] unless klass
52
50
  klass = [] unless klass
53
51
  return klass
54
52
  end
55
53
 
56
54
  def resolve(name)
57
- #TODO -- ???
55
+ #TODO -- ??? don't seem to need it, this is for specific named entities
58
56
  return name
59
57
  end
60
58
 
61
59
  def setup_parse(filename, tokenise_content=true, is_realising=false)
62
- @resolver = self
63
-
64
- @is_realising = is_realising
65
- @tokenise_content = tokenise_content
66
-
67
- @reader = LibXML::XML::Reader.file(filename,
68
- :options => LibXML::XML::Parser::Options::NOENT |
69
- LibXML::XML::Parser::Options::NONET |
70
- LibXML::XML::Parser::Options::NOCDATA |
71
- LibXML::XML::Parser::Options::DTDATTR |
72
- # LibXML::XML::Parser::Options::COMPACT |
73
- 0)
74
- #TODO CLOSE THIS THING!!
60
+ xml = File.read(filename)
61
+ setup_parse_string(xml, tokenise_content, is_realising)
75
62
  end
76
63
 
77
64
  def setup_parse_string(string, tokenise_content=true, is_realising=false)
78
- @resolver = self
65
+ @resolver = self
79
66
 
80
- @is_realising = is_realising
67
+ @is_realising = is_realising
81
68
  @tokenise_content = tokenise_content
82
69
 
83
- # setInput(string)
84
- @reader = LibXML::XML::Reader.string(string,
85
- :options => LibXML::XML::Parser::Options::NOENT |
86
- LibXML::XML::Parser::Options::NONET |
87
- LibXML::XML::Parser::Options::NOCDATA |
88
- LibXML::XML::Parser::Options::DTDATTR |
89
- # LibXML::XML::Parser::Options::COMPACT) |
90
- 0)
91
- #TODO CLOSE THIS THING!!
70
+ =begin
71
+ STRICT = 0 Strict parsing
72
+ RECOVER = 1 << 0 Recover from errors
73
+ NOENT = 1 << 1 Substitute entities
74
+ DTDLOAD = 1 << 2 Load external subsets
75
+ DTDATTR = 1 << 3 Default DTD attributes
76
+ DTDVALID = 1 << 4 validate with the DTD
77
+ NOERROR = 1 << 5 suppress error reports
78
+ NOWARNING = 1 << 6 suppress warning reports
79
+ PEDANTIC = 1 << 7 pedantic error reporting
80
+ NOBLANKS = 1 << 8 remove blank nodes
81
+ SAX1 = 1 << 9 use the SAX1 interface internally
82
+ XINCLUDE = 1 << 10 Implement XInclude substitution
83
+ NONET = 1 << 11 Forbid network access
84
+ NODICT = 1 << 12 Do not reuse the context dictionary
85
+ NSCLEAN = 1 << 13 remove redundant namespaces declarations
86
+ NOCDATA = 1 << 14 merge CDATA as text nodes
87
+ NOXINCNODE = 1 << 15 do not generate XINCLUDE START/END nodes
88
+ DEFAULT_XML = RECOVER the default options used for parsing XML documents
89
+ DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET the default options used for parsing HTML documents
90
+ =end
91
+
92
+ options = Nokogiri::XML::ParseOptions::RECOVER | Nokogiri::XML::ParseOptions::NOENT | Nokogiri::XML::ParseOptions::NONET | Nokogiri::XML::ParseOptions::NOCDATA | Nokogiri::XML::ParseOptions::DTDATTR
93
+
94
+ utf8_string = string.force_encoding('utf-8')
95
+ url = nil
96
+ encoding = nil
97
+
98
+ @reader = Nokogiri::XML::Reader.from_memory(utf8_string, url, encoding, options)
92
99
  end
93
100
 
94
101
  def parse(filename, tokenise_content=true, is_realising=false)
@@ -115,22 +122,27 @@ module Xampl
115
122
  end
116
123
  end
117
124
 
118
- def FromXML.tokenise_string(str, strip=true)
119
- return nil unless str
120
- str.strip! if strip
121
- str.gsub!(/[ \n\r\t][ \n\r\t]*/, " ")
122
- return str
125
+ def chew
126
+ xml = @reader.outer_xml
127
+ depth = @reader.depth
128
+ @reader.read
129
+ while depth != @reader.depth do
130
+ @reader.read
131
+ end
132
+ return xml
123
133
  end
124
134
 
135
+
125
136
  def parse_element(parent=nil, target=nil)
137
+ # puts caller(0)[0..5]
138
+
126
139
  find_the_first_element
127
140
  return unless start_element?
128
141
 
129
- namespace = @reader.namespace_uri
130
- name = @reader.local_name
131
-
142
+ namespace = @reader.namespace_uri
143
+ name = @reader.local_name
132
144
  existing_element = nil
133
- element = nil
145
+ element = nil
134
146
 
135
147
  requires_caching = false
136
148
 
@@ -138,17 +150,19 @@ module Xampl
138
150
 
139
151
  if ((nil != namespace) and (0 < namespace.size)) then
140
152
  klass_name = "{#{namespace}}#{name}"
141
- klasses = FromXML.registered(klass_name)
153
+ klasses = FromXML.registered(klass_name)
142
154
  if (0 == klasses.size) then
143
155
  # The class has not been registered (either it was never generated, or it was never loaded)
144
- puts "#{ __FILE__ }:#{ __LINE__ } [#{__method__}] Don't know about class name: #{ klass_name }"
145
- # puts "#{ __FILE__ }:#{ __LINE__ } [#{__method__}] @@by_ns_tag: #{ @@by_ns_tag.inspect }"
146
- # puts "#{ __FILE__ }:#{ __LINE__ } [#{__method__}] @@by_tag: #{ @@by_tag.inspect }"
147
- xml_text = XMLText.new
148
- xml_text.build(self)
149
- xml_text = parent.note_adding_text_content(xml_text, @is_realising)
150
- parent.add_content(xml_text, @tokenise_content) if xml_text
151
- return xml_text, false
156
+ begin
157
+ #discard this node and all children, but say something
158
+ thing = chew
159
+ puts "#{ ::File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] UNRECOGNISED CHILD ELEMENTS: class: #{ klass_name }\n#{ thing }"
160
+ return nil, true
161
+ rescue => e
162
+ puts "Ohhhh NO! #{ e }"
163
+ puts e.backtrace
164
+ raise e
165
+ end
152
166
  end
153
167
  if (1 < klasses.size) then
154
168
  raise XamplException.new("there is more than one '#{name}' tag in namespace '#{namespace}'\nplease report this error")
@@ -170,9 +184,9 @@ module Xampl
170
184
  end
171
185
 
172
186
  if target then
173
- element = target
187
+ element = target
174
188
  target.load_needed = false
175
- target = nil
189
+ target = nil
176
190
  element.init_attributes(@attribute_name, @attribute_namespace, @attribute_value)
177
191
  element.note_attributes_initialised(@is_realising)
178
192
  else
@@ -210,7 +224,7 @@ module Xampl
210
224
  element = existing_element #TODO -- IS THIS RIGHT????????????????????????
211
225
  end
212
226
  unless element then
213
- element = klasses[0].new
227
+ element = klasses[0].new
214
228
  requires_caching = @recovering
215
229
  # puts "#{File.basename(__FILE__)} #{__LINE__} WOW, what about recovering????"
216
230
  #TODO -- IS THIS RIGHT????????????????????????
@@ -235,62 +249,42 @@ module Xampl
235
249
  element.init_attributes(@attribute_name, @attribute_namespace, @attribute_value)
236
250
  element.note_attributes_initialised(@is_realising)
237
251
 
238
- if requires_caching and element and element.persist_required then
239
- Xampl.cache(element)
240
- end
241
-
242
- #element = element.note_add_to_parent(parent, @is_realising)
243
- #element.append_to(parent) if parent
252
+ Xampl.cache(element) if requires_caching && element && element.persist_required
244
253
  end
245
254
 
246
- while next_reader_event
247
- case current_node_type
248
-
249
- =begin
250
- TODO -- can these ever happen?
251
- when START_DOCUMENT
252
- return element if @recovering
253
- return existing_element || element
254
- when END_DOCUMENT
255
- return element if @recovering
256
- return existing_element || element
257
-
258
- =end
259
-
260
- when LibXML::XML::Reader::TYPE_ELEMENT
261
- child, ignore_child = parse_element(element)
262
-
263
- unless ignore_child then
264
- case child
265
- when XamplObject then
266
- child = child.note_add_to_parent(element, @is_realising) if child
267
- child = element.note_add_child(child, @is_realising) if element
268
- child.append_to(element) if element and child
269
- when XMLText then
270
- #TODO -- get rid of this puts
271
- puts "UNRECOGNISED Well-formed XML: #{child.to_s[0..25]}..."
272
- else
273
- #TODO -- get rid of this puts
274
- puts "WHAT IS THIS??? #{child.class.name}"
275
- end
276
- end
277
- when LibXML::XML::Reader::TYPE_END_ELEMENT
278
- element = element.note_closed(@is_realising)
279
- return element if @recovering
280
- return existing_element || element
281
- when LibXML::XML::Reader::TYPE_TEXT, LibXML::XML::Reader::TYPE_CDATA, LibXML::XML::Reader::TYPE_SIGNIFICANT_WHITESPACE, LibXML::XML::Reader::TYPE_ENTITY_REFERENCE
282
- if element.has_mixed_content then
283
- text = @reader.read_string.force_encoding('utf-8')
284
- # puts "#{ File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] #{ text.encoding } [[#{ text }]]"
285
- the_text = element.note_adding_text_content(text, @is_realising)
286
- element << the_text
287
- else
288
- text = @reader.read_string.force_encoding('utf-8')
289
- # puts "#{ File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] #{ text.encoding } [[#{ text }]] (#{ @reader.class })"
290
- the_text = element.note_adding_text_content(text, @is_realising)
291
- element.add_content(the_text, false)
292
- end
255
+ while next_reader_event do
256
+ if @reader.value? then
257
+ text = @reader.value
258
+ text = text.force_encoding('utf-8') unless 'UTF-8' == text.encoding
259
+ the_text = element.note_adding_text_content(text, @is_realising)
260
+ if element.has_mixed_content then
261
+ element << the_text
293
262
  else
263
+ element.add_content(the_text, false)
264
+ end
265
+ elsif Nokogiri::XML::Node::ELEMENT_NODE == @reader.node_type then
266
+ child, ignore_child = parse_element(element)
267
+
268
+ unless ignore_child then
269
+ case child
270
+ when XamplObject then
271
+ child = child.note_add_to_parent(element, @is_realising) if child
272
+ child = element.note_add_child(child, @is_realising) if element
273
+ child.append_to(element) if element && child
274
+ when XMLText then
275
+ #TODO -- get rid of this puts
276
+ puts "UNRECOGNISED Well-formed XML: #{child.to_s[0..25]}..."
277
+ else
278
+ #TODO -- get rid of this puts
279
+ puts "WHAT IS THIS??? #{child.class.name}"
280
+ end
281
+ end
282
+ elsif Nokogiri::XML::Node::ELEMENT_DECL == @reader.node_type then
283
+ element = element.note_closed(@is_realising)
284
+ return element if @recovering
285
+ return existing_element || element
286
+ else
287
+ puts "WTF??(#{ @reader.depth }) name: #{ @reader.name }, #{ say_node_type(@reader.node_type)}/#{ @reader.node_type }\n#{ @reader.outer_xml }"
294
288
  end
295
289
  end
296
290
 
@@ -298,95 +292,49 @@ TODO -- can these ever happen?
298
292
  return existing_element || element
299
293
  end
300
294
 
295
+ def FromXML.tokenise_string(str, strip=true)
296
+ return nil unless str
297
+ str.strip! if strip
298
+ str.gsub!(/[ \n\r\t][ \n\r\t]*/, " ")
299
+ return str
300
+ end
301
+
301
302
  def current_node_type
302
303
  if @faking_an_end_element then
303
- LibXML::XML::Reader::TYPE_END_ELEMENT
304
+ Nokogiri::XML::Node::ELEMENT_DECL
304
305
  else
305
306
  @reader.node_type
306
307
  end
307
308
  end
308
309
 
309
- =begin
310
- def describe_current_element_type()
311
- case @reader.node_type
312
- when LibXML::XML::Reader::TYPE_ATTRIBUTE
313
- puts "ATTRIBUTE"
314
- when LibXML::XML::Reader::TYPE_DOCUMENT
315
- puts "DOCUMENT"
316
- when LibXML::XML::Reader::TYPE_ELEMENT
317
- attribute_count = @reader.attribute_count
318
- puts "ELEMENT #{ @reader.local_name }, ns: #{ @reader.namespace_uri }, #attributes: #{ attribute_count }, depth: #{ @reader.depth }"
319
- puts " FAKING END ELEMENT" if @faking_an_end_element
320
- when LibXML::XML::Reader::TYPE_END_ELEMENT
321
- puts "END ELEMENT"
322
- when LibXML::XML::Reader::TYPE_TEXT
323
- puts "TEXT [[#{ @reader.read_string }]]"
324
- when LibXML::XML::Reader::TYPE_CDATA
325
- puts "CDATA [[#{ @reader.read_string }]]"
326
- when LibXML::XML::Reader::TYPE_SIGNIFICANT_WHITESPACE
327
- puts "SIGNIFICANT white space [[#{ @reader.read_string }]]"
328
- when LibXML::XML::Reader::TYPE_ENTITY_REFERENCE
329
- puts "entity ref"
330
- when LibXML::XML::Reader::TYPE_WHITESPACE
331
- puts "whitespace"
332
- when LibXML::XML::Reader::TYPE_PROCESSING_INSTRUCTION
333
- puts "processing instruction"
334
- when LibXML::XML::Reader::TYPE_COMMENT
335
- puts "comment"
336
- when LibXML::XML::Reader::TYPE_DOCUMENT_TYPE
337
- puts "doc type"
338
-
339
- when LibXML::XML::Reader::TYPE_XML_DECLARATION
340
- puts "xml decl"
341
- when LibXML::XML::Reader::TYPE_NONE
342
- puts "NONE!!"
343
- when LibXML::XML::Reader::TYPE_NOTATION
344
- puts "notifiation"
345
- when LibXML::XML::Reader::TYPE_DOCUMENT_FRAGMENT
346
- puts "doc fragment"
347
- when LibXML::XML::Reader::TYPE_ENTITY
348
- puts "entity"
349
- when LibXML::XML::Reader::TYPE_END_ENTITY
350
- puts "end entity"
351
- else
352
- puts "UNKNOWN: #{@reader.node_type}"
353
- end
354
- end
355
- =end
356
-
357
310
  def next_reader_event
358
311
  if @insert_end_element then
359
312
  @faking_an_end_element = true
360
- @insert_end_element = false
313
+ @insert_end_element = false
361
314
  return
362
315
  end
363
316
 
364
- @faking_an_end_element = false
365
-
366
- #describe_current_element_type
317
+ @faking_an_end_element = false
367
318
 
368
319
  begin
369
- #TODO -- get rid of this??
370
- #TODO -- really?
371
320
  okay = @reader.read
372
321
  rescue => e
373
322
  raise RuntimeError, "WHAT?? -- #{ e }", e.backtrace
374
323
  end
375
324
 
376
- @just_opened_an_element = start_element?
377
- @insert_end_element = (@just_opened_an_element and @reader.empty_element?)
378
-
379
- #describe_current_element_type
380
-
325
+ @just_opened_an_element = self.start_element?
326
+ @insert_end_element = (@just_opened_an_element and @reader.empty_element?)
381
327
  okay
382
328
  end
383
329
 
384
330
  def start_element?
385
- current_node_type == LibXML::XML::Reader::TYPE_ELEMENT
331
+ current_node_type == Nokogiri::XML::Node::ELEMENT_NODE
386
332
  end
387
333
 
388
334
  def whitespace?
389
- current_note_type == LibXML::XML::Reader::TYPE_WHITESPACE
335
+ #there is no whitespace type with nokogiri
336
+ #TODO -- this is not actually called, so...
337
+ @reader.value? && @reader.value.match(/\S/).nil?
390
338
  end
391
339
 
392
340
  def find_the_first_element
@@ -395,31 +343,20 @@ TODO -- can these ever happen?
395
343
  break unless next_reader_event
396
344
  end
397
345
  @just_opened_an_element = start_element?
346
+ @insert_end_element = (@just_opened_an_element and @reader.empty_element?)
398
347
  end
399
348
 
400
349
  def build_attribute_arrays
401
-
402
350
  @attribute_name.clear
403
351
  @attribute_namespace.clear
404
352
  @attribute_value.clear
405
353
 
406
- return unless LibXML::XML::Reader::TYPE_ELEMENT == current_node_type
407
-
408
- if @reader.has_attributes? then
409
- attribute_count = @reader.attribute_count
410
- @reader.move_to_first_attribute
411
- attribute_count.times do |i|
412
- if @reader.namespace_declaration? then
413
- @reader.move_to_next_attribute
414
- next
415
- end
416
-
417
- @attribute_name << @reader.local_name
418
- @attribute_namespace << @reader.namespace_uri
419
- @attribute_value << @reader.value
354
+ return unless @reader.attributes?
420
355
 
421
- @reader.move_to_next_attribute
422
- end
356
+ @reader.attributes.each do |name, value|
357
+ @attribute_name << name
358
+ @attribute_namespace << nil
359
+ @attribute_value << value
423
360
  end
424
361
  end
425
362