xamplr 1.9.13 → 1.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -12,7 +12,7 @@ begin
12
12
  gem.authors = ["Bob Hutchison"]
13
13
 
14
14
  gem.add_dependency('xamplr-pp', '>=1.2.0')
15
- gem.add_dependency('libxml-ruby', '>=1.1.3')
15
+ gem.add_dependency('nokogiri', '>=1.4.3')
16
16
 
17
17
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
18
18
  end
data/VERSION.yml CHANGED
@@ -1,5 +1,5 @@
1
1
  ---
2
2
  :major: 1
3
3
  :minor: 9
4
- :patch: 13
4
+ :patch: 14
5
5
  :build:
data/lib/xamplr/TODO CHANGED
@@ -17,6 +17,6 @@
17
17
  level (below object level) and that this will allow sharing of the DB a
18
18
  little better -- still don't know about multiple writers to the same repo...
19
19
  can tyrant help here.
20
- -- mongo, and a couple of others might be interesting
20
+ -- redis, and a couple of others might be interesting (mongo didn't work out)
21
21
  -- what about SQL dbs? Maybe an SQL or DataMapper version??
22
22
 
@@ -13,6 +13,18 @@ module Xampl
13
13
  end
14
14
  end
15
15
 
16
+ class DuplicateXamplInPersister < Exception
17
+ attr_reader :msg
18
+
19
+ def initialize(xampl1, xampl2, persister)
20
+ @msg = "index: '#{ xampl1.get_the_index }', persister: #{ persister.name }, first: #{ xampl1 }, second: #{ xampl2 }"
21
+ end
22
+
23
+ def message
24
+ @msg
25
+ end
26
+ end
27
+
16
28
  class AlreadyKnownToPersister < Exception
17
29
  attr_reader :msg, :xampl
18
30
 
@@ -45,6 +57,40 @@ module Xampl
45
57
  end
46
58
  end
47
59
 
60
+ class NoAnonymousPersisters < Exception
61
+ def message
62
+ "All persisters must be named"
63
+ end
64
+ end
65
+
66
+ class NoPersisterNamed < Exception
67
+ attr_reader :name
68
+
69
+ def initialize(name=nil)
70
+ @name = name
71
+ end
72
+
73
+ def message
74
+ if name then
75
+ "there is no persister named: '#{ @name }'"
76
+ else
77
+ "no name was supplied"
78
+ end
79
+ end
80
+ end
81
+
82
+ class NotXamplPersistedObject < Exception
83
+ attr_reader :klass
84
+
85
+ def initialize(thing=nil)
86
+ @klass = thing.class.name
87
+ end
88
+
89
+ def message
90
+ "require a XamplPersistedObject, got a #{ klass }"
91
+ end
92
+ end
93
+
48
94
  class BlockedChange < Exception
49
95
  attr_reader :xampl
50
96
 
@@ -121,7 +167,7 @@ module Xampl
121
167
  attr_reader :msg
122
168
 
123
169
  def initialize(active, local)
124
- @msg = "mixed persisters:: active #{active.name}, local: #{local.name}"
170
+ @msg = "mixed persisters:: active #{active.name}/#{ active }, local: #{local.name}/#{ local }"
125
171
  end
126
172
 
127
173
  def message
@@ -1,6 +1,6 @@
1
1
  # encoding utf-8
2
2
 
3
- require 'libxml'
3
+ require 'nokogiri'
4
4
 
5
5
  module Xampl
6
6
 
@@ -10,31 +10,31 @@ module Xampl
10
10
  attr :is_realising #1.9.1 , false
11
11
  attr :tokenise_content #1.9.1 , false
12
12
 
13
- @reader = nil
13
+ @reader = nil
14
14
 
15
- @@by_tag = {}
15
+ @@by_tag = {}
16
16
  @@by_ns_tag = {}
17
17
 
18
18
  def initialize(recovering=false)
19
- @recovering = recovering
19
+ @recovering = recovering
20
20
 
21
- @attribute_name = Array.new(32)
22
- @attribute_namespace = Array.new(32)
23
- @attribute_value = Array.new(32)
21
+ @attribute_name = Array.new(32)
22
+ @attribute_namespace = Array.new(32)
23
+ @attribute_value = Array.new(32)
24
24
 
25
- @insert_end_element = false
26
- @faking_an_end_element = false
25
+ @insert_end_element = false
26
+ @faking_an_end_element = false
27
27
  @just_opened_an_element = false
28
28
  end
29
29
 
30
30
  def FromXML.reset_registry
31
- @@by_tag = {}
31
+ @@by_tag = {}
32
32
  @@by_ns_tag = {}
33
33
  end
34
34
 
35
35
  def FromXML.register(tag, ns_tag, klass)
36
36
  @@by_ns_tag[ns_tag] = [klass]
37
- a = @@by_tag[tag]
37
+ a = @@by_tag[tag]
38
38
  if (nil == a) then
39
39
  @@by_tag[tag] = [klass]
40
40
  else
@@ -45,50 +45,57 @@ module Xampl
45
45
  end
46
46
 
47
47
  def FromXML.registered(name)
48
- #puts "registered by ns tag: #{ @@by_ns_tag.keys.sort.inspect }"
49
48
  klass = @@by_ns_tag[name]
50
- #puts "registered by tag: #{ @@by_tag.keys.sort.inspect }"
51
49
  klass = @@by_tag[name] unless klass
52
50
  klass = [] unless klass
53
51
  return klass
54
52
  end
55
53
 
56
54
  def resolve(name)
57
- #TODO -- ???
55
+ #TODO -- ??? don't seem to need it, this is for specific named entities
58
56
  return name
59
57
  end
60
58
 
61
59
  def setup_parse(filename, tokenise_content=true, is_realising=false)
62
- @resolver = self
63
-
64
- @is_realising = is_realising
65
- @tokenise_content = tokenise_content
66
-
67
- @reader = LibXML::XML::Reader.file(filename,
68
- :options => LibXML::XML::Parser::Options::NOENT |
69
- LibXML::XML::Parser::Options::NONET |
70
- LibXML::XML::Parser::Options::NOCDATA |
71
- LibXML::XML::Parser::Options::DTDATTR |
72
- # LibXML::XML::Parser::Options::COMPACT |
73
- 0)
74
- #TODO CLOSE THIS THING!!
60
+ xml = File.read(filename)
61
+ setup_parse_string(xml, tokenise_content, is_realising)
75
62
  end
76
63
 
77
64
  def setup_parse_string(string, tokenise_content=true, is_realising=false)
78
- @resolver = self
65
+ @resolver = self
79
66
 
80
- @is_realising = is_realising
67
+ @is_realising = is_realising
81
68
  @tokenise_content = tokenise_content
82
69
 
83
- # setInput(string)
84
- @reader = LibXML::XML::Reader.string(string,
85
- :options => LibXML::XML::Parser::Options::NOENT |
86
- LibXML::XML::Parser::Options::NONET |
87
- LibXML::XML::Parser::Options::NOCDATA |
88
- LibXML::XML::Parser::Options::DTDATTR |
89
- # LibXML::XML::Parser::Options::COMPACT) |
90
- 0)
91
- #TODO CLOSE THIS THING!!
70
+ =begin
71
+ STRICT = 0 Strict parsing
72
+ RECOVER = 1 << 0 Recover from errors
73
+ NOENT = 1 << 1 Substitute entities
74
+ DTDLOAD = 1 << 2 Load external subsets
75
+ DTDATTR = 1 << 3 Default DTD attributes
76
+ DTDVALID = 1 << 4 validate with the DTD
77
+ NOERROR = 1 << 5 suppress error reports
78
+ NOWARNING = 1 << 6 suppress warning reports
79
+ PEDANTIC = 1 << 7 pedantic error reporting
80
+ NOBLANKS = 1 << 8 remove blank nodes
81
+ SAX1 = 1 << 9 use the SAX1 interface internally
82
+ XINCLUDE = 1 << 10 Implement XInclude substitution
83
+ NONET = 1 << 11 Forbid network access
84
+ NODICT = 1 << 12 Do not reuse the context dictionary
85
+ NSCLEAN = 1 << 13 remove redundant namespaces declarations
86
+ NOCDATA = 1 << 14 merge CDATA as text nodes
87
+ NOXINCNODE = 1 << 15 do not generate XINCLUDE START/END nodes
88
+ DEFAULT_XML = RECOVER the default options used for parsing XML documents
89
+ DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET the default options used for parsing HTML documents
90
+ =end
91
+
92
+ options = Nokogiri::XML::ParseOptions::RECOVER | Nokogiri::XML::ParseOptions::NOENT | Nokogiri::XML::ParseOptions::NONET | Nokogiri::XML::ParseOptions::NOCDATA | Nokogiri::XML::ParseOptions::DTDATTR
93
+
94
+ utf8_string = string.force_encoding('utf-8')
95
+ url = nil
96
+ encoding = nil
97
+
98
+ @reader = Nokogiri::XML::Reader.from_memory(utf8_string, url, encoding, options)
92
99
  end
93
100
 
94
101
  def parse(filename, tokenise_content=true, is_realising=false)
@@ -115,22 +122,27 @@ module Xampl
115
122
  end
116
123
  end
117
124
 
118
- def FromXML.tokenise_string(str, strip=true)
119
- return nil unless str
120
- str.strip! if strip
121
- str.gsub!(/[ \n\r\t][ \n\r\t]*/, " ")
122
- return str
125
+ def chew
126
+ xml = @reader.outer_xml
127
+ depth = @reader.depth
128
+ @reader.read
129
+ while depth != @reader.depth do
130
+ @reader.read
131
+ end
132
+ return xml
123
133
  end
124
134
 
135
+
125
136
  def parse_element(parent=nil, target=nil)
137
+ # puts caller(0)[0..5]
138
+
126
139
  find_the_first_element
127
140
  return unless start_element?
128
141
 
129
- namespace = @reader.namespace_uri
130
- name = @reader.local_name
131
-
142
+ namespace = @reader.namespace_uri
143
+ name = @reader.local_name
132
144
  existing_element = nil
133
- element = nil
145
+ element = nil
134
146
 
135
147
  requires_caching = false
136
148
 
@@ -138,17 +150,19 @@ module Xampl
138
150
 
139
151
  if ((nil != namespace) and (0 < namespace.size)) then
140
152
  klass_name = "{#{namespace}}#{name}"
141
- klasses = FromXML.registered(klass_name)
153
+ klasses = FromXML.registered(klass_name)
142
154
  if (0 == klasses.size) then
143
155
  # The class has not been registered (either it was never generated, or it was never loaded)
144
- puts "#{ __FILE__ }:#{ __LINE__ } [#{__method__}] Don't know about class name: #{ klass_name }"
145
- # puts "#{ __FILE__ }:#{ __LINE__ } [#{__method__}] @@by_ns_tag: #{ @@by_ns_tag.inspect }"
146
- # puts "#{ __FILE__ }:#{ __LINE__ } [#{__method__}] @@by_tag: #{ @@by_tag.inspect }"
147
- xml_text = XMLText.new
148
- xml_text.build(self)
149
- xml_text = parent.note_adding_text_content(xml_text, @is_realising)
150
- parent.add_content(xml_text, @tokenise_content) if xml_text
151
- return xml_text, false
156
+ begin
157
+ #discard this node and all children, but say something
158
+ thing = chew
159
+ puts "#{ ::File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] UNRECOGNISED CHILD ELEMENTS: class: #{ klass_name }\n#{ thing }"
160
+ return nil, true
161
+ rescue => e
162
+ puts "Ohhhh NO! #{ e }"
163
+ puts e.backtrace
164
+ raise e
165
+ end
152
166
  end
153
167
  if (1 < klasses.size) then
154
168
  raise XamplException.new("there is more than one '#{name}' tag in namespace '#{namespace}'\nplease report this error")
@@ -170,9 +184,9 @@ module Xampl
170
184
  end
171
185
 
172
186
  if target then
173
- element = target
187
+ element = target
174
188
  target.load_needed = false
175
- target = nil
189
+ target = nil
176
190
  element.init_attributes(@attribute_name, @attribute_namespace, @attribute_value)
177
191
  element.note_attributes_initialised(@is_realising)
178
192
  else
@@ -210,7 +224,7 @@ module Xampl
210
224
  element = existing_element #TODO -- IS THIS RIGHT????????????????????????
211
225
  end
212
226
  unless element then
213
- element = klasses[0].new
227
+ element = klasses[0].new
214
228
  requires_caching = @recovering
215
229
  # puts "#{File.basename(__FILE__)} #{__LINE__} WOW, what about recovering????"
216
230
  #TODO -- IS THIS RIGHT????????????????????????
@@ -235,62 +249,42 @@ module Xampl
235
249
  element.init_attributes(@attribute_name, @attribute_namespace, @attribute_value)
236
250
  element.note_attributes_initialised(@is_realising)
237
251
 
238
- if requires_caching and element and element.persist_required then
239
- Xampl.cache(element)
240
- end
241
-
242
- #element = element.note_add_to_parent(parent, @is_realising)
243
- #element.append_to(parent) if parent
252
+ Xampl.cache(element) if requires_caching && element && element.persist_required
244
253
  end
245
254
 
246
- while next_reader_event
247
- case current_node_type
248
-
249
- =begin
250
- TODO -- can these ever happen?
251
- when START_DOCUMENT
252
- return element if @recovering
253
- return existing_element || element
254
- when END_DOCUMENT
255
- return element if @recovering
256
- return existing_element || element
257
-
258
- =end
259
-
260
- when LibXML::XML::Reader::TYPE_ELEMENT
261
- child, ignore_child = parse_element(element)
262
-
263
- unless ignore_child then
264
- case child
265
- when XamplObject then
266
- child = child.note_add_to_parent(element, @is_realising) if child
267
- child = element.note_add_child(child, @is_realising) if element
268
- child.append_to(element) if element and child
269
- when XMLText then
270
- #TODO -- get rid of this puts
271
- puts "UNRECOGNISED Well-formed XML: #{child.to_s[0..25]}..."
272
- else
273
- #TODO -- get rid of this puts
274
- puts "WHAT IS THIS??? #{child.class.name}"
275
- end
276
- end
277
- when LibXML::XML::Reader::TYPE_END_ELEMENT
278
- element = element.note_closed(@is_realising)
279
- return element if @recovering
280
- return existing_element || element
281
- when LibXML::XML::Reader::TYPE_TEXT, LibXML::XML::Reader::TYPE_CDATA, LibXML::XML::Reader::TYPE_SIGNIFICANT_WHITESPACE, LibXML::XML::Reader::TYPE_ENTITY_REFERENCE
282
- if element.has_mixed_content then
283
- text = @reader.read_string.force_encoding('utf-8')
284
- # puts "#{ File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] #{ text.encoding } [[#{ text }]]"
285
- the_text = element.note_adding_text_content(text, @is_realising)
286
- element << the_text
287
- else
288
- text = @reader.read_string.force_encoding('utf-8')
289
- # puts "#{ File.basename __FILE__ }:#{ __LINE__ } [#{__method__}] #{ text.encoding } [[#{ text }]] (#{ @reader.class })"
290
- the_text = element.note_adding_text_content(text, @is_realising)
291
- element.add_content(the_text, false)
292
- end
255
+ while next_reader_event do
256
+ if @reader.value? then
257
+ text = @reader.value
258
+ text = text.force_encoding('utf-8') unless 'UTF-8' == text.encoding
259
+ the_text = element.note_adding_text_content(text, @is_realising)
260
+ if element.has_mixed_content then
261
+ element << the_text
293
262
  else
263
+ element.add_content(the_text, false)
264
+ end
265
+ elsif Nokogiri::XML::Node::ELEMENT_NODE == @reader.node_type then
266
+ child, ignore_child = parse_element(element)
267
+
268
+ unless ignore_child then
269
+ case child
270
+ when XamplObject then
271
+ child = child.note_add_to_parent(element, @is_realising) if child
272
+ child = element.note_add_child(child, @is_realising) if element
273
+ child.append_to(element) if element && child
274
+ when XMLText then
275
+ #TODO -- get rid of this puts
276
+ puts "UNRECOGNISED Well-formed XML: #{child.to_s[0..25]}..."
277
+ else
278
+ #TODO -- get rid of this puts
279
+ puts "WHAT IS THIS??? #{child.class.name}"
280
+ end
281
+ end
282
+ elsif Nokogiri::XML::Node::ELEMENT_DECL == @reader.node_type then
283
+ element = element.note_closed(@is_realising)
284
+ return element if @recovering
285
+ return existing_element || element
286
+ else
287
+ puts "WTF??(#{ @reader.depth }) name: #{ @reader.name }, #{ say_node_type(@reader.node_type)}/#{ @reader.node_type }\n#{ @reader.outer_xml }"
294
288
  end
295
289
  end
296
290
 
@@ -298,95 +292,49 @@ TODO -- can these ever happen?
298
292
  return existing_element || element
299
293
  end
300
294
 
295
+ def FromXML.tokenise_string(str, strip=true)
296
+ return nil unless str
297
+ str.strip! if strip
298
+ str.gsub!(/[ \n\r\t][ \n\r\t]*/, " ")
299
+ return str
300
+ end
301
+
301
302
  def current_node_type
302
303
  if @faking_an_end_element then
303
- LibXML::XML::Reader::TYPE_END_ELEMENT
304
+ Nokogiri::XML::Node::ELEMENT_DECL
304
305
  else
305
306
  @reader.node_type
306
307
  end
307
308
  end
308
309
 
309
- =begin
310
- def describe_current_element_type()
311
- case @reader.node_type
312
- when LibXML::XML::Reader::TYPE_ATTRIBUTE
313
- puts "ATTRIBUTE"
314
- when LibXML::XML::Reader::TYPE_DOCUMENT
315
- puts "DOCUMENT"
316
- when LibXML::XML::Reader::TYPE_ELEMENT
317
- attribute_count = @reader.attribute_count
318
- puts "ELEMENT #{ @reader.local_name }, ns: #{ @reader.namespace_uri }, #attributes: #{ attribute_count }, depth: #{ @reader.depth }"
319
- puts " FAKING END ELEMENT" if @faking_an_end_element
320
- when LibXML::XML::Reader::TYPE_END_ELEMENT
321
- puts "END ELEMENT"
322
- when LibXML::XML::Reader::TYPE_TEXT
323
- puts "TEXT [[#{ @reader.read_string }]]"
324
- when LibXML::XML::Reader::TYPE_CDATA
325
- puts "CDATA [[#{ @reader.read_string }]]"
326
- when LibXML::XML::Reader::TYPE_SIGNIFICANT_WHITESPACE
327
- puts "SIGNIFICANT white space [[#{ @reader.read_string }]]"
328
- when LibXML::XML::Reader::TYPE_ENTITY_REFERENCE
329
- puts "entity ref"
330
- when LibXML::XML::Reader::TYPE_WHITESPACE
331
- puts "whitespace"
332
- when LibXML::XML::Reader::TYPE_PROCESSING_INSTRUCTION
333
- puts "processing instruction"
334
- when LibXML::XML::Reader::TYPE_COMMENT
335
- puts "comment"
336
- when LibXML::XML::Reader::TYPE_DOCUMENT_TYPE
337
- puts "doc type"
338
-
339
- when LibXML::XML::Reader::TYPE_XML_DECLARATION
340
- puts "xml decl"
341
- when LibXML::XML::Reader::TYPE_NONE
342
- puts "NONE!!"
343
- when LibXML::XML::Reader::TYPE_NOTATION
344
- puts "notifiation"
345
- when LibXML::XML::Reader::TYPE_DOCUMENT_FRAGMENT
346
- puts "doc fragment"
347
- when LibXML::XML::Reader::TYPE_ENTITY
348
- puts "entity"
349
- when LibXML::XML::Reader::TYPE_END_ENTITY
350
- puts "end entity"
351
- else
352
- puts "UNKNOWN: #{@reader.node_type}"
353
- end
354
- end
355
- =end
356
-
357
310
  def next_reader_event
358
311
  if @insert_end_element then
359
312
  @faking_an_end_element = true
360
- @insert_end_element = false
313
+ @insert_end_element = false
361
314
  return
362
315
  end
363
316
 
364
- @faking_an_end_element = false
365
-
366
- #describe_current_element_type
317
+ @faking_an_end_element = false
367
318
 
368
319
  begin
369
- #TODO -- get rid of this??
370
- #TODO -- really?
371
320
  okay = @reader.read
372
321
  rescue => e
373
322
  raise RuntimeError, "WHAT?? -- #{ e }", e.backtrace
374
323
  end
375
324
 
376
- @just_opened_an_element = start_element?
377
- @insert_end_element = (@just_opened_an_element and @reader.empty_element?)
378
-
379
- #describe_current_element_type
380
-
325
+ @just_opened_an_element = self.start_element?
326
+ @insert_end_element = (@just_opened_an_element and @reader.empty_element?)
381
327
  okay
382
328
  end
383
329
 
384
330
  def start_element?
385
- current_node_type == LibXML::XML::Reader::TYPE_ELEMENT
331
+ current_node_type == Nokogiri::XML::Node::ELEMENT_NODE
386
332
  end
387
333
 
388
334
  def whitespace?
389
- current_note_type == LibXML::XML::Reader::TYPE_WHITESPACE
335
+ #there is no whitespace type with nokogiri
336
+ #TODO -- this is not actually called, so...
337
+ @reader.value? && @reader.value.match(/\S/).nil?
390
338
  end
391
339
 
392
340
  def find_the_first_element
@@ -395,31 +343,20 @@ TODO -- can these ever happen?
395
343
  break unless next_reader_event
396
344
  end
397
345
  @just_opened_an_element = start_element?
346
+ @insert_end_element = (@just_opened_an_element and @reader.empty_element?)
398
347
  end
399
348
 
400
349
  def build_attribute_arrays
401
-
402
350
  @attribute_name.clear
403
351
  @attribute_namespace.clear
404
352
  @attribute_value.clear
405
353
 
406
- return unless LibXML::XML::Reader::TYPE_ELEMENT == current_node_type
407
-
408
- if @reader.has_attributes? then
409
- attribute_count = @reader.attribute_count
410
- @reader.move_to_first_attribute
411
- attribute_count.times do |i|
412
- if @reader.namespace_declaration? then
413
- @reader.move_to_next_attribute
414
- next
415
- end
416
-
417
- @attribute_name << @reader.local_name
418
- @attribute_namespace << @reader.namespace_uri
419
- @attribute_value << @reader.value
354
+ return unless @reader.attributes?
420
355
 
421
- @reader.move_to_next_attribute
422
- end
356
+ @reader.attributes.each do |name, value|
357
+ @attribute_name << name
358
+ @attribute_namespace << nil
359
+ @attribute_value << value
423
360
  end
424
361
  end
425
362