sax-machine 0.0.14 → 0.0.15

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,5 +7,5 @@ require "sax-machine/sax_handler"
7
7
  require "sax-machine/sax_config"
8
8
 
9
9
  module SAXMachine
10
- VERSION = "0.0.14"
10
+ VERSION = "0.0.15"
11
11
  end
@@ -8,25 +8,37 @@ module SAXMachine
8
8
  @name = name.to_s
9
9
  @class = options[:class]
10
10
  @as = options[:as].to_s
11
- end
12
-
13
- def handler
14
- SAXHandler.new(@class.new)
11
+
12
+ if options.has_key?(:with)
13
+ # for faster comparisons later
14
+ @with = options[:with].to_a.flatten.collect {|o| o.to_s}
15
+ else
16
+ @with = nil
17
+ end
15
18
  end
16
19
 
17
20
  def accessor
18
21
  as
19
22
  end
20
23
 
24
+ def attrs_match?(attrs)
25
+ if @with
26
+ @with == (@with & attrs)
27
+ else
28
+ true
29
+ end
30
+ end
31
+
32
+ def data_class
33
+ @class || @name
34
+ end
35
+
21
36
  protected
22
37
 
23
38
  def as
24
39
  @as
25
40
  end
26
41
 
27
- def class
28
- @class || @name
29
- end
30
42
  end
31
43
 
32
44
  end
@@ -3,38 +3,45 @@ require "sax-machine/sax_collection_config"
3
3
 
4
4
  module SAXMachine
5
5
  class SAXConfig
6
- attr_reader :top_level_elements, :collection_elements
7
-
6
+ attr_accessor :top_level_elements, :collection_elements
7
+
8
8
  def initialize
9
- @top_level_elements = []
10
- @collection_elements = []
9
+ @top_level_elements = {}
10
+ @collection_elements = {}
11
+ end
12
+
13
+ def columns
14
+ @top_level_elements.map {|name, ecs| ecs }.flatten
15
+ end
16
+
17
+ def initialize_copy(sax_config)
18
+ @top_level_elements = sax_config.top_level_elements.clone
19
+ @collection_elements = sax_config.collection_elements.clone
11
20
  end
12
21
 
13
22
  def add_top_level_element(name, options)
14
- @top_level_elements << ElementConfig.new(name, options)
23
+ @top_level_elements[name.to_s] = [] unless @top_level_elements[name.to_s]
24
+ @top_level_elements[name.to_s] << ElementConfig.new(name, options)
15
25
  end
16
26
 
17
27
  def add_collection_element(name, options)
18
- @collection_elements << CollectionConfig.new(name, options)
28
+ @collection_elements[name.to_s] = [] unless @collection_elements[name.to_s]
29
+ @collection_elements[name.to_s] << CollectionConfig.new(name, options)
19
30
  end
20
31
 
21
- def collection_config(name)
22
- @collection_elements.detect { |ce| ce.name.to_s == name.to_s }
32
+ def collection_config(name, attrs)
33
+ ces = @collection_elements[name.to_s]
34
+ ces && ces.detect { |cc| cc.attrs_match?(attrs) }
23
35
  end
24
36
 
25
37
  def element_configs_for_attribute(name, attrs)
26
- @top_level_elements.select do |element_config|
27
- element_config.name == name &&
28
- element_config.has_value_and_attrs_match?(attrs)
29
- end
38
+ tes = @top_level_elements[name.to_s]
39
+ tes && tes.select { |ec| ec.has_value_and_attrs_match?(attrs) } || []
30
40
  end
31
41
 
32
42
  def element_config_for_tag(name, attrs)
33
- @top_level_elements.detect do |element_config|
34
- element_config.name == name &&
35
- element_config.attrs_match?(attrs)
36
- end
43
+ tes = @top_level_elements[name.to_s]
44
+ tes && tes.detect { |ec| ec.attrs_match?(attrs) }
37
45
  end
38
-
39
46
  end
40
47
  end
@@ -31,7 +31,7 @@ module SAXMachine
31
31
  end
32
32
 
33
33
  def columns
34
- sax_config.top_level_elements
34
+ sax_config.columns
35
35
  end
36
36
 
37
37
  def column(sym)
@@ -2,7 +2,7 @@ module SAXMachine
2
2
  class SAXConfig
3
3
 
4
4
  class ElementConfig
5
- attr_reader :name, :setter, :data_class
5
+ attr_reader :name, :setter, :data_class, :collection
6
6
 
7
7
  def initialize(name, options)
8
8
  @name = name.to_s
@@ -2,19 +2,16 @@ require "nokogiri"
2
2
 
3
3
  module SAXMachine
4
4
  class SAXHandler < Nokogiri::XML::SAX::Document
5
- attr_reader :object
5
+ attr_reader :stack
6
6
 
7
7
  def initialize(object)
8
- @object = object
8
+ @stack = [[object, nil, ""]]
9
9
  @parsed_configs = {}
10
10
  end
11
11
 
12
12
  def characters(string)
13
- if parsing_collection?
14
- @collection_handler.characters(string)
15
- elsif @element_config
16
- @value << string
17
- end
13
+ object, config, value = stack.last
14
+ value << string
18
15
  end
19
16
 
20
17
  def cdata_block(string)
@@ -22,95 +19,48 @@ module SAXMachine
22
19
  end
23
20
 
24
21
  def start_element(name, attrs = [])
25
- @name = name
26
- @attrs = attrs
27
-
28
- if parsing_collection?
29
- @collection_handler.start_element(@name, @attrs)
30
-
31
- elsif @collection_config = sax_config.collection_config(@name)
32
- @collection_handler = @collection_config.handler
33
- @collection_handler.start_element(@name, @attrs)
34
-
35
- elsif (element_configs = sax_config.element_configs_for_attribute(@name, @attrs)).any?
36
- parse_element_attributes(element_configs)
37
- set_element_config_for_element_value
22
+ object, config, value = stack.last
23
+ sax_config = object.class.respond_to?(:sax_config) ? object.class.sax_config : nil
38
24
 
39
- else
40
- set_element_config_for_element_value
25
+ if sax_config
26
+ if collection_config = sax_config.collection_config(name, attrs)
27
+ stack.push [object = collection_config.data_class.new, collection_config, ""]
28
+ object, sax_config, is_collection = object, object.class.sax_config, true
29
+ end
30
+ sax_config.element_configs_for_attribute(name, attrs).each do |ec|
31
+ unless parsed_config?(object, ec)
32
+ object.send(ec.setter, ec.value_from_attrs(attrs))
33
+ mark_as_parsed(object, ec)
34
+ end
35
+ end
36
+ if !collection_config && element_config = sax_config.element_config_for_tag(name, attrs)
37
+ stack.push [element_config.data_class ? element_config.data_class.new : object, element_config, ""]
38
+ end
41
39
  end
42
40
  end
43
41
 
44
42
  def end_element(name)
45
- if parsing_collection? && @collection_config.name == name
46
- @object.send(@collection_config.accessor) << @collection_handler.object
47
- reset_current_collection
48
-
49
- elsif parsing_collection?
50
- @collection_handler.end_element(name)
51
-
52
- elsif characaters_captured? && !parsed_config?
53
- mark_as_parsed
54
- @object.send(@element_config.setter, @value)
55
- end
56
-
57
- reset_current_tag
58
- end
59
-
60
- def characaters_captured?
61
- !@value.nil? && !@value.empty?
62
- end
63
-
64
- def parsing_collection?
65
- !@collection_handler.nil?
66
- end
67
-
68
- def parse_collection_instance_attributes
69
- instance = @collection_handler.object
70
- @attrs.each_with_index do |attr_name,index|
71
- instance.send("#{attr_name}=", @attrs[index + 1]) if index % 2 == 0 && instance.methods.include?("#{attr_name}=")
72
- end
73
- end
74
-
75
- def parse_element_attributes(element_configs)
76
- element_configs.each do |ec|
77
- unless parsed_config?(ec)
78
- @object.send(ec.setter, ec.value_from_attrs(@attrs))
79
- mark_as_parsed(ec)
43
+ (object, tag_config, _), (element, config, value) = stack[-2..-1]
44
+ return unless stack.size > 1 && config && config.name.to_s == name.to_s
45
+
46
+ unless parsed_config?(object, config)
47
+ if config.respond_to?(:accessor)
48
+ object.send(config.accessor) << element
49
+ else
50
+ value = config.data_class ? element : value
51
+ object.send(config.setter, value) unless value == ""
52
+ mark_as_parsed(object, config)
80
53
  end
81
54
  end
82
- @element_config = nil
83
- end
84
-
85
- def set_element_config_for_element_value
86
- @value = ""
87
- @element_config = sax_config.element_config_for_tag(@name, @attrs)
88
- end
89
-
90
- def mark_as_parsed(element_config=nil)
91
- element_config ||= @element_config
92
- @parsed_configs[element_config] = true unless element_config.collection?
93
- end
94
-
95
- def parsed_config?(element_config=nil)
96
- element_config ||= @element_config
97
- @parsed_configs[element_config]
98
- end
99
-
100
- def reset_current_collection
101
- @collection_handler = nil
102
- @collection_config = nil
55
+ stack.pop
103
56
  end
104
57
 
105
- def reset_current_tag
106
- @name = nil
107
- @attrs = nil
108
- @value = nil
109
- @element_config = nil
58
+ def mark_as_parsed(object, element_config)
59
+ @parsed_configs[[object.object_id, element_config.object_id]] = true unless element_config.collection?
110
60
  end
111
61
 
112
- def sax_config
113
- @object.class.sax_config
62
+ def parsed_config?(object, element_config)
63
+ @parsed_configs[[object.object_id, element_config.object_id]]
114
64
  end
115
65
  end
116
66
  end
@@ -15,7 +15,7 @@ describe "SAXMachine" do
15
15
  document.title = "Title"
16
16
  document.title.should == "Title"
17
17
  end
18
-
18
+
19
19
  it "should allow introspection of the elements" do
20
20
  @klass.column_names.should =~ [:title]
21
21
  end
@@ -54,7 +54,7 @@ describe "SAXMachine" do
54
54
  @klass.required?(:date).should be_true
55
55
  end
56
56
  end
57
-
57
+
58
58
  it "should not overwrite the accessor when the element is not present" do
59
59
  document = @klass.new
60
60
  document.title = "Title"
@@ -73,7 +73,7 @@ describe "SAXMachine" do
73
73
  document = @klass.parse("<title>My Title</title>")
74
74
  document.title.should == "My Title"
75
75
  end
76
-
76
+
77
77
  it "should save cdata into an accessor" do
78
78
  document = @klass.parse("<title><![CDATA[A Title]]></title>")
79
79
  document.title.should == "A Title"
@@ -86,7 +86,8 @@ describe "SAXMachine" do
86
86
 
87
87
  it "should save the first element text when there are multiple of the same element" do
88
88
  document = @klass.parse("<xml><title>My Title</title><title>bar</title></xml>")
89
- document.title.should == "My Title"
89
+ document.title.should == "My Title"
90
+
90
91
  end
91
92
  end
92
93
 
@@ -126,7 +127,7 @@ describe "SAXMachine" do
126
127
  document.summary.should == "here is a description"
127
128
  end
128
129
  end
129
-
130
+
130
131
  describe "using the :with option" do
131
132
  describe "and the :value option" do
132
133
  before :each do
@@ -135,17 +136,17 @@ describe "SAXMachine" do
135
136
  element :link, :value => :href, :with => {:foo => "bar"}
136
137
  end
137
138
  end
138
-
139
+
139
140
  it "should save the value of a matching element" do
140
141
  document = @klass.parse("<link href='test' foo='bar'>asdf</link>")
141
142
  document.link.should == "test"
142
143
  end
143
-
144
+
144
145
  it "should save the value of the first matching element" do
145
146
  document = @klass.parse("<xml><link href='first' foo='bar' /><link href='second' foo='bar' /></xml>")
146
147
  document.link.should == "first"
147
148
  end
148
-
149
+
149
150
  describe "and the :as option" do
150
151
  before :each do
151
152
  @klass = Class.new do
@@ -154,15 +155,16 @@ describe "SAXMachine" do
154
155
  element :link, :value => :href, :as => :second_url, :with => {:asdf => "jkl"}
155
156
  end
156
157
  end
157
-
158
+
158
159
  it "should save the value of the first matching element" do
159
160
  document = @klass.parse("<xml><link href='first' foo='bar' /><link href='second' asdf='jkl' /><link href='second' foo='bar' /></xml>")
160
161
  document.url.should == "first"
161
162
  document.second_url.should == "second"
162
- end
163
+ end
164
+
163
165
  end
164
166
  end
165
-
167
+
166
168
  describe "with only one element" do
167
169
  before :each do
168
170
  @klass = Class.new do
@@ -183,15 +185,16 @@ describe "SAXMachine" do
183
185
 
184
186
  it "should save the text of an element that has matching attributes when it is the second of that type" do
185
187
  document = @klass.parse("<xml><link>no match</link><link foo=\"bar\">match</link></xml>")
186
- document.link.should == "match"
188
+ document.link.should == "match"
189
+
187
190
  end
188
-
191
+
189
192
  it "should save the text of an element that has matching attributes plus a few more" do
190
193
  document = @klass.parse("<xml><link>no match</link><link asdf='jkl' foo='bar'>match</link>")
191
194
  document.link.should == "match"
192
195
  end
193
196
  end
194
-
197
+
195
198
  describe "with multiple elements of same tag" do
196
199
  before :each do
197
200
  @klass = Class.new do
@@ -200,19 +203,19 @@ describe "SAXMachine" do
200
203
  element :link, :as => :second, :with => {:asdf => "jkl"}
201
204
  end
202
205
  end
203
-
206
+
204
207
  it "should match the first element" do
205
208
  document = @klass.parse("<xml><link>no match</link><link foo=\"bar\">first match</link><link>no match</link></xml>")
206
209
  document.first.should == "first match"
207
210
  end
208
-
211
+
209
212
  it "should match the second element" do
210
213
  document = @klass.parse("<xml><link>no match</link><link foo='bar'>first match</link><link asdf='jkl'>second match</link><link>hi</link></xml>")
211
214
  document.second.should == "second match"
212
215
  end
213
216
  end
214
217
  end # using the 'with' option
215
-
218
+
216
219
  describe "using the 'value' option" do
217
220
  before :each do
218
221
  @klass = Class.new do
@@ -220,22 +223,22 @@ describe "SAXMachine" do
220
223
  element :link, :value => :foo
221
224
  end
222
225
  end
223
-
226
+
224
227
  it "should save the attribute value" do
225
228
  document = @klass.parse("<link foo='test'>hello</link>")
226
229
  document.link.should == 'test'
227
230
  end
228
-
231
+
229
232
  it "should save the attribute value when there is no text enclosed by the tag" do
230
233
  document = @klass.parse("<link foo='test'></link>")
231
234
  document.link.should == 'test'
232
235
  end
233
-
236
+
234
237
  it "should save the attribute value when the tag close is in the open" do
235
238
  document = @klass.parse("<link foo='test'/>")
236
239
  document.link.should == 'test'
237
240
  end
238
-
241
+
239
242
  it "should save two different attribute values on a single tag" do
240
243
  @klass = Class.new do
241
244
  include SAXMachine
@@ -246,7 +249,7 @@ describe "SAXMachine" do
246
249
  document.first.should == "foo value"
247
250
  document.second.should == "bar value"
248
251
  end
249
-
252
+
250
253
  it "should not fail if one of the attribute hasn't been defined" do
251
254
  @klass = Class.new do
252
255
  include SAXMachine
@@ -258,7 +261,7 @@ describe "SAXMachine" do
258
261
  document.second.should be_nil
259
262
  end
260
263
  end
261
-
264
+
262
265
  describe "when desiring both the content and attributes of an element" do
263
266
  before :each do
264
267
  @klass = Class.new do
@@ -276,10 +279,10 @@ describe "SAXMachine" do
276
279
  document.link_bar.should == 'test2'
277
280
  end
278
281
  end
279
-
282
+
280
283
  end
281
284
  end
282
-
285
+
283
286
  describe "elements" do
284
287
  describe "when parsing multiple elements" do
285
288
  before :each do
@@ -288,23 +291,23 @@ describe "SAXMachine" do
288
291
  elements :entry, :as => :entries
289
292
  end
290
293
  end
291
-
294
+
292
295
  it "should provide a collection accessor" do
293
296
  document = @klass.new
294
297
  document.entries << :foo
295
298
  document.entries.should == [:foo]
296
299
  end
297
-
300
+
298
301
  it "should parse a single element" do
299
302
  document = @klass.parse("<entry>hello</entry>")
300
303
  document.entries.should == ["hello"]
301
304
  end
302
-
305
+
303
306
  it "should parse multiple elements" do
304
307
  document = @klass.parse("<xml><entry>hello</entry><entry>world</entry></xml>")
305
308
  document.entries.should == ["hello", "world"]
306
309
  end
307
-
310
+
308
311
  it "should parse multiple elements when taking an attribute value" do
309
312
  attribute_klass = Class.new do
310
313
  include SAXMachine
@@ -314,7 +317,40 @@ describe "SAXMachine" do
314
317
  doc.entries.should == ["asdf", "jkl"]
315
318
  end
316
319
  end
317
-
320
+
321
+ describe "when using the with and class options" do
322
+ before :each do
323
+ class Bar
324
+ include SAXMachine
325
+ element :title
326
+ end
327
+
328
+ class Foo
329
+ include SAXMachine
330
+ element :title
331
+ end
332
+
333
+ class Item
334
+ include SAXMachine
335
+
336
+ end
337
+ @klass = Class.new do
338
+ include SAXMachine
339
+ elements :item, :as => :items, :with => {:type => 'Bar'}, :class => Bar
340
+ elements :item, :as => :items, :with => {:type => 'Foo'}, :class => Foo
341
+ end
342
+ end
343
+
344
+ it "should cast into the correct class" do
345
+ document = @klass.parse("<items><item type=\"Bar\"><title>Bar title</title></item><item type=\"Foo\"><title>Foo title</title></item></items>")
346
+ document.items.size.should == 2
347
+ document.items.first.should be_a(Bar)
348
+ document.items.first.title.should == "Bar title"
349
+ document.items.last.should be_a(Foo)
350
+ document.items.last.title.should == "Foo title"
351
+ end
352
+ end
353
+
318
354
  describe "when using the class option" do
319
355
  before :each do
320
356
  class Foo
@@ -326,26 +362,26 @@ describe "SAXMachine" do
326
362
  elements :entry, :as => :entries, :class => Foo
327
363
  end
328
364
  end
329
-
365
+
330
366
  it "should parse a single element with children" do
331
367
  document = @klass.parse("<entry><title>a title</title></entry>")
332
368
  document.entries.size.should == 1
333
369
  document.entries.first.title.should == "a title"
334
370
  end
335
-
371
+
336
372
  it "should parse multiple elements with children" do
337
373
  document = @klass.parse("<xml><entry><title>title 1</title></entry><entry><title>title 2</title></entry></xml>")
338
374
  document.entries.size.should == 2
339
375
  document.entries.first.title.should == "title 1"
340
376
  document.entries.last.title.should == "title 2"
341
377
  end
342
-
378
+
343
379
  it "should not parse a top level element that is specified only in a child" do
344
380
  document = @klass.parse("<xml><title>no parse</title><entry><title>correct title</title></entry></xml>")
345
381
  document.entries.size.should == 1
346
382
  document.entries.first.title.should == "correct title"
347
383
  end
348
-
384
+
349
385
  it "should parse out an attribute value from the tag that starts the collection" do
350
386
  class Foo
351
387
  element :entry, :value => :href, :as => :url
@@ -355,9 +391,10 @@ describe "SAXMachine" do
355
391
  document.entries.first.title.should == "paul"
356
392
  document.entries.first.url.should == "http://pauldix.net"
357
393
  end
358
- end
394
+ end
395
+
359
396
  end
360
-
397
+
361
398
  describe "full example" do
362
399
  before :each do
363
400
  @xml = File.read('spec/sax-machine/atom.xml')
@@ -370,7 +407,7 @@ describe "SAXMachine" do
370
407
  element :content
371
408
  element :published
372
409
  end
373
-
410
+
374
411
  class Atom
375
412
  include SAXMachine
376
413
  element :title
@@ -379,10 +416,112 @@ describe "SAXMachine" do
379
416
  elements :entry, :as => :entries, :class => AtomEntry
380
417
  end
381
418
  end # before
382
-
419
+
383
420
  it "should parse the url" do
384
421
  f = Atom.parse(@xml)
385
422
  f.url.should == "http://www.pauldix.net/"
386
423
  end
387
424
  end
425
+
426
+ describe "parsing a tree" do
427
+ before do
428
+ @xml = %[
429
+ <categories>
430
+ <category id="1">
431
+ <title>First</title>
432
+ <categories>
433
+ <category id="2">
434
+ <title>Second</title>
435
+ </category>
436
+ </categories>
437
+ </category>
438
+ </categories>
439
+ ]
440
+ class CategoryCollection; end
441
+ class Category
442
+ include SAXMachine
443
+ attr_accessor :id
444
+ element :category, :value => :id, :as => :id
445
+ element :title
446
+ element :categories, :as => :collection, :class => CategoryCollection
447
+ end
448
+ class CategoryCollection
449
+ include SAXMachine
450
+ elements :category, :as => :categories, :class => Category
451
+ end
452
+ @collection = CategoryCollection.parse(@xml)
453
+ end
454
+
455
+ it "should parse the first category" do
456
+ @collection.categories.first.id.should == "1"
457
+ @collection.categories.first.title.should == "First"
458
+ end
459
+
460
+ it "should parse the nested category" do
461
+ @collection.categories.first.collection.categories.first.id.should == "2"
462
+ @collection.categories.first.collection.categories.first.title.should == "Second"
463
+ end
464
+ end
465
+
466
+ describe "parsing a tree without a collection class" do
467
+ before do
468
+ @xml = %[
469
+ <categories>
470
+ <category id="1">
471
+ <title>First</title>
472
+ <categories>
473
+ <category id="2">
474
+ <title>Second</title>
475
+ </category>
476
+ </categories>
477
+ </category>
478
+ </categories>
479
+ ]
480
+ class CategoryTree
481
+ include SAXMachine
482
+ attr_accessor :id
483
+ element :category, :value => :id, :as => :id
484
+ element :title
485
+ elements :category, :as => :categories, :class => CategoryTree
486
+ end
487
+ @collection = CategoryTree.parse(@xml)
488
+ end
489
+
490
+ it "should parse the first category" do
491
+ @collection.categories.first.id.should == "1"
492
+ @collection.categories.first.title.should == "First"
493
+ end
494
+
495
+ it "should parse the nested category" do
496
+ @collection.categories.first.categories.first.id.should == "2"
497
+ @collection.categories.first.categories.first.title.should == "Second"
498
+ end
499
+ end
500
+
501
+ describe "with element deeper inside the xml structure" do
502
+ before do
503
+ @xml = %[
504
+ <item id="1">
505
+ <texts>
506
+ <title>Hello</title>
507
+ </texts>
508
+ </item>
509
+ ]
510
+ @klass = Class.new do
511
+ include SAXMachine
512
+ attr_accessor :id
513
+ element :item, :value => "id", :as => :id
514
+ element :title
515
+ end
516
+ @item = @klass.parse(@xml)
517
+ end
518
+
519
+ it "should have an id" do
520
+ @item.id.should == "1"
521
+ end
522
+
523
+ it "should have a title" do
524
+ @item.title.should == "Hello"
525
+ end
526
+ end
388
527
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sax-machine
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Dix