julien51-sax-machine 0.0.14 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ module SAXMachine
2
+ class NSStack < Hash
3
+ def initialize(parent=nil, attrs=[])
4
+ # Initialize
5
+ super()
6
+ @parent = parent
7
+
8
+ # Parse attributes
9
+ attrs.each do |attr|
10
+ if attr.kind_of?(Array)
11
+ k, v = attr
12
+ case k
13
+ when 'xmlns' then self[''] = v
14
+ when /^xmlns:(.+)/ then self[$1] = v
15
+ end
16
+ end
17
+ end
18
+ end
19
+
20
+ # Lookup
21
+ def [](name)
22
+ if (ns = super(name.to_s))
23
+ # I've got it
24
+ ns
25
+ elsif @parent
26
+ # Parent may have it
27
+ @parent[name]
28
+ else
29
+ # Undefined, empty namespace
30
+ ''
31
+ end
32
+ end
33
+
34
+ def pop
35
+ @parent
36
+ end
37
+ end
38
+ end
@@ -8,16 +8,25 @@ module SAXMachine
8
8
  @name = name.to_s
9
9
  @class = options[:class]
10
10
  @as = options[:as].to_s
11
+ @xmlns = case options[:xmlns]
12
+ when Array then options[:xmlns]
13
+ when String then [options[:xmlns]]
14
+ else nil
15
+ end
11
16
  end
12
17
 
13
- def handler
14
- SAXHandler.new(@class.new)
18
+ def handler(nsstack)
19
+ SAXHandler.new(@class.new, nsstack)
15
20
  end
16
21
 
17
22
  def accessor
18
23
  as
19
24
  end
20
25
 
26
+ def xmlns_match?(ns)
27
+ @xmlns.nil? || @xmlns.include?(ns)
28
+ end
29
+
21
30
  protected
22
31
 
23
32
  def as
@@ -30,4 +39,4 @@ module SAXMachine
30
39
  end
31
40
 
32
41
  end
33
- end
42
+ end
@@ -6,35 +6,48 @@ module SAXMachine
6
6
  attr_reader :top_level_elements, :collection_elements
7
7
 
8
8
  def initialize
9
- @top_level_elements = []
10
- @collection_elements = []
9
+ @top_level_elements = {}
10
+ @collection_elements = {}
11
11
  end
12
12
 
13
13
  def add_top_level_element(name, options)
14
- @top_level_elements << ElementConfig.new(name, options)
14
+ @top_level_elements[name.to_s] ||= []
15
+ @top_level_elements[name.to_s] << ElementConfig.new(name, options)
15
16
  end
16
17
 
17
18
  def add_collection_element(name, options)
18
- @collection_elements << CollectionConfig.new(name, options)
19
+ @collection_elements[name.to_s] ||= []
20
+ @collection_elements[name.to_s] << CollectionConfig.new(name, options)
19
21
  end
20
22
 
21
- def collection_config(name)
22
- @collection_elements.detect { |ce| ce.name.to_s == name.to_s }
23
+ def collection_config(name, nsstack)
24
+ prefix, name = name.split(':', 2)
25
+ prefix, name = nil, prefix unless name # No prefix
26
+ namespace = nsstack[prefix]
27
+
28
+ (@collection_elements[name.to_s] || []).detect { |ce|
29
+ ce.name.to_s == name.to_s &&
30
+ ce.xmlns_match?(namespace)
31
+ }
23
32
  end
24
33
 
25
34
  def element_configs_for_attribute(name, attrs)
26
- @top_level_elements.select do |element_config|
27
- element_config.name == name &&
35
+ name = name.split(':', 2).last
36
+ (@top_level_elements[name.to_s] || []).select do |element_config|
28
37
  element_config.has_value_and_attrs_match?(attrs)
29
38
  end
30
39
  end
31
40
 
32
- def element_config_for_tag(name, attrs)
33
- @top_level_elements.detect do |element_config|
34
- element_config.name == name &&
41
+ def element_config_for_tag(name, attrs, nsstack)
42
+ prefix, name = name.split(':', 2)
43
+ prefix, name = nil, prefix unless name # No prefix
44
+ namespace = nsstack[prefix]
45
+
46
+ (@top_level_elements[name.to_s] || []).detect do |element_config|
47
+ element_config.xmlns_match?(namespace) &&
35
48
  element_config.attrs_match?(attrs)
36
49
  end
37
50
  end
38
51
 
39
52
  end
40
- end
53
+ end
@@ -39,15 +39,19 @@ module SAXMachine
39
39
  # this is how we allow custom parsing behavior. So you could define the setter
40
40
  # and have it parse the string into a date or whatever.
41
41
  attr_reader options[:as] unless instance_methods.include?(options[:as].to_s)
42
- attr_writer options[:as] unless instance_methods.include?("#{options[:as]}=")
42
+ attr_writer_once options[:as] unless instance_methods.include?("#{options[:as]}=")
43
43
  end
44
44
 
45
45
  def columns
46
- sax_config.top_level_elements
46
+ r = []
47
+ sax_config.top_level_elements.each do |name, ecs|
48
+ r += ecs
49
+ end
50
+ r
47
51
  end
48
52
 
49
53
  def column(sym)
50
- columns.select{|c| c.column == sym}[0]
54
+ sax_config.top_level_elements[sym.to_s][0]
51
55
  end
52
56
 
53
57
  def data_class(sym)
@@ -89,6 +93,14 @@ module SAXMachine
89
93
  def sax_config
90
94
  @sax_config ||= SAXConfig.new
91
95
  end
96
+
97
+ def attr_writer_once(attr)
98
+ class_eval <<-SRC
99
+ def #{attr}=(val)
100
+ @#{attr} ||= val
101
+ end
102
+ SRC
103
+ end
92
104
  end
93
105
 
94
106
  end
@@ -30,6 +30,12 @@ module SAXMachine
30
30
  end
31
31
  @data_class = options[:class]
32
32
  @required = options[:required]
33
+
34
+ @xmlns = case options[:xmlns]
35
+ when Array then options[:xmlns]
36
+ when String then [options[:xmlns]]
37
+ else nil
38
+ end
33
39
  end
34
40
 
35
41
  def column
@@ -55,6 +61,10 @@ module SAXMachine
55
61
  def has_value_and_attrs_match?(attrs)
56
62
  !@value.nil? && attrs_match?(attrs)
57
63
  end
64
+
65
+ def xmlns_match?(ns)
66
+ @xmlns.nil? || @xmlns.include?(ns)
67
+ end
58
68
 
59
69
  def collection?
60
70
  @collection
@@ -62,4 +72,4 @@ module SAXMachine
62
72
  end
63
73
 
64
74
  end
65
- end
75
+ end
@@ -1,12 +1,13 @@
1
1
  require "nokogiri"
2
+ require "sax-machine/ns_stack"
2
3
 
3
4
  module SAXMachine
4
5
  class SAXHandler < Nokogiri::XML::SAX::Document
5
6
  attr_reader :object
6
7
 
7
- def initialize(object)
8
+ def initialize(object, nsstack=nil)
8
9
  @object = object
9
- @parsed_configs = {}
10
+ @nsstack = nsstack || NSStack.new
10
11
  end
11
12
 
12
13
  def characters(string)
@@ -22,15 +23,16 @@ module SAXMachine
22
23
  end
23
24
 
24
25
  def start_element(name, attrs = [])
25
-
26
+
26
27
  @name = name
27
28
  @attrs = attrs.map { |a| SAXHandler.decode_xml(a) }
29
+ @nsstack = NSStack.new(@nsstack, @attrs)
28
30
 
29
31
  if parsing_collection?
30
32
  @collection_handler.start_element(@name, @attrs)
31
33
 
32
- elsif @collection_config = sax_config.collection_config(@name)
33
- @collection_handler = @collection_config.handler
34
+ elsif @collection_config = sax_config.collection_config(@name, @nsstack)
35
+ @collection_handler = @collection_config.handler(@nsstack)
34
36
  @collection_handler.start_element(@name, @attrs)
35
37
 
36
38
  elsif (element_configs = sax_config.element_configs_for_attribute(@name, @attrs)).any?
@@ -44,6 +46,7 @@ module SAXMachine
44
46
 
45
47
  def end_element(name)
46
48
  if parsing_collection? && @collection_config.name == name
49
+ @collection_handler.end_element(name)
47
50
  @object.send(@collection_config.accessor) << @collection_handler.object
48
51
  reset_current_collection
49
52
 
@@ -51,11 +54,11 @@ module SAXMachine
51
54
  @collection_handler.end_element(name)
52
55
 
53
56
  elsif characaters_captured?
54
- mark_as_parsed
55
57
  @object.send(@element_config.setter, @value)
56
58
  end
57
59
 
58
60
  reset_current_tag
61
+ @nsstack = @nsstack.pop
59
62
  end
60
63
 
61
64
  def characaters_captured?
@@ -75,27 +78,14 @@ module SAXMachine
75
78
 
76
79
  def parse_element_attributes(element_configs)
77
80
  element_configs.each do |ec|
78
- unless parsed_config?(ec)
79
- @object.send(ec.setter, ec.value_from_attrs(@attrs))
80
- mark_as_parsed(ec)
81
- end
81
+ @object.send(ec.setter, ec.value_from_attrs(@attrs))
82
82
  end
83
83
  @element_config = nil
84
84
  end
85
85
 
86
86
  def set_element_config_for_element_value
87
87
  @value = ""
88
- @element_config = sax_config.element_config_for_tag(@name, @attrs)
89
- end
90
-
91
- def mark_as_parsed(element_config=nil)
92
- element_config ||= @element_config
93
- @parsed_configs[element_config] = true unless element_config.collection?
94
- end
95
-
96
- def parsed_config?(element_config=nil)
97
- element_config ||= @element_config
98
- @parsed_configs[element_config]
88
+ @element_config = sax_config.element_config_for_tag(@name, @attrs, @nsstack)
99
89
  end
100
90
 
101
91
  def reset_current_collection
@@ -119,14 +109,15 @@ module SAXMachine
119
109
  def self.decode_xml(str)
120
110
  return str.map &method(:decode_xml) if str.kind_of?(Array)
121
111
 
122
- entities = {
123
- '#38' => '&amp;',
124
- '#13' => "\r",
125
- }
126
- entities.keys.inject(str) { |string, key|
127
- string.gsub(/&#{key};/, entities[key])
128
- }
112
+ # entities = {
113
+ # '#38' => '&amp;',
114
+ # '#13' => "\r",
115
+ # }
116
+ # entities.keys.inject(str) { |string, key|
117
+ # string.gsub(/&#{key};/, entities[key])
118
+ # }
119
+ CGI.unescapeHTML(str)
129
120
  end
130
121
 
131
122
  end
132
- end
123
+ end
data/lib/sax-machine.rb CHANGED
@@ -1,11 +1,11 @@
1
- require "rubygems"
2
-
3
1
  $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
4
2
 
3
+ require "cgi"
4
+
5
5
  require "sax-machine/sax_document"
6
6
  require "sax-machine/sax_handler"
7
7
  require "sax-machine/sax_config"
8
8
 
9
9
  module SAXMachine
10
- VERSION = "0.0.14"
10
+ VERSION = "0.0.18"
11
11
  end
@@ -62,11 +62,11 @@ describe "SAXMachine" do
62
62
  document.title.should == "Title"
63
63
  end
64
64
 
65
- it "should overwrite the value when the element is present" do
65
+ it "should *not* overwrite the value when the element is present (new behaviour!)" do
66
66
  document = @klass.new
67
67
  document.title = "Old title"
68
68
  document.parse("<title>New title</title>")
69
- document.title.should == "New title"
69
+ document.title.should == "Old title"
70
70
  end
71
71
 
72
72
  it "should save the element text into an accessor" do
@@ -138,7 +138,7 @@ describe "SAXMachine" do
138
138
 
139
139
  it "should escape correctly the ampersand" do
140
140
  document = @klass.parse("<link href='http://api.flickr.com/services/feeds/photos_public.gne?id=49724566@N00&amp;lang=en-us&amp;format=atom' foo='bar'>asdf</link>")
141
- document.link.should == "http://api.flickr.com/services/feeds/photos_public.gne?id=49724566@N00&amp;lang=en-us&amp;format=atom"
141
+ document.link.should == "http://api.flickr.com/services/feeds/photos_public.gne?id=49724566@N00&lang=en-us&format=atom"
142
142
  end
143
143
 
144
144
  it "should save the value of a matching element" do
@@ -281,6 +281,57 @@ describe "SAXMachine" do
281
281
  document.link_bar.should == 'test2'
282
282
  end
283
283
  end
284
+
285
+ describe "when specifying namespaces" do
286
+ before :all do
287
+ @klass = Class.new do
288
+ include SAXMachine
289
+ element :a, :xmlns => 'urn:test'
290
+ element :b, :xmlns => ['', 'urn:test']
291
+ end
292
+ end
293
+
294
+ it "should get the element with the xmlns" do
295
+ document = @klass.parse("<a xmlns='urn:test'>hello</a>")
296
+ document.a.should == 'hello'
297
+ end
298
+
299
+ it "shouldn't get the element without the xmlns" do
300
+ document = @klass.parse("<a>hello</a>")
301
+ document.a.should be_nil
302
+ end
303
+
304
+ it "shouldn't get the element with the wrong xmlns" do
305
+ document = @klass.parse("<a xmlns='urn:test2'>hello</a>")
306
+ document.a.should be_nil
307
+ end
308
+
309
+ it "should get an element without xmlns if the empty namespace is desired" do
310
+ document = @klass.parse("<b>hello</b>")
311
+ document.b.should == 'hello'
312
+ end
313
+
314
+ it "should get an element with the right prefix" do
315
+ document = @klass.parse("<p:a xmlns:p='urn:test'>hello</p:a>")
316
+ document.a.should == 'hello'
317
+ end
318
+
319
+ it "should not get an element with the wrong prefix" do
320
+ document = @klass.parse("<x:a xmlns:p='urn:test' xmlns:x='urn:test2'>hello</x:a>")
321
+ document.a.should be_nil
322
+ end
323
+
324
+ it "should get a prefixed element without xmlns if the empty namespace is desired" do
325
+ pending "this needs a less pickier nokogiri push parser"
326
+ document = @klass.parse("<x:b>hello</x:b>")
327
+ document.b.should == 'hello'
328
+ end
329
+
330
+ it "should get the namespaced element even it's not first" do
331
+ document = @klass.parse("<root xmlns:a='urn:test'><a>foo</a><a>foo</a><a:a>bar</a:a></root>")
332
+ document.a.should == 'bar'
333
+ end
334
+ end
284
335
 
285
336
  end
286
337
  end
@@ -364,13 +415,16 @@ describe "SAXMachine" do
364
415
  end
365
416
 
366
417
  describe "full example" do
418
+ XMLNS_ATOM = "http://www.w3.org/2005/Atom"
419
+ XMLNS_FEEDBURNER = "http://rssnamespace.org/feedburner/ext/1.0"
420
+
367
421
  before :each do
368
422
  @xml = File.read('spec/sax-machine/atom.xml')
369
423
  class AtomEntry
370
424
  include SAXMachine
371
425
  element :title
372
426
  element :name, :as => :author
373
- element "feedburner:origLink", :as => :url
427
+ element :origLink, :as => :orig_link, :xmlns => XMLNS_FEEDBURNER
374
428
  element :summary
375
429
  element :content
376
430
  element :published
@@ -381,7 +435,7 @@ describe "SAXMachine" do
381
435
  element :title
382
436
  element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
383
437
  element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
384
- elements :entry, :as => :entries, :class => AtomEntry
438
+ elements :entry, :as => :entries, :class => AtomEntry, :xmlns => XMLNS_ATOM
385
439
  end
386
440
  end # before
387
441
 
@@ -389,5 +443,15 @@ describe "SAXMachine" do
389
443
  f = Atom.parse(@xml)
390
444
  f.url.should == "http://www.pauldix.net/"
391
445
  end
446
+
447
+ it "should parse all entries" do
448
+ f = Atom.parse(@xml)
449
+ f.entries.length.should == 5
450
+ end
451
+
452
+ it "should parse the feedburner:origLink" do
453
+ f = Atom.parse(@xml)
454
+ f.entries[0].orig_link.should == 'http://www.pauldix.net/2008/09/marshal-data-to.html'
455
+ end
392
456
  end
393
457
  end
metadata CHANGED
@@ -1,10 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: julien51-sax-machine
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Dix
8
+ - astro
9
+ - julien51
10
+ - superfeedr
8
11
  autorequire:
9
12
  bindir: bin
10
13
  cert_chain: []
@@ -37,6 +40,7 @@ files:
37
40
  - lib/sax-machine/sax_element_config.rb
38
41
  - lib/sax-machine/sax_document.rb
39
42
  - lib/sax-machine/sax_handler.rb
43
+ - lib/sax-machine/ns_stack.rb
40
44
  - README.textile
41
45
  - Rakefile
42
46
  - spec/spec.opts