julien51-sax-machine 0.0.14 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/sax-machine/ns_stack.rb +38 -0
- data/lib/sax-machine/sax_collection_config.rb +12 -3
- data/lib/sax-machine/sax_config.rb +25 -12
- data/lib/sax-machine/sax_document.rb +15 -3
- data/lib/sax-machine/sax_element_config.rb +11 -1
- data/lib/sax-machine/sax_handler.rb +20 -29
- data/lib/sax-machine.rb +3 -3
- data/spec/sax-machine/sax_document_spec.rb +69 -5
- metadata +5 -1
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
module SAXMachine
  # One frame of an XML namespace scope chain.
  #
  # A frame stores the namespace declarations found in one attribute list:
  # the value of a plain "xmlns" attribute is kept under the empty-string
  # key, and the value of an "xmlns:prefix" attribute under "prefix". Each
  # frame also remembers the frame it was stacked on, so a lookup that
  # misses locally continues in the enclosing frames.
  class NSStack < Hash
    # Matches an "xmlns:prefix" attribute name and captures the prefix.
    # Anchored to the whole string rather than to a line.
    PREFIXED_DECLARATION = /\Axmlns:(.+)\z/

    # Builds a new frame on top of +parent+.
    #
    # parent - the enclosing NSStack frame, or nil for the outermost frame.
    # attrs  - an enumerable of attributes. Only entries that are arrays
    #          (read as [name, value]) are inspected; everything else is
    #          ignored. nil is accepted and treated as "no attributes".
    def initialize(parent=nil, attrs=[])
      super()
      @parent = parent

      (attrs || []).each do |attr|
        # Entries that are not [name, value] pairs cannot be declarations.
        next unless attr.kind_of?(Array)

        key, uri = attr
        case key
        when 'xmlns'
          # Default namespace lives under the empty prefix.
          self[''] = uri
        when PREFIXED_DECLARATION
          self[Regexp.last_match(1)] = uri
        end
      end
    end

    # Resolves a prefix to the value declared for it.
    #
    # name - the prefix; it is converted with to_s, so nil and '' both
    #        address the default namespace and symbols work like strings.
    #
    # Returns the value stored in this frame if there is one, otherwise the
    # answer of the parent frame, otherwise '' when no frame declares it.
    def [](name)
      declared = super(name.to_s)
      return declared if declared
      return @parent[name] if @parent

      # Undefined prefix: report the empty namespace.
      ''
    end

    # Returns the frame this one was stacked on (nil for the outermost
    # frame). The receiver itself is left untouched.
    def pop
      @parent
    end
  end
end
|
|
@@ -8,16 +8,25 @@ module SAXMachine
|
|
|
8
8
|
@name = name.to_s
|
|
9
9
|
@class = options[:class]
|
|
10
10
|
@as = options[:as].to_s
|
|
11
|
+
@xmlns = case options[:xmlns]
|
|
12
|
+
when Array then options[:xmlns]
|
|
13
|
+
when String then [options[:xmlns]]
|
|
14
|
+
else nil
|
|
15
|
+
end
|
|
11
16
|
end
|
|
12
17
|
|
|
13
|
-
def handler
|
|
14
|
-
SAXHandler.new(@class.new)
|
|
18
|
+
def handler(nsstack)
|
|
19
|
+
SAXHandler.new(@class.new, nsstack)
|
|
15
20
|
end
|
|
16
21
|
|
|
17
22
|
def accessor
|
|
18
23
|
as
|
|
19
24
|
end
|
|
20
25
|
|
|
26
|
+
def xmlns_match?(ns)
|
|
27
|
+
@xmlns.nil? || @xmlns.include?(ns)
|
|
28
|
+
end
|
|
29
|
+
|
|
21
30
|
protected
|
|
22
31
|
|
|
23
32
|
def as
|
|
@@ -30,4 +39,4 @@ module SAXMachine
|
|
|
30
39
|
end
|
|
31
40
|
|
|
32
41
|
end
|
|
33
|
-
end
|
|
42
|
+
end
|
|
@@ -6,35 +6,48 @@ module SAXMachine
|
|
|
6
6
|
attr_reader :top_level_elements, :collection_elements
|
|
7
7
|
|
|
8
8
|
def initialize
|
|
9
|
-
@top_level_elements =
|
|
10
|
-
@collection_elements =
|
|
9
|
+
@top_level_elements = {}
|
|
10
|
+
@collection_elements = {}
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
def add_top_level_element(name, options)
|
|
14
|
-
@top_level_elements
|
|
14
|
+
@top_level_elements[name.to_s] ||= []
|
|
15
|
+
@top_level_elements[name.to_s] << ElementConfig.new(name, options)
|
|
15
16
|
end
|
|
16
17
|
|
|
17
18
|
def add_collection_element(name, options)
|
|
18
|
-
@collection_elements
|
|
19
|
+
@collection_elements[name.to_s] ||= []
|
|
20
|
+
@collection_elements[name.to_s] << CollectionConfig.new(name, options)
|
|
19
21
|
end
|
|
20
22
|
|
|
21
|
-
def collection_config(name)
|
|
22
|
-
|
|
23
|
+
def collection_config(name, nsstack)
|
|
24
|
+
prefix, name = name.split(':', 2)
|
|
25
|
+
prefix, name = nil, prefix unless name # No prefix
|
|
26
|
+
namespace = nsstack[prefix]
|
|
27
|
+
|
|
28
|
+
(@collection_elements[name.to_s] || []).detect { |ce|
|
|
29
|
+
ce.name.to_s == name.to_s &&
|
|
30
|
+
ce.xmlns_match?(namespace)
|
|
31
|
+
}
|
|
23
32
|
end
|
|
24
33
|
|
|
25
34
|
def element_configs_for_attribute(name, attrs)
|
|
26
|
-
|
|
27
|
-
|
|
35
|
+
name = name.split(':', 2).last
|
|
36
|
+
(@top_level_elements[name.to_s] || []).select do |element_config|
|
|
28
37
|
element_config.has_value_and_attrs_match?(attrs)
|
|
29
38
|
end
|
|
30
39
|
end
|
|
31
40
|
|
|
32
|
-
def element_config_for_tag(name, attrs)
|
|
33
|
-
|
|
34
|
-
|
|
41
|
+
def element_config_for_tag(name, attrs, nsstack)
|
|
42
|
+
prefix, name = name.split(':', 2)
|
|
43
|
+
prefix, name = nil, prefix unless name # No prefix
|
|
44
|
+
namespace = nsstack[prefix]
|
|
45
|
+
|
|
46
|
+
(@top_level_elements[name.to_s] || []).detect do |element_config|
|
|
47
|
+
element_config.xmlns_match?(namespace) &&
|
|
35
48
|
element_config.attrs_match?(attrs)
|
|
36
49
|
end
|
|
37
50
|
end
|
|
38
51
|
|
|
39
52
|
end
|
|
40
|
-
end
|
|
53
|
+
end
|
|
@@ -39,15 +39,19 @@ module SAXMachine
|
|
|
39
39
|
# this is how we allow custom parsing behavior. So you could define the setter
|
|
40
40
|
# and have it parse the string into a date or whatever.
|
|
41
41
|
attr_reader options[:as] unless instance_methods.include?(options[:as].to_s)
|
|
42
|
-
|
|
42
|
+
attr_writer_once options[:as] unless instance_methods.include?("#{options[:as]}=")
|
|
43
43
|
end
|
|
44
44
|
|
|
45
45
|
# Returns a flat array of every configuration held in
# sax_config.top_level_elements, whose values are arrays of configs
# grouped under one key each.
#
# A fresh array is built on every call and the per-key arrays are only read,
# so the stored configuration is never modified. The groups are appended in
# place instead of with `+=`, which would copy the accumulated array once
# per key.
def columns
  all = []
  sax_config.top_level_elements.each_value { |configs| all.concat(configs) }
  all
end
|
|
48
52
|
|
|
49
53
|
# Looks up the first configuration stored under +sym+ in
# sax_config.top_level_elements.
#
# sym - the lookup key; it is converted with to_s before the lookup.
#
# Returns the first stored configuration, or nil when nothing is stored
# under that key (previously this raised NoMethodError on nil).
def column(sym)
  configs = sax_config.top_level_elements[sym.to_s]
  configs && configs.first
end
|
|
52
56
|
|
|
53
57
|
def data_class(sym)
|
|
@@ -89,6 +93,14 @@ module SAXMachine
|
|
|
89
93
|
def sax_config
|
|
90
94
|
@sax_config ||= SAXConfig.new
|
|
91
95
|
end
|
|
96
|
+
|
|
97
|
+
# Defines a "write once" setter named "<attr>=" on the receiver.
#
# The generated setter stores +val+ in the instance variable "@<attr>" only
# while that variable is unset, nil or false; once it holds a truthy value
# later assignments are ignored.
#
# The setter is created with define_method rather than by evaluating a
# source string, so the attribute name is never interpolated into code.
#
# attr - the attribute name (String or Symbol).
def attr_writer_once(attr)
  ivar = "@#{attr}"
  define_method("#{attr}=") do |val|
    current = instance_variable_defined?(ivar) ? instance_variable_get(ivar) : nil
    current || instance_variable_set(ivar, val)
  end
end
|
|
92
104
|
end
|
|
93
105
|
|
|
94
106
|
end
|
|
@@ -30,6 +30,12 @@ module SAXMachine
|
|
|
30
30
|
end
|
|
31
31
|
@data_class = options[:class]
|
|
32
32
|
@required = options[:required]
|
|
33
|
+
|
|
34
|
+
@xmlns = case options[:xmlns]
|
|
35
|
+
when Array then options[:xmlns]
|
|
36
|
+
when String then [options[:xmlns]]
|
|
37
|
+
else nil
|
|
38
|
+
end
|
|
33
39
|
end
|
|
34
40
|
|
|
35
41
|
def column
|
|
@@ -55,6 +61,10 @@ module SAXMachine
|
|
|
55
61
|
def has_value_and_attrs_match?(attrs)
|
|
56
62
|
!@value.nil? && attrs_match?(attrs)
|
|
57
63
|
end
|
|
64
|
+
|
|
65
|
+
def xmlns_match?(ns)
|
|
66
|
+
@xmlns.nil? || @xmlns.include?(ns)
|
|
67
|
+
end
|
|
58
68
|
|
|
59
69
|
def collection?
|
|
60
70
|
@collection
|
|
@@ -62,4 +72,4 @@ module SAXMachine
|
|
|
62
72
|
end
|
|
63
73
|
|
|
64
74
|
end
|
|
65
|
-
end
|
|
75
|
+
end
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
require "nokogiri"
|
|
2
|
+
require "sax-machine/ns_stack"
|
|
2
3
|
|
|
3
4
|
module SAXMachine
|
|
4
5
|
class SAXHandler < Nokogiri::XML::SAX::Document
|
|
5
6
|
attr_reader :object
|
|
6
7
|
|
|
7
|
-
def initialize(object)
|
|
8
|
+
def initialize(object, nsstack=nil)
|
|
8
9
|
@object = object
|
|
9
|
-
@
|
|
10
|
+
@nsstack = nsstack || NSStack.new
|
|
10
11
|
end
|
|
11
12
|
|
|
12
13
|
def characters(string)
|
|
@@ -22,15 +23,16 @@ module SAXMachine
|
|
|
22
23
|
end
|
|
23
24
|
|
|
24
25
|
def start_element(name, attrs = [])
|
|
25
|
-
|
|
26
|
+
|
|
26
27
|
@name = name
|
|
27
28
|
@attrs = attrs.map { |a| SAXHandler.decode_xml(a) }
|
|
29
|
+
@nsstack = NSStack.new(@nsstack, @attrs)
|
|
28
30
|
|
|
29
31
|
if parsing_collection?
|
|
30
32
|
@collection_handler.start_element(@name, @attrs)
|
|
31
33
|
|
|
32
|
-
elsif @collection_config = sax_config.collection_config(@name)
|
|
33
|
-
@collection_handler = @collection_config.handler
|
|
34
|
+
elsif @collection_config = sax_config.collection_config(@name, @nsstack)
|
|
35
|
+
@collection_handler = @collection_config.handler(@nsstack)
|
|
34
36
|
@collection_handler.start_element(@name, @attrs)
|
|
35
37
|
|
|
36
38
|
elsif (element_configs = sax_config.element_configs_for_attribute(@name, @attrs)).any?
|
|
@@ -44,6 +46,7 @@ module SAXMachine
|
|
|
44
46
|
|
|
45
47
|
def end_element(name)
|
|
46
48
|
if parsing_collection? && @collection_config.name == name
|
|
49
|
+
@collection_handler.end_element(name)
|
|
47
50
|
@object.send(@collection_config.accessor) << @collection_handler.object
|
|
48
51
|
reset_current_collection
|
|
49
52
|
|
|
@@ -51,11 +54,11 @@ module SAXMachine
|
|
|
51
54
|
@collection_handler.end_element(name)
|
|
52
55
|
|
|
53
56
|
elsif characaters_captured?
|
|
54
|
-
mark_as_parsed
|
|
55
57
|
@object.send(@element_config.setter, @value)
|
|
56
58
|
end
|
|
57
59
|
|
|
58
60
|
reset_current_tag
|
|
61
|
+
@nsstack = @nsstack.pop
|
|
59
62
|
end
|
|
60
63
|
|
|
61
64
|
def characaters_captured?
|
|
@@ -75,27 +78,14 @@ module SAXMachine
|
|
|
75
78
|
|
|
76
79
|
def parse_element_attributes(element_configs)
|
|
77
80
|
element_configs.each do |ec|
|
|
78
|
-
|
|
79
|
-
@object.send(ec.setter, ec.value_from_attrs(@attrs))
|
|
80
|
-
mark_as_parsed(ec)
|
|
81
|
-
end
|
|
81
|
+
@object.send(ec.setter, ec.value_from_attrs(@attrs))
|
|
82
82
|
end
|
|
83
83
|
@element_config = nil
|
|
84
84
|
end
|
|
85
85
|
|
|
86
86
|
def set_element_config_for_element_value
|
|
87
87
|
@value = ""
|
|
88
|
-
@element_config = sax_config.element_config_for_tag(@name, @attrs)
|
|
89
|
-
end
|
|
90
|
-
|
|
91
|
-
def mark_as_parsed(element_config=nil)
|
|
92
|
-
element_config ||= @element_config
|
|
93
|
-
@parsed_configs[element_config] = true unless element_config.collection?
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
def parsed_config?(element_config=nil)
|
|
97
|
-
element_config ||= @element_config
|
|
98
|
-
@parsed_configs[element_config]
|
|
88
|
+
@element_config = sax_config.element_config_for_tag(@name, @attrs, @nsstack)
|
|
99
89
|
end
|
|
100
90
|
|
|
101
91
|
def reset_current_collection
|
|
@@ -119,14 +109,15 @@ module SAXMachine
|
|
|
119
109
|
def self.decode_xml(str)
|
|
120
110
|
return str.map &method(:decode_xml) if str.kind_of?(Array)
|
|
121
111
|
|
|
122
|
-
entities = {
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
126
|
-
entities.keys.inject(str) { |string, key|
|
|
127
|
-
|
|
128
|
-
}
|
|
112
|
+
# entities = {
|
|
113
|
+
# '#38' => '&',
|
|
114
|
+
# '#13' => "\r",
|
|
115
|
+
# }
|
|
116
|
+
# entities.keys.inject(str) { |string, key|
|
|
117
|
+
# string.gsub(/&#{key};/, entities[key])
|
|
118
|
+
# }
|
|
119
|
+
CGI.unescapeHTML(str)
|
|
129
120
|
end
|
|
130
121
|
|
|
131
122
|
end
|
|
132
|
-
end
|
|
123
|
+
end
|
data/lib/sax-machine.rb
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
require "rubygems"
|
|
2
|
-
|
|
3
1
|
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
|
4
2
|
|
|
3
|
+
require "cgi"
|
|
4
|
+
|
|
5
5
|
require "sax-machine/sax_document"
|
|
6
6
|
require "sax-machine/sax_handler"
|
|
7
7
|
require "sax-machine/sax_config"
|
|
8
8
|
|
|
9
9
|
module SAXMachine
|
|
10
|
-
VERSION = "0.0.
|
|
10
|
+
VERSION = "0.0.18"
|
|
11
11
|
end
|
|
@@ -62,11 +62,11 @@ describe "SAXMachine" do
|
|
|
62
62
|
document.title.should == "Title"
|
|
63
63
|
end
|
|
64
64
|
|
|
65
|
-
it "should overwrite the value when the element is present" do
|
|
65
|
+
it "should *not* overwrite the value when the element is present (new behaviour!)" do
|
|
66
66
|
document = @klass.new
|
|
67
67
|
document.title = "Old title"
|
|
68
68
|
document.parse("<title>New title</title>")
|
|
69
|
-
document.title.should == "
|
|
69
|
+
document.title.should == "Old title"
|
|
70
70
|
end
|
|
71
71
|
|
|
72
72
|
it "should save the element text into an accessor" do
|
|
@@ -138,7 +138,7 @@ describe "SAXMachine" do
|
|
|
138
138
|
|
|
139
139
|
it "should escape correctly the ampersand" do
|
|
140
140
|
document = @klass.parse("<link href='http://api.flickr.com/services/feeds/photos_public.gne?id=49724566@N00&lang=en-us&format=atom' foo='bar'>asdf</link>")
|
|
141
|
-
document.link.should == "http://api.flickr.com/services/feeds/photos_public.gne?id=49724566@N00&
|
|
141
|
+
document.link.should == "http://api.flickr.com/services/feeds/photos_public.gne?id=49724566@N00&lang=en-us&format=atom"
|
|
142
142
|
end
|
|
143
143
|
|
|
144
144
|
it "should save the value of a matching element" do
|
|
@@ -281,6 +281,57 @@ describe "SAXMachine" do
|
|
|
281
281
|
document.link_bar.should == 'test2'
|
|
282
282
|
end
|
|
283
283
|
end
|
|
284
|
+
|
|
285
|
+
describe "when specifying namespaces" do
|
|
286
|
+
before :all do
|
|
287
|
+
@klass = Class.new do
|
|
288
|
+
include SAXMachine
|
|
289
|
+
element :a, :xmlns => 'urn:test'
|
|
290
|
+
element :b, :xmlns => ['', 'urn:test']
|
|
291
|
+
end
|
|
292
|
+
end
|
|
293
|
+
|
|
294
|
+
it "should get the element with the xmlns" do
|
|
295
|
+
document = @klass.parse("<a xmlns='urn:test'>hello</a>")
|
|
296
|
+
document.a.should == 'hello'
|
|
297
|
+
end
|
|
298
|
+
|
|
299
|
+
it "shouldn't get the element without the xmlns" do
|
|
300
|
+
document = @klass.parse("<a>hello</a>")
|
|
301
|
+
document.a.should be_nil
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
it "shouldn't get the element with the wrong xmlns" do
|
|
305
|
+
document = @klass.parse("<a xmlns='urn:test2'>hello</a>")
|
|
306
|
+
document.a.should be_nil
|
|
307
|
+
end
|
|
308
|
+
|
|
309
|
+
it "should get an element without xmlns if the empty namespace is desired" do
|
|
310
|
+
document = @klass.parse("<b>hello</b>")
|
|
311
|
+
document.b.should == 'hello'
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
it "should get an element with the right prefix" do
|
|
315
|
+
document = @klass.parse("<p:a xmlns:p='urn:test'>hello</p:a>")
|
|
316
|
+
document.a.should == 'hello'
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
it "should not get an element with the wrong prefix" do
|
|
320
|
+
document = @klass.parse("<x:a xmlns:p='urn:test' xmlns:x='urn:test2'>hello</x:a>")
|
|
321
|
+
document.a.should be_nil
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
it "should get a prefixed element without xmlns if the empty namespace is desired" do
|
|
325
|
+
pending "this needs a less pickier nokogiri push parser"
|
|
326
|
+
document = @klass.parse("<x:b>hello</x:b>")
|
|
327
|
+
document.b.should == 'hello'
|
|
328
|
+
end
|
|
329
|
+
|
|
330
|
+
it "should get the namespaced element even it's not first" do
|
|
331
|
+
document = @klass.parse("<root xmlns:a='urn:test'><a>foo</a><a>foo</a><a:a>bar</a:a></root>")
|
|
332
|
+
document.a.should == 'bar'
|
|
333
|
+
end
|
|
334
|
+
end
|
|
284
335
|
|
|
285
336
|
end
|
|
286
337
|
end
|
|
@@ -364,13 +415,16 @@ describe "SAXMachine" do
|
|
|
364
415
|
end
|
|
365
416
|
|
|
366
417
|
describe "full example" do
|
|
418
|
+
XMLNS_ATOM = "http://www.w3.org/2005/Atom"
|
|
419
|
+
XMLNS_FEEDBURNER = "http://rssnamespace.org/feedburner/ext/1.0"
|
|
420
|
+
|
|
367
421
|
before :each do
|
|
368
422
|
@xml = File.read('spec/sax-machine/atom.xml')
|
|
369
423
|
class AtomEntry
|
|
370
424
|
include SAXMachine
|
|
371
425
|
element :title
|
|
372
426
|
element :name, :as => :author
|
|
373
|
-
element
|
|
427
|
+
element :origLink, :as => :orig_link, :xmlns => XMLNS_FEEDBURNER
|
|
374
428
|
element :summary
|
|
375
429
|
element :content
|
|
376
430
|
element :published
|
|
@@ -381,7 +435,7 @@ describe "SAXMachine" do
|
|
|
381
435
|
element :title
|
|
382
436
|
element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
|
|
383
437
|
element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
|
|
384
|
-
elements :entry, :as => :entries, :class => AtomEntry
|
|
438
|
+
elements :entry, :as => :entries, :class => AtomEntry, :xmlns => XMLNS_ATOM
|
|
385
439
|
end
|
|
386
440
|
end # before
|
|
387
441
|
|
|
@@ -389,5 +443,15 @@ describe "SAXMachine" do
|
|
|
389
443
|
f = Atom.parse(@xml)
|
|
390
444
|
f.url.should == "http://www.pauldix.net/"
|
|
391
445
|
end
|
|
446
|
+
|
|
447
|
+
it "should parse all entries" do
|
|
448
|
+
f = Atom.parse(@xml)
|
|
449
|
+
f.entries.length.should == 5
|
|
450
|
+
end
|
|
451
|
+
|
|
452
|
+
it "should parse the feedburner:origLink" do
|
|
453
|
+
f = Atom.parse(@xml)
|
|
454
|
+
f.entries[0].orig_link.should == 'http://www.pauldix.net/2008/09/marshal-data-to.html'
|
|
455
|
+
end
|
|
392
456
|
end
|
|
393
457
|
end
|
metadata
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: julien51-sax-machine
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.18
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Paul Dix
|
|
8
|
+
- astro
|
|
9
|
+
- julien51
|
|
10
|
+
- superfeedr
|
|
8
11
|
autorequire:
|
|
9
12
|
bindir: bin
|
|
10
13
|
cert_chain: []
|
|
@@ -37,6 +40,7 @@ files:
|
|
|
37
40
|
- lib/sax-machine/sax_element_config.rb
|
|
38
41
|
- lib/sax-machine/sax_document.rb
|
|
39
42
|
- lib/sax-machine/sax_handler.rb
|
|
43
|
+
- lib/sax-machine/ns_stack.rb
|
|
40
44
|
- README.textile
|
|
41
45
|
- Rakefile
|
|
42
46
|
- spec/spec.opts
|