pauldix-sax-machine 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc ADDED
File without changes
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
require "spec"
require "spec/rake/spectask"
# Load the library by absolute path so this Rakefile does not depend on the
# current directory being on the load path.
require File.expand_path("lib/sax-machine", File.dirname(__FILE__))

# Spec task: runs every *_spec.rb file under spec/ using the options listed
# in spec/spec.opts.
Spec::Rake::SpecTask.new do |t|
  t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# Builds the gem from sax-machine.gemspec and installs the resulting
# sax-machine-<VERSION>.gem with sudo.
task :install do
  # rm_rf treats its argument as a literal path and does not expand glob
  # patterns, so expand "*.gem" explicitly before removing old builds.
  rm_rf Dir["*.gem"]
  puts `gem build sax-machine.gemspec`
  puts `sudo gem install sax-machine-#{SAXMachine::VERSION}.gem`
end
@@ -0,0 +1,11 @@
1
require "rubygems"

# Put the directory containing this file on the load path (once), so the
# "sax-machine/..." requires below can be resolved.
lib_dir = File.expand_path(File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

require "sax-machine/sax_document"
require "sax-machine/sax_handler"
require "sax-machine/sax_config"

module SAXMachine
  # Library version; the Rakefile install task uses it to name the gem file.
  VERSION = "0.0.3"
end
@@ -0,0 +1,38 @@
1
+ require "sax-machine/sax_element_config"
2
+ require "sax-machine/sax_collection_config"
3
+
4
module SAXMachine
  # Holds the parsing rules declared on a class: single-value ("top level")
  # element rules and collection rules, in declaration order.
  class SAXConfig
    def initialize
      @top_level_elements  = []
      @collection_elements = []
    end

    # Records a single-value element rule built from +name+ and +options+.
    def add_top_level_element(name, options)
      @top_level_elements.push(ElementConfig.new(name, options))
    end

    # Records a collection rule built from +name+ and +options+.
    def add_collection_element(name, options)
      @collection_elements.push(CollectionConfig.new(name, options))
    end

    # Returns the first collection rule whose name equals +name+ when both
    # are compared as strings, or nil when there is none.
    def collection_config(name)
      wanted = name.to_s
      @collection_elements.find { |config| config.name.to_s == wanted }
    end

    # Returns the first element rule named +name+ for which
    # has_value_and_attrs_match?(attrs) is true, or nil.
    def element_config_for_attribute(name, attrs)
      @top_level_elements.find do |config|
        config.name == name && config.has_value_and_attrs_match?(attrs)
      end
    end

    # Returns the first element rule named +name+ for which
    # attrs_match?(attrs) is true, or nil.
    def element_config_for_tag(name, attrs)
      @top_level_elements.find do |config|
        config.name == name && config.attrs_match?(attrs)
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require "nokogiri"
2
+
3
module SAXMachine

  # Hook run when SAXMachine is included in a class: extends that class with
  # the declarative class-level API defined in ClassMethods.
  def self.included(base)
    base.extend ClassMethods
  end

  # Parses +xml_text+ with a Nokogiri SAX parser driven by a SAXHandler bound
  # to this object. Returns self so calls can be chained.
  def parse(xml_text)
    sax_handler = SAXHandler.new(self)
    parser = Nokogiri::XML::SAX::Parser.new(sax_handler)
    parser.parse(xml_text)
    self
  end

  module ClassMethods

    # Builds a new instance and parses +xml_text+ into it.
    def parse(xml_text)
      new.parse(xml_text)
    end

    # Declares a single-value element rule and defines a reader/writer pair
    # named after options[:as] (defaulting to +name+).
    def element(name, options = {})
      # Work on a copy: writing the :as default into the caller's hash would
      # leak it into other declarations that reuse the same options hash.
      options = options.merge(:as => options[:as] || name)
      sax_config.add_top_level_element(name, options)
      attr_accessor options[:as]
    end

    # Declares a collection rule and defines a writer plus a reader that
    # lazily initialises the collection to an empty array. The accessor name
    # is options[:as] (defaulting to +name+).
    def elements(name, options = {})
      # Copy for the same reason as in #element.
      options = options.merge(:as => options[:as] || name)
      sax_config.add_collection_element(name, options)

      ivar = "@#{options[:as]}"
      # define_method avoids evaluating a string of generated source code.
      define_method(options[:as]) do
        instance_variable_get(ivar) || instance_variable_set(ivar, [])
      end

      attr_writer options[:as]
    end

    # The SAXConfig holding this class's declared rules, created on first use.
    def sax_config
      @sax_config ||= SAXConfig.new
    end
  end

end
@@ -0,0 +1,95 @@
1
+ require "nokogiri"
2
+
3
module SAXMachine
  # Nokogiri SAX handler that populates +object+ according to the rules in
  # its class's sax_config. Tags matching a collection rule are delegated to
  # that rule's handler until the collection tag closes; other tags are
  # matched against the single-value element rules. Each element rule is
  # applied at most once per handler (the first match wins).
  class SAXHandler < Nokogiri::XML::SAX::Document
    attr_reader :object

    # object:: the instance to populate; its class must respond to sax_config.
    def initialize(object)
      @object = object
      @parsed_configs = {}
    end

    # Receives character data. Inside a collection it is forwarded to the
    # collection handler; otherwise it is captured only while an element rule
    # is active for the current tag.
    def characters(string)
      if parsing_collection?
        @collection_handler.characters(string)
      elsif @element_config
        # A SAX parser may report one text node in several chunks, so append
        # to what has been captured for this tag instead of overwriting it.
        @value = @value.nil? ? string : @value + string
      end
    end

    # Receives an opening tag and decides, in order: forward it to the active
    # collection handler, start a new collection, store a value taken from
    # the tag's attributes, or remember the element rule (if any) whose text
    # should be captured.
    def start_element(name, attrs = [])
      @name = name
      @attrs = attrs

      if parsing_collection?
        @collection_handler.start_element(@name, @attrs)

      elsif (@collection_config = sax_config.collection_config(@name))
        @collection_handler = @collection_config.handler

      elsif (@element_config = sax_config.element_config_for_attribute(@name, @attrs))
        parse_element_attribute

      else
        @element_config = sax_config.element_config_for_tag(@name, @attrs)
      end
    end

    # Receives a closing tag. Closing the active collection's own tag appends
    # the collection handler's object to the collection accessor; other tags
    # inside a collection are forwarded; otherwise any captured text is
    # stored through the active element rule's setter.
    def end_element(name)
      if parsing_collection? && @collection_config.name == name
        @object.send(@collection_config.accessor) << @collection_handler.object
        reset_current_collection

      elsif parsing_collection?
        @collection_handler.end_element(name)

      # @element_config is nil when text was captured and an unmatched child
      # tag then replaced the rule; without the guard the setter lookup would
      # raise NoMethodError on nil.
      elsif @element_config && characters_captured? && !parsed_config?
        mark_as_parsed
        @object.send(@element_config.setter, @value)
      end

      reset_current_tag
    end

    # True when text has been captured for the current tag.
    def characters_captured?
      !@value.nil?
    end
    # Original (misspelled) name, kept so existing callers keep working.
    alias_method :characaters_captured?, :characters_captured?

    # True while a collection handler is active.
    def parsing_collection?
      !@collection_handler.nil?
    end

    # Stores the value the active element rule extracts from the current
    # tag's attributes (first match only), then clears the rule so the tag's
    # text is not captured as well.
    def parse_element_attribute
      unless parsed_config?
        mark_as_parsed
        @object.send(@element_config.setter, @element_config.value_from_attrs(@attrs))
      end

      @element_config = nil
    end

    # Records that the active element rule has already produced a value.
    def mark_as_parsed
      @parsed_configs[@element_config] = true
    end

    # Truthy when the active element rule has already produced a value.
    def parsed_config?
      @parsed_configs[@element_config]
    end

    # Leaves collection mode.
    def reset_current_collection
      @collection_handler = nil
      @collection_config = nil
    end

    # Clears all per-tag state.
    def reset_current_tag
      @name = nil
      @attrs = nil
      @value = nil
      @element_config = nil
    end

    # The rule set declared on the class of the object being populated.
    def sax_config
      @object.class.sax_config
    end
  end
end
@@ -0,0 +1,286 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
# Specs for the SAXMachine DSL: single-value rules declared with +element+
# (including the :as, :with and :value options), collection rules declared
# with +elements+, and a full Atom feed example.
describe "SAXMachine" do
  describe "element" do
    describe "when parsing a single element" do
      before :each do
        @klass = Class.new do
          include SAXMachine
          element :title
        end
      end

      it "should provide an accessor" do
        document = @klass.new
        document.title = "Title"
        document.title.should == "Title"
      end

      it "should not overwrite the accessor when the element is not present" do
        document = @klass.new
        document.title = "Title"
        document.parse("<foo></foo>")
        document.title.should == "Title"
      end

      it "should overwrite the accessor when the element is present" do
        document = @klass.new
        document.title = "Old title"
        document.parse("<title>New title</title>")
        document.title.should == "New title"
      end

      it "should save the element text into an accessor" do
        document = @klass.parse("<title>My Title</title>")
        document.title.should == "My Title"
      end

      it "should save the element text into an accessor when there are multiple elements" do
        document = @klass.parse("<xml><title>My Title</title><foo>bar</foo></xml>")
        document.title.should == "My Title"
      end

      it "should save the first element text when there are multiple of the same element" do
        document = @klass.parse("<xml><title>My Title</title><title>bar</title></xml>")
        document.title.should == "My Title"
      end
    end

    describe "when parsing multiple elements" do
      before :each do
        @klass = Class.new do
          include SAXMachine
          element :title
          element :name
        end
      end

      it "should save the element text for a second tag" do
        document = @klass.parse("<xml><title>My Title</title><name>Paul</name></xml>")
        document.name.should == "Paul"
        document.title.should == "My Title"
      end
    end

    describe "when using options for parsing elements" do
      describe "using the 'as' option" do
        before :each do
          @klass = Class.new do
            include SAXMachine
            element :description, :as => :summary
          end
        end

        it "should provide an accessor using the 'as' name" do
          document = @klass.new
          document.summary = "a small summary"
          document.summary.should == "a small summary"
        end

        it "should save the element text into the 'as' accessor" do
          document = @klass.parse("<description>here is a description</description>")
          document.summary.should == "here is a description"
        end
      end

      describe "using the :with option" do
        describe "and the :value option" do
          before :each do
            @klass = Class.new do
              include SAXMachine
              element :link, :value => :href, :with => {:foo => "bar"}
            end
          end

          it "should save the value of a matching element" do
            document = @klass.parse("<link href='test' foo='bar'>asdf</link>")
            document.link.should == "test"
          end

          it "should save the value of the first matching element" do
            document = @klass.parse("<xml><link href='first' foo='bar' /><link href='second' foo='bar' /></xml>")
            document.link.should == "first"
          end

          describe "and the :as option" do
            before :each do
              @klass = Class.new do
                include SAXMachine
                element :link, :value => :href, :as => :url, :with => {:foo => "bar"}
                element :link, :value => :href, :as => :second_url, :with => {:asdf => "jkl"}
              end
            end

            it "should save the value of the first matching element" do
              document = @klass.parse("<xml><link href='first' foo='bar' /><link href='second' asdf='jkl' /><link href='second' foo='bar' /></xml>")
              document.url.should == "first"
              document.second_url.should == "second"
            end
          end
        end

        describe "with only one element" do
          before :each do
            @klass = Class.new do
              include SAXMachine
              element :link, :with => {:foo => "bar"}
            end
          end

          it "should save the text of an element that has matching attributes" do
            document = @klass.parse("<link foo=\"bar\">match</link>")
            document.link.should == "match"
          end

          it "should not save the text of an element that doesn't have matching attributes" do
            document = @klass.parse("<link>no match</link>")
            document.link.should be_nil
          end

          it "should save the text of an element that has matching attributes when it is the second of that type" do
            document = @klass.parse("<xml><link>no match</link><link foo=\"bar\">match</link></xml>")
            document.link.should == "match"
          end

          it "should save the text of an element that has matching attributes plus a few more" do
            # The fixture previously lacked the closing </xml>; the example is
            # about extra attributes, not about malformed documents.
            document = @klass.parse("<xml><link>no match</link><link asdf='jkl' foo='bar'>match</link></xml>")
            document.link.should == "match"
          end
        end

        describe "with multiple elements of same tag" do
          before :each do
            @klass = Class.new do
              include SAXMachine
              element :link, :as => :first, :with => {:foo => "bar"}
              element :link, :as => :second, :with => {:asdf => "jkl"}
            end
          end

          it "should match the first element" do
            document = @klass.parse("<xml><link>no match</link><link foo=\"bar\">first match</link><link>no match</link></xml>")
            document.first.should == "first match"
          end

          it "should match the second element" do
            document = @klass.parse("<xml><link>no match</link><link foo='bar'>first match</link><link asdf='jkl'>second match</link><link>hi</link></xml>")
            document.second.should == "second match"
          end
        end
      end # using the 'with' option

      describe "using the 'value' option" do
        before :each do
          @klass = Class.new do
            include SAXMachine
            element :link, :value => :foo
          end
        end

        it "should save the attribute value" do
          document = @klass.parse("<link foo='test'>hello</link>")
          document.link.should == 'test'
        end

        it "should save the attribute value when there is no text enclosed by the tag" do
          document = @klass.parse("<link foo='test'></link>")
          document.link.should == 'test'
        end

        it "should save the attribute value when the tag close is in the open" do
          document = @klass.parse("<link foo='test'/>")
          document.link.should == 'test'
        end
      end
    end
  end

  describe "elements" do
    # I took this stuff out because I'm not sure yet if this is something I want to bother supporting.

    # describe "when parsing multiple elements" do
    #   before :each do
    #     @klass = Class.new do
    #       include SAXMachine
    #       elements :entry, :as => :entries
    #     end
    #   end
    #
    #   it "should provide a collection accessor" do
    #     document = @klass.new
    #     document.entries << :foo
    #     document.entries.should == [:foo]
    #   end
    #
    #   it "should parse a single element" do
    #     document = @klass.parse("<entry>hello</entry>")
    #     document.entries.should == ["hello"]
    #   end
    #
    #   it "should parse multiple elements" do
    #     document = @klass.parse("<xml><entry>hello</entry><entry>world</entry></xml>")
    #     document.entries.should == ["hello", "world"]
    #   end
    # end

    describe "when using the class option" do
      before :each do
        class Foo
          include SAXMachine
          element :title
        end
        @klass = Class.new do
          include SAXMachine
          elements :entry, :as => :entries, :class => Foo
        end
      end

      it "should parse a single element with children" do
        document = @klass.parse("<entry><title>a title</title></entry>")
        document.entries.size.should == 1
        document.entries.first.title.should == "a title"
      end

      it "should parse multiple elements with children" do
        document = @klass.parse("<xml><entry><title>title 1</title></entry><entry><title>title 2</title></entry></xml>")
        document.entries.size.should == 2
        document.entries.first.title.should == "title 1"
        document.entries.last.title.should == "title 2"
      end

      it "should not parse a top level element that is specified only in a child" do
        document = @klass.parse("<xml><title>no parse</title><entry><title>correct title</title></entry></xml>")
        document.entries.size.should == 1
        document.entries.first.title.should == "correct title"
      end
    end
  end

  describe "full example" do
    before :each do
      # Locate the fixture next to this spec file so the suite does not
      # depend on the directory it is run from.
      @xml = File.read(File.join(File.dirname(__FILE__), 'atom.xml'))
      class AtomEntry
        include SAXMachine
        element :title
        element :name, :as => :author
        element "feedburner:origLink", :as => :url
        element :summary
        element :content
        element :published
      end

      class Atom
        include SAXMachine
        element :title
        element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
        element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
        elements :entry, :as => :entries, :class => AtomEntry
      end
    end # before

    it "should parse the url" do
      f = Atom.parse(@xml)
      f.url.should == "http://www.pauldix.net/"
    end
  end
end
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --diff
2
+ --color
@@ -0,0 +1,13 @@
1
require "rubygems"
require "spec"

# gem install redgreen for colored test output
unless ENV['TM_CURRENT_LINE']
  begin
    require "redgreen"
  rescue LoadError
    # redgreen is optional; carry on without it when it is not installed.
  end
end

# Make the library under ../lib loadable, adding it to the load path once.
lib_path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)

require "sax-machine"

# Spec::Runner.configure do |config|
# end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pauldix-sax-machine
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Paul Dix
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-01-13 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">"
21
+ - !ruby/object:Gem::Version
22
+ version: 0.0.0
23
+ version:
24
+ description:
25
+ email: paul@pauldix.net
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files: []
31
+
32
+ files:
33
+ - lib/sax-machine.rb
34
+ - lib/sax-machine/sax_config.rb
35
+ - lib/sax-machine/sax_document.rb
36
+ - lib/sax-machine/sax_handler.rb
37
+ - README.rdoc
38
+ - Rakefile
39
+ - spec/spec.opts
40
+ - spec/spec_helper.rb
41
+ - spec/sax-machine/sax_document_spec.rb
42
+ has_rdoc: true
43
+ homepage: http://github.com/pauldix/sax-machine
44
+ post_install_message:
45
+ rdoc_options: []
46
+
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: "0"
54
+ version:
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ requirements: []
62
+
63
+ rubyforge_project:
64
+ rubygems_version: 1.2.0
65
+ signing_key:
66
+ specification_version: 2
67
+ summary: Declarative SAX Parsing with Nokogiri
68
+ test_files: []
69
+