pauldix-sax-machine 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc ADDED
File without changes
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
require "spec"
require "spec/rake/spectask"
require 'lib/sax-machine.rb'

# Spec task: runs every *_spec.rb file under spec/ with the command-line
# options stored in spec/spec.opts.
Spec::Rake::SpecTask.new do |t|
  t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# Builds the gem from sax-machine.gemspec and installs the freshly built
# version (taken from SAXMachine::VERSION) with sudo.
task :install do
  # rm_rf treats its argument as a literal path and does not expand
  # wildcards, so the pattern has to be globbed before deletion.
  rm_rf Dir.glob("*.gem")
  puts `gem build sax-machine.gemspec`
  puts `sudo gem install sax-machine-#{SAXMachine::VERSION}.gem`
end
@@ -0,0 +1,11 @@
1
+ require "rubygems"
2
+
3
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__))) # add this file's directory to the load path (only once) so the "sax-machine/..." requires below resolve
4
+
5
+ require "sax-machine/sax_document"
6
+ require "sax-machine/sax_handler"
7
+ require "sax-machine/sax_config"
8
+
9
module SAXMachine
  # Version of the library; interpolated into the gem file name by the
  # Rakefile's install task. Frozen so the constant cannot be mutated in place.
  VERSION = "0.0.3".freeze
end
@@ -0,0 +1,38 @@
1
+ require "sax-machine/sax_element_config"
2
+ require "sax-machine/sax_collection_config"
3
+
4
module SAXMachine
  # Stores the parsing rules declared on a class: single-value element
  # configs and collection configs, and answers lookups by tag name.
  class SAXConfig
    def initialize
      @top_level_elements = []
      @collection_elements = []
    end

    # Registers a single-value element rule built from +name+ and +options+.
    def add_top_level_element(name, options)
      @top_level_elements << ElementConfig.new(name, options)
    end

    # Registers a collection rule built from +name+ and +options+.
    def add_collection_element(name, options)
      @collection_elements << CollectionConfig.new(name, options)
    end

    # Returns the first collection config registered under +name+, or nil.
    # Names are compared as strings, so symbols and strings are equivalent.
    def collection_config(name)
      @collection_elements.detect { |ce| ce.name.to_s == name.to_s }
    end

    # Returns the first element config for tag +name+ that reports
    # has_value_and_attrs_match?(attrs), or nil when there is none.
    def element_config_for_attribute(name, attrs)
      tag = name.to_s
      @top_level_elements.detect do |element_config|
        # Compare as strings, consistent with collection_config.
        element_config.name.to_s == tag &&
          element_config.has_value_and_attrs_match?(attrs)
      end
    end

    # Returns the first element config for tag +name+ that reports
    # attrs_match?(attrs), or nil when there is none.
    def element_config_for_tag(name, attrs)
      tag = name.to_s
      @top_level_elements.detect do |element_config|
        # Compare as strings, consistent with collection_config.
        element_config.name.to_s == tag &&
          element_config.attrs_match?(attrs)
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require "nokogiri"
2
+
3
# Mixin that gives a class a declarative SAX parsing DSL. Including it
# extends the class with ClassMethods (element, elements, parse, sax_config)
# and adds an instance-level #parse.
module SAXMachine

  def self.included(base)
    base.extend ClassMethods
  end

  # Feeds +xml_text+ through a Nokogiri SAX parser driven by a SAXHandler
  # bound to this object, then returns self.
  def parse(xml_text)
    sax_handler = SAXHandler.new(self)
    parser = Nokogiri::XML::SAX::Parser.new(sax_handler)
    parser.parse(xml_text)
    self
  end

  module ClassMethods

    # Builds a new instance and parses +xml_text+ into it.
    def parse(xml_text)
      new.parse(xml_text)
    end

    # Declares a single-value element. The value is exposed through an
    # attr_accessor named by options[:as], defaulting to +name+.
    def element(name, options = {})
      # Work on a copy so a hash shared between declarations is not mutated.
      options = options.merge(:as => options[:as] || name)
      sax_config.add_top_level_element(name, options)
      attr_accessor options[:as]
    end

    # Declares a collection element. The reader named by options[:as]
    # (defaulting to +name+) lazily initialises to an empty array; a plain
    # writer is defined as well.
    def elements(name, options = {})
      # Work on a copy so a hash shared between declarations is not mutated.
      options = options.merge(:as => options[:as] || name)
      sax_config.add_collection_element(name, options)

      accessor = options[:as]
      ivar = "@#{accessor}"
      # define_method avoids evaluating a generated source string.
      define_method(accessor) do
        instance_variable_get(ivar) || instance_variable_set(ivar, [])
      end

      attr_writer accessor
    end

    # Per-class SAXConfig, created on first use.
    def sax_config
      @sax_config ||= SAXConfig.new
    end
  end

end
@@ -0,0 +1,95 @@
1
+ require "nokogiri"
2
+
3
module SAXMachine
  # SAX event handler that fills +object+ according to the sax_config of the
  # object's class. Events inside a collection element are forwarded to the
  # handler supplied by that collection's config.
  class SAXHandler < Nokogiri::XML::SAX::Document
    attr_reader :object

    def initialize(object)
      @object = object
      @parsed_configs = {}
    end

    # Receives character data for the current tag.
    def characters(string)
      if parsing_collection?
        @collection_handler.characters(string)
      elsif @element_config
        # A SAX parser may deliver one run of text in several calls, so
        # append to what was already captured instead of overwriting it.
        @value = @value ? @value + string : string
      end
    end

    # Handles an opening tag: forwards it into the active collection, starts
    # a new collection, stores an attribute value, or selects the element
    # config whose text should be captured.
    def start_element(name, attrs = [])
      @name = name
      @attrs = attrs

      if parsing_collection?
        @collection_handler.start_element(@name, @attrs)

      elsif @collection_config = sax_config.collection_config(@name)
        @collection_handler = @collection_config.handler

      elsif @element_config = sax_config.element_config_for_attribute(@name, @attrs)
        parse_element_attribute

      else
        @element_config = sax_config.element_config_for_tag(@name, @attrs)
      end
    end

    # Handles a closing tag: finishes the active collection, forwards the
    # event into it, or stores the captured text on the target object.
    def end_element(name)
      if parsing_collection? && @collection_config.name == name
        @object.send(@collection_config.accessor) << @collection_handler.object
        reset_current_collection

      elsif parsing_collection?
        @collection_handler.end_element(name)

      # @element_config can be nil while @value is still set: an unconfigured
      # child tag replaces the config in start_element after the parent's
      # text was captured. Skip the assignment rather than call nil.setter.
      elsif @element_config && characters_captured? && !parsed_config?
        mark_as_parsed
        @object.send(@element_config.setter, @value)
      end

      reset_current_tag
    end

    # True when text has been captured for the current tag.
    def characters_captured?
      !@value.nil?
    end
    # The original, misspelled name is kept for existing callers.
    alias_method :characaters_captured?, :characters_captured?

    # True while events are being forwarded to a collection handler.
    def parsing_collection?
      !@collection_handler.nil?
    end

    # Stores the attribute value for the current element config, unless that
    # config was already parsed, then clears the config so the tag's text is
    # not captured as well.
    def parse_element_attribute
      unless parsed_config?
        mark_as_parsed
        @object.send(@element_config.setter, @element_config.value_from_attrs(@attrs))
      end

      @element_config = nil
    end

    # Remembers that the current config has produced a value, so only the
    # first matching element is stored.
    def mark_as_parsed
      @parsed_configs[@element_config] = true
    end

    def parsed_config?
      @parsed_configs[@element_config]
    end

    def reset_current_collection
      @collection_handler = nil
      @collection_config = nil
    end

    def reset_current_tag
      @name = nil
      @attrs = nil
      @value = nil
      @element_config = nil
    end

    # Parsing rules declared on the target object's class.
    def sax_config
      @object.class.sax_config
    end
  end
end
@@ -0,0 +1,286 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe "SAXMachine" do
4
# Examples for the single-value `element` declaration: plain tags, and the
# :as, :with and :value options.
describe "element" do
  describe "when parsing a single element" do
    before :each do
      @klass = Class.new do
        include SAXMachine
        element :title
      end
    end

    it "should provide an accessor" do
      document = @klass.new
      document.title = "Title"
      document.title.should == "Title"
    end

    it "should not overwrite the accessor when the element is not present" do
      document = @klass.new
      document.title = "Title"
      document.parse("<foo></foo>")
      document.title.should == "Title"
    end

    it "should overwrite the accessor when the element is present" do
      document = @klass.new
      document.title = "Old title"
      document.parse("<title>New title</title>")
      document.title.should == "New title"
    end

    it "should save the element text into an accessor" do
      document = @klass.parse("<title>My Title</title>")
      document.title.should == "My Title"
    end

    it "should save the element text into an accessor when there are multiple elements" do
      document = @klass.parse("<xml><title>My Title</title><foo>bar</foo></xml>")
      document.title.should == "My Title"
    end

    it "should save the first element text when there are multiple of the same element" do
      document = @klass.parse("<xml><title>My Title</title><title>bar</title></xml>")
      document.title.should == "My Title"
    end
  end

  describe "when parsing multiple elements" do
    before :each do
      @klass = Class.new do
        include SAXMachine
        element :title
        element :name
      end
    end

    it "should save the element text for a second tag" do
      document = @klass.parse("<xml><title>My Title</title><name>Paul</name></xml>")
      document.name.should == "Paul"
      document.title.should == "My Title"
    end
  end

  describe "when using options for parsing elements" do
    describe "using the 'as' option" do
      before :each do
        @klass = Class.new do
          include SAXMachine
          element :description, :as => :summary
        end
      end

      it "should provide an accessor using the 'as' name" do
        document = @klass.new
        document.summary = "a small summary"
        document.summary.should == "a small summary"
      end

      it "should save the element text into the 'as' accessor" do
        document = @klass.parse("<description>here is a description</description>")
        document.summary.should == "here is a description"
      end
    end

    describe "using the :with option" do
      describe "and the :value option" do
        before :each do
          @klass = Class.new do
            include SAXMachine
            element :link, :value => :href, :with => {:foo => "bar"}
          end
        end

        it "should save the value of a matching element" do
          document = @klass.parse("<link href='test' foo='bar'>asdf</link>")
          document.link.should == "test"
        end

        it "should save the value of the first matching element" do
          document = @klass.parse("<xml><link href='first' foo='bar' /><link href='second' foo='bar' /></xml>")
          document.link.should == "first"
        end

        describe "and the :as option" do
          before :each do
            @klass = Class.new do
              include SAXMachine
              element :link, :value => :href, :as => :url, :with => {:foo => "bar"}
              element :link, :value => :href, :as => :second_url, :with => {:asdf => "jkl"}
            end
          end

          it "should save the value of the first matching element" do
            document = @klass.parse("<xml><link href='first' foo='bar' /><link href='second' asdf='jkl' /><link href='second' foo='bar' /></xml>")
            document.url.should == "first"
            document.second_url.should == "second"
          end
        end
      end

      describe "with only one element" do
        before :each do
          @klass = Class.new do
            include SAXMachine
            element :link, :with => {:foo => "bar"}
          end
        end

        it "should save the text of an element that has matching attributes" do
          document = @klass.parse("<link foo=\"bar\">match</link>")
          document.link.should == "match"
        end

        it "should not save the text of an element that doesn't have matching attributes" do
          document = @klass.parse("<link>no match</link>")
          document.link.should be_nil
        end

        it "should save the text of an element that has matching attributes when it is the second of that type" do
          document = @klass.parse("<xml><link>no match</link><link foo=\"bar\">match</link></xml>")
          document.link.should == "match"
        end

        it "should save the text of an element that has matching attributes plus a few more" do
          # The root element is closed here; the fixture previously ended
          # without </xml> and so was not well-formed.
          document = @klass.parse("<xml><link>no match</link><link asdf='jkl' foo='bar'>match</link></xml>")
          document.link.should == "match"
        end
      end

      describe "with multiple elements of same tag" do
        before :each do
          @klass = Class.new do
            include SAXMachine
            element :link, :as => :first, :with => {:foo => "bar"}
            element :link, :as => :second, :with => {:asdf => "jkl"}
          end
        end

        it "should match the first element" do
          document = @klass.parse("<xml><link>no match</link><link foo=\"bar\">first match</link><link>no match</link></xml>")
          document.first.should == "first match"
        end

        it "should match the second element" do
          document = @klass.parse("<xml><link>no match</link><link foo='bar'>first match</link><link asdf='jkl'>second match</link><link>hi</link></xml>")
          document.second.should == "second match"
        end
      end
    end # using the 'with' option

    describe "using the 'value' option" do
      before :each do
        @klass = Class.new do
          include SAXMachine
          element :link, :value => :foo
        end
      end

      it "should save the attribute value" do
        document = @klass.parse("<link foo='test'>hello</link>")
        document.link.should == 'test'
      end

      it "should save the attribute value when there is no text enclosed by the tag" do
        document = @klass.parse("<link foo='test'></link>")
        document.link.should == 'test'
      end

      it "should save the attribute value when the tag close is in the open" do
        document = @klass.parse("<link foo='test'/>")
        document.link.should == 'test'
      end
    end
  end
end
197
+
198
# Examples for the `elements` collection declaration.
describe "elements" do
  # I took this stuff out because I'm not sure yet if this is something I want to bother supporting.

  # describe "when parsing multiple elements" do
  #   before :each do
  #     @klass = Class.new do
  #       include SAXMachine
  #       elements :entry, :as => :entries
  #     end
  #   end
  #
  #   it "should provide a collection accessor" do
  #     document = @klass.new
  #     document.entries << :foo
  #     document.entries.should == [:foo]
  #   end
  #
  #   it "should parse a single element" do
  #     document = @klass.parse("<entry>hello</entry>")
  #     document.entries.should == ["hello"]
  #   end
  #
  #   it "should parse multiple elements" do
  #     document = @klass.parse("<xml><entry>hello</entry><entry>world</entry></xml>")
  #     document.entries.should == ["hello", "world"]
  #   end
  # end

  describe "when using the class option" do
    before :each do
      # A fresh anonymous class per example. A named class declared here
      # would be reopened before every example, registering `element :title`
      # again each time and leaking a global constant.
      entry_class = Class.new do
        include SAXMachine
        element :title
      end
      @klass = Class.new do
        include SAXMachine
        elements :entry, :as => :entries, :class => entry_class
      end
    end

    it "should parse a single element with children" do
      document = @klass.parse("<entry><title>a title</title></entry>")
      document.entries.size.should == 1
      document.entries.first.title.should == "a title"
    end

    it "should parse multiple elements with children" do
      document = @klass.parse("<xml><entry><title>title 1</title></entry><entry><title>title 2</title></entry></xml>")
      document.entries.size.should == 2
      document.entries.first.title.should == "title 1"
      document.entries.last.title.should == "title 2"
    end

    it "should not parse a top level element that is specified only in a child" do
      document = @klass.parse("<xml><title>no parse</title><entry><title>correct title</title></entry></xml>")
      document.entries.size.should == 1
      document.entries.first.title.should == "correct title"
    end
  end
end
258
+
259
# End-to-end example: parses the atom.xml fixture with a feed class that
# holds a collection of entry objects.
describe "full example" do
  before :each do
    # Resolve the fixture relative to this spec file so the example does not
    # depend on the directory the specs are run from.
    @xml = File.read(File.dirname(__FILE__) + '/atom.xml')

    # Anonymous classes are rebuilt for every example. Named classes declared
    # here would be reopened each time, repeating their element declarations
    # and leaking global constants.
    entry_class = Class.new do
      include SAXMachine
      element :title
      element :name, :as => :author
      element "feedburner:origLink", :as => :url
      element :summary
      element :content
      element :published
    end

    @atom_class = Class.new do
      include SAXMachine
      element :title
      element :link, :value => :href, :as => :url, :with => {:type => "text/html"}
      element :link, :value => :href, :as => :feed_url, :with => {:type => "application/atom+xml"}
      elements :entry, :as => :entries, :class => entry_class
    end
  end # before

  it "should parse the url" do
    f = @atom_class.parse(@xml)
    f.url.should == "http://www.pauldix.net/"
  end
end
286
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --diff
2
+ --color
@@ -0,0 +1,13 @@
1
+ require "rubygems"
2
+ require "spec"
3
+
4
+ # gem install redgreen for colored test output
5
+ begin require "redgreen" unless ENV['TM_CURRENT_LINE']; rescue LoadError; end # optional: skipped when TM_CURRENT_LINE is set, and a missing gem is silently ignored
6
+
7
+ path = File.expand_path(File.dirname(__FILE__) + "/../lib/") # absolute path of the lib/ directory next to spec/
8
+ $LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path) # prepend lib/ once so the require of "sax-machine" below finds it
9
+
10
+ require "sax-machine"
11
+
12
+ # Spec::Runner.configure do |config|
13
+ # end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pauldix-sax-machine
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Paul Dix
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-01-13 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">"
21
+ - !ruby/object:Gem::Version
22
+ version: 0.0.0
23
+ version:
24
+ description:
25
+ email: paul@pauldix.net
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files: []
31
+
32
+ files:
33
+ - lib/sax-machine.rb
34
+ - lib/sax-machine/sax_config.rb
35
+ - lib/sax-machine/sax_document.rb
36
+ - lib/sax-machine/sax_handler.rb
37
+ - README.rdoc
38
+ - Rakefile
39
+ - spec/spec.opts
40
+ - spec/spec_helper.rb
41
+ - spec/sax-machine/sax_document_spec.rb
42
+ has_rdoc: true
43
+ homepage: http://github.com/pauldix/sax-machine
44
+ post_install_message:
45
+ rdoc_options: []
46
+
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: "0"
54
+ version:
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ requirements: []
62
+
63
+ rubyforge_project:
64
+ rubygems_version: 1.2.0
65
+ signing_key:
66
+ specification_version: 2
67
+ summary: Declarative SAX Parsing with Nokogiri
68
+ test_files: []
69
+