saxy 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ script: "bundle exec rspec ./spec/"
2
+ language: ruby
3
+ rvm:
4
+ - 1.9.3
5
+ - 1.9.2
6
+ - 1.8.7
7
+ - ree
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in saxy.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Michał Szajbe
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,74 @@
1
+ # Saxy
2
+
3
+ [![Build Status](https://secure.travis-ci.org/monterail/saxy.png)](http://travis-ci.org/monterail/saxy)
4
+
5
+ Memory-efficient XML parser. Finds object definitions in XML and translates them into Ruby objects.
6
+
7
+ It uses a SAX parser under the hood, which means that it doesn't load the whole XML file into memory. It goes through it once and yields objects along the way.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ gem 'saxy'
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install saxy
22
+
23
+ ## Usage
24
+
25
+ Assume the XML file:
26
+
27
+ <?xml version='1.0' encoding='UTF-8'?>
28
+ <webstore>
29
+ <name>Amazon</name>
30
+ <products>
31
+ <product>
32
+ <uid>FFCF177</uid>
33
+ <name>Kindle</name>
34
+ <description>The world's best-selling e-reader.</description>
35
+ <price>$109</price>
36
+ <images>
37
+ <thumb>http://amazon.com/kindle_thumb.jpg</thumb>
38
+ <large>http://amazon.com/kindle.jpg</large>
39
+ </images>
40
+ </product>
41
+ </products>
42
+ </webstore>
43
+
44
+ You instantiate the parser by passing the path to the XML file and the object-identifying tag name as its arguments.
45
+
46
+ The following will parse the XML, find product definitions (inside `<product>` and `</product>` tags), build `OpenStruct`s and yield them inside the block:
47
+
48
+ Saxy.parse("filename.xml", "product").each do |product|
49
+ puts product.uid # => "FFCF177"
50
+ puts product.name # => "Kindle"
51
+ puts product.description # => "The world's best-selling e-reader."
52
+ puts product.price # => "$109"
53
+
54
+ # nested objects are built as well
55
+ puts product.images.thumb # => "http://amazon.com/kindle_thumb.jpg"
56
+ end
57
+
58
+ Saxy supports Enumerable, so you can use its goodies to your comfort without building intermediate arrays:
59
+
60
+ Saxy.parse("filename.xml", "product").map do |object|
61
+ # map OpenStructs to ActiveRecord instances, etc.
62
+ end
63
+
64
+ You can also grab an Enumerator for external use (e.g. lazy evaluation, etc.):
65
+
66
+ enumerator = Saxy.parse("filename.xml", "product").each
67
+
68
+ ## Contributing
69
+
70
+ 1. Fork it
71
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
72
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
73
+ 4. Push to the branch (`git push origin my-new-feature`)
74
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
data/lib/saxy.rb ADDED
@@ -0,0 +1,18 @@
1
+ require "saxy/element"
2
+ require "saxy/parser"
3
+ require "saxy/parsing_error"
4
+ require "saxy/version"
5
+
6
module Saxy
  class << self
    # Builds a Parser for +xml_file+ / +object_tag+ and starts iterating.
    #
    # xml_file   - passed through to Parser.new unchanged
    # object_tag - passed through to Parser.new unchanged
    # blk        - optional block, handed to Parser#each
    #
    # Returns whatever Parser#each returns: with a block that is the result of
    # the parse run, without a block it is the enumerator Parser#each builds.
    def parse(xml_file, object_tag, &blk)
      # The block has to be forwarded as a block (&blk). Parser#each accepts no
      # positional arguments, so passing it as a plain argument raises
      # ArgumentError. When blk is nil, &blk forwards "no block", which covers
      # the enumerator case as well.
      Parser.new(xml_file, object_tag).each(&blk)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'active_support/core_ext/string/inflections'
2
+ require 'ostruct'
3
+
4
module Saxy
  # Collects the text value and the named attributes of a single element
  # while it is being built.
  class Element
    attr_reader :attributes, :value

    def initialize
      @value = nil
      @attributes = {}
    end

    # Stores +value+ under the key derived from +name+ (see #attribute_name).
    def set_attribute(name, value)
      key = attribute_name(name)
      attributes[key] = value
    end

    # Appends +string+, stripped of surrounding whitespace, to the text value.
    # Strings that are empty after stripping are ignored, so the value stays
    # nil until some non-blank text arrives.
    def append_value(string)
      text = string.strip
      return if text.empty?

      @value = "" unless @value
      @value << text
    end

    # Returns an OpenStruct of the attributes when any are set, otherwise the
    # accumulated text value (nil when nothing was appended).
    def as_object
      return value if attributes.empty?

      OpenStruct.new(attributes)
    end

    # Converts a tag name into the attribute key by underscoring it.
    def attribute_name(name)
      name.underscore
    end
  end
end
@@ -0,0 +1,72 @@
1
+ require 'nokogiri'
2
+
3
module Saxy
  # SAX document handler that builds an object for every +object_tag+ element
  # met while parsing +xml_file+ and hands it to a callback.
  #
  # Iteration goes through #each; Enumerable is mixed in on top of it.
  class Parser < Nokogiri::XML::SAX::Document
    include Enumerable

    # Stack of XML tags built while traversing XML tree
    attr_reader :tags

    # Stack of elements built while traversing XML tree
    #
    # First element is pushed to the stack only after finding the object_tag in
    # the XML tree.
    attr_reader :elements

    # Will yield objects inside the callback after they're built
    attr_reader :callback

    # xml_file   - path given to the SAX parser's parse_file in #each
    # object_tag - name of the tag that starts a new object
    def initialize(xml_file, object_tag)
      @xml_file, @object_tag = xml_file, object_tag
      @tags, @elements = [], []
    end

    # Called for an opening tag. Always records the tag; pushes a new Element
    # when the tag is the object tag or when an object is already being built
    # (every tag nested inside an object gets its own element).
    # The +attributes+ argument is accepted but not used.
    def start_element(tag, attributes=[])
      @tags << tag

      if tag == @object_tag || elements.any?
        elements << Element.new
      end
    end

    # Called for a closing tag. Pops the tag and, if an element was being
    # built, converts it to an object. A nested element is stored on its
    # parent under the tag name; a top-level one goes to the callback.
    def end_element(tag)
      tags.pop

      finished = elements.pop
      return unless finished

      object = finished.as_object

      if current_element
        current_element.set_attribute(tag, object)
      elsif callback
        callback.call(object)
      end
    end

    # Appends CDATA content to the element being built, if any.
    def cdata_block(cdata)
      current_element.append_value(cdata) if current_element
    end

    # Appends character data to the element being built, if any.
    def characters(chars)
      current_element.append_value(chars) if current_element
    end

    # Turns an error reported by the SAX parser into a ParsingError.
    def error(message)
      raise ParsingError, message
    end

    # Element on top of the stack, or nil when no object is being built.
    def current_element
      elements.last
    end

    # With a block: stores it as the callback and parses the file, so the block
    # receives every built object. Returns the result of parse_file.
    #
    # Without a block: returns an enumerator over #each.
    def each(&blk)
      # enum_for replaces Enumerator.new(self, :each). That two-argument form
      # is deprecated and rejected by current Rubies; enum_for needs no version
      # switch (Enumerable::Enumerator on 1.8.7, Enumerator on 1.9 and later).
      return enum_for(:each) unless blk

      @callback = blk
      Nokogiri::XML::SAX::Parser.new(self).parse_file(@xml_file)
    end
  end
end
@@ -0,0 +1,4 @@
1
module Saxy
  # Error raised when the XML cannot be parsed.
  #
  # Inherits from StandardError (not Exception) so that a plain +rescue+
  # catches it; rescuing Saxy::ParsingError explicitly keeps working.
  class ParsingError < StandardError
  end
end
@@ -0,0 +1,3 @@
1
module Saxy
  # Version string of the gem; read by saxy.gemspec.
  VERSION = '0.1.1'
end
data/saxy.gemspec ADDED
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/saxy/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Michał Szajbe"]
6
+ gem.email = ["michal.szajbe@gmail.com"]
7
+ gem.description = %q{Saxy finds object definitions in XML files and translates them into Ruby objects. It uses SAX parser under the hood, which means that it doesn't load the whole XML file into memory. It goes once though it and yields objects along the way.}
8
+ gem.summary = %q{Memory-efficient XML parser. Finds object definitions and translates them into Ruby objects.}
9
+ gem.homepage = "http://github.com/monterail/saxy"
10
+
11
+ gem.files = `git ls-files`.split($\)
12
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
14
+ gem.name = "saxy"
15
+ gem.require_paths = ["lib"]
16
+ gem.version = Saxy::VERSION
17
+
18
+ gem.add_dependency "activesupport"
19
+ gem.add_dependency "nokogiri"
20
+ gem.add_development_dependency "rspec"
21
+ gem.add_development_dependency "ZenTest"
22
+ end
@@ -0,0 +1,26 @@
1
+ <?xml version='1.0' encoding='UTF-8'?>
2
+ <webstore>
3
+ <name>Amazon</name>
4
+ <products>
5
+ <product>
6
+ <uid>FFCF177</uid>
7
+ <name>Kindle</name>
8
+ <description>The world's best-selling e-reader.</description>
9
+ <price>$109</price>
10
+ <images>
11
+ <thumb>http://amazon.com/kindle_thumb.jpg</thumb>
12
+ <large>http://amazon.com/kindle.jpg</large>
13
+ </images>
14
+ </product>
15
+ <product>
16
+ <uid>YD26NT</uid>
17
+ <name>Kindle Touch</name>
18
+ <description>Simple-to-use touchscreen with built-in WIFI.</description>
19
+ <price>$79</price>
20
+ <images>
21
+ <thumb>http://amazon.com/kindle_touch_thumb.jpg</thumb>
22
+ <large>http://amazon.com/kindle_touch.jpg</large>
23
+ </images>
24
+ </product>
25
+ </products>
26
+ </webstore>
@@ -0,0 +1,5 @@
1
# Spec helper for locating files in the fixtures directory that sits next to
# this file.
module FixturesHelper
  # Returns the absolute path of +filename+ inside the fixtures directory.
  def fixture_file(filename)
    relative = File.join('fixtures', filename)
    File.expand_path(relative, File.dirname(__FILE__))
  end
end
@@ -0,0 +1,34 @@
1
+ require 'spec_helper'
2
+
3
+ describe Saxy::Element do
4
+ let(:element) { Saxy::Element.new }
5
+
6
+ it "should not append empty string as value" do
7
+ element.append_value("")
8
+ element.value.should be_nil
9
+ end
10
+
11
+ it "should append stripped value" do
12
+ element.append_value(" foo ")
13
+ element.append_value(" bar ")
14
+ element.value.should == "foobar"
15
+ end
16
+
17
+ it "should dump as string when no attributes are set" do
18
+ element.stub(:value).and_return("foo")
19
+ element.as_object.should == "foo"
20
+ end
21
+
22
+ it "should dump as object when attributes are set" do
23
+ element.stub(:attributes).and_return("foo" => 1, "bar" => 2)
24
+ object = element.as_object
25
+
26
+ object.foo.should == 1
27
+ object.bar.should == 2
28
+ end
29
+
30
+ it "should add attributes under underscored names" do
31
+ element.set_attribute("FooBar", "baz")
32
+ element.as_object.foo_bar.should == "baz"
33
+ end
34
+ end
@@ -0,0 +1,142 @@
1
+ require 'spec_helper'
2
+
3
+ describe Saxy::Parser do
4
+ include FixturesHelper
5
+
6
+ let(:parser) { Saxy::Parser.new(fixture_file("webstore.xml"), "product") }
7
+
8
+ it "should have empty tag stack" do
9
+ parser.tags.should == %w( )
10
+ end
11
+
12
+ it "should push/pop tag names on/from tag stack when going down/up the XML tree" do
13
+ parser.tags.should == %w( )
14
+
15
+ parser.start_element('webstore')
16
+ parser.tags.should == %w( webstore )
17
+
18
+ parser.start_element('products')
19
+ parser.tags.should == %w( webstore products )
20
+
21
+ parser.start_element('product')
22
+ parser.tags.should == %w( webstore products product )
23
+
24
+ parser.end_element('product')
25
+ parser.tags.should == %w( webstore products )
26
+
27
+ parser.end_element('products')
28
+ parser.tags.should == %w( webstore )
29
+
30
+ parser.end_element('webstore')
31
+ parser.tags.should == %w( )
32
+ end
33
+
34
+ context "when detecting object tag opening" do
35
+ before do
36
+ parser.start_element("product")
37
+ end
38
+
39
+ it "should add new element to stack" do
40
+ parser.elements.size.should == 1
41
+ end
42
+ end
43
+
44
+ context "when detecting other tag opening" do
45
+ before do
46
+ parser.start_element("other")
47
+ end
48
+
49
+ it "should not add new element to stack" do
50
+ parser.elements.should be_empty
51
+ end
52
+ end
53
+
54
+ context "with non-empty element stack" do
55
+ before do
56
+ parser.start_element("product")
57
+ parser.elements.should_not be_empty
58
+ end
59
+
60
+ context "when detecting object tag opening" do
61
+ before do
62
+ parser.start_element("product")
63
+ end
64
+
65
+ it "should add new element to stack" do
66
+ parser.elements.size.should == 2
67
+ end
68
+ end
69
+
70
context "when detecting other tag opening" do
  before do
    parser.start_element("other")
  end

  # While an object is being built every nested tag gets its own element, so
  # the stack grows from one element to two. The example title now says so;
  # it previously claimed the opposite of what is asserted.
  it "should add new element to stack" do
    parser.elements.size.should == 2
  end
end
79
+
80
+ context "when detecting any tag closing" do
81
+ before do
82
+ parser.end_element("any")
83
+ end
84
+
85
+ it "should pop element from stack" do
86
+ parser.elements.should be_empty
87
+ end
88
+ end
89
+
90
+ context "with callback defined" do
91
+ before do
92
+ @callback = lambda { |object| object }
93
+ parser.stub(:callback).and_return(@callback)
94
+ end
95
+
96
+ it "should yield the object inside the callback after detecting object tag closing" do
97
+ @callback.should_receive(:call).with(parser.current_element.as_object)
98
+ parser.end_element("product")
99
+ end
100
+
101
+ it "should not yield the object inside the callback after detecting other tag closing" do
102
+ parser.start_element("other")
103
+ @callback.should_not_receive(:call)
104
+ parser.end_element("other")
105
+ end
106
+ end
107
+
108
+ it "should append cdata block's contents to top element's value when detecting cdata block" do
109
+ parser.current_element.should_receive(:append_value).with("foo")
110
+ parser.cdata_block("foo")
111
+ end
112
+
113
+ it "should append characters to top element's value when detecting characters block" do
114
+ parser.current_element.should_receive(:append_value).with("foo")
115
+ parser.current_element.should_receive(:append_value).with("bar")
116
+ parser.characters("foo")
117
+ parser.characters("bar")
118
+ end
119
+
120
+ it "should set element's attribute after processing tags" do
121
+ element = parser.current_element
122
+
123
+ element.should_receive(:set_attribute).with("foo", "bar")
124
+
125
+ parser.start_element("foo")
126
+ parser.characters("bar")
127
+ parser.end_element("foo")
128
+ end
129
+ end
130
+
131
+ it "should raise Saxy::ParsingError on error" do
132
+ lambda { parser.error("Error message.") }.should raise_error(Saxy::ParsingError, "Error message.")
133
+ end
134
+
135
+ it "should return Enumerator when calling #each without a block", :unless => RUBY_1_8 do
136
+ parser.each.should be_instance_of Enumerator
137
+ end
138
+
139
+ it "should return Enumerator when calling #each without a block", :if => RUBY_1_8 do
140
+ parser.each.should be_instance_of Enumerable::Enumerator
141
+ end
142
+ end
data/spec/saxy_spec.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'spec_helper'
2
+
3
+ describe Saxy do
4
+ include FixturesHelper
5
+
6
+ it "should find object definitions in XML file and yield them as Ruby objects" do
7
+ products = Saxy.parse(fixture_file("webstore.xml"), "product").inject([]) do |arr, product|
8
+ arr << product
9
+ arr
10
+ end
11
+
12
+ products[0].uid.should == "FFCF177"
13
+ products[0].name.should == "Kindle"
14
+ products[0].description.should == "The world's best-selling e-reader."
15
+ products[0].price.should == "$109"
16
+ products[0].images.thumb.should == "http://amazon.com/kindle_thumb.jpg"
17
+ products[0].images.large.should == "http://amazon.com/kindle.jpg"
18
+
19
+ products[1].uid.should == "YD26NT"
20
+ products[1].name.should == "Kindle Touch"
21
+ products[1].description.should == "Simple-to-use touchscreen with built-in WIFI."
22
+ products[1].price.should == "$79"
23
+ products[1].images.thumb.should == "http://amazon.com/kindle_touch_thumb.jpg"
24
+ products[1].images.large.should == "http://amazon.com/kindle_touch.jpg"
25
+ end
26
+
27
+ it "should return Enumerator when calling #parse without a block", :unless => RUBY_1_8 do
28
+ Saxy.parse(fixture_file("webstore.xml"), "product").each.should be_instance_of Enumerator
29
+ end
30
+
31
+ it "should return Enumerator when calling #parse without a block", :if => RUBY_1_8 do
32
+ Saxy.parse(fixture_file("webstore.xml"), "product").each.should be_instance_of Enumerable::Enumerator
33
+ end
34
+ end
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'saxy'
4
+
5
+ require 'fixtures_helper'
6
+
7
+ RUBY_1_8 = (RUBY_VERSION =~ /^1\.8/)
metadata ADDED
@@ -0,0 +1,121 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: saxy
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.1.1
6
+ platform: ruby
7
+ authors:
8
+ - "Micha\xC5\x82 Szajbe"
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2012-08-17 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: activesupport
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ type: :runtime
25
+ version_requirements: *id001
26
+ - !ruby/object:Gem::Dependency
27
+ name: nokogiri
28
+ prerelease: false
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ type: :runtime
36
+ version_requirements: *id002
37
+ - !ruby/object:Gem::Dependency
38
+ name: rspec
39
+ prerelease: false
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ type: :development
47
+ version_requirements: *id003
48
+ - !ruby/object:Gem::Dependency
49
+ name: ZenTest
50
+ prerelease: false
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ type: :development
58
+ version_requirements: *id004
59
+ description: Saxy finds object definitions in XML files and translates them into Ruby objects. It uses SAX parser under the hood, which means that it doesn't load the whole XML file into memory. It goes once though it and yields objects along the way.
60
+ email:
61
+ - michal.szajbe@gmail.com
62
+ executables: []
63
+
64
+ extensions: []
65
+
66
+ extra_rdoc_files: []
67
+
68
+ files:
69
+ - .gitignore
70
+ - .rspec
71
+ - .travis.yml
72
+ - Gemfile
73
+ - LICENSE
74
+ - README.md
75
+ - Rakefile
76
+ - lib/saxy.rb
77
+ - lib/saxy/element.rb
78
+ - lib/saxy/parser.rb
79
+ - lib/saxy/parsing_error.rb
80
+ - lib/saxy/version.rb
81
+ - saxy.gemspec
82
+ - spec/fixtures/webstore.xml
83
+ - spec/fixtures_helper.rb
84
+ - spec/saxy/element_spec.rb
85
+ - spec/saxy/parser_spec.rb
86
+ - spec/saxy_spec.rb
87
+ - spec/spec_helper.rb
88
+ homepage: http://github.com/monterail/saxy
89
+ licenses: []
90
+
91
+ post_install_message:
92
+ rdoc_options: []
93
+
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: "0"
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: "0"
108
+ requirements: []
109
+
110
+ rubyforge_project:
111
+ rubygems_version: 1.8.24
112
+ signing_key:
113
+ specification_version: 3
114
+ summary: Memory-efficient XML parser. Finds object definitions and translates them into Ruby objects.
115
+ test_files:
116
+ - spec/fixtures/webstore.xml
117
+ - spec/fixtures_helper.rb
118
+ - spec/saxy/element_spec.rb
119
+ - spec/saxy/parser_spec.rb
120
+ - spec/saxy_spec.rb
121
+ - spec/spec_helper.rb