sax-machine 0.1.0 → 0.2.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ .idea
2
+ .bundle
3
+ *.gem
4
+ Gemfile.lock
5
+ .rvmrc
6
+ .DS_STORE
7
+ pkg/
8
+ coverage/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.2
5
+ - 1.9.3
6
+ - rbx-18mode
7
+ - rbx-19mode
8
+ notifications:
9
+ irc: "irc.freenode.org#sax-machine"
data/Gemfile CHANGED
@@ -1,4 +1,10 @@
1
1
  source "http://rubygems.org"
2
2
 
3
- gem 'nokogiri', '>= 1.4.4'
4
- gem 'rspec', '>= 2.6.0'
3
+ gemspec
4
+
5
+ group :development, :test do
6
+ gem 'rake'
7
+ gem 'guard-rspec'
8
+ gem 'growl', :require => false
9
+ gem 'simplecov', :require => false, :platforms => :mri_19
10
+ end
@@ -0,0 +1,5 @@
1
+ guard 'rspec', :version => 2 do
2
+ watch(%r{^spec/.+_spec\.rb$})
3
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
4
+ watch('spec/spec_helper.rb') { "spec" }
5
+ end
@@ -0,0 +1,13 @@
1
+ # 0.2.0.rc1
2
+ * Tried to reduce the number of instances of respond_to? in the code by
3
+ pulling common uses of it out to methods. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
4
+ * The parse stack is now composed of simple objects instead of it being
5
+ an array of arrays. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
6
+ * Now using an identifier for an empty buffer instead of an empty string. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
7
+ * Cleaned up several variables that were not being used. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
8
+ * Encapsulated stack so it's not being exposed as part of the API. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
9
+ * #cdata_block is now an alias instead of delegating to characters. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
10
+
11
+ # 0.1.0
12
+ * rename parent to ancestor
13
+ * added SAXMachine.configure
@@ -1,16 +1,13 @@
1
- h1. SAX Machine
1
+ # SAX Machine [![Build Status](https://secure.travis-ci.org/pauldix/sax-machine.png?branch=master)](http://travis-ci.org/pauldix/sax-machine)
2
2
 
3
- "http://github.com/pauldix/sax-machine/wikis":http://github.com/pauldix/sax-machine/wikis
3
+ [Wiki](https://github.com/pauldix/sax-machine/wiki)
4
4
 
5
- "http://github.com/pauldix/sax-machine/tree/master":http://github.com/pauldix/sax-machine/tree/master
6
-
7
- h2. Description
5
+ ## Description
8
6
 
9
7
  A declarative SAX parsing library backed by Nokogiri
10
8
 
11
- h2. Usage
12
-
13
- <pre>
9
+ ## Usage
10
+ ```ruby
14
11
  require 'sax-machine'
15
12
 
16
13
  # Class for information associated with content parts in a feed.
@@ -69,26 +66,26 @@ end
69
66
  response = SomeServiceResponse.parse("<response><message>hi</message><message>world</message></response>")
70
67
  response.messages.first # => "hi"
71
68
  response.messages.last # => "world"
72
- </pre>
73
69
 
74
70
  # To limit conflicts in the class used for mapping, you can use the alternate SAXMachine.configure syntax
75
71
 
76
72
  class X < ActiveRecord::Base
77
-
78
73
  # this way no element, elements or ancestor method will be added to X
79
74
  SAXMachine.configure(X) do |c|
80
75
  c.element :title
81
76
  end
82
-
83
77
  end
78
+ ```
84
79
 
85
- h2. LICENSE
80
+ ## LICENSE
86
81
 
87
- (The MIT License)
82
+ The MIT License
88
83
 
89
- Copyright (c) 2009 - 2011:
84
+ Copyright (c) 2009-2012:
90
85
 
91
- "Paul Dix":http://pauldix.net
86
+ * [Paul Dix](http://www.pauldix.net)
87
+ * [Julien Kirch](http://www.archiloque.net)
88
+ * [Ezekiel Templin](http://zeke.templ.in)
92
89
 
93
90
  Permission is hereby granted, free of charge, to any person obtaining
94
91
  a copy of this software and associated documentation files (the
data/Rakefile CHANGED
@@ -1,21 +1,6 @@
1
- require "rspec/core/rake_task"
2
-
3
- $LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
4
- require 'sax-machine'
5
-
6
- desc "Run all specs"
7
- RSpec::Core::RakeTask.new do |t|
8
- t.rspec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
9
- end
10
-
11
- task :default => [:spec]
12
-
13
- task :test do
14
- sh 'rspec spec'
15
- end
16
-
17
- task :install do
18
- rm_rf "*.gem"
19
- puts `gem build sax-machine.gemspec`
20
- puts `sudo gem install sax-machine-#{SAXMachine::VERSION}.gem`
21
- end
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rspec/core/rake_task'
4
+ RSpec::Core::RakeTask.new(:spec)
5
+ task :test => :spec
6
+ task :default => :test
@@ -1,12 +1,8 @@
1
- require "rubygems"
2
-
3
- $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
4
-
1
+ require "sax-machine/version"
5
2
  require "sax-machine/sax_document"
6
3
  require "sax-machine/sax_configure"
7
4
  require "sax-machine/sax_handler"
8
5
  require "sax-machine/sax_config"
9
6
 
10
7
  module SAXMachine
11
- VERSION = "0.0.16"
12
8
  end
@@ -1,107 +1,191 @@
1
1
  require "nokogiri"
2
+ require "time"
2
3
 
3
4
  module SAXMachine
4
5
  class SAXHandler < Nokogiri::XML::SAX::Document
5
- attr_reader :stack
6
+ NO_BUFFER = :no_buffer
7
+
8
+ class StackNode < Struct.new(:object, :config, :buffer)
9
+ def initialize(object, config = nil, buffer = NO_BUFFER)
10
+ self.object = object
11
+ self.config = config
12
+ self.buffer = buffer
13
+ end
14
+ end
6
15
 
7
16
  def initialize(object, on_error = nil, on_warning = nil)
8
- @stack = [[object, nil, String.new]]
17
+ @stack = [ StackNode.new(object) ]
9
18
  @parsed_configs = {}
10
19
  @on_error = on_error
11
20
  @on_warning = on_warning
12
21
  end
13
22
 
14
- def characters(string)
15
- object, config, value = stack.last
16
- value << string
17
- end
23
+ def characters(data)
24
+ node = stack.last
18
25
 
19
- def cdata_block(string)
20
- characters(string)
26
+ if node.buffer == NO_BUFFER
27
+ node.buffer = data.dup
28
+ else
29
+ node.buffer << data
30
+ end
21
31
  end
32
+ alias cdata_block characters
22
33
 
23
34
  def start_element(name, attrs = [])
24
35
  attrs.flatten!
25
- object, config, value = stack.last
26
- sax_config = object.class.respond_to?(:sax_config) ? object.class.sax_config : nil
36
+
37
+ name = normalize_name(name)
38
+ node = stack.last
39
+ object = node.object
40
+
41
+ sax_config = sax_config_for(object)
27
42
 
28
43
  if sax_config
29
44
  if collection_config = sax_config.collection_config(name, attrs)
30
- stack.push [object = collection_config.data_class.new, collection_config, String.new]
31
- object, sax_config, is_collection = object, object.class.sax_config, true
45
+ object = collection_config.data_class.new
46
+ sax_config = sax_config_for(object)
32
47
 
33
- if (attribute_config = object.class.respond_to?(:sax_config) && object.class.sax_config.attribute_configs_for_element(attrs))
34
- attribute_config.each { |ac| object.send(ac.setter, ac.value_from_attrs(attrs)) }
35
- end
48
+ stack.push(StackNode.new(object, collection_config))
49
+
50
+ set_attributes_on(object, attrs)
36
51
  end
52
+
37
53
  sax_config.element_configs_for_attribute(name, attrs).each do |ec|
38
54
  unless parsed_config?(object, ec)
39
55
  object.send(ec.setter, ec.value_from_attrs(attrs))
40
56
  mark_as_parsed(object, ec)
41
57
  end
42
58
  end
43
- if !collection_config && element_config = sax_config.element_config_for_tag(name, attrs)
44
- new_object = element_config.data_class ? element_config.data_class.new : object
45
- stack.push [new_object, element_config, String.new]
46
59
 
47
- if (attribute_config = new_object.class.respond_to?(:sax_config) && new_object.class.sax_config.attribute_configs_for_element(attrs))
48
- attribute_config.each { |ac| new_object.send(ac.setter, ac.value_from_attrs(attrs)) }
49
- end
60
+ if !collection_config && element_config = sax_config.element_config_for_tag(name, attrs)
61
+ new_object =
62
+ case element_config.data_class.to_s
63
+ when 'Integer' then 0
64
+ when 'Float' then 0.0
65
+ when 'Time' then Time.at(0)
66
+ when '' then object
67
+ else
68
+ element_config.data_class.new
69
+ end
70
+
71
+ stack.push(StackNode.new(new_object, element_config))
72
+
73
+ set_attributes_on(new_object, attrs)
50
74
  end
51
75
  end
52
76
  end
53
77
 
54
78
  def end_element(name)
55
- (object, tag_config, _), (element, config, value) = stack[-2..-1]
56
- return unless stack.size > 1 && config && config.name.to_s == name.to_s
79
+ name = normalize_name(name)
80
+
81
+ start_tag = stack[-2]
82
+ close_tag = stack[-1]
83
+
84
+ return unless start_tag && close_tag
85
+
86
+ object = start_tag.object
87
+ element = close_tag.object
88
+ config = close_tag.config
89
+ value = close_tag.buffer
90
+
91
+ return unless config.name == name
57
92
 
58
93
  unless parsed_config?(object, config)
59
- if (element_value_config = config.data_class.respond_to?(:sax_config) && config.data_class.sax_config.element_values_for_element)
94
+ if (element_value_config = element_values_for(config))
60
95
  element_value_config.each { |evc| element.send(evc.setter, value) }
61
96
  end
62
97
 
63
98
  if config.respond_to?(:accessor)
64
- subconfig = element.class.sax_config if element.class.respond_to?(:sax_config)
99
+ subconfig = sax_config_for(element)
100
+
65
101
  if econf = subconfig.element_config_for_tag(name, [])
66
102
  element.send(econf.setter, value) unless econf.value_configured?
67
103
  end
104
+
68
105
  object.send(config.accessor) << element
69
106
  else
70
- value = config.data_class ? element : value
71
- object.send(config.setter, value) unless value == ""
107
+ value =
108
+ case config.data_class.to_s
109
+ when 'String' then value.to_s
110
+ when 'Integer' then value.to_i
111
+ when 'Float' then value.to_f
112
+ # Assumes that time elements will be string-based and are not
113
+ # something else, e.g. seconds since epoch
114
+ when 'Time' then Time.parse(value.to_s)
115
+ when '' then value
116
+ else
117
+ element
118
+ end
119
+
120
+ object.send(config.setter, value) unless value == NO_BUFFER
121
+
72
122
  mark_as_parsed(object, config)
73
123
  end
74
124
 
75
125
  # try to set the ancestor
76
- sax_config = element.class.respond_to?(:sax_config) ? element.class.sax_config : nil
77
- if sax_config
126
+ if (sax_config = sax_config_for(element))
78
127
  sax_config.ancestors.each do |ancestor|
79
128
  element.send(ancestor.setter, object)
80
129
  end
81
130
  end
82
131
  end
132
+
83
133
  stack.pop
84
134
  end
85
135
 
136
+ private
137
+
86
138
  def mark_as_parsed(object, element_config)
87
- @parsed_configs[[object.object_id, element_config.object_id]] = true unless element_config.collection?
139
+ unless element_config.collection?
140
+ @parsed_configs[[object.object_id, element_config.object_id]] = true
141
+ end
88
142
  end
89
143
 
90
144
  def parsed_config?(object, element_config)
91
145
  @parsed_configs[[object.object_id, element_config.object_id]]
92
146
  end
93
147
 
94
- def warning string
148
+ def warning(string)
95
149
  if @on_warning
96
150
  @on_warning.call(string)
97
151
  end
98
152
  end
99
153
 
100
- def error string
154
+ def error(string)
101
155
  if @on_error
102
156
  @on_error.call(string)
103
157
  end
104
158
  end
105
159
 
160
+
161
+ def sax_config_for(object)
162
+ if object.class.respond_to?(:sax_config)
163
+ object.class.sax_config
164
+ end
165
+ end
166
+
167
+ def element_values_for(config)
168
+ if config.data_class.respond_to?(:sax_config)
169
+ config.data_class.sax_config.element_values_for_element
170
+ end
171
+ end
172
+
173
+ def normalize_name(name)
174
+ name.gsub(/\-/, '_')
175
+ end
176
+
177
+ def set_attributes_on(object, attributes)
178
+ config = sax_config_for(object)
179
+
180
+ if config
181
+ config.attribute_configs_for_element(attributes).each do |ac|
182
+ object.send(ac.setter, ac.value_from_attrs(attributes))
183
+ end
184
+ end
185
+ end
186
+
187
+ def stack
188
+ @stack
189
+ end
106
190
  end
107
191
  end
@@ -0,0 +1,3 @@
1
+ module SAXMachine
2
+ VERSION = "0.2.0.rc1"
3
+ end
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/sax-machine/version', __FILE__)
3
+
4
+ Gem::Specification.new do |s|
5
+ s.name = 'sax-machine'
6
+ s.version = SAXMachine::VERSION
7
+
8
+ s.authors = ["Paul Dix", "Julien Kirch", "Ezekiel Templin"]
9
+ s.date = Date.today
10
+ s.email = %q{paul@pauldix.net}
11
+ s.homepage = %q{http://github.com/pauldix/sax-machine}
12
+
13
+ s.summary = %q{Declarative SAX Parsing with Nokogiri}
14
+
15
+ s.files = `git ls-files`.split("\n")
16
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
17
+ s.require_paths = ["lib"]
18
+
19
+ s.platform = Gem::Platform::RUBY
20
+
21
+ s.add_dependency 'nokogiri', "~> 1.5.2"
22
+ s.add_development_dependency "rspec", "~> 2.10.0"
23
+ end
@@ -0,0 +1,40 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <ItemSearchResponse xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05">
3
+ <OperationRequest>
4
+ <HTTPHeaders>
5
+ <Header Name="UserAgent">
6
+ </Header>
7
+ </HTTPHeaders>
8
+ <RequestId>16WRJBVEM155Q026KCV1</RequestId>
9
+ <Arguments>
10
+ <Argument Name="SearchIndex" Value="Books"></Argument>
11
+ <Argument Name="Service" Value="AWSECommerceService"></Argument>
12
+ <Argument Name="Title" Value="Ruby on Rails"></Argument>
13
+ <Argument Name="Operation" Value="ItemSearch"></Argument>
14
+ <Argument Name="AWSAccessKeyId" Value="dontbeaswoosh"></Argument>
15
+ </Arguments>
16
+ <RequestProcessingTime>0.064924955368042</RequestProcessingTime>
17
+ </OperationRequest>
18
+ <Items>
19
+ <Request>
20
+ <IsValid>True</IsValid>
21
+ <ItemSearchRequest>
22
+ <SearchIndex>Books</SearchIndex>
23
+ <Title>Ruby on Rails</Title>
24
+ </ItemSearchRequest>
25
+ </Request>
26
+ <TotalResults>22</TotalResults>
27
+ <TotalPages>3</TotalPages>
28
+ <Item>
29
+ <ASIN>0321480791</ASIN>
30
+ <DetailPageURL>http://www.amazon.com/gp/redirect.html%3FASIN=0321480791%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/o/ASIN/0321480791%253FSubscriptionId=dontbeaswoosh</DetailPageURL>
31
+ <ItemAttributes>
32
+ <Author>Michael Hartl</Author>
33
+ <Author>Aurelius Prochazka</Author>
34
+ <Manufacturer>Addison-Wesley Professional</Manufacturer>
35
+ <ProductGroup>Book</ProductGroup>
36
+ <Title>RailsSpace: Building a Social Networking Website with Ruby on Rails (Addison-Wesley Professional Ruby Series)</Title>
37
+ </ItemAttributes>
38
+ </Item>
39
+ </Items>
40
+ </ItemSearchResponse>