sax-machine 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.travis.yml +14 -4
- data/Gemfile +5 -1
- data/Guardfile +2 -2
- data/HISTORY.md +23 -6
- data/README.md +111 -40
- data/Rakefile +4 -3
- data/lib/sax-machine.rb +11 -2
- data/lib/sax-machine/{sax_ancestor_config.rb → config/sax_ancestor.rb} +3 -7
- data/lib/sax-machine/{sax_attribute_config.rb → config/sax_attribute.rb} +4 -6
- data/lib/sax-machine/{sax_collection_config.rb → config/sax_collection.rb} +6 -10
- data/lib/sax-machine/{sax_element_config.rb → config/sax_element.rb} +16 -17
- data/lib/sax-machine/{sax_element_value_config.rb → config/sax_element_value.rb} +5 -7
- data/lib/sax-machine/{sax_handler.rb → handlers/sax_abstract_handler.rb} +28 -32
- data/lib/sax-machine/handlers/sax_nokogiri_handler.rb +16 -0
- data/lib/sax-machine/handlers/sax_ox_handler.rb +41 -0
- data/lib/sax-machine/sax_config.rb +9 -9
- data/lib/sax-machine/sax_configure.rb +1 -6
- data/lib/sax-machine/sax_document.rb +28 -17
- data/lib/sax-machine/version.rb +2 -2
- data/sax-machine.gemspec +8 -11
- data/spec/fixtures/atom-content.html +15 -0
- data/spec/{sax-machine → fixtures}/atom.xml +0 -0
- data/spec/sax-machine/sax_activerecord_spec.rb +23 -0
- data/spec/sax-machine/sax_configure_spec.rb +48 -0
- data/spec/sax-machine/sax_document_spec.rb +333 -280
- data/spec/sax-machine/sax_include_spec.rb +43 -0
- data/spec/spec_helper.rb +11 -2
- metadata +36 -41
- data/spec/benchmarks/amazon.xml +0 -40
- data/spec/benchmarks/benchmark.rb +0 -158
- data/spec/benchmarks/public_timeline.xml +0 -411
- data/spec/sax-machine/configure_sax_machine_spec.rb +0 -53
- data/spec/sax-machine/include_sax_machine_spec.rb +0 -42
@@ -1,13 +1,12 @@
|
|
1
1
|
module SAXMachine
|
2
2
|
class SAXConfig
|
3
|
-
|
4
3
|
class ElementValueConfig
|
5
4
|
attr_reader :name, :setter
|
6
5
|
|
7
6
|
def initialize(name, options)
|
8
|
-
@name
|
9
|
-
@as
|
10
|
-
@setter
|
7
|
+
@name = name.to_s
|
8
|
+
@as = options[:as]
|
9
|
+
@setter = "#{@as}="
|
11
10
|
@required = options[:required]
|
12
11
|
end
|
13
12
|
|
@@ -16,9 +15,8 @@ module SAXMachine
|
|
16
15
|
end
|
17
16
|
|
18
17
|
def required?
|
19
|
-
|
18
|
+
!!@required
|
20
19
|
end
|
21
20
|
end
|
22
|
-
|
23
21
|
end
|
24
|
-
end
|
22
|
+
end
|
@@ -1,8 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require "time"
|
1
|
+
require 'time'
|
3
2
|
|
4
3
|
module SAXMachine
|
5
|
-
|
4
|
+
module SAXAbstractHandler
|
6
5
|
NO_BUFFER = :no_buffer
|
7
6
|
|
8
7
|
class StackNode < Struct.new(:object, :config, :buffer)
|
@@ -13,14 +12,14 @@ module SAXMachine
|
|
13
12
|
end
|
14
13
|
end
|
15
14
|
|
16
|
-
def
|
15
|
+
def _initialize(object, on_error = nil, on_warning = nil)
|
17
16
|
@stack = [ StackNode.new(object) ]
|
18
17
|
@parsed_configs = {}
|
19
18
|
@on_error = on_error
|
20
19
|
@on_warning = on_warning
|
21
20
|
end
|
22
21
|
|
23
|
-
def
|
22
|
+
def _characters(data)
|
24
23
|
node = stack.last
|
25
24
|
|
26
25
|
if node.buffer == NO_BUFFER
|
@@ -29,10 +28,8 @@ module SAXMachine
|
|
29
28
|
node.buffer << data
|
30
29
|
end
|
31
30
|
end
|
32
|
-
alias cdata_block characters
|
33
|
-
|
34
|
-
def start_element(name, attrs = [])
|
35
31
|
|
32
|
+
def _start_element(name, attrs = [])
|
36
33
|
name = normalize_name(name)
|
37
34
|
node = stack.last
|
38
35
|
object = node.object
|
@@ -61,10 +58,10 @@ module SAXMachine
|
|
61
58
|
if !collection_config && element_config = sax_config.element_config_for_tag(name, attrs)
|
62
59
|
new_object =
|
63
60
|
case element_config.data_class.to_s
|
64
|
-
when
|
65
|
-
when
|
66
|
-
when
|
67
|
-
when
|
61
|
+
when "Integer" then 0
|
62
|
+
when "Float" then 0.0
|
63
|
+
when "Time" then Time.at(0)
|
64
|
+
when "" then object
|
68
65
|
else
|
69
66
|
element_config.data_class.new
|
70
67
|
end
|
@@ -76,7 +73,7 @@ module SAXMachine
|
|
76
73
|
end
|
77
74
|
end
|
78
75
|
|
79
|
-
def
|
76
|
+
def _end_element(name)
|
80
77
|
name = normalize_name(name)
|
81
78
|
|
82
79
|
start_tag = stack[-2]
|
@@ -107,13 +104,13 @@ module SAXMachine
|
|
107
104
|
else
|
108
105
|
value =
|
109
106
|
case config.data_class.to_s
|
110
|
-
when
|
111
|
-
when
|
112
|
-
when
|
107
|
+
when "String" then value.to_s
|
108
|
+
when "Integer" then value.to_i
|
109
|
+
when "Float" then value.to_f
|
113
110
|
# Assumes that time elements will be string-based and are not
|
114
111
|
# something else, e.g. seconds since epoch
|
115
|
-
when
|
116
|
-
when
|
112
|
+
when "Time" then Time.parse(value.to_s)
|
113
|
+
when "" then value
|
117
114
|
else
|
118
115
|
element
|
119
116
|
end
|
@@ -134,30 +131,29 @@ module SAXMachine
|
|
134
131
|
stack.pop
|
135
132
|
end
|
136
133
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
unless element_config.collection?
|
141
|
-
@parsed_configs[[object.object_id, element_config.object_id]] = true
|
134
|
+
def _error(string)
|
135
|
+
if @on_error
|
136
|
+
@on_error.call(string)
|
142
137
|
end
|
143
138
|
end
|
144
139
|
|
145
|
-
def
|
146
|
-
@parsed_configs[[object.object_id, element_config.object_id]]
|
147
|
-
end
|
148
|
-
|
149
|
-
def warning(string)
|
140
|
+
def _warning(string)
|
150
141
|
if @on_warning
|
151
142
|
@on_warning.call(string)
|
152
143
|
end
|
153
144
|
end
|
154
145
|
|
155
|
-
|
156
|
-
|
157
|
-
|
146
|
+
private
|
147
|
+
|
148
|
+
def mark_as_parsed(object, element_config)
|
149
|
+
unless element_config.collection?
|
150
|
+
@parsed_configs[[object.object_id, element_config.object_id]] = true
|
158
151
|
end
|
159
152
|
end
|
160
153
|
|
154
|
+
def parsed_config?(object, element_config)
|
155
|
+
@parsed_configs[[object.object_id, element_config.object_id]]
|
156
|
+
end
|
161
157
|
|
162
158
|
def sax_config_for(object)
|
163
159
|
if object.class.respond_to?(:sax_config)
|
@@ -172,7 +168,7 @@ module SAXMachine
|
|
172
168
|
end
|
173
169
|
|
174
170
|
def normalize_name(name)
|
175
|
-
name.gsub(/\-/,
|
171
|
+
name.gsub(/\-/, "_")
|
176
172
|
end
|
177
173
|
|
178
174
|
def set_attributes_on(object, attributes)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'sax-machine/handlers/sax_abstract_handler'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module SAXMachine
|
5
|
+
class SAXNokogiriHandler < Nokogiri::XML::SAX::Document
|
6
|
+
include SAXAbstractHandler
|
7
|
+
|
8
|
+
alias_method :initialize, :_initialize
|
9
|
+
alias_method :characters, :_characters
|
10
|
+
alias_method :cdata_block, :_characters
|
11
|
+
alias_method :start_element, :_start_element
|
12
|
+
alias_method :end_element, :_end_element
|
13
|
+
alias_method :error, :_error
|
14
|
+
alias_method :warning, :_warning
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'sax-machine/handlers/sax_abstract_handler'
|
2
|
+
require 'ox'
|
3
|
+
|
4
|
+
module SAXMachine
|
5
|
+
class SAXOxHandler < Ox::Sax
|
6
|
+
include SAXAbstractHandler
|
7
|
+
|
8
|
+
def initialize(*args)
|
9
|
+
_initialize(*args)
|
10
|
+
_reset_element
|
11
|
+
end
|
12
|
+
|
13
|
+
def attr(name, str)
|
14
|
+
@attrs[name] = str
|
15
|
+
end
|
16
|
+
|
17
|
+
def attrs_done
|
18
|
+
_start_element(@element, @attrs)
|
19
|
+
_reset_element
|
20
|
+
end
|
21
|
+
|
22
|
+
def start_element(name)
|
23
|
+
@element = name
|
24
|
+
end
|
25
|
+
|
26
|
+
def error(message, line, column)
|
27
|
+
_error("#{message} on line #{line} column #{column}")
|
28
|
+
end
|
29
|
+
|
30
|
+
alias_method :text, :_characters
|
31
|
+
alias_method :cdata, :_characters
|
32
|
+
alias_method :end_element, :_end_element
|
33
|
+
|
34
|
+
private
|
35
|
+
|
36
|
+
def _reset_element
|
37
|
+
@attrs = {}
|
38
|
+
@element = ""
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -1,17 +1,16 @@
|
|
1
|
-
require "sax-machine/
|
2
|
-
require "sax-machine/
|
3
|
-
require "sax-machine/
|
4
|
-
require "sax-machine/
|
5
|
-
require "sax-machine/
|
1
|
+
require "sax-machine/config/sax_attribute"
|
2
|
+
require "sax-machine/config/sax_element_value"
|
3
|
+
require "sax-machine/config/sax_element"
|
4
|
+
require "sax-machine/config/sax_collection"
|
5
|
+
require "sax-machine/config/sax_ancestor"
|
6
6
|
|
7
7
|
module SAXMachine
|
8
8
|
class SAXConfig
|
9
|
-
|
10
9
|
attr_accessor :top_level_elements, :top_level_attributes, :top_level_element_value, :collection_elements, :ancestors
|
11
10
|
|
12
11
|
def initialize
|
13
12
|
# Default value is an empty array
|
14
|
-
@top_level_elements
|
13
|
+
@top_level_elements = Hash.new { |hash, key| hash[key] = [] }
|
15
14
|
@top_level_attributes = []
|
16
15
|
@top_level_element_value = []
|
17
16
|
@collection_elements = Hash.new { |hash, key| hash[key] = [] }
|
@@ -19,11 +18,12 @@ module SAXMachine
|
|
19
18
|
end
|
20
19
|
|
21
20
|
def columns
|
22
|
-
@top_level_elements.map {|name, ecs| ecs }.flatten
|
21
|
+
@top_level_elements.map { |name, ecs| ecs }.flatten
|
23
22
|
end
|
24
23
|
|
25
24
|
def initialize_copy(sax_config)
|
26
25
|
super
|
26
|
+
|
27
27
|
@top_level_elements = sax_config.top_level_elements.clone
|
28
28
|
@top_level_attributes = sax_config.top_level_attributes.clone
|
29
29
|
@top_level_element_value = sax_config.top_level_element_value.clone
|
@@ -71,4 +71,4 @@ module SAXMachine
|
|
71
71
|
@top_level_elements[name.to_s].detect { |ec| ec.attrs_match?(attrs) }
|
72
72
|
end
|
73
73
|
end
|
74
|
-
end
|
74
|
+
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module SAXMachine
|
2
|
-
|
3
2
|
def self.configure(clazz)
|
4
3
|
extended_clazz = Class.new(clazz)
|
5
4
|
extended_clazz.send(:include, SAXMachine)
|
@@ -18,11 +17,9 @@ module SAXMachine
|
|
18
17
|
(class << clazz;self;end).send(:define_method, :parse) do |xml_text|
|
19
18
|
extended_clazz.parse(xml_text)
|
20
19
|
end
|
21
|
-
|
22
20
|
end
|
23
21
|
|
24
22
|
module LightWeightSaxMachine
|
25
|
-
|
26
23
|
attr_writer :sax_config
|
27
24
|
|
28
25
|
def sax_config
|
@@ -32,7 +29,5 @@ module SAXMachine
|
|
32
29
|
def inherited(subclass)
|
33
30
|
subclass.sax_config.send(:initialize_copy, self.sax_config)
|
34
31
|
end
|
35
|
-
|
36
32
|
end
|
37
|
-
|
38
|
-
end
|
33
|
+
end
|
@@ -1,18 +1,30 @@
|
|
1
1
|
require "nokogiri"
|
2
2
|
|
3
3
|
module SAXMachine
|
4
|
-
|
5
4
|
def self.included(base)
|
6
5
|
base.send(:include, InstanceMethods)
|
7
|
-
base.extend
|
6
|
+
base.extend(ClassMethods)
|
8
7
|
end
|
9
8
|
|
10
9
|
def parse(xml_text, on_error = nil, on_warning = nil)
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
if SAXMachine.handler == :ox
|
11
|
+
Ox.sax_parse(
|
12
|
+
SAXOxHandler.new(self, on_error, on_warning),
|
13
|
+
StringIO.new(xml_text),
|
14
|
+
{
|
15
|
+
symbolize: false,
|
16
|
+
convert_special: true,
|
17
|
+
skip: :skip_return,
|
18
|
+
}
|
19
|
+
)
|
20
|
+
else
|
21
|
+
handler = SAXNokogiriHandler.new(self, on_error, on_warning)
|
22
|
+
parser = Nokogiri::XML::SAX::Parser.new(handler)
|
23
|
+
parser.parse(xml_text) do |ctx|
|
24
|
+
ctx.replace_entities = true
|
25
|
+
end
|
15
26
|
end
|
27
|
+
|
16
28
|
self
|
17
29
|
end
|
18
30
|
|
@@ -25,13 +37,12 @@ module SAXMachine
|
|
25
37
|
end
|
26
38
|
|
27
39
|
module ClassMethods
|
28
|
-
|
29
40
|
def inherited(subclass)
|
30
41
|
subclass.sax_config.send(:initialize_copy, self.sax_config)
|
31
42
|
end
|
32
43
|
|
33
|
-
def parse(
|
34
|
-
new.parse(
|
44
|
+
def parse(*args)
|
45
|
+
new.parse(*args)
|
35
46
|
end
|
36
47
|
|
37
48
|
def element(name, options = {})
|
@@ -42,13 +53,13 @@ module SAXMachine
|
|
42
53
|
|
43
54
|
def attribute(name, options = {})
|
44
55
|
real_name = (options[:as] ||= name).to_s
|
45
|
-
sax_config.add_top_level_attribute(self.class.to_s, options.merge(:
|
56
|
+
sax_config.add_top_level_attribute(self.class.to_s, options.merge(name: name))
|
46
57
|
create_attr real_name
|
47
58
|
end
|
48
59
|
|
49
60
|
def value(name, options = {})
|
50
61
|
real_name = (options[:as] ||= name).to_s
|
51
|
-
sax_config.add_top_level_element_value(self.class.to_s, options.merge(:
|
62
|
+
sax_config.add_top_level_element_value(self.class.to_s, options.merge(name: name))
|
52
63
|
create_attr real_name
|
53
64
|
end
|
54
65
|
|
@@ -80,6 +91,7 @@ module SAXMachine
|
|
80
91
|
|
81
92
|
def elements(name, options = {})
|
82
93
|
options[:as] ||= name
|
94
|
+
|
83
95
|
if options[:class]
|
84
96
|
sax_config.add_collection_element(name, options)
|
85
97
|
else
|
@@ -88,7 +100,7 @@ module SAXMachine
|
|
88
100
|
#{options[:as]} << value
|
89
101
|
end
|
90
102
|
SRC
|
91
|
-
sax_config.add_top_level_element(name, options.merge(:
|
103
|
+
sax_config.add_top_level_element(name, options.merge(collection: true))
|
92
104
|
end
|
93
105
|
|
94
106
|
if !method_defined?(options[:as].to_s)
|
@@ -99,7 +111,7 @@ module SAXMachine
|
|
99
111
|
SRC
|
100
112
|
end
|
101
113
|
|
102
|
-
attr_writer
|
114
|
+
attr_writer(options[:as]) unless method_defined?("#{options[:as]}=")
|
103
115
|
end
|
104
116
|
|
105
117
|
def sax_config
|
@@ -109,10 +121,9 @@ module SAXMachine
|
|
109
121
|
# we only want to insert the getter and setter if they haven't defined it from elsewhere.
|
110
122
|
# this is how we allow custom parsing behavior. So you could define the setter
|
111
123
|
# and have it parse the string into a date or whatever.
|
112
|
-
def create_attr
|
113
|
-
attr_reader
|
114
|
-
attr_writer
|
124
|
+
def create_attr(real_name)
|
125
|
+
attr_reader(real_name) unless method_defined?(real_name)
|
126
|
+
attr_writer(real_name) unless method_defined?("#{real_name}=")
|
115
127
|
end
|
116
128
|
end
|
117
|
-
|
118
129
|
end
|
data/lib/sax-machine/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module SAXMachine
|
2
|
-
VERSION = "0.
|
3
|
-
end
|
2
|
+
VERSION = "0.3.0"
|
3
|
+
end
|
data/sax-machine.gemspec
CHANGED
@@ -1,24 +1,21 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
-
require File.expand_path(
|
2
|
+
require File.expand_path("../lib/sax-machine/version", __FILE__)
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
|
-
s.name
|
6
|
-
s.version
|
5
|
+
s.name = "sax-machine"
|
6
|
+
s.version = SAXMachine::VERSION
|
7
7
|
|
8
|
-
s.authors = ["Paul Dix", "Julien Kirch", "Ezekiel Templin"]
|
8
|
+
s.authors = ["Paul Dix", "Julien Kirch", "Ezekiel Templin", "Dmitry Krasnoukhov"]
|
9
9
|
s.email = %q{paul@pauldix.net}
|
10
10
|
s.homepage = %q{http://github.com/pauldix/sax-machine}
|
11
|
-
|
12
|
-
s.summary = %q{Declarative SAX Parsing with Nokogiri}
|
13
|
-
|
11
|
+
s.summary = %q{Declarative SAX Parsing with Nokogiri or Ox}
|
14
12
|
s.license = %q{MIT}
|
15
13
|
|
16
14
|
s.files = `git ls-files`.split("\n")
|
17
15
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
16
|
s.require_paths = ["lib"]
|
17
|
+
s.platform = Gem::Platform::RUBY
|
19
18
|
|
20
|
-
s.
|
21
|
-
|
22
|
-
s.add_dependency 'nokogiri', "~> 1.6.0"
|
23
|
-
s.add_development_dependency "rspec", "~> 2.13.0"
|
19
|
+
s.add_dependency "nokogiri", "~> 1.6.0"
|
20
|
+
s.add_development_dependency "rspec", "~> 3.0"
|
24
21
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
|
2
|
+
<div xmlns="http://www.w3.org/1999/xhtml"><p>In my previous <a href="http://www.pauldix.net/2008/08/serializing-dat.html">post about the speed of serializing data</a>, I concluded that Marshal was the quickest way to get things done. So I set about using Marshal to store some data in an ActiveRecord object. Things worked great at first, but on some test data I got this error: marshal data too short. Luckily, <a href="http://www.brynary.com/">Bryan Helmkamp</a> had helpfully pointed out that there were sometimes problems with storing marshaled data in the database. He said it was best to base64 encode the marshal dump before storing.</p>
|
3
|
+
|
4
|
+
<p>I was curious why it was working on some things and not others. It turns out that some types of data being marshaled were causing the error to pop up. Here's the test data I used in my specs:</p>
|
5
|
+
<pre>{ :foo => 3, :bar => 2 } # hash with symbols for keys and integer values<br />[3, 2.1, 4, 8] # array with integer and float values</pre>
|
6
|
+
<p>Everything worked when I switched the array values to all integers so it seems that floats were causing the problem. However, in the interest of keeping everything working regardless of data types, I base64 encoded before going into the database and decoded on the way out.</p>
|
7
|
+
|
8
|
+
<p>I also ran the benchmarks again to determine what impact this would have on speed. Here are the results for 100 iterations on a 10k element array and a 10k element hash with and without base64 encode/decode:</p>
|
9
|
+
<pre> user system total real<br />array marshal 0.200000 0.010000 0.210000 ( 0.214018) (without Base64)<br />array marshal 0.220000 0.010000 0.230000 ( 0.250260)<br /><br />hash marshal 1.830000 0.040000 1.870000 ( 1.892874) (without Base64)<br />hash marshal 2.040000 0.100000 2.140000 ( 2.170405)</pre>
|
10
|
+
<p>As you can see the difference in speed is pretty negligible. I assume that the error has to do with AR cleaning the stuff that gets inserted into the database, but I'm not really sure. In the end it's just easier to use Base64.encode64 when serializing data into a text field in ActiveRecord using Marshal.</p>
|
11
|
+
|
12
|
+
<p>I've also read people posting about this error when using the database session store. I can only assume that it's because they were trying to store either way too much data in their session (too much for a regular text field) or they were storing float values or some other data type that would cause this to pop up. Hopefully this helps.</p></div>
|
13
|
+
<div class="feedflare">
|
14
|
+
<a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=rWfWO"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=rWfWO" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=RaCqo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=RaCqo" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=1CBLo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=1CBLo" border="0"></img></a>
|
15
|
+
</div><img src="http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~4/383536354" height="1" width="1"/>
|