sax-machine 0.1.0 → 1.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.travis.yml +32 -0
- data/Gemfile +13 -2
- data/Guardfile +5 -0
- data/HISTORY.md +77 -0
- data/README.md +207 -0
- data/Rakefile +6 -20
- data/lib/sax-machine/{sax_ancestor_config.rb → config/sax_ancestor.rb} +3 -7
- data/lib/sax-machine/config/sax_attribute.rb +18 -0
- data/lib/sax-machine/config/sax_collection.rb +33 -0
- data/lib/sax-machine/{sax_element_config.rb → config/sax_element.rb} +23 -31
- data/lib/sax-machine/{sax_element_value_config.rb → config/sax_element_value.rb} +7 -8
- data/lib/sax-machine/handlers/sax_abstract_handler.rb +199 -0
- data/lib/sax-machine/handlers/sax_nokogiri_handler.rb +23 -0
- data/lib/sax-machine/handlers/sax_oga_handler.rb +39 -0
- data/lib/sax-machine/handlers/sax_ox_handler.rb +56 -0
- data/lib/sax-machine/sax_config.rb +13 -9
- data/lib/sax-machine/sax_configure.rb +3 -8
- data/lib/sax-machine/sax_document.rb +79 -49
- data/lib/sax-machine/version.rb +3 -0
- data/lib/sax-machine.rb +26 -7
- data/sax-machine.gemspec +20 -0
- data/spec/fixtures/atom-content.html +15 -0
- data/spec/fixtures/atom.xml +165 -0
- data/spec/sax-machine/sax_activerecord_spec.rb +21 -0
- data/spec/sax-machine/sax_configure_spec.rb +51 -0
- data/spec/sax-machine/sax_document_spec.rb +709 -239
- data/spec/sax-machine/sax_include_spec.rb +49 -0
- data/spec/spec_helper.rb +18 -7
- metadata +71 -70
- data/README.textile +0 -110
- data/lib/sax-machine/sax_attribute_config.rb +0 -40
- data/lib/sax-machine/sax_collection_config.rb +0 -45
- data/lib/sax-machine/sax_handler.rb +0 -107
- data/spec/sax-machine/configure_sax_machine_spec.rb +0 -53
- data/spec/sax-machine/include_sax_machine_spec.rb +0 -42
@@ -0,0 +1,199 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
module SAXMachine
  # Parser-independent SAX event handling.
  #
  # The underscore-prefixed public methods (_initialize, _characters,
  # _start_element, _end_element, _error, _warning) hold the shared logic;
  # an including handler class wires them to the callbacks of a concrete
  # parser and must provide its own #sax_parse.
  module SAXAbstractHandler
    # Sentinel stored in StackNode#buffer until character data is seen.
    NO_BUFFER = :no_buffer

    # One frame of the parse stack: the object being populated, the config
    # that opened the frame (nil for the root) and the text buffered so far.
    class StackNode < Struct.new(:object, :config, :buffer)
      def initialize(object, config = nil, buffer = NO_BUFFER)
        self.object = object
        self.config = config
        self.buffer = buffer
      end
    end

    # Runs the parser over +xml_input+. Must be overridden by the including
    # handler.
    #
    # @raise [NotImplementedError] always, in this abstract version
    def sax_parse(xml_input)
      raise NotImplementedError
    end

    # Prepares the handler state.
    #
    # @param object     root object that parsed values are assigned to
    # @param on_error   optional callable, invoked with a message by #_error
    # @param on_warning optional callable, invoked with a message by #_warning
    def _initialize(object, on_error = nil, on_warning = nil)
      @stack = [ StackNode.new(object) ]
      @parsed_configs = {}
      @on_error = on_error
      @on_warning = on_warning
    end

    # Appends character data to the buffer of the innermost stack node.
    # The first chunk is duplicated so later appends never mutate the
    # string handed in by the parser.
    def _characters(data)
      node = stack.last

      if node.buffer == NO_BUFFER
        node.buffer = data.dup
      else
        node.buffer << data
      end
    end

    # Handles an opening tag.
    #
    # @param name  tag name; dashes are normalized to underscores
    # @param attrs attributes as a Hash or as an array of [name, value] pairs
    def _start_element(name, attrs = [])
      name = normalize_name(name)
      object = stack.last.object

      sax_config = sax_config_for(object)
      return unless sax_config

      attrs = Hash[attrs]

      if (collection_config = sax_config.collection_config(name, attrs))
        object = collection_config.data_class.new
        sax_config = sax_config_for(object)

        stack.push(StackNode.new(object, collection_config))

        set_attributes_on(object, attrs)

        # A collection class without a sax_config has nothing further to
        # match against; the pushed node is still closed by _end_element.
        return unless sax_config
      end

      sax_config.element_configs_for_attribute(name, attrs).each do |ec|
        next if parsed_config?(object, ec)

        object.send(ec.setter, ec.value_from_attrs(attrs))
        mark_as_parsed(object, ec)
      end

      # A tag that opened a collection item is never also treated as a
      # plain element.
      return if collection_config

      element_config = sax_config.element_config_for_tag(name, attrs)
      return unless element_config

      # Placeholder pushed on the stack until the closing tag supplies the
      # real value; a nil data_class reuses the current object.
      new_object =
        case element_config.data_class.to_s
        when "Integer" then 0
        when "Float"   then 0.0
        when "Symbol"  then nil
        when "Time"    then Time.at(0)
        when ""        then object
        else
          element_config.data_class.new
        end

      stack.push(StackNode.new(new_object, element_config))

      set_attributes_on(new_object, attrs)
    end

    # Handles a closing tag: assigns the buffered value (or the nested
    # object) to its parent and pops the stack. Tags whose name does not
    # match the config of the innermost node are ignored.
    def _end_element(name)
      name = normalize_name(name)

      start_tag = stack[-2]
      close_tag = stack[-1]

      return unless start_tag && close_tag

      object  = start_tag.object
      element = close_tag.object
      config  = close_tag.config
      value   = close_tag.buffer

      return unless config.name == name

      unless parsed_config?(object, config)
        if (element_value_config = element_values_for(config))
          element_value_config.each { |evc| element.send(evc.setter, value) }
        end

        if config.respond_to?(:accessor)
          # Collection item: the item's class may not expose a sax_config,
          # in which case there is no element config to consult.
          subconfig = sax_config_for(element)
          econf = subconfig && subconfig.element_config_for_tag(name, [])
          element.send(econf.setter, value) if econf && !econf.value_configured?

          object.send(config.accessor) << element
        else
          # data_class_value returns nil for classes it does not convert,
          # so the nested object itself becomes the value.
          value = data_class_value(config.data_class, value) || element
          object.send(config.setter, value) if value != NO_BUFFER
          mark_as_parsed(object, config)
        end

        # try to set the ancestor
        if (sax_config = sax_config_for(element))
          sax_config.ancestors.each do |ancestor|
            element.send(ancestor.setter, object)
          end
        end
      end

      stack.pop
    end

    # Forwards a parser error message to the on_error callable, if any.
    def _error(string)
      @on_error.call(string) if @on_error
    end

    # Forwards a parser warning message to the on_warning callable, if any.
    def _warning(string)
      @on_warning.call(string) if @on_warning
    end

    private

    # Remembers that +element_config+ was already applied to +object+;
    # configs answering true to collection? are never recorded.
    def mark_as_parsed(object, element_config)
      unless element_config.collection?
        @parsed_configs[[object.object_id, element_config.object_id]] = true
      end
    end

    # True when +element_config+ was already applied to +object+.
    def parsed_config?(object, element_config)
      @parsed_configs[[object.object_id, element_config.object_id]]
    end

    # Returns the sax_config of the object's class, or nil when the class
    # does not respond to sax_config.
    def sax_config_for(object)
      if object.class.respond_to?(:sax_config)
        object.class.sax_config
      end
    end

    # Returns the element-value configs of the config's data class, or nil
    # when that class does not respond to sax_config.
    def element_values_for(config)
      if config.data_class.respond_to?(:sax_config)
        config.data_class.sax_config.element_values_for_element
      end
    end

    # Tag names are matched with dashes replaced by underscores.
    def normalize_name(name)
      name.to_s.tr("-", "_")
    end

    # Assigns every attribute config matching +attributes+ on +object+,
    # converting each value to the config's data class. No-op for objects
    # whose class has no sax_config.
    def set_attributes_on(object, attributes)
      config = sax_config_for(object)
      return unless config

      config.attribute_configs_for_element(attributes).each do |ac|
        value = data_class_value(ac.data_class, ac.value_from_attrs(attributes))
        object.send(ac.setter, value)
      end
    end

    # Converts +value+ according to +data_class+. NO_BUFFER is passed
    # through untouched for the handled classes; a nil data class returns
    # the value as is; any other class yields nil.
    def data_class_value(data_class, value)
      case data_class.to_s
      when "String"  then value != NO_BUFFER ? value.to_s : value
      when "Integer" then value != NO_BUFFER ? value.to_i : value
      # Accepts a decimal comma as well as a decimal point.
      when "Float"   then value != NO_BUFFER ? value.to_s.tr(",", ".").to_f : value
      when "Symbol" then
        if value != NO_BUFFER
          value.to_s.empty? ? nil : value.to_s.downcase.to_sym
        else
          value
        end
      # Assumes that time elements will be string-based and are not
      # something else, e.g. seconds since epoch
      when "Time"    then value != NO_BUFFER ? Time.parse(value.to_s) : value
      when ""        then value
      end
    end

    # The parse stack; the last entry is the innermost open node.
    def stack
      @stack
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'sax-machine/handlers/sax_abstract_handler'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module SAXMachine
  # SAX handler backed by Nokogiri: the Nokogiri document callbacks are
  # aliases of the shared SAXAbstractHandler implementations.
  class SAXNokogiriHandler < Nokogiri::XML::SAX::Document
    include SAXAbstractHandler

    # Parses +xml_input+ with a Nokogiri SAX parser reporting to this
    # handler, with entity replacement switched on in the parser context.
    def sax_parse(xml_input)
      Nokogiri::XML::SAX::Parser.new(self).parse(xml_input) do |context|
        context.replace_entities = true
      end
    end

    # Callback name => shared implementation.
    {
      initialize:    :_initialize,
      characters:    :_characters,
      cdata_block:   :_characters,
      start_element: :_start_element,
      end_element:   :_end_element,
      error:         :_error,
      warning:       :_warning
    }.each do |callback, implementation|
      alias_method callback, implementation
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'sax-machine/handlers/sax_abstract_handler'
|
2
|
+
require 'oga'
|
3
|
+
|
4
|
+
module SAXMachine
  # SAX handler backed by Oga: translates Oga's callbacks into the shared
  # SAXAbstractHandler events.
  class SAXOgaHandler
    include SAXAbstractHandler

    # Takes the same arguments as SAXAbstractHandler#_initialize.
    def initialize(*args)
      _initialize(*args)
    end

    # Parses +xml_input+ with Oga; a parser error is routed to #on_error
    # instead of being raised.
    def sax_parse(xml_input)
      begin
        Oga.sax_parse_xml(self, xml_input)
      rescue LL::ParserError => failure
        on_error(failure.message)
      end
    end

    # Opening tag: forwarded under its (optionally namespaced) name.
    def on_element(namespace, name, attrs)
      qualified = node_name(namespace, name)
      _start_element(qualified, attrs)
    end

    # Closing tag: forwarded under its (optionally namespaced) name.
    def after_element(namespace, name)
      qualified = node_name(namespace, name)
      _end_element(qualified)
    end

    # Joins all arguments with spaces into one error message.
    def on_error(*args)
      _error(args.join(" "))
    end

    # Text and CDATA are both buffered as character data.
    [:on_text, :on_cdata].each { |callback| alias_method callback, :_characters }

    private

    # "namespace:name" when a namespace is given, otherwise the bare name.
    def node_name(namespace, name)
      return name unless namespace

      "#{namespace}:#{name}"
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'sax-machine/handlers/sax_abstract_handler'
require 'ox'
require 'stringio'
|
3
|
+
|
4
|
+
module SAXMachine
  # SAX handler backed by Ox. The element name and its attributes arrive in
  # separate callbacks, so they are collected first and forwarded as a
  # single start-element event from #attrs_done.
  class SAXOxHandler < Ox::Sax
    include SAXAbstractHandler

    # Takes the same arguments as SAXAbstractHandler#_initialize.
    def initialize(*args)
      _initialize(*args)
      _reset_element
    end

    # Parses +xml_input+, given as a String or as an already streamable
    # object, with Ox.
    def sax_parse(xml_input)
      # Ox requires input to be streamable; StringIO comes from the
      # stringio standard library, which must be loaded.
      xml_input = StringIO.new(xml_input) if xml_input.is_a?(String)

      Ox.sax_parse(self, xml_input,
        symbolize: false,
        convert_special: true,
        skip: :skip_return,
      )
    end

    # Collects one attribute of the element currently being opened.
    def attr(name, str)
      @attrs[name] = str
    end

    # Emits the start-element event for the pending element together with
    # the attributes gathered so far, then clears the pending state.
    def attrs_done
      _start_element(@element, @attrs)
      _reset_element
    end

    # Remembers the element name until #attrs_done fires.
    def start_element(name)
      @element = name
    end

    # Buffers text; nil and empty strings are dropped.
    def text(value)
      _characters(value) if value && !value.empty?
    end

    alias_method :cdata, :text

    # Reports a parse error with its position appended to the message.
    def error(message, line, column)
      _error("#{message} on line #{line} column #{column}")
    end

    alias_method :end_element, :_end_element

    private

    # Clears the pending element name and attributes.
    def _reset_element
      @attrs = {}
      @element = ""
    end
  end
end
|
@@ -1,17 +1,16 @@
|
|
1
|
-
require "sax-machine/
|
2
|
-
require "sax-machine/
|
3
|
-
require "sax-machine/
|
4
|
-
require "sax-machine/
|
5
|
-
require "sax-machine/
|
1
|
+
require "sax-machine/config/sax_element_value"
|
2
|
+
require "sax-machine/config/sax_attribute"
|
3
|
+
require "sax-machine/config/sax_element"
|
4
|
+
require "sax-machine/config/sax_collection"
|
5
|
+
require "sax-machine/config/sax_ancestor"
|
6
6
|
|
7
7
|
module SAXMachine
|
8
8
|
class SAXConfig
|
9
|
-
|
10
9
|
attr_accessor :top_level_elements, :top_level_attributes, :top_level_element_value, :collection_elements, :ancestors
|
11
10
|
|
12
11
|
def initialize
|
13
12
|
# Default value is an empty array
|
14
|
-
@top_level_elements
|
13
|
+
@top_level_elements = Hash.new { |hash, key| hash[key] = [] }
|
15
14
|
@top_level_attributes = []
|
16
15
|
@top_level_element_value = []
|
17
16
|
@collection_elements = Hash.new { |hash, key| hash[key] = [] }
|
@@ -19,11 +18,12 @@ module SAXMachine
|
|
19
18
|
end
|
20
19
|
|
21
20
|
def columns
|
22
|
-
@top_level_elements.map {|
|
21
|
+
@top_level_elements.map { |_, ecs| ecs }.flatten
|
23
22
|
end
|
24
23
|
|
25
24
|
def initialize_copy(sax_config)
|
26
25
|
super
|
26
|
+
|
27
27
|
@top_level_elements = sax_config.top_level_elements.clone
|
28
28
|
@top_level_attributes = sax_config.top_level_attributes.clone
|
29
29
|
@top_level_element_value = sax_config.top_level_element_value.clone
|
@@ -64,11 +64,15 @@ module SAXMachine
|
|
64
64
|
end
|
65
65
|
|
66
66
|
# Element configs registered under +name+ that take their value from an
# attribute and match +attrs+. The key is checked first so the lookup does
# not go through the hash's default block for unknown names.
def element_configs_for_attribute(name, attrs)
  key = name.to_s
  if @top_level_elements.has_key?(key)
    @top_level_elements[key].select { |candidate| candidate.has_value_and_attrs_match?(attrs) }
  else
    []
  end
end
|
69
71
|
|
70
72
|
# First element config registered under +name+ whose attributes match
# +attrs+, or nil. The key is checked first so the lookup does not go
# through the hash's default block for unknown names.
def element_config_for_tag(name, attrs)
  key = name.to_s
  return unless @top_level_elements.has_key?(key)

  candidates = @top_level_elements[key]
  candidates.detect { |candidate| candidate.attrs_match?(attrs) }
end
|
73
77
|
end
|
74
|
-
end
|
78
|
+
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module SAXMachine
|
2
|
-
|
3
2
|
def self.configure(clazz)
|
4
3
|
extended_clazz = Class.new(clazz)
|
5
4
|
extended_clazz.send(:include, SAXMachine)
|
@@ -15,14 +14,12 @@ module SAXMachine
|
|
15
14
|
clazz.extend LightWeightSaxMachine
|
16
15
|
clazz.sax_config = extended_clazz.sax_config
|
17
16
|
|
18
|
-
(class << clazz;self;end).send(:define_method, :parse) do |
|
19
|
-
extended_clazz.parse(
|
17
|
+
(class << clazz;self;end).send(:define_method, :parse) do |xml_input|
|
18
|
+
extended_clazz.parse(xml_input)
|
20
19
|
end
|
21
|
-
|
22
20
|
end
|
23
21
|
|
24
22
|
module LightWeightSaxMachine
|
25
|
-
|
26
23
|
attr_writer :sax_config
|
27
24
|
|
28
25
|
def sax_config
|
@@ -32,7 +29,5 @@ module SAXMachine
|
|
32
29
|
def inherited(subclass)
|
33
30
|
subclass.sax_config.send(:initialize_copy, self.sax_config)
|
34
31
|
end
|
35
|
-
|
36
32
|
end
|
37
|
-
|
38
|
-
end
|
33
|
+
end
|
@@ -1,50 +1,96 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
|
3
1
|
module SAXMachine
|
4
|
-
|
5
2
|
def self.included(base)
|
6
|
-
base.
|
3
|
+
base.send(:include, InstanceMethods)
|
4
|
+
base.extend(ClassMethods)
|
7
5
|
end
|
8
6
|
|
9
|
-
def parse(
|
10
|
-
|
11
|
-
|
12
|
-
|
7
|
+
# Parses +xml_input+ into this object with the handler class named after
# SAXMachine.handler (SAX<Handler>Handler) and returns self.
# +on_error+ and +on_warning+ are handed to the handler unchanged.
def parse(xml_input, on_error = nil, on_warning = nil)
  backend = SAXMachine.handler.capitalize

  SAXMachine
    .const_get("SAX#{backend}Handler")
    .new(self, on_error, on_warning)
    .sax_parse(xml_input)

  self
end
|
15
15
|
|
16
|
-
module
|
16
|
+
module InstanceMethods
  # Assigns each entry of +attributes+ through its "<name>=" writer, then
  # applies the configured default of every top-level element whose
  # reader still returns nil.
  def initialize(attributes = {})
    attributes.each { |name, value| send("#{name}=", value) }

    self.class.sax_config.top_level_elements.each_value do |configs|
      configs.each do |config|
        # Skip elements without a default and those that already have a value.
        next if config.default.nil? || !send(config.as).nil?

        send(config.setter, config.default)
      end
    end
  end
end
|
32
|
+
|
33
|
+
module ClassMethods
|
18
34
|
def inherited(subclass)
|
19
35
|
subclass.sax_config.send(:initialize_copy, self.sax_config)
|
20
36
|
end
|
21
37
|
|
22
|
-
def parse(
|
23
|
-
new.parse(
|
38
|
+
def parse(*args)
|
39
|
+
new.parse(*args)
|
24
40
|
end
|
25
41
|
|
26
|
-
def element(name, options = {})
|
42
|
+
def element(name, options = {}, &block)
|
27
43
|
real_name = (options[:as] ||= name).to_s
|
28
44
|
sax_config.add_top_level_element(name, options)
|
29
|
-
create_attr
|
45
|
+
create_attr(real_name, &block)
|
30
46
|
end
|
31
47
|
|
32
|
-
def attribute(name, options = {})
|
48
|
+
def attribute(name, options = {}, &block)
|
33
49
|
real_name = (options[:as] ||= name).to_s
|
34
|
-
sax_config.add_top_level_attribute(self.class.to_s, options.merge(:
|
35
|
-
create_attr
|
50
|
+
sax_config.add_top_level_attribute(self.class.to_s, options.merge(name: name))
|
51
|
+
create_attr(real_name, &block)
|
36
52
|
end
|
37
53
|
|
38
|
-
def value(name, options = {})
|
54
|
+
def value(name, options = {}, &block)
|
39
55
|
real_name = (options[:as] ||= name).to_s
|
40
|
-
sax_config.add_top_level_element_value(self.class.to_s, options.merge(:
|
41
|
-
create_attr
|
56
|
+
sax_config.add_top_level_element_value(self.class.to_s, options.merge(name: name))
|
57
|
+
create_attr(real_name, &block)
|
42
58
|
end
|
43
59
|
|
44
|
-
def ancestor(name, options = {})
|
60
|
+
def ancestor(name, options = {}, &block)
|
45
61
|
real_name = (options[:as] ||= name).to_s
|
46
62
|
sax_config.add_ancestor(name, options)
|
47
|
-
create_attr(real_name)
|
63
|
+
create_attr(real_name, &block)
|
64
|
+
end
|
65
|
+
|
66
|
+
# Declares a collection named +name+ (exposed as options[:as], defaulting
# to +name+).
#
# With options[:class] the collection is registered through
# add_collection_element. Otherwise an "add_<as>" method is defined that
# appends each value, passed through +block+ when one is given, and the
# element is registered as a top-level element with collection: true.
#
# A reader defaulting to [] and a plain writer are defined unless methods
# with those names already exist.
def elements(name, options = {}, &block)
  real_name = (options[:as] ||= name).to_s

  if options[:class]
    sax_config.add_collection_element(name, options)
  else
    appender = "add_#{real_name}"

    if block
      define_method(appender) { |value| send(real_name).send(:<<, instance_exec(value, &block)) }
    else
      define_method(appender) { |value| send(real_name).send(:<<, value) }
    end

    sax_config.add_top_level_element(name, options.merge(collection: true))
  end

  unless method_defined?(real_name)
    class_eval <<-SRC
      def #{real_name}
        @#{real_name} ||= []
      end
    SRC
  end

  attr_writer(options[:as]) unless method_defined?("#{options[:as]}=")
end
|
49
95
|
|
50
96
|
def columns
|
@@ -67,30 +113,6 @@ module SAXMachine
|
|
67
113
|
columns.map { |e| e.column }
|
68
114
|
end
|
69
115
|
|
70
|
-
def elements(name, options = {})
|
71
|
-
options[:as] ||= name
|
72
|
-
if options[:class]
|
73
|
-
sax_config.add_collection_element(name, options)
|
74
|
-
else
|
75
|
-
class_eval <<-SRC
|
76
|
-
def add_#{options[:as]}(value)
|
77
|
-
#{options[:as]} << value
|
78
|
-
end
|
79
|
-
SRC
|
80
|
-
sax_config.add_top_level_element(name, options.merge(:collection => true))
|
81
|
-
end
|
82
|
-
|
83
|
-
if !method_defined?(options[:as].to_s)
|
84
|
-
class_eval <<-SRC
|
85
|
-
def #{options[:as]}
|
86
|
-
@#{options[:as]} ||= []
|
87
|
-
end
|
88
|
-
SRC
|
89
|
-
end
|
90
|
-
|
91
|
-
attr_writer options[:as] unless method_defined?("#{options[:as]}=")
|
92
|
-
end
|
93
|
-
|
94
116
|
def sax_config
|
95
117
|
@sax_config ||= SAXConfig.new
|
96
118
|
end
|
@@ -98,10 +120,18 @@ module SAXMachine
|
|
98
120
|
# we only want to insert the getter and setter if they haven't defined it from elsewhere.
|
99
121
|
# this is how we allow custom parsing behavior. So you could define the setter
|
100
122
|
# and have it parse the string into a date or whatever.
|
101
|
-
def create_attr
|
102
|
-
attr_reader
|
103
|
-
|
123
|
+
# Defines a reader and a writer for +real_name+, leaving any method that
# already exists under either name untouched. When a block is given the
# writer stores the block's result, evaluated in the instance with the
# assigned value as argument.
def create_attr(real_name, &block)
  attr_reader(real_name) unless method_defined?(real_name)

  writer = "#{real_name}="
  return if method_defined?(writer)
  return attr_writer(real_name) unless block

  define_method(writer) do |value|
    instance_variable_set("@#{real_name}", instance_exec(value, &block))
  end
end
|
105
136
|
end
|
106
|
-
|
107
137
|
end
|
data/lib/sax-machine.rb
CHANGED
@@ -1,12 +1,31 @@
|
|
1
|
-
require "
|
2
|
-
|
3
|
-
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
4
|
-
|
1
|
+
require "sax-machine/version"
|
5
2
|
require "sax-machine/sax_document"
|
6
3
|
require "sax-machine/sax_configure"
|
7
|
-
require "sax-machine/sax_handler"
|
8
4
|
require "sax-machine/sax_config"
|
9
5
|
|
10
6
|
module SAXMachine
  # Name of the selected parser backend (e.g. :ox), or nil when none has
  # been selected yet.
  #
  # Held in a module-level instance variable rather than a class variable:
  # SAXMachine is included into other classes, and a class variable
  # defined on a module is shared with every class that includes it.
  def self.handler
    @handler ||= nil
  end

  # Selects the parser backend by requiring
  # "sax-machine/handlers/sax_<handler>_handler". A nil or false argument
  # is ignored. A LoadError raised by the require propagates, and the
  # previous selection is kept.
  def self.handler=(handler)
    return unless handler

    require "sax-machine/handlers/sax_#{handler}_handler"
    @handler = handler
  end
end
|
18
|
+
|
19
|
+
# Pick the first backend whose handler file loads, trying Ox before Oga.
[:ox, :oga].find do |candidate|
  begin
    SAXMachine.handler = candidate
    true
  rescue LoadError
    # Backend not available; try the next one.
    false
  end
end

# Still no handler, use Nokogiri
SAXMachine.handler = :nokogiri if SAXMachine.handler.nil?
|
data/sax-machine.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path("../lib/sax-machine/version", __FILE__)
|
3
|
+
|
4
|
+
# Gem metadata for sax-machine.
Gem::Specification.new do |spec|
  spec.name          = "sax-machine"
  spec.version       = SAXMachine::VERSION

  spec.authors       = ["Paul Dix", "Julien Kirch", "Ezekiel Templin", "Dmitry Krasnoukhov"]
  spec.email         = "paul@pauldix.net"
  spec.homepage      = "http://github.com/pauldix/sax-machine"
  spec.summary       = "Declarative SAX Parsing with Nokogiri, Ox or Oga"
  spec.license       = "MIT"

  # The packaged file lists come from git.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.require_paths = ["lib"]
  spec.platform      = Gem::Platform::RUBY

  spec.add_development_dependency "rspec", "~> 3.0"
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
|
2
|
+
<div xmlns="http://www.w3.org/1999/xhtml"><p>In my previous <a href="http://www.pauldix.net/2008/08/serializing-dat.html">post about the speed of serializing data</a>, I concluded that Marshal was the quickest way to get things done. So I set about using Marshal to store some data in an ActiveRecord object. Things worked great at first, but on some test data I got this error: marshal data too short. Luckily, <a href="http://www.brynary.com/">Bryan Helmkamp</a> had helpfully pointed out that there were sometimes problems with storing marshaled data in the database. He said it was best to base64 encode the marshal dump before storing.</p>
|
3
|
+
|
4
|
+
<p>I was curious why it was working on some things and not others. It turns out that some types of data being marshaled were causing the error to pop up. Here's the test data I used in my specs:</p>
|
5
|
+
<pre>{ :foo => 3, :bar => 2 } # hash with symbols for keys and integer values<br />[3, 2.1, 4, 8] # array with integer and float values</pre>
|
6
|
+
<p>Everything worked when I switched the array values to all integers so it seems that floats were causing the problem. However, in the interest of keeping everything working regardless of data types, I base64 encoded before going into the database and decoded on the way out.</p>
|
7
|
+
|
8
|
+
<p>I also ran the benchmarks again to determine what impact this would have on speed. Here are the results for 100 iterations on a 10k element array and a 10k element hash with and without base64 encode/decode:</p>
|
9
|
+
<pre> user system total real<br />array marshal 0.200000 0.010000 0.210000 ( 0.214018) (without Base64)<br />array marshal 0.220000 0.010000 0.230000 ( 0.250260)<br /><br />hash marshal 1.830000 0.040000 1.870000 ( 1.892874) (without Base64)<br />hash marshal 2.040000 0.100000 2.140000 ( 2.170405)</pre>
|
10
|
+
<p>As you can see the difference in speed is pretty negligible. I assume that the error has to do with AR cleaning the stuff that gets inserted into the database, but I'm not really sure. In the end it's just easier to use Base64.encode64 when serializing data into a text field in ActiveRecord using Marshal.</p>
|
11
|
+
|
12
|
+
<p>I've also read people posting about this error when using the database session store. I can only assume that it's because they were trying to store either way too much data in their session (too much for a regular text field) or they were storing float values or some other data type that would cause this to pop up. Hopefully this helps.</p></div>
|
13
|
+
<div class="feedflare">
|
14
|
+
<a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=rWfWO"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=rWfWO" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=RaCqo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=RaCqo" border="0"></img></a> <a href="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?a=1CBLo"><img src="http://feeds.feedburner.com/~f/PaulDixExplainsNothing?i=1CBLo" border="0"></img></a>
|
15
|
+
</div><img src="http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~4/383536354" height="1" width="1"/>
|