saxophone 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +35 -0
- data/Gemfile +19 -0
- data/README.md +206 -0
- data/Rakefile +7 -0
- data/archive/HISTORY.md +77 -0
- data/lib/saxophone.rb +31 -0
- data/lib/saxophone/config/sax_ancestor.rb +17 -0
- data/lib/saxophone/config/sax_attribute.rb +18 -0
- data/lib/saxophone/config/sax_collection.rb +33 -0
- data/lib/saxophone/config/sax_element.rb +65 -0
- data/lib/saxophone/config/sax_element_value.rb +23 -0
- data/lib/saxophone/handlers/sax_abstract_handler.rb +200 -0
- data/lib/saxophone/handlers/sax_nokogiri_handler.rb +23 -0
- data/lib/saxophone/handlers/sax_oga_handler.rb +39 -0
- data/lib/saxophone/handlers/sax_ox_handler.rb +56 -0
- data/lib/saxophone/sax_config.rb +78 -0
- data/lib/saxophone/sax_configure.rb +33 -0
- data/lib/saxophone/sax_document.rb +137 -0
- data/lib/saxophone/version.rb +3 -0
- data/saxophone.gemspec +19 -0
- data/spec/fixtures/atom-content.html +15 -0
- data/spec/fixtures/atom.xml +165 -0
- data/spec/saxophone/sax_activerecord_spec.rb +33 -0
- data/spec/saxophone/sax_configure_spec.rb +51 -0
- data/spec/saxophone/sax_document_spec.rb +1218 -0
- data/spec/saxophone/sax_include_spec.rb +49 -0
- data/spec/spec_helper.rb +22 -0
- metadata +98 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
module Saxophone
  class SAXConfig
    # Configuration for a single element mapping: which tag it applies to,
    # which attribute constraints must hold, and which setter receives the
    # parsed value.
    class ElementConfig
      attr_reader :name, :as, :setter, :data_class, :collection, :default

      # name    - tag name; stored as a String.
      # options - Hash read for the keys :with, :value, :as, :collection,
      #           :default, :class and :required.
      def initialize(name, options)
        @name = name.to_s
        # An explicit `with: nil` is treated like a missing key so that
        # attrs_match? always has a Hash to iterate.
        @with = options[:with] || {}
        @value = options.key?(:value) ? options[:value].to_s : nil

        @as = options[:as]
        @collection = options[:collection]
        @default = options[:default]

        # Collection elements are appended through "add_<as>", plain
        # elements are assigned through "<as>=".
        @setter = @collection ? "add_#{@as}" : "#{@as}="

        @data_class = options[:class]
        @required = options[:required]
      end

      # True when the :value option was supplied.
      def value_configured?
        !@value.nil?
      end

      def to_s
        "name: #{@name} dataclass: #{@data_class} setter: #{@setter} required: #{@required} value: #{@value} as:#{@as} collection: #{@collection} with: #{@with}"
      end

      # Returns the :as option when given, otherwise the tag name as a Symbol.
      def column
        @as || @name.to_sym
      end

      def required?
        !!@required
      end

      # Looks up the configured :value key in attrs; nil when it is absent.
      def value_from_attrs(attrs)
        attrs.fetch(@value, nil)
      end

      # True when every :with constraint matches (via ===) the attribute
      # stored under the stringified constraint key. Always true when no
      # constraints were configured.
      def attrs_match?(attrs)
        @with.all? { |key, expected| expected === attrs[key.to_s] }
      end

      def has_value_and_attrs_match?(attrs)
        value_configured? && attrs_match?(attrs)
      end

      def collection?
        !!@collection
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Saxophone
  class SAXConfig
    # Configuration for mapping an element's own text value onto a setter.
    class ElementValueConfig
      attr_reader :name, :setter, :data_class

      # name    - stored as a String.
      # options - Hash read for the keys :as, :required and :class.
      def initialize(name, options)
        @required, @data_class = options.values_at(:required, :class)
        @as = options[:as]
        @name = name.to_s
        @setter = "#{@as}="
      end

      # The :as option when present, otherwise the name as a Symbol.
      def column
        return @as if @as

        @name.to_sym
      end

      # Whether the :required option was truthy.
      def required?
        @required ? true : false
      end
    end
  end
end
|
@@ -0,0 +1,200 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
module Saxophone
  # Parser-independent SAX event handling. Concrete handlers mix this module
  # in and forward their parser callbacks to the underscore-prefixed methods.
  #
  # The handler keeps a stack of StackNode entries. The bottom entry wraps the
  # object handed to _initialize; every element that matches a configuration
  # pushes a new entry in _start_element and pops it again in _end_element.
  module SAXAbstractHandler
    # Sentinel held in StackNode#buffer until character data arrives.
    NO_BUFFER = :no_buffer

    # One stack entry: the object being populated, the config that opened the
    # entry (nil for the root entry) and the accumulated character data.
    class StackNode < Struct.new(:object, :config, :buffer)
      def initialize(object, config = nil, buffer = NO_BUFFER)
        self.object = object
        self.config = config
        self.buffer = buffer
      end
    end

    # Must be overridden by the including handler.
    def sax_parse(xml_input)
      raise NotImplementedError
    end

    # object     - the object that receives parsed values.
    # on_error   - optional callable invoked by _error with a message.
    # on_warning - optional callable invoked by _warning with a message.
    def _initialize(object, on_error = nil, on_warning = nil)
      @stack = [ StackNode.new(object) ]
      @parsed_configs = {}
      @on_error = on_error
      @on_warning = on_warning
    end

    # Appends character data to the buffer of the entry on top of the stack.
    def _characters(data)
      node = stack.last

      if node.buffer == NO_BUFFER
        # Copy so later appends never mutate the parser's own string.
        node.buffer = data.dup
      else
        node.buffer << data
      end
    end

    # Handles an opening tag. attrs may be a Hash or an array of
    # [name, value] pairs; it is converted with Hash[] before use.
    def _start_element(name, attrs = [])
      name = normalize_name(name)
      node = stack.last
      object = node.object

      sax_config = sax_config_for(object)

      if sax_config
        attrs = Hash[attrs]

        if collection_config = sax_config.collection_config(name, attrs)
          # A collection item gets its own object; from here on the item's
          # config (not the parent's) drives the lookups below.
          object = collection_config.data_class.new
          sax_config = sax_config_for(object)

          stack.push(StackNode.new(object, collection_config))

          set_attributes_on(object, attrs)
        end

        # Elements whose value comes from an attribute are assigned right
        # away, once per object/config pair.
        sax_config.element_configs_for_attribute(name, attrs).each do |ec|
          unless parsed_config?(object, ec)
            value = data_class_value(ec.data_class, ec.value_from_attrs(attrs))
            object.send(ec.setter, value)
            mark_as_parsed(object, ec)
          end
        end

        if !collection_config && element_config = sax_config.element_config_for_tag(name, attrs)
          # Placeholder object pushed for the element; an empty data class
          # name (no :class configured) reuses the current object.
          new_object =
            case element_config.data_class.to_s
            when "Integer" then 0
            when "Float"   then 0.0
            when "Symbol"  then nil
            when "Time"    then Time.at(0)
            when ""        then object
            else
              element_config.data_class.new
            end

          stack.push(StackNode.new(new_object, element_config))

          set_attributes_on(new_object, attrs)
        end
      end
    end

    # Handles a closing tag. Does nothing unless the top stack entry was
    # opened by a config with the same (normalized) name.
    def _end_element(name)
      name = normalize_name(name)

      start_tag = stack[-2]
      close_tag = stack[-1]

      return unless start_tag && close_tag

      object = start_tag.object
      element = close_tag.object
      config = close_tag.config
      value = close_tag.buffer

      return unless config.name == name

      unless parsed_config?(object, config)
        if (element_value_config = element_values_for(config))
          element_value_config.each { |evc| element.send(evc.setter, value) }
        end

        if config.respond_to?(:accessor)
          # Collection item: optionally store the text on the item itself,
          # then append the item to the parent's collection.
          subconfig = sax_config_for(element)

          # Attributes are not available at end-of-element, so an empty Hash
          # is passed. (An Array here would raise TypeError as soon as a
          # config with a :with constraint indexes it by String key.)
          if econf = subconfig.element_config_for_tag(name, {})
            element.send(econf.setter, value) unless econf.value_configured?
          end

          object.send(config.accessor) << element
        else
          # Fall back to the placeholder object when the data class has no
          # scalar conversion.
          value = data_class_value(config.data_class, value) || element
          object.send(config.setter, value) if value != NO_BUFFER
          mark_as_parsed(object, config)
        end

        # try to set the ancestor
        if (sax_config = sax_config_for(element))
          sax_config.ancestors.each do |ancestor|
            element.send(ancestor.setter, object)
          end
        end
      end

      stack.pop
    end

    # Forwards the message to the on_error callable, if one was given.
    def _error(string)
      if @on_error
        @on_error.call(string)
      end
    end

    # Forwards the message to the on_warning callable, if one was given.
    def _warning(string)
      if @on_warning
        @on_warning.call(string)
      end
    end

    private

    # Remembers that element_config was applied to object. Collection
    # configs are never marked, so they can apply repeatedly.
    def mark_as_parsed(object, element_config)
      unless element_config.collection?
        @parsed_configs[[object.object_id, element_config.object_id]] = true
      end
    end

    def parsed_config?(object, element_config)
      @parsed_configs[[object.object_id, element_config.object_id]]
    end

    # The sax_config of the object's class, or nil when the class has none.
    def sax_config_for(object)
      if object.class.respond_to?(:sax_config)
        object.class.sax_config
      end
    end

    # Element-value configs of the config's data class, or nil when the data
    # class does not expose a sax_config.
    def element_values_for(config)
      if config.data_class.respond_to?(:sax_config)
        config.data_class.sax_config.element_values_for_element
      end
    end

    # Tag names are compared with dashes replaced by underscores.
    def normalize_name(name)
      name.to_s.tr("-", "_")
    end

    # Applies every matching attribute config of the object's class.
    def set_attributes_on(object, attributes)
      config = sax_config_for(object)

      if config
        config.attribute_configs_for_element(attributes).each do |ac|
          value = data_class_value(ac.data_class, ac.value_from_attrs(attributes))
          object.send(ac.setter, value)
        end
      end
    end

    # Converts value according to data_class. NO_BUFFER is passed through
    # unchanged; data classes not listed here yield nil.
    def data_class_value(data_class, value)
      case data_class.to_s
      when "String"  then value != NO_BUFFER ? value.to_s : value
      when "Integer" then value != NO_BUFFER ? value.to_i : value
      # A decimal comma is accepted in place of a decimal point.
      when "Float"   then value != NO_BUFFER ? value.to_s.gsub(",",".").to_f : value
      when "Symbol"  then
        if value != NO_BUFFER
          value.to_s.empty? ? nil : value.to_s.downcase.to_sym
        else
          value
        end
      # Assumes that time elements will be string-based and are not
      # something else, e.g. seconds since epoch
      when "Time"    then value != NO_BUFFER ? Time.parse(value.to_s) : value
      when ""        then value
      end
    end

    def stack
      @stack
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'saxophone/handlers/sax_abstract_handler'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module Saxophone
  # Nokogiri SAX document that routes Nokogiri's callbacks to the shared
  # SAXAbstractHandler implementation.
  class SAXNokogiriHandler < Nokogiri::XML::SAX::Document
    include SAXAbstractHandler

    # Construction and element callbacks map one-to-one onto the shared methods.
    alias_method :initialize, :_initialize
    alias_method :start_element, :_start_element
    alias_method :end_element, :_end_element

    # Plain text and CDATA sections are buffered the same way.
    alias_method :characters, :_characters
    alias_method :cdata_block, :_characters

    alias_method :error, :_error
    alias_method :warning, :_warning

    # Parses xml_input with a Nokogiri SAX parser that reports to this handler.
    def sax_parse(xml_input)
      # NOTE(review): entity replacement is switched on for every parse;
      # Nokogiri's documentation flags this as unsafe for untrusted
      # documents — confirm the inputs are trusted.
      Nokogiri::XML::SAX::Parser.new(self).parse(xml_input) do |context|
        context.replace_entities = true
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'saxophone/handlers/sax_abstract_handler'
|
2
|
+
require 'oga'
|
3
|
+
|
4
|
+
module Saxophone
  # Oga SAX handler that translates Oga's callbacks into the shared
  # SAXAbstractHandler events.
  class SAXOgaHandler
    include SAXAbstractHandler

    def initialize(*args)
      _initialize(*args)
    end

    # Parses xml_input with Oga; an LL::ParserError is reported through
    # on_error instead of being raised.
    def sax_parse(xml_input)
      begin
        Oga.sax_parse_xml(self, xml_input)
      rescue LL::ParserError => parse_error
        on_error(parse_error.message)
      end
    end

    def on_element(namespace, name, attrs)
      qualified = node_name(namespace, name)
      _start_element(qualified, attrs)
    end

    def after_element(namespace, name)
      qualified = node_name(namespace, name)
      _end_element(qualified)
    end

    # Joins all arguments with a space into one error message.
    def on_error(*args)
      message = args.join(" ")
      _error(message)
    end

    # Text and CDATA are buffered identically.
    alias_method :on_text, :_characters
    alias_method :on_cdata, :_characters

    private

    # "namespace:name" when a namespace is given, otherwise name unchanged.
    def node_name(namespace, name)
      return name unless namespace

      "#{namespace}:#{name}"
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'saxophone/handlers/sax_abstract_handler'
|
2
|
+
require 'ox'
|
3
|
+
|
4
|
+
module Saxophone
  # Ox SAX handler. Ox delivers the element name and each attribute in
  # separate callbacks, so they are collected here and forwarded as one
  # _start_element event from attrs_done.
  class SAXOxHandler < Ox::Sax
    include SAXAbstractHandler

    def initialize(*args)
      _initialize(*args)
      _reset_element
    end

    def sax_parse(xml_input)
      # Ox requires input to be streamable, so a String is wrapped in a StringIO.
      source = xml_input.is_a?(String) ? StringIO.new(xml_input) : xml_input

      Ox.sax_parse(self, source, symbolize: false, convert_special: true, skip: :skip_return)
    end

    # Remembers the name of the element currently being opened.
    def start_element(name)
      @element = name
    end

    # Collects one attribute of the element currently being opened.
    def attr(name, str)
      @attrs[name] = str
    end

    # Emits the combined start event, then clears the collected state.
    def attrs_done
      _start_element(@element, @attrs)
      _reset_element
    end

    # Forwards text to the buffer; nil and empty values are dropped.
    def text(value)
      return if value.nil? || value.empty?

      _characters(value)
    end

    alias_method :cdata, :text

    def error(message, line, column)
      _error("#{message} on line #{line} column #{column}")
    end

    alias_method :end_element, :_end_element

    private

    # Clears the pending element name and attributes.
    def _reset_element
      @attrs = {}
      @element = ""
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require "saxophone/config/sax_element_value"
|
2
|
+
require "saxophone/config/sax_attribute"
|
3
|
+
require "saxophone/config/sax_element"
|
4
|
+
require "saxophone/config/sax_collection"
|
5
|
+
require "saxophone/config/sax_ancestor"
|
6
|
+
|
7
|
+
module Saxophone
  # Holds every mapping declared for one class: element, attribute,
  # element-value, collection and ancestor configurations.
  class SAXConfig
    attr_accessor :top_level_elements, :top_level_attributes, :top_level_element_value, :collection_elements, :ancestors

    def initialize
      # Both lookup tables map a tag name to an array of configs and
      # create an empty array on first access.
      @top_level_elements = Hash.new { |hash, key| hash[key] = [] }
      @top_level_attributes = []
      @top_level_element_value = []
      @collection_elements = Hash.new { |hash, key| hash[key] = [] }
      @ancestors = []
    end

    # All element configs, across every tag name, as one flat array.
    def columns
      @top_level_elements.values.flatten
    end

    # Copy hook used by dup/clone. The per-tag arrays are duplicated as
    # well as the tables themselves, so adding a config to the copy never
    # changes the config it was copied from.
    def initialize_copy(sax_config)
      super

      @top_level_elements = copy_config_table(sax_config.top_level_elements)
      @top_level_attributes = sax_config.top_level_attributes.clone
      @top_level_element_value = sax_config.top_level_element_value.clone
      @collection_elements = copy_config_table(sax_config.collection_elements)
      @ancestors = sax_config.ancestors.clone
    end

    def add_top_level_element(name, options)
      @top_level_elements[name.to_s] << ElementConfig.new(name, options)
    end

    # The config name is taken from options[:name], which is removed from
    # the given Hash; the name argument itself is not used.
    def add_top_level_attribute(name, options)
      @top_level_attributes << AttributeConfig.new(options.delete(:name), options)
    end

    # The config name is taken from options[:name], which is removed from
    # the given Hash; the name argument itself is not used.
    def add_top_level_element_value(name, options)
      @top_level_element_value << ElementValueConfig.new(options.delete(:name), options)
    end

    def add_collection_element(name, options)
      @collection_elements[name.to_s] << CollectionConfig.new(name, options)
    end

    def add_ancestor(name, options)
      @ancestors << AncestorConfig.new(name, options)
    end

    # First collection config for the tag whose constraints match attrs.
    def collection_config(name, attrs)
      @collection_elements[name.to_s].detect { |cc| cc.attrs_match?(attrs) }
    end

    # Every attribute config whose constraints match attrs.
    def attribute_configs_for_element(attrs)
      @top_level_attributes.select { |aa| aa.attrs_match?(attrs) }
    end

    def element_values_for_element
      @top_level_element_value
    end

    # Element configs for the tag that take their value from an attribute
    # and whose constraints match attrs; empty when the tag is unknown.
    def element_configs_for_attribute(name, attrs)
      return [] unless @top_level_elements.key?(name.to_s)

      @top_level_elements[name.to_s].select { |ec| ec.has_value_and_attrs_match?(attrs) }
    end

    # First element config for the tag whose constraints match attrs;
    # nil when the tag is unknown.
    def element_config_for_tag(name, attrs)
      return unless @top_level_elements.key?(name.to_s)

      @top_level_elements[name.to_s].detect { |ec| ec.attrs_match?(attrs) }
    end

    private

    # Duplicates a tag => [configs] table together with each of its arrays.
    # Hash#dup keeps the table's default block.
    def copy_config_table(table)
      table.each_with_object(table.dup) do |(key, configs), copy|
        copy[key] = configs.dup
      end
    end
  end
end
|