saxophone 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +35 -0
- data/Gemfile +19 -0
- data/README.md +206 -0
- data/Rakefile +7 -0
- data/archive/HISTORY.md +77 -0
- data/lib/saxophone.rb +31 -0
- data/lib/saxophone/config/sax_ancestor.rb +17 -0
- data/lib/saxophone/config/sax_attribute.rb +18 -0
- data/lib/saxophone/config/sax_collection.rb +33 -0
- data/lib/saxophone/config/sax_element.rb +65 -0
- data/lib/saxophone/config/sax_element_value.rb +23 -0
- data/lib/saxophone/handlers/sax_abstract_handler.rb +200 -0
- data/lib/saxophone/handlers/sax_nokogiri_handler.rb +23 -0
- data/lib/saxophone/handlers/sax_oga_handler.rb +39 -0
- data/lib/saxophone/handlers/sax_ox_handler.rb +56 -0
- data/lib/saxophone/sax_config.rb +78 -0
- data/lib/saxophone/sax_configure.rb +33 -0
- data/lib/saxophone/sax_document.rb +137 -0
- data/lib/saxophone/version.rb +3 -0
- data/saxophone.gemspec +19 -0
- data/spec/fixtures/atom-content.html +15 -0
- data/spec/fixtures/atom.xml +165 -0
- data/spec/saxophone/sax_activerecord_spec.rb +33 -0
- data/spec/saxophone/sax_configure_spec.rb +51 -0
- data/spec/saxophone/sax_document_spec.rb +1218 -0
- data/spec/saxophone/sax_include_spec.rb +49 -0
- data/spec/spec_helper.rb +22 -0
- metadata +98 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
module Saxophone
  class SAXConfig
    # Describes how one XML element (or one attribute of that element) is
    # mapped onto a target object.
    #
    # Options read by the constructor:
    #   :with       - Hash of attribute constraints; each constraint value is
    #                 compared with `===` against the attribute stored under
    #                 the stringified key.
    #   :value      - name of the attribute whose value should be extracted
    #                 (stored as a String); absent means "not configured".
    #   :as         - name used to build the setter and returned by #column.
    #   :collection - truthy when values are appended via "add_<as>".
    #   :default, :class, :required - stored as given.
    class ElementConfig
      attr_reader :name, :as, :setter, :data_class, :collection, :default

      def initialize(name, options)
        @name = name.to_s

        # An explicit `with: nil` is treated like an omitted option so that
        # #attrs_match? never has to call #all? on nil.
        @with = options[:with] || {}

        @value = options.has_key?(:value) ? options[:value].to_s : nil

        @as = options[:as]
        @collection = options[:collection]
        @default = options[:default]

        # Collections are filled through "add_<as>", scalars through "<as>=".
        @setter = @collection ? "add_#{@as}" : "#{@as}="

        @data_class = options[:class]
        @required = options[:required]
      end

      # True when a :value option was supplied.
      def value_configured?
        !@value.nil?
      end

      def to_s
        "name: #{@name} dataclass: #{@data_class} setter: #{@setter} required: #{@required} value: #{@value} as:#{@as} collection: #{@collection} with: #{@with}"
      end

      # The :as option when given, otherwise the element name as a Symbol.
      def column
        @as || @name.to_sym
      end

      def required?
        !!@required
      end

      # Looks up the configured :value attribute in +attrs+; nil when missing.
      def value_from_attrs(attrs)
        attrs.fetch(@value, nil)
      end

      # True when every :with constraint matches the corresponding entry of
      # +attrs+ (always true when there are no constraints).
      def attrs_match?(attrs)
        @with.all? do |key, value|
          value === attrs[key.to_s]
        end
      end

      # True when a :value option was supplied and the :with constraints match.
      def has_value_and_attrs_match?(attrs)
        value_configured? && attrs_match?(attrs)
      end

      def collection?
        !!@collection
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Saxophone
  class SAXConfig
    # Settings for capturing the text value of an element itself.
    # Reads the :as, :required and :class options.
    class ElementValueConfig
      attr_reader :name, :setter, :data_class

      def initialize(name, options)
        @as, @required, @data_class = options.values_at(:as, :required, :class)
        @name = name.to_s
        @setter = "#{@as}="
      end

      # The :as option when present, otherwise the name as a Symbol.
      def column
        return @as if @as

        @name.to_sym
      end

      # Strict boolean view of the :required option.
      def required?
        @required ? true : false
      end
    end
  end
end
|
@@ -0,0 +1,200 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
module Saxophone
  # Parser-independent SAX event handling that the concrete handler classes
  # mix in and wire to their parser's callback names.
  #
  # The handler keeps a stack of StackNode entries. The bottom entry wraps the
  # object being populated; every configured element that is opened pushes a
  # new entry, and closing that element assigns the collected value to the
  # entry below it and pops the stack.
  module SAXAbstractHandler
    # Sentinel stored in StackNode#buffer until character data arrives.
    NO_BUFFER = :no_buffer

    # One level of the parse stack: the object being built, the config that
    # caused it to be pushed (nil for the root) and the text collected so far.
    class StackNode < Struct.new(:object, :config, :buffer)
      def initialize(object, config = nil, buffer = NO_BUFFER)
        self.object = object
        self.config = config
        self.buffer = buffer
      end
    end

    # Must be provided by the including class.
    def sax_parse(xml_input)
      raise NotImplementedError
    end

    # Prepares handler state.
    #
    # object     - the root object that receives parsed values.
    # on_error   - optional callable invoked with an error message.
    # on_warning - optional callable invoked with a warning message.
    def _initialize(object, on_error = nil, on_warning = nil)
      @stack = [ StackNode.new(object) ]
      @parsed_configs = {}
      @on_error = on_error
      @on_warning = on_warning
    end

    # Appends character data to the buffer of the innermost stack entry.
    def _characters(data)
      node = stack.last

      if node.buffer == NO_BUFFER
        # Copy so that later appends never mutate the parser's own string.
        node.buffer = data.dup
      else
        node.buffer << data
      end
    end

    # Handles an opening tag.
    #
    # name  - tag name (dashes are normalized to underscores).
    # attrs - attribute pairs or a Hash; converted with Hash[].
    def _start_element(name, attrs = [])
      name = normalize_name(name)
      node = stack.last
      object = node.object

      sax_config = sax_config_for(object)

      if sax_config
        attrs = Hash[attrs]

        # A matching collection config starts a nested object; from here on
        # `object` and `sax_config` refer to that nested object.
        if (collection_config = sax_config.collection_config(name, attrs))
          object = collection_config.data_class.new
          sax_config = sax_config_for(object)

          stack.push(StackNode.new(object, collection_config))

          set_attributes_on(object, attrs)
        end

        # Elements configured with :value take their value from attributes.
        sax_config.element_configs_for_attribute(name, attrs).each do |ec|
          unless parsed_config?(object, ec)
            value = data_class_value(ec.data_class, ec.value_from_attrs(attrs))
            object.send(ec.setter, value)
            mark_as_parsed(object, ec)
          end
        end

        if !collection_config && (element_config = sax_config.element_config_for_tag(name, attrs))
          # Placeholder object for the stack entry; with no data class the
          # current object itself is reused.
          new_object =
            case element_config.data_class.to_s
            when "Integer" then 0
            when "Float"   then 0.0
            when "Symbol"  then nil
            when "Time"    then Time.at(0)
            when ""        then object
            else
              element_config.data_class.new
            end

          stack.push(StackNode.new(new_object, element_config))

          set_attributes_on(new_object, attrs)
        end
      end
    end

    # Handles a closing tag: assigns the collected value to the parent entry
    # and pops the stack when the tag matches the innermost entry's config.
    def _end_element(name)
      name = normalize_name(name)

      start_tag = stack[-2]
      close_tag = stack[-1]

      return unless start_tag && close_tag

      object  = start_tag.object
      element = close_tag.object
      config  = close_tag.config
      value   = close_tag.buffer

      # Closing tags that did not push a stack entry are ignored.
      return unless config.name == name

      unless parsed_config?(object, config)
        if (element_value_config = element_values_for(config))
          element_value_config.each { |evc| element.send(evc.setter, value) }
        end

        if config.respond_to?(:accessor)
          subconfig = sax_config_for(element)

          # The attribute set must be Hash-like: element configs look
          # attributes up by String key, which raises TypeError on an Array
          # as soon as a :with constraint is present.
          if (econf = subconfig.element_config_for_tag(name, {}))
            element.send(econf.setter, value) unless econf.value_configured?
          end

          object.send(config.accessor) << element
        else
          # Unknown data classes convert to nil, in which case the object
          # built for the element is assigned instead.
          value = data_class_value(config.data_class, value) || element
          object.send(config.setter, value) if value != NO_BUFFER
          mark_as_parsed(object, config)
        end

        # try to set the ancestor
        if (sax_config = sax_config_for(element))
          sax_config.ancestors.each do |ancestor|
            element.send(ancestor.setter, object)
          end
        end
      end

      stack.pop
    end

    # Forwards an error message to the on_error callable, if one was given.
    def _error(string)
      @on_error.call(string) if @on_error
    end

    # Forwards a warning message to the on_warning callable, if one was given.
    def _warning(string)
      @on_warning.call(string) if @on_warning
    end

    private

    # Remembers that +element_config+ was applied to +object+; collection
    # configs are never recorded.
    def mark_as_parsed(object, element_config)
      unless element_config.collection?
        @parsed_configs[[object.object_id, element_config.object_id]] = true
      end
    end

    def parsed_config?(object, element_config)
      @parsed_configs[[object.object_id, element_config.object_id]]
    end

    # The sax_config of the object's class, or nil when the class has none.
    def sax_config_for(object)
      if object.class.respond_to?(:sax_config)
        object.class.sax_config
      end
    end

    # Element-value configs of the config's data class, or nil when that
    # class does not expose a sax_config.
    def element_values_for(config)
      if config.data_class.respond_to?(:sax_config)
        config.data_class.sax_config.element_values_for_element
      end
    end

    def normalize_name(name)
      name.to_s.tr("-", "_")
    end

    # Applies every matching attribute config of the object's class to it.
    def set_attributes_on(object, attributes)
      config = sax_config_for(object)

      if config
        config.attribute_configs_for_element(attributes).each do |ac|
          value = data_class_value(ac.data_class, ac.value_from_attrs(attributes))
          object.send(ac.setter, value)
        end
      end
    end

    # Converts +value+ according to +data_class+. NO_BUFFER is passed through
    # untouched, and any data class not listed here yields nil.
    def data_class_value(data_class, value)
      case data_class.to_s
      when "String"  then value != NO_BUFFER ? value.to_s : value
      when "Integer" then value != NO_BUFFER ? value.to_i : value
      # A decimal comma is accepted as well as a decimal point.
      when "Float"   then value != NO_BUFFER ? value.to_s.gsub(",", ".").to_f : value
      when "Symbol"  then
        if value != NO_BUFFER
          value.to_s.empty? ? nil : value.to_s.downcase.to_sym
        else
          value
        end
      # Assumes that time elements will be string-based and are not
      # something else, e.g. seconds since epoch
      when "Time"    then value != NO_BUFFER ? Time.parse(value.to_s) : value
      when ""        then value
      end
    end

    def stack
      @stack
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'saxophone/handlers/sax_abstract_handler'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module Saxophone
  # SAX handler backed by Nokogiri. Nokogiri's callback names are mapped
  # directly onto the shared SAXAbstractHandler implementation.
  class SAXNokogiriHandler < Nokogiri::XML::SAX::Document
    include SAXAbstractHandler

    alias initialize    _initialize
    alias characters    _characters
    alias cdata_block   _characters
    alias start_element _start_element
    alias end_element   _end_element
    alias error         _error
    alias warning       _warning

    # Runs a Nokogiri SAX parse of +xml_input+ with this object as the
    # document handler.
    #
    # NOTE(review): entity replacement is switched on for the parse; confirm
    # against the Nokogiri documentation that this is acceptable for
    # untrusted input.
    def sax_parse(xml_input)
      Nokogiri::XML::SAX::Parser.new(self).parse(xml_input) do |context|
        context.replace_entities = true
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'saxophone/handlers/sax_abstract_handler'
|
2
|
+
require 'oga'
|
3
|
+
|
4
|
+
module Saxophone
  # SAX handler backed by Oga. Translates Oga's callbacks into the shared
  # SAXAbstractHandler events.
  class SAXOgaHandler
    include SAXAbstractHandler

    def initialize(*args)
      _initialize(*args)
    end

    # Parses +xml_input+ with Oga; an LL::ParserError is routed to #on_error
    # instead of being raised.
    def sax_parse(xml_input)
      begin
        Oga.sax_parse_xml(self, xml_input)
      rescue LL::ParserError => parse_error
        on_error(parse_error.message)
      end
    end

    # Opening tag callback.
    def on_element(namespace, name, attrs)
      qualified = node_name(namespace, name)
      _start_element(qualified, attrs)
    end

    # Closing tag callback.
    def after_element(namespace, name)
      qualified = node_name(namespace, name)
      _end_element(qualified)
    end

    # Joins all arguments with a space and reports them as a single error.
    def on_error(*args)
      message = args.join(" ")
      _error(message)
    end

    alias on_text  _characters
    alias on_cdata _characters

    private

    # "namespace:name" when a namespace is given, otherwise just the name.
    def node_name(namespace, name)
      return name unless namespace

      "#{namespace}:#{name}"
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'saxophone/handlers/sax_abstract_handler'
|
2
|
+
require 'ox'
|
3
|
+
|
4
|
+
module Saxophone
  # SAX handler backed by Ox. Ox reports the tag name and each attribute
  # through separate callbacks, so they are collected here and forwarded as a
  # single start-element event once the attributes are complete.
  class SAXOxHandler < Ox::Sax
    include SAXAbstractHandler

    def initialize(*args)
      _initialize(*args)
      _reset_element
    end

    # Parses +xml_input+; a String is wrapped in a StringIO first.
    def sax_parse(xml_input)
      # Ox requires input to be streamable
      source = xml_input.is_a?(String) ? StringIO.new(xml_input) : xml_input

      Ox.sax_parse(self, source, symbolize: false, convert_special: true, skip: :skip_return)
    end

    # Remembers the tag name until its attributes have been reported.
    def start_element(name)
      @element = name
    end

    # Records one attribute of the pending element.
    def attr(name, str)
      @attrs[name] = str
    end

    # Emits the start-element event for the pending tag, then clears the
    # pending state.
    def attrs_done
      _start_element(@element, @attrs)
      _reset_element
    end

    # Forwards text to the shared handler; nil and empty values are dropped.
    def text(value)
      return unless value
      return if value.empty?

      _characters(value)
    end

    alias cdata text

    def error(message, line, column)
      _error("#{message} on line #{line} column #{column}")
    end

    alias end_element _end_element

    private

    # Clears the pending tag name and attributes.
    def _reset_element
      @attrs = {}
      @element = ""
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require "saxophone/config/sax_element_value"
|
2
|
+
require "saxophone/config/sax_attribute"
|
3
|
+
require "saxophone/config/sax_element"
|
4
|
+
require "saxophone/config/sax_collection"
|
5
|
+
require "saxophone/config/sax_ancestor"
|
6
|
+
|
7
|
+
module Saxophone
  # Holds every mapping declared for one document class: element configs and
  # collection configs keyed by tag name, plus flat lists of attribute,
  # element-value and ancestor configs.
  class SAXConfig
    attr_accessor :top_level_elements, :top_level_attributes, :top_level_element_value, :collection_elements, :ancestors

    def initialize
      # Default value is an empty array
      @top_level_elements      = Hash.new { |hash, key| hash[key] = [] }
      @top_level_attributes    = []
      @top_level_element_value = []
      @collection_elements     = Hash.new { |hash, key| hash[key] = [] }
      @ancestors               = []
    end

    # All element configs, across every tag name, as one flat array.
    def columns
      @top_level_elements.values.flatten
    end

    # Makes this config an independent copy of +sax_config+. The per-name
    # lists inside the two hashes are duplicated as well, so that adding a
    # config to the copy never changes the config it was copied from.
    def initialize_copy(sax_config)
      super

      @top_level_elements      = copy_config_lists(sax_config.top_level_elements)
      @top_level_attributes    = sax_config.top_level_attributes.clone
      @top_level_element_value = sax_config.top_level_element_value.clone
      @collection_elements     = copy_config_lists(sax_config.collection_elements)
      @ancestors               = sax_config.ancestors.clone
    end

    def add_top_level_element(name, options)
      @top_level_elements[name.to_s] << ElementConfig.new(name, options)
    end

    # The config name is taken from options[:name], which is removed from
    # +options+; the +name+ argument itself is not used.
    def add_top_level_attribute(name, options)
      @top_level_attributes << AttributeConfig.new(options.delete(:name), options)
    end

    # The config name is taken from options[:name], which is removed from
    # +options+; the +name+ argument itself is not used.
    def add_top_level_element_value(name, options)
      @top_level_element_value << ElementValueConfig.new(options.delete(:name), options)
    end

    def add_collection_element(name, options)
      @collection_elements[name.to_s] << CollectionConfig.new(name, options)
    end

    def add_ancestor(name, options)
      @ancestors << AncestorConfig.new(name, options)
    end

    # First collection config for +name+ whose constraints match +attrs+,
    # or nil when there is none.
    def collection_config(name, attrs)
      # Check for the key first: indexing the hash directly would store an
      # empty list for every tag name that is ever looked up.
      return unless @collection_elements.has_key?(name.to_s)

      @collection_elements[name.to_s].detect { |cc| cc.attrs_match?(attrs) }
    end

    def attribute_configs_for_element(attrs)
      @top_level_attributes.select { |aa| aa.attrs_match?(attrs) }
    end

    def element_values_for_element
      @top_level_element_value
    end

    # Element configs for +name+ that read their value from an attribute and
    # whose constraints match +attrs+.
    def element_configs_for_attribute(name, attrs)
      return [] unless @top_level_elements.has_key?(name.to_s)

      @top_level_elements[name.to_s].select { |ec| ec.has_value_and_attrs_match?(attrs) }
    end

    # First element config for +name+ whose constraints match +attrs+, or nil.
    def element_config_for_tag(name, attrs)
      return unless @top_level_elements.has_key?(name.to_s)

      @top_level_elements[name.to_s].detect { |ec| ec.attrs_match?(attrs) }
    end

    private

    # Duplicates a name => list hash together with each of its lists.
    # Hash#dup keeps the default block of the source hash.
    def copy_config_lists(source)
      copy = source.dup
      copy.keys.each { |key| copy[key] = copy[key].dup }
      copy
    end
  end
end
|