sax-machine 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.md +7 -0
- data/README.md +20 -0
- data/lib/sax-machine/config/sax_attribute.rb +1 -18
- data/lib/sax-machine/handlers/sax_abstract_handler.rb +11 -0
- data/lib/sax-machine/handlers/sax_nokogiri_handler.rb +7 -0
- data/lib/sax-machine/handlers/sax_ox_handler.rb +8 -0
- data/lib/sax-machine/sax_config.rb +1 -1
- data/lib/sax-machine/sax_document.rb +4 -19
- data/lib/sax-machine/version.rb +1 -1
- data/spec/sax-machine/sax_document_spec.rb +62 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70bbbc0d3143b8a3470722b16bd6ae18c7f20823
|
4
|
+
data.tar.gz: 2175ebaaa36b8d325d5bd2a67296edb8ad6c5b9b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de8580d61bbfc9bba11ea486c2eac3796a74b882c61d9fade5c14073b38f684b80c3d48489e6d9f520e0d77d3c8af34c0538fa5b125de33e2708c6b93a41b8de
|
7
|
+
data.tar.gz: 6f642b324f6f39c6662e5112fced199d8bd976d968fd228a530e497a0fd695df5232e25c64a34151a5399b00154de0f97878026c34fe8320f89db8728370d3e2
|
data/HISTORY.md
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
# HEAD
|
2
2
|
|
3
|
+
# 1.0.3
|
4
|
+
|
5
|
+
* Remove missed `nokogiri` reference [[#54](https://github.com/pauldix/sax-machine/pull/54)]
|
6
|
+
* Add support for `Symbol` data type conversion [[#57](https://github.com/pauldix/sax-machine/pull/57)]
|
7
|
+
* Add specs for multiple elements with the same alias [[#53](https://github.com/pauldix/sax-machine/pull/53)]
|
8
|
+
* Various code and documentation enhancements
|
9
|
+
|
3
10
|
# 1.0.2
|
4
11
|
|
5
12
|
* Make sure SAXConfig getters do not modify internal vars. Prevent race conditions
|
data/README.md
CHANGED
@@ -137,6 +137,26 @@ class X < ActiveRecord::Base
|
|
137
137
|
end
|
138
138
|
```
|
139
139
|
|
140
|
+
Multiple elements can be mapped to the same alias:
|
141
|
+
|
142
|
+
```ruby
|
143
|
+
class RSSEntry
|
144
|
+
include SAXMachine
|
145
|
+
# ...
|
146
|
+
element :pubDate, as: :published
|
147
|
+
element :pubdate, as: :published
|
148
|
+
element :"dc:date", as: :published
|
149
|
+
element :"dc:Date", as: :published
|
150
|
+
element :"dcterms:created", as: :published
|
151
|
+
end
|
152
|
+
```
|
153
|
+
|
154
|
+
If more than one of these elements exists in the source, the value from the *last one* is used. The order of
|
155
|
+
the `element` declarations in the code is unimportant. The order in which they are encountered while parsing the
|
156
|
+
document determines the value assigned to the alias.
|
157
|
+
|
158
|
+
If an element is defined in the source but is blank (e.g., `<pubDate></pubDate>`), it is ignored, and the non-empty one is picked.
|
159
|
+
|
140
160
|
## Contributing
|
141
161
|
|
142
162
|
1. Fork it
|
@@ -1,23 +1,6 @@
|
|
1
1
|
module SAXMachine
|
2
2
|
class SAXConfig
|
3
|
-
class AttributeConfig
|
4
|
-
attr_reader :name, :setter
|
5
|
-
|
6
|
-
def initialize(name, options)
|
7
|
-
@name = name.to_s
|
8
|
-
@as = options[:as]
|
9
|
-
@setter = "#{@as}="
|
10
|
-
@required = options[:required]
|
11
|
-
end
|
12
|
-
|
13
|
-
def column
|
14
|
-
@as || @name.to_sym
|
15
|
-
end
|
16
|
-
|
17
|
-
def required?
|
18
|
-
!!@required
|
19
|
-
end
|
20
|
-
|
3
|
+
class AttributeConfig < ElementValueConfig
|
21
4
|
def value_from_attrs(attrs)
|
22
5
|
attrs.fetch(@name, nil)
|
23
6
|
end
|
@@ -12,6 +12,10 @@ module SAXMachine
|
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
|
+
def sax_parse(xml_text)
|
16
|
+
raise NotImplementedError
|
17
|
+
end
|
18
|
+
|
15
19
|
def _initialize(object, on_error = nil, on_warning = nil)
|
16
20
|
@stack = [ StackNode.new(object) ]
|
17
21
|
@parsed_configs = {}
|
@@ -60,6 +64,7 @@ module SAXMachine
|
|
60
64
|
case element_config.data_class.to_s
|
61
65
|
when "Integer" then 0
|
62
66
|
when "Float" then 0.0
|
67
|
+
when "Symbol" then nil
|
63
68
|
when "Time" then Time.at(0)
|
64
69
|
when "" then object
|
65
70
|
else
|
@@ -107,6 +112,12 @@ module SAXMachine
|
|
107
112
|
when "String" then value != NO_BUFFER ? value.to_s : value
|
108
113
|
when "Integer" then value != NO_BUFFER ? value.to_i : value
|
109
114
|
when "Float" then value != NO_BUFFER ? value.to_f : value
|
115
|
+
when "Symbol" then
|
116
|
+
if value != NO_BUFFER
|
117
|
+
value.to_s.empty? ? nil : value.to_s.downcase.to_sym
|
118
|
+
else
|
119
|
+
value
|
120
|
+
end
|
110
121
|
# Assumes that time elements will be string-based and are not
|
111
122
|
# something else, e.g. seconds since epoch
|
112
123
|
when "Time" then value != NO_BUFFER ? Time.parse(value.to_s) : value
|
@@ -5,6 +5,13 @@ module SAXMachine
|
|
5
5
|
class SAXNokogiriHandler < Nokogiri::XML::SAX::Document
|
6
6
|
include SAXAbstractHandler
|
7
7
|
|
8
|
+
def sax_parse(xml_text)
|
9
|
+
parser = Nokogiri::XML::SAX::Parser.new(self)
|
10
|
+
parser.parse(xml_text) do |ctx|
|
11
|
+
ctx.replace_entities = true
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
8
15
|
alias_method :initialize, :_initialize
|
9
16
|
alias_method :characters, :_characters
|
10
17
|
alias_method :cdata_block, :_characters
|
@@ -10,6 +10,14 @@ module SAXMachine
|
|
10
10
|
_reset_element
|
11
11
|
end
|
12
12
|
|
13
|
+
def sax_parse(xml_text)
|
14
|
+
Ox.sax_parse(self, StringIO.new(xml_text),
|
15
|
+
symbolize: false,
|
16
|
+
convert_special: true,
|
17
|
+
skip: :skip_return,
|
18
|
+
)
|
19
|
+
end
|
20
|
+
|
13
21
|
def attr(name, str)
|
14
22
|
@attrs[name] = str
|
15
23
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
|
3
1
|
module SAXMachine
|
4
2
|
def self.included(base)
|
5
3
|
base.send(:include, InstanceMethods)
|
@@ -7,23 +5,10 @@ module SAXMachine
|
|
7
5
|
end
|
8
6
|
|
9
7
|
def parse(xml_text, on_error = nil, on_warning = nil)
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
{
|
15
|
-
symbolize: false,
|
16
|
-
convert_special: true,
|
17
|
-
skip: :skip_return,
|
18
|
-
}
|
19
|
-
)
|
20
|
-
else
|
21
|
-
handler = SAXNokogiriHandler.new(self, on_error, on_warning)
|
22
|
-
parser = Nokogiri::XML::SAX::Parser.new(handler)
|
23
|
-
parser.parse(xml_text) do |ctx|
|
24
|
-
ctx.replace_entities = true
|
25
|
-
end
|
26
|
-
end
|
8
|
+
handler_klass = SAXMachine.const_get("SAX#{SAXMachine.handler.capitalize}Handler")
|
9
|
+
|
10
|
+
handler = handler_klass.new(self, on_error, on_warning)
|
11
|
+
handler.sax_parse(xml_text)
|
27
12
|
|
28
13
|
self
|
29
14
|
end
|
data/lib/sax-machine/version.rb
CHANGED
@@ -182,6 +182,16 @@ describe "SAXMachine" do
|
|
182
182
|
document = @klass.parse("<time>1994-02-04T06:20:00Z</time>")
|
183
183
|
expect(document.time).to eq(Time.utc(1994, 2, 4, 6, 20, 0, 0))
|
184
184
|
end
|
185
|
+
|
186
|
+
it "handles a Symbol class" do
|
187
|
+
@klass = Class.new do
|
188
|
+
include SAXMachine
|
189
|
+
element :symbol, class: Symbol
|
190
|
+
end
|
191
|
+
|
192
|
+
document = @klass.parse("<symbol>MY_SYMBOL_VALUE</symbol>")
|
193
|
+
expect(document.symbol).to eq(:my_symbol_value)
|
194
|
+
end
|
185
195
|
end
|
186
196
|
|
187
197
|
describe "the default attribute" do
|
@@ -914,6 +924,58 @@ describe "SAXMachine" do
|
|
914
924
|
end
|
915
925
|
end
|
916
926
|
|
927
|
+
describe "with multiple elements with the same alias" do
|
928
|
+
let(:item) { ItemElement5.parse(xml) }
|
929
|
+
|
930
|
+
before do
|
931
|
+
class ItemElement5
|
932
|
+
include SAXMachine
|
933
|
+
element :pubDate, as: :published
|
934
|
+
element :"dc:date", as: :published
|
935
|
+
end
|
936
|
+
end
|
937
|
+
|
938
|
+
describe "only first defined" do
|
939
|
+
let(:xml) { "<item xmlns:dc='http://www.example.com'><pubDate>first value</pubDate></item>" }
|
940
|
+
|
941
|
+
it "has first value" do
|
942
|
+
expect(item.published).to eq("first value")
|
943
|
+
end
|
944
|
+
end
|
945
|
+
|
946
|
+
describe "only last defined" do
|
947
|
+
let(:xml) { "<item xmlns:dc='http://www.example.com'><dc:date>last value</dc:date></item>" }
|
948
|
+
|
949
|
+
it "has last value" do
|
950
|
+
expect(item.published).to eq("last value")
|
951
|
+
end
|
952
|
+
end
|
953
|
+
|
954
|
+
describe "both defined" do
|
955
|
+
let(:xml) { "<item xmlns:dc='http://www.example.com'><pubDate>first value</pubDate><dc:date>last value</dc:date></item>" }
|
956
|
+
|
957
|
+
it "has last value" do
|
958
|
+
expect(item.published).to eq("last value")
|
959
|
+
end
|
960
|
+
end
|
961
|
+
|
962
|
+
describe "both defined but order is reversed" do
|
963
|
+
let(:xml) { "<item xmlns:dc='http://www.example.com'><dc:date>last value</dc:date><pubDate>first value</pubDate></item>" }
|
964
|
+
|
965
|
+
it "has first value" do
|
966
|
+
expect(item.published).to eq("first value")
|
967
|
+
end
|
968
|
+
end
|
969
|
+
|
970
|
+
describe "both defined but last is empty" do
|
971
|
+
let(:xml) { "<item xmlns:dc='http://www.example.com'><pubDate>first value</pubDate><dc:date></dc:date></item>" }
|
972
|
+
|
973
|
+
it "has first value" do
|
974
|
+
expect(item.published).to eq("first value")
|
975
|
+
end
|
976
|
+
end
|
977
|
+
end
|
978
|
+
|
917
979
|
describe "with error handling" do
|
918
980
|
before do
|
919
981
|
@xml = %[
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sax-machine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Dix
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2014-
|
14
|
+
date: 2014-11-06 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rspec
|