saxophone 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +35 -0
- data/Gemfile +19 -0
- data/README.md +206 -0
- data/Rakefile +7 -0
- data/archive/HISTORY.md +77 -0
- data/lib/saxophone.rb +31 -0
- data/lib/saxophone/config/sax_ancestor.rb +17 -0
- data/lib/saxophone/config/sax_attribute.rb +18 -0
- data/lib/saxophone/config/sax_collection.rb +33 -0
- data/lib/saxophone/config/sax_element.rb +65 -0
- data/lib/saxophone/config/sax_element_value.rb +23 -0
- data/lib/saxophone/handlers/sax_abstract_handler.rb +200 -0
- data/lib/saxophone/handlers/sax_nokogiri_handler.rb +23 -0
- data/lib/saxophone/handlers/sax_oga_handler.rb +39 -0
- data/lib/saxophone/handlers/sax_ox_handler.rb +56 -0
- data/lib/saxophone/sax_config.rb +78 -0
- data/lib/saxophone/sax_configure.rb +33 -0
- data/lib/saxophone/sax_document.rb +137 -0
- data/lib/saxophone/version.rb +3 -0
- data/saxophone.gemspec +19 -0
- data/spec/fixtures/atom-content.html +15 -0
- data/spec/fixtures/atom.xml +165 -0
- data/spec/saxophone/sax_activerecord_spec.rb +33 -0
- data/spec/saxophone/sax_configure_spec.rb +51 -0
- data/spec/saxophone/sax_document_spec.rb +1218 -0
- data/spec/saxophone/sax_include_spec.rb +49 -0
- data/spec/spec_helper.rb +22 -0
- metadata +98 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 57c5242aea2b5daec2f7129d6b12fe56e28e07a8
|
4
|
+
data.tar.gz: c5f4823813e45c56693feeafbb808253367cf533
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 18a39e8330f39c5da059bfcb39f147084d8d707fd6375a9561d4711ce3269dc16a98f860a717d45b181cbd79c555edfc329e7ff80818c0f7e41a65f29a0ce9e6
|
7
|
+
data.tar.gz: 6d4b5006f62bacf0d51e6300ba146083301fcb3922dd960282216272622a130ce7cd6fac88481ad5149ceedf4f0e69c5538decde24cae5fa16ae7944594a3c7c
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.5.1
|
data/.travis.yml
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
language: ruby
|
2
|
+
|
3
|
+
rvm:
|
4
|
+
- 1.9.3
|
5
|
+
- 2.0
|
6
|
+
- 2.1
|
7
|
+
- 2.2
|
8
|
+
- 2.3
|
9
|
+
- 2.4
|
10
|
+
- 2.5
|
11
|
+
- jruby-1.7
|
12
|
+
- rbx-2
|
13
|
+
- ruby-head
|
14
|
+
- jruby-head
|
15
|
+
|
16
|
+
sudo: false
|
17
|
+
|
18
|
+
env:
|
19
|
+
matrix:
|
20
|
+
- HANDLER="nokogiri"
|
21
|
+
- HANDLER="ox"
|
22
|
+
- HANDLER="oga"
|
23
|
+
|
24
|
+
matrix:
|
25
|
+
exclude:
|
26
|
+
- env: HANDLER="ox"
|
27
|
+
rvm: jruby-1.7
|
28
|
+
- env: HANDLER="ox"
|
29
|
+
rvm: jruby-head
|
30
|
+
allow_failures:
|
31
|
+
- env: HANDLER="oga"
|
32
|
+
rvm: jruby-1.7
|
33
|
+
- rvm: rbx-2
|
34
|
+
- rvm: ruby-head
|
35
|
+
- rvm: jruby-head
|
data/Gemfile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Fetch gems over HTTPS rather than plain HTTP so downloads cannot be
# tampered with in transit.
source "https://rubygems.org"

# Runtime dependencies are declared in the gemspec.
gemspec

group :development, :test do
  gem 'rake'
  gem 'guard-rspec'
  gem 'simplecov', require: false, platforms: [:mri]
  gem 'coveralls', require: false, platforms: [:mri]

  gem 'activerecord', '~> 5.0.0'
  gem 'nokogiri', '>= 1.8.2'
  gem 'ox', '>= 2.1.2', platforms: [:mri, :rbx]
  gem 'oga', '>= 0.3.4'
end

group :test do
  gem 'sqlite3'
end
|
data/README.md
ADDED
@@ -0,0 +1,206 @@
|
|
1
|
+
# Saxophone
|
2
|
+
|
3
|
+
A declarative SAX parsing library backed by Nokogiri, Ox or Oga.
|
4
|
+
|
5
|
+
## Origins
|
6
|
+
|
7
|
+
This repository is a fork of [pauldix/sax-machine](https://github.com/pauldix/sax-machine). We'd like to
|
8
|
+
thank all original authors and contributors for their work on the original repository. However, we have
|
9
|
+
the feeling that the original repository is not being actively maintained anymore - that's why we decided to
|
10
|
+
fork it and continue the work of the original authors in our façon. To make the distinction clear, we
|
11
|
+
renamed the project from that point to `Saxophone`.
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
Add this line to your application's Gemfile:
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
gem 'saxophone'
|
19
|
+
```
|
20
|
+
|
21
|
+
And then execute:
|
22
|
+
|
23
|
+
```bash
|
24
|
+
$ bundle
|
25
|
+
```
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
Saxophone can use either `nokogiri`, `ox` or `oga` as XML SAX handler.
|
30
|
+
|
31
|
+
To use **Nokogiri** add this line to your Gemfile:
|
32
|
+
|
33
|
+
```ruby
|
34
|
+
gem 'nokogiri', '~> 1.6'
|
35
|
+
```
|
36
|
+
|
37
|
+
To use **Ox** add this line to your Gemfile:
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
gem 'ox', '>= 2.1.2'
|
41
|
+
```
|
42
|
+
|
43
|
+
To use **Oga** add this line to your Gemfile:
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
gem 'oga', '>= 0.2.0'
|
47
|
+
```
|
48
|
+
|
49
|
+
You can also specify which handler to use manually, like this:
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
Saxophone.handler = :nokogiri
|
53
|
+
```
|
54
|
+
|
55
|
+
## Examples
|
56
|
+
|
57
|
+
Include `Saxophone` in any class and define properties to parse:
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
class AtomContent
|
61
|
+
include Saxophone
|
62
|
+
attribute :type
|
63
|
+
value :text
|
64
|
+
end
|
65
|
+
|
66
|
+
class AtomEntry
|
67
|
+
include Saxophone
|
68
|
+
element :title
|
69
|
+
# The :as argument makes this available through entry.author instead of .name
|
70
|
+
element :name, as: :author
|
71
|
+
element "feedburner:origLink", as: :url
|
72
|
+
# The :default argument specifies default value for element when it's missing
|
73
|
+
element :summary, class: String, default: "No summary available"
|
74
|
+
element :content, class: AtomContent
|
75
|
+
element :published
|
76
|
+
ancestor :ancestor
|
77
|
+
end
|
78
|
+
|
79
|
+
class Atom
|
80
|
+
include Saxophone
|
81
|
+
# Use block to modify the returned value
|
82
|
+
# Blocks are working with pretty much everything,
|
83
|
+
# except for `elements` with `class` attribute
|
84
|
+
element :title do |title|
|
85
|
+
title.strip
|
86
|
+
end
|
87
|
+
# The :with argument means that you only match a link tag
|
88
|
+
# that has an attribute of type: "text/html"
|
89
|
+
element :link, value: :href, as: :url, with: {
|
90
|
+
type: "text/html"
|
91
|
+
}
|
92
|
+
# The :value argument means that instead of setting the value
|
93
|
+
# to the text between the tag, it sets it to the attribute value of :href
|
94
|
+
element :link, value: :href, as: :feed_url, with: {
|
95
|
+
type: "application/atom+xml"
|
96
|
+
}
|
97
|
+
elements :entry, as: :entries, class: AtomEntry
|
98
|
+
end
|
99
|
+
```
|
100
|
+
|
101
|
+
Then parse any XML with your class:
|
102
|
+
|
103
|
+
```ruby
|
104
|
+
feed = Atom.parse(xml_text)
|
105
|
+
|
106
|
+
feed.title # Whatever the title of the blog is
|
107
|
+
feed.url # The main URL of the blog
|
108
|
+
feed.feed_url # The URL of the blog feed
|
109
|
+
|
110
|
+
feed.entries.first.title # Title of the first entry
|
111
|
+
feed.entries.first.author # The author of the first entry
|
112
|
+
feed.entries.first.url # Permalink on the blog for this entry
|
113
|
+
feed.entries.first.summary # Returns "No summary available" if summary is missing
|
114
|
+
feed.entries.first.ancestor # The Atom ancestor
|
115
|
+
feed.entries.first.content # Instance of AtomContent
|
116
|
+
feed.entries.first.content.text # Entry content text
|
117
|
+
```
|
118
|
+
|
119
|
+
You can also use the elements method without specifying a class:
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
class ServiceResponse
|
123
|
+
include Saxophone
|
124
|
+
elements :message, as: :messages
|
125
|
+
end
|
126
|
+
|
127
|
+
response = ServiceResponse.parse("
|
128
|
+
<response>
|
129
|
+
<message>hi</message>
|
130
|
+
<message>world</message>
|
131
|
+
</response>
|
132
|
+
")
|
133
|
+
response.messages.first # hi
|
134
|
+
response.messages.last # world
|
135
|
+
```
|
136
|
+
|
137
|
+
To limit conflicts in the class used for mapping, you can use the alternate
|
138
|
+
`Saxophone.configure` syntax:
|
139
|
+
|
140
|
+
```ruby
|
141
|
+
class X < ActiveRecord::Base
|
142
|
+
# This way no element, elements or ancestor method will be added to X
|
143
|
+
Saxophone.configure(X) do |c|
|
144
|
+
c.element :title
|
145
|
+
end
|
146
|
+
end
|
147
|
+
```
|
148
|
+
|
149
|
+
Multiple elements can be mapped to the same alias:
|
150
|
+
|
151
|
+
```ruby
|
152
|
+
class RSSEntry
|
153
|
+
include Saxophone
|
154
|
+
# ...
|
155
|
+
element :pubDate, as: :published
|
156
|
+
element :pubdate, as: :published
|
157
|
+
element :"dc:date", as: :published
|
158
|
+
element :"dc:Date", as: :published
|
159
|
+
element :"dcterms:created", as: :published
|
160
|
+
end
|
161
|
+
```
|
162
|
+
|
163
|
+
If more than one of these elements exists in the source, the value from the *last one* is used. The order of
|
164
|
+
the `element` declarations in the code is unimportant. The order they are encountered while parsing the
|
165
|
+
document determines the value assigned to the alias.
|
166
|
+
|
167
|
+
If an element is defined in the source but is blank (e.g., `<pubDate></pubDate>`), it is ignored, and a non-empty one is picked instead.
|
168
|
+
|
169
|
+
## Contributing
|
170
|
+
|
171
|
+
1. Fork it
|
172
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
173
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
174
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
175
|
+
5. Create new Pull Request
|
176
|
+
|
177
|
+
## LICENSE
|
178
|
+
|
179
|
+
The MIT License
|
180
|
+
|
181
|
+
Copyright (c) 2009-2018:
|
182
|
+
|
183
|
+
* [Paul Dix](http://www.pauldix.net)
|
184
|
+
* [Julien Kirch](http://www.archiloque.net)
|
185
|
+
* [Ezekiel Templin](http://zeke.templ.in)
|
186
|
+
* [Dmitry Krasnoukhov](http://krasnoukhov.com)
|
187
|
+
* [Robin Neumann](https://github.com/neumanrq)
|
188
|
+
|
189
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
190
|
+
a copy of this software and associated documentation files (the
|
191
|
+
'Software'), to deal in the Software without restriction, including
|
192
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
193
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
194
|
+
permit persons to whom the Software is furnished to do so, subject to
|
195
|
+
the following conditions:
|
196
|
+
|
197
|
+
The above copyright notice and this permission notice shall be
|
198
|
+
included in all copies or substantial portions of the Software.
|
199
|
+
|
200
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
201
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
202
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
203
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
204
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
205
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
206
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
data/archive/HISTORY.md
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
# Version history of parent gem "sax-machine":
|
2
|
+
|
3
|
+
# 1.3.2
|
4
|
+
|
5
|
+
* Compatibility with Oga 0.3
|
6
|
+
|
7
|
+
# 1.3.1
|
8
|
+
|
9
|
+
* Allow default value to be `false` [[#66](https://github.com/pauldix/sax-machine/pull/66)]
|
10
|
+
* Support adding class to an attribute [[#68](https://github.com/pauldix/sax-machine/pull/68)]
|
11
|
+
* Adjust Ox handler to skip empty text/cdata values
|
12
|
+
|
13
|
+
# 1.3.0
|
14
|
+
|
15
|
+
* Improve block modifiers to support all config options
|
16
|
+
* Make block modifiers run in instance context
|
17
|
+
* Make all handlers support IO as a input
|
18
|
+
|
19
|
+
# 1.2.0
|
20
|
+
|
21
|
+
* Add support for blocks as value modifiers [[#61](https://github.com/pauldix/sax-machine/pull/61)]
|
22
|
+
|
23
|
+
# 1.1.1
|
24
|
+
|
25
|
+
* Fix Nokogiri autoloading [[#60](https://github.com/pauldix/sax-machine/pull/60)]
|
26
|
+
|
27
|
+
# 1.1.0
|
28
|
+
|
29
|
+
* Option to use Oga as a SAX handler
|
30
|
+
|
31
|
+
# 1.0.3
|
32
|
+
|
33
|
+
* Remove missed `nokogiri` reference [[#54](https://github.com/pauldix/sax-machine/pull/54)]
|
34
|
+
* Add support for `Symbol` data type conversion [[#57](https://github.com/pauldix/sax-machine/pull/57)]
|
35
|
+
* Add specs for multiple elements with the same alias [[#53](https://github.com/pauldix/sax-machine/pull/53)]
|
36
|
+
* Various code and documentation enhancements
|
37
|
+
|
38
|
+
# 1.0.2
|
39
|
+
|
40
|
+
* Make sure SAXConfig getters do not modify internal vars. Prevent race conditions
|
41
|
+
|
42
|
+
# 1.0.1
|
43
|
+
|
44
|
+
* Improve normalize_name performance
|
45
|
+
|
46
|
+
# 1.0.0
|
47
|
+
|
48
|
+
* Make `nokogiri` dependency optional
|
49
|
+
* Add :default argument for elements [[#51](https://github.com/pauldix/sax-machine/pull/51)]
|
50
|
+
|
51
|
+
# 0.3.0
|
52
|
+
|
53
|
+
* Option to use Ox as a SAX handler instead of Nokogiri [[#49](https://github.com/pauldix/sax-machine/pull/49)]
|
54
|
+
* Bump RSpec to 3.0, convert existing specs
|
55
|
+
|
56
|
+
# 0.2.1
|
57
|
+
|
58
|
+
* Turn on replace_entities on Nokogiri parser [[#40](https://github.com/pauldix/sax-machine/pull/40)]
|
59
|
+
* Provide mass assignment through initialize method [[#38](https://github.com/pauldix/sax-machine/pull/38)]
|
60
|
+
* Bump nokogiri (~> 1.6) and rspec, drop growl dependency
|
61
|
+
* Update 'with' option to allow pattern matching in addition to string matching
|
62
|
+
|
63
|
+
# 0.2.0.rc1
|
64
|
+
|
65
|
+
* Try to reduce the number of instances of respond_to? in the code by
|
66
|
+
pulling common uses of it out to methods. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
67
|
+
* The parse stack is now composed of simple objects instead of it being
|
68
|
+
an array of arrays. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
69
|
+
* Now using an identifier for an empty buffer instead of empty string. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
70
|
+
* Clean up several variables that were not being used. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
71
|
+
* Encapsulate stack so it's not being exposed as part of the API. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
72
|
+
* `cdata_block` is now an alias instead of delegating to characters. [[#32](https://github.com/pauldix/sax-machine/pull/32)]
|
73
|
+
|
74
|
+
# 0.1.0
|
75
|
+
|
76
|
+
* Rename parent to ancestor
|
77
|
+
* Add SAXMachine.configure
|
data/lib/saxophone.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require "saxophone/version"
|
2
|
+
require "saxophone/sax_document"
|
3
|
+
require "saxophone/sax_configure"
|
4
|
+
require "saxophone/sax_config"
|
5
|
+
|
6
|
+
module Saxophone
  # Returns the currently selected SAX handler identifier (for example
  # :nokogiri, :ox or :oga), or nil when none has been selected yet.
  #
  # The value lives in an instance variable of the Saxophone module itself
  # rather than in a class variable: class variables of a module are shared
  # with every class that includes it, so a host class defining its own
  # `@@handler` would silently overwrite the library-wide setting.
  def self.handler
    @handler ||= nil
  end

  # Selects the SAX handler to use.
  #
  # Requires "saxophone/handlers/sax_<handler>_handler" first and only
  # records the new handler once that require succeeded, so a failed
  # attempt leaves the previous selection untouched.
  #
  # A nil/false argument is ignored and keeps the current handler.
  #
  # Raises LoadError when the handler file cannot be required.
  def self.handler=(handler)
    return unless handler

    require "saxophone/handlers/sax_#{handler}_handler"
    @handler = handler
  end
end
|
18
|
+
|
19
|
+
# Probe the optional handlers in order and keep the first one whose
# handler file can be required; a LoadError simply moves on to the next
# candidate.
[:ox, :oga].each do |candidate|
  begin
    Saxophone.handler = candidate
  rescue LoadError
    next
  end
  break
end

# Neither optional handler could be loaded: fall back to Nokogiri.
Saxophone.handler = :nokogiri if Saxophone.handler.nil?
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Saxophone
  class SAXConfig
    # Settings for a single `ancestor` declaration: the declared name plus
    # the accessor it is exposed through.
    class AncestorConfig
      attr_reader :name, :setter

      # name    - declared name; stored as a String.
      # options - Hash; :as optionally overrides the accessor name.
      #
      # The setter is derived from #column so that it falls back to the
      # declared name when :as is omitted, instead of degenerating to "=".
      def initialize(name, options)
        @name = name.to_s
        @as = options[:as]
        @setter = "#{column}="
      end

      # Returns the accessor name: the :as option when given, otherwise the
      # declared name as a Symbol.
      def column
        @as || @name.to_sym
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Saxophone
  class SAXConfig
    # Element-value configuration that reads from a tag's attributes.
    # NOTE(review): relies on @name being set by ElementValueConfig, which
    # is defined elsewhere - confirm against that class.
    class AttributeConfig < ElementValueConfig
      # An attribute config never describes a collection.
      def collection?
        false
      end

      # Returns the entry stored under this config's name in +attrs+, or
      # nil when there is no such key.
      def value_from_attrs(attrs)
        attrs.fetch(@name) { nil }
      end

      # Truthy when this config's name occurs among the keys of +attrs+ or,
      # failing that, among its values.
      def attrs_match?(attrs)
        name_is_key = attrs.key?(@name)
        name_is_key || attrs.value?(@name)
      end
      alias_method :has_value_and_attrs_match?, :attrs_match?
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Saxophone
  class SAXConfig
    # Settings for a single collection declaration.
    class CollectionConfig
      attr_reader :name

      # name    - declared tag name; stored as a String.
      # options - Hash with the optional keys:
      #           :class - value returned by #data_class instead of the name
      #           :as    - accessor name; stored as a String
      #           :with  - Hash of attribute constraints checked by
      #                    #attrs_match?
      #
      # A missing or explicitly nil :with means "no constraints"; a nil
      # value used to be stored as-is and made #attrs_match? raise
      # NoMethodError.
      def initialize(name, options)
        @name = name.to_s
        @class = options[:class]
        @as = options[:as].to_s
        @with = options[:with] || {}
      end

      # Returns the accessor name as a String.
      def accessor
        as
      end

      # Returns true when every :with constraint matches the corresponding
      # entry of +attrs+ (looked up by the stringified key). Constraints are
      # compared with ===, so values such as a Regexp match by pattern and
      # plain Strings match by equality.
      def attrs_match?(attrs)
        @with.all? do |key, value|
          value === attrs[key.to_s]
        end
      end

      # Returns the :class option when given, otherwise the declared name.
      def data_class
        @class || @name
      end

      protected

      def as
        @as
      end
    end
  end
end
|