saxerator 0.9.2 → 0.9.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/README.md +10 -0
- data/lib/saxerator.rb +3 -2
- data/lib/saxerator/builder/array_element.rb +4 -1
- data/lib/saxerator/builder/empty_element.rb +24 -0
- data/lib/saxerator/builder/hash_builder.rb +9 -3
- data/lib/saxerator/builder/hash_element.rb +10 -1
- data/lib/saxerator/builder/string_element.rb +10 -1
- data/lib/saxerator/version.rb +1 -1
- data/spec/lib/builder/hash_builder_spec.rb +47 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3275996c0843e22e1ef7148b6fc14c58aff415d
|
4
|
+
data.tar.gz: bd4ea2b9abb8c9ad8cea065c7d2c41fd472df4e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5891cd6b524b6880717f21c59552dd909f73ec3135439aaafc79e90dcf76a4a30d774c1b12ad14ebf7b395d7cff6a1c2e3a049bb958ab03d0b681d06092d1b15
|
7
|
+
data.tar.gz: 82f8257eeaedf4b4942979aa961dcd4574f7c4320fd821697fbea25bc1926064bad3f5fd5cbc176382443d81fb69a79d9c323fd1fc87263c713b6b35a44e8e34
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -119,6 +119,16 @@ Why not DOM parsing?
|
|
119
119
|
> DOM parsers load the entire document into memory. Saxerator only holds a single chunk in memory at a time. If your
|
120
120
|
> document is very large, this can be an important consideration.
|
121
121
|
|
122
|
+
When I fetch a tag that has one or more elements, sometimes I get an `Array`, and other times I get a `Hash` or `String`. Is there a way I can treat these consistently?
|
123
|
+
|
124
|
+
> You can treat objects consistently as arrays using
|
125
|
+
> [Ruby's built-in array conversion method](http://www.ruby-doc.org/core-2.1.1/Kernel.html#method-i-Array)
|
126
|
+
> in the form `Array(element_or_array)`
|
127
|
+
>
|
128
|
+
> Generally you should not need to convert a parsed element to a `String` or `Hash`. One case where this
|
129
|
+
> occasionally comes up is for elements that are sometimes-empty. Empty elements behave mostly like an
|
130
|
+
> empty `Hash`; however, you may convert one to a more `String`-like object via `#to_s`.
|
131
|
+
|
122
132
|
### Acknowledgements ###
|
123
133
|
Saxerator was inspired by - but not affiliated with - [nori](https://github.com/savonrb/nori) and [Gregory Brown](http://majesticseacreature.com/)'s
|
124
134
|
[Practicing Ruby](http://practicingruby.com/)
|
data/lib/saxerator.rb
CHANGED
@@ -8,9 +8,10 @@ require 'saxerator/document_fragment'
|
|
8
8
|
require 'saxerator/configuration'
|
9
9
|
|
10
10
|
require 'saxerator/builder'
|
11
|
-
require 'saxerator/builder/string_element'
|
12
|
-
require 'saxerator/builder/hash_element'
|
13
11
|
require 'saxerator/builder/array_element'
|
12
|
+
require 'saxerator/builder/empty_element'
|
13
|
+
require 'saxerator/builder/hash_element'
|
14
|
+
require 'saxerator/builder/string_element'
|
14
15
|
require 'saxerator/builder/hash_builder'
|
15
16
|
require 'saxerator/builder/xml_builder'
|
16
17
|
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'saxerator/builder/hash_element'
|
2
|
+
|
3
|
+
module Saxerator
|
4
|
+
module Builder
|
5
|
+
class EmptyElement < HashElement
|
6
|
+
def nil?; true end
|
7
|
+
def !; true end
|
8
|
+
|
9
|
+
def to_s
|
10
|
+
StringElement.new('', name, attributes)
|
11
|
+
end
|
12
|
+
|
13
|
+
def to_h
|
14
|
+
HashElement.new(name, attributes)
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_a
|
18
|
+
array = ArrayElement.new
|
19
|
+
array.name = name
|
20
|
+
array
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -16,6 +16,10 @@ module Saxerator
|
|
16
16
|
@children << node
|
17
17
|
end
|
18
18
|
|
19
|
+
def to_empty_element
|
20
|
+
EmptyElement.new(@name, @attributes)
|
21
|
+
end
|
22
|
+
|
19
23
|
def to_s
|
20
24
|
StringElement.new(@children.join, @name, @attributes)
|
21
25
|
end
|
@@ -27,14 +31,14 @@ module Saxerator
|
|
27
31
|
name = child.name
|
28
32
|
element = child.block_variable
|
29
33
|
|
30
|
-
add_to_hash_element(
|
34
|
+
add_to_hash_element(hash, name, element)
|
31
35
|
end
|
32
36
|
|
33
37
|
if @config.put_attributes_in_hash?
|
34
38
|
|
35
39
|
@attributes.each do |attribute|
|
36
40
|
attribute.each_slice(2) do |name, element|
|
37
|
-
add_to_hash_element(
|
41
|
+
add_to_hash_element(hash, name, element)
|
38
42
|
end
|
39
43
|
end
|
40
44
|
end
|
@@ -56,7 +60,9 @@ module Saxerator
|
|
56
60
|
end
|
57
61
|
|
58
62
|
def block_variable
|
59
|
-
|
63
|
+
return to_s if @text
|
64
|
+
return to_hash if @children.count > 0
|
65
|
+
to_empty_element
|
60
66
|
end
|
61
67
|
end
|
62
68
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'saxerator/builder/array_element'
|
2
|
+
|
1
3
|
module Saxerator
|
2
4
|
module Builder
|
3
5
|
class HashElement < Hash
|
@@ -8,6 +10,13 @@ module Saxerator
|
|
8
10
|
self.name = name
|
9
11
|
self.attributes = attributes
|
10
12
|
end
|
13
|
+
|
14
|
+
def to_a
|
15
|
+
array = ArrayElement.new
|
16
|
+
array.name = name
|
17
|
+
array.concat super
|
18
|
+
array
|
19
|
+
end
|
11
20
|
end
|
12
21
|
end
|
13
|
-
end
|
22
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'saxerator/builder/array_element'
|
2
|
+
|
1
3
|
module Saxerator
|
2
4
|
module Builder
|
3
5
|
class StringElement < String
|
@@ -9,6 +11,13 @@ module Saxerator
|
|
9
11
|
self.attributes = attributes
|
10
12
|
super(str)
|
11
13
|
end
|
14
|
+
|
15
|
+
def to_a
|
16
|
+
array = ArrayElement.new
|
17
|
+
array << self
|
18
|
+
array.name = name
|
19
|
+
array
|
20
|
+
end
|
12
21
|
end
|
13
22
|
end
|
14
|
-
end
|
23
|
+
end
|
data/lib/saxerator/version.rb
CHANGED
@@ -26,6 +26,24 @@ describe "Saxerator (default) hash format" do
|
|
26
26
|
# name on a string
|
27
27
|
specify { entry['title'].name.should == 'title' }
|
28
28
|
|
29
|
+
describe "#to_a" do
|
30
|
+
it "preserves the element name on a parsed hash" do
|
31
|
+
entry.to_a.name.should == 'entry'
|
32
|
+
end
|
33
|
+
|
34
|
+
it "converts parsed hashes to nested key/value pairs (just like regular hashes)" do
|
35
|
+
entry.to_a.first.should == ['id', '1']
|
36
|
+
end
|
37
|
+
|
38
|
+
it "preserves the element name on a parsed string" do
|
39
|
+
entry['title'].to_a.name.should == 'title'
|
40
|
+
end
|
41
|
+
|
42
|
+
it "preserves the element name on an array" do
|
43
|
+
entry['contributor'].to_a.name.should eq 'contributor'
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
29
47
|
# name on an array
|
30
48
|
specify { entry['contributor'].name.should == 'contributor' }
|
31
49
|
|
@@ -33,5 +51,32 @@ describe "Saxerator (default) hash format" do
|
|
33
51
|
specify { entry['content'].should == "<p>Airplanes are very large — this can present difficulty in digestion.</p>" }
|
34
52
|
|
35
53
|
# empty element
|
36
|
-
|
37
|
-
|
54
|
+
context "parsing an empty element" do
|
55
|
+
subject(:element) { entry['media:thumbnail'] }
|
56
|
+
|
57
|
+
it "behaves somewhat like nil" do
|
58
|
+
element.should be_nil
|
59
|
+
(!element).should eq true
|
60
|
+
element.to_s.should eq ''
|
61
|
+
element.to_h.should eq Hash.new
|
62
|
+
end
|
63
|
+
|
64
|
+
it { should be_empty }
|
65
|
+
|
66
|
+
it "has attributes" do
|
67
|
+
element.attributes.keys.should eq ['url']
|
68
|
+
end
|
69
|
+
|
70
|
+
[:to_s, :to_h, :to_a].each do |conversion|
|
71
|
+
it "preserves the element name through ##{conversion}" do
|
72
|
+
element.send(conversion).name.should eq 'media:thumbnail'
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
[:to_s, :to_h].each do |conversion|
|
77
|
+
it "preserves attributes through ##{conversion}" do
|
78
|
+
element.send(conversion).attributes.keys.should eq ['url']
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.2
|
4
|
+
version: 0.9.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bradley Schaefer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -58,6 +58,7 @@ files:
|
|
58
58
|
- lib/saxerator.rb
|
59
59
|
- lib/saxerator/builder.rb
|
60
60
|
- lib/saxerator/builder/array_element.rb
|
61
|
+
- lib/saxerator/builder/empty_element.rb
|
61
62
|
- lib/saxerator/builder/hash_builder.rb
|
62
63
|
- lib/saxerator/builder/hash_element.rb
|
63
64
|
- lib/saxerator/builder/string_element.rb
|