saxerator 0.9.2 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/README.md +10 -0
- data/lib/saxerator.rb +3 -2
- data/lib/saxerator/builder/array_element.rb +4 -1
- data/lib/saxerator/builder/empty_element.rb +24 -0
- data/lib/saxerator/builder/hash_builder.rb +9 -3
- data/lib/saxerator/builder/hash_element.rb +10 -1
- data/lib/saxerator/builder/string_element.rb +10 -1
- data/lib/saxerator/version.rb +1 -1
- data/spec/lib/builder/hash_builder_spec.rb +47 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3275996c0843e22e1ef7148b6fc14c58aff415d
|
4
|
+
data.tar.gz: bd4ea2b9abb8c9ad8cea065c7d2c41fd472df4e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5891cd6b524b6880717f21c59552dd909f73ec3135439aaafc79e90dcf76a4a30d774c1b12ad14ebf7b395d7cff6a1c2e3a049bb958ab03d0b681d06092d1b15
|
7
|
+
data.tar.gz: 82f8257eeaedf4b4942979aa961dcd4574f7c4320fd821697fbea25bc1926064bad3f5fd5cbc176382443d81fb69a79d9c323fd1fc87263c713b6b35a44e8e34
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -119,6 +119,16 @@ Why not DOM parsing?
|
|
119
119
|
> DOM parsers load the entire document into memory. Saxerator only holds a single chunk in memory at a time. If your
|
120
120
|
> document is very large, this can be an important consideration.
|
121
121
|
|
122
|
+
When I fetch a tag that has one or more elements, sometimes I get an `Array`, and other times I get a `Hash` or `String`. Is there a way I can treat these consistently?
|
123
|
+
|
124
|
+
> You can treat objects consistently as arrays using
|
125
|
+
> [Ruby's built-in array conversion method](http://www.ruby-doc.org/core-2.1.1/Kernel.html#method-i-Array)
|
126
|
+
> in the form `Array(element_or_array)`.
|
127
|
+
>
|
128
|
+
> Generally you should not need to convert a parsed element to a `String` or `Hash`. One case where it
|
129
|
+
> occasionally comes up is for elements that are sometimes-empty. Empty elements behave mostly like an
|
130
|
+
> empty `Hash`; however, you may convert them to a more `String`-like object via `#to_s`.
|
131
|
+
|
122
132
|
### Acknowledgements ###
|
123
133
|
Saxerator was inspired by - but not affiliated with - [nori](https://github.com/savonrb/nori) and [Gregory Brown](http://majesticseacreature.com/)'s
|
124
134
|
[Practicing Ruby](http://practicingruby.com/)
|
data/lib/saxerator.rb
CHANGED
@@ -8,9 +8,10 @@ require 'saxerator/document_fragment'
|
|
8
8
|
require 'saxerator/configuration'
|
9
9
|
|
10
10
|
require 'saxerator/builder'
|
11
|
-
require 'saxerator/builder/string_element'
|
12
|
-
require 'saxerator/builder/hash_element'
|
13
11
|
require 'saxerator/builder/array_element'
|
12
|
+
require 'saxerator/builder/empty_element'
|
13
|
+
require 'saxerator/builder/hash_element'
|
14
|
+
require 'saxerator/builder/string_element'
|
14
15
|
require 'saxerator/builder/hash_builder'
|
15
16
|
require 'saxerator/builder/xml_builder'
|
16
17
|
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'saxerator/builder/hash_element'
|
2
|
+
|
3
|
+
module Saxerator
|
4
|
+
module Builder
|
5
|
+
class EmptyElement < HashElement
|
6
|
+
def nil?; true end
|
7
|
+
def !; true end
|
8
|
+
|
9
|
+
def to_s
|
10
|
+
StringElement.new('', name, attributes)
|
11
|
+
end
|
12
|
+
|
13
|
+
def to_h
|
14
|
+
HashElement.new(name, attributes)
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_a
|
18
|
+
array = ArrayElement.new
|
19
|
+
array.name = name
|
20
|
+
array
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -16,6 +16,10 @@ module Saxerator
|
|
16
16
|
@children << node
|
17
17
|
end
|
18
18
|
|
19
|
+
def to_empty_element
|
20
|
+
EmptyElement.new(@name, @attributes)
|
21
|
+
end
|
22
|
+
|
19
23
|
def to_s
|
20
24
|
StringElement.new(@children.join, @name, @attributes)
|
21
25
|
end
|
@@ -27,14 +31,14 @@ module Saxerator
|
|
27
31
|
name = child.name
|
28
32
|
element = child.block_variable
|
29
33
|
|
30
|
-
add_to_hash_element(
|
34
|
+
add_to_hash_element(hash, name, element)
|
31
35
|
end
|
32
36
|
|
33
37
|
if @config.put_attributes_in_hash?
|
34
38
|
|
35
39
|
@attributes.each do |attribute|
|
36
40
|
attribute.each_slice(2) do |name, element|
|
37
|
-
add_to_hash_element(
|
41
|
+
add_to_hash_element(hash, name, element)
|
38
42
|
end
|
39
43
|
end
|
40
44
|
end
|
@@ -56,7 +60,9 @@ module Saxerator
|
|
56
60
|
end
|
57
61
|
|
58
62
|
def block_variable
|
59
|
-
|
63
|
+
return to_s if @text
|
64
|
+
return to_hash if @children.count > 0
|
65
|
+
to_empty_element
|
60
66
|
end
|
61
67
|
end
|
62
68
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'saxerator/builder/array_element'
|
2
|
+
|
1
3
|
module Saxerator
|
2
4
|
module Builder
|
3
5
|
class HashElement < Hash
|
@@ -8,6 +10,13 @@ module Saxerator
|
|
8
10
|
self.name = name
|
9
11
|
self.attributes = attributes
|
10
12
|
end
|
13
|
+
|
14
|
+
def to_a
|
15
|
+
array = ArrayElement.new
|
16
|
+
array.name = name
|
17
|
+
array.concat super
|
18
|
+
array
|
19
|
+
end
|
11
20
|
end
|
12
21
|
end
|
13
|
-
end
|
22
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'saxerator/builder/array_element'
|
2
|
+
|
1
3
|
module Saxerator
|
2
4
|
module Builder
|
3
5
|
class StringElement < String
|
@@ -9,6 +11,13 @@ module Saxerator
|
|
9
11
|
self.attributes = attributes
|
10
12
|
super(str)
|
11
13
|
end
|
14
|
+
|
15
|
+
def to_a
|
16
|
+
array = ArrayElement.new
|
17
|
+
array << self
|
18
|
+
array.name = name
|
19
|
+
array
|
20
|
+
end
|
12
21
|
end
|
13
22
|
end
|
14
|
-
end
|
23
|
+
end
|
data/lib/saxerator/version.rb
CHANGED
@@ -26,6 +26,24 @@ describe "Saxerator (default) hash format" do
|
|
26
26
|
# name on a string
|
27
27
|
specify { entry['title'].name.should == 'title' }
|
28
28
|
|
29
|
+
describe "#to_a" do
|
30
|
+
it "preserves the element name on a parsed hash" do
|
31
|
+
entry.to_a.name.should == 'entry'
|
32
|
+
end
|
33
|
+
|
34
|
+
it "converts parsed hashes to nested key/value pairs (just like regular hashes)" do
|
35
|
+
entry.to_a.first.should == ['id', '1']
|
36
|
+
end
|
37
|
+
|
38
|
+
it "preserves the element name on a parsed string" do
|
39
|
+
entry['title'].to_a.name.should == 'title'
|
40
|
+
end
|
41
|
+
|
42
|
+
it "preserves the element name on an array" do
|
43
|
+
entry['contributor'].to_a.name.should eq 'contributor'
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
29
47
|
# name on an array
|
30
48
|
specify { entry['contributor'].name.should == 'contributor' }
|
31
49
|
|
@@ -33,5 +51,32 @@ describe "Saxerator (default) hash format" do
|
|
33
51
|
specify { entry['content'].should == "<p>Airplanes are very large — this can present difficulty in digestion.</p>" }
|
34
52
|
|
35
53
|
# empty element
|
36
|
-
|
37
|
-
|
54
|
+
context "parsing an empty element" do
|
55
|
+
subject(:element) { entry['media:thumbnail'] }
|
56
|
+
|
57
|
+
it "behaves somewhat like nil" do
|
58
|
+
element.should be_nil
|
59
|
+
(!element).should eq true
|
60
|
+
element.to_s.should eq ''
|
61
|
+
element.to_h.should eq Hash.new
|
62
|
+
end
|
63
|
+
|
64
|
+
it { should be_empty }
|
65
|
+
|
66
|
+
it "has attributes" do
|
67
|
+
element.attributes.keys.should eq ['url']
|
68
|
+
end
|
69
|
+
|
70
|
+
[:to_s, :to_h, :to_a].each do |conversion|
|
71
|
+
it "preserves the element name through ##{conversion}" do
|
72
|
+
element.send(conversion).name.should eq 'media:thumbnail'
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
[:to_s, :to_h].each do |conversion|
|
77
|
+
it "preserves attributes through ##{conversion}" do
|
78
|
+
element.send(conversion).attributes.keys.should eq ['url']
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.2
|
4
|
+
version: 0.9.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bradley Schaefer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -58,6 +58,7 @@ files:
|
|
58
58
|
- lib/saxerator.rb
|
59
59
|
- lib/saxerator/builder.rb
|
60
60
|
- lib/saxerator/builder/array_element.rb
|
61
|
+
- lib/saxerator/builder/empty_element.rb
|
61
62
|
- lib/saxerator/builder/hash_builder.rb
|
62
63
|
- lib/saxerator/builder/hash_element.rb
|
63
64
|
- lib/saxerator/builder/string_element.rb
|