sergio 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ *.gem
2
+ *.swp
3
+ *.swo
data/Gemfile.lock CHANGED
@@ -4,7 +4,6 @@ GEM
4
4
  diff-lcs (1.1.2)
5
5
  nokogiri (1.4.4)
6
6
  rake (0.9.0)
7
- rcov (0.9.9)
8
7
  rspec (2.6.0)
9
8
  rspec-core (~> 2.6.0)
10
9
  rspec-expectations (~> 2.6.0)
@@ -20,5 +19,4 @@ PLATFORMS
20
19
  DEPENDENCIES
21
20
  nokogiri
22
21
  rake
23
- rcov
24
22
  rspec
@@ -17,8 +17,12 @@ module Sergio
17
17
  k = k.is_a?(Array) ? k[0] : k
18
18
  v = hash[k]
19
19
  if v
20
- if v.is_a?(Hash) && path.length > 0
21
- value_at_path(path, v)
20
+ if path.length > 0
21
+ if v.is_a?(Hash)
22
+ value_at_path(path, v)
23
+ elsif v.last.is_a?(Hash)
24
+ value_at_path(path, v.last)
25
+ end
22
26
  else
23
27
  v
24
28
  end
@@ -54,5 +58,37 @@ module Sergio
54
58
  end
55
59
  v
56
60
  end
61
+
62
+ def hash_recursive_append(lval, rval)
63
+ r = {}
64
+ v = lval.merge(rval) do |key, oldval, newval|
65
+ r[key] = if oldval.is_a?(Hash) && newval.is_a?(Hash)
66
+ if newval.size == 0
67
+ [oldval, newval]
68
+ else
69
+ hash_recursive_append(oldval, newval)
70
+ end
71
+ else
72
+ if oldval.is_a?(Array)
73
+ if oldval.last.is_a?(Hash) && newval.is_a?(Hash) && newval.size > 0
74
+ oldval << hash_recursive_append(oldval.pop, newval)
75
+ else
76
+ oldval << newval
77
+ end
78
+ elsif newval.is_a?(Array)
79
+ newval << oldval
80
+ else
81
+ [oldval] << newval
82
+ end
83
+ end
84
+ end
85
+ v
86
+ end
87
+ end
88
+
89
+ def remove_empty_hashes(hash)
90
+ hash.delete_if do |k,v|
91
+ true
92
+ end
57
93
  end
58
94
  end
@@ -23,6 +23,7 @@ module Sergio
23
23
  newname = name unless newname
24
24
  name = [name] unless name.is_a?(Array)
25
25
  newname = [newname] unless newname.is_a?(Array)
26
+ aggregate_element = false
26
27
 
27
28
  name.each do |n|
28
29
  @current_path << n
@@ -42,11 +43,12 @@ module Sergio
42
43
  if blk.arity < 1
43
44
  blk.call
44
45
  callback = lambda {|v|{}}
46
+ aggregate_element = true
45
47
  else
46
48
  callback = blk
47
49
  end
48
50
 
49
- elem = SergioElement.new(new_path, args, callback)
51
+ elem = Sergio::Element.new(new_path, args, callback, aggregate_element)
50
52
 
51
53
  @parsing_elements = hash_recursive_merge_to_arrays(@parsing_elements, hash_from_path(current_path, {:sergio_elem => elem}))
52
54
  current_path.pop(name.length)
@@ -60,7 +62,7 @@ module Sergio
60
62
  if v
61
63
  v = v[:sergio_elem]
62
64
  if v
63
- v = [v] if v.is_a?(SergioElement)
65
+ v = [v] if v.is_a?(Sergio::Element)
64
66
  vs = v.select do |v|
65
67
  if v.options[:having]
66
68
  match = v.options[:having].any? do |attr,value|
@@ -1,8 +1,11 @@
1
- class SergioElement
2
- attr_accessor :new_path, :callback, :options
3
- def initialize(new_path, args, callback)
4
- @new_path = new_path.clone
5
- @callback = callback
6
- @options = args
1
+ module Sergio
2
+ class Element
3
+ attr_accessor :new_path, :callback, :options, :aggregate_element
4
+ def initialize(new_path, args, callback, aggregate_element)
5
+ @new_path = new_path.clone
6
+ @callback = callback
7
+ @options = args
8
+ @aggregate_element = aggregate_element
9
+ end
7
10
  end
8
11
  end
@@ -8,24 +8,14 @@ module Sergio
8
8
  end
9
9
 
10
10
  def set_element(path, val, options = {})
11
- v = value_at_path(path.clone, self.parsed_hash)
11
+ old_val = value_at_path(path.clone, self.parsed_hash)
12
12
 
13
- val = if v
14
- if v.is_a? Array
15
- v << val
16
- else
17
- if val.is_a?(Hash) && val.empty?
18
- options[:as_array] ? [v] : v
19
- else
20
- [v] << val
21
- end
22
- end
23
- else
24
- options[:as_array] ? [val] : val
13
+ if options[:as_array] && !val.is_a?(Array) && !old_val.is_a?(Array)
14
+ val = [val]
25
15
  end
26
16
 
27
17
  h = hash_from_path(path, val)
28
- @parsed_hash = hash_recursive_merge(self.parsed_hash, h)
18
+ @parsed_hash = hash_recursive_append(self.parsed_hash, h)
29
19
  @parsed_hash
30
20
  end
31
21
  end
@@ -2,10 +2,21 @@ class SergioSax < Nokogiri::XML::SAX::Document
2
2
  def initialize(object)
3
3
  @stack = []
4
4
  @object = object
5
+ @current_configs = []
6
+ @parent_callbacks = []
5
7
  end
6
8
 
7
9
  def start_element(name, attrs = [])
8
10
  @stack << [name, attrs]
11
+ if current_configs = @object.class.sergio_config.get_element_configs(@stack.clone)
12
+ current_configs.each do |c|
13
+ if c.aggregate_element
14
+ @parent_callbacks << lambda do
15
+ @object.sergio_parsed_document.set_element(c.new_path, {}, c.options)
16
+ end
17
+ end
18
+ end
19
+ end
9
20
  end
10
21
 
11
22
  def characters(string)
@@ -18,28 +29,36 @@ class SergioSax < Nokogiri::XML::SAX::Document
18
29
  end
19
30
 
20
31
  def end_element(name)
21
- e_context = @stack.clone
32
+ current_configs = @object.class.sergio_config.get_element_configs(@stack.clone)
22
33
  name, attrs = @stack.pop
23
- if sergio_elements = @object.class.sergio_config.get_element_configs(e_context)
24
- sergio_elements.each do |sergio_element|
25
- attr = sergio_element.options[:attribute]
34
+ if current_configs
35
+ current_configs.each do |c|
36
+ attr = c.options[:attribute]
26
37
  val = attrs.assoc(attr)
27
- if val
28
- val = val[1]
29
- hash_path = sergio_element.new_path
30
- callback = sergio_element.callback
38
+ callback = c.callback
31
39
 
40
+ if val && !c.aggregate_element
41
+ val = val[1]
32
42
  r = if callback.arity == 1
33
43
  callback.call(val)
34
44
  elsif callback.arity == 2
35
45
  h = Hash[*attrs.flatten]
36
46
  h.delete('@text')
37
- callback.call(val, Hash[*attrs.flatten])
47
+ callback.call(val, h)
48
+ end
49
+
50
+ #only builds parent elements if at least one of their child elements has a match
51
+ @parent_callbacks.each do |c|
52
+ c.call
38
53
  end
54
+ @parent_callbacks = []
39
55
 
40
- @object.sergio_parsed_document.set_element(hash_path, r, sergio_element.options)
56
+ #build an array of hashes if return value from callback is a hash
57
+ @object.sergio_parsed_document.set_element(c.new_path, {}, c.options) if r.is_a?(Hash)
58
+ @object.sergio_parsed_document.set_element(c.new_path, r, c.options)
41
59
  end
42
60
  end
43
61
  end
62
+ @parent_callbacks = []
44
63
  end
45
64
  end
@@ -0,0 +1,3 @@
1
+ module Sergio
2
+ VERSION = "0.0.2"
3
+ end
data/readme.markdown ADDED
@@ -0,0 +1,91 @@
1
+ <a href='http://www.youtube.com/watch?v=GaoLU6zKaws'><img src="http://i.imgur.com/HThQt.jpg" alt="" title="Hosted by imgur.com" /></a>
2
+ ### Sergio is a SAX parser with a handy DSL for transforming XML into hashes
3
+
4
+ ## Usage
5
+ require 'sergio'
6
+
7
+ class MyXmlMunger
8
+ include Sergio
9
+
10
+ #the hash key will be renamed to the second argument passed to element if one is provided
11
+ element 'body', 'bro' do
12
+
13
+ #if a second argument isn't provided it will just use the original name of the element
14
+ element 'id'
15
+
16
+ #the :attribute option specifies what attribute to draw the value from for the resulting hash
17
+ element 'a', 'link', :attribute => 'href'
18
+
19
+ #You can pass a block to #element with a value argument and the hash value will be set to the result of the block
20
+ element 'p', 'content' do |v|
21
+ v.reverse
22
+ end
23
+
24
+ #You can pass :having to #element to specify attributes required to match against
25
+ element 'div', 'cars', :having => {'class' => 'car'} do
26
+
27
+ #You can pass value and attributes arguments into the block you pass to element
28
+ element 'p', 'description' do |value, attributes|
29
+ "#{value} #{attributes['attribute']}"
30
+ end
31
+ end
32
+
33
+ #You can pass arbitrary nestings to match against and merge to
34
+ element ['some', 'nesting'], ['some', 'other', 'nesting']
35
+
36
+ #Duplicate elements in the same scope are automatically made into an array:
37
+ element 'some', 'thing'
38
+
39
+ #parses
40
+ #<body><some>hey</some><some>hi</some></body>
41
+ #as
42
+ #{'document' => {'thing' => ['hey', 'hi']}}
43
+ #and
44
+ #<body><some>hi</some></body>
45
+ #as
46
+ #{'document' => {'thing' => 'hi'}}
47
+
48
+ #You can force arrays for even a single matching element within a given scope using the :as_array option
49
+ element 'some', 'thing', :as_array => true
50
+
51
+ #parses
52
+ #<body><some>hi</some></body>
53
+ #as
54
+ #{'document' => {'thing' => ['hi']}}
55
+ #and
56
+ #<body><some>hey</some><some>hi</some></body>
57
+ #as
58
+ #{'document' => {'thing' => ['hey', 'hi']}}
59
+ end
60
+ end
61
+
62
+ ### To parse a document into a hash, call `parse` on an instance of your parsing class with a document string as an argument
63
+ MyXmlMunger.new.parse("<body><id>1</id><a href='dude'>something</a></body>")
64
+ #returns {'bro' => {'id' => '1', 'link' => 'dude'}}
65
+
66
+ ## LICENSE
67
+
68
+ (The MIT License)
69
+
70
+ Copyright © 2011:
71
+
72
+ Max Justus Spransy
73
+
74
+ Permission is hereby granted, free of charge, to any person obtaining
75
+ a copy of this software and associated documentation files (the
76
+ ‘Software’), to deal in the Software without restriction, including
77
+ without limitation the rights to use, copy, modify, merge, publish,
78
+ distribute, sublicense, and/or sell copies of the Software, and to
79
+ permit persons to whom the Software is furnished to do so, subject to
80
+ the following conditions:
81
+
82
+ The above copyright notice and this permission notice shall be
83
+ included in all copies or substantial portions of the Software.
84
+
85
+ THE SOFTWARE IS PROVIDED ‘AS IS’, WITHOUT WARRANTY OF ANY KIND,
86
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
87
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
88
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
89
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
90
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
91
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/sergio.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "sergio/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "sergio"
7
+ s.version = Sergio::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ["Max Justus Spransy"]
10
+ s.email = ["maxjustus@gmail.com"]
11
+ s.homepage = "https://github.com/maxjustus/Sergio"
12
+ s.summary = %q{SAXy xml to hash transformation.}
13
+ s.description = %q{
14
+ Sergio provides a declarative syntax for parsing unruly xml into nice pretty hashes.
15
+ }
16
+
17
+ s.rubyforge_project = "sergio"
18
+
19
+ s.add_dependency 'nokogiri'
20
+ s.add_development_dependency 'rspec', '~> 2.5.0'
21
+ s.add_development_dependency 'rake'
22
+
23
+ s.files = `git ls-files`.split("\n")
24
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
25
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
26
+ s.require_paths = ["lib"]
27
+ end
@@ -13,6 +13,12 @@ describe Sergio::HashMethods do
13
13
  v.should == '2'
14
14
  end
15
15
 
16
+ it 'gets the value from last hash in array of hashes in passed in hash at point in heirarchy specified by passed in array' do
17
+ v = @s.value_at_path([['thing'], ['stuff']], {'thing' => [{'stuff' => '1'}, {'stuff' => '2'}]})
18
+ v = @s.value_at_path([['thing']], {'thing' => [{'stuff' => '1'}, {'stuff' => '2'}]})
19
+ v.should == [{'stuff' => '1'}, {'stuff' => '2'}]
20
+ end
21
+
16
22
  it 'gets the value from passed in hash at point in heirarchy specified by passed in array if array is one element long' do
17
23
  v = @s.value_at_path([['thing']], {'thing' => {'stuff' => '2'}})
18
24
  v.should == {'stuff' => '2'}
@@ -31,7 +37,6 @@ describe Sergio::HashMethods do
31
37
  end.new
32
38
  end
33
39
 
34
-
35
40
  it 'merges subhashes together' do
36
41
  h1 = {:thing => {'guy' => 'cool'}}
37
42
  h2 = {:thing => {'guys' => 'cool'}}
@@ -75,5 +80,76 @@ describe Sergio::HashMethods do
75
80
  h2 = {:thing => {'guy' => 'cools'}}
76
81
  @s.hash_recursive_merge(h1, h2).should == {:thing => {'guy' => 'cools'}}
77
82
  end
83
+
84
+ it 'merges new key values into existing hash if one is present' do
85
+ h1 = {:thing => {'guy' => 'cool'}}
86
+ h2 = {:thing => {'neat' => 'cools'}}
87
+ @s.hash_recursive_merge(h1, h2).should == {:thing => {'guy' => 'cool', 'neat' => 'cools'}}
88
+ end
89
+ end
90
+
91
+ context 'hash_recursive_append' do
92
+ before do
93
+ @s = Class.new do
94
+ include Sergio::HashMethods
95
+ end.new
96
+ end
97
+
98
+ it 'merges subhashes together' do
99
+ h1 = {:thing => {'guy' => 'cool'}}
100
+ h2 = {:thing => {'guys' => 'cool'}}
101
+ @s.hash_recursive_append(h1, h2).should == {:thing => {'guy' => 'cool', 'guys' => 'cool'}}
102
+ end
103
+
104
+ it 'aggregates intersecting key values into an array' do
105
+ h1 = {:thing => {'guy' => 'cool'}}
106
+ h2 = {:thing => {'guy' => 'cools'}}
107
+ @s.hash_recursive_append(h1, h2).should == {:thing => {'guy' => ['cool', 'cools']}}
108
+ end
109
+
110
+ it 'aggregates intersecting key values into an array of values where values are not hashes' do
111
+ h1 = {:thing => {'guy' => 'cool'}}
112
+ h2 = {:thing => {'guy' => 'cools'}}
113
+ @s.hash_recursive_append(h1, h2).should == {:thing => {'guy' => ['cool', 'cools']}}
114
+ h1 = {:thing => {'guy' => {'cool' => '1'}}}
115
+ h2 = {:thing => {'guy' => {'cool' => '3'}}}
116
+ @s.hash_recursive_append(h1, h2).should == {:thing => {'guy' => {'cool' => ['1', '3']}}}
117
+ end
118
+
119
+ it 'merges new key values into existing hash if one is present' do
120
+ h1 = {:thing => {'guy' => 'cool'}}
121
+ h2 = {:thing => {'neat' => 'cools'}}
122
+ @s.hash_recursive_append(h1, h2).should == {:thing => {'guy' => 'cool', 'neat' => 'cools'}}
123
+ end
124
+
125
+ it 'creates an array of hashes at given value if new hash value is an empty hash' do
126
+ h1 = {:thing => {'guy' => 'cool'}}
127
+ h2 = {:thing => {}}
128
+ @s.hash_recursive_append(h1, h2).should == {:thing => [{'guy' => 'cool'}, {}]}
129
+ end
130
+
131
+ it 'merges key values into last hash in existing array of hashes if one is present' do
132
+ h1 = {:thing => {'guy' => 'cool'}}
133
+ h2 = {:thing => {'neat' => 'cools'}}
134
+ h1 = @s.hash_recursive_append(h1, h2)
135
+ h2 = {:thing => {}}
136
+ h1 = @s.hash_recursive_append(h1, h2)
137
+ h1.should == {:thing => [{'guy' => 'cool', 'neat' => 'cools'}, {}]}
138
+ h2 = {:thing => {'guy' => 'cools'}}
139
+ h1 = @s.hash_recursive_append(h1, h2)
140
+ h1.should == {:thing => [{'guy' => 'cool', 'neat' => 'cools'}, {'guy' => 'cools'}]}
141
+ h2 = {:thing => {'neat' => 'cools'}}
142
+ h1 = @s.hash_recursive_append(h1, h2)
143
+ h1.should == {:thing => [{'guy' => 'cool', 'neat' => 'cools'}, {'guy' => 'cools', 'neat' => 'cools'}]}
144
+ h2 = {:thing => {'neat' => 'cools'}}
145
+ h1 = @s.hash_recursive_append(h1, h2)
146
+ h1.should == {:thing => [{'guy' => 'cool', 'neat' => 'cools'}, {'guy' => 'cools', 'neat' => ['cools', 'cools']}]}
147
+ h2 = {:thing => {}}
148
+ h1 = @s.hash_recursive_append(h1, h2)
149
+ h1.should == {:thing => [{'guy' => 'cool', 'neat' => 'cools'}, {'guy' => 'cools', 'neat' => ['cools', 'cools']}, {}]}
150
+ h2 = {:thing => {'hi' => 'hey'}}
151
+ h1 = @s.hash_recursive_append(h1, h2)
152
+ h1.should == {:thing => [{'guy' => 'cool', 'neat' => 'cools'}, {'guy' => 'cools', 'neat' => ['cools', 'cools']}, {'hi' => 'hey'}]}
153
+ end
78
154
  end
79
155
  end
@@ -15,27 +15,65 @@ describe Sergio do
15
15
  it 'parses duplicate elements into an array' do
16
16
  s = new_sergio do
17
17
  element 'parent' do
18
- element 'id'
18
+ element 'post', 'posts' do
19
+ element 'id', :as_array => true
20
+ end
19
21
  end
20
22
  end
21
23
 
22
- @xml = "<parent><id>1</id><id>2</id></parent>"
24
+ @xml = "<parent><post><id>1</id><id>2</id></post><post><id>3</id></post><post></post></parent>"
23
25
  @hash = s.new.parse(@xml)
24
- @hash['parent']['id'].should == ['1', '2']
26
+ @hash['parent']['posts'][0]['id'].should == ['1', '2']
27
+ @hash['parent']['posts'][1]['id'].should == ['3']
28
+ end
29
+
30
+ it 'ignores empty adjacent elements' do
31
+ s = new_sergio do
32
+ element 'a' do
33
+ element 'b' do
34
+ element 'f'
35
+ end
36
+ element 'c', 'b' do
37
+ element 'e'
38
+ end
39
+ end
40
+ end
41
+ @xml = '<a>
42
+ <b></b>
43
+ <b><f>a</f></b>
44
+ <b>he</b>
45
+ <c>a</c>
46
+ <c>
47
+ <e>e</e>
48
+ </c>
49
+ </a>'
50
+ h = s.new.parse(@xml)
51
+ h.should == {'a' => {'b' => [{'f' => 'a'}, {'e' => 'e'}]}}
25
52
  end
26
53
 
27
54
  it 'parses duplicate elements whose callbacks return a hash into an array' do
28
55
  s = new_sergio do
29
56
  element 'parent' do
30
- element 'id' do |v|
31
- {'v' => v}
57
+ element 'p' do
58
+ element 'id', do |v, attributes|
59
+ {'v' => v}
60
+ end
32
61
  end
33
62
  end
34
63
  end
35
64
 
36
- @xml = "<parent><id>1</id><id>2</id></parent>"
65
+ @xml = "<parent>
66
+ <p>
67
+ <id cool='neat'>1</id>
68
+ <id>2</id>
69
+ </p>
70
+ <p>
71
+ <id>5</id>
72
+ </p>
73
+ </parent>"
37
74
  @hash = s.new.parse(@xml)
38
- @hash['parent']['id'].should == [{'v' => '1',}, {'v' => '2'}]
75
+ @hash['parent']['p'][0]['id'].should == [{'v' => '1'}, {'v' => '2'}]
76
+ @hash['parent']['p'][1]['id'].should == {'v' => '5'}
39
77
  end
40
78
 
41
79
  it 'parses a nested element' do
@@ -105,7 +143,7 @@ describe Sergio do
105
143
 
106
144
  @xml = "<parent><id>1</id><id>2</id></parent>"
107
145
  @hash = s.new.parse(@xml)
108
- @hash['post'].should == {'di' => ['1', '2']}
146
+ @hash['post'].keys.should == ['di']
109
147
  end
110
148
 
111
149
  it 'matches against attributes using :having argument' do
@@ -244,5 +282,13 @@ describe Sergio do
244
282
  v = @s.new.sergio_parsed_document.set_element(['thing', 'stuff'], '1')
245
283
  v.should == {'thing' => {'stuff' => '1'}}
246
284
  end
285
+
286
+ it 'aggregates adjacent parents into arrays' do
287
+ s = @s.new
288
+ s.sergio_parsed_document.set_element(['thing', 'stuff'], '1')
289
+ s.sergio_parsed_document.set_element(['thing'], {})
290
+ v = s.sergio_parsed_document.set_element(['thing', 'stuff'], '2')
291
+ v.should == {'thing' => [{'stuff' => '1'}, {'stuff' => '2'}]}
292
+ end
247
293
  end
248
294
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: sergio
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.1
5
+ version: 0.0.2
6
6
  platform: ruby
7
7
  authors:
8
8
  - Max Justus Spransy
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-05-25 00:00:00 -05:00
13
+ date: 2011-05-26 00:00:00 -05:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -56,6 +56,7 @@ extensions: []
56
56
  extra_rdoc_files: []
57
57
 
58
58
  files:
59
+ - .gitignore
59
60
  - Gemfile
60
61
  - Gemfile.lock
61
62
  - Rakefile
@@ -65,6 +66,9 @@ files:
65
66
  - lib/sergio/sergio_element.rb
66
67
  - lib/sergio/sergio_parsed_document.rb
67
68
  - lib/sergio/sergio_sax.rb
69
+ - lib/sergio/version.rb
70
+ - readme.markdown
71
+ - sergio.gemspec
68
72
  - spec/spec_helper.rb
69
73
  - spec/support/buzz_activity_stream.xml
70
74
  - spec/support/facebook_activity_stream.xml