summaryse 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +5 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +34 -0
- data/LICENCE.md +22 -0
- data/Manifest.txt +14 -0
- data/README.md +204 -0
- data/Rakefile +23 -0
- data/examples/yaml_merge.rb +33 -0
- data/lib/summaryse.rb +3 -0
- data/lib/summaryse/core_ext/array.rb +99 -0
- data/lib/summaryse/loader.rb +0 -0
- data/lib/summaryse/version.rb +14 -0
- data/spec/readme_spec.rb +112 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/summaryse_spec.rb +111 -0
- data/summaryse.gemspec +190 -0
- data/summaryse.noespec +28 -0
- data/tasks/debug_mail.rake +78 -0
- data/tasks/debug_mail.txt +13 -0
- data/tasks/gem.rake +68 -0
- data/tasks/spec_test.rake +79 -0
- data/tasks/unit_test.rake +77 -0
- data/tasks/yard.rake +51 -0
- metadata +149 -0
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
summaryse (1.0.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
bluecloth (2.0.11)
|
10
|
+
diff-lcs (1.1.2)
|
11
|
+
rake (0.9.2)
|
12
|
+
rspec (2.4.0)
|
13
|
+
rspec-core (~> 2.4.0)
|
14
|
+
rspec-expectations (~> 2.4.0)
|
15
|
+
rspec-mocks (~> 2.4.0)
|
16
|
+
rspec-core (2.4.0)
|
17
|
+
rspec-expectations (2.4.0)
|
18
|
+
diff-lcs (~> 1.1.2)
|
19
|
+
rspec-mocks (2.4.0)
|
20
|
+
wlang (0.10.2)
|
21
|
+
yard (0.7.2)
|
22
|
+
|
23
|
+
PLATFORMS
|
24
|
+
java
|
25
|
+
ruby
|
26
|
+
|
27
|
+
DEPENDENCIES
|
28
|
+
bluecloth (~> 2.0.9)
|
29
|
+
bundler (~> 1.0)
|
30
|
+
rake (~> 0.9.2)
|
31
|
+
rspec (~> 2.4.0)
|
32
|
+
summaryse!
|
33
|
+
wlang (~> 0.10.1)
|
34
|
+
yard (~> 0.7.2)
|
data/LICENCE.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# The MIT Licence
|
2
|
+
|
3
|
+
Copyright (c) 2011 - Bernard Lambeau
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Manifest.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,204 @@
|
|
1
|
+
# Array#summaryse
|
2
|
+
|
3
|
+
[sudo] gem install summaryse
|
4
|
+
|
5
|
+
## Links
|
6
|
+
|
7
|
+
* {http://rubydoc.info/github/blambeau/summaryse/master/frames} (read this file there!)
|
8
|
+
* {http://github.com/blambeau/summaryse} (source code)
|
9
|
+
|
10
|
+
## Summaryse's summary
|
11
|
+
|
12
|
+
Summaryse provides a core extension, namely Array#summaryse. Oh, sorry, I must
|
13
|
+
add: "OMG, a core extension :-/ If you are aware of any compatibility issue,
|
14
|
+
let me know!".
|
15
|
+
|
16
|
+
So, what is Array#summaryse? Roughly, a way to compute aggregations. This goes
|
17
|
+
from simple aggregations on simple values (summing integers), to complex aggregations
|
18
|
+
on complex values (merging arrays of hashes that contain hashes and array of
|
19
|
+
hashes that...). Below is a typical use case showing how Array#summaryse can be
|
20
|
+
used to merge YAML files. Simpler examples are given a bit later.
|
21
|
+
|
22
|
+
## An opinionated use-case -- YAML merging
|
23
|
+
|
24
|
+
In many projects of mine including
|
25
|
+
{https://github.com/blambeau/noe noe},
|
26
|
+
{https://github.com/blambeau/agora agora} or
|
27
|
+
{https://github.com/blambeau/dbagile dbagile}, a common need is to merge YAML files.
|
28
|
+
Merging YAML files is complex because you need full control of how merging applies
|
29
|
+
on specific tree nodes. Summaryse solves this very effectively.
|
30
|
+
|
31
|
+
# This is left.yaml
|
32
|
+
left = YAML.load ... # syntactically wrong, but to avoid Yard's rewriting
|
33
|
+
hobbies:
|
34
|
+
- ruby
|
35
|
+
- rails
|
36
|
+
dependencies:
|
37
|
+
- {name: rspec, version: '2.6.4', for: [ runtime ]}
|
38
|
+
...
|
39
|
+
|
40
|
+
# This is right.yaml
|
41
|
+
right = YAML.load ...
|
42
|
+
hobbies:
|
43
|
+
- ruby
|
44
|
+
- music
|
45
|
+
dependencies:
|
46
|
+
- {name: rails, version: '3.0', for: [ runtime ]}
|
47
|
+
- {name: rspec, version: '2.6.4', for: [ test ]}
|
48
|
+
...
|
49
|
+
|
50
|
+
# This is merge.yaml
|
51
|
+
merge = YAML.load ...
|
52
|
+
hobbies: # on hobbies, we simply make a set-based union
|
53
|
+
:union
|
54
|
+
dependencies: # on dependencies, we apply recursively
|
55
|
+
- [name, version] # - 'aggregate by name and version'
|
56
|
+
- for: :union # - compute the union of 'for' usage
|
57
|
+
...
|
58
|
+
|
59
|
+
# Merge and re-dump
|
60
|
+
[ left, right ].summaryse(merge).to_yaml
|
61
|
+
|
62
|
+
# This is the (pretty-printed) result
|
63
|
+
hobbies:
|
64
|
+
- ruby
|
65
|
+
- rails
|
66
|
+
- music
|
67
|
+
dependencies:
|
68
|
+
- {name: rspec, version: '2.6.4', for: [ runtime, test ]}
|
69
|
+
- {name: rails, version: '3.0', for: [ runtime ]}
|
70
|
+
|
71
|
+
This is a very opinionated, yet already complex, case-study. Let me go back to
|
72
|
+
a more general explanation now.
|
73
|
+
|
74
|
+
## On simple values (integers, floats, ...)
|
75
|
+
|
76
|
+
Summarizing an array of simple values yields -> a simple value... Below are some
|
77
|
+
examples on integers. We are in ruby, so duck-typing applies everywhere.
|
78
|
+
|
79
|
+
### Arithmetics & Algebra
|
80
|
+
|
81
|
+
# :count, same as #size
|
82
|
+
[1, 4, 12, 7].summaryse(:count) # => 4
|
83
|
+
|
84
|
+
# :sum, same as #inject(:+)
|
85
|
+
[1, 4, 12, 7].summaryse(:sum) # => 24
|
86
|
+
|
87
|
+
# :avg, same as #inject(:+)/size
|
88
|
+
[1, 4, 12, 7].summaryse(:avg) # => 6.0
|
89
|
+
|
90
|
+
### Array theory
|
91
|
+
|
92
|
+
# :min, same as #min
|
93
|
+
[1, 4, 12, 7].summaryse(:min) # => 1
|
94
|
+
|
95
|
+
# :max, same as #max
|
96
|
+
[1, 4, 12, 7].summaryse(:max) # => 12
|
97
|
+
|
98
|
+
# :first, same as #first
|
99
|
+
[1, 4, 12, 7].summaryse(:first) # => 1
|
100
|
+
|
101
|
+
# :last, same as #last
|
102
|
+
[1, 4, 12, 7].summaryse(:last) # => 7
|
103
|
+
|
104
|
+
### Set theory
|
105
|
+
|
106
|
+
# :union, same as #inject(:|)
|
107
|
+
[ [1, 4], [12, 1, 7], [1] ].summaryse(:union) # => [1, 4, 12, 7]
|
108
|
+
|
109
|
+
# :intersection, same as #inject(:&)
|
110
|
+
[ [1, 4], [12, 1, 7], [1] ].summaryse(:intersection) # => [1]
|
111
|
+
|
112
|
+
## On Hash-es
|
113
|
+
|
114
|
+
Summarizing an Array of Hash-es yields -> a Hash.
|
115
|
+
|
116
|
+
Previous section provided the base cases. You can use them on elements of hashes
|
117
|
+
by passing a ... Hash of course:
|
118
|
+
|
119
|
+
[
|
120
|
+
{ :hobbies => [:ruby], :size => 12 },
|
121
|
+
{ :hobbies => [:music], :size => 17 }
|
122
|
+
].summaryse(:hobbies => :union, :size => :max)
|
123
|
+
# => {:hobbies => [:ruby, :music], :size => 17}
|
124
|
+
|
125
|
+
And it works recursively, of course:
|
126
|
+
|
127
|
+
[
|
128
|
+
{ :hobbies => {:day => [:ruby], :night => [:ruby] } },
|
129
|
+
{ :hobbies => {:day => [], :night => [:sleep]} }
|
130
|
+
].summaryse(:hobbies => {:day => :union, :night => :union})
|
131
|
+
# => {:hobbies => {:day => [:ruby], :night => [:ruby, :sleep]}}
|
132
|
+
|
133
|
+
### Specifying default behavior
|
134
|
+
|
135
|
+
By default, the returned hash only contains elements for which you have provided
|
136
|
+
a summarization heuristic. However, you can use a nil key to specify the default
|
137
|
+
behavior to use on others:
|
138
|
+
|
139
|
+
[
|
140
|
+
{ :hobbies => [:ruby], :size => 12 },
|
141
|
+
{ :hobbies => [:music], :size => 17 }
|
142
|
+
].summaryse(:hobbies => :union, nil => :first)
|
143
|
+
# => {:hobbies => [:ruby, :music], :size => 12}
|
144
|
+
|
145
|
+
### Specifying with lambdas
|
146
|
+
|
147
|
+
When no default summarization function fits your needs, just pass a lambda. It
|
148
|
+
will be called with the array of values on which aggregation must be done:
|
149
|
+
|
150
|
+
[
|
151
|
+
{ :hobbies => [:ruby], :size => 12 },
|
152
|
+
{ :hobbies => [:music], :size => 17 }
|
153
|
+
].summaryse(:hobbies => :union, :size => lambda{|a|
|
154
|
+
a.join(', ')
|
155
|
+
})
|
156
|
+
# => {:hobbies => [:ruby, :music], :size => "12, 17"}
|
157
|
+
|
158
|
+
## On Arrays of Hash-es
|
159
|
+
|
160
|
+
Summarizing an Array of Array-s of Hash-es yields -> an Array of Hash-es
|
161
|
+
|
162
|
+
There is a subtlety here, as you have to specify the "by key", that is, what
|
163
|
+
hash elements form the summarization grouping terms.
|
164
|
+
|
165
|
+
[
|
166
|
+
[ { :name => :yard, :for => [ :devel ] },
|
167
|
+
{ :name => :summaryse, :for => [ :runtime ] } ],
|
168
|
+
[ { :name => :summaryse, :for => [ :devel ] },
|
169
|
+
{ :name => :treetop, :for => [ :runtime ] } ]
|
170
|
+
].summaryse([ [:name], {:for => :union} ])
|
171
|
+
# => [ {:name => :yard, :for => [:devel] },
|
172
|
+
# {:name => :summaryse, :for => [:runtime, :devel] },
|
173
|
+
# {:name => :treetop, :for => [:runtime] } ]
|
174
|
+
|
175
|
+
A quick remark: when merging arrays of hashes, #summaryse guarantees that the
|
176
|
+
returned hashes are in order of encountered 'by key' values. That is, in the
|
177
|
+
example above, yard comes before summaryse that comes before treetop because
|
178
|
+
this is the order in which they have been seen initially.
|
179
|
+
|
180
|
+
# By the way, why this stupid name?
|
181
|
+
|
182
|
+
Just because summarize was already an {https://rubygems.org/gems/summarize existing gem}.
|
183
|
+
Summaryse is also much less likely to cause a name clash on the Array class. And
|
184
|
+
I'm a french-speaking developer :-)
|
185
|
+
|
186
|
+
And where does 'summarize' come from? The name is inspired by (yet not equivalent
|
187
|
+
to) {http://en.wikipedia.org/wiki/D_(data_language_specification)#Tutorial_D
|
188
|
+
TUTORIAL D}'s summarization operator on relations.
|
189
|
+
See my {https://github.com/blambeau/alf alf} project. Array#summaryse is
|
190
|
+
rubyiesque in mind and does not conform to a purely relational vision of
|
191
|
+
summarization, though.
|
192
|
+
|
193
|
+
# Contribute, Versioning and so on.
|
194
|
+
|
195
|
+
As usual: the code is on {http://github.com/blambeau/summaryse github}, I follow
|
196
|
+
{http://semver.org/ semantic versioning} (the public API is almost everything but
|
197
|
+
implementation details, that is, the method name, its recognized arguments and
|
198
|
+
the semantics of the returned value), etc.
|
199
|
+
|
200
|
+
Now, frankly, you can also copy/paste the source code of this simple array
|
201
|
+
extension in your own project. This tends to be much friendlier and much simpler
|
202
|
+
than using a gem, IMHO. Reuse by copy-pasting even has a name:
|
203
|
+
{http://revision-zero.org/reuse code scavenging}.
|
204
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Activate Bundler 1.x before anything else; abort with a hint when it cannot
# be loaded.
begin
  gem("bundler", "~> 1.0")
  require("bundler/setup")
rescue LoadError => load_error
  puts(load_error.message)
  abort("Bundler failed to load, (did you run 'gem install bundler' ?)")
end

# Dynamically load the gem spec. The file sitting next to this Rakefile is
# read and evaluated; both the path and the evaluated result are kept in
# globals.
$gemspec_file = File.expand_path('../summaryse.gemspec', __FILE__)
$gemspec      = Kernel.eval(File.read($gemspec_file))

# We run tests by default
task(:default => :test)

#
# Install all tasks found in tasks folder
#
# See .rake files there for complete documentation.
#
Dir["tasks/*.rake"].each { |rake_path| instance_eval(File.read(rake_path), rake_path) }
|
@@ -0,0 +1,33 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
2
|
+
require 'summaryse'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
# This is left.yaml
|
6
|
+
left = YAML.load <<-Y
|
7
|
+
hobbies:
|
8
|
+
- ruby
|
9
|
+
- rails
|
10
|
+
dependencies:
|
11
|
+
- {name: rspec, version: '2.6.4', for: [ runtime ]}
|
12
|
+
Y
|
13
|
+
|
14
|
+
# This is right.yaml
|
15
|
+
right = YAML.load <<-Y
|
16
|
+
hobbies:
|
17
|
+
- ruby
|
18
|
+
- music
|
19
|
+
dependencies:
|
20
|
+
- {name: rails, version: '3.0', for: [ runtime ]}
|
21
|
+
- {name: rspec, version: '2.6.4', for: [ test ]}
|
22
|
+
Y
|
23
|
+
|
24
|
+
# This is merge.yaml
|
25
|
+
merge = YAML.load <<-M
|
26
|
+
hobbies:
|
27
|
+
:union
|
28
|
+
dependencies:
|
29
|
+
- [name, version]
|
30
|
+
- for: :union
|
31
|
+
M
|
32
|
+
|
33
|
+
puts [ left, right ].summaryse(merge).to_yaml
|
data/lib/summaryse.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
class Array

  #
  # Apply a summarization to self
  #
  # == Base
  #
  # Basic summarization operators are
  # * :avg, :count, :sum
  # * :min, :max, :first, :last
  # * :union, :intersection
  #
  # They suppose the existence of the corresponding dyadic operators like
  # '+' (i.e. sum, avg), '|' (union), '&' (intersection) on the array values.
  # Any other Symbol is sent as-is to the array as a zero-argument method
  # (this is how :min, :max, :first and :last are implemented).
  #
  # Example:
  #   [1, 4, 12, 7].summaryse(:sum) # => 24
  #
  # == Hashes
  #
  # Summarizing arrays of hashes may be done by passing a hash of summarization
  # expressions. Calls will be made recursively in this case:
  #
  # Example:
  #   [
  #     { :hobbies => [:ruby],  :size => 12 },
  #     { :hobbies => [:music], :size => 17 }
  #   ].summaryse(:hobbies => :union, :size => :max)
  #   # => {:hobbies => [:ruby, :music], :size => 17}
  #
  # Use the nil key to specify the default aggregation to apply if not explicitly
  # specified. Keys having neither an explicit nor a default aggregation are
  # dropped from the result. A Proc object can also be passed as aggregation
  # operator. In this case, it will be called with the array of values on which
  # the aggregation must be done:
  #
  #   # In the example below, hobbies are summarized through default behavior
  #   # provided by the nil key. Sizes are summarized by the lambda.
  #   [
  #     { :hobbies => [:ruby],  :size => 12 },
  #     { :hobbies => [:music], :size => 17 }
  #   ].summaryse(nil => :union, :size => lambda{|a| a.join(',')})
  #   # => {:hobbies => [:ruby, :music], :size => "12,17"}
  #
  # == Arrays of Hashes
  #
  # Summarizing arrays of arrays of hashes may be done by passing an array of
  # two values. The first one is a 'by key' (an array of hash keys), the second
  # is the summarization hash to apply. The second value may be omitted, in
  # which case the result only contains the distinct 'by key' values. Resulting
  # hashes come in the order in which their 'by key' was first encountered.
  #
  # Example:
  #   [
  #     [ { :name => :yard,      :for => [ :devel   ] },
  #       { :name => :summaryse, :for => [ :runtime ] } ],
  #     [ { :name => :summaryse, :for => [ :devel   ] },
  #       { :name => :treetop,   :for => [ :runtime ] } ]
  #   ].summaryse([ [:name], {:for => :union} ])
  #   # => [ {:name => :yard,      :for => [:devel] },
  #   #      {:name => :summaryse, :for => [:runtime, :devel] },
  #   #      {:name => :treetop,   :for => [:runtime] } ]
  #
  # Returns the summarized value, whose kind depends on `agg` as described
  # above. An `agg` that is neither a Proc, a Symbol, a Hash nor an Array
  # yields nil.
  #
  def summaryse(agg)
    case agg
    when Proc
      agg.call(self)
    when :avg
      inject(:+).to_f/size
    when :count
      size
    when :intersection
      inject(:&)
    when :sum
      inject(:+)
    when :union
      inject(:|)
    when Symbol
      # NOTE: the symbol is dispatched through #send without any validation,
      # so mutating methods (e.g. :clear) would be invoked too. Do not build
      # aggregation specs from untrusted input.
      self.send(agg)
    when Hash
      # Collect, for each key, the values found in every hash of self...
      big = Hash.new{|h,k| h[k] = []}
      each{|t| t.each_pair{|k,v| big[k] << v}}
      # ... then summarize each collection with the operator registered for
      # that key, falling back to the nil key; unspecified keys are dropped.
      Hash[big.collect{|k,v|
        if summ = (agg[k] || agg[nil])
          [k,v.summaryse(summ)]
        end
      }.compact]
    when Array
      by, sub_agg = agg
      # A lone 'by key' (no summarization hash) used to fail on nil.merge;
      # treat it as an empty summarization. 'by key' attributes are always
      # kept, through :first (they are equal inside a group by construction).
      sub_agg = (sub_agg || {}).merge(Hash[by.collect{|k| [k, :first]}])
      # `keys` remembers the order in which 'by key' values are encountered
      keys = []
      grouped = Hash.new{|h,k| h[k] = []}
      flatten.each{|t|
        key = Hash[by.collect{|k| [k, t[k]] }]
        keys << key
        grouped[key] << t
      }
      keys.uniq.collect{|key| grouped[key].summaryse(sub_agg)}
    end
  end

end
|