summaryse 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +5 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +34 -0
- data/LICENCE.md +22 -0
- data/Manifest.txt +14 -0
- data/README.md +204 -0
- data/Rakefile +23 -0
- data/examples/yaml_merge.rb +33 -0
- data/lib/summaryse.rb +3 -0
- data/lib/summaryse/core_ext/array.rb +99 -0
- data/lib/summaryse/loader.rb +0 -0
- data/lib/summaryse/version.rb +14 -0
- data/spec/readme_spec.rb +112 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/summaryse_spec.rb +111 -0
- data/summaryse.gemspec +190 -0
- data/summaryse.noespec +28 -0
- data/tasks/debug_mail.rake +78 -0
- data/tasks/debug_mail.txt +13 -0
- data/tasks/gem.rake +68 -0
- data/tasks/spec_test.rake +79 -0
- data/tasks/unit_test.rake +77 -0
- data/tasks/yard.rake +51 -0
- metadata +149 -0
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
summaryse (1.0.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
bluecloth (2.0.11)
|
10
|
+
diff-lcs (1.1.2)
|
11
|
+
rake (0.9.2)
|
12
|
+
rspec (2.4.0)
|
13
|
+
rspec-core (~> 2.4.0)
|
14
|
+
rspec-expectations (~> 2.4.0)
|
15
|
+
rspec-mocks (~> 2.4.0)
|
16
|
+
rspec-core (2.4.0)
|
17
|
+
rspec-expectations (2.4.0)
|
18
|
+
diff-lcs (~> 1.1.2)
|
19
|
+
rspec-mocks (2.4.0)
|
20
|
+
wlang (0.10.2)
|
21
|
+
yard (0.7.2)
|
22
|
+
|
23
|
+
PLATFORMS
|
24
|
+
java
|
25
|
+
ruby
|
26
|
+
|
27
|
+
DEPENDENCIES
|
28
|
+
bluecloth (~> 2.0.9)
|
29
|
+
bundler (~> 1.0)
|
30
|
+
rake (~> 0.9.2)
|
31
|
+
rspec (~> 2.4.0)
|
32
|
+
summaryse!
|
33
|
+
wlang (~> 0.10.1)
|
34
|
+
yard (~> 0.7.2)
|
data/LICENCE.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# The MIT Licence
|
2
|
+
|
3
|
+
Copyright (c) 2011 - Bernard Lambeau
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Manifest.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,204 @@
|
|
1
|
+
# Array#summaryse
|
2
|
+
|
3
|
+
[sudo] gem install summaryse
|
4
|
+
|
5
|
+
## Links
|
6
|
+
|
7
|
+
* {http://rubydoc.info/github/blambeau/summaryse/master/frames} (read this file there!)
|
8
|
+
* {http://github.com/blambeau/summaryse} (source code)
|
9
|
+
|
10
|
+
## Summaryse's summary
|
11
|
+
|
12
|
+
Summaryse provides a core extension, namely Array#summaryse. Oh, sorry, I must
|
13
|
+
add: "OMG, a core extension :-/ If you are aware of any compatibility issue,
|
14
|
+
let me know!".
|
15
|
+
|
16
|
+
So, what is Array#summaryse? Roughly, a way to compute aggregations. This goes
|
17
|
+
from simple aggregations on simple values (summing integers), to complex aggregations
|
18
|
+
on complex values (merging arrays of hashes that contain hashes and array of
|
19
|
+
hashes that...). Below is a typical use case showing how Array#summaryse can be
|
20
|
+
used to merge YAML files. Simpler examples are given a bit later.
|
21
|
+
|
22
|
+
## An opinionated use-case -- YAML merging
|
23
|
+
|
24
|
+
In many projects of mine including
|
25
|
+
{https://github.com/blambeau/noe noe},
|
26
|
+
{https://github.com/blambeau/agora agora} or
|
27
|
+
{https://github.com/blambeau/dbagile dbagile}, a common need is to merge YAML files.
|
28
|
+
Merging YAML files is complex because you need full control of how merging applies
|
29
|
+
on specific tree nodes. Summaryse solves this very effectively.
|
30
|
+
|
31
|
+
# This is left.yaml
|
32
|
+
left = YAML.load ... # syntactically wrong, but to avoid Yard's rewriting
|
33
|
+
hobbies:
|
34
|
+
- ruby
|
35
|
+
- rails
|
36
|
+
dependencies:
|
37
|
+
- {name: rspec, version: '2.6.4', for: [ runtime ]}
|
38
|
+
...
|
39
|
+
|
40
|
+
# This is right.yaml
|
41
|
+
right = YAML.load ...
|
42
|
+
hobbies:
|
43
|
+
- ruby
|
44
|
+
- music
|
45
|
+
dependencies:
|
46
|
+
- {name: rails, version: '3.0', for: [ runtime ]}
|
47
|
+
- {name: rspec, version: '2.6.4', for: [ test ]}
|
48
|
+
...
|
49
|
+
|
50
|
+
# This is merge.yaml
|
51
|
+
merge = YAML.load ...
|
52
|
+
hobbies: # on hobbies, we simply make a set-based union
|
53
|
+
:union
|
54
|
+
dependencies: # on dependencies, we apply recursively
|
55
|
+
- [name, version] # - 'aggregate by name and version'
|
56
|
+
- for: :union # - compute the union of 'for' usage
|
57
|
+
...
|
58
|
+
|
59
|
+
# Merge and re-dump
|
60
|
+
[ left, right ].summaryse(merge).to_yaml
|
61
|
+
|
62
|
+
# This is the (pretty-printed) result
|
63
|
+
hobbies:
|
64
|
+
- ruby
|
65
|
+
- rails
|
66
|
+
- music
|
67
|
+
dependencies:
|
68
|
+
- {name: rspec, version: '2.6.4', for: [ runtime, test ]}
|
69
|
+
- {name: rails, version: '3.0', for: [ runtime ]}
|
70
|
+
|
71
|
+
This is a very opinionated, yet already complex, case-study. Let me go back to
|
72
|
+
a more general explanation now.
|
73
|
+
|
74
|
+
## On simple values (integers, floats, ...)
|
75
|
+
|
76
|
+
Summarizing an array of simple values yields -> a simple value... Below are some
|
77
|
+
examples on integers. We are in ruby, so duck-typing applies everywhere.
|
78
|
+
|
79
|
+
### Arithmetics & Algebra
|
80
|
+
|
81
|
+
# :count, same as #size
|
82
|
+
[1, 4, 12, 7].summaryse(:count) # => 4
|
83
|
+
|
84
|
+
# :sum, same as #inject(:+)
|
85
|
+
[1, 4, 12, 7].summaryse(:sum) # => 24
|
86
|
+
|
87
|
+
# :avg, same as #inject(:+)/size
|
88
|
+
[1, 4, 12, 7].summaryse(:avg) # => 6.0
|
89
|
+
|
90
|
+
### Array theory
|
91
|
+
|
92
|
+
# :min, same as #min
|
93
|
+
[1, 4, 12, 7].summaryse(:min) # => 1
|
94
|
+
|
95
|
+
# :max, same as #max
|
96
|
+
[1, 4, 12, 7].summaryse(:max) # => 12
|
97
|
+
|
98
|
+
# :first, same as #first
|
99
|
+
[1, 4, 12, 7].summaryse(:first) # => 1
|
100
|
+
|
101
|
+
# :last, same as #last
|
102
|
+
[1, 4, 12, 7].summaryse(:last) # => 7
|
103
|
+
|
104
|
+
### Set theory
|
105
|
+
|
106
|
+
# :union, same as #inject(:|)
|
107
|
+
[ [1, 4], [12, 1, 7], [1] ].summaryse(:union) # => [1, 4, 12, 7]
|
108
|
+
|
109
|
+
# :intersection, same as #inject(:&)
|
110
|
+
[ [1, 4], [12, 1, 7], [1] ].summaryse(:intersection) # => [1]
|
111
|
+
|
112
|
+
## On Hash-es
|
113
|
+
|
114
|
+
Summarizing an Array of Hash-es yields -> a Hash.
|
115
|
+
|
116
|
+
Previous section provided the base cases. You can use them on elements of hashes
|
117
|
+
by passing a ... Hash of course:
|
118
|
+
|
119
|
+
[
|
120
|
+
{ :hobbies => [:ruby], :size => 12 },
|
121
|
+
{ :hobbies => [:music], :size => 17 }
|
122
|
+
].summaryse(:hobbies => :union, :size => :max)
|
123
|
+
# => {:hobbies => [:ruby, :music], :size => 17}
|
124
|
+
|
125
|
+
And it works recursively, of course:
|
126
|
+
|
127
|
+
[
|
128
|
+
{ :hobbies => {:day => [:ruby], :night => [:ruby] } },
|
129
|
+
{ :hobbies => {:day => [], :night => [:sleep]} }
|
130
|
+
].summaryse(:hobbies => {:day => :union, :night => :union})
|
131
|
+
# => {:hobbies => {:day => [:ruby], :night => [:ruby, :sleep]}}
|
132
|
+
|
133
|
+
### Specifying default behavior
|
134
|
+
|
135
|
+
By default, the returned hash only contains elements for which you have provided
|
136
|
+
a summarization heuristic. However, you can use a nil key to specify the default
|
137
|
+
behavior to use on others:
|
138
|
+
|
139
|
+
[
|
140
|
+
{ :hobbies => [:ruby], :size => 12 },
|
141
|
+
{ :hobbies => [:music], :size => 17 }
|
142
|
+
].summaryse(:hobbies => :union, nil => :first)
|
143
|
+
# => {:hobbies => [:ruby, :music], :size => 12}
|
144
|
+
|
145
|
+
### Specifying with lambdas
|
146
|
+
|
147
|
+
When no default summarization function fits your needs, just pass a lambda. It
|
148
|
+
will be called with the array of values on which aggregation must be done:
|
149
|
+
|
150
|
+
[
|
151
|
+
{ :hobbies => [:ruby], :size => 12 },
|
152
|
+
{ :hobbies => [:music], :size => 17 }
|
153
|
+
].summaryse(:hobbies => :union, :size => lambda{|a|
|
154
|
+
a.join(', ')
|
155
|
+
})
|
156
|
+
# => {:hobbies => [:ruby, :music], :size => "12, 17"}
|
157
|
+
|
158
|
+
## On Arrays of Hash-es
|
159
|
+
|
160
|
+
Summarizing an Array of Array-s of Hash-es yields -> an Array of Hash-es
|
161
|
+
|
162
|
+
There is a subtlety here, as you have to specify the "by key", that is, what
|
163
|
+
hash elements form the summarization grouping terms.
|
164
|
+
|
165
|
+
[
|
166
|
+
[ { :name => :yard, :for => [ :devel ] },
|
167
|
+
{ :name => :summaryse, :for => [ :runtime ] } ],
|
168
|
+
[ { :name => :summaryse, :for => [ :devel ] },
|
169
|
+
{ :name => :treetop, :for => [ :runtime ] } ]
|
170
|
+
].summaryse([ [:name], {:for => :union} ])
|
171
|
+
# => [ {:name => :yard, :for => [:devel] },
|
172
|
+
# {:name => :summaryse, :for => [:devel, :runtime] },
|
173
|
+
# {:name => :treetop, :for => [:runtime] } ]
|
174
|
+
|
175
|
+
A quick remark: when merging arrays of hashes, #summaryse guarantees that the
|
176
|
+
returned hashes are in order of encountered 'by key' values. That is, in the
|
177
|
+
example above, yard comes before summaryse that comes before treetop because
|
178
|
+
this is the order in which they have been seen initially.
|
179
|
+
|
180
|
+
# By the way, why this stupid name?
|
181
|
+
|
182
|
+
Just because summarize was already an {https://rubygems.org/gems/summarize existing gem}.
|
183
|
+
Summaryse is also much less likely to cause a name clash on the Array class. And
|
184
|
+
I'm a French-speaking developer :-)
|
185
|
+
|
186
|
+
And where does 'summarize' come from? The name is inspired by (yet not equivalent
|
187
|
+
to) {http://en.wikipedia.org/wiki/D_(data_language_specification)#Tutorial_D
|
188
|
+
TUTORIAL D}'s summarization operator on relations.
|
189
|
+
See my {https://github.com/blambeau/alf alf} project. Array#summaryse is
|
190
|
+
rubyiesque in mind and does not conform to a purely relational vision of
|
191
|
+
summarization, though.
|
192
|
+
|
193
|
+
# Contribute, Versioning and so on.
|
194
|
+
|
195
|
+
As usual: the code is on {http://github.com/blambeau/summaryse github}, I follow
|
196
|
+
{http://semver.org/ semantic versioning} (the public API is almost everything but
|
197
|
+
implementation details, that is, the method name, its recognized arguments and
|
198
|
+
the semantics of the returned value), etc.
|
199
|
+
|
200
|
+
Now, frankly, you can also copy/paste the source code of this simple array
|
201
|
+
extension in your own project. This tends to be much friendlier and much simpler
|
202
|
+
than using a gem, IMHO. Reuse by copy-pasting even has a name:
|
203
|
+
{http://revision-zero.org/reuse code scavenging}.
|
204
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Activate Bundler (1.x series) before anything else; without it the
# dependencies declared in the Gemfile cannot be resolved, so we give up early.
begin
  gem("bundler", "~> 1.0")
  require("bundler/setup")
rescue LoadError => load_error
  puts(load_error.message)
  abort("Bundler failed to load, (did you run 'gem install bundler' ?)")
end

# Evaluate the gemspec sitting next to this Rakefile. Both values are kept in
# globals because the task files loaded below are evaluated in this context.
$gemspec_file = File.expand_path('summaryse.gemspec', File.dirname(__FILE__))
$gemspec = Kernel.eval(File.read($gemspec_file))

# Running `rake` without arguments runs the tests
task(:default => :test)

#
# Every .rake file of the tasks folder is evaluated here, using its own path
# as file name so that backtraces point to the right place.
#
# See .rake files there for complete documentation.
#
Dir.glob("tasks/*.rake").each { |rake_file|
  instance_eval(File.read(rake_file), rake_file)
}
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Make the library loadable straight from the source tree
lib_dir = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir)
require 'summaryse'
require 'yaml'

# Content of left.yaml
left = YAML.load(<<-Y)
  hobbies:
    - ruby
    - rails
  dependencies:
    - {name: rspec, version: '2.6.4', for: [ runtime ]}
Y

# Content of right.yaml
right = YAML.load(<<-Y)
  hobbies:
    - ruby
    - music
  dependencies:
    - {name: rails, version: '3.0', for: [ runtime ]}
    - {name: rspec, version: '2.6.4', for: [ test ]}
Y

# Content of merge.yaml: hobbies are merged through a set-based union,
# dependencies are grouped by (name, version) with a union of their 'for'
merge = YAML.load(<<-M)
  hobbies:
    :union
  dependencies:
    - [name, version]
    - for: :union
M

# Merge both documents and dump the result as YAML
merged = [left, right].summaryse(merge)
puts merged.to_yaml
|
data/lib/summaryse.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
class Array

  #
  # Apply a summarization to self
  #
  # == Base
  #
  # Basic summarization operators are
  # * :avg, :count, :sum
  # * :min, :max, :first, :last
  # * :union, :intersection
  #
  # They suppose the existence of the corresponding dyadic operators like
  # '+' (i.e. sum, avg), '|' (union), '&' (intersection) on the array values.
  # Any other Symbol is invoked on the array itself as a public method taking
  # no argument (this is how :min, :max, :first and :last are implemented).
  # Private methods are deliberately not reachable this way, so that a
  # summarization loaded from data (a YAML file, for instance) cannot trigger
  # things like Kernel#exit.
  #
  # On an empty array, :avg returns nil (as :sum, :min, :max, :first and :last
  # do) and :count returns 0.
  #
  # Example:
  #   [1, 4, 12, 7].summaryse(:sum) # => 24
  #
  # == Hashes
  #
  # Summarizing arrays of hashes may be done by passing a hash of summarization
  # expressions. Calls will be made recursively in this case:
  #
  # Example:
  #   [
  #     { :hobbies => [:ruby],  :size => 12 },
  #     { :hobbies => [:music], :size => 17 }
  #   ].summaryse(:hobbies => :union, :size => :max)
  #   # => {:hobbies => [:ruby, :music], :size => 17}
  #
  # Use the nil key to specify the default aggregation to apply if not explicitly
  # specified. Keys without any applicable aggregation are left out of the
  # result. A Proc object can also be passed as aggregation operator. In this
  # case, it will be called with the array of values on which the aggregation
  # must be done:
  #
  #   # In the example below, hobbies are summarized through default behavior
  #   # provided by the nil key. Sizes are summarized by the lambda.
  #   [
  #     { :hobbies => [:ruby],  :size => 12 },
  #     { :hobbies => [:music], :size => 17 }
  #   ].summaryse(nil => :union, :size => lambda{|a| a.join(',')})
  #   # => {:hobbies => [:ruby, :music], :size => "12,17"}
  #
  # == Arrays of Hashes
  #
  # Summarizing arrays of arrays of hashes may be done by passing an array of
  # two values. The first one is a 'by key', the second is the summarization
  # hash to apply. When the second value is omitted, the resulting hashes only
  # contain the 'by key' attributes. Resulting hashes come in the order in
  # which their 'by key' values have been encountered.
  #
  # Example:
  #   [
  #     [ { :name => :yard,      :for => [ :devel   ] },
  #       { :name => :summaryse, :for => [ :runtime ] } ],
  #     [ { :name => :summaryse, :for => [ :devel   ] },
  #       { :name => :treetop,   :for => [ :runtime ] } ]
  #   ].summaryse([ [:name], {:for => :union} ])
  #   # => [ {:name => :yard,      :for => [:devel] },
  #   #      {:name => :summaryse, :for => [:devel, :runtime] },
  #   #      {:name => :treetop,   :for => [:runtime] } ]
  #
  # Returns nil when +agg+ is none of a Proc, a Symbol, a Hash or an Array.
  #
  def summaryse(agg)
    case agg
    when Proc
      agg.call(self)
    when :avg
      # No value, no average: avoid the NaN that 0.0/0 would give
      empty? ? nil : inject(:+).to_f / size
    when :count
      size
    when :intersection
      inject(:&)
    when :sum
      inject(:+)
    when :union
      inject(:|)
    when Symbol
      # public_send (and not send) keeps private methods out of reach
      public_send(agg)
    when Hash
      # Collect, for each key, the values found in all hashes of self
      values_by_key = Hash.new { |h, k| h[k] = [] }
      each { |tuple| tuple.each_pair { |k, v| values_by_key[k] << v } }

      # Summarize each collected array with its own aggregator, falling
      # back to the default one under the nil key; skip the key otherwise
      pairs = values_by_key.map { |k, values|
        summ = agg[k] || agg[nil]
        [k, values.summaryse(summ)] if summ
      }
      Hash[pairs.compact]
    when Array
      by, summ = agg

      # 'by key' attributes are equal inside a group, :first picks them up.
      # A missing summarization hash means "keep the by key only".
      summ = (summ || {}).merge(Hash[by.map { |k| [k, :first] }])

      # Group the hashes by their 'by key' projection, remembering the
      # order in which the different projections are encountered
      order = []
      grouped = Hash.new { |h, k| h[k] = [] }
      flatten.each { |tuple|
        key = Hash[by.map { |k| [k, tuple[k]] }]
        order << key unless grouped.key?(key)
        grouped[key] << tuple
      }
      order.map { |key| grouped[key].summaryse(summ) }
    end
  end

end
|