rubadana 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +58 -21
- data/lib/rubadana.rb +45 -49
- data/lib/rubadana/version.rb +1 -1
- data/rubadana.gemspec +1 -1
- data/spec/analyse_invoices_spec.rb +47 -62
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f1d90348a8965bea4b82b2ce6fedb26bd7a1f148
|
4
|
+
data.tar.gz: 4dc9622fc7400d63e8bade76e05825f460d835f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e01e9dceaed7d60c22432b6fd3834b69493ebb683d0c62c981050419c1d4e0e236699a3f6a24bcc736c33608b8870e90f618cb4f216c5d9852da33906e80def4
|
7
|
+
data.tar.gz: 77bc8d0c7241055b3d7bccf603a4b0f82490b5eaea2115c8fccf3f041f8498cf07e2125644d31c512d6b52c9182b6b7a5aa33ac46ed4b8a7ebb985651c7f9f3e
|
data/README.md
CHANGED
@@ -2,6 +2,9 @@
|
|
2
2
|
|
3
3
|
Rubadana is an elementary ruby data-analysis package. It works with plain old ruby objects, not sql or databases or anything fancy like that.
|
4
4
|
|
5
|
+
The aim is to create a summary overview from a list of objects by basically running a group-by/map/reduce operation on your list. The input, grouping,
|
6
|
+
mapping, reducing, and display of the result are all independently variable.
|
7
|
+
|
5
8
|
## Installation
|
6
9
|
|
7
10
|
Add this line to your application's Gemfile:
|
@@ -20,33 +23,72 @@ Or install it yourself as:
|
|
20
23
|
|
21
24
|
## Usage
|
22
25
|
|
23
|
-
|
26
|
+
Here's a trivial example, returning the number of requests per user:
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
program = my_registry.build group: [:user], map: [:self], reduce: [:count]
|
30
|
+
analysis = program.run Request.all
|
31
|
+
```
|
32
|
+
|
33
|
+
In this example, `:user`, `:self`, and `:count` are plugins you will have provided to rubadana for extracting and manipulating your data.
|
34
|
+
|
35
|
+
`program.run` returns a list of `Rubadana::Analysis` instances which, for this example, have the following attributes:
|
24
36
|
|
25
|
-
|
37
|
+
|`key` | an `Array` holding the `:user` value shared by every item in the group |
|
38
|
+
|`list` | the subset of `Request.all` with the corresponding value for `:user` |
|
39
|
+
|`mapped` | in this case, the same as `list` (assuming the `:self` mapper returns the thing itself) |
|
40
|
+
|`reduced`| a list containing one integer, equal to the size of `list` |
|
41
|
+
|
42
|
+
|
43
|
+
In ordinary ruby, you would write `Request.all.group_by(&:user).map { |user, requests| [user, requests.count] }` to get the same information.
|
44
|
+
|
45
|
+
Here's a richer example which returns the sum of debits, credits, and account balances from a set of accounting transactions:
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
program = my_registry.build group: [:month, :account_number], map: [:debits, :credits, :balance], reduce: [:sum, :sum, :sum]
|
49
|
+
analysis = program.run AccountingTransaction.all
|
50
|
+
```
|
51
|
+
|
52
|
+
In this example, `:month`, `:account_number`, `:debits` and so on, are plugins you will have provided to rubadana for extracting and manipulating your data.
|
53
|
+
|
54
|
+
`program.run` returns a list of `Rubadana::Analysis` instances, with the following attributes:
|
55
|
+
|
56
|
+
|`key` | an `Array` holding the `:month` and `:account_number` values shared by every item in the group |
|
57
|
+
|`list` | the subset of `AccountingTransaction.all` having the corresponding values for `:month` and `:account_number` |
|
58
|
+
|`mapped` | the output of the `map` operations on `list`. This is a list of n-tuples, where `n` is the number of operations specified by the `map` parameter |
|
59
|
+
|`reduced`| the output of the `reduce` operations on `mapped`. This is a list of n values, one for each operation specified by the `reduce` parameter. |
|
60
|
+
|
61
|
+
In this example, `reduced` gives us the sum of all debits, the sum of all credits, and the sum of all balances, per month and account number.
|
62
|
+
|
63
|
+
See spec for some examples.
|
64
|
+
|
65
|
+
## Steps
|
66
|
+
|
67
|
+
1. Create a Registry for your mappers and your reducers
|
26
68
|
|
27
69
|
my_registry = Rubadana::Registry.new
|
28
70
|
|
29
|
-
2. Create and register some
|
71
|
+
2. Create and register some mappers:
|
30
72
|
|
31
73
|
```ruby
|
32
|
-
class SaleYear
|
33
|
-
def name
|
34
|
-
def
|
35
|
-
def
|
74
|
+
class SaleYear
|
75
|
+
def name ; :yearly ; end
|
76
|
+
def run thing ; thing.date.year ; end
|
77
|
+
def label value ; value ; end
|
36
78
|
end
|
37
79
|
|
38
|
-
my_registry.
|
80
|
+
my_registry.register_mapper SaleYear.new
|
39
81
|
```
|
40
82
|
|
41
|
-
3. Create and register some
|
83
|
+
3. Create and register some reducers:
|
42
84
|
|
43
85
|
```ruby
|
44
|
-
class
|
45
|
-
def name
|
46
|
-
def
|
86
|
+
class Sum
|
87
|
+
def name ; :sum ; end
|
88
|
+
def reduce things ; things.reduce :+ ; end
|
47
89
|
end
|
48
90
|
|
49
|
-
my_registry.
|
91
|
+
my_registry.register_reducer Sum.new
|
50
92
|
```
|
51
93
|
|
52
94
|
4. Build an analysis program and run it:
|
@@ -54,21 +96,16 @@ my_registry = Rubadana::Registry.new
|
|
54
96
|
```ruby
|
55
97
|
# this is a program to analyse invoices by year and product, giving the
|
56
98
|
# number of sales, the sum of sales and the average sale in each case
|
57
|
-
my_program = register.build
|
99
|
+
my_program = my_registry.build group: %i{ yearly }, map: %i{ self sale_amount sale_amount }, reduce: %i{ count sum average }
|
58
100
|
|
59
101
|
data = my_program.run(invoices)
|
60
102
|
```
|
61
103
|
|
62
|
-
`#run` returns an array of `
|
63
|
-
|
64
|
-
* `analyser` - a `Dimension` instance
|
65
|
-
* `group_value` - the common value of this dimension for all objects in this data-set
|
66
|
-
* `data` - either an accumulated value given by an accumulator, or a nested array of `DataSet` instances
|
67
|
-
|
104
|
+
`#run` returns an array of `Rubadana::Analysis` as described above.
|
68
105
|
|
69
106
|
## Contributing
|
70
107
|
|
71
|
-
1. Fork it ( https://github.com/
|
108
|
+
1. Fork it ( https://github.com/conanite/rubadana/fork )
|
72
109
|
2. Create your feature branch (`git checkout -b my-new-feature`)
|
73
110
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
74
111
|
4. Push to the branch (`git push origin my-new-feature`)
|
data/lib/rubadana.rb
CHANGED
@@ -3,75 +3,71 @@ require "rubadana/version"
|
|
3
3
|
module Rubadana
|
4
4
|
class Registry
|
5
5
|
def initialize
|
6
|
-
@
|
7
|
-
@
|
6
|
+
@mappers = Hash.new
|
7
|
+
@reducers = Hash.new
|
8
8
|
end
|
9
9
|
|
10
|
-
def
|
11
|
-
def
|
12
|
-
def
|
13
|
-
def
|
14
|
-
def
|
15
|
-
def
|
16
|
-
def
|
17
|
-
|
18
|
-
def build dnames, anames
|
19
|
-
dd = dnames.compact.map { |n| dimension n }
|
20
|
-
aa = anames.compact.map { |n| accumulator n }
|
21
|
-
Program.new(dd + aa)
|
22
|
-
end
|
10
|
+
def register_mapper m ; @mappers[m.name.to_sym] = m ; end
|
11
|
+
def register_reducer r ; @reducers[r.name.to_sym] = r ; end
|
12
|
+
def mapper name ; @mappers[name.to_sym] || raise("unknown mapper #{name.inspect}") ; end
|
13
|
+
def reducer name ; @reducers[name.to_sym] || raise("unknown reducer #{name.inspect}") ; end
|
14
|
+
def mappers names ; names.map { |n| mapper n } ; end
|
15
|
+
def reducers names ; names.map { |n| reducer n } ; end
|
16
|
+
def build params ; Programmer.new(params).build(self) ; end
|
23
17
|
end
|
24
18
|
|
25
|
-
class
|
26
|
-
|
27
|
-
def
|
19
|
+
class Self
|
20
|
+
def name ; :self ; end
|
21
|
+
def run thing ; thing ; end
|
28
22
|
end
|
29
23
|
|
30
|
-
class
|
31
|
-
def name
|
32
|
-
def
|
33
|
-
def run things, after ; [DataSet.new(analyser: self, data: accumulate(things))] + after.run(things) ; end
|
24
|
+
class Sum
|
25
|
+
def name ; :sum ; end
|
26
|
+
def reduce things ; things.reduce(:+) ; end
|
34
27
|
end
|
35
28
|
|
36
|
-
|
37
|
-
|
38
|
-
def
|
39
|
-
def accumulate things ; things.map { |thing| value_for(thing) }.reduce :+ ; end
|
29
|
+
class Count
|
30
|
+
def name ; :count ; end
|
31
|
+
def reduce things ; things.count ; end
|
40
32
|
end
|
41
33
|
|
42
|
-
class
|
43
|
-
def name
|
44
|
-
def
|
34
|
+
class CountUnique
|
35
|
+
def name ; :count_unique ; end
|
36
|
+
def reduce things ; things.uniq.count ; end
|
45
37
|
end
|
46
38
|
|
47
|
-
class Average
|
48
|
-
def
|
39
|
+
class Average
|
40
|
+
def name ; :average ; end
|
41
|
+
def reduce things ; things.reduce(:+) / (1.0 * things.count) ; end
|
49
42
|
end
|
50
43
|
|
51
|
-
class
|
52
|
-
|
53
|
-
def
|
54
|
-
def
|
44
|
+
class Analysis < Aduki::Initializable
|
45
|
+
attr_accessor :program, :key, :list, :mapped, :reduced
|
46
|
+
def key_labels ; key_str = program.group.zip(key).map { |g,k| g.label k } ; end
|
47
|
+
def to_s ; "#{key_labels.join ", "} : #{reduced.join ", "}" ; end
|
48
|
+
end
|
55
49
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
50
|
+
class Programmer < Aduki::Initializable
|
51
|
+
attr_accessor :group, :map, :reduce
|
52
|
+
def build reg
|
53
|
+
Program.new group: reg.mappers(group), map: reg.mappers(map), reduce: reg.reducers(reduce)
|
60
54
|
end
|
61
55
|
end
|
62
56
|
|
63
|
-
class Program
|
64
|
-
attr_accessor :
|
57
|
+
class Program < Aduki::Initializable
|
58
|
+
attr_accessor :group, :map, :reduce, :groups
|
65
59
|
|
66
|
-
def
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
end
|
60
|
+
def run things
|
61
|
+
self.groups = Hash.new { |h, k| h[k] = [] }
|
62
|
+
things.each { |thing|
|
63
|
+
groups[group.map { |g| g.run thing }] << thing
|
64
|
+
}
|
72
65
|
|
73
|
-
|
74
|
-
|
66
|
+
groups.map { |key, things|
|
67
|
+
mapped = map.map { |m| things.map { |thing| m.run thing } }
|
68
|
+
reduced = reduce.zip(mapped).map { |r, m| r.reduce m }
|
69
|
+
Analysis.new(program: self, key: key, list: things, mapped: mapped, reduced: reduced )
|
70
|
+
}
|
75
71
|
end
|
76
72
|
end
|
77
73
|
end
|
data/lib/rubadana/version.rb
CHANGED
data/rubadana.gemspec
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
# coding: utf-8
|
2
1
|
lib = File.expand_path('../lib', __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
3
|
require 'rubadana/version'
|
@@ -22,4 +21,5 @@ Gem::Specification.new do |spec|
|
|
22
21
|
spec.add_development_dependency "bundler", "~> 1.7"
|
23
22
|
spec.add_development_dependency "rake", "~> 10.0"
|
24
23
|
spec.add_development_dependency 'rspec'
|
24
|
+
spec.add_development_dependency 'rspec_numbering_formatter'
|
25
25
|
end
|
@@ -7,38 +7,34 @@ describe "analyse invoices" do
|
|
7
7
|
attr_accessor :type, :date, :amount
|
8
8
|
end
|
9
9
|
|
10
|
-
class InvoiceMonth
|
11
|
-
def name
|
12
|
-
def
|
13
|
-
def
|
10
|
+
class InvoiceMonth
|
11
|
+
def name ; "monthly" ; end
|
12
|
+
def run thing ; Date.new(thing.date.year, thing.date.month, 1) ; end # rails just use #beginning_of_month
|
13
|
+
def label value ; value.strftime "%B %Y" ; end # better with I18n
|
14
14
|
end
|
15
15
|
|
16
|
-
class InvoiceYear
|
17
|
-
def name
|
18
|
-
def
|
19
|
-
def
|
16
|
+
class InvoiceYear
|
17
|
+
def name ; "yearly" ; end
|
18
|
+
def run thing ; thing.date.year ; end
|
19
|
+
def label value ; value ; end
|
20
20
|
end
|
21
21
|
|
22
|
-
class InvoiceType
|
23
|
-
def name
|
24
|
-
def
|
25
|
-
def
|
22
|
+
class InvoiceType
|
23
|
+
def name ; "type" ; end
|
24
|
+
def run thing ; thing.type ; end
|
25
|
+
def label value ; value.to_s ; end
|
26
26
|
end
|
27
27
|
|
28
|
-
class InvoiceScale
|
29
|
-
def name
|
30
|
-
def
|
31
|
-
def
|
28
|
+
class InvoiceScale
|
29
|
+
def name ; "scale" ; end
|
30
|
+
def run thing ; Math.log(thing.amount, 10).to_i ; end
|
31
|
+
def label value ; value ; end
|
32
32
|
end
|
33
33
|
|
34
|
-
class
|
35
|
-
def name
|
36
|
-
def
|
37
|
-
|
38
|
-
|
39
|
-
class InvoiceAvg < Rubadana::Average
|
40
|
-
def name ; "avg-amount" ; end
|
41
|
-
def value_for thing ; thing.amount ; end
|
34
|
+
class InvoiceAmount
|
35
|
+
def name ; :invoice_amount ; end
|
36
|
+
def run thing ; thing.amount ; end
|
37
|
+
def label value ; value.to_s ; end
|
42
38
|
end
|
43
39
|
|
44
40
|
let(:i00) { Invoice.new type: "SalesInvoice" , date: date("2020-02-01"), amount: 53 }
|
@@ -62,19 +58,21 @@ describe "analyse invoices" do
|
|
62
58
|
let(:register) { Rubadana::Registry.new }
|
63
59
|
|
64
60
|
before {
|
65
|
-
register.
|
66
|
-
register.
|
67
|
-
register.
|
68
|
-
register.
|
69
|
-
register.
|
70
|
-
register.
|
71
|
-
register.
|
61
|
+
register.register_mapper InvoiceYear.new
|
62
|
+
register.register_mapper InvoiceMonth.new
|
63
|
+
register.register_mapper InvoiceType.new
|
64
|
+
register.register_mapper InvoiceScale.new
|
65
|
+
register.register_mapper InvoiceAmount.new
|
66
|
+
register.register_mapper Rubadana::Self.new
|
67
|
+
register.register_reducer Rubadana::Sum.new
|
68
|
+
register.register_reducer Rubadana::Average.new
|
69
|
+
register.register_reducer Rubadana::Count.new
|
72
70
|
}
|
73
71
|
|
74
72
|
it "groups items by month and counts them" do
|
75
|
-
program = register.build
|
73
|
+
program = register.build group: %i{ monthly }, map: %i{ self }, reduce: %i{ count }
|
76
74
|
data = program.run(invoices)
|
77
|
-
actual = data.sort_by(&:
|
75
|
+
actual = data.sort_by(&:key).map { |d| d.key_labels + d.reduced }
|
78
76
|
expected = [
|
79
77
|
["February 2020", 3],
|
80
78
|
["May 2020" , 3],
|
@@ -88,9 +86,10 @@ describe "analyse invoices" do
|
|
88
86
|
end
|
89
87
|
|
90
88
|
it "groups items by year and sums them" do
|
91
|
-
program = register.build
|
89
|
+
program = register.build group: %i{ yearly }, map: %i{ invoice_amount }, reduce: %i{ sum }
|
92
90
|
data = program.run(invoices)
|
93
|
-
|
91
|
+
# data.sort_by(&:key).each { |d| puts d }
|
92
|
+
actual = data.sort_by(&:key).map { |d| d.key_labels + d.reduced }
|
94
93
|
expected = [
|
95
94
|
[2020, 175166],
|
96
95
|
[2021, 4369],
|
@@ -100,9 +99,9 @@ describe "analyse invoices" do
|
|
100
99
|
end
|
101
100
|
|
102
101
|
it "groups items by year and counts them" do
|
103
|
-
program = register.build
|
102
|
+
program = register.build group: %i{ yearly }, map: %i{ self }, reduce: %i{ count }
|
104
103
|
data = program.run(invoices)
|
105
|
-
actual = data.sort_by(&:
|
104
|
+
actual = data.sort_by(&:key).map { |d| d.key_labels + d.reduced }
|
106
105
|
expected = [
|
107
106
|
[2020, 8],
|
108
107
|
[2021, 4],
|
@@ -112,9 +111,9 @@ describe "analyse invoices" do
|
|
112
111
|
end
|
113
112
|
|
114
113
|
it "groups items by year and averages them" do
|
115
|
-
program = register.build
|
114
|
+
program = register.build group: %i{ yearly }, map: %i{ invoice_amount }, reduce: %i{ average }
|
116
115
|
data = program.run(invoices)
|
117
|
-
actual = data.sort_by(&:
|
116
|
+
actual = data.sort_by(&:key).map { |d| d.key_labels + d.reduced }
|
118
117
|
expected = [
|
119
118
|
[2020, 21895.75],
|
120
119
|
[2021, 1092.25],
|
@@ -124,9 +123,9 @@ describe "analyse invoices" do
|
|
124
123
|
end
|
125
124
|
|
126
125
|
it "groups items by year and gives the count, sum, and average" do
|
127
|
-
program = register.build
|
126
|
+
program = register.build group: %i{ yearly }, map: %i{ invoice_amount invoice_amount invoice_amount }, reduce: %i{ count sum average }
|
128
127
|
data = program.run(invoices)
|
129
|
-
actual = data.sort_by(&:
|
128
|
+
actual = data.sort_by(&:key).map { |d| d.key_labels + d.reduced }
|
130
129
|
expected = [
|
131
130
|
[2020, 8, 175166, 21895.75],
|
132
131
|
[2021, 4, 4369, 1092.25],
|
@@ -135,14 +134,10 @@ describe "analyse invoices" do
|
|
135
134
|
expect(actual).to eq expected
|
136
135
|
end
|
137
136
|
|
138
|
-
it "groups items by year and by type and
|
139
|
-
program = register.build
|
137
|
+
it "groups items by year and by type and counts them" do
|
138
|
+
program = register.build group: %i{ yearly type }, map: %i{ self }, reduce: %i{ count }
|
140
139
|
data = program.run(invoices)
|
141
|
-
actual = data.sort_by(&:
|
142
|
-
d.data.sort_by(&:group_value).each { |s|
|
143
|
-
arr << [d.value_label, s.value_label] + s.data.map(&:data) }
|
144
|
-
arr
|
145
|
-
}
|
140
|
+
actual = data.sort_by(&:key).map { |d| d.key_labels + d.reduced }
|
146
141
|
|
147
142
|
expected = [
|
148
143
|
[2020 , "PurchaseCreditNote" , 2 ],
|
@@ -160,13 +155,9 @@ describe "analyse invoices" do
|
|
160
155
|
end
|
161
156
|
|
162
157
|
it "groups items by scale and by type and sums them" do
|
163
|
-
program = register.build
|
158
|
+
program = register.build group: %i{ scale type }, map: %i{ invoice_amount }, reduce: %i{ sum }
|
164
159
|
data = program.run(invoices)
|
165
|
-
actual = data.sort_by(&:
|
166
|
-
d.data.sort_by(&:group_value).each { |s|
|
167
|
-
arr << [d.value_label, s.value_label] + s.data.map(&:data) }
|
168
|
-
arr
|
169
|
-
}
|
160
|
+
actual = data.sort_by(&:key).map { |d| d.key_labels + d.reduced }
|
170
161
|
|
171
162
|
expected = [
|
172
163
|
[1 , "Order" , 59 ] ,
|
@@ -187,15 +178,9 @@ describe "analyse invoices" do
|
|
187
178
|
end
|
188
179
|
|
189
180
|
it "groups items by year and by type and by scale and counts them" do
|
190
|
-
program = register.build
|
181
|
+
program = register.build group: %i{ yearly type scale }, map: %i{ invoice_amount }, reduce: %i{ sum }
|
191
182
|
data = program.run(invoices)
|
192
|
-
actual = data.sort_by(&:
|
193
|
-
d.data.sort_by(&:group_value).each { |s|
|
194
|
-
s.data.sort_by(&:group_value).each { |z|
|
195
|
-
arr << [d.value_label, s.value_label, z.value_label] + z.data.map(&:data) }
|
196
|
-
}
|
197
|
-
arr
|
198
|
-
}
|
183
|
+
actual = data.sort_by(&:key).map { |d| d.key_labels + d.reduced }
|
199
184
|
|
200
185
|
expected = [
|
201
186
|
[2020 , "PurchaseCreditNote" , 1 , 23.0 ],
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rubadana
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Conan Dalton
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-11-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aduki
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec_numbering_formatter
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
description: " Simple data grouping and calculations. Bring your own extractors. "
|
70
84
|
email:
|
71
85
|
- conan@conandalton.net
|