goldmine 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +23 -11
- data/README.md +186 -38
- data/goldmine.gemspec +1 -0
- data/lib/goldmine.rb +2 -0
- data/lib/goldmine/array_miner.rb +1 -1
- data/lib/goldmine/cache.rb +28 -0
- data/lib/goldmine/hash_miner.rb +3 -2
- data/lib/goldmine/hash_rollup.rb +24 -6
- data/lib/goldmine/rollup_context.rb +23 -0
- data/lib/goldmine/version.rb +1 -1
- data/test/test_goldmine.rb +66 -1
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 214cfc93706ed23076acfc401061d060e1753b59
|
4
|
+
data.tar.gz: 754cf23c6dd3c8437d18e83daf7dd9f86e063781
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 43fdf070ee47b3d767027bcea6c1518c6f948bff9885a0f283dbbfb419d2c560fbd62cb1670027281fdce5c17989414270786b968db0a17b14ac19932b797a1b
|
7
|
+
data.tar.gz: e82d4461294d81e9afd982b429dac780ebd9fd840d4c55176896df95d05f8a38f2a6479446b31ea8dc219e397144c3a976cc48caa27df94ac1069dffaafb3d28
|
data/Gemfile.lock
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
goldmine (
|
4
|
+
goldmine (2.0.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
binding_of_caller (0.7.2)
|
10
10
|
debug_inspector (>= 0.0.1)
|
11
|
-
byebug (
|
11
|
+
byebug (5.0.0)
|
12
12
|
columnize (= 0.9.0)
|
13
13
|
coderay (1.1.0)
|
14
14
|
columnize (0.9.0)
|
15
|
-
coveralls (0.8.
|
15
|
+
coveralls (0.8.3)
|
16
16
|
json (~> 1.8)
|
17
17
|
rest-client (>= 1.6.8, < 2)
|
18
18
|
simplecov (~> 0.10.0)
|
@@ -20,22 +20,22 @@ GEM
|
|
20
20
|
thor (~> 0.19.1)
|
21
21
|
debug_inspector (0.0.2)
|
22
22
|
docile (1.1.5)
|
23
|
-
domain_name (0.5.
|
23
|
+
domain_name (0.5.25)
|
24
24
|
unf (>= 0.0.5, < 1.0.0)
|
25
25
|
http-cookie (1.0.2)
|
26
26
|
domain_name (~> 0.5)
|
27
27
|
interception (0.5)
|
28
|
-
json (1.8.
|
28
|
+
json (1.8.3)
|
29
29
|
method_source (0.8.2)
|
30
|
-
mime-types (2.6.
|
30
|
+
mime-types (2.6.2)
|
31
31
|
netrc (0.10.3)
|
32
32
|
os (0.9.6)
|
33
|
-
pry (0.10.
|
33
|
+
pry (0.10.3)
|
34
34
|
coderay (~> 1.1.0)
|
35
35
|
method_source (~> 0.8.1)
|
36
36
|
slop (~> 3.4)
|
37
|
-
pry-byebug (3.
|
38
|
-
byebug (~>
|
37
|
+
pry-byebug (3.2.0)
|
38
|
+
byebug (~> 5.0)
|
39
39
|
pry (~> 0.10)
|
40
40
|
pry-rescue (1.4.2)
|
41
41
|
interception (>= 0.5)
|
@@ -49,6 +49,9 @@ GEM
|
|
49
49
|
pry-byebug
|
50
50
|
pry-rescue
|
51
51
|
pry-stack_explorer
|
52
|
+
rack (1.6.4)
|
53
|
+
rack-protection (1.5.3)
|
54
|
+
rack
|
52
55
|
rake (10.4.2)
|
53
56
|
rest-client (1.8.0)
|
54
57
|
http-cookie (>= 1.0.2, < 2.0)
|
@@ -59,11 +62,16 @@ GEM
|
|
59
62
|
json (~> 1.8)
|
60
63
|
simplecov-html (~> 0.10.0)
|
61
64
|
simplecov-html (0.10.0)
|
65
|
+
sinatra (1.4.6)
|
66
|
+
rack (~> 1.4)
|
67
|
+
rack-protection (~> 1.4)
|
68
|
+
tilt (>= 1.3, < 3)
|
62
69
|
slop (3.6.0)
|
63
|
-
term-ansicolor (1.3.
|
70
|
+
term-ansicolor (1.3.2)
|
64
71
|
tins (~> 1.0)
|
65
72
|
thor (0.19.1)
|
66
|
-
|
73
|
+
tilt (2.0.1)
|
74
|
+
tins (1.6.0)
|
67
75
|
unf (0.1.4)
|
68
76
|
unf_ext
|
69
77
|
unf_ext (0.0.7.1)
|
@@ -76,3 +84,7 @@ DEPENDENCIES
|
|
76
84
|
goldmine!
|
77
85
|
pry-test
|
78
86
|
rake
|
87
|
+
sinatra
|
88
|
+
|
89
|
+
BUNDLED WITH
|
90
|
+
1.10.6
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
[![Lines of Code](http://img.shields.io/badge/lines_of_code-
|
1
|
+
[![Lines of Code](http://img.shields.io/badge/lines_of_code-193-brightgreen.svg?style=flat)](http://blog.codinghorror.com/the-best-code-is-no-code-at-all/)
|
2
2
|
[![Code Status](http://img.shields.io/codeclimate/github/hopsoft/goldmine.svg?style=flat)](https://codeclimate.com/github/hopsoft/goldmine)
|
3
3
|
[![Dependency Status](http://img.shields.io/gemnasium/hopsoft/goldmine.svg?style=flat)](https://gemnasium.com/hopsoft/goldmine)
|
4
4
|
[![Build Status](http://img.shields.io/travis/hopsoft/goldmine.svg?style=flat)](https://travis-ci.org/hopsoft/goldmine)
|
@@ -8,7 +8,9 @@
|
|
8
8
|
# Goldmine
|
9
9
|
|
10
10
|
Extract a wealth of information from Arrays & Hashes.
|
11
|
-
|
11
|
+
|
12
|
+
Goldmine is especially helpful when working with source data that is difficult to query.
|
13
|
+
e.g. CSV files, API results, etc...
|
12
14
|
|
13
15
|
## Uses
|
14
16
|
|
@@ -18,16 +20,9 @@ Think of __Goldmine__ as `Enumerable#group_by` on steroids.
|
|
18
20
|
- Data visualization prep
|
19
21
|
- CSV report generation
|
20
22
|
|
21
|
-
---
|
22
|
-
|
23
|
-
The [demo project](http://hopsoft.github.io/goldmine/) demonstrates some of Goldmine's uses.
|
24
|
-
`TODO: update the demo project to use the latest features`
|
25
|
-
|
26
|
-
---
|
27
|
-
|
28
23
|
## Quick Start
|
29
24
|
|
30
|
-
```
|
25
|
+
```sh
|
31
26
|
gem install goldmine
|
32
27
|
```
|
33
28
|
|
@@ -35,8 +30,8 @@ gem install goldmine
|
|
35
30
|
require "goldmine"
|
36
31
|
|
37
32
|
list = [1,2,3,4,5,6,7,8,9]
|
38
|
-
|
39
|
-
|
33
|
+
Goldmine::ArrayMiner.new(list)
|
34
|
+
.pivot { |i| i < 5 }
|
40
35
|
# result:
|
41
36
|
{
|
42
37
|
true => [1, 2, 3, 4],
|
@@ -48,8 +43,9 @@ list.pivot { |i| i < 5 }
|
|
48
43
|
|
49
44
|
```ruby
|
50
45
|
list = [1,2,3,4,5,6,7,8,9]
|
51
|
-
|
52
|
-
|
46
|
+
Goldmine::ArrayMiner.new(list)
|
47
|
+
.pivot { |i| i < 5 }
|
48
|
+
.pivot { |i| i % 2 == 0 }
|
53
49
|
# result:
|
54
50
|
{
|
55
51
|
[true, false] => [1, 3],
|
@@ -63,8 +59,8 @@ list.pivot { |i| i < 5 }.pivot { |i| i % 2 == 0 }
|
|
63
59
|
|
64
60
|
```ruby
|
65
61
|
list = [1,2,3,4,5,6,7,8,9]
|
66
|
-
|
67
|
-
|
62
|
+
Goldmine::ArrayMiner.new(list)
|
63
|
+
.pivot(:less_than_5) { |i| i < 5 }
|
68
64
|
# result:
|
69
65
|
{
|
70
66
|
{ :less_than_5 => true } => [1, 2, 3, 4],
|
@@ -143,36 +139,82 @@ end
|
|
143
139
|
}
|
144
140
|
```
|
145
141
|
|
146
|
-
## Rollups
|
142
|
+
## Rollups
|
147
143
|
|
148
|
-
Rollups provide a clean way to aggregate pivoted data
|
144
|
+
Rollups provide a clean way to aggregate pivoted data...
|
145
|
+
think computed columns.
|
149
146
|
|
150
|
-
|
147
|
+
Rollup `blocks` are executed once for each pivot.
|
148
|
+
_Like pivots, rollups can be chained._
|
151
149
|
|
152
150
|
```ruby
|
153
151
|
list = [1,2,3,4,5,6,7,8,9]
|
154
|
-
|
155
|
-
pivoted = list
|
152
|
+
Goldmine::ArrayMiner.new(list)
|
156
153
|
.pivot(:less_than_5) { |i| i < 5 }
|
157
154
|
.pivot(:even) { |i| i % 2 == 0 }
|
155
|
+
.rollup(:count) { |matched| matched.size }
|
158
156
|
# result:
|
159
157
|
{
|
160
|
-
{ :less_than_5 => true, :even => false } =>
|
161
|
-
{ :less_than_5 => true, :even => true } =>
|
162
|
-
{ :less_than_5 => false, :even => false} =>
|
163
|
-
{ :less_than_5 => false, :even => true } =>
|
158
|
+
{ :less_than_5 => true, :even => false } => { :count => 2 },
|
159
|
+
{ :less_than_5 => true, :even => true } => { :count => 2 },
|
160
|
+
{ :less_than_5 => false, :even => false } => { :count => 3 },
|
161
|
+
{ :less_than_5 => false, :even => true } => { :count => 2 }
|
164
162
|
}
|
163
|
+
```
|
164
|
+
|
165
|
+
### Pre-Computed Results
|
165
166
|
|
166
|
-
|
167
|
+
Rollups can be computationally expensive _(depending upon how much logic you stuff into the `block`)_.
|
168
|
+
Goldmine caches rollup results & makes them available to subsequent rollups.
|
169
|
+
|
170
|
+
```ruby
|
171
|
+
list = [1,2,3,4,5,6,7,8,9]
|
172
|
+
Goldmine::ArrayMiner.new(list)
|
173
|
+
.pivot(:less_than_5) { |i| i < 5 }
|
174
|
+
.rollup(:count, &:size)
|
175
|
+
.rollup(:evens) { |list| list.select { |i| i % 2 == 0 }.size }
|
176
|
+
.rollup(:even_percentage) { |list|
|
177
|
+
computed(:evens).for(list) / computed(:count).for(list).to_f
|
178
|
+
}
|
167
179
|
# result:
|
168
180
|
{
|
169
|
-
{:less_than_5=>true
|
170
|
-
{:less_than_5=>
|
171
|
-
{:less_than_5=>false, :even=>false}=>{:count=>3},
|
172
|
-
{:less_than_5=>false, :even=>true}=>{:count=>2}
|
181
|
+
{ :less_than_5 => true } => { :count => 4, :evens => 2, :even_percentage => 0.5 },
|
182
|
+
{ :less_than_5 => false } => { :count => 5, :evens => 2, :even_percentage => 0.4 }
|
173
183
|
}
|
184
|
+
```
|
185
|
+
|
186
|
+
### Rows
|
174
187
|
|
175
|
-
|
188
|
+
It's often helpful to flatten rollups into rows.
|
189
|
+
|
190
|
+
```ruby
|
191
|
+
list = [1,2,3,4,5,6,7,8,9]
|
192
|
+
Goldmine::ArrayMiner.new(list)
|
193
|
+
.pivot(:less_than_5) { |i| i < 5 }
|
194
|
+
.rollup(:count, &:size)
|
195
|
+
.rollup(:evens) { |list| list.select { |i| i % 2 == 0 }.size }
|
196
|
+
.rollup(:even_percentage) { |list|
|
197
|
+
computed(:evens).for(list) / computed(:count).for(list).to_f
|
198
|
+
}
|
199
|
+
.to_rows
|
200
|
+
# result:
|
201
|
+
[
|
202
|
+
{ "less_than_5" => true, "count" => 4, "evens" => 2, "even_percentage" => 0.5 },
|
203
|
+
{ "less_than_5" => false, "count" => 5, "evens" => 2, "even_percentage" => 0.4 }
|
204
|
+
]
|
205
|
+
```
|
206
|
+
|
207
|
+
### Tabular
|
208
|
+
|
209
|
+
Rollups can also be converted into tabular format.
|
210
|
+
|
211
|
+
```ruby
|
212
|
+
list = [1,2,3,4,5,6,7,8,9]
|
213
|
+
Goldmine::ArrayMiner.new(list)
|
214
|
+
.pivot(:less_than_5) { |i| i < 5 }
|
215
|
+
.pivot(:even) { |i| i % 2 == 0 }
|
216
|
+
.rollup(:count) { |matched| matched.size }
|
217
|
+
.to_tabular
|
176
218
|
# result:
|
177
219
|
[
|
178
220
|
["less_than_5", "even", "count"],
|
@@ -181,22 +223,128 @@ rollup.to_tabular
|
|
181
223
|
[false, false, 3],
|
182
224
|
[false, true, 2]
|
183
225
|
]
|
226
|
+
```
|
227
|
+
|
228
|
+
### CSV
|
229
|
+
|
230
|
+
Goldmine makes producing CSV output simple.
|
184
231
|
|
185
|
-
|
232
|
+
```ruby
|
233
|
+
csv_table = Goldmine::ArrayMiner.new(list)
|
234
|
+
.pivot(:less_than_5) { |i| i < 5 }
|
235
|
+
.pivot(:even) { |i| i % 2 == 0 }
|
236
|
+
.rollup(:count) { |matched| matched.size }
|
237
|
+
.to_csv_table
|
186
238
|
# result:
|
187
239
|
#<CSV::Table mode:col_or_row row_count:5>
|
188
240
|
|
189
|
-
|
241
|
+
csv_table.to_csv
|
190
242
|
# result:
|
191
243
|
"less_than_5,even,count\ntrue,false,2\ntrue,true,2\nfalse,false,3\nfalse,true,2\n"
|
192
244
|
```
|
193
245
|
|
246
|
+
## Examples
|
247
|
+
|
248
|
+
All examples are simple Sinatra apps.
|
249
|
+
They are designed to help communicate Goldmine use-cases.
|
250
|
+
|
251
|
+
### Setup
|
252
|
+
|
253
|
+
```sh
|
254
|
+
git clone git@github.com:hopsoft/goldmine.git
|
255
|
+
cd /path/to/goldmine
|
256
|
+
bundle
|
257
|
+
```
|
258
|
+
|
259
|
+
### [New York Wifi Hotspots](https://github.com/hopsoft/goldmine/tree/master/examples/new_york_wifi_hotspots)
|
260
|
+
|
261
|
+
In this example, we mine the following data.
|
262
|
+
|
263
|
+
* Total hotspots by city, zip, & area code
|
264
|
+
* Free hotspots by city, zip, & area code
|
265
|
+
* Paid hotspots by city, zip, & area code
|
266
|
+
* Library hotspots by city, zip, & area code
|
267
|
+
* Starbucks hotspots by city, zip, & area code
|
268
|
+
* McDonalds hotspots by city, zip, & area code
|
269
|
+
|
270
|
+
```sh
|
271
|
+
ruby examples/new_york_wifi_hotspots/app.rb
|
272
|
+
```
|
273
|
+
|
274
|
+
```sh
|
275
|
+
curl http://localhost:3000/raw
|
276
|
+
curl http://localhost:3000/pivoted
|
277
|
+
curl http://localhost:3000/rolled_up
|
278
|
+
curl http://localhost:3000/rows
|
279
|
+
curl http://localhost:3000/tabular
|
280
|
+
curl http://localhost:3000/csv
|
281
|
+
```
|
282
|
+
|
283
|
+
### [Medicare Physician Comparison](https://github.com/hopsoft/goldmine/tree/master/examples/medicare_physician_compare)
|
284
|
+
|
285
|
+
Uses data from http://dev.socrata.com/foundry/#/data.medicare.gov/aeay-dfax
|
286
|
+
|
287
|
+
In this example, we mine the following data.
|
288
|
+
|
289
|
+
* Total doctors by state & specialty
|
290
|
+
* Preferred doctors by state & specialty
|
291
|
+
* Female doctors by state & specialty
|
292
|
+
* Male doctors by state & specialty
|
293
|
+
* Preferred female doctors by state & specialty
|
294
|
+
* Preferred male doctors by state & specialty
|
295
|
+
|
296
|
+
```sh
|
297
|
+
ruby examples/medicare_physician_compare/app.rb
|
298
|
+
```
|
299
|
+
|
300
|
+
```sh
|
301
|
+
curl http://localhost:3000/raw
|
302
|
+
curl http://localhost:3000/pivoted
|
303
|
+
curl http://localhost:3000/rolled_up
|
304
|
+
curl http://localhost:3000/rows
|
305
|
+
curl http://localhost:3000/tabular
|
306
|
+
curl http://localhost:3000/csv
|
307
|
+
```
|
308
|
+
|
309
|
+
#### Performance
|
310
|
+
|
311
|
+
The Medicare dataset is large & works well for performance testing.
|
312
|
+
|
313
|
+
My Macbook Pro yields the following benchmarks.
|
314
|
+
|
315
|
+
* 3.1 GHz Intel Core i7
|
316
|
+
* 16 GB 1867 MHz DDR3
|
317
|
+
|
318
|
+
##### 100,000 Records
|
319
|
+
|
320
|
+
```
|
321
|
+
user system total real
|
322
|
+
pivoted 1.000000 0.020000 1.020000 ( 1.027810)
|
323
|
+
rolled_up 1.090000 0.020000 1.110000 ( 1.101082)
|
324
|
+
rows 0.020000 0.000000 0.020000 ( 0.022978)
|
325
|
+
tabular 0.010000 0.000000 0.010000 ( 0.005423)
|
326
|
+
csv 0.030000 0.000000 0.030000 ( 0.037245)
|
327
|
+
```
|
328
|
+
|
329
|
+
##### 1,000,000 Records
|
330
|
+
|
331
|
+
```
|
332
|
+
user system total real
|
333
|
+
pivoted 15.700000 0.490000 16.190000 ( 16.886677)
|
334
|
+
rolled_up 7.070000 0.350000 7.420000 ( 7.544060)
|
335
|
+
rows 0.020000 0.000000 0.020000 ( 0.028432)
|
336
|
+
tabular 0.010000 0.010000 0.020000 ( 0.007663)
|
337
|
+
csv 0.050000 0.000000 0.050000 ( 0.058925)
|
338
|
+
```
|
339
|
+
|
194
340
|
## Summary
|
195
341
|
|
196
|
-
Goldmine
|
197
|
-
|
342
|
+
Goldmine makes data highly malleable.
|
343
|
+
It allows you to combine the power of pivots, rollups, tabular data,
|
344
|
+
& csv to construct deep insights with minimal effort.
|
198
345
|
|
199
|
-
|
200
|
-
pivot the results, convert to csv, sort, pivot again,
|
201
|
-
then rollup the results to create data visualizations in the form of charts & graphs.
|
346
|
+
Real world use cases include:
|
202
347
|
|
348
|
+
* Build a better understanding of database data before canonizing reports in SQL
|
349
|
+
* Create source data for building user interfaces & data visualizations
|
350
|
+
* Transform CSV data from one format to another
|
data/goldmine.gemspec
CHANGED
data/lib/goldmine.rb
CHANGED
data/lib/goldmine/array_miner.rb
CHANGED
data/lib/goldmine/cache.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
module Goldmine
  # In-memory store addressed by one or more key parts
  # (for example a rollup name plus the pivoted list it was computed from).
  #
  # Key parts are normalised by #make_key:
  # * Strings and Symbols are compared by name, so "count" and :count
  #   address the same slot.
  # * Every other object is compared by identity (object_id), not by value;
  #   two distinct Arrays with equal contents are different keys.
  class Cache
    def initialize
      @hash = {}
    end

    # Reads the value stored under the given key parts.
    #
    # @param keys [Array<Object>] One or more key parts.
    # @return [Object, nil] The stored value, or nil when nothing is stored.
    def [](*keys)
      @hash[make_key(*keys)]
    end

    # Stores a value under the given key parts.
    #
    # @param keys [Array<Object>] One or more key parts.
    # @param value [Object] The value to store.
    # @return [Object] The stored value.
    def []=(*keys, value)
      @hash[make_key(*keys)] = value
    end

    # Returns the stored value for the key parts; when nothing is stored yet,
    # runs the block once and stores its result.
    #
    # Presence is tested with Hash#key? rather than truthiness, so a stored
    # nil or false counts as a hit and the block is not executed again.
    #
    # @param keys [Array<Object>] One or more key parts.
    # @yield Computes the value on a miss.
    # @return [Object] The cached or freshly computed value.
    def fetch(*keys)
      cache_key = make_key(*keys)
      return @hash[cache_key] if @hash.key?(cache_key)

      @hash[cache_key] = yield
    end

    private

    # Builds the internal Hash key from the supplied key parts.
    #
    # NOTE(review): identity keys rely on object_id staying unique for as long
    # as the entry is used; confirm cached objects outlive the cache.
    def make_key(*keys)
      keys.map do |key|
        case key
        when String, Symbol then key.to_sym
        else key.object_id
        end
      end
    end
  end
end
|
data/lib/goldmine/hash_miner.rb
CHANGED
@@ -3,7 +3,8 @@ require "delegate"
|
|
3
3
|
module Goldmine
|
4
4
|
class HashMiner < SimpleDelegator
|
5
5
|
def initialize(hash={})
|
6
|
-
|
6
|
+
@rollup_cache = Cache.new
|
7
|
+
super @hash = hash.to_h
|
7
8
|
end
|
8
9
|
|
9
10
|
attr_accessor :goldmine
|
@@ -53,7 +54,7 @@ module Goldmine
|
|
53
54
|
# @yield [Object] Yields once for each pivoted grouping of values.
|
54
55
|
# @return [Hash] The rollup Hash of data.
|
55
56
|
def rollup(name, &block)
|
56
|
-
HashRollup.new(@hash).rollup(name, &block)
|
57
|
+
HashRollup.new(@hash, @rollup_cache).rollup(name, &block)
|
57
58
|
end
|
58
59
|
|
59
60
|
# Assigns a key/value pair to the Hash.
|
data/lib/goldmine/hash_rollup.rb
CHANGED
@@ -3,15 +3,23 @@ require "csv"
|
|
3
3
|
|
4
4
|
module Goldmine
|
5
5
|
class HashRollup < SimpleDelegator
|
6
|
-
|
6
|
+
attr_reader :names
|
7
|
+
|
8
|
+
def initialize(pivoted, cache=Cache.new)
|
9
|
+
@names = []
|
10
|
+
@cache = cache
|
11
|
+
@context = RollupContext.new(@cache)
|
7
12
|
@pivoted = pivoted
|
8
13
|
super @rolled = {}
|
9
14
|
end
|
10
15
|
|
11
|
-
def rollup(name)
|
16
|
+
def rollup(name, &block)
|
17
|
+
names << name
|
12
18
|
pivoted.each do |key, value|
|
13
|
-
|
14
|
-
|
19
|
+
@cache.fetch(name, value) do
|
20
|
+
rolled[key] ||= {}
|
21
|
+
rolled[key][name] = @context.instance_exec(value, &block)
|
22
|
+
end
|
15
23
|
end
|
16
24
|
self
|
17
25
|
end
|
@@ -24,11 +32,21 @@ module Goldmine
|
|
24
32
|
end
|
25
33
|
|
26
34
|
def to_csv_table
|
27
|
-
|
28
|
-
rows = tabular_rows.map { |row| CSV::Row.new(header, row) }
|
35
|
+
rows = to_rows.map { |row| CSV::Row.new(row.keys, row.values) }
|
29
36
|
CSV::Table.new rows
|
30
37
|
end
|
31
38
|
|
39
|
+
def to_rows
|
40
|
+
header = tabular_header
|
41
|
+
tabular_rows.map do |tabular_row|
|
42
|
+
{}.tap do |row|
|
43
|
+
header.each_with_index do |header_name, index|
|
44
|
+
row[header_name] = tabular_row[index]
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
32
50
|
private
|
33
51
|
|
34
52
|
attr_reader :pivoted, :rolled
|
data/lib/goldmine/rollup_context.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module Goldmine
  # Gives code access to previously cached rollup values through
  # `computed(name).for(list)`.
  class RollupContext
    # @param cache [#[]] Store that is read with `cache[name, list]`.
    def initialize(cache)
      @cache = cache
      @computations = {}
    end

    # Returns the lookup handle for a named computation.
    #
    # Handles are memoised under the exact name given. The name is passed to
    # the cache untouched, so a String and a Symbol each get their own handle
    # and any name normalisation is left entirely to the cache. Names do not
    # need to respond to #to_sym.
    #
    # @param name [Object] The name the value was stored under.
    # @return [Computation] The handle for that name.
    def computed(name)
      @computations[name] ||= Computation.new(name, @cache)
    end

    # Reads one named value from the cache for a given list.
    class Computation
      # @param name [Object] The name the value was stored under.
      # @param cache [#[]] Store that is read with `cache[name, list]`.
      def initialize(name, cache)
        @name = name
        @cache = cache
      end

      # @param list [Object] The list the value was computed from.
      # @return [Object, nil] Whatever the cache returns for (name, list).
      def for(list)
        @cache[@name, list]
      end
    end
  end
end
|
data/lib/goldmine/version.rb
CHANGED
data/test/test_goldmine.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "pry-test"
|
2
2
|
require "coveralls"
|
3
3
|
Coveralls.wear!
|
4
|
+
SimpleCov.command_name "pry-test"
|
4
5
|
require File.expand_path("../../lib/goldmine", __FILE__)
|
5
6
|
|
6
7
|
class TestGoldmine < PryTest::Test
|
@@ -318,7 +319,7 @@ class TestGoldmine < PryTest::Test
|
|
318
319
|
assert rolled.to_tabular == expected
|
319
320
|
end
|
320
321
|
|
321
|
-
test "named chained pivots rollup to_csv_table" do
|
322
|
+
test "named & chained pivots with rollup to_csv_table" do
|
322
323
|
list = [1,2,3,4,5,6,7,8,9]
|
323
324
|
list = Goldmine::ArrayMiner.new(list)
|
324
325
|
rolled = list.pivot("less than 5") { |i| i < 5 }.pivot("divisible by 2") { |i| i % 2 == 0 }.rollup(:count, &:size)
|
@@ -334,4 +335,68 @@ class TestGoldmine < PryTest::Test
|
|
334
335
|
assert row["divisible by 2"] == false
|
335
336
|
assert row ["count"] == 2
|
336
337
|
end
|
338
|
+
|
339
|
+
test "unnamed & chained pivots with rollup to rows" do
|
340
|
+
list = [1,2,3,4,5,6,7,8,9]
|
341
|
+
list = Goldmine::ArrayMiner.new(list)
|
342
|
+
rolled = list
|
343
|
+
.pivot { |i| i < 5 }
|
344
|
+
.rollup(:count, &:size)
|
345
|
+
.rollup(:evens) { |l| l.select { |i| i % 2 == 0 }.size }
|
346
|
+
.rollup(:even_percentage) { |l| computed(:evens).for(l) / computed(:count).for(l).to_f }
|
347
|
+
|
348
|
+
expected = [
|
349
|
+
{"column1"=>true, "count"=>4, "evens"=>2, "even_percentage"=>0.5},
|
350
|
+
{"column1"=>false, "count"=>5, "evens"=>2, "even_percentage"=>0.4}
|
351
|
+
]
|
352
|
+
|
353
|
+
assert rolled.to_rows == expected
|
354
|
+
end
|
355
|
+
|
356
|
+
test "named & chained pivots with rollup to rows" do
|
357
|
+
list = [1,2,3,4,5,6,7,8,9]
|
358
|
+
list = Goldmine::ArrayMiner.new(list)
|
359
|
+
rolled = list
|
360
|
+
.pivot(:less_than_5) { |i| i < 5 }
|
361
|
+
.rollup(:count, &:size)
|
362
|
+
.rollup(:evens) { |l| l.select { |i| i % 2 == 0 }.size }
|
363
|
+
.rollup(:even_percentage) { |l| computed(:evens).for(l) / computed(:count).for(l).to_f }
|
364
|
+
|
365
|
+
expected = [
|
366
|
+
{"less_than_5"=>true, "count"=>4, "evens"=>2, "even_percentage"=>0.5},
|
367
|
+
{"less_than_5"=>false, "count"=>5, "evens"=>2, "even_percentage"=>0.4}
|
368
|
+
]
|
369
|
+
|
370
|
+
assert rolled.to_rows == expected
|
371
|
+
end
|
372
|
+
|
373
|
+
test "access to prior-computed rollups" do
|
374
|
+
list = [1,2,3,4,5,6,7,8,9]
|
375
|
+
list = Goldmine::ArrayMiner.new(list)
|
376
|
+
rolled = list
|
377
|
+
.pivot("less than 5") { |i| i < 5 }
|
378
|
+
.rollup(:count, &:size)
|
379
|
+
.rollup(:evens) { |pivoted_list| pivoted_list.select { |i| i % 2 == 0 }.size }
|
380
|
+
.rollup(:even_percentage) { |pivoted_list| computed(:evens).for(pivoted_list) / computed(:count).for(pivoted_list).to_f }
|
381
|
+
|
382
|
+
expected = [
|
383
|
+
["less than 5", "count", "evens", "even_percentage"],
|
384
|
+
[true, 4, 2, 0.5],
|
385
|
+
[false, 5, 2, 0.4]
|
386
|
+
]
|
387
|
+
assert rolled.to_tabular == expected
|
388
|
+
end
|
389
|
+
|
390
|
+
test "rollup names" do
|
391
|
+
list = [1,2,3,4,5,6,7,8,9]
|
392
|
+
list = Goldmine::ArrayMiner.new(list)
|
393
|
+
data = list.pivot { |i| i < 5 }
|
394
|
+
rolled = data
|
395
|
+
.rollup(:count) { |items| items.size }
|
396
|
+
.rollup(:div_by_3) { |items| items.keep_if { |i| i % 3 == 0 }.size }
|
397
|
+
|
398
|
+
expected = [:count, :div_by_3]
|
399
|
+
assert rolled.names == expected
|
400
|
+
end
|
401
|
+
|
337
402
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: goldmine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Hopkins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: sinatra
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
description: Extract a wealth of information from Arrays and Hashes
|
56
70
|
email:
|
57
71
|
- natehop@gmail.com
|
@@ -66,8 +80,10 @@ files:
|
|
66
80
|
- goldmine.gemspec
|
67
81
|
- lib/goldmine.rb
|
68
82
|
- lib/goldmine/array_miner.rb
|
83
|
+
- lib/goldmine/cache.rb
|
69
84
|
- lib/goldmine/hash_miner.rb
|
70
85
|
- lib/goldmine/hash_rollup.rb
|
86
|
+
- lib/goldmine/rollup_context.rb
|
71
87
|
- lib/goldmine/version.rb
|
72
88
|
- license.md
|
73
89
|
- test/test_goldmine.rb
|