goldmine 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +23 -11
- data/README.md +186 -38
- data/goldmine.gemspec +1 -0
- data/lib/goldmine.rb +2 -0
- data/lib/goldmine/array_miner.rb +1 -1
- data/lib/goldmine/cache.rb +28 -0
- data/lib/goldmine/hash_miner.rb +3 -2
- data/lib/goldmine/hash_rollup.rb +24 -6
- data/lib/goldmine/rollup_context.rb +23 -0
- data/lib/goldmine/version.rb +1 -1
- data/test/test_goldmine.rb +66 -1
- metadata +18 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 214cfc93706ed23076acfc401061d060e1753b59
|
|
4
|
+
data.tar.gz: 754cf23c6dd3c8437d18e83daf7dd9f86e063781
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 43fdf070ee47b3d767027bcea6c1518c6f948bff9885a0f283dbbfb419d2c560fbd62cb1670027281fdce5c17989414270786b968db0a17b14ac19932b797a1b
|
|
7
|
+
data.tar.gz: e82d4461294d81e9afd982b429dac780ebd9fd840d4c55176896df95d05f8a38f2a6479446b31ea8dc219e397144c3a976cc48caa27df94ac1069dffaafb3d28
|
data/Gemfile.lock
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
goldmine (
|
|
4
|
+
goldmine (2.0.0)
|
|
5
5
|
|
|
6
6
|
GEM
|
|
7
7
|
remote: https://rubygems.org/
|
|
8
8
|
specs:
|
|
9
9
|
binding_of_caller (0.7.2)
|
|
10
10
|
debug_inspector (>= 0.0.1)
|
|
11
|
-
byebug (
|
|
11
|
+
byebug (5.0.0)
|
|
12
12
|
columnize (= 0.9.0)
|
|
13
13
|
coderay (1.1.0)
|
|
14
14
|
columnize (0.9.0)
|
|
15
|
-
coveralls (0.8.
|
|
15
|
+
coveralls (0.8.3)
|
|
16
16
|
json (~> 1.8)
|
|
17
17
|
rest-client (>= 1.6.8, < 2)
|
|
18
18
|
simplecov (~> 0.10.0)
|
|
@@ -20,22 +20,22 @@ GEM
|
|
|
20
20
|
thor (~> 0.19.1)
|
|
21
21
|
debug_inspector (0.0.2)
|
|
22
22
|
docile (1.1.5)
|
|
23
|
-
domain_name (0.5.
|
|
23
|
+
domain_name (0.5.25)
|
|
24
24
|
unf (>= 0.0.5, < 1.0.0)
|
|
25
25
|
http-cookie (1.0.2)
|
|
26
26
|
domain_name (~> 0.5)
|
|
27
27
|
interception (0.5)
|
|
28
|
-
json (1.8.
|
|
28
|
+
json (1.8.3)
|
|
29
29
|
method_source (0.8.2)
|
|
30
|
-
mime-types (2.6.
|
|
30
|
+
mime-types (2.6.2)
|
|
31
31
|
netrc (0.10.3)
|
|
32
32
|
os (0.9.6)
|
|
33
|
-
pry (0.10.
|
|
33
|
+
pry (0.10.3)
|
|
34
34
|
coderay (~> 1.1.0)
|
|
35
35
|
method_source (~> 0.8.1)
|
|
36
36
|
slop (~> 3.4)
|
|
37
|
-
pry-byebug (3.
|
|
38
|
-
byebug (~>
|
|
37
|
+
pry-byebug (3.2.0)
|
|
38
|
+
byebug (~> 5.0)
|
|
39
39
|
pry (~> 0.10)
|
|
40
40
|
pry-rescue (1.4.2)
|
|
41
41
|
interception (>= 0.5)
|
|
@@ -49,6 +49,9 @@ GEM
|
|
|
49
49
|
pry-byebug
|
|
50
50
|
pry-rescue
|
|
51
51
|
pry-stack_explorer
|
|
52
|
+
rack (1.6.4)
|
|
53
|
+
rack-protection (1.5.3)
|
|
54
|
+
rack
|
|
52
55
|
rake (10.4.2)
|
|
53
56
|
rest-client (1.8.0)
|
|
54
57
|
http-cookie (>= 1.0.2, < 2.0)
|
|
@@ -59,11 +62,16 @@ GEM
|
|
|
59
62
|
json (~> 1.8)
|
|
60
63
|
simplecov-html (~> 0.10.0)
|
|
61
64
|
simplecov-html (0.10.0)
|
|
65
|
+
sinatra (1.4.6)
|
|
66
|
+
rack (~> 1.4)
|
|
67
|
+
rack-protection (~> 1.4)
|
|
68
|
+
tilt (>= 1.3, < 3)
|
|
62
69
|
slop (3.6.0)
|
|
63
|
-
term-ansicolor (1.3.
|
|
70
|
+
term-ansicolor (1.3.2)
|
|
64
71
|
tins (~> 1.0)
|
|
65
72
|
thor (0.19.1)
|
|
66
|
-
|
|
73
|
+
tilt (2.0.1)
|
|
74
|
+
tins (1.6.0)
|
|
67
75
|
unf (0.1.4)
|
|
68
76
|
unf_ext
|
|
69
77
|
unf_ext (0.0.7.1)
|
|
@@ -76,3 +84,7 @@ DEPENDENCIES
|
|
|
76
84
|
goldmine!
|
|
77
85
|
pry-test
|
|
78
86
|
rake
|
|
87
|
+
sinatra
|
|
88
|
+
|
|
89
|
+
BUNDLED WITH
|
|
90
|
+
1.10.6
|
data/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
[](http://blog.codinghorror.com/the-best-code-is-no-code-at-all/)
|
|
2
2
|
[](https://codeclimate.com/github/hopsoft/goldmine)
|
|
3
3
|
[](https://gemnasium.com/hopsoft/goldmine)
|
|
4
4
|
[](https://travis-ci.org/hopsoft/goldmine)
|
|
@@ -8,7 +8,9 @@
|
|
|
8
8
|
# Goldmine
|
|
9
9
|
|
|
10
10
|
Extract a wealth of information from Arrays & Hashes.
|
|
11
|
-
|
|
11
|
+
|
|
12
|
+
Goldmine is especially helpful when working with source data that is difficult to query.
|
|
13
|
+
e.g. CSV files, API results, etc...
|
|
12
14
|
|
|
13
15
|
## Uses
|
|
14
16
|
|
|
@@ -18,16 +20,9 @@ Think of __Goldmine__ as `Enumerable#group_by` on steroids.
|
|
|
18
20
|
- Data visualization prep
|
|
19
21
|
- CSV report generation
|
|
20
22
|
|
|
21
|
-
---
|
|
22
|
-
|
|
23
|
-
The [demo project](http://hopsoft.github.io/goldmine/) demonstrates some of Goldmine's uses.
|
|
24
|
-
`TODO: update the demo project to use the latest features`
|
|
25
|
-
|
|
26
|
-
---
|
|
27
|
-
|
|
28
23
|
## Quick Start
|
|
29
24
|
|
|
30
|
-
```
|
|
25
|
+
```sh
|
|
31
26
|
gem install goldmine
|
|
32
27
|
```
|
|
33
28
|
|
|
@@ -35,8 +30,8 @@ gem install goldmine
|
|
|
35
30
|
require "goldmine"
|
|
36
31
|
|
|
37
32
|
list = [1,2,3,4,5,6,7,8,9]
|
|
38
|
-
|
|
39
|
-
|
|
33
|
+
Goldmine::ArrayMiner.new(list)
|
|
34
|
+
.pivot { |i| i < 5 }
|
|
40
35
|
# result:
|
|
41
36
|
{
|
|
42
37
|
true => [1, 2, 3, 4],
|
|
@@ -48,8 +43,9 @@ list.pivot { |i| i < 5 }
|
|
|
48
43
|
|
|
49
44
|
```ruby
|
|
50
45
|
list = [1,2,3,4,5,6,7,8,9]
|
|
51
|
-
|
|
52
|
-
|
|
46
|
+
Goldmine::ArrayMiner.new(list)
|
|
47
|
+
.pivot { |i| i < 5 }
|
|
48
|
+
.pivot { |i| i % 2 == 0 }
|
|
53
49
|
# result:
|
|
54
50
|
{
|
|
55
51
|
[true, false] => [1, 3],
|
|
@@ -63,8 +59,8 @@ list.pivot { |i| i < 5 }.pivot { |i| i % 2 == 0 }
|
|
|
63
59
|
|
|
64
60
|
```ruby
|
|
65
61
|
list = [1,2,3,4,5,6,7,8,9]
|
|
66
|
-
|
|
67
|
-
|
|
62
|
+
Goldmine::ArrayMiner.new(list)
|
|
63
|
+
.pivot(:less_than_5) { |i| i < 5 }
|
|
68
64
|
# result:
|
|
69
65
|
{
|
|
70
66
|
{ :less_than_5 => true } => [1, 2, 3, 4],
|
|
@@ -143,36 +139,82 @@ end
|
|
|
143
139
|
}
|
|
144
140
|
```
|
|
145
141
|
|
|
146
|
-
## Rollups
|
|
142
|
+
## Rollups
|
|
147
143
|
|
|
148
|
-
Rollups provide a clean way to aggregate pivoted data
|
|
144
|
+
Rollups provide a clean way to aggregate pivoted data...
|
|
145
|
+
think computed columns.
|
|
149
146
|
|
|
150
|
-
|
|
147
|
+
Rollup `blocks` are executed once for each pivot.
|
|
148
|
+
_Like pivots, rollups can be chained._
|
|
151
149
|
|
|
152
150
|
```ruby
|
|
153
151
|
list = [1,2,3,4,5,6,7,8,9]
|
|
154
|
-
|
|
155
|
-
pivoted = list
|
|
152
|
+
Goldmine::ArrayMiner.new(list)
|
|
156
153
|
.pivot(:less_than_5) { |i| i < 5 }
|
|
157
154
|
.pivot(:even) { |i| i % 2 == 0 }
|
|
155
|
+
.rollup(:count) { |matched| matched.size }
|
|
158
156
|
# result:
|
|
159
157
|
{
|
|
160
|
-
{ :less_than_5 => true, :even => false } =>
|
|
161
|
-
{ :less_than_5 => true, :even => true } =>
|
|
162
|
-
{ :less_than_5 => false, :even => false} =>
|
|
163
|
-
{ :less_than_5 => false, :even => true } =>
|
|
158
|
+
{ :less_than_5 => true, :even => false } => { :count => 2 },
|
|
159
|
+
{ :less_than_5 => true, :even => true } => { :count => 2 },
|
|
160
|
+
{ :less_than_5 => false, :even => false } => { :count => 3 },
|
|
161
|
+
{ :less_than_5 => false, :even => true } => { :count => 2 }
|
|
164
162
|
}
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Pre-Computed Results
|
|
165
166
|
|
|
166
|
-
|
|
167
|
+
Rollups can be computationally expensive _(depending upon how much logic you stuff into the `block`)_.
|
|
168
|
+
Goldmine caches rollup results & makes them available to subsequent rollups.
|
|
169
|
+
|
|
170
|
+
```ruby
|
|
171
|
+
list = [1,2,3,4,5,6,7,8,9]
|
|
172
|
+
Goldmine::ArrayMiner.new(list)
|
|
173
|
+
.pivot(:less_than_5) { |i| i < 5 }
|
|
174
|
+
.rollup(:count, &:size)
|
|
175
|
+
.rollup(:evens) { |list| list.select { |i| i % 2 == 0 }.size }
|
|
176
|
+
.rollup(:even_percentage) { |list|
|
|
177
|
+
computed(:evens).for(list) / computed(:count).for(list).to_f
|
|
178
|
+
}
|
|
167
179
|
# result:
|
|
168
180
|
{
|
|
169
|
-
{:less_than_5=>true
|
|
170
|
-
{:less_than_5=>
|
|
171
|
-
{:less_than_5=>false, :even=>false}=>{:count=>3},
|
|
172
|
-
{:less_than_5=>false, :even=>true}=>{:count=>2}
|
|
181
|
+
{ :less_than_5 => true } => { :count => 4, :evens => 2, :even_percentage => 0.5 },
|
|
182
|
+
{ :less_than_5 => false } => { :count => 5, :evens => 2, :even_percentage => 0.4 }
|
|
173
183
|
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Rows
|
|
174
187
|
|
|
175
|
-
|
|
188
|
+
It's often helpful to flatten rollups into rows.
|
|
189
|
+
|
|
190
|
+
```ruby
|
|
191
|
+
list = [1,2,3,4,5,6,7,8,9]
|
|
192
|
+
Goldmine::ArrayMiner.new(list)
|
|
193
|
+
.pivot(:less_than_5) { |i| i < 5 }
|
|
194
|
+
.rollup(:count, &:size)
|
|
195
|
+
.rollup(:evens) { |list| list.select { |i| i % 2 == 0 }.size }
|
|
196
|
+
.rollup(:even_percentage) { |list|
|
|
197
|
+
computed(:evens).for(list) / computed(:count).for(list).to_f
|
|
198
|
+
}
|
|
199
|
+
.to_rows
|
|
200
|
+
# result:
|
|
201
|
+
[
|
|
202
|
+
{ "less_than_5" => true, "count" => 4, "evens" => 2, "even_percentage" => 0.5 },
|
|
203
|
+
{ "less_than_5" => false, "count" => 5, "evens" => 2, "even_percentage" => 0.4 }
|
|
204
|
+
]
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Tabular
|
|
208
|
+
|
|
209
|
+
Rollups can also be converted into tabular format.
|
|
210
|
+
|
|
211
|
+
```ruby
|
|
212
|
+
list = [1,2,3,4,5,6,7,8,9]
|
|
213
|
+
Goldmine::ArrayMiner.new(list)
|
|
214
|
+
.pivot(:less_than_5) { |i| i < 5 }
|
|
215
|
+
.pivot(:even) { |i| i % 2 == 0 }
|
|
216
|
+
.rollup(:count) { |matched| matched.size }
|
|
217
|
+
.to_tabular
|
|
176
218
|
# result:
|
|
177
219
|
[
|
|
178
220
|
["less_than_5", "even", "count"],
|
|
@@ -181,22 +223,128 @@ rollup.to_tabular
|
|
|
181
223
|
[false, false, 3],
|
|
182
224
|
[false, true, 2]
|
|
183
225
|
]
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### CSV
|
|
229
|
+
|
|
230
|
+
Goldmine makes producing CSV output simple.
|
|
184
231
|
|
|
185
|
-
|
|
232
|
+
```ruby
|
|
233
|
+
csv_table = Goldmine::ArrayMiner.new(list)
|
|
234
|
+
.pivot(:less_than_5) { |i| i < 5 }
|
|
235
|
+
.pivot(:even) { |i| i % 2 == 0 }
|
|
236
|
+
.rollup(:count) { |matched| matched.size }
|
|
237
|
+
.to_csv_table
|
|
186
238
|
# result:
|
|
187
239
|
#<CSV::Table mode:col_or_row row_count:5>
|
|
188
240
|
|
|
189
|
-
|
|
241
|
+
csv_table.to_csv
|
|
190
242
|
# result:
|
|
191
243
|
"less_than_5,even,count\ntrue,false,2\ntrue,true,2\nfalse,false,3\nfalse,true,2\n"
|
|
192
244
|
```
|
|
193
245
|
|
|
246
|
+
## Examples
|
|
247
|
+
|
|
248
|
+
All examples are simple Sinatra apps.
|
|
249
|
+
They are designed to help communicate Goldmine use-cases.
|
|
250
|
+
|
|
251
|
+
### Setup
|
|
252
|
+
|
|
253
|
+
```sh
|
|
254
|
+
git clone git@github.com:hopsoft/goldmine.git
|
|
255
|
+
cd /path/to/goldmine
|
|
256
|
+
bundle
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### [New York Wifi Hotspots](https://github.com/hopsoft/goldmine/tree/master/examples/new_york_wifi_hotspots)
|
|
260
|
+
|
|
261
|
+
In this example, we mine the following data.
|
|
262
|
+
|
|
263
|
+
* Total hotspots by city, zip, & area code
|
|
264
|
+
* Free hotspots by city, zip, & area code
|
|
265
|
+
* Paid hotspots by city, zip, & area code
|
|
266
|
+
* Library hotspots by city, zip, & area code
|
|
267
|
+
* Starbucks hotspots by city, zip, & area code
|
|
268
|
+
* McDonalds hotspots by city, zip, & area code
|
|
269
|
+
|
|
270
|
+
```sh
|
|
271
|
+
ruby examples/new_york_wifi_hotspots/app.rb
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
```sh
|
|
275
|
+
curl http://localhost:3000/raw
|
|
276
|
+
curl http://localhost:3000/pivoted
|
|
277
|
+
curl http://localhost:3000/rolled_up
|
|
278
|
+
curl http://localhost:3000/rows
|
|
279
|
+
curl http://localhost:3000/tabular
|
|
280
|
+
curl http://localhost:3000/csv
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
### [Medicare Physician Comparison](https://github.com/hopsoft/goldmine/tree/master/examples/medicare_physician_compare)
|
|
284
|
+
|
|
285
|
+
Uses data from http://dev.socrata.com/foundry/#/data.medicare.gov/aeay-dfax
|
|
286
|
+
|
|
287
|
+
In this example, we mine the following data.
|
|
288
|
+
|
|
289
|
+
* Total doctors by state & specialty
|
|
290
|
+
* Preferred doctors by state & specialty
|
|
291
|
+
* Female doctors by state & specialty
|
|
292
|
+
* Male doctors by state & specialty
|
|
293
|
+
* Preferred female doctors by state & specialty
|
|
294
|
+
* Preferred male doctors by state & specialty
|
|
295
|
+
|
|
296
|
+
```sh
|
|
297
|
+
ruby examples/medicare_physician_compare/app.rb
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
```sh
|
|
301
|
+
curl http://localhost:3000/raw
|
|
302
|
+
curl http://localhost:3000/pivoted
|
|
303
|
+
curl http://localhost:3000/rolled_up
|
|
304
|
+
curl http://localhost:3000/rows
|
|
305
|
+
curl http://localhost:3000/tabular
|
|
306
|
+
curl http://localhost:3000/csv
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
#### Performance
|
|
310
|
+
|
|
311
|
+
The Medicare dataset is large & works well for performance testing.
|
|
312
|
+
|
|
313
|
+
My Macbook Pro yields the following benchmarks.
|
|
314
|
+
|
|
315
|
+
* 3.1 GHz Intel Core i7
|
|
316
|
+
* 16 GB 1867 MHz DDR3
|
|
317
|
+
|
|
318
|
+
##### 100,000 Records
|
|
319
|
+
|
|
320
|
+
```
|
|
321
|
+
user system total real
|
|
322
|
+
pivoted 1.000000 0.020000 1.020000 ( 1.027810)
|
|
323
|
+
rolled_up 1.090000 0.020000 1.110000 ( 1.101082)
|
|
324
|
+
rows 0.020000 0.000000 0.020000 ( 0.022978)
|
|
325
|
+
tabular 0.010000 0.000000 0.010000 ( 0.005423)
|
|
326
|
+
csv 0.030000 0.000000 0.030000 ( 0.037245)
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
##### 1,000,000 Records
|
|
330
|
+
|
|
331
|
+
```
|
|
332
|
+
user system total real
|
|
333
|
+
pivoted 15.700000 0.490000 16.190000 ( 16.886677)
|
|
334
|
+
rolled_up 7.070000 0.350000 7.420000 ( 7.544060)
|
|
335
|
+
rows 0.020000 0.000000 0.020000 ( 0.028432)
|
|
336
|
+
tabular 0.010000 0.010000 0.020000 ( 0.007663)
|
|
337
|
+
csv 0.050000 0.000000 0.050000 ( 0.058925)
|
|
338
|
+
```
|
|
339
|
+
|
|
194
340
|
## Summary
|
|
195
341
|
|
|
196
|
-
Goldmine
|
|
197
|
-
|
|
342
|
+
Goldmine makes data highly malleable.
|
|
343
|
+
It allows you to combine the power of pivots, rollups, tabular data,
|
|
344
|
+
& csv to construct deep insights with minimal effort.
|
|
198
345
|
|
|
199
|
-
|
|
200
|
-
pivot the results, convert to csv, sort, pivot again,
|
|
201
|
-
then rollup the results to create data visualizations in the form of charts & graphs.
|
|
346
|
+
Real world use cases include:
|
|
202
347
|
|
|
348
|
+
* Build a better understanding of database data before canonizing reports in SQL
|
|
349
|
+
* Create source data for building user interfaces & data visualizations
|
|
350
|
+
* Transform CSV data from one format to another
|
data/goldmine.gemspec
CHANGED
data/lib/goldmine.rb
CHANGED
data/lib/goldmine/array_miner.rb
CHANGED
data/lib/goldmine/cache.rb
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
module Goldmine
|
|
2
|
+
class Cache
|
|
3
|
+
|
|
4
|
+
def initialize
|
|
5
|
+
@hash = {}
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
def [](*keys)
|
|
9
|
+
@hash[make_key(*keys)]
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def []=(*keys, value)
|
|
13
|
+
@hash[make_key(*keys)] = value
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def fetch(*keys)
|
|
17
|
+
@hash[make_key(*keys)] ||= yield
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
private
|
|
21
|
+
|
|
22
|
+
def make_key(*keys)
|
|
23
|
+
keys.map do |key|
|
|
24
|
+
key.is_a?(String) ? key.to_sym : key.object_id
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
data/lib/goldmine/hash_miner.rb
CHANGED
|
@@ -3,7 +3,8 @@ require "delegate"
|
|
|
3
3
|
module Goldmine
|
|
4
4
|
class HashMiner < SimpleDelegator
|
|
5
5
|
def initialize(hash={})
|
|
6
|
-
|
|
6
|
+
@rollup_cache = Cache.new
|
|
7
|
+
super @hash = hash.to_h
|
|
7
8
|
end
|
|
8
9
|
|
|
9
10
|
attr_accessor :goldmine
|
|
@@ -53,7 +54,7 @@ module Goldmine
|
|
|
53
54
|
# @yield [Object] Yields once for each pivoted grouping of values.
|
|
54
55
|
# @return [Hash] The rollup Hash of data.
|
|
55
56
|
def rollup(name, &block)
|
|
56
|
-
HashRollup.new(@hash).rollup(name, &block)
|
|
57
|
+
HashRollup.new(@hash, @rollup_cache).rollup(name, &block)
|
|
57
58
|
end
|
|
58
59
|
|
|
59
60
|
# Assigns a key/value pair to the Hash.
|
data/lib/goldmine/hash_rollup.rb
CHANGED
|
@@ -3,15 +3,23 @@ require "csv"
|
|
|
3
3
|
|
|
4
4
|
module Goldmine
|
|
5
5
|
class HashRollup < SimpleDelegator
|
|
6
|
-
|
|
6
|
+
attr_reader :names
|
|
7
|
+
|
|
8
|
+
def initialize(pivoted, cache=Cache.new)
|
|
9
|
+
@names = []
|
|
10
|
+
@cache = cache
|
|
11
|
+
@context = RollupContext.new(@cache)
|
|
7
12
|
@pivoted = pivoted
|
|
8
13
|
super @rolled = {}
|
|
9
14
|
end
|
|
10
15
|
|
|
11
|
-
def rollup(name)
|
|
16
|
+
def rollup(name, &block)
|
|
17
|
+
names << name
|
|
12
18
|
pivoted.each do |key, value|
|
|
13
|
-
|
|
14
|
-
|
|
19
|
+
@cache.fetch(name, value) do
|
|
20
|
+
rolled[key] ||= {}
|
|
21
|
+
rolled[key][name] = @context.instance_exec(value, &block)
|
|
22
|
+
end
|
|
15
23
|
end
|
|
16
24
|
self
|
|
17
25
|
end
|
|
@@ -24,11 +32,21 @@ module Goldmine
|
|
|
24
32
|
end
|
|
25
33
|
|
|
26
34
|
def to_csv_table
|
|
27
|
-
|
|
28
|
-
rows = tabular_rows.map { |row| CSV::Row.new(header, row) }
|
|
35
|
+
rows = to_rows.map { |row| CSV::Row.new(row.keys, row.values) }
|
|
29
36
|
CSV::Table.new rows
|
|
30
37
|
end
|
|
31
38
|
|
|
39
|
+
def to_rows
|
|
40
|
+
header = tabular_header
|
|
41
|
+
tabular_rows.map do |tabular_row|
|
|
42
|
+
{}.tap do |row|
|
|
43
|
+
header.each_with_index do |header_name, index|
|
|
44
|
+
row[header_name] = tabular_row[index]
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
32
50
|
private
|
|
33
51
|
|
|
34
52
|
attr_reader :pivoted, :rolled
|
data/lib/goldmine/rollup_context.rb
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
module Goldmine
|
|
2
|
+
class RollupContext
|
|
3
|
+
def initialize(cache)
|
|
4
|
+
@cache = cache
|
|
5
|
+
@computations = {}
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
def computed(name)
|
|
9
|
+
@computations[name.to_sym] ||= Computation.new(name, @cache)
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
class Computation
|
|
13
|
+
def initialize(name, cache)
|
|
14
|
+
@name = name
|
|
15
|
+
@cache = cache
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def for(list)
|
|
19
|
+
@cache[@name, list]
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
data/lib/goldmine/version.rb
CHANGED
data/test/test_goldmine.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
require "pry-test"
|
|
2
2
|
require "coveralls"
|
|
3
3
|
Coveralls.wear!
|
|
4
|
+
SimpleCov.command_name "pry-test"
|
|
4
5
|
require File.expand_path("../../lib/goldmine", __FILE__)
|
|
5
6
|
|
|
6
7
|
class TestGoldmine < PryTest::Test
|
|
@@ -318,7 +319,7 @@ class TestGoldmine < PryTest::Test
|
|
|
318
319
|
assert rolled.to_tabular == expected
|
|
319
320
|
end
|
|
320
321
|
|
|
321
|
-
test "named chained pivots rollup to_csv_table" do
|
|
322
|
+
test "named & chained pivots with rollup to_csv_table" do
|
|
322
323
|
list = [1,2,3,4,5,6,7,8,9]
|
|
323
324
|
list = Goldmine::ArrayMiner.new(list)
|
|
324
325
|
rolled = list.pivot("less than 5") { |i| i < 5 }.pivot("divisible by 2") { |i| i % 2 == 0 }.rollup(:count, &:size)
|
|
@@ -334,4 +335,68 @@ class TestGoldmine < PryTest::Test
|
|
|
334
335
|
assert row["divisible by 2"] == false
|
|
335
336
|
assert row ["count"] == 2
|
|
336
337
|
end
|
|
338
|
+
|
|
339
|
+
test "unnamed & chained pivots with rollup to rows" do
|
|
340
|
+
list = [1,2,3,4,5,6,7,8,9]
|
|
341
|
+
list = Goldmine::ArrayMiner.new(list)
|
|
342
|
+
rolled = list
|
|
343
|
+
.pivot { |i| i < 5 }
|
|
344
|
+
.rollup(:count, &:size)
|
|
345
|
+
.rollup(:evens) { |l| l.select { |i| i % 2 == 0 }.size }
|
|
346
|
+
.rollup(:even_percentage) { |l| computed(:evens).for(l) / computed(:count).for(l).to_f }
|
|
347
|
+
|
|
348
|
+
expected = [
|
|
349
|
+
{"column1"=>true, "count"=>4, "evens"=>2, "even_percentage"=>0.5},
|
|
350
|
+
{"column1"=>false, "count"=>5, "evens"=>2, "even_percentage"=>0.4}
|
|
351
|
+
]
|
|
352
|
+
|
|
353
|
+
assert rolled.to_rows == expected
|
|
354
|
+
end
|
|
355
|
+
|
|
356
|
+
test "named & chained pivots with rollup to rows" do
|
|
357
|
+
list = [1,2,3,4,5,6,7,8,9]
|
|
358
|
+
list = Goldmine::ArrayMiner.new(list)
|
|
359
|
+
rolled = list
|
|
360
|
+
.pivot(:less_than_5) { |i| i < 5 }
|
|
361
|
+
.rollup(:count, &:size)
|
|
362
|
+
.rollup(:evens) { |l| l.select { |i| i % 2 == 0 }.size }
|
|
363
|
+
.rollup(:even_percentage) { |l| computed(:evens).for(l) / computed(:count).for(l).to_f }
|
|
364
|
+
|
|
365
|
+
expected = [
|
|
366
|
+
{"less_than_5"=>true, "count"=>4, "evens"=>2, "even_percentage"=>0.5},
|
|
367
|
+
{"less_than_5"=>false, "count"=>5, "evens"=>2, "even_percentage"=>0.4}
|
|
368
|
+
]
|
|
369
|
+
|
|
370
|
+
assert rolled.to_rows == expected
|
|
371
|
+
end
|
|
372
|
+
|
|
373
|
+
test "access to prior-computed rollups" do
|
|
374
|
+
list = [1,2,3,4,5,6,7,8,9]
|
|
375
|
+
list = Goldmine::ArrayMiner.new(list)
|
|
376
|
+
rolled = list
|
|
377
|
+
.pivot("less than 5") { |i| i < 5 }
|
|
378
|
+
.rollup(:count, &:size)
|
|
379
|
+
.rollup(:evens) { |pivoted_list| pivoted_list.select { |i| i % 2 == 0 }.size }
|
|
380
|
+
.rollup(:even_percentage) { |pivoted_list| computed(:evens).for(pivoted_list) / computed(:count).for(pivoted_list).to_f }
|
|
381
|
+
|
|
382
|
+
expected = [
|
|
383
|
+
["less than 5", "count", "evens", "even_percentage"],
|
|
384
|
+
[true, 4, 2, 0.5],
|
|
385
|
+
[false, 5, 2, 0.4]
|
|
386
|
+
]
|
|
387
|
+
assert rolled.to_tabular == expected
|
|
388
|
+
end
|
|
389
|
+
|
|
390
|
+
test "rollup names" do
|
|
391
|
+
list = [1,2,3,4,5,6,7,8,9]
|
|
392
|
+
list = Goldmine::ArrayMiner.new(list)
|
|
393
|
+
data = list.pivot { |i| i < 5 }
|
|
394
|
+
rolled = data
|
|
395
|
+
.rollup(:count) { |items| items.size }
|
|
396
|
+
.rollup(:div_by_3) { |items| items.keep_if { |i| i % 3 == 0 }.size }
|
|
397
|
+
|
|
398
|
+
expected = [:count, :div_by_3]
|
|
399
|
+
assert rolled.names == expected
|
|
400
|
+
end
|
|
401
|
+
|
|
337
402
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: goldmine
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.
|
|
4
|
+
version: 2.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Nathan Hopkins
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-
|
|
11
|
+
date: 2015-10-30 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|
|
@@ -52,6 +52,20 @@ dependencies:
|
|
|
52
52
|
- - ">="
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
54
|
version: '0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: sinatra
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ">="
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0'
|
|
55
69
|
description: Extract a wealth of information from Arrays and Hashes
|
|
56
70
|
email:
|
|
57
71
|
- natehop@gmail.com
|
|
@@ -66,8 +80,10 @@ files:
|
|
|
66
80
|
- goldmine.gemspec
|
|
67
81
|
- lib/goldmine.rb
|
|
68
82
|
- lib/goldmine/array_miner.rb
|
|
83
|
+
- lib/goldmine/cache.rb
|
|
69
84
|
- lib/goldmine/hash_miner.rb
|
|
70
85
|
- lib/goldmine/hash_rollup.rb
|
|
86
|
+
- lib/goldmine/rollup_context.rb
|
|
71
87
|
- lib/goldmine/version.rb
|
|
72
88
|
- license.md
|
|
73
89
|
- test/test_goldmine.rb
|