goldmine 2.1.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 214cfc93706ed23076acfc401061d060e1753b59
4
- data.tar.gz: 754cf23c6dd3c8437d18e83daf7dd9f86e063781
3
+ metadata.gz: d630803c577229ae72b1ca7bd4e7125840e3f9b6
4
+ data.tar.gz: 8684e82fa2691af08618d5cb845ec500c99176b5
5
5
  SHA512:
6
- metadata.gz: 43fdf070ee47b3d767027bcea6c1518c6f948bff9885a0f283dbbfb419d2c560fbd62cb1670027281fdce5c17989414270786b968db0a17b14ac19932b797a1b
7
- data.tar.gz: e82d4461294d81e9afd982b429dac780ebd9fd840d4c55176896df95d05f8a38f2a6479446b31ea8dc219e397144c3a976cc48caa27df94ac1069dffaafb3d28
6
+ metadata.gz: bfc80b08483357bd6cb61260782b7424cff279c8bbdc8d59247edc54fb2e0ed070d9d6797a063bc5af6ab767547854beb9ca1929f20dee9a94b2406081bb550c
7
+ data.tar.gz: c7dab7e7f518893b98f5213238d10ad0975b9556209084267d7de1bdec1612bde78e3483e27490f201dd8e5c2ed1cd6924723a0bab6b828995123cd9f011faa7
data/Gemfile CHANGED
@@ -1,3 +1,2 @@
1
1
  source "https://rubygems.org"
2
2
  gemspec
3
-
@@ -1,41 +1,33 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- goldmine (2.0.0)
4
+ goldmine (3.0.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
9
  binding_of_caller (0.7.2)
10
10
  debug_inspector (>= 0.0.1)
11
- byebug (5.0.0)
12
- columnize (= 0.9.0)
13
- coderay (1.1.0)
14
- columnize (0.9.0)
15
- coveralls (0.8.3)
11
+ byebug (8.2.4)
12
+ coderay (1.1.1)
13
+ coveralls (0.8.13)
16
14
  json (~> 1.8)
17
- rest-client (>= 1.6.8, < 2)
18
- simplecov (~> 0.10.0)
15
+ simplecov (~> 0.11.0)
19
16
  term-ansicolor (~> 1.3)
20
17
  thor (~> 0.19.1)
18
+ tins (~> 1.6.0)
21
19
  debug_inspector (0.0.2)
22
20
  docile (1.1.5)
23
- domain_name (0.5.25)
24
- unf (>= 0.0.5, < 1.0.0)
25
- http-cookie (1.0.2)
26
- domain_name (~> 0.5)
27
21
  interception (0.5)
28
22
  json (1.8.3)
29
23
  method_source (0.8.2)
30
- mime-types (2.6.2)
31
- netrc (0.10.3)
32
24
  os (0.9.6)
33
25
  pry (0.10.3)
34
26
  coderay (~> 1.1.0)
35
27
  method_source (~> 0.8.1)
36
28
  slop (~> 3.4)
37
- pry-byebug (3.2.0)
38
- byebug (~> 5.0)
29
+ pry-byebug (3.3.0)
30
+ byebug (~> 8.0)
39
31
  pry (~> 0.10)
40
32
  pry-rescue (1.4.2)
41
33
  interception (>= 0.5)
@@ -52,29 +44,22 @@ GEM
52
44
  rack (1.6.4)
53
45
  rack-protection (1.5.3)
54
46
  rack
55
- rake (10.4.2)
56
- rest-client (1.8.0)
57
- http-cookie (>= 1.0.2, < 2.0)
58
- mime-types (>= 1.16, < 3.0)
59
- netrc (~> 0.7)
60
- simplecov (0.10.0)
47
+ rake (11.1.2)
48
+ simplecov (0.11.2)
61
49
  docile (~> 1.1.0)
62
50
  json (~> 1.8)
63
51
  simplecov-html (~> 0.10.0)
64
52
  simplecov-html (0.10.0)
65
- sinatra (1.4.6)
66
- rack (~> 1.4)
53
+ sinatra (1.4.7)
54
+ rack (~> 1.5)
67
55
  rack-protection (~> 1.4)
68
56
  tilt (>= 1.3, < 3)
69
57
  slop (3.6.0)
70
58
  term-ansicolor (1.3.2)
71
59
  tins (~> 1.0)
72
60
  thor (0.19.1)
73
- tilt (2.0.1)
61
+ tilt (2.0.2)
74
62
  tins (1.6.0)
75
- unf (0.1.4)
76
- unf_ext
77
- unf_ext (0.0.7.1)
78
63
 
79
64
  PLATFORMS
80
65
  ruby
@@ -87,4 +72,4 @@ DEPENDENCIES
87
72
  sinatra
88
73
 
89
74
  BUNDLED WITH
90
- 1.10.6
75
+ 1.11.2
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- [![Lines of Code](http://img.shields.io/badge/lines_of_code-193-brightgreen.svg?style=flat)](http://blog.codinghorror.com/the-best-code-is-no-code-at-all/)
1
+ [![Lines of Code](http://img.shields.io/badge/lines_of_code-181-brightgreen.svg?style=flat)](http://blog.codinghorror.com/the-best-code-is-no-code-at-all/)
2
2
  [![Code Status](http://img.shields.io/codeclimate/github/hopsoft/goldmine.svg?style=flat)](https://codeclimate.com/github/hopsoft/goldmine)
3
3
  [![Dependency Status](http://img.shields.io/gemnasium/hopsoft/goldmine.svg?style=flat)](https://gemnasium.com/hopsoft/goldmine)
4
4
  [![Build Status](http://img.shields.io/travis/hopsoft/goldmine.svg?style=flat)](https://travis-ci.org/hopsoft/goldmine)
@@ -7,7 +7,7 @@
7
7
 
8
8
  # Goldmine
9
9
 
10
- Extract a wealth of information from Arrays & Hashes.
10
+ Extract a wealth of information from Arrays.
11
11
 
12
12
  Goldmine is especially helpful when working with source data that is difficult to query.
13
13
  e.g. CSV files, API results, etc...
@@ -28,158 +28,129 @@ gem install goldmine
28
28
 
29
29
  ```ruby
30
30
  require "goldmine"
31
-
32
- list = [1,2,3,4,5,6,7,8,9]
33
- Goldmine::ArrayMiner.new(list)
34
- .pivot { |i| i < 5 }
35
- # result:
36
- {
37
- true => [1, 2, 3, 4],
38
- false => [5, 6, 7, 8, 9]
39
- }
40
31
  ```
41
32
 
42
- ## Chained Pivots
43
-
44
33
  ```ruby
45
34
  list = [1,2,3,4,5,6,7,8,9]
46
- Goldmine::ArrayMiner.new(list)
47
- .pivot { |i| i < 5 }
48
- .pivot { |i| i % 2 == 0 }
49
- # result:
50
- {
51
- [true, false] => [1, 3],
52
- [true, true] => [2, 4],
53
- [false, false] => [5, 7, 9],
54
- [false, true] => [6, 8]
55
- }
56
- ```
57
35
 
58
- ## Named Pivots
36
+ Goldmine(list)
37
+ .pivot("< 5") { |i| i < 5 }
38
+ .result
39
+ .to_h
40
+ ```
59
41
 
60
42
  ```ruby
61
- list = [1,2,3,4,5,6,7,8,9]
62
- Goldmine::ArrayMiner.new(list)
63
- .pivot(:less_than_5) { |i| i < 5 }
64
- # result:
65
43
  {
66
- { :less_than_5 => true } => [1, 2, 3, 4],
67
- { :less_than_5 => false } => [5, 6, 7, 8, 9]
44
+ [["< 5", true]] => [1, 2, 3, 4],
45
+ [["< 5", false]] => [5, 6, 7, 8, 9]
68
46
  }
69
47
  ```
70
48
 
71
- ## Value Pivots
49
+ ## Array Value Pivots
72
50
 
73
51
  ```ruby
74
- list = [
52
+ users = [
75
53
  { :name => "Sally", :favorite_colors => [:blue] },
76
54
  { :name => "John", :favorite_colors => [:blue, :green] },
77
55
  { :name => "Stephen", :favorite_colors => [:red, :pink, :purple] },
78
56
  { :name => "Emily", :favorite_colors => [:orange, :green] },
79
57
  { :name => "Joe", :favorite_colors => [:red] }
80
58
  ]
81
- list = Goldmine::ArrayMiner.new(list)
82
- list.pivot { |record| record[:favorite_colors] }
83
- # result:
59
+
60
+ Goldmine(users)
61
+ .pivot(:favorite_color) { |record| record[:favorite_colors] }
62
+ .result
63
+ .to_h
64
+ ```
65
+
66
+ ```ruby
84
67
  {
85
- :blue => [
86
- { :name => "Sally", :favorite_colors => [:blue] },
87
- { :name => "John", :favorite_colors => [:blue, :green] }
88
- ],
89
- :green => [
90
- { :name => "John", :favorite_colors => [:blue, :green] },
91
- { :name => "Emily", :favorite_colors => [:orange, :green] }
92
- ],
93
- :red => [
94
- { :name => "Stephen", :favorite_colors => [:red, :pink, :purple] },
95
- { :name => "Joe", :favorite_colors => [:red] }
96
- ],
97
- :pink => [
98
- { :name => "Stephen", :favorite_colors => [:red, :pink, :purple] }
99
- ],
100
- :purple => [
101
- { :name => "Stephen", :favorite_colors => [:red, :pink, :purple] }
102
- ],
103
- :orange => [
104
- { :name => "Emily", :favorite_colors => [:orange, :green] }
105
- ]
68
+ [:favorite_color, :blue] => [{:name=>"Sally", :favorite_colors=>[:blue]}, {:name=>"John", :favorite_colors=>[:blue, :green]}],
69
+ [:favorite_color, :green] => [{:name=>"John", :favorite_colors=>[:blue, :green]}, {:name=>"Emily", :favorite_colors=>[:orange, :green]}],
70
+ [:favorite_color, :red] => [{:name=>"Stephen", :favorite_colors=>[:red, :pink, :purple]}, {:name=>"Joe", :favorite_colors=>[:red]}],
71
+ [:favorite_color, :pink] => [{:name=>"Stephen", :favorite_colors=>[:red, :pink, :purple]}],
72
+ [:favorite_color, :purple] => [{:name=>"Stephen", :favorite_colors=>[:red, :pink, :purple]}],
73
+ [:favorite_color, :orange] => [{:name=>"Emily", :favorite_colors=>[:orange, :green]}]
106
74
  }
107
75
  ```
108
76
 
109
- ## Stacked pivots
77
+ ## Chained pivots
110
78
 
111
79
  ```ruby
112
- list = [
80
+ users = [
113
81
  { :name => "Sally", :age => 21 },
114
82
  { :name => "John", :age => 28 },
115
83
  { :name => "Stephen", :age => 37 },
116
84
  { :name => "Emily", :age => 32 },
117
85
  { :name => "Joe", :age => 18 }
118
86
  ]
119
- list = Goldmine::ArrayMiner.new(list)
120
- mined = list.pivot("Name has an 'e'") do |record|
121
- !!record[:name].match(/e/i)
122
- end
123
- mined = mined.pivot(">= 21 years old") do |record|
124
- record[:age] >= 21
125
- end
126
- # result:
87
+
88
+ Goldmine(users).
89
+ pivot("'e' in name") { |user| !!user[:name].match(/e/i) }.
90
+ pivot("21 or over") { |user| user[:age] >= 21 }.
91
+ result.
92
+ to_h
93
+ ```
94
+
95
+ ```ruby
127
96
  {
128
- { "Name has an 'e'" => false, ">= 21 years old" => true } => [
129
- { :name => "Sally", :age => 21 },
130
- { :name => "John", :age => 28 }
131
- ],
132
- { "Name has an 'e'" => true, ">= 21 years old" => true } => [
133
- { :name => "Stephen", :age => 37 },
134
- { :name => "Emily", :age => 32 }
135
- ],
136
- { "Name has an 'e'" => true, ">= 21 years old" => false } => [
137
- { :name => "Joe", :age => 18 }
138
- ]
97
+ [["'e' in name", false], ["21 or over", true]] => [{:name=>"Sally", :age=>21}, {:name=>"John", :age=>28}],
98
+ [["'e' in name", true], ["21 or over", true]] => [{:name=>"Stephen", :age=>37}, {:name=>"Emily", :age=>32}],
99
+ [["'e' in name", true], ["21 or over", false]] => [{:name=>"Joe", :age=>18}]
139
100
  }
140
101
  ```
141
102
 
142
103
  ## Rollups
143
104
 
144
- Rollups provide a clean way to aggregate pivoted data...
145
- think computed columns.
105
+ An intuitive way to aggregate pivoted data...
106
+ i.e. computed columns.
146
107
 
147
- Rollup `blocks` are executed once for each pivot.
148
- _Like pivots, rollups can be chained._
108
+ Rollups are `blocks` that get executed once for each pivot entry.
109
+ _They can also be chained._
149
110
 
150
111
  ```ruby
151
112
  list = [1,2,3,4,5,6,7,8,9]
152
- Goldmine::ArrayMiner.new(list)
153
- .pivot(:less_than_5) { |i| i < 5 }
154
- .pivot(:even) { |i| i % 2 == 0 }
155
- .rollup(:count) { |matched| matched.size }
156
- # result:
113
+
114
+ Goldmine(list)
115
+ .pivot("< 5") { |i| i < 5 }
116
+ .pivot("even") { |i| i % 2 == 0 }
117
+ .result
118
+ .rollup("count", &:count)
119
+ .result
120
+ .to_h
121
+ ```
122
+
123
+ ```ruby
157
124
  {
158
- { :less_than_5 => true, :even => false } => { :count => 2 },
159
- { :less_than_5 => true, :even => true } => { :count => 2 },
160
- { :less_than_5 => false, :even => false } => { :count => 3 },
161
- { :less_than_5 => false, :even => true } => { :count => 2 }
125
+ [["< 5", true], ["even", false]] => [["count", 2]],
126
+ [["< 5", true], ["even", true]] => [["count", 2]],
127
+ [["< 5", false], ["even", false]] => [["count", 3]],
128
+ [["< 5", false], ["even", true]] => [["count", 2]]
162
129
  }
163
130
  ```
164
131
 
165
- ### Pre-Computed Results
132
+ ### Rollup Caching
166
133
 
167
- Rollups can be computationally expensive _(depending upon how much logic you stuff into the `block`)_.
168
- Goldmine caches rollup results & makes them available to subsequent rollups.
134
+ Rollups can be computationally expensive.
135
+ Optional caching can be used to reduce this computational overhead.
169
136
 
170
137
  ```ruby
171
138
  list = [1,2,3,4,5,6,7,8,9]
172
- Goldmine::ArrayMiner.new(list)
139
+
140
+ Goldmine(list)
173
141
  .pivot(:less_than_5) { |i| i < 5 }
174
- .rollup(:count, &:size)
175
- .rollup(:evens) { |list| list.select { |i| i % 2 == 0 }.size }
176
- .rollup(:even_percentage) { |list|
177
- computed(:evens).for(list) / computed(:count).for(list).to_f
178
- }
179
- # result:
142
+ .result
143
+ .rollup(:count, &:count)
144
+ .rollup(:evens) { |list| list.select { |i| i % 2 == 0 }.count }
145
+ .rollup(:even_percentage) { |list| cache[:evens] / cache[:count].to_f }
146
+ .result(cache: true)
147
+ .to_h
148
+ ```
149
+
150
+ ```ruby
180
151
  {
181
- { :less_than_5 => true } => { :count => 4, :evens => 2, :even_percentage => 0.5 },
182
- { :less_than_5 => false } => { :count => 5, :evens => 2, :even_percentage => 0.4 }
152
+ [[:less_than_5, true]] => [[:count, 4], [:evens, 2], [:even_percentage, 0.5]],
153
+ [[:less_than_5, false]] => [[:count, 5], [:evens, 2], [:even_percentage, 0.4]]
183
154
  }
184
155
  ```
185
156
 
@@ -189,18 +160,35 @@ It's often helpful to flatten rollups into rows.
189
160
 
190
161
  ```ruby
191
162
  list = [1,2,3,4,5,6,7,8,9]
192
- Goldmine::ArrayMiner.new(list)
163
+
164
+ rollup = Goldmine(list)
193
165
  .pivot(:less_than_5) { |i| i < 5 }
194
- .rollup(:count, &:size)
195
- .rollup(:evens) { |list| list.select { |i| i % 2 == 0 }.size }
196
- .rollup(:even_percentage) { |list|
197
- computed(:evens).for(list) / computed(:count).for(list).to_f
198
- }
199
- .to_rows
200
- # result:
166
+ .result
167
+ .rollup(:count, &:count)
168
+ .rollup(:evens) { |list| list.select { |i| i % 2 == 0 }.count }
169
+ .rollup(:even_percentage) { |list| cache[:evens] / cache[:count].to_f }
170
+ .result(cache: true)
171
+ ```
172
+
173
+ ```ruby
174
+ rollup.to_rows
175
+ ```
176
+
177
+ ```ruby
201
178
  [
202
- { "less_than_5" => true, "count" => 4, "evens" => 2, "even_percentage" => 0.5 },
203
- { "less_than_5" => false, "count" => 5, "evens" => 2, "even_percentage" => 0.4 }
179
+ [[:less_than_5, true], [:count, 4], [:evens, 2], [:even_percentage, 0.5]],
180
+ [[:less_than_5, false], [:count, 5], [:evens, 2], [:even_percentage, 0.4]]
181
+ ]
182
+ ```
183
+
184
+ ```ruby
185
+ rollup.to_hash_rows
186
+ ```
187
+
188
+ ```ruby
189
+ [
190
+ {:less_than_5=>true, :count=>4, :evens=>2, :even_percentage=>0.5},
191
+ {:less_than_5=>false, :count=>5, :evens=>2, :even_percentage=>0.4}
204
192
  ]
205
193
  ```
206
194
 
@@ -210,14 +198,19 @@ Rollups can also be converted into tabular format.
210
198
 
211
199
  ```ruby
212
200
  list = [1,2,3,4,5,6,7,8,9]
213
- Goldmine::ArrayMiner.new(list)
201
+
202
+ Goldmine(list)
214
203
  .pivot(:less_than_5) { |i| i < 5 }
215
204
  .pivot(:even) { |i| i % 2 == 0 }
216
- .rollup(:count) { |matched| matched.size }
205
+ .result
206
+ .rollup(:count, &:size)
207
+ .result
217
208
  .to_tabular
218
- # result:
209
+ ```
210
+
211
+ ```ruby
219
212
  [
220
- ["less_than_5", "even", "count"],
213
+ [:less_than_5, :even, :count],
221
214
  [true, false, 2],
222
215
  [true, true, 2],
223
216
  [false, false, 3],
@@ -230,22 +223,25 @@ Goldmine::ArrayMiner.new(list)
230
223
  Goldmine makes producing CSV output simple.
231
224
 
232
225
  ```ruby
233
- csv_table = Goldmine::ArrayMiner.new(list)
226
+ list = [1,2,3,4,5,6,7,8,9]
227
+
228
+ Goldmine(list)
234
229
  .pivot(:less_than_5) { |i| i < 5 }
235
230
  .pivot(:even) { |i| i % 2 == 0 }
231
+ .result
236
232
  .rollup(:count) { |matched| matched.size }
233
+ .result
237
234
  .to_csv_table
238
- # result:
239
- #<CSV::Table mode:col_or_row row_count:5>
235
+ .to_csv
236
+ ```
240
237
 
241
- csv_table.to_csv
242
- # result:
238
+ ```ruby
243
239
  "less_than_5,even,count\ntrue,false,2\ntrue,true,2\nfalse,false,3\nfalse,true,2\n"
244
240
  ```
245
241
 
246
- ## Examples
242
+ ## Example Apps
247
243
 
248
- All examples are simple Sinatra apps.
244
+ All examples are small Sinatra apps.
249
245
  They are designed to help communicate Goldmine use-cases.
250
246
 
251
247
  ### Setup
@@ -258,7 +254,9 @@ bundle
258
254
 
259
255
  ### [New York Wifi Hotspots](https://github.com/hopsoft/goldmine/tree/master/examples/new_york_wifi_hotspots)
260
256
 
261
- In this example, we mine the following data.
257
+ Uses data from https://github.com/hopsoft/goldmine/blob/master/examples/new_york_wifi_hotspots/DOITT_WIFI_HOTSPOT_01_13SEPT2010.csv
258
+
259
+ In this example, we mine out the following information.
262
260
 
263
261
  * Total hotspots by city, zip, & area code
264
262
  * Free hotspots by city, zip, & area code
@@ -284,7 +282,7 @@ curl http://localhost:3000/csv
284
282
 
285
283
  Uses data from http://dev.socrata.com/foundry/#/data.medicare.gov/aeay-dfax
286
284
 
287
- In this example, we mine the following data.
285
+ In this example, we mine out the following information.
288
286
 
289
287
  * Total doctors by state & specialty
290
288
  * Preferred doctors by state & specialty
@@ -319,22 +317,22 @@ My Macbook Pro yields the following benchmarks.
319
317
 
320
318
  ```
321
319
  user system total real
322
- pivoted 1.000000 0.020000 1.020000 ( 1.027810)
323
- rolled_up 1.090000 0.020000 1.110000 ( 1.101082)
324
- rows 0.020000 0.000000 0.020000 ( 0.022978)
325
- tabular 0.010000 0.000000 0.010000 ( 0.005423)
326
- csv 0.030000 0.000000 0.030000 ( 0.037245)
320
+ pivoted 0.630000 0.030000 0.660000 ( 0.670409)
321
+ rolled_up 0.570000 0.030000 0.600000 ( 0.626413)
322
+ rows 0.010000 0.000000 0.010000 ( 0.003258)
323
+ tabular 0.010000 0.000000 0.010000 ( 0.010110)
324
+ csv 0.050000 0.000000 0.050000 ( 0.057677)
327
325
  ```
328
326
 
329
327
  ##### 1,000,000 Records
330
328
 
331
329
  ```
332
330
  user system total real
333
- pivoted 15.700000 0.490000 16.190000 ( 16.886677)
334
- rolled_up 7.070000 0.350000 7.420000 ( 7.544060)
335
- rows 0.020000 0.000000 0.020000 ( 0.028432)
336
- tabular 0.010000 0.010000 0.020000 ( 0.007663)
337
- csv 0.050000 0.000000 0.050000 ( 0.058925)
331
+ pivoted 7.270000 0.300000 7.570000 ( 8.053166)
332
+ rolled_up 6.800000 0.830000 7.630000 ( 8.051707)
333
+ rows 0.000000 0.000000 0.000000 ( 0.003934)
334
+ tabular 0.010000 0.000000 0.010000 ( 0.011825)
335
+ csv 0.210000 0.010000 0.220000 ( 0.222752)
338
336
  ```
339
337
 
340
338
  ## Summary