goldmine 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +178 -1
- metadata +5 -4
data/README.md
CHANGED
@@ -1,3 +1,180 @@
|
|
1
1
|
# Goldmine
|
2
2
|
|
3
|
-
|
3
|
+
## Data mining made easy... the Ruby way.
|
4
|
+
### Turn any list into a treasure trove.
|
5
|
+
|
6
|
+
Goldmine allows you to apply pivot table logic to any list for powerful data mining capabilities.
|
7
|
+
|
8
|
+
In the nomenclature of Goldmine, we call this digging for data. So we've added a `dig` method to `Array`.
|
9
|
+
|
10
|
+
#### More reasons to love it
|
11
|
+
|
12
|
+
* Provides ETL-like functionality... but simple and elegant
|
13
|
+
* Supports method chaining for deep data mining
|
14
|
+
* Handles values that are lists themselves
|
15
|
+
* Allows you to name your pivots
|
16
|
+
|
17
|
+
What does this all mean for you? Let's have a look.
|
18
|
+
|
19
|
+
## The Basics
|
20
|
+
|
21
|
+
#### Pivot a simple list of numbers based on whether or not they are less than 5
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
list = [1,2,3,4,5,6,7,8,9]
|
25
|
+
data = list.dig { |i| i < 5 }
|
26
|
+
|
27
|
+
# {
|
28
|
+
# true => [1, 2, 3, 4],
|
29
|
+
# false => [5, 6, 7, 8, 9]
|
30
|
+
# }
|
31
|
+
```
|
32
|
+
|
33
|
+
#### The same pivot as above but explicitly named
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
list = [1,2,3,4,5,6,7,8,9]
|
37
|
+
data = list.dig("less than 5") { |i| i < 5 }
|
38
|
+
|
39
|
+
# {
|
40
|
+
# "less than 5: true" => [1, 2, 3, 4],
|
41
|
+
# "less than 5: false" => [5, 6, 7, 8, 9]
|
42
|
+
# }
|
43
|
+
```
|
44
|
+
|
45
|
+
## Next Steps
|
46
|
+
|
47
|
+
#### Chain pivots together
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
list = [1,2,3,4,5,6,7,8,9]
|
51
|
+
data = list.dig { |i| i < 5 }.dig { |i| i % 2 == 0 }
|
52
|
+
|
53
|
+
# {
|
54
|
+
# [true, false] => [1, 3],
|
55
|
+
# [true, true] => [2, 4],
|
56
|
+
# [false, false] => [5, 7, 9],
|
57
|
+
# [false, true] => [6, 8]
|
58
|
+
# }
|
59
|
+
```
|
60
|
+
|
61
|
+
#### The same pivot as above but explicitly named
|
62
|
+
|
63
|
+
```ruby
|
64
|
+
list = [1,2,3,4,5,6,7,8,9]
|
65
|
+
data = list.dig("less than 5") { |i| i < 5 }.dig("divisible by 2") { |i| i % 2 == 0 }
|
66
|
+
|
67
|
+
# {
|
68
|
+
# ["less than 5: true", "divisible by 2: false"] => [1, 3],
|
69
|
+
# ["less than 5: true", "divisible by 2: true"] => [2, 4],
|
70
|
+
# ["less than 5: false", "divisible by 2: false"] => [5, 7, 9],
|
71
|
+
# ["less than 5: false", "divisible by 2: true"] => [6, 8]
|
72
|
+
# }
|
73
|
+
```
|
74
|
+
|
75
|
+
## Deep Cuts
|
76
|
+
|
77
|
+
#### Pivot a list of users based on a value that is itself a list
|
78
|
+
|
79
|
+
```ruby
|
80
|
+
list = [
|
81
|
+
{ :name => "Nathan", :projects => [:a, :b] },
|
82
|
+
{ :name => "Eric", :projects => [:a, :d, :g] },
|
83
|
+
{ :name => "Brian", :projects => [:b, :c, :e, :f] },
|
84
|
+
{ :name => "Mark", :projects => [:g] },
|
85
|
+
{ :name => "Josh", :projects => [:a, :c] },
|
86
|
+
{ :name => "Matthew", :projects => [:b, :c, :d] }
|
87
|
+
]
|
88
|
+
data = list.dig { |record| record[:projects] }
|
89
|
+
|
90
|
+
# {
|
91
|
+
# :a => [ { :name => "Nathan", :projects => [:a, :b] },
|
92
|
+
# { :name => "Eric", :projects => [:a, :d, :g] },
|
93
|
+
# { :name => "Josh", :projects => [:a, :c] } ],
|
94
|
+
# :b => [ { :name => "Nathan", :projects => [:a, :b] },
|
95
|
+
# { :name => "Brian", :projects => [:b, :c, :e, :f] },
|
96
|
+
# { :name => "Matthew", :projects => [:b, :c, :d] } ],
|
97
|
+
# :d => [ { :name => "Eric", :projects => [:a, :d, :g] },
|
98
|
+
# { :name => "Matthew", :projects => [:b, :c, :d] } ],
|
99
|
+
# :g => [ { :name => "Eric", :projects => [:a, :d, :g] },
|
100
|
+
# { :name => "Mark", :projects => [:g] } ],
|
101
|
+
# :c => [ { :name => "Brian", :projects => [:b, :c, :e, :f] },
|
102
|
+
# { :name => "Josh", :projects => [:a, :c] },
|
103
|
+
# { :name => "Matthew", :projects => [:b, :c, :d] } ],
|
104
|
+
# :e => [ { :name => "Brian", :projects => [:b, :c, :e, :f] } ],
|
105
|
+
# :f => [ { :name => "Brian", :projects => [:b, :c, :e, :f] } ]
|
106
|
+
# }
|
107
|
+
|
108
|
+
```
|
109
|
+
|
110
|
+
#### Pivot a list of users based on lang and number of projects owned
|
111
|
+
|
112
|
+
```ruby
|
113
|
+
list = [
|
114
|
+
{ :name => "Nathan", :langs => [:ruby, :javascript], :projects => [:a, :b] },
|
115
|
+
{ :name => "Eric", :langs => [:ruby, :javascript, :groovy], :projects => [:a, :d, :g] },
|
116
|
+
{ :name => "Brian", :langs => [:ruby, :javascript, :c, :go], :projects => [:b, :c, :e, :f] },
|
117
|
+
{ :name => "Mark", :langs => [:ruby, :java, :scala], :projects => [:g] },
|
118
|
+
{ :name => "Josh", :langs => [:ruby, :lisp, :clojure], :projects => [:a, :c] },
|
119
|
+
{ :name => "Matthew", :langs => [:ruby, :c, :clojure], :projects => [:b, :c, :d] }
|
120
|
+
]
|
121
|
+
data = list
|
122
|
+
.dig("lang") { |rec| rec[:langs] }
|
123
|
+
.dig("project count") { |rec| rec[:projects].length }
|
124
|
+
|
125
|
+
# {
|
126
|
+
# ["lang: ruby", "project count: 2"] => [ { :name => "Nathan", ... }, { :name => "Josh", ... } ],
|
127
|
+
# ["lang: ruby", "project count: 3"] => [ { :name => "Eric", ... }, { :name => "Matthew", ... } ],
|
128
|
+
# ["lang: ruby", "project count: 4"] => [ { :name => "Brian", ... } ],
|
129
|
+
# ["lang: ruby", "project count: 1"] => [ { :name => "Mark", ... } ],
|
130
|
+
# ["lang: javascript", "project count: 2"] => [ { :name => "Nathan", ... } ],
|
131
|
+
# ["lang: javascript", "project count: 3"] => [ { :name => "Eric", ... } ],
|
132
|
+
# ["lang: javascript", "project count: 4"] => [ { :name => "Brian", ... } ],
|
133
|
+
# ["lang: groovy", "project count: 3"] => [ { :name => "Eric", ... } ],
|
134
|
+
# ["lang: c", "project count: 4"] => [ { :name => "Brian", ... } ],
|
135
|
+
# ["lang: c", "project count: 3"] => [ { :name => "Matthew", ... } ],
|
136
|
+
# ["lang: go", "project count: 4"] => [ { :name => "Brian", ... } ],
|
137
|
+
# ["lang: java", "project count: 1"] => [ { :name => "Mark", ... } ],
|
138
|
+
# ["lang: scala", "project count: 1"] => [ { :name => "Mark", ... } ],
|
139
|
+
# ["lang: lisp", "project count: 2"] => [ { :name => "Josh", ... } ],
|
140
|
+
# ["lang: clojure", "project count: 2"] => [ { :name => "Josh", ... } ],
|
141
|
+
# ["lang: clojure", "project count: 3"] => [ { :name => "Matthew", ... } ]
|
142
|
+
# }
|
143
|
+
```
|
144
|
+
|
145
|
+
#### Pivot a list of users based on whether or not they know javascript, what other languages they know, and whether or not their name contains the letter 'a'
|
146
|
+
|
147
|
+
*This is a pretty contrived example, but it hopefully illustrates the type of power that's available.*
|
148
|
+
|
149
|
+
```ruby
|
150
|
+
list = [
|
151
|
+
{ :name => "Nathan", :langs => [:ruby, :javascript], :projects => [:a, :b] },
|
152
|
+
{ :name => "Eric", :langs => [:ruby, :javascript, :groovy], :projects => [:a, :d, :g] },
|
153
|
+
{ :name => "Brian", :langs => [:ruby, :javascript, :c, :go], :projects => [:b, :c, :e, :f] },
|
154
|
+
{ :name => "Mark", :langs => [:ruby, :java, :scala], :projects => [:g] },
|
155
|
+
{ :name => "Josh", :langs => [:ruby, :lisp, :clojure], :projects => [:a, :c] },
|
156
|
+
{ :name => "Matthew", :langs => [:ruby, :c, :clojure], :projects => [:b, :c, :d] }
|
157
|
+
]
|
158
|
+
data = list
|
159
|
+
.dig("knows javascript") { |rec| rec[:langs].include?(:javascript) }
|
160
|
+
.dig("lang") { |rec| rec[:langs] }
|
161
|
+
.dig("name includes 'a'") { |rec| rec[:name].include?("a") }
|
162
|
+
|
163
|
+
# {
|
164
|
+
# ["knows javascript: true", "lang: ruby", "name includes 'a': true"] => [ { :name => "Nathan", ... }, { :name => "Brian", ... } ],
|
165
|
+
# ["knows javascript: true", "lang: ruby", "name includes 'a': false"] => [ { :name => "Eric", ... } ],
|
166
|
+
# ["knows javascript: true", "lang: javascript", "name includes 'a': true"] => [ { :name => "Nathan", ... }, { :name => "Brian", ... } ],
|
167
|
+
# ["knows javascript: true", "lang: javascript", "name includes 'a': false"] => [ { :name => "Eric", ... } ],
|
168
|
+
# ["knows javascript: true", "lang: groovy", "name includes 'a': false"] => [ { :name => "Eric", ... } ],
|
169
|
+
# ["knows javascript: true", "lang: c", "name includes 'a': true"] => [ { :name => "Brian", ... } ],
|
170
|
+
# ["knows javascript: true", "lang: go", "name includes 'a': true"] => [ { :name => "Brian", ... } ],
|
171
|
+
# ["knows javascript: false", "lang: ruby", "name includes 'a': true"] => [ { :name => "Mark", ... }, { :name => "Matthew", ... } ],
|
172
|
+
# ["knows javascript: false", "lang: ruby", "name includes 'a': false"] => [ { :name => "Josh", ... } ],
|
173
|
+
# ["knows javascript: false", "lang: java", "name includes 'a': true"] => [ { :name => "Mark", ... } ],
|
174
|
+
# ["knows javascript: false", "lang: scala", "name includes 'a': true"] => [ { :name => "Mark", ... } ],
|
175
|
+
# ["knows javascript: false", "lang: lisp", "name includes 'a': false"] => [ { :name => "Josh", ... } ],
|
176
|
+
# ["knows javascript: false", "lang: clojure", "name includes 'a': false"] => [ { :name => "Josh", ... } ],
|
177
|
+
# ["knows javascript: false", "lang: clojure", "name includes 'a': true"] => [ { :name => "Matthew", ... } ],
|
178
|
+
# ["knows javascript: false", "lang: c", "name includes 'a': true"] => [ { :name => "Matthew", ... } ]
|
179
|
+
# }
|
180
|
+
```
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: goldmine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,8 @@ bindir: bin
|
|
11
11
|
cert_chain: []
|
12
12
|
date: 2012-06-19 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
|
-
description: ! '
|
14
|
+
description: ! ' Goldmine allows you to apply pivot table logic to any list for
|
15
|
+
powerful data mining capabilities.
|
15
16
|
|
16
17
|
'
|
17
18
|
email:
|
@@ -25,7 +26,7 @@ files:
|
|
25
26
|
- Gemfile.lock
|
26
27
|
- Rakefile
|
27
28
|
- README.md
|
28
|
-
homepage:
|
29
|
+
homepage: http://hopsoft.github.com/goldmine/
|
29
30
|
licenses:
|
30
31
|
- MIT
|
31
32
|
post_install_message:
|
@@ -49,5 +50,5 @@ rubyforge_project:
|
|
49
50
|
rubygems_version: 1.8.10
|
50
51
|
signing_key:
|
51
52
|
specification_version: 3
|
52
|
-
summary:
|
53
|
+
summary: Data mining made easy... the Ruby way.
|
53
54
|
test_files: []
|