daru 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +0 -1
- data/History.txt +35 -0
- data/README.md +178 -198
- data/daru.gemspec +5 -7
- data/lib/daru.rb +10 -2
- data/lib/daru/accessors/array_wrapper.rb +36 -198
- data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
- data/lib/daru/core/group_by.rb +183 -0
- data/lib/daru/dataframe.rb +615 -167
- data/lib/daru/index.rb +17 -16
- data/lib/daru/io/io.rb +5 -12
- data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
- data/lib/daru/maths/arithmetic/vector.rb +19 -6
- data/lib/daru/maths/statistics/dataframe.rb +103 -2
- data/lib/daru/maths/statistics/vector.rb +102 -61
- data/lib/daru/monkeys.rb +8 -0
- data/lib/daru/multi_index.rb +199 -0
- data/lib/daru/plotting/dataframe.rb +24 -24
- data/lib/daru/plotting/vector.rb +14 -15
- data/lib/daru/vector.rb +402 -98
- data/lib/version.rb +1 -1
- data/notebooks/grouping_splitting_pivots.ipynb +529 -0
- data/notebooks/intro_with_music_data_.ipynb +104 -119
- data/spec/accessors/wrappers_spec.rb +36 -0
- data/spec/core/group_by_spec.rb +331 -0
- data/spec/dataframe_spec.rb +1237 -475
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/index_spec.rb +10 -21
- data/spec/io/io_spec.rb +4 -14
- data/spec/math/arithmetic/dataframe_spec.rb +66 -0
- data/spec/math/arithmetic/vector_spec.rb +45 -4
- data/spec/math/statistics/dataframe_spec.rb +91 -1
- data/spec/math/statistics/vector_spec.rb +32 -6
- data/spec/monkeys_spec.rb +10 -1
- data/spec/multi_index_spec.rb +216 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/vector_spec.rb +505 -57
- metadata +21 -15
data/notebooks/grouping_splitting_pivots.ipynb
CHANGED
@@ -0,0 +1,529 @@
|
|
1
|
+
{
|
2
|
+
"metadata": {
|
3
|
+
"language": "ruby",
|
4
|
+
"name": "",
|
5
|
+
"signature": "sha256:512fa2d68b8aca8e034679cd3f2eeb1ba0d25133ebbff930f2154a7c94a3479e"
|
6
|
+
},
|
7
|
+
"nbformat": 3,
|
8
|
+
"nbformat_minor": 0,
|
9
|
+
"worksheets": [
|
10
|
+
{
|
11
|
+
"cells": [
|
12
|
+
{
|
13
|
+
"cell_type": "code",
|
14
|
+
"collapsed": false,
|
15
|
+
"input": [
|
16
|
+
"require 'daru'\n",
|
17
|
+
"\n",
|
18
|
+
"df = Daru::DataFrame.new({a: [1,2,3,4,5], b: [10,14,15,17,44]})\n",
|
19
|
+
"df.plot legends: [:a, :b], type: :line do |p,d|\n",
|
20
|
+
" p.yrange [0,100]\n",
|
21
|
+
" p.legend true\n",
|
22
|
+
" d.color \"green\"\n",
|
23
|
+
"end"
|
24
|
+
],
|
25
|
+
"language": "python",
|
26
|
+
"metadata": {},
|
27
|
+
"outputs": [
|
28
|
+
{
|
29
|
+
"html": [
|
30
|
+
"<script type='text/javascript'>if(window['d3'] === undefined ||\n",
|
31
|
+
" window['Nyaplot'] === undefined){\n",
|
32
|
+
" var path = {\"d3\":\"http://d3js.org/d3.v3.min\"};\n",
|
33
|
+
"\n",
|
34
|
+
"\n",
|
35
|
+
"\n",
|
36
|
+
" var shim = {\"d3\":{\"exports\":\"d3\"}};\n",
|
37
|
+
"\n",
|
38
|
+
" require.config({paths: path, shim:shim});\n",
|
39
|
+
"\n",
|
40
|
+
"\n",
|
41
|
+
"require(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');\n",
|
42
|
+
"\n",
|
43
|
+
"\tvar script = d3.select(\"head\")\n",
|
44
|
+
"\t .append(\"script\")\n",
|
45
|
+
"\t .attr(\"src\", \"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\")\n",
|
46
|
+
"\t .attr(\"async\", true);\n",
|
47
|
+
"\n",
|
48
|
+
"\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\n",
|
49
|
+
"\n",
|
50
|
+
"\n",
|
51
|
+
"\t var event = document.createEvent(\"HTMLEvents\");\n",
|
52
|
+
"\t event.initEvent(\"load_nyaplot\",false,false);\n",
|
53
|
+
"\t window.dispatchEvent(event);\n",
|
54
|
+
"\t console.log('Finished loading Nyaplotjs');\n",
|
55
|
+
"\n",
|
56
|
+
"\t};\n",
|
57
|
+
"\n",
|
58
|
+
"\n",
|
59
|
+
"});\n",
|
60
|
+
"}\n",
|
61
|
+
"</script>"
|
62
|
+
],
|
63
|
+
"metadata": {},
|
64
|
+
"output_type": "pyout",
|
65
|
+
"prompt_number": 1,
|
66
|
+
"text": [
|
67
|
+
"\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"http://d3js.org/d3.v3.min\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});\\n}\\n\""
|
68
|
+
]
|
69
|
+
},
|
70
|
+
{
|
71
|
+
"html": [
|
72
|
+
"<div id='vis-e8fbebaa-7e5b-44cd-bf8f-1d4080d079d1'></div>\n",
|
73
|
+
"<script>\n",
|
74
|
+
"(function(){\n",
|
75
|
+
" var render = function(){\n",
|
76
|
+
" var model = {\"panes\":[{\"diagrams\":[{\"type\":\"line\",\"options\":{\"x\":\"a\",\"y\":\"b\",\"color\":\"green\"},\"data\":\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"}],\"options\":{\"yrange\":[0,100],\"legend\":true,\"zoom\":true,\"width\":800,\"xrange\":[1,5]}}],\"data\":{\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\":[{\"a\":1,\"b\":10},{\"a\":2,\"b\":14},{\"a\":3,\"b\":15},{\"a\":4,\"b\":17},{\"a\":5,\"b\":44}]},\"extension\":[]}\n",
|
77
|
+
" Nyaplot.core.parse(model, '#vis-e8fbebaa-7e5b-44cd-bf8f-1d4080d079d1');\n",
|
78
|
+
" };\n",
|
79
|
+
" if(window['Nyaplot']==undefined){\n",
|
80
|
+
" window.addEventListener('load_nyaplot', render, false);\n",
|
81
|
+
"\treturn;\n",
|
82
|
+
" } else {\n",
|
83
|
+
" render();\n",
|
84
|
+
" }\n",
|
85
|
+
"})();\n",
|
86
|
+
"</script>\n"
|
87
|
+
],
|
88
|
+
"metadata": {},
|
89
|
+
"output_type": "pyout",
|
90
|
+
"prompt_number": 1,
|
91
|
+
"text": [
|
92
|
+
"#<Nyaplot::Frame:0x8ac8fd4 @properties={:panes=>[#<Nyaplot::Plot:0x8acb25c @properties={:diagrams=>[#<Nyaplot::Diagram:0x8ac97cc @properties={:type=>:line, :options=>{:x=>:a, :y=>:b, :color=>\"green\"}, :data=>\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"}, @xrange=[1, 5], @yrange=[10, 44]>], :options=>{:yrange=>[0, 100], :legend=>true, :zoom=>true, :width=>800, :xrange=>[1, 5]}}>], :data=>{\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"=>#<Nyaplot::DataFrame:0x8ac9d1c @name=\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\", @rows=[{:a=>1, :b=>10}, {:a=>2, :b=>14}, {:a=>3, :b=>15}, {:a=>4, :b=>17}, {:a=>5, :b=>44}]>}, :extension=>[]}>"
|
93
|
+
]
|
94
|
+
}
|
95
|
+
],
|
96
|
+
"prompt_number": 1
|
97
|
+
},
|
98
|
+
{
|
99
|
+
"cell_type": "code",
|
100
|
+
"collapsed": false,
|
101
|
+
"input": [
|
102
|
+
"require 'daru'\n",
|
103
|
+
"# Calculate statistics of numeric columns\n",
|
104
|
+
"df = Daru::DataFrame.new({\n",
|
105
|
+
" a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'], \n",
|
106
|
+
" b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n",
|
107
|
+
" c: ['small','large','large','small','small','large','small','large','small'],\n",
|
108
|
+
" d: [1,2,2,3,3,4,5,6,7],\n",
|
109
|
+
" e: [2,4,4,6,6,8,10,12,14],\n",
|
110
|
+
" f: [10,20,20,30,30,40,50,60,70]\n",
|
111
|
+
" })\n",
|
112
|
+
"df.mean"
|
113
|
+
],
|
114
|
+
"language": "python",
|
115
|
+
"metadata": {},
|
116
|
+
"outputs": [
|
117
|
+
{
|
118
|
+
"html": [
|
119
|
+
"<table><tr><th> </th><th>nil</th></tr><tr><td>d</td><td>3.6666666666666665</td></tr><tr><td>e</td><td>7.333333333333333</td></tr><tr><td>f</td><td>36.666666666666664</td></tr></table>"
|
120
|
+
],
|
121
|
+
"metadata": {},
|
122
|
+
"output_type": "pyout",
|
123
|
+
"prompt_number": 2,
|
124
|
+
"text": [
|
125
|
+
"\n",
|
126
|
+
"#<Daru::Vector:72633550 @name = nil @size = 3 >\n",
|
127
|
+
" nil\n",
|
128
|
+
" d 3.6666666666666665\n",
|
129
|
+
" e 7.333333333333333\n",
|
130
|
+
" f 36.666666666666664\n"
|
131
|
+
]
|
132
|
+
}
|
133
|
+
],
|
134
|
+
"prompt_number": 2
|
135
|
+
},
|
136
|
+
{
|
137
|
+
"cell_type": "code",
|
138
|
+
"collapsed": false,
|
139
|
+
"input": [
|
140
|
+
"# Calculate multiple statistical measures in one shot\n",
|
141
|
+
"df.describe"
|
142
|
+
],
|
143
|
+
"language": "python",
|
144
|
+
"metadata": {},
|
145
|
+
"outputs": [
|
146
|
+
{
|
147
|
+
"html": [
|
148
|
+
"<table><tr><th></th><th>d</th><th>e</th><th>f</th></tr><tr><td>count</td><td>9</td><td>9</td><td>9</td></tr><tr><td>mean</td><td>3.6666666666666665</td><td>7.333333333333333</td><td>36.666666666666664</td></tr><tr><td>std</td><td>2.0</td><td>4.0</td><td>20.0</td></tr><tr><td>min</td><td>1</td><td>2</td><td>10</td></tr><tr><td>max</td><td>7</td><td>14</td><td>70</td></tr></table>"
|
149
|
+
],
|
150
|
+
"metadata": {},
|
151
|
+
"output_type": "pyout",
|
152
|
+
"prompt_number": 3,
|
153
|
+
"text": [
|
154
|
+
"\n",
|
155
|
+
"#<Daru::DataFrame:72528680 @name = c992bbe4-8948-46f1-bdd4-af2e117e94ac @size = 5>\n",
|
156
|
+
" d e f \n",
|
157
|
+
" count 9 9 9 \n",
|
158
|
+
" mean 3.66666666 7.33333333 36.6666666 \n",
|
159
|
+
" std 2.0 4.0 20.0 \n",
|
160
|
+
" min 1 2 10 \n",
|
161
|
+
" max 7 14 70 \n"
|
162
|
+
]
|
163
|
+
}
|
164
|
+
],
|
165
|
+
"prompt_number": 3
|
166
|
+
},
|
167
|
+
{
|
168
|
+
"cell_type": "code",
|
169
|
+
"collapsed": false,
|
170
|
+
"input": [
|
171
|
+
"# Create a multi-indexed DataFrame\n",
|
172
|
+
"tuples = [\n",
|
173
|
+
" [:a,:one,:bar],\n",
|
174
|
+
" [:a,:one,:baz],\n",
|
175
|
+
" [:a,:two,:bar],\n",
|
176
|
+
" [:a,:two,:baz],\n",
|
177
|
+
" [:b,:one,:bar],\n",
|
178
|
+
" [:b,:two,:bar],\n",
|
179
|
+
" [:b,:two,:baz],\n",
|
180
|
+
" [:b,:one,:foo],\n",
|
181
|
+
" [:c,:one,:bar],\n",
|
182
|
+
" [:c,:one,:baz],\n",
|
183
|
+
" [:c,:two,:foo],\n",
|
184
|
+
" [:c,:two,:bar]\n",
|
185
|
+
"]\n",
|
186
|
+
"multi_index = Daru::MultiIndex.new(tuples)\n",
|
187
|
+
"\n",
|
188
|
+
"vector_arry1 = [11,12,13,14,11,12,13,14,11,12,13,14]\n",
|
189
|
+
"vector_arry2 = [1,2,3,4,1,2,3,4,1,2,3,4]\n",
|
190
|
+
"\n",
|
191
|
+
"order_mi = Daru::MultiIndex.new([\n",
|
192
|
+
" [:a,:one,:bar],\n",
|
193
|
+
" [:a,:two,:baz],\n",
|
194
|
+
" [:b,:two,:foo],\n",
|
195
|
+
" [:b,:one,:foo]])\n",
|
196
|
+
"\n",
|
197
|
+
"df_mi = Daru::DataFrame.new([\n",
|
198
|
+
" vector_arry1, \n",
|
199
|
+
" vector_arry2, \n",
|
200
|
+
" vector_arry1, \n",
|
201
|
+
" vector_arry2], order: order_mi, index: multi_index)"
|
202
|
+
],
|
203
|
+
"language": "python",
|
204
|
+
"metadata": {},
|
205
|
+
"outputs": [
|
206
|
+
{
|
207
|
+
"html": [
|
208
|
+
"<table><tr><th></th><th>[:a, :one, :bar]</th><th>[:a, :two, :baz]</th><th>[:b, :two, :foo]</th><th>[:b, :one, :foo]</th></tr><tr><td>[:a, :one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:a, :one, :baz]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:a, :two, :bar]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:a, :two, :baz]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr><tr><td>[:b, :one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:b, :two, :bar]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:b, :two, :baz]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:b, :one, :foo]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr><tr><td>[:c, :one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:c, :one, :baz]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:c, :two, :foo]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:c, :two, :bar]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr></table>"
|
209
|
+
],
|
210
|
+
"metadata": {},
|
211
|
+
"output_type": "pyout",
|
212
|
+
"prompt_number": 4,
|
213
|
+
"text": [
|
214
|
+
"\n",
|
215
|
+
"#<Daru::DataFrame:72070870 @name = f8812b5e-bd28-4e32-9173-911514741388 @size = 12>\n",
|
216
|
+
" [:a, :one, [:a, :two, [:b, :two, [:b, :one, \n",
|
217
|
+
"[:a, :one, 11 1 11 1 \n",
|
218
|
+
"[:a, :one, 12 2 12 2 \n",
|
219
|
+
"[:a, :two, 13 3 13 3 \n",
|
220
|
+
"[:a, :two, 14 4 14 4 \n",
|
221
|
+
"[:b, :one, 11 1 11 1 \n",
|
222
|
+
"[:b, :two, 12 2 12 2 \n",
|
223
|
+
"[:b, :two, 13 3 13 3 \n",
|
224
|
+
"[:b, :one, 14 4 14 4 \n",
|
225
|
+
"[:c, :one, 11 1 11 1 \n",
|
226
|
+
"[:c, :one, 12 2 12 2 \n",
|
227
|
+
"[:c, :two, 13 3 13 3 \n",
|
228
|
+
"[:c, :two, 14 4 14 4 \n"
|
229
|
+
]
|
230
|
+
}
|
231
|
+
],
|
232
|
+
"prompt_number": 4
|
233
|
+
},
|
234
|
+
{
|
235
|
+
"cell_type": "code",
|
236
|
+
"collapsed": false,
|
237
|
+
"input": [
|
238
|
+
"# Specify complete tuple to choose a single row\n",
|
239
|
+
"df_mi.row[:a, :one,:bar]"
|
240
|
+
],
|
241
|
+
"language": "python",
|
242
|
+
"metadata": {},
|
243
|
+
"outputs": [
|
244
|
+
{
|
245
|
+
"html": [
|
246
|
+
"<table><tr><th> </th><th>0</th></tr><tr><td>[:a, :one, :bar]</td><td>11</td></tr><tr><td>[:a, :two, :baz]</td><td>1</td></tr><tr><td>[:b, :two, :foo]</td><td>11</td></tr><tr><td>[:b, :one, :foo]</td><td>1</td></tr></table>"
|
247
|
+
],
|
248
|
+
"metadata": {},
|
249
|
+
"output_type": "pyout",
|
250
|
+
"prompt_number": 5,
|
251
|
+
"text": [
|
252
|
+
"\n",
|
253
|
+
"#<Daru::Vector:77596600 @name = 0 @size = 4 >\n",
|
254
|
+
" 0\n",
|
255
|
+
"[:a, :one, :bar] 11\n",
|
256
|
+
"[:a, :two, :baz] 1\n",
|
257
|
+
"[:b, :two, :foo] 11\n",
|
258
|
+
"[:b, :one, :foo] 1\n"
|
259
|
+
]
|
260
|
+
}
|
261
|
+
],
|
262
|
+
"prompt_number": 5
|
263
|
+
},
|
264
|
+
{
|
265
|
+
"cell_type": "code",
|
266
|
+
"collapsed": false,
|
267
|
+
"input": [
|
268
|
+
"# Specify partial tuple to select index hierarchically\n",
|
269
|
+
"df_mi.row[:a]"
|
270
|
+
],
|
271
|
+
"language": "python",
|
272
|
+
"metadata": {},
|
273
|
+
"outputs": [
|
274
|
+
{
|
275
|
+
"html": [
|
276
|
+
"<table><tr><th></th><th>[:a, :one, :bar]</th><th>[:a, :two, :baz]</th><th>[:b, :two, :foo]</th><th>[:b, :one, :foo]</th></tr><tr><td>[:one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:one, :baz]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:two, :bar]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:two, :baz]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr></table>"
|
277
|
+
],
|
278
|
+
"metadata": {},
|
279
|
+
"output_type": "pyout",
|
280
|
+
"prompt_number": 6,
|
281
|
+
"text": [
|
282
|
+
"\n",
|
283
|
+
"#<Daru::DataFrame:77518650 @name = f8812b5e-bd28-4e32-9173-911514741388 @size = 4>\n",
|
284
|
+
" [:a, :one, [:a, :two, [:b, :two, [:b, :one, \n",
|
285
|
+
"[:one, :ba 11 1 11 1 \n",
|
286
|
+
"[:one, :ba 12 2 12 2 \n",
|
287
|
+
"[:two, :ba 13 3 13 3 \n",
|
288
|
+
"[:two, :ba 14 4 14 4 \n"
|
289
|
+
]
|
290
|
+
}
|
291
|
+
],
|
292
|
+
"prompt_number": 6
|
293
|
+
},
|
294
|
+
{
|
295
|
+
"cell_type": "code",
|
296
|
+
"collapsed": false,
|
297
|
+
"input": [
|
298
|
+
"# See grouped rows with the 'groups' method\n",
|
299
|
+
"\n",
|
300
|
+
"df = Daru::DataFrame.new({\n",
|
301
|
+
" a: %w{foo bar foo bar foo bar foo foo},\n",
|
302
|
+
" b: %w{one one two three two two one three},\n",
|
303
|
+
" c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],\n",
|
304
|
+
" d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]\n",
|
305
|
+
"})\n",
|
306
|
+
"grouped = df.group_by([:a, :b])\n",
|
307
|
+
"grouped.groups"
|
308
|
+
],
|
309
|
+
"language": "python",
|
310
|
+
"metadata": {},
|
311
|
+
"outputs": [
|
312
|
+
{
|
313
|
+
"metadata": {},
|
314
|
+
"output_type": "pyout",
|
315
|
+
"prompt_number": 7,
|
316
|
+
"text": [
|
317
|
+
"{[\"bar\", \"one\"]=>[1], [\"bar\", \"three\"]=>[3], [\"bar\", \"two\"]=>[5], [\"foo\", \"one\"]=>[0, 6], [\"foo\", \"three\"]=>[7], [\"foo\", \"two\"]=>[2, 4]}"
|
318
|
+
]
|
319
|
+
}
|
320
|
+
],
|
321
|
+
"prompt_number": 7
|
322
|
+
},
|
323
|
+
{
|
324
|
+
"cell_type": "code",
|
325
|
+
"collapsed": false,
|
326
|
+
"input": [
|
327
|
+
"# First group by the columns :a and :b and then calculate mean of the grouped rows.\n",
|
328
|
+
"grouped.mean"
|
329
|
+
],
|
330
|
+
"language": "python",
|
331
|
+
"metadata": {},
|
332
|
+
"outputs": [
|
333
|
+
{
|
334
|
+
"html": [
|
335
|
+
"<table><tr><th></th><th>c</th><th>d</th></tr><tr><td>[:bar, :one]</td><td>2</td><td>22</td></tr><tr><td>[:bar, :three]</td><td>1</td><td>44</td></tr><tr><td>[:bar, :two]</td><td>6</td><td>66</td></tr><tr><td>[:foo, :one]</td><td>2.0</td><td>44.0</td></tr><tr><td>[:foo, :three]</td><td>8</td><td>88</td></tr><tr><td>[:foo, :two]</td><td>3.0</td><td>44.0</td></tr></table>"
|
336
|
+
],
|
337
|
+
"metadata": {},
|
338
|
+
"output_type": "pyout",
|
339
|
+
"prompt_number": 8,
|
340
|
+
"text": [
|
341
|
+
"\n",
|
342
|
+
"#<Daru::DataFrame:77290860 @name = 4916cdd9-84c7-4f86-9a8d-0f128876e7cf @size = 6>\n",
|
343
|
+
" c d \n",
|
344
|
+
"[:bar, :on 2 22 \n",
|
345
|
+
"[:bar, :th 1 44 \n",
|
346
|
+
"[:bar, :tw 6 66 \n",
|
347
|
+
"[:foo, :on 2.0 44.0 \n",
|
348
|
+
"[:foo, :th 8 88 \n",
|
349
|
+
"[:foo, :tw 3.0 44.0 \n"
|
350
|
+
]
|
351
|
+
}
|
352
|
+
],
|
353
|
+
"prompt_number": 8
|
354
|
+
},
|
355
|
+
{
|
356
|
+
"cell_type": "code",
|
357
|
+
"collapsed": false,
|
358
|
+
"input": [
|
359
|
+
"grouped.get_group([\"foo\", \"one\"])"
|
360
|
+
],
|
361
|
+
"language": "python",
|
362
|
+
"metadata": {},
|
363
|
+
"outputs": [
|
364
|
+
{
|
365
|
+
"html": [
|
366
|
+
"<table><tr><th></th><th>a</th><th>b</th><th>c</th><th>d</th></tr><tr><td>0</td><td>foo</td><td>one</td><td>1</td><td>11</td></tr><tr><td>6</td><td>foo</td><td>one</td><td>3</td><td>77</td></tr></table>"
|
367
|
+
],
|
368
|
+
"metadata": {},
|
369
|
+
"output_type": "pyout",
|
370
|
+
"prompt_number": 9,
|
371
|
+
"text": [
|
372
|
+
"\n",
|
373
|
+
"#<Daru::DataFrame:77202350 @name = b5b75233-3de3-48e3-a646-ced6b736f064 @size = 2>\n",
|
374
|
+
" a b c d \n",
|
375
|
+
" 0 foo one 1 11 \n",
|
376
|
+
" 6 foo one 3 77 \n"
|
377
|
+
]
|
378
|
+
}
|
379
|
+
],
|
380
|
+
"prompt_number": 9
|
381
|
+
},
|
382
|
+
{
|
383
|
+
"cell_type": "code",
|
384
|
+
"collapsed": false,
|
385
|
+
"input": [
|
386
|
+
"require 'daru'\n",
|
387
|
+
"sales = Daru::DataFrame.from_csv '/home/sameer/sales-funnel.csv'"
|
388
|
+
],
|
389
|
+
"language": "python",
|
390
|
+
"metadata": {},
|
391
|
+
"outputs": [
|
392
|
+
{
|
393
|
+
"html": [
|
394
|
+
"<table><tr><th></th><th>account</th><th>manager</th><th>name</th><th>price</th><th>product</th><th>quantity</th><th>rep</th><th>status</th></tr><tr><td>0</td><td>714466</td><td>Debra Henley</td><td>Trantow-Barrows</td><td>30000</td><td>CPU</td><td>1</td><td>Craig Booker</td><td>presented</td></tr><tr><td>1</td><td>714466</td><td>Debra Henley</td><td>Trantow-Barrows</td><td>10000</td><td>Software</td><td>1</td><td>Craig Booker</td><td>presented</td></tr><tr><td>2</td><td>714466</td><td>Debra Henley</td><td>Trantow-Barrows</td><td>5000</td><td>Maintenance</td><td>2</td><td>Craig Booker</td><td>pending</td></tr><tr><td>3</td><td>737550</td><td>Debra Henley</td><td>Fritsch, Russel and Anderson</td><td>35000</td><td>CPU</td><td>1</td><td>Craig Booker</td><td>declined</td></tr><tr><td>4</td><td>146832</td><td>Debra Henley</td><td>Kiehn-Spinka</td><td>65000</td><td>CPU</td><td>2</td><td>Daniel Hilton</td><td>won</td></tr><tr><td>5</td><td>218895</td><td>Debra Henley</td><td>Kulas Inc</td><td>40000</td><td>CPU</td><td>2</td><td>Daniel Hilton</td><td>pending</td></tr><tr><td>6</td><td>218895</td><td>Debra Henley</td><td>Kulas Inc</td><td>10000</td><td>Software</td><td>1</td><td>Daniel Hilton</td><td>presented</td></tr><tr><td>7</td><td>412290</td><td>Debra Henley</td><td>Jerde-Hilpert</td><td>5000</td><td>Maintenance</td><td>2</td><td>John Smith</td><td>pending</td></tr><tr><td>8</td><td>740150</td><td>Debra Henley</td><td>Barton LLC</td><td>35000</td><td>CPU</td><td>1</td><td>John Smith</td><td>declined</td></tr><tr><td>9</td><td>141962</td><td>Fred Anderson</td><td>Herman LLC</td><td>65000</td><td>CPU</td><td>2</td><td>Cedric Moss</td><td>won</td></tr><tr><td>10</td><td>163416</td><td>Fred Anderson</td><td>Purdy-Kunde</td><td>30000</td><td>CPU</td><td>1</td><td>Cedric Moss</td><td>presented</td></tr><tr><td>11</td><td>239344</td><td>Fred Anderson</td><td>Stokes LLC</td><td>5000</td><td>Maintenance</td><td>1</td><td>Cedric Moss</td><td>pending</td></tr><tr><td>12</td><td>239344</td><td>Fred Anderson</td><td>Stokes LLC</td><td>10000</td><td>Software</td><td>1</td><td>Cedric Moss</td><td>presented</td></tr><tr><td>13</td><td>307599</td><td>Fred Anderson</td><td>Kassulke, Ondricka and Metz</td><td>7000</td><td>Maintenance</td><td>3</td><td>Wendy Yule</td><td>won</td></tr><tr><td>14</td><td>688981</td><td>Fred Anderson</td><td>Keeling LLC</td><td>100000</td><td>CPU</td><td>5</td><td>Wendy Yule</td><td>won</td></tr><tr><td>15</td><td>729833</td><td>Fred Anderson</td><td>Koepp Ltd</td><td>65000</td><td>CPU</td><td>2</td><td>Wendy Yule</td><td>declined</td></tr><tr><td>16</td><td>729833</td><td>Fred Anderson</td><td>Koepp Ltd</td><td>5000</td><td>Monitor</td><td>2</td><td>Wendy Yule</td><td>presented</td></tr></table>"
|
395
|
+
],
|
396
|
+
"metadata": {},
|
397
|
+
"output_type": "pyout",
|
398
|
+
"prompt_number": 10,
|
399
|
+
"text": [
|
400
|
+
"\n",
|
401
|
+
"#<Daru::DataFrame:76599420 @name = 34c1c2a4-2a53-47d6-a863-3f4b05ffd9d7 @size = 17>\n",
|
402
|
+
" account manager name price product quantity rep status \n",
|
403
|
+
" 0 714466 Debra Henl Trantow-Ba 30000 CPU 1 Craig Book presented \n",
|
404
|
+
" 1 714466 Debra Henl Trantow-Ba 10000 Software 1 Craig Book presented \n",
|
405
|
+
" 2 714466 Debra Henl Trantow-Ba 5000 Maintenanc 2 Craig Book pending \n",
|
406
|
+
" 3 737550 Debra Henl Fritsch, R 35000 CPU 1 Craig Book declined \n",
|
407
|
+
" 4 146832 Debra Henl Kiehn-Spin 65000 CPU 2 Daniel Hil won \n",
|
408
|
+
" 5 218895 Debra Henl Kulas Inc 40000 CPU 2 Daniel Hil pending \n",
|
409
|
+
" 6 218895 Debra Henl Kulas Inc 10000 Software 1 Daniel Hil presented \n",
|
410
|
+
" 7 412290 Debra Henl Jerde-Hilp 5000 Maintenanc 2 John Smith pending \n",
|
411
|
+
" 8 740150 Debra Henl Barton LLC 35000 CPU 1 John Smith declined \n",
|
412
|
+
" 9 141962 Fred Ander Herman LLC 65000 CPU 2 Cedric Mos won \n",
|
413
|
+
" 10 163416 Fred Ander Purdy-Kund 30000 CPU 1 Cedric Mos presented \n",
|
414
|
+
" 11 239344 Fred Ander Stokes LLC 5000 Maintenanc 1 Cedric Mos pending \n",
|
415
|
+
" 12 239344 Fred Ander Stokes LLC 10000 Software 1 Cedric Mos presented \n",
|
416
|
+
" 13 307599 Fred Ander Kassulke, 7000 Maintenanc 3 Wendy Yule won \n",
|
417
|
+
" 14 688981 Fred Ander Keeling LL 100000 CPU 5 Wendy Yule won \n",
|
418
|
+
" ... ... ... ... ... ... ... ... ... \n"
|
419
|
+
]
|
420
|
+
}
|
421
|
+
],
|
422
|
+
"prompt_number": 10
|
423
|
+
},
|
424
|
+
{
|
425
|
+
"cell_type": "code",
|
426
|
+
"collapsed": false,
|
427
|
+
"input": [
|
428
|
+
"sales.pivot_table index: [:manager, :rep]"
|
429
|
+
],
|
430
|
+
"language": "python",
|
431
|
+
"metadata": {},
|
432
|
+
"outputs": [
|
433
|
+
{
|
434
|
+
"html": [
|
435
|
+
"<table><tr><th></th><th>account</th><th>price</th><th>quantity</th></tr><tr><td>[:\"Debra Henley\", :\"Craig Booker\"]</td><td>720237.0</td><td>20000.0</td><td>1.25</td></tr><tr><td>[:\"Debra Henley\", :\"Daniel Hilton\"]</td><td>194874.0</td><td>38333.333333333336</td><td>1.6666666666666667</td></tr><tr><td>[:\"Debra Henley\", :\"John Smith\"]</td><td>576220.0</td><td>20000.0</td><td>1.5</td></tr><tr><td>[:\"Fred Anderson\", :\"Cedric Moss\"]</td><td>196016.5</td><td>27500.0</td><td>1.25</td></tr><tr><td>[:\"Fred Anderson\", :\"Wendy Yule\"]</td><td>614061.5</td><td>44250.0</td><td>3.0</td></tr></table>"
|
436
|
+
],
|
437
|
+
"metadata": {},
|
438
|
+
"output_type": "pyout",
|
439
|
+
"prompt_number": 11,
|
440
|
+
"text": [
|
441
|
+
"\n",
|
442
|
+
"#<Daru::DataFrame:75394000 @name = ceeb0166-ed8a-4260-a1d7-f3743bbbbf66 @size = 5>\n",
|
443
|
+
" account price quantity \n",
|
444
|
+
"[:\"Debra H 720237.0 20000.0 1.25 \n",
|
445
|
+
"[:\"Debra H 194874.0 38333.3333 1.66666666 \n",
|
446
|
+
"[:\"Debra H 576220.0 20000.0 1.5 \n",
|
447
|
+
"[:\"Fred An 196016.5 27500.0 1.25 \n",
|
448
|
+
"[:\"Fred An 614061.5 44250.0 3.0 \n"
|
449
|
+
]
|
450
|
+
}
|
451
|
+
],
|
452
|
+
"prompt_number": 11
|
453
|
+
},
|
454
|
+
{
|
455
|
+
"cell_type": "code",
|
456
|
+
"collapsed": false,
|
457
|
+
"input": [
|
458
|
+
"sales.pivot_table(index: [:manager,:rep], values: :price,vectors: [:product], agg: :sum)"
|
459
|
+
],
|
460
|
+
"language": "python",
|
461
|
+
"metadata": {},
|
462
|
+
"outputs": [
|
463
|
+
{
|
464
|
+
"html": [
|
465
|
+
"<table><tr><th></th><th>[:price, :CPU]</th><th>[:price, :Software]</th><th>[:price, :Maintenance]</th><th>[:price, :Monitor]</th></tr><tr><td>[:\"Debra Henley\", :\"Craig Booker\"]</td><td>65000</td><td>10000</td><td>5000</td><td></td></tr><tr><td>[:\"Debra Henley\", :\"Daniel Hilton\"]</td><td>105000</td><td>10000</td><td></td><td></td></tr><tr><td>[:\"Debra Henley\", :\"John Smith\"]</td><td>35000</td><td></td><td>5000</td><td></td></tr><tr><td>[:\"Fred Anderson\", :\"Cedric Moss\"]</td><td>95000</td><td>10000</td><td>5000</td><td></td></tr><tr><td>[:\"Fred Anderson\", :\"Wendy Yule\"]</td><td>165000</td><td></td><td>7000</td><td>5000</td></tr></table>"
|
466
|
+
],
|
467
|
+
"metadata": {},
|
468
|
+
"output_type": "pyout",
|
469
|
+
"prompt_number": 12,
|
470
|
+
"text": [
|
471
|
+
"\n",
|
472
|
+
"#<Daru::DataFrame:75196920 @name = 74c20bf1-708c-4b7f-87fa-540fa82ed3f3 @size = 5>\n",
|
473
|
+
" [:price, : [:price, : [:price, : [:price, : \n",
|
474
|
+
"[:\"Debra H 65000 10000 5000 nil \n",
|
475
|
+
"[:\"Debra H 105000 10000 nil nil \n",
|
476
|
+
"[:\"Debra H 35000 nil 5000 nil \n",
|
477
|
+
"[:\"Fred An 95000 10000 5000 nil \n",
|
478
|
+
"[:\"Fred An 165000 nil 7000 5000 \n"
|
479
|
+
]
|
480
|
+
}
|
481
|
+
],
|
482
|
+
"prompt_number": 12
|
483
|
+
},
|
484
|
+
{
|
485
|
+
"cell_type": "code",
|
486
|
+
"collapsed": false,
|
487
|
+
"input": [
|
488
|
+
"df = Daru::DataFrame.new({\n",
|
489
|
+
" a: ['ff' , 'fwwq', 'efe', 'a', 'efef', 'zzzz', 'efgg', 'q', 'ggf'], \n",
|
490
|
+
" b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n",
|
491
|
+
" c: ['small','large','large','small','small','large','small','large','small'],\n",
|
492
|
+
" d: [-1,2,-2,3,-3,4,-5,6,7],\n",
|
493
|
+
" e: [2,4,4,6,6,8,10,12,14]\n",
|
494
|
+
" })\n",
|
495
|
+
" df.sort([:a,:d], by: {a: lambda {|a,b| a.length <=> b.length }, b: lambda {|a,b| a.abs <=> b.abs }}, ascending: [false, true])"
|
496
|
+
],
|
497
|
+
"language": "python",
|
498
|
+
"metadata": {},
|
499
|
+
"outputs": [
|
500
|
+
{
|
501
|
+
"html": [
|
502
|
+
"<table><tr><th></th><th>a</th><th>b</th><th>c</th><th>d</th><th>e</th></tr><tr><td>6</td><td>efgg</td><td>one</td><td>small</td><td>-5</td><td>10</td></tr><tr><td>4</td><td>efef</td><td>two</td><td>small</td><td>-3</td><td>6</td></tr><tr><td>1</td><td>fwwq</td><td>one</td><td>large</td><td>2</td><td>4</td></tr><tr><td>5</td><td>zzzz</td><td>one</td><td>large</td><td>4</td><td>8</td></tr><tr><td>2</td><td>efe</td><td>one</td><td>large</td><td>-2</td><td>4</td></tr><tr><td>8</td><td>ggf</td><td>two</td><td>small</td><td>7</td><td>14</td></tr><tr><td>0</td><td>ff</td><td>one</td><td>small</td><td>-1</td><td>2</td></tr><tr><td>3</td><td>a</td><td>two</td><td>small</td><td>3</td><td>6</td></tr><tr><td>7</td><td>q</td><td>two</td><td>large</td><td>6</td><td>12</td></tr></table>"
|
503
|
+
],
|
504
|
+
"metadata": {},
|
505
|
+
"output_type": "pyout",
|
506
|
+
"prompt_number": 13,
|
507
|
+
"text": [
|
508
|
+
"\n",
|
509
|
+
"#<Daru::DataFrame:74792710 @name = 9e02295c-d12a-4c0b-b0c5-f8be81327c66 @size = 9>\n",
|
510
|
+
" a b c d e \n",
|
511
|
+
" 6 efgg one small -5 10 \n",
|
512
|
+
" 4 efef two small -3 6 \n",
|
513
|
+
" 1 fwwq one large 2 4 \n",
|
514
|
+
" 5 zzzz one large 4 8 \n",
|
515
|
+
" 2 efe one large -2 4 \n",
|
516
|
+
" 8 ggf two small 7 14 \n",
|
517
|
+
" 0 ff one small -1 2 \n",
|
518
|
+
" 3 a two small 3 6 \n",
|
519
|
+
" 7 q two large 6 12 \n"
|
520
|
+
]
|
521
|
+
}
|
522
|
+
],
|
523
|
+
"prompt_number": 13
|
524
|
+
}
|
525
|
+
],
|
526
|
+
"metadata": {}
|
527
|
+
}
|
528
|
+
]
|
529
|
+
}
|