daru 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +0 -1
- data/History.txt +35 -0
- data/README.md +178 -198
- data/daru.gemspec +5 -7
- data/lib/daru.rb +10 -2
- data/lib/daru/accessors/array_wrapper.rb +36 -198
- data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
- data/lib/daru/core/group_by.rb +183 -0
- data/lib/daru/dataframe.rb +615 -167
- data/lib/daru/index.rb +17 -16
- data/lib/daru/io/io.rb +5 -12
- data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
- data/lib/daru/maths/arithmetic/vector.rb +19 -6
- data/lib/daru/maths/statistics/dataframe.rb +103 -2
- data/lib/daru/maths/statistics/vector.rb +102 -61
- data/lib/daru/monkeys.rb +8 -0
- data/lib/daru/multi_index.rb +199 -0
- data/lib/daru/plotting/dataframe.rb +24 -24
- data/lib/daru/plotting/vector.rb +14 -15
- data/lib/daru/vector.rb +402 -98
- data/lib/version.rb +1 -1
- data/notebooks/grouping_splitting_pivots.ipynb +529 -0
- data/notebooks/intro_with_music_data_.ipynb +104 -119
- data/spec/accessors/wrappers_spec.rb +36 -0
- data/spec/core/group_by_spec.rb +331 -0
- data/spec/dataframe_spec.rb +1237 -475
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/index_spec.rb +10 -21
- data/spec/io/io_spec.rb +4 -14
- data/spec/math/arithmetic/dataframe_spec.rb +66 -0
- data/spec/math/arithmetic/vector_spec.rb +45 -4
- data/spec/math/statistics/dataframe_spec.rb +91 -1
- data/spec/math/statistics/vector_spec.rb +32 -6
- data/spec/monkeys_spec.rb +10 -1
- data/spec/multi_index_spec.rb +216 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/vector_spec.rb +505 -57
- metadata +21 -15
data/lib/version.rb
CHANGED
@@ -0,0 +1,529 @@
|
|
1
|
+
{
|
2
|
+
"metadata": {
|
3
|
+
"language": "ruby",
|
4
|
+
"name": "",
|
5
|
+
"signature": "sha256:512fa2d68b8aca8e034679cd3f2eeb1ba0d25133ebbff930f2154a7c94a3479e"
|
6
|
+
},
|
7
|
+
"nbformat": 3,
|
8
|
+
"nbformat_minor": 0,
|
9
|
+
"worksheets": [
|
10
|
+
{
|
11
|
+
"cells": [
|
12
|
+
{
|
13
|
+
"cell_type": "code",
|
14
|
+
"collapsed": false,
|
15
|
+
"input": [
|
16
|
+
"require 'daru'\n",
|
17
|
+
"\n",
|
18
|
+
"df = Daru::DataFrame.new({a: [1,2,3,4,5], b: [10,14,15,17,44]})\n",
|
19
|
+
"df.plot legends: [:a, :b], type: :line do |p,d|\n",
|
20
|
+
" p.yrange [0,100]\n",
|
21
|
+
" p.legend true\n",
|
22
|
+
" d.color \"green\"\n",
|
23
|
+
"end"
|
24
|
+
],
|
25
|
+
"language": "python",
|
26
|
+
"metadata": {},
|
27
|
+
"outputs": [
|
28
|
+
{
|
29
|
+
"html": [
|
30
|
+
"<script type='text/javascript'>if(window['d3'] === undefined ||\n",
|
31
|
+
" window['Nyaplot'] === undefined){\n",
|
32
|
+
" var path = {\"d3\":\"http://d3js.org/d3.v3.min\"};\n",
|
33
|
+
"\n",
|
34
|
+
"\n",
|
35
|
+
"\n",
|
36
|
+
" var shim = {\"d3\":{\"exports\":\"d3\"}};\n",
|
37
|
+
"\n",
|
38
|
+
" require.config({paths: path, shim:shim});\n",
|
39
|
+
"\n",
|
40
|
+
"\n",
|
41
|
+
"require(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');\n",
|
42
|
+
"\n",
|
43
|
+
"\tvar script = d3.select(\"head\")\n",
|
44
|
+
"\t .append(\"script\")\n",
|
45
|
+
"\t .attr(\"src\", \"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\")\n",
|
46
|
+
"\t .attr(\"async\", true);\n",
|
47
|
+
"\n",
|
48
|
+
"\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\n",
|
49
|
+
"\n",
|
50
|
+
"\n",
|
51
|
+
"\t var event = document.createEvent(\"HTMLEvents\");\n",
|
52
|
+
"\t event.initEvent(\"load_nyaplot\",false,false);\n",
|
53
|
+
"\t window.dispatchEvent(event);\n",
|
54
|
+
"\t console.log('Finished loading Nyaplotjs');\n",
|
55
|
+
"\n",
|
56
|
+
"\t};\n",
|
57
|
+
"\n",
|
58
|
+
"\n",
|
59
|
+
"});\n",
|
60
|
+
"}\n",
|
61
|
+
"</script>"
|
62
|
+
],
|
63
|
+
"metadata": {},
|
64
|
+
"output_type": "pyout",
|
65
|
+
"prompt_number": 1,
|
66
|
+
"text": [
|
67
|
+
"\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"http://d3js.org/d3.v3.min\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});\\n}\\n\""
|
68
|
+
]
|
69
|
+
},
|
70
|
+
{
|
71
|
+
"html": [
|
72
|
+
"<div id='vis-e8fbebaa-7e5b-44cd-bf8f-1d4080d079d1'></div>\n",
|
73
|
+
"<script>\n",
|
74
|
+
"(function(){\n",
|
75
|
+
" var render = function(){\n",
|
76
|
+
" var model = {\"panes\":[{\"diagrams\":[{\"type\":\"line\",\"options\":{\"x\":\"a\",\"y\":\"b\",\"color\":\"green\"},\"data\":\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"}],\"options\":{\"yrange\":[0,100],\"legend\":true,\"zoom\":true,\"width\":800,\"xrange\":[1,5]}}],\"data\":{\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\":[{\"a\":1,\"b\":10},{\"a\":2,\"b\":14},{\"a\":3,\"b\":15},{\"a\":4,\"b\":17},{\"a\":5,\"b\":44}]},\"extension\":[]}\n",
|
77
|
+
" Nyaplot.core.parse(model, '#vis-e8fbebaa-7e5b-44cd-bf8f-1d4080d079d1');\n",
|
78
|
+
" };\n",
|
79
|
+
" if(window['Nyaplot']==undefined){\n",
|
80
|
+
" window.addEventListener('load_nyaplot', render, false);\n",
|
81
|
+
"\treturn;\n",
|
82
|
+
" } else {\n",
|
83
|
+
" render();\n",
|
84
|
+
" }\n",
|
85
|
+
"})();\n",
|
86
|
+
"</script>\n"
|
87
|
+
],
|
88
|
+
"metadata": {},
|
89
|
+
"output_type": "pyout",
|
90
|
+
"prompt_number": 1,
|
91
|
+
"text": [
|
92
|
+
"#<Nyaplot::Frame:0x8ac8fd4 @properties={:panes=>[#<Nyaplot::Plot:0x8acb25c @properties={:diagrams=>[#<Nyaplot::Diagram:0x8ac97cc @properties={:type=>:line, :options=>{:x=>:a, :y=>:b, :color=>\"green\"}, :data=>\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"}, @xrange=[1, 5], @yrange=[10, 44]>], :options=>{:yrange=>[0, 100], :legend=>true, :zoom=>true, :width=>800, :xrange=>[1, 5]}}>], :data=>{\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"=>#<Nyaplot::DataFrame:0x8ac9d1c @name=\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\", @rows=[{:a=>1, :b=>10}, {:a=>2, :b=>14}, {:a=>3, :b=>15}, {:a=>4, :b=>17}, {:a=>5, :b=>44}]>}, :extension=>[]}>"
|
93
|
+
]
|
94
|
+
}
|
95
|
+
],
|
96
|
+
"prompt_number": 1
|
97
|
+
},
|
98
|
+
{
|
99
|
+
"cell_type": "code",
|
100
|
+
"collapsed": false,
|
101
|
+
"input": [
|
102
|
+
"require 'daru'\n",
|
103
|
+
"# Calculate statistics of numeric columns\n",
|
104
|
+
"df = Daru::DataFrame.new({\n",
|
105
|
+
" a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'], \n",
|
106
|
+
" b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n",
|
107
|
+
" c: ['small','large','large','small','small','large','small','large','small'],\n",
|
108
|
+
" d: [1,2,2,3,3,4,5,6,7],\n",
|
109
|
+
" e: [2,4,4,6,6,8,10,12,14],\n",
|
110
|
+
" f: [10,20,20,30,30,40,50,60,70]\n",
|
111
|
+
" })\n",
|
112
|
+
"df.mean"
|
113
|
+
],
|
114
|
+
"language": "python",
|
115
|
+
"metadata": {},
|
116
|
+
"outputs": [
|
117
|
+
{
|
118
|
+
"html": [
|
119
|
+
"<table><tr><th> </th><th>nil</th></tr><tr><td>d</td><td>3.6666666666666665</td></tr><tr><td>e</td><td>7.333333333333333</td></tr><tr><td>f</td><td>36.666666666666664</td></tr></table>"
|
120
|
+
],
|
121
|
+
"metadata": {},
|
122
|
+
"output_type": "pyout",
|
123
|
+
"prompt_number": 2,
|
124
|
+
"text": [
|
125
|
+
"\n",
|
126
|
+
"#<Daru::Vector:72633550 @name = nil @size = 3 >\n",
|
127
|
+
" nil\n",
|
128
|
+
" d 3.6666666666666665\n",
|
129
|
+
" e 7.333333333333333\n",
|
130
|
+
" f 36.666666666666664\n"
|
131
|
+
]
|
132
|
+
}
|
133
|
+
],
|
134
|
+
"prompt_number": 2
|
135
|
+
},
|
136
|
+
{
|
137
|
+
"cell_type": "code",
|
138
|
+
"collapsed": false,
|
139
|
+
"input": [
|
140
|
+
"# Calculate multiple statistical measures in one shot\n",
|
141
|
+
"df.describe"
|
142
|
+
],
|
143
|
+
"language": "python",
|
144
|
+
"metadata": {},
|
145
|
+
"outputs": [
|
146
|
+
{
|
147
|
+
"html": [
|
148
|
+
"<table><tr><th></th><th>d</th><th>e</th><th>f</th></tr><tr><td>count</td><td>9</td><td>9</td><td>9</td></tr><tr><td>mean</td><td>3.6666666666666665</td><td>7.333333333333333</td><td>36.666666666666664</td></tr><tr><td>std</td><td>2.0</td><td>4.0</td><td>20.0</td></tr><tr><td>min</td><td>1</td><td>2</td><td>10</td></tr><tr><td>max</td><td>7</td><td>14</td><td>70</td></tr></table>"
|
149
|
+
],
|
150
|
+
"metadata": {},
|
151
|
+
"output_type": "pyout",
|
152
|
+
"prompt_number": 3,
|
153
|
+
"text": [
|
154
|
+
"\n",
|
155
|
+
"#<Daru::DataFrame:72528680 @name = c992bbe4-8948-46f1-bdd4-af2e117e94ac @size = 5>\n",
|
156
|
+
" d e f \n",
|
157
|
+
" count 9 9 9 \n",
|
158
|
+
" mean 3.66666666 7.33333333 36.6666666 \n",
|
159
|
+
" std 2.0 4.0 20.0 \n",
|
160
|
+
" min 1 2 10 \n",
|
161
|
+
" max 7 14 70 \n"
|
162
|
+
]
|
163
|
+
}
|
164
|
+
],
|
165
|
+
"prompt_number": 3
|
166
|
+
},
|
167
|
+
{
|
168
|
+
"cell_type": "code",
|
169
|
+
"collapsed": false,
|
170
|
+
"input": [
|
171
|
+
"# Create a multi-indexed DataFrame\n",
|
172
|
+
"tuples = [\n",
|
173
|
+
" [:a,:one,:bar],\n",
|
174
|
+
" [:a,:one,:baz],\n",
|
175
|
+
" [:a,:two,:bar],\n",
|
176
|
+
" [:a,:two,:baz],\n",
|
177
|
+
" [:b,:one,:bar],\n",
|
178
|
+
" [:b,:two,:bar],\n",
|
179
|
+
" [:b,:two,:baz],\n",
|
180
|
+
" [:b,:one,:foo],\n",
|
181
|
+
" [:c,:one,:bar],\n",
|
182
|
+
" [:c,:one,:baz],\n",
|
183
|
+
" [:c,:two,:foo],\n",
|
184
|
+
" [:c,:two,:bar]\n",
|
185
|
+
"]\n",
|
186
|
+
"multi_index = Daru::MultiIndex.new(tuples)\n",
|
187
|
+
"\n",
|
188
|
+
"vector_arry1 = [11,12,13,14,11,12,13,14,11,12,13,14]\n",
|
189
|
+
"vector_arry2 = [1,2,3,4,1,2,3,4,1,2,3,4]\n",
|
190
|
+
"\n",
|
191
|
+
"order_mi = Daru::MultiIndex.new([\n",
|
192
|
+
" [:a,:one,:bar],\n",
|
193
|
+
" [:a,:two,:baz],\n",
|
194
|
+
" [:b,:two,:foo],\n",
|
195
|
+
" [:b,:one,:foo]])\n",
|
196
|
+
"\n",
|
197
|
+
"df_mi = Daru::DataFrame.new([\n",
|
198
|
+
" vector_arry1, \n",
|
199
|
+
" vector_arry2, \n",
|
200
|
+
" vector_arry1, \n",
|
201
|
+
" vector_arry2], order: order_mi, index: multi_index)"
|
202
|
+
],
|
203
|
+
"language": "python",
|
204
|
+
"metadata": {},
|
205
|
+
"outputs": [
|
206
|
+
{
|
207
|
+
"html": [
|
208
|
+
"<table><tr><th></th><th>[:a, :one, :bar]</th><th>[:a, :two, :baz]</th><th>[:b, :two, :foo]</th><th>[:b, :one, :foo]</th></tr><tr><td>[:a, :one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:a, :one, :baz]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:a, :two, :bar]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:a, :two, :baz]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr><tr><td>[:b, :one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:b, :two, :bar]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:b, :two, :baz]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:b, :one, :foo]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr><tr><td>[:c, :one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:c, :one, :baz]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:c, :two, :foo]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:c, :two, :bar]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr></table>"
|
209
|
+
],
|
210
|
+
"metadata": {},
|
211
|
+
"output_type": "pyout",
|
212
|
+
"prompt_number": 4,
|
213
|
+
"text": [
|
214
|
+
"\n",
|
215
|
+
"#<Daru::DataFrame:72070870 @name = f8812b5e-bd28-4e32-9173-911514741388 @size = 12>\n",
|
216
|
+
" [:a, :one, [:a, :two, [:b, :two, [:b, :one, \n",
|
217
|
+
"[:a, :one, 11 1 11 1 \n",
|
218
|
+
"[:a, :one, 12 2 12 2 \n",
|
219
|
+
"[:a, :two, 13 3 13 3 \n",
|
220
|
+
"[:a, :two, 14 4 14 4 \n",
|
221
|
+
"[:b, :one, 11 1 11 1 \n",
|
222
|
+
"[:b, :two, 12 2 12 2 \n",
|
223
|
+
"[:b, :two, 13 3 13 3 \n",
|
224
|
+
"[:b, :one, 14 4 14 4 \n",
|
225
|
+
"[:c, :one, 11 1 11 1 \n",
|
226
|
+
"[:c, :one, 12 2 12 2 \n",
|
227
|
+
"[:c, :two, 13 3 13 3 \n",
|
228
|
+
"[:c, :two, 14 4 14 4 \n"
|
229
|
+
]
|
230
|
+
}
|
231
|
+
],
|
232
|
+
"prompt_number": 4
|
233
|
+
},
|
234
|
+
{
|
235
|
+
"cell_type": "code",
|
236
|
+
"collapsed": false,
|
237
|
+
"input": [
|
238
|
+
"# Specify complete tuple to choose a single row\n",
|
239
|
+
"df_mi.row[:a, :one,:bar]"
|
240
|
+
],
|
241
|
+
"language": "python",
|
242
|
+
"metadata": {},
|
243
|
+
"outputs": [
|
244
|
+
{
|
245
|
+
"html": [
|
246
|
+
"<table><tr><th> </th><th>0</th></tr><tr><td>[:a, :one, :bar]</td><td>11</td></tr><tr><td>[:a, :two, :baz]</td><td>1</td></tr><tr><td>[:b, :two, :foo]</td><td>11</td></tr><tr><td>[:b, :one, :foo]</td><td>1</td></tr></table>"
|
247
|
+
],
|
248
|
+
"metadata": {},
|
249
|
+
"output_type": "pyout",
|
250
|
+
"prompt_number": 5,
|
251
|
+
"text": [
|
252
|
+
"\n",
|
253
|
+
"#<Daru::Vector:77596600 @name = 0 @size = 4 >\n",
|
254
|
+
" 0\n",
|
255
|
+
"[:a, :one, :bar] 11\n",
|
256
|
+
"[:a, :two, :baz] 1\n",
|
257
|
+
"[:b, :two, :foo] 11\n",
|
258
|
+
"[:b, :one, :foo] 1\n"
|
259
|
+
]
|
260
|
+
}
|
261
|
+
],
|
262
|
+
"prompt_number": 5
|
263
|
+
},
|
264
|
+
{
|
265
|
+
"cell_type": "code",
|
266
|
+
"collapsed": false,
|
267
|
+
"input": [
|
268
|
+
"# Specify partial tuple to select index hierarchially\n",
|
269
|
+
"df_mi.row[:a]"
|
270
|
+
],
|
271
|
+
"language": "python",
|
272
|
+
"metadata": {},
|
273
|
+
"outputs": [
|
274
|
+
{
|
275
|
+
"html": [
|
276
|
+
"<table><tr><th></th><th>[:a, :one, :bar]</th><th>[:a, :two, :baz]</th><th>[:b, :two, :foo]</th><th>[:b, :one, :foo]</th></tr><tr><td>[:one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:one, :baz]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:two, :bar]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:two, :baz]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr></table>"
|
277
|
+
],
|
278
|
+
"metadata": {},
|
279
|
+
"output_type": "pyout",
|
280
|
+
"prompt_number": 6,
|
281
|
+
"text": [
|
282
|
+
"\n",
|
283
|
+
"#<Daru::DataFrame:77518650 @name = f8812b5e-bd28-4e32-9173-911514741388 @size = 4>\n",
|
284
|
+
" [:a, :one, [:a, :two, [:b, :two, [:b, :one, \n",
|
285
|
+
"[:one, :ba 11 1 11 1 \n",
|
286
|
+
"[:one, :ba 12 2 12 2 \n",
|
287
|
+
"[:two, :ba 13 3 13 3 \n",
|
288
|
+
"[:two, :ba 14 4 14 4 \n"
|
289
|
+
]
|
290
|
+
}
|
291
|
+
],
|
292
|
+
"prompt_number": 6
|
293
|
+
},
|
294
|
+
{
|
295
|
+
"cell_type": "code",
|
296
|
+
"collapsed": false,
|
297
|
+
"input": [
|
298
|
+
"# See grouped rows with the 'groups' method\n",
|
299
|
+
"\n",
|
300
|
+
"df = Daru::DataFrame.new({\n",
|
301
|
+
" a: %w{foo bar foo bar foo bar foo foo},\n",
|
302
|
+
" b: %w{one one two three two two one three},\n",
|
303
|
+
" c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],\n",
|
304
|
+
" d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]\n",
|
305
|
+
"})\n",
|
306
|
+
"grouped = df.group_by([:a, :b])\n",
|
307
|
+
"grouped.groups"
|
308
|
+
],
|
309
|
+
"language": "python",
|
310
|
+
"metadata": {},
|
311
|
+
"outputs": [
|
312
|
+
{
|
313
|
+
"metadata": {},
|
314
|
+
"output_type": "pyout",
|
315
|
+
"prompt_number": 7,
|
316
|
+
"text": [
|
317
|
+
"{[\"bar\", \"one\"]=>[1], [\"bar\", \"three\"]=>[3], [\"bar\", \"two\"]=>[5], [\"foo\", \"one\"]=>[0, 6], [\"foo\", \"three\"]=>[7], [\"foo\", \"two\"]=>[2, 4]}"
|
318
|
+
]
|
319
|
+
}
|
320
|
+
],
|
321
|
+
"prompt_number": 7
|
322
|
+
},
|
323
|
+
{
|
324
|
+
"cell_type": "code",
|
325
|
+
"collapsed": false,
|
326
|
+
"input": [
|
327
|
+
"# First group by the columns :a and :b and then calculate mean of the grouped rows.\n",
|
328
|
+
"grouped.mean"
|
329
|
+
],
|
330
|
+
"language": "python",
|
331
|
+
"metadata": {},
|
332
|
+
"outputs": [
|
333
|
+
{
|
334
|
+
"html": [
|
335
|
+
"<table><tr><th></th><th>c</th><th>d</th></tr><tr><td>[:bar, :one]</td><td>2</td><td>22</td></tr><tr><td>[:bar, :three]</td><td>1</td><td>44</td></tr><tr><td>[:bar, :two]</td><td>6</td><td>66</td></tr><tr><td>[:foo, :one]</td><td>2.0</td><td>44.0</td></tr><tr><td>[:foo, :three]</td><td>8</td><td>88</td></tr><tr><td>[:foo, :two]</td><td>3.0</td><td>44.0</td></tr></table>"
|
336
|
+
],
|
337
|
+
"metadata": {},
|
338
|
+
"output_type": "pyout",
|
339
|
+
"prompt_number": 8,
|
340
|
+
"text": [
|
341
|
+
"\n",
|
342
|
+
"#<Daru::DataFrame:77290860 @name = 4916cdd9-84c7-4f86-9a8d-0f128876e7cf @size = 6>\n",
|
343
|
+
" c d \n",
|
344
|
+
"[:bar, :on 2 22 \n",
|
345
|
+
"[:bar, :th 1 44 \n",
|
346
|
+
"[:bar, :tw 6 66 \n",
|
347
|
+
"[:foo, :on 2.0 44.0 \n",
|
348
|
+
"[:foo, :th 8 88 \n",
|
349
|
+
"[:foo, :tw 3.0 44.0 \n"
|
350
|
+
]
|
351
|
+
}
|
352
|
+
],
|
353
|
+
"prompt_number": 8
|
354
|
+
},
|
355
|
+
{
|
356
|
+
"cell_type": "code",
|
357
|
+
"collapsed": false,
|
358
|
+
"input": [
|
359
|
+
"grouped.get_group([\"foo\", \"one\"])"
|
360
|
+
],
|
361
|
+
"language": "python",
|
362
|
+
"metadata": {},
|
363
|
+
"outputs": [
|
364
|
+
{
|
365
|
+
"html": [
|
366
|
+
"<table><tr><th></th><th>a</th><th>b</th><th>c</th><th>d</th></tr><tr><td>0</td><td>foo</td><td>one</td><td>1</td><td>11</td></tr><tr><td>6</td><td>foo</td><td>one</td><td>3</td><td>77</td></tr></table>"
|
367
|
+
],
|
368
|
+
"metadata": {},
|
369
|
+
"output_type": "pyout",
|
370
|
+
"prompt_number": 9,
|
371
|
+
"text": [
|
372
|
+
"\n",
|
373
|
+
"#<Daru::DataFrame:77202350 @name = b5b75233-3de3-48e3-a646-ced6b736f064 @size = 2>\n",
|
374
|
+
" a b c d \n",
|
375
|
+
" 0 foo one 1 11 \n",
|
376
|
+
" 6 foo one 3 77 \n"
|
377
|
+
]
|
378
|
+
}
|
379
|
+
],
|
380
|
+
"prompt_number": 9
|
381
|
+
},
|
382
|
+
{
|
383
|
+
"cell_type": "code",
|
384
|
+
"collapsed": false,
|
385
|
+
"input": [
|
386
|
+
"require 'daru'\n",
|
387
|
+
"sales = Daru::DataFrame.from_csv '/home/sameer/sales-funnel.csv'"
|
388
|
+
],
|
389
|
+
"language": "python",
|
390
|
+
"metadata": {},
|
391
|
+
"outputs": [
|
392
|
+
{
|
393
|
+
"html": [
|
394
|
+
"<table><tr><th></th><th>account</th><th>manager</th><th>name</th><th>price</th><th>product</th><th>quantity</th><th>rep</th><th>status</th></tr><tr><td>0</td><td>714466</td><td>Debra Henley</td><td>Trantow-Barrows</td><td>30000</td><td>CPU</td><td>1</td><td>Craig Booker</td><td>presented</td></tr><tr><td>1</td><td>714466</td><td>Debra Henley</td><td>Trantow-Barrows</td><td>10000</td><td>Software</td><td>1</td><td>Craig Booker</td><td>presented</td></tr><tr><td>2</td><td>714466</td><td>Debra Henley</td><td>Trantow-Barrows</td><td>5000</td><td>Maintenance</td><td>2</td><td>Craig Booker</td><td>pending</td></tr><tr><td>3</td><td>737550</td><td>Debra Henley</td><td>Fritsch, Russel and Anderson</td><td>35000</td><td>CPU</td><td>1</td><td>Craig Booker</td><td>declined</td></tr><tr><td>4</td><td>146832</td><td>Debra Henley</td><td>Kiehn-Spinka</td><td>65000</td><td>CPU</td><td>2</td><td>Daniel Hilton</td><td>won</td></tr><tr><td>5</td><td>218895</td><td>Debra Henley</td><td>Kulas Inc</td><td>40000</td><td>CPU</td><td>2</td><td>Daniel Hilton</td><td>pending</td></tr><tr><td>6</td><td>218895</td><td>Debra Henley</td><td>Kulas Inc</td><td>10000</td><td>Software</td><td>1</td><td>Daniel Hilton</td><td>presented</td></tr><tr><td>7</td><td>412290</td><td>Debra Henley</td><td>Jerde-Hilpert</td><td>5000</td><td>Maintenance</td><td>2</td><td>John Smith</td><td>pending</td></tr><tr><td>8</td><td>740150</td><td>Debra Henley</td><td>Barton LLC</td><td>35000</td><td>CPU</td><td>1</td><td>John Smith</td><td>declined</td></tr><tr><td>9</td><td>141962</td><td>Fred Anderson</td><td>Herman LLC</td><td>65000</td><td>CPU</td><td>2</td><td>Cedric Moss</td><td>won</td></tr><tr><td>10</td><td>163416</td><td>Fred Anderson</td><td>Purdy-Kunde</td><td>30000</td><td>CPU</td><td>1</td><td>Cedric Moss</td><td>presented</td></tr><tr><td>11</td><td>239344</td><td>Fred Anderson</td><td>Stokes LLC</td><td>5000</td><td>Maintenance</td><td>1</td><td>Cedric Moss</td><td>pending</td></tr><tr><td>12</td><td>239344</td><td>Fred Anderson</td><td>Stokes LLC</td><td>10000</td><td>Software</td><td>1</td><td>Cedric Moss</td><td>presented</td></tr><tr><td>13</td><td>307599</td><td>Fred Anderson</td><td>Kassulke, Ondricka and Metz</td><td>7000</td><td>Maintenance</td><td>3</td><td>Wendy Yule</td><td>won</td></tr><tr><td>14</td><td>688981</td><td>Fred Anderson</td><td>Keeling LLC</td><td>100000</td><td>CPU</td><td>5</td><td>Wendy Yule</td><td>won</td></tr><tr><td>15</td><td>729833</td><td>Fred Anderson</td><td>Koepp Ltd</td><td>65000</td><td>CPU</td><td>2</td><td>Wendy Yule</td><td>declined</td></tr><tr><td>16</td><td>729833</td><td>Fred Anderson</td><td>Koepp Ltd</td><td>5000</td><td>Monitor</td><td>2</td><td>Wendy Yule</td><td>presented</td></tr></table>"
|
395
|
+
],
|
396
|
+
"metadata": {},
|
397
|
+
"output_type": "pyout",
|
398
|
+
"prompt_number": 10,
|
399
|
+
"text": [
|
400
|
+
"\n",
|
401
|
+
"#<Daru::DataFrame:76599420 @name = 34c1c2a4-2a53-47d6-a863-3f4b05ffd9d7 @size = 17>\n",
|
402
|
+
" account manager name price product quantity rep status \n",
|
403
|
+
" 0 714466 Debra Henl Trantow-Ba 30000 CPU 1 Craig Book presented \n",
|
404
|
+
" 1 714466 Debra Henl Trantow-Ba 10000 Software 1 Craig Book presented \n",
|
405
|
+
" 2 714466 Debra Henl Trantow-Ba 5000 Maintenanc 2 Craig Book pending \n",
|
406
|
+
" 3 737550 Debra Henl Fritsch, R 35000 CPU 1 Craig Book declined \n",
|
407
|
+
" 4 146832 Debra Henl Kiehn-Spin 65000 CPU 2 Daniel Hil won \n",
|
408
|
+
" 5 218895 Debra Henl Kulas Inc 40000 CPU 2 Daniel Hil pending \n",
|
409
|
+
" 6 218895 Debra Henl Kulas Inc 10000 Software 1 Daniel Hil presented \n",
|
410
|
+
" 7 412290 Debra Henl Jerde-Hilp 5000 Maintenanc 2 John Smith pending \n",
|
411
|
+
" 8 740150 Debra Henl Barton LLC 35000 CPU 1 John Smith declined \n",
|
412
|
+
" 9 141962 Fred Ander Herman LLC 65000 CPU 2 Cedric Mos won \n",
|
413
|
+
" 10 163416 Fred Ander Purdy-Kund 30000 CPU 1 Cedric Mos presented \n",
|
414
|
+
" 11 239344 Fred Ander Stokes LLC 5000 Maintenanc 1 Cedric Mos pending \n",
|
415
|
+
" 12 239344 Fred Ander Stokes LLC 10000 Software 1 Cedric Mos presented \n",
|
416
|
+
" 13 307599 Fred Ander Kassulke, 7000 Maintenanc 3 Wendy Yule won \n",
|
417
|
+
" 14 688981 Fred Ander Keeling LL 100000 CPU 5 Wendy Yule won \n",
|
418
|
+
" ... ... ... ... ... ... ... ... ... \n"
|
419
|
+
]
|
420
|
+
}
|
421
|
+
],
|
422
|
+
"prompt_number": 10
|
423
|
+
},
|
424
|
+
{
|
425
|
+
"cell_type": "code",
|
426
|
+
"collapsed": false,
|
427
|
+
"input": [
|
428
|
+
"sales.pivot_table index: [:manager, :rep]"
|
429
|
+
],
|
430
|
+
"language": "python",
|
431
|
+
"metadata": {},
|
432
|
+
"outputs": [
|
433
|
+
{
|
434
|
+
"html": [
|
435
|
+
"<table><tr><th></th><th>account</th><th>price</th><th>quantity</th></tr><tr><td>[:\"Debra Henley\", :\"Craig Booker\"]</td><td>720237.0</td><td>20000.0</td><td>1.25</td></tr><tr><td>[:\"Debra Henley\", :\"Daniel Hilton\"]</td><td>194874.0</td><td>38333.333333333336</td><td>1.6666666666666667</td></tr><tr><td>[:\"Debra Henley\", :\"John Smith\"]</td><td>576220.0</td><td>20000.0</td><td>1.5</td></tr><tr><td>[:\"Fred Anderson\", :\"Cedric Moss\"]</td><td>196016.5</td><td>27500.0</td><td>1.25</td></tr><tr><td>[:\"Fred Anderson\", :\"Wendy Yule\"]</td><td>614061.5</td><td>44250.0</td><td>3.0</td></tr></table>"
|
436
|
+
],
|
437
|
+
"metadata": {},
|
438
|
+
"output_type": "pyout",
|
439
|
+
"prompt_number": 11,
|
440
|
+
"text": [
|
441
|
+
"\n",
|
442
|
+
"#<Daru::DataFrame:75394000 @name = ceeb0166-ed8a-4260-a1d7-f3743bbbbf66 @size = 5>\n",
|
443
|
+
" account price quantity \n",
|
444
|
+
"[:\"Debra H 720237.0 20000.0 1.25 \n",
|
445
|
+
"[:\"Debra H 194874.0 38333.3333 1.66666666 \n",
|
446
|
+
"[:\"Debra H 576220.0 20000.0 1.5 \n",
|
447
|
+
"[:\"Fred An 196016.5 27500.0 1.25 \n",
|
448
|
+
"[:\"Fred An 614061.5 44250.0 3.0 \n"
|
449
|
+
]
|
450
|
+
}
|
451
|
+
],
|
452
|
+
"prompt_number": 11
|
453
|
+
},
|
454
|
+
{
|
455
|
+
"cell_type": "code",
|
456
|
+
"collapsed": false,
|
457
|
+
"input": [
|
458
|
+
"sales.pivot_table(index: [:manager,:rep], values: :price,vectors: [:product], agg: :sum)"
|
459
|
+
],
|
460
|
+
"language": "python",
|
461
|
+
"metadata": {},
|
462
|
+
"outputs": [
|
463
|
+
{
|
464
|
+
"html": [
|
465
|
+
"<table><tr><th></th><th>[:price, :CPU]</th><th>[:price, :Software]</th><th>[:price, :Maintenance]</th><th>[:price, :Monitor]</th></tr><tr><td>[:\"Debra Henley\", :\"Craig Booker\"]</td><td>65000</td><td>10000</td><td>5000</td><td></td></tr><tr><td>[:\"Debra Henley\", :\"Daniel Hilton\"]</td><td>105000</td><td>10000</td><td></td><td></td></tr><tr><td>[:\"Debra Henley\", :\"John Smith\"]</td><td>35000</td><td></td><td>5000</td><td></td></tr><tr><td>[:\"Fred Anderson\", :\"Cedric Moss\"]</td><td>95000</td><td>10000</td><td>5000</td><td></td></tr><tr><td>[:\"Fred Anderson\", :\"Wendy Yule\"]</td><td>165000</td><td></td><td>7000</td><td>5000</td></tr></table>"
|
466
|
+
],
|
467
|
+
"metadata": {},
|
468
|
+
"output_type": "pyout",
|
469
|
+
"prompt_number": 12,
|
470
|
+
"text": [
|
471
|
+
"\n",
|
472
|
+
"#<Daru::DataFrame:75196920 @name = 74c20bf1-708c-4b7f-87fa-540fa82ed3f3 @size = 5>\n",
|
473
|
+
" [:price, : [:price, : [:price, : [:price, : \n",
|
474
|
+
"[:\"Debra H 65000 10000 5000 nil \n",
|
475
|
+
"[:\"Debra H 105000 10000 nil nil \n",
|
476
|
+
"[:\"Debra H 35000 nil 5000 nil \n",
|
477
|
+
"[:\"Fred An 95000 10000 5000 nil \n",
|
478
|
+
"[:\"Fred An 165000 nil 7000 5000 \n"
|
479
|
+
]
|
480
|
+
}
|
481
|
+
],
|
482
|
+
"prompt_number": 12
|
483
|
+
},
|
484
|
+
{
|
485
|
+
"cell_type": "code",
|
486
|
+
"collapsed": false,
|
487
|
+
"input": [
|
488
|
+
"df = Daru::DataFrame.new({\n",
|
489
|
+
" a: ['ff' , 'fwwq', 'efe', 'a', 'efef', 'zzzz', 'efgg', 'q', 'ggf'], \n",
|
490
|
+
" b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n",
|
491
|
+
" c: ['small','large','large','small','small','large','small','large','small'],\n",
|
492
|
+
" d: [-1,2,-2,3,-3,4,-5,6,7],\n",
|
493
|
+
" e: [2,4,4,6,6,8,10,12,14]\n",
|
494
|
+
" })\n",
|
495
|
+
" df.sort([:a,:d], by: {a: lambda {|a,b| a.length <=> b.length }, b: lambda {|a,b| a.abs <=> b.abs }}, ascending: [false, true])"
|
496
|
+
],
|
497
|
+
"language": "python",
|
498
|
+
"metadata": {},
|
499
|
+
"outputs": [
|
500
|
+
{
|
501
|
+
"html": [
|
502
|
+
"<table><tr><th></th><th>a</th><th>b</th><th>c</th><th>d</th><th>e</th></tr><tr><td>6</td><td>efgg</td><td>one</td><td>small</td><td>-5</td><td>10</td></tr><tr><td>4</td><td>efef</td><td>two</td><td>small</td><td>-3</td><td>6</td></tr><tr><td>1</td><td>fwwq</td><td>one</td><td>large</td><td>2</td><td>4</td></tr><tr><td>5</td><td>zzzz</td><td>one</td><td>large</td><td>4</td><td>8</td></tr><tr><td>2</td><td>efe</td><td>one</td><td>large</td><td>-2</td><td>4</td></tr><tr><td>8</td><td>ggf</td><td>two</td><td>small</td><td>7</td><td>14</td></tr><tr><td>0</td><td>ff</td><td>one</td><td>small</td><td>-1</td><td>2</td></tr><tr><td>3</td><td>a</td><td>two</td><td>small</td><td>3</td><td>6</td></tr><tr><td>7</td><td>q</td><td>two</td><td>large</td><td>6</td><td>12</td></tr></table>"
|
503
|
+
],
|
504
|
+
"metadata": {},
|
505
|
+
"output_type": "pyout",
|
506
|
+
"prompt_number": 13,
|
507
|
+
"text": [
|
508
|
+
"\n",
|
509
|
+
"#<Daru::DataFrame:74792710 @name = 9e02295c-d12a-4c0b-b0c5-f8be81327c66 @size = 9>\n",
|
510
|
+
" a b c d e \n",
|
511
|
+
" 6 efgg one small -5 10 \n",
|
512
|
+
" 4 efef two small -3 6 \n",
|
513
|
+
" 1 fwwq one large 2 4 \n",
|
514
|
+
" 5 zzzz one large 4 8 \n",
|
515
|
+
" 2 efe one large -2 4 \n",
|
516
|
+
" 8 ggf two small 7 14 \n",
|
517
|
+
" 0 ff one small -1 2 \n",
|
518
|
+
" 3 a two small 3 6 \n",
|
519
|
+
" 7 q two large 6 12 \n"
|
520
|
+
]
|
521
|
+
}
|
522
|
+
],
|
523
|
+
"prompt_number": 13
|
524
|
+
}
|
525
|
+
],
|
526
|
+
"metadata": {}
|
527
|
+
}
|
528
|
+
]
|
529
|
+
}
|