daru 0.0.5 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +14 -0
  3. data/.travis.yml +26 -4
  4. data/CONTRIBUTING.md +31 -0
  5. data/Gemfile +1 -2
  6. data/{History.txt → History.md} +110 -44
  7. data/README.md +21 -288
  8. data/Rakefile +1 -0
  9. data/daru.gemspec +12 -8
  10. data/lib/daru.rb +36 -1
  11. data/lib/daru/accessors/array_wrapper.rb +8 -3
  12. data/lib/daru/accessors/gsl_wrapper.rb +113 -0
  13. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -17
  14. data/lib/daru/core/group_by.rb +0 -1
  15. data/lib/daru/dataframe.rb +1192 -83
  16. data/lib/daru/extensions/rserve.rb +21 -0
  17. data/lib/daru/index.rb +14 -0
  18. data/lib/daru/io/io.rb +170 -8
  19. data/lib/daru/maths/arithmetic/dataframe.rb +4 -3
  20. data/lib/daru/maths/arithmetic/vector.rb +4 -4
  21. data/lib/daru/maths/statistics/dataframe.rb +48 -27
  22. data/lib/daru/maths/statistics/vector.rb +215 -33
  23. data/lib/daru/monkeys.rb +53 -7
  24. data/lib/daru/multi_index.rb +21 -4
  25. data/lib/daru/plotting/dataframe.rb +83 -25
  26. data/lib/daru/plotting/vector.rb +9 -10
  27. data/lib/daru/vector.rb +596 -61
  28. data/lib/daru/version.rb +3 -0
  29. data/spec/accessors/wrappers_spec.rb +51 -0
  30. data/spec/core/group_by_spec.rb +0 -2
  31. data/spec/daru_spec.rb +58 -0
  32. data/spec/dataframe_spec.rb +768 -73
  33. data/spec/extensions/rserve_spec.rb +52 -0
  34. data/spec/fixtures/bank2.dat +200 -0
  35. data/spec/fixtures/repeated_fields.csv +7 -0
  36. data/spec/fixtures/scientific_notation.csv +4 -0
  37. data/spec/fixtures/test_xls.xls +0 -0
  38. data/spec/io/io_spec.rb +161 -24
  39. data/spec/math/arithmetic/dataframe_spec.rb +26 -7
  40. data/spec/math/arithmetic/vector_spec.rb +8 -0
  41. data/spec/math/statistics/dataframe_spec.rb +16 -1
  42. data/spec/math/statistics/vector_spec.rb +215 -47
  43. data/spec/spec_helper.rb +21 -2
  44. data/spec/vector_spec.rb +368 -12
  45. metadata +99 -16
  46. data/lib/version.rb +0 -3
  47. data/notebooks/grouping_splitting_pivots.ipynb +0 -529
  48. data/notebooks/intro_with_music_data_.ipynb +0 -303
metadata CHANGED
@@ -1,29 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: daru
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sameer Deshmukh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-28 00:00:00.000000000 Z
11
+ date: 2015-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: reportbuilder
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.4'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.4'
27
+ - !ruby/object:Gem::Dependency
28
+ name: spreadsheet
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 1.0.3
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 1.0.3
13
41
  - !ruby/object:Gem::Dependency
14
42
  name: bundler
15
43
  requirement: !ruby/object:Gem::Requirement
16
44
  requirements:
17
- - - ">="
45
+ - - "~>"
18
46
  - !ruby/object:Gem::Version
19
- version: '0'
47
+ version: '1.10'
20
48
  type: :development
21
49
  prerelease: false
22
50
  version_requirements: !ruby/object:Gem::Requirement
23
51
  requirements:
24
- - - ">="
52
+ - - "~>"
25
53
  - !ruby/object:Gem::Version
26
- version: '0'
54
+ version: '1.10'
27
55
  - !ruby/object:Gem::Dependency
28
56
  name: rake
29
57
  requirement: !ruby/object:Gem::Requirement
@@ -38,6 +66,20 @@ dependencies:
38
66
  - - ">="
39
67
  - !ruby/object:Gem::Version
40
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rserve-client
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.3'
41
83
  - !ruby/object:Gem::Dependency
42
84
  name: rspec
43
85
  requirement: !ruby/object:Gem::Requirement
@@ -70,16 +112,16 @@ dependencies:
70
112
  name: nyaplot
71
113
  requirement: !ruby/object:Gem::Requirement
72
114
  requirements:
73
- - - ">="
115
+ - - "~>"
74
116
  - !ruby/object:Gem::Version
75
- version: '0'
117
+ version: 0.1.5
76
118
  type: :development
77
119
  prerelease: false
78
120
  version_requirements: !ruby/object:Gem::Requirement
79
121
  requirements:
80
- - - ">="
122
+ - - "~>"
81
123
  - !ruby/object:Gem::Version
82
- version: '0'
124
+ version: 0.1.5
83
125
  - !ruby/object:Gem::Dependency
84
126
  name: nmatrix
85
127
  requirement: !ruby/object:Gem::Requirement
@@ -94,6 +136,34 @@ dependencies:
94
136
  - - "~>"
95
137
  - !ruby/object:Gem::Version
96
138
  version: 0.1.0
139
+ - !ruby/object:Gem::Dependency
140
+ name: distribution
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '0.7'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '0.7'
153
+ - !ruby/object:Gem::Dependency
154
+ name: gsl-nmatrix
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '1.17'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '1.17'
97
167
  description: "Daru (Data Analysis in RUby) is a library for analysis, manipulation
98
168
  and visualization\nof data.\n\nDaru works with Ruby arrays and NMatrix, thus working
99
169
  seamlessly accross\nruby interpreters, at the same time providing speed for those
@@ -104,13 +174,13 @@ executables: []
104
174
  extensions: []
105
175
  extra_rdoc_files: []
106
176
  files:
177
+ - ".build.sh"
107
178
  - ".gitignore"
108
179
  - ".rspec"
109
180
  - ".travis.yml"
110
181
  - CONTRIBUTING.md
111
182
  - Gemfile
112
- - Gemfile.lock
113
- - History.txt
183
+ - History.md
114
184
  - LICENSE
115
185
  - README.md
116
186
  - Rakefile
@@ -119,10 +189,12 @@ files:
119
189
  - lib/daru/accessors/array_wrapper.rb
120
190
  - lib/daru/accessors/dataframe_by_row.rb
121
191
  - lib/daru/accessors/dataframe_by_vector.rb
192
+ - lib/daru/accessors/gsl_wrapper.rb
122
193
  - lib/daru/accessors/mdarray_wrapper.rb
123
194
  - lib/daru/accessors/nmatrix_wrapper.rb
124
195
  - lib/daru/core/group_by.rb
125
196
  - lib/daru/dataframe.rb
197
+ - lib/daru/extensions/rserve.rb
126
198
  - lib/daru/index.rb
127
199
  - lib/daru/io/io.rb
128
200
  - lib/daru/maths/arithmetic/dataframe.rb
@@ -134,16 +206,20 @@ files:
134
206
  - lib/daru/plotting/dataframe.rb
135
207
  - lib/daru/plotting/vector.rb
136
208
  - lib/daru/vector.rb
137
- - lib/version.rb
138
- - notebooks/grouping_splitting_pivots.ipynb
139
- - notebooks/intro_with_music_data_.ipynb
209
+ - lib/daru/version.rb
140
210
  - spec/accessors/wrappers_spec.rb
141
211
  - spec/core/group_by_spec.rb
212
+ - spec/daru_spec.rb
142
213
  - spec/dataframe_spec.rb
214
+ - spec/extensions/rserve_spec.rb
215
+ - spec/fixtures/bank2.dat
143
216
  - spec/fixtures/countries.json
144
217
  - spec/fixtures/matrix_test.csv
145
218
  - spec/fixtures/music_data.tsv
219
+ - spec/fixtures/repeated_fields.csv
146
220
  - spec/fixtures/sales-funnel.csv
221
+ - spec/fixtures/scientific_notation.csv
222
+ - spec/fixtures/test_xls.xls
147
223
  - spec/index_spec.rb
148
224
  - spec/io/io_spec.rb
149
225
  - spec/math/arithmetic/dataframe_spec.rb
@@ -174,18 +250,24 @@ required_rubygems_version: !ruby/object:Gem::Requirement
174
250
  version: '0'
175
251
  requirements: []
176
252
  rubyforge_project:
177
- rubygems_version: 2.2.0
253
+ rubygems_version: 2.4.6
178
254
  signing_key:
179
255
  specification_version: 4
180
256
  summary: Data Analysis in RUby
181
257
  test_files:
182
258
  - spec/accessors/wrappers_spec.rb
183
259
  - spec/core/group_by_spec.rb
260
+ - spec/daru_spec.rb
184
261
  - spec/dataframe_spec.rb
262
+ - spec/extensions/rserve_spec.rb
263
+ - spec/fixtures/bank2.dat
185
264
  - spec/fixtures/countries.json
186
265
  - spec/fixtures/matrix_test.csv
187
266
  - spec/fixtures/music_data.tsv
267
+ - spec/fixtures/repeated_fields.csv
188
268
  - spec/fixtures/sales-funnel.csv
269
+ - spec/fixtures/scientific_notation.csv
270
+ - spec/fixtures/test_xls.xls
189
271
  - spec/index_spec.rb
190
272
  - spec/io/io_spec.rb
191
273
  - spec/math/arithmetic/dataframe_spec.rb
@@ -196,3 +278,4 @@ test_files:
196
278
  - spec/multi_index_spec.rb
197
279
  - spec/spec_helper.rb
198
280
  - spec/vector_spec.rb
281
+ has_rdoc:
@@ -1,3 +0,0 @@
1
- module Daru
2
- VERSION = "0.0.5"
3
- end
@@ -1,529 +0,0 @@
1
- {
2
- "metadata": {
3
- "language": "ruby",
4
- "name": "",
5
- "signature": "sha256:512fa2d68b8aca8e034679cd3f2eeb1ba0d25133ebbff930f2154a7c94a3479e"
6
- },
7
- "nbformat": 3,
8
- "nbformat_minor": 0,
9
- "worksheets": [
10
- {
11
- "cells": [
12
- {
13
- "cell_type": "code",
14
- "collapsed": false,
15
- "input": [
16
- "require 'daru'\n",
17
- "\n",
18
- "df = Daru::DataFrame.new({a: [1,2,3,4,5], b: [10,14,15,17,44]})\n",
19
- "df.plot legends: [:a, :b], type: :line do |p,d|\n",
20
- " p.yrange [0,100]\n",
21
- " p.legend true\n",
22
- " d.color \"green\"\n",
23
- "end"
24
- ],
25
- "language": "python",
26
- "metadata": {},
27
- "outputs": [
28
- {
29
- "html": [
30
- "<script type='text/javascript'>if(window['d3'] === undefined ||\n",
31
- " window['Nyaplot'] === undefined){\n",
32
- " var path = {\"d3\":\"http://d3js.org/d3.v3.min\"};\n",
33
- "\n",
34
- "\n",
35
- "\n",
36
- " var shim = {\"d3\":{\"exports\":\"d3\"}};\n",
37
- "\n",
38
- " require.config({paths: path, shim:shim});\n",
39
- "\n",
40
- "\n",
41
- "require(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');\n",
42
- "\n",
43
- "\tvar script = d3.select(\"head\")\n",
44
- "\t .append(\"script\")\n",
45
- "\t .attr(\"src\", \"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\")\n",
46
- "\t .attr(\"async\", true);\n",
47
- "\n",
48
- "\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\n",
49
- "\n",
50
- "\n",
51
- "\t var event = document.createEvent(\"HTMLEvents\");\n",
52
- "\t event.initEvent(\"load_nyaplot\",false,false);\n",
53
- "\t window.dispatchEvent(event);\n",
54
- "\t console.log('Finished loading Nyaplotjs');\n",
55
- "\n",
56
- "\t};\n",
57
- "\n",
58
- "\n",
59
- "});\n",
60
- "}\n",
61
- "</script>"
62
- ],
63
- "metadata": {},
64
- "output_type": "pyout",
65
- "prompt_number": 1,
66
- "text": [
67
- "\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"http://d3js.org/d3.v3.min\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});\\n}\\n\""
68
- ]
69
- },
70
- {
71
- "html": [
72
- "<div id='vis-e8fbebaa-7e5b-44cd-bf8f-1d4080d079d1'></div>\n",
73
- "<script>\n",
74
- "(function(){\n",
75
- " var render = function(){\n",
76
- " var model = {\"panes\":[{\"diagrams\":[{\"type\":\"line\",\"options\":{\"x\":\"a\",\"y\":\"b\",\"color\":\"green\"},\"data\":\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"}],\"options\":{\"yrange\":[0,100],\"legend\":true,\"zoom\":true,\"width\":800,\"xrange\":[1,5]}}],\"data\":{\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\":[{\"a\":1,\"b\":10},{\"a\":2,\"b\":14},{\"a\":3,\"b\":15},{\"a\":4,\"b\":17},{\"a\":5,\"b\":44}]},\"extension\":[]}\n",
77
- " Nyaplot.core.parse(model, '#vis-e8fbebaa-7e5b-44cd-bf8f-1d4080d079d1');\n",
78
- " };\n",
79
- " if(window['Nyaplot']==undefined){\n",
80
- " window.addEventListener('load_nyaplot', render, false);\n",
81
- "\treturn;\n",
82
- " } else {\n",
83
- " render();\n",
84
- " }\n",
85
- "})();\n",
86
- "</script>\n"
87
- ],
88
- "metadata": {},
89
- "output_type": "pyout",
90
- "prompt_number": 1,
91
- "text": [
92
- "#<Nyaplot::Frame:0x8ac8fd4 @properties={:panes=>[#<Nyaplot::Plot:0x8acb25c @properties={:diagrams=>[#<Nyaplot::Diagram:0x8ac97cc @properties={:type=>:line, :options=>{:x=>:a, :y=>:b, :color=>\"green\"}, :data=>\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"}, @xrange=[1, 5], @yrange=[10, 44]>], :options=>{:yrange=>[0, 100], :legend=>true, :zoom=>true, :width=>800, :xrange=>[1, 5]}}>], :data=>{\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"=>#<Nyaplot::DataFrame:0x8ac9d1c @name=\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\", @rows=[{:a=>1, :b=>10}, {:a=>2, :b=>14}, {:a=>3, :b=>15}, {:a=>4, :b=>17}, {:a=>5, :b=>44}]>}, :extension=>[]}>"
93
- ]
94
- }
95
- ],
96
- "prompt_number": 1
97
- },
98
- {
99
- "cell_type": "code",
100
- "collapsed": false,
101
- "input": [
102
- "require 'daru'\n",
103
- "# Calculate statistics of numeric columns\n",
104
- "df = Daru::DataFrame.new({\n",
105
- " a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'], \n",
106
- " b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n",
107
- " c: ['small','large','large','small','small','large','small','large','small'],\n",
108
- " d: [1,2,2,3,3,4,5,6,7],\n",
109
- " e: [2,4,4,6,6,8,10,12,14],\n",
110
- " f: [10,20,20,30,30,40,50,60,70]\n",
111
- " })\n",
112
- "df.mean"
113
- ],
114
- "language": "python",
115
- "metadata": {},
116
- "outputs": [
117
- {
118
- "html": [
119
- "<table><tr><th> </th><th>nil</th></tr><tr><td>d</td><td>3.6666666666666665</td></tr><tr><td>e</td><td>7.333333333333333</td></tr><tr><td>f</td><td>36.666666666666664</td></tr></table>"
120
- ],
121
- "metadata": {},
122
- "output_type": "pyout",
123
- "prompt_number": 2,
124
- "text": [
125
- "\n",
126
- "#<Daru::Vector:72633550 @name = nil @size = 3 >\n",
127
- " nil\n",
128
- " d 3.6666666666666665\n",
129
- " e 7.333333333333333\n",
130
- " f 36.666666666666664\n"
131
- ]
132
- }
133
- ],
134
- "prompt_number": 2
135
- },
136
- {
137
- "cell_type": "code",
138
- "collapsed": false,
139
- "input": [
140
- "# Calculate multiple statistical measures in one shot\n",
141
- "df.describe"
142
- ],
143
- "language": "python",
144
- "metadata": {},
145
- "outputs": [
146
- {
147
- "html": [
148
- "<table><tr><th></th><th>d</th><th>e</th><th>f</th></tr><tr><td>count</td><td>9</td><td>9</td><td>9</td></tr><tr><td>mean</td><td>3.6666666666666665</td><td>7.333333333333333</td><td>36.666666666666664</td></tr><tr><td>std</td><td>2.0</td><td>4.0</td><td>20.0</td></tr><tr><td>min</td><td>1</td><td>2</td><td>10</td></tr><tr><td>max</td><td>7</td><td>14</td><td>70</td></tr></table>"
149
- ],
150
- "metadata": {},
151
- "output_type": "pyout",
152
- "prompt_number": 3,
153
- "text": [
154
- "\n",
155
- "#<Daru::DataFrame:72528680 @name = c992bbe4-8948-46f1-bdd4-af2e117e94ac @size = 5>\n",
156
- " d e f \n",
157
- " count 9 9 9 \n",
158
- " mean 3.66666666 7.33333333 36.6666666 \n",
159
- " std 2.0 4.0 20.0 \n",
160
- " min 1 2 10 \n",
161
- " max 7 14 70 \n"
162
- ]
163
- }
164
- ],
165
- "prompt_number": 3
166
- },
167
- {
168
- "cell_type": "code",
169
- "collapsed": false,
170
- "input": [
171
- "# Create a multi-indexed DataFrame\n",
172
- "tuples = [\n",
173
- " [:a,:one,:bar],\n",
174
- " [:a,:one,:baz],\n",
175
- " [:a,:two,:bar],\n",
176
- " [:a,:two,:baz],\n",
177
- " [:b,:one,:bar],\n",
178
- " [:b,:two,:bar],\n",
179
- " [:b,:two,:baz],\n",
180
- " [:b,:one,:foo],\n",
181
- " [:c,:one,:bar],\n",
182
- " [:c,:one,:baz],\n",
183
- " [:c,:two,:foo],\n",
184
- " [:c,:two,:bar]\n",
185
- "]\n",
186
- "multi_index = Daru::MultiIndex.new(tuples)\n",
187
- "\n",
188
- "vector_arry1 = [11,12,13,14,11,12,13,14,11,12,13,14]\n",
189
- "vector_arry2 = [1,2,3,4,1,2,3,4,1,2,3,4]\n",
190
- "\n",
191
- "order_mi = Daru::MultiIndex.new([\n",
192
- " [:a,:one,:bar],\n",
193
- " [:a,:two,:baz],\n",
194
- " [:b,:two,:foo],\n",
195
- " [:b,:one,:foo]])\n",
196
- "\n",
197
- "df_mi = Daru::DataFrame.new([\n",
198
- " vector_arry1, \n",
199
- " vector_arry2, \n",
200
- " vector_arry1, \n",
201
- " vector_arry2], order: order_mi, index: multi_index)"
202
- ],
203
- "language": "python",
204
- "metadata": {},
205
- "outputs": [
206
- {
207
- "html": [
208
- "<table><tr><th></th><th>[:a, :one, :bar]</th><th>[:a, :two, :baz]</th><th>[:b, :two, :foo]</th><th>[:b, :one, :foo]</th></tr><tr><td>[:a, :one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:a, :one, :baz]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:a, :two, :bar]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:a, :two, :baz]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr><tr><td>[:b, :one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:b, :two, :bar]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:b, :two, :baz]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:b, :one, :foo]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr><tr><td>[:c, :one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:c, :one, :baz]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:c, :two, :foo]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:c, :two, :bar]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr></table>"
209
- ],
210
- "metadata": {},
211
- "output_type": "pyout",
212
- "prompt_number": 4,
213
- "text": [
214
- "\n",
215
- "#<Daru::DataFrame:72070870 @name = f8812b5e-bd28-4e32-9173-911514741388 @size = 12>\n",
216
- " [:a, :one, [:a, :two, [:b, :two, [:b, :one, \n",
217
- "[:a, :one, 11 1 11 1 \n",
218
- "[:a, :one, 12 2 12 2 \n",
219
- "[:a, :two, 13 3 13 3 \n",
220
- "[:a, :two, 14 4 14 4 \n",
221
- "[:b, :one, 11 1 11 1 \n",
222
- "[:b, :two, 12 2 12 2 \n",
223
- "[:b, :two, 13 3 13 3 \n",
224
- "[:b, :one, 14 4 14 4 \n",
225
- "[:c, :one, 11 1 11 1 \n",
226
- "[:c, :one, 12 2 12 2 \n",
227
- "[:c, :two, 13 3 13 3 \n",
228
- "[:c, :two, 14 4 14 4 \n"
229
- ]
230
- }
231
- ],
232
- "prompt_number": 4
233
- },
234
- {
235
- "cell_type": "code",
236
- "collapsed": false,
237
- "input": [
238
- "# Specify complete tuple to choose a single row\n",
239
- "df_mi.row[:a, :one,:bar]"
240
- ],
241
- "language": "python",
242
- "metadata": {},
243
- "outputs": [
244
- {
245
- "html": [
246
- "<table><tr><th> </th><th>0</th></tr><tr><td>[:a, :one, :bar]</td><td>11</td></tr><tr><td>[:a, :two, :baz]</td><td>1</td></tr><tr><td>[:b, :two, :foo]</td><td>11</td></tr><tr><td>[:b, :one, :foo]</td><td>1</td></tr></table>"
247
- ],
248
- "metadata": {},
249
- "output_type": "pyout",
250
- "prompt_number": 5,
251
- "text": [
252
- "\n",
253
- "#<Daru::Vector:77596600 @name = 0 @size = 4 >\n",
254
- " 0\n",
255
- "[:a, :one, :bar] 11\n",
256
- "[:a, :two, :baz] 1\n",
257
- "[:b, :two, :foo] 11\n",
258
- "[:b, :one, :foo] 1\n"
259
- ]
260
- }
261
- ],
262
- "prompt_number": 5
263
- },
264
- {
265
- "cell_type": "code",
266
- "collapsed": false,
267
- "input": [
268
- "# Specify partial tuple to select index hierarchially\n",
269
- "df_mi.row[:a]"
270
- ],
271
- "language": "python",
272
- "metadata": {},
273
- "outputs": [
274
- {
275
- "html": [
276
- "<table><tr><th></th><th>[:a, :one, :bar]</th><th>[:a, :two, :baz]</th><th>[:b, :two, :foo]</th><th>[:b, :one, :foo]</th></tr><tr><td>[:one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:one, :baz]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:two, :bar]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:two, :baz]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr></table>"
277
- ],
278
- "metadata": {},
279
- "output_type": "pyout",
280
- "prompt_number": 6,
281
- "text": [
282
- "\n",
283
- "#<Daru::DataFrame:77518650 @name = f8812b5e-bd28-4e32-9173-911514741388 @size = 4>\n",
284
- " [:a, :one, [:a, :two, [:b, :two, [:b, :one, \n",
285
- "[:one, :ba 11 1 11 1 \n",
286
- "[:one, :ba 12 2 12 2 \n",
287
- "[:two, :ba 13 3 13 3 \n",
288
- "[:two, :ba 14 4 14 4 \n"
289
- ]
290
- }
291
- ],
292
- "prompt_number": 6
293
- },
294
- {
295
- "cell_type": "code",
296
- "collapsed": false,
297
- "input": [
298
- "# See grouped rows with the 'groups' method\n",
299
- "\n",
300
- "df = Daru::DataFrame.new({\n",
301
- " a: %w{foo bar foo bar foo bar foo foo},\n",
302
- " b: %w{one one two three two two one three},\n",
303
- " c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],\n",
304
- " d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]\n",
305
- "})\n",
306
- "grouped = df.group_by([:a, :b])\n",
307
- "grouped.groups"
308
- ],
309
- "language": "python",
310
- "metadata": {},
311
- "outputs": [
312
- {
313
- "metadata": {},
314
- "output_type": "pyout",
315
- "prompt_number": 7,
316
- "text": [
317
- "{[\"bar\", \"one\"]=>[1], [\"bar\", \"three\"]=>[3], [\"bar\", \"two\"]=>[5], [\"foo\", \"one\"]=>[0, 6], [\"foo\", \"three\"]=>[7], [\"foo\", \"two\"]=>[2, 4]}"
318
- ]
319
- }
320
- ],
321
- "prompt_number": 7
322
- },
323
- {
324
- "cell_type": "code",
325
- "collapsed": false,
326
- "input": [
327
- "# First group by the columns :a and :b and then calculate mean of the grouped rows.\n",
328
- "grouped.mean"
329
- ],
330
- "language": "python",
331
- "metadata": {},
332
- "outputs": [
333
- {
334
- "html": [
335
- "<table><tr><th></th><th>c</th><th>d</th></tr><tr><td>[:bar, :one]</td><td>2</td><td>22</td></tr><tr><td>[:bar, :three]</td><td>1</td><td>44</td></tr><tr><td>[:bar, :two]</td><td>6</td><td>66</td></tr><tr><td>[:foo, :one]</td><td>2.0</td><td>44.0</td></tr><tr><td>[:foo, :three]</td><td>8</td><td>88</td></tr><tr><td>[:foo, :two]</td><td>3.0</td><td>44.0</td></tr></table>"
336
- ],
337
- "metadata": {},
338
- "output_type": "pyout",
339
- "prompt_number": 8,
340
- "text": [
341
- "\n",
342
- "#<Daru::DataFrame:77290860 @name = 4916cdd9-84c7-4f86-9a8d-0f128876e7cf @size = 6>\n",
343
- " c d \n",
344
- "[:bar, :on 2 22 \n",
345
- "[:bar, :th 1 44 \n",
346
- "[:bar, :tw 6 66 \n",
347
- "[:foo, :on 2.0 44.0 \n",
348
- "[:foo, :th 8 88 \n",
349
- "[:foo, :tw 3.0 44.0 \n"
350
- ]
351
- }
352
- ],
353
- "prompt_number": 8
354
- },
355
- {
356
- "cell_type": "code",
357
- "collapsed": false,
358
- "input": [
359
- "grouped.get_group([\"foo\", \"one\"])"
360
- ],
361
- "language": "python",
362
- "metadata": {},
363
- "outputs": [
364
- {
365
- "html": [
366
- "<table><tr><th></th><th>a</th><th>b</th><th>c</th><th>d</th></tr><tr><td>0</td><td>foo</td><td>one</td><td>1</td><td>11</td></tr><tr><td>6</td><td>foo</td><td>one</td><td>3</td><td>77</td></tr></table>"
367
- ],
368
- "metadata": {},
369
- "output_type": "pyout",
370
- "prompt_number": 9,
371
- "text": [
372
- "\n",
373
- "#<Daru::DataFrame:77202350 @name = b5b75233-3de3-48e3-a646-ced6b736f064 @size = 2>\n",
374
- " a b c d \n",
375
- " 0 foo one 1 11 \n",
376
- " 6 foo one 3 77 \n"
377
- ]
378
- }
379
- ],
380
- "prompt_number": 9
381
- },
382
- {
383
- "cell_type": "code",
384
- "collapsed": false,
385
- "input": [
386
- "require 'daru'\n",
387
- "sales = Daru::DataFrame.from_csv '/home/sameer/sales-funnel.csv'"
388
- ],
389
- "language": "python",
390
- "metadata": {},
391
- "outputs": [
392
- {
393
- "html": [
394
- "<table><tr><th></th><th>account</th><th>manager</th><th>name</th><th>price</th><th>product</th><th>quantity</th><th>rep</th><th>status</th></tr><tr><td>0</td><td>714466</td><td>Debra Henley</td><td>Trantow-Barrows</td><td>30000</td><td>CPU</td><td>1</td><td>Craig Booker</td><td>presented</td></tr><tr><td>1</td><td>714466</td><td>Debra Henley</td><td>Trantow-Barrows</td><td>10000</td><td>Software</td><td>1</td><td>Craig Booker</td><td>presented</td></tr><tr><td>2</td><td>714466</td><td>Debra Henley</td><td>Trantow-Barrows</td><td>5000</td><td>Maintenance</td><td>2</td><td>Craig Booker</td><td>pending</td></tr><tr><td>3</td><td>737550</td><td>Debra Henley</td><td>Fritsch, Russel and Anderson</td><td>35000</td><td>CPU</td><td>1</td><td>Craig Booker</td><td>declined</td></tr><tr><td>4</td><td>146832</td><td>Debra Henley</td><td>Kiehn-Spinka</td><td>65000</td><td>CPU</td><td>2</td><td>Daniel Hilton</td><td>won</td></tr><tr><td>5</td><td>218895</td><td>Debra Henley</td><td>Kulas Inc</td><td>40000</td><td>CPU</td><td>2</td><td>Daniel Hilton</td><td>pending</td></tr><tr><td>6</td><td>218895</td><td>Debra Henley</td><td>Kulas Inc</td><td>10000</td><td>Software</td><td>1</td><td>Daniel Hilton</td><td>presented</td></tr><tr><td>7</td><td>412290</td><td>Debra Henley</td><td>Jerde-Hilpert</td><td>5000</td><td>Maintenance</td><td>2</td><td>John Smith</td><td>pending</td></tr><tr><td>8</td><td>740150</td><td>Debra Henley</td><td>Barton LLC</td><td>35000</td><td>CPU</td><td>1</td><td>John Smith</td><td>declined</td></tr><tr><td>9</td><td>141962</td><td>Fred Anderson</td><td>Herman LLC</td><td>65000</td><td>CPU</td><td>2</td><td>Cedric Moss</td><td>won</td></tr><tr><td>10</td><td>163416</td><td>Fred Anderson</td><td>Purdy-Kunde</td><td>30000</td><td>CPU</td><td>1</td><td>Cedric Moss</td><td>presented</td></tr><tr><td>11</td><td>239344</td><td>Fred Anderson</td><td>Stokes LLC</td><td>5000</td><td>Maintenance</td><td>1</td><td>Cedric Moss</td><td>pending</td></tr><tr><td>12</td><td>239344</td><td>Fred Anderson</td><td>Stokes LLC</td><td>10000</td><td>Software</td><td>1</td><td>Cedric Moss</td><td>presented</td></tr><tr><td>13</td><td>307599</td><td>Fred Anderson</td><td>Kassulke, Ondricka and Metz</td><td>7000</td><td>Maintenance</td><td>3</td><td>Wendy Yule</td><td>won</td></tr><tr><td>14</td><td>688981</td><td>Fred Anderson</td><td>Keeling LLC</td><td>100000</td><td>CPU</td><td>5</td><td>Wendy Yule</td><td>won</td></tr><tr><td>15</td><td>729833</td><td>Fred Anderson</td><td>Koepp Ltd</td><td>65000</td><td>CPU</td><td>2</td><td>Wendy Yule</td><td>declined</td></tr><tr><td>16</td><td>729833</td><td>Fred Anderson</td><td>Koepp Ltd</td><td>5000</td><td>Monitor</td><td>2</td><td>Wendy Yule</td><td>presented</td></tr></table>"
395
- ],
396
- "metadata": {},
397
- "output_type": "pyout",
398
- "prompt_number": 10,
399
- "text": [
400
- "\n",
401
- "#<Daru::DataFrame:76599420 @name = 34c1c2a4-2a53-47d6-a863-3f4b05ffd9d7 @size = 17>\n",
402
- " account manager name price product quantity rep status \n",
403
- " 0 714466 Debra Henl Trantow-Ba 30000 CPU 1 Craig Book presented \n",
404
- " 1 714466 Debra Henl Trantow-Ba 10000 Software 1 Craig Book presented \n",
405
- " 2 714466 Debra Henl Trantow-Ba 5000 Maintenanc 2 Craig Book pending \n",
406
- " 3 737550 Debra Henl Fritsch, R 35000 CPU 1 Craig Book declined \n",
407
- " 4 146832 Debra Henl Kiehn-Spin 65000 CPU 2 Daniel Hil won \n",
408
- " 5 218895 Debra Henl Kulas Inc 40000 CPU 2 Daniel Hil pending \n",
409
- " 6 218895 Debra Henl Kulas Inc 10000 Software 1 Daniel Hil presented \n",
410
- " 7 412290 Debra Henl Jerde-Hilp 5000 Maintenanc 2 John Smith pending \n",
411
- " 8 740150 Debra Henl Barton LLC 35000 CPU 1 John Smith declined \n",
412
- " 9 141962 Fred Ander Herman LLC 65000 CPU 2 Cedric Mos won \n",
413
- " 10 163416 Fred Ander Purdy-Kund 30000 CPU 1 Cedric Mos presented \n",
414
- " 11 239344 Fred Ander Stokes LLC 5000 Maintenanc 1 Cedric Mos pending \n",
415
- " 12 239344 Fred Ander Stokes LLC 10000 Software 1 Cedric Mos presented \n",
416
- " 13 307599 Fred Ander Kassulke, 7000 Maintenanc 3 Wendy Yule won \n",
417
- " 14 688981 Fred Ander Keeling LL 100000 CPU 5 Wendy Yule won \n",
418
- " ... ... ... ... ... ... ... ... ... \n"
419
- ]
420
- }
421
- ],
422
- "prompt_number": 10
423
- },
424
- {
425
- "cell_type": "code",
426
- "collapsed": false,
427
- "input": [
428
- "sales.pivot_table index: [:manager, :rep]"
429
- ],
430
- "language": "python",
431
- "metadata": {},
432
- "outputs": [
433
- {
434
- "html": [
435
- "<table><tr><th></th><th>account</th><th>price</th><th>quantity</th></tr><tr><td>[:\"Debra Henley\", :\"Craig Booker\"]</td><td>720237.0</td><td>20000.0</td><td>1.25</td></tr><tr><td>[:\"Debra Henley\", :\"Daniel Hilton\"]</td><td>194874.0</td><td>38333.333333333336</td><td>1.6666666666666667</td></tr><tr><td>[:\"Debra Henley\", :\"John Smith\"]</td><td>576220.0</td><td>20000.0</td><td>1.5</td></tr><tr><td>[:\"Fred Anderson\", :\"Cedric Moss\"]</td><td>196016.5</td><td>27500.0</td><td>1.25</td></tr><tr><td>[:\"Fred Anderson\", :\"Wendy Yule\"]</td><td>614061.5</td><td>44250.0</td><td>3.0</td></tr></table>"
436
- ],
437
- "metadata": {},
438
- "output_type": "pyout",
439
- "prompt_number": 11,
440
- "text": [
441
- "\n",
442
- "#<Daru::DataFrame:75394000 @name = ceeb0166-ed8a-4260-a1d7-f3743bbbbf66 @size = 5>\n",
443
- " account price quantity \n",
444
- "[:\"Debra H 720237.0 20000.0 1.25 \n",
445
- "[:\"Debra H 194874.0 38333.3333 1.66666666 \n",
446
- "[:\"Debra H 576220.0 20000.0 1.5 \n",
447
- "[:\"Fred An 196016.5 27500.0 1.25 \n",
448
- "[:\"Fred An 614061.5 44250.0 3.0 \n"
449
- ]
450
- }
451
- ],
452
- "prompt_number": 11
453
- },
454
- {
455
- "cell_type": "code",
456
- "collapsed": false,
457
- "input": [
458
- "sales.pivot_table(index: [:manager,:rep], values: :price,vectors: [:product], agg: :sum)"
459
- ],
460
- "language": "python",
461
- "metadata": {},
462
- "outputs": [
463
- {
464
- "html": [
465
- "<table><tr><th></th><th>[:price, :CPU]</th><th>[:price, :Software]</th><th>[:price, :Maintenance]</th><th>[:price, :Monitor]</th></tr><tr><td>[:\"Debra Henley\", :\"Craig Booker\"]</td><td>65000</td><td>10000</td><td>5000</td><td></td></tr><tr><td>[:\"Debra Henley\", :\"Daniel Hilton\"]</td><td>105000</td><td>10000</td><td></td><td></td></tr><tr><td>[:\"Debra Henley\", :\"John Smith\"]</td><td>35000</td><td></td><td>5000</td><td></td></tr><tr><td>[:\"Fred Anderson\", :\"Cedric Moss\"]</td><td>95000</td><td>10000</td><td>5000</td><td></td></tr><tr><td>[:\"Fred Anderson\", :\"Wendy Yule\"]</td><td>165000</td><td></td><td>7000</td><td>5000</td></tr></table>"
466
- ],
467
- "metadata": {},
468
- "output_type": "pyout",
469
- "prompt_number": 12,
470
- "text": [
471
- "\n",
472
- "#<Daru::DataFrame:75196920 @name = 74c20bf1-708c-4b7f-87fa-540fa82ed3f3 @size = 5>\n",
473
- " [:price, : [:price, : [:price, : [:price, : \n",
474
- "[:\"Debra H 65000 10000 5000 nil \n",
475
- "[:\"Debra H 105000 10000 nil nil \n",
476
- "[:\"Debra H 35000 nil 5000 nil \n",
477
- "[:\"Fred An 95000 10000 5000 nil \n",
478
- "[:\"Fred An 165000 nil 7000 5000 \n"
479
- ]
480
- }
481
- ],
482
- "prompt_number": 12
483
- },
484
- {
485
- "cell_type": "code",
486
- "collapsed": false,
487
- "input": [
488
- "df = Daru::DataFrame.new({\n",
489
- " a: ['ff' , 'fwwq', 'efe', 'a', 'efef', 'zzzz', 'efgg', 'q', 'ggf'], \n",
490
- " b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n",
491
- " c: ['small','large','large','small','small','large','small','large','small'],\n",
492
- " d: [-1,2,-2,3,-3,4,-5,6,7],\n",
493
- " e: [2,4,4,6,6,8,10,12,14]\n",
494
- " })\n",
495
- " df.sort([:a,:d], by: {a: lambda {|a,b| a.length <=> b.length }, b: lambda {|a,b| a.abs <=> b.abs }}, ascending: [false, true])"
496
- ],
497
- "language": "python",
498
- "metadata": {},
499
- "outputs": [
500
- {
501
- "html": [
502
- "<table><tr><th></th><th>a</th><th>b</th><th>c</th><th>d</th><th>e</th></tr><tr><td>6</td><td>efgg</td><td>one</td><td>small</td><td>-5</td><td>10</td></tr><tr><td>4</td><td>efef</td><td>two</td><td>small</td><td>-3</td><td>6</td></tr><tr><td>1</td><td>fwwq</td><td>one</td><td>large</td><td>2</td><td>4</td></tr><tr><td>5</td><td>zzzz</td><td>one</td><td>large</td><td>4</td><td>8</td></tr><tr><td>2</td><td>efe</td><td>one</td><td>large</td><td>-2</td><td>4</td></tr><tr><td>8</td><td>ggf</td><td>two</td><td>small</td><td>7</td><td>14</td></tr><tr><td>0</td><td>ff</td><td>one</td><td>small</td><td>-1</td><td>2</td></tr><tr><td>3</td><td>a</td><td>two</td><td>small</td><td>3</td><td>6</td></tr><tr><td>7</td><td>q</td><td>two</td><td>large</td><td>6</td><td>12</td></tr></table>"
503
- ],
504
- "metadata": {},
505
- "output_type": "pyout",
506
- "prompt_number": 13,
507
- "text": [
508
- "\n",
509
- "#<Daru::DataFrame:74792710 @name = 9e02295c-d12a-4c0b-b0c5-f8be81327c66 @size = 9>\n",
510
- " a b c d e \n",
511
- " 6 efgg one small -5 10 \n",
512
- " 4 efef two small -3 6 \n",
513
- " 1 fwwq one large 2 4 \n",
514
- " 5 zzzz one large 4 8 \n",
515
- " 2 efe one large -2 4 \n",
516
- " 8 ggf two small 7 14 \n",
517
- " 0 ff one small -1 2 \n",
518
- " 3 a two small 3 6 \n",
519
- " 7 q two large 6 12 \n"
520
- ]
521
- }
522
- ],
523
- "prompt_number": 13
524
- }
525
- ],
526
- "metadata": {}
527
- }
528
- ]
529
- }