daru 0.0.5 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +14 -0
  3. data/.travis.yml +26 -4
  4. data/CONTRIBUTING.md +31 -0
  5. data/Gemfile +1 -2
  6. data/{History.txt → History.md} +110 -44
  7. data/README.md +21 -288
  8. data/Rakefile +1 -0
  9. data/daru.gemspec +12 -8
  10. data/lib/daru.rb +36 -1
  11. data/lib/daru/accessors/array_wrapper.rb +8 -3
  12. data/lib/daru/accessors/gsl_wrapper.rb +113 -0
  13. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -17
  14. data/lib/daru/core/group_by.rb +0 -1
  15. data/lib/daru/dataframe.rb +1192 -83
  16. data/lib/daru/extensions/rserve.rb +21 -0
  17. data/lib/daru/index.rb +14 -0
  18. data/lib/daru/io/io.rb +170 -8
  19. data/lib/daru/maths/arithmetic/dataframe.rb +4 -3
  20. data/lib/daru/maths/arithmetic/vector.rb +4 -4
  21. data/lib/daru/maths/statistics/dataframe.rb +48 -27
  22. data/lib/daru/maths/statistics/vector.rb +215 -33
  23. data/lib/daru/monkeys.rb +53 -7
  24. data/lib/daru/multi_index.rb +21 -4
  25. data/lib/daru/plotting/dataframe.rb +83 -25
  26. data/lib/daru/plotting/vector.rb +9 -10
  27. data/lib/daru/vector.rb +596 -61
  28. data/lib/daru/version.rb +3 -0
  29. data/spec/accessors/wrappers_spec.rb +51 -0
  30. data/spec/core/group_by_spec.rb +0 -2
  31. data/spec/daru_spec.rb +58 -0
  32. data/spec/dataframe_spec.rb +768 -73
  33. data/spec/extensions/rserve_spec.rb +52 -0
  34. data/spec/fixtures/bank2.dat +200 -0
  35. data/spec/fixtures/repeated_fields.csv +7 -0
  36. data/spec/fixtures/scientific_notation.csv +4 -0
  37. data/spec/fixtures/test_xls.xls +0 -0
  38. data/spec/io/io_spec.rb +161 -24
  39. data/spec/math/arithmetic/dataframe_spec.rb +26 -7
  40. data/spec/math/arithmetic/vector_spec.rb +8 -0
  41. data/spec/math/statistics/dataframe_spec.rb +16 -1
  42. data/spec/math/statistics/vector_spec.rb +215 -47
  43. data/spec/spec_helper.rb +21 -2
  44. data/spec/vector_spec.rb +368 -12
  45. metadata +99 -16
  46. data/lib/version.rb +0 -3
  47. data/notebooks/grouping_splitting_pivots.ipynb +0 -529
  48. data/notebooks/intro_with_music_data_.ipynb +0 -303
metadata CHANGED
@@ -1,29 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: daru
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sameer Deshmukh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-28 00:00:00.000000000 Z
11
+ date: 2015-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: reportbuilder
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.4'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.4'
27
+ - !ruby/object:Gem::Dependency
28
+ name: spreadsheet
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 1.0.3
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 1.0.3
13
41
  - !ruby/object:Gem::Dependency
14
42
  name: bundler
15
43
  requirement: !ruby/object:Gem::Requirement
16
44
  requirements:
17
- - - ">="
45
+ - - "~>"
18
46
  - !ruby/object:Gem::Version
19
- version: '0'
47
+ version: '1.10'
20
48
  type: :development
21
49
  prerelease: false
22
50
  version_requirements: !ruby/object:Gem::Requirement
23
51
  requirements:
24
- - - ">="
52
+ - - "~>"
25
53
  - !ruby/object:Gem::Version
26
- version: '0'
54
+ version: '1.10'
27
55
  - !ruby/object:Gem::Dependency
28
56
  name: rake
29
57
  requirement: !ruby/object:Gem::Requirement
@@ -38,6 +66,20 @@ dependencies:
38
66
  - - ">="
39
67
  - !ruby/object:Gem::Version
40
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rserve-client
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.3'
41
83
  - !ruby/object:Gem::Dependency
42
84
  name: rspec
43
85
  requirement: !ruby/object:Gem::Requirement
@@ -70,16 +112,16 @@ dependencies:
70
112
  name: nyaplot
71
113
  requirement: !ruby/object:Gem::Requirement
72
114
  requirements:
73
- - - ">="
115
+ - - "~>"
74
116
  - !ruby/object:Gem::Version
75
- version: '0'
117
+ version: 0.1.5
76
118
  type: :development
77
119
  prerelease: false
78
120
  version_requirements: !ruby/object:Gem::Requirement
79
121
  requirements:
80
- - - ">="
122
+ - - "~>"
81
123
  - !ruby/object:Gem::Version
82
- version: '0'
124
+ version: 0.1.5
83
125
  - !ruby/object:Gem::Dependency
84
126
  name: nmatrix
85
127
  requirement: !ruby/object:Gem::Requirement
@@ -94,6 +136,34 @@ dependencies:
94
136
  - - "~>"
95
137
  - !ruby/object:Gem::Version
96
138
  version: 0.1.0
139
+ - !ruby/object:Gem::Dependency
140
+ name: distribution
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '0.7'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '0.7'
153
+ - !ruby/object:Gem::Dependency
154
+ name: gsl-nmatrix
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '1.17'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '1.17'
97
167
  description: "Daru (Data Analysis in RUby) is a library for analysis, manipulation
98
168
  and visualization\nof data.\n\nDaru works with Ruby arrays and NMatrix, thus working
99
169
  seamlessly accross\nruby interpreters, at the same time providing speed for those
@@ -104,13 +174,13 @@ executables: []
104
174
  extensions: []
105
175
  extra_rdoc_files: []
106
176
  files:
177
+ - ".build.sh"
107
178
  - ".gitignore"
108
179
  - ".rspec"
109
180
  - ".travis.yml"
110
181
  - CONTRIBUTING.md
111
182
  - Gemfile
112
- - Gemfile.lock
113
- - History.txt
183
+ - History.md
114
184
  - LICENSE
115
185
  - README.md
116
186
  - Rakefile
@@ -119,10 +189,12 @@ files:
119
189
  - lib/daru/accessors/array_wrapper.rb
120
190
  - lib/daru/accessors/dataframe_by_row.rb
121
191
  - lib/daru/accessors/dataframe_by_vector.rb
192
+ - lib/daru/accessors/gsl_wrapper.rb
122
193
  - lib/daru/accessors/mdarray_wrapper.rb
123
194
  - lib/daru/accessors/nmatrix_wrapper.rb
124
195
  - lib/daru/core/group_by.rb
125
196
  - lib/daru/dataframe.rb
197
+ - lib/daru/extensions/rserve.rb
126
198
  - lib/daru/index.rb
127
199
  - lib/daru/io/io.rb
128
200
  - lib/daru/maths/arithmetic/dataframe.rb
@@ -134,16 +206,20 @@ files:
134
206
  - lib/daru/plotting/dataframe.rb
135
207
  - lib/daru/plotting/vector.rb
136
208
  - lib/daru/vector.rb
137
- - lib/version.rb
138
- - notebooks/grouping_splitting_pivots.ipynb
139
- - notebooks/intro_with_music_data_.ipynb
209
+ - lib/daru/version.rb
140
210
  - spec/accessors/wrappers_spec.rb
141
211
  - spec/core/group_by_spec.rb
212
+ - spec/daru_spec.rb
142
213
  - spec/dataframe_spec.rb
214
+ - spec/extensions/rserve_spec.rb
215
+ - spec/fixtures/bank2.dat
143
216
  - spec/fixtures/countries.json
144
217
  - spec/fixtures/matrix_test.csv
145
218
  - spec/fixtures/music_data.tsv
219
+ - spec/fixtures/repeated_fields.csv
146
220
  - spec/fixtures/sales-funnel.csv
221
+ - spec/fixtures/scientific_notation.csv
222
+ - spec/fixtures/test_xls.xls
147
223
  - spec/index_spec.rb
148
224
  - spec/io/io_spec.rb
149
225
  - spec/math/arithmetic/dataframe_spec.rb
@@ -174,18 +250,24 @@ required_rubygems_version: !ruby/object:Gem::Requirement
174
250
  version: '0'
175
251
  requirements: []
176
252
  rubyforge_project:
177
- rubygems_version: 2.2.0
253
+ rubygems_version: 2.4.6
178
254
  signing_key:
179
255
  specification_version: 4
180
256
  summary: Data Analysis in RUby
181
257
  test_files:
182
258
  - spec/accessors/wrappers_spec.rb
183
259
  - spec/core/group_by_spec.rb
260
+ - spec/daru_spec.rb
184
261
  - spec/dataframe_spec.rb
262
+ - spec/extensions/rserve_spec.rb
263
+ - spec/fixtures/bank2.dat
185
264
  - spec/fixtures/countries.json
186
265
  - spec/fixtures/matrix_test.csv
187
266
  - spec/fixtures/music_data.tsv
267
+ - spec/fixtures/repeated_fields.csv
188
268
  - spec/fixtures/sales-funnel.csv
269
+ - spec/fixtures/scientific_notation.csv
270
+ - spec/fixtures/test_xls.xls
189
271
  - spec/index_spec.rb
190
272
  - spec/io/io_spec.rb
191
273
  - spec/math/arithmetic/dataframe_spec.rb
@@ -196,3 +278,4 @@ test_files:
196
278
  - spec/multi_index_spec.rb
197
279
  - spec/spec_helper.rb
198
280
  - spec/vector_spec.rb
281
+ has_rdoc:
@@ -1,3 +0,0 @@
1
- module Daru
2
- VERSION = "0.0.5"
3
- end
@@ -1,529 +0,0 @@
1
- {
2
- "metadata": {
3
- "language": "ruby",
4
- "name": "",
5
- "signature": "sha256:512fa2d68b8aca8e034679cd3f2eeb1ba0d25133ebbff930f2154a7c94a3479e"
6
- },
7
- "nbformat": 3,
8
- "nbformat_minor": 0,
9
- "worksheets": [
10
- {
11
- "cells": [
12
- {
13
- "cell_type": "code",
14
- "collapsed": false,
15
- "input": [
16
- "require 'daru'\n",
17
- "\n",
18
- "df = Daru::DataFrame.new({a: [1,2,3,4,5], b: [10,14,15,17,44]})\n",
19
- "df.plot legends: [:a, :b], type: :line do |p,d|\n",
20
- " p.yrange [0,100]\n",
21
- " p.legend true\n",
22
- " d.color \"green\"\n",
23
- "end"
24
- ],
25
- "language": "python",
26
- "metadata": {},
27
- "outputs": [
28
- {
29
- "html": [
30
- "<script type='text/javascript'>if(window['d3'] === undefined ||\n",
31
- " window['Nyaplot'] === undefined){\n",
32
- " var path = {\"d3\":\"http://d3js.org/d3.v3.min\"};\n",
33
- "\n",
34
- "\n",
35
- "\n",
36
- " var shim = {\"d3\":{\"exports\":\"d3\"}};\n",
37
- "\n",
38
- " require.config({paths: path, shim:shim});\n",
39
- "\n",
40
- "\n",
41
- "require(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');\n",
42
- "\n",
43
- "\tvar script = d3.select(\"head\")\n",
44
- "\t .append(\"script\")\n",
45
- "\t .attr(\"src\", \"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\")\n",
46
- "\t .attr(\"async\", true);\n",
47
- "\n",
48
- "\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\n",
49
- "\n",
50
- "\n",
51
- "\t var event = document.createEvent(\"HTMLEvents\");\n",
52
- "\t event.initEvent(\"load_nyaplot\",false,false);\n",
53
- "\t window.dispatchEvent(event);\n",
54
- "\t console.log('Finished loading Nyaplotjs');\n",
55
- "\n",
56
- "\t};\n",
57
- "\n",
58
- "\n",
59
- "});\n",
60
- "}\n",
61
- "</script>"
62
- ],
63
- "metadata": {},
64
- "output_type": "pyout",
65
- "prompt_number": 1,
66
- "text": [
67
- "\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"http://d3js.org/d3.v3.min\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});\\n}\\n\""
68
- ]
69
- },
70
- {
71
- "html": [
72
- "<div id='vis-e8fbebaa-7e5b-44cd-bf8f-1d4080d079d1'></div>\n",
73
- "<script>\n",
74
- "(function(){\n",
75
- " var render = function(){\n",
76
- " var model = {\"panes\":[{\"diagrams\":[{\"type\":\"line\",\"options\":{\"x\":\"a\",\"y\":\"b\",\"color\":\"green\"},\"data\":\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"}],\"options\":{\"yrange\":[0,100],\"legend\":true,\"zoom\":true,\"width\":800,\"xrange\":[1,5]}}],\"data\":{\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\":[{\"a\":1,\"b\":10},{\"a\":2,\"b\":14},{\"a\":3,\"b\":15},{\"a\":4,\"b\":17},{\"a\":5,\"b\":44}]},\"extension\":[]}\n",
77
- " Nyaplot.core.parse(model, '#vis-e8fbebaa-7e5b-44cd-bf8f-1d4080d079d1');\n",
78
- " };\n",
79
- " if(window['Nyaplot']==undefined){\n",
80
- " window.addEventListener('load_nyaplot', render, false);\n",
81
- "\treturn;\n",
82
- " } else {\n",
83
- " render();\n",
84
- " }\n",
85
- "})();\n",
86
- "</script>\n"
87
- ],
88
- "metadata": {},
89
- "output_type": "pyout",
90
- "prompt_number": 1,
91
- "text": [
92
- "#<Nyaplot::Frame:0x8ac8fd4 @properties={:panes=>[#<Nyaplot::Plot:0x8acb25c @properties={:diagrams=>[#<Nyaplot::Diagram:0x8ac97cc @properties={:type=>:line, :options=>{:x=>:a, :y=>:b, :color=>\"green\"}, :data=>\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"}, @xrange=[1, 5], @yrange=[10, 44]>], :options=>{:yrange=>[0, 100], :legend=>true, :zoom=>true, :width=>800, :xrange=>[1, 5]}}>], :data=>{\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"=>#<Nyaplot::DataFrame:0x8ac9d1c @name=\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\", @rows=[{:a=>1, :b=>10}, {:a=>2, :b=>14}, {:a=>3, :b=>15}, {:a=>4, :b=>17}, {:a=>5, :b=>44}]>}, :extension=>[]}>"
93
- ]
94
- }
95
- ],
96
- "prompt_number": 1
97
- },
98
- {
99
- "cell_type": "code",
100
- "collapsed": false,
101
- "input": [
102
- "require 'daru'\n",
103
- "# Calculate statistics of numeric columns\n",
104
- "df = Daru::DataFrame.new({\n",
105
- " a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'], \n",
106
- " b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n",
107
- " c: ['small','large','large','small','small','large','small','large','small'],\n",
108
- " d: [1,2,2,3,3,4,5,6,7],\n",
109
- " e: [2,4,4,6,6,8,10,12,14],\n",
110
- " f: [10,20,20,30,30,40,50,60,70]\n",
111
- " })\n",
112
- "df.mean"
113
- ],
114
- "language": "python",
115
- "metadata": {},
116
- "outputs": [
117
- {
118
- "html": [
119
- "<table><tr><th> </th><th>nil</th></tr><tr><td>d</td><td>3.6666666666666665</td></tr><tr><td>e</td><td>7.333333333333333</td></tr><tr><td>f</td><td>36.666666666666664</td></tr></table>"
120
- ],
121
- "metadata": {},
122
- "output_type": "pyout",
123
- "prompt_number": 2,
124
- "text": [
125
- "\n",
126
- "#<Daru::Vector:72633550 @name = nil @size = 3 >\n",
127
- " nil\n",
128
- " d 3.6666666666666665\n",
129
- " e 7.333333333333333\n",
130
- " f 36.666666666666664\n"
131
- ]
132
- }
133
- ],
134
- "prompt_number": 2
135
- },
136
- {
137
- "cell_type": "code",
138
- "collapsed": false,
139
- "input": [
140
- "# Calculate multiple statistical measures in one shot\n",
141
- "df.describe"
142
- ],
143
- "language": "python",
144
- "metadata": {},
145
- "outputs": [
146
- {
147
- "html": [
148
- "<table><tr><th></th><th>d</th><th>e</th><th>f</th></tr><tr><td>count</td><td>9</td><td>9</td><td>9</td></tr><tr><td>mean</td><td>3.6666666666666665</td><td>7.333333333333333</td><td>36.666666666666664</td></tr><tr><td>std</td><td>2.0</td><td>4.0</td><td>20.0</td></tr><tr><td>min</td><td>1</td><td>2</td><td>10</td></tr><tr><td>max</td><td>7</td><td>14</td><td>70</td></tr></table>"
149
- ],
150
- "metadata": {},
151
- "output_type": "pyout",
152
- "prompt_number": 3,
153
- "text": [
154
- "\n",
155
- "#<Daru::DataFrame:72528680 @name = c992bbe4-8948-46f1-bdd4-af2e117e94ac @size = 5>\n",
156
- " d e f \n",
157
- " count 9 9 9 \n",
158
- " mean 3.66666666 7.33333333 36.6666666 \n",
159
- " std 2.0 4.0 20.0 \n",
160
- " min 1 2 10 \n",
161
- " max 7 14 70 \n"
162
- ]
163
- }
164
- ],
165
- "prompt_number": 3
166
- },
167
- {
168
- "cell_type": "code",
169
- "collapsed": false,
170
- "input": [
171
- "# Create a multi-indexed DataFrame\n",
172
- "tuples = [\n",
173
- " [:a,:one,:bar],\n",
174
- " [:a,:one,:baz],\n",
175
- " [:a,:two,:bar],\n",
176
- " [:a,:two,:baz],\n",
177
- " [:b,:one,:bar],\n",
178
- " [:b,:two,:bar],\n",
179
- " [:b,:two,:baz],\n",
180
- " [:b,:one,:foo],\n",
181
- " [:c,:one,:bar],\n",
182
- " [:c,:one,:baz],\n",
183
- " [:c,:two,:foo],\n",
184
- " [:c,:two,:bar]\n",
185
- "]\n",
186
- "multi_index = Daru::MultiIndex.new(tuples)\n",
187
- "\n",
188
- "vector_arry1 = [11,12,13,14,11,12,13,14,11,12,13,14]\n",
189
- "vector_arry2 = [1,2,3,4,1,2,3,4,1,2,3,4]\n",
190
- "\n",
191
- "order_mi = Daru::MultiIndex.new([\n",
192
- " [:a,:one,:bar],\n",
193
- " [:a,:two,:baz],\n",
194
- " [:b,:two,:foo],\n",
195
- " [:b,:one,:foo]])\n",
196
- "\n",
197
- "df_mi = Daru::DataFrame.new([\n",
198
- " vector_arry1, \n",
199
- " vector_arry2, \n",
200
- " vector_arry1, \n",
201
- " vector_arry2], order: order_mi, index: multi_index)"
202
- ],
203
- "language": "python",
204
- "metadata": {},
205
- "outputs": [
206
- {
207
- "html": [
208
- "<table><tr><th></th><th>[:a, :one, :bar]</th><th>[:a, :two, :baz]</th><th>[:b, :two, :foo]</th><th>[:b, :one, :foo]</th></tr><tr><td>[:a, :one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:a, :one, :baz]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:a, :two, :bar]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:a, :two, :baz]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr><tr><td>[:b, :one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:b, :two, :bar]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:b, :two, :baz]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:b, :one, :foo]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr><tr><td>[:c, :one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:c, :one, :baz]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:c, :two, :foo]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:c, :two, :bar]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr></table>"
209
- ],
210
- "metadata": {},
211
- "output_type": "pyout",
212
- "prompt_number": 4,
213
- "text": [
214
- "\n",
215
- "#<Daru::DataFrame:72070870 @name = f8812b5e-bd28-4e32-9173-911514741388 @size = 12>\n",
216
- " [:a, :one, [:a, :two, [:b, :two, [:b, :one, \n",
217
- "[:a, :one, 11 1 11 1 \n",
218
- "[:a, :one, 12 2 12 2 \n",
219
- "[:a, :two, 13 3 13 3 \n",
220
- "[:a, :two, 14 4 14 4 \n",
221
- "[:b, :one, 11 1 11 1 \n",
222
- "[:b, :two, 12 2 12 2 \n",
223
- "[:b, :two, 13 3 13 3 \n",
224
- "[:b, :one, 14 4 14 4 \n",
225
- "[:c, :one, 11 1 11 1 \n",
226
- "[:c, :one, 12 2 12 2 \n",
227
- "[:c, :two, 13 3 13 3 \n",
228
- "[:c, :two, 14 4 14 4 \n"
229
- ]
230
- }
231
- ],
232
- "prompt_number": 4
233
- },
234
- {
235
- "cell_type": "code",
236
- "collapsed": false,
237
- "input": [
238
- "# Specify complete tuple to choose a single row\n",
239
- "df_mi.row[:a, :one,:bar]"
240
- ],
241
- "language": "python",
242
- "metadata": {},
243
- "outputs": [
244
- {
245
- "html": [
246
- "<table><tr><th> </th><th>0</th></tr><tr><td>[:a, :one, :bar]</td><td>11</td></tr><tr><td>[:a, :two, :baz]</td><td>1</td></tr><tr><td>[:b, :two, :foo]</td><td>11</td></tr><tr><td>[:b, :one, :foo]</td><td>1</td></tr></table>"
247
- ],
248
- "metadata": {},
249
- "output_type": "pyout",
250
- "prompt_number": 5,
251
- "text": [
252
- "\n",
253
- "#<Daru::Vector:77596600 @name = 0 @size = 4 >\n",
254
- " 0\n",
255
- "[:a, :one, :bar] 11\n",
256
- "[:a, :two, :baz] 1\n",
257
- "[:b, :two, :foo] 11\n",
258
- "[:b, :one, :foo] 1\n"
259
- ]
260
- }
261
- ],
262
- "prompt_number": 5
263
- },
264
- {
265
- "cell_type": "code",
266
- "collapsed": false,
267
- "input": [
268
- "# Specify partial tuple to select index hierarchially\n",
269
- "df_mi.row[:a]"
270
- ],
271
- "language": "python",
272
- "metadata": {},
273
- "outputs": [
274
- {
275
- "html": [
276
- "<table><tr><th></th><th>[:a, :one, :bar]</th><th>[:a, :two, :baz]</th><th>[:b, :two, :foo]</th><th>[:b, :one, :foo]</th></tr><tr><td>[:one, :bar]</td><td>11</td><td>1</td><td>11</td><td>1</td></tr><tr><td>[:one, :baz]</td><td>12</td><td>2</td><td>12</td><td>2</td></tr><tr><td>[:two, :bar]</td><td>13</td><td>3</td><td>13</td><td>3</td></tr><tr><td>[:two, :baz]</td><td>14</td><td>4</td><td>14</td><td>4</td></tr></table>"
277
- ],
278
- "metadata": {},
279
- "output_type": "pyout",
280
- "prompt_number": 6,
281
- "text": [
282
- "\n",
283
- "#<Daru::DataFrame:77518650 @name = f8812b5e-bd28-4e32-9173-911514741388 @size = 4>\n",
284
- " [:a, :one, [:a, :two, [:b, :two, [:b, :one, \n",
285
- "[:one, :ba 11 1 11 1 \n",
286
- "[:one, :ba 12 2 12 2 \n",
287
- "[:two, :ba 13 3 13 3 \n",
288
- "[:two, :ba 14 4 14 4 \n"
289
- ]
290
- }
291
- ],
292
- "prompt_number": 6
293
- },
294
- {
295
- "cell_type": "code",
296
- "collapsed": false,
297
- "input": [
298
- "# See grouped rows with the 'groups' method\n",
299
- "\n",
300
- "df = Daru::DataFrame.new({\n",
301
- " a: %w{foo bar foo bar foo bar foo foo},\n",
302
- " b: %w{one one two three two two one three},\n",
303
- " c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],\n",
304
- " d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]\n",
305
- "})\n",
306
- "grouped = df.group_by([:a, :b])\n",
307
- "grouped.groups"
308
- ],
309
- "language": "python",
310
- "metadata": {},
311
- "outputs": [
312
- {
313
- "metadata": {},
314
- "output_type": "pyout",
315
- "prompt_number": 7,
316
- "text": [
317
- "{[\"bar\", \"one\"]=>[1], [\"bar\", \"three\"]=>[3], [\"bar\", \"two\"]=>[5], [\"foo\", \"one\"]=>[0, 6], [\"foo\", \"three\"]=>[7], [\"foo\", \"two\"]=>[2, 4]}"
318
- ]
319
- }
320
- ],
321
- "prompt_number": 7
322
- },
323
- {
324
- "cell_type": "code",
325
- "collapsed": false,
326
- "input": [
327
- "# First group by the columns :a and :b and then calculate mean of the grouped rows.\n",
328
- "grouped.mean"
329
- ],
330
- "language": "python",
331
- "metadata": {},
332
- "outputs": [
333
- {
334
- "html": [
335
- "<table><tr><th></th><th>c</th><th>d</th></tr><tr><td>[:bar, :one]</td><td>2</td><td>22</td></tr><tr><td>[:bar, :three]</td><td>1</td><td>44</td></tr><tr><td>[:bar, :two]</td><td>6</td><td>66</td></tr><tr><td>[:foo, :one]</td><td>2.0</td><td>44.0</td></tr><tr><td>[:foo, :three]</td><td>8</td><td>88</td></tr><tr><td>[:foo, :two]</td><td>3.0</td><td>44.0</td></tr></table>"
336
- ],
337
- "metadata": {},
338
- "output_type": "pyout",
339
- "prompt_number": 8,
340
- "text": [
341
- "\n",
342
- "#<Daru::DataFrame:77290860 @name = 4916cdd9-84c7-4f86-9a8d-0f128876e7cf @size = 6>\n",
343
- " c d \n",
344
- "[:bar, :on 2 22 \n",
345
- "[:bar, :th 1 44 \n",
346
- "[:bar, :tw 6 66 \n",
347
- "[:foo, :on 2.0 44.0 \n",
348
- "[:foo, :th 8 88 \n",
349
- "[:foo, :tw 3.0 44.0 \n"
350
- ]
351
- }
352
- ],
353
- "prompt_number": 8
354
- },
355
- {
356
- "cell_type": "code",
357
- "collapsed": false,
358
- "input": [
359
- "grouped.get_group([\"foo\", \"one\"])"
360
- ],
361
- "language": "python",
362
- "metadata": {},
363
- "outputs": [
364
- {
365
- "html": [
366
- "<table><tr><th></th><th>a</th><th>b</th><th>c</th><th>d</th></tr><tr><td>0</td><td>foo</td><td>one</td><td>1</td><td>11</td></tr><tr><td>6</td><td>foo</td><td>one</td><td>3</td><td>77</td></tr></table>"
367
- ],
368
- "metadata": {},
369
- "output_type": "pyout",
370
- "prompt_number": 9,
371
- "text": [
372
- "\n",
373
- "#<Daru::DataFrame:77202350 @name = b5b75233-3de3-48e3-a646-ced6b736f064 @size = 2>\n",
374
- " a b c d \n",
375
- " 0 foo one 1 11 \n",
376
- " 6 foo one 3 77 \n"
377
- ]
378
- }
379
- ],
380
- "prompt_number": 9
381
- },
382
- {
383
- "cell_type": "code",
384
- "collapsed": false,
385
- "input": [
386
- "require 'daru'\n",
387
- "sales = Daru::DataFrame.from_csv '/home/sameer/sales-funnel.csv'"
388
- ],
389
- "language": "python",
390
- "metadata": {},
391
- "outputs": [
392
- {
393
- "html": [
394
- "<table><tr><th></th><th>account</th><th>manager</th><th>name</th><th>price</th><th>product</th><th>quantity</th><th>rep</th><th>status</th></tr><tr><td>0</td><td>714466</td><td>Debra Henley</td><td>Trantow-Barrows</td><td>30000</td><td>CPU</td><td>1</td><td>Craig Booker</td><td>presented</td></tr><tr><td>1</td><td>714466</td><td>Debra Henley</td><td>Trantow-Barrows</td><td>10000</td><td>Software</td><td>1</td><td>Craig Booker</td><td>presented</td></tr><tr><td>2</td><td>714466</td><td>Debra Henley</td><td>Trantow-Barrows</td><td>5000</td><td>Maintenance</td><td>2</td><td>Craig Booker</td><td>pending</td></tr><tr><td>3</td><td>737550</td><td>Debra Henley</td><td>Fritsch, Russel and Anderson</td><td>35000</td><td>CPU</td><td>1</td><td>Craig Booker</td><td>declined</td></tr><tr><td>4</td><td>146832</td><td>Debra Henley</td><td>Kiehn-Spinka</td><td>65000</td><td>CPU</td><td>2</td><td>Daniel Hilton</td><td>won</td></tr><tr><td>5</td><td>218895</td><td>Debra Henley</td><td>Kulas Inc</td><td>40000</td><td>CPU</td><td>2</td><td>Daniel Hilton</td><td>pending</td></tr><tr><td>6</td><td>218895</td><td>Debra Henley</td><td>Kulas Inc</td><td>10000</td><td>Software</td><td>1</td><td>Daniel Hilton</td><td>presented</td></tr><tr><td>7</td><td>412290</td><td>Debra Henley</td><td>Jerde-Hilpert</td><td>5000</td><td>Maintenance</td><td>2</td><td>John Smith</td><td>pending</td></tr><tr><td>8</td><td>740150</td><td>Debra Henley</td><td>Barton LLC</td><td>35000</td><td>CPU</td><td>1</td><td>John Smith</td><td>declined</td></tr><tr><td>9</td><td>141962</td><td>Fred Anderson</td><td>Herman LLC</td><td>65000</td><td>CPU</td><td>2</td><td>Cedric Moss</td><td>won</td></tr><tr><td>10</td><td>163416</td><td>Fred Anderson</td><td>Purdy-Kunde</td><td>30000</td><td>CPU</td><td>1</td><td>Cedric Moss</td><td>presented</td></tr><tr><td>11</td><td>239344</td><td>Fred Anderson</td><td>Stokes LLC</td><td>5000</td><td>Maintenance</td><td>1</td><td>Cedric Moss</td><td>pending</td></tr><tr><td>12</td><td>239344</td><td>Fred Anderson</td><td>Stokes LLC</td><td>10000</td><td>Software</td><td>1</td><td>Cedric Moss</td><td>presented</td></tr><tr><td>13</td><td>307599</td><td>Fred Anderson</td><td>Kassulke, Ondricka and Metz</td><td>7000</td><td>Maintenance</td><td>3</td><td>Wendy Yule</td><td>won</td></tr><tr><td>14</td><td>688981</td><td>Fred Anderson</td><td>Keeling LLC</td><td>100000</td><td>CPU</td><td>5</td><td>Wendy Yule</td><td>won</td></tr><tr><td>15</td><td>729833</td><td>Fred Anderson</td><td>Koepp Ltd</td><td>65000</td><td>CPU</td><td>2</td><td>Wendy Yule</td><td>declined</td></tr><tr><td>16</td><td>729833</td><td>Fred Anderson</td><td>Koepp Ltd</td><td>5000</td><td>Monitor</td><td>2</td><td>Wendy Yule</td><td>presented</td></tr></table>"
395
- ],
396
- "metadata": {},
397
- "output_type": "pyout",
398
- "prompt_number": 10,
399
- "text": [
400
- "\n",
401
- "#<Daru::DataFrame:76599420 @name = 34c1c2a4-2a53-47d6-a863-3f4b05ffd9d7 @size = 17>\n",
402
- " account manager name price product quantity rep status \n",
403
- " 0 714466 Debra Henl Trantow-Ba 30000 CPU 1 Craig Book presented \n",
404
- " 1 714466 Debra Henl Trantow-Ba 10000 Software 1 Craig Book presented \n",
405
- " 2 714466 Debra Henl Trantow-Ba 5000 Maintenanc 2 Craig Book pending \n",
406
- " 3 737550 Debra Henl Fritsch, R 35000 CPU 1 Craig Book declined \n",
407
- " 4 146832 Debra Henl Kiehn-Spin 65000 CPU 2 Daniel Hil won \n",
408
- " 5 218895 Debra Henl Kulas Inc 40000 CPU 2 Daniel Hil pending \n",
409
- " 6 218895 Debra Henl Kulas Inc 10000 Software 1 Daniel Hil presented \n",
410
- " 7 412290 Debra Henl Jerde-Hilp 5000 Maintenanc 2 John Smith pending \n",
411
- " 8 740150 Debra Henl Barton LLC 35000 CPU 1 John Smith declined \n",
412
- " 9 141962 Fred Ander Herman LLC 65000 CPU 2 Cedric Mos won \n",
413
- " 10 163416 Fred Ander Purdy-Kund 30000 CPU 1 Cedric Mos presented \n",
414
- " 11 239344 Fred Ander Stokes LLC 5000 Maintenanc 1 Cedric Mos pending \n",
415
- " 12 239344 Fred Ander Stokes LLC 10000 Software 1 Cedric Mos presented \n",
416
- " 13 307599 Fred Ander Kassulke, 7000 Maintenanc 3 Wendy Yule won \n",
417
- " 14 688981 Fred Ander Keeling LL 100000 CPU 5 Wendy Yule won \n",
418
- " ... ... ... ... ... ... ... ... ... \n"
419
- ]
420
- }
421
- ],
422
- "prompt_number": 10
423
- },
424
- {
425
- "cell_type": "code",
426
- "collapsed": false,
427
- "input": [
428
- "sales.pivot_table index: [:manager, :rep]"
429
- ],
430
- "language": "python",
431
- "metadata": {},
432
- "outputs": [
433
- {
434
- "html": [
435
- "<table><tr><th></th><th>account</th><th>price</th><th>quantity</th></tr><tr><td>[:\"Debra Henley\", :\"Craig Booker\"]</td><td>720237.0</td><td>20000.0</td><td>1.25</td></tr><tr><td>[:\"Debra Henley\", :\"Daniel Hilton\"]</td><td>194874.0</td><td>38333.333333333336</td><td>1.6666666666666667</td></tr><tr><td>[:\"Debra Henley\", :\"John Smith\"]</td><td>576220.0</td><td>20000.0</td><td>1.5</td></tr><tr><td>[:\"Fred Anderson\", :\"Cedric Moss\"]</td><td>196016.5</td><td>27500.0</td><td>1.25</td></tr><tr><td>[:\"Fred Anderson\", :\"Wendy Yule\"]</td><td>614061.5</td><td>44250.0</td><td>3.0</td></tr></table>"
436
- ],
437
- "metadata": {},
438
- "output_type": "pyout",
439
- "prompt_number": 11,
440
- "text": [
441
- "\n",
442
- "#<Daru::DataFrame:75394000 @name = ceeb0166-ed8a-4260-a1d7-f3743bbbbf66 @size = 5>\n",
443
- " account price quantity \n",
444
- "[:\"Debra H 720237.0 20000.0 1.25 \n",
445
- "[:\"Debra H 194874.0 38333.3333 1.66666666 \n",
446
- "[:\"Debra H 576220.0 20000.0 1.5 \n",
447
- "[:\"Fred An 196016.5 27500.0 1.25 \n",
448
- "[:\"Fred An 614061.5 44250.0 3.0 \n"
449
- ]
450
- }
451
- ],
452
- "prompt_number": 11
453
- },
454
- {
455
- "cell_type": "code",
456
- "collapsed": false,
457
- "input": [
458
- "sales.pivot_table(index: [:manager,:rep], values: :price,vectors: [:product], agg: :sum)"
459
- ],
460
- "language": "python",
461
- "metadata": {},
462
- "outputs": [
463
- {
464
- "html": [
465
- "<table><tr><th></th><th>[:price, :CPU]</th><th>[:price, :Software]</th><th>[:price, :Maintenance]</th><th>[:price, :Monitor]</th></tr><tr><td>[:\"Debra Henley\", :\"Craig Booker\"]</td><td>65000</td><td>10000</td><td>5000</td><td></td></tr><tr><td>[:\"Debra Henley\", :\"Daniel Hilton\"]</td><td>105000</td><td>10000</td><td></td><td></td></tr><tr><td>[:\"Debra Henley\", :\"John Smith\"]</td><td>35000</td><td></td><td>5000</td><td></td></tr><tr><td>[:\"Fred Anderson\", :\"Cedric Moss\"]</td><td>95000</td><td>10000</td><td>5000</td><td></td></tr><tr><td>[:\"Fred Anderson\", :\"Wendy Yule\"]</td><td>165000</td><td></td><td>7000</td><td>5000</td></tr></table>"
466
- ],
467
- "metadata": {},
468
- "output_type": "pyout",
469
- "prompt_number": 12,
470
- "text": [
471
- "\n",
472
- "#<Daru::DataFrame:75196920 @name = 74c20bf1-708c-4b7f-87fa-540fa82ed3f3 @size = 5>\n",
473
- " [:price, : [:price, : [:price, : [:price, : \n",
474
- "[:\"Debra H 65000 10000 5000 nil \n",
475
- "[:\"Debra H 105000 10000 nil nil \n",
476
- "[:\"Debra H 35000 nil 5000 nil \n",
477
- "[:\"Fred An 95000 10000 5000 nil \n",
478
- "[:\"Fred An 165000 nil 7000 5000 \n"
479
- ]
480
- }
481
- ],
482
- "prompt_number": 12
483
- },
484
- {
485
- "cell_type": "code",
486
- "collapsed": false,
487
- "input": [
488
- "df = Daru::DataFrame.new({\n",
489
- " a: ['ff' , 'fwwq', 'efe', 'a', 'efef', 'zzzz', 'efgg', 'q', 'ggf'], \n",
490
- " b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n",
491
- " c: ['small','large','large','small','small','large','small','large','small'],\n",
492
- " d: [-1,2,-2,3,-3,4,-5,6,7],\n",
493
- " e: [2,4,4,6,6,8,10,12,14]\n",
494
- " })\n",
495
- " df.sort([:a,:d], by: {a: lambda {|a,b| a.length <=> b.length }, b: lambda {|a,b| a.abs <=> b.abs }}, ascending: [false, true])"
496
- ],
497
- "language": "python",
498
- "metadata": {},
499
- "outputs": [
500
- {
501
- "html": [
502
- "<table><tr><th></th><th>a</th><th>b</th><th>c</th><th>d</th><th>e</th></tr><tr><td>6</td><td>efgg</td><td>one</td><td>small</td><td>-5</td><td>10</td></tr><tr><td>4</td><td>efef</td><td>two</td><td>small</td><td>-3</td><td>6</td></tr><tr><td>1</td><td>fwwq</td><td>one</td><td>large</td><td>2</td><td>4</td></tr><tr><td>5</td><td>zzzz</td><td>one</td><td>large</td><td>4</td><td>8</td></tr><tr><td>2</td><td>efe</td><td>one</td><td>large</td><td>-2</td><td>4</td></tr><tr><td>8</td><td>ggf</td><td>two</td><td>small</td><td>7</td><td>14</td></tr><tr><td>0</td><td>ff</td><td>one</td><td>small</td><td>-1</td><td>2</td></tr><tr><td>3</td><td>a</td><td>two</td><td>small</td><td>3</td><td>6</td></tr><tr><td>7</td><td>q</td><td>two</td><td>large</td><td>6</td><td>12</td></tr></table>"
503
- ],
504
- "metadata": {},
505
- "output_type": "pyout",
506
- "prompt_number": 13,
507
- "text": [
508
- "\n",
509
- "#<Daru::DataFrame:74792710 @name = 9e02295c-d12a-4c0b-b0c5-f8be81327c66 @size = 9>\n",
510
- " a b c d e \n",
511
- " 6 efgg one small -5 10 \n",
512
- " 4 efef two small -3 6 \n",
513
- " 1 fwwq one large 2 4 \n",
514
- " 5 zzzz one large 4 8 \n",
515
- " 2 efe one large -2 4 \n",
516
- " 8 ggf two small 7 14 \n",
517
- " 0 ff one small -1 2 \n",
518
- " 3 a two small 3 6 \n",
519
- " 7 q two large 6 12 \n"
520
- ]
521
- }
522
- ],
523
- "prompt_number": 13
524
- }
525
- ],
526
- "metadata": {}
527
- }
528
- ]
529
- }