daru 0.0.2.2 → 0.0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6266fb09be80748530baa8b92aface148f47bac6
4
- data.tar.gz: 37f2301d6b3fed23b15e11c5608892935befb04e
3
+ metadata.gz: e425d7fab01db79087549701e3e9e95df2b495d1
4
+ data.tar.gz: f3b186c9405b1fb14fb7b95a2ecde7a291decbb4
5
5
  SHA512:
6
- metadata.gz: fa67c4506dea445619c1cf79270c5194cd189da302705b8d0dcad6689b3c3717f08bc11ce15af9670c7245fc746b3ce8feb0e82ea22c69aa06bf8da15003f947
7
- data.tar.gz: 17d35384cf2daf28eca33aef370789edf7854da821094207e3eda9732933889ce23a44777e9aa451e62c732048a472d74378affb97e1a193cfe235426d83428e
6
+ metadata.gz: fd89ad49623169a8caf335746094a38e2613864ec05f059488773285deaff87eaa5a8a68b6f60a2a12e37f5ad29cc800e8cdb7b0fba12eeb3a21b9bb3dbcca69
7
+ data.tar.gz: 0b1458e1df11590ca858e9350503eef9a29420661ea92d1c73907ee47c33879637260f489bd3451aa9d7774d69e20b639e17ecdfac339c18f710c6ba25955908
data/History.txt CHANGED
@@ -0,0 +1,17 @@
1
+ == 0.0.1
2
+ * Added classes for DataFrame and Vector along with some super-basic functions to get off the ground
3
+
4
+ == 0.0.2
5
+ * Added iterators for DataFrame and Vector along with printing functions (to_html) to interface properly with the IRuby notebook.
6
+
7
+ == 0.0.2.1
8
+ * Fixed bugs with previous code and more iterators
9
+
10
+ == 0.0.2.2
11
+ * Added test cases and multiple column access through the [] operator on DataFrames
12
+
13
+ == 0.0.2.3
14
+ * Added #filter_rows and #delete_row to DataFrame and changed #row to return the row as a Hash of column name and value.
15
+ * Vector objects passed into a DataFrame are now duplicated so that any changes don't affect the original vector.
16
+ * Added an optional opts argument to DataFrame.
17
+ * Passing more fields than vectors to DataFrame now adds nil-filled vectors for the extra fields.
data/README.md CHANGED
@@ -31,12 +31,11 @@ Then switch to MRI, do a normal `bundle install` followed by `rspec` for testing
31
31
 
32
32
  ## Roadmap
33
33
 
34
- * Print dataframe in same order as fields.
35
- * Return rows in same order as that of fields.
36
34
  * Automate testing for both MRI and JRuby.
37
35
  * Enable creation of DataFrame by only specifying an NMatrix/MDArray in initialize. Vector naming happens automatically (alphabetic) or is specified in an Array.
38
36
  * Add support for missing values in vectors.
39
- * Add normal and destructive map iterators.
37
+ * Destructive version #filter\_rows!
38
+ * NMatrix.first should return NMatrix (in vector).
40
39
  * Completely test all functionality for NMatrix and MDArray.
41
40
  * Basic Data manipulation and analysis operations:
42
41
  - Different kinds of join operations
@@ -51,4 +50,5 @@ Then switch to MRI, do a normal `bundle install` followed by `rspec` for testing
51
50
  - Runtime alteration of index.
52
51
  * Indexing on DataFrame.
53
52
  * Vector arithmetic - elementwise addition, subtraction, multiplication, division.
54
- * Transpose a dataframe.
53
+ * Transpose a dataframe.
54
+ * Option to express a DataFrame as an NMatrix or MDArray so as to use more efficient storage techniques.
data/lib/daru.rb CHANGED
@@ -3,4 +3,5 @@ require 'matrix'
3
3
  require 'csv'
4
4
 
5
5
  require 'daru/vector.rb'
6
- require 'daru/dataframe.rb'
6
+ require 'daru/dataframe.rb'
7
+ require 'daru/monkeys.rb'
@@ -9,18 +9,25 @@ module Daru
9
9
 
10
10
  attr_reader :name
11
11
 
12
- def initialize source, fields=[], name=SecureRandom.uuid
12
+ def initialize source, fields=[], name=SecureRandom.uuid, opts={}
13
+ @opts = opts
14
+ set_default_opts
15
+
13
16
  if source.empty?
14
17
  @vectors = fields.inject({}){ |a,x| a[x]=Daru::Vector.new; a}
15
18
  else
16
- @vectors = source
19
+ @vectors = source.inject({}) do |acc, h|
20
+ acc[h[0]] = h[1].dv.dup
21
+ acc
22
+ end
17
23
  end
18
24
 
19
25
  @fields = fields.empty? ? source.keys.sort : fields
20
26
  @name = name
21
-
27
+
22
28
  check_length
23
- set_fields_order if @vectors.keys.sort != @fields.sort
29
+ set_missing_vectors if @vectors.keys.size < @fields.size
30
+ set_fields_order if @vectors.keys.sort != @fields.sort
24
31
  set_vector_names
25
32
  end
26
33
 
@@ -53,20 +60,36 @@ module Daru
53
60
  @vectors[name]
54
61
  end
55
62
 
56
- def delete name
63
+ def delete_vector name
57
64
  @vectors.delete name
58
65
  @fields.delete name
59
66
  end
60
67
 
61
- # def filter_rows
62
-
63
- # end
68
+ alias_method :delete, :delete_vector
64
69
 
65
- # def filter_columns
66
-
67
- # end
70
+ def delete_row index
71
+ # TODO: Make this work with NMatrix and MDArray
72
+ raise "Expected index less than size." if index > @size
73
+
74
+ @fields.each do |field|
75
+ @vectors[field].delete index
76
+ end
77
+ puts @vectors
78
+ end
79
+
80
+ def filter_rows name=self.name, &block
81
+ df = DataFrame.new({}, @fields, name)
82
+
83
+ self.each_row do |row|
84
+ keep_row = yield row
85
+
86
+ df.insert_row(row.values) if keep_row
87
+ end
68
88
 
69
- def [](*name)
89
+ df
90
+ end
91
+
92
+ def [] *name
70
93
  unless name[1]
71
94
  return column(name[0])
72
95
  end
@@ -81,21 +104,21 @@ module Daru
81
104
  DataFrame.new h, req_fields, @name
82
105
  end
83
106
 
84
- def ==(other)
107
+ def == other
85
108
  @name == other.name and @vectors == other.vectors and
86
109
  @size == other.size and @fields == other.fields
87
110
  end
88
111
 
89
- def []=(name, vector)
112
+ def []= name, vector
90
113
  insert_vector name, vector
91
114
  end
92
115
 
93
116
  def row index
94
117
  raise Exception, "Expected index to be within bounds" if index > @size
95
118
 
96
- row = []
97
- self.each_column do |column|
98
- row << column[index]
119
+ row = {}
120
+ self.each_vector do |column|
121
+ row[column.name] = column[index]
99
122
  end
100
123
 
101
124
  row
@@ -105,7 +128,7 @@ module Daru
105
128
  !!@vectors[vector]
106
129
  end
107
130
 
108
- def each_row
131
+ def each_row(&block)
109
132
  0.upto(@size-1) do |index|
110
133
  yield row(index)
111
134
  end
@@ -113,7 +136,7 @@ module Daru
113
136
  self
114
137
  end
115
138
 
116
- def each_row_with_index
139
+ def each_row_with_index(&block)
117
140
  0.upto(@size-1) do |index|
118
141
  yield row(index), index
119
142
  end
@@ -121,7 +144,7 @@ module Daru
121
144
  self
122
145
  end
123
146
 
124
- def each_column
147
+ def each_vector(&block)
125
148
  @fields.each do |field|
126
149
  yield @vectors[field]
127
150
  end
@@ -129,7 +152,7 @@ module Daru
129
152
  self
130
153
  end
131
154
 
132
- def each_column_with_name
155
+ def each_vector_with_name(&block)
133
156
  @fields.each do |field|
134
157
  yield @vectors[field], field
135
158
  end
@@ -152,6 +175,8 @@ module Daru
152
175
  @fields.each_with_index do |field, index|
153
176
  @vectors[field] << row[index]
154
177
  end
178
+
179
+ @size += 1
155
180
  end
156
181
 
157
182
  def to_html(threshold=15)
@@ -164,7 +189,7 @@ module Daru
164
189
  self.each_row_with_index do |row, index|
165
190
  break if index > threshold and index <= @size
166
191
  html += '<tr>'
167
- row.each{ |val| html.concat('<td>' + val.to_s + '</td>') }
192
+ row.each_value { |val| html.concat('<td>' + val.to_s + '</td>') }
168
193
  html += '</tr>'
169
194
  if index == threshold
170
195
  html += '<tr>'
@@ -206,17 +231,30 @@ module Daru
206
231
  @size = size
207
232
  end
208
233
 
209
- def set_fields_order
210
- @fields = @vectors.keys & @fields
211
- @fields += @vecorts.keys.sort - @fields
234
+ def set_fields_order # vectors more than specified fields
235
+ @fields = @fields & @vectors.keys
236
+ @fields += @vectors.keys.sort - @fields
212
237
  end
213
238
 
214
239
  # Writes names specified in the hash to the actual name of the vector.
215
240
  # Will over-ride any previous name assigned to the vector.
216
- def set_vector_names
241
+ def set_vector_names
217
242
  @fields.each do |name|
218
243
  @vectors[name].name = name
219
244
  end
220
245
  end
246
+
247
+ def set_default_opts
248
+ # Future proofing
249
+ end
250
+
251
+ def set_missing_vectors
252
+ missing_fields = @fields - @vectors.keys
253
+
254
+ missing_fields.each do |field|
255
+ @vectors[field] = ([nil]*@size).dv
256
+ @fields << field
257
+ end
258
+ end
221
259
  end
222
260
  end
@@ -0,0 +1,39 @@
1
+ class Array
2
+ def daru_vector name=nil
3
+ Daru::Vector.new self, name
4
+ end
5
+
6
+ alias_method :dv, :daru_vector
7
+ end
8
+
9
+ class Range
10
+ def daru_vector name=nil
11
+ Daru::Vector.new self, name
12
+ end
13
+
14
+ alias_method :dv, :daru_vector
15
+ end
16
+
17
+ class Hash
18
+ def daru_vector
19
+ Daru::Vector.new self.values[0], self.keys[0]
20
+ end
21
+
22
+ alias_method :dv, :daru_vector
23
+ end
24
+
25
+ class NMatrix
26
+ def daru_vector name=nil
27
+ Daru::Vector.new self
28
+ end
29
+
30
+ alias_method :dv, :daru_vector
31
+ end
32
+
33
+ class MDArray
34
+ def daru_vector name=nil
35
+ Daru::Vector.new self, name
36
+ end
37
+
38
+ alias_method :dv, :daru_vector
39
+ end
data/lib/daru/vector.rb CHANGED
@@ -1,43 +1,3 @@
1
- class Array
2
- def daru_vector
3
- Daru::Vector.new self
4
- end
5
-
6
- alias_method :dv, :daru_vector
7
- end
8
-
9
- class Range
10
- def daru_vector
11
- Daru::Vector.new self
12
- end
13
-
14
- alias_method :dv, :daru_vector
15
- end
16
-
17
- class Hash
18
- def daru_vector
19
- Daru::Vector.new self.values[0], self.keys[0]
20
- end
21
-
22
- alias_method :dv, :daru_vector
23
- end
24
-
25
- class NMatrix
26
- def daru_vector
27
- Daru::Vector.new self
28
- end
29
-
30
- alias_method :dv, :daru_vector
31
- end
32
-
33
- class MDArray
34
- def daru_vector
35
- Daru::Vector.new self
36
- end
37
-
38
- alias_method :dv, :daru_vector
39
- end
40
-
41
1
  module Daru
42
2
  class Vector
43
3
  include Enumerable
@@ -79,11 +39,13 @@ module Daru
79
39
  end
80
40
 
81
41
  def ==(other)
82
- other.vector == @vector and other.name == @name
42
+ other.vector == @vector and other.name == @name and other.size == @size
83
43
  end
84
44
 
85
45
  def <<(element)
86
46
  @vector << element
47
+
48
+ @size += 1
87
49
  end
88
50
 
89
51
  def to_json
@@ -104,6 +66,12 @@ module Daru
104
66
  lim == 1 ? @vector.first : @vector.first(lim)
105
67
  end
106
68
 
69
+ def delete index
70
+ @vector[index] = nil
71
+ @vector.compact!
72
+ @size -= 1
73
+ end
74
+
107
75
  def to_html threshold=15
108
76
  html = '<table><tr><th>' + @name.to_s + '</th></tr>>'
109
77
 
@@ -117,7 +85,17 @@ module Daru
117
85
  end
118
86
 
119
87
  def dup
120
- Daru::Vector.new @vector.dup, @name.dup
88
+ Daru::Vector.new @vector.dup, @name
89
+ end
90
+
91
+ def daru_vector
92
+ self
93
+ end
94
+
95
+ alias_method :dv, :daru_vector
96
+
97
+ def compact!
98
+ @vector.compact!
121
99
  end
122
100
  end
123
101
  end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Daru
2
- VERSION = "0.0.2.2"
2
+ VERSION = "0.0.2.3"
3
3
  end
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::DataFrame do
4
- context "DataFrame from normal array vectors" do
4
+ context "DataFrame of Array" do
5
5
 
6
6
  before :each do
7
7
  @df = Daru::DataFrame.new({a: Daru::Vector.new(1..3),
@@ -36,12 +36,12 @@ describe Daru::DataFrame do
36
36
  it "returns a row" do
37
37
  r = @df.row 0
38
38
 
39
- expect(r).to eq(['Jesse',1,50])
39
+ expect(r).to eq({:b_bad=> "Jesse", :a=> 1, :b=> 50})
40
40
  end
41
41
 
42
42
  it "iterates over columns in the specified order" do
43
43
  cols = []
44
- df = @df.each_column do |col|
44
+ df = @df.each_vector do |col|
45
45
  expect(col.is_a?(Daru::Vector)).to be(true)
46
46
  cols << col.name
47
47
  end
@@ -56,6 +56,13 @@ describe Daru::DataFrame do
56
56
  end
57
57
  end
58
58
 
59
+ it "filters rows" do
60
+ res = @df.filter_rows(@df.name) { |row| row[:b] == 50 }
61
+
62
+ expect(res).to eq(Daru::DataFrame.new({a: [1].dv, b: [50].dv, b_bad: ['Jesse'].dv},
63
+ @df.fields, @df.name))
64
+ end
65
+
59
66
  it "shows column fields" do
60
67
  expect(@df.fields).to eq([:b_bad, :a, :b])
61
68
  end
@@ -90,9 +97,26 @@ describe Daru::DataFrame do
90
97
 
91
98
  expect(@df[:a, :b]).to eq(req)
92
99
  end
100
+
101
+ it "creates DataFrame from Array" do
102
+ a_df = Daru::DataFrame.new({a: [1,2,3,4], b: [10,11,12,13]})
103
+
104
+ expect(a_df.a.is_a? Daru::Vector).to eq(true)
105
+ expect(a_df.a.vector).to eq([1,2,3,4])
106
+ end
107
+ end
108
+
109
+ context "Malformed DataFrame from Array" do
110
+ it "adds extra nil vectors from fields" do
111
+ df = Daru::DataFrame.new({a: (1..4).dv, b: (50..53).dv}, [:b, :a, :jazzy, :joe])
112
+
113
+ expect(df.fields).to eq([:b, :a, :jazzy, :joe])
114
+ expect(df.jazzy).to eq(([nil]*4).dv(:jazzy))
115
+ expect(df.joe).to eq(([nil]*4).dv(:joe))
116
+ end
93
117
  end
94
118
 
95
- context "DataFrame loads from files" do
119
+ context "DataFrame from files" do
96
120
 
97
121
  it "loads a DataFrame from CSV" do
98
122
  df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: daru
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2.2
4
+ version: 0.0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sameer Deshmukh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-11 00:00:00.000000000 Z
11
+ date: 2014-10-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -72,6 +72,7 @@ files:
72
72
  - daru.gemspec
73
73
  - lib/daru.rb
74
74
  - lib/daru/dataframe.rb
75
+ - lib/daru/monkeys.rb
75
76
  - lib/daru/vector.rb
76
77
  - lib/version.rb
77
78
  - spec/fixtures/matrix_test.csv