daru 0.0.2.2 → 0.0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6266fb09be80748530baa8b92aface148f47bac6
4
- data.tar.gz: 37f2301d6b3fed23b15e11c5608892935befb04e
3
+ metadata.gz: e425d7fab01db79087549701e3e9e95df2b495d1
4
+ data.tar.gz: f3b186c9405b1fb14fb7b95a2ecde7a291decbb4
5
5
  SHA512:
6
- metadata.gz: fa67c4506dea445619c1cf79270c5194cd189da302705b8d0dcad6689b3c3717f08bc11ce15af9670c7245fc746b3ce8feb0e82ea22c69aa06bf8da15003f947
7
- data.tar.gz: 17d35384cf2daf28eca33aef370789edf7854da821094207e3eda9732933889ce23a44777e9aa451e62c732048a472d74378affb97e1a193cfe235426d83428e
6
+ metadata.gz: fd89ad49623169a8caf335746094a38e2613864ec05f059488773285deaff87eaa5a8a68b6f60a2a12e37f5ad29cc800e8cdb7b0fba12eeb3a21b9bb3dbcca69
7
+ data.tar.gz: 0b1458e1df11590ca858e9350503eef9a29420661ea92d1c73907ee47c33879637260f489bd3451aa9d7774d69e20b639e17ecdfac339c18f710c6ba25955908
data/History.txt CHANGED
@@ -0,0 +1,17 @@
1
+ == 0.0.1
2
+ * Added classes for DataFrame and Vector along with some super-basic functions to get off the ground
3
+
4
+ == 0.0.2
5
+ * Added iterators for dataframe and vector along with printing functions (to_html) to interface properly with iRuby notebook.
6
+
7
+ == 0.0.2.1
8
+ * Fixed bugs in previous code and added more iterators
9
+
10
+ == 0.0.2.2
11
+ * Added test cases and multiple column access through the [] operator on DataFrames
12
+
13
+ == 0.0.2.3
14
+ * Added #filter_rows and #delete_row to DataFrame and changed #row to return a row containing a Hash of column name and value.
15
+ * Vector objects passed into a DataFrame are now duplicated so that any changes don't affect the original vector.
16
+ * Added an optional opts argument to DataFrame.
17
+ * Sending more fields than vectors in DataFrame will cause addition of nil vectors.
data/README.md CHANGED
@@ -31,12 +31,11 @@ Then switch to MRI, do a normal `bundle install` followed by `rspec` for testing
31
31
 
32
32
  ## Roadmap
33
33
 
34
- * Print dataframe in same order as fields.
35
- * Return rows in same order as that of fields.
36
34
  * Automate testing for both MRI and JRuby.
37
35
  * Enable creation of DataFrame by only specifying an NMatrix/MDArray in initialize. Vector naming happens automatically (alphabetic) or is specified in an Array.
38
36
  * Add support for missing values in vectors.
39
- * Add normal and destructive map iterators.
37
+ * Destructive version #filter\_rows!
38
+ * NMatrix.first should return NMatrix (in vector).
40
39
  * Completely test all functionality for NMatrix and MDArray.
41
40
  * Basic Data manipulation and analysis operations:
42
41
  - Different kinds of join operations
@@ -51,4 +50,5 @@ Then switch to MRI, do a normal `bundle install` followed by `rspec` for testing
51
50
  - Runtime alteration of index.
52
51
  * Indexing on DataFrame.
53
52
  * Vector arithmetic - elementwise addition, subtraction, multiplication, division.
54
- * Transpose a dataframe.
53
+ * Transpose a dataframe.
54
+ * Option to express a DataFrame as an NMatrix or MDArray so as to use more efficient storage techniques.
data/lib/daru.rb CHANGED
@@ -3,4 +3,5 @@ require 'matrix'
3
3
  require 'csv'
4
4
 
5
5
  require 'daru/vector.rb'
6
- require 'daru/dataframe.rb'
6
+ require 'daru/dataframe.rb'
7
+ require 'daru/monkeys.rb'
@@ -9,18 +9,25 @@ module Daru
9
9
 
10
10
  attr_reader :name
11
11
 
12
- def initialize source, fields=[], name=SecureRandom.uuid
12
+ def initialize source, fields=[], name=SecureRandom.uuid, opts={}
13
+ @opts = opts
14
+ set_default_opts
15
+
13
16
  if source.empty?
14
17
  @vectors = fields.inject({}){ |a,x| a[x]=Daru::Vector.new; a}
15
18
  else
16
- @vectors = source
19
+ @vectors = source.inject({}) do |acc, h|
20
+ acc[h[0]] = h[1].dv.dup
21
+ acc
22
+ end
17
23
  end
18
24
 
19
25
  @fields = fields.empty? ? source.keys.sort : fields
20
26
  @name = name
21
-
27
+
22
28
  check_length
23
- set_fields_order if @vectors.keys.sort != @fields.sort
29
+ set_missing_vectors if @vectors.keys.size < @fields.size
30
+ set_fields_order if @vectors.keys.sort != @fields.sort
24
31
  set_vector_names
25
32
  end
26
33
 
@@ -53,20 +60,36 @@ module Daru
53
60
  @vectors[name]
54
61
  end
55
62
 
56
- def delete name
63
+ def delete_vector name
57
64
  @vectors.delete name
58
65
  @fields.delete name
59
66
  end
60
67
 
61
- # def filter_rows
62
-
63
- # end
68
+ alias_method :delete, :delete_vector
64
69
 
65
- # def filter_columns
66
-
67
- # end
70
+ def delete_row index
71
+ # TODO: Make this work with NMatrix and MDArray
72
+ raise "Expected index less than size." if index > @size
73
+
74
+ @fields.each do |field|
75
+ @vectors[field].delete index
76
+ end
77
+ puts @vectors
78
+ end
79
+
80
+ def filter_rows name=self.name, &block
81
+ df = DataFrame.new({}, @fields, name)
82
+
83
+ self.each_row do |row|
84
+ keep_row = yield row
85
+
86
+ df.insert_row(row.values) if keep_row
87
+ end
68
88
 
69
- def [](*name)
89
+ df
90
+ end
91
+
92
+ def [] *name
70
93
  unless name[1]
71
94
  return column(name[0])
72
95
  end
@@ -81,21 +104,21 @@ module Daru
81
104
  DataFrame.new h, req_fields, @name
82
105
  end
83
106
 
84
- def ==(other)
107
+ def == other
85
108
  @name == other.name and @vectors == other.vectors and
86
109
  @size == other.size and @fields == other.fields
87
110
  end
88
111
 
89
- def []=(name, vector)
112
+ def []= name, vector
90
113
  insert_vector name, vector
91
114
  end
92
115
 
93
116
  def row index
94
117
  raise Exception, "Expected index to be within bounds" if index > @size
95
118
 
96
- row = []
97
- self.each_column do |column|
98
- row << column[index]
119
+ row = {}
120
+ self.each_vector do |column|
121
+ row[column.name] = column[index]
99
122
  end
100
123
 
101
124
  row
@@ -105,7 +128,7 @@ module Daru
105
128
  !!@vectors[vector]
106
129
  end
107
130
 
108
- def each_row
131
+ def each_row(&block)
109
132
  0.upto(@size-1) do |index|
110
133
  yield row(index)
111
134
  end
@@ -113,7 +136,7 @@ module Daru
113
136
  self
114
137
  end
115
138
 
116
- def each_row_with_index
139
+ def each_row_with_index(&block)
117
140
  0.upto(@size-1) do |index|
118
141
  yield row(index), index
119
142
  end
@@ -121,7 +144,7 @@ module Daru
121
144
  self
122
145
  end
123
146
 
124
- def each_column
147
+ def each_vector(&block)
125
148
  @fields.each do |field|
126
149
  yield @vectors[field]
127
150
  end
@@ -129,7 +152,7 @@ module Daru
129
152
  self
130
153
  end
131
154
 
132
- def each_column_with_name
155
+ def each_vector_with_name(&block)
133
156
  @fields.each do |field|
134
157
  yield @vectors[field], field
135
158
  end
@@ -152,6 +175,8 @@ module Daru
152
175
  @fields.each_with_index do |field, index|
153
176
  @vectors[field] << row[index]
154
177
  end
178
+
179
+ @size += 1
155
180
  end
156
181
 
157
182
  def to_html(threshold=15)
@@ -164,7 +189,7 @@ module Daru
164
189
  self.each_row_with_index do |row, index|
165
190
  break if index > threshold and index <= @size
166
191
  html += '<tr>'
167
- row.each{ |val| html.concat('<td>' + val.to_s + '</td>') }
192
+ row.each_value { |val| html.concat('<td>' + val.to_s + '</td>') }
168
193
  html += '</tr>'
169
194
  if index == threshold
170
195
  html += '<tr>'
@@ -206,17 +231,30 @@ module Daru
206
231
  @size = size
207
232
  end
208
233
 
209
- def set_fields_order
210
- @fields = @vectors.keys & @fields
211
- @fields += @vecorts.keys.sort - @fields
234
+ def set_fields_order # vectors more than specified fields
235
+ @fields = @fields & @vectors.keys
236
+ @fields += @vectors.keys.sort - @fields
212
237
  end
213
238
 
214
239
  # Writes names specified in the hash to the actual name of the vector.
215
240
  # Will over-ride any previous name assigned to the vector.
216
- def set_vector_names
241
+ def set_vector_names
217
242
  @fields.each do |name|
218
243
  @vectors[name].name = name
219
244
  end
220
245
  end
246
+
247
+ def set_default_opts
248
+ # Future proofing
249
+ end
250
+
251
+ def set_missing_vectors
252
+ missing_fields = @fields - @vectors.keys
253
+
254
+ missing_fields.each do |field|
255
+ @vectors[field] = ([nil]*@size).dv
256
+ @fields << field
257
+ end
258
+ end
221
259
  end
222
260
  end
@@ -0,0 +1,39 @@
1
+ class Array
2
+ def daru_vector name=nil
3
+ Daru::Vector.new self, name
4
+ end
5
+
6
+ alias_method :dv, :daru_vector
7
+ end
8
+
9
+ class Range
10
+ def daru_vector name=nil
11
+ Daru::Vector.new self, name
12
+ end
13
+
14
+ alias_method :dv, :daru_vector
15
+ end
16
+
17
+ class Hash
18
+ def daru_vector
19
+ Daru::Vector.new self.values[0], self.keys[0]
20
+ end
21
+
22
+ alias_method :dv, :daru_vector
23
+ end
24
+
25
+ class NMatrix
26
+ def daru_vector name=nil
27
+ Daru::Vector.new self
28
+ end
29
+
30
+ alias_method :dv, :daru_vector
31
+ end
32
+
33
+ class MDArray
34
+ def daru_vector name=nil
35
+ Daru::Vector.new self, name
36
+ end
37
+
38
+ alias_method :dv, :daru_vector
39
+ end
data/lib/daru/vector.rb CHANGED
@@ -1,43 +1,3 @@
1
- class Array
2
- def daru_vector
3
- Daru::Vector.new self
4
- end
5
-
6
- alias_method :dv, :daru_vector
7
- end
8
-
9
- class Range
10
- def daru_vector
11
- Daru::Vector.new self
12
- end
13
-
14
- alias_method :dv, :daru_vector
15
- end
16
-
17
- class Hash
18
- def daru_vector
19
- Daru::Vector.new self.values[0], self.keys[0]
20
- end
21
-
22
- alias_method :dv, :daru_vector
23
- end
24
-
25
- class NMatrix
26
- def daru_vector
27
- Daru::Vector.new self
28
- end
29
-
30
- alias_method :dv, :daru_vector
31
- end
32
-
33
- class MDArray
34
- def daru_vector
35
- Daru::Vector.new self
36
- end
37
-
38
- alias_method :dv, :daru_vector
39
- end
40
-
41
1
  module Daru
42
2
  class Vector
43
3
  include Enumerable
@@ -79,11 +39,13 @@ module Daru
79
39
  end
80
40
 
81
41
  def ==(other)
82
- other.vector == @vector and other.name == @name
42
+ other.vector == @vector and other.name == @name and other.size == @size
83
43
  end
84
44
 
85
45
  def <<(element)
86
46
  @vector << element
47
+
48
+ @size += 1
87
49
  end
88
50
 
89
51
  def to_json
@@ -104,6 +66,12 @@ module Daru
104
66
  lim == 1 ? @vector.first : @vector.first(lim)
105
67
  end
106
68
 
69
+ def delete index
70
+ @vector[index] = nil
71
+ @vector.compact!
72
+ @size -= 1
73
+ end
74
+
107
75
  def to_html threshold=15
108
76
  html = '<table><tr><th>' + @name.to_s + '</th></tr>>'
109
77
 
@@ -117,7 +85,17 @@ module Daru
117
85
  end
118
86
 
119
87
  def dup
120
- Daru::Vector.new @vector.dup, @name.dup
88
+ Daru::Vector.new @vector.dup, @name
89
+ end
90
+
91
+ def daru_vector
92
+ self
93
+ end
94
+
95
+ alias_method :dv, :daru_vector
96
+
97
+ def compact!
98
+ @vector.compact!
121
99
  end
122
100
  end
123
101
  end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Daru
2
- VERSION = "0.0.2.2"
2
+ VERSION = "0.0.2.3"
3
3
  end
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::DataFrame do
4
- context "DataFrame from normal array vectors" do
4
+ context "DataFrame of Array" do
5
5
 
6
6
  before :each do
7
7
  @df = Daru::DataFrame.new({a: Daru::Vector.new(1..3),
@@ -36,12 +36,12 @@ describe Daru::DataFrame do
36
36
  it "returns a row" do
37
37
  r = @df.row 0
38
38
 
39
- expect(r).to eq(['Jesse',1,50])
39
+ expect(r).to eq({:b_bad=> "Jesse", :a=> 1, :b=> 50})
40
40
  end
41
41
 
42
42
  it "iterates over columns in the specified order" do
43
43
  cols = []
44
- df = @df.each_column do |col|
44
+ df = @df.each_vector do |col|
45
45
  expect(col.is_a?(Daru::Vector)).to be(true)
46
46
  cols << col.name
47
47
  end
@@ -56,6 +56,13 @@ describe Daru::DataFrame do
56
56
  end
57
57
  end
58
58
 
59
+ it "filters rows" do
60
+ res = @df.filter_rows(@df.name) { |row| row[:b] == 50 }
61
+
62
+ expect(res).to eq(Daru::DataFrame.new({a: [1].dv, b: [50].dv, b_bad: ['Jesse'].dv},
63
+ @df.fields, @df.name))
64
+ end
65
+
59
66
  it "shows column fields" do
60
67
  expect(@df.fields).to eq([:b_bad, :a, :b])
61
68
  end
@@ -90,9 +97,26 @@ describe Daru::DataFrame do
90
97
 
91
98
  expect(@df[:a, :b]).to eq(req)
92
99
  end
100
+
101
+ it "creates DataFrame from Array" do
102
+ a_df = Daru::DataFrame.new({a: [1,2,3,4], b: [10,11,12,13]})
103
+
104
+ expect(a_df.a.is_a? Daru::Vector).to eq(true)
105
+ expect(a_df.a.vector).to eq([1,2,3,4])
106
+ end
107
+ end
108
+
109
+ context "Malformed DataFrame from Array" do
110
+ it "adds extra nil vectors from fields" do
111
+ df = Daru::DataFrame.new({a: (1..4).dv, b: (50..53).dv}, [:b, :a, :jazzy, :joe])
112
+
113
+ expect(df.fields).to eq([:b, :a, :jazzy, :joe])
114
+ expect(df.jazzy).to eq(([nil]*4).dv(:jazzy))
115
+ expect(df.joe).to eq(([nil]*4).dv(:joe))
116
+ end
93
117
  end
94
118
 
95
- context "DataFrame loads from files" do
119
+ context "DataFrame from files" do
96
120
 
97
121
  it "loads a DataFrame from CSV" do
98
122
  df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: daru
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2.2
4
+ version: 0.0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sameer Deshmukh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-11 00:00:00.000000000 Z
11
+ date: 2014-10-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -72,6 +72,7 @@ files:
72
72
  - daru.gemspec
73
73
  - lib/daru.rb
74
74
  - lib/daru/dataframe.rb
75
+ - lib/daru/monkeys.rb
75
76
  - lib/daru/vector.rb
76
77
  - lib/version.rb
77
78
  - spec/fixtures/matrix_test.csv