daru 0.0.2.2 → 0.0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +17 -0
- data/README.md +4 -4
- data/lib/daru.rb +2 -1
- data/lib/daru/dataframe.rb +64 -26
- data/lib/daru/monkeys.rb +39 -0
- data/lib/daru/vector.rb +20 -42
- data/lib/version.rb +1 -1
- data/spec/mri/dataframe_spec.rb +28 -4
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e425d7fab01db79087549701e3e9e95df2b495d1
|
4
|
+
data.tar.gz: f3b186c9405b1fb14fb7b95a2ecde7a291decbb4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fd89ad49623169a8caf335746094a38e2613864ec05f059488773285deaff87eaa5a8a68b6f60a2a12e37f5ad29cc800e8cdb7b0fba12eeb3a21b9bb3dbcca69
|
7
|
+
data.tar.gz: 0b1458e1df11590ca858e9350503eef9a29420661ea92d1c73907ee47c33879637260f489bd3451aa9d7774d69e20b639e17ecdfac339c18f710c6ba25955908
|
data/History.txt
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
== 0.0.1
|
2
|
+
* Added classes for DataFrame and Vector along with some super-basic functions to get off the ground.
|
3
|
+
|
4
|
+
== 0.0.2
|
5
|
+
* Added iterators for dataframe and vector along with printing functions (to_html) to interface properly with iRuby notebook.
|
6
|
+
|
7
|
+
== 0.0.2.1
|
8
|
+
* Fixed bugs in the previous code and added more iterators.
|
9
|
+
|
10
|
+
== 0.0.2.2
|
11
|
+
* Added test cases and multiple column access through the [] operator on DataFrames
|
12
|
+
|
13
|
+
== 0.0.2.3
|
14
|
+
* Added #filter_rows and #delete_row to DataFrame and changed #row to return a Hash mapping each column name to its value in that row.
|
15
|
+
* Vector objects passed into a DataFrame are now duplicated so that any changes don't affect the original vector.
|
16
|
+
* Added an optional opts argument to DataFrame.
|
17
|
+
* Passing more fields than vectors to DataFrame now adds nil-filled vectors for the extra fields.
|
data/README.md
CHANGED
@@ -31,12 +31,11 @@ Then switch to MRI, do a normal `bundle install` followed by `rspec` for testing
|
|
31
31
|
|
32
32
|
## Roadmap
|
33
33
|
|
34
|
-
* Print dataframe in same order as fields.
|
35
|
-
* Return rows in same order as that of fields.
|
36
34
|
* Automate testing for both MRI and JRuby.
|
37
35
|
* Enable creation of DataFrame by only specifying an NMatrix/MDArray in initialize. Vector naming happens automatically (alphabetic) or is specified in an Array.
|
38
36
|
* Add support for missing values in vectors.
|
39
|
-
*
|
37
|
+
* Destructive version #filter\_rows!
|
38
|
+
* NMatrix.first should return NMatrix (in vector).
|
40
39
|
* Completely test all functionality for NMatrix and MDArray.
|
41
40
|
* Basic Data manipulation and analysis operations:
|
42
41
|
- Different kinds of join operations
|
@@ -51,4 +50,5 @@ Then switch to MRI, do a normal `bundle install` followed by `rspec` for testing
|
|
51
50
|
- Runtime alteration of index.
|
52
51
|
* Indexing on DataFrame.
|
53
52
|
* Vector arithmetic - elementwise addition, subtraction, multiplication, division.
|
54
|
-
* Transpose a dataframe.
|
53
|
+
* Transpose a dataframe.
|
54
|
+
* Option to express a DataFrame as an NMatrix or MDArray so as to use more efficient storage techniques.
|
data/lib/daru.rb
CHANGED
data/lib/daru/dataframe.rb
CHANGED
@@ -9,18 +9,25 @@ module Daru
|
|
9
9
|
|
10
10
|
attr_reader :name
|
11
11
|
|
12
|
-
def initialize source, fields=[], name=SecureRandom.uuid
|
12
|
+
def initialize source, fields=[], name=SecureRandom.uuid, opts={}
|
13
|
+
@opts = opts
|
14
|
+
set_default_opts
|
15
|
+
|
13
16
|
if source.empty?
|
14
17
|
@vectors = fields.inject({}){ |a,x| a[x]=Daru::Vector.new; a}
|
15
18
|
else
|
16
|
-
@vectors = source
|
19
|
+
@vectors = source.inject({}) do |acc, h|
|
20
|
+
acc[h[0]] = h[1].dv.dup
|
21
|
+
acc
|
22
|
+
end
|
17
23
|
end
|
18
24
|
|
19
25
|
@fields = fields.empty? ? source.keys.sort : fields
|
20
26
|
@name = name
|
21
|
-
|
27
|
+
|
22
28
|
check_length
|
23
|
-
|
29
|
+
set_missing_vectors if @vectors.keys.size < @fields.size
|
30
|
+
set_fields_order if @vectors.keys.sort != @fields.sort
|
24
31
|
set_vector_names
|
25
32
|
end
|
26
33
|
|
@@ -53,20 +60,36 @@ module Daru
|
|
53
60
|
@vectors[name]
|
54
61
|
end
|
55
62
|
|
56
|
-
def
|
63
|
+
def delete_vector name
|
57
64
|
@vectors.delete name
|
58
65
|
@fields.delete name
|
59
66
|
end
|
60
67
|
|
61
|
-
|
62
|
-
|
63
|
-
# end
|
68
|
+
alias_method :delete, :delete_vector
|
64
69
|
|
65
|
-
|
66
|
-
|
67
|
-
|
70
|
+
def delete_row index
|
71
|
+
# TODO: Make this work with NMatrix and MDArray
|
72
|
+
raise "Expected index less than size." if index > @size
|
73
|
+
|
74
|
+
@fields.each do |field|
|
75
|
+
@vectors[field].delete index
|
76
|
+
end
|
77
|
+
puts @vectors
|
78
|
+
end
|
79
|
+
|
80
|
+
def filter_rows name=self.name, &block
|
81
|
+
df = DataFrame.new({}, @fields, name)
|
82
|
+
|
83
|
+
self.each_row do |row|
|
84
|
+
keep_row = yield row
|
85
|
+
|
86
|
+
df.insert_row(row.values) if keep_row
|
87
|
+
end
|
68
88
|
|
69
|
-
|
89
|
+
df
|
90
|
+
end
|
91
|
+
|
92
|
+
def [] *name
|
70
93
|
unless name[1]
|
71
94
|
return column(name[0])
|
72
95
|
end
|
@@ -81,21 +104,21 @@ module Daru
|
|
81
104
|
DataFrame.new h, req_fields, @name
|
82
105
|
end
|
83
106
|
|
84
|
-
def ==
|
107
|
+
def == other
|
85
108
|
@name == other.name and @vectors == other.vectors and
|
86
109
|
@size == other.size and @fields == other.fields
|
87
110
|
end
|
88
111
|
|
89
|
-
def []=
|
112
|
+
def []= name, vector
|
90
113
|
insert_vector name, vector
|
91
114
|
end
|
92
115
|
|
93
116
|
def row index
|
94
117
|
raise Exception, "Expected index to be within bounds" if index > @size
|
95
118
|
|
96
|
-
row =
|
97
|
-
self.
|
98
|
-
row
|
119
|
+
row = {}
|
120
|
+
self.each_vector do |column|
|
121
|
+
row[column.name] = column[index]
|
99
122
|
end
|
100
123
|
|
101
124
|
row
|
@@ -105,7 +128,7 @@ module Daru
|
|
105
128
|
!!@vectors[vector]
|
106
129
|
end
|
107
130
|
|
108
|
-
def each_row
|
131
|
+
def each_row(&block)
|
109
132
|
0.upto(@size-1) do |index|
|
110
133
|
yield row(index)
|
111
134
|
end
|
@@ -113,7 +136,7 @@ module Daru
|
|
113
136
|
self
|
114
137
|
end
|
115
138
|
|
116
|
-
def each_row_with_index
|
139
|
+
def each_row_with_index(&block)
|
117
140
|
0.upto(@size-1) do |index|
|
118
141
|
yield row(index), index
|
119
142
|
end
|
@@ -121,7 +144,7 @@ module Daru
|
|
121
144
|
self
|
122
145
|
end
|
123
146
|
|
124
|
-
def
|
147
|
+
def each_vector(&block)
|
125
148
|
@fields.each do |field|
|
126
149
|
yield @vectors[field]
|
127
150
|
end
|
@@ -129,7 +152,7 @@ module Daru
|
|
129
152
|
self
|
130
153
|
end
|
131
154
|
|
132
|
-
def
|
155
|
+
def each_vector_with_name(&block)
|
133
156
|
@fields.each do |field|
|
134
157
|
yield @vectors[field], field
|
135
158
|
end
|
@@ -152,6 +175,8 @@ module Daru
|
|
152
175
|
@fields.each_with_index do |field, index|
|
153
176
|
@vectors[field] << row[index]
|
154
177
|
end
|
178
|
+
|
179
|
+
@size += 1
|
155
180
|
end
|
156
181
|
|
157
182
|
def to_html(threshold=15)
|
@@ -164,7 +189,7 @@ module Daru
|
|
164
189
|
self.each_row_with_index do |row, index|
|
165
190
|
break if index > threshold and index <= @size
|
166
191
|
html += '<tr>'
|
167
|
-
row.
|
192
|
+
row.each_value { |val| html.concat('<td>' + val.to_s + '</td>') }
|
168
193
|
html += '</tr>'
|
169
194
|
if index == threshold
|
170
195
|
html += '<tr>'
|
@@ -206,17 +231,30 @@ module Daru
|
|
206
231
|
@size = size
|
207
232
|
end
|
208
233
|
|
209
|
-
def set_fields_order
|
210
|
-
@fields = @
|
211
|
-
@fields += @
|
234
|
+
def set_fields_order # vectors more than specified fields
|
235
|
+
@fields = @fields & @vectors.keys
|
236
|
+
@fields += @vectors.keys.sort - @fields
|
212
237
|
end
|
213
238
|
|
214
239
|
# Writes names specified in the hash to the actual name of the vector.
|
215
240
|
# Will over-ride any previous name assigned to the vector.
|
216
|
-
def set_vector_names
|
241
|
+
def set_vector_names
|
217
242
|
@fields.each do |name|
|
218
243
|
@vectors[name].name = name
|
219
244
|
end
|
220
245
|
end
|
246
|
+
|
247
|
+
def set_default_opts
|
248
|
+
# Future proofing
|
249
|
+
end
|
250
|
+
|
251
|
+
def set_missing_vectors
|
252
|
+
missing_fields = @fields - @vectors.keys
|
253
|
+
|
254
|
+
missing_fields.each do |field|
|
255
|
+
@vectors[field] = ([nil]*@size).dv
|
256
|
+
@fields << field
|
257
|
+
end
|
258
|
+
end
|
221
259
|
end
|
222
260
|
end
|
data/lib/daru/monkeys.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
class Array
|
2
|
+
def daru_vector name=nil
|
3
|
+
Daru::Vector.new self, name
|
4
|
+
end
|
5
|
+
|
6
|
+
alias_method :dv, :daru_vector
|
7
|
+
end
|
8
|
+
|
9
|
+
class Range
|
10
|
+
def daru_vector name=nil
|
11
|
+
Daru::Vector.new self, name
|
12
|
+
end
|
13
|
+
|
14
|
+
alias_method :dv, :daru_vector
|
15
|
+
end
|
16
|
+
|
17
|
+
class Hash
|
18
|
+
def daru_vector
|
19
|
+
Daru::Vector.new self.values[0], self.keys[0]
|
20
|
+
end
|
21
|
+
|
22
|
+
alias_method :dv, :daru_vector
|
23
|
+
end
|
24
|
+
|
25
|
+
class NMatrix
|
26
|
+
def daru_vector name=nil
|
27
|
+
Daru::Vector.new self
|
28
|
+
end
|
29
|
+
|
30
|
+
alias_method :dv, :daru_vector
|
31
|
+
end
|
32
|
+
|
33
|
+
class MDArray
|
34
|
+
def daru_vector name=nil
|
35
|
+
Daru::Vector.new self, name
|
36
|
+
end
|
37
|
+
|
38
|
+
alias_method :dv, :daru_vector
|
39
|
+
end
|
data/lib/daru/vector.rb
CHANGED
@@ -1,43 +1,3 @@
|
|
1
|
-
class Array
|
2
|
-
def daru_vector
|
3
|
-
Daru::Vector.new self
|
4
|
-
end
|
5
|
-
|
6
|
-
alias_method :dv, :daru_vector
|
7
|
-
end
|
8
|
-
|
9
|
-
class Range
|
10
|
-
def daru_vector
|
11
|
-
Daru::Vector.new self
|
12
|
-
end
|
13
|
-
|
14
|
-
alias_method :dv, :daru_vector
|
15
|
-
end
|
16
|
-
|
17
|
-
class Hash
|
18
|
-
def daru_vector
|
19
|
-
Daru::Vector.new self.values[0], self.keys[0]
|
20
|
-
end
|
21
|
-
|
22
|
-
alias_method :dv, :daru_vector
|
23
|
-
end
|
24
|
-
|
25
|
-
class NMatrix
|
26
|
-
def daru_vector
|
27
|
-
Daru::Vector.new self
|
28
|
-
end
|
29
|
-
|
30
|
-
alias_method :dv, :daru_vector
|
31
|
-
end
|
32
|
-
|
33
|
-
class MDArray
|
34
|
-
def daru_vector
|
35
|
-
Daru::Vector.new self
|
36
|
-
end
|
37
|
-
|
38
|
-
alias_method :dv, :daru_vector
|
39
|
-
end
|
40
|
-
|
41
1
|
module Daru
|
42
2
|
class Vector
|
43
3
|
include Enumerable
|
@@ -79,11 +39,13 @@ module Daru
|
|
79
39
|
end
|
80
40
|
|
81
41
|
def ==(other)
|
82
|
-
other.vector == @vector and other.name == @name
|
42
|
+
other.vector == @vector and other.name == @name and other.size == @size
|
83
43
|
end
|
84
44
|
|
85
45
|
def <<(element)
|
86
46
|
@vector << element
|
47
|
+
|
48
|
+
@size += 1
|
87
49
|
end
|
88
50
|
|
89
51
|
def to_json
|
@@ -104,6 +66,12 @@ module Daru
|
|
104
66
|
lim == 1 ? @vector.first : @vector.first(lim)
|
105
67
|
end
|
106
68
|
|
69
|
+
def delete index
|
70
|
+
@vector[index] = nil
|
71
|
+
@vector.compact!
|
72
|
+
@size -= 1
|
73
|
+
end
|
74
|
+
|
107
75
|
def to_html threshold=15
|
108
76
|
html = '<table><tr><th>' + @name.to_s + '</th></tr>>'
|
109
77
|
|
@@ -117,7 +85,17 @@ module Daru
|
|
117
85
|
end
|
118
86
|
|
119
87
|
def dup
|
120
|
-
Daru::Vector.new @vector.dup, @name
|
88
|
+
Daru::Vector.new @vector.dup, @name
|
89
|
+
end
|
90
|
+
|
91
|
+
def daru_vector
|
92
|
+
self
|
93
|
+
end
|
94
|
+
|
95
|
+
alias_method :dv, :daru_vector
|
96
|
+
|
97
|
+
def compact!
|
98
|
+
@vector.compact!
|
121
99
|
end
|
122
100
|
end
|
123
101
|
end
|
data/lib/version.rb
CHANGED
data/spec/mri/dataframe_spec.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
3
|
describe Daru::DataFrame do
|
4
|
-
context "DataFrame
|
4
|
+
context "DataFrame of Array" do
|
5
5
|
|
6
6
|
before :each do
|
7
7
|
@df = Daru::DataFrame.new({a: Daru::Vector.new(1..3),
|
@@ -36,12 +36,12 @@ describe Daru::DataFrame do
|
|
36
36
|
it "returns a row" do
|
37
37
|
r = @df.row 0
|
38
38
|
|
39
|
-
expect(r).to eq(
|
39
|
+
expect(r).to eq({:b_bad=> "Jesse", :a=> 1, :b=> 50})
|
40
40
|
end
|
41
41
|
|
42
42
|
it "iterates over columns in the specified order" do
|
43
43
|
cols = []
|
44
|
-
df = @df.
|
44
|
+
df = @df.each_vector do |col|
|
45
45
|
expect(col.is_a?(Daru::Vector)).to be(true)
|
46
46
|
cols << col.name
|
47
47
|
end
|
@@ -56,6 +56,13 @@ describe Daru::DataFrame do
|
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
59
|
+
it "filters rows" do
|
60
|
+
res = @df.filter_rows(@df.name) { |row| row[:b] == 50 }
|
61
|
+
|
62
|
+
expect(res).to eq(Daru::DataFrame.new({a: [1].dv, b: [50].dv, b_bad: ['Jesse'].dv},
|
63
|
+
@df.fields, @df.name))
|
64
|
+
end
|
65
|
+
|
59
66
|
it "shows column fields" do
|
60
67
|
expect(@df.fields).to eq([:b_bad, :a, :b])
|
61
68
|
end
|
@@ -90,9 +97,26 @@ describe Daru::DataFrame do
|
|
90
97
|
|
91
98
|
expect(@df[:a, :b]).to eq(req)
|
92
99
|
end
|
100
|
+
|
101
|
+
it "creates DataFrame from Array" do
|
102
|
+
a_df = Daru::DataFrame.new({a: [1,2,3,4], b: [10,11,12,13]})
|
103
|
+
|
104
|
+
expect(a_df.a.is_a? Daru::Vector).to eq(true)
|
105
|
+
expect(a_df.a.vector).to eq([1,2,3,4])
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
context "Malformed DataFrame from Array" do
|
110
|
+
it "adds extra nil vectors from fields" do
|
111
|
+
df = Daru::DataFrame.new({a: (1..4).dv, b: (50..53).dv}, [:b, :a, :jazzy, :joe])
|
112
|
+
|
113
|
+
expect(df.fields).to eq([:b, :a, :jazzy, :joe])
|
114
|
+
expect(df.jazzy).to eq(([nil]*4).dv(:jazzy))
|
115
|
+
expect(df.joe).to eq(([nil]*4).dv(:joe))
|
116
|
+
end
|
93
117
|
end
|
94
118
|
|
95
|
-
context "DataFrame
|
119
|
+
context "DataFrame from files" do
|
96
120
|
|
97
121
|
it "loads a DataFrame from CSV" do
|
98
122
|
df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: daru
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2.
|
4
|
+
version: 0.0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sameer Deshmukh
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -72,6 +72,7 @@ files:
|
|
72
72
|
- daru.gemspec
|
73
73
|
- lib/daru.rb
|
74
74
|
- lib/daru/dataframe.rb
|
75
|
+
- lib/daru/monkeys.rb
|
75
76
|
- lib/daru/vector.rb
|
76
77
|
- lib/version.rb
|
77
78
|
- spec/fixtures/matrix_test.csv
|