daru 0.0.2.2 → 0.0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +17 -0
- data/README.md +4 -4
- data/lib/daru.rb +2 -1
- data/lib/daru/dataframe.rb +64 -26
- data/lib/daru/monkeys.rb +39 -0
- data/lib/daru/vector.rb +20 -42
- data/lib/version.rb +1 -1
- data/spec/mri/dataframe_spec.rb +28 -4
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e425d7fab01db79087549701e3e9e95df2b495d1
|
4
|
+
data.tar.gz: f3b186c9405b1fb14fb7b95a2ecde7a291decbb4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fd89ad49623169a8caf335746094a38e2613864ec05f059488773285deaff87eaa5a8a68b6f60a2a12e37f5ad29cc800e8cdb7b0fba12eeb3a21b9bb3dbcca69
|
7
|
+
data.tar.gz: 0b1458e1df11590ca858e9350503eef9a29420661ea92d1c73907ee47c33879637260f489bd3451aa9d7774d69e20b639e17ecdfac339c18f710c6ba25955908
|
data/History.txt
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
== 0.0.1
|
2
|
+
* Added classes for DataFrame and Vector along with some super-basic functions to get off the ground
|
3
|
+
|
4
|
+
== 0.0.2
|
5
|
+
* Added iterators for DataFrame and Vector along with printing functions (to_html) to interface properly with the IRuby notebook.
|
6
|
+
|
7
|
+
== 0.0.2.1
|
8
|
+
* Fixed bugs with previous code and more iterators
|
9
|
+
|
10
|
+
== 0.0.2.2
|
11
|
+
* Added test cases and multiple column access through the [] operator on DataFrames
|
12
|
+
|
13
|
+
== 0.0.2.3
|
14
|
+
* Added #filter_rows and #delete_row to DataFrame and changed #row to return a row containing a Hash of column name and value.
|
15
|
+
* Vector objects passed into a DataFrame are now duplicated so that any changes don't affect the original vector.
|
16
|
+
* Added an optional opts argument to DataFrame.
|
17
|
+
* Sending more fields than vectors in DataFrame will cause addition of nil vectors.
|
data/README.md
CHANGED
@@ -31,12 +31,11 @@ Then switch to MRI, do a normal `bundle install` followed by `rspec` for testing
|
|
31
31
|
|
32
32
|
## Roadmap
|
33
33
|
|
34
|
-
* Print dataframe in same order as fields.
|
35
|
-
* Return rows in same order as that of fields.
|
36
34
|
* Automate testing for both MRI and JRuby.
|
37
35
|
* Enable creation of DataFrame by only specifying an NMatrix/MDArray in initialize. Vector naming happens automatically (alphabetic) or is specified in an Array.
|
38
36
|
* Add support for missing values in vectors.
|
39
|
-
*
|
37
|
+
* Destructive version #filter\_rows!
|
38
|
+
* NMatrix.first should return NMatrix (in vector).
|
40
39
|
* Completely test all functionality for NMatrix and MDArray.
|
41
40
|
* Basic Data manipulation and analysis operations:
|
42
41
|
- Different kinds of join operations
|
@@ -51,4 +50,5 @@ Then switch to MRI, do a normal `bundle install` followed by `rspec` for testing
|
|
51
50
|
- Runtime alteration of index.
|
52
51
|
* Indexing on DataFrame.
|
53
52
|
* Vector arithmetic - elementwise addition, subtraction, multiplication, division.
|
54
|
-
* Transpose a dataframe.
|
53
|
+
* Transpose a dataframe.
|
54
|
+
* Option to express a DataFrame as an NMatrix or MDArray so as to use more efficient storage techniques.
|
data/lib/daru.rb
CHANGED
data/lib/daru/dataframe.rb
CHANGED
@@ -9,18 +9,25 @@ module Daru
|
|
9
9
|
|
10
10
|
attr_reader :name
|
11
11
|
|
12
|
-
def initialize source, fields=[], name=SecureRandom.uuid
|
12
|
+
def initialize source, fields=[], name=SecureRandom.uuid, opts={}
|
13
|
+
@opts = opts
|
14
|
+
set_default_opts
|
15
|
+
|
13
16
|
if source.empty?
|
14
17
|
@vectors = fields.inject({}){ |a,x| a[x]=Daru::Vector.new; a}
|
15
18
|
else
|
16
|
-
@vectors = source
|
19
|
+
@vectors = source.inject({}) do |acc, h|
|
20
|
+
acc[h[0]] = h[1].dv.dup
|
21
|
+
acc
|
22
|
+
end
|
17
23
|
end
|
18
24
|
|
19
25
|
@fields = fields.empty? ? source.keys.sort : fields
|
20
26
|
@name = name
|
21
|
-
|
27
|
+
|
22
28
|
check_length
|
23
|
-
|
29
|
+
set_missing_vectors if @vectors.keys.size < @fields.size
|
30
|
+
set_fields_order if @vectors.keys.sort != @fields.sort
|
24
31
|
set_vector_names
|
25
32
|
end
|
26
33
|
|
@@ -53,20 +60,36 @@ module Daru
|
|
53
60
|
@vectors[name]
|
54
61
|
end
|
55
62
|
|
56
|
-
def
|
63
|
+
def delete_vector name
|
57
64
|
@vectors.delete name
|
58
65
|
@fields.delete name
|
59
66
|
end
|
60
67
|
|
61
|
-
|
62
|
-
|
63
|
-
# end
|
68
|
+
alias_method :delete, :delete_vector
|
64
69
|
|
65
|
-
|
66
|
-
|
67
|
-
|
70
|
+
def delete_row index
|
71
|
+
# TODO: Make this work with NMatrix and MDArray
|
72
|
+
raise "Expected index less than size." if index > @size
|
73
|
+
|
74
|
+
@fields.each do |field|
|
75
|
+
@vectors[field].delete index
|
76
|
+
end
|
77
|
+
puts @vectors
|
78
|
+
end
|
79
|
+
|
80
|
+
def filter_rows name=self.name, &block
|
81
|
+
df = DataFrame.new({}, @fields, name)
|
82
|
+
|
83
|
+
self.each_row do |row|
|
84
|
+
keep_row = yield row
|
85
|
+
|
86
|
+
df.insert_row(row.values) if keep_row
|
87
|
+
end
|
68
88
|
|
69
|
-
|
89
|
+
df
|
90
|
+
end
|
91
|
+
|
92
|
+
def [] *name
|
70
93
|
unless name[1]
|
71
94
|
return column(name[0])
|
72
95
|
end
|
@@ -81,21 +104,21 @@ module Daru
|
|
81
104
|
DataFrame.new h, req_fields, @name
|
82
105
|
end
|
83
106
|
|
84
|
-
def ==
|
107
|
+
def == other
|
85
108
|
@name == other.name and @vectors == other.vectors and
|
86
109
|
@size == other.size and @fields == other.fields
|
87
110
|
end
|
88
111
|
|
89
|
-
def []=
|
112
|
+
def []= name, vector
|
90
113
|
insert_vector name, vector
|
91
114
|
end
|
92
115
|
|
93
116
|
def row index
|
94
117
|
raise Exception, "Expected index to be within bounds" if index > @size
|
95
118
|
|
96
|
-
row =
|
97
|
-
self.
|
98
|
-
row
|
119
|
+
row = {}
|
120
|
+
self.each_vector do |column|
|
121
|
+
row[column.name] = column[index]
|
99
122
|
end
|
100
123
|
|
101
124
|
row
|
@@ -105,7 +128,7 @@ module Daru
|
|
105
128
|
!!@vectors[vector]
|
106
129
|
end
|
107
130
|
|
108
|
-
def each_row
|
131
|
+
def each_row(&block)
|
109
132
|
0.upto(@size-1) do |index|
|
110
133
|
yield row(index)
|
111
134
|
end
|
@@ -113,7 +136,7 @@ module Daru
|
|
113
136
|
self
|
114
137
|
end
|
115
138
|
|
116
|
-
def each_row_with_index
|
139
|
+
def each_row_with_index(&block)
|
117
140
|
0.upto(@size-1) do |index|
|
118
141
|
yield row(index), index
|
119
142
|
end
|
@@ -121,7 +144,7 @@ module Daru
|
|
121
144
|
self
|
122
145
|
end
|
123
146
|
|
124
|
-
def
|
147
|
+
def each_vector(&block)
|
125
148
|
@fields.each do |field|
|
126
149
|
yield @vectors[field]
|
127
150
|
end
|
@@ -129,7 +152,7 @@ module Daru
|
|
129
152
|
self
|
130
153
|
end
|
131
154
|
|
132
|
-
def
|
155
|
+
def each_vector_with_name(&block)
|
133
156
|
@fields.each do |field|
|
134
157
|
yield @vectors[field], field
|
135
158
|
end
|
@@ -152,6 +175,8 @@ module Daru
|
|
152
175
|
@fields.each_with_index do |field, index|
|
153
176
|
@vectors[field] << row[index]
|
154
177
|
end
|
178
|
+
|
179
|
+
@size += 1
|
155
180
|
end
|
156
181
|
|
157
182
|
def to_html(threshold=15)
|
@@ -164,7 +189,7 @@ module Daru
|
|
164
189
|
self.each_row_with_index do |row, index|
|
165
190
|
break if index > threshold and index <= @size
|
166
191
|
html += '<tr>'
|
167
|
-
row.
|
192
|
+
row.each_value { |val| html.concat('<td>' + val.to_s + '</td>') }
|
168
193
|
html += '</tr>'
|
169
194
|
if index == threshold
|
170
195
|
html += '<tr>'
|
@@ -206,17 +231,30 @@ module Daru
|
|
206
231
|
@size = size
|
207
232
|
end
|
208
233
|
|
209
|
-
def set_fields_order
|
210
|
-
@fields = @
|
211
|
-
@fields += @
|
234
|
+
def set_fields_order # vectors more than specified fields
|
235
|
+
@fields = @fields & @vectors.keys
|
236
|
+
@fields += @vectors.keys.sort - @fields
|
212
237
|
end
|
213
238
|
|
214
239
|
# Writes names specified in the hash to the actual name of the vector.
|
215
240
|
# Will over-ride any previous name assigned to the vector.
|
216
|
-
def set_vector_names
|
241
|
+
def set_vector_names
|
217
242
|
@fields.each do |name|
|
218
243
|
@vectors[name].name = name
|
219
244
|
end
|
220
245
|
end
|
246
|
+
|
247
|
+
def set_default_opts
|
248
|
+
# Future proofing
|
249
|
+
end
|
250
|
+
|
251
|
+
def set_missing_vectors
|
252
|
+
missing_fields = @fields - @vectors.keys
|
253
|
+
|
254
|
+
missing_fields.each do |field|
|
255
|
+
@vectors[field] = ([nil]*@size).dv
|
256
|
+
@fields << field
|
257
|
+
end
|
258
|
+
end
|
221
259
|
end
|
222
260
|
end
|
data/lib/daru/monkeys.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
class Array
|
2
|
+
def daru_vector name=nil
|
3
|
+
Daru::Vector.new self, name
|
4
|
+
end
|
5
|
+
|
6
|
+
alias_method :dv, :daru_vector
|
7
|
+
end
|
8
|
+
|
9
|
+
class Range
|
10
|
+
def daru_vector name=nil
|
11
|
+
Daru::Vector.new self, name
|
12
|
+
end
|
13
|
+
|
14
|
+
alias_method :dv, :daru_vector
|
15
|
+
end
|
16
|
+
|
17
|
+
class Hash
|
18
|
+
def daru_vector
|
19
|
+
Daru::Vector.new self.values[0], self.keys[0]
|
20
|
+
end
|
21
|
+
|
22
|
+
alias_method :dv, :daru_vector
|
23
|
+
end
|
24
|
+
|
25
|
+
class NMatrix
|
26
|
+
def daru_vector name=nil
|
27
|
+
Daru::Vector.new self
|
28
|
+
end
|
29
|
+
|
30
|
+
alias_method :dv, :daru_vector
|
31
|
+
end
|
32
|
+
|
33
|
+
class MDArray
|
34
|
+
def daru_vector name=nil
|
35
|
+
Daru::Vector.new self, name
|
36
|
+
end
|
37
|
+
|
38
|
+
alias_method :dv, :daru_vector
|
39
|
+
end
|
data/lib/daru/vector.rb
CHANGED
@@ -1,43 +1,3 @@
|
|
1
|
-
class Array
|
2
|
-
def daru_vector
|
3
|
-
Daru::Vector.new self
|
4
|
-
end
|
5
|
-
|
6
|
-
alias_method :dv, :daru_vector
|
7
|
-
end
|
8
|
-
|
9
|
-
class Range
|
10
|
-
def daru_vector
|
11
|
-
Daru::Vector.new self
|
12
|
-
end
|
13
|
-
|
14
|
-
alias_method :dv, :daru_vector
|
15
|
-
end
|
16
|
-
|
17
|
-
class Hash
|
18
|
-
def daru_vector
|
19
|
-
Daru::Vector.new self.values[0], self.keys[0]
|
20
|
-
end
|
21
|
-
|
22
|
-
alias_method :dv, :daru_vector
|
23
|
-
end
|
24
|
-
|
25
|
-
class NMatrix
|
26
|
-
def daru_vector
|
27
|
-
Daru::Vector.new self
|
28
|
-
end
|
29
|
-
|
30
|
-
alias_method :dv, :daru_vector
|
31
|
-
end
|
32
|
-
|
33
|
-
class MDArray
|
34
|
-
def daru_vector
|
35
|
-
Daru::Vector.new self
|
36
|
-
end
|
37
|
-
|
38
|
-
alias_method :dv, :daru_vector
|
39
|
-
end
|
40
|
-
|
41
1
|
module Daru
|
42
2
|
class Vector
|
43
3
|
include Enumerable
|
@@ -79,11 +39,13 @@ module Daru
|
|
79
39
|
end
|
80
40
|
|
81
41
|
def ==(other)
|
82
|
-
other.vector == @vector and other.name == @name
|
42
|
+
other.vector == @vector and other.name == @name and other.size == @size
|
83
43
|
end
|
84
44
|
|
85
45
|
def <<(element)
|
86
46
|
@vector << element
|
47
|
+
|
48
|
+
@size += 1
|
87
49
|
end
|
88
50
|
|
89
51
|
def to_json
|
@@ -104,6 +66,12 @@ module Daru
|
|
104
66
|
lim == 1 ? @vector.first : @vector.first(lim)
|
105
67
|
end
|
106
68
|
|
69
|
+
def delete index
|
70
|
+
@vector[index] = nil
|
71
|
+
@vector.compact!
|
72
|
+
@size -= 1
|
73
|
+
end
|
74
|
+
|
107
75
|
def to_html threshold=15
|
108
76
|
html = '<table><tr><th>' + @name.to_s + '</th></tr>>'
|
109
77
|
|
@@ -117,7 +85,17 @@ module Daru
|
|
117
85
|
end
|
118
86
|
|
119
87
|
def dup
|
120
|
-
Daru::Vector.new @vector.dup, @name
|
88
|
+
Daru::Vector.new @vector.dup, @name
|
89
|
+
end
|
90
|
+
|
91
|
+
def daru_vector
|
92
|
+
self
|
93
|
+
end
|
94
|
+
|
95
|
+
alias_method :dv, :daru_vector
|
96
|
+
|
97
|
+
def compact!
|
98
|
+
@vector.compact!
|
121
99
|
end
|
122
100
|
end
|
123
101
|
end
|
data/lib/version.rb
CHANGED
data/spec/mri/dataframe_spec.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
3
|
describe Daru::DataFrame do
|
4
|
-
context "DataFrame
|
4
|
+
context "DataFrame of Array" do
|
5
5
|
|
6
6
|
before :each do
|
7
7
|
@df = Daru::DataFrame.new({a: Daru::Vector.new(1..3),
|
@@ -36,12 +36,12 @@ describe Daru::DataFrame do
|
|
36
36
|
it "returns a row" do
|
37
37
|
r = @df.row 0
|
38
38
|
|
39
|
-
expect(r).to eq(
|
39
|
+
expect(r).to eq({:b_bad=> "Jesse", :a=> 1, :b=> 50})
|
40
40
|
end
|
41
41
|
|
42
42
|
it "iterates over columns in the specified order" do
|
43
43
|
cols = []
|
44
|
-
df = @df.
|
44
|
+
df = @df.each_vector do |col|
|
45
45
|
expect(col.is_a?(Daru::Vector)).to be(true)
|
46
46
|
cols << col.name
|
47
47
|
end
|
@@ -56,6 +56,13 @@ describe Daru::DataFrame do
|
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
59
|
+
it "filters rows" do
|
60
|
+
res = @df.filter_rows(@df.name) { |row| row[:b] == 50 }
|
61
|
+
|
62
|
+
expect(res).to eq(Daru::DataFrame.new({a: [1].dv, b: [50].dv, b_bad: ['Jesse'].dv},
|
63
|
+
@df.fields, @df.name))
|
64
|
+
end
|
65
|
+
|
59
66
|
it "shows column fields" do
|
60
67
|
expect(@df.fields).to eq([:b_bad, :a, :b])
|
61
68
|
end
|
@@ -90,9 +97,26 @@ describe Daru::DataFrame do
|
|
90
97
|
|
91
98
|
expect(@df[:a, :b]).to eq(req)
|
92
99
|
end
|
100
|
+
|
101
|
+
it "creates DataFrame from Array" do
|
102
|
+
a_df = Daru::DataFrame.new({a: [1,2,3,4], b: [10,11,12,13]})
|
103
|
+
|
104
|
+
expect(a_df.a.is_a? Daru::Vector).to eq(true)
|
105
|
+
expect(a_df.a.vector).to eq([1,2,3,4])
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
context "Malformed DataFrame from Array" do
|
110
|
+
it "adds extra nil vectors from fields" do
|
111
|
+
df = Daru::DataFrame.new({a: (1..4).dv, b: (50..53).dv}, [:b, :a, :jazzy, :joe])
|
112
|
+
|
113
|
+
expect(df.fields).to eq([:b, :a, :jazzy, :joe])
|
114
|
+
expect(df.jazzy).to eq(([nil]*4).dv(:jazzy))
|
115
|
+
expect(df.joe).to eq(([nil]*4).dv(:joe))
|
116
|
+
end
|
93
117
|
end
|
94
118
|
|
95
|
-
context "DataFrame
|
119
|
+
context "DataFrame from files" do
|
96
120
|
|
97
121
|
it "loads a DataFrame from CSV" do
|
98
122
|
df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: daru
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2.
|
4
|
+
version: 0.0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sameer Deshmukh
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -72,6 +72,7 @@ files:
|
|
72
72
|
- daru.gemspec
|
73
73
|
- lib/daru.rb
|
74
74
|
- lib/daru/dataframe.rb
|
75
|
+
- lib/daru/monkeys.rb
|
75
76
|
- lib/daru/vector.rb
|
76
77
|
- lib/version.rb
|
77
78
|
- spec/fixtures/matrix_test.csv
|