daru 0.0.3.1 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +16 -0
- data/README.md +83 -23
- data/daru.gemspec +7 -0
- data/lib/daru/accessors/array_wrapper.rb +248 -0
- data/lib/daru/accessors/nmatrix_wrapper.rb +252 -0
- data/lib/daru/dataframe.rb +171 -72
- data/lib/daru/index.rb +29 -5
- data/lib/daru/io/io.rb +1 -1
- data/lib/daru/{math → maths}/arithmetic/dataframe.rb +1 -1
- data/lib/daru/maths/arithmetic/vector.rb +75 -0
- data/lib/daru/{math → maths}/statistics/dataframe.rb +1 -1
- data/lib/daru/maths/statistics/vector.rb +147 -0
- data/lib/daru/monkeys.rb +16 -10
- data/lib/daru/plotting/dataframe.rb +47 -0
- data/lib/daru/plotting/vector.rb +41 -0
- data/lib/daru/vector.rb +166 -40
- data/lib/version.rb +1 -1
- data/notebooks/intro_with_music_data_.ipynb +318 -0
- data/spec/dataframe_spec.rb +528 -472
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/index_spec.rb +8 -0
- data/spec/io/io_spec.rb +1 -0
- data/spec/math/statistics/vector_spec.rb +144 -3
- data/spec/vector_spec.rb +165 -148
- metadata +32 -6
- data/lib/daru/math/arithmetic/vector.rb +0 -71
- data/lib/daru/math/statistics/vector.rb +0 -9
@@ -1,9 +1,261 @@
|
|
1
|
+
require 'nmatrix'
|
2
|
+
|
1
3
|
module Daru
|
2
4
|
module Accessors
|
3
5
|
|
4
6
|
# Internal class for wrapping NMatrix
|
5
7
|
class NMatrixWrapper
|
8
|
+
module Statistics
|
9
|
+
# def average_deviation_population m=nil
|
10
|
+
# m ||= self.mean
|
11
|
+
# (self.reduce(0){|memo, val| val + (val - m).abs})/self.length
|
12
|
+
# end
|
13
|
+
|
14
|
+
# def coefficient_of_variation
|
15
|
+
# self.standard_deviation_sample/self.mean
|
16
|
+
# end
|
17
|
+
|
18
|
+
# def count x=false
|
19
|
+
# if block_given?
|
20
|
+
# self.reduce(0){|memo, val| memo += 1 if yield val; memo}
|
21
|
+
# else
|
22
|
+
# val = self.frequencies[x]
|
23
|
+
# val.nil? ? 0 : val
|
24
|
+
# end
|
25
|
+
# end
|
26
|
+
|
27
|
+
# def factors
|
28
|
+
# index = @data.sorted_indices
|
29
|
+
# index.reduce([]){|memo, val| memo.push(@data[val]) if memo.last != @data[val]; memo}
|
30
|
+
# end
|
31
|
+
|
32
|
+
# def frequencies
|
33
|
+
# index = @data.sorted_indices
|
34
|
+
# index.reduce({}){|memo, val| memo[@data[val]] ||= 0; memo[@data[val]] += 1; memo}
|
35
|
+
# end
|
36
|
+
|
37
|
+
# def has_missing_data?
|
38
|
+
# @missing_data
|
39
|
+
# end
|
40
|
+
|
41
|
+
# def is_valid?
|
42
|
+
# true
|
43
|
+
# end
|
44
|
+
|
45
|
+
# def kurtosis(m=nil)
|
46
|
+
# m ||= self.mean
|
47
|
+
# fo=self.reduce(0){|a, x| a+((x-m)**4)}
|
48
|
+
# fo.quo(self.length*sd(m)**4)-3
|
49
|
+
# end
|
50
|
+
|
51
|
+
# def mean
|
52
|
+
# @vector[0...@size].mean.first
|
53
|
+
# end
|
54
|
+
|
55
|
+
# def median
|
56
|
+
# self.percentil(50)
|
57
|
+
# end
|
58
|
+
|
59
|
+
# def median_absolute_deviation
|
60
|
+
# m = self.median
|
61
|
+
# self.recode{|val| (val-m).abs}.median
|
62
|
+
# end
|
63
|
+
|
64
|
+
# def mode
|
65
|
+
# self.frequencies.max
|
66
|
+
# end
|
67
|
+
|
68
|
+
# def ==(other)
|
69
|
+
# @data==other
|
70
|
+
# end
|
71
|
+
|
72
|
+
# def n_valid
|
73
|
+
# self.length
|
74
|
+
# end
|
75
|
+
|
76
|
+
# def percentil(percent)
|
77
|
+
# index = @data.sorted_indices
|
78
|
+
# pos = (self.length * percent)/100
|
79
|
+
# if pos.to_i == pos
|
80
|
+
# @data[index[pos.to_i]]
|
81
|
+
# else
|
82
|
+
# pos = (pos-0.5).to_i
|
83
|
+
# (@data[index[pos]] + @data[index[pos+1]])/2
|
84
|
+
# end
|
85
|
+
# end
|
86
|
+
|
87
|
+
# def product
|
88
|
+
# @data.inject(1){|memo, val| memo*val}
|
89
|
+
# end
|
90
|
+
|
91
|
+
# def proportion(val=1)
|
92
|
+
# self.frequencies[val]/self.n_valid
|
93
|
+
# end
|
94
|
+
|
95
|
+
# def proportion_confidence_interval_t
|
96
|
+
# raise "NotImplementedError"
|
97
|
+
# end
|
98
|
+
|
99
|
+
# def proportion_confidence_interval_z
|
100
|
+
# raise "NotImplementedError"
|
101
|
+
# end
|
102
|
+
|
103
|
+
# def proportions
|
104
|
+
# len = self.n_valid
|
105
|
+
# self.frequencies.reduce({}){|memo, arr| memo[arr[0]] = arr[1]/len}
|
106
|
+
# end
|
107
|
+
|
108
|
+
# def push(val)
|
109
|
+
# self.expand(self.length+1)
|
110
|
+
# self[self.length-1] = recode
|
111
|
+
# end
|
112
|
+
|
113
|
+
# def range
|
114
|
+
# max - min
|
115
|
+
# end
|
116
|
+
|
117
|
+
# def ranked
|
118
|
+
# sum = 0
|
119
|
+
# r = self.frequencies.sort.reduce({}) do |memo, val|
|
120
|
+
# memo[val[0]] = ((sum+1) + (sum+val[1]))/2
|
121
|
+
# sum += val[1]
|
122
|
+
# memo
|
123
|
+
# end
|
124
|
+
# Mikon::DArray.new(self.reduce{|val| r[val]})
|
125
|
+
# end
|
126
|
+
|
127
|
+
# def recode(&block)
|
128
|
+
# Mikon::DArray.new(@data.map(&block))
|
129
|
+
# end
|
130
|
+
|
131
|
+
# def recode!(&block)
|
132
|
+
# @data.map!(&block)
|
133
|
+
# end
|
134
|
+
|
135
|
+
# def skew(m=nil)
|
136
|
+
# m ||= self.mean
|
137
|
+
# th = self.reduce(0){|memo, val| memo + ((val - m)**3)}
|
138
|
+
# th/((self.length)*self.sd(m)**3)
|
139
|
+
# end
|
140
|
+
|
141
|
+
# def standard_deviation_population(m=nil)
|
142
|
+
# m ||= self.mean
|
143
|
+
# Maths.sqrt(self.variance_population(m))
|
144
|
+
# end
|
145
|
+
|
146
|
+
# def standard_deviation_sample(m=nil)
|
147
|
+
# if !m.nil?
|
148
|
+
# Maths.sqrt(variance_sample(m))
|
149
|
+
# else
|
150
|
+
# @data.std.first
|
151
|
+
# end
|
152
|
+
# end
|
153
|
+
|
154
|
+
# def standard_error
|
155
|
+
# self.standard_deviation_sample/(Maths.sqrt(self.length))
|
156
|
+
# end
|
157
|
+
|
158
|
+
# def sum_of_squared_deviation
|
159
|
+
# self.reduce(0){|memo, val| val**2 + memo}
|
160
|
+
# end
|
161
|
+
|
162
|
+
# def sum_of_squares(m=nil)
|
163
|
+
# m ||= self.mean
|
164
|
+
# self.reduce(0){|memo, val| memo + (val-m)**2}
|
165
|
+
# end
|
166
|
+
|
167
|
+
# def sum
|
168
|
+
# @data.sum.first
|
169
|
+
# end
|
170
|
+
|
171
|
+
# def variance_sample(m=nil)
|
172
|
+
# m ||= self.mean
|
173
|
+
# self.sum_of_squares(m)/(self.length-1)
|
174
|
+
# end
|
175
|
+
end # module Statistics
|
176
|
+
|
177
|
+
include Statistics
|
178
|
+
include Enumerable
|
179
|
+
|
180
|
+
# Yields each stored element to the block, in order.
#
# Only the first @size slots are visited. The constructor allocates the
# backing store at twice the data size, so iterating over the store directly
# would also yield the unused padding slots (and every Enumerable method
# built on #each would see them too).
def each(&block)
  @vector[0...@size].each(&block)
end
|
183
|
+
|
184
|
+
attr_reader :size, :vector, :missing_data
|
185
|
+
|
186
|
+
# Wraps +vector+ in an NMatrix-backed store.
#
# vector - object responding to #size and #to_a; its elements seed the store.
# caller - kept in @caller and handed on when coercing to another wrapper.
def initialize vector, caller
  @caller       = caller
  @missing_data = false
  @size         = vector.size
  # Allocate twice the storage actually needed to reduce the need to resize.
  @vector       = NMatrix.new [@size*2], vector.to_a
end
|
193
|
+
|
194
|
+
# Element reader: returns whatever the backing store holds at +index+.
def [](index)
  @vector[index]
end
|
197
|
+
|
198
|
+
# Stores +value+ at +index+ and returns +value+.
#
# The backing store is grown only when +index+ lies beyond its current
# capacity (@vector.size), and then far enough to actually contain +index+.
# Comparing against @size instead forced a reallocation on every append,
# because #<< always writes at position @size.
#
# Storing nil marks the wrapper as having missing data and casts the store
# to the :object dtype so that it can hold nil.
def []= index, value
  resize([index + 1, @size * 2].max) if index >= @vector.size

  if value.nil?
    @missing_data = true
    @vector = @vector.cast(dtype: :object)
  end

  @vector[index] = value
end
|
207
|
+
|
208
|
+
# Equality check: the backing store must compare equal to +other+ and the
# element counts must match.
def ==(other)
  (@vector == other) && (@size == other.size)
end
|
211
|
+
|
212
|
+
# Removes the element at position +index+ by rebuilding the backing store
# without it, then decrements @size. Returns the new @size.
def delete_at index
  remaining = @vector.to_a.tap { |values| values.delete_at(index) }
  @vector = NMatrix.new [@size-1], remaining
  @size -= 1
end
|
218
|
+
|
219
|
+
# Returns the position of the first stored element equal to +key+, or nil
# when there is none.
#
# Only the first @size slots are searched. The backing store is larger than
# the data, and its padding slots can still hold old values, so searching
# the whole store could report a position that is not part of the data.
def index key
  @vector[0...@size].to_a.index key
end
|
222
|
+
|
223
|
+
# Appends +element+ after the last stored element, growing the backing store
# first when it is already full. Returns the new element count.
def <<(element)
  resize if @size >= @vector.size
  self[@size] = element
  @size += 1
end
|
232
|
+
|
233
|
+
# Returns the stored elements as a plain Array.
#
# Only the first @size slots are converted; the backing store is allocated
# larger than the data and its padding slots are not part of the vector.
def to_a
  @vector[0...@size].to_a
end
|
236
|
+
|
237
|
+
# Returns a new NMatrixWrapper holding a copy of the stored elements.
#
# The constructor takes two arguments (data and caller), so @caller is
# passed along; calling it with the data alone raised ArgumentError.
# Only the first @size slots are copied so that the padding of the backing
# store does not become data in the duplicate.
def dup
  NMatrixWrapper.new @vector[0...@size].to_a, @caller
end
|
240
|
+
|
241
|
+
# Converts this wrapper to the storage class named by +dtype+.
#
# dtype - Array   : returns a Daru::Accessors::ArrayWrapper built from the
#                   first @size elements and @caller.
#         NMatrix : returns self.
#         MDArray : raises NotImplementedError.
#
# Raises ArgumentError for any other dtype.
def coerce dtype
  if dtype == Array
    Daru::Accessors::ArrayWrapper.new @vector[0..(@size-1)].to_a, @caller
  elsif dtype == NMatrix
    self
  elsif defined?(MDArray) && dtype == MDArray
    # NOTE(review): MDArray is not defined or required anywhere in view, so
    # the constant is only touched when it exists; otherwise an unsupported
    # dtype would surface as NameError rather than the ArgumentError below.
    raise NotImplementedError
  else
    raise ArgumentError, "Cant coerce to dtype #{dtype}"
  end
end
|
253
|
+
|
254
|
+
# Rebuilds the backing store with room for +size+ elements (twice the
# current element count by default), seeded from the current contents.
#
# Raises RuntimeError when +size+ is smaller than the current element count.
def resize size = @size*2
  if size < @size
    raise "Size must be greater than current size"
  end

  @vector = NMatrix.new [size], @vector.to_a
end
|
7
259
|
end
|
8
260
|
end
|
9
261
|
end
|
data/lib/daru/dataframe.rb
CHANGED
@@ -1,40 +1,94 @@
|
|
1
1
|
require_relative 'accessors/dataframe_by_row.rb'
|
2
2
|
require_relative 'accessors/dataframe_by_vector.rb'
|
3
|
-
require_relative '
|
4
|
-
require_relative '
|
3
|
+
require_relative 'maths/arithmetic/dataframe.rb'
|
4
|
+
require_relative 'maths/statistics/dataframe.rb'
|
5
|
+
require_relative 'plotting/dataframe.rb'
|
5
6
|
require_relative 'io/io.rb'
|
6
7
|
|
7
8
|
module Daru
|
8
9
|
class DataFrame
|
9
10
|
|
10
|
-
include Daru::
|
11
|
-
include Daru::
|
11
|
+
include Daru::Maths::Arithmetic::DataFrame
|
12
|
+
include Daru::Maths::Statistics::DataFrame
|
13
|
+
include Daru::Plotting::DataFrame
|
12
14
|
|
13
15
|
class << self
|
16
|
+
# Load data from a CSV file.
|
17
|
+
# Arguments - path, options, block(optional)
|
18
|
+
#
|
19
|
+
# Accepts a block for pre-conditioning of CSV data if any.
|
14
20
|
# Builds a DataFrame from the CSV file at +path+ by delegating to
# Daru::IO.from_csv. +opts+ and the optional block are passed through as-is.
def from_csv(path, opts = {}, &block)
  Daru::IO.from_csv(path, opts, &block)
end
|
23
|
+
|
24
|
+
# Create DataFrame by specifying rows as an Array of Arrays or Array of
|
25
|
+
# Daru::Vector objects.
|
26
|
+
# Create a DataFrame by specifying rows as an Array of Arrays or an Array of
# Daru::Vector objects.
#
# source - Array of rows; every row must report the same #size.
# opts   - options forwarded to Daru::DataFrame.new. When :order is absent
#          it is derived from the first row: that row's index if it is a
#          Daru::Vector, or "0", "1", ... if it is an Array.
#
# Returns the new DataFrame. Raises SizeError when the rows differ in size.
# The opts hash passed in by the caller is left unmodified.
def rows source, opts={}
  first = source[0]
  unless source.all? { |v| v.size == first.size }
    raise SizeError, "All vectors must have same length"
  end

  index = []
  order = opts[:order]
  unless order
    if first.is_a?(Daru::Vector) # assume that all are Vectors only
      # Vector rows carry their own names; reuse them as the row indexes.
      index = source.map { |vec| vec.name }
      order = first.index.to_a
    elsif first.is_a?(Array)
      order = Array.new(first.size) { |i| i.to_s }
    end
  end

  # Merge into a copy: assigning opts[:order] directly leaked the derived
  # order back into the caller's hash.
  df = Daru::DataFrame.new({}, opts.merge(order: order))
  source.each_with_index do |row, idx|
    df[(index[idx] || idx), :row] = row
  end

  df
end
|
17
53
|
end
|
18
54
|
|
55
|
+
# The vectors (columns) index of the DataFrame
|
19
56
|
attr_reader :vectors
|
57
|
+
|
58
|
+
# The index of the rows of the DataFrame
|
20
59
|
attr_reader :index
|
60
|
+
|
61
|
+
# The name of the DataFrame
|
21
62
|
attr_reader :name
|
63
|
+
|
64
|
+
# The number of rows present in the DataFrame
|
22
65
|
attr_reader :size
|
23
66
|
|
24
67
|
# DataFrame basically consists of an Array of Vector objects.
|
25
68
|
# These objects are indexed by row and column by vectors and index Index objects.
|
26
|
-
# Arguments - source, vectors, index, name
|
69
|
+
# Arguments - source, vectors, index, name.
|
70
|
+
#
|
71
|
+
# == Usage
|
72
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
|
73
|
+
# index: [:a, :b, :c, :d], name: :spider_man)
|
74
|
+
#
|
75
|
+
# # =>
|
76
|
+
# # <Daru::DataFrame:80766980 @name = spider_man @size = 4>
|
77
|
+
# # b a
|
78
|
+
# # a 6 1
|
79
|
+
# # b 7 2
|
80
|
+
# # c 8 3
|
81
|
+
# # d 9 4
|
27
82
|
def initialize source, opts={}
|
28
|
-
vectors = opts[:
|
83
|
+
vectors = opts[:order]
|
29
84
|
index = opts[:index]
|
85
|
+
@dtype = opts[:dtype] || Array
|
30
86
|
@name = (opts[:name] || SecureRandom.uuid).to_sym
|
31
|
-
|
32
|
-
@data = []
|
87
|
+
@data = []
|
33
88
|
|
34
89
|
if source.empty?
|
35
90
|
@vectors = Daru::Index.new vectors
|
36
91
|
@index = Daru::Index.new index
|
37
|
-
|
38
92
|
create_empty_vectors
|
39
93
|
else
|
40
94
|
case source
|
@@ -53,25 +107,21 @@ module Daru
|
|
53
107
|
|
54
108
|
@vectors.each do |name|
|
55
109
|
v = []
|
56
|
-
|
57
110
|
source.each do |hsh|
|
58
111
|
v << (hsh[name] || hsh[name.to_s])
|
59
112
|
end
|
60
113
|
|
61
|
-
@data << v.dv(name, @index)
|
114
|
+
@data << v.dv(name, @index, @dtype)
|
62
115
|
end
|
63
116
|
when Hash
|
64
117
|
create_vectors_index_with vectors, source
|
65
|
-
|
66
118
|
if all_daru_vectors_in_source? source
|
67
|
-
|
68
119
|
if !index.nil?
|
69
120
|
@index = index.to_index
|
70
121
|
elsif all_vectors_have_equal_indexes? source
|
71
122
|
@index = source.values[0].index.dup
|
72
123
|
else
|
73
124
|
all_indexes = []
|
74
|
-
|
75
125
|
source.each_value do |vector|
|
76
126
|
all_indexes << vector.index.to_a
|
77
127
|
end
|
@@ -80,9 +130,8 @@ module Daru
|
|
80
130
|
|
81
131
|
@index = Daru::Index.new all_indexes
|
82
132
|
end
|
83
|
-
|
84
133
|
@vectors.each do |vector|
|
85
|
-
@data << Daru::Vector.new([], name: vector, index: @index)
|
134
|
+
@data << Daru::Vector.new([], name: vector, index: @index, dtype: @dtype)
|
86
135
|
|
87
136
|
@index.each do |idx|
|
88
137
|
begin
|
@@ -97,7 +146,6 @@ module Daru
|
|
97
146
|
end
|
98
147
|
else
|
99
148
|
index = source.values[0].size if index.nil?
|
100
|
-
|
101
149
|
if index.is_a?(Daru::Index)
|
102
150
|
@index = index.to_index
|
103
151
|
else
|
@@ -105,10 +153,9 @@ module Daru
|
|
105
153
|
end
|
106
154
|
|
107
155
|
@vectors.each do |name|
|
108
|
-
@data << source[name].dup.dv(name, @index)
|
156
|
+
@data << source[name].dup.dv(name, @index, @dtype)
|
109
157
|
end
|
110
158
|
end
|
111
|
-
|
112
159
|
end
|
113
160
|
end
|
114
161
|
|
@@ -116,6 +163,10 @@ module Daru
|
|
116
163
|
validate
|
117
164
|
end
|
118
165
|
|
166
|
+
# Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
|
167
|
+
# Use of this method is not recommended for accessing rows or vectors.
|
168
|
+
# Use df.row[:a] for accessing row with index ':a' or df.vector[:vec] for
|
169
|
+
# accessing vector with index ':vec'
|
119
170
|
def [](*names, axis)
|
120
171
|
if axis == :vector
|
121
172
|
access_vector *names
|
@@ -126,6 +177,13 @@ module Daru
|
|
126
177
|
end
|
127
178
|
end
|
128
179
|
|
180
|
+
# Insert a new row/vector of the specified name or modify a previous row.
|
181
|
+
# Instead of using this method directly, use df.row[:a] = [1,2,3] to set/create
|
182
|
+
# a row ':a' to [1,2,3], or df.vector[:vec] = [1,2,3] for vectors.
|
183
|
+
#
|
184
|
+
# In case a Daru::Vector is specified after the equality sign, the indexes
|
185
|
+
# of the vector will be matched against the row/vector indexes of the DataFrame
|
186
|
+
# before an insertion is performed. Unmatched indexes will be set to nil.
|
129
187
|
def []=(name, axis ,vector)
|
130
188
|
if axis == :vector
|
131
189
|
insert_or_modify_vector name, vector
|
@@ -136,29 +194,42 @@ module Daru
|
|
136
194
|
end
|
137
195
|
end
|
138
196
|
|
197
|
+
# Access a vector or set/create a vector. Refer #[] and #[]= docs for details.
|
198
|
+
#
|
199
|
+
# == Usage
|
200
|
+
# df.vector[:a] # access vector named ':a'
|
201
|
+
# df.vector[:b] = [1,2,3] # set vector ':b' to [1,2,3]
|
139
202
|
# Returns a Daru::Accessors::DataFrameByVector bound to this DataFrame,
# which backs the df.vector[:name] read/write syntax.
def vector
  Daru::Accessors::DataFrameByVector.new self
end
|
142
205
|
|
206
|
+
# Access a row or set/create a row. Refer #[] and #[]= docs for details.
|
207
|
+
#
|
208
|
+
# == Usage
|
209
|
+
# df.row[:a] # access row named ':a'
|
210
|
+
# df.row[:b] = [1,2,3] # set row ':b' to [1,2,3]
|
143
211
|
# Returns a Daru::Accessors::DataFrameByRow bound to this DataFrame,
# which backs the df.row[:name] read/write syntax.
def row
  Daru::Accessors::DataFrameByRow.new self
end
|
146
214
|
|
215
|
+
# Duplicate the DataFrame entirely.
|
147
216
|
def dup
|
148
217
|
src = {}
|
149
218
|
@vectors.each do |vector|
|
150
|
-
src[vector] = @data[@vectors[vector]]
|
219
|
+
src[vector] = @data[@vectors[vector]].dup
|
151
220
|
end
|
152
221
|
|
153
|
-
Daru::DataFrame.new src,
|
222
|
+
Daru::DataFrame.new src, order: @vectors.dup, index: @index.dup, name: @name, dtype: @dtype
|
154
223
|
end
|
155
224
|
|
225
|
+
# Iterate over each vector
|
156
226
|
# Passes every vector held in @data to the block, then returns the
# DataFrame itself so calls can be chained.
def each_vector(&block)
  tap { @data.each(&block) }
end
|
161
231
|
|
232
|
+
# Iterate over each vector along with the name of the vector
|
162
233
|
def each_vector_with_index(&block)
|
163
234
|
@vectors.each do |vector|
|
164
235
|
yield @data[@vectors[vector]], vector
|
@@ -167,6 +238,7 @@ module Daru
|
|
167
238
|
self
|
168
239
|
end
|
169
240
|
|
241
|
+
# Iterate over each row
|
170
242
|
def each_row(&block)
|
171
243
|
@index.each do |index|
|
172
244
|
yield access_row(index)
|
@@ -183,9 +255,10 @@ module Daru
|
|
183
255
|
self
|
184
256
|
end
|
185
257
|
|
258
|
+
# Map each vector. Returns a DataFrame whose vectors are modified according
|
259
|
+
# to the value returned by the block.
|
186
260
|
def map_vectors(&block)
|
187
261
|
df = self.dup
|
188
|
-
|
189
262
|
df.each_vector_with_index do |vector, name|
|
190
263
|
df[name, :vector] = yield(vector)
|
191
264
|
end
|
@@ -195,7 +268,6 @@ module Daru
|
|
195
268
|
|
196
269
|
def map_vectors_with_index(&block)
|
197
270
|
df = self.dup
|
198
|
-
|
199
271
|
df.each_vector_with_index do |vector, name|
|
200
272
|
df[name, :vector] = yield(vector, name)
|
201
273
|
end
|
@@ -203,9 +275,9 @@ module Daru
|
|
203
275
|
df
|
204
276
|
end
|
205
277
|
|
278
|
+
# Map each row
|
206
279
|
def map_rows(&block)
|
207
280
|
df = self.dup
|
208
|
-
|
209
281
|
df.each_row_with_index do |row, index|
|
210
282
|
df[index, :row] = yield(row)
|
211
283
|
end
|
@@ -215,7 +287,6 @@ module Daru
|
|
215
287
|
|
216
288
|
def map_rows_with_index(&block)
|
217
289
|
df = self.dup
|
218
|
-
|
219
290
|
df.each_row_with_index do |row, index|
|
220
291
|
df[index, :row] = yield(row, index)
|
221
292
|
end
|
@@ -223,6 +294,7 @@ module Daru
|
|
223
294
|
df
|
224
295
|
end
|
225
296
|
|
297
|
+
# Delete a vector
|
226
298
|
def delete_vector vector
|
227
299
|
if @vectors.include? vector
|
228
300
|
@data.delete_at @vectors[vector]
|
@@ -237,7 +309,6 @@ module Daru
|
|
237
309
|
|
238
310
|
if @index.include? idx
|
239
311
|
@index = (@index.to_a - [idx]).to_index
|
240
|
-
|
241
312
|
self.each_vector do |vector|
|
242
313
|
vector.delete_at idx
|
243
314
|
end
|
@@ -256,7 +327,6 @@ module Daru
|
|
256
327
|
|
257
328
|
deletion << index unless keep_row
|
258
329
|
end
|
259
|
-
|
260
330
|
deletion.each { |idx|
|
261
331
|
delete_row idx
|
262
332
|
}
|
@@ -270,13 +340,14 @@ module Daru
|
|
270
340
|
end
|
271
341
|
end
|
272
342
|
|
343
|
+
# Iterates over each row and retains it in a new DataFrame if the block returns
|
344
|
+
# true for that row.
|
273
345
|
def filter_rows &block
|
274
|
-
df = Daru::DataFrame.new({},
|
346
|
+
df = Daru::DataFrame.new({}, order: @vectors.to_a)
|
275
347
|
marked = []
|
276
348
|
|
277
349
|
@index.each do |index|
|
278
350
|
keep_row = yield access_row(index)
|
279
|
-
|
280
351
|
marked << index if keep_row
|
281
352
|
end
|
282
353
|
|
@@ -287,18 +358,36 @@ module Daru
|
|
287
358
|
df
|
288
359
|
end
|
289
360
|
|
361
|
+
# Iterates over each vector and retains it in a new DataFrame if the block returns
|
362
|
+
# true for that vector.
|
290
363
|
# Works on a duplicate of the DataFrame: the block is handed to
# #keep_vector_if on the copy, and the copy is returned. The receiver itself
# is not passed to #keep_vector_if.
def filter_vectors(&block)
  dup.tap { |copy| copy.keep_vector_if(&block) }
end
|
297
369
|
|
370
|
+
# Check if a vector is present
|
298
371
|
# Returns true when the vectors index yields a truthy value for +name+,
# false otherwise.
def has_vector?(name)
  @vectors[name] ? true : false
end
|
301
374
|
|
375
|
+
# Returns the first +quantity+ rows (10 by default), fetched through #[]
# with the :row axis.
#
# An exclusive range is used so that exactly +quantity+ positions are
# requested; the inclusive 0..quantity asked for one row too many.
def head quantity=10
  self[0...quantity, :row]
end
|
378
|
+
|
379
|
+
# Returns the last +quantity+ rows (10 by default), fetched through #[]
# with the :row axis.
#
# The range stops at the last row position (@size - 1) instead of running
# one past it, and the start is clamped to 0 so that asking for more rows
# than exist returns the whole frame rather than a negative start position.
def tail quantity=10
  start = [@size - quantity, 0].max
  self[start...@size, :row]
end
|
382
|
+
|
383
|
+
# def sort_by_row name
|
384
|
+
|
385
|
+
# end
|
386
|
+
|
387
|
+
# def sort_by_vector name
|
388
|
+
|
389
|
+
# end
|
390
|
+
|
302
391
|
# Converts the DataFrame into an array of hashes where key is vector name
|
303
392
|
# and value is the corresponding element.
|
304
393
|
# The 0th index of the array contains the array of hashes while the 1st
|
@@ -307,11 +396,9 @@ module Daru
|
|
307
396
|
# the same index.
|
308
397
|
def to_a
|
309
398
|
arry = [[],[]]
|
310
|
-
|
311
399
|
self.each_row do |row|
|
312
400
|
arry[0] << row.to_hash
|
313
401
|
end
|
314
|
-
|
315
402
|
arry[1] = @index.to_a
|
316
403
|
|
317
404
|
arry
|
@@ -325,11 +412,10 @@ module Daru
|
|
325
412
|
end
|
326
413
|
end
|
327
414
|
|
328
|
-
|
329
|
-
|
330
|
-
|
415
|
+
# Convert to html for IRuby.
|
416
|
+
def to_html threshold=30
|
417
|
+
html = '<table><tr><th></th>'
|
331
418
|
@vectors.each { |vector| html += '<th>' + vector.to_s + '</th>' }
|
332
|
-
|
333
419
|
html += '</tr>'
|
334
420
|
|
335
421
|
@index.each_with_index do |index, num|
|
@@ -339,8 +425,8 @@ module Daru
|
|
339
425
|
self.row[index].each do |element|
|
340
426
|
html += '<td>' + element.to_s + '</td>'
|
341
427
|
end
|
342
|
-
html += '</tr>'
|
343
428
|
|
429
|
+
html += '</tr>'
|
344
430
|
if num > threshold
|
345
431
|
html += '<tr>'
|
346
432
|
(@vectors + 1).size.times { html += '<td>...</td>' }
|
@@ -348,7 +434,6 @@ module Daru
|
|
348
434
|
break
|
349
435
|
end
|
350
436
|
end
|
351
|
-
|
352
437
|
html += '</table>'
|
353
438
|
|
354
439
|
html
|
@@ -358,6 +443,7 @@ module Daru
|
|
358
443
|
to_html
|
359
444
|
end
|
360
445
|
|
446
|
+
# Pretty print in a nice table format for the command line (irb)
|
361
447
|
def inspect spacing=10, threshold=15
|
362
448
|
longest = [@name.to_s.size,
|
363
449
|
@vectors.map(&:to_s).map(&:size).max,
|
@@ -370,32 +456,36 @@ module Daru
|
|
370
456
|
formatter = "\n"
|
371
457
|
|
372
458
|
(@vectors.size + 1).times { formatter += "%#{longest}.#{longest}s " }
|
373
|
-
|
374
459
|
content += "\n#<" + self.class.to_s + ":" + self.object_id.to_s + " @name = " +
|
375
460
|
name.to_s + " @size = " + @size.to_s + ">"
|
376
|
-
|
377
461
|
content += sprintf formatter, "" , *@vectors.map(&:to_s)
|
378
|
-
|
379
|
-
row_num = 1
|
462
|
+
row_num = 1
|
380
463
|
|
381
464
|
self.each_row_with_index do |row, index|
|
382
465
|
content += sprintf formatter, index.to_s, *row.to_hash.values.map { |e| (e || 'nil').to_s }
|
383
|
-
|
384
466
|
row_num += 1
|
385
467
|
if row_num > threshold
|
386
468
|
dots = []
|
387
469
|
|
388
470
|
(@vectors.size + 1).times { dots << "..." }
|
389
|
-
content +=
|
471
|
+
content += sprintf formatter, *dots
|
390
472
|
break
|
391
473
|
end
|
392
474
|
end
|
393
|
-
|
394
475
|
content += "\n"
|
395
476
|
|
396
477
|
content
|
397
478
|
end
|
398
479
|
|
480
|
+
# Records +dtype+ as the DataFrame's dtype and replaces every vector in
# @data with the result of calling #coerce on it with that dtype.
def dtype=(dtype)
  @dtype = dtype

  @vectors.each do |vec|
    slot = @vectors[vec]
    @data[slot] = @data[slot].coerce(@dtype)
  end
end
|
488
|
+
|
399
489
|
def == other
|
400
490
|
@index == other.index and @size == other.size and @vectors.all? { |vector|
|
401
491
|
self[vector, :vector] == other[vector, :vector] }
|
@@ -407,7 +497,7 @@ module Daru
|
|
407
497
|
elsif self.has_vector? name
|
408
498
|
self[name, :vector]
|
409
499
|
else
|
410
|
-
super(name, *args)
|
500
|
+
super(name, *args, &block)
|
411
501
|
end
|
412
502
|
end
|
413
503
|
|
@@ -423,7 +513,6 @@ module Daru
|
|
423
513
|
raise IndexError, "Specified index #{names[0]} does not exist."
|
424
514
|
end
|
425
515
|
end
|
426
|
-
|
427
516
|
new_vcs = {}
|
428
517
|
|
429
518
|
names.each do |name|
|
@@ -431,42 +520,52 @@ module Daru
|
|
431
520
|
|
432
521
|
new_vcs[name] = @data[@vectors[name]]
|
433
522
|
end
|
434
|
-
|
435
|
-
Daru::DataFrame.new new_vcs, vectors: new_vcs.keys, index: @index, name: @name
|
523
|
+
Daru::DataFrame.new new_vcs, order: new_vcs.keys, index: @index, name: @name
|
436
524
|
end
|
437
525
|
|
438
526
|
def access_row *names
|
439
|
-
|
440
|
-
|
527
|
+
if names[1].nil?
|
528
|
+
access_token = names[0]
|
529
|
+
if access_token.is_a?(Range)
|
530
|
+
index_arry = @index.to_a
|
531
|
+
|
532
|
+
range =
|
533
|
+
if access_token.first.is_a?(Numeric)
|
534
|
+
access_token
|
535
|
+
else
|
536
|
+
first_index = index_arry.index access_token.first
|
537
|
+
last_index = index_arry.index access_token.last
|
441
538
|
|
442
|
-
|
539
|
+
first_index..last_index
|
540
|
+
end
|
443
541
|
|
444
|
-
|
445
|
-
name = names[0]
|
446
|
-
elsif @index.key names[0]
|
447
|
-
name = @index.key names[0]
|
542
|
+
names = index_arry[range]
|
448
543
|
else
|
449
|
-
|
450
|
-
|
544
|
+
row = []
|
545
|
+
name = named_index_for names[0]
|
546
|
+
@vectors.each do |vector|
|
547
|
+
row << @data[@vectors[vector]][name]
|
548
|
+
end
|
451
549
|
|
452
|
-
|
453
|
-
row << @data[@vectors[vector]][name]
|
550
|
+
return Daru::Vector.new(row, index: @vectors, name: name, dtype: @dtype)
|
454
551
|
end
|
455
|
-
|
456
|
-
Daru::Vector.new row, index: @vectors, name: name
|
457
|
-
else
|
458
|
-
# TODO: Access multiple rows
|
459
552
|
end
|
553
|
+
# Access multiple rows
|
554
|
+
rows = []
|
555
|
+
names.each do |name|
|
556
|
+
rows << self.row[name]
|
557
|
+
end
|
558
|
+
|
559
|
+
Daru::DataFrame.rows rows, name: @name, dtype: @dtype
|
460
560
|
end
|
461
561
|
|
462
562
|
def insert_or_modify_vector name, vector
|
463
563
|
@vectors = @vectors.re_index(@vectors + name)
|
464
|
-
|
465
|
-
v = nil
|
564
|
+
v = nil
|
466
565
|
|
467
566
|
if vector.is_a?(Daru::Vector)
|
468
|
-
v = Daru::Vector.new [], name: name, index: @index
|
469
|
-
|
567
|
+
v = Daru::Vector.new [], name: name, index: @index, dtype: @dtype
|
568
|
+
nil_data = false
|
470
569
|
@index.each do |idx|
|
471
570
|
begin
|
472
571
|
v[idx] = vector[idx]
|
@@ -478,7 +577,7 @@ module Daru
|
|
478
577
|
raise Exception, "Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}" if
|
479
578
|
@size != vector.size
|
480
579
|
|
481
|
-
v = vector.dv(name, @index)
|
580
|
+
v = vector.dv(name, @index, @dtype)
|
482
581
|
end
|
483
582
|
|
484
583
|
@data[@vectors[name]] = v
|
@@ -486,7 +585,7 @@ module Daru
|
|
486
585
|
|
487
586
|
def insert_or_modify_row name, vector
|
488
587
|
if @index.include? name
|
489
|
-
v = vector.dv(name, @vectors)
|
588
|
+
v = vector.dv(name, @vectors, @dtype)
|
490
589
|
|
491
590
|
@vectors.each do |vector|
|
492
591
|
begin
|
@@ -497,7 +596,7 @@ module Daru
|
|
497
596
|
end
|
498
597
|
else
|
499
598
|
@index = @index.re_index(@index + name)
|
500
|
-
v = vector.dv(name, @vectors)
|
599
|
+
v = vector.dv(name, @vectors, @dtype)
|
501
600
|
|
502
601
|
@vectors.each do |vector|
|
503
602
|
begin
|
@@ -513,7 +612,7 @@ module Daru
|
|
513
612
|
|
514
613
|
def create_empty_vectors
|
515
614
|
@vectors.each do |name|
|
516
|
-
@data << Daru::Vector.new([],name: name, index: @index)
|
615
|
+
@data << Daru::Vector.new([],name: name, index: @index, dtype: @dtype)
|
517
616
|
end
|
518
617
|
end
|
519
618
|
|