daru 0.0.3.1 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +16 -0
- data/README.md +83 -23
- data/daru.gemspec +7 -0
- data/lib/daru/accessors/array_wrapper.rb +248 -0
- data/lib/daru/accessors/nmatrix_wrapper.rb +252 -0
- data/lib/daru/dataframe.rb +171 -72
- data/lib/daru/index.rb +29 -5
- data/lib/daru/io/io.rb +1 -1
- data/lib/daru/{math → maths}/arithmetic/dataframe.rb +1 -1
- data/lib/daru/maths/arithmetic/vector.rb +75 -0
- data/lib/daru/{math → maths}/statistics/dataframe.rb +1 -1
- data/lib/daru/maths/statistics/vector.rb +147 -0
- data/lib/daru/monkeys.rb +16 -10
- data/lib/daru/plotting/dataframe.rb +47 -0
- data/lib/daru/plotting/vector.rb +41 -0
- data/lib/daru/vector.rb +166 -40
- data/lib/version.rb +1 -1
- data/notebooks/intro_with_music_data_.ipynb +318 -0
- data/spec/dataframe_spec.rb +528 -472
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/index_spec.rb +8 -0
- data/spec/io/io_spec.rb +1 -0
- data/spec/math/statistics/vector_spec.rb +144 -3
- data/spec/vector_spec.rb +165 -148
- metadata +32 -6
- data/lib/daru/math/arithmetic/vector.rb +0 -71
- data/lib/daru/math/statistics/vector.rb +0 -9
@@ -1,9 +1,261 @@
|
|
1
|
+
require 'nmatrix'
|
2
|
+
|
1
3
|
module Daru
|
2
4
|
module Accessors
|
3
5
|
|
4
6
|
# Internal class for wrapping NMatrix
|
5
7
|
class NMatrixWrapper
|
8
|
+
module Statistics
|
9
|
+
# def average_deviation_population m=nil
|
10
|
+
# m ||= self.mean
|
11
|
+
# (self.reduce(0){|memo, val| val + (val - m).abs})/self.length
|
12
|
+
# end
|
13
|
+
|
14
|
+
# def coefficient_of_variation
|
15
|
+
# self.standard_deviation_sample/self.mean
|
16
|
+
# end
|
17
|
+
|
18
|
+
# def count x=false
|
19
|
+
# if block_given?
|
20
|
+
# self.reduce(0){|memo, val| memo += 1 if yield val; memo}
|
21
|
+
# else
|
22
|
+
# val = self.frequencies[x]
|
23
|
+
# val.nil? ? 0 : val
|
24
|
+
# end
|
25
|
+
# end
|
26
|
+
|
27
|
+
# def factors
|
28
|
+
# index = @data.sorted_indices
|
29
|
+
# index.reduce([]){|memo, val| memo.push(@data[val]) if memo.last != @data[val]; memo}
|
30
|
+
# end
|
31
|
+
|
32
|
+
# def frequencies
|
33
|
+
# index = @data.sorted_indices
|
34
|
+
# index.reduce({}){|memo, val| memo[@data[val]] ||= 0; memo[@data[val]] += 1; memo}
|
35
|
+
# end
|
36
|
+
|
37
|
+
# def has_missing_data?
|
38
|
+
# @missing_data
|
39
|
+
# end
|
40
|
+
|
41
|
+
# def is_valid?
|
42
|
+
# true
|
43
|
+
# end
|
44
|
+
|
45
|
+
# def kurtosis(m=nil)
|
46
|
+
# m ||= self.mean
|
47
|
+
# fo=self.reduce(0){|a, x| a+((x-m)**4)}
|
48
|
+
# fo.quo(self.length*sd(m)**4)-3
|
49
|
+
# end
|
50
|
+
|
51
|
+
# def mean
|
52
|
+
# @vector[0...@size].mean.first
|
53
|
+
# end
|
54
|
+
|
55
|
+
# def median
|
56
|
+
# self.percentil(50)
|
57
|
+
# end
|
58
|
+
|
59
|
+
# def median_absolute_deviation
|
60
|
+
# m = self.median
|
61
|
+
# self.recode{|val| (val-m).abls}.median
|
62
|
+
# end
|
63
|
+
|
64
|
+
# def mode
|
65
|
+
# self.frequencies.max
|
66
|
+
# end
|
67
|
+
|
68
|
+
# def ==(other)
|
69
|
+
# @data==other
|
70
|
+
# end
|
71
|
+
|
72
|
+
# def n_valid
|
73
|
+
# self.length
|
74
|
+
# end
|
75
|
+
|
76
|
+
# def percentil(percent)
|
77
|
+
# index = @data.sorted_indices
|
78
|
+
# pos = (self.length * percent)/100
|
79
|
+
# if pos.to_i == pos
|
80
|
+
# @data[index[pos.to_i]]
|
81
|
+
# else
|
82
|
+
# pos = (pos-0.5).to_i
|
83
|
+
# (@data[index[pos]] + @data[index[pos+1]])/2
|
84
|
+
# end
|
85
|
+
# end
|
86
|
+
|
87
|
+
# def product
|
88
|
+
# @data.inject(1){|memo, val| memo*val}
|
89
|
+
# end
|
90
|
+
|
91
|
+
# def proportion(val=1)
|
92
|
+
# self.frequencies[val]/self.n_valid
|
93
|
+
# end
|
94
|
+
|
95
|
+
# def proportion_confidence_interval_t
|
96
|
+
# raise "NotImplementedError"
|
97
|
+
# end
|
98
|
+
|
99
|
+
# def proportion_confidence_interval_z
|
100
|
+
# raise "NotImplementedError"
|
101
|
+
# end
|
102
|
+
|
103
|
+
# def proportions
|
104
|
+
# len = self.n_valid
|
105
|
+
# self.frequencies.reduce({}){|memo, arr| memo[arr[0]] = arr[1]/len}
|
106
|
+
# end
|
107
|
+
|
108
|
+
# def push(val)
|
109
|
+
# self.expand(self.length+1)
|
110
|
+
# self[self.length-1] = recode
|
111
|
+
# end
|
112
|
+
|
113
|
+
# def range
|
114
|
+
# max - min
|
115
|
+
# end
|
116
|
+
|
117
|
+
# def ranked
|
118
|
+
# sum = 0
|
119
|
+
# r = self.frequencies.sort.reduce({}) do |memo, val|
|
120
|
+
# memo[val[0]] = ((sum+1) + (sum+val[1]))/2
|
121
|
+
# sum += val[1]
|
122
|
+
# memo
|
123
|
+
# end
|
124
|
+
# Mikon::DArray.new(self.reduce{|val| r[val]})
|
125
|
+
# end
|
126
|
+
|
127
|
+
# def recode(&block)
|
128
|
+
# Mikon::DArray.new(@data.map(&block))
|
129
|
+
# end
|
130
|
+
|
131
|
+
# def recode!(&block)
|
132
|
+
# @data.map!(&block)
|
133
|
+
# end
|
134
|
+
|
135
|
+
# def skew(m=nil)
|
136
|
+
# m ||= self.mean
|
137
|
+
# th = self.reduce(0){|memo, val| memo + ((val - m)**3)}
|
138
|
+
# th/((self.length)*self.sd(m)**3)
|
139
|
+
# end
|
140
|
+
|
141
|
+
# def standard_deviation_population(m=nil)
|
142
|
+
# m ||= self.mean
|
143
|
+
# Maths.sqrt(self.variance_population(m))
|
144
|
+
# end
|
145
|
+
|
146
|
+
# def standard_deviation_sample(m=nil)
|
147
|
+
# if !m.nil?
|
148
|
+
# Maths.sqrt(variance_sample(m))
|
149
|
+
# else
|
150
|
+
# @data.std.first
|
151
|
+
# end
|
152
|
+
# end
|
153
|
+
|
154
|
+
# def standard_error
|
155
|
+
# self.standard_deviation_sample/(Maths.sqrt(self.length))
|
156
|
+
# end
|
157
|
+
|
158
|
+
# def sum_of_squared_deviation
|
159
|
+
# self.reduce(0){|memo, val| val**2 + memo}
|
160
|
+
# end
|
161
|
+
|
162
|
+
# def sum_of_squares(m=nil)
|
163
|
+
# m ||= self.mean
|
164
|
+
# self.reduce(0){|memo, val| memo + (val-m)**2}
|
165
|
+
# end
|
166
|
+
|
167
|
+
# def sum
|
168
|
+
# @data.sum.first
|
169
|
+
# end
|
170
|
+
|
171
|
+
# def variance_sample(m=nil)
|
172
|
+
# m ||= self.mean
|
173
|
+
# self.sum_of_squares(m)/(self.length-1)
|
174
|
+
# end
|
175
|
+
end # module Statistics
|
176
|
+
|
177
|
+
include Statistics
|
178
|
+
include Enumerable
|
179
|
+
|
180
|
+
def each(&block)
|
181
|
+
@vector.each(&block)
|
182
|
+
end
|
183
|
+
|
184
|
+
attr_reader :size, :vector, :missing_data
|
185
|
+
|
186
|
+
def initialize vector, caller
|
187
|
+
@size = vector.size
|
188
|
+
@vector = NMatrix.new [@size*2], vector.to_a
|
189
|
+
@missing_data = false
|
190
|
+
@caller = caller
|
191
|
+
# init with twice the storage for reducing the need to resize
|
192
|
+
end
|
193
|
+
|
194
|
+
def [] index
|
195
|
+
@vector[index]
|
196
|
+
end
|
197
|
+
|
198
|
+
# Store +value+ at position +index+ of the backing NMatrix.
#
# The storage is grown only when +index+ falls outside what is already
# allocated (@vector.size), not merely beyond the logical @size: the buffer is
# created with spare capacity, so writes into that spare room need no
# reallocation. The new capacity is at least index + 1 so the write always fits.
#
# Assigning nil marks the wrapper as containing missing data and casts the
# storage to the :object dtype before writing (presumably because a numeric
# dtype cannot hold nil -- confirm against the NMatrix docs).
#
# Note that this method does not change @size; only #<< does.
def []= index, value
  resize([@size * 2, index + 1].max) if index >= @vector.size

  if value.nil?
    @missing_data = true
    @vector = @vector.cast(dtype: :object)
  end

  @vector[index] = value
end
|
207
|
+
|
208
|
+
def == other
|
209
|
+
@vector == other and @size == other.size
|
210
|
+
end
|
211
|
+
|
212
|
+
def delete_at index
|
213
|
+
arry = @vector.to_a
|
214
|
+
arry.delete_at index
|
215
|
+
@vector = NMatrix.new [@size-1], arry
|
216
|
+
@size -= 1
|
217
|
+
end
|
218
|
+
|
219
|
+
def index key
|
220
|
+
@vector.to_a.index key
|
221
|
+
end
|
222
|
+
|
223
|
+
def << element
|
224
|
+
if @size >= @vector.size
|
225
|
+
resize
|
226
|
+
end
|
227
|
+
|
228
|
+
self[@size] = element
|
229
|
+
|
230
|
+
@size += 1
|
231
|
+
end
|
232
|
+
|
233
|
+
def to_a
|
234
|
+
@vector.to_a
|
235
|
+
end
|
236
|
+
|
237
|
+
# Return an independent copy of this wrapper.
#
# Only the first @size elements are handed to the copy: the backing NMatrix
# is allocated with spare capacity (see #initialize), and #initialize derives
# the logical size from the length of what it is given, so passing the padded
# buffer would make the copy report a larger size than the original.
# @caller is forwarded because #initialize takes two required arguments.
def dup
  NMatrixWrapper.new @vector.to_a.first(@size), @caller
end
|
240
|
+
|
241
|
+
def coerce dtype
|
242
|
+
case
|
243
|
+
when dtype == Array
|
244
|
+
Daru::Accessors::ArrayWrapper.new @vector[0..(@size-1)].to_a, @caller
|
245
|
+
when dtype == NMatrix
|
246
|
+
self
|
247
|
+
when dtype == MDArray
|
248
|
+
raise NotImplementedError
|
249
|
+
else
|
250
|
+
raise ArgumentError, "Cant coerce to dtype #{dtype}"
|
251
|
+
end
|
252
|
+
end
|
253
|
+
|
254
|
+
def resize size = @size*2
|
255
|
+
raise "Size must be greater than current size" if size < @size
|
6
256
|
|
257
|
+
@vector = NMatrix.new [size], @vector.to_a
|
258
|
+
end
|
7
259
|
end
|
8
260
|
end
|
9
261
|
end
|
data/lib/daru/dataframe.rb
CHANGED
@@ -1,40 +1,94 @@
|
|
1
1
|
require_relative 'accessors/dataframe_by_row.rb'
|
2
2
|
require_relative 'accessors/dataframe_by_vector.rb'
|
3
|
-
require_relative '
|
4
|
-
require_relative '
|
3
|
+
require_relative 'maths/arithmetic/dataframe.rb'
|
4
|
+
require_relative 'maths/statistics/dataframe.rb'
|
5
|
+
require_relative 'plotting/dataframe.rb'
|
5
6
|
require_relative 'io/io.rb'
|
6
7
|
|
7
8
|
module Daru
|
8
9
|
class DataFrame
|
9
10
|
|
10
|
-
include Daru::
|
11
|
-
include Daru::
|
11
|
+
include Daru::Maths::Arithmetic::DataFrame
|
12
|
+
include Daru::Maths::Statistics::DataFrame
|
13
|
+
include Daru::Plotting::DataFrame
|
12
14
|
|
13
15
|
class << self
|
16
|
+
# Load data from a CSV file.
|
17
|
+
# Arguments - path, options, block(optional)
|
18
|
+
#
|
19
|
+
# Accepts a block for pre-conditioning of CSV data if any.
|
14
20
|
def from_csv path, opts={}, &block
|
15
21
|
Daru::IO.from_csv path, opts, &block
|
16
22
|
end
|
23
|
+
|
24
|
+
# Create DataFrame by specifying rows as an Array of Arrays or Array of
|
25
|
+
# Daru::Vector objects.
|
26
|
+
def rows source, opts={}
|
27
|
+
if source.all? { |v| v.size == source[0].size }
|
28
|
+
first = source[0]
|
29
|
+
index = []
|
30
|
+
order =
|
31
|
+
unless opts[:order]
|
32
|
+
if first.is_a?(Daru::Vector) # assume that all are Vectors only
|
33
|
+
source.each { |vec| index << vec.name }
|
34
|
+
first.index.to_a
|
35
|
+
elsif first.is_a?(Array)
|
36
|
+
Array.new(first.size) { |i| i.to_s }
|
37
|
+
end
|
38
|
+
else
|
39
|
+
opts[:order]
|
40
|
+
end
|
41
|
+
|
42
|
+
opts[:order] = order
|
43
|
+
df = Daru::DataFrame.new({}, opts)
|
44
|
+
source.each_with_index do |row,idx|
|
45
|
+
df[(index[idx] || idx), :row] = row
|
46
|
+
end
|
47
|
+
else
|
48
|
+
raise SizeError, "All vectors must have same length"
|
49
|
+
end
|
50
|
+
|
51
|
+
df
|
52
|
+
end
|
17
53
|
end
|
18
54
|
|
55
|
+
# The vectors (columns) index of the DataFrame
|
19
56
|
attr_reader :vectors
|
57
|
+
|
58
|
+
# The index of the rows of the DataFrame
|
20
59
|
attr_reader :index
|
60
|
+
|
61
|
+
# The name of the DataFrame
|
21
62
|
attr_reader :name
|
63
|
+
|
64
|
+
# The number of rows present in the DataFrame
|
22
65
|
attr_reader :size
|
23
66
|
|
24
67
|
# DataFrame basically consists of an Array of Vector objects.
|
25
68
|
# Rows are addressed through the `index` Index object and columns through the `vectors` Index object.
|
26
|
-
# Arguments - source, vectors, index, name
|
69
|
+
# Arguments - source, vectors, index, name.
|
70
|
+
#
|
71
|
+
# == Usage
|
72
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
|
73
|
+
# index: [:a, :b, :c, :d], name: :spider_man)
|
74
|
+
#
|
75
|
+
# # =>
|
76
|
+
# # <Daru::DataFrame:80766980 @name = spider_man @size = 4>
|
77
|
+
# # b a
|
78
|
+
# # a 6 1
|
79
|
+
# # b 7 2
|
80
|
+
# # c 8 3
|
81
|
+
# # d 9 4
|
27
82
|
def initialize source, opts={}
|
28
|
-
vectors = opts[:
|
83
|
+
vectors = opts[:order]
|
29
84
|
index = opts[:index]
|
85
|
+
@dtype = opts[:dtype] || Array
|
30
86
|
@name = (opts[:name] || SecureRandom.uuid).to_sym
|
31
|
-
|
32
|
-
@data = []
|
87
|
+
@data = []
|
33
88
|
|
34
89
|
if source.empty?
|
35
90
|
@vectors = Daru::Index.new vectors
|
36
91
|
@index = Daru::Index.new index
|
37
|
-
|
38
92
|
create_empty_vectors
|
39
93
|
else
|
40
94
|
case source
|
@@ -53,25 +107,21 @@ module Daru
|
|
53
107
|
|
54
108
|
@vectors.each do |name|
|
55
109
|
v = []
|
56
|
-
|
57
110
|
source.each do |hsh|
|
58
111
|
v << (hsh[name] || hsh[name.to_s])
|
59
112
|
end
|
60
113
|
|
61
|
-
@data << v.dv(name, @index)
|
114
|
+
@data << v.dv(name, @index, @dtype)
|
62
115
|
end
|
63
116
|
when Hash
|
64
117
|
create_vectors_index_with vectors, source
|
65
|
-
|
66
118
|
if all_daru_vectors_in_source? source
|
67
|
-
|
68
119
|
if !index.nil?
|
69
120
|
@index = index.to_index
|
70
121
|
elsif all_vectors_have_equal_indexes? source
|
71
122
|
@index = source.values[0].index.dup
|
72
123
|
else
|
73
124
|
all_indexes = []
|
74
|
-
|
75
125
|
source.each_value do |vector|
|
76
126
|
all_indexes << vector.index.to_a
|
77
127
|
end
|
@@ -80,9 +130,8 @@ module Daru
|
|
80
130
|
|
81
131
|
@index = Daru::Index.new all_indexes
|
82
132
|
end
|
83
|
-
|
84
133
|
@vectors.each do |vector|
|
85
|
-
@data << Daru::Vector.new([], name: vector, index: @index)
|
134
|
+
@data << Daru::Vector.new([], name: vector, index: @index, dtype: @dtype)
|
86
135
|
|
87
136
|
@index.each do |idx|
|
88
137
|
begin
|
@@ -97,7 +146,6 @@ module Daru
|
|
97
146
|
end
|
98
147
|
else
|
99
148
|
index = source.values[0].size if index.nil?
|
100
|
-
|
101
149
|
if index.is_a?(Daru::Index)
|
102
150
|
@index = index.to_index
|
103
151
|
else
|
@@ -105,10 +153,9 @@ module Daru
|
|
105
153
|
end
|
106
154
|
|
107
155
|
@vectors.each do |name|
|
108
|
-
@data << source[name].dup.dv(name, @index)
|
156
|
+
@data << source[name].dup.dv(name, @index, @dtype)
|
109
157
|
end
|
110
158
|
end
|
111
|
-
|
112
159
|
end
|
113
160
|
end
|
114
161
|
|
@@ -116,6 +163,10 @@ module Daru
|
|
116
163
|
validate
|
117
164
|
end
|
118
165
|
|
166
|
+
# Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
|
167
|
+
# Use of this method is not recommended for accessing rows or vectors.
|
168
|
+
# Use df.row[:a] for accessing row with index ':a' or df.vector[:vec] for
|
169
|
+
# accessing vector with index ':vec'
|
119
170
|
def [](*names, axis)
|
120
171
|
if axis == :vector
|
121
172
|
access_vector *names
|
@@ -126,6 +177,13 @@ module Daru
|
|
126
177
|
end
|
127
178
|
end
|
128
179
|
|
180
|
+
# Insert a new row/vector of the specified name or modify a previous row.
|
181
|
+
# Instead of using this method directly, use df.row[:a] = [1,2,3] to set/create
|
182
|
+
# a row ':a' to [1,2,3], or df.vector[:vec] = [1,2,3] for vectors.
|
183
|
+
#
|
184
|
+
# In case a Daru::Vector is specified after the equality sign, the indexes
|
185
|
+
# of the vector will be matched against the row/vector indexes of the DataFrame
|
186
|
+
# before an insertion is performed. Unmatched indexes will be set to nil.
|
129
187
|
def []=(name, axis ,vector)
|
130
188
|
if axis == :vector
|
131
189
|
insert_or_modify_vector name, vector
|
@@ -136,29 +194,42 @@ module Daru
|
|
136
194
|
end
|
137
195
|
end
|
138
196
|
|
197
|
+
# Access a vector or set/create a vector. Refer to #[] and #[]= docs for details.
|
198
|
+
#
|
199
|
+
# == Usage
|
200
|
+
# df.vector[:a] # access vector named ':a'
|
201
|
+
# df.vector[:b] = [1,2,3] # set vector ':b' to [1,2,3]
|
139
202
|
def vector
|
140
203
|
Daru::Accessors::DataFrameByVector.new(self)
|
141
204
|
end
|
142
205
|
|
206
|
+
# Access a row or set/create a row. Refer to #[] and #[]= docs for details.
|
207
|
+
#
|
208
|
+
# == Usage
|
209
|
+
# df.row[:a] # access row named ':a'
|
210
|
+
# df.row[:b] = [1,2,3] # set row ':b' to [1,2,3]
|
143
211
|
def row
|
144
212
|
Daru::Accessors::DataFrameByRow.new(self)
|
145
213
|
end
|
146
214
|
|
215
|
+
# Duplicate the DataFrame entirely.
|
147
216
|
def dup
|
148
217
|
src = {}
|
149
218
|
@vectors.each do |vector|
|
150
|
-
src[vector] = @data[@vectors[vector]]
|
219
|
+
src[vector] = @data[@vectors[vector]].dup
|
151
220
|
end
|
152
221
|
|
153
|
-
Daru::DataFrame.new src,
|
222
|
+
Daru::DataFrame.new src, order: @vectors.dup, index: @index.dup, name: @name, dtype: @dtype
|
154
223
|
end
|
155
224
|
|
225
|
+
# Iterate over each vector
|
156
226
|
def each_vector(&block)
|
157
227
|
@data.each(&block)
|
158
228
|
|
159
229
|
self
|
160
230
|
end
|
161
231
|
|
232
|
+
# Iterate over each vector along with the name of the vector
|
162
233
|
def each_vector_with_index(&block)
|
163
234
|
@vectors.each do |vector|
|
164
235
|
yield @data[@vectors[vector]], vector
|
@@ -167,6 +238,7 @@ module Daru
|
|
167
238
|
self
|
168
239
|
end
|
169
240
|
|
241
|
+
# Iterate over each row
|
170
242
|
def each_row(&block)
|
171
243
|
@index.each do |index|
|
172
244
|
yield access_row(index)
|
@@ -183,9 +255,10 @@ module Daru
|
|
183
255
|
self
|
184
256
|
end
|
185
257
|
|
258
|
+
# Map each vector. Returns a DataFrame whose vectors are modified according
|
259
|
+
# to the value returned by the block.
|
186
260
|
def map_vectors(&block)
|
187
261
|
df = self.dup
|
188
|
-
|
189
262
|
df.each_vector_with_index do |vector, name|
|
190
263
|
df[name, :vector] = yield(vector)
|
191
264
|
end
|
@@ -195,7 +268,6 @@ module Daru
|
|
195
268
|
|
196
269
|
def map_vectors_with_index(&block)
|
197
270
|
df = self.dup
|
198
|
-
|
199
271
|
df.each_vector_with_index do |vector, name|
|
200
272
|
df[name, :vector] = yield(vector, name)
|
201
273
|
end
|
@@ -203,9 +275,9 @@ module Daru
|
|
203
275
|
df
|
204
276
|
end
|
205
277
|
|
278
|
+
# Map each row
|
206
279
|
def map_rows(&block)
|
207
280
|
df = self.dup
|
208
|
-
|
209
281
|
df.each_row_with_index do |row, index|
|
210
282
|
df[index, :row] = yield(row)
|
211
283
|
end
|
@@ -215,7 +287,6 @@ module Daru
|
|
215
287
|
|
216
288
|
def map_rows_with_index(&block)
|
217
289
|
df = self.dup
|
218
|
-
|
219
290
|
df.each_row_with_index do |row, index|
|
220
291
|
df[index, :row] = yield(row, index)
|
221
292
|
end
|
@@ -223,6 +294,7 @@ module Daru
|
|
223
294
|
df
|
224
295
|
end
|
225
296
|
|
297
|
+
# Delete a vector
|
226
298
|
def delete_vector vector
|
227
299
|
if @vectors.include? vector
|
228
300
|
@data.delete_at @vectors[vector]
|
@@ -237,7 +309,6 @@ module Daru
|
|
237
309
|
|
238
310
|
if @index.include? idx
|
239
311
|
@index = (@index.to_a - [idx]).to_index
|
240
|
-
|
241
312
|
self.each_vector do |vector|
|
242
313
|
vector.delete_at idx
|
243
314
|
end
|
@@ -256,7 +327,6 @@ module Daru
|
|
256
327
|
|
257
328
|
deletion << index unless keep_row
|
258
329
|
end
|
259
|
-
|
260
330
|
deletion.each { |idx|
|
261
331
|
delete_row idx
|
262
332
|
}
|
@@ -270,13 +340,14 @@ module Daru
|
|
270
340
|
end
|
271
341
|
end
|
272
342
|
|
343
|
+
# Iterates over each row and retains it in a new DataFrame if the block returns
|
344
|
+
# true for that row.
|
273
345
|
def filter_rows &block
|
274
|
-
df = Daru::DataFrame.new({},
|
346
|
+
df = Daru::DataFrame.new({}, order: @vectors.to_a)
|
275
347
|
marked = []
|
276
348
|
|
277
349
|
@index.each do |index|
|
278
350
|
keep_row = yield access_row(index)
|
279
|
-
|
280
351
|
marked << index if keep_row
|
281
352
|
end
|
282
353
|
|
@@ -287,18 +358,36 @@ module Daru
|
|
287
358
|
df
|
288
359
|
end
|
289
360
|
|
361
|
+
# Iterates over each vector and retains it in a new DataFrame if the block returns
|
362
|
+
# true for that vector.
|
290
363
|
def filter_vectors &block
|
291
364
|
df = self.dup
|
292
|
-
|
293
365
|
df.keep_vector_if &block
|
294
366
|
|
295
367
|
df
|
296
368
|
end
|
297
369
|
|
370
|
+
# Check if a vector is present
|
298
371
|
def has_vector? name
|
299
372
|
!!@vectors[name]
|
300
373
|
end
|
301
374
|
|
375
|
+
# Return the first +quantity+ rows (10 by default).
#
# An exclusive range is used so that exactly +quantity+ rows are selected;
# an inclusive 0..quantity would select one row too many. If fewer rows
# exist, the range slice simply yields the rows that are available.
def head quantity=10
  self[0...quantity, :row]
end
|
378
|
+
|
379
|
+
# Return the last +quantity+ rows (10 by default).
#
# The start position is clamped at 0: when +quantity+ exceeds @size the
# difference is negative, and a negative range start would be interpreted as
# counting from the end (selecting the wrong rows, or nothing at all).
# Asking for more rows than exist therefore returns every row.
def tail quantity=10
  first_position = [@size - quantity, 0].max
  self[first_position...@size, :row]
end
|
382
|
+
|
383
|
+
# def sort_by_row name
|
384
|
+
|
385
|
+
# end
|
386
|
+
|
387
|
+
# def sort_by_vector name
|
388
|
+
|
389
|
+
# end
|
390
|
+
|
302
391
|
# Converts the DataFrame into an array of hashes where key is vector name
|
303
392
|
# and value is the corresponding element.
|
304
393
|
# The 0th index of the array contains the array of hashes while the 1st
|
@@ -307,11 +396,9 @@ module Daru
|
|
307
396
|
# the same index.
|
308
397
|
def to_a
|
309
398
|
arry = [[],[]]
|
310
|
-
|
311
399
|
self.each_row do |row|
|
312
400
|
arry[0] << row.to_hash
|
313
401
|
end
|
314
|
-
|
315
402
|
arry[1] = @index.to_a
|
316
403
|
|
317
404
|
arry
|
@@ -325,11 +412,10 @@ module Daru
|
|
325
412
|
end
|
326
413
|
end
|
327
414
|
|
328
|
-
|
329
|
-
|
330
|
-
|
415
|
+
# Convert to html for IRuby.
|
416
|
+
def to_html threshold=30
|
417
|
+
html = '<table><tr><th></th>'
|
331
418
|
@vectors.each { |vector| html += '<th>' + vector.to_s + '</th>' }
|
332
|
-
|
333
419
|
html += '</tr>'
|
334
420
|
|
335
421
|
@index.each_with_index do |index, num|
|
@@ -339,8 +425,8 @@ module Daru
|
|
339
425
|
self.row[index].each do |element|
|
340
426
|
html += '<td>' + element.to_s + '</td>'
|
341
427
|
end
|
342
|
-
html += '</tr>'
|
343
428
|
|
429
|
+
html += '</tr>'
|
344
430
|
if num > threshold
|
345
431
|
html += '<tr>'
|
346
432
|
(@vectors + 1).size.times { html += '<td>...</td>' }
|
@@ -348,7 +434,6 @@ module Daru
|
|
348
434
|
break
|
349
435
|
end
|
350
436
|
end
|
351
|
-
|
352
437
|
html += '</table>'
|
353
438
|
|
354
439
|
html
|
@@ -358,6 +443,7 @@ module Daru
|
|
358
443
|
to_html
|
359
444
|
end
|
360
445
|
|
446
|
+
# Pretty print in a nice table format for the command line (irb)
|
361
447
|
def inspect spacing=10, threshold=15
|
362
448
|
longest = [@name.to_s.size,
|
363
449
|
@vectors.map(&:to_s).map(&:size).max,
|
@@ -370,32 +456,36 @@ module Daru
|
|
370
456
|
formatter = "\n"
|
371
457
|
|
372
458
|
(@vectors.size + 1).times { formatter += "%#{longest}.#{longest}s " }
|
373
|
-
|
374
459
|
content += "\n#<" + self.class.to_s + ":" + self.object_id.to_s + " @name = " +
|
375
460
|
name.to_s + " @size = " + @size.to_s + ">"
|
376
|
-
|
377
461
|
content += sprintf formatter, "" , *@vectors.map(&:to_s)
|
378
|
-
|
379
|
-
row_num = 1
|
462
|
+
row_num = 1
|
380
463
|
|
381
464
|
self.each_row_with_index do |row, index|
|
382
465
|
content += sprintf formatter, index.to_s, *row.to_hash.values.map { |e| (e || 'nil').to_s }
|
383
|
-
|
384
466
|
row_num += 1
|
385
467
|
if row_num > threshold
|
386
468
|
dots = []
|
387
469
|
|
388
470
|
(@vectors.size + 1).times { dots << "..." }
|
389
|
-
content +=
|
471
|
+
content += sprintf formatter, *dots
|
390
472
|
break
|
391
473
|
end
|
392
474
|
end
|
393
|
-
|
394
475
|
content += "\n"
|
395
476
|
|
396
477
|
content
|
397
478
|
end
|
398
479
|
|
480
|
+
def dtype= dtype
|
481
|
+
@dtype = dtype
|
482
|
+
|
483
|
+
@vectors.each do |vec|
|
484
|
+
pos = @vectors[vec]
|
485
|
+
@data[pos] = @data[pos].coerce(@dtype)
|
486
|
+
end
|
487
|
+
end
|
488
|
+
|
399
489
|
def == other
|
400
490
|
@index == other.index and @size == other.size and @vectors.all? { |vector|
|
401
491
|
self[vector, :vector] == other[vector, :vector] }
|
@@ -407,7 +497,7 @@ module Daru
|
|
407
497
|
elsif self.has_vector? name
|
408
498
|
self[name, :vector]
|
409
499
|
else
|
410
|
-
super(name, *args)
|
500
|
+
super(name, *args, &block)
|
411
501
|
end
|
412
502
|
end
|
413
503
|
|
@@ -423,7 +513,6 @@ module Daru
|
|
423
513
|
raise IndexError, "Specified index #{names[0]} does not exist."
|
424
514
|
end
|
425
515
|
end
|
426
|
-
|
427
516
|
new_vcs = {}
|
428
517
|
|
429
518
|
names.each do |name|
|
@@ -431,42 +520,52 @@ module Daru
|
|
431
520
|
|
432
521
|
new_vcs[name] = @data[@vectors[name]]
|
433
522
|
end
|
434
|
-
|
435
|
-
Daru::DataFrame.new new_vcs, vectors: new_vcs.keys, index: @index, name: @name
|
523
|
+
Daru::DataFrame.new new_vcs, order: new_vcs.keys, index: @index, name: @name
|
436
524
|
end
|
437
525
|
|
438
526
|
def access_row *names
|
439
|
-
|
440
|
-
|
527
|
+
if names[1].nil?
|
528
|
+
access_token = names[0]
|
529
|
+
if access_token.is_a?(Range)
|
530
|
+
index_arry = @index.to_a
|
531
|
+
|
532
|
+
range =
|
533
|
+
if access_token.first.is_a?(Numeric)
|
534
|
+
access_token
|
535
|
+
else
|
536
|
+
first_index = index_arry.index access_token.first
|
537
|
+
last_index = index_arry.index access_token.last
|
441
538
|
|
442
|
-
|
539
|
+
first_index..last_index
|
540
|
+
end
|
443
541
|
|
444
|
-
|
445
|
-
name = names[0]
|
446
|
-
elsif @index.key names[0]
|
447
|
-
name = @index.key names[0]
|
542
|
+
names = index_arry[range]
|
448
543
|
else
|
449
|
-
|
450
|
-
|
544
|
+
row = []
|
545
|
+
name = named_index_for names[0]
|
546
|
+
@vectors.each do |vector|
|
547
|
+
row << @data[@vectors[vector]][name]
|
548
|
+
end
|
451
549
|
|
452
|
-
|
453
|
-
row << @data[@vectors[vector]][name]
|
550
|
+
return Daru::Vector.new(row, index: @vectors, name: name, dtype: @dtype)
|
454
551
|
end
|
455
|
-
|
456
|
-
Daru::Vector.new row, index: @vectors, name: name
|
457
|
-
else
|
458
|
-
# TODO: Access multiple rows
|
459
552
|
end
|
553
|
+
# Access multiple rows
|
554
|
+
rows = []
|
555
|
+
names.each do |name|
|
556
|
+
rows << self.row[name]
|
557
|
+
end
|
558
|
+
|
559
|
+
Daru::DataFrame.rows rows, name: @name, dtype: @dtype
|
460
560
|
end
|
461
561
|
|
462
562
|
def insert_or_modify_vector name, vector
|
463
563
|
@vectors = @vectors.re_index(@vectors + name)
|
464
|
-
|
465
|
-
v = nil
|
564
|
+
v = nil
|
466
565
|
|
467
566
|
if vector.is_a?(Daru::Vector)
|
468
|
-
v = Daru::Vector.new [], name: name, index: @index
|
469
|
-
|
567
|
+
v = Daru::Vector.new [], name: name, index: @index, dtype: @dtype
|
568
|
+
nil_data = false
|
470
569
|
@index.each do |idx|
|
471
570
|
begin
|
472
571
|
v[idx] = vector[idx]
|
@@ -478,7 +577,7 @@ module Daru
|
|
478
577
|
raise Exception, "Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}" if
|
479
578
|
@size != vector.size
|
480
579
|
|
481
|
-
v = vector.dv(name, @index)
|
580
|
+
v = vector.dv(name, @index, @dtype)
|
482
581
|
end
|
483
582
|
|
484
583
|
@data[@vectors[name]] = v
|
@@ -486,7 +585,7 @@ module Daru
|
|
486
585
|
|
487
586
|
def insert_or_modify_row name, vector
|
488
587
|
if @index.include? name
|
489
|
-
v = vector.dv(name, @vectors)
|
588
|
+
v = vector.dv(name, @vectors, @dtype)
|
490
589
|
|
491
590
|
@vectors.each do |vector|
|
492
591
|
begin
|
@@ -497,7 +596,7 @@ module Daru
|
|
497
596
|
end
|
498
597
|
else
|
499
598
|
@index = @index.re_index(@index + name)
|
500
|
-
v = vector.dv(name, @vectors)
|
599
|
+
v = vector.dv(name, @vectors, @dtype)
|
501
600
|
|
502
601
|
@vectors.each do |vector|
|
503
602
|
begin
|
@@ -513,7 +612,7 @@ module Daru
|
|
513
612
|
|
514
613
|
def create_empty_vectors
|
515
614
|
@vectors.each do |name|
|
516
|
-
@data << Daru::Vector.new([],name: name, index: @index)
|
615
|
+
@data << Daru::Vector.new([],name: name, index: @index, dtype: @dtype)
|
517
616
|
end
|
518
617
|
end
|
519
618
|
|