daru 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +0 -1
- data/History.txt +35 -0
- data/README.md +178 -198
- data/daru.gemspec +5 -7
- data/lib/daru.rb +10 -2
- data/lib/daru/accessors/array_wrapper.rb +36 -198
- data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
- data/lib/daru/core/group_by.rb +183 -0
- data/lib/daru/dataframe.rb +615 -167
- data/lib/daru/index.rb +17 -16
- data/lib/daru/io/io.rb +5 -12
- data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
- data/lib/daru/maths/arithmetic/vector.rb +19 -6
- data/lib/daru/maths/statistics/dataframe.rb +103 -2
- data/lib/daru/maths/statistics/vector.rb +102 -61
- data/lib/daru/monkeys.rb +8 -0
- data/lib/daru/multi_index.rb +199 -0
- data/lib/daru/plotting/dataframe.rb +24 -24
- data/lib/daru/plotting/vector.rb +14 -15
- data/lib/daru/vector.rb +402 -98
- data/lib/version.rb +1 -1
- data/notebooks/grouping_splitting_pivots.ipynb +529 -0
- data/notebooks/intro_with_music_data_.ipynb +104 -119
- data/spec/accessors/wrappers_spec.rb +36 -0
- data/spec/core/group_by_spec.rb +331 -0
- data/spec/dataframe_spec.rb +1237 -475
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/index_spec.rb +10 -21
- data/spec/io/io_spec.rb +4 -14
- data/spec/math/arithmetic/dataframe_spec.rb +66 -0
- data/spec/math/arithmetic/vector_spec.rb +45 -4
- data/spec/math/statistics/dataframe_spec.rb +91 -1
- data/spec/math/statistics/vector_spec.rb +32 -6
- data/spec/monkeys_spec.rb +10 -1
- data/spec/multi_index_spec.rb +216 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/vector_spec.rb +505 -57
- metadata +21 -15
@@ -0,0 +1,183 @@
|
|
1
|
+
module Daru
  module Core
    # Splits the rows of a DataFrame into groups that share the same values in
    # one or more vectors, and offers per-group selection (head/tail/get_group)
    # and aggregation (mean, sum, ...) on the remaining vectors.
    class GroupBy

      # Frozen Hash mapping every group key (an Array holding one value per
      # grouping vector) to the Array of row positions belonging to that group.
      # Keys are stored in sorted order.
      attr_reader :groups

      # @param context [Daru::DataFrame] the frame whose rows are grouped.
      # @param names [Array] names of the vectors to group by.
      def initialize context, names
        @context           = context
        @non_group_vectors = context.vectors.to_a - names

        columns = names.map { |vec| context.vector[vec].to_a }
        tuples  = columns[0].zip(*columns[1..-1])

        # Single pass over the rows: each row position is filed under its own
        # key tuple, instead of re-scanning the whole tuple list once per key.
        positions = {}
        tuples.each_with_index { |tuple, row| (positions[tuple] ||= []) << row }

        @groups = {}
        positions.keys.sort.each { |key| @groups[key] = positions[key] }
        @groups.freeze
      end

      # Number of rows in each group, as a Daru::Vector named :size and indexed
      # by group.
      def size
        counts = @groups.values.map(&:size)
        Daru::Vector.new(counts, index: group_index, name: :size)
      end

      # First row of every group.
      def first
        head(1)
      end

      # Last row of every group.
      def last
        tail(1)
      end

      # The first +quantity+ rows of every group.
      def head quantity=5
        select_groups_from :first, quantity
      end

      # The last +quantity+ rows of every group.
      def tail quantity=5
        select_groups_from :last, quantity
      end

      # Calculate mean of numeric groups, excluding missing values.
      def mean
        apply_method :numeric, :mean
      end

      # Calculate the median of numeric groups, excluding missing values.
      def median
        apply_method :numeric, :median
      end

      # Calculate sum of numeric groups, excluding missing values.
      def sum
        apply_method :numeric, :sum
      end

      # Builds a DataFrame with one vector per non-grouping vector, each of
      # them being the #size vector.
      def count
        width = @non_group_vectors.size
        Daru::DataFrame.new([size]*width, order: @non_group_vectors)
      end

      # Calculate sample standard deviation of numeric vector groups, excluding
      # missing values.
      def std
        apply_method :numeric, :std
      end

      # Find the max element of each numeric vector group.
      def max
        apply_method :numeric, :max
      end

      # Find the min element of each numeric vector group.
      def min
        apply_method :numeric, :min
      end

      # Returns one of the selected groups as a DataFrame.
      #
      # @param group [Array] a key of #groups.
      def get_group group
        indexes = @groups[group]

        columns = []
        @context.each_vector { |vector| columns << vector.to_a }
        by_row = columns.transpose

        rows = indexes.map { |idx| by_row[idx] }
        Daru::DataFrame.rows(rows, index: @context.index[indexes], order: @context.vectors)
      end

      private

      # Collects, for every group, the row positions returned by calling
      # +method+ (:first or :last) with +quantity+ on the group's positions,
      # and builds a DataFrame out of those rows.
      def select_groups_from method, quantity
        rows, indexes = [], []

        @groups.each_value do |positions|
          positions.send(method, quantity).each do |idx|
            rows    << @context.row[idx].to_a
            indexes << idx
          end
        end

        Daru::DataFrame.rows(rows, order: @context.vectors, index: indexes)
      end

      # Applies the aggregate +method+ to every group of every non-grouping
      # vector whose type is :numeric. Only +method_type+ :numeric selects any
      # vector. The result has one row per group and one vector per aggregated
      # vector.
      def apply_method method_type, method
        # Decide once which vectors take part, so each name appears exactly
        # once in the resulting column order.
        targets = @non_group_vectors.each_with_object({}) do |name, chosen|
          vector = @context.vector[name]
          chosen[name] = vector if method_type == :numeric && vector.type == :numeric
        end

        rows = @groups.each_value.map do |positions|
          targets.each_value.map do |vector|
            slice = vector[*positions]
            # A one-row group yields a bare element rather than a vector; it is
            # passed through unchanged. NOTE(review): nil is passed through as
            # well, assuming a missing value comes back as nil -- confirm.
            (slice.nil? || slice.is_a?(Numeric)) ? slice : slice.send(method)
          end
        end

        order = symbolize targets.keys
        order =
          if order.all? { |e| e.is_a?(Array) }
            Daru::MultiIndex.new(order)
          else
            Daru::Index.new(order)
          end

        Daru::DataFrame.new(rows.transpose, index: group_index, order: order)
      end

      # Index describing the groups: a MultiIndex when grouping on several
      # vectors, otherwise a plain Index over the single key values.
      def group_index
        keys = symbolize @groups.keys
        multi_indexed_grouping? ? Daru::MultiIndex.new(keys) : Daru::Index.new(keys.flatten)
      end

      # Positions of every occurrence of +element+ in +arry+, in ascending order.
      def all_indices_for arry, element
        arry.each_index.select { |position| arry[position] == element }
      end

      # Converts the elements of +arry+ (or of its sub-arrays, when every
      # element is an Array) to Symbols. Numbers and anything else that cannot
      # be converted are kept as they are.
      def symbolize arry
        if arry.all? { |e| e.is_a?(Array) }
          arry.map { |sub_arry| sub_arry.map { |e| symbolize_element(e) } }
        else
          arry.map { |e| symbolize_element(e) }
        end
      end

      def symbolize_element element
        element.respond_to?(:to_sym) ? element.to_sym : element
      end

      # True when the group keys hold more than one value, i.e. the grouping
      # was done on several vectors. Looks at the key length rather than at the
      # second value itself, which may legitimately be nil or false.
      def multi_indexed_grouping?
        @groups.each_key.any? { |key| key.size > 1 }
      end
    end
  end
end
|
data/lib/daru/dataframe.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
$:.unshift File.dirname(__FILE__)
|
2
|
+
|
3
|
+
require 'accessors/dataframe_by_row.rb'
|
4
|
+
require 'accessors/dataframe_by_vector.rb'
|
5
|
+
require 'maths/arithmetic/dataframe.rb'
|
6
|
+
require 'maths/statistics/dataframe.rb'
|
7
|
+
require 'plotting/dataframe.rb'
|
8
|
+
require 'io/io.rb'
|
7
9
|
|
8
10
|
module Daru
|
9
11
|
class DataFrame
|
@@ -14,7 +16,7 @@ module Daru
|
|
14
16
|
|
15
17
|
class << self
|
16
18
|
# Load data from a CSV file.
|
17
|
-
#
|
19
|
+
# Arguments - path, options, block(optional)
|
18
20
|
#
|
19
21
|
# Accepts a block for pre-conditioning of CSV data if any.
|
20
22
|
def from_csv path, opts={}, &block
|
@@ -24,25 +26,25 @@ module Daru
|
|
24
26
|
# Create DataFrame by specifying rows as an Array of Arrays or Array of
|
25
27
|
# Daru::Vector objects.
|
26
28
|
def rows source, opts={}
|
29
|
+
df = nil
|
27
30
|
if source.all? { |v| v.size == source[0].size }
|
28
31
|
first = source[0]
|
29
32
|
index = []
|
30
|
-
order
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
Array.new(first.size) { |i| i.to_s }
|
37
|
-
end
|
38
|
-
else
|
39
|
-
opts[:order]
|
33
|
+
opts[:order] ||=
|
34
|
+
if first.is_a?(Daru::Vector) # assume that all are Vectors
|
35
|
+
source.each { |vec| index << vec.name }
|
36
|
+
first.index.to_a
|
37
|
+
elsif first.is_a?(Array)
|
38
|
+
Array.new(first.size) { |i| i.to_s }
|
40
39
|
end
|
41
40
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
df
|
41
|
+
if source.all? { |s| s.is_a?(Array) }
|
42
|
+
df = Daru::DataFrame.new(source.transpose, opts)
|
43
|
+
else # array of Daru::Vectors
|
44
|
+
df = Daru::DataFrame.new({}, opts)
|
45
|
+
source.each_with_index do |row, idx|
|
46
|
+
df[(index[idx] || idx), :row] = row
|
47
|
+
end
|
46
48
|
end
|
47
49
|
else
|
48
50
|
raise SizeError, "All vectors must have same length"
|
@@ -65,8 +67,8 @@ module Daru
|
|
65
67
|
attr_reader :size
|
66
68
|
|
67
69
|
# DataFrame basically consists of an Array of Vector objects.
|
68
|
-
#
|
69
|
-
#
|
70
|
+
# Rows are labelled by the +index+ Index object and columns by the +vectors+ Index object.
|
71
|
+
# Arguments - source, vectors, index, name.
|
70
72
|
#
|
71
73
|
# == Usage
|
72
74
|
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
|
@@ -82,43 +84,55 @@ module Daru
|
|
82
84
|
def initialize source, opts={}
|
83
85
|
vectors = opts[:order]
|
84
86
|
index = opts[:index]
|
85
|
-
@dtype = opts[:dtype] || Array
|
86
87
|
@name = (opts[:name] || SecureRandom.uuid).to_sym
|
87
88
|
@data = []
|
88
89
|
|
89
90
|
if source.empty?
|
90
|
-
@vectors =
|
91
|
-
@index =
|
91
|
+
@vectors = create_index vectors
|
92
|
+
@index = create_index index
|
92
93
|
create_empty_vectors
|
93
94
|
else
|
94
95
|
case source
|
95
96
|
when Array
|
96
|
-
if
|
97
|
-
|
98
|
-
|
99
|
-
@vectors = Daru::Index.new (vectors + (source[0].keys - vectors)).uniq.map(&:to_sym)
|
100
|
-
end
|
97
|
+
if source.all? { |s| s.is_a?(Array) }
|
98
|
+
raise ArgumentError, "Number of vectors (#{vectors.size}) should \
|
99
|
+
equal order size (#{source.size})" if source.size != vectors.size
|
101
100
|
|
102
|
-
|
103
|
-
@
|
104
|
-
else
|
105
|
-
@index = Daru::Index.new index
|
106
|
-
end
|
101
|
+
@index = create_index(index || source[0].size)
|
102
|
+
@vectors = create_index(vectors)
|
107
103
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
104
|
+
@vectors.each_with_index do |vec,idx|
|
105
|
+
@data << Daru::Vector.new(source[idx], index: @index)
|
106
|
+
end
|
107
|
+
elsif source.all? { |s| s.is_a?(Daru::Vector) }
|
108
|
+
hsh = {}
|
109
|
+
vectors.each_with_index do |name, idx|
|
110
|
+
hsh[name] = source[idx]
|
111
|
+
end
|
112
|
+
initialize(hsh, index: index, order: vectors, name: @name)
|
113
|
+
else # array of hashes
|
114
|
+
if vectors.nil?
|
115
|
+
@vectors = Daru::Index.new source[0].keys.map(&:to_sym)
|
116
|
+
else
|
117
|
+
@vectors = Daru::Index.new (vectors + (source[0].keys - vectors)).uniq.map(&:to_sym)
|
112
118
|
end
|
119
|
+
@index = Daru::Index.new(index || source.size)
|
120
|
+
|
121
|
+
@vectors.each do |name|
|
122
|
+
v = []
|
123
|
+
source.each do |hsh|
|
124
|
+
v << (hsh[name] || hsh[name.to_s])
|
125
|
+
end
|
113
126
|
|
114
|
-
|
127
|
+
@data << Daru::Vector.new(v, name: set_name(name), index: @index)
|
128
|
+
end
|
115
129
|
end
|
116
130
|
when Hash
|
117
131
|
create_vectors_index_with vectors, source
|
118
132
|
if all_daru_vectors_in_source? source
|
119
133
|
if !index.nil?
|
120
|
-
@index = index
|
121
|
-
elsif all_vectors_have_equal_indexes?
|
134
|
+
@index = create_index index
|
135
|
+
elsif all_vectors_have_equal_indexes?(source)
|
122
136
|
@index = source.values[0].index.dup
|
123
137
|
else
|
124
138
|
all_indexes = []
|
@@ -131,29 +145,17 @@ module Daru
|
|
131
145
|
@index = Daru::Index.new all_indexes
|
132
146
|
end
|
133
147
|
@vectors.each do |vector|
|
134
|
-
@data << Daru::Vector.new([], name: vector, index: @index
|
148
|
+
@data << Daru::Vector.new([], name: vector, index: @index)
|
135
149
|
|
136
150
|
@index.each do |idx|
|
137
|
-
|
138
|
-
@data[@vectors[vector]][idx] = source[vector][idx]
|
139
|
-
rescue IndexError
|
140
|
-
# If the index is not present in the vector under consideration
|
141
|
-
# (in source) then an error is raised. Put a nil in that place if
|
142
|
-
# that is the case.
|
143
|
-
@data[@vectors[vector]][idx] = nil
|
144
|
-
end
|
151
|
+
@data[@vectors[vector]][idx] = source[vector][idx]
|
145
152
|
end
|
146
153
|
end
|
147
|
-
else
|
148
|
-
index = source.values[0].size
|
149
|
-
if index.is_a?(Daru::Index)
|
150
|
-
@index = index.to_index
|
151
|
-
else
|
152
|
-
@index = Daru::Index.new index
|
153
|
-
end
|
154
|
+
else
|
155
|
+
@index = create_index(index || source.values[0].size)
|
154
156
|
|
155
157
|
@vectors.each do |name|
|
156
|
-
@data << source[name].dup
|
158
|
+
@data << Daru::Vector.new(source[name].dup, name: set_name(name), index: @index)
|
157
159
|
end
|
158
160
|
end
|
159
161
|
end
|
@@ -164,10 +166,17 @@ module Daru
|
|
164
166
|
end
|
165
167
|
|
166
168
|
# Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
|
167
|
-
# Use of this method is not recommended for accessing
|
168
|
-
# Use df.row[:a] for accessing row with index ':a' or
|
169
|
-
# accessing vector with index
|
170
|
-
def [](*names
|
169
|
+
# Defaults to *:vector*. Use of this method is not recommended for accessing
|
170
|
+
# rows or vectors. Use df.row[:a] for accessing row with index ':a' or
|
171
|
+
# df.vector[:vec] for accessing vector with index *:vec*.
|
172
|
+
def [](*names)
|
173
|
+
if names[-1] == :vector or names[-1] == :row
|
174
|
+
axis = names[-1]
|
175
|
+
names = names[0..-2]
|
176
|
+
else
|
177
|
+
axis = :vector
|
178
|
+
end
|
179
|
+
|
171
180
|
if axis == :vector
|
172
181
|
access_vector *names
|
173
182
|
elsif axis == :row
|
@@ -184,7 +193,12 @@ module Daru
|
|
184
193
|
# In case a Daru::Vector is specified after the equality the sign, the indexes
|
185
194
|
# of the vector will be matched against the row/vector indexes of the DataFrame
|
186
195
|
# before an insertion is performed. Unmatched indexes will be set to nil.
|
187
|
-
def []=(
|
196
|
+
def []=(*args)
|
197
|
+
name = args[0]
|
198
|
+
axis = args[1]
|
199
|
+
vector = args[-1]
|
200
|
+
|
201
|
+
axis = (!axis.is_a?(Symbol) and (axis != :vector or axis != :row)) ? :vector : axis
|
188
202
|
if axis == :vector
|
189
203
|
insert_or_modify_vector name, vector
|
190
204
|
elsif axis == :row
|
@@ -203,6 +217,11 @@ module Daru
|
|
203
217
|
Daru::Accessors::DataFrameByVector.new(self)
|
204
218
|
end
|
205
219
|
|
220
|
+
# Access a vector by name.
|
221
|
+
def column name
|
222
|
+
vector[name]
|
223
|
+
end
|
224
|
+
|
206
225
|
# Access a row or set/create a row. Refer #[] and #[]= docs for details.
|
207
226
|
#
|
208
227
|
# == Usage
|
@@ -219,18 +238,24 @@ module Daru
|
|
219
238
|
src[vector] = @data[@vectors[vector]].dup
|
220
239
|
end
|
221
240
|
|
222
|
-
Daru::DataFrame.new src, order: @vectors.dup, index: @index.dup, name: @name
|
241
|
+
Daru::DataFrame.new src, order: @vectors.dup, index: @index.dup, name: @name
|
223
242
|
end
|
224
243
|
|
225
244
|
# Iterate over each vector
|
226
245
|
def each_vector(&block)
|
246
|
+
return to_enum(:each_vector) unless block_given?
|
247
|
+
|
227
248
|
@data.each(&block)
|
228
249
|
|
229
250
|
self
|
230
251
|
end
|
231
252
|
|
253
|
+
alias_method :each_column, :each_vector
|
254
|
+
|
232
255
|
# Iterate over each vector along with the name of the vector
|
233
256
|
def each_vector_with_index(&block)
|
257
|
+
return to_enum(:each_vector_with_index) unless block_given?
|
258
|
+
|
234
259
|
@vectors.each do |vector|
|
235
260
|
yield @data[@vectors[vector]], vector
|
236
261
|
end
|
@@ -238,8 +263,12 @@ module Daru
|
|
238
263
|
self
|
239
264
|
end
|
240
265
|
|
266
|
+
alias_method :each_column_with_index, :each_vector_with_index
|
267
|
+
|
241
268
|
# Iterate over each row
|
242
269
|
def each_row(&block)
|
270
|
+
return to_enum(:each_row) unless block_given?
|
271
|
+
|
243
272
|
@index.each do |index|
|
244
273
|
yield access_row(index)
|
245
274
|
end
|
@@ -248,6 +277,8 @@ module Daru
|
|
248
277
|
end
|
249
278
|
|
250
279
|
def each_row_with_index(&block)
|
280
|
+
return to_enum(:each_row_with_index) unless block_given?
|
281
|
+
|
251
282
|
@index.each do |index|
|
252
283
|
yield access_row(index), index
|
253
284
|
end
|
@@ -256,17 +287,27 @@ module Daru
|
|
256
287
|
end
|
257
288
|
|
258
289
|
# Map each vector. Returns a DataFrame whose vectors are modified according
|
259
|
-
# to the value returned by the block.
|
290
|
+
# to the value returned by the block. As is the case with Enumerable#map,
|
291
|
+
# the object returned by each block must be a Daru::Vector for the dataframe
|
292
|
+
# to remain relevant.
|
260
293
|
def map_vectors(&block)
|
261
|
-
|
262
|
-
df.each_vector_with_index do |vector, name|
|
263
|
-
df[name, :vector] = yield(vector)
|
264
|
-
end
|
294
|
+
return to_enum(:map_vectors) unless block_given?
|
265
295
|
|
266
|
-
|
296
|
+
self.dup.map_vectors!(&block)
|
267
297
|
end
|
268
298
|
|
299
|
+
# Destructive form of #map_vectors
|
300
|
+
def map_vectors!(&block)
|
301
|
+
return to_enum(:map_vectors!) unless block_given?
|
302
|
+
|
303
|
+
@data.map!(&block)
|
304
|
+
self
|
305
|
+
end
|
306
|
+
|
307
|
+
# Map vectors along with the index.
|
269
308
|
def map_vectors_with_index(&block)
|
309
|
+
return to_enum(:map_vectors_with_index) unless block_given?
|
310
|
+
|
270
311
|
df = self.dup
|
271
312
|
df.each_vector_with_index do |vector, name|
|
272
313
|
df[name, :vector] = yield(vector, name)
|
@@ -277,6 +318,8 @@ module Daru
|
|
277
318
|
|
278
319
|
# Map each row
|
279
320
|
def map_rows(&block)
|
321
|
+
return to_enum(:map_rows) unless block_given?
|
322
|
+
|
280
323
|
df = self.dup
|
281
324
|
df.each_row_with_index do |row, index|
|
282
325
|
df[index, :row] = yield(row)
|
@@ -286,6 +329,8 @@ module Daru
|
|
286
329
|
end
|
287
330
|
|
288
331
|
def map_rows_with_index(&block)
|
332
|
+
return to_enum(:map_rows_with_index) unless block_given?
|
333
|
+
|
289
334
|
df = self.dup
|
290
335
|
df.each_row_with_index do |row, index|
|
291
336
|
df[index, :row] = yield(row, index)
|
@@ -302,13 +347,16 @@ module Daru
|
|
302
347
|
else
|
303
348
|
raise IndexError, "Vector #{vector} does not exist."
|
304
349
|
end
|
350
|
+
|
351
|
+
self
|
305
352
|
end
|
306
353
|
|
354
|
+
# Delete a row
|
307
355
|
def delete_row index
|
308
356
|
idx = named_index_for index
|
309
357
|
|
310
358
|
if @index.include? idx
|
311
|
-
@index = (@index.to_a - [idx])
|
359
|
+
@index = reassign_index_as(@index.to_a - [idx])
|
312
360
|
self.each_vector do |vector|
|
313
361
|
vector.delete_at idx
|
314
362
|
end
|
@@ -343,6 +391,8 @@ module Daru
|
|
343
391
|
# Iterates over each row and retains it in a new DataFrame if the block returns
|
344
392
|
# true for that row.
|
345
393
|
def filter_rows &block
|
394
|
+
return to_enum(:filter_rows) unless block_given?
|
395
|
+
|
346
396
|
df = Daru::DataFrame.new({}, order: @vectors.to_a)
|
347
397
|
marked = []
|
348
398
|
|
@@ -361,39 +411,255 @@ module Daru
|
|
361
411
|
# Iterates over each vector and retains it in a new DataFrame if the block returns
|
362
412
|
# true for that vector.
|
363
413
|
def filter_vectors &block
|
414
|
+
return to_enum(:filter_vectors) unless block_given?
|
415
|
+
|
364
416
|
df = self.dup
|
365
417
|
df.keep_vector_if &block
|
366
418
|
|
367
419
|
df
|
368
420
|
end
|
369
421
|
|
422
|
+
# Return the number of rows and columns of the DataFrame in an Array.
|
423
|
+
def shape
|
424
|
+
[@index.size, @vectors.size]
|
425
|
+
end
|
426
|
+
|
427
|
+
# The number of rows
|
428
|
+
def rows
|
429
|
+
shape[0]
|
430
|
+
end
|
431
|
+
|
432
|
+
# The number of vectors
|
433
|
+
def cols
|
434
|
+
shape[1]
|
435
|
+
end
|
436
|
+
|
370
437
|
# Check if a vector is present
|
371
|
-
def has_vector?
|
372
|
-
!!@vectors[
|
438
|
+
def has_vector? vector
|
439
|
+
!!@vectors[*vector]
|
373
440
|
end
|
374
441
|
|
442
|
+
# The first +quantity+ rows of the DataFrame (ten by default).
#
# @param [Fixnum] quantity (10) The number of rows to return from the top.
def head quantity=10
  # The range is inclusive, so it must stop at quantity - 1 to yield exactly
  # +quantity+ rows (mirrors #tail, which ends at @size - 1).
  self[0..(quantity-1), :row]
end
|
378
448
|
|
449
|
+
# The last ten elements of the DataFrame
|
450
|
+
#
|
451
|
+
# @param [Fixnum] quantity (10) The number of elements to display from the bottom.
|
379
452
|
def tail quantity=10
|
380
|
-
self[(@size - quantity)
|
453
|
+
self[(@size - quantity)..(@size-1), :row]
|
381
454
|
end
|
382
455
|
|
383
|
-
#
|
384
|
-
|
385
|
-
|
456
|
+
# Group elements by vector to perform operations on them.
#
# @param vectors [Symbol, String, Array] name of the vector to group by, or
#   an Array of such names.
# @return [Daru::Core::GroupBy]
# @raise [ArgumentError] if no name is given or a named vector does not exist.
def group_by vectors
  # Accept a lone name of any kind, not just a Symbol.
  vectors = Array(vectors)
  raise ArgumentError, "Specify at least one vector to group by" if vectors.empty?

  vectors.each do |v|
    raise(ArgumentError, "Vector #{v} does not exist") unless has_vector?(v)
  end

  Daru::Core::GroupBy.new(self, vectors)
end
|
464
|
+
|
465
|
+
# Change the index of the DataFrame and its underlying vectors. Destructive.
#
# @param [Symbol, Array] new_index Specify :seq to number the rows from 0,
#   or an Array of new index labels holding one label per row.
# @raise [ArgumentError] if an Array is given whose size differs from the
#   number of rows.
# @return [Daru::DataFrame] self
def reindex! new_index
  if new_index.is_a?(Array) && new_index.size != @size
    raise ArgumentError, "Index size must equal dataframe size"
  end

  @index = possibly_multi_index?(new_index == :seq ? @size : new_index)
  # Every vector receives its own freshly built index object.
  @data.map! { |vector| vector.reindex possibly_multi_index?(@index.to_a) }

  self
end
|
478
|
+
|
479
|
+
# Non-destructive version of #reindex!
|
480
|
+
def reindex new_index
|
481
|
+
self.dup.reindex! new_index
|
482
|
+
end
|
483
|
+
|
484
|
+
# Return the names of all the vectors whose type is *:numeric*, in vector
# order. According to the original note this includes vectors holding nils
# along with numbers.
def numeric_vectors
  each_vector.select { |vec| vec.type == :numeric }.map(&:name)
end
|
494
|
+
|
495
|
+
# Sorts a dataframe (ascending/descending) according to the given sequence of
|
496
|
+
# vectors, using the attributes provided in the blocks. Works for 2 LEVELS ONLY.
|
497
|
+
#
|
498
|
+
# @param order [Array] The order of vector names in which the DataFrame
|
499
|
+
# should be sorted.
|
500
|
+
# @param [Hash] opts The options to sort with.
|
501
|
+
# @option opts [TrueClass,FalseClass,Array] :ascending (true) Sort in ascending
|
502
|
+
# or descending order. Specify Array corresponding to *order* for multiple
|
503
|
+
# sort orders.
|
504
|
+
# @option opts [Hash] :by ({|a,b| a <=> b}) Specify attributes of objects to
|
505
|
+
# be used for sorting, for each vector name in *order* as a hash of
|
506
|
+
# vector name and lambda pairs. In case a lambda for a vector is not
|
507
|
+
# specified, the default will be used.
|
508
|
+
#
|
509
|
+
# == Usage
|
510
|
+
#
|
511
|
+
# df = Daru::DataFrame.new({a: [-3,2,-1,4], b: [4,3,2,1]})
|
512
|
+
#
|
513
|
+
# #<Daru::DataFrame:140630680 @name = 04e00197-f8d5-4161-bca2-93266bfabc6f @size = 4>
|
514
|
+
# # a b
|
515
|
+
# # 0 -3 4
|
516
|
+
# # 1 2 3
|
517
|
+
# # 2 -1 2
|
518
|
+
# # 3 4 1
|
519
|
+
# df.sort([:a], by: { a: lambda { |a,b| a.abs <=> b.abs } })
|
520
|
+
def sort! vector_order, opts={}
|
521
|
+
raise ArgumentError, "Required atleast one vector name" if vector_order.size < 1
|
522
|
+
opts = {
|
523
|
+
ascending: true,
|
524
|
+
type: :quick_sort,
|
525
|
+
by: {}
|
526
|
+
}.merge(opts)
|
527
|
+
|
528
|
+
opts[:by] = create_logic_blocks vector_order, opts[:by]
|
529
|
+
opts[:ascending] = sort_order_array vector_order, opts[:ascending]
|
530
|
+
index = @index.to_a
|
531
|
+
send(opts[:type], vector_order, index, opts[:by], opts[:ascending])
|
532
|
+
reindex! index
|
533
|
+
end
|
534
|
+
|
535
|
+
# Non-destructive version of #sort!
|
536
|
+
def sort vector_order, opts={}
|
537
|
+
self.dup.sort! vector_order, opts
|
538
|
+
end
|
539
|
+
|
540
|
+
# Pivots a data frame on specified vectors and applies an aggregate function
|
541
|
+
# to quickly generate a summary.
|
542
|
+
#
|
543
|
+
# == Options
|
544
|
+
#
|
545
|
+
# +:index+ - Keys to group by on the pivot table row index. Pass vector names
|
546
|
+
# contained in an Array.
|
547
|
+
#
|
548
|
+
# +:vectors+ - Keys to group by on the pivot table column index. Pass vector
|
549
|
+
# names contained in an Array.
|
550
|
+
#
|
551
|
+
# +:agg+ - Function to aggregate the grouped values. Default to *:mean*. Can
|
552
|
+
# use any of the statistics functions applicable on Vectors that can be found in
|
553
|
+
# the Daru::Statistics::Vector module.
|
554
|
+
#
|
555
|
+
# +:values+ - Columns to aggregate. Will consider all numeric columns not
|
556
|
+
# specified in *:index* or *:vectors*. Optional.
|
557
|
+
#
|
558
|
+
# == Usage
|
559
|
+
#
|
560
|
+
# df = Daru::DataFrame.new({
|
561
|
+
# a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
|
562
|
+
# b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
|
563
|
+
# c: ['small','large','large','small','small','large','small','large','small'],
|
564
|
+
# d: [1,2,2,3,3,4,5,6,7],
|
565
|
+
# e: [2,4,4,6,6,8,10,12,14]
|
566
|
+
# })
|
567
|
+
# df.pivot_table(index: [:a], vectors: [:b], agg: :sum, values: :e)
|
568
|
+
#
|
569
|
+
# #=>
|
570
|
+
# # #<Daru::DataFrame:88342020 @name = 08cdaf4e-b154-4186-9084-e76dd191b2c9 @size = 2>
|
571
|
+
# # [:e, :one] [:e, :two]
|
572
|
+
# # [:bar] 18 26
|
573
|
+
# # [:foo] 10 12
|
574
|
+
def pivot_table opts={}
|
575
|
+
raise ArgumentError, "Specify grouping index" if !opts[:index] or opts[:index].empty?
|
386
576
|
|
387
|
-
|
577
|
+
index = opts[:index]
|
578
|
+
vectors = opts[:vectors] || []
|
579
|
+
aggregate_function = opts[:agg] || :mean
|
580
|
+
values =
|
581
|
+
if opts[:values].is_a?(Symbol)
|
582
|
+
[opts[:values]]
|
583
|
+
elsif opts[:values].is_a?(Array)
|
584
|
+
opts[:values]
|
585
|
+
else # nil
|
586
|
+
(@vectors.to_a - (index | vectors)) & numeric_vectors
|
587
|
+
end
|
388
588
|
|
389
|
-
|
589
|
+
raise IndexError, "No numeric vectors to aggregate" if values.empty?
|
590
|
+
|
591
|
+
grouped = group_by(index)
|
592
|
+
|
593
|
+
unless vectors.empty?
|
594
|
+
super_hash = {}
|
595
|
+
values.each do |value|
|
596
|
+
grouped.groups.each do |group_name, row_numbers|
|
597
|
+
super_hash[group_name] ||= {}
|
598
|
+
|
599
|
+
row_numbers.each do |num|
|
600
|
+
arry = []
|
601
|
+
arry << value
|
602
|
+
vectors.each { |v| arry << self[v][num] }
|
603
|
+
sub_hash = super_hash[group_name]
|
604
|
+
sub_hash[arry] ||= []
|
605
|
+
|
606
|
+
sub_hash[arry] << self[value][num]
|
607
|
+
end
|
608
|
+
end
|
609
|
+
end
|
610
|
+
|
611
|
+
super_hash.each_value do |sub_hash|
|
612
|
+
sub_hash.each do |group_name, aggregates|
|
613
|
+
sub_hash[group_name] = Daru::Vector.new(aggregates).send(aggregate_function)
|
614
|
+
end
|
615
|
+
end
|
616
|
+
|
617
|
+
df_index = Daru::MultiIndex.new(symbolize(super_hash.keys))
|
618
|
+
|
619
|
+
vector_indexes = []
|
620
|
+
super_hash.each_value do |sub_hash|
|
621
|
+
vector_indexes.concat sub_hash.keys
|
622
|
+
end
|
623
|
+
df_vectors = Daru::MultiIndex.new symbolize(vector_indexes.uniq)
|
624
|
+
pivoted_dataframe = Daru::DataFrame.new({}, index: df_index, order: df_vectors)
|
625
|
+
|
626
|
+
super_hash.each do |row_index, sub_h|
|
627
|
+
sub_h.each do |vector_index, val|
|
628
|
+
pivoted_dataframe[symbolize(vector_index)][symbolize(row_index)] = val
|
629
|
+
end
|
630
|
+
end
|
631
|
+
return pivoted_dataframe
|
632
|
+
else
|
633
|
+
grouped.send(aggregate_function)
|
634
|
+
end
|
635
|
+
end
|
636
|
+
|
637
|
+
# Convert all vectors of type *:numeric* into a Matrix. Each such vector
# becomes one column; other vectors are left out.
def to_matrix
  numeric_columns = each_vector.select { |vector| vector.type == :numeric }
  Matrix.columns numeric_columns.map(&:to_a)
end
|
646
|
+
|
647
|
+
# Convert all vectors of type *:numeric* and not containing nils into an NMatrix.
# Each qualifying vector becomes one column of the result.
def to_nmatrix
  usable = each_vector.select do |vector|
    vector.type == :numeric && vector.nil_positions.size == 0
  end

  usable.map(&:to_a).transpose.to_nm
end
|
390
657
|
|
391
658
|
# Converts the DataFrame into an array of hashes where key is vector name
|
392
|
-
#
|
393
|
-
#
|
394
|
-
#
|
395
|
-
#
|
396
|
-
# the same index.
|
659
|
+
# and value is the corresponding element. The 0th index of the array contains
|
660
|
+
# the array of hashes while the 1st index contains the indexes of each row
|
661
|
+
# of the dataframe. Each element in the index array corresponds to its row
|
662
|
+
# in the array of hashes, which has the same index.
|
397
663
|
def to_a
|
398
664
|
arry = [[],[]]
|
399
665
|
self.each_row do |row|
|
@@ -443,7 +709,28 @@ module Daru
|
|
443
709
|
to_html
|
444
710
|
end
|
445
711
|
|
446
|
-
#
|
712
|
+
# Change dtypes of vectors by supplying a hash of :vector_name => :new_dtype
|
713
|
+
#
|
714
|
+
# == Usage
|
715
|
+
# df = Daru::DataFrame.new({a: [1,2,3], b: [1,2,3], c: [1,2,3]})
|
716
|
+
# df.recast a: :nmatrix, c: :nmatrix
|
717
|
+
def recast opts={}
|
718
|
+
opts.each do |vector_name, dtype|
|
719
|
+
vector[vector_name].cast(dtype: dtype)
|
720
|
+
end
|
721
|
+
end
|
722
|
+
|
723
|
+
# Transpose a DataFrame, transposing the elements and swapping the row index
# with the vector index. Returns a new DataFrame carrying the same name.
def transpose
  columns = each_vector.map(&:to_a)

  Daru::DataFrame.new(columns.transpose, index: @vectors, order: @index, dtype: @dtype, name: @name)
end
|
732
|
+
|
733
|
+
# Pretty print in a nice table format for the command line (irb/pry/iruby)
|
447
734
|
def inspect spacing=10, threshold=15
|
448
735
|
longest = [@name.to_s.size,
|
449
736
|
@vectors.map(&:to_s).map(&:size).max,
|
@@ -477,23 +764,14 @@ module Daru
|
|
477
764
|
content
|
478
765
|
end
|
479
766
|
|
480
|
-
def dtype= dtype
|
481
|
-
@dtype = dtype
|
482
|
-
|
483
|
-
@vectors.each do |vec|
|
484
|
-
pos = @vectors[vec]
|
485
|
-
@data[pos] = @data[pos].coerce(@dtype)
|
486
|
-
end
|
487
|
-
end
|
488
|
-
|
489
767
|
def == other
|
490
|
-
@index == other.index and @size == other.size and @vectors.
|
491
|
-
|
768
|
+
@index == other.index and @size == other.size and @vectors == other.vectors and
|
769
|
+
@vectors.all? { |vector| self[vector, :vector] == other[vector, :vector] }
|
492
770
|
end
|
493
771
|
|
494
772
|
def method_missing(name, *args, &block)
|
495
773
|
if md = name.match(/(.+)\=/)
|
496
|
-
insert_or_modify_vector name[/(.+)\=/].delete("="), args[0]
|
774
|
+
insert_or_modify_vector name[/(.+)\=/].delete("=").to_sym, args[0]
|
497
775
|
elsif self.has_vector? name
|
498
776
|
self[name, :vector]
|
499
777
|
else
|
@@ -503,81 +781,234 @@ module Daru
|
|
503
781
|
|
504
782
|
private
|
505
783
|
|
506
|
-
def
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
784
|
+
def possibly_multi_index? index
|
785
|
+
if @index.is_a?(MultiIndex)
|
786
|
+
Daru::MultiIndex.new(index)
|
787
|
+
else
|
788
|
+
Daru::Index.new(index)
|
789
|
+
end
|
790
|
+
end
|
791
|
+
|
792
|
+
def quick_sort vector_order, index, by, ascending
|
793
|
+
recursive_quick_sort vector_order, index, by, ascending, 0, @size-1
|
794
|
+
end
|
795
|
+
|
796
|
+
# Recursively sorts the row positions left_lower..right_upper.
#
# == Arguments
#
# vector_order - Passed through unchanged to #partition (vector names to sort on).
# index        - Passed through unchanged to #partition.
# by           - Passed through unchanged to #partition (comparison blocks).
# ascending    - Passed through unchanged to #partition (sort directions).
# left_lower   - First row position of the range to sort.
# right_upper  - Last row position of the range to sort.
#
# #partition returns the upper bound of the left sub-range and the lower
# bound of the right sub-range. The sub-range with fewer rows is processed
# first. Always returns nil.
def recursive_quick_sort vector_order, index, by, ascending, left_lower, right_upper
  return unless left_lower < right_upper

  left_upper, right_lower = partition(vector_order, index, by, ascending, left_lower, right_upper)

  sub_ranges = [[left_lower, left_upper], [right_lower, right_upper]]
  left_is_smaller = left_upper - left_lower < right_upper - right_lower
  sub_ranges.reverse! unless left_is_smaller

  sub_ranges.each do |lower, upper|
    recursive_quick_sort(vector_order, index, by, ascending, lower, upper)
  end

  nil
end
|
517
816
|
|
518
|
-
|
519
|
-
|
817
|
+
# Partitions the rows left_lower..right_upper around the row in the middle
# of that range.
#
# The pivot values (one per entry of +vector_order+) are read once, before
# any row is moved. Two cursors then walk towards each other for as long as
# #keep? is truthy: the lower cursor with the +ascending+ flags, the upper
# cursor with the negated flags. Rows they stop on are swapped in every
# vector of @data and in +index+.
#
# Returns [upper_cursor, lower_cursor], the final positions of the two cursors.
def partition vector_order, index, by, ascending, left_lower, right_upper
  pivot_position = (left_lower + right_upper) / 2
  pivot_values   = vector_order.map { |vector_name| vector[vector_name][pivot_position] }
  low  = left_lower
  high = right_upper
  descending = ascending.map { |flag| !flag }

  # Exchanges rows +low+ and +high+ in every column and in the index.
  swap_rows = lambda do
    @data.each { |column| column[low], column[high] = column[high], column[low] }
    index[low], index[high] = index[high], index[low]
  end

  # Moves both cursors inwards past rows that are already on the right side.
  advance = lambda do
    low  += 1 while keep?(low,  pivot_values, vector_order, ascending,  by, 0)
    high -= 1 while keep?(high, pivot_values, vector_order, descending, by, 0)
  end

  advance.call

  while low < high - 1
    swap_rows.call
    low  += 1
    high -= 1

    advance.call
  end

  if low <= high
    swap_rows.call if low < high
    low  += 1
    high -= 1
  end

  [high, low]
end
|
525
852
|
|
526
|
-
def
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
if
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
853
|
+
# Decides whether the scanning cursor of #partition may move past the row at
# +current_index+.
#
# The row's value in the vector named vector_order[vector_order_index] is
# compared with the matching pivot value in +mvalues+ using the block stored
# in +by+ for that vector. Ties are resolved by recursing into the next
# entry of +vector_order+.
#
# Any negative or positive comparator result is honoured, not only exactly
# -1 and 1, so comparators such as <tt>lambda { |a, b| a - b }</tt> work.
#
# Returns true when the row sorts strictly before the pivot in the requested
# direction (a truthy entry in +sort_order+ means ascending), false when it
# sorts strictly after it, and nil when the row ties with the pivot on every
# vector or the comparator did not return a number.
def keep? current_index, mvalues, vector_order, sort_order, by, vector_order_index
  vector_name = vector_order[vector_order_index]
  return unless vector_name # no sort keys left: complete tie with the pivot

  vec = vector[vector_name]
  comparison = by[vector_name].call(vec[current_index], mvalues[vector_order_index])
  return unless comparison.is_a?(Numeric) # e.g. nil from <=> on incomparable values

  if comparison.zero?
    keep?(current_index, mvalues, vector_order, sort_order, by, vector_order_index + 1)
  elsif sort_order[vector_order_index] # sort in ascending order
    comparison < 0
  else                                 # sort in descending order
    comparison > 0
  end
end
|
538
874
|
|
539
|
-
|
875
|
+
# Makes sure every vector named in +vector_order+ has a comparison block.
#
# Entries already present (and truthy) in +by+ are kept; every other vector
# gets one shared lambda that compares with <=>. The +by+ hash is filled in
# place and returned.
def create_logic_blocks vector_order, by={}
  default_comparison = ->(a, b) { a <=> b }

  vector_order.each_with_object(by) do |vector_name, blocks|
    blocks[vector_name] ||= default_comparison
  end
end
|
883
|
+
|
884
|
+
# Normalises the +ascending+ option into one flag per sorted vector.
#
# A non-Array value is repeated once for every entry of +vector_order+.
# An Array is returned as is, after checking it has one entry per vector.
#
# Raises ArgumentError when an Array of the wrong length is given.
def sort_order_array vector_order, ascending
  return Array.new(vector_order.size, ascending) unless ascending.is_a?(Array)

  unless vector_order.size == ascending.size
    raise ArgumentError, "Specify same number of vector names and sort orders"
  end

  ascending
end
|
893
|
+
|
894
|
+
# Resolves +location+ to a position in @data.
#
# * When @vectors includes +location+, the value stored in @vectors for it
#   is returned.
# * A bare Integer that is not a vector name is returned unchanged.
# * Otherwise the first element of +location+ is returned if it is an Integer.
# * In every other case the result is nil.
def vectors_index_for location
  return @vectors[location] if @vectors.include?(location)

  # Must be checked before indexing into +location+: Integer#[] is a bit
  # reference, so 2[0] would yield 0 instead of the requested position 2.
  return location if location.is_a?(Integer)

  first = location[0]
  first if first.is_a?(Integer)
end
|
901
|
+
|
902
|
+
# Fetches one or more vectors by name or position.
#
# When @vectors is a MultiIndex, +names+ is resolved as a whole through
# #vectors_index_for: an Integer result returns that single entry of @data;
# any other result is mapped tuple by tuple into a new DataFrame whose order
# is a MultiIndex built from the result.
#
# Otherwise a single name returns the matching entry of @data, and several
# names return a new DataFrame holding those vectors (non-Integer names are
# converted to symbols).
def access_vector *names
  if @vectors.is_a?(MultiIndex)
    pos = vectors_index_for names
    return @data[pos] if pos.is_a?(Integer)

    # Partial key: +pos+ lists the remaining levels of every matching vector.
    sub_vectors = pos.map { |tuple| @data[vectors_index_for(names + tuple)] }
    return Daru::DataFrame.new(sub_vectors, index: @index, order: Daru::MultiIndex.new(pos.to_a))
  end

  return @data[vectors_index_for(names[0])] unless names[1]

  selected = names.each_with_object({}) do |name, chosen|
    key = name.is_a?(Integer) ? name : name.to_sym
    chosen[key] = @data[@vectors[key]]
  end

  Daru::DataFrame.new selected, order: selected.keys, index: @index, name: @name
end
|
929
|
+
|
930
|
+
# Fetches one or more rows by index label, position or Range.
#
# With a MultiIndex row index, +names+ is resolved through #row_index_for.
# An Integer result yields a single Daru::Vector for that row; any other
# result yields a DataFrame of the matching rows, indexed by a MultiIndex
# built from the result.
#
# With a plain index, a single non-Range argument yields one Daru::Vector.
# A single Range is first expanded to the index labels it covers (label
# ranges are translated to positions in @index.to_a), and then, like several
# explicit names, each label is fetched through #row and the rows are
# collected into a DataFrame.
def access_row *names
  location = names[0]

  if @index.is_a?(MultiIndex)
    pos = row_index_for names
    return Daru::Vector.new(populate_row_for(pos), index: @vectors, name: pos) if pos.is_a?(Integer)

    ranged = location.is_a?(Range)
    new_rows = pos.map { |tuple| populate_row_for(ranged ? tuple : names + tuple) }

    return Daru::DataFrame.rows(new_rows, order: @vectors, name: @name,
      index: Daru::MultiIndex.new(pos.to_a))
  end

  if names[1].nil?
    unless location.is_a?(Range)
      name = named_index_for location
      row  = []
      @vectors.each { |vector| row << @data[@vectors[vector]][name] }

      return Daru::Vector.new(row, index: @vectors, name: set_name(name))
    end

    index_arry = @index.to_a
    range =
      if location.first.is_a?(Numeric)
        location
      else
        first_index = index_arry.index location.first
        last_index  = index_arry.index location.last

        first_index..last_index
      end

    names = index_arry[range]
  end

  # Access multiple rows
  rows = []
  names.each { |name| rows << self.row[name] }

  Daru::DataFrame.rows rows, name: @name
end
|
983
|
+
|
984
|
+
# Resolves +location+ to a row position (or to whatever @index returns for it).
#
# * When @index includes +location+, or the first element of +location+ is
#   a Range, the lookup is delegated to @index.
# * A bare Integer that is not an index label is returned unchanged.
# * Otherwise the first element of +location+ is returned if it is an Integer.
# * In every other case the result is nil.
def row_index_for location
  return @index[location] if @index.include?(location)

  # Must be checked before indexing into +location+: Integer#[] is a bit
  # reference, so 2[0] would yield 0 instead of the requested position 2.
  return location if location.is_a?(Integer)

  first = location[0]
  if first.is_a?(Range)
    @index[location]
  elsif first.is_a?(Integer)
    first
  end
end
|
991
|
+
|
992
|
+
# Collects the element stored under +pos+ from every vector, in the order
# @vectors enumerates them, and returns the values of that #map call.
def populate_row_for pos
  @vectors.map { |vector_name| @data[@vectors[vector_name]][pos] }
end
|
561
997
|
|
562
998
|
def insert_or_modify_vector name, vector
|
563
|
-
@vectors =
|
999
|
+
@vectors = reassign_index_as(@vectors + name)
|
564
1000
|
v = nil
|
565
1001
|
|
566
1002
|
if vector.is_a?(Daru::Vector)
|
567
|
-
v = Daru::Vector.new [], name: name, index: @index
|
568
|
-
nil_data = false
|
1003
|
+
v = Daru::Vector.new [], name: set_name(name), index: @index
|
569
1004
|
@index.each do |idx|
|
570
|
-
|
571
|
-
v[idx] = vector[idx]
|
572
|
-
rescue IndexError
|
573
|
-
v[idx] = nil
|
574
|
-
end
|
1005
|
+
v[idx] = vector[idx]
|
575
1006
|
end
|
576
1007
|
else
|
577
1008
|
raise Exception, "Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}" if
|
578
1009
|
@size != vector.size
|
579
1010
|
|
580
|
-
v =
|
1011
|
+
v = Daru::Vector.new(vector, name: set_name(name), index: @index)
|
581
1012
|
end
|
582
1013
|
|
583
1014
|
@data[@vectors[name]] = v
|
@@ -585,25 +1016,17 @@ module Daru
|
|
585
1016
|
|
586
1017
|
def insert_or_modify_row name, vector
|
587
1018
|
if @index.include? name
|
588
|
-
v = vector.dv(name, @vectors, @dtype)
|
1019
|
+
v = vector.dv(name, @vectors, @dtype)
|
589
1020
|
|
590
1021
|
@vectors.each do |vector|
|
591
|
-
|
592
|
-
@data[@vectors[vector]][name] = v[vector]
|
593
|
-
rescue IndexError
|
594
|
-
@data[@vectors[vector]][name] = nil
|
595
|
-
end
|
1022
|
+
@data[@vectors[vector]][name] = v[vector]
|
596
1023
|
end
|
597
1024
|
else
|
598
|
-
@index =
|
599
|
-
v =
|
1025
|
+
@index = reassign_index_as(@index + name)
|
1026
|
+
v = Daru::Vector.new(vector, name: set_name(name), index: @vectors)
|
600
1027
|
|
601
1028
|
@vectors.each do |vector|
|
602
|
-
|
603
|
-
@data[@vectors[vector]].concat v[vector], name
|
604
|
-
rescue IndexError
|
605
|
-
@data[@vectors[vector]].concat nil, name
|
606
|
-
end
|
1029
|
+
@data[@vectors[vector]].concat v[vector], name
|
607
1030
|
end
|
608
1031
|
end
|
609
1032
|
|
@@ -612,16 +1035,16 @@ module Daru
|
|
612
1035
|
|
613
1036
|
# Appends one empty Daru::Vector to @data for every entry of @vectors.
# Each new vector is named through #set_name and uses @index as its index.
def create_empty_vectors
  @vectors.each do |vector_name|
    blank = Daru::Vector.new([], name: set_name(vector_name), index: @index)
    @data << blank
  end
end
|
618
1041
|
|
619
1042
|
# Checks that the labels agree with the stored data.
#
# Raises IndexError when @vectors is set and its size differs from the
# number of entries in @data, or when @index and @data[0] are both set and
# their sizes differ. Returns nil otherwise.
def validate_labels
  if @vectors && @vectors.size != @data.size
    raise IndexError, "Expected equal number of vector names (#{@vectors.size}) for number of vectors (#{@data.size})."
  end

  if @index && @data[0] && @index.size != @data[0].size
    raise IndexError, "Expected number of indexes same as number of rows"
  end
end
|
626
1049
|
|
627
1050
|
def validate_vector_sizes
|
@@ -631,8 +1054,6 @@ module Daru
|
|
631
1054
|
end
|
632
1055
|
|
633
1056
|
# Runs both consistency checks: labels first, then vector sizes.
# Returns whatever #validate_vector_sizes returns.
def validate
  validate_labels()
  validate_vector_sizes()
end
|
@@ -660,10 +1081,10 @@ module Daru
|
|
660
1081
|
# Sets @vectors from the requested vector order.
#
# +vectors+ defaults to the sorted keys of +source+ when nil. An Index or
# MultiIndex is stored as is. Anything else is extended with the keys of
# +source+ it does not already list, de-duplicated, symbolized and wrapped
# in a Daru::Index.
def create_vectors_index_with vectors, source
  vectors = source.keys.sort if vectors.nil?

  @vectors =
    if vectors.is_a?(Index) || vectors.is_a?(MultiIndex)
      vectors
    else
      all_names = vectors + (source.keys - vectors)
      Daru::Index.new(all_names.uniq.map(&:to_sym))
    end
end
|
669
1090
|
|
@@ -674,5 +1095,32 @@ module Daru
|
|
674
1095
|
index == vector.index
|
675
1096
|
end
|
676
1097
|
end
|
1098
|
+
|
1099
|
+
# Builds a fresh Daru::Index from +new_index+ and returns it.
def reassign_index_as new_index
  Daru::Index.new(new_index)
end
|
1102
|
+
|
1103
|
+
# Returns +index+ untouched when it already is a MultiIndex; otherwise wraps
# it in a new Daru::Index.
def create_index index
  return index if index.is_a?(MultiIndex)

  Daru::Index.new(index)
end
|
1106
|
+
|
1107
|
+
# Turns an Array name into a single Symbol by joining its elements; any
# other value is returned unchanged.
def set_name potential_name
  return potential_name unless potential_name.is_a?(Array)

  potential_name.join.to_sym
end
|
1110
|
+
|
1111
|
+
# Converts every non-Numeric element of +arry+ to a Symbol.
#
# When all elements of +arry+ are Arrays the conversion is applied one level
# down, to the elements of each sub-array. Numeric values are left as they are.
def symbolize arry
  to_label = ->(element) { element.is_a?(Numeric) ? element : element.to_sym }

  if arry.all? { |element| element.is_a?(Array) }
    arry.map { |sub_arry| sub_arry.map(&to_label) }
  else
    arry.map(&to_label)
  end
end
|
677
1125
|
end
|
678
1126
|
end
|