daru 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +0 -1
- data/History.txt +35 -0
- data/README.md +178 -198
- data/daru.gemspec +5 -7
- data/lib/daru.rb +10 -2
- data/lib/daru/accessors/array_wrapper.rb +36 -198
- data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
- data/lib/daru/core/group_by.rb +183 -0
- data/lib/daru/dataframe.rb +615 -167
- data/lib/daru/index.rb +17 -16
- data/lib/daru/io/io.rb +5 -12
- data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
- data/lib/daru/maths/arithmetic/vector.rb +19 -6
- data/lib/daru/maths/statistics/dataframe.rb +103 -2
- data/lib/daru/maths/statistics/vector.rb +102 -61
- data/lib/daru/monkeys.rb +8 -0
- data/lib/daru/multi_index.rb +199 -0
- data/lib/daru/plotting/dataframe.rb +24 -24
- data/lib/daru/plotting/vector.rb +14 -15
- data/lib/daru/vector.rb +402 -98
- data/lib/version.rb +1 -1
- data/notebooks/grouping_splitting_pivots.ipynb +529 -0
- data/notebooks/intro_with_music_data_.ipynb +104 -119
- data/spec/accessors/wrappers_spec.rb +36 -0
- data/spec/core/group_by_spec.rb +331 -0
- data/spec/dataframe_spec.rb +1237 -475
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/index_spec.rb +10 -21
- data/spec/io/io_spec.rb +4 -14
- data/spec/math/arithmetic/dataframe_spec.rb +66 -0
- data/spec/math/arithmetic/vector_spec.rb +45 -4
- data/spec/math/statistics/dataframe_spec.rb +91 -1
- data/spec/math/statistics/vector_spec.rb +32 -6
- data/spec/monkeys_spec.rb +10 -1
- data/spec/multi_index_spec.rb +216 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/vector_spec.rb +505 -57
- metadata +21 -15
@@ -0,0 +1,183 @@
|
|
1
|
+
module Daru
  module Core
    # Groups the rows of a DataFrame by the values found in one or more of
    # its vectors, and builds per-group selections and aggregates.
    #
    # Each group is identified by an Array holding one value per grouping
    # vector (a one-element Array when grouping by a single vector).
    class GroupBy

      # Frozen Hash mapping every group key (Array of values) to the Array of
      # row positions that belong to that group, in ascending order. Keys are
      # stored in sorted order.
      attr_reader :groups

      # @param context [Daru::DataFrame] the DataFrame whose rows are grouped.
      # @param names [Array] names of the vectors to group by.
      # @raise [ArgumentError] if +names+ is empty.
      def initialize context, names
        raise ArgumentError, "Specify at least one vector to group by" if names.empty?

        @context           = context
        @group_vectors     = names
        @non_group_vectors = context.vectors.to_a - names

        columns = names.map { |name| context.vector[name].to_a }
        tuples  = columns[0].zip(*columns[1..-1])

        # Collect the row positions of every key in one pass instead of
        # re-scanning the tuple list once per match of every key.
        positions = {}
        tuples.each_with_index do |tuple, row|
          (positions[tuple] ||= []) << row
        end

        @groups = {}
        positions.keys.sort.each { |key| @groups[key] = positions[key] }
        @groups.freeze
      end

      # Number of rows in each group, as a Daru::Vector named :size that is
      # indexed by the group keys.
      def size
        counts = @groups.values.map { |rows| rows.size }
        Daru::Vector.new(counts, index: group_index, name: :size)
      end

      # First row of every group.
      def first
        head(1)
      end

      # Last row of every group.
      def last
        tail(1)
      end

      # The first +quantity+ rows of every group, as a DataFrame.
      def head quantity=5
        select_groups_from :first, quantity
      end

      # The last +quantity+ rows of every group, as a DataFrame.
      def tail quantity=5
        select_groups_from :last, quantity
      end

      # Per-group result of calling #mean on each numeric non-grouping vector.
      def mean
        apply_method :numeric, :mean
      end

      # Per-group result of calling #median on each numeric non-grouping vector.
      def median
        apply_method :numeric, :median
      end

      # Per-group result of calling #sum on each numeric non-grouping vector.
      def sum
        apply_method :numeric, :sum
      end

      # DataFrame with one vector per non-grouping vector, each holding the
      # group sizes returned by #size.
      def count
        width = @non_group_vectors.size
        Daru::DataFrame.new([size]*width, order: @non_group_vectors)
      end

      # Per-group result of calling #std on each numeric non-grouping vector.
      def std
        apply_method :numeric, :std
      end

      # Per-group result of calling #max on each numeric non-grouping vector.
      def max
        apply_method :numeric, :max
      end

      # Per-group result of calling #min on each numeric non-grouping vector.
      def min
        apply_method :numeric, :min
      end

      # Returns one of the groups as a DataFrame.
      #
      # @param group [Array] a group key exactly as it appears in #groups.
      # @raise [IndexError] if no such group exists.
      def get_group group
        indexes = @groups[group]
        raise IndexError, "Group #{group.inspect} does not exist" if indexes.nil?

        columns = []
        @context.each_vector { |vector| columns << vector.to_a }
        rows = columns.transpose.values_at(*indexes)

        Daru::DataFrame.rows(rows, index: @context.index[indexes], order: @context.vectors)
      end

     private

      # Builds a DataFrame from the rows obtained by sending +method+
      # (:first or :last) with +quantity+ to every group's row positions.
      def select_groups_from method, quantity
        indexes = @groups.values.flat_map { |positions| positions.send(method, quantity) }
        rows    = indexes.map { |idx| @context.row[idx].to_a }

        Daru::DataFrame.rows(rows, order: @context.vectors, index: indexes)
      end

      # Applies +method+ to the slice of every qualifying non-grouping vector
      # that falls inside each group. The result is a DataFrame with one row
      # per group and one vector per qualifying vector.
      def apply_method method_type, method
        # Decide the participating vectors once. Collecting their names inside
        # the per-group loop would repeat every name once per group.
        numeric_vectors = @non_group_vectors.select do |name|
          method_type == :numeric && @context.vector[name].type == :numeric
        end

        rows = @groups.values.map do |indexes|
          numeric_vectors.map do |name|
            slice = @context.vector[name][*indexes]
            # A group with a single row yields a bare element rather than a
            # vector; pass it (or a missing value) through untouched.
            (slice.nil? || slice.is_a?(Numeric)) ? slice : slice.send(method)
          end
        end

        order = symbolize numeric_vectors
        order =
          if order.all? { |e| e.is_a?(Array) }
            Daru::MultiIndex.new(order)
          else
            Daru::Index.new(order)
          end

        Daru::DataFrame.new(rows.transpose, index: group_index, order: order)
      end

      # Index built from the group keys: a MultiIndex when grouping by several
      # vectors, otherwise a plain Index over the flattened keys.
      def group_index
        keys = symbolize @groups.keys

        if multi_indexed_grouping?
          Daru::MultiIndex.new(keys)
        else
          Daru::Index.new(keys.flatten)
        end
      end

      # Converts every non-numeric element of +arry+ (or of its sub-arrays,
      # when all elements are Arrays) to a Symbol.
      def symbolize arry
        if arry.all? { |e| e.is_a?(Array) }
          arry.map do |sub_arry|
            sub_arry.map { |e| e.is_a?(Numeric) ? e : e.to_sym }
          end
        else
          arry.map { |e| e.is_a?(Numeric) ? e : e.to_sym }
        end
      end

      # True when grouping by more than one vector. Based on the number of
      # grouping vectors, so it does not depend on any group existing or on
      # the values stored in the first key.
      def multi_indexed_grouping?
        @group_vectors.size > 1
      end
    end
  end
end
|
data/lib/daru/dataframe.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
$:.unshift File.dirname(__FILE__)
|
2
|
+
|
3
|
+
require 'accessors/dataframe_by_row.rb'
|
4
|
+
require 'accessors/dataframe_by_vector.rb'
|
5
|
+
require 'maths/arithmetic/dataframe.rb'
|
6
|
+
require 'maths/statistics/dataframe.rb'
|
7
|
+
require 'plotting/dataframe.rb'
|
8
|
+
require 'io/io.rb'
|
7
9
|
|
8
10
|
module Daru
|
9
11
|
class DataFrame
|
@@ -14,7 +16,7 @@ module Daru
|
|
14
16
|
|
15
17
|
class << self
|
16
18
|
# Load data from a CSV file.
|
17
|
-
#
|
19
|
+
# Arguments - path, options, block(optional)
|
18
20
|
#
|
19
21
|
# Accepts a block for pre-conditioning of CSV data if any.
|
20
22
|
def from_csv path, opts={}, &block
|
@@ -24,25 +26,25 @@ module Daru
|
|
24
26
|
# Create DataFrame by specifying rows as an Array of Arrays or Array of
|
25
27
|
# Daru::Vector objects.
|
26
28
|
def rows source, opts={}
|
29
|
+
df = nil
|
27
30
|
if source.all? { |v| v.size == source[0].size }
|
28
31
|
first = source[0]
|
29
32
|
index = []
|
30
|
-
order
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
Array.new(first.size) { |i| i.to_s }
|
37
|
-
end
|
38
|
-
else
|
39
|
-
opts[:order]
|
33
|
+
opts[:order] ||=
|
34
|
+
if first.is_a?(Daru::Vector) # assume that all are Vectors
|
35
|
+
source.each { |vec| index << vec.name }
|
36
|
+
first.index.to_a
|
37
|
+
elsif first.is_a?(Array)
|
38
|
+
Array.new(first.size) { |i| i.to_s }
|
40
39
|
end
|
41
40
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
df
|
41
|
+
if source.all? { |s| s.is_a?(Array) }
|
42
|
+
df = Daru::DataFrame.new(source.transpose, opts)
|
43
|
+
else # array of Daru::Vectors
|
44
|
+
df = Daru::DataFrame.new({}, opts)
|
45
|
+
source.each_with_index do |row, idx|
|
46
|
+
df[(index[idx] || idx), :row] = row
|
47
|
+
end
|
46
48
|
end
|
47
49
|
else
|
48
50
|
raise SizeError, "All vectors must have same length"
|
@@ -65,8 +67,8 @@ module Daru
|
|
65
67
|
attr_reader :size
|
66
68
|
|
67
69
|
# DataFrame basically consists of an Array of Vector objects.
|
68
|
-
#
|
69
|
-
#
|
70
|
+
# These objects are indexed by row and column by vectors and index Index objects.
|
71
|
+
# Arguments - source, vectors, index, name.
|
70
72
|
#
|
71
73
|
# == Usage
|
72
74
|
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
|
@@ -82,43 +84,55 @@ module Daru
|
|
82
84
|
def initialize source, opts={}
|
83
85
|
vectors = opts[:order]
|
84
86
|
index = opts[:index]
|
85
|
-
@dtype = opts[:dtype] || Array
|
86
87
|
@name = (opts[:name] || SecureRandom.uuid).to_sym
|
87
88
|
@data = []
|
88
89
|
|
89
90
|
if source.empty?
|
90
|
-
@vectors =
|
91
|
-
@index =
|
91
|
+
@vectors = create_index vectors
|
92
|
+
@index = create_index index
|
92
93
|
create_empty_vectors
|
93
94
|
else
|
94
95
|
case source
|
95
96
|
when Array
|
96
|
-
if
|
97
|
-
|
98
|
-
|
99
|
-
@vectors = Daru::Index.new (vectors + (source[0].keys - vectors)).uniq.map(&:to_sym)
|
100
|
-
end
|
97
|
+
if source.all? { |s| s.is_a?(Array) }
|
98
|
+
raise ArgumentError, "Number of vectors (#{vectors.size}) should \
|
99
|
+
equal order size (#{source.size})" if source.size != vectors.size
|
101
100
|
|
102
|
-
|
103
|
-
@
|
104
|
-
else
|
105
|
-
@index = Daru::Index.new index
|
106
|
-
end
|
101
|
+
@index = create_index(index || source[0].size)
|
102
|
+
@vectors = create_index(vectors)
|
107
103
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
104
|
+
@vectors.each_with_index do |vec,idx|
|
105
|
+
@data << Daru::Vector.new(source[idx], index: @index)
|
106
|
+
end
|
107
|
+
elsif source.all? { |s| s.is_a?(Daru::Vector) }
|
108
|
+
hsh = {}
|
109
|
+
vectors.each_with_index do |name, idx|
|
110
|
+
hsh[name] = source[idx]
|
111
|
+
end
|
112
|
+
initialize(hsh, index: index, order: vectors, name: @name)
|
113
|
+
else # array of hashes
|
114
|
+
if vectors.nil?
|
115
|
+
@vectors = Daru::Index.new source[0].keys.map(&:to_sym)
|
116
|
+
else
|
117
|
+
@vectors = Daru::Index.new (vectors + (source[0].keys - vectors)).uniq.map(&:to_sym)
|
112
118
|
end
|
119
|
+
@index = Daru::Index.new(index || source.size)
|
120
|
+
|
121
|
+
@vectors.each do |name|
|
122
|
+
v = []
|
123
|
+
source.each do |hsh|
|
124
|
+
v << (hsh[name] || hsh[name.to_s])
|
125
|
+
end
|
113
126
|
|
114
|
-
|
127
|
+
@data << Daru::Vector.new(v, name: set_name(name), index: @index)
|
128
|
+
end
|
115
129
|
end
|
116
130
|
when Hash
|
117
131
|
create_vectors_index_with vectors, source
|
118
132
|
if all_daru_vectors_in_source? source
|
119
133
|
if !index.nil?
|
120
|
-
@index = index
|
121
|
-
elsif all_vectors_have_equal_indexes?
|
134
|
+
@index = create_index index
|
135
|
+
elsif all_vectors_have_equal_indexes?(source)
|
122
136
|
@index = source.values[0].index.dup
|
123
137
|
else
|
124
138
|
all_indexes = []
|
@@ -131,29 +145,17 @@ module Daru
|
|
131
145
|
@index = Daru::Index.new all_indexes
|
132
146
|
end
|
133
147
|
@vectors.each do |vector|
|
134
|
-
@data << Daru::Vector.new([], name: vector, index: @index
|
148
|
+
@data << Daru::Vector.new([], name: vector, index: @index)
|
135
149
|
|
136
150
|
@index.each do |idx|
|
137
|
-
|
138
|
-
@data[@vectors[vector]][idx] = source[vector][idx]
|
139
|
-
rescue IndexError
|
140
|
-
# If the index is not present in the vector under consideration
|
141
|
-
# (in source) then an error is raised. Put a nil in that place if
|
142
|
-
# that is the case.
|
143
|
-
@data[@vectors[vector]][idx] = nil
|
144
|
-
end
|
151
|
+
@data[@vectors[vector]][idx] = source[vector][idx]
|
145
152
|
end
|
146
153
|
end
|
147
|
-
else
|
148
|
-
index = source.values[0].size
|
149
|
-
if index.is_a?(Daru::Index)
|
150
|
-
@index = index.to_index
|
151
|
-
else
|
152
|
-
@index = Daru::Index.new index
|
153
|
-
end
|
154
|
+
else
|
155
|
+
@index = create_index(index || source.values[0].size)
|
154
156
|
|
155
157
|
@vectors.each do |name|
|
156
|
-
@data << source[name].dup
|
158
|
+
@data << Daru::Vector.new(source[name].dup, name: set_name(name), index: @index)
|
157
159
|
end
|
158
160
|
end
|
159
161
|
end
|
@@ -164,10 +166,17 @@ module Daru
|
|
164
166
|
end
|
165
167
|
|
166
168
|
# Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
|
167
|
-
# Use of this method is not recommended for accessing
|
168
|
-
# Use df.row[:a] for accessing row with index ':a' or
|
169
|
-
# accessing vector with index
|
170
|
-
def [](*names
|
169
|
+
# Defaults to *:vector*. Use of this method is not recommended for accessing
|
170
|
+
# rows or vectors. Use df.row[:a] for accessing row with index ':a' or
|
171
|
+
# df.vector[:vec] for accessing vector with index *:vec*.
|
172
|
+
def [](*names)
|
173
|
+
if names[-1] == :vector or names[-1] == :row
|
174
|
+
axis = names[-1]
|
175
|
+
names = names[0..-2]
|
176
|
+
else
|
177
|
+
axis = :vector
|
178
|
+
end
|
179
|
+
|
171
180
|
if axis == :vector
|
172
181
|
access_vector *names
|
173
182
|
elsif axis == :row
|
@@ -184,7 +193,12 @@ module Daru
|
|
184
193
|
# In case a Daru::Vector is specified after the equality the sign, the indexes
|
185
194
|
# of the vector will be matched against the row/vector indexes of the DataFrame
|
186
195
|
# before an insertion is performed. Unmatched indexes will be set to nil.
|
187
|
-
def []=(
|
196
|
+
def []=(*args)
|
197
|
+
name = args[0]
|
198
|
+
axis = args[1]
|
199
|
+
vector = args[-1]
|
200
|
+
|
201
|
+
axis = (!axis.is_a?(Symbol) and (axis != :vector or axis != :row)) ? :vector : axis
|
188
202
|
if axis == :vector
|
189
203
|
insert_or_modify_vector name, vector
|
190
204
|
elsif axis == :row
|
@@ -203,6 +217,11 @@ module Daru
|
|
203
217
|
Daru::Accessors::DataFrameByVector.new(self)
|
204
218
|
end
|
205
219
|
|
220
|
+
# Access a vector by name.
|
221
|
+
def column name
|
222
|
+
vector[name]
|
223
|
+
end
|
224
|
+
|
206
225
|
# Access a row or set/create a row. Refer #[] and #[]= docs for details.
|
207
226
|
#
|
208
227
|
# == Usage
|
@@ -219,18 +238,24 @@ module Daru
|
|
219
238
|
src[vector] = @data[@vectors[vector]].dup
|
220
239
|
end
|
221
240
|
|
222
|
-
Daru::DataFrame.new src, order: @vectors.dup, index: @index.dup, name: @name
|
241
|
+
Daru::DataFrame.new src, order: @vectors.dup, index: @index.dup, name: @name
|
223
242
|
end
|
224
243
|
|
225
244
|
# Iterate over each vector
|
226
245
|
def each_vector(&block)
|
246
|
+
return to_enum(:each_vector) unless block_given?
|
247
|
+
|
227
248
|
@data.each(&block)
|
228
249
|
|
229
250
|
self
|
230
251
|
end
|
231
252
|
|
253
|
+
alias_method :each_column, :each_vector
|
254
|
+
|
232
255
|
# Iterate over each vector alongwith the name of the vector
|
233
256
|
def each_vector_with_index(&block)
|
257
|
+
return to_enum(:each_vector_with_index) unless block_given?
|
258
|
+
|
234
259
|
@vectors.each do |vector|
|
235
260
|
yield @data[@vectors[vector]], vector
|
236
261
|
end
|
@@ -238,8 +263,12 @@ module Daru
|
|
238
263
|
self
|
239
264
|
end
|
240
265
|
|
266
|
+
alias_method :each_column_with_index, :each_vector_with_index
|
267
|
+
|
241
268
|
# Iterate over each row
|
242
269
|
def each_row(&block)
|
270
|
+
return to_enum(:each_row) unless block_given?
|
271
|
+
|
243
272
|
@index.each do |index|
|
244
273
|
yield access_row(index)
|
245
274
|
end
|
@@ -248,6 +277,8 @@ module Daru
|
|
248
277
|
end
|
249
278
|
|
250
279
|
def each_row_with_index(&block)
|
280
|
+
return to_enum(:each_row_with_index) unless block_given?
|
281
|
+
|
251
282
|
@index.each do |index|
|
252
283
|
yield access_row(index), index
|
253
284
|
end
|
@@ -256,17 +287,27 @@ module Daru
|
|
256
287
|
end
|
257
288
|
|
258
289
|
# Map each vector. Returns a DataFrame whose vectors are modified according
|
259
|
-
# to the value returned by the block.
|
290
|
+
# to the value returned by the block. As is the case with Enumerable#map,
|
291
|
+
# the object returned by each block must be a Daru::Vector for the dataframe
|
292
|
+
# to remain relevant.
|
260
293
|
def map_vectors(&block)
|
261
|
-
|
262
|
-
df.each_vector_with_index do |vector, name|
|
263
|
-
df[name, :vector] = yield(vector)
|
264
|
-
end
|
294
|
+
return to_enum(:map_vectors) unless block_given?
|
265
295
|
|
266
|
-
|
296
|
+
self.dup.map_vectors!(&block)
|
267
297
|
end
|
268
298
|
|
299
|
+
# Destructive form of #map_vectors
|
300
|
+
def map_vectors!(&block)
|
301
|
+
return to_enum(:map_vectors!) unless block_given?
|
302
|
+
|
303
|
+
@data.map!(&block)
|
304
|
+
self
|
305
|
+
end
|
306
|
+
|
307
|
+
# Map vectors alongwith the index.
|
269
308
|
def map_vectors_with_index(&block)
|
309
|
+
return to_enum(:map_vectors_with_index) unless block_given?
|
310
|
+
|
270
311
|
df = self.dup
|
271
312
|
df.each_vector_with_index do |vector, name|
|
272
313
|
df[name, :vector] = yield(vector, name)
|
@@ -277,6 +318,8 @@ module Daru
|
|
277
318
|
|
278
319
|
# Map each row
|
279
320
|
def map_rows(&block)
|
321
|
+
return to_enum(:map_rows) unless block_given?
|
322
|
+
|
280
323
|
df = self.dup
|
281
324
|
df.each_row_with_index do |row, index|
|
282
325
|
df[index, :row] = yield(row)
|
@@ -286,6 +329,8 @@ module Daru
|
|
286
329
|
end
|
287
330
|
|
288
331
|
def map_rows_with_index(&block)
|
332
|
+
return to_enum(:map_rows_with_index) unless block_given?
|
333
|
+
|
289
334
|
df = self.dup
|
290
335
|
df.each_row_with_index do |row, index|
|
291
336
|
df[index, :row] = yield(row, index)
|
@@ -302,13 +347,16 @@ module Daru
|
|
302
347
|
else
|
303
348
|
raise IndexError, "Vector #{vector} does not exist."
|
304
349
|
end
|
350
|
+
|
351
|
+
self
|
305
352
|
end
|
306
353
|
|
354
|
+
# Delete a row
|
307
355
|
def delete_row index
|
308
356
|
idx = named_index_for index
|
309
357
|
|
310
358
|
if @index.include? idx
|
311
|
-
@index = (@index.to_a - [idx])
|
359
|
+
@index = reassign_index_as(@index.to_a - [idx])
|
312
360
|
self.each_vector do |vector|
|
313
361
|
vector.delete_at idx
|
314
362
|
end
|
@@ -343,6 +391,8 @@ module Daru
|
|
343
391
|
# Iterates over each row and retains it in a new DataFrame if the block returns
|
344
392
|
# true for that row.
|
345
393
|
def filter_rows &block
|
394
|
+
return to_enum(:filter_rows) unless block_given?
|
395
|
+
|
346
396
|
df = Daru::DataFrame.new({}, order: @vectors.to_a)
|
347
397
|
marked = []
|
348
398
|
|
@@ -361,39 +411,255 @@ module Daru
|
|
361
411
|
# Iterates over each vector and retains it in a new DataFrame if the block returns
|
362
412
|
# true for that vector.
|
363
413
|
def filter_vectors &block
|
414
|
+
return to_enum(:filter_vectors) unless block_given?
|
415
|
+
|
364
416
|
df = self.dup
|
365
417
|
df.keep_vector_if &block
|
366
418
|
|
367
419
|
df
|
368
420
|
end
|
369
421
|
|
422
|
+
# Return the number of rows and columns of the DataFrame in an Array.
|
423
|
+
def shape
|
424
|
+
[@index.size, @vectors.size]
|
425
|
+
end
|
426
|
+
|
427
|
+
# The number of rows
|
428
|
+
def rows
|
429
|
+
shape[0]
|
430
|
+
end
|
431
|
+
|
432
|
+
# The number of vectors
|
433
|
+
def cols
|
434
|
+
shape[1]
|
435
|
+
end
|
436
|
+
|
370
437
|
# Check if a vector is present
|
371
|
-
def has_vector?
|
372
|
-
!!@vectors[
|
438
|
+
def has_vector? vector
|
439
|
+
!!@vectors[*vector]
|
373
440
|
end
|
374
441
|
|
442
|
+
# The first ten elements of the DataFrame
|
443
|
+
#
|
444
|
+
# @param [Fixnum] quantity (10) The number of elements to display from the top.
|
375
445
|
def head quantity=10
  # The range is inclusive, so it must end at quantity-1 to select exactly
  # +quantity+ rows; ending at +quantity+ returned one row too many.
  # NOTE(review): assumes quantity >= 1 - confirm how callers use 0.
  self[0..(quantity - 1), :row]
end
|
378
448
|
|
449
|
+
# The last ten elements of the DataFrame
|
450
|
+
#
|
451
|
+
# @param [Fixnum] quantity (10) The number of elements to display from the bottom.
|
379
452
|
def tail quantity=10
|
380
|
-
self[(@size - quantity)
|
453
|
+
self[(@size - quantity)..(@size-1), :row]
|
381
454
|
end
|
382
455
|
|
383
|
-
#
|
384
|
-
|
385
|
-
|
456
|
+
# Group elements by vector to perform operations on them.
|
457
|
+
def group_by vectors
|
458
|
+
vectors = [vectors] if vectors.is_a?(Symbol)
|
459
|
+
vectors.each { |v| raise(ArgumentError, "Vector #{v} does not exist") unless
|
460
|
+
has_vector?(v) }
|
461
|
+
|
462
|
+
Daru::Core::GroupBy.new(self, vectors)
|
463
|
+
end
|
464
|
+
|
465
|
+
# Change the index of the DataFrame and its underlying vectors. Destructive.
|
466
|
+
#
|
467
|
+
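# @param [Symbol, Array] new_index Specify an Array of new index labels (its size must equal the DataFrame size), or :seq to rebuild the index from the DataFrame size.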
|
468
|
+
def reindex! new_index
|
469
|
+
raise ArgumentError, "Index size must equal dataframe size" if new_index.is_a?(Array) and new_index.size != @size
|
470
|
+
|
471
|
+
@index = possibly_multi_index?(new_index == :seq ? @size : new_index)
|
472
|
+
@data.map! do |vector|
|
473
|
+
vector.reindex possibly_multi_index?(@index.to_a)
|
474
|
+
end
|
475
|
+
|
476
|
+
self
|
477
|
+
end
|
478
|
+
|
479
|
+
# Non-destructive version of #reindex!
|
480
|
+
def reindex new_index
|
481
|
+
self.dup.reindex! new_index
|
482
|
+
end
|
483
|
+
|
484
|
+
# Return the names of all the numeric vectors. Will include vectors with nils
|
485
|
+
# along with numbers.
|
486
|
+
def numeric_vectors
|
487
|
+
numerics = []
|
488
|
+
|
489
|
+
each_vector do |vec|
|
490
|
+
numerics << vec.name if(vec.type == :numeric)
|
491
|
+
end
|
492
|
+
numerics
|
493
|
+
end
|
494
|
+
|
495
|
+
# Sorts a dataframe (ascending/descending) according to the given sequence of
|
496
|
+
# vectors, using the attributes provided in the blocks. Works for 2 LEVELS ONLY.
|
497
|
+
#
|
498
|
+
# @param order [Array] The order of vector names in which the DataFrame
|
499
|
+
# should be sorted.
|
500
|
+
# @param [Hash] opts The options to sort with.
|
501
|
+
# @option opts [TrueClass,FalseClass,Array] :ascending (true) Sort in ascending
|
502
|
+
# or descending order. Specify Array corresponding to *order* for multiple
|
503
|
+
# sort orders.
|
504
|
+
# @option opts [Hash] :by ({|a,b| a <=> b}) Specify attributes of objects to
|
505
|
+
# be used for sorting, for each vector name in *order* as a hash of
|
506
|
+
# vector name and lambda pairs. In case a lambda for a vector is not
|
507
|
+
# specified, the default will be used.
|
508
|
+
#
|
509
|
+
# == Usage
|
510
|
+
#
|
511
|
+
# df = Daru::DataFrame.new({a: [-3,2,-1,4], b: [4,3,2,1]})
|
512
|
+
#
|
513
|
+
# #<Daru::DataFrame:140630680 @name = 04e00197-f8d5-4161-bca2-93266bfabc6f @size = 4>
|
514
|
+
# # a b
|
515
|
+
# # 0 -3 4
|
516
|
+
# # 1 2 3
|
517
|
+
# # 2 -1 2
|
518
|
+
# # 3 4 1
|
519
|
+
# df.sort([:a], by: { a: lambda { |a,b| a.abs <=> b.abs } })
|
520
|
+
def sort! vector_order, opts={}
|
521
|
+
raise ArgumentError, "Required atleast one vector name" if vector_order.size < 1
|
522
|
+
opts = {
|
523
|
+
ascending: true,
|
524
|
+
type: :quick_sort,
|
525
|
+
by: {}
|
526
|
+
}.merge(opts)
|
527
|
+
|
528
|
+
opts[:by] = create_logic_blocks vector_order, opts[:by]
|
529
|
+
opts[:ascending] = sort_order_array vector_order, opts[:ascending]
|
530
|
+
index = @index.to_a
|
531
|
+
send(opts[:type], vector_order, index, opts[:by], opts[:ascending])
|
532
|
+
reindex! index
|
533
|
+
end
|
534
|
+
|
535
|
+
# Non-destructive version of #sort!
|
536
|
+
def sort vector_order, opts={}
|
537
|
+
self.dup.sort! vector_order, opts
|
538
|
+
end
|
539
|
+
|
540
|
+
# Pivots a data frame on specified vectors and applies an aggregate function
|
541
|
+
# to quickly generate a summary.
|
542
|
+
#
|
543
|
+
# == Options
|
544
|
+
#
|
545
|
+
# +:index+ - Keys to group by on the pivot table row index. Pass vector names
|
546
|
+
# contained in an Array.
|
547
|
+
#
|
548
|
+
# +:vectors+ - Keys to group by on the pivot table column index. Pass vector
|
549
|
+
# names contained in an Array.
|
550
|
+
#
|
551
|
+
# +:agg+ - Function to aggregate the grouped values. Default to *:mean*. Can
|
552
|
+
# use any of the statistics functions applicable on Vectors that can be found in
|
553
|
+
# the Daru::Statistics::Vector module.
|
554
|
+
#
|
555
|
+
# +:values+ - Columns to aggregate. Will consider all numeric columns not
|
556
|
+
# specified in *:index* or *:vectors*. Optional.
|
557
|
+
#
|
558
|
+
# == Usage
|
559
|
+
#
|
560
|
+
# df = Daru::DataFrame.new({
|
561
|
+
# a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
|
562
|
+
# b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
|
563
|
+
# c: ['small','large','large','small','small','large','small','large','small'],
|
564
|
+
# d: [1,2,2,3,3,4,5,6,7],
|
565
|
+
# e: [2,4,4,6,6,8,10,12,14]
|
566
|
+
# })
|
567
|
+
# df.pivot_table(index: [:a], vectors: [:b], agg: :sum, values: :e)
|
568
|
+
#
|
569
|
+
# #=>
|
570
|
+
# # #<Daru::DataFrame:88342020 @name = 08cdaf4e-b154-4186-9084-e76dd191b2c9 @size = 2>
|
571
|
+
# # [:e, :one] [:e, :two]
|
572
|
+
# # [:bar] 18 26
|
573
|
+
# # [:foo] 10 12
|
574
|
+
def pivot_table opts={}
|
575
|
+
raise ArgumentError, "Specify grouping index" if !opts[:index] or opts[:index].empty?
|
386
576
|
|
387
|
-
|
577
|
+
index = opts[:index]
|
578
|
+
vectors = opts[:vectors] || []
|
579
|
+
aggregate_function = opts[:agg] || :mean
|
580
|
+
values =
|
581
|
+
if opts[:values].is_a?(Symbol)
|
582
|
+
[opts[:values]]
|
583
|
+
elsif opts[:values].is_a?(Array)
|
584
|
+
opts[:values]
|
585
|
+
else # nil
|
586
|
+
(@vectors.to_a - (index | vectors)) & numeric_vectors
|
587
|
+
end
|
388
588
|
|
389
|
-
|
589
|
+
raise IndexError, "No numeric vectors to aggregate" if values.empty?
|
590
|
+
|
591
|
+
grouped = group_by(index)
|
592
|
+
|
593
|
+
unless vectors.empty?
|
594
|
+
super_hash = {}
|
595
|
+
values.each do |value|
|
596
|
+
grouped.groups.each do |group_name, row_numbers|
|
597
|
+
super_hash[group_name] ||= {}
|
598
|
+
|
599
|
+
row_numbers.each do |num|
|
600
|
+
arry = []
|
601
|
+
arry << value
|
602
|
+
vectors.each { |v| arry << self[v][num] }
|
603
|
+
sub_hash = super_hash[group_name]
|
604
|
+
sub_hash[arry] ||= []
|
605
|
+
|
606
|
+
sub_hash[arry] << self[value][num]
|
607
|
+
end
|
608
|
+
end
|
609
|
+
end
|
610
|
+
|
611
|
+
super_hash.each_value do |sub_hash|
|
612
|
+
sub_hash.each do |group_name, aggregates|
|
613
|
+
sub_hash[group_name] = Daru::Vector.new(aggregates).send(aggregate_function)
|
614
|
+
end
|
615
|
+
end
|
616
|
+
|
617
|
+
df_index = Daru::MultiIndex.new(symbolize(super_hash.keys))
|
618
|
+
|
619
|
+
vector_indexes = []
|
620
|
+
super_hash.each_value do |sub_hash|
|
621
|
+
vector_indexes.concat sub_hash.keys
|
622
|
+
end
|
623
|
+
df_vectors = Daru::MultiIndex.new symbolize(vector_indexes.uniq)
|
624
|
+
pivoted_dataframe = Daru::DataFrame.new({}, index: df_index, order: df_vectors)
|
625
|
+
|
626
|
+
super_hash.each do |row_index, sub_h|
|
627
|
+
sub_h.each do |vector_index, val|
|
628
|
+
pivoted_dataframe[symbolize(vector_index)][symbolize(row_index)] = val
|
629
|
+
end
|
630
|
+
end
|
631
|
+
return pivoted_dataframe
|
632
|
+
else
|
633
|
+
grouped.send(aggregate_function)
|
634
|
+
end
|
635
|
+
end
|
636
|
+
|
637
|
+
# Convert all vectors of type *:numeric* into a Matrix.
|
638
|
+
def to_matrix
|
639
|
+
numerics_as_arrays = []
|
640
|
+
each_vector do |vector|
|
641
|
+
numerics_as_arrays << vector.to_a if(vector.type == :numeric)
|
642
|
+
end
|
643
|
+
|
644
|
+
Matrix.columns numerics_as_arrays
|
645
|
+
end
|
646
|
+
|
647
|
+
# Convert all vectors of type *:numeric* and not containing nils into an NMatrix.
|
648
|
+
def to_nmatrix
|
649
|
+
numerics_as_arrays = []
|
650
|
+
each_vector do |vector|
|
651
|
+
numerics_as_arrays << vector.to_a if(vector.type == :numeric and
|
652
|
+
vector.nil_positions.size == 0)
|
653
|
+
end
|
654
|
+
|
655
|
+
numerics_as_arrays.transpose.to_nm
|
656
|
+
end
|
390
657
|
|
391
658
|
# Converts the DataFrame into an array of hashes where key is vector name
|
392
|
-
#
|
393
|
-
#
|
394
|
-
#
|
395
|
-
#
|
396
|
-
# the same index.
|
659
|
+
# and value is the corresponding element. The 0th index of the array contains
|
660
|
+
# the array of hashes while the 1st index contains the indexes of each row
|
661
|
+
# of the dataframe. Each element in the index array corresponds to its row
|
662
|
+
# in the array of hashes, which has the same index.
|
397
663
|
def to_a
|
398
664
|
arry = [[],[]]
|
399
665
|
self.each_row do |row|
|
@@ -443,7 +709,28 @@ module Daru
|
|
443
709
|
to_html
|
444
710
|
end
|
445
711
|
|
446
|
-
#
|
712
|
+
# Change dtypes of vectors by supplying a hash of :vector_name => :new_dtype
|
713
|
+
#
|
714
|
+
# == Usage
|
715
|
+
# df = Daru::DataFrame.new({a: [1,2,3], b: [1,2,3], c: [1,2,3]})
|
716
|
+
# df.recast a: :nmatrix, c: :nmatrix
|
717
|
+
def recast opts={}
|
718
|
+
opts.each do |vector_name, dtype|
|
719
|
+
vector[vector_name].cast(dtype: dtype)
|
720
|
+
end
|
721
|
+
end
|
722
|
+
|
723
|
+
# Transpose a DataFrame, transposing elements and swapping the row and column indexes.
|
724
|
+
def transpose
|
725
|
+
arrys = []
|
726
|
+
each_vector do |vec|
|
727
|
+
arrys << vec.to_a
|
728
|
+
end
|
729
|
+
|
730
|
+
Daru::DataFrame.new(arrys.transpose, index: @vectors, order: @index, dtype: @dtype, name: @name)
|
731
|
+
end
|
732
|
+
|
733
|
+
# Pretty print in a nice table format for the command line (irb/pry/iruby)
|
447
734
|
def inspect spacing=10, threshold=15
|
448
735
|
longest = [@name.to_s.size,
|
449
736
|
@vectors.map(&:to_s).map(&:size).max,
|
@@ -477,23 +764,14 @@ module Daru
|
|
477
764
|
content
|
478
765
|
end
|
479
766
|
|
480
|
-
def dtype= dtype
|
481
|
-
@dtype = dtype
|
482
|
-
|
483
|
-
@vectors.each do |vec|
|
484
|
-
pos = @vectors[vec]
|
485
|
-
@data[pos] = @data[pos].coerce(@dtype)
|
486
|
-
end
|
487
|
-
end
|
488
|
-
|
489
767
|
def == other
|
490
|
-
@index == other.index and @size == other.size and @vectors.
|
491
|
-
|
768
|
+
@index == other.index and @size == other.size and @vectors == other.vectors and
|
769
|
+
@vectors.all? { |vector| self[vector, :vector] == other[vector, :vector] }
|
492
770
|
end
|
493
771
|
|
494
772
|
def method_missing(name, *args, &block)
|
495
773
|
if md = name.match(/(.+)\=/)
|
496
|
-
insert_or_modify_vector name[/(.+)\=/].delete("="), args[0]
|
774
|
+
insert_or_modify_vector name[/(.+)\=/].delete("=").to_sym, args[0]
|
497
775
|
elsif self.has_vector? name
|
498
776
|
self[name, :vector]
|
499
777
|
else
|
@@ -503,81 +781,234 @@ module Daru
|
|
503
781
|
|
504
782
|
private
|
505
783
|
|
506
|
-
def
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
784
|
+
def possibly_multi_index? index
|
785
|
+
if @index.is_a?(MultiIndex)
|
786
|
+
Daru::MultiIndex.new(index)
|
787
|
+
else
|
788
|
+
Daru::Index.new(index)
|
789
|
+
end
|
790
|
+
end
|
791
|
+
|
792
|
+
def quick_sort vector_order, index, by, ascending
|
793
|
+
recursive_quick_sort vector_order, index, by, ascending, 0, @size-1
|
794
|
+
end
|
795
|
+
|
796
|
+
# == Arguments
|
797
|
+
#
|
798
|
+
# vector_order -
|
799
|
+
# index -
|
800
|
+
# by -
|
801
|
+
# ascending -
|
802
|
+
# left_lower -
|
803
|
+
# right_upper -
|
804
|
+
def recursive_quick_sort vector_order, index, by, ascending, left_lower, right_upper
  # A range holding fewer than two rows needs no work.
  return unless left_lower < right_upper

  left_upper, right_lower = partition(vector_order, index, by, ascending, left_lower, right_upper)

  left_half  = [left_lower, left_upper]
  right_half = [right_lower, right_upper]

  # The narrower half is always processed first.
  halves =
    if (left_upper - left_lower) < (right_upper - right_lower)
      [left_half, right_half]
    else
      [right_half, left_half]
    end

  halves.each do |lower, upper|
    recursive_quick_sort(vector_order, index, by, ascending, lower, upper)
  end

  nil
end
|
517
816
|
|
518
|
-
|
519
|
-
|
817
|
+
# Partitions the rows between left_lower and right_upper (both inclusive)
# around the values found in the middle row of that range. Rows are moved
# by swapping the entries of every vector in @data and of +index+ in step.
#
# Returns a two element Array [j, i] holding the final positions of the
# right-hand and left-hand cursors, which the caller uses as the upper bound
# of the left sub-range and the lower bound of the right sub-range.
def partition vector_order, index, by, ascending, left_lower, right_upper
  pivot_row    = (left_lower + right_upper) / 2
  # The pivot values are captured up front, before any rows are swapped.
  pivot_values = vector_order.map { |vector_name| vector[vector_name][pivot_row] }
  descending   = ascending.map { |flag| !flag }
  low  = left_lower
  high = right_upper

  # Exchanges rows +low+ and +high+ in every vector and in the label array.
  swap_rows = lambda do
    @data.each { |column| column[low], column[high] = column[high], column[low] }
    index[low], index[high] = index[high], index[low]
  end

  # Moves both cursors inwards past the rows keep? accepts on their side.
  advance = lambda do
    low  += 1 while keep?(low,  pivot_values, vector_order, ascending,  by, 0)
    high -= 1 while keep?(high, pivot_values, vector_order, descending, by, 0)
  end

  advance.call

  while low < high - 1
    swap_rows.call
    low  += 1
    high -= 1

    advance.call
  end

  if low <= high
    swap_rows.call if low < high
    low  += 1
    high -= 1
  end

  [high, low]
end
|
525
852
|
|
526
|
-
def
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
if
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
853
|
+
# Decides whether the row at +current_index+ may stay on its side of the
# pivot, comparing one sort vector at a time.
#
# == Arguments
#
# current_index      - Position of the row being examined.
# mvalues            - Pivot values, one per entry of vector_order.
# vector_order       - Names of the vectors to compare, in priority order.
# sort_order         - One boolean per vector; true means ascending.
# by                 - Hash of vector name => comparator returning -1, 0 or 1.
# vector_order_index - Which entry of vector_order to compare at this step.
#
# Returns true or false as soon as a vector differs from the pivot. A tie
# moves on to the next vector. The result is nil when every vector ties, or
# when a comparator returns something other than -1, 0 or 1.
def keep? current_index, mvalues, vector_order, sort_order, by, vector_order_index
  vector_name = vector_order[vector_order_index]
  return unless vector_name

  values     = vector[vector_name]
  comparison = by[vector_name].call(values[current_index], mvalues[vector_order_index])

  # Ascending keeps values below the pivot; descending keeps values above it.
  keep_on, stop_on = sort_order[vector_order_index] ? [-1, 1] : [1, -1]

  return false if comparison == stop_on
  return true  if comparison == keep_on

  if comparison == 0
    keep?(current_index, mvalues, vector_order, sort_order, by, vector_order_index + 1)
  end
end
|
538
874
|
|
539
|
-
|
875
|
+
# Builds the table of comparison blocks used for sorting.
#
# == Arguments
#
# vector_order - Names of the vectors that take part in the sort.
# by           - Hash of vector name => callable taking two values and
#                returning -1, 0 or 1. May be omitted or nil.
#
# Returns a new Hash holding every entry of +by+ plus a default a <=> b
# comparator for each name in vector_order that had none. The Hash passed
# in is left untouched, so a frozen or shared Hash is safe to pass.
def create_logic_blocks vector_order, by={}
  universal_block = lambda { |a,b| a <=> b }
  # Work on a copy so that defaults are not written into the caller's Hash.
  blocks = by.nil? ? {} : by.dup

  vector_order.each do |vector|
    blocks[vector] ||= universal_block
  end

  blocks
end
|
883
|
+
|
884
|
+
# Normalises the +ascending+ sort option into one flag per sort vector.
#
# A non-Array value is repeated once for every name in vector_order. An
# Array is returned unchanged, provided it has exactly as many entries as
# vector_order; otherwise ArgumentError is raised.
def sort_order_array vector_order, ascending
  return Array.new(vector_order.size, ascending) unless ascending.is_a?(Array)

  unless vector_order.size == ascending.size
    raise ArgumentError, "Specify same number of vector names and sort orders"
  end

  ascending
end
|
893
|
+
|
894
|
+
# Resolves +location+ to the position of a vector inside @data.
#
# == Arguments
#
# location - A vector name (or tuple) known to @vectors, an Integer
#            position, or an Array whose first element is an Integer
#            position.
#
# Returns @vectors[location] when @vectors includes the location, otherwise
# the Integer position, and nil when neither applies.
def vectors_index_for location
  return @vectors[location] if @vectors.include?(location)

  # Integer#[] is a bit reference (2[0] == 0), so a bare Integer position
  # must be returned as it is rather than subscripted.
  return location if location.is_a?(Integer)

  first = location[0] if location.respond_to?(:[])
  first if first.is_a?(Integer)
end
|
901
|
+
|
902
|
+
# Fetches one or more vectors by name or position.
#
# With a plain vectors index, a single name returns the stored vector, and
# several names return a new DataFrame holding those vectors in the order
# given, sharing this frame's index and name.
#
# With a MultiIndex, the names are resolved together: a resolution to an
# Integer returns the stored vector at that position, and anything else is
# treated as a collection of tuples and returned as a DataFrame whose
# vectors are indexed by a MultiIndex built from them.
def access_vector *names
  location = names[0]

  unless @vectors.is_a?(MultiIndex)
    return @data[vectors_index_for(location)] unless names[1]

    selected = names.each_with_object({}) do |name, picked|
      key = name.is_a?(Integer) ? name : name.to_sym
      picked[key] = @data[@vectors[key]]
    end

    return Daru::DataFrame.new selected, order: selected.keys, index: @index, name: @name
  end

  pos = vectors_index_for names
  return @data[pos] if pos.is_a?(Integer)

  # pos is a sub-index of tuples here
  sub_vectors = pos.map { |tuple| @data[vectors_index_for(names + tuple)] }
  Daru::DataFrame.new(sub_vectors, index: @index, order: Daru::MultiIndex.new(pos.to_a))
end
|
929
|
+
|
930
|
+
# Fetches one or more rows by label, position or range.
#
# With a MultiIndex, the names are resolved through row_index_for: an
# Integer result returns that single row as a Daru::Vector, and anything
# else is treated as a collection of tuples and returned as a DataFrame
# indexed by a MultiIndex built from them.
#
# With a plain index, a single non-Range name returns that row as a
# Daru::Vector. A Range, or several names, returns a DataFrame of the
# selected rows. A Range of labels is translated to positions and keeps the
# end-exclusivity of the Range that was passed in (:a...:c leaves out :c).
#
# Raises IndexError when an end point of a label Range is not in the index.
def access_row *names
  location = names[0]

  if @index.is_a?(MultiIndex)
    pos = row_index_for names
    if pos.is_a?(Integer)
      return Daru::Vector.new(populate_row_for(pos), index: @vectors, name: pos)
    end

    new_rows =
      if location.is_a?(Range)
        pos.map { |tuple| populate_row_for(tuple) }
      else
        pos.map { |tuple| populate_row_for(names + tuple) }
      end

    return Daru::DataFrame.rows(new_rows, order: @vectors, name: @name,
      index: Daru::MultiIndex.new(pos.to_a))
  end

  if names[1].nil?
    unless location.is_a?(Range)
      name = named_index_for location
      return Daru::Vector.new(populate_row_for(name), index: @vectors, name: set_name(name))
    end

    index_arry = @index.to_a

    range =
      if location.first.is_a?(Numeric)
        location
      else
        first_index = index_arry.index location.first
        last_index  = index_arry.index location.last

        # A nil bound would otherwise form an open-ended range and silently
        # select rows that were never asked for.
        if first_index.nil? || last_index.nil?
          raise IndexError, "Specified range #{location} has labels that are not present in the index"
        end

        Range.new(first_index, last_index, location.exclude_end?)
      end

    names = index_arry[range]
  end

  # Access multiple rows
  rows = names.map { |name| self.row[name] }

  Daru::DataFrame.rows rows, name: @name
end
|
983
|
+
|
984
|
+
# Resolves +location+ against the row index.
#
# == Arguments
#
# location - A label (or tuple) known to @index, an Array whose first
#            element is a Range or an Integer position, or a bare Integer
#            position.
#
# Returns @index[location] when @index includes the location or when its
# first element is a Range, otherwise the Integer position, and nil when
# none of these apply.
def row_index_for location
  return @index[location] if @index.include?(location)

  # Integer#[] is a bit reference (6[0] == 0), so a bare Integer position
  # must be returned as it is rather than subscripted.
  return location if location.is_a?(Integer)

  first = location[0] if location.respond_to?(:[])

  return @index[location] if first.is_a?(Range)

  first if first.is_a?(Integer)
end
|
991
|
+
|
992
|
+
def populate_row_for pos
|
993
|
+
@vectors.map do |vector|
|
994
|
+
@data[@vectors[vector]][pos]
|
557
995
|
end
|
558
|
-
|
559
|
-
Daru::DataFrame.rows rows, name: @name, dtype: @dtype
|
560
996
|
end
|
561
997
|
|
562
998
|
def insert_or_modify_vector name, vector
|
563
|
-
@vectors =
|
999
|
+
@vectors = reassign_index_as(@vectors + name)
|
564
1000
|
v = nil
|
565
1001
|
|
566
1002
|
if vector.is_a?(Daru::Vector)
|
567
|
-
v = Daru::Vector.new [], name: name, index: @index
|
568
|
-
nil_data = false
|
1003
|
+
v = Daru::Vector.new [], name: set_name(name), index: @index
|
569
1004
|
@index.each do |idx|
|
570
|
-
|
571
|
-
v[idx] = vector[idx]
|
572
|
-
rescue IndexError
|
573
|
-
v[idx] = nil
|
574
|
-
end
|
1005
|
+
v[idx] = vector[idx]
|
575
1006
|
end
|
576
1007
|
else
|
577
1008
|
raise Exception, "Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}" if
|
578
1009
|
@size != vector.size
|
579
1010
|
|
580
|
-
v =
|
1011
|
+
v = Daru::Vector.new(vector, name: set_name(name), index: @index)
|
581
1012
|
end
|
582
1013
|
|
583
1014
|
@data[@vectors[name]] = v
|
@@ -585,25 +1016,17 @@ module Daru
|
|
585
1016
|
|
586
1017
|
def insert_or_modify_row name, vector
|
587
1018
|
if @index.include? name
|
588
|
-
v = vector.dv(name, @vectors, @dtype)
|
1019
|
+
v = vector.dv(name, @vectors, @dtype)
|
589
1020
|
|
590
1021
|
@vectors.each do |vector|
|
591
|
-
|
592
|
-
@data[@vectors[vector]][name] = v[vector]
|
593
|
-
rescue IndexError
|
594
|
-
@data[@vectors[vector]][name] = nil
|
595
|
-
end
|
1022
|
+
@data[@vectors[vector]][name] = v[vector]
|
596
1023
|
end
|
597
1024
|
else
|
598
|
-
@index =
|
599
|
-
v =
|
1025
|
+
@index = reassign_index_as(@index + name)
|
1026
|
+
v = Daru::Vector.new(vector, name: set_name(name), index: @vectors)
|
600
1027
|
|
601
1028
|
@vectors.each do |vector|
|
602
|
-
|
603
|
-
@data[@vectors[vector]].concat v[vector], name
|
604
|
-
rescue IndexError
|
605
|
-
@data[@vectors[vector]].concat nil, name
|
606
|
-
end
|
1029
|
+
@data[@vectors[vector]].concat v[vector], name
|
607
1030
|
end
|
608
1031
|
end
|
609
1032
|
|
@@ -612,16 +1035,16 @@ module Daru
|
|
612
1035
|
|
613
1036
|
def create_empty_vectors
|
614
1037
|
@vectors.each do |name|
|
615
|
-
@data << Daru::Vector.new([],name: name, index: @index
|
1038
|
+
@data << Daru::Vector.new([], name: set_name(name), index: @index)
|
616
1039
|
end
|
617
1040
|
end
|
618
1041
|
|
619
1042
|
def validate_labels
|
620
|
-
raise IndexError, "Expected equal number of vectors for number of
|
621
|
-
@vectors.size != @data.size
|
1043
|
+
raise IndexError, "Expected equal number of vector names (#{@vectors.size}) for number of vectors (#{@data.size})." if
|
1044
|
+
@vectors and @vectors.size != @data.size
|
622
1045
|
|
623
1046
|
raise IndexError, "Expected number of indexes same as number of rows" if
|
624
|
-
@index.size != @data[0].size
|
1047
|
+
@index and @data[0] and @index.size != @data[0].size
|
625
1048
|
end
|
626
1049
|
|
627
1050
|
def validate_vector_sizes
|
@@ -631,8 +1054,6 @@ module Daru
|
|
631
1054
|
end
|
632
1055
|
|
633
1056
|
def validate
|
634
|
-
# TODO: [IMP] when vectors of different dimensions are specified, they should
|
635
|
-
# be inserted into the dataframe by inserting nils wherever necessary.
|
636
1057
|
validate_labels
|
637
1058
|
validate_vector_sizes
|
638
1059
|
end
|
@@ -660,10 +1081,10 @@ module Daru
|
|
660
1081
|
def create_vectors_index_with vectors, source
|
661
1082
|
vectors = source.keys.sort if vectors.nil?
|
662
1083
|
|
663
|
-
|
664
|
-
@vectors = vectors.to_index
|
665
|
-
else
|
1084
|
+
unless vectors.is_a?(Index) or vectors.is_a?(MultiIndex)
|
666
1085
|
@vectors = Daru::Index.new (vectors + (source.keys - vectors)).uniq.map(&:to_sym)
|
1086
|
+
else
|
1087
|
+
@vectors = vectors
|
667
1088
|
end
|
668
1089
|
end
|
669
1090
|
|
@@ -674,5 +1095,32 @@ module Daru
|
|
674
1095
|
index == vector.index
|
675
1096
|
end
|
676
1097
|
end
|
1098
|
+
|
1099
|
+
# Wraps +new_index+ in a fresh Daru::Index and returns it.
def reassign_index_as new_index
  Daru::Index.new(new_index)
end
|
1102
|
+
|
1103
|
+
# Returns +index+ itself when it is already a MultiIndex; anything else is
# used to build a new Daru::Index.
def create_index index
  return index if index.is_a?(MultiIndex)

  Daru::Index.new(index)
end
|
1106
|
+
|
1107
|
+
# Turns a potential name into something usable as a single name: an Array
# has its elements joined without a separator and is converted to a Symbol,
# while any other value is returned unchanged.
def set_name potential_name
  return potential_name unless potential_name.is_a?(Array)

  potential_name.join.to_sym
end
|
1110
|
+
|
1111
|
+
# Converts the labels in +arry+ to Symbols, leaving Numeric entries as they
# are. When every element of +arry+ is itself an Array, the conversion is
# applied one level down, to the members of each sub-array; otherwise it is
# applied to the elements of +arry+ directly.
#
# Returns a new Array; +arry+ is not modified.
def symbolize arry
  to_label = lambda { |entry| entry.is_a?(Numeric) ? entry : entry.to_sym }

  unless arry.all? { |entry| entry.is_a?(Array) }
    return arry.map { |entry| to_label.call(entry) }
  end

  arry.map do |sub_arry|
    sub_arry.map { |entry| to_label.call(entry) }
  end
end
|
677
1125
|
end
|
678
1126
|
end
|