daru 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +99 -0
- data/.rubocop_todo.yml +44 -0
- data/.travis.yml +3 -1
- data/CONTRIBUTING.md +5 -1
- data/History.md +43 -0
- data/README.md +3 -4
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +7 -7
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/sorting.rb +9 -2
- data/benchmarks/statistics.rb +39 -0
- data/daru.gemspec +4 -4
- data/lib/daru.rb +9 -9
- data/lib/daru/accessors/array_wrapper.rb +15 -11
- data/lib/daru/accessors/dataframe_by_row.rb +1 -1
- data/lib/daru/accessors/gsl_wrapper.rb +30 -19
- data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
- data/lib/daru/core/group_by.rb +69 -16
- data/lib/daru/core/merge.rb +135 -151
- data/lib/daru/core/query.rb +9 -30
- data/lib/daru/dataframe.rb +476 -439
- data/lib/daru/date_time/index.rb +150 -137
- data/lib/daru/date_time/offsets.rb +45 -41
- data/lib/daru/extensions/rserve.rb +4 -4
- data/lib/daru/index.rb +88 -64
- data/lib/daru/io/io.rb +33 -34
- data/lib/daru/io/sql_data_source.rb +11 -11
- data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
- data/lib/daru/maths/arithmetic/vector.rb +9 -14
- data/lib/daru/maths/statistics/dataframe.rb +89 -61
- data/lib/daru/maths/statistics/vector.rb +226 -97
- data/lib/daru/monkeys.rb +23 -30
- data/lib/daru/plotting/dataframe.rb +27 -28
- data/lib/daru/plotting/vector.rb +12 -13
- data/lib/daru/vector.rb +221 -330
- data/lib/daru/version.rb +2 -2
- data/spec/core/group_by_spec.rb +16 -0
- data/spec/core/merge_spec.rb +30 -14
- data/spec/dataframe_spec.rb +268 -14
- data/spec/index_spec.rb +23 -5
- data/spec/io/io_spec.rb +37 -16
- data/spec/math/statistics/dataframe_spec.rb +40 -8
- data/spec/math/statistics/vector_spec.rb +135 -10
- data/spec/monkeys_spec.rb +3 -3
- data/spec/vector_spec.rb +157 -25
- metadata +41 -21
data/lib/daru/core/merge.rb
CHANGED
@@ -4,17 +4,17 @@ module Daru
|
|
4
4
|
class << self
|
5
5
|
def replace_keys_if_duplicates hash, matcher
|
6
6
|
matched = nil
|
7
|
-
hash.keys.each { |d|
|
7
|
+
hash.keys.each { |d|
|
8
8
|
if matcher.match(Regexp.new(d.to_s))
|
9
9
|
matched = d
|
10
10
|
break
|
11
|
-
end
|
11
|
+
end
|
12
12
|
}
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
return unless matched
|
15
|
+
|
16
|
+
hash[matcher] = hash[matched]
|
17
|
+
hash.delete matched
|
18
18
|
end
|
19
19
|
|
20
20
|
def resolve_duplicates df_hash1, df_hash2, on
|
@@ -29,198 +29,182 @@ module Daru
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def hashify df
|
32
|
-
hsh = df.
|
32
|
+
hsh = df.to_h
|
33
33
|
hsh.each { |k,v| hsh[k] = v.to_a }
|
34
34
|
hsh
|
35
35
|
end
|
36
|
-
|
36
|
+
|
37
37
|
def arrayify df
|
38
38
|
arr = df.to_a
|
39
39
|
col_names = arr[0][0].keys
|
40
|
-
values = arr[0].map
|
40
|
+
values = arr[0].map(&:values)
|
41
41
|
|
42
|
-
|
42
|
+
[col_names, values]
|
43
43
|
end
|
44
44
|
|
45
|
-
def
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
end
|
45
|
+
def arrayify_with_sort_keys(size, df_hash, on)
|
46
|
+
# Converting to a hash and then to an array is more complex
|
47
|
+
# than using df.to_a or df.map(:row). However, it's
|
48
|
+
# substantially faster this way.
|
50
49
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
end
|
58
|
-
end
|
50
|
+
# idx_keys = on.map { |key| df_hash.keys.index(key) }
|
51
|
+
|
52
|
+
(0...size).reduce([]) do |r, idx|
|
53
|
+
key_values = on.map { |col| df_hash[col][idx] }
|
54
|
+
row_values = df_hash.map { |_col, val| val[idx] }
|
55
|
+
r << [key_values, row_values]
|
59
56
|
end
|
60
57
|
|
61
|
-
|
58
|
+
# Conceptually simpler and does the same thing, but slows down the
|
59
|
+
# total merge algorithm by 2x. Would be nice to improve the performance
|
60
|
+
# of df.map(:row)
|
61
|
+
#
|
62
|
+
# df.map(:row) do |row|
|
63
|
+
# key_values = on.map { |key| row[key] }
|
64
|
+
# [key_values, row.to_a]
|
65
|
+
# end
|
62
66
|
end
|
63
67
|
|
64
|
-
def
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
if (col_names1.include?(name))
|
73
|
-
col_names1[col_names1.index(name)] = (name.to_s + "_1").to_sym unless on.include?(name)
|
74
|
-
(name.to_s + "_2").to_sym
|
75
|
-
else
|
76
|
-
name
|
77
|
-
end
|
78
|
-
end
|
68
|
+
def verify_dataframes df_hash1, df_hash2, on
|
69
|
+
raise ArgumentError,
|
70
|
+
'All fields in :on must be present in self' unless on.all? { |e| df_hash1[e] }
|
71
|
+
raise ArgumentError,
|
72
|
+
'All fields in :on must be present in other DF' unless on.all? { |e| df_hash2[e] }
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
79
76
|
|
80
|
-
|
81
|
-
|
82
|
-
|
77
|
+
class MergeFrame
|
78
|
+
def initialize(df1, df2, on: nil)
|
79
|
+
@df1 = df1
|
80
|
+
@df2 = df2
|
81
|
+
@on = on
|
82
|
+
end
|
83
83
|
|
84
|
-
|
85
|
-
|
86
|
-
|
84
|
+
def inner _opts
|
85
|
+
merge_join(left: false, right: false)
|
86
|
+
end
|
87
87
|
|
88
|
-
|
89
|
-
|
88
|
+
def left _opts
|
89
|
+
merge_join(left: true, right: false)
|
90
|
+
end
|
90
91
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
if (bf.include?(x))
|
95
|
-
{x_ind => on_cols2.each_index.select{|y_ind| on_cols2[y_ind] == x}}
|
96
|
-
else
|
97
|
-
{x_ind => []}
|
98
|
-
end
|
99
|
-
end
|
100
|
-
.reduce({}) {|h,pairs| pairs.each {|k,v| (h[k] ||= []) << v}; h}
|
101
|
-
.flat_map{|ind1, inds2| inds2.flatten.map{|ind2| [table1[ind1], table2[ind2]].flatten} if inds2.flatten.size > 0}
|
92
|
+
def right _opts
|
93
|
+
merge_join(left: false, right: true)
|
94
|
+
end
|
102
95
|
|
103
|
-
|
104
|
-
|
105
|
-
|
96
|
+
def outer _opts
|
97
|
+
merge_join(left: true, right: true)
|
98
|
+
end
|
106
99
|
|
107
|
-
|
108
|
-
|
100
|
+
def merge_join(left: true, right: true)
|
101
|
+
MergeHelper.verify_dataframes df1_hash, df2_hash, @on
|
102
|
+
MergeHelper.resolve_duplicates df1_hash, df2_hash, @on
|
109
103
|
|
110
|
-
|
111
|
-
|
112
|
-
|
104
|
+
# TODO: Use native dataframe sorting.
|
105
|
+
# It would be ideal to reuse sorting functionality that is native
|
106
|
+
# to dataframes. Unfortunately, native dataframe sort introduces
|
107
|
+
# an overhead that reduces join performance by a factor of 4! Until
|
108
|
+
# that aspect is improved, we resort to a simpler array sort.
|
109
|
+
df1_array.sort_by! { |row| [row[0].nil? ? 0 : 1, row[0]] }
|
110
|
+
df2_array.sort_by! { |row| [row[0].nil? ? 0 : 1, row[0]] }
|
113
111
|
|
114
|
-
|
115
|
-
|
116
|
-
end
|
112
|
+
idx1 = 0
|
113
|
+
idx2 = 0
|
117
114
|
|
118
|
-
|
119
|
-
joined_hash = {}
|
120
|
-
((df_hash1.keys - on) | on | (df_hash2.keys - on)).each do |k|
|
121
|
-
joined_hash[k] = []
|
122
|
-
end
|
115
|
+
while idx1 < @df1.size || idx2 < @df2.size
|
123
116
|
|
124
|
-
|
125
|
-
|
126
|
-
joined = false
|
127
|
-
(0...df2.size).each do |id2|
|
128
|
-
if on.all? { |n| df_hash1[n][id1] == df_hash2[n][id2] }
|
129
|
-
joined = true
|
130
|
-
joined_hash.each do |k,v|
|
131
|
-
v << (df_hash1.has_key?(k) ? df_hash1[k][id1] : df_hash2[k][id2])
|
132
|
-
end
|
133
|
-
end
|
134
|
-
end
|
117
|
+
key1 = df1_array[idx1][0] if idx1 < @df1.size
|
118
|
+
key2 = df2_array[idx2][0] if idx2 < @df2.size
|
135
119
|
|
136
|
-
|
137
|
-
|
138
|
-
joined_hash[k] << df_hash1[k][id1]
|
139
|
-
end
|
120
|
+
if key1 == key2 && idx1 < @df1.size && idx2 < @df2.size
|
121
|
+
idx2_start = idx2
|
140
122
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
joined = false
|
123
|
+
while (idx2 < @df2.size) && (df1_array[idx1][0] == df2_array[idx2][0])
|
124
|
+
add_merge_row_to_hash([df1_array[idx1], df2_array[idx2]], joined_hash)
|
125
|
+
idx2 += 1
|
145
126
|
end
|
127
|
+
|
128
|
+
idx2 = idx2_start if idx1+1 < @df1.size && df1_array[idx1][0] == df1_array[idx1+1][0]
|
129
|
+
idx1 += 1
|
130
|
+
elsif ((key2.nil? || [key1,key2].sort == [key1,key2]) && idx1 < @df1.size) || idx2 == @df2.size
|
131
|
+
add_merge_row_to_hash([df1_array[idx1], nil], joined_hash) if left
|
132
|
+
idx1 += 1
|
133
|
+
elsif idx2 < @df2.size || idx1 == @df1.size
|
134
|
+
add_merge_row_to_hash([nil, df2_array[idx2]], joined_hash) if right
|
135
|
+
idx2 += 1
|
136
|
+
else
|
137
|
+
raise 'Unexpected condition met during merge'
|
146
138
|
end
|
139
|
+
end
|
147
140
|
|
148
|
-
|
149
|
-
|
141
|
+
Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
|
142
|
+
end
|
143
|
+
|
144
|
+
private
|
145
|
+
|
146
|
+
def joined_hash
|
147
|
+
return @joined_hash if @joined_hash
|
148
|
+
@joined_hash ||= {}
|
149
|
+
|
150
|
+
((df1_keys - @on) | @on | (df2_keys - @on)).each do |k|
|
151
|
+
@joined_hash[k] = []
|
150
152
|
end
|
151
153
|
|
152
|
-
|
153
|
-
|
154
|
-
((df_hash1.keys - on) | on | (df_hash2.keys - on)).each do |k|
|
155
|
-
joined_hash[k] = []
|
156
|
-
end
|
154
|
+
@joined_hash
|
155
|
+
end
|
157
156
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
if on.all? { |n| df_hash2[n][id1] == df_hash1[n][id2] }
|
162
|
-
joined = true
|
163
|
-
joined_hash.each do |k,v|
|
164
|
-
v << (df_hash2.has_key?(k) ? df_hash2[k][id1] : df_hash1[k][id2])
|
165
|
-
end
|
166
|
-
end
|
167
|
-
end
|
157
|
+
def df1_hash
|
158
|
+
@df1_hash ||= MergeHelper.hashify @df1
|
159
|
+
end
|
168
160
|
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
end
|
161
|
+
def df2_hash
|
162
|
+
@df2_hash ||= MergeHelper.hashify @df2
|
163
|
+
end
|
173
164
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
joined = false
|
178
|
-
end
|
179
|
-
end
|
165
|
+
def df1_array
|
166
|
+
@df1_array ||= MergeHelper.arrayify_with_sort_keys @df1.size, df1_hash, @on
|
167
|
+
end
|
180
168
|
|
181
|
-
|
182
|
-
|
183
|
-
|
169
|
+
def df2_array
|
170
|
+
@df2_array ||= MergeHelper.arrayify_with_sort_keys @df2.size, df2_hash, @on
|
171
|
+
end
|
184
172
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
173
|
+
def df1_keys
|
174
|
+
df1_hash.keys
|
175
|
+
end
|
176
|
+
|
177
|
+
def df2_keys
|
178
|
+
df2_hash.keys
|
179
|
+
end
|
180
|
+
|
181
|
+
# Private: The merge row contains two elements, the first is the row from the
|
182
|
+
# first dataframe, the second is the row from the second dataframe.
|
183
|
+
def add_merge_row_to_hash row, hash
|
184
|
+
@df1_key_to_index ||= df1_keys.each_with_index.map { |k,idx| [k, idx] }.to_h
|
185
|
+
@df2_key_to_index ||= df2_keys.each_with_index.map { |k,idx| [k, idx] }.to_h
|
186
|
+
|
187
|
+
hash.each do |k,v|
|
188
|
+
v ||= []
|
189
|
+
|
190
|
+
left = df1_keys.include?(k) ? row[0] && row[0][1][@df1_key_to_index[k]] : nil
|
191
|
+
right = df2_keys.include?(k) ? row[1] && row[1][1][@df2_key_to_index[k]] : nil
|
192
|
+
|
193
|
+
v << (left || right)
|
190
194
|
end
|
191
195
|
end
|
192
196
|
end
|
197
|
+
|
193
198
|
# Private module containing methods for join, merge, concat operations on
|
194
199
|
# dataframes and vectors.
|
195
200
|
# @private
|
196
201
|
module Merge
|
197
202
|
class << self
|
198
203
|
def join df1, df2, opts={}
|
199
|
-
helper = MergeHelper
|
200
|
-
|
201
|
-
df_hash1 = helper.hashify df1
|
202
|
-
df_hash2 = helper.hashify df2
|
203
204
|
on = opts[:on]
|
204
205
|
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
case opts[:how]
|
209
|
-
when :inner
|
210
|
-
if Daru.has_bloomfilter_rb?
|
211
|
-
helper.bf_inner_join df1, df2, on
|
212
|
-
else
|
213
|
-
helper.inner_join df1, df2, df_hash1, df_hash2, on
|
214
|
-
end
|
215
|
-
when :outer
|
216
|
-
helper.full_outer_join df1, df2, df_hash1, df_hash2, on
|
217
|
-
when :left
|
218
|
-
helper.left_outer_join df1, df2, df_hash1, df_hash2, on
|
219
|
-
when :right
|
220
|
-
helper.right_outer_join df1, df2, df_hash1, df_hash2, on
|
221
|
-
else
|
222
|
-
raise ArgumentError, "Unrecognized option in :how => #{opts[:how]}"
|
223
|
-
end
|
206
|
+
mf = MergeFrame.new df1, df2, on: on
|
207
|
+
mf.send opts[:how], {}
|
224
208
|
end
|
225
209
|
end
|
226
210
|
end
|
data/lib/daru/core/query.rb
CHANGED
@@ -9,31 +9,19 @@ module Daru
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def & other
|
12
|
-
|
13
|
-
other_barry = other.barry
|
14
|
-
@barry.each_with_index do |b, i|
|
15
|
-
new_bool << (b and other_barry[i])
|
16
|
-
end
|
17
|
-
|
18
|
-
BoolArray.new(new_bool)
|
12
|
+
BoolArray.new @barry.zip(other.barry).map { |b, o| b && o }
|
19
13
|
end
|
20
14
|
|
21
15
|
alias :and :&
|
22
16
|
|
23
17
|
def | other
|
24
|
-
|
25
|
-
other_barry = other.barry
|
26
|
-
@barry.each_with_index do |b, i|
|
27
|
-
new_bool << (b or other_barry[i])
|
28
|
-
end
|
29
|
-
|
30
|
-
BoolArray.new(new_bool)
|
18
|
+
BoolArray.new @barry.zip(other.barry).map { |b, o| b || o }
|
31
19
|
end
|
32
20
|
|
33
21
|
alias :or :|
|
34
22
|
|
35
23
|
def !
|
36
|
-
BoolArray.new(@barry.map
|
24
|
+
BoolArray.new(@barry.map(&:!))
|
37
25
|
end
|
38
26
|
|
39
27
|
def == other
|
@@ -45,27 +33,17 @@ module Daru
|
|
45
33
|
end
|
46
34
|
|
47
35
|
def inspect
|
48
|
-
"(#{self.class}:#{
|
36
|
+
"(#{self.class}:#{object_id} bool_arry=#{@barry})"
|
49
37
|
end
|
50
38
|
end
|
51
39
|
|
52
40
|
class << self
|
53
41
|
def apply_scalar_operator operator, data, other
|
54
|
-
|
55
|
-
memo << (d.send(operator, other) ? true : false)
|
56
|
-
memo
|
57
|
-
end
|
58
|
-
|
59
|
-
BoolArray.new(arry)
|
42
|
+
BoolArray.new data.map { |d| !!d.send(operator, other) }
|
60
43
|
end
|
61
44
|
|
62
45
|
def apply_vector_operator operator, vector, other
|
63
|
-
|
64
|
-
vector.each_with_index do |d, i|
|
65
|
-
bool_arry << (d.send(operator, other[i]) ? true : false)
|
66
|
-
end
|
67
|
-
|
68
|
-
BoolArray.new(bool_arry)
|
46
|
+
BoolArray.new vector.zip(other).map { |d, o| !!d.send(operator, o) }
|
69
47
|
end
|
70
48
|
|
71
49
|
def df_where data_frame, bool_array
|
@@ -74,7 +52,8 @@ module Daru
|
|
74
52
|
end
|
75
53
|
|
76
54
|
Daru::DataFrame.new(
|
77
|
-
vecs, order: data_frame.vectors, index: vecs[0].index, clone: false
|
55
|
+
vecs, order: data_frame.vectors, index: vecs[0].index, clone: false
|
56
|
+
)
|
78
57
|
end
|
79
58
|
|
80
59
|
def vector_where data, index, bool_array, dtype
|
@@ -92,4 +71,4 @@ module Daru
|
|
92
71
|
end
|
93
72
|
end
|
94
73
|
end
|
95
|
-
end
|
74
|
+
end
|
data/lib/daru/dataframe.rb
CHANGED
@@ -1,14 +1,11 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require '
|
4
|
-
require '
|
5
|
-
require '
|
6
|
-
require 'plotting/dataframe.rb'
|
7
|
-
require 'io/io.rb'
|
1
|
+
require 'daru/accessors/dataframe_by_row.rb'
|
2
|
+
require 'daru/maths/arithmetic/dataframe.rb'
|
3
|
+
require 'daru/maths/statistics/dataframe.rb'
|
4
|
+
require 'daru/plotting/dataframe.rb'
|
5
|
+
require 'daru/io/io.rb'
|
8
6
|
|
9
7
|
module Daru
|
10
8
|
class DataFrame
|
11
|
-
|
12
9
|
include Daru::Maths::Arithmetic::DataFrame
|
13
10
|
include Daru::Maths::Statistics::DataFrame
|
14
11
|
include Daru::Plotting::DataFrame if Daru.has_nyaplot?
|
@@ -115,31 +112,30 @@ module Daru
|
|
115
112
|
# Create DataFrame by specifying rows as an Array of Arrays or Array of
|
116
113
|
# Daru::Vector objects.
|
117
114
|
def rows source, opts={}
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
115
|
+
first = source.first
|
116
|
+
|
117
|
+
raise SizeError, 'All vectors must have same length' \
|
118
|
+
unless source.all? { |v| v.size == first.size }
|
119
|
+
|
120
|
+
index = []
|
121
|
+
opts[:order] ||=
|
122
|
+
case first
|
123
|
+
when Daru::Vector # assume that all are Vectors
|
124
|
+
index = source.map(&:name)
|
125
125
|
first.index.to_a
|
126
|
-
|
127
|
-
Array.new(first.size
|
126
|
+
when Array
|
127
|
+
Array.new(first.size, &:to_s)
|
128
128
|
end
|
129
129
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
130
|
+
if source.all? { |s| s.is_a?(Array) }
|
131
|
+
Daru::DataFrame.new(source.transpose, opts)
|
132
|
+
else # array of Daru::Vectors
|
133
|
+
Daru::DataFrame.new({}, opts).tap do |df|
|
134
134
|
source.each_with_index do |row, idx|
|
135
|
-
df[
|
135
|
+
df[index[idx] || idx, :row] = row
|
136
136
|
end
|
137
137
|
end
|
138
|
-
else
|
139
|
-
raise SizeError, "All vectors must have same length"
|
140
138
|
end
|
141
|
-
|
142
|
-
df
|
143
139
|
end
|
144
140
|
|
145
141
|
# Generates a new dataset, using three vectors
|
@@ -162,18 +158,16 @@ module Daru
|
|
162
158
|
#
|
163
159
|
# Useful to process outputs from databases
|
164
160
|
def crosstab_by_assignation rows, columns, values
|
165
|
-
raise
|
166
|
-
rows.size != columns.size
|
161
|
+
raise 'Three vectors should be equal size' if
|
162
|
+
rows.size != columns.size || rows.size!=values.size
|
167
163
|
|
168
164
|
cols_values = columns.factors
|
169
165
|
cols_n = cols_values.size
|
170
166
|
|
171
|
-
h_rows = rows.factors.
|
172
|
-
a[v] = cols_values.
|
167
|
+
h_rows = rows.factors.each_with_object({}) do |v, a|
|
168
|
+
a[v] = cols_values.each_with_object({}) do |v1, a1|
|
173
169
|
a1[v1]=nil
|
174
|
-
a1
|
175
170
|
end
|
176
|
-
a
|
177
171
|
end
|
178
172
|
|
179
173
|
values.each_index do |i|
|
@@ -250,7 +244,7 @@ module Daru
|
|
250
244
|
@data = []
|
251
245
|
|
252
246
|
temp_name = opts[:name]
|
253
|
-
@name
|
247
|
+
@name = temp_name || SecureRandom.uuid
|
254
248
|
|
255
249
|
if source.empty?
|
256
250
|
@vectors = try_create_index vectors
|
@@ -266,7 +260,7 @@ module Daru
|
|
266
260
|
@index = try_create_index(index || source[0].size)
|
267
261
|
@vectors = try_create_index(vectors)
|
268
262
|
|
269
|
-
@vectors.each_with_index do |
|
263
|
+
@vectors.each_with_index do |_vec,idx|
|
270
264
|
@data << Daru::Vector.new(source[idx], index: @index)
|
271
265
|
end
|
272
266
|
elsif source.all? { |s| s.is_a?(Daru::Vector) }
|
@@ -276,18 +270,18 @@ module Daru
|
|
276
270
|
end
|
277
271
|
initialize(hsh, index: index, order: vectors, name: @name, clone: clone)
|
278
272
|
else # array of hashes
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
(vectors + (source[0].keys - vectors)).uniq)
|
284
|
-
|
273
|
+
@vectors =
|
274
|
+
if vectors.nil?
|
275
|
+
Daru::Index.new source[0].keys
|
276
|
+
else
|
277
|
+
Daru::Index.new((vectors + (source[0].keys - vectors)).uniq)
|
278
|
+
end
|
285
279
|
@index = Daru::Index.new(index || source.size)
|
286
280
|
|
287
281
|
@vectors.each do |name|
|
288
282
|
v = []
|
289
|
-
source.each do |
|
290
|
-
v << (
|
283
|
+
source.each do |h|
|
284
|
+
v << (h[name] || h[name.to_s])
|
291
285
|
end
|
292
286
|
|
293
287
|
@data << Daru::Vector.new(v, name: set_name(name), index: @index)
|
@@ -296,10 +290,10 @@ module Daru
|
|
296
290
|
when Hash
|
297
291
|
create_vectors_index_with vectors, source
|
298
292
|
if all_daru_vectors_in_source? source
|
293
|
+
vectors_have_same_index = all_vectors_have_equal_indexes?(source)
|
299
294
|
if !index.nil?
|
300
295
|
@index = try_create_index index
|
301
|
-
elsif
|
302
|
-
vectors_have_same_index = true
|
296
|
+
elsif vectors_have_same_index
|
303
297
|
@index = source.values[0].index.dup
|
304
298
|
else
|
305
299
|
all_indexes = []
|
@@ -320,14 +314,10 @@ module Daru
|
|
320
314
|
if vectors_have_same_index
|
321
315
|
v = source[vector].dup
|
322
316
|
else
|
323
|
-
v = Daru::Vector.new([], name: vector, index: @index)
|
317
|
+
v = Daru::Vector.new([], name: vector, metadata: source[vector].metadata.dup, index: @index)
|
324
318
|
|
325
319
|
@index.each do |idx|
|
326
|
-
|
327
|
-
v[idx] = source[vector][idx]
|
328
|
-
else
|
329
|
-
v[idx] = nil
|
330
|
-
end
|
320
|
+
v[idx] = source[vector].index.include?(idx) ? source[vector][idx] : nil
|
331
321
|
end
|
332
322
|
end
|
333
323
|
@data << v
|
@@ -339,7 +329,8 @@ module Daru
|
|
339
329
|
@index = try_create_index(index || source.values[0].size)
|
340
330
|
|
341
331
|
@vectors.each do |name|
|
342
|
-
|
332
|
+
meta_opt = source[name].respond_to?(:metadata) ? {metadata: source[name].metadata.dup} : {}
|
333
|
+
@data << Daru::Vector.new(source[name].dup, name: set_name(name), **meta_opt, index: @index)
|
343
334
|
end
|
344
335
|
end
|
345
336
|
end
|
@@ -350,17 +341,16 @@ module Daru
|
|
350
341
|
update
|
351
342
|
end
|
352
343
|
|
353
|
-
def vector
|
354
|
-
$stderr.puts
|
344
|
+
def vector(*)
|
345
|
+
$stderr.puts '#vector has been deprecated in favour of #[]. Please use that.'
|
355
346
|
self[*names]
|
356
347
|
end
|
357
348
|
|
358
349
|
# Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
|
359
350
|
# Defaults to *:vector*. Use of this method is not recommended for accessing
|
360
|
-
# rows
|
361
|
-
# df.vector[:vec] for accessing vector with index *:vec*.
|
351
|
+
# rows. Use df.row[:a] for accessing row with index ':a'.
|
362
352
|
def [](*names)
|
363
|
-
if names[-1] == :vector
|
353
|
+
if names[-1] == :vector || names[-1] == :row
|
364
354
|
axis = names[-1]
|
365
355
|
names = names[0..-2]
|
366
356
|
else
|
@@ -368,9 +358,9 @@ module Daru
|
|
368
358
|
end
|
369
359
|
|
370
360
|
if axis == :vector
|
371
|
-
access_vector
|
361
|
+
access_vector(*names)
|
372
362
|
elsif axis == :row
|
373
|
-
access_row
|
363
|
+
access_row(*names)
|
374
364
|
else
|
375
365
|
raise IndexError, "Expected axis to be row or vector not #{axis}"
|
376
366
|
end
|
@@ -433,7 +423,7 @@ module Daru
|
|
433
423
|
|
434
424
|
src = []
|
435
425
|
vectors_to_dup.each do |vec|
|
436
|
-
src << @data[@vectors[vec]].
|
426
|
+
src << @data[@vectors[vec]].dup
|
437
427
|
end
|
438
428
|
new_order = Daru::Index.new(vectors_to_dup)
|
439
429
|
|
@@ -454,11 +444,10 @@ module Daru
|
|
454
444
|
# a view of the whole data frame otherwise.
|
455
445
|
def clone *vectors_to_clone
|
456
446
|
vectors_to_clone.flatten! unless vectors_to_clone.all? { |a| !a.is_a?(Array) }
|
457
|
-
|
447
|
+
vectors_to_clone = @vectors.to_a if vectors_to_clone.empty?
|
458
448
|
|
459
|
-
h = vectors_to_clone.
|
449
|
+
h = vectors_to_clone.each_with_object({}) do |vec, hsh|
|
460
450
|
hsh[vec] = self[vec]
|
461
|
-
hsh
|
462
451
|
end
|
463
452
|
Daru::DataFrame.new(h, clone: false)
|
464
453
|
end
|
@@ -476,9 +465,8 @@ module Daru
|
|
476
465
|
# Creates a new duplicate dataframe containing only rows
|
477
466
|
# without a single missing value.
|
478
467
|
def dup_only_valid vecs=nil
|
479
|
-
rows_with_nil = @data.
|
468
|
+
rows_with_nil = @data.each_with_object([]) do |vector, memo|
|
480
469
|
memo.concat vector.missing_positions
|
481
|
-
memo
|
482
470
|
end.uniq
|
483
471
|
|
484
472
|
row_indexes = @index.to_a
|
@@ -505,7 +493,7 @@ module Daru
|
|
505
493
|
alias_method :each_column, :each_vector
|
506
494
|
|
507
495
|
# Iterate over each vector alongwith the name of the vector
|
508
|
-
def each_vector_with_index
|
496
|
+
def each_vector_with_index
|
509
497
|
return to_enum(:each_vector_with_index) unless block_given?
|
510
498
|
|
511
499
|
@vectors.each do |vector|
|
@@ -518,7 +506,7 @@ module Daru
|
|
518
506
|
alias_method :each_column_with_index, :each_vector_with_index
|
519
507
|
|
520
508
|
# Iterate over each row
|
521
|
-
def each_row
|
509
|
+
def each_row
|
522
510
|
return to_enum(:each_row) unless block_given?
|
523
511
|
|
524
512
|
@index.each do |index|
|
@@ -528,7 +516,7 @@ module Daru
|
|
528
516
|
self
|
529
517
|
end
|
530
518
|
|
531
|
-
def each_row_with_index
|
519
|
+
def each_row_with_index
|
532
520
|
return to_enum(:each_row_with_index) unless block_given?
|
533
521
|
|
534
522
|
@index.each do |index|
|
@@ -552,7 +540,7 @@ module Daru
|
|
552
540
|
# * +axis+ - The axis to iterate over. Can be :vector (or :column)
|
553
541
|
# or :row. Default to :vector.
|
554
542
|
def each axis=:vector, &block
|
555
|
-
if axis == :vector
|
543
|
+
if axis == :vector || axis == :column
|
556
544
|
each_vector(&block)
|
557
545
|
elsif axis == :row
|
558
546
|
each_row(&block)
|
@@ -577,7 +565,7 @@ module Daru
|
|
577
565
|
# * +axis+ - The axis to iterate over. Can be :vector (or :column)
|
578
566
|
# or :row. Default to :vector.
|
579
567
|
def collect axis=:vector, &block
|
580
|
-
if axis == :vector
|
568
|
+
if axis == :vector || axis == :column
|
581
569
|
collect_vectors(&block)
|
582
570
|
elsif axis == :row
|
583
571
|
collect_rows(&block)
|
@@ -603,7 +591,7 @@ module Daru
|
|
603
591
|
# * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
|
604
592
|
# Default to :vector.
|
605
593
|
def map axis=:vector, &block
|
606
|
-
if axis == :vector
|
594
|
+
if axis == :vector || axis == :column
|
607
595
|
map_vectors(&block)
|
608
596
|
elsif axis == :row
|
609
597
|
map_rows(&block)
|
@@ -621,7 +609,7 @@ module Daru
|
|
621
609
|
# * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
|
622
610
|
# Default to :vector.
|
623
611
|
def map! axis=:vector, &block
|
624
|
-
if axis == :vector
|
612
|
+
if axis == :vector || axis == :column
|
625
613
|
map_vectors!(&block)
|
626
614
|
elsif axis == :row
|
627
615
|
map_rows!(&block)
|
@@ -646,7 +634,7 @@ module Daru
|
|
646
634
|
# * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
|
647
635
|
# Default to :vector.
|
648
636
|
def recode axis=:vector, &block
|
649
|
-
if axis == :vector
|
637
|
+
if axis == :vector || axis == :column
|
650
638
|
recode_vectors(&block)
|
651
639
|
elsif axis == :row
|
652
640
|
recode_rows(&block)
|
@@ -682,17 +670,17 @@ module Daru
|
|
682
670
|
# row[:a] + row[:d] < 100
|
683
671
|
# end
|
684
672
|
def filter axis=:vector, &block
|
685
|
-
if axis == :vector
|
673
|
+
if axis == :vector || axis == :column
|
686
674
|
filter_vectors(&block)
|
687
675
|
elsif axis == :row
|
688
676
|
filter_rows(&block)
|
689
677
|
end
|
690
678
|
end
|
691
679
|
|
692
|
-
def recode_vectors
|
680
|
+
def recode_vectors
|
693
681
|
block_given? or return to_enum(:recode_vectors)
|
694
682
|
|
695
|
-
df =
|
683
|
+
df = dup
|
696
684
|
df.each_vector_with_index do |v, i|
|
697
685
|
ret = yield v
|
698
686
|
ret.is_a?(Daru::Vector) or
|
@@ -703,10 +691,10 @@ module Daru
|
|
703
691
|
df
|
704
692
|
end
|
705
693
|
|
706
|
-
def recode_rows
|
694
|
+
def recode_rows
|
707
695
|
block_given? or return to_enum(:recode_rows)
|
708
696
|
|
709
|
-
df =
|
697
|
+
df = dup
|
710
698
|
df.each_row_with_index do |r, i|
|
711
699
|
ret = yield r
|
712
700
|
ret.is_a?(Daru::Vector) or raise TypeError, "Every iteration must return Daru::Vector not #{ret.class}"
|
@@ -717,7 +705,7 @@ module Daru
|
|
717
705
|
end
|
718
706
|
|
719
707
|
# Map each vector and return an Array.
|
720
|
-
def map_vectors
|
708
|
+
def map_vectors
|
721
709
|
return to_enum(:map_vectors) unless block_given?
|
722
710
|
|
723
711
|
arry = []
|
@@ -729,7 +717,7 @@ module Daru
|
|
729
717
|
end
|
730
718
|
|
731
719
|
# Destructive form of #map_vectors
|
732
|
-
def map_vectors!
|
720
|
+
def map_vectors!
|
733
721
|
return to_enum(:map_vectors!) unless block_given?
|
734
722
|
|
735
723
|
vectors.dup.each do |n|
|
@@ -742,7 +730,7 @@ module Daru
|
|
742
730
|
end
|
743
731
|
|
744
732
|
# Map vectors alongwith the index.
|
745
|
-
def map_vectors_with_index
|
733
|
+
def map_vectors_with_index
|
746
734
|
return to_enum(:map_vectors_with_index) unless block_given?
|
747
735
|
|
748
736
|
dt = []
|
@@ -754,7 +742,7 @@ module Daru
|
|
754
742
|
end
|
755
743
|
|
756
744
|
# Map each row
|
757
|
-
def map_rows
|
745
|
+
def map_rows
|
758
746
|
return to_enum(:map_rows) unless block_given?
|
759
747
|
|
760
748
|
dt = []
|
@@ -765,7 +753,7 @@ module Daru
|
|
765
753
|
dt
|
766
754
|
end
|
767
755
|
|
768
|
-
def map_rows_with_index
|
756
|
+
def map_rows_with_index
|
769
757
|
return to_enum(:map_rows_with_index) unless block_given?
|
770
758
|
|
771
759
|
dt = []
|
@@ -776,13 +764,13 @@ module Daru
|
|
776
764
|
dt
|
777
765
|
end
|
778
766
|
|
779
|
-
def map_rows!
|
767
|
+
def map_rows!
|
780
768
|
return to_enum(:map_rows!) unless block_given?
|
781
769
|
|
782
770
|
index.dup.each do |i|
|
783
|
-
r = yield
|
771
|
+
r = yield row[i]
|
784
772
|
r.is_a?(Daru::Vector) or raise TypeError, "Returned object must be Daru::Vector not #{r.class}"
|
785
|
-
|
773
|
+
row[i] = r
|
786
774
|
end
|
787
775
|
|
788
776
|
self
|
@@ -790,7 +778,7 @@ module Daru
|
|
790
778
|
|
791
779
|
# Retrieves a Daru::Vector, based on the result of calculation
|
792
780
|
# performed on each row.
|
793
|
-
def collect_rows
|
781
|
+
def collect_rows
|
794
782
|
return to_enum(:collect_rows) unless block_given?
|
795
783
|
|
796
784
|
data = []
|
@@ -801,7 +789,7 @@ module Daru
|
|
801
789
|
Daru::Vector.new(data, index: @index)
|
802
790
|
end
|
803
791
|
|
804
|
-
def collect_row_with_index
|
792
|
+
def collect_row_with_index
|
805
793
|
return to_enum(:collect_row_with_index) unless block_given?
|
806
794
|
|
807
795
|
data = []
|
@@ -814,7 +802,7 @@ module Daru
|
|
814
802
|
|
815
803
|
# Retrives a Daru::Vector, based on the result of calculation
|
816
804
|
# performed on each vector.
|
817
|
-
def collect_vectors
|
805
|
+
def collect_vectors
|
818
806
|
return to_enum(:collect_vectors) unless block_given?
|
819
807
|
|
820
808
|
data = []
|
@@ -825,7 +813,7 @@ module Daru
|
|
825
813
|
Daru::Vector.new(data, index: @vectors)
|
826
814
|
end
|
827
815
|
|
828
|
-
def collect_vector_with_index
|
816
|
+
def collect_vector_with_index
|
829
817
|
return to_enum(:collect_vector_with_index) unless block_given?
|
830
818
|
|
831
819
|
data = []
|
@@ -852,15 +840,19 @@ module Daru
|
|
852
840
|
Matrix.rows(rows)
|
853
841
|
end
|
854
842
|
|
855
|
-
|
856
843
|
# Delete a vector
|
857
844
|
def delete_vector vector
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
845
|
+
raise IndexError, "Vector #{vector} does not exist." unless @vectors.include?(vector)
|
846
|
+
|
847
|
+
@data.delete_at @vectors[vector]
|
848
|
+
@vectors = Daru::Index.new @vectors.to_a - [vector]
|
849
|
+
|
850
|
+
self
|
851
|
+
end
|
852
|
+
|
853
|
+
# Deletes a list of vectors
|
854
|
+
def delete_vectors *vectors
|
855
|
+
Array(vectors).each { |vec| delete_vector vec }
|
864
856
|
|
865
857
|
self
|
866
858
|
end
|
@@ -869,13 +861,10 @@ module Daru
|
|
869
861
|
def delete_row index
|
870
862
|
idx = named_index_for index
|
871
863
|
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
end
|
877
|
-
else
|
878
|
-
raise IndexError, "Index #{index} does not exist."
|
864
|
+
raise IndexError, "Index #{index} does not exist." unless @index.include? idx
|
865
|
+
@index = Daru::Index.new(@index.to_a - [idx])
|
866
|
+
each_vector do |vector|
|
867
|
+
vector.delete_at idx
|
879
868
|
end
|
880
869
|
|
881
870
|
set_size
|
@@ -895,7 +884,7 @@ module Daru
|
|
895
884
|
ds_boot
|
896
885
|
end
|
897
886
|
|
898
|
-
def keep_row_if
|
887
|
+
def keep_row_if
|
899
888
|
deletion = []
|
900
889
|
|
901
890
|
@index.each do |index|
|
@@ -908,7 +897,7 @@ module Daru
|
|
908
897
|
}
|
909
898
|
end
|
910
899
|
|
911
|
-
def keep_vector_if
|
900
|
+
def keep_vector_if
|
912
901
|
@vectors.each do |vector|
|
913
902
|
keep_vector = yield @data[@vectors[vector]], vector
|
914
903
|
|
@@ -923,27 +912,17 @@ module Daru
|
|
923
912
|
d.push(row[vec]) if yield row
|
924
913
|
end
|
925
914
|
|
926
|
-
Daru::Vector.new(d)
|
915
|
+
Daru::Vector.new(d, metadata: self[vec].metadata.dup)
|
927
916
|
end
|
928
917
|
|
929
918
|
# Iterates over each row and retains it in a new DataFrame if the block returns
|
930
919
|
# true for that row.
|
931
|
-
def filter_rows
|
920
|
+
def filter_rows
|
932
921
|
return to_enum(:filter_rows) unless block_given?
|
933
922
|
|
934
|
-
|
935
|
-
marked = []
|
936
|
-
|
937
|
-
@index.each do |index|
|
938
|
-
keep_row = yield access_row(index)
|
939
|
-
marked << index if keep_row
|
940
|
-
end
|
941
|
-
|
942
|
-
marked.each do |idx|
|
943
|
-
df.row[idx] = self[idx, :row]
|
944
|
-
end
|
923
|
+
keep_rows = @index.map { |index| yield access_row(index) }
|
945
924
|
|
946
|
-
|
925
|
+
where keep_rows
|
947
926
|
end
|
948
927
|
|
949
928
|
# Iterates over each vector and retains it in a new DataFrame if the block returns
|
@@ -951,8 +930,8 @@ module Daru
|
|
951
930
|
def filter_vectors &block
|
952
931
|
return to_enum(:filter_vectors) unless block_given?
|
953
932
|
|
954
|
-
df =
|
955
|
-
df.keep_vector_if
|
933
|
+
df = dup
|
934
|
+
df.keep_vector_if(&block)
|
956
935
|
|
957
936
|
df
|
958
937
|
end
|
@@ -962,7 +941,7 @@ module Daru
|
|
962
941
|
#
|
963
942
|
# The function returns an array with all errors.
|
964
943
|
def verify(*tests)
|
965
|
-
if
|
944
|
+
if tests[0].is_a? Symbol
|
966
945
|
id = tests[0]
|
967
946
|
tests.shift
|
968
947
|
else
|
@@ -974,13 +953,12 @@ module Daru
|
|
974
953
|
each(:row) do |row|
|
975
954
|
i += 1
|
976
955
|
tests.each do |test|
|
977
|
-
if
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
end
|
982
|
-
vr.push("#{i} [#{row[id]}]: #{test[0]}#{values}")
|
956
|
+
next if test[2].call(row)
|
957
|
+
values = ''
|
958
|
+
unless test[1].empty?
|
959
|
+
values = ' (' + test[1].collect { |k| "#{k}=#{row[k]}" }.join(', ') + ')'
|
983
960
|
end
|
961
|
+
vr.push("#{i} [#{row[id]}]: #{test[0]}#{values}")
|
984
962
|
end
|
985
963
|
end
|
986
964
|
vr
|
@@ -1051,7 +1029,7 @@ module Daru
|
|
1051
1029
|
alias :vector_missing_values :missing_values_rows
|
1052
1030
|
|
1053
1031
|
def has_missing_data?
|
1054
|
-
!!@data.any?
|
1032
|
+
!!@data.any?(&:has_missing_data?)
|
1055
1033
|
end
|
1056
1034
|
|
1057
1035
|
alias :flawed? :has_missing_data?
|
@@ -1075,9 +1053,9 @@ module Daru
|
|
1075
1053
|
name = row[tree_keys.last]
|
1076
1054
|
if !block
|
1077
1055
|
current[name] ||= []
|
1078
|
-
current[name].push(row.
|
1056
|
+
current[name].push(row.to_h.delete_if { |key,_value| tree_keys.include? key })
|
1079
1057
|
else
|
1080
|
-
current[name] =
|
1058
|
+
current[name] = yield(row, current, name)
|
1081
1059
|
end
|
1082
1060
|
end
|
1083
1061
|
|
@@ -1087,7 +1065,7 @@ module Daru
|
|
1087
1065
|
def vector_count_characters vecs=nil
|
1088
1066
|
vecs ||= @vectors.to_a
|
1089
1067
|
|
1090
|
-
|
1068
|
+
collect_rows do |row|
|
1091
1069
|
vecs.inject(0) do |memo, vec|
|
1092
1070
|
memo + (row[vec].nil? ? 0 : row[vec].to_s.size)
|
1093
1071
|
end
|
@@ -1129,7 +1107,7 @@ module Daru
|
|
1129
1107
|
# row[:a] < 3 and row[:b] == 'b'
|
1130
1108
|
# end #=> true
|
1131
1109
|
def any? axis=:vector, &block
|
1132
|
-
if axis == :vector
|
1110
|
+
if axis == :vector || axis == :column
|
1133
1111
|
@data.any?(&block)
|
1134
1112
|
elsif axis == :row
|
1135
1113
|
each_row do |row|
|
@@ -1151,7 +1129,7 @@ module Daru
|
|
1151
1129
|
# row[:a] < 10
|
1152
1130
|
# end #=> true
|
1153
1131
|
def all? axis=:vector, &block
|
1154
|
-
if axis == :vector
|
1132
|
+
if axis == :vector || axis == :column
|
1155
1133
|
@data.all?(&block)
|
1156
1134
|
elsif axis == :row
|
1157
1135
|
each_row do |row|
|
@@ -1236,46 +1214,52 @@ module Daru
|
|
1236
1214
|
# # ["foo", "two", 3]=>[2, 4]}
|
1237
1215
|
def group_by *vectors
|
1238
1216
|
vectors.flatten!
|
1239
|
-
vectors.each { |v|
|
1240
|
-
has_vector?(v)
|
1217
|
+
vectors.each { |v|
|
1218
|
+
raise(ArgumentError, "Vector #{v} does not exist") unless has_vector?(v)
|
1219
|
+
}
|
1241
1220
|
|
1242
1221
|
Daru::Core::GroupBy.new(self, vectors)
|
1243
1222
|
end
|
1244
1223
|
|
1245
1224
|
def reindex_vectors new_vectors
|
1246
|
-
raise ArgumentError,
|
1247
|
-
"subclasses, not #{new_index.class}" unless new_vectors.
|
1225
|
+
raise ArgumentError, 'Must pass the new index of type Index or its '\
|
1226
|
+
"subclasses, not #{new_index.class}" unless new_vectors.is_a?(Daru::Index)
|
1248
1227
|
|
1249
1228
|
cl = Daru::DataFrame.new({}, order: new_vectors, index: @index, name: @name)
|
1250
1229
|
new_vectors.each do |vec|
|
1251
|
-
|
1252
|
-
cl[vec] = self[vec]
|
1253
|
-
else
|
1254
|
-
cl[vec] = [nil]*nrows
|
1255
|
-
end
|
1230
|
+
cl[vec] = @vectors.include?(vec) ? self[vec] : cl[vec] = [nil]*nrows
|
1256
1231
|
end
|
1257
1232
|
|
1258
1233
|
cl
|
1259
1234
|
end
|
1260
1235
|
|
1261
1236
|
# Concatenate another DataFrame along corresponding columns.
|
1262
|
-
#
|
1237
|
+
# If columns do not exist in both dataframes, they are filled with nils
|
1263
1238
|
def concat other_df
|
1264
|
-
vectors =
|
1265
|
-
|
1266
|
-
|
1239
|
+
vectors = @vectors.to_a
|
1240
|
+
data = []
|
1241
|
+
|
1242
|
+
vectors.each do |v|
|
1243
|
+
other_vec = other_df.vectors.include?(v) ? other_df[v].to_a : [nil] * other_df.size
|
1244
|
+
data << self[v].dup.to_a.concat(other_vec)
|
1245
|
+
end
|
1246
|
+
|
1247
|
+
other_df.vectors.each do |v|
|
1248
|
+
next if vectors.include?(v)
|
1249
|
+
vectors << v
|
1250
|
+
data << ([nil] * size).concat(other_df[v].to_a)
|
1267
1251
|
end
|
1268
1252
|
|
1269
|
-
Daru::DataFrame.new(
|
1253
|
+
Daru::DataFrame.new(data, order: vectors)
|
1270
1254
|
end
|
1271
1255
|
|
1272
1256
|
# Set a particular column as the new DF index
|
1273
1257
|
def set_index new_index, opts={}
|
1274
|
-
raise ArgumentError,
|
1258
|
+
raise ArgumentError, 'All elements in new index must be unique.' if
|
1275
1259
|
@size != self[new_index].uniq.size
|
1276
1260
|
|
1277
1261
|
self.index = Daru::Index.new(self[new_index].to_a)
|
1278
|
-
|
1262
|
+
delete_vector(new_index) unless opts[:keep]
|
1279
1263
|
|
1280
1264
|
self
|
1281
1265
|
end
|
@@ -1303,16 +1287,12 @@ module Daru
|
|
1303
1287
|
# # a 1 11
|
1304
1288
|
# # g nil nil
|
1305
1289
|
def reindex new_index
|
1306
|
-
raise ArgumentError,
|
1307
|
-
"subclasses, not #{new_index.class}" unless new_index.
|
1290
|
+
raise ArgumentError, 'Must pass the new index of type Index or its '\
|
1291
|
+
"subclasses, not #{new_index.class}" unless new_index.is_a?(Daru::Index)
|
1308
1292
|
|
1309
1293
|
cl = Daru::DataFrame.new({}, order: @vectors, index: new_index, name: @name)
|
1310
1294
|
new_index.each do |idx|
|
1311
|
-
|
1312
|
-
cl.row[idx] = self.row[idx]
|
1313
|
-
else
|
1314
|
-
cl.row[idx] = [nil]*ncols
|
1315
|
-
end
|
1295
|
+
cl.row[idx] = @index.include?(idx) ? row[idx] : [nil]*ncols
|
1316
1296
|
end
|
1317
1297
|
|
1318
1298
|
cl
|
@@ -1330,7 +1310,7 @@ module Daru
|
|
1330
1310
|
# df.index.to_a #=> ['a','b','c','d']
|
1331
1311
|
# df.row['a'].to_a #=> [1,11]
|
1332
1312
|
def index= idx
|
1333
|
-
@data.each { |vec| vec.index = idx}
|
1313
|
+
@data.each { |vec| vec.index = idx }
|
1334
1314
|
@index = idx
|
1335
1315
|
|
1336
1316
|
self
|
@@ -1347,8 +1327,8 @@ module Daru
|
|
1347
1327
|
# df.vectors = Daru::Index.new([:foo, :bar, :baz])
|
1348
1328
|
# df.vectors.to_a #=> [:foo, :bar, :baz]
|
1349
1329
|
def vectors= idx
|
1350
|
-
raise ArgumentError,
|
1351
|
-
index.
|
1330
|
+
raise ArgumentError, 'Can only reindex with Index and its subclasses' unless
|
1331
|
+
index.is_a?(Daru::Index)
|
1352
1332
|
raise ArgumentError, "Specified index length #{idx.size} not equal to"\
|
1353
1333
|
"dataframe size #{ncols}" if idx.size != ncols
|
1354
1334
|
|
@@ -1356,13 +1336,35 @@ module Daru
|
|
1356
1336
|
self
|
1357
1337
|
end
|
1358
1338
|
|
1339
|
+
# Renames the vectors
|
1340
|
+
#
|
1341
|
+
# == Arguments
|
1342
|
+
#
|
1343
|
+
# * name_map - A hash where the keys are the existing vector names and
|
1344
|
+
# the values are the new names. If a vector is renamed
|
1345
|
+
# to a vector name that is already in use, the existing
|
1346
|
+
# one is overwritten.
|
1347
|
+
#
|
1348
|
+
# == Usage
|
1349
|
+
#
|
1350
|
+
# df = Daru::DataFrame.new({ a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44] })
|
1351
|
+
# df.rename_vectors :a => :alpha, :c => :gamma
|
1352
|
+
# df.vectors.to_a #=> [:alpha, :b, :gamma]
|
1353
|
+
def rename_vectors name_map
|
1354
|
+
existing_targets = name_map.select { |k,v| k != v }.values & vectors.to_a
|
1355
|
+
delete_vectors(*existing_targets)
|
1356
|
+
|
1357
|
+
new_names = vectors.to_a.map { |v| name_map[v] ? name_map[v] : v }
|
1358
|
+
self.vectors = Daru::Index.new new_names
|
1359
|
+
end
|
1360
|
+
|
1359
1361
|
# Return the indexes of all the numeric vectors. Will include vectors with nils
|
1360
1362
|
# along with numbers.
|
1361
1363
|
def numeric_vectors
|
1362
1364
|
numerics = []
|
1363
1365
|
|
1364
1366
|
each_vector_with_index do |vec, i|
|
1365
|
-
numerics << i if
|
1367
|
+
numerics << i if vec.type == :numeric
|
1366
1368
|
end
|
1367
1369
|
numerics
|
1368
1370
|
end
|
@@ -1371,7 +1373,7 @@ module Daru
|
|
1371
1373
|
numerics = []
|
1372
1374
|
|
1373
1375
|
@vectors.each do |v|
|
1374
|
-
numerics << v if
|
1376
|
+
numerics << v if self[v].type == :numeric
|
1375
1377
|
end
|
1376
1378
|
numerics
|
1377
1379
|
end
|
@@ -1382,9 +1384,8 @@ module Daru
|
|
1382
1384
|
def only_numerics opts={}
|
1383
1385
|
cln = opts[:clone] == false ? false : true
|
1384
1386
|
nv = numeric_vectors
|
1385
|
-
arry = nv.
|
1387
|
+
arry = nv.each_with_object([]) do |v, arr|
|
1386
1388
|
arr << self[v]
|
1387
|
-
arr
|
1388
1389
|
end
|
1389
1390
|
|
1390
1391
|
order = Index.new(nv)
|
@@ -1392,12 +1393,12 @@ module Daru
|
|
1392
1393
|
end
|
1393
1394
|
|
1394
1395
|
# Generate a summary of this DataFrame with ReportBuilder.
|
1395
|
-
def summary(method
|
1396
|
+
def summary(method=:to_text)
|
1396
1397
|
ReportBuilder.new(no_title: true).add(self).send(method)
|
1397
1398
|
end
|
1398
1399
|
|
1399
1400
|
def report_building(b) # :nodoc: #
|
1400
|
-
b.section(:name
|
1401
|
+
b.section(name: @name) do |g|
|
1401
1402
|
g.text "Number of rows: #{nrows}"
|
1402
1403
|
@vectors.each do |v|
|
1403
1404
|
g.text "Element:[#{v}]"
|
@@ -1406,8 +1407,8 @@ module Daru
|
|
1406
1407
|
end
|
1407
1408
|
end
|
1408
1409
|
|
1409
|
-
# Sorts a dataframe (ascending/descending)
|
1410
|
-
# vectors,
|
1410
|
+
# Sorts a dataframe (ascending/descending) in the given priority sequence of
|
1411
|
+
# vectors, with or without a block.
|
1411
1412
|
#
|
1412
1413
|
# @param order [Array] The order of vector names in which the DataFrame
|
1413
1414
|
# should be sorted.
|
@@ -1415,42 +1416,121 @@ module Daru
|
|
1415
1416
|
# @option opts [TrueClass,FalseClass,Array] :ascending (true) Sort in ascending
|
1416
1417
|
# or descending order. Specify Array corresponding to *order* for multiple
|
1417
1418
|
# sort orders.
|
1418
|
-
# @option opts [Hash] :by ({|a
|
1419
|
+
# @option opts [Hash] :by (lambda{|a| a }) Specify attributes of objects to
|
1419
1420
|
# be used for sorting, for each vector name in *order* as a hash of
|
1420
|
-
# vector name and lambda
|
1421
|
+
# vector name and lambda expressions. In case a lambda for a vector is not
|
1421
1422
|
# specified, the default will be used.
|
1423
|
+
# @option opts [TrueClass,FalseClass,Array] :handle_nils (false) Handle nils
|
1424
|
+
# automatically or not when a block is provided.
|
1425
|
+
# If set to True, nils will appear at top after sorting.
|
1422
1426
|
#
|
1423
|
-
#
|
1427
|
+
# @example Sort a dataframe with a vector sequence.
|
1428
|
+
#
|
1429
|
+
#
|
1430
|
+
# df = Daru::DataFrame.new({a: [1,2,1,2,3], b: [5,4,3,2,1]})
|
1431
|
+
#
|
1432
|
+
# df.sort [:a, :b]
|
1433
|
+
# # =>
|
1434
|
+
# # <Daru::DataFrame:30604000 @name = d6a9294e-2c09-418f-b646-aa9244653444 @size = 5>
|
1435
|
+
# # a b
|
1436
|
+
# # 2 1 3
|
1437
|
+
# # 0 1 5
|
1438
|
+
# # 3 2 2
|
1439
|
+
# # 1 2 4
|
1440
|
+
# # 4 3 1
|
1441
|
+
#
|
1442
|
+
# @example Sort a dataframe without a block. Here nils will be handled automatically.
|
1443
|
+
#
|
1444
|
+
# df = Daru::DataFrame.new({a: [-3,nil,-1,nil,5], b: [4,3,2,1,4]})
|
1445
|
+
#
|
1446
|
+
# df.sort([:a])
|
1447
|
+
# # =>
|
1448
|
+
# # <Daru::DataFrame:14810920 @name = c07fb5c7-2201-458d-b679-6a1f7ebfe49f @size = 5>
|
1449
|
+
# # a b
|
1450
|
+
# # 1 nil 3
|
1451
|
+
# # 3 nil 1
|
1452
|
+
# # 0 -3 4
|
1453
|
+
# # 2 -1 2
|
1454
|
+
# # 4 5 4
|
1455
|
+
#
|
1456
|
+
# @example Sort a dataframe with a block with nils handled automatically.
|
1457
|
+
#
|
1458
|
+
# df = Daru::DataFrame.new({a: [nil,-1,1,nil,-1,1], b: ['aaa','aa',nil,'baaa','x',nil] })
|
1424
1459
|
#
|
1425
|
-
# df
|
1460
|
+
# df.sort [:b], by: {b: lambda { |a| a.length } }
|
1461
|
+
# # NoMethodError: undefined method `length' for nil:NilClass
|
1462
|
+
# # from (pry):8:in `block in __pry__'
|
1426
1463
|
#
|
1427
|
-
#
|
1428
|
-
#
|
1429
|
-
# #
|
1430
|
-
# #
|
1431
|
-
# #
|
1432
|
-
# #
|
1433
|
-
#
|
1464
|
+
# df.sort [:b], by: {b: lambda { |a| a.length } }, handle_nils: true
|
1465
|
+
#
|
1466
|
+
# # =>
|
1467
|
+
# # <Daru::DataFrame:28469540 @name = 5f986508-556f-468b-be0c-88cc3534445c @size = 6>
|
1468
|
+
# # a b
|
1469
|
+
# # 2 1 nil
|
1470
|
+
# # 5 1 nil
|
1471
|
+
# # 4 -1 x
|
1472
|
+
# # 1 -1 aa
|
1473
|
+
# # 0 nil aaa
|
1474
|
+
# # 3 nil baaa
|
1475
|
+
#
|
1476
|
+
# @example Sort a dataframe with a block with nils handled manually.
|
1477
|
+
#
|
1478
|
+
# df = Daru::DataFrame.new({a: [nil,-1,1,nil,-1,1], b: ['aaa','aa',nil,'baaa','x',nil] })
|
1479
|
+
#
|
1480
|
+
# # To print nils at the bottom one can use lambda { |a| (a.nil?)?[1]:[0,a.length] }
|
1481
|
+
# df.sort [:b], by: {b: lambda { |a| (a.nil?)?[1]:[0,a.length] } }, handle_nils: true
|
1482
|
+
#
|
1483
|
+
# # =>
|
1484
|
+
# #<Daru::DataFrame:22214180 @name = cd7703c7-1dca-4560-840b-5ea51a852ef9 @size = 6>
|
1485
|
+
# # a b
|
1486
|
+
# # 4 -1 x
|
1487
|
+
# # 1 -1 aa
|
1488
|
+
# # 0 nil aaa
|
1489
|
+
# # 3 nil baaa
|
1490
|
+
# # 2 1 nil
|
1491
|
+
# # 5 1 nil
|
1492
|
+
|
1434
1493
|
def sort! vector_order, opts={}
|
1435
|
-
raise ArgumentError,
|
1494
|
+
raise ArgumentError, 'Required atleast one vector name' if vector_order.empty?
|
1436
1495
|
opts = {
|
1437
1496
|
ascending: true,
|
1438
|
-
|
1497
|
+
handle_nils: false,
|
1439
1498
|
by: {}
|
1440
1499
|
}.merge(opts)
|
1441
1500
|
|
1442
|
-
opts[:by] = create_logic_blocks vector_order, opts[:by]
|
1443
1501
|
opts[:ascending] = sort_order_array vector_order, opts[:ascending]
|
1444
|
-
|
1445
|
-
|
1446
|
-
|
1502
|
+
opts[:handle_nils] = handle_nils_array vector_order, opts[:handle_nils]
|
1503
|
+
blocks = create_logic_blocks vector_order, opts[:by], opts[:ascending]
|
1504
|
+
|
1505
|
+
block = lambda do |r1, r2|
|
1506
|
+
# Build left and right array to compare two rows
|
1507
|
+
left = build_array_from_blocks vector_order, opts, blocks, r1, r2
|
1508
|
+
right = build_array_from_blocks vector_order, opts, blocks, r2, r1
|
1509
|
+
|
1510
|
+
# Resolve conflict by Index if all attributes are same
|
1511
|
+
left << r1
|
1512
|
+
right << r2
|
1513
|
+
left <=> right
|
1514
|
+
end
|
1515
|
+
|
1516
|
+
idx = (0..@index.size-1).sort(&block)
|
1517
|
+
|
1518
|
+
old_index = @index.to_a
|
1519
|
+
self.index = Daru::Index.new(idx.map { |i| old_index[i] })
|
1520
|
+
|
1521
|
+
vectors.each do |v|
|
1522
|
+
@data[@vectors[v]] = Daru::Vector.new(
|
1523
|
+
idx.map { |i| @data[@vectors[v]].data[i] },
|
1524
|
+
name: self[v].name, metadata: self[v].metadata.dup, index: index
|
1525
|
+
)
|
1526
|
+
end
|
1447
1527
|
|
1448
1528
|
self
|
1449
1529
|
end
|
1450
1530
|
|
1451
1531
|
# Non-destructive version of #sort!
|
1452
1532
|
def sort vector_order, opts={}
|
1453
|
-
|
1533
|
+
dup.sort! vector_order, opts
|
1454
1534
|
end
|
1455
1535
|
|
1456
1536
|
# Pivots a data frame on specified vectors and applies an aggregate function
|
@@ -1489,25 +1569,27 @@ module Daru
|
|
1489
1569
|
# # [:foo] 10 12
|
1490
1570
|
def pivot_table opts={}
|
1491
1571
|
raise ArgumentError,
|
1492
|
-
|
1572
|
+
'Specify grouping index' if !opts[:index] || opts[:index].empty?
|
1493
1573
|
|
1494
1574
|
index = opts[:index]
|
1495
1575
|
vectors = opts[:vectors] || []
|
1496
1576
|
aggregate_function = opts[:agg] || :mean
|
1497
1577
|
values =
|
1498
|
-
|
1499
|
-
|
1500
|
-
|
1501
|
-
|
1502
|
-
|
1503
|
-
|
1504
|
-
|
1578
|
+
if opts[:values].is_a?(Symbol)
|
1579
|
+
[opts[:values]]
|
1580
|
+
elsif opts[:values].is_a?(Array)
|
1581
|
+
opts[:values]
|
1582
|
+
else # nil
|
1583
|
+
(@vectors.to_a - (index | vectors)) & numeric_vector_names
|
1584
|
+
end
|
1505
1585
|
|
1506
|
-
raise IndexError,
|
1586
|
+
raise IndexError, 'No numeric vectors to aggregate' if values.empty?
|
1507
1587
|
|
1508
|
-
grouped
|
1588
|
+
grouped = group_by(index)
|
1509
1589
|
|
1510
|
-
|
1590
|
+
if vectors.empty?
|
1591
|
+
grouped.send(aggregate_function)
|
1592
|
+
else
|
1511
1593
|
super_hash = {}
|
1512
1594
|
values.each do |value|
|
1513
1595
|
grouped.groups.each do |group_name, row_numbers|
|
@@ -1548,8 +1630,6 @@ module Daru
|
|
1548
1630
|
end
|
1549
1631
|
end
|
1550
1632
|
return pivoted_dataframe
|
1551
|
-
else
|
1552
|
-
grouped.send(aggregate_function)
|
1553
1633
|
end
|
1554
1634
|
end
|
1555
1635
|
|
@@ -1561,8 +1641,8 @@ module Daru
|
|
1561
1641
|
raise "Number of rows must be equal in this: #{nrows} and other: #{other_df.nrows}" unless nrows == other_df.nrows
|
1562
1642
|
|
1563
1643
|
new_fields = (@vectors.to_a + other_df.vectors.to_a)
|
1564
|
-
|
1565
|
-
|
1644
|
+
.recode_repeated
|
1645
|
+
.map(&:to_sym)
|
1566
1646
|
df_new = DataFrame.new({}, order: new_fields)
|
1567
1647
|
|
1568
1648
|
(0...nrows).to_a.each do |i|
|
@@ -1603,7 +1683,6 @@ module Daru
|
|
1603
1683
|
Daru::Core::Merge.join(self, other_df, opts)
|
1604
1684
|
end
|
1605
1685
|
|
1606
|
-
|
1607
1686
|
# Creates a new dataset for one to many relations
|
1608
1687
|
# on a dataset, based on pattern of field names.
|
1609
1688
|
#
|
@@ -1632,26 +1711,25 @@ module Daru
|
|
1632
1711
|
# # ["white", "2", 20]
|
1633
1712
|
# # ]
|
1634
1713
|
def one_to_many(parent_fields, pattern)
|
1635
|
-
re = Regexp.new pattern.gsub(
|
1714
|
+
re = Regexp.new pattern.gsub('%v','(.+?)').gsub('%n','(\\d+?)')
|
1636
1715
|
ds_vars = parent_fields.dup
|
1637
1716
|
vars = []
|
1638
1717
|
max_n = 0
|
1639
|
-
h = parent_fields.
|
1718
|
+
h = parent_fields.each_with_object({}) { |v, a|
|
1640
1719
|
a[v] = Daru::Vector.new([])
|
1641
|
-
a
|
1642
1720
|
}
|
1643
1721
|
# Adding _col_id
|
1644
1722
|
h['_col_id'] = Daru::Vector.new([])
|
1645
1723
|
ds_vars.push('_col_id')
|
1646
1724
|
|
1647
1725
|
@vectors.each do |f|
|
1648
|
-
|
1649
|
-
|
1650
|
-
|
1651
|
-
|
1652
|
-
end
|
1653
|
-
max_n = $2.to_i if max_n < $2.to_i
|
1726
|
+
next unless f =~ re
|
1727
|
+
unless vars.include? $1
|
1728
|
+
vars.push($1)
|
1729
|
+
h[$1] = Daru::Vector.new([])
|
1654
1730
|
end
|
1731
|
+
|
1732
|
+
max_n = $2.to_i if max_n < $2.to_i
|
1655
1733
|
end
|
1656
1734
|
ds = DataFrame.new(h, order: ds_vars+vars)
|
1657
1735
|
|
@@ -1662,12 +1740,12 @@ module Daru
|
|
1662
1740
|
end
|
1663
1741
|
|
1664
1742
|
max_n.times do |n1|
|
1665
|
-
n
|
1743
|
+
n = n1+1
|
1666
1744
|
any_data = false
|
1667
1745
|
vars.each do |v|
|
1668
|
-
data = row[pattern.gsub(
|
1746
|
+
data = row[pattern.gsub('%v',v.to_s).gsub('%n',n.to_s)]
|
1669
1747
|
row_out[v] = data
|
1670
|
-
any_data = true
|
1748
|
+
any_data = true unless data.nil?
|
1671
1749
|
end
|
1672
1750
|
|
1673
1751
|
if any_data
|
@@ -1685,7 +1763,7 @@ module Daru
|
|
1685
1763
|
i = 1
|
1686
1764
|
split.each { |k,v|
|
1687
1765
|
new_field = name_.to_s + join + i.to_s
|
1688
|
-
v.rename name_.to_s +
|
1766
|
+
v.rename name_.to_s + ':' + k.to_s
|
1689
1767
|
self[new_field.to_sym] = v
|
1690
1768
|
i += 1
|
1691
1769
|
}
|
@@ -1707,11 +1785,11 @@ module Daru
|
|
1707
1785
|
# ds.create_sql('names')
|
1708
1786
|
# #=>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
|
1709
1787
|
#
|
1710
|
-
def create_sql(table,charset=
|
1788
|
+
def create_sql(table,charset='UTF8')
|
1711
1789
|
sql = "CREATE TABLE #{table} ("
|
1712
|
-
fields =
|
1790
|
+
fields = vectors.to_a.collect do |f|
|
1713
1791
|
v = self[f]
|
1714
|
-
f.to_s +
|
1792
|
+
f.to_s + ' ' + v.db_type
|
1715
1793
|
end
|
1716
1794
|
|
1717
1795
|
sql + fields.join(",\n ")+") CHARACTER SET=#{charset};"
|
@@ -1724,14 +1802,14 @@ module Daru
|
|
1724
1802
|
numerics_as_arrays << self[n].to_a
|
1725
1803
|
end
|
1726
1804
|
|
1727
|
-
GSL::Matrix.alloc
|
1805
|
+
GSL::Matrix.alloc(*numerics_as_arrays.transpose)
|
1728
1806
|
end
|
1729
1807
|
|
1730
1808
|
# Convert all vectors of type *:numeric* into a Matrix.
|
1731
1809
|
def to_matrix
|
1732
1810
|
numerics_as_arrays = []
|
1733
1811
|
each_vector do |vector|
|
1734
|
-
numerics_as_arrays << vector.to_a if
|
1812
|
+
numerics_as_arrays << vector.to_a if vector.type == :numeric
|
1735
1813
|
end
|
1736
1814
|
|
1737
1815
|
Matrix.columns numerics_as_arrays
|
@@ -1746,8 +1824,8 @@ module Daru
|
|
1746
1824
|
def to_nmatrix
|
1747
1825
|
numerics_as_arrays = []
|
1748
1826
|
each_vector do |vector|
|
1749
|
-
numerics_as_arrays << vector.to_a if
|
1750
|
-
|
1827
|
+
numerics_as_arrays << vector.to_a if vector.type == :numeric &&
|
1828
|
+
vector.missing_positions.empty?
|
1751
1829
|
end
|
1752
1830
|
|
1753
1831
|
numerics_as_arrays.transpose.to_nm
|
@@ -1760,8 +1838,8 @@ module Daru
|
|
1760
1838
|
# in the array of hashes, which has the same index.
|
1761
1839
|
def to_a
|
1762
1840
|
arry = [[],[]]
|
1763
|
-
|
1764
|
-
arry[0] << row.
|
1841
|
+
each_row do |row|
|
1842
|
+
arry[0] << row.to_h
|
1765
1843
|
end
|
1766
1844
|
arry[1] = @index.to_a
|
1767
1845
|
|
@@ -1772,15 +1850,15 @@ module Daru
|
|
1772
1850
|
# in the JSON thus created.
|
1773
1851
|
def to_json no_index=true
|
1774
1852
|
if no_index
|
1775
|
-
|
1853
|
+
to_a[0].to_json
|
1776
1854
|
else
|
1777
|
-
|
1855
|
+
to_a.to_json
|
1778
1856
|
end
|
1779
1857
|
end
|
1780
1858
|
|
1781
|
-
# Converts DataFrame to a hash with keys as vector names and values as
|
1859
|
+
# Converts DataFrame to a hash (explicit) with keys as vector names and values as
|
1782
1860
|
# the corresponding vectors.
|
1783
|
-
def
|
1861
|
+
def to_h
|
1784
1862
|
hsh = {}
|
1785
1863
|
@vectors.each_with_index do |vec_name, idx|
|
1786
1864
|
hsh[vec_name] = @data[idx]
|
@@ -1791,12 +1869,12 @@ module Daru
|
|
1791
1869
|
|
1792
1870
|
# Convert to html for IRuby.
|
1793
1871
|
def to_html threshold=30
|
1794
|
-
html =
|
1795
|
-
|
1796
|
-
"<th colspan=\"#{@vectors.size+1}\">"
|
1797
|
-
"Daru::DataFrame:#{
|
1798
|
-
|
1799
|
-
|
1872
|
+
html = '<table>' \
|
1873
|
+
'<tr>' \
|
1874
|
+
"<th colspan=\"#{@vectors.size+1}\">" \
|
1875
|
+
"Daru::DataFrame:#{object_id} " + " rows: #{nrows} " + " cols: #{ncols}" \
|
1876
|
+
'</th>' \
|
1877
|
+
'</tr>'
|
1800
1878
|
html +='<tr><th></th>'
|
1801
1879
|
@vectors.each { |vector| html += '<th>' + vector.to_s + '</th>' }
|
1802
1880
|
html += '</tr>'
|
@@ -1805,26 +1883,26 @@ module Daru
|
|
1805
1883
|
html += '<tr>'
|
1806
1884
|
html += '<td>' + index.to_s + '</td>'
|
1807
1885
|
|
1808
|
-
|
1886
|
+
row[index].each do |element|
|
1809
1887
|
html += '<td>' + element.to_s + '</td>'
|
1810
1888
|
end
|
1811
1889
|
|
1812
1890
|
html += '</tr>'
|
1813
|
-
if num
|
1814
|
-
|
1815
|
-
|
1816
|
-
|
1817
|
-
|
1818
|
-
|
1819
|
-
|
1820
|
-
|
1821
|
-
|
1822
|
-
|
1823
|
-
|
1824
|
-
|
1825
|
-
html += '</tr>'
|
1826
|
-
break
|
1891
|
+
next if num <= threshold
|
1892
|
+
|
1893
|
+
html += '<tr>'
|
1894
|
+
(@vectors.size + 1).times { html += '<td>...</td>' }
|
1895
|
+
html += '</tr>'
|
1896
|
+
|
1897
|
+
last_index = @index.to_a.last
|
1898
|
+
last_row = row[last_index]
|
1899
|
+
html += '<tr>'
|
1900
|
+
html += '<td>' + last_index.to_s + '</td>'
|
1901
|
+
(0..(ncols - 1)).to_a.each do |i|
|
1902
|
+
html += '<td>' + last_row[i].to_s + '</td>'
|
1827
1903
|
end
|
1904
|
+
html += '</tr>'
|
1905
|
+
break
|
1828
1906
|
end
|
1829
1907
|
html += '</table>'
|
1830
1908
|
|
@@ -1841,7 +1919,7 @@ module Daru
|
|
1841
1919
|
# assignment/deletion of elements is done. Updating data this way is called
|
1842
1920
|
# lazy loading. To set or unset lazy loading, see the .lazy_update= method.
|
1843
1921
|
def update
|
1844
|
-
@data.each
|
1922
|
+
@data.each(&:update) if Daru.lazy_update
|
1845
1923
|
end
|
1846
1924
|
|
1847
1925
|
# Rename the DataFrame.
|
@@ -1890,19 +1968,18 @@ module Daru
|
|
1890
1968
|
Daru::IO.dataframe_write_sql self, dbh, table
|
1891
1969
|
end
|
1892
1970
|
|
1893
|
-
|
1894
1971
|
# Use marshalling to save dataframe to a file.
|
1895
1972
|
def save filename
|
1896
1973
|
Daru::IO.save self, filename
|
1897
1974
|
end
|
1898
1975
|
|
1899
|
-
def _dump
|
1900
|
-
Marshal.dump(
|
1976
|
+
def _dump(_depth)
|
1977
|
+
Marshal.dump(
|
1901
1978
|
data: @data,
|
1902
1979
|
index: @index.to_a,
|
1903
1980
|
order: @vectors.to_a,
|
1904
1981
|
name: @name
|
1905
|
-
|
1982
|
+
)
|
1906
1983
|
end
|
1907
1984
|
|
1908
1985
|
def self._load data
|
@@ -1939,29 +2016,29 @@ module Daru
|
|
1939
2016
|
longest = [@name.to_s.size,
|
1940
2017
|
(@vectors.map(&:to_s).map(&:size).max || 0),
|
1941
2018
|
(@index .map(&:to_s).map(&:size).max || 0),
|
1942
|
-
(@data .map{ |v| v.map(&:to_s).map(&:size).max}.max || 0)].max
|
2019
|
+
(@data .map { |v| v.map(&:to_s).map(&:size).max }.max || 0)].max
|
1943
2020
|
|
1944
2021
|
name = @name || 'nil'
|
1945
|
-
content =
|
2022
|
+
content = ''
|
1946
2023
|
longest = spacing if longest > spacing
|
1947
2024
|
formatter = "\n"
|
1948
2025
|
|
1949
2026
|
(@vectors.size + 1).times { formatter += "%#{longest}.#{longest}s " }
|
1950
|
-
content += "\n#<" + self.class.to_s +
|
1951
|
-
|
1952
|
-
content +=
|
2027
|
+
content += "\n#<" + self.class.to_s + ':' + object_id.to_s + ' @name = ' +
|
2028
|
+
name.to_s + ' @size = ' + @size.to_s + '>'
|
2029
|
+
content += formatter % ['', *@vectors.map(&:to_s)]
|
1953
2030
|
row_num = 1
|
1954
2031
|
|
1955
|
-
|
1956
|
-
content +=
|
2032
|
+
each_row_with_index do |row, index|
|
2033
|
+
content += formatter % [index.to_s, *row.to_h.values.map { |e| (e || 'nil').to_s }]
|
1957
2034
|
row_num += 1
|
1958
|
-
if row_num
|
1959
|
-
dots = []
|
2035
|
+
next if row_num <= threshold
|
1960
2036
|
|
1961
|
-
|
1962
|
-
|
1963
|
-
|
1964
|
-
|
2037
|
+
dots = []
|
2038
|
+
|
2039
|
+
(@vectors.size + 1).times { dots << '...' }
|
2040
|
+
content += formatter % dots
|
2041
|
+
break
|
1965
2042
|
end
|
1966
2043
|
content += "\n"
|
1967
2044
|
|
@@ -1974,24 +2051,24 @@ module Daru
|
|
1974
2051
|
end
|
1975
2052
|
|
1976
2053
|
def == other
|
1977
|
-
self.class == other.class
|
1978
|
-
|
1979
|
-
|
1980
|
-
|
1981
|
-
|
2054
|
+
self.class == other.class &&
|
2055
|
+
@size == other.size &&
|
2056
|
+
@index == other.index &&
|
2057
|
+
@vectors == other.vectors &&
|
2058
|
+
@vectors.to_a.all? { |v| self[v] == other[v] }
|
1982
2059
|
end
|
1983
2060
|
|
1984
2061
|
def method_missing(name, *args, &block)
|
1985
|
-
if
|
1986
|
-
insert_or_modify_vector name[/(.+)\=/].delete(
|
1987
|
-
elsif
|
2062
|
+
if name =~ /(.+)\=/
|
2063
|
+
insert_or_modify_vector name[/(.+)\=/].delete('=').to_sym, args[0]
|
2064
|
+
elsif has_vector? name
|
1988
2065
|
self[name]
|
1989
2066
|
else
|
1990
2067
|
super(name, *args, &block)
|
1991
2068
|
end
|
1992
2069
|
end
|
1993
2070
|
|
1994
|
-
|
2071
|
+
private
|
1995
2072
|
|
1996
2073
|
def possibly_multi_index? index
|
1997
2074
|
if @index.is_a?(MultiIndex)
|
@@ -2001,101 +2078,51 @@ module Daru
|
|
2001
2078
|
end
|
2002
2079
|
end
|
2003
2080
|
|
2004
|
-
def
|
2005
|
-
|
2006
|
-
|
2007
|
-
|
2008
|
-
|
2009
|
-
|
2010
|
-
|
2011
|
-
|
2012
|
-
|
2013
|
-
|
2014
|
-
|
2015
|
-
# right_upper -
|
2016
|
-
def recursive_quick_sort vector_order, index, by, ascending, left_lower, right_upper
|
2017
|
-
if left_lower < right_upper
|
2018
|
-
left_upper, right_lower = partition(vector_order, index, by, ascending, left_lower, right_upper)
|
2019
|
-
if left_upper - left_lower < right_upper - right_lower
|
2020
|
-
recursive_quick_sort(vector_order, index, by, ascending, left_lower, left_upper)
|
2021
|
-
recursive_quick_sort(vector_order, index, by, ascending, right_lower, right_upper)
|
2022
|
-
else
|
2023
|
-
recursive_quick_sort(vector_order, index, by, ascending, right_lower, right_upper)
|
2024
|
-
recursive_quick_sort(vector_order, index, by, ascending, left_lower, left_upper)
|
2025
|
-
end
|
2026
|
-
end
|
2027
|
-
end
|
2028
|
-
|
2029
|
-
def partition vector_order, index, by, ascending, left_lower, right_upper
|
2030
|
-
mindex = (left_lower + right_upper) / 2
|
2031
|
-
mvalues = vector_order.inject([]) { |a, vector_name| a << self[vector_name][mindex]; a }
|
2032
|
-
i = left_lower
|
2033
|
-
j = right_upper
|
2034
|
-
descending = ascending.map { |a| !a }
|
2035
|
-
|
2036
|
-
i += 1 while(keep?(i, mvalues, vector_order, ascending , by, 0))
|
2037
|
-
j -= 1 while(keep?(j, mvalues, vector_order, descending, by, 0))
|
2038
|
-
|
2039
|
-
while i < j - 1
|
2040
|
-
@data.each do |vector|
|
2041
|
-
vector[i], vector[j] = vector[j], vector[i]
|
2042
|
-
end
|
2043
|
-
index[i], index[j] = index[j], index[i]
|
2044
|
-
i += 1
|
2045
|
-
j -= 1
|
2046
|
-
|
2047
|
-
i += 1 while(keep?(i, mvalues, vector_order, ascending , by,0))
|
2048
|
-
j -= 1 while(keep?(j, mvalues, vector_order, descending, by,0))
|
2049
|
-
end
|
2050
|
-
|
2051
|
-
if i <= j
|
2052
|
-
if i < j
|
2053
|
-
@data.each do |vector|
|
2054
|
-
vector[i], vector[j] = vector[j], vector[i]
|
2081
|
+
def create_logic_blocks vector_order, _by, ascending
|
2082
|
+
# Create blocks to handle nils
|
2083
|
+
blocks = {}
|
2084
|
+
universal_block_ascending = ->(a) { [a.nil? ? 0 : 1, a] }
|
2085
|
+
universal_block_decending = ->(a) { [a.nil? ? 1 : 0, a] }
|
2086
|
+
vector_order.each_with_index do |vector, i|
|
2087
|
+
blocks[vector] =
|
2088
|
+
if ascending[i]
|
2089
|
+
universal_block_ascending
|
2090
|
+
else
|
2091
|
+
universal_block_decending
|
2055
2092
|
end
|
2056
|
-
index[i], index[j] = index[j], index[i]
|
2057
|
-
end
|
2058
|
-
i += 1
|
2059
|
-
j -= 1
|
2060
2093
|
end
|
2061
2094
|
|
2062
|
-
|
2095
|
+
blocks
|
2063
2096
|
end
|
2064
2097
|
|
2065
|
-
def
|
2066
|
-
|
2067
|
-
|
2068
|
-
|
2069
|
-
|
2098
|
+
def build_array_from_blocks vector_order, opts, blocks, r1, r2
|
2099
|
+
# Create an array to be used for comparison of two rows in sorting
|
2100
|
+
vector_order.map.each_with_index do |v, i|
|
2101
|
+
value = if opts[:ascending][i]
|
2102
|
+
@data[@vectors[v]].data[r1]
|
2103
|
+
else
|
2104
|
+
@data[@vectors[v]].data[r2]
|
2105
|
+
end
|
2070
2106
|
|
2071
|
-
if
|
2072
|
-
|
2073
|
-
|
2074
|
-
if eval == 0
|
2075
|
-
keep?(current_index, mvalues, vector_order, sort_order, by, vector_order_index + 1)
|
2076
|
-
end
|
2077
|
-
else # sort in descending order
|
2078
|
-
return false if eval == -1
|
2079
|
-
return true if eval == 1
|
2080
|
-
if eval == 0
|
2081
|
-
keep?(current_index, mvalues, vector_order, sort_order, by, vector_order_index + 1)
|
2082
|
-
end
|
2083
|
-
end
|
2084
|
-
end
|
2085
|
-
end
|
2107
|
+
if opts[:by][v] && !opts[:handle_nils][i]
|
2108
|
+
# Block given and nils handled manually
|
2109
|
+
value = opts[:by][v].call value
|
2086
2110
|
|
2087
|
-
|
2088
|
-
|
2089
|
-
|
2090
|
-
|
2091
|
-
end
|
2111
|
+
elsif opts[:by][v] && opts[:handle_nils][i]
|
2112
|
+
# Block given and nils handled automatically
|
2113
|
+
value = opts[:by][v].call value rescue nil
|
2114
|
+
blocks[v].call value
|
2092
2115
|
|
2093
|
-
|
2116
|
+
else
|
2117
|
+
# Block not given and nils handled automatically
|
2118
|
+
blocks[v].call value
|
2119
|
+
end
|
2120
|
+
end
|
2094
2121
|
end
|
2095
2122
|
|
2096
2123
|
def sort_order_array vector_order, ascending
|
2097
|
-
if ascending.is_a?
|
2098
|
-
raise ArgumentError,
|
2124
|
+
if ascending.is_a? Array
|
2125
|
+
raise ArgumentError, 'Specify same number of vector names and sort orders' if
|
2099
2126
|
vector_order.size != ascending.size
|
2100
2127
|
return ascending
|
2101
2128
|
else
|
@@ -2103,6 +2130,16 @@ module Daru
|
|
2103
2130
|
end
|
2104
2131
|
end
|
2105
2132
|
|
2133
|
+
def handle_nils_array vector_order, handle_nils
|
2134
|
+
if handle_nils.is_a? Array
|
2135
|
+
raise ArgumentError, 'Specify same number of vector names and handle nils' if
|
2136
|
+
vector_order.size != handle_nils.size
|
2137
|
+
return handle_nils
|
2138
|
+
else
|
2139
|
+
Array.new(vector_order.size, handle_nils)
|
2140
|
+
end
|
2141
|
+
end
|
2142
|
+
|
2106
2143
|
def vectors_index_for location
|
2107
2144
|
if @vectors.include?(location)
|
2108
2145
|
@vectors[location]
|
@@ -2118,39 +2155,35 @@ module Daru
|
|
2118
2155
|
if @vectors.is_a?(MultiIndex)
|
2119
2156
|
pos = @vectors[names]
|
2120
2157
|
|
2121
|
-
if pos.is_a?(Integer)
|
2122
|
-
return @data[pos]
|
2123
|
-
else # MultiIndex
|
2124
|
-
new_vectors = pos.map do |tuple|
|
2125
|
-
@data[@vectors[tuple]]
|
2126
|
-
end
|
2158
|
+
return @data[pos] if pos.is_a?(Integer)
|
2127
2159
|
|
2128
|
-
|
2129
|
-
|
2130
|
-
|
2160
|
+
# MultiIndex
|
2161
|
+
new_vectors = pos.map do |tuple|
|
2162
|
+
@data[@vectors[tuple]]
|
2163
|
+
end
|
2131
2164
|
|
2132
|
-
|
2133
|
-
|
2165
|
+
if !location.is_a?(Range) && names.size < @vectors.width
|
2166
|
+
pos = pos.drop_left_level names.size
|
2134
2167
|
end
|
2168
|
+
|
2169
|
+
Daru::DataFrame.new(new_vectors, index: @index, order: pos)
|
2135
2170
|
else
|
2136
2171
|
unless names[1]
|
2137
2172
|
pos = @vectors[location]
|
2138
2173
|
|
2139
|
-
if pos.is_a?(Numeric)
|
2140
|
-
|
2141
|
-
|
2142
|
-
names = pos
|
2143
|
-
end
|
2174
|
+
return @data[pos] if pos.is_a?(Numeric)
|
2175
|
+
|
2176
|
+
names = pos
|
2144
2177
|
end
|
2145
2178
|
|
2146
|
-
|
2179
|
+
new_vectors = {}
|
2147
2180
|
names.each do |name|
|
2148
|
-
|
2181
|
+
new_vectors[name] = @data[@vectors[name]]
|
2149
2182
|
end
|
2150
2183
|
|
2151
2184
|
order = names.is_a?(Array) ? Daru::Index.new(names) : names
|
2152
|
-
Daru::DataFrame.new(
|
2153
|
-
|
2185
|
+
Daru::DataFrame.new(new_vectors, order: order,
|
2186
|
+
index: @index, name: @name)
|
2154
2187
|
end
|
2155
2188
|
end
|
2156
2189
|
|
@@ -2161,16 +2194,15 @@ module Daru
|
|
2161
2194
|
pos = @index[names]
|
2162
2195
|
if pos.is_a?(Integer)
|
2163
2196
|
return Daru::Vector.new(populate_row_for(pos), index: @vectors, name: pos)
|
2164
|
-
|
2165
|
-
new_rows = pos.map { |tuple| populate_row_for(tuple) }
|
2197
|
+
end
|
2166
2198
|
|
2167
|
-
|
2168
|
-
pos = pos.drop_left_level names.size
|
2169
|
-
end
|
2199
|
+
new_rows = pos.map { |tuple| populate_row_for(tuple) }
|
2170
2200
|
|
2171
|
-
|
2172
|
-
|
2201
|
+
if !location.is_a?(Range) && names.size < @index.width
|
2202
|
+
pos = pos.drop_left_level names.size
|
2173
2203
|
end
|
2204
|
+
|
2205
|
+
Daru::DataFrame.rows(new_rows, order: @vectors, name: @name, index: pos)
|
2174
2206
|
else
|
2175
2207
|
if names[1].nil?
|
2176
2208
|
names = @index[location]
|
@@ -2189,7 +2221,7 @@ module Daru
|
|
2189
2221
|
rows << self.row[name].to_a
|
2190
2222
|
end
|
2191
2223
|
|
2192
|
-
Daru::DataFrame.rows rows, index: names
|
2224
|
+
Daru::DataFrame.rows rows, index: names,name: @name, order: @vectors
|
2193
2225
|
end
|
2194
2226
|
end
|
2195
2227
|
|
@@ -2201,17 +2233,22 @@ module Daru
|
|
2201
2233
|
|
2202
2234
|
def insert_or_modify_vector name, vector
|
2203
2235
|
name = name[0] unless @vectors.is_a?(MultiIndex)
|
2204
|
-
|
2236
|
+
vec = nil
|
2205
2237
|
|
2206
2238
|
if @index.empty?
|
2207
|
-
|
2208
|
-
|
2209
|
-
|
2239
|
+
vec = if vector.is_a?(Daru::Vector)
|
2240
|
+
vector
|
2241
|
+
else
|
2242
|
+
Daru::Vector.new(vector.to_a, name: set_name(name))
|
2243
|
+
end
|
2244
|
+
|
2245
|
+
@index = vec.index
|
2246
|
+
assign_or_add_vector name, vec
|
2210
2247
|
set_size
|
2211
2248
|
|
2212
2249
|
@data.map! do |v|
|
2213
|
-
if v.
|
2214
|
-
Daru::Vector.new([nil]*@size, name: set_name(name), index: @index)
|
2250
|
+
if v.empty?
|
2251
|
+
Daru::Vector.new([nil]*@size, name: set_name(name), metadata: v.metadata, index: @index)
|
2215
2252
|
else
|
2216
2253
|
v
|
2217
2254
|
end
|
@@ -2219,15 +2256,11 @@ module Daru
|
|
2219
2256
|
else
|
2220
2257
|
if vector.is_a?(Daru::Vector)
|
2221
2258
|
if vector.index == @index # so that index-by-index assignment is avoided when possible.
|
2222
|
-
|
2259
|
+
vec = vector.dup
|
2223
2260
|
else
|
2224
|
-
|
2261
|
+
vec = Daru::Vector.new [], name: set_name(name), metadata: vector.metadata.dup, index: @index
|
2225
2262
|
@index.each do |idx|
|
2226
|
-
|
2227
|
-
v[idx] = vector[idx]
|
2228
|
-
else
|
2229
|
-
v[idx] = nil
|
2230
|
-
end
|
2263
|
+
vec[idx] = vector.index.include?(idx) ? vector[idx] : nil
|
2231
2264
|
end
|
2232
2265
|
end
|
2233
2266
|
else
|
@@ -2235,26 +2268,30 @@ module Daru
|
|
2235
2268
|
"Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}" if
|
2236
2269
|
@size != vector.size
|
2237
2270
|
|
2238
|
-
|
2271
|
+
vec = Daru::Vector.new(vector, name: set_name(name), index: @index)
|
2239
2272
|
end
|
2240
2273
|
|
2241
|
-
assign_or_add_vector name,
|
2274
|
+
assign_or_add_vector name, vec
|
2242
2275
|
end
|
2243
2276
|
end
|
2244
2277
|
|
2245
2278
|
def assign_or_add_vector name, v
|
2246
|
-
#FIXME: fix this jugaad. need to make changes in Indexing itself.
|
2247
|
-
|
2279
|
+
# FIXME: fix this jugaad. need to make changes in Indexing itself.
|
2280
|
+
begin
|
2281
|
+
pos = @vectors[name]
|
2282
|
+
rescue IndexError
|
2283
|
+
pos = name
|
2284
|
+
end
|
2248
2285
|
|
2249
|
-
if !pos.
|
2250
|
-
|
2286
|
+
if !pos.is_a?(Daru::Index) && pos == name &&
|
2287
|
+
(@vectors.include?(name) || (pos.is_a?(Integer) && pos < @data.size))
|
2251
2288
|
@data[pos] = v
|
2252
|
-
elsif pos.
|
2289
|
+
elsif pos.is_a?(Daru::Index)
|
2253
2290
|
pos.each do |p|
|
2254
2291
|
@data[@vectors[p]] = v
|
2255
2292
|
end
|
2256
2293
|
else
|
2257
|
-
@vectors
|
2294
|
+
@vectors |= [name] unless @vectors.include?(name)
|
2258
2295
|
@data[@vectors[name]] = v
|
2259
2296
|
end
|
2260
2297
|
end
|
@@ -2264,21 +2301,21 @@ module Daru
|
|
2264
2301
|
# TODO
|
2265
2302
|
else
|
2266
2303
|
name = name[0]
|
2267
|
-
|
2268
|
-
|
2269
|
-
|
2270
|
-
|
2271
|
-
|
2272
|
-
|
2304
|
+
vec =
|
2305
|
+
if vector.is_a?(Daru::Vector)
|
2306
|
+
vector
|
2307
|
+
else
|
2308
|
+
Daru::Vector.new(vector, name: set_name(name), index: @vectors)
|
2309
|
+
end
|
2273
2310
|
|
2274
2311
|
if @index.include? name
|
2275
|
-
|
2276
|
-
|
2312
|
+
each_vector_with_index do |v,i|
|
2313
|
+
v[name] = vec.index.include?(i) ? vec[i] : nil
|
2277
2314
|
end
|
2278
2315
|
else
|
2279
|
-
@index
|
2280
|
-
|
2281
|
-
|
2316
|
+
@index |= [name]
|
2317
|
+
each_vector_with_index do |v,i|
|
2318
|
+
v.concat((vec.index.include?(i) ? vec[i] : nil), name)
|
2282
2319
|
end
|
2283
2320
|
end
|
2284
2321
|
|
@@ -2294,15 +2331,15 @@ module Daru
|
|
2294
2331
|
|
2295
2332
|
def validate_labels
|
2296
2333
|
raise IndexError, "Expected equal number of vector names (#{@vectors.size}) for number of vectors (#{@data.size})." if
|
2297
|
-
@vectors
|
2334
|
+
@vectors && @vectors.size != @data.size
|
2298
2335
|
|
2299
|
-
raise IndexError,
|
2300
|
-
@index
|
2336
|
+
raise IndexError, 'Expected number of indexes same as number of rows' if
|
2337
|
+
@index && @data[0] && @index.size != @data[0].size
|
2301
2338
|
end
|
2302
2339
|
|
2303
2340
|
def validate_vector_sizes
|
2304
2341
|
@data.each do |vector|
|
2305
|
-
raise IndexError,
|
2342
|
+
raise IndexError, 'Expected vectors with equal length' if vector.size != @size
|
2306
2343
|
end
|
2307
2344
|
end
|
2308
2345
|
|
@@ -2332,14 +2369,14 @@ module Daru
|
|
2332
2369
|
end
|
2333
2370
|
|
2334
2371
|
def create_vectors_index_with vectors, source
|
2335
|
-
vectors = source.keys.sort_by
|
2372
|
+
vectors = source.keys.sort_by(&:to_s) if vectors.nil?
|
2336
2373
|
|
2337
2374
|
@vectors =
|
2338
|
-
|
2339
|
-
|
2340
|
-
|
2341
|
-
|
2342
|
-
|
2375
|
+
if vectors.is_a?(Index) || vectors.is_a?(MultiIndex)
|
2376
|
+
vectors
|
2377
|
+
else
|
2378
|
+
Daru::Index.new((vectors + (source.keys - vectors)).uniq)
|
2379
|
+
end
|
2343
2380
|
end
|
2344
2381
|
|
2345
2382
|
def all_vectors_have_equal_indexes? source
|
@@ -2351,24 +2388,24 @@ module Daru
|
|
2351
2388
|
end
|
2352
2389
|
|
2353
2390
|
def try_create_index index
|
2354
|
-
index.
|
2391
|
+
index.is_a?(Index) ? index : Daru::Index.new(index)
|
2355
2392
|
end
|
2356
2393
|
|
2357
|
-
def set_name potential_name
|
2394
|
+
def set_name potential_name # rubocop:disable Style/AccessorMethodName
|
2358
2395
|
potential_name.is_a?(Array) ? potential_name.join : potential_name
|
2359
2396
|
end
|
2360
2397
|
|
2361
2398
|
def symbolize arry
|
2362
2399
|
symbolized_arry =
|
2363
|
-
|
2364
|
-
|
2365
|
-
|
2366
|
-
|
2400
|
+
if arry.all? { |e| e.is_a?(Array) }
|
2401
|
+
arry.map do |sub_arry|
|
2402
|
+
sub_arry.map do |e|
|
2403
|
+
e.is_a?(Numeric) ? e : e.to_sym
|
2404
|
+
end
|
2367
2405
|
end
|
2406
|
+
else
|
2407
|
+
arry.map { |e| e.is_a?(Numeric) ? e : e.to_sym }
|
2368
2408
|
end
|
2369
|
-
else
|
2370
|
-
arry.map { |e| e.is_a?(Numeric) ? e : e.to_sym }
|
2371
|
-
end
|
2372
2409
|
|
2373
2410
|
symbolized_arry
|
2374
2411
|
end
|