daru 0.1.4.1 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +3 -0
- data/CONTRIBUTING.md +27 -3
- data/Guardfile +7 -0
- data/History.md +39 -1
- data/README.md +1 -1
- data/daru.gemspec +9 -2
- data/lib/daru.rb +4 -1
- data/lib/daru/accessors/gsl_wrapper.rb +93 -91
- data/lib/daru/accessors/nmatrix_wrapper.rb +109 -107
- data/lib/daru/category.rb +22 -15
- data/lib/daru/core/group_by.rb +13 -2
- data/lib/daru/core/merge.rb +37 -31
- data/lib/daru/core/query.rb +10 -2
- data/lib/daru/dataframe.rb +95 -34
- data/lib/daru/date_time/index.rb +15 -16
- data/lib/daru/date_time/offsets.rb +14 -11
- data/lib/daru/formatters/table.rb +2 -2
- data/lib/daru/index/categorical_index.rb +201 -0
- data/lib/daru/index/index.rb +289 -0
- data/lib/daru/index/multi_index.rb +266 -0
- data/lib/daru/maths/statistics/vector.rb +13 -9
- data/lib/daru/monkeys.rb +0 -7
- data/lib/daru/plotting/gruff/category.rb +1 -0
- data/lib/daru/plotting/gruff/dataframe.rb +3 -3
- data/lib/daru/plotting/nyaplot/dataframe.rb +1 -1
- data/lib/daru/vector.rb +36 -21
- data/lib/daru/version.rb +1 -1
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/accessors/{wrappers_spec.rb → gsl_wrapper_spec.rb} +0 -35
- data/spec/accessors/nmatrix_wrapper_spec.rb +32 -0
- data/spec/{categorical_spec.rb → category_spec.rb} +3 -0
- data/spec/core/group_by_spec.rb +17 -1
- data/spec/core/merge_spec.rb +38 -1
- data/spec/core/query_spec.rb +5 -0
- data/spec/dataframe_spec.rb +230 -57
- data/spec/date_time/offsets_spec.rb +84 -3
- data/spec/formatters/table_formatter_spec.rb +9 -0
- data/spec/index/categorical_index_spec.rb +2 -0
- data/spec/index/index_spec.rb +17 -2
- data/spec/{math → maths}/arithmetic/dataframe_spec.rb +0 -0
- data/spec/{math → maths}/arithmetic/vector_spec.rb +0 -0
- data/spec/{math → maths}/statistics/dataframe_spec.rb +1 -1
- data/spec/{math → maths}/statistics/vector_spec.rb +7 -12
- data/spec/plotting/gruff/category_spec.rb +44 -0
- data/spec/plotting/gruff/dataframe_spec.rb +84 -0
- data/spec/plotting/gruff/vector_spec.rb +70 -0
- data/spec/plotting/nyaplot/category_spec.rb +51 -0
- data/spec/plotting/{dataframe_spec.rb → nyaplot/dataframe_spec.rb} +0 -83
- data/spec/plotting/nyaplot/vector_spec.rb +66 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/vector_spec.rb +68 -1
- metadata +53 -24
- data/lib/daru/index.rb +0 -761
- data/spec/plotting/vector_spec.rb +0 -230
data/lib/daru/category.rb
CHANGED
@@ -3,9 +3,6 @@ module Daru
|
|
3
3
|
attr_accessor :base_category
|
4
4
|
attr_reader :index, :coding_scheme, :name
|
5
5
|
|
6
|
-
# For debugging. To be removed
|
7
|
-
attr_reader :array, :cat_hash, :map_int_cat
|
8
|
-
|
9
6
|
# Initializes a vector to store categorical data.
|
10
7
|
# @note Base category is set to the first category encountered in the vector.
|
11
8
|
# @param [Array] data the categorical data
|
@@ -64,11 +61,13 @@ module Daru
|
|
64
61
|
case lib
|
65
62
|
when :gruff, :nyaplot
|
66
63
|
@plotting_library = lib
|
67
|
-
|
68
|
-
|
69
|
-
|
64
|
+
if Daru.send("has_#{lib}?".to_sym)
|
65
|
+
extend Module.const_get(
|
66
|
+
"Daru::Plotting::Category::#{lib.to_s.capitalize}Library"
|
67
|
+
)
|
68
|
+
end
|
70
69
|
else
|
71
|
-
raise
|
70
|
+
raise ArgumentError, "Plotting library #{lib} not supported. "\
|
72
71
|
'Supported libraries are :nyaplot and :gruff'
|
73
72
|
end
|
74
73
|
end
|
@@ -160,6 +159,9 @@ module Daru
|
|
160
159
|
counts.map { |c| c / size.to_f }
|
161
160
|
when :percentage
|
162
161
|
counts.map { |c| c / size.to_f * 100 }
|
162
|
+
else
|
163
|
+
raise ArgumentError, 'Type should be either :count, :fraction or'\
|
164
|
+
" :percentage. #{type} not supported."
|
163
165
|
end
|
164
166
|
Daru::Vector.new values, index: categories, name: name
|
165
167
|
end
|
@@ -463,12 +465,13 @@ module Daru
|
|
463
465
|
|
464
466
|
# Contrast code the vector according to the coding scheme set.
|
465
467
|
# @note To set the coding scheme use #coding_scheme=
|
466
|
-
# @param [
|
467
|
-
#
|
468
|
+
# @param [Hash] opts The options to pass for coding.
|
469
|
+
# @option opts [TrueClass, FalseClass] :full (false) True if you want k variables
|
470
|
+
# for k categories, false if you want k-1 variables for k categories.
|
468
471
|
# @return [Daru::DataFrame] dataframe containing all coded variables
|
469
472
|
# @example
|
470
473
|
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
471
|
-
# dv.contrast_code
|
474
|
+
# dv.contrast_code full: false
|
472
475
|
# # => #<Daru::DataFrame(5x2)>
|
473
476
|
# # daru_1 daru_c
|
474
477
|
# # 0 0 0
|
@@ -771,9 +774,10 @@ module Daru
|
|
771
774
|
|
772
775
|
def assert_ordered operation
|
773
776
|
# TODO: Change ArgumentError to something more expressive
|
777
|
+
return if ordered?
|
778
|
+
|
774
779
|
raise ArgumentError, "Can not apply #{operation} when vector is unordered. "\
|
775
780
|
'To make the categorical data ordered, use #ordered = true'\
|
776
|
-
unless ordered?
|
777
781
|
end
|
778
782
|
|
779
783
|
def dummy_coding full
|
@@ -896,14 +900,17 @@ module Daru
|
|
896
900
|
|
897
901
|
def validate_index index
|
898
902
|
# Change to SizeError
|
903
|
+
return unless size != index.size
|
904
|
+
|
899
905
|
raise ArgumentError, "Size of index (#{index.size}) does not matches"\
|
900
|
-
"size of vector (#{size})"
|
906
|
+
"size of vector (#{size})"
|
901
907
|
end
|
902
908
|
|
903
909
|
def modify_category_at pos, category
|
904
|
-
|
905
|
-
|
906
|
-
|
910
|
+
unless categories.include? category
|
911
|
+
raise ArgumentError, "Invalid category #{category}, "\
|
912
|
+
'to add a new category use #add_category'
|
913
|
+
end
|
907
914
|
old_category = category_from_position pos
|
908
915
|
@array[pos] = int_from_cat category
|
909
916
|
@cat_hash[old_category].delete pos
|
data/lib/daru/core/group_by.rb
CHANGED
@@ -25,6 +25,13 @@ module Daru
|
|
25
25
|
@context = context
|
26
26
|
vectors = names.map { |vec| context[vec].to_a }
|
27
27
|
tuples = vectors[0].zip(*vectors[1..-1])
|
28
|
+
# FIXME: It feels like we don't want to sort here. Ruby's #group_by
|
29
|
+
# never sorts:
|
30
|
+
#
|
31
|
+
# ['test', 'me', 'please'].group_by(&:size)
|
32
|
+
# # => {4=>["test"], 2=>["me"], 6=>["please"]}
|
33
|
+
#
|
34
|
+
# - zverok, 2016-09-12
|
28
35
|
keys = tuples.uniq.sort(&TUPLE_SORTER)
|
29
36
|
|
30
37
|
keys.each do |key|
|
@@ -295,12 +302,16 @@ module Daru
|
|
295
302
|
indexes << index
|
296
303
|
end
|
297
304
|
end
|
298
|
-
indexes
|
305
|
+
if indexes.count == 1
|
306
|
+
[@context.index.at(*indexes)]
|
307
|
+
else
|
308
|
+
@context.index.at(*indexes).to_a
|
309
|
+
end
|
299
310
|
end
|
300
311
|
|
301
312
|
def multi_indexed_grouping?
|
302
313
|
return false unless @groups.keys[0]
|
303
|
-
@groups.keys[0].size > 1
|
314
|
+
@groups.keys[0].size > 1
|
304
315
|
end
|
305
316
|
end
|
306
317
|
end
|
data/lib/daru/core/merge.rb
CHANGED
@@ -18,11 +18,8 @@ module Daru
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def initialize left_df, right_df, opts={}
|
21
|
-
|
22
|
-
@keep_left, @keep_right = extract_left_right(opts[:how])
|
23
|
-
|
21
|
+
init_opts(opts)
|
24
22
|
validate_on!(left_df, right_df)
|
25
|
-
|
26
23
|
key_sanitizer = ->(h) { sanitize_merge_keys(h.values_at(*on)) }
|
27
24
|
|
28
25
|
@left = df_to_a(left_df)
|
@@ -46,12 +43,12 @@ module Daru
|
|
46
43
|
row(lkey, rkey).tap { |r| res << r if r }
|
47
44
|
end
|
48
45
|
|
49
|
-
Daru::DataFrame.new(res, order:
|
46
|
+
Daru::DataFrame.new(res, order: dataframe_vector_names)
|
50
47
|
end
|
51
48
|
|
52
49
|
private
|
53
50
|
|
54
|
-
attr_reader :on,
|
51
|
+
attr_reader :on, :indicator,
|
55
52
|
:left, :left_key_values, :keep_left, :left_keys,
|
56
53
|
:right, :right_key_values, :keep_right, :right_keys
|
57
54
|
|
@@ -65,13 +62,23 @@ module Daru
|
|
65
62
|
outer: [true, true]
|
66
63
|
}.freeze
|
67
64
|
|
65
|
+
def init_opts(opts)
|
66
|
+
@on = opts[:on]
|
67
|
+
@keep_left, @keep_right = extract_left_right(opts[:how])
|
68
|
+
@indicator = opts[:indicator]
|
69
|
+
end
|
70
|
+
|
71
|
+
def dataframe_vector_names
|
72
|
+
left_keys.values + on + right_keys.values + Array(indicator)
|
73
|
+
end
|
74
|
+
|
68
75
|
def extract_left_right(how)
|
69
76
|
LEFT_RIGHT_COMBINATIONS[how] or
|
70
77
|
raise ArgumentError, "Unrecognized join option: #{how}"
|
71
78
|
end
|
72
79
|
|
73
80
|
def sanitize_merge_keys(merge_keys)
|
74
|
-
merge_keys.map { |v| v
|
81
|
+
merge_keys.map { |v| v.nil? ? NilSorter.new : v }
|
75
82
|
end
|
76
83
|
|
77
84
|
def df_to_a df
|
@@ -110,19 +117,27 @@ module Daru
|
|
110
117
|
# :nocov:
|
111
118
|
when lkey == rkey
|
112
119
|
self.merge_key = lkey
|
113
|
-
merge_matching_rows
|
120
|
+
add_indicator(merge_matching_rows, :both)
|
114
121
|
when !rkey || lt(lkey, rkey)
|
115
|
-
left_row_missing_right
|
122
|
+
add_indicator(left_row_missing_right, :left_only)
|
116
123
|
else # !lkey || lt(rkey, lkey)
|
117
|
-
right_row_missing_left
|
124
|
+
add_indicator(right_row_missing_left, :right_only)
|
118
125
|
end
|
119
126
|
end
|
120
127
|
|
128
|
+
def add_indicator(row, indicator_value)
|
129
|
+
return row unless indicator
|
130
|
+
row[indicator] = indicator_value
|
131
|
+
row
|
132
|
+
end
|
133
|
+
|
121
134
|
def merge_matching_rows
|
122
135
|
if one_to_one_merge?
|
123
136
|
merge_rows(one_to_one_left_row, one_to_one_right_row)
|
124
137
|
elsif one_to_many_merge?
|
125
|
-
merge_rows(
|
138
|
+
result = merge_rows(left.first, right.first)
|
139
|
+
one_to_many_shift
|
140
|
+
result
|
126
141
|
else
|
127
142
|
result = cartesian_product.shift
|
128
143
|
end_cartesian_product if cartesian_product.empty?
|
@@ -130,6 +145,13 @@ module Daru
|
|
130
145
|
end
|
131
146
|
end
|
132
147
|
|
148
|
+
def one_to_many_shift
|
149
|
+
shift_left = first_right_key != next_right_key
|
150
|
+
shift_right = first_left_key != next_left_key
|
151
|
+
one_to_one_left_row if shift_left
|
152
|
+
one_to_one_right_row if shift_right
|
153
|
+
end
|
154
|
+
|
133
155
|
def one_to_one_merge?
|
134
156
|
merge_key != next_left_key && merge_key != next_right_key
|
135
157
|
end
|
@@ -143,29 +165,11 @@ module Daru
|
|
143
165
|
left.shift
|
144
166
|
end
|
145
167
|
|
146
|
-
def one_to_many_left_row
|
147
|
-
if next_right_key && first_right_key == next_right_key
|
148
|
-
left.first
|
149
|
-
else
|
150
|
-
left_key_values.shift
|
151
|
-
left.shift
|
152
|
-
end
|
153
|
-
end
|
154
|
-
|
155
168
|
def one_to_one_right_row
|
156
169
|
right_key_values.shift
|
157
170
|
right.shift
|
158
171
|
end
|
159
172
|
|
160
|
-
def one_to_many_right_row
|
161
|
-
if next_left_key && first_left_key == next_left_key
|
162
|
-
right.first
|
163
|
-
else
|
164
|
-
right_key_values.shift
|
165
|
-
right.shift
|
166
|
-
end
|
167
|
-
end
|
168
|
-
|
169
173
|
def left_row_missing_right
|
170
174
|
val = one_to_one_left_row
|
171
175
|
expand_row(val, left_keys) if keep_left
|
@@ -184,6 +188,7 @@ module Daru
|
|
184
188
|
left_keys
|
185
189
|
.map { |from, to| [to, lrow[from]] }.to_h
|
186
190
|
.merge(on.map { |col| [col, lrow[col]] }.to_h)
|
191
|
+
.merge(indicator ? {indicator => nil} : {})
|
187
192
|
.merge(right_keys.map { |from, to| [to, rrow[from]] }.to_h)
|
188
193
|
end
|
189
194
|
|
@@ -191,6 +196,7 @@ module Daru
|
|
191
196
|
renamings
|
192
197
|
.map { |from, to| [to, row[from]] }.to_h
|
193
198
|
.merge(on.map { |col| [col, row[col]] }.to_h)
|
199
|
+
.merge(indicator ? {indicator => nil} : {})
|
194
200
|
end
|
195
201
|
|
196
202
|
def first_right_key
|
@@ -198,7 +204,7 @@ module Daru
|
|
198
204
|
end
|
199
205
|
|
200
206
|
def next_right_key
|
201
|
-
right_key_values
|
207
|
+
right_key_values[1]
|
202
208
|
end
|
203
209
|
|
204
210
|
def first_left_key
|
@@ -206,7 +212,7 @@ module Daru
|
|
206
212
|
end
|
207
213
|
|
208
214
|
def next_left_key
|
209
|
-
left_key_values
|
215
|
+
left_key_values[1]
|
210
216
|
end
|
211
217
|
|
212
218
|
def left_rows_at_merge_key
|
data/lib/daru/core/query.rb
CHANGED
@@ -75,8 +75,16 @@ module Daru
|
|
75
75
|
def fetch_new_data_and_index dv, bool_array
|
76
76
|
barry = bool_array.to_a
|
77
77
|
positions = dv.size.times.select { |i| barry[i] }
|
78
|
-
|
79
|
-
|
78
|
+
# FIXME: The below 4 lines should be replaced with values_at when the
|
79
|
+
# stack error is fixed in Ruby.
|
80
|
+
# eg - new_data = dv.data.data.values_at(*positions)
|
81
|
+
|
82
|
+
data = dv.type == :category ? dv.to_a : dv.data.data # non-cat optimize
|
83
|
+
new_data = positions.map { |i| data[i] }
|
84
|
+
|
85
|
+
idx = dv.index.to_a
|
86
|
+
new_index = positions.map { |i| idx[i] }
|
87
|
+
|
80
88
|
[new_data, new_index]
|
81
89
|
end
|
82
90
|
end
|
data/lib/daru/dataframe.rb
CHANGED
@@ -120,7 +120,7 @@ module Daru
|
|
120
120
|
|
121
121
|
opts[:order] ||= guess_order(source)
|
122
122
|
|
123
|
-
if ArrayHelper.array_of?(source, Array)
|
123
|
+
if ArrayHelper.array_of?(source, Array) || source.empty?
|
124
124
|
DataFrame.new(source.transpose, opts)
|
125
125
|
elsif ArrayHelper.array_of?(source, Vector)
|
126
126
|
from_vector_rows(source, opts)
|
@@ -265,9 +265,11 @@ module Daru
|
|
265
265
|
case lib
|
266
266
|
when :gruff, :nyaplot
|
267
267
|
@plotting_library = lib
|
268
|
-
|
269
|
-
|
270
|
-
|
268
|
+
if Daru.send("has_#{lib}?".to_sym)
|
269
|
+
extend Module.const_get(
|
270
|
+
"Daru::Plotting::DataFrame::#{lib.to_s.capitalize}Library"
|
271
|
+
)
|
272
|
+
end
|
271
273
|
else
|
272
274
|
raise ArguementError, "Plotting library #{lib} not supported. "\
|
273
275
|
'Supported libraries are :nyaplot and :gruff'
|
@@ -974,6 +976,26 @@ module Daru
|
|
974
976
|
Daru::Vector.new a, index: @index
|
975
977
|
end
|
976
978
|
|
979
|
+
# Reorder the vectors in a dataframe
|
980
|
+
# @param [Array] order_array new order of the vectors
|
981
|
+
# @example
|
982
|
+
# df = Daru::DataFrame.new({
|
983
|
+
# a: [1, 2, 3],
|
984
|
+
# b: [4, 5, 6]
|
985
|
+
# }, order: [:a, :b])
|
986
|
+
# df.order = [:b, :a]
|
987
|
+
# df
|
988
|
+
# # => #<Daru::DataFrame(3x2)>
|
989
|
+
# # b a
|
990
|
+
# # 0 4 1
|
991
|
+
# # 1 5 2
|
992
|
+
# # 2 6 3
|
993
|
+
def order=(order_array)
|
994
|
+
raise ArgumentError, 'Invalid order' unless
|
995
|
+
order_array.sort == vectors.to_a.sort
|
996
|
+
initialize(to_h, order: order_array)
|
997
|
+
end
|
998
|
+
|
977
999
|
# Returns a vector, based on a string with a calculation based
|
978
1000
|
# on vector.
|
979
1001
|
#
|
@@ -1103,7 +1125,7 @@ module Daru
|
|
1103
1125
|
each_row do |row|
|
1104
1126
|
return true if yield(row)
|
1105
1127
|
end
|
1106
|
-
|
1128
|
+
false
|
1107
1129
|
else
|
1108
1130
|
raise ArgumentError, "Unidentified axis #{axis}"
|
1109
1131
|
end
|
@@ -1204,12 +1226,16 @@ module Daru
|
|
1204
1226
|
raise(ArgumentError, "Vector #{v} does not exist") unless has_vector?(v)
|
1205
1227
|
}
|
1206
1228
|
|
1229
|
+
vectors = [@vectors.first] if vectors.empty?
|
1230
|
+
|
1207
1231
|
Daru::Core::GroupBy.new(self, vectors)
|
1208
1232
|
end
|
1209
1233
|
|
1210
1234
|
def reindex_vectors new_vectors
|
1211
|
-
|
1212
|
-
|
1235
|
+
unless new_vectors.is_a?(Daru::Index)
|
1236
|
+
raise ArgumentError, 'Must pass the new index of type Index or its '\
|
1237
|
+
"subclasses, not #{new_index.class}"
|
1238
|
+
end
|
1213
1239
|
|
1214
1240
|
cl = Daru::DataFrame.new({}, order: new_vectors, index: @index, name: @name)
|
1215
1241
|
new_vectors.each_with_object(cl) do |vec, memo|
|
@@ -1233,6 +1259,19 @@ module Daru
|
|
1233
1259
|
Daru::DataFrame.new(data, order: vectors)
|
1234
1260
|
end
|
1235
1261
|
|
1262
|
+
# Concatenates another DataFrame, as #concat does.
|
1263
|
+
# Additionally it tries to preserve the index. If the indices contain
|
1264
|
+
# common elements, #union will overwrite the corresponding rows in the
|
1265
|
+
# first dataframe.
|
1266
|
+
def union other_df
|
1267
|
+
index = (@index.to_a + other_df.index.to_a).uniq
|
1268
|
+
df = row[*(@index.to_a - other_df.index.to_a)]
|
1269
|
+
|
1270
|
+
df = df.concat(other_df)
|
1271
|
+
df.index = Daru::Index.new(index)
|
1272
|
+
df
|
1273
|
+
end
|
1274
|
+
|
1236
1275
|
# Set a particular column as the new DF index
|
1237
1276
|
def set_index new_index, opts={}
|
1238
1277
|
raise ArgumentError, 'All elements in new index must be unique.' if
|
@@ -1267,8 +1306,10 @@ module Daru
|
|
1267
1306
|
# # a 1 11
|
1268
1307
|
# # g nil nil
|
1269
1308
|
def reindex new_index
|
1270
|
-
|
1271
|
-
|
1309
|
+
unless new_index.is_a?(Daru::Index)
|
1310
|
+
raise ArgumentError, 'Must pass the new index of type Index or its '\
|
1311
|
+
"subclasses, not #{new_index.class}"
|
1312
|
+
end
|
1272
1313
|
|
1273
1314
|
cl = Daru::DataFrame.new({}, order: @vectors, index: new_index, name: @name)
|
1274
1315
|
new_index.each_with_object(cl) do |idx, memo|
|
@@ -1304,13 +1345,20 @@ module Daru
|
|
1304
1345
|
#
|
1305
1346
|
# df.vectors = Daru::Index.new([:foo, :bar, :baz])
|
1306
1347
|
# df.vectors.to_a #=> [:foo, :bar, :baz]
|
1307
|
-
def vectors=
|
1308
|
-
|
1309
|
-
|
1310
|
-
|
1311
|
-
"dataframe size #{ncols}" if idx.size != ncols
|
1348
|
+
def vectors= new_index
|
1349
|
+
unless new_index.is_a?(Daru::Index)
|
1350
|
+
raise ArgumentError, 'Can only reindex with Index and its subclasses'
|
1351
|
+
end
|
1312
1352
|
|
1313
|
-
|
1353
|
+
if new_index.size != ncols
|
1354
|
+
raise ArgumentError, "Specified index length #{new_index.size} not equal to"\
|
1355
|
+
"dataframe size #{ncols}"
|
1356
|
+
end
|
1357
|
+
|
1358
|
+
@vectors = new_index
|
1359
|
+
@data.zip(new_index.to_a).each do |vect, name|
|
1360
|
+
vect.name = name
|
1361
|
+
end
|
1314
1362
|
self
|
1315
1363
|
end
|
1316
1364
|
|
@@ -1521,7 +1569,7 @@ module Daru
|
|
1521
1569
|
# # [:bar] 18 26
|
1522
1570
|
# # [:foo] 10 12
|
1523
1571
|
def pivot_table opts={}
|
1524
|
-
raise ArgumentError, 'Specify grouping index' if opts[:index].
|
1572
|
+
raise ArgumentError, 'Specify grouping index' if Array(opts[:index]).empty?
|
1525
1573
|
|
1526
1574
|
index = opts[:index]
|
1527
1575
|
vectors = opts[:vectors] || []
|
@@ -1542,9 +1590,10 @@ module Daru
|
|
1542
1590
|
#
|
1543
1591
|
# @return {Daru::DataFrame}
|
1544
1592
|
def merge other_df # rubocop:disable Metrics/AbcSize
|
1545
|
-
|
1546
|
-
|
1547
|
-
|
1593
|
+
unless nrows == other_df.nrows
|
1594
|
+
raise ArgumentError,
|
1595
|
+
"Number of rows must be equal in this: #{nrows} and other: #{other_df.nrows}"
|
1596
|
+
end
|
1548
1597
|
|
1549
1598
|
new_fields = (@vectors.to_a + other_df.vectors.to_a)
|
1550
1599
|
new_fields = ArrayHelper.recode_repeated(new_fields)
|
@@ -1567,6 +1616,9 @@ module Daru
|
|
1567
1616
|
# @option :how [Symbol] Can be one of :inner, :left, :right or :outer.
|
1568
1617
|
# @option :on [Array] The columns on which the join is to be performed.
|
1569
1618
|
# Column names specified here must be common to both DataFrames.
|
1619
|
+
# @option :indicator [Symbol] The name of a vector to add to the resultant
|
1620
|
+
# dataframe that indicates whether the record was in the left (:left_only),
|
1621
|
+
# right (:right_only), or both (:both) joining dataframes.
|
1570
1622
|
# @return [Daru::DataFrame]
|
1571
1623
|
# @example Inner Join
|
1572
1624
|
# left = Daru::DataFrame.new({
|
@@ -1892,10 +1944,15 @@ module Daru
|
|
1892
1944
|
end
|
1893
1945
|
|
1894
1946
|
def method_missing(name, *args, &block)
|
1895
|
-
|
1896
|
-
|
1897
|
-
|
1947
|
+
case
|
1948
|
+
when name =~ /(.+)\=/
|
1949
|
+
name = name[/(.+)\=/].delete('=')
|
1950
|
+
name = name.to_sym unless has_vector?(name)
|
1951
|
+
insert_or_modify_vector [name], args[0]
|
1952
|
+
when has_vector?(name)
|
1898
1953
|
self[name]
|
1954
|
+
when has_vector?(name.to_s)
|
1955
|
+
self[name.to_s]
|
1899
1956
|
else
|
1900
1957
|
super
|
1901
1958
|
end
|
@@ -2060,7 +2117,7 @@ module Daru
|
|
2060
2117
|
end
|
2061
2118
|
|
2062
2119
|
def populate_row_for pos
|
2063
|
-
@data.map { |vector| vector
|
2120
|
+
@data.map { |vector| vector.at(*pos) }
|
2064
2121
|
end
|
2065
2122
|
|
2066
2123
|
def insert_or_modify_vector name, vector
|
@@ -2128,9 +2185,10 @@ module Daru
|
|
2128
2185
|
}
|
2129
2186
|
else
|
2130
2187
|
# FIXME: No spec checks this case... And SizeError is not a thing - zverok, 2016-05-08
|
2131
|
-
|
2132
|
-
|
2133
|
-
|
2188
|
+
if @size != vector.size
|
2189
|
+
raise SizeError,
|
2190
|
+
"Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}"
|
2191
|
+
end
|
2134
2192
|
|
2135
2193
|
Daru::Vector.new(vector, name: coerce_name(name), index: @index)
|
2136
2194
|
end
|
@@ -2157,12 +2215,13 @@ module Daru
|
|
2157
2215
|
end
|
2158
2216
|
|
2159
2217
|
def validate_labels
|
2160
|
-
|
2161
|
-
"
|
2162
|
-
|
2218
|
+
if @vectors && @vectors.size != @data.size
|
2219
|
+
raise IndexError, "Expected equal number of vector names (#{@vectors.size}) " \
|
2220
|
+
"for number of vectors (#{@data.size})."
|
2221
|
+
end
|
2163
2222
|
|
2164
|
-
|
2165
|
-
|
2223
|
+
return unless @index && @data[0] && @index.size != @data[0].size
|
2224
|
+
raise IndexError, 'Expected number of indexes same as number of rows'
|
2166
2225
|
end
|
2167
2226
|
|
2168
2227
|
def validate_vector_sizes
|
@@ -2228,8 +2287,10 @@ module Daru
|
|
2228
2287
|
end
|
2229
2288
|
|
2230
2289
|
def initialize_from_array_of_arrays source, vectors, index, _opts
|
2231
|
-
|
2232
|
-
|
2290
|
+
if source.size != vectors.size
|
2291
|
+
raise ArgumentError, "Number of vectors (#{vectors.size}) should " \
|
2292
|
+
"equal order size (#{source.size})"
|
2293
|
+
end
|
2233
2294
|
|
2234
2295
|
@index = Index.coerce(index || source[0].size)
|
2235
2296
|
@vectors = Index.coerce(vectors)
|
@@ -2258,7 +2319,7 @@ module Daru
|
|
2258
2319
|
@index = Daru::Index.new(index || source.size)
|
2259
2320
|
|
2260
2321
|
@data = @vectors.map do |name|
|
2261
|
-
v = source.map { |h| h
|
2322
|
+
v = source.map { |h| h.fetch(name) { h[name.to_s] } }
|
2262
2323
|
Daru::Vector.new(v, name: coerce_name(name), index: @index)
|
2263
2324
|
end
|
2264
2325
|
end
|