daru 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +6 -6
  3. data/.gitignore +2 -0
  4. data/CONTRIBUTING.md +7 -3
  5. data/History.md +36 -0
  6. data/README.md +21 -13
  7. data/Rakefile +16 -1
  8. data/benchmarks/TradeoffData.csv +65 -0
  9. data/benchmarks/dataframe_creation.rb +39 -0
  10. data/benchmarks/group_by.rb +32 -0
  11. data/benchmarks/row_access.rb +41 -0
  12. data/benchmarks/row_assign.rb +36 -0
  13. data/benchmarks/sorting.rb +44 -0
  14. data/benchmarks/vector_access.rb +31 -0
  15. data/benchmarks/vector_assign.rb +42 -0
  16. data/benchmarks/where_clause.rb +48 -0
  17. data/benchmarks/where_vs_filter.rb +28 -0
  18. data/daru.gemspec +29 -5
  19. data/lib/daru.rb +30 -1
  20. data/lib/daru/accessors/array_wrapper.rb +2 -2
  21. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
  22. data/lib/daru/core/group_by.rb +112 -31
  23. data/lib/daru/core/merge.rb +170 -0
  24. data/lib/daru/core/query.rb +95 -0
  25. data/lib/daru/dataframe.rb +335 -223
  26. data/lib/daru/date_time/index.rb +550 -0
  27. data/lib/daru/date_time/offsets.rb +397 -0
  28. data/lib/daru/index.rb +266 -54
  29. data/lib/daru/io/io.rb +1 -2
  30. data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
  31. data/lib/daru/maths/arithmetic/vector.rb +2 -2
  32. data/lib/daru/maths/statistics/dataframe.rb +58 -8
  33. data/lib/daru/maths/statistics/vector.rb +229 -0
  34. data/lib/daru/vector.rb +230 -80
  35. data/lib/daru/version.rb +1 -1
  36. data/spec/core/group_by_spec.rb +16 -16
  37. data/spec/core/merge_spec.rb +52 -0
  38. data/spec/core/query_spec.rb +171 -0
  39. data/spec/dataframe_spec.rb +278 -280
  40. data/spec/date_time/data_spec.rb +199 -0
  41. data/spec/date_time/index_spec.rb +433 -0
  42. data/spec/date_time/offsets_spec.rb +371 -0
  43. data/spec/fixtures/stock_data.csv +500 -0
  44. data/spec/index_spec.rb +317 -11
  45. data/spec/io/io_spec.rb +18 -17
  46. data/spec/math/arithmetic/dataframe_spec.rb +3 -3
  47. data/spec/math/statistics/dataframe_spec.rb +39 -1
  48. data/spec/math/statistics/vector_spec.rb +163 -1
  49. data/spec/monkeys_spec.rb +4 -0
  50. data/spec/spec_helper.rb +3 -0
  51. data/spec/vector_spec.rb +125 -60
  52. metadata +71 -14
  53. data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
  54. data/lib/daru/multi_index.rb +0 -216
  55. data/spec/multi_index_spec.rb +0 -216
@@ -0,0 +1,170 @@
1
+ module Daru
2
+ module Core
3
+ module MergeHelper
4
+ class << self
5
+ def replace_keys_if_duplicates hash, matcher
6
+ matched = nil
7
+ hash.keys.each { |d|
8
+ if matcher.match(Regexp.new(d.to_s))
9
+ matched = d
10
+ break
11
+ end
12
+ }
13
+
14
+ if matched
15
+ hash[matcher] = hash[matched]
16
+ hash.delete matched
17
+ end
18
+ end
19
+
20
+ def resolve_duplicates df_hash1, df_hash2, on
21
+ hk = df_hash1.keys + df_hash2.keys - on
22
+ recoded = hk.recode_repeated.map(&:to_sym)
23
+ diff = (recoded - hk).sort
24
+
25
+ diff.each_slice(2) do |a|
26
+ replace_keys_if_duplicates df_hash1, a[0]
27
+ replace_keys_if_duplicates df_hash2, a[1]
28
+ end
29
+ end
30
+
31
+ def hashify df
32
+ hsh = df.to_hash
33
+ hsh.each { |k,v| hsh[k] = v.to_a }
34
+ hsh
35
+ end
36
+
37
+ def inner_join df1, df2, df_hash1, df_hash2, on
38
+ joined_hash = {}
39
+ ((df_hash1.keys - on) | on | (df_hash2.keys - on)).each do |k|
40
+ joined_hash[k] = []
41
+ end
42
+
43
+ (0...df1.size).each do |id1|
44
+ (0...df2.size).each do |id2|
45
+ if on.all? { |n| df_hash1[n][id1] == df_hash2[n][id2] }
46
+ joined_hash.each do |k,v|
47
+ v << (df_hash1.has_key?(k) ? df_hash1[k][id1] : df_hash2[k][id2])
48
+ end
49
+ end
50
+ end
51
+ end
52
+
53
+ Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
54
+ end
55
+
56
+ def full_outer_join df1, df2, df_hash1, df_hash2, on
57
+ left = left_outer_join df1, df2, df_hash1, df_hash2, on, true
58
+ right = right_outer_join df1, df2, df_hash1, df_hash2, on, true
59
+
60
+ Daru::DataFrame.rows(
61
+ (left.values.transpose | right.values.transpose), order: left.keys)
62
+ end
63
+
64
+ def left_outer_join df1, df2, df_hash1, df_hash2, on, as_hash=false
65
+ joined_hash = {}
66
+ ((df_hash1.keys - on) | on | (df_hash2.keys - on)).each do |k|
67
+ joined_hash[k] = []
68
+ end
69
+
70
+
71
+ (0...df1.size).each do |id1|
72
+ joined = false
73
+ (0...df2.size).each do |id2|
74
+ if on.all? { |n| df_hash1[n][id1] == df_hash2[n][id2] }
75
+ joined = true
76
+ joined_hash.each do |k,v|
77
+ v << (df_hash1.has_key?(k) ? df_hash1[k][id1] : df_hash2[k][id2])
78
+ end
79
+ end
80
+ end
81
+
82
+ unless joined
83
+ df_hash1.keys.each do |k|
84
+ joined_hash[k] << df_hash1[k][id1]
85
+ end
86
+
87
+ (joined_hash.keys - df_hash1.keys).each do |k|
88
+ joined_hash[k] << nil
89
+ end
90
+ joined = false
91
+ end
92
+ end
93
+
94
+ return joined_hash if as_hash
95
+ Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
96
+ end
97
+
98
+ def right_outer_join df1, df2, df_hash1, df_hash2, on, as_hash=false
99
+ joined_hash = {}
100
+ ((df_hash1.keys - on) | on | (df_hash2.keys - on)).each do |k|
101
+ joined_hash[k] = []
102
+ end
103
+
104
+ (0...df2.size).each do |id1|
105
+ joined = false
106
+ (0...df1.size).each do |id2|
107
+ if on.all? { |n| df_hash2[n][id1] == df_hash1[n][id2] }
108
+ joined = true
109
+ joined_hash.each do |k,v|
110
+ v << (df_hash2.has_key?(k) ? df_hash2[k][id1] : df_hash1[k][id2])
111
+ end
112
+ end
113
+ end
114
+
115
+ unless joined
116
+ df_hash2.keys.each do |k|
117
+ joined_hash[k] << df_hash2[k][id1]
118
+ end
119
+
120
+ (joined_hash.keys - df_hash2.keys).each do |k|
121
+ joined_hash[k] << nil
122
+ end
123
+ joined = false
124
+ end
125
+ end
126
+
127
+ return joined_hash if as_hash
128
+ Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
129
+ end
130
+
131
+ def verify_dataframes df_hash1, df_hash2, on
132
+ raise ArgumentError,
133
+ "All fields in :on must be present in self" if !on.all? { |e| df_hash1[e] }
134
+ raise ArgumentError,
135
+ "All fields in :on must be present in other DF" if !on.all? { |e| df_hash2[e] }
136
+ end
137
+ end
138
+ end
139
+ # Private module containing methods for join, merge, concat operations on
140
+ # dataframes and vectors.
141
+ # @private
142
+ module Merge
143
+ class << self
144
+ def join df1, df2, opts={}
145
+ helper = MergeHelper
146
+
147
+ df_hash1 = helper.hashify df1
148
+ df_hash2 = helper.hashify df2
149
+ on = opts[:on]
150
+
151
+ helper.verify_dataframes df_hash1, df_hash2, on
152
+ helper.resolve_duplicates df_hash1, df_hash2, on
153
+
154
+ case opts[:how]
155
+ when :inner
156
+ helper.inner_join df1, df2, df_hash1, df_hash2, on
157
+ when :outer
158
+ helper.full_outer_join df1, df2, df_hash1, df_hash2, on
159
+ when :left
160
+ helper.left_outer_join df1, df2, df_hash1, df_hash2, on
161
+ when :right
162
+ helper.right_outer_join df1, df2, df_hash1, df_hash2, on
163
+ else
164
+ raise ArgumentError, "Unrecognized option in :how => #{opts[:how]}"
165
+ end
166
+ end
167
+ end
168
+ end
169
+ end
170
+ end
@@ -0,0 +1,95 @@
1
+ module Daru
2
+ module Core
3
+ module Query
4
+ class BoolArray
5
+ attr_reader :barry
6
+
7
+ def initialize barry
8
+ @barry = barry
9
+ end
10
+
11
+ def & other
12
+ new_bool = []
13
+ other_barry = other.barry
14
+ @barry.each_with_index do |b, i|
15
+ new_bool << (b and other_barry[i])
16
+ end
17
+
18
+ BoolArray.new(new_bool)
19
+ end
20
+
21
+ alias :and :&
22
+
23
+ def | other
24
+ new_bool = []
25
+ other_barry = other.barry
26
+ @barry.each_with_index do |b, i|
27
+ new_bool << (b or other_barry[i])
28
+ end
29
+
30
+ BoolArray.new(new_bool)
31
+ end
32
+
33
+ alias :or :|
34
+
35
+ def !
36
+ BoolArray.new(@barry.map { |b| !b })
37
+ end
38
+
39
+ def == other
40
+ @barry == other.barry
41
+ end
42
+
43
+ def to_a
44
+ @barry
45
+ end
46
+
47
+ def inspect
48
+ "(#{self.class}:#{self.object_id} bool_arry=#{@barry})"
49
+ end
50
+ end
51
+
52
+ class << self
53
+ def apply_scalar_operator operator, data, other
54
+ arry = data.inject([]) do |memo,d|
55
+ memo << (d.send(operator, other) ? true : false)
56
+ memo
57
+ end
58
+
59
+ BoolArray.new(arry)
60
+ end
61
+
62
+ def apply_vector_operator operator, vector, other
63
+ bool_arry = []
64
+ vector.each_with_index do |d, i|
65
+ bool_arry << (d.send(operator, other[i]) ? true : false)
66
+ end
67
+
68
+ BoolArray.new(bool_arry)
69
+ end
70
+
71
+ def df_where data_frame, bool_array
72
+ vecs = data_frame.map do |vector|
73
+ vector.where(bool_array)
74
+ end
75
+
76
+ Daru::DataFrame.new(
77
+ vecs, order: data_frame.vectors, index: vecs[0].index, clone: false)
78
+ end
79
+
80
+ def vector_where data, index, bool_array, dtype
81
+ new_data = []
82
+ new_index = []
83
+ bool_array.to_a.each_with_index do |b, i|
84
+ if b
85
+ new_data << data[i]
86
+ new_index << index[i]
87
+ end
88
+ end
89
+
90
+ Daru::Vector.new(new_data, index: new_index, dtype: dtype)
91
+ end
92
+ end
93
+ end
94
+ end
95
+ end
@@ -1,7 +1,6 @@
1
1
  $:.unshift File.dirname(__FILE__)
2
2
 
3
3
  require 'accessors/dataframe_by_row.rb'
4
- require 'accessors/dataframe_by_vector.rb'
5
4
  require 'maths/arithmetic/dataframe.rb'
6
5
  require 'maths/statistics/dataframe.rb'
7
6
  require 'plotting/dataframe.rb'
@@ -226,11 +225,11 @@ module Daru
226
225
  @data = []
227
226
 
228
227
  temp_name = opts[:name]
229
- @name = temp_name.is_a?(Numeric) ? temp_name : (temp_name || SecureRandom.uuid).to_sym
228
+ @name = temp_name || SecureRandom.uuid
230
229
 
231
230
  if source.empty?
232
- @vectors = create_index vectors
233
- @index = create_index index
231
+ @vectors = try_create_index vectors
232
+ @index = try_create_index index
234
233
  create_empty_vectors
235
234
  else
236
235
  case source
@@ -239,8 +238,8 @@ module Daru
239
238
  raise ArgumentError, "Number of vectors (#{vectors.size}) should \
240
239
  equal order size (#{source.size})" if source.size != vectors.size
241
240
 
242
- @index = create_index(index || source[0].size)
243
- @vectors = create_index(vectors)
241
+ @index = try_create_index(index || source[0].size)
242
+ @vectors = try_create_index(vectors)
244
243
 
245
244
  @vectors.each_with_index do |vec,idx|
246
245
  @data << Daru::Vector.new(source[idx], index: @index)
@@ -253,9 +252,10 @@ module Daru
253
252
  initialize(hsh, index: index, order: vectors, name: @name, clone: clone)
254
253
  else # array of hashes
255
254
  if vectors.nil?
256
- @vectors = Daru::Index.new source[0].keys.map(&:to_sym)
255
+ @vectors = Daru::Index.new source[0].keys
257
256
  else
258
- @vectors = Daru::Index.new (vectors + (source[0].keys - vectors)).uniq.map(&:to_sym)
257
+ @vectors = Daru::Index.new(
258
+ (vectors + (source[0].keys - vectors)).uniq)
259
259
  end
260
260
  @index = Daru::Index.new(index || source.size)
261
261
 
@@ -272,8 +272,9 @@ module Daru
272
272
  create_vectors_index_with vectors, source
273
273
  if all_daru_vectors_in_source? source
274
274
  if !index.nil?
275
- @index = create_index index
275
+ @index = try_create_index index
276
276
  elsif all_vectors_have_equal_indexes?(source)
277
+ vectors_have_same_index = true
277
278
  @index = source.values[0].index.dup
278
279
  else
279
280
  all_indexes = []
@@ -289,17 +290,28 @@ module Daru
289
290
 
290
291
  if clone
291
292
  @vectors.each do |vector|
292
- @data << Daru::Vector.new([], name: vector, index: @index)
293
-
294
- @index.each do |idx|
295
- @data[@vectors[vector]][idx] = source[vector][idx]
293
+ # avoids matching indexes of vectors if all the supplied vectors
294
+ # have the same index.
295
+ if vectors_have_same_index
296
+ v = source[vector].dup
297
+ else
298
+ v = Daru::Vector.new([], name: vector, index: @index)
299
+
300
+ @index.each do |idx|
301
+ if source[vector].index.include? idx
302
+ v[idx] = source[vector][idx]
303
+ else
304
+ v[idx] = nil
305
+ end
306
+ end
296
307
  end
308
+ @data << v
297
309
  end
298
310
  else
299
311
  @data.concat source.values
300
312
  end
301
313
  else
302
- @index = create_index(index || source.values[0].size)
314
+ @index = try_create_index(index || source.values[0].size)
303
315
 
304
316
  @vectors.each do |name|
305
317
  @data << Daru::Vector.new(source[name].dup, name: set_name(name), index: @index)
@@ -313,6 +325,11 @@ module Daru
313
325
  update
314
326
  end
315
327
 
328
+ def vector *args
329
+ $stderr.puts "#vector has been deprecated in favour of #[]. Please use that."
330
+ self[*names]
331
+ end
332
+
316
333
  # Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
317
334
  # Defaults to *:vector*. Use of this method is not recommended for accessing
318
335
  # rows or vectors. Use df.row[:a] for accessing row with index ':a' or
@@ -324,7 +341,6 @@ module Daru
324
341
  else
325
342
  axis = :vector
326
343
  end
327
- names.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
328
344
 
329
345
  if axis == :vector
330
346
  access_vector *names
@@ -349,7 +365,6 @@ module Daru
349
365
 
350
366
  name = args[0..-2]
351
367
  vector = args[-1]
352
- name.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
353
368
 
354
369
  if axis == :vector
355
370
  insert_or_modify_vector name, vector
@@ -360,15 +375,6 @@ module Daru
360
375
  end
361
376
  end
362
377
 
363
- # Access a vector or set/create a vector. Refer #[] and #[]= docs for details.
364
- #
365
- # == Usage
366
- # df.vector[:a] # access vector named ':a'
367
- # df.vector[:b] = [1,2,3] # set vector ':b' to [1,2,3]
368
- def vector
369
- Daru::Accessors::DataFrameByVector.new(self)
370
- end
371
-
372
378
  # Access a vector by name.
373
379
  def column name
374
380
  vector[name]
@@ -398,24 +404,13 @@ module Daru
398
404
  # * +vectors_to_dup+ - An Array specifying the names of Vectors to
399
405
  # be duplicated. Will duplicate the entire DataFrame if not specified.
400
406
  def dup vectors_to_dup=nil
401
- vectors_to_dup = @vectors unless vectors_to_dup
407
+ vectors_to_dup = @vectors.to_a unless vectors_to_dup
402
408
 
403
- new_order =
404
- if vectors.is_a?(MultiIndex)
405
- src = []
406
- vectors_to_dup.each do |vec|
407
- src << @data[@vectors[vec]].dup
408
- end
409
-
410
- Daru::MultiIndex.new(vectors_to_dup)
411
- else
412
- src = {}
413
- vectors_to_dup.each do |vector|
414
- src[vector] = @data[@vectors[vector]].dup
415
- end
416
-
417
- Daru::Index.new(vectors_to_dup)
409
+ src = []
410
+ vectors_to_dup.each do |vec|
411
+ src << @data[@vectors[vec]].to_a
418
412
  end
413
+ new_order = Daru::Index.new(vectors_to_dup)
419
414
 
420
415
  Daru::DataFrame.new src, order: new_order, index: @index.dup, name: @name, clone: true
421
416
  end
@@ -465,6 +460,14 @@ module Daru
465
460
  (vecs.nil? ? self : dup(vecs)).row[*(row_indexes - rows_with_nil)]
466
461
  end
467
462
 
463
+ # Iterate over each index of the DataFrame.
464
+ def each_index &block
465
+ return to_enum(:each_index) unless block_given?
466
+
467
+ @index.each(&block)
468
+ self
469
+ end
470
+
468
471
  # Iterate over each vector
469
472
  def each_vector(&block)
470
473
  return to_enum(:each_vector) unless block_given?
@@ -608,7 +611,7 @@ module Daru
608
611
  #
609
612
  # Recode works similarly to #map, but an important difference between
610
613
  # the two is that recode returns a modified Daru::DataFrame instead
611
- # of an Array. For this reason, #recodeexpects that every run of the
614
+ # of an Array. For this reason, #recode expects every run of the
612
615
  # block to return a Daru::Vector.
613
616
  #
614
617
  # Just like map and each, recode also accepts an optional _axis_ argument.
@@ -667,7 +670,8 @@ module Daru
667
670
  df = self.dup
668
671
  df.each_vector_with_index do |v, i|
669
672
  ret = yield v
670
- ret.is_a?(Daru::Vector) or raise TypeError, "Every iteration must return Daru::Vector not #{ret.class}"
673
+ ret.is_a?(Daru::Vector) or
674
+ raise TypeError, "Every iteration must return Daru::Vector not #{ret.class}"
671
675
  df[*i] = ret
672
676
  end
673
677
 
@@ -841,7 +845,7 @@ module Daru
841
845
  idx = named_index_for index
842
846
 
843
847
  if @index.include? idx
844
- @index = reassign_index_as(@index.to_a - [idx])
848
+ @index = Daru::Index.new(@index.to_a - [idx])
845
849
  self.each_vector do |vector|
846
850
  vector.delete_at idx
847
851
  end
@@ -1015,7 +1019,7 @@ module Daru
1015
1019
  number_of_missing << row.missing_positions.size
1016
1020
  end
1017
1021
 
1018
- Daru::Vector.new number_of_missing, index: @index, name: "#{@name}_missing_rows".to_sym
1022
+ Daru::Vector.new number_of_missing, index: @index, name: "#{@name}_missing_rows"
1019
1023
  end
1020
1024
 
1021
1025
  # TODO: remove next version
@@ -1087,9 +1091,18 @@ module Daru
1087
1091
 
1088
1092
  # Check if a vector is present
1089
1093
  def has_vector? vector
1090
- !!@vectors[*vector]
1094
+ @vectors.include? vector
1091
1095
  end
1092
1096
 
1097
+ # Works like Array#any?.
1098
+ #
1099
+ # @param [Symbol] axis (:vector) The axis to iterate over. Can be :vector or
1100
+ # :row. A Daru::Vector object is yielded in the block.
1101
+ # @example Using any?
1102
+ # df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
1103
+ # df.any?(:row) do |row|
1104
+ # row[:a] < 3 and row[:b] == 'b'
1105
+ # end #=> true
1093
1106
  def any? axis=:vector, &block
1094
1107
  if axis == :vector or axis == :column
1095
1108
  @data.any?(&block)
@@ -1103,6 +1116,15 @@ module Daru
1103
1116
  end
1104
1117
  end
1105
1118
 
1119
+ # Works like Array#all?
1120
+ #
1121
+ # @param [Symbol] axis (:vector) The axis to iterate over. Can be :vector or
1122
+ # :row. A Daru::Vector object is yielded in the block.
1123
+ # @example Using all?
1124
+ # df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
1125
+ # df.all?(:row) do |row|
1126
+ # row[:a] < 10
1127
+ # end #=> true
1106
1128
  def all? axis=:vector, &block
1107
1129
  if axis == :vector or axis == :column
1108
1130
  @data.all?(&block)
@@ -1183,38 +1205,126 @@ module Daru
1183
1205
  # # ["foo", "one", 3]=>[6],
1184
1206
  # # ["foo", "three", 8]=>[7],
1185
1207
  # # ["foo", "two", 3]=>[2, 4]}
1186
- def group_by vectors
1187
- vectors = [vectors] if vectors.is_a?(Symbol)
1208
+ def group_by *vectors
1209
+ vectors.flatten!
1188
1210
  vectors.each { |v| raise(ArgumentError, "Vector #{v} does not exist") unless
1189
1211
  has_vector?(v) }
1190
1212
 
1191
1213
  Daru::Core::GroupBy.new(self, vectors)
1192
1214
  end
1193
1215
 
1194
- def reindex_vectors! new_vectors
1195
- raise ArgumentError, "Number of vectors passed into function (#{new_vectors.size}) should equal that present in the DataFrame (#{@vectors.size})" if
1196
- @vectors.size != new_vectors.size
1216
+ def reindex_vectors new_vectors
1217
+ raise ArgumentError, "Must pass the new index of type Index or its "\
1218
+ "subclasses, not #{new_index.class}" unless new_vectors.kind_of?(Daru::Index)
1219
+
1220
+ cl = Daru::DataFrame.new({}, order: new_vectors, index: @index, name: @name)
1221
+ new_vectors.each do |vec|
1222
+ if @vectors.include?(vec)
1223
+ cl[vec] = self[vec]
1224
+ else
1225
+ cl[vec] = [nil]*nrows
1226
+ end
1227
+ end
1228
+
1229
+ cl
1230
+ end
1231
+
1232
+ # Concatenate another DataFrame along corresponding columns.
1233
+ # Very premature implementation. Use with caution.
1234
+ def concat other_df
1235
+ vectors = []
1236
+ @vectors.each do |v|
1237
+ vectors << self[v].to_a.concat(other_df[v].to_a)
1238
+ end
1239
+
1240
+ Daru::DataFrame.new(vectors, order: @vectors)
1241
+ end
1242
+
1243
+ # Set a particular column as the new index of the DF
1244
+ def set_index new_index, opts={}
1245
+ raise ArgumentError, "All elements in new index must be unique." if
1246
+ @size != self[new_index].uniq.size
1247
+
1248
+ self.index = Daru::Index.new(self[new_index].to_a)
1249
+ self.delete_vector(new_index) unless opts[:keep]
1197
1250
 
1198
- @vectors = Daru::Index.new new_vectors.map(&:to_sym), new_vectors.map { |e| @vectors[e] }
1251
+ self
1199
1252
  end
1200
1253
 
1201
- # Change the index of the DataFrame and its underlying vectors. Destructive.
1254
+ # Change the index of the DataFrame and preserve the labels of the previous
1255
+ # indexing. New index can be Daru::Index or any of its subclasses.
1202
1256
  #
1203
- # @param [Symbol, Array] new_index Specify an Array if
1204
- def reindex! new_index
1205
- raise ArgumentError, "Index size must equal dataframe size" if new_index.is_a?(Array) and new_index.size != @size
1257
+ # @param [Daru::Index] new_index The new Index for reindexing the DataFrame.
1258
+ # @example Reindexing DataFrame
1259
+ # df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]},
1260
+ # index: ['a','b','c','d'])
1261
+ # #=>
1262
+ # ##<Daru::DataFrame:83278130 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
1263
+ # # a b
1264
+ # # a 1 11
1265
+ # # b 2 22
1266
+ # # c 3 33
1267
+ # # d 4 44
1268
+ # df.reindex Daru::Index.new(['b', 0, 'a', 'g'])
1269
+ # #=>
1270
+ # ##<Daru::DataFrame:83177070 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
1271
+ # # a b
1272
+ # # b 2 22
1273
+ # # 0 nil nil
1274
+ # # a 1 11
1275
+ # # g nil nil
1276
+ def reindex new_index
1277
+ raise ArgumentError, "Must pass the new index of type Index or its "\
1278
+ "subclasses, not #{new_index.class}" unless new_index.kind_of?(Daru::Index)
1206
1279
 
1207
- @index = possibly_multi_index?(new_index == :seq ? @size : new_index)
1208
- @data.map! do |vector|
1209
- vector.reindex possibly_multi_index?(@index.to_a)
1280
+ cl = Daru::DataFrame.new({}, order: @vectors, index: new_index, name: @name)
1281
+ new_index.each do |idx|
1282
+ if @index.include?(idx)
1283
+ cl.row[idx] = self.row[idx]
1284
+ else
1285
+ cl.row[idx] = [nil]*ncols
1286
+ end
1210
1287
  end
1211
1288
 
1289
+ cl
1290
+ end
1291
+
1292
+ # Reassign index with a new index of type Daru::Index or any of its subclasses.
1293
+ #
1294
+ # @param [Daru::Index] idx New index object on which the rows of the dataframe
1295
+ # are to be indexed.
1296
+ # @example Reassigning index of a DataFrame
1297
+ # df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]})
1298
+ # df.index.to_a #=> [0,1,2,3]
1299
+ #
1300
+ # df.index = Daru::Index.new(['a','b','c','d'])
1301
+ # df.index.to_a #=> ['a','b','c','d']
1302
+ # df.row['a'].to_a #=> [1,11]
1303
+ def index= idx
1304
+ @data.each { |vec| vec.index = idx}
1305
+ @index = idx
1306
+
1212
1307
  self
1213
1308
  end
1214
1309
 
1215
- # Non-destructive version of #reindex!
1216
- def reindex new_index
1217
- self.dup.reindex! new_index
1310
+ # Reassign vectors with a new index of type Daru::Index or any of its subclasses.
1311
+ #
1312
+ # @param [Daru::Index] idx The new index object on which the vectors are to
1313
+ # be indexed. Must be of the same size as ncols.
1314
+ # @example Reassigning vectors of a DataFrame
1315
+ # df = Daru::DataFrame.new({a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44]})
1316
+ # df.vectors.to_a #=> [:a, :b, :c]
1317
+ #
1318
+ # df.vectors = Daru::Index.new([:foo, :bar, :baz])
1319
+ # df.vectors.to_a #=> [:foo, :bar, :baz]
1320
+ def vectors= idx
1321
+ raise ArgumentError, "Can only reindex with Index and its subclasses" unless
1322
+ index.kind_of?(Daru::Index)
1323
+ raise ArgumentError, "Specified index length #{idx.size} not equal to"\
1324
+ "dataframe size #{ncols}" if idx.size != ncols
1325
+
1326
+ @vectors = idx
1327
+ self
1218
1328
  end
1219
1329
 
1220
1330
  # Return the indexes of all the numeric vectors. Will include vectors with nils
@@ -1231,8 +1341,8 @@ module Daru
1231
1341
  def numeric_vector_names
1232
1342
  numerics = []
1233
1343
 
1234
- each_vector do |vec, i|
1235
- numerics << vec.name if(vec.type == :numeric)
1344
+ @vectors.each do |v|
1345
+ numerics << v if (self[v].type == :numeric)
1236
1346
  end
1237
1347
  numerics
1238
1348
  end
@@ -1248,7 +1358,7 @@ module Daru
1248
1358
  arr
1249
1359
  end
1250
1360
 
1251
- order = @vectors.is_a?(MultiIndex) ? MultiIndex.new(nv) : Index.new(nv)
1361
+ order = Index.new(nv)
1252
1362
  Daru::DataFrame.new(arry, clone: cln, order: order, index: @index)
1253
1363
  end
1254
1364
 
@@ -1302,9 +1412,11 @@ module Daru
1302
1412
 
1303
1413
  opts[:by] = create_logic_blocks vector_order, opts[:by]
1304
1414
  opts[:ascending] = sort_order_array vector_order, opts[:ascending]
1305
- index = @index.to_a
1306
- send(opts[:type], vector_order, index, opts[:by], opts[:ascending])
1307
- reindex! index
1415
+ idx = @index.to_a
1416
+ send(opts[:type], vector_order, idx, opts[:by], opts[:ascending])
1417
+ self.index = Daru::Index.new(idx)
1418
+
1419
+ self
1308
1420
  end
1309
1421
 
1310
1422
  # Non-destructive version of #sort!
@@ -1347,7 +1459,8 @@ module Daru
1347
1459
  # # [:bar] 18 26
1348
1460
  # # [:foo] 10 12
1349
1461
  def pivot_table opts={}
1350
- raise ArgumentError, "Specify grouping index" if !opts[:index] or opts[:index].empty?
1462
+ raise ArgumentError,
1463
+ "Specify grouping index" if !opts[:index] or opts[:index].empty?
1351
1464
 
1352
1465
  index = opts[:index]
1353
1466
  vectors = opts[:vectors] || []
@@ -1389,18 +1502,20 @@ module Daru
1389
1502
  end
1390
1503
  end
1391
1504
 
1392
- df_index = Daru::MultiIndex.new(symbolize(super_hash.keys))
1505
+ df_index = Daru::MultiIndex.from_tuples super_hash.keys
1393
1506
 
1394
1507
  vector_indexes = []
1395
1508
  super_hash.each_value do |sub_hash|
1396
1509
  vector_indexes.concat sub_hash.keys
1397
1510
  end
1398
- df_vectors = Daru::MultiIndex.new symbolize(vector_indexes.uniq)
1511
+
1512
+ df_vectors = Daru::MultiIndex.from_tuples vector_indexes.uniq
1399
1513
  pivoted_dataframe = Daru::DataFrame.new({}, index: df_index, order: df_vectors)
1400
1514
 
1401
1515
  super_hash.each do |row_index, sub_h|
1402
1516
  sub_h.each do |vector_index, val|
1403
- pivoted_dataframe[symbolize(vector_index)][symbolize(row_index)] = val
1517
+ # pivoted_dataframe[symbolize(vector_index)][symbolize(row_index)] = val
1518
+ pivoted_dataframe[vector_index][row_index] = val
1404
1519
  end
1405
1520
  end
1406
1521
  return pivoted_dataframe
@@ -1430,47 +1545,33 @@ module Daru
1430
1545
  df_new
1431
1546
  end
1432
1547
 
1433
- # Join 2 DataFrames by given fields
1434
- # type is one of :left and :inner, default is :left
1435
- #
1436
- # Untested! Use at your own risk.
1437
- #
1438
- # @return {Daru::DataFrame}
1439
- def join(other_ds,fields_1=[],fields_2=[],type=:left)
1440
- fields_new = other_ds.vectors.to_a - fields_2
1441
- fields = self.vectors.to_a + fields_new
1442
-
1443
- other_ds_hash = {}
1444
- other_ds.each_row do |row|
1445
- key = row.to_hash.select { |k,v| fields_2.include?(k) }.values
1446
- value = row.to_hash.select { |k,v| fields_new.include?(k) }
1447
-
1448
- if other_ds_hash[key].nil?
1449
- other_ds_hash[key] = [value]
1450
- else
1451
- other_ds_hash[key] << value
1452
- end
1453
- end
1454
-
1455
- new_ds = DataFrame.new({}, order: fields)
1456
-
1457
- self.each_row do |row|
1458
- key = row.to_hash.select{|k,v| fields_1.include?(k)}.values
1459
- new_case = row.to_hash
1460
-
1461
- if other_ds_hash[key].nil?
1462
- if type == :left
1463
- fields_new.each{|field| new_case[field] = nil}
1464
- new_ds.add_row(Daru::Vector.new(new_case))
1465
- end
1466
- else
1467
- other_ds_hash[key].each do |new_values|
1468
- new_ds.add_row(Daru::Vector.new(new_case.merge(new_values)))
1469
- end
1470
- end
1471
- end
1472
-
1473
- new_ds
1548
+ # Join 2 DataFrames with SQL style joins. Currently supports inner, left
1549
+ # outer, right outer and full outer joins.
1550
+ #
1551
+ # @param [Daru::DataFrame] other_df Another DataFrame on which the join is
1552
+ # to be performed.
1553
+ # @param [Hash] opts Options Hash
1554
+ # @option :how [Symbol] Can be one of :inner, :left, :right or :outer.
1555
+ # @option :on [Array] The columns on which the join is to be performed.
1556
+ # Column names specified here must be common to both DataFrames.
1557
+ # @return [Daru::DataFrame]
1558
+ # @example Inner Join
1559
+ # left = Daru::DataFrame.new({
1560
+ # :id => [1,2,3,4],
1561
+ # :name => ['Pirate', 'Monkey', 'Ninja', 'Spaghetti']
1562
+ # })
1563
+ # right = Daru::DataFrame.new({
1564
+ # :id => [1,2,3,4],
1565
+ # :name => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
1566
+ # })
1567
+ # left.join(right, how: :inner, on: [:name])
1568
+ # #=>
1569
+ # ##<Daru::DataFrame:82416700 @name = 74c0811b-76c6-4c42-ac93-e6458e82afb0 @size = 2>
1570
+ # # id_1 name id_2
1571
+ # # 0 1 Pirate 2
1572
+ # # 1 3 Ninja 4
1573
+ def join(other_df,opts={})
1574
+ Daru::Core::Merge.join(self, other_df, opts)
1474
1575
  end
1475
1576
 
1476
1577
 
@@ -1486,7 +1587,7 @@ module Daru
1486
1587
  # to new dataset, and fields which responds to second
1487
1588
  # pattern will be added one case for each different %n.
1488
1589
  #
1489
- # == Usage
1590
+ # @example
1490
1591
  # cases=[
1491
1592
  # ['1','george','red',10,'blue',20,nil,nil],
1492
1593
  # ['2','fred','green',15,'orange',30,'white',20],
@@ -1494,17 +1595,16 @@ module Daru
1494
1595
  # ]
1495
1596
  # ds=Daru::DataFrame.rows(cases, order: [:id, :name, :car_color1, :car_value1, :car_color2, :car_value2, :car_color3, :car_value3])
1496
1597
  # ds.one_to_many([:id],'car_%v%n').to_matrix
1497
- # => Matrix[
1498
- # ["red", "1", 10],
1499
- # ["blue", "1", 20],
1500
- # ["green", "2", 15],
1501
- # ["orange", "2", 30],
1502
- # ["white", "2", 20]
1503
- # ]
1504
- #
1598
+ # #=> Matrix[
1599
+ # # ["red", "1", 10],
1600
+ # # ["blue", "1", 20],
1601
+ # # ["green", "2", 15],
1602
+ # # ["orange", "2", 30],
1603
+ # # ["white", "2", 20]
1604
+ # # ]
1505
1605
  def one_to_many(parent_fields, pattern)
1506
1606
  re = Regexp.new pattern.gsub("%v","(.+?)").gsub("%n","(\\d+?)")
1507
- ds_vars = parent_fields
1607
+ ds_vars = parent_fields.dup
1508
1608
  vars = []
1509
1609
  max_n = 0
1510
1610
  h = parent_fields.inject({}) { |a,v|
@@ -1512,8 +1612,8 @@ module Daru
1512
1612
  a
1513
1613
  }
1514
1614
  # Adding _col_id
1515
- h[:_col_id] = Daru::Vector.new([])
1516
- ds_vars.push(:_col_id)
1615
+ h['_col_id'] = Daru::Vector.new([])
1616
+ ds_vars.push('_col_id')
1517
1617
 
1518
1618
  @vectors.each do |f|
1519
1619
  if f =~ re
@@ -1529,20 +1629,20 @@ module Daru
1529
1629
  each_row do |row|
1530
1630
  row_out = {}
1531
1631
  parent_fields.each do |f|
1532
- row_out[f]=row[f]
1632
+ row_out[f] = row[f]
1533
1633
  end
1534
1634
 
1535
1635
  max_n.times do |n1|
1536
1636
  n = n1+1
1537
1637
  any_data = false
1538
1638
  vars.each do |v|
1539
- data = row[pattern.gsub("%v",v.to_s).gsub("%n",n.to_s).to_sym]
1639
+ data = row[pattern.gsub("%v",v.to_s).gsub("%n",n.to_s)]
1540
1640
  row_out[v] = data
1541
1641
  any_data = true if !data.nil?
1542
1642
  end
1543
1643
 
1544
1644
  if any_data
1545
- row_out[:_col_id] = n
1645
+ row_out['_col_id'] = n
1546
1646
  ds.add_row(row_out)
1547
1647
  end
1548
1648
  end
@@ -1569,14 +1669,14 @@ module Daru
1569
1669
  # * table - String specifying name of the table that will created in SQL.
1570
1670
  # * charset - Character set. Default is "UTF8".
1571
1671
  #
1572
- # == Usage
1672
+ # @example
1573
1673
  #
1574
1674
  # ds = Daru::DataFrame.new({
1575
1675
  # :id => Daru::Vector.new([1,2,3,4,5]),
1576
1676
  # :name => Daru::Vector.new(%w{Alex Peter Susan Mary John})
1577
1677
  # })
1578
1678
  # ds.create_sql('names')
1579
- # ==>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
1679
+ # #=>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
1580
1680
  #
1581
1681
  def create_sql(table,charset="UTF8")
1582
1682
  sql = "CREATE TABLE #{table} ("
@@ -1639,6 +1739,8 @@ module Daru
1639
1739
  arry
1640
1740
  end
1641
1741
 
1742
+ # Convert to json. If no_index is true then the index will NOT be included
1743
+ # in the JSON thus created.
1642
1744
  def to_json no_index=true
1643
1745
  if no_index
1644
1746
  self.to_a[0].to_json
@@ -1681,7 +1783,7 @@ module Daru
1681
1783
  html += '</tr>'
1682
1784
  if num > threshold
1683
1785
  html += '<tr>'
1684
- (@vectors + 1).size.times { html += '<td>...</td>' }
1786
+ (@vectors.size + 1).times { html += '<td>...</td>' }
1685
1787
  html += '</tr>'
1686
1788
 
1687
1789
  last_index = @index.to_a.last
@@ -1713,12 +1815,9 @@ module Daru
1713
1815
  @data.each { |v| v.update } if Daru.lazy_update
1714
1816
  end
1715
1817
 
1818
+ # Rename the DataFrame.
1716
1819
  def rename new_name
1717
- if new_name.is_a?(Numeric)
1718
- @name = new_name
1719
- return
1720
- end
1721
- @name = new_name.to_sym
1820
+ @name = new_name
1722
1821
  end
1723
1822
 
1724
1823
  # Write this DataFrame to a CSV file.
@@ -1792,7 +1891,7 @@ module Daru
1792
1891
  # df.recast a: :nmatrix, c: :nmatrix
1793
1892
  def recast opts={}
1794
1893
  opts.each do |vector_name, dtype|
1795
- vector[vector_name].cast(dtype: dtype)
1894
+ self[vector_name].cast(dtype: dtype)
1796
1895
  end
1797
1896
  end
1798
1897
 
@@ -1840,16 +1939,24 @@ module Daru
1840
1939
  content
1841
1940
  end
1842
1941
 
1942
+ # Query a DataFrame by passing a Daru::Core::Query::BoolArray object.
1943
+ def where bool_array
1944
+ Daru::Core::Query.df_where self, bool_array
1945
+ end
1946
+
1843
1947
  def == other
1844
- @index == other.index and @size == other.size and @vectors == other.vectors and
1845
- @vectors.all? { |vector| self[vector, :vector] == other[vector, :vector] }
1948
+ self.class == other.class and
1949
+ @size == other.size and
1950
+ @index == other.index and
1951
+ @vectors == other.vectors and
1952
+ @vectors.to_a.all? { |v| self[v] == other[v] }
1846
1953
  end
1847
1954
 
1848
1955
  def method_missing(name, *args, &block)
1849
1956
  if md = name.match(/(.+)\=/)
1850
1957
  insert_or_modify_vector name[/(.+)\=/].delete("=").to_sym, args[0]
1851
1958
  elsif self.has_vector? name
1852
- self[name, :vector]
1959
+ self[name]
1853
1960
  else
1854
1961
  super(name, *args, &block)
1855
1962
  end
@@ -1859,7 +1966,7 @@ module Daru
1859
1966
 
1860
1967
  def possibly_multi_index? index
1861
1968
  if @index.is_a?(MultiIndex)
1862
- Daru::MultiIndex.new(index)
1969
+ Daru::MultiIndex.from_tuples(index)
1863
1970
  else
1864
1971
  Daru::Index.new(index)
1865
1972
  end
@@ -1892,7 +1999,7 @@ module Daru
1892
1999
 
1893
2000
  def partition vector_order, index, by, ascending, left_lower, right_upper
1894
2001
  mindex = (left_lower + right_upper) / 2
1895
- mvalues = vector_order.inject([]) { |a, vector_name| a << vector[vector_name][mindex]; a }
2002
+ mvalues = vector_order.inject([]) { |a, vector_name| a << self[vector_name][mindex]; a }
1896
2003
  i = left_lower
1897
2004
  j = right_upper
1898
2005
  descending = ascending.map { |a| !a }
@@ -1929,7 +2036,7 @@ module Daru
1929
2036
  def keep? current_index, mvalues, vector_order, sort_order, by, vector_order_index
1930
2037
  vector_name = vector_order[vector_order_index]
1931
2038
  if vector_name
1932
- vec = vector[vector_name]
2039
+ vec = self[vector_name]
1933
2040
  eval = by[vector_name].call(vec[current_index], mvalues[vector_order_index])
1934
2041
 
1935
2042
  if sort_order[vector_order_index] # sort in ascending order
@@ -1980,28 +2087,41 @@ module Daru
1980
2087
 
1981
2088
  return dup(@vectors[location]) if location.is_a?(Range)
1982
2089
  if @vectors.is_a?(MultiIndex)
1983
- pos = vectors_index_for names
2090
+ pos = @vectors[names]
1984
2091
 
1985
2092
  if pos.is_a?(Integer)
1986
2093
  return @data[pos]
1987
2094
  else # MultiIndex
1988
2095
  new_vectors = pos.map do |tuple|
1989
- @data[vectors_index_for(names + tuple)]
2096
+ @data[@vectors[tuple]]
1990
2097
  end
1991
- Daru::DataFrame.new(new_vectors, index: @index, order: Daru::MultiIndex.new(pos.to_a))
2098
+
2099
+ if !location.is_a?(Range) and names.size < @vectors.width
2100
+ pos = pos.drop_left_level names.size
2101
+ end
2102
+
2103
+ Daru::DataFrame.new(
2104
+ new_vectors, index: @index, order: pos)
1992
2105
  end
1993
2106
  else
1994
2107
  unless names[1]
1995
- pos = vectors_index_for location
1996
- return @data[pos]
2108
+ pos = @vectors[location]
2109
+
2110
+ if pos.is_a?(Numeric)
2111
+ return @data[pos]
2112
+ else
2113
+ names = pos
2114
+ end
1997
2115
  end
1998
2116
 
1999
- new_vcs = {}
2117
+ new_vcs = []
2000
2118
  names.each do |name|
2001
- name = name.to_sym unless name.is_a?(Integer)
2002
- new_vcs[name] = @data[@vectors[name]]
2119
+ new_vcs << @data[@vectors[name]].to_a
2003
2120
  end
2004
- Daru::DataFrame.new new_vcs, order: new_vcs.keys, index: @index, name: @name
2121
+
2122
+ order = names.is_a?(Array) ? Daru::Index.new(names) : names
2123
+ Daru::DataFrame.new(new_vcs, order: order,
2124
+ index: @index, name: @name)
2005
2125
  end
2006
2126
  end
2007
2127
 
@@ -2009,82 +2129,55 @@ module Daru
2009
2129
  location = names[0]
2010
2130
 
2011
2131
  if @index.is_a?(MultiIndex)
2012
- pos = row_index_for names
2132
+ pos = @index[names]
2013
2133
  if pos.is_a?(Integer)
2014
2134
  return Daru::Vector.new(populate_row_for(pos), index: @vectors, name: pos)
2015
2135
  else
2016
- new_rows =
2017
- if location.is_a?(Range)
2018
- pos.map { |tuple| populate_row_for(tuple) }
2019
- else
2020
- pos.map { |tuple| populate_row_for(names + tuple) }
2021
- end
2136
+ new_rows = pos.map { |tuple| populate_row_for(tuple) }
2022
2137
 
2023
- Daru::DataFrame.rows(new_rows, order: @vectors, name: @name,
2024
- index: Daru::MultiIndex.new(pos.to_a))
2138
+ if !location.is_a?(Range) and names.size < @index.width
2139
+ pos = pos.drop_left_level names.size
2140
+ end
2141
+
2142
+ Daru::DataFrame.rows(
2143
+ new_rows, order: @vectors, name: @name, index: pos)
2025
2144
  end
2026
2145
  else
2027
2146
  if names[1].nil?
2028
- if location.is_a?(Range)
2029
- index_arry = @index.to_a
2030
-
2031
- range =
2032
- if location.first.is_a?(Numeric)
2033
- location
2034
- else
2035
- first_index = index_arry.index location.first
2036
- last_index = index_arry.index location.last
2037
-
2038
- first_index..last_index
2147
+ names = @index[location]
2148
+ if names.is_a?(Numeric)
2149
+ row = []
2150
+ @data.each do |vector|
2151
+ row << vector[location]
2039
2152
  end
2040
2153
 
2041
- names = index_arry[range]
2042
- else
2043
- row = []
2044
- name = named_index_for names[0]
2045
- @vectors.each do |vector|
2046
- row << @data[@vectors[vector]][name]
2047
- end
2048
-
2049
- return Daru::Vector.new(row, index: @vectors, name: set_name(name))
2154
+ return Daru::Vector.new(row, index: @vectors, name: set_name(location))
2050
2155
  end
2051
2156
  end
2052
2157
  # Access multiple rows
2053
2158
  rows = []
2054
2159
  names.each do |name|
2055
- rows << self.row[name]
2160
+ rows << self.row[name].to_a
2056
2161
  end
2057
2162
 
2058
- Daru::DataFrame.rows rows, name: @name
2059
- end
2060
- end
2061
-
2062
- def row_index_for location
2063
- if @index.include?(location) or location[0].is_a?(Range)
2064
- @index[location]
2065
- elsif location[0].is_a?(Integer)
2066
- location[0]
2163
+ Daru::DataFrame.rows rows, index: names ,name: @name, order: @vectors
2067
2164
  end
2068
2165
  end
2069
2166
 
2070
2167
  def populate_row_for pos
2071
- @vectors.map do |vector|
2072
- @data[@vectors[vector]][pos]
2168
+ @data.map do |vector|
2169
+ vector[pos]
2073
2170
  end
2074
2171
  end
2075
2172
 
2076
2173
  def insert_or_modify_vector name, vector
2077
- if vectors.is_a?(Index)
2078
- name = name[0]
2079
- end
2174
+ name = name[0] unless @vectors.is_a?(MultiIndex)
2175
+ v = nil
2080
2176
 
2081
- @vectors = @vectors + name if !@vectors.include?(name)
2082
- v = nil
2083
-
2084
2177
  if @index.empty?
2085
2178
  v = vector.is_a?(Daru::Vector) ? vector : Daru::Vector.new(vector.to_a)
2086
2179
  @index = v.index
2087
- @data[@vectors[name]] = v
2180
+ assign_or_add_vector name, v
2088
2181
  set_size
2089
2182
 
2090
2183
  @data.map! do |v|
@@ -2096,21 +2189,47 @@ module Daru
2096
2189
  end
2097
2190
  else
2098
2191
  if vector.is_a?(Daru::Vector)
2099
- v = Daru::Vector.new [], name: set_name(name), index: @index
2100
- @index.each do |idx|
2101
- v[idx] = vector[idx]
2192
+ if vector.index == @index # so that index-by-index assignment is avoided when possible.
2193
+ v = vector.dup
2194
+ else
2195
+ v = Daru::Vector.new [], name: set_name(name), index: @index
2196
+ @index.each do |idx|
2197
+ if vector.index.include? idx
2198
+ v[idx] = vector[idx]
2199
+ else
2200
+ v[idx] = nil
2201
+ end
2202
+ end
2102
2203
  end
2103
2204
  else
2104
- raise Exception, "Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}" if
2205
+ raise SizeError,
2206
+ "Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}" if
2105
2207
  @size != vector.size
2106
2208
 
2107
2209
  v = Daru::Vector.new(vector, name: set_name(name), index: @index)
2108
2210
  end
2109
2211
 
2110
- @data[@vectors[name]] = v
2212
+ assign_or_add_vector name, v
2111
2213
  end
2112
2214
  end
2113
2215
 
2216
+ def assign_or_add_vector name, v
2217
+ #FIXME: fix this jugaad. need to make changes in Indexing itself.
2218
+ pos = @vectors[name]
2219
+
2220
+ if !pos.kind_of?(Daru::Index) and pos == name and
2221
+ (@vectors.include?(name) or (pos.is_a?(Integer) and pos < @data.size))
2222
+ @data[pos] = v
2223
+ elsif pos.kind_of?(Daru::Index)
2224
+ pos.each do |p|
2225
+ @data[@vectors[p]] = v
2226
+ end
2227
+ else
2228
+ @vectors = @vectors | [name] if !@vectors.include?(name)
2229
+ @data[@vectors[name]] = v
2230
+ end
2231
+ end
2232
+
2114
2233
  def insert_or_modify_row name, vector
2115
2234
  if index.is_a?(MultiIndex)
2116
2235
  # TODO
@@ -2124,13 +2243,13 @@ module Daru
2124
2243
  end
2125
2244
 
2126
2245
  if @index.include? name
2127
- @vectors.each do |vector|
2128
- @data[@vectors[vector]][name] = v[vector]
2246
+ self.each_vector_with_index do |vector,i|
2247
+ vector[name] = v.index.include?(i) ? v[i] : nil
2129
2248
  end
2130
2249
  else
2131
- @index = reassign_index_as(@index + name)
2132
- @vectors.each do |vector|
2133
- @data[@vectors[vector]].concat v[vector], name
2250
+ @index = @index | [name]
2251
+ self.each_vector_with_index do |vector,i|
2252
+ vector.concat((v.index.include?(i) ? v[i] : nil), name)
2134
2253
  end
2135
2254
  end
2136
2255
 
@@ -2184,14 +2303,11 @@ module Daru
2184
2303
  end
2185
2304
 
2186
2305
  def create_vectors_index_with vectors, source
2187
- vectors = source.keys.sort if vectors.nil?
2306
+ vectors = source.keys.sort_by { |a| a.to_s } if vectors.nil?
2188
2307
 
2189
2308
  @vectors =
2190
2309
  unless vectors.is_a?(Index) or vectors.is_a?(MultiIndex)
2191
- Daru::Index.new((vectors + (source.keys - vectors))
2192
- .uniq
2193
- .map { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
2194
- )
2310
+ Daru::Index.new((vectors + (source.keys - vectors)).uniq)
2195
2311
  else
2196
2312
  vectors
2197
2313
  end
@@ -2200,21 +2316,17 @@ module Daru
2200
2316
  def all_vectors_have_equal_indexes? source
2201
2317
  idx = source.values[0].index
2202
2318
 
2203
- source.all? do |name, vector|
2319
+ source.values.all? do |vector|
2204
2320
  idx == vector.index
2205
2321
  end
2206
2322
  end
2207
2323
 
2208
- def reassign_index_as new_index
2209
- Daru::Index.new new_index
2210
- end
2211
-
2212
- def create_index index
2213
- index.is_a?(MultiIndex) ? index : Daru::Index.new(index)
2324
+ def try_create_index index
2325
+ index.kind_of?(Index) ? index : Daru::Index.new(index)
2214
2326
  end
2215
2327
 
2216
2328
  def set_name potential_name
2217
- potential_name.is_a?(Array) ? potential_name.join.to_sym : potential_name
2329
+ potential_name.is_a?(Array) ? potential_name.join : potential_name
2218
2330
  end
2219
2331
 
2220
2332
  def symbolize arry