daru 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +6 -6
  3. data/.gitignore +2 -0
  4. data/CONTRIBUTING.md +7 -3
  5. data/History.md +36 -0
  6. data/README.md +21 -13
  7. data/Rakefile +16 -1
  8. data/benchmarks/TradeoffData.csv +65 -0
  9. data/benchmarks/dataframe_creation.rb +39 -0
  10. data/benchmarks/group_by.rb +32 -0
  11. data/benchmarks/row_access.rb +41 -0
  12. data/benchmarks/row_assign.rb +36 -0
  13. data/benchmarks/sorting.rb +44 -0
  14. data/benchmarks/vector_access.rb +31 -0
  15. data/benchmarks/vector_assign.rb +42 -0
  16. data/benchmarks/where_clause.rb +48 -0
  17. data/benchmarks/where_vs_filter.rb +28 -0
  18. data/daru.gemspec +29 -5
  19. data/lib/daru.rb +30 -1
  20. data/lib/daru/accessors/array_wrapper.rb +2 -2
  21. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
  22. data/lib/daru/core/group_by.rb +112 -31
  23. data/lib/daru/core/merge.rb +170 -0
  24. data/lib/daru/core/query.rb +95 -0
  25. data/lib/daru/dataframe.rb +335 -223
  26. data/lib/daru/date_time/index.rb +550 -0
  27. data/lib/daru/date_time/offsets.rb +397 -0
  28. data/lib/daru/index.rb +266 -54
  29. data/lib/daru/io/io.rb +1 -2
  30. data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
  31. data/lib/daru/maths/arithmetic/vector.rb +2 -2
  32. data/lib/daru/maths/statistics/dataframe.rb +58 -8
  33. data/lib/daru/maths/statistics/vector.rb +229 -0
  34. data/lib/daru/vector.rb +230 -80
  35. data/lib/daru/version.rb +1 -1
  36. data/spec/core/group_by_spec.rb +16 -16
  37. data/spec/core/merge_spec.rb +52 -0
  38. data/spec/core/query_spec.rb +171 -0
  39. data/spec/dataframe_spec.rb +278 -280
  40. data/spec/date_time/data_spec.rb +199 -0
  41. data/spec/date_time/index_spec.rb +433 -0
  42. data/spec/date_time/offsets_spec.rb +371 -0
  43. data/spec/fixtures/stock_data.csv +500 -0
  44. data/spec/index_spec.rb +317 -11
  45. data/spec/io/io_spec.rb +18 -17
  46. data/spec/math/arithmetic/dataframe_spec.rb +3 -3
  47. data/spec/math/statistics/dataframe_spec.rb +39 -1
  48. data/spec/math/statistics/vector_spec.rb +163 -1
  49. data/spec/monkeys_spec.rb +4 -0
  50. data/spec/spec_helper.rb +3 -0
  51. data/spec/vector_spec.rb +125 -60
  52. metadata +71 -14
  53. data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
  54. data/lib/daru/multi_index.rb +0 -216
  55. data/spec/multi_index_spec.rb +0 -216
@@ -0,0 +1,170 @@
1
module Daru
  module Core
    # Helper routines used by Daru::Core::Merge to perform SQL style joins.
    # All helpers operate on plain Hashes of the form { vector_name => Array }
    # (see #hashify) and only build a Daru::DataFrame at the very end.
    module MergeHelper
      class << self
        # Renames, in place, the key of +hash+ whose recoded name is +matcher+.
        #
        # A recoded name is the original key followed by "_<number>" (for
        # example :id becomes :id_1). Only a key that matches the whole recoded
        # name is renamed, so a short key such as :a is never mistaken for
        # :name_1. The renamed entry moves to the end of the hash.
        #
        # @param hash [Hash] vector name => values; mutated in place.
        # @param matcher [Symbol, nil] recoded name to look for.
        # @return the values stored under the renamed key, or nil when no key
        #   of +hash+ corresponds to +matcher+.
        def replace_keys_if_duplicates(hash, matcher)
          return if matcher.nil?

          recoded = matcher.to_s
          matched = hash.keys.find do |key|
            recoded =~ /\A#{Regexp.escape(key.to_s)}_\d+\z/
          end
          return unless matched

          hash[matcher] = hash.delete(matched)
        end

        # Disambiguates vector names that occur in both hashes but are not
        # join keys by renaming them to their recoded forms (name_1 in
        # +df_hash1+, name_2 in +df_hash2+).
        #
        # Relies on Array#recode_repeated, which is not defined in this file.
        #
        # @param df_hash1 [Hash] hashified left data; mutated in place.
        # @param df_hash2 [Hash] hashified right data; mutated in place.
        # @param on [Array] names of the join vectors.
        def resolve_duplicates(df_hash1, df_hash2, on)
          hk = df_hash1.keys + df_hash2.keys - on
          recoded = hk.recode_repeated.map(&:to_sym)
          diff = (recoded - hk).sort

          diff.each_slice(2) do |first_name, second_name|
            replace_keys_if_duplicates(df_hash1, first_name)
            replace_keys_if_duplicates(df_hash2, second_name)
          end
        end

        # Converts +df+ into a Hash whose values are plain Arrays.
        #
        # @param df [#to_hash] object whose #to_hash values respond to #to_a.
        # @return [Hash] vector name => Array of values.
        def hashify(df)
          hsh = df.to_hash
          hsh.each { |k, v| hsh[k] = v.to_a }
          hsh
        end

        # Inner join: one output row per pair of rows whose +on+ values agree.
        #
        # @return [Daru::DataFrame]
        def inner_join(df1, df2, df_hash1, df_hash2, on)
          joined_hash = empty_joined_hash(df_hash1, df_hash2, on)

          (0...df1.size).each do |id1|
            (0...df2.size).each do |id2|
              next unless rows_match?(on, df_hash1, id1, df_hash2, id2)

              append_joined_row(joined_hash, df_hash1, id1, df_hash2, id2)
            end
          end

          Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
        end

        # Full outer join: the union of the rows produced by the left and the
        # right outer joins. Identical rows are collapsed by Array#|.
        #
        # @return [Daru::DataFrame]
        def full_outer_join(df1, df2, df_hash1, df_hash2, on)
          left  = left_outer_join(df1, df2, df_hash1, df_hash2, on, true)
          right = right_outer_join(df1, df2, df_hash1, df_hash2, on, true)

          Daru::DataFrame.rows(
            (left.values.transpose | right.values.transpose), order: left.keys)
        end

        # Left outer join: every row of the left data is kept; rows without a
        # partner get nil for the vectors that only exist on the right.
        #
        # @param as_hash [Boolean] return the raw Hash instead of a DataFrame.
        # @return [Daru::DataFrame, Hash]
        def left_outer_join(df1, df2, df_hash1, df_hash2, on, as_hash=false)
          joined_hash = empty_joined_hash(df_hash1, df_hash2, on)
          fill_outer_join(joined_hash, df1.size, df2.size, df_hash1, df_hash2, on)

          return joined_hash if as_hash
          Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
        end

        # Right outer join: every row of the right data is kept; rows without
        # a partner get nil for the vectors that only exist on the left.
        # The vector order of the result is the same as for the left join.
        #
        # @param as_hash [Boolean] return the raw Hash instead of a DataFrame.
        # @return [Daru::DataFrame, Hash]
        def right_outer_join(df1, df2, df_hash1, df_hash2, on, as_hash=false)
          joined_hash = empty_joined_hash(df_hash1, df_hash2, on)
          fill_outer_join(joined_hash, df2.size, df1.size, df_hash2, df_hash1, on)

          return joined_hash if as_hash
          Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
        end

        # Ensures every name in +on+ has a (truthy) entry in both hashes.
        #
        # @raise [ArgumentError] if a join vector is missing from either side.
        def verify_dataframes(df_hash1, df_hash2, on)
          raise ArgumentError,
            "All fields in :on must be present in self" if !on.all? { |e| df_hash1[e] }
          raise ArgumentError,
            "All fields in :on must be present in other DF" if !on.all? { |e| df_hash2[e] }
        end

        private

        # Result skeleton: left-only vectors, then the join vectors, then the
        # right-only vectors, each starting with an empty Array.
        def empty_joined_hash(df_hash1, df_hash2, on)
          names = (df_hash1.keys - on) | on | (df_hash2.keys - on)
          names.each_with_object({}) { |name, acc| acc[name] = [] }
        end

        # True when the two rows agree on every join vector.
        def rows_match?(on, primary, pid, secondary, sid)
          on.all? { |n| primary[n][pid] == secondary[n][sid] }
        end

        # Appends one combined row; values come from +primary+ when it has the
        # vector and from +secondary+ otherwise.
        def append_joined_row(joined_hash, primary, pid, secondary, sid)
          joined_hash.each do |k, v|
            v << (primary.has_key?(k) ? primary[k][pid] : secondary[k][sid])
          end
        end

        # Shared body of the left and right outer joins. +primary+ is the side
        # whose rows are always kept.
        def fill_outer_join(joined_hash, primary_size, secondary_size, primary, secondary, on)
          (0...primary_size).each do |pid|
            joined = false

            (0...secondary_size).each do |sid|
              next unless rows_match?(on, primary, pid, secondary, sid)

              joined = true
              append_joined_row(joined_hash, primary, pid, secondary, sid)
            end

            next if joined

            # No partner: copy the primary row and pad the rest with nil.
            primary.keys.each { |k| joined_hash[k] << primary[k][pid] }
            (joined_hash.keys - primary.keys).each { |k| joined_hash[k] << nil }
          end
        end
      end
    end

    # Private module containing methods for join, merge, concat operations on
    # dataframes and vectors.
    # @private
    module Merge
      class << self
        # Joins +df1+ and +df2+.
        #
        # @param df1 [Daru::DataFrame] left side of the join.
        # @param df2 [Daru::DataFrame] right side of the join.
        # @param opts [Hash] :how (:inner, :outer, :left or :right) and :on
        #   (name or Array of names of the vectors to join on).
        # @return [Daru::DataFrame]
        # @raise [ArgumentError] if :on is missing, names a vector absent from
        #   either side, or :how is not recognized.
        def join(df1, df2, opts={})
          raise ArgumentError,
            "Specify the vectors to join on with the :on option" if opts[:on].nil?

          helper = MergeHelper

          df_hash1 = helper.hashify df1
          df_hash2 = helper.hashify df2
          on       = Array(opts[:on])

          helper.verify_dataframes df_hash1, df_hash2, on
          helper.resolve_duplicates df_hash1, df_hash2, on

          case opts[:how]
          when :inner
            helper.inner_join df1, df2, df_hash1, df_hash2, on
          when :outer
            helper.full_outer_join df1, df2, df_hash1, df_hash2, on
          when :left
            helper.left_outer_join df1, df2, df_hash1, df_hash2, on
          when :right
            helper.right_outer_join df1, df2, df_hash1, df_hash2, on
          else
            raise ArgumentError, "Unrecognized option in :how => #{opts[:how]}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
module Daru
  module Core
    # Support code for boolean ("where" clause) querying.
    module Query
      # Thin wrapper around an Array of boolean values that can be combined
      # element-wise with &, | and !.
      class BoolArray
        # @return [Array] the wrapped boolean values.
        attr_reader :barry

        # @param barry [Array] boolean values to wrap (stored, not copied).
        def initialize(barry)
          @barry = barry
        end

        # Element-wise AND with another BoolArray.
        #
        # @param other [BoolArray]
        # @return [BoolArray]
        def &(other)
          other_barry = other.barry
          BoolArray.new(@barry.each_with_index.map { |b, i| b && other_barry[i] })
        end

        alias :and :&

        # Element-wise OR with another BoolArray.
        #
        # @param other [BoolArray]
        # @return [BoolArray]
        def |(other)
          other_barry = other.barry
          BoolArray.new(@barry.each_with_index.map { |b, i| b || other_barry[i] })
        end

        alias :or :|

        # Element-wise negation.
        #
        # @return [BoolArray]
        def !
          BoolArray.new(@barry.map { |b| !b })
        end

        # Two BoolArrays are equal when their wrapped Arrays are equal.
        # Objects that do not expose #barry (nil, plain Arrays, ...) are never
        # equal, instead of raising NoMethodError.
        #
        # @return [Boolean]
        def ==(other)
          other.respond_to?(:barry) && @barry == other.barry
        end

        # @return [Array] the wrapped Array itself (not a copy).
        def to_a
          @barry
        end

        def inspect
          "(#{self.class}:#{self.object_id} bool_arry=#{@barry})"
        end
      end

      class << self
        # Sends +operator+ with the scalar +other+ to every element of +data+.
        #
        # @param operator [Symbol] method to send to each element.
        # @param data [Enumerable] values to test.
        # @param other [Object] right-hand operand.
        # @return [BoolArray] strictly true/false result per element.
        def apply_scalar_operator(operator, data, other)
          arry = data.each_with_object([]) do |d, memo|
            memo << (d.send(operator, other) ? true : false)
          end

          BoolArray.new(arry)
        end

        # Sends +operator+ to every element of +vector+ with the element of
        # +other+ at the same position.
        #
        # @param operator [Symbol] method to send to each element.
        # @param vector [#each_with_index] left-hand values.
        # @param other [#[]] right-hand values, indexed by position.
        # @return [BoolArray] strictly true/false result per element.
        def apply_vector_operator(operator, vector, other)
          bool_arry = []
          vector.each_with_index do |d, i|
            bool_arry << (d.send(operator, other[i]) ? true : false)
          end

          BoolArray.new(bool_arry)
        end

        # Applies +bool_array+ to every vector of +data_frame+ through
        # Vector#where and assembles the results into a new DataFrame that
        # uses the index of the first filtered vector.
        #
        # @return [Daru::DataFrame]
        def df_where(data_frame, bool_array)
          vecs = data_frame.map do |vector|
            vector.where(bool_array)
          end

          Daru::DataFrame.new(
            vecs, order: data_frame.vectors, index: vecs[0].index, clone: false)
        end

        # Builds a Vector from the positions of +data+ / +index+ at which
        # +bool_array+ holds a truthy value.
        #
        # @param data [#[]] values, addressed by position.
        # @param index [#[]] index labels, addressed by position.
        # @param bool_array [#to_a] selection mask.
        # @param dtype data type handed to Daru::Vector.new.
        # @return [Daru::Vector]
        def vector_where(data, index, bool_array, dtype)
          new_data = []
          new_index = []
          bool_array.to_a.each_with_index do |b, i|
            next unless b

            new_data << data[i]
            new_index << index[i]
          end

          Daru::Vector.new(new_data, index: new_index, dtype: dtype)
        end
      end
    end
  end
end
@@ -1,7 +1,6 @@
1
1
  $:.unshift File.dirname(__FILE__)
2
2
 
3
3
  require 'accessors/dataframe_by_row.rb'
4
- require 'accessors/dataframe_by_vector.rb'
5
4
  require 'maths/arithmetic/dataframe.rb'
6
5
  require 'maths/statistics/dataframe.rb'
7
6
  require 'plotting/dataframe.rb'
@@ -226,11 +225,11 @@ module Daru
226
225
  @data = []
227
226
 
228
227
  temp_name = opts[:name]
229
- @name = temp_name.is_a?(Numeric) ? temp_name : (temp_name || SecureRandom.uuid).to_sym
228
+ @name = temp_name || SecureRandom.uuid
230
229
 
231
230
  if source.empty?
232
- @vectors = create_index vectors
233
- @index = create_index index
231
+ @vectors = try_create_index vectors
232
+ @index = try_create_index index
234
233
  create_empty_vectors
235
234
  else
236
235
  case source
@@ -239,8 +238,8 @@ module Daru
239
238
  raise ArgumentError, "Number of vectors (#{vectors.size}) should \
240
239
  equal order size (#{source.size})" if source.size != vectors.size
241
240
 
242
- @index = create_index(index || source[0].size)
243
- @vectors = create_index(vectors)
241
+ @index = try_create_index(index || source[0].size)
242
+ @vectors = try_create_index(vectors)
244
243
 
245
244
  @vectors.each_with_index do |vec,idx|
246
245
  @data << Daru::Vector.new(source[idx], index: @index)
@@ -253,9 +252,10 @@ module Daru
253
252
  initialize(hsh, index: index, order: vectors, name: @name, clone: clone)
254
253
  else # array of hashes
255
254
  if vectors.nil?
256
- @vectors = Daru::Index.new source[0].keys.map(&:to_sym)
255
+ @vectors = Daru::Index.new source[0].keys
257
256
  else
258
- @vectors = Daru::Index.new (vectors + (source[0].keys - vectors)).uniq.map(&:to_sym)
257
+ @vectors = Daru::Index.new(
258
+ (vectors + (source[0].keys - vectors)).uniq)
259
259
  end
260
260
  @index = Daru::Index.new(index || source.size)
261
261
 
@@ -272,8 +272,9 @@ module Daru
272
272
  create_vectors_index_with vectors, source
273
273
  if all_daru_vectors_in_source? source
274
274
  if !index.nil?
275
- @index = create_index index
275
+ @index = try_create_index index
276
276
  elsif all_vectors_have_equal_indexes?(source)
277
+ vectors_have_same_index = true
277
278
  @index = source.values[0].index.dup
278
279
  else
279
280
  all_indexes = []
@@ -289,17 +290,28 @@ module Daru
289
290
 
290
291
  if clone
291
292
  @vectors.each do |vector|
292
- @data << Daru::Vector.new([], name: vector, index: @index)
293
-
294
- @index.each do |idx|
295
- @data[@vectors[vector]][idx] = source[vector][idx]
293
+ # avoids matching indexes of vectors if all the supplied vectors
294
+ # have the same index.
295
+ if vectors_have_same_index
296
+ v = source[vector].dup
297
+ else
298
+ v = Daru::Vector.new([], name: vector, index: @index)
299
+
300
+ @index.each do |idx|
301
+ if source[vector].index.include? idx
302
+ v[idx] = source[vector][idx]
303
+ else
304
+ v[idx] = nil
305
+ end
306
+ end
296
307
  end
308
+ @data << v
297
309
  end
298
310
  else
299
311
  @data.concat source.values
300
312
  end
301
313
  else
302
- @index = create_index(index || source.values[0].size)
314
+ @index = try_create_index(index || source.values[0].size)
303
315
 
304
316
  @vectors.each do |name|
305
317
  @data << Daru::Vector.new(source[name].dup, name: set_name(name), index: @index)
@@ -313,6 +325,11 @@ module Daru
313
325
  update
314
326
  end
315
327
 
328
# Deprecated accessor kept for backward compatibility. Prints a deprecation
# notice on $stderr and forwards every argument unchanged to #[].
#
# @param args arguments accepted by #[].
# @return whatever #[] returns for +args+.
def vector *args
  $stderr.puts "#vector has been deprecated in favour of #[]. Please use that."
  self[*args]
end
332
+
316
333
  # Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
317
334
  # Defaults to *:vector*. Use of this method is not recommended for accessing
318
335
  # rows or vectors. Use df.row[:a] for accessing row with index ':a' or
@@ -324,7 +341,6 @@ module Daru
324
341
  else
325
342
  axis = :vector
326
343
  end
327
- names.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
328
344
 
329
345
  if axis == :vector
330
346
  access_vector *names
@@ -349,7 +365,6 @@ module Daru
349
365
 
350
366
  name = args[0..-2]
351
367
  vector = args[-1]
352
- name.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
353
368
 
354
369
  if axis == :vector
355
370
  insert_or_modify_vector name, vector
@@ -360,15 +375,6 @@ module Daru
360
375
  end
361
376
  end
362
377
 
363
- # Access a vector or set/create a vector. Refer #[] and #[]= docs for details.
364
- #
365
- # == Usage
366
- # df.vector[:a] # access vector named ':a'
367
- # df.vector[:b] = [1,2,3] # set vector ':b' to [1,2,3]
368
- def vector
369
- Daru::Accessors::DataFrameByVector.new(self)
370
- end
371
-
372
378
  # Access a vector by name.
373
379
  def column name
374
380
  vector[name]
@@ -398,24 +404,13 @@ module Daru
398
404
  # * +vectors_to_dup+ - An Array specifying the names of Vectors to
399
405
  # be duplicated. Will duplicate the entire DataFrame if not specified.
400
406
  def dup vectors_to_dup=nil
401
- vectors_to_dup = @vectors unless vectors_to_dup
407
+ vectors_to_dup = @vectors.to_a unless vectors_to_dup
402
408
 
403
- new_order =
404
- if vectors.is_a?(MultiIndex)
405
- src = []
406
- vectors_to_dup.each do |vec|
407
- src << @data[@vectors[vec]].dup
408
- end
409
-
410
- Daru::MultiIndex.new(vectors_to_dup)
411
- else
412
- src = {}
413
- vectors_to_dup.each do |vector|
414
- src[vector] = @data[@vectors[vector]].dup
415
- end
416
-
417
- Daru::Index.new(vectors_to_dup)
409
+ src = []
410
+ vectors_to_dup.each do |vec|
411
+ src << @data[@vectors[vec]].to_a
418
412
  end
413
+ new_order = Daru::Index.new(vectors_to_dup)
419
414
 
420
415
  Daru::DataFrame.new src, order: new_order, index: @index.dup, name: @name, clone: true
421
416
  end
@@ -465,6 +460,14 @@ module Daru
465
460
  (vecs.nil? ? self : dup(vecs)).row[*(row_indexes - rows_with_nil)]
466
461
  end
467
462
 
463
+ # Iterate over each index of the DataFrame.
464
+ def each_index &block
465
+ return to_enum(:each_index) unless block_given?
466
+
467
+ @index.each(&block)
468
+ self
469
+ end
470
+
468
471
  # Iterate over each vector
469
472
  def each_vector(&block)
470
473
  return to_enum(:each_vector) unless block_given?
@@ -608,7 +611,7 @@ module Daru
608
611
  #
609
612
  # Recode works similarly to #map, but an important difference between
610
613
  # the two is that recode returns a modified Daru::DataFrame instead
611
- # of an Array. For this reason, #recodeexpects that every run of the
614
+ # of an Array. For this reason, #recode expects that every run of the
612
615
  # block to return a Daru::Vector.
613
616
  #
614
617
  # Just like map and each, recode also accepts an optional _axis_ argument.
@@ -667,7 +670,8 @@ module Daru
667
670
  df = self.dup
668
671
  df.each_vector_with_index do |v, i|
669
672
  ret = yield v
670
- ret.is_a?(Daru::Vector) or raise TypeError, "Every iteration must return Daru::Vector not #{ret.class}"
673
+ ret.is_a?(Daru::Vector) or
674
+ raise TypeError, "Every iteration must return Daru::Vector not #{ret.class}"
671
675
  df[*i] = ret
672
676
  end
673
677
 
@@ -841,7 +845,7 @@ module Daru
841
845
  idx = named_index_for index
842
846
 
843
847
  if @index.include? idx
844
- @index = reassign_index_as(@index.to_a - [idx])
848
+ @index = Daru::Index.new(@index.to_a - [idx])
845
849
  self.each_vector do |vector|
846
850
  vector.delete_at idx
847
851
  end
@@ -1015,7 +1019,7 @@ module Daru
1015
1019
  number_of_missing << row.missing_positions.size
1016
1020
  end
1017
1021
 
1018
- Daru::Vector.new number_of_missing, index: @index, name: "#{@name}_missing_rows".to_sym
1022
+ Daru::Vector.new number_of_missing, index: @index, name: "#{@name}_missing_rows"
1019
1023
  end
1020
1024
 
1021
1025
  # TODO: remove next version
@@ -1087,9 +1091,18 @@ module Daru
1087
1091
 
1088
1092
  # Check if a vector is present
1089
1093
  def has_vector? vector
1090
- !!@vectors[*vector]
1094
+ @vectors.include? vector
1091
1095
  end
1092
1096
 
1097
+ # Works like Array#any?.
1098
+ #
1099
+ # @param [Symbol] axis (:vector) The axis to iterate over. Can be :vector or
1100
+ # :row. A Daru::Vector object is yielded in the block.
1101
+ # @example Using any?
1102
+ # df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
1103
+ # df.any?(:row) do |row|
1104
+ # row[:a] < 3 and row[:b] == 'b'
1105
+ # end #=> true
1093
1106
  def any? axis=:vector, &block
1094
1107
  if axis == :vector or axis == :column
1095
1108
  @data.any?(&block)
@@ -1103,6 +1116,15 @@ module Daru
1103
1116
  end
1104
1117
  end
1105
1118
 
1119
+ # Works like Array#all?
1120
+ #
1121
+ # @param [Symbol] axis (:vector) The axis to iterate over. Can be :vector or
1122
+ # :row. A Daru::Vector object is yielded in the block.
1123
+ # @example Using all?
1124
+ # df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
1125
+ # df.all?(:row) do |row|
1126
+ # row[:a] < 10
1127
+ # end #=> true
1106
1128
  def all? axis=:vector, &block
1107
1129
  if axis == :vector or axis == :column
1108
1130
  @data.all?(&block)
@@ -1183,38 +1205,126 @@ module Daru
1183
1205
  # # ["foo", "one", 3]=>[6],
1184
1206
  # # ["foo", "three", 8]=>[7],
1185
1207
  # # ["foo", "two", 3]=>[2, 4]}
1186
- def group_by vectors
1187
- vectors = [vectors] if vectors.is_a?(Symbol)
1208
+ def group_by *vectors
1209
+ vectors.flatten!
1188
1210
  vectors.each { |v| raise(ArgumentError, "Vector #{v} does not exist") unless
1189
1211
  has_vector?(v) }
1190
1212
 
1191
1213
  Daru::Core::GroupBy.new(self, vectors)
1192
1214
  end
1193
1215
 
1194
- def reindex_vectors! new_vectors
1195
- raise ArgumentError, "Number of vectors passed into function (#{new_vectors.size}) should equal that present in the DataFrame (#{@vectors.size})" if
1196
- @vectors.size != new_vectors.size
1216
# Returns a new DataFrame whose vectors follow +new_vectors+. Vectors already
# present in this DataFrame are carried over; names that are not present
# become vectors filled with nil. The row index and name are reused.
#
# @param [Daru::Index] new_vectors The new vector index.
# @return [Daru::DataFrame] the re-ordered copy; the receiver is not modified.
# @raise [ArgumentError] if +new_vectors+ is not a Daru::Index (or subclass).
def reindex_vectors new_vectors
  raise ArgumentError, "Must pass the new index of type Index or its "\
    "subclasses, not #{new_vectors.class}" unless new_vectors.kind_of?(Daru::Index)

  cl = Daru::DataFrame.new({}, order: new_vectors, index: @index, name: @name)
  new_vectors.each do |vec|
    cl[vec] = @vectors.include?(vec) ? self[vec] : [nil]*nrows
  end

  cl
end
1231
+
1232
+ # Concatenate another DataFrame along corresponding columns.
1233
+ # Very premature implementation. Use with caution.
1234
+ def concat other_df
1235
+ vectors = []
1236
+ @vectors.each do |v|
1237
+ vectors << self[v].to_a.concat(other_df[v].to_a)
1238
+ end
1239
+
1240
+ Daru::DataFrame.new(vectors, order: @vectors)
1241
+ end
1242
+
1243
+ # Set a particular column as the new index of the DF
1244
+ def set_index new_index, opts={}
1245
+ raise ArgumentError, "All elements in new index must be unique." if
1246
+ @size != self[new_index].uniq.size
1247
+
1248
+ self.index = Daru::Index.new(self[new_index].to_a)
1249
+ self.delete_vector(new_index) unless opts[:keep]
1197
1250
 
1198
- @vectors = Daru::Index.new new_vectors.map(&:to_sym), new_vectors.map { |e| @vectors[e] }
1251
+ self
1199
1252
  end
1200
1253
 
1201
- # Change the index of the DataFrame and its underlying vectors. Destructive.
1254
+ # Change the index of the DataFrame and preserve the labels of the previous
1255
+ # indexing. New index can be Daru::Index or any of its subclasses.
1202
1256
  #
1203
- # @param [Symbol, Array] new_index Specify an Array if
1204
- def reindex! new_index
1205
- raise ArgumentError, "Index size must equal dataframe size" if new_index.is_a?(Array) and new_index.size != @size
1257
+ # @param [Daru::Index] new_index The new Index for reindexing the DataFrame.
1258
+ # @example Reindexing DataFrame
1259
+ # df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]},
1260
+ # index: ['a','b','c','d'])
1261
+ # #=>
1262
+ # ##<Daru::DataFrame:83278130 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
1263
+ # # a b
1264
+ # # a 1 11
1265
+ # # b 2 22
1266
+ # # c 3 33
1267
+ # # d 4 44
1268
+ # df.reindex Daru::Index.new(['b', 0, 'a', 'g'])
1269
+ # #=>
1270
+ # ##<Daru::DataFrame:83177070 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
1271
+ # # a b
1272
+ # # b 2 22
1273
+ # # 0 nil nil
1274
+ # # a 1 11
1275
+ # # g nil nil
1276
+ def reindex new_index
1277
+ raise ArgumentError, "Must pass the new index of type Index or its "\
1278
+ "subclasses, not #{new_index.class}" unless new_index.kind_of?(Daru::Index)
1206
1279
 
1207
- @index = possibly_multi_index?(new_index == :seq ? @size : new_index)
1208
- @data.map! do |vector|
1209
- vector.reindex possibly_multi_index?(@index.to_a)
1280
+ cl = Daru::DataFrame.new({}, order: @vectors, index: new_index, name: @name)
1281
+ new_index.each do |idx|
1282
+ if @index.include?(idx)
1283
+ cl.row[idx] = self.row[idx]
1284
+ else
1285
+ cl.row[idx] = [nil]*ncols
1286
+ end
1210
1287
  end
1211
1288
 
1289
+ cl
1290
+ end
1291
+
1292
+ # Reassign index with a new index of type Daru::Index or any of its subclasses.
1293
+ #
1294
+ # @param [Daru::Index] idx New index object on which the rows of the dataframe
1295
+ # are to be indexed.
1296
+ # @example Reassigning index of a DataFrame
1297
+ # df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]})
1298
+ # df.index.to_a #=> [0,1,2,3]
1299
+ #
1300
+ # df.index = Daru::Index.new(['a','b','c','d'])
1301
+ # df.index.to_a #=> ['a','b','c','d']
1302
+ # df.row['a'].to_a #=> [1,11]
1303
+ def index= idx
1304
+ @data.each { |vec| vec.index = idx}
1305
+ @index = idx
1306
+
1212
1307
  self
1213
1308
  end
1214
1309
 
1215
- # Non-destructive version of #reindex!
1216
- def reindex new_index
1217
- self.dup.reindex! new_index
1310
# Reassign vectors with a new index of type Daru::Index or any of its subclasses.
#
# @param [Daru::Index] idx The new index object on which the vectors are to
#   be indexed. Must be of the same size as ncols.
# @return [Daru::DataFrame] self
# @raise [ArgumentError] if +idx+ is not a Daru::Index (or subclass), or if
#   its size differs from ncols.
# @example Reassigning vectors of a DataFrame
#   df = Daru::DataFrame.new({a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44]})
#   df.vectors.to_a #=> [:a, :b, :c]
#
#   df.vectors = Daru::Index.new([:foo, :bar, :baz])
#   df.vectors.to_a #=> [:foo, :bar, :baz]
def vectors= idx
  # Validate the argument itself, not the DataFrame's own row index.
  raise ArgumentError, "Can only reindex with Index and its subclasses" unless
    idx.kind_of?(Daru::Index)
  raise ArgumentError, "Specified index length #{idx.size} not equal to "\
    "dataframe size #{ncols}" if idx.size != ncols

  @vectors = idx
  self
end
1219
1329
 
1220
1330
  # Return the indexes of all the numeric vectors. Will include vectors with nils
@@ -1231,8 +1341,8 @@ module Daru
1231
1341
  def numeric_vector_names
1232
1342
  numerics = []
1233
1343
 
1234
- each_vector do |vec, i|
1235
- numerics << vec.name if(vec.type == :numeric)
1344
+ @vectors.each do |v|
1345
+ numerics << v if (self[v].type == :numeric)
1236
1346
  end
1237
1347
  numerics
1238
1348
  end
@@ -1248,7 +1358,7 @@ module Daru
1248
1358
  arr
1249
1359
  end
1250
1360
 
1251
- order = @vectors.is_a?(MultiIndex) ? MultiIndex.new(nv) : Index.new(nv)
1361
+ order = Index.new(nv)
1252
1362
  Daru::DataFrame.new(arry, clone: cln, order: order, index: @index)
1253
1363
  end
1254
1364
 
@@ -1302,9 +1412,11 @@ module Daru
1302
1412
 
1303
1413
  opts[:by] = create_logic_blocks vector_order, opts[:by]
1304
1414
  opts[:ascending] = sort_order_array vector_order, opts[:ascending]
1305
- index = @index.to_a
1306
- send(opts[:type], vector_order, index, opts[:by], opts[:ascending])
1307
- reindex! index
1415
+ idx = @index.to_a
1416
+ send(opts[:type], vector_order, idx, opts[:by], opts[:ascending])
1417
+ self.index = Daru::Index.new(idx)
1418
+
1419
+ self
1308
1420
  end
1309
1421
 
1310
1422
  # Non-destructive version of #sort!
@@ -1347,7 +1459,8 @@ module Daru
1347
1459
  # # [:bar] 18 26
1348
1460
  # # [:foo] 10 12
1349
1461
  def pivot_table opts={}
1350
- raise ArgumentError, "Specify grouping index" if !opts[:index] or opts[:index].empty?
1462
+ raise ArgumentError,
1463
+ "Specify grouping index" if !opts[:index] or opts[:index].empty?
1351
1464
 
1352
1465
  index = opts[:index]
1353
1466
  vectors = opts[:vectors] || []
@@ -1389,18 +1502,20 @@ module Daru
1389
1502
  end
1390
1503
  end
1391
1504
 
1392
- df_index = Daru::MultiIndex.new(symbolize(super_hash.keys))
1505
+ df_index = Daru::MultiIndex.from_tuples super_hash.keys
1393
1506
 
1394
1507
  vector_indexes = []
1395
1508
  super_hash.each_value do |sub_hash|
1396
1509
  vector_indexes.concat sub_hash.keys
1397
1510
  end
1398
- df_vectors = Daru::MultiIndex.new symbolize(vector_indexes.uniq)
1511
+
1512
+ df_vectors = Daru::MultiIndex.from_tuples vector_indexes.uniq
1399
1513
  pivoted_dataframe = Daru::DataFrame.new({}, index: df_index, order: df_vectors)
1400
1514
 
1401
1515
  super_hash.each do |row_index, sub_h|
1402
1516
  sub_h.each do |vector_index, val|
1403
- pivoted_dataframe[symbolize(vector_index)][symbolize(row_index)] = val
1517
+ # pivoted_dataframe[symbolize(vector_index)][symbolize(row_index)] = val
1518
+ pivoted_dataframe[vector_index][row_index] = val
1404
1519
  end
1405
1520
  end
1406
1521
  return pivoted_dataframe
@@ -1430,47 +1545,33 @@ module Daru
1430
1545
  df_new
1431
1546
  end
1432
1547
 
1433
- # Join 2 DataFrames by given fields
1434
- # type is one of :left and :inner, default is :left
1435
- #
1436
- # Untested! Use at your own risk.
1437
- #
1438
- # @return {Daru::DataFrame}
1439
- def join(other_ds,fields_1=[],fields_2=[],type=:left)
1440
- fields_new = other_ds.vectors.to_a - fields_2
1441
- fields = self.vectors.to_a + fields_new
1442
-
1443
- other_ds_hash = {}
1444
- other_ds.each_row do |row|
1445
- key = row.to_hash.select { |k,v| fields_2.include?(k) }.values
1446
- value = row.to_hash.select { |k,v| fields_new.include?(k) }
1447
-
1448
- if other_ds_hash[key].nil?
1449
- other_ds_hash[key] = [value]
1450
- else
1451
- other_ds_hash[key] << value
1452
- end
1453
- end
1454
-
1455
- new_ds = DataFrame.new({}, order: fields)
1456
-
1457
- self.each_row do |row|
1458
- key = row.to_hash.select{|k,v| fields_1.include?(k)}.values
1459
- new_case = row.to_hash
1460
-
1461
- if other_ds_hash[key].nil?
1462
- if type == :left
1463
- fields_new.each{|field| new_case[field] = nil}
1464
- new_ds.add_row(Daru::Vector.new(new_case))
1465
- end
1466
- else
1467
- other_ds_hash[key].each do |new_values|
1468
- new_ds.add_row(Daru::Vector.new(new_case.merge(new_values)))
1469
- end
1470
- end
1471
- end
1472
-
1473
- new_ds
1548
+ # Join 2 DataFrames with SQL style joins. Currently supports inner, left
1549
+ # outer, right outer and full outer joins.
1550
+ #
1551
+ # @param [Daru::DataFrame] other_df Another DataFrame on which the join is
1552
+ # to be performed.
1553
+ # @param [Hash] opts Options Hash
1554
+ # @option :how [Symbol] Can be one of :inner, :left, :right or :outer.
1555
+ # @option :on [Array] The columns on which the join is to be performed.
1556
+ # Column names specified here must be common to both DataFrames.
1557
+ # @return [Daru::DataFrame]
1558
+ # @example Inner Join
1559
+ # left = Daru::DataFrame.new({
1560
+ # :id => [1,2,3,4],
1561
+ # :name => ['Pirate', 'Monkey', 'Ninja', 'Spaghetti']
1562
+ # })
1563
+ # right = Daru::DataFrame.new({
1564
+ # :id => [1,2,3,4],
1565
+ # :name => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
1566
+ # })
1567
+ # left.join(right, how: :inner, on: [:name])
1568
+ # #=>
1569
+ # ##<Daru::DataFrame:82416700 @name = 74c0811b-76c6-4c42-ac93-e6458e82afb0 @size = 2>
1570
+ # # id_1 name id_2
1571
+ # # 0 1 Pirate 2
1572
+ # # 1 3 Ninja 4
1573
+ def join(other_df,opts={})
1574
+ Daru::Core::Merge.join(self, other_df, opts)
1474
1575
  end
1475
1576
 
1476
1577
 
@@ -1486,7 +1587,7 @@ module Daru
1486
1587
  # to new dataset, and fields which responds to second
1487
1588
  # pattern will be added one case for each different %n.
1488
1589
  #
1489
- # == Usage
1590
+ # @example
1490
1591
  # cases=[
1491
1592
  # ['1','george','red',10,'blue',20,nil,nil],
1492
1593
  # ['2','fred','green',15,'orange',30,'white',20],
@@ -1494,17 +1595,16 @@ module Daru
1494
1595
  # ]
1495
1596
  # ds=Daru::DataFrame.rows(cases, order: [:id, :name, :car_color1, :car_value1, :car_color2, :car_value2, :car_color3, :car_value3])
1496
1597
  # ds.one_to_many([:id],'car_%v%n').to_matrix
1497
- # => Matrix[
1498
- # ["red", "1", 10],
1499
- # ["blue", "1", 20],
1500
- # ["green", "2", 15],
1501
- # ["orange", "2", 30],
1502
- # ["white", "2", 20]
1503
- # ]
1504
- #
1598
+ # #=> Matrix[
1599
+ # # ["red", "1", 10],
1600
+ # # ["blue", "1", 20],
1601
+ # # ["green", "2", 15],
1602
+ # # ["orange", "2", 30],
1603
+ # # ["white", "2", 20]
1604
+ # # ]
1505
1605
  def one_to_many(parent_fields, pattern)
1506
1606
  re = Regexp.new pattern.gsub("%v","(.+?)").gsub("%n","(\\d+?)")
1507
- ds_vars = parent_fields
1607
+ ds_vars = parent_fields.dup
1508
1608
  vars = []
1509
1609
  max_n = 0
1510
1610
  h = parent_fields.inject({}) { |a,v|
@@ -1512,8 +1612,8 @@ module Daru
1512
1612
  a
1513
1613
  }
1514
1614
  # Adding _col_id
1515
- h[:_col_id] = Daru::Vector.new([])
1516
- ds_vars.push(:_col_id)
1615
+ h['_col_id'] = Daru::Vector.new([])
1616
+ ds_vars.push('_col_id')
1517
1617
 
1518
1618
  @vectors.each do |f|
1519
1619
  if f =~ re
@@ -1529,20 +1629,20 @@ module Daru
1529
1629
  each_row do |row|
1530
1630
  row_out = {}
1531
1631
  parent_fields.each do |f|
1532
- row_out[f]=row[f]
1632
+ row_out[f] = row[f]
1533
1633
  end
1534
1634
 
1535
1635
  max_n.times do |n1|
1536
1636
  n = n1+1
1537
1637
  any_data = false
1538
1638
  vars.each do |v|
1539
- data = row[pattern.gsub("%v",v.to_s).gsub("%n",n.to_s).to_sym]
1639
+ data = row[pattern.gsub("%v",v.to_s).gsub("%n",n.to_s)]
1540
1640
  row_out[v] = data
1541
1641
  any_data = true if !data.nil?
1542
1642
  end
1543
1643
 
1544
1644
  if any_data
1545
- row_out[:_col_id] = n
1645
+ row_out['_col_id'] = n
1546
1646
  ds.add_row(row_out)
1547
1647
  end
1548
1648
  end
@@ -1569,14 +1669,14 @@ module Daru
1569
1669
  # * table - String specifying name of the table that will created in SQL.
1570
1670
  # * charset - Character set. Default is "UTF8".
1571
1671
  #
1572
- # == Usage
1672
+ # @example
1573
1673
  #
1574
1674
  # ds = Daru::DataFrame.new({
1575
1675
  # :id => Daru::Vector.new([1,2,3,4,5]),
1576
1676
  # :name => Daru::Vector.new(%w{Alex Peter Susan Mary John})
1577
1677
  # })
1578
1678
  # ds.create_sql('names')
1579
- # ==>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
1679
+ # #=>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
1580
1680
  #
1581
1681
  def create_sql(table,charset="UTF8")
1582
1682
  sql = "CREATE TABLE #{table} ("
@@ -1639,6 +1739,8 @@ module Daru
1639
1739
  arry
1640
1740
  end
1641
1741
 
1742
+ # Convert to json. If no_index is true then the index will NOT be included
1743
+ # in the JSON thus created.
1642
1744
  def to_json no_index=true
1643
1745
  if no_index
1644
1746
  self.to_a[0].to_json
@@ -1681,7 +1783,7 @@ module Daru
1681
1783
  html += '</tr>'
1682
1784
  if num > threshold
1683
1785
  html += '<tr>'
1684
- (@vectors + 1).size.times { html += '<td>...</td>' }
1786
+ (@vectors.size + 1).times { html += '<td>...</td>' }
1685
1787
  html += '</tr>'
1686
1788
 
1687
1789
  last_index = @index.to_a.last
@@ -1713,12 +1815,9 @@ module Daru
1713
1815
  @data.each { |v| v.update } if Daru.lazy_update
1714
1816
  end
1715
1817
 
1818
+ # Rename the DataFrame.
1716
1819
  def rename new_name
1717
- if new_name.is_a?(Numeric)
1718
- @name = new_name
1719
- return
1720
- end
1721
- @name = new_name.to_sym
1820
+ @name = new_name
1722
1821
  end
1723
1822
 
1724
1823
  # Write this DataFrame to a CSV file.
@@ -1792,7 +1891,7 @@ module Daru
1792
1891
  # df.recast a: :nmatrix, c: :nmatrix
1793
1892
  def recast opts={}
1794
1893
  opts.each do |vector_name, dtype|
1795
- vector[vector_name].cast(dtype: dtype)
1894
+ self[vector_name].cast(dtype: dtype)
1796
1895
  end
1797
1896
  end
1798
1897
 
@@ -1840,16 +1939,24 @@ module Daru
1840
1939
  content
1841
1940
  end
1842
1941
 
1942
+ # Query a DataFrame by passing a Daru::Core::Query::BoolArray object.
1943
+ def where bool_array
1944
+ Daru::Core::Query.df_where self, bool_array
1945
+ end
1946
+
1843
1947
  def == other
1844
- @index == other.index and @size == other.size and @vectors == other.vectors and
1845
- @vectors.all? { |vector| self[vector, :vector] == other[vector, :vector] }
1948
+ self.class == other.class and
1949
+ @size == other.size and
1950
+ @index == other.index and
1951
+ @vectors == other.vectors and
1952
+ @vectors.to_a.all? { |v| self[v] == other[v] }
1846
1953
  end
1847
1954
 
1848
1955
  def method_missing(name, *args, &block)
1849
1956
  if md = name.match(/(.+)\=/)
1850
1957
  insert_or_modify_vector name[/(.+)\=/].delete("=").to_sym, args[0]
1851
1958
  elsif self.has_vector? name
1852
- self[name, :vector]
1959
+ self[name]
1853
1960
  else
1854
1961
  super(name, *args, &block)
1855
1962
  end
@@ -1859,7 +1966,7 @@ module Daru
1859
1966
 
1860
1967
  def possibly_multi_index? index
1861
1968
  if @index.is_a?(MultiIndex)
1862
- Daru::MultiIndex.new(index)
1969
+ Daru::MultiIndex.from_tuples(index)
1863
1970
  else
1864
1971
  Daru::Index.new(index)
1865
1972
  end
@@ -1892,7 +1999,7 @@ module Daru
1892
1999
 
1893
2000
  def partition vector_order, index, by, ascending, left_lower, right_upper
1894
2001
  mindex = (left_lower + right_upper) / 2
1895
- mvalues = vector_order.inject([]) { |a, vector_name| a << vector[vector_name][mindex]; a }
2002
+ mvalues = vector_order.inject([]) { |a, vector_name| a << self[vector_name][mindex]; a }
1896
2003
  i = left_lower
1897
2004
  j = right_upper
1898
2005
  descending = ascending.map { |a| !a }
@@ -1929,7 +2036,7 @@ module Daru
1929
2036
  def keep? current_index, mvalues, vector_order, sort_order, by, vector_order_index
1930
2037
  vector_name = vector_order[vector_order_index]
1931
2038
  if vector_name
1932
- vec = vector[vector_name]
2039
+ vec = self[vector_name]
1933
2040
  eval = by[vector_name].call(vec[current_index], mvalues[vector_order_index])
1934
2041
 
1935
2042
  if sort_order[vector_order_index] # sort in ascending order
@@ -1980,28 +2087,41 @@ module Daru
1980
2087
 
1981
2088
  return dup(@vectors[location]) if location.is_a?(Range)
1982
2089
  if @vectors.is_a?(MultiIndex)
1983
- pos = vectors_index_for names
2090
+ pos = @vectors[names]
1984
2091
 
1985
2092
  if pos.is_a?(Integer)
1986
2093
  return @data[pos]
1987
2094
  else # MultiIndex
1988
2095
  new_vectors = pos.map do |tuple|
1989
- @data[vectors_index_for(names + tuple)]
2096
+ @data[@vectors[tuple]]
1990
2097
  end
1991
- Daru::DataFrame.new(new_vectors, index: @index, order: Daru::MultiIndex.new(pos.to_a))
2098
+
2099
+ if !location.is_a?(Range) and names.size < @vectors.width
2100
+ pos = pos.drop_left_level names.size
2101
+ end
2102
+
2103
+ Daru::DataFrame.new(
2104
+ new_vectors, index: @index, order: pos)
1992
2105
  end
1993
2106
  else
1994
2107
  unless names[1]
1995
- pos = vectors_index_for location
1996
- return @data[pos]
2108
+ pos = @vectors[location]
2109
+
2110
+ if pos.is_a?(Numeric)
2111
+ return @data[pos]
2112
+ else
2113
+ names = pos
2114
+ end
1997
2115
  end
1998
2116
 
1999
- new_vcs = {}
2117
+ new_vcs = []
2000
2118
  names.each do |name|
2001
- name = name.to_sym unless name.is_a?(Integer)
2002
- new_vcs[name] = @data[@vectors[name]]
2119
+ new_vcs << @data[@vectors[name]].to_a
2003
2120
  end
2004
- Daru::DataFrame.new new_vcs, order: new_vcs.keys, index: @index, name: @name
2121
+
2122
+ order = names.is_a?(Array) ? Daru::Index.new(names) : names
2123
+ Daru::DataFrame.new(new_vcs, order: order,
2124
+ index: @index, name: @name)
2005
2125
  end
2006
2126
  end
2007
2127
 
@@ -2009,82 +2129,55 @@ module Daru
2009
2129
  location = names[0]
2010
2130
 
2011
2131
  if @index.is_a?(MultiIndex)
2012
- pos = row_index_for names
2132
+ pos = @index[names]
2013
2133
  if pos.is_a?(Integer)
2014
2134
  return Daru::Vector.new(populate_row_for(pos), index: @vectors, name: pos)
2015
2135
  else
2016
- new_rows =
2017
- if location.is_a?(Range)
2018
- pos.map { |tuple| populate_row_for(tuple) }
2019
- else
2020
- pos.map { |tuple| populate_row_for(names + tuple) }
2021
- end
2136
+ new_rows = pos.map { |tuple| populate_row_for(tuple) }
2022
2137
 
2023
- Daru::DataFrame.rows(new_rows, order: @vectors, name: @name,
2024
- index: Daru::MultiIndex.new(pos.to_a))
2138
+ if !location.is_a?(Range) and names.size < @index.width
2139
+ pos = pos.drop_left_level names.size
2140
+ end
2141
+
2142
+ Daru::DataFrame.rows(
2143
+ new_rows, order: @vectors, name: @name, index: pos)
2025
2144
  end
2026
2145
  else
2027
2146
  if names[1].nil?
2028
- if location.is_a?(Range)
2029
- index_arry = @index.to_a
2030
-
2031
- range =
2032
- if location.first.is_a?(Numeric)
2033
- location
2034
- else
2035
- first_index = index_arry.index location.first
2036
- last_index = index_arry.index location.last
2037
-
2038
- first_index..last_index
2147
+ names = @index[location]
2148
+ if names.is_a?(Numeric)
2149
+ row = []
2150
+ @data.each do |vector|
2151
+ row << vector[location]
2039
2152
  end
2040
2153
 
2041
- names = index_arry[range]
2042
- else
2043
- row = []
2044
- name = named_index_for names[0]
2045
- @vectors.each do |vector|
2046
- row << @data[@vectors[vector]][name]
2047
- end
2048
-
2049
- return Daru::Vector.new(row, index: @vectors, name: set_name(name))
2154
+ return Daru::Vector.new(row, index: @vectors, name: set_name(location))
2050
2155
  end
2051
2156
  end
2052
2157
  # Access multiple rows
2053
2158
  rows = []
2054
2159
  names.each do |name|
2055
- rows << self.row[name]
2160
+ rows << self.row[name].to_a
2056
2161
  end
2057
2162
 
2058
- Daru::DataFrame.rows rows, name: @name
2059
- end
2060
- end
2061
-
2062
- def row_index_for location
2063
- if @index.include?(location) or location[0].is_a?(Range)
2064
- @index[location]
2065
- elsif location[0].is_a?(Integer)
2066
- location[0]
2163
+ Daru::DataFrame.rows rows, index: names ,name: @name, order: @vectors
2067
2164
  end
2068
2165
  end
2069
2166
 
2070
2167
  def populate_row_for pos
2071
- @vectors.map do |vector|
2072
- @data[@vectors[vector]][pos]
2168
+ @data.map do |vector|
2169
+ vector[pos]
2073
2170
  end
2074
2171
  end
2075
2172
 
2076
2173
  def insert_or_modify_vector name, vector
2077
- if vectors.is_a?(Index)
2078
- name = name[0]
2079
- end
2174
+ name = name[0] unless @vectors.is_a?(MultiIndex)
2175
+ v = nil
2080
2176
 
2081
- @vectors = @vectors + name if !@vectors.include?(name)
2082
- v = nil
2083
-
2084
2177
  if @index.empty?
2085
2178
  v = vector.is_a?(Daru::Vector) ? vector : Daru::Vector.new(vector.to_a)
2086
2179
  @index = v.index
2087
- @data[@vectors[name]] = v
2180
+ assign_or_add_vector name, v
2088
2181
  set_size
2089
2182
 
2090
2183
  @data.map! do |v|
@@ -2096,21 +2189,47 @@ module Daru
2096
2189
  end
2097
2190
  else
2098
2191
  if vector.is_a?(Daru::Vector)
2099
- v = Daru::Vector.new [], name: set_name(name), index: @index
2100
- @index.each do |idx|
2101
- v[idx] = vector[idx]
2192
+ if vector.index == @index # so that index-by-index assignment is avoided when possible.
2193
+ v = vector.dup
2194
+ else
2195
+ v = Daru::Vector.new [], name: set_name(name), index: @index
2196
+ @index.each do |idx|
2197
+ if vector.index.include? idx
2198
+ v[idx] = vector[idx]
2199
+ else
2200
+ v[idx] = nil
2201
+ end
2202
+ end
2102
2203
  end
2103
2204
  else
2104
- raise Exception, "Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}" if
2205
+ raise SizeError,
2206
+ "Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}" if
2105
2207
  @size != vector.size
2106
2208
 
2107
2209
  v = Daru::Vector.new(vector, name: set_name(name), index: @index)
2108
2210
  end
2109
2211
 
2110
- @data[@vectors[name]] = v
2212
+ assign_or_add_vector name, v
2111
2213
  end
2112
2214
  end
2113
2215
 
2216
+ def assign_or_add_vector name, v
2217
+ # FIXME: fix this hack (workaround). Need to make changes in Indexing itself.
2218
+ pos = @vectors[name]
2219
+
2220
+ if !pos.kind_of?(Daru::Index) and pos == name and
2221
+ (@vectors.include?(name) or (pos.is_a?(Integer) and pos < @data.size))
2222
+ @data[pos] = v
2223
+ elsif pos.kind_of?(Daru::Index)
2224
+ pos.each do |p|
2225
+ @data[@vectors[p]] = v
2226
+ end
2227
+ else
2228
+ @vectors = @vectors | [name] if !@vectors.include?(name)
2229
+ @data[@vectors[name]] = v
2230
+ end
2231
+ end
2232
+
2114
2233
  def insert_or_modify_row name, vector
2115
2234
  if index.is_a?(MultiIndex)
2116
2235
  # TODO
@@ -2124,13 +2243,13 @@ module Daru
2124
2243
  end
2125
2244
 
2126
2245
  if @index.include? name
2127
- @vectors.each do |vector|
2128
- @data[@vectors[vector]][name] = v[vector]
2246
+ self.each_vector_with_index do |vector,i|
2247
+ vector[name] = v.index.include?(i) ? v[i] : nil
2129
2248
  end
2130
2249
  else
2131
- @index = reassign_index_as(@index + name)
2132
- @vectors.each do |vector|
2133
- @data[@vectors[vector]].concat v[vector], name
2250
+ @index = @index | [name]
2251
+ self.each_vector_with_index do |vector,i|
2252
+ vector.concat((v.index.include?(i) ? v[i] : nil), name)
2134
2253
  end
2135
2254
  end
2136
2255
 
@@ -2184,14 +2303,11 @@ module Daru
2184
2303
  end
2185
2304
 
2186
2305
  def create_vectors_index_with vectors, source
2187
- vectors = source.keys.sort if vectors.nil?
2306
+ vectors = source.keys.sort_by { |a| a.to_s } if vectors.nil?
2188
2307
 
2189
2308
  @vectors =
2190
2309
  unless vectors.is_a?(Index) or vectors.is_a?(MultiIndex)
2191
- Daru::Index.new((vectors + (source.keys - vectors))
2192
- .uniq
2193
- .map { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
2194
- )
2310
+ Daru::Index.new((vectors + (source.keys - vectors)).uniq)
2195
2311
  else
2196
2312
  vectors
2197
2313
  end
@@ -2200,21 +2316,17 @@ module Daru
2200
2316
  def all_vectors_have_equal_indexes? source
2201
2317
  idx = source.values[0].index
2202
2318
 
2203
- source.all? do |name, vector|
2319
+ source.values.all? do |vector|
2204
2320
  idx == vector.index
2205
2321
  end
2206
2322
  end
2207
2323
 
2208
- def reassign_index_as new_index
2209
- Daru::Index.new new_index
2210
- end
2211
-
2212
- def create_index index
2213
- index.is_a?(MultiIndex) ? index : Daru::Index.new(index)
2324
+ def try_create_index index
2325
+ index.kind_of?(Index) ? index : Daru::Index.new(index)
2214
2326
  end
2215
2327
 
2216
2328
  def set_name potential_name
2217
- potential_name.is_a?(Array) ? potential_name.join.to_sym : potential_name
2329
+ potential_name.is_a?(Array) ? potential_name.join : potential_name
2218
2330
  end
2219
2331
 
2220
2332
  def symbolize arry