daru 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -33,6 +33,14 @@ module Daru
33
33
  hsh.each { |k,v| hsh[k] = v.to_a }
34
34
  hsh
35
35
  end
36
+
37
+ def arrayify df
38
+ arr = df.to_a
39
+ col_names = arr[0][0].keys
40
+ values = arr[0].map{|h| h.values}
41
+
42
+ return col_names, values
43
+ end
36
44
 
37
45
  def inner_join df1, df2, df_hash1, df_hash2, on
38
46
  joined_hash = {}
@@ -53,6 +61,52 @@ module Daru
53
61
  Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
54
62
  end
55
63
 
64
+ def bf_inner_join df1, df2, on
65
+ col_names1, table1 = arrayify df1
66
+ col_names2, table2 = arrayify df2
67
+
68
+ #resolve duplicates
69
+ indicies1 = on.map{|i| col_names1.index(i)}
70
+ indicies2 = on.map{|i| col_names2.index(i)}
71
+ col_names2.map! do |name|
72
+ if (col_names1.include?(name))
73
+ col_names1[col_names1.index(name)] = (name.to_s + "_1").to_sym unless on.include?(name)
74
+ (name.to_s + "_2").to_sym
75
+ else
76
+ name
77
+ end
78
+ end
79
+
80
+ #combine key columns to a single column value
81
+ on_cols1 = table1.flat_map{|x| indicies1.map{|i| x[i].to_s}.join("+")}
82
+ on_cols2 = table2.flat_map{|x| indicies2.map{|i| x[i].to_s}.join("+")}
83
+
84
+ #parameters for a BF with approx 0.1% false positives
85
+ m = on_cols2.size * 15
86
+ k = 11
87
+
88
+ bf = BloomFilter::Native.new({:size => m, :hashes => k, :bucket => 1})
89
+ on_cols2.each{|x| bf.insert(x)}
90
+
91
+ x_ind = -1
92
+ joined_new = on_cols1.map do |x|
93
+ x_ind+=1
94
+ if (bf.include?(x))
95
+ {x_ind => on_cols2.each_index.select{|y_ind| on_cols2[y_ind] == x}}
96
+ else
97
+ {x_ind => []}
98
+ end
99
+ end
100
+ .reduce({}) {|h,pairs| pairs.each {|k,v| (h[k] ||= []) << v}; h}
101
+ .flat_map{|ind1, inds2| inds2.flatten.map{|ind2| [table1[ind1], table2[ind2]].flatten} if inds2.flatten.size > 0}
102
+
103
+ joined_cols = [col_names1, col_names2].flatten
104
+ df = Daru::DataFrame.rows(joined_new.compact, order: joined_cols)
105
+ on.each{|x| df.delete_vector (x.to_s + "_2").to_sym}
106
+
107
+ df
108
+ end
109
+
56
110
  def full_outer_join df1, df2, df_hash1, df_hash2, on
57
111
  left = left_outer_join df1, df2, df_hash1, df_hash2, on, true
58
112
  right = right_outer_join df1, df2, df_hash1, df_hash2, on, true
@@ -153,7 +207,11 @@ module Daru
153
207
 
154
208
  case opts[:how]
155
209
  when :inner
156
- helper.inner_join df1, df2, df_hash1, df_hash2, on
210
+ if Daru.has_bloomfilter_rb?
211
+ helper.bf_inner_join df1, df2, on
212
+ else
213
+ helper.inner_join df1, df2, df_hash1, df_hash2, on
214
+ end
157
215
  when :outer
158
216
  helper.full_outer_join df1, df2, df_hash1, df_hash2, on
159
217
  when :left
@@ -14,30 +14,30 @@ module Daru
14
14
  include Daru::Plotting::DataFrame if Daru.has_nyaplot?
15
15
 
16
16
  class << self
17
- # Load data from a CSV file. Specify an optional block to grab the CSV
18
- # object and pre-condition it (for example use the `convert` or
17
+ # Load data from a CSV file. Specify an optional block to grab the CSV
18
+ # object and pre-condition it (for example use the `convert` or
19
19
  # `header_convert` methods).
20
- #
20
+ #
21
21
  # == Arguments
22
- #
22
+ #
23
23
  # * path - Path of the file to load specified as a String.
24
- #
24
+ #
25
25
  # == Options
26
- #
26
+ #
27
27
  # Accepts the same options as the Daru::DataFrame constructor and CSV.open()
28
28
  # and uses those to eventually construct the resulting DataFrame.
29
29
  #
30
30
  # == Verbose Description
31
31
  #
32
- # You can specify all the options to the `.from_csv` function that you
32
+ # You can specify all the options to the `.from_csv` function that you
33
33
  # do to the Ruby `CSV.read()` function, since this is what is used internally.
34
34
  #
35
- # For example, if the columns in your CSV file are separated by something
36
- # other than commas, you can use the `:col_sep` option. If you want to
37
- # convert numeric values to numbers and not keep them as strings, you can
35
+ # For example, if the columns in your CSV file are separated by something
36
+ # other than commas, you can use the `:col_sep` option. If you want to
37
+ # convert numeric values to numbers and not keep them as strings, you can
38
38
  # use the `:converters` option and set it to `:numeric`.
39
39
  #
40
- # The `.from_csv` function uses the following defaults for reading CSV files
40
+ # The `.from_csv` function uses the following defaults for reading CSV files
41
41
  # (that are passed into the `CSV.read()` function):
42
42
  #
43
43
  # {
@@ -45,24 +45,29 @@ module Daru
45
45
  # :converters => :numeric
46
46
  # }
47
47
  def from_csv path, opts={}, &block
48
- Daru::IO.from_csv path, opts, &block
48
+ Daru::IO.from_csv path, opts, &block
49
49
  end
50
50
 
51
51
  # Read data from an Excel file into a DataFrame.
52
- #
52
+ #
53
53
  # == Arguments
54
- #
54
+ #
55
55
  # * path - Path of the file to be read.
56
- #
56
+ #
57
57
  # == Options
58
- #
58
+ #
59
59
  # *:worksheet_id - ID of the worksheet that is to be read.
60
- def from_excel path, opts={}, &block
60
+ def from_excel path, opts={}, &block
61
61
  Daru::IO.from_excel path, opts, &block
62
62
  end
63
63
 
64
64
  # Runs a database query and returns the result as a DataFrame
65
65
  #
66
+ # @param dbh [DBI::DatabaseHandle] A DBI connection to be used to run the query
67
+ # @param query [String] The query to be executed
68
+ #
69
+ # @return A dataframe containing the data resulting from the query
70
+ #
66
71
  # USE:
67
72
  #
68
73
  # dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
@@ -71,17 +76,37 @@ module Daru
71
76
  Daru::IO.from_sql dbh, query
72
77
  end
73
78
 
79
+ # Read a dataframe from AR::Relation
80
+ #
81
+ # @param relation [ActiveRecord::Relation] An AR::Relation object from which data is loaded
82
+ # @param fields [Array] Field names to be loaded (optional)
83
+ #
84
+ # @return A dataframe containing the data loaded from the relation
85
+ #
86
+ # USE:
87
+ #
88
+ # # When Post model is defined as:
89
+ # class Post < ActiveRecord::Base
90
+ # scope :active, -> { where.not(published_at: nil) }
91
+ # end
92
+ #
93
+ # # You can load active posts into a dataframe by:
94
+ # Daru::DataFrame.from_activerecord(Post.active, :title, :published_at)
95
+ def from_activerecord relation, *fields
96
+ Daru::IO.from_activerecord relation, *fields
97
+ end
98
+
74
99
  # Read the database from a plaintext file. For this method to work,
75
100
  # the data should be present in a plain text file in columns. See
76
101
  # spec/fixtures/bank2.dat for an example.
77
- #
102
+ #
78
103
  # == Arguments
79
- #
104
+ #
80
105
  # * path - Path of the file to be read.
81
106
  # * fields - Vector names of the resulting database.
82
- #
107
+ #
83
108
  # == Usage
84
- #
109
+ #
85
110
  # df = Daru::DataFrame.from_plaintext 'spec/fixtures/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6]
86
111
  def from_plaintext path, fields
87
112
  Daru::IO.from_plaintext path, fields
@@ -137,15 +162,15 @@ module Daru
137
162
  #
138
163
  # Useful to process outputs from databases
139
164
  def crosstab_by_assignation rows, columns, values
140
- raise "Three vectors should be equal size" if
165
+ raise "Three vectors should be equal size" if
141
166
  rows.size != columns.size or rows.size!=values.size
142
167
 
143
168
  cols_values = columns.factors
144
169
  cols_n = cols_values.size
145
170
 
146
- h_rows = rows.factors.inject({}) do |a,v|
147
- a[v] = cols_values.inject({}) do |a1,v1|
148
- a1[v1]=nil
171
+ h_rows = rows.factors.inject({}) do |a,v|
172
+ a[v] = cols_values.inject({}) do |a1,v1|
173
+ a1[v1]=nil
149
174
  a1
150
175
  end
151
176
  a
@@ -186,38 +211,38 @@ module Daru
186
211
  # These objects are indexed by row and column by vectors and index Index objects.
187
212
  #
188
213
  # == Arguments
189
- #
214
+ #
190
215
  # * source - Source from which the DataFrame is to be initialized. Can be a Hash
191
216
  # of names and vectors (array or Daru::Vector), an array of arrays or
192
217
  # array of Daru::Vectors.
193
- #
218
+ #
194
219
  # == Options
195
- #
196
- # +:order+ - An *Array*/*Daru::Index*/*Daru::MultiIndex* containing the order in
220
+ #
221
+ # +:order+ - An *Array*/*Daru::Index*/*Daru::MultiIndex* containing the order in
197
222
  # which Vectors should appear in the DataFrame.
198
- #
223
+ #
199
224
  # +:index+ - An *Array*/*Daru::Index*/*Daru::MultiIndex* containing the order
200
225
  # in which rows of the DataFrame will be named.
201
- #
226
+ #
202
227
  # +:name+ - A name for the DataFrame.
203
228
  #
204
229
  # +:clone+ - Specify as *true* or *false*. When set to false, and Vector
205
230
  # objects are passed for the source, the Vector objects will not be duplicated
206
- # when creating the DataFrame. Will have no effect if Array is passed in
207
- # the source, or if the passed Daru::Vectors have different indexes.
231
+ # when creating the DataFrame. Will have no effect if Array is passed in
232
+ # the source, or if the passed Daru::Vectors have different indexes.
208
233
  # Default to *true*.
209
- #
234
+ #
210
235
  # == Usage
211
- # df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
236
+ # df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
212
237
  # index: [:a, :b, :c, :d], name: :spider_man)
213
- #
214
- # # =>
238
+ #
239
+ # # =>
215
240
  # # <Daru::DataFrame:80766980 @name = spider_man @size = 4>
216
- # # b a
217
- # # a 6 1
218
- # # b 7 2
219
- # # c 8 3
220
- # # d 9 4
241
+ # # b a
242
+ # # a 6 1
243
+ # # b 7 2
244
+ # # c 8 3
245
+ # # d 9 4
221
246
  def initialize source, opts={}
222
247
  vectors = opts[:order]
223
248
  index = opts[:index]
@@ -292,7 +317,7 @@ module Daru
292
317
  @vectors.each do |vector|
293
318
  # avoids matching indexes of vectors if all the supplied vectors
294
319
  # have the same index.
295
- if vectors_have_same_index
320
+ if vectors_have_same_index
296
321
  v = source[vector].dup
297
322
  else
298
323
  v = Daru::Vector.new([], name: vector, index: @index)
@@ -331,8 +356,8 @@ module Daru
331
356
  end
332
357
 
333
358
  # Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
334
- # Defaults to *:vector*. Use of this method is not recommended for accessing
335
- # rows or vectors. Use df.row[:a] for accessing row with index ':a' or
359
+ # Defaults to *:vector*. Use of this method is not recommended for accessing
360
+ # rows or vectors. Use df.row[:a] for accessing row with index ':a' or
336
361
  # df.vector[:vec] for accessing vector with index *:vec*.
337
362
  def [](*names)
338
363
  if names[-1] == :vector or names[-1] == :row
@@ -354,7 +379,7 @@ module Daru
354
379
  # Insert a new row/vector of the specified name or modify a previous row.
355
380
  # Instead of using this method directly, use df.row[:a] = [1,2,3] to set/create
356
381
  # a row ':a' to [1,2,3], or df.vector[:vec] = [1,2,3] for vectors.
357
- #
382
+ #
358
383
  # In case a Daru::Vector is specified after the equals sign, the indexes
359
384
  # of the vector will be matched against the row/vector indexes of the DataFrame
360
385
  # before an insertion is performed. Unmatched indexes will be set to nil.
@@ -368,7 +393,7 @@ module Daru
368
393
 
369
394
  if axis == :vector
370
395
  insert_or_modify_vector name, vector
371
- elsif axis == :row
396
+ elsif axis == :row
372
397
  insert_or_modify_row name, vector
373
398
  else
374
399
  raise IndexError, "Expected axis to be row or vector, not #{axis}."
@@ -389,7 +414,7 @@ module Daru
389
414
  end
390
415
 
391
416
  # Access a row or set/create a row. Refer #[] and #[]= docs for details.
392
- #
417
+ #
393
418
  # == Usage
394
419
  # df.row[:a] # access row named ':a'
395
420
  # df.row[:b] = [1,2,3] # set row ':b' to [1,2,3]
@@ -398,17 +423,17 @@ module Daru
398
423
  end
399
424
 
400
425
  # Duplicate the DataFrame entirely.
401
- #
426
+ #
402
427
  # == Arguments
403
- #
404
- # * +vectors_to_dup+ - An Array specifying the names of Vectors to
428
+ #
429
+ # * +vectors_to_dup+ - An Array specifying the names of Vectors to
405
430
  # be duplicated. Will duplicate the entire DataFrame if not specified.
406
431
  def dup vectors_to_dup=nil
407
432
  vectors_to_dup = @vectors.to_a unless vectors_to_dup
408
433
 
409
434
  src = []
410
435
  vectors_to_dup.each do |vec|
411
- src << @data[@vectors[vec]].to_a
436
+ src << @data[@vectors[vec]].to_a.dup
412
437
  end
413
438
  new_order = Daru::Index.new(vectors_to_dup)
414
439
 
@@ -422,9 +447,9 @@ module Daru
422
447
 
423
448
  # Returns a 'view' of the DataFrame, i.e the object ID's of vectors are
424
449
  # preserved.
425
- #
450
+ #
426
451
  # == Arguments
427
- #
452
+ #
428
453
  # +vectors_to_clone+ - Names of vectors to clone. Optional. Will return
429
454
  # a view of the whole data frame otherwise.
430
455
  def clone *vectors_to_clone
@@ -438,7 +463,7 @@ module Daru
438
463
  Daru::DataFrame.new(h, clone: false)
439
464
  end
440
465
 
441
- # Returns a 'shallow' copy of DataFrame if missing data is not present,
466
+ # Returns a 'shallow' copy of DataFrame if missing data is not present,
442
467
  # or a full copy of only valid data if missing data is present.
443
468
  def clone_only_valid
444
469
  if has_missing_data?
@@ -448,7 +473,7 @@ module Daru
448
473
  end
449
474
  end
450
475
 
451
- # Creates a new duplicate dataframe containing only rows
476
+ # Creates a new duplicate dataframe containing only rows
452
477
  # without a single missing value.
453
478
  def dup_only_valid vecs=nil
454
479
  rows_with_nil = @data.inject([]) do |memo, vector|
@@ -485,7 +510,7 @@ module Daru
485
510
 
486
511
  @vectors.each do |vector|
487
512
  yield @data[@vectors[vector]], vector
488
- end
513
+ end
489
514
 
490
515
  self
491
516
  end
@@ -518,12 +543,12 @@ module Daru
518
543
  #
519
544
  # == Description
520
545
  #
521
- # `#each` works exactly like Array#each. The default mode for `each`
522
- # is to iterate over the columns of the DataFrame. To iterate over
546
+ # `#each` works exactly like Array#each. The default mode for `each`
547
+ # is to iterate over the columns of the DataFrame. To iterate over
523
548
  # rows you must pass the axis, i.e `:row` as an argument.
524
- #
549
+ #
525
550
  # == Arguments
526
- #
551
+ #
527
552
  # * +axis+ - The axis to iterate over. Can be :vector (or :column)
528
553
  # or :row. Default to :vector.
529
554
  def each axis=:vector, &block
@@ -541,14 +566,14 @@ module Daru
541
566
  #
542
567
  # == Description
543
568
  #
544
- # The #collect iterator works similar to #map, the only difference
545
- # being that it returns a Daru::Vector comprising of the results of
546
- # each block run. The resultant Vector has the same index as that
547
- # of the axis over which collect has iterated. It also accepts the
569
+ # The #collect iterator works similar to #map, the only difference
570
+ # being that it returns a Daru::Vector comprising of the results of
571
+ # each block run. The resultant Vector has the same index as that
572
+ # of the axis over which collect has iterated. It also accepts the
548
573
  # optional axis argument.
549
574
  #
550
575
  # == Arguments
551
- #
576
+ #
552
577
  # * +axis+ - The axis to iterate over. Can be :vector (or :column)
553
578
  # or :row. Default to :vector.
554
579
  def collect axis=:vector, &block
@@ -565,16 +590,16 @@ module Daru
565
590
  # the argument specified. Will return an Array of the resulting
566
591
  # elements. To map over each row/vector and get a DataFrame,
567
592
  # see #recode.
568
- #
593
+ #
569
594
  # == Description
570
- #
571
- # The #map iterator works like Array#map. The value returned by
572
- # each run of the block is added to an Array and the Array is
573
- # returned. This method also accepts an axis argument, like #each.
595
+ #
596
+ # The #map iterator works like Array#map. The value returned by
597
+ # each run of the block is added to an Array and the Array is
598
+ # returned. This method also accepts an axis argument, like #each.
574
599
  # The default is :vector.
575
- #
600
+ #
576
601
  # == Arguments
577
- #
602
+ #
578
603
  # * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
579
604
  # Default to :vector.
580
605
  def map axis=:vector, &block
@@ -590,9 +615,9 @@ module Daru
590
615
  # Destructive map. Modifies the DataFrame. Each run of the block
591
616
  # must return a Daru::Vector. You can specify the axis to map over
592
617
  # as the argument. Default to :vector.
593
- #
618
+ #
594
619
  # == Arguments
595
- #
620
+ #
596
621
  # * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
597
622
  # Default to :vector.
598
623
  def map! axis=:vector, &block
@@ -609,15 +634,15 @@ module Daru
609
634
  #
610
635
  # == Description
611
636
  #
612
- # Recode works similarly to #map, but an important difference between
613
- # the two is that recode returns a modified Daru::DataFrame instead
614
- # of an Array. For this reason, #recode expects that every run of the
637
+ # Recode works similarly to #map, but an important difference between
638
+ # the two is that recode returns a modified Daru::DataFrame instead
639
+ # of an Array. For this reason, #recode expects that every run of the
615
640
  # block to return a Daru::Vector.
616
641
  #
617
642
  # Just like map and each, recode also accepts an optional _axis_ argument.
618
- #
643
+ #
619
644
  # == Arguments
620
- #
645
+ #
621
646
  # * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
622
647
  # Default to :vector.
623
648
  def recode axis=:vector, &block
@@ -629,22 +654,22 @@ module Daru
629
654
  end
630
655
 
631
656
  # Retain vectors or rows if the block returns a truthy value.
632
- #
657
+ #
633
658
  # == Description
634
- #
635
- # For filtering out certain rows/vectors based on their values,
636
- # use the #filter method. By default it iterates over vectors and
637
- # keeps those vectors for which the block returns true. It accepts
638
- # an optional axis argument which lets you specify whether you want
659
+ #
660
+ # For filtering out certain rows/vectors based on their values,
661
+ # use the #filter method. By default it iterates over vectors and
662
+ # keeps those vectors for which the block returns true. It accepts
663
+ # an optional axis argument which lets you specify whether you want
639
664
  # to iterate over vectors or rows.
640
- #
665
+ #
641
666
  # == Arguments
642
- #
667
+ #
643
668
  # * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
644
669
  # Default to :vector.
645
- #
670
+ #
646
671
  # == Usage
647
- #
672
+ #
648
673
  # # Filter vectors
649
674
  #
650
675
  # df.filter do |vector|
@@ -665,12 +690,12 @@ module Daru
665
690
  end
666
691
 
667
692
  def recode_vectors &block
668
- block_given? or return to_enum(:recode_vectors)
693
+ block_given? or return to_enum(:recode_vectors)
669
694
 
670
695
  df = self.dup
671
696
  df.each_vector_with_index do |v, i|
672
697
  ret = yield v
673
- ret.is_a?(Daru::Vector) or
698
+ ret.is_a?(Daru::Vector) or
674
699
  raise TypeError, "Every iteration must return Daru::Vector not #{ret.class}"
675
700
  df[*i] = ret
676
701
  end
@@ -763,7 +788,7 @@ module Daru
763
788
  self
764
789
  end
765
790
 
766
- # Retrieves a Daru::Vector, based on the result of calculation
791
+ # Retrieves a Daru::Vector, based on the result of calculation
767
792
  # performed on each row.
768
793
  def collect_rows &block
769
794
  return to_enum(:collect_rows) unless block_given?
@@ -878,15 +903,15 @@ module Daru
878
903
 
879
904
  deletion << index unless keep_row
880
905
  end
881
- deletion.each { |idx|
882
- delete_row idx
906
+ deletion.each { |idx|
907
+ delete_row idx
883
908
  }
884
909
  end
885
910
 
886
911
  def keep_vector_if &block
887
912
  @vectors.each do |vector|
888
913
  keep_vector = yield @data[@vectors[vector]], vector
889
-
914
+
890
915
  delete_vector vector unless keep_vector
891
916
  end
892
917
  end
@@ -925,7 +950,7 @@ module Daru
925
950
  # true for that vector.
926
951
  def filter_vectors &block
927
952
  return to_enum(:filter_vectors) unless block_given?
928
-
953
+
929
954
  df = self.dup
930
955
  df.keep_vector_if &block
931
956
 
@@ -934,7 +959,7 @@ module Daru
934
959
 
935
960
  # Test each row with one or more tests. Each test is a Proc with the form
936
961
  # *Proc.new {|row| row[:age] > 0}*
937
- #
962
+ #
938
963
  # The function returns an array with all errors.
939
964
  def verify(*tests)
940
965
  if(tests[0].is_a? Symbol)
@@ -963,9 +988,9 @@ module Daru
963
988
 
964
989
  # DSL for yielding each row and returning a Daru::Vector based on the
965
990
  # value each run of the block returns.
966
- #
991
+ #
967
992
  # == Usage
968
- #
993
+ #
969
994
  # a1 = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7])
970
995
  # a2 = Daru::Vector.new([10, 20, 30, 40, 50, 60, 70])
971
996
  # a3 = Daru::Vector.new([100, 200, 300, 400, 500, 600, 700])
@@ -991,10 +1016,10 @@ module Daru
991
1016
 
992
1017
  # Returns a vector, based on a string with a calculation based
993
1018
  # on vector.
994
- #
1019
+ #
995
1020
  # The calculation will be eval'ed, so you can put any variable
996
1021
  # or expression valid on ruby.
997
- #
1022
+ #
998
1023
  # For example:
999
1024
  # a = Daru::Vector.new [1,2]
1000
1025
  # b = Daru::Vector.new [3,4]
@@ -1003,14 +1028,14 @@ module Daru
1003
1028
  # => Vector [4,6]
1004
1029
  def compute text, &block
1005
1030
  return instance_eval(&block) if block_given?
1006
- instance_eval(text)
1031
+ instance_eval(text)
1007
1032
  end
1008
1033
 
1009
1034
  # Return a vector with the number of missing values in each row.
1010
- #
1035
+ #
1011
1036
  # == Arguments
1012
- #
1013
- # * +missing_values+ - An Array of the values that should be
1037
+ #
1038
+ # * +missing_values+ - An Array of the values that should be
1014
1039
  # treated as 'missing'. The default missing value is *nil*.
1015
1040
  def missing_values_rows missing_values=[nil]
1016
1041
  number_of_missing = []
@@ -1031,9 +1056,9 @@ module Daru
1031
1056
 
1032
1057
  alias :flawed? :has_missing_data?
1033
1058
 
1034
- # Return a nested hash using vector names as keys and an array constructed of
1059
+ # Return a nested hash using vector names as keys and an array constructed of
1035
1060
  # hashes with other values. If block provided, is used to provide the
1036
- # values, with parameters +row+ of dataset, +current+ last hash on
1061
+ # values, with parameters +row+ of dataset, +current+ last hash on
1037
1062
  # hierarchy and +name+ of the key to include
1038
1063
  def nest *tree_keys, &block
1039
1064
  tree_keys = tree_keys[0] if tree_keys[0].is_a? Array
@@ -1101,7 +1126,7 @@ module Daru
1101
1126
  # @example Using any?
1102
1127
  # df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
1103
1128
  # df.any?(:row) do |row|
1104
- # row[:a] < 3 and row[:b] == 'b'
1129
+ # row[:a] < 3 and row[:b] == 'b'
1105
1130
  # end #=> true
1106
1131
  def any? axis=:vector, &block
1107
1132
  if axis == :vector or axis == :column
@@ -1123,7 +1148,7 @@ module Daru
1123
1148
  # @example Using all?
1124
1149
  # df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
1125
1150
  # df.all?(:row) do |row|
1126
- # row[:a] < 10
1151
+ # row[:a] < 10
1127
1152
  # end #=> true
1128
1153
  def all? axis=:vector, &block
1129
1154
  if axis == :vector or axis == :column
@@ -1145,14 +1170,18 @@ module Daru
1145
1170
  self[0..(quantity-1), :row]
1146
1171
  end
1147
1172
 
1173
+ alias :first :head
1174
+
1148
1175
  # The last ten elements of the DataFrame
1149
- #
1176
+ #
1150
1177
  # @param [Fixnum] quantity (10) The number of elements to display from the bottom.
1151
1178
  def tail quantity=10
1152
1179
  self[(@size - quantity)..(@size-1), :row]
1153
1180
  end
1154
1181
 
1155
- # Returns a vector with sum of all vectors specified in the argument.
1182
+ alias :last :tail
1183
+
1184
+ # Returns a vector with sum of all vectors specified in the argument.
1156
1185
  # If vecs parameter is empty, sum all numeric vectors.
1157
1186
  def vector_sum vecs=nil
1158
1187
  vecs ||= numeric_vectors
@@ -1166,9 +1195,9 @@ module Daru
1166
1195
  end
1167
1196
 
1168
1197
  # Calculate mean of the rows of the dataframe.
1169
- #
1198
+ #
1170
1199
  # == Arguments
1171
- #
1200
+ #
1172
1201
  # * +max_missing+ - The maximum number of elements in the row that can be
1173
1202
  # zero for the mean calculation to happen. Default to 0.
1174
1203
  def vector_mean max_missing=0
@@ -1181,16 +1210,16 @@ module Daru
1181
1210
  mean_vec
1182
1211
  end
1183
1212
 
1184
- # Group elements by vector to perform operations on them. Returns a
1213
+ # Group elements by vector to perform operations on them. Returns a
1185
1214
  # Daru::Core::GroupBy object. See the Daru::Core::GroupBy docs for a detailed
1186
1215
  # list of possible operations.
1187
- #
1216
+ #
1188
1217
  # == Arguments
1189
- #
1218
+ #
1190
1219
  # * vectors - An Array containing names of vectors to group by.
1191
- #
1220
+ #
1192
1221
  # == Usage
1193
- #
1222
+ #
1194
1223
  # df = Daru::DataFrame.new({
1195
1224
  # a: %w{foo bar foo bar foo bar foo foo},
1196
1225
  # b: %w{one one two three two two one three},
@@ -1209,7 +1238,7 @@ module Daru
1209
1238
  vectors.flatten!
1210
1239
  vectors.each { |v| raise(ArgumentError, "Vector #{v} does not exist") unless
1211
1240
  has_vector?(v) }
1212
-
1241
+
1213
1242
  Daru::Core::GroupBy.new(self, vectors)
1214
1243
  end
1215
1244
 
@@ -1234,7 +1263,7 @@ module Daru
1234
1263
  def concat other_df
1235
1264
  vectors = []
1236
1265
  @vectors.each do |v|
1237
- vectors << self[v].to_a.concat(other_df[v].to_a)
1266
+ vectors << self[v].to_a.dup.concat(other_df[v].to_a)
1238
1267
  end
1239
1268
 
1240
1269
  Daru::DataFrame.new(vectors, order: @vectors)
@@ -1242,9 +1271,9 @@ module Daru
1242
1271
 
1243
1272
  # Set a particular column as the new index of the DataFrame
1244
1273
  def set_index new_index, opts={}
1245
- raise ArgumentError, "All elements in new index must be unique." if
1274
+ raise ArgumentError, "All elements in new index must be unique." if
1246
1275
  @size != self[new_index].uniq.size
1247
-
1276
+
1248
1277
  self.index = Daru::Index.new(self[new_index].to_a)
1249
1278
  self.delete_vector(new_index) unless opts[:keep]
1250
1279
 
@@ -1253,25 +1282,25 @@ module Daru
1253
1282
 
1254
1283
  # Change the index of the DataFrame and preserve the labels of the previous
1255
1284
  # indexing. New index can be Daru::Index or any of its subclasses.
1256
- #
1285
+ #
1257
1286
  # @param [Daru::Index] new_index The new Index for reindexing the DataFrame.
1258
1287
  # @example Reindexing DataFrame
1259
- # df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]},
1288
+ # df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]},
1260
1289
  # index: ['a','b','c','d'])
1261
- # #=>
1290
+ # #=>
1262
1291
  # ##<Daru::DataFrame:83278130 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
1263
- # # a b
1264
- # # a 1 11
1265
- # # b 2 22
1266
- # # c 3 33
1267
- # # d 4 44
1292
+ # # a b
1293
+ # # a 1 11
1294
+ # # b 2 22
1295
+ # # c 3 33
1296
+ # # d 4 44
1268
1297
  # df.reindex Daru::Index.new(['b', 0, 'a', 'g'])
1269
- # #=>
1298
+ # #=>
1270
1299
  # ##<Daru::DataFrame:83177070 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
1271
- # # a b
1272
- # # b 2 22
1273
- # # 0 nil nil
1274
- # # a 1 11
1300
+ # # a b
1301
+ # # b 2 22
1302
+ # # 0 nil nil
1303
+ # # a 1 11
1275
1304
  # # g nil nil
1276
1305
  def reindex new_index
1277
1306
  raise ArgumentError, "Must pass the new index of type Index or its "\
@@ -1296,10 +1325,10 @@ module Daru
1296
1325
  # @example Reassigning index of a DataFrame
1297
1326
  # df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]})
1298
1327
  # df.index.to_a #=> [0,1,2,3]
1299
- #
1328
+ #
1300
1329
  # df.index = Daru::Index.new(['a','b','c','d'])
1301
1330
  # df.index.to_a #=> ['a','b','c','d']
1302
- # df.row['a'].to_a #=> [1,11]
1331
+ # df.row['a'].to_a #=> [1,11]
1303
1332
  def index= idx
1304
1333
  @data.each { |vec| vec.index = idx}
1305
1334
  @index = idx
@@ -1308,17 +1337,17 @@ module Daru
1308
1337
  end
1309
1338
 
1310
1339
  # Reassign vectors with a new index of type Daru::Index or any of its subclasses.
1311
- #
1340
+ #
1312
1341
  # @param [Daru::Index] idx The new index object on which the vectors are to
1313
1342
  # be indexed. Must be of the same size as ncols.
1314
1343
  # @example Reassigning vectors of a DataFrame
1315
1344
  # df = Daru::DataFrame.new({a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44]})
1316
1345
  # df.vectors.to_a #=> [:a, :b, :c]
1317
- #
1346
+ #
1318
1347
  # df.vectors = Daru::Index.new([:foo, :bar, :baz])
1319
1348
  # df.vectors.to_a #=> [:foo, :bar, :baz]
1320
1349
  def vectors= idx
1321
- raise ArgumentError, "Can only reindex with Index and its subclasses" unless
1350
+ raise ArgumentError, "Can only reindex with Index and its subclasses" unless
1322
1351
  index.kind_of?(Daru::Index)
1323
1352
  raise ArgumentError, "Specified index length #{idx.size} not equal to"\
1324
1353
  "dataframe size #{ncols}" if idx.size != ncols
@@ -1377,9 +1406,9 @@ module Daru
1377
1406
  end
1378
1407
  end
1379
1408
 
1380
- # Sorts a dataframe (ascending/descending) according to the given sequence of
1409
+ # Sorts a dataframe (ascending/descending) according to the given sequence of
1381
1410
  # vectors, using the attributes provided in the blocks.
1382
- #
1411
+ #
1383
1412
  # @param order [Array] The order of vector names in which the DataFrame
1384
1413
  # should be sorted.
1385
1414
  # @param [Hash] opts The options to sort with.
@@ -1387,21 +1416,21 @@ module Daru
1387
1416
  # or descending order. Specify Array corresponding to *order* for multiple
1388
1417
  # sort orders.
1389
1418
  # @option opts [Hash] :by ({|a,b| a <=> b}) Specify attributes of objects
1390
- # to be used for sorting, for each vector name in *order* as a hash of
1419
+ # to be used for sorting, for each vector name in *order* as a hash of
1391
1420
  # vector name and lambda pairs. In case a lambda for a vector is not
1392
1421
  # specified, the default will be used.
1393
- #
1422
+ #
1394
1423
  # == Usage
1395
- #
1424
+ #
1396
1425
  # df = Daru::DataFrame.new({a: [-3,2,-1,4], b: [4,3,2,1]})
1397
- #
1426
+ #
1398
1427
  # #<Daru::DataFrame:140630680 @name = 04e00197-f8d5-4161-bca2-93266bfabc6f @size = 4>
1399
- # # a b
1400
- # # 0 -3 4
1401
- # # 1 2 3
1402
- # # 2 -1 2
1403
- # # 3 4 1
1404
- # df.sort([:a], by: { a: lambda { |a,b| a.abs <=> b.abs } })
1428
+ # # a b
1429
+ # # 0 -3 4
1430
+ # # 1 2 3
1431
+ # # 2 -1 2
1432
+ # # 3 4 1
1433
+ # df.sort([:a], by: { a: lambda { |a,b| a.abs <=> b.abs } })
1405
1434
  def sort! vector_order, opts={}
1406
1435
  raise ArgumentError, "Required atleast one vector name" if vector_order.size < 1
1407
1436
  opts = {
@@ -1426,46 +1455,46 @@ module Daru
1426
1455
 
1427
1456
  # Pivots a data frame on specified vectors and applies an aggregate function
1428
1457
  # to quickly generate a summary.
1429
- #
1458
+ #
1430
1459
  # == Options
1431
- #
1460
+ #
1432
1461
  # +:index+ - Keys to group by on the pivot table row index. Pass vector names
1433
1462
  # contained in an Array.
1434
- #
1463
+ #
1435
1464
  # +:vectors+ - Keys to group by on the pivot table column index. Pass vector
1436
1465
  # names contained in an Array.
1437
- #
1466
+ #
1438
1467
  # +:agg+ - Function to aggregate the grouped values. Defaults to *:mean*. Can
1439
- # use any of the statistics functions applicable on Vectors that can be found in
1468
+ # use any of the statistics functions applicable on Vectors that can be found in
1440
1469
  # the Daru::Statistics::Vector module.
1441
- #
1442
- # +:values+ - Columns to aggregate. Will consider all numeric columns not
1470
+ #
1471
+ # +:values+ - Columns to aggregate. Will consider all numeric columns not
1443
1472
  # specified in *:index* or *:vectors*. Optional.
1444
- #
1473
+ #
1445
1474
  # == Usage
1446
- #
1475
+ #
1447
1476
  # df = Daru::DataFrame.new({
1448
- # a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
1477
+ # a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
1449
1478
  # b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
1450
1479
  # c: ['small','large','large','small','small','large','small','large','small'],
1451
1480
  # d: [1,2,2,3,3,4,5,6,7],
1452
1481
  # e: [2,4,4,6,6,8,10,12,14]
1453
1482
  # })
1454
1483
  # df.pivot_table(index: [:a], vectors: [:b], agg: :sum, values: :e)
1455
- #
1456
- # #=>
1484
+ #
1485
+ # #=>
1457
1486
  # # #<Daru::DataFrame:88342020 @name = 08cdaf4e-b154-4186-9084-e76dd191b2c9 @size = 2>
1458
- # # [:e, :one] [:e, :two]
1459
- # # [:bar] 18 26
1460
- # # [:foo] 10 12
1487
+ # # [:e, :one] [:e, :two]
1488
+ # # [:bar] 18 26
1489
+ # # [:foo] 10 12
1461
1490
  def pivot_table opts={}
1462
- raise ArgumentError,
1491
+ raise ArgumentError,
1463
1492
  "Specify grouping index" if !opts[:index] or opts[:index].empty?
1464
1493
 
1465
1494
  index = opts[:index]
1466
1495
  vectors = opts[:vectors] || []
1467
1496
  aggregate_function = opts[:agg] || :mean
1468
- values =
1497
+ values =
1469
1498
  if opts[:values].is_a?(Symbol)
1470
1499
  [opts[:values]]
1471
1500
  elsif opts[:values].is_a?(Array)
@@ -1473,7 +1502,7 @@ module Daru
1473
1502
  else # nil
1474
1503
  (@vectors.to_a - (index | vectors)) & numeric_vector_names
1475
1504
  end
1476
-
1505
+
1477
1506
  raise IndexError, "No numeric vectors to aggregate" if values.empty?
1478
1507
 
1479
1508
  grouped = group_by(index)
@@ -1524,7 +1553,7 @@ module Daru
1524
1553
  end
1525
1554
  end
1526
1555
 
1527
- # Merge vectors from two DataFrames. In case of name collision,
1556
+ # Merge vectors from two DataFrames. In case of name collision,
1528
1557
  # the vector names are changed to x_1, x_2, ...
1529
1558
  #
1530
1559
  # @return {Daru::DataFrame}
@@ -1545,9 +1574,9 @@ module Daru
1545
1574
  df_new
1546
1575
  end
1547
1576
 
1548
- # Join 2 DataFrames with SQL style joins. Currently supports inner, left
1577
+ # Join 2 DataFrames with SQL style joins. Currently supports inner, left
1549
1578
  # outer, right outer and full outer joins.
1550
- #
1579
+ #
1551
1580
  # @param [Daru::DataFrame] other_df Another DataFrame on which the join is
1552
1581
  # to be performed.
1553
1582
  # @param [Hash] opts Options Hash
@@ -1565,11 +1594,11 @@ module Daru
1565
1594
  # :name => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
1566
1595
  # })
1567
1596
  # left.join(right, how: :inner, on: [:name])
1568
- # #=>
1597
+ # #=>
1569
1598
  # ##<Daru::DataFrame:82416700 @name = 74c0811b-76c6-4c42-ac93-e6458e82afb0 @size = 2>
1570
- # # id_1 name id_2
1571
- # # 0 1 Pirate 2
1572
- # # 1 3 Ninja 4
1599
+ # # id_1 name id_2
1600
+ # # 0 1 Pirate 2
1601
+ # # 1 3 Ninja 4
1573
1602
  def join(other_df,opts={})
1574
1603
  Daru::Core::Merge.join(self, other_df, opts)
1575
1604
  end
@@ -1586,7 +1615,7 @@ module Daru
1586
1615
  # the field of first parameters will be copied verbatim
1587
1616
  # to new dataset, and fields which responds to second
1588
1617
  # pattern will be added one case for each different %n.
1589
- #
1618
+ #
1590
1619
  # @example
1591
1620
  # cases=[
1592
1621
  # ['1','george','red',10,'blue',20,nil,nil],
@@ -1607,9 +1636,9 @@ module Daru
1607
1636
  ds_vars = parent_fields.dup
1608
1637
  vars = []
1609
1638
  max_n = 0
1610
- h = parent_fields.inject({}) { |a,v|
1639
+ h = parent_fields.inject({}) { |a,v|
1611
1640
  a[v] = Daru::Vector.new([])
1612
- a
1641
+ a
1613
1642
  }
1614
1643
  # Adding _col_id
1615
1644
  h['_col_id'] = Daru::Vector.new([])
@@ -1663,12 +1692,12 @@ module Daru
1663
1692
  end
1664
1693
 
1665
1694
  # Create a sql, based on a given Dataset
1666
- #
1695
+ #
1667
1696
  # == Arguments
1668
- #
1697
+ #
1669
1698
  # * table - String specifying name of the table that will created in SQL.
1670
1699
  # * charset - Character set. Default is "UTF8".
1671
- #
1700
+ #
1672
1701
  # @example
1673
1702
  #
1674
1703
  # ds = Daru::DataFrame.new({
@@ -1717,17 +1746,17 @@ module Daru
1717
1746
  def to_nmatrix
1718
1747
  numerics_as_arrays = []
1719
1748
  each_vector do |vector|
1720
- numerics_as_arrays << vector.to_a if(vector.type == :numeric and
1749
+ numerics_as_arrays << vector.to_a if(vector.type == :numeric and
1721
1750
  vector.missing_positions.size == 0)
1722
1751
  end
1723
1752
 
1724
1753
  numerics_as_arrays.transpose.to_nm
1725
1754
  end
1726
-
1755
+
1727
1756
  # Converts the DataFrame into an array of hashes where key is vector name
1728
- # and value is the corresponding element. The 0th index of the array contains
1729
- # the array of hashes while the 1st index contains the indexes of each row
1730
- # of the dataframe. Each element in the index array corresponds to its row
1757
+ # and value is the corresponding element. The 0th index of the array contains
1758
+ # the array of hashes while the 1st index contains the indexes of each row
1759
+ # of the dataframe. Each element in the index array corresponds to its row
1731
1760
  # in the array of hashes, which has the same index.
1732
1761
  def to_a
1733
1762
  arry = [[],[]]
@@ -1762,10 +1791,10 @@ module Daru
1762
1791
 
1763
1792
  # Convert to html for IRuby.
1764
1793
  def to_html threshold=30
1765
- html = "<table>" +
1794
+ html = "<table>" +
1766
1795
  "<tr>" +
1767
- "<th colspan=\"#{@vectors.size+1}\">" +
1768
- "Daru::DataFrame:#{self.object_id} " + " rows: #{nrows} " + " cols: #{ncols}"
1796
+ "<th colspan=\"#{@vectors.size+1}\">" +
1797
+ "Daru::DataFrame:#{self.object_id} " + " rows: #{nrows} " + " cols: #{ncols}"
1769
1798
  "</th>" +
1770
1799
  "</tr>"
1771
1800
  html +='<tr><th></th>'
@@ -1791,7 +1820,7 @@ module Daru
1791
1820
  html += '<tr>'
1792
1821
  html += "<td>" + last_index.to_s + "</td>"
1793
1822
  (0..(ncols - 1)).to_a.each do |i|
1794
- html += '<td>' + last_row[i].to_s + '</td>'
1823
+ html += '<td>' + last_row[i].to_s + '</td>'
1795
1824
  end
1796
1825
  html += '</tr>'
1797
1826
  break
@@ -1825,21 +1854,21 @@ module Daru
1825
1854
  # == Arguments
1826
1855
  #
1827
1856
  # * filename - Path of CSV file where the DataFrame is to be saved.
1828
- #
1857
+ #
1829
1858
  # == Options
1830
- #
1859
+ #
1831
1860
  # * convert_comma - If set to *true*, will convert any commas in any
1832
1861
  # of the data to full stops ('.').
1833
- # All the options accepted by CSV.read() can also be passed into this
1862
+ # All the options accepted by CSV.read() can also be passed into this
1834
1863
  # function.
1835
1864
  def write_csv filename, opts={}
1836
1865
  Daru::IO.dataframe_write_csv self, filename, opts
1837
1866
  end
1838
1867
 
1839
1868
  # Write this dataframe to an Excel Spreadsheet
1840
- #
1869
+ #
1841
1870
  # == Arguments
1842
- #
1871
+ #
1843
1872
  # * filename - The path of the file where the DataFrame should be written.
1844
1873
  def write_excel filename, opts={}
1845
1874
  Daru::IO.dataframe_write_excel self, filename, opts
@@ -1848,10 +1877,10 @@ module Daru
1848
1877
  # Insert each case of the Dataset on the selected table
1849
1878
  #
1850
1879
  # == Arguments
1851
- #
1880
+ #
1852
1881
  # * dbh - DBI database connection object.
1853
1882
  # * query - Query string.
1854
- #
1883
+ #
1855
1884
  # == Usage
1856
1885
  #
1857
1886
  # ds = Daru::DataFrame.new({:id=>Daru::Vector.new([1,2,3]), :name=>Daru::Vector.new(["a","b","c"])})
@@ -1869,8 +1898,8 @@ module Daru
1869
1898
 
1870
1899
  def _dump depth
1871
1900
  Marshal.dump({
1872
- data: @data,
1873
- index: @index.to_a,
1901
+ data: @data,
1902
+ index: @index.to_a,
1874
1903
  order: @vectors.to_a,
1875
1904
  name: @name
1876
1905
  })
@@ -1878,14 +1907,14 @@ module Daru
1878
1907
 
1879
1908
  def self._load data
1880
1909
  h = Marshal.load data
1881
- Daru::DataFrame.new(h[:data],
1882
- index: h[:index],
1910
+ Daru::DataFrame.new(h[:data],
1911
+ index: h[:index],
1883
1912
  order: h[:order],
1884
1913
  name: h[:name])
1885
1914
  end
1886
1915
 
1887
1916
  # Change dtypes of vectors by supplying a hash of :vector_name => :new_dtype
1888
- #
1917
+ #
1889
1918
  # == Usage
1890
1919
  # df = Daru::DataFrame.new({a: [1,2,3], b: [1,2,3], c: [1,2,3]})
1891
1920
  # df.recast a: :nmatrix, c: :nmatrix
@@ -1908,7 +1937,7 @@ module Daru
1908
1937
  # Pretty print in a nice table format for the command line (irb/pry/iruby)
1909
1938
  def inspect spacing=10, threshold=15
1910
1939
  longest = [@name.to_s.size,
1911
- (@vectors.map(&:to_s).map(&:size).max || 0),
1940
+ (@vectors.map(&:to_s).map(&:size).max || 0),
1912
1941
  (@index .map(&:to_s).map(&:size).max || 0),
1913
1942
  (@data .map{ |v| v.map(&:to_s).map(&:size).max}.max || 0)].max
1914
1943
 
@@ -1918,7 +1947,7 @@ module Daru
1918
1947
  formatter = "\n"
1919
1948
 
1920
1949
  (@vectors.size + 1).times { formatter += "%#{longest}.#{longest}s " }
1921
- content += "\n#<" + self.class.to_s + ":" + self.object_id.to_s + " @name = " +
1950
+ content += "\n#<" + self.class.to_s + ":" + self.object_id.to_s + " @name = " +
1922
1951
  name.to_s + " @size = " + @size.to_s + ">"
1923
1952
  content += sprintf formatter, "" , *@vectors.map(&:to_s)
1924
1953
  row_num = 1
@@ -1945,10 +1974,10 @@ module Daru
1945
1974
  end
1946
1975
 
1947
1976
  def == other
1948
- self.class == other.class and
1949
- @size == other.size and
1977
+ self.class == other.class and
1978
+ @size == other.size and
1950
1979
  @index == other.index and
1951
- @vectors == other.vectors and
1980
+ @vectors == other.vectors and
1952
1981
  @vectors.to_a.all? { |v| self[v] == other[v] }
1953
1982
  end
1954
1983
 
@@ -1977,9 +2006,9 @@ module Daru
1977
2006
  end
1978
2007
 
1979
2008
  # == Arguments
1980
- #
1981
- # vector_order -
1982
- # index -
2009
+ #
2010
+ # vector_order -
2011
+ # index -
1983
2012
  # by -
1984
2013
  # ascending -
1985
2014
  # left_lower -
@@ -2120,7 +2149,7 @@ module Daru
2120
2149
  end
2121
2150
 
2122
2151
  order = names.is_a?(Array) ? Daru::Index.new(names) : names
2123
- Daru::DataFrame.new(new_vcs, order: order,
2152
+ Daru::DataFrame.new(new_vcs, order: order,
2124
2153
  index: @index, name: @name)
2125
2154
  end
2126
2155
  end
@@ -2134,7 +2163,7 @@ module Daru
2134
2163
  return Daru::Vector.new(populate_row_for(pos), index: @vectors, name: pos)
2135
2164
  else
2136
2165
  new_rows = pos.map { |tuple| populate_row_for(tuple) }
2137
-
2166
+
2138
2167
  if !location.is_a?(Range) and names.size < @index.width
2139
2168
  pos = pos.drop_left_level names.size
2140
2169
  end
@@ -2143,7 +2172,7 @@ module Daru
2143
2172
  new_rows, order: @vectors, name: @name, index: pos)
2144
2173
  end
2145
2174
  else
2146
- if names[1].nil?
2175
+ if names[1].nil?
2147
2176
  names = @index[location]
2148
2177
  if names.is_a?(Numeric)
2149
2178
  row = []
@@ -2159,8 +2188,8 @@ module Daru
2159
2188
  names.each do |name|
2160
2189
  rows << self.row[name].to_a
2161
2190
  end
2162
-
2163
- Daru::DataFrame.rows rows, index: names ,name: @name, order: @vectors
2191
+
2192
+ Daru::DataFrame.rows rows, index: names ,name: @name, order: @vectors
2164
2193
  end
2165
2194
  end
2166
2195
 
@@ -2171,11 +2200,11 @@ module Daru
2171
2200
  end
2172
2201
 
2173
2202
  def insert_or_modify_vector name, vector
2174
- name = name[0] unless @vectors.is_a?(MultiIndex)
2203
+ name = name[0] unless @vectors.is_a?(MultiIndex)
2175
2204
  v = nil
2176
2205
 
2177
2206
  if @index.empty?
2178
- v = vector.is_a?(Daru::Vector) ? vector : Daru::Vector.new(vector.to_a)
2207
+ v = vector.is_a?(Daru::Vector) ? vector : Daru::Vector.new(vector.to_a)
2179
2208
  @index = v.index
2180
2209
  assign_or_add_vector name, v
2181
2210
  set_size
@@ -2217,7 +2246,7 @@ module Daru
2217
2246
  #FIXME: fix this jugaad. need to make changes in Indexing itself.
2218
2247
  pos = @vectors[name]
2219
2248
 
2220
- if !pos.kind_of?(Daru::Index) and pos == name and
2249
+ if !pos.kind_of?(Daru::Index) and pos == name and
2221
2250
  (@vectors.include?(name) or (pos.is_a?(Integer) and pos < @data.size))
2222
2251
  @data[pos] = v
2223
2252
  elsif pos.kind_of?(Daru::Index)
@@ -2227,10 +2256,10 @@ module Daru
2227
2256
  else
2228
2257
  @vectors = @vectors | [name] if !@vectors.include?(name)
2229
2258
  @data[@vectors[name]] = v
2230
- end
2259
+ end
2231
2260
  end
2232
2261
 
2233
- def insert_or_modify_row name, vector
2262
+ def insert_or_modify_row name, vector
2234
2263
  if index.is_a?(MultiIndex)
2235
2264
  # TODO
2236
2265
  else
@@ -2264,7 +2293,7 @@ module Daru
2264
2293
  end
2265
2294
 
2266
2295
  def validate_labels
2267
- raise IndexError, "Expected equal number of vector names (#{@vectors.size}) for number of vectors (#{@data.size})." if
2296
+ raise IndexError, "Expected equal number of vector names (#{@vectors.size}) for number of vectors (#{@data.size})." if
2268
2297
  @vectors and @vectors.size != @data.size
2269
2298
 
2270
2299
  raise IndexError, "Expected number of indexes same as number of rows" if
@@ -2330,7 +2359,7 @@ module Daru
2330
2359
  end
2331
2360
 
2332
2361
  def symbolize arry
2333
- symbolized_arry =
2362
+ symbolized_arry =
2334
2363
  if arry.all? { |e| e.is_a?(Array) }
2335
2364
  arry.map do |sub_arry|
2336
2365
  sub_arry.map do |e|
@@ -2344,4 +2373,4 @@ module Daru
2344
2373
  symbolized_arry
2345
2374
  end
2346
2375
  end
2347
- end
2376
+ end