daru 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,6 +33,14 @@ module Daru
33
33
  hsh.each { |k,v| hsh[k] = v.to_a }
34
34
  hsh
35
35
  end
36
+
37
# Converts a DataFrame into plain arrays for join processing.
#
# == Arguments
#
# * df - Object whose #to_a returns an Array whose first element is the
#   list of rows, each row being a Hash of column name => value.
#
# == Returns
#
# Two values: the column names (the keys of the first row) and the table
# as an Array of rows, each row an Array of values.
# Both are empty when there are no rows, because the column names are only
# read from the first row.
def arrayify df
  rows = df.to_a[0]

  # Without this guard rows[0] is nil and calling #keys on it raises.
  return [[], []] if rows.empty?

  col_names = rows[0].keys
  values = rows.map { |row| row.values }

  return col_names, values
end
36
44
 
37
45
  def inner_join df1, df2, df_hash1, df_hash2, on
38
46
  joined_hash = {}
@@ -53,6 +61,52 @@ module Daru
53
61
  Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
54
62
  end
55
63
 
64
# Inner join of two DataFrames on the +on+ columns. A Bloom filter built
# from the right table's keys is used as a cheap pre-check; candidates
# that pass it are confirmed with an exact lookup.
#
# == Arguments
#
# * df1, df2 - DataFrames to join (converted with #arrayify).
# * on - Array of column names to join on.
#
# == Returns
#
# A Daru::DataFrame whose columns are df1's followed by df2's. A non-key
# name present in both frames gets a "_1" (left) / "_2" (right) suffix;
# key columns appear once, under their original name.
def bf_inner_join df1, df2, on
  col_names1, table1 = arrayify df1
  col_names2, table2 = arrayify df2

  # Positions of the key columns, taken before any renaming below.
  indicies1 = on.map { |name| col_names1.index(name) }
  indicies2 = on.map { |name| col_names2.index(name) }

  # Resolve duplicate column names. Right-hand duplicates always get "_2"
  # (including key columns, which are dropped again at the end); left-hand
  # duplicates get "_1" unless they are key columns.
  col_names2.map! do |name|
    if col_names1.include?(name)
      col_names1[col_names1.index(name)] = (name.to_s + "_1").to_sym unless on.include?(name)
      (name.to_s + "_2").to_sym
    else
      name
    end
  end

  # Key of each row: the stringified key cells, kept as an Array so that
  # ["a+b", "c"] and ["a", "b+c"] stay distinct. Cells are compared by
  # their #to_s form, so 1 and "1" are treated as the same key.
  keys1 = table1.map { |row| indicies1.map { |i| row[i].to_s } }
  keys2 = table2.map { |row| indicies2.map { |i| row[i].to_s } }

  # Parameters for a BF with approx 0.1% false positives. The size is kept
  # at 1 or more so an empty right table does not request a zero-sized filter.
  m = [keys2.size * 15, 1].max
  k = 11

  bf = BloomFilter::Native.new({:size => m, :hashes => k, :bucket => 1})
  keys2.each { |key| bf.insert(key.join("+")) }

  # Exact index of right-table row positions per key. It weeds out Bloom
  # filter false positives and avoids rescanning the right table per hit.
  rows2_by_key = {}
  keys2.each_with_index { |key, ind2| (rows2_by_key[key] ||= []) << ind2 }

  joined_rows = []
  keys1.each_with_index do |key, ind1|
    next unless bf.include?(key.join("+"))

    rows2_by_key.fetch(key, []).each do |ind2|
      # Concatenate rather than flatten so Array-valued cells stay intact.
      joined_rows << (table1[ind1] + table2[ind2])
    end
  end

  joined_cols = col_names1 + col_names2
  df = Daru::DataFrame.rows(joined_rows, order: joined_cols)

  # Drop the right-hand copies of the key columns. They are absent when
  # one side had no rows and therefore no known column names.
  on.each do |name|
    duplicate = (name.to_s + "_2").to_sym
    df.delete_vector(duplicate) if joined_cols.include?(duplicate)
  end

  df
end
109
+
56
110
  def full_outer_join df1, df2, df_hash1, df_hash2, on
57
111
  left = left_outer_join df1, df2, df_hash1, df_hash2, on, true
58
112
  right = right_outer_join df1, df2, df_hash1, df_hash2, on, true
@@ -153,7 +207,11 @@ module Daru
153
207
 
154
208
  case opts[:how]
155
209
  when :inner
156
- helper.inner_join df1, df2, df_hash1, df_hash2, on
210
+ if Daru.has_bloomfilter_rb?
211
+ helper.bf_inner_join df1, df2, on
212
+ else
213
+ helper.inner_join df1, df2, df_hash1, df_hash2, on
214
+ end
157
215
  when :outer
158
216
  helper.full_outer_join df1, df2, df_hash1, df_hash2, on
159
217
  when :left
@@ -14,30 +14,30 @@ module Daru
14
14
  include Daru::Plotting::DataFrame if Daru.has_nyaplot?
15
15
 
16
16
  class << self
17
- # Load data from a CSV file. Specify an optional block to grab the CSV
18
- # object and pre-condition it (for example use the `convert` or
17
+ # Load data from a CSV file. Specify an optional block to grab the CSV
18
+ # object and pre-condition it (for example use the `convert` or
19
19
  # `header_convert` methods).
20
- #
20
+ #
21
21
  # == Arguments
22
- #
22
+ #
23
23
  # * path - Path of the file to load specified as a String.
24
- #
24
+ #
25
25
  # == Options
26
- #
26
+ #
27
27
  # Accepts the same options as the Daru::DataFrame constructor and CSV.open()
28
28
  # and uses those to eventually construct the resulting DataFrame.
29
29
  #
30
30
  # == Verbose Description
31
31
  #
32
- # You can specify all the options to the `.from_csv` function that you
32
+ # You can specify all the options to the `.from_csv` function that you
33
33
  # do to the Ruby `CSV.read()` function, since this is what is used internally.
34
34
  #
35
- # For example, if the columns in your CSV file are separated by something
36
- # other that commas, you can use the `:col_sep` option. If you want to
37
- # convert numeric values to numbers and not keep them as strings, you can
35
+ # For example, if the columns in your CSV file are separated by something
36
+ # other than commas, you can use the `:col_sep` option. If you want to
37
+ # convert numeric values to numbers and not keep them as strings, you can
38
38
  # use the `:converters` option and set it to `:numeric`.
39
39
  #
40
- # The `.from_csv` function uses the following defaults for reading CSV files
40
+ # The `.from_csv` function uses the following defaults for reading CSV files
41
41
  # (that are passed into the `CSV.read()` function):
42
42
  #
43
43
  # {
@@ -45,24 +45,29 @@ module Daru
45
45
  # :converters => :numeric
46
46
  # }
47
47
  def from_csv path, opts={}, &block
48
- Daru::IO.from_csv path, opts, &block
48
+ Daru::IO.from_csv path, opts, &block
49
49
  end
50
50
 
51
51
  # Read data from an Excel file into a DataFrame.
52
- #
52
+ #
53
53
  # == Arguments
54
- #
54
+ #
55
55
  # * path - Path of the file to be read.
56
- #
56
+ #
57
57
  # == Options
58
- #
58
+ #
59
59
  # *:worksheet_id - ID of the worksheet that is to be read.
60
- def from_excel path, opts={}, &block
60
+ def from_excel path, opts={}, &block
61
61
  Daru::IO.from_excel path, opts, &block
62
62
  end
63
63
 
64
64
  # Read a database query and returns a Dataset
65
65
  #
66
+ # @param dbh [DBI::DatabaseHandle] A DBI connection to be used to run the query
67
+ # @param query [String] The query to be executed
68
+ #
69
+ # @return A dataframe containing the data resulting from the query
70
+ #
66
71
  # USE:
67
72
  #
68
73
  # dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
@@ -71,17 +76,37 @@ module Daru
71
76
  Daru::IO.from_sql dbh, query
72
77
  end
73
78
 
79
+ # Read a dataframe from AR::Relation
80
+ #
81
+ # @param relation [ActiveRecord::Relation] An AR::Relation object from which data is loaded
82
+ # @param fields [Array] Field names to be loaded (optional)
83
+ #
84
+ # @return A dataframe containing the data loaded from the relation
85
+ #
86
+ # USE:
87
+ #
88
+ # # When Post model is defined as:
89
+ # class Post < ActiveRecord::Base
90
+ # scope :active, -> { where.not(published_at: nil) }
91
+ # end
92
+ #
93
+ # # You can load active posts into a dataframe by:
94
+ # Daru::DataFrame.from_activerecord(Post.active, :title, :published_at)
95
def from_activerecord(relation, *fields)
  # Thin wrapper: the relation and the field list are forwarded unchanged
  # to the IO layer, whose result is returned as-is.
  Daru::IO.from_activerecord(relation, *fields)
end
98
+
74
99
  # Read the database from a plaintext file. For this method to work,
75
100
  # the data should be present in a plain text file in columns. See
76
101
  # spec/fixtures/bank2.dat for an example.
77
- #
102
+ #
78
103
  # == Arguments
79
- #
104
+ #
80
105
  # * path - Path of the file to be read.
81
106
  # * fields - Vector names of the resulting database.
82
- #
107
+ #
83
108
  # == Usage
84
- #
109
+ #
85
110
  # df = Daru::DataFrame.from_plaintext 'spec/fixtures/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6]
86
111
  def from_plaintext path, fields
87
112
  Daru::IO.from_plaintext path, fields
@@ -137,15 +162,15 @@ module Daru
137
162
  #
138
163
  # Useful to process outputs from databases
139
164
  def crosstab_by_assignation rows, columns, values
140
- raise "Three vectors should be equal size" if
165
+ raise "Three vectors should be equal size" if
141
166
  rows.size != columns.size or rows.size!=values.size
142
167
 
143
168
  cols_values = columns.factors
144
169
  cols_n = cols_values.size
145
170
 
146
- h_rows = rows.factors.inject({}) do |a,v|
147
- a[v] = cols_values.inject({}) do |a1,v1|
148
- a1[v1]=nil
171
+ h_rows = rows.factors.inject({}) do |a,v|
172
+ a[v] = cols_values.inject({}) do |a1,v1|
173
+ a1[v1]=nil
149
174
  a1
150
175
  end
151
176
  a
@@ -186,38 +211,38 @@ module Daru
186
211
  # These objects are indexed by row and column by vectors and index Index objects.
187
212
  #
188
213
  # == Arguments
189
- #
214
+ #
190
215
  # * source - Source from the DataFrame is to be initialized. Can be a Hash
191
216
  # of names and vectors (array or Daru::Vector), an array of arrays or
192
217
  # array of Daru::Vectors.
193
- #
218
+ #
194
219
  # == Options
195
- #
196
- # +:order+ - An *Array*/*Daru::Index*/*Daru::MultiIndex* containing the order in
220
+ #
221
+ # +:order+ - An *Array*/*Daru::Index*/*Daru::MultiIndex* containing the order in
197
222
  # which Vectors should appear in the DataFrame.
198
- #
223
+ #
199
224
  # +:index+ - An *Array*/*Daru::Index*/*Daru::MultiIndex* containing the order
200
225
  # in which rows of the DataFrame will be named.
201
- #
226
+ #
202
227
  # +:name+ - A name for the DataFrame.
203
228
  #
204
229
  # +:clone+ - Specify as *true* or *false*. When set to false, and Vector
205
230
  # objects are passed for the source, the Vector objects will not duplicated
206
- # when creating the DataFrame. Will have no effect if Array is passed in
207
- # the source, or if the passed Daru::Vectors have different indexes.
231
+ # when creating the DataFrame. Will have no effect if Array is passed in
232
+ # the source, or if the passed Daru::Vectors have different indexes.
208
233
  # Default to *true*.
209
- #
234
+ #
210
235
  # == Usage
211
- # df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
236
+ # df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
212
237
  # index: [:a, :b, :c, :d], name: :spider_man)
213
- #
214
- # # =>
238
+ #
239
+ # # =>
215
240
  # # <Daru::DataFrame:80766980 @name = spider_man @size = 4>
216
- # # b a
217
- # # a 6 1
218
- # # b 7 2
219
- # # c 8 3
220
- # # d 9 4
241
+ # # b a
242
+ # # a 6 1
243
+ # # b 7 2
244
+ # # c 8 3
245
+ # # d 9 4
221
246
  def initialize source, opts={}
222
247
  vectors = opts[:order]
223
248
  index = opts[:index]
@@ -292,7 +317,7 @@ module Daru
292
317
  @vectors.each do |vector|
293
318
  # avoids matching indexes of vectors if all the supplied vectors
294
319
  # have the same index.
295
- if vectors_have_same_index
320
+ if vectors_have_same_index
296
321
  v = source[vector].dup
297
322
  else
298
323
  v = Daru::Vector.new([], name: vector, index: @index)
@@ -331,8 +356,8 @@ module Daru
331
356
  end
332
357
 
333
358
  # Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
334
- # Defaults to *:vector*. Use of this method is not recommended for accessing
335
- # rows or vectors. Use df.row[:a] for accessing row with index ':a' or
359
+ # Defaults to *:vector*. Use of this method is not recommended for accessing
360
+ # rows or vectors. Use df.row[:a] for accessing row with index ':a' or
336
361
  # df.vector[:vec] for accessing vector with index *:vec*.
337
362
  def [](*names)
338
363
  if names[-1] == :vector or names[-1] == :row
@@ -354,7 +379,7 @@ module Daru
354
379
  # Insert a new row/vector of the specified name or modify a previous row.
355
380
  # Instead of using this method directly, use df.row[:a] = [1,2,3] to set/create
356
381
  # a row ':a' to [1,2,3], or df.vector[:vec] = [1,2,3] for vectors.
357
- #
382
+ #
358
383
  # In case a Daru::Vector is specified after the equality the sign, the indexes
359
384
  # of the vector will be matched against the row/vector indexes of the DataFrame
360
385
  # before an insertion is performed. Unmatched indexes will be set to nil.
@@ -368,7 +393,7 @@ module Daru
368
393
 
369
394
  if axis == :vector
370
395
  insert_or_modify_vector name, vector
371
- elsif axis == :row
396
+ elsif axis == :row
372
397
  insert_or_modify_row name, vector
373
398
  else
374
399
  raise IndexError, "Expected axis to be row or vector, not #{axis}."
@@ -389,7 +414,7 @@ module Daru
389
414
  end
390
415
 
391
416
  # Access a row or set/create a row. Refer #[] and #[]= docs for details.
392
- #
417
+ #
393
418
  # == Usage
394
419
  # df.row[:a] # access row named ':a'
395
420
  # df.row[:b] = [1,2,3] # set row ':b' to [1,2,3]
@@ -398,17 +423,17 @@ module Daru
398
423
  end
399
424
 
400
425
  # Duplicate the DataFrame entirely.
401
- #
426
+ #
402
427
  # == Arguments
403
- #
404
- # * +vectors_to_dup+ - An Array specifying the names of Vectors to
428
+ #
429
+ # * +vectors_to_dup+ - An Array specifying the names of Vectors to
405
430
  # be duplicated. Will duplicate the entire DataFrame if not specified.
406
431
  def dup vectors_to_dup=nil
407
432
  vectors_to_dup = @vectors.to_a unless vectors_to_dup
408
433
 
409
434
  src = []
410
435
  vectors_to_dup.each do |vec|
411
- src << @data[@vectors[vec]].to_a
436
+ src << @data[@vectors[vec]].to_a.dup
412
437
  end
413
438
  new_order = Daru::Index.new(vectors_to_dup)
414
439
 
@@ -422,9 +447,9 @@ module Daru
422
447
 
423
448
  # Returns a 'view' of the DataFrame, i.e the object ID's of vectors are
424
449
  # preserved.
425
- #
450
+ #
426
451
  # == Arguments
427
- #
452
+ #
428
453
  # +vectors_to_clone+ - Names of vectors to clone. Optional. Will return
429
454
  # a view of the whole data frame otherwise.
430
455
  def clone *vectors_to_clone
@@ -438,7 +463,7 @@ module Daru
438
463
  Daru::DataFrame.new(h, clone: false)
439
464
  end
440
465
 
441
- # Returns a 'shallow' copy of DataFrame if missing data is not present,
466
+ # Returns a 'shallow' copy of DataFrame if missing data is not present,
442
467
  # or a full copy of only valid data if missing data is present.
443
468
  def clone_only_valid
444
469
  if has_missing_data?
@@ -448,7 +473,7 @@ module Daru
448
473
  end
449
474
  end
450
475
 
451
- # Creates a new duplicate dataframe containing only rows
476
+ # Creates a new duplicate dataframe containing only rows
452
477
  # without a single missing value.
453
478
  def dup_only_valid vecs=nil
454
479
  rows_with_nil = @data.inject([]) do |memo, vector|
@@ -485,7 +510,7 @@ module Daru
485
510
 
486
511
  @vectors.each do |vector|
487
512
  yield @data[@vectors[vector]], vector
488
- end
513
+ end
489
514
 
490
515
  self
491
516
  end
@@ -518,12 +543,12 @@ module Daru
518
543
  #
519
544
  # == Description
520
545
  #
521
- # `#each` works exactly like Array#each. The default mode for `each`
522
- # is to iterate over the columns of the DataFrame. To iterate over
546
+ # `#each` works exactly like Array#each. The default mode for `each`
547
+ # is to iterate over the columns of the DataFrame. To iterate over
523
548
  # rows you must pass the axis, i.e `:row` as an argument.
524
- #
549
+ #
525
550
  # == Arguments
526
- #
551
+ #
527
552
  # * +axis+ - The axis to iterate over. Can be :vector (or :column)
528
553
  # or :row. Default to :vector.
529
554
  def each axis=:vector, &block
@@ -541,14 +566,14 @@ module Daru
541
566
  #
542
567
  # == Description
543
568
  #
544
- # The #collect iterator works similar to #map, the only difference
545
- # being that it returns a Daru::Vector comprising of the results of
546
- # each block run. The resultant Vector has the same index as that
547
- # of the axis over which collect has iterated. It also accepts the
569
+ # The #collect iterator works similar to #map, the only difference
570
+ # being that it returns a Daru::Vector comprising of the results of
571
+ # each block run. The resultant Vector has the same index as that
572
+ # of the axis over which collect has iterated. It also accepts the
548
573
  # optional axis argument.
549
574
  #
550
575
  # == Arguments
551
- #
576
+ #
552
577
  # * +axis+ - The axis to iterate over. Can be :vector (or :column)
553
578
  # or :row. Default to :vector.
554
579
  def collect axis=:vector, &block
@@ -565,16 +590,16 @@ module Daru
565
590
  # the argument specified. Will return an Array of the resulting
566
591
  # elements. To map over each row/vector and get a DataFrame,
567
592
  # see #recode.
568
- #
593
+ #
569
594
  # == Description
570
- #
571
- # The #map iterator works like Array#map. The value returned by
572
- # each run of the block is added to an Array and the Array is
573
- # returned. This method also accepts an axis argument, like #each.
595
+ #
596
+ # The #map iterator works like Array#map. The value returned by
597
+ # each run of the block is added to an Array and the Array is
598
+ # returned. This method also accepts an axis argument, like #each.
574
599
  # The default is :vector.
575
- #
600
+ #
576
601
  # == Arguments
577
- #
602
+ #
578
603
  # * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
579
604
  # Default to :vector.
580
605
  def map axis=:vector, &block
@@ -590,9 +615,9 @@ module Daru
590
615
  # Destructive map. Modifies the DataFrame. Each run of the block
591
616
  # must return a Daru::Vector. You can specify the axis to map over
592
617
  # as the argument. Default to :vector.
593
- #
618
+ #
594
619
  # == Arguments
595
- #
620
+ #
596
621
  # * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
597
622
  # Default to :vector.
598
623
  def map! axis=:vector, &block
@@ -609,15 +634,15 @@ module Daru
609
634
  #
610
635
  # == Description
611
636
  #
612
- # Recode works similarly to #map, but an important difference between
613
- # the two is that recode returns a modified Daru::DataFrame instead
614
- # of an Array. For this reason, #recode expects that every run of the
637
+ # Recode works similarly to #map, but an important difference between
638
+ # the two is that recode returns a modified Daru::DataFrame instead
639
+ # of an Array. For this reason, #recode expects that every run of the
615
640
  # block to return a Daru::Vector.
616
641
  #
617
642
  # Just like map and each, recode also accepts an optional _axis_ argument.
618
- #
643
+ #
619
644
  # == Arguments
620
- #
645
+ #
621
646
  # * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
622
647
  # Default to :vector.
623
648
  def recode axis=:vector, &block
@@ -629,22 +654,22 @@ module Daru
629
654
  end
630
655
 
631
656
  # Retain vectors or rows if the block returns a truthy value.
632
- #
657
+ #
633
658
  # == Description
634
- #
635
- # For filtering out certain rows/vectors based on their values,
636
- # use the #filter method. By default it iterates over vectors and
637
- # keeps those vectors for which the block returns true. It accepts
638
- # an optional axis argument which lets you specify whether you want
659
+ #
660
+ # For filtering out certain rows/vectors based on their values,
661
+ # use the #filter method. By default it iterates over vectors and
662
+ # keeps those vectors for which the block returns true. It accepts
663
+ # an optional axis argument which lets you specify whether you want
639
664
  # to iterate over vectors or rows.
640
- #
665
+ #
641
666
  # == Arguments
642
- #
667
+ #
643
668
  # * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
644
669
  # Default to :vector.
645
- #
670
+ #
646
671
  # == Usage
647
- #
672
+ #
648
673
  # # Filter vectors
649
674
  #
650
675
  # df.filter do |vector|
@@ -665,12 +690,12 @@ module Daru
665
690
  end
666
691
 
667
692
  def recode_vectors &block
668
- block_given? or return to_enum(:recode_vectors)
693
+ block_given? or return to_enum(:recode_vectors)
669
694
 
670
695
  df = self.dup
671
696
  df.each_vector_with_index do |v, i|
672
697
  ret = yield v
673
- ret.is_a?(Daru::Vector) or
698
+ ret.is_a?(Daru::Vector) or
674
699
  raise TypeError, "Every iteration must return Daru::Vector not #{ret.class}"
675
700
  df[*i] = ret
676
701
  end
@@ -763,7 +788,7 @@ module Daru
763
788
  self
764
789
  end
765
790
 
766
- # Retrieves a Daru::Vector, based on the result of calculation
791
+ # Retrieves a Daru::Vector, based on the result of calculation
767
792
  # performed on each row.
768
793
  def collect_rows &block
769
794
  return to_enum(:collect_rows) unless block_given?
@@ -878,15 +903,15 @@ module Daru
878
903
 
879
904
  deletion << index unless keep_row
880
905
  end
881
- deletion.each { |idx|
882
- delete_row idx
906
+ deletion.each { |idx|
907
+ delete_row idx
883
908
  }
884
909
  end
885
910
 
886
911
  def keep_vector_if &block
887
912
  @vectors.each do |vector|
888
913
  keep_vector = yield @data[@vectors[vector]], vector
889
-
914
+
890
915
  delete_vector vector unless keep_vector
891
916
  end
892
917
  end
@@ -925,7 +950,7 @@ module Daru
925
950
  # true for that vector.
926
951
  def filter_vectors &block
927
952
  return to_enum(:filter_vectors) unless block_given?
928
-
953
+
929
954
  df = self.dup
930
955
  df.keep_vector_if &block
931
956
 
@@ -934,7 +959,7 @@ module Daru
934
959
 
935
960
  # Test each row with one or more tests. Each test is a Proc with the form
936
961
  # *Proc.new {|row| row[:age] > 0}*
937
- #
962
+ #
938
963
  # The function returns an array with all errors.
939
964
  def verify(*tests)
940
965
  if(tests[0].is_a? Symbol)
@@ -963,9 +988,9 @@ module Daru
963
988
 
964
989
  # DSL for yielding each row and returning a Daru::Vector based on the
965
990
  # value each run of the block returns.
966
- #
991
+ #
967
992
  # == Usage
968
- #
993
+ #
969
994
  # a1 = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7])
970
995
  # a2 = Daru::Vector.new([10, 20, 30, 40, 50, 60, 70])
971
996
  # a3 = Daru::Vector.new([100, 200, 300, 400, 500, 600, 700])
@@ -991,10 +1016,10 @@ module Daru
991
1016
 
992
1017
  # Returns a vector, based on a string with a calculation based
993
1018
  # on vector.
994
- #
1019
+ #
995
1020
  # The calculation will be eval'ed, so you can put any variable
996
1021
  # or expression valid on ruby.
997
- #
1022
+ #
998
1023
  # For example:
999
1024
  # a = Daru::Vector.new [1,2]
1000
1025
  # b = Daru::Vector.new [3,4]
@@ -1003,14 +1028,14 @@ module Daru
1003
1028
  # => Vector [4,6]
1004
1029
  def compute text, &block
1005
1030
  return instance_eval(&block) if block_given?
1006
- instance_eval(text)
1031
+ instance_eval(text)
1007
1032
  end
1008
1033
 
1009
1034
  # Return a vector with the number of missing values in each row.
1010
- #
1035
+ #
1011
1036
  # == Arguments
1012
- #
1013
- # * +missing_values+ - An Array of the values that should be
1037
+ #
1038
+ # * +missing_values+ - An Array of the values that should be
1014
1039
  # treated as 'missing'. The default missing value is *nil*.
1015
1040
  def missing_values_rows missing_values=[nil]
1016
1041
  number_of_missing = []
@@ -1031,9 +1056,9 @@ module Daru
1031
1056
 
1032
1057
  alias :flawed? :has_missing_data?
1033
1058
 
1034
- # Return a nested hash using vector names as keys and an array constructed of
1059
+ # Return a nested hash using vector names as keys and an array constructed of
1035
1060
  # hashes with other values. If block provided, is used to provide the
1036
- # values, with parameters +row+ of dataset, +current+ last hash on
1061
+ # values, with parameters +row+ of dataset, +current+ last hash on
1037
1062
  # hierarchy and +name+ of the key to include
1038
1063
  def nest *tree_keys, &block
1039
1064
  tree_keys = tree_keys[0] if tree_keys[0].is_a? Array
@@ -1101,7 +1126,7 @@ module Daru
1101
1126
  # @example Using any?
1102
1127
  # df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
1103
1128
  # df.any?(:row) do |row|
1104
- # row[:a] < 3 and row[:b] == 'b'
1129
+ # row[:a] < 3 and row[:b] == 'b'
1105
1130
  # end #=> true
1106
1131
  def any? axis=:vector, &block
1107
1132
  if axis == :vector or axis == :column
@@ -1123,7 +1148,7 @@ module Daru
1123
1148
  # @example Using all?
1124
1149
  # df = Daru::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
1125
1150
  # df.all?(:row) do |row|
1126
- # row[:a] < 10
1151
+ # row[:a] < 10
1127
1152
  # end #=> true
1128
1153
  def all? axis=:vector, &block
1129
1154
  if axis == :vector or axis == :column
@@ -1145,14 +1170,18 @@ module Daru
1145
1170
  self[0..(quantity-1), :row]
1146
1171
  end
1147
1172
 
1173
+ alias :first :head
1174
+
1148
1175
  # The last ten elements of the DataFrame
1149
- #
1176
+ #
1150
1177
  # @param [Fixnum] quantity (10) The number of elements to display from the bottom.
1151
1178
  def tail quantity=10
1152
1179
  self[(@size - quantity)..(@size-1), :row]
1153
1180
  end
1154
1181
 
1155
- # Returns a vector with sum of all vectors specified in the argument.
1182
+ alias :last :tail
1183
+
1184
+ # Returns a vector with sum of all vectors specified in the argument.
1156
1185
  # If the vecs parameter is empty, sums all numeric vectors.
1157
1186
  def vector_sum vecs=nil
1158
1187
  vecs ||= numeric_vectors
@@ -1166,9 +1195,9 @@ module Daru
1166
1195
  end
1167
1196
 
1168
1197
  # Calculate mean of the rows of the dataframe.
1169
- #
1198
+ #
1170
1199
  # == Arguments
1171
- #
1200
+ #
1172
1201
  # * +max_missing+ - The maximum number of elements in the row that can be
1173
1202
  # zero for the mean calculation to happen. Default to 0.
1174
1203
  def vector_mean max_missing=0
@@ -1181,16 +1210,16 @@ module Daru
1181
1210
  mean_vec
1182
1211
  end
1183
1212
 
1184
- # Group elements by vector to perform operations on them. Returns a
1213
+ # Group elements by vector to perform operations on them. Returns a
1185
1214
  # Daru::Core::GroupBy object. See the Daru::Core::GroupBy docs for a detailed
1186
1215
  # list of possible operations.
1187
- #
1216
+ #
1188
1217
  # == Arguments
1189
- #
1218
+ #
1190
1219
  # * vectors - An Array containing names of vectors to group by.
1191
- #
1220
+ #
1192
1221
  # == Usage
1193
- #
1222
+ #
1194
1223
  # df = Daru::DataFrame.new({
1195
1224
  # a: %w{foo bar foo bar foo bar foo foo},
1196
1225
  # b: %w{one one two three two two one three},
@@ -1209,7 +1238,7 @@ module Daru
1209
1238
  vectors.flatten!
1210
1239
  vectors.each { |v| raise(ArgumentError, "Vector #{v} does not exist") unless
1211
1240
  has_vector?(v) }
1212
-
1241
+
1213
1242
  Daru::Core::GroupBy.new(self, vectors)
1214
1243
  end
1215
1244
 
@@ -1234,7 +1263,7 @@ module Daru
1234
1263
  def concat other_df
1235
1264
  vectors = []
1236
1265
  @vectors.each do |v|
1237
- vectors << self[v].to_a.concat(other_df[v].to_a)
1266
+ vectors << self[v].to_a.dup.concat(other_df[v].to_a)
1238
1267
  end
1239
1268
 
1240
1269
  Daru::DataFrame.new(vectors, order: @vectors)
@@ -1242,9 +1271,9 @@ module Daru
1242
1271
 
1243
1272
  # Set a particular column as the new DF
1244
1273
  def set_index new_index, opts={}
1245
- raise ArgumentError, "All elements in new index must be unique." if
1274
+ raise ArgumentError, "All elements in new index must be unique." if
1246
1275
  @size != self[new_index].uniq.size
1247
-
1276
+
1248
1277
  self.index = Daru::Index.new(self[new_index].to_a)
1249
1278
  self.delete_vector(new_index) unless opts[:keep]
1250
1279
 
@@ -1253,25 +1282,25 @@ module Daru
1253
1282
 
1254
1283
  # Change the index of the DataFrame and preserve the labels of the previous
1255
1284
  # indexing. New index can be Daru::Index or any of its subclasses.
1256
- #
1285
+ #
1257
1286
  # @param [Daru::Index] new_index The new Index for reindexing the DataFrame.
1258
1287
  # @example Reindexing DataFrame
1259
- # df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]},
1288
+ # df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]},
1260
1289
  # index: ['a','b','c','d'])
1261
- # #=>
1290
+ # #=>
1262
1291
  # ##<Daru::DataFrame:83278130 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
1263
- # # a b
1264
- # # a 1 11
1265
- # # b 2 22
1266
- # # c 3 33
1267
- # # d 4 44
1292
+ # # a b
1293
+ # # a 1 11
1294
+ # # b 2 22
1295
+ # # c 3 33
1296
+ # # d 4 44
1268
1297
  # df.reindex Daru::Index.new(['b', 0, 'a', 'g'])
1269
- # #=>
1298
+ # #=>
1270
1299
  # ##<Daru::DataFrame:83177070 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
1271
- # # a b
1272
- # # b 2 22
1273
- # # 0 nil nil
1274
- # # a 1 11
1300
+ # # a b
1301
+ # # b 2 22
1302
+ # # 0 nil nil
1303
+ # # a 1 11
1275
1304
  # # g nil nil
1276
1305
  def reindex new_index
1277
1306
  raise ArgumentError, "Must pass the new index of type Index or its "\
@@ -1296,10 +1325,10 @@ module Daru
1296
1325
  # @example Reassgining index of a DataFrame
1297
1326
  # df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]})
1298
1327
  # df.index.to_a #=> [0,1,2,3]
1299
- #
1328
+ #
1300
1329
  # df.index = Daru::Index.new(['a','b','c','d'])
1301
1330
  # df.index.to_a #=> ['a','b','c','d']
1302
- # df.row['a'].to_a #=> [1,11]
1331
+ # df.row['a'].to_a #=> [1,11]
1303
1332
  def index= idx
1304
1333
  @data.each { |vec| vec.index = idx}
1305
1334
  @index = idx
@@ -1308,17 +1337,17 @@ module Daru
1308
1337
  end
1309
1338
 
1310
1339
  # Reassign vectors with a new index of type Daru::Index or any of its subclasses.
1311
- #
1340
+ #
1312
1341
  # @param [Daru::Index] idx The new index object on which the vectors are to
1313
1342
  # be indexed. Must be of the same size as ncols.
1314
1343
  # @example Reassigning vectors of a DataFrame
1315
1344
  # df = Daru::DataFrame.new({a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44]})
1316
1345
  # df.vectors.to_a #=> [:a, :b, :c]
1317
- #
1346
+ #
1318
1347
  # df.vectors = Daru::Index.new([:foo, :bar, :baz])
1319
1348
  # df.vectors.to_a #=> [:foo, :bar, :baz]
1320
1349
  def vectors= idx
1321
- raise ArgumentError, "Can only reindex with Index and its subclasses" unless
1350
+ raise ArgumentError, "Can only reindex with Index and its subclasses" unless
1322
1351
  index.kind_of?(Daru::Index)
1323
1352
  raise ArgumentError, "Specified index length #{idx.size} not equal to"\
1324
1353
  "dataframe size #{ncols}" if idx.size != ncols
@@ -1377,9 +1406,9 @@ module Daru
1377
1406
  end
1378
1407
  end
1379
1408
 
1380
- # Sorts a dataframe (ascending/descending)according to the given sequence of
1409
+ # Sorts a dataframe (ascending/descending) according to the given sequence of
1381
1410
  # vectors, using the attributes provided in the blocks.
1382
- #
1411
+ #
1383
1412
  # @param order [Array] The order of vector names in which the DataFrame
1384
1413
  # should be sorted.
1385
1414
  # @param [Hash] opts The options to sort with.
@@ -1387,21 +1416,21 @@ module Daru
1387
1416
  # or descending order. Specify Array corresponding to *order* for multiple
1388
1417
  # sort orders.
1389
1418
  # @option opts [Hash] :by ({|a,b| a <=> b}) Specify attributes of objects to
1390
- # to be used for sorting, for each vector name in *order* as a hash of
1419
+ # be used for sorting, for each vector name in *order* as a hash of
1391
1420
  # vector name and lambda pairs. In case a lambda for a vector is not
1392
1421
  # specified, the default will be used.
1393
- #
1422
+ #
1394
1423
  # == Usage
1395
- #
1424
+ #
1396
1425
  # df = Daru::DataFrame.new({a: [-3,2,-1,4], b: [4,3,2,1]})
1397
- #
1426
+ #
1398
1427
  # #<Daru::DataFrame:140630680 @name = 04e00197-f8d5-4161-bca2-93266bfabc6f @size = 4>
1399
- # # a b
1400
- # # 0 -3 4
1401
- # # 1 2 3
1402
- # # 2 -1 2
1403
- # # 3 4 1
1404
- # df.sort([:a], by: { a: lambda { |a,b| a.abs <=> b.abs } })
1428
+ # # a b
1429
+ # # 0 -3 4
1430
+ # # 1 2 3
1431
+ # # 2 -1 2
1432
+ # # 3 4 1
1433
+ # df.sort([:a], by: { a: lambda { |a,b| a.abs <=> b.abs } })
1405
1434
  def sort! vector_order, opts={}
1406
1435
  raise ArgumentError, "Required atleast one vector name" if vector_order.size < 1
1407
1436
  opts = {
@@ -1426,46 +1455,46 @@ module Daru
1426
1455
 
1427
1456
  # Pivots a data frame on specified vectors and applies an aggregate function
1428
1457
  # to quickly generate a summary.
1429
- #
1458
+ #
1430
1459
  # == Options
1431
- #
1460
+ #
1432
1461
  # +:index+ - Keys to group by on the pivot table row index. Pass vector names
1433
1462
  # contained in an Array.
1434
- #
1463
+ #
1435
1464
  # +:vectors+ - Keys to group by on the pivot table column index. Pass vector
1436
1465
  # names contained in an Array.
1437
- #
1466
+ #
1438
1467
  # +:agg+ - Function to aggregate the grouped values. Default to *:mean*. Can
1439
- # use any of the statistics functions applicable on Vectors that can be found in
1468
+ # use any of the statistics functions applicable on Vectors that can be found in
1440
1469
  # the Daru::Statistics::Vector module.
1441
- #
1442
- # +:values+ - Columns to aggregate. Will consider all numeric columns not
1470
+ #
1471
+ # +:values+ - Columns to aggregate. Will consider all numeric columns not
1443
1472
  # specified in *:index* or *:vectors*. Optional.
1444
- #
1473
+ #
1445
1474
  # == Usage
1446
- #
1475
+ #
1447
1476
  # df = Daru::DataFrame.new({
1448
- # a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
1477
+ # a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
1449
1478
  # b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
1450
1479
  # c: ['small','large','large','small','small','large','small','large','small'],
1451
1480
  # d: [1,2,2,3,3,4,5,6,7],
1452
1481
  # e: [2,4,4,6,6,8,10,12,14]
1453
1482
  # })
1454
1483
  # df.pivot_table(index: [:a], vectors: [:b], agg: :sum, values: :e)
1455
- #
1456
- # #=>
1484
+ #
1485
+ # #=>
1457
1486
  # # #<Daru::DataFrame:88342020 @name = 08cdaf4e-b154-4186-9084-e76dd191b2c9 @size = 2>
1458
- # # [:e, :one] [:e, :two]
1459
- # # [:bar] 18 26
1460
- # # [:foo] 10 12
1487
+ # # [:e, :one] [:e, :two]
1488
+ # # [:bar] 18 26
1489
+ # # [:foo] 10 12
1461
1490
  def pivot_table opts={}
1462
- raise ArgumentError,
1491
+ raise ArgumentError,
1463
1492
  "Specify grouping index" if !opts[:index] or opts[:index].empty?
1464
1493
 
1465
1494
  index = opts[:index]
1466
1495
  vectors = opts[:vectors] || []
1467
1496
  aggregate_function = opts[:agg] || :mean
1468
- values =
1497
+ values =
1469
1498
  if opts[:values].is_a?(Symbol)
1470
1499
  [opts[:values]]
1471
1500
  elsif opts[:values].is_a?(Array)
@@ -1473,7 +1502,7 @@ module Daru
1473
1502
  else # nil
1474
1503
  (@vectors.to_a - (index | vectors)) & numeric_vector_names
1475
1504
  end
1476
-
1505
+
1477
1506
  raise IndexError, "No numeric vectors to aggregate" if values.empty?
1478
1507
 
1479
1508
  grouped = group_by(index)
@@ -1524,7 +1553,7 @@ module Daru
1524
1553
  end
1525
1554
  end
1526
1555
 
1527
- # Merge vectors from two DataFrames. In case of name collision,
1556
+ # Merge vectors from two DataFrames. In case of name collision,
1528
1557
  # the vectors names are changed to x_1, x_2 ....
1529
1558
  #
1530
1559
  # @return {Daru::DataFrame}
@@ -1545,9 +1574,9 @@ module Daru
1545
1574
  df_new
1546
1575
  end
1547
1576
 
1548
- # Join 2 DataFrames with SQL style joins. Currently supports inner, left
1577
+ # Join 2 DataFrames with SQL style joins. Currently supports inner, left
1549
1578
  # outer, right outer and full outer joins.
1550
- #
1579
+ #
1551
1580
  # @param [Daru::DataFrame] other_df Another DataFrame on which the join is
1552
1581
  # to be performed.
1553
1582
  # @param [Hash] opts Options Hash
@@ -1565,11 +1594,11 @@ module Daru
1565
1594
  # :name => ['Rutabaga', 'Pirate', 'Darth Vader', 'Ninja']
1566
1595
  # })
1567
1596
  # left.join(right, how: :inner, on: [:name])
1568
- # #=>
1597
+ # #=>
1569
1598
  # ##<Daru::DataFrame:82416700 @name = 74c0811b-76c6-4c42-ac93-e6458e82afb0 @size = 2>
1570
- # # id_1 name id_2
1571
- # # 0 1 Pirate 2
1572
- # # 1 3 Ninja 4
1599
+ # # id_1 name id_2
1600
+ # # 0 1 Pirate 2
1601
+ # # 1 3 Ninja 4
1573
1602
  def join(other_df,opts={})
1574
1603
  Daru::Core::Merge.join(self, other_df, opts)
1575
1604
  end
@@ -1586,7 +1615,7 @@ module Daru
1586
1615
  # the field of first parameters will be copied verbatim
1587
1616
  # to new dataset, and fields which responds to second
1588
1617
  # pattern will be added one case for each different %n.
1589
- #
1618
+ #
1590
1619
  # @example
1591
1620
  # cases=[
1592
1621
  # ['1','george','red',10,'blue',20,nil,nil],
@@ -1607,9 +1636,9 @@ module Daru
1607
1636
  ds_vars = parent_fields.dup
1608
1637
  vars = []
1609
1638
  max_n = 0
1610
- h = parent_fields.inject({}) { |a,v|
1639
+ h = parent_fields.inject({}) { |a,v|
1611
1640
  a[v] = Daru::Vector.new([])
1612
- a
1641
+ a
1613
1642
  }
1614
1643
  # Adding _row_id
1615
1644
  h['_col_id'] = Daru::Vector.new([])
@@ -1663,12 +1692,12 @@ module Daru
1663
1692
  end
1664
1693
 
1665
1694
  # Create a sql, basen on a given Dataset
1666
- #
1695
+ #
1667
1696
  # == Arguments
1668
- #
1697
+ #
1669
1698
  # * table - String specifying name of the table that will created in SQL.
1670
1699
  # * charset - Character set. Default is "UTF8".
1671
- #
1700
+ #
1672
1701
  # @example
1673
1702
  #
1674
1703
  # ds = Daru::DataFrame.new({
@@ -1717,17 +1746,17 @@ module Daru
1717
1746
  def to_nmatrix
1718
1747
  numerics_as_arrays = []
1719
1748
  each_vector do |vector|
1720
- numerics_as_arrays << vector.to_a if(vector.type == :numeric and
1749
+ numerics_as_arrays << vector.to_a if(vector.type == :numeric and
1721
1750
  vector.missing_positions.size == 0)
1722
1751
  end
1723
1752
 
1724
1753
  numerics_as_arrays.transpose.to_nm
1725
1754
  end
1726
-
1755
+
1727
1756
  # Converts the DataFrame into an array of hashes where key is vector name
1728
- # and value is the corresponding element. The 0th index of the array contains
1729
- # the array of hashes while the 1th index contains the indexes of each row
1730
- # of the dataframe. Each element in the index array corresponds to its row
1757
+ # and value is the corresponding element. The 0th index of the array contains
1758
+ # the array of hashes while the 1th index contains the indexes of each row
1759
+ # of the dataframe. Each element in the index array corresponds to its row
1731
1760
  # in the array of hashes, which has the same index.
1732
1761
  def to_a
1733
1762
  arry = [[],[]]
@@ -1762,10 +1791,10 @@ module Daru
1762
1791
 
1763
1792
  # Convert to html for IRuby.
1764
1793
  def to_html threshold=30
1765
- html = "<table>" +
1794
+ html = "<table>" +
1766
1795
  "<tr>" +
1767
- "<th colspan=\"#{@vectors.size+1}\">" +
1768
- "Daru::DataFrame:#{self.object_id} " + " rows: #{nrows} " + " cols: #{ncols}"
1796
+ "<th colspan=\"#{@vectors.size+1}\">" +
1797
+ "Daru::DataFrame:#{self.object_id} " + " rows: #{nrows} " + " cols: #{ncols}"
1769
1798
  "</th>" +
1770
1799
  "</tr>"
1771
1800
  html +='<tr><th></th>'
@@ -1791,7 +1820,7 @@ module Daru
1791
1820
  html += '<tr>'
1792
1821
  html += "<td>" + last_index.to_s + "</td>"
1793
1822
  (0..(ncols - 1)).to_a.each do |i|
1794
- html += '<td>' + last_row[i].to_s + '</td>'
1823
+ html += '<td>' + last_row[i].to_s + '</td>'
1795
1824
  end
1796
1825
  html += '</tr>'
1797
1826
  break
@@ -1825,21 +1854,21 @@ module Daru
1825
1854
  # == Arguements
1826
1855
  #
1827
1856
  # * filename - Path of CSV file where the DataFrame is to be saved.
1828
- #
1857
+ #
1829
1858
  # == Options
1830
- #
1859
+ #
1831
1860
  # * convert_comma - If set to *true*, will convert any commas in any
1832
1861
  # of the data to full stops ('.').
1833
- # All the options accepted by CSV.read() can also be passed into this
1862
+ # All the options accepted by CSV.read() can also be passed into this
1834
1863
  # function.
1835
1864
  def write_csv filename, opts={}
1836
1865
  Daru::IO.dataframe_write_csv self, filename, opts
1837
1866
  end
1838
1867
 
1839
1868
  # Write this dataframe to an Excel Spreadsheet
1840
- #
1869
+ #
1841
1870
  # == Arguments
1842
- #
1871
+ #
1843
1872
  # * filename - The path of the file where the DataFrame should be written.
1844
1873
  def write_excel filename, opts={}
1845
1874
  Daru::IO.dataframe_write_excel self, filename, opts
@@ -1848,10 +1877,10 @@ module Daru
1848
1877
  # Insert each case of the Dataset on the selected table
1849
1878
  #
1850
1879
  # == Arguments
1851
- #
1880
+ #
1852
1881
  # * dbh - DBI database connection object.
1853
1882
  # * query - Query string.
1854
- #
1883
+ #
1855
1884
  # == Usage
1856
1885
  #
1857
1886
  # ds = Daru::DataFrame.new({:id=>Daru::Vector.new([1,2,3]), :name=>Daru::Vector.new(["a","b","c"])})
@@ -1869,8 +1898,8 @@ module Daru
1869
1898
 
1870
1899
  def _dump depth
1871
1900
  Marshal.dump({
1872
- data: @data,
1873
- index: @index.to_a,
1901
+ data: @data,
1902
+ index: @index.to_a,
1874
1903
  order: @vectors.to_a,
1875
1904
  name: @name
1876
1905
  })
@@ -1878,14 +1907,14 @@ module Daru
1878
1907
 
1879
1908
  def self._load data
1880
1909
  h = Marshal.load data
1881
- Daru::DataFrame.new(h[:data],
1882
- index: h[:index],
1910
+ Daru::DataFrame.new(h[:data],
1911
+ index: h[:index],
1883
1912
  order: h[:order],
1884
1913
  name: h[:name])
1885
1914
  end
1886
1915
 
1887
1916
  # Change dtypes of vectors by supplying a hash of :vector_name => :new_dtype
1888
- #
1917
+ #
1889
1918
  # == Usage
1890
1919
  # df = Daru::DataFrame.new({a: [1,2,3], b: [1,2,3], c: [1,2,3]})
1891
1920
  # df.recast a: :nmatrix, c: :nmatrix
@@ -1908,7 +1937,7 @@ module Daru
1908
1937
  # Pretty print in a nice table format for the command line (irb/pry/iruby)
1909
1938
  def inspect spacing=10, threshold=15
1910
1939
  longest = [@name.to_s.size,
1911
- (@vectors.map(&:to_s).map(&:size).max || 0),
1940
+ (@vectors.map(&:to_s).map(&:size).max || 0),
1912
1941
  (@index .map(&:to_s).map(&:size).max || 0),
1913
1942
  (@data .map{ |v| v.map(&:to_s).map(&:size).max}.max || 0)].max
1914
1943
 
@@ -1918,7 +1947,7 @@ module Daru
1918
1947
  formatter = "\n"
1919
1948
 
1920
1949
  (@vectors.size + 1).times { formatter += "%#{longest}.#{longest}s " }
1921
- content += "\n#<" + self.class.to_s + ":" + self.object_id.to_s + " @name = " +
1950
+ content += "\n#<" + self.class.to_s + ":" + self.object_id.to_s + " @name = " +
1922
1951
  name.to_s + " @size = " + @size.to_s + ">"
1923
1952
  content += sprintf formatter, "" , *@vectors.map(&:to_s)
1924
1953
  row_num = 1
@@ -1945,10 +1974,10 @@ module Daru
1945
1974
  end
1946
1975
 
1947
1976
  def == other
1948
- self.class == other.class and
1949
- @size == other.size and
1977
+ self.class == other.class and
1978
+ @size == other.size and
1950
1979
  @index == other.index and
1951
- @vectors == other.vectors and
1980
+ @vectors == other.vectors and
1952
1981
  @vectors.to_a.all? { |v| self[v] == other[v] }
1953
1982
  end
1954
1983
 
@@ -1977,9 +2006,9 @@ module Daru
1977
2006
  end
1978
2007
 
1979
2008
  # == Arguments
1980
- #
1981
- # vector_order -
1982
- # index -
2009
+ #
2010
+ # vector_order -
2011
+ # index -
1983
2012
  # by -
1984
2013
  # ascending -
1985
2014
  # left_lower -
@@ -2120,7 +2149,7 @@ module Daru
2120
2149
  end
2121
2150
 
2122
2151
  order = names.is_a?(Array) ? Daru::Index.new(names) : names
2123
- Daru::DataFrame.new(new_vcs, order: order,
2152
+ Daru::DataFrame.new(new_vcs, order: order,
2124
2153
  index: @index, name: @name)
2125
2154
  end
2126
2155
  end
@@ -2134,7 +2163,7 @@ module Daru
2134
2163
  return Daru::Vector.new(populate_row_for(pos), index: @vectors, name: pos)
2135
2164
  else
2136
2165
  new_rows = pos.map { |tuple| populate_row_for(tuple) }
2137
-
2166
+
2138
2167
  if !location.is_a?(Range) and names.size < @index.width
2139
2168
  pos = pos.drop_left_level names.size
2140
2169
  end
@@ -2143,7 +2172,7 @@ module Daru
2143
2172
  new_rows, order: @vectors, name: @name, index: pos)
2144
2173
  end
2145
2174
  else
2146
- if names[1].nil?
2175
+ if names[1].nil?
2147
2176
  names = @index[location]
2148
2177
  if names.is_a?(Numeric)
2149
2178
  row = []
@@ -2159,8 +2188,8 @@ module Daru
2159
2188
  names.each do |name|
2160
2189
  rows << self.row[name].to_a
2161
2190
  end
2162
-
2163
- Daru::DataFrame.rows rows, index: names ,name: @name, order: @vectors
2191
+
2192
+ Daru::DataFrame.rows rows, index: names ,name: @name, order: @vectors
2164
2193
  end
2165
2194
  end
2166
2195
 
@@ -2171,11 +2200,11 @@ module Daru
2171
2200
  end
2172
2201
 
2173
2202
  def insert_or_modify_vector name, vector
2174
- name = name[0] unless @vectors.is_a?(MultiIndex)
2203
+ name = name[0] unless @vectors.is_a?(MultiIndex)
2175
2204
  v = nil
2176
2205
 
2177
2206
  if @index.empty?
2178
- v = vector.is_a?(Daru::Vector) ? vector : Daru::Vector.new(vector.to_a)
2207
+ v = vector.is_a?(Daru::Vector) ? vector : Daru::Vector.new(vector.to_a)
2179
2208
  @index = v.index
2180
2209
  assign_or_add_vector name, v
2181
2210
  set_size
@@ -2217,7 +2246,7 @@ module Daru
2217
2246
  #FIXME: fix this jugaad. need to make changes in Indexing itself.
2218
2247
  pos = @vectors[name]
2219
2248
 
2220
- if !pos.kind_of?(Daru::Index) and pos == name and
2249
+ if !pos.kind_of?(Daru::Index) and pos == name and
2221
2250
  (@vectors.include?(name) or (pos.is_a?(Integer) and pos < @data.size))
2222
2251
  @data[pos] = v
2223
2252
  elsif pos.kind_of?(Daru::Index)
@@ -2227,10 +2256,10 @@ module Daru
2227
2256
  else
2228
2257
  @vectors = @vectors | [name] if !@vectors.include?(name)
2229
2258
  @data[@vectors[name]] = v
2230
- end
2259
+ end
2231
2260
  end
2232
2261
 
2233
- def insert_or_modify_row name, vector
2262
+ def insert_or_modify_row name, vector
2234
2263
  if index.is_a?(MultiIndex)
2235
2264
  # TODO
2236
2265
  else
@@ -2264,7 +2293,7 @@ module Daru
2264
2293
  end
2265
2294
 
2266
2295
  def validate_labels
2267
- raise IndexError, "Expected equal number of vector names (#{@vectors.size}) for number of vectors (#{@data.size})." if
2296
+ raise IndexError, "Expected equal number of vector names (#{@vectors.size}) for number of vectors (#{@data.size})." if
2268
2297
  @vectors and @vectors.size != @data.size
2269
2298
 
2270
2299
  raise IndexError, "Expected number of indexes same as number of rows" if
@@ -2330,7 +2359,7 @@ module Daru
2330
2359
  end
2331
2360
 
2332
2361
  def symbolize arry
2333
- symbolized_arry =
2362
+ symbolized_arry =
2334
2363
  if arry.all? { |e| e.is_a?(Array) }
2335
2364
  arry.map do |sub_arry|
2336
2365
  sub_arry.map do |e|
@@ -2344,4 +2373,4 @@ module Daru
2344
2373
  symbolized_arry
2345
2374
  end
2346
2375
  end
2347
- end
2376
+ end