daru_lite 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -0,0 +1,75 @@
1
module DaruLite
  class DataFrame
    # Helpers for locating and filling missing values in a DataFrame.
    module Missable
      extend Gem::Deprecate

      # Rolling fillna (in place).
      # Replaces all Float::NAN and nil values with the preceding or following
      # value by calling +rolling_fillna!+ on every column vector.
      #
      # @param direction [Symbol] (:forward, :backward) whether the replacement
      #   value is the preceding or the following one
      # @return [self]
      #
      # @example
      #   df = DaruLite::DataFrame.new({
      #    a: [1,    2,          3,   nil,        Float::NAN, nil, 1,   7],
      #    b: [:a,  :b,          nil, Float::NAN, nil,        3,   5,   nil],
      #    c: ['a',  Float::NAN, 3,   4,          3,          5,   nil, 7]
      #   })
      #
      #   => #<DaruLite::DataFrame(8x3)>
      #        a   b   c
      #    0   1   a   a
      #    1   2   b NaN
      #    2   3 nil   3
      #    3 nil NaN   4
      #    4 NaN nil   3
      #    5 nil   3   5
      #    6   1   5 nil
      #    7   7 nil   7
      #
      #   2.3.3 :068 > df.rolling_fillna(:forward)
      #   => #<DaruLite::DataFrame(8x3)>
      #        a   b   c
      #    0   1   a   a
      #    1   2   b   a
      #    2   3   b   3
      #    3   3   b   4
      #    4   3   b   3
      #    5   3   3   5
      #    6   1   5   5
      #    7   7   5   7
      #
      def rolling_fillna!(direction = :forward)
        @data.each do |column|
          column.rolling_fillna!(direction)
        end

        self
      end

      # Non-destructive version of #rolling_fillna!: works on a +dup+ of the
      # receiver and returns that copy.
      def rolling_fillna(direction = :forward)
        copy = dup
        copy.rolling_fillna!(direction)
      end

      # Return a vector with the number of missing values in each row.
      #
      # == Arguments
      #
      # * +missing_values+ - An Array of the values that should be
      # treated as 'missing'. The default missing value is *nil*.
      def missing_values_rows(missing_values = [nil])
        counts = each_row.map { |row| row.indexes(*missing_values).size }

        DaruLite::Vector.new(counts, index: @index, name: "#{@name}_missing_rows")
      end

      # TODO: remove next version
      alias vector_missing_values missing_values_rows

      # True when any column contains one of DaruLite::MISSING_VALUES.
      # Deprecated (see the +deprecate+ calls below) in favour of include_values?.
      def has_missing_data?
        @data.any? do |column|
          column.include_values?(*DaruLite::MISSING_VALUES)
        end
      end
      alias flawed? has_missing_data?
      deprecate :has_missing_data?, :include_values?, 2016, 10
      deprecate :flawed?, :include_values?, 2016, 10
    end
  end
end
@@ -0,0 +1,108 @@
1
module DaruLite
  class DataFrame
    # Pivot-table support for DataFrame.
    module Pivotable
      # Pivots a data frame on specified vectors and applies an aggregate function
      # to quickly generate a summary.
      #
      # == Options
      #
      # +:index+ - Keys to group by on the pivot table row index. Pass vector names
      # contained in an Array. A single vector name is also accepted and is
      # treated as a one-element Array.
      #
      # +:vectors+ - Keys to group by on the pivot table column index. Pass vector
      # names contained in an Array (a single name is wrapped the same way).
      #
      # +:agg+ - Function to aggregate the grouped values. Defaults to *:mean*. Can
      # use any of the statistics functions applicable on Vectors that can be found in
      # the DaruLite::Statistics::Vector module.
      #
      # +:values+ - Columns to aggregate, as a single name or an Array of names.
      # Optional: when omitted, every numeric column not named in *:index* or
      # *:vectors* is aggregated.
      #
      # == Usage
      #
      #   df = DaruLite::DataFrame.new({
      #     a: ['foo'  ,  'foo',  'foo',  'foo',  'foo',  'bar',  'bar',  'bar',  'bar'],
      #     b: ['one'  ,  'one',  'one',  'two',  'two',  'one',  'one',  'two',  'two'],
      #     c: ['small','large','large','small','small','large','small','large','small'],
      #     d: [1,2,2,3,3,4,5,6,7],
      #     e: [2,4,4,6,6,8,10,12,14]
      #   })
      #   df.pivot_table(index: [:a], vectors: [:b], agg: :sum, values: :e)
      #
      #   #=>
      #   # #<DaruLite::DataFrame:88342020 @name = 08cdaf4e-b154-4186-9084-e76dd191b2c9 @size = 2>
      #   #            [:e, :one] [:e, :two]
      #   #     [:bar]         18         26
      #   #     [:foo]         10         12
      #
      # @raise [ArgumentError] when +:index+ is missing or empty
      # @raise [IndexError] when there is no column left to aggregate
      def pivot_table(opts = {})
        # Array() lets a bare vector name through. The emptiness guard always
        # tolerated that form, but the set arithmetic in prepare_pivot_values
        # needs real Arrays.
        index = Array(opts[:index])
        raise ArgumentError, 'Specify grouping index' if index.empty?

        vectors            = Array(opts[:vectors])
        aggregate_function = opts[:agg] || :mean
        values             = prepare_pivot_values index, vectors, opts
        raise IndexError, 'No numeric vectors to aggregate' if values.empty?

        grouped = group_by(index)
        # Without column keys the grouped aggregate already is the answer.
        return grouped.send(aggregate_function) if vectors.empty?

        super_hash = make_pivot_hash grouped, vectors, values, aggregate_function

        pivot_dataframe super_hash
      end

      private

      # Resolves the list of columns to aggregate from opts[:values].
      def prepare_pivot_values(index, vectors, opts)
        case opts[:values]
        when nil # values not specified at all.
          (@vectors.to_a - (index | vectors)) & numeric_vector_names
        when Array # multiple values specified.
          opts[:values]
        else # single value specified.
          [opts[:values]]
        end
      end

      # Builds { group_name => { [value, *column_keys] => aggregate } }.
      # Raw cell values are collected first and reduced afterwards by
      # setup_pivot_aggregates.
      def make_pivot_hash(grouped, vectors, values, aggregate_function)
        # Column lookups do not depend on the row, so do them once.
        key_columns = vectors.map { |v| self[v] }

        grouped.groups.transform_values { |_| {} }.tap do |super_hash|
          values.each do |value|
            value_column = self[value]

            grouped.groups.each do |group_name, row_numbers|
              sub_hash = super_hash[group_name]

              row_numbers.each do |num|
                arry = [value, *key_columns.map { |column| column[num] }]
                (sub_hash[arry] ||= []) << value_column[num]
              end
            end
          end

          setup_pivot_aggregates super_hash, aggregate_function
        end
      end

      # Replaces every collected Array of cell values with the result of
      # +aggregate_function+ applied to a DaruLite::Vector of those values.
      def setup_pivot_aggregates(super_hash, aggregate_function)
        super_hash.each_value do |sub_hash|
          sub_hash.transform_values! do |aggregates|
            DaruLite::Vector.new(aggregates).send(aggregate_function)
          end
        end
      end

      # Turns the nested aggregate hash into a DataFrame whose rows and columns
      # are MultiIndexes built from the group names and the column keys.
      def pivot_dataframe(super_hash)
        df_index   = DaruLite::MultiIndex.from_tuples super_hash.keys
        df_vectors = DaruLite::MultiIndex.from_tuples super_hash.values.flat_map(&:keys).uniq

        DaruLite::DataFrame.new({}, index: df_index, order: df_vectors).tap do |pivoted_dataframe|
          super_hash.each do |row_index, sub_h|
            sub_h.each do |vector_index, val|
              pivoted_dataframe[vector_index][row_index] = val
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
module DaruLite
  class DataFrame
    # Predicate-style queries over a DataFrame's vectors and rows.
    module Queryable
      # Check if a vector is present
      #
      # @param vector the vector name to look up in the frame's vector index
      # @return [true, false]
      def has_vector?(vector)
        @vectors.include? vector
      end

      # Check if any of given values occur in the data frame
      # @param [Array] values to check for
      # @return [true, false] true if any of the given values occur in the
      #   dataframe, false otherwise
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1,    2,          3,   nil,        Float::NAN, nil, 1,   7],
      #     b: [:a,  :b,          nil, Float::NAN, nil,        3,   5,   8],
      #     c: ['a',  Float::NAN, 3,   4,          3,          5,   nil, 7]
      #   }, index: 11..18)
      #   df.include_values? nil
      #   # => true
      def include_values?(*values)
        @data.any? { |vec| vec.include_values?(*values) }
      end

      # Works like Array#any?.
      #
      # @param [Symbol] axis (:vector) The axis to iterate over. Can be :vector
      #   (or its alias :column) or :row. A DaruLite::Vector object is yielded
      #   in the block.
      # @raise [ArgumentError] for any other axis
      # @example Using any?
      #   df = DaruLite::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
      #   df.any?(:row) do |row|
      #     row[:a] < 3 and row[:b] == 'b'
      #   end #=> true
      def any?(axis = :vector, &block)
        case axis
        when :vector, :column
          @data.any?(&block)
        when :row
          # Delegating to the row enumerator (as #all? does) keeps the
          # block-less call working instead of raising LocalJumpError.
          each_row.any?(&block)
        else
          raise ArgumentError, "Unidentified axis #{axis}"
        end
      end

      # Works like Array#all?
      #
      # @param [Symbol] axis (:vector) The axis to iterate over. Can be :vector
      #   (or its alias :column) or :row. A DaruLite::Vector object is yielded
      #   in the block.
      # @raise [ArgumentError] for any other axis
      # @example Using all?
      #   df = DaruLite::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
      #   df.all?(:row) do |row|
      #     row[:a] < 10
      #   end #=> true
      def all?(axis = :vector, &block)
        case axis
        when :vector, :column
          @data.all?(&block)
        when :row
          each_row.all?(&block)
        else
          raise ArgumentError, "Unidentified axis #{axis}"
        end
      end
    end
  end
end
@@ -0,0 +1,109 @@
1
module DaruLite
  class DataFrame
    # Mutators that assign rows and vectors of a DataFrame.
    module Setable
      # Set rows by positions
      # @param [Array<Integer>] positions positions of rows to set
      # @param [Array, DaruLite::Vector] vector vector to be assigned
      # @raise [SizeError] when the vector length differs from the number of
      #   vectors in the frame
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'b', 'c']
      #   })
      #   df.set_row_at [0, 1], ['x', 'x']
      #   df
      #   #=> #<DaruLite::DataFrame(3x2)>
      #   #       a   b
      #   #   0   x   x
      #   #   1   x   x
      #   #   2   3   c
      def set_row_at(positions, vector)
        validate_positions(*positions, nrows)
        vector =
          if vector.is_a? DaruLite::Vector
            # Align the incoming values with this frame's vector names.
            vector.reindex @vectors
          else
            DaruLite::Vector.new vector
          end

        raise SizeError, 'Vector length should match row length' if
          vector.size != @vectors.size

        # Each column receives its own cell of the row at every given position.
        @data.each_with_index do |vec, pos|
          vec.set_at(positions, vector.at(pos))
        end
        @index = @data[0].index
        set_size
      end

      # Set vectors by positions
      # @param [Array<Integer>] positions positions of vectors to set. When the
      #   last element is +:row+ the call is forwarded to #set_row_at with the
      #   remaining positions; the caller's array is left untouched.
      # @param [Array, DaruLite::Vector] vector vector to be assigned
      # @raise [SizeError] when the vector length differs from the index length
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'b', 'c']
      #   })
      #   df.set_at [0], ['x', 'y', 'z']
      #   df
      #   #=> #<DaruLite::DataFrame(3x2)>
      #   #       a   b
      #   #   0   x   a
      #   #   1   y   b
      #   #   2   z   c
      def set_at(positions, vector)
        # Slice instead of pop so the caller's positions array is not mutated.
        return set_row_at(positions[0...-1], vector) if positions.last == :row

        validate_positions(*positions, ncols)
        vector =
          if vector.is_a? DaruLite::Vector
            vector.reindex @index
          else
            DaruLite::Vector.new vector
          end

        raise SizeError, 'Vector length should match index length' if
          vector.size != @index.size

        positions.each { |pos| @data[pos] = vector }
      end

      # Insert a new row/vector of the specified name or modify a previous row.
      # Instead of using this method directly, use df.row[:a] = [1,2,3] to set/create
      # a row ':a' to [1,2,3], or df.vector[:vec] = [1,2,3] for vectors.
      #
      # In case a DaruLite::Vector is specified after the equals sign, the indexes
      # of the vector will be matched against the row/vector indexes of the DataFrame
      # before an insertion is performed. Unmatched indexes will be set to nil.
      def []=(*args)
        vector = args.pop
        axis = extract_axis(args)
        names = args

        dispatch_to_axis axis, :insert_or_modify, names, vector
      end

      # Assigns +row+ through the row accessor at +index+, or at the current
      # size of the frame when no index is given.
      def add_row(row, index = nil)
        self.row[*(index || @size)] = row
      end

      # Assigns +vector+ under the name +n+ (same as <tt>df[n] = vector</tt>).
      def add_vector(n, vector)
        self[n] = vector
      end

      # Appends +source+ as a new vector called +name+ and then reorders the
      # frame so that the new vector sits at position +n+.
      #
      # @param n [Integer] target position of the new vector
      # @param name name of the new vector
      # @param source [Array] values of the new vector
      # @raise [ArgumentError] unless +source+ is an Array
      def insert_vector(n, name, source)
        raise ArgumentError unless source.is_a? Array

        # The vector is labelled with its own column name, not the frame's name.
        vector = DaruLite::Vector.new(source, index: @index, name: name)
        @data << vector
        @vectors = @vectors.add name
        # The new name was appended last; move it to the requested slot.
        ordr = @vectors.dup.to_a
        elmnt = ordr.pop
        ordr.insert n, elmnt
        self.order = ordr
      end
    end
  end
end
@@ -0,0 +1,241 @@
1
module DaruLite
  class DataFrame
    # Reordering of vectors and sorting of rows for DataFrame.
    module Sortable
      # Reorder the vectors in a dataframe
      # @param [Array] order_array new order of the vectors
      # @raise [ArgumentError] unless +order_array+ holds exactly the current
      #   vector names (same names, same multiplicity)
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: [4, 5, 6]
      #   }, order: [:a, :b])
      #   df.order = [:b, :a]
      #   df
      #   # => #<DaruLite::DataFrame(3x2)>
      #   #       b   a
      #   #   0   4   1
      #   #   1   5   2
      #   #   2   6   3
      def order=(order_array)
        raise ArgumentError, 'Invalid order' unless order_array.tally == vectors.to_a.tally

        # Re-runs the constructor on this instance with the new vector order.
        initialize(to_h, order: order_array)
      end

      # Return the dataframe with rotated vector positions: the vector at position
      # count becomes the first vector of the dataframe.
      # If there is only one vector in the dataframe, the dataframe is returned
      # without any change.
      # @param count [Integer] the vector at position count will be the first
      #   vector of the dataframe.
      # @return [self]
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: [4, 5, 6],
      #     total: [5, 7, 9],
      #   })
      #   df.rotate_vectors(-1)
      #   df
      #   # => #<DaruLite::DataFrame(3x3)>
      #   #       total b   a
      #   #   0   5     4   1
      #   #   1   7     5   2
      #   #   2   9     6   3
      def rotate_vectors(count = -1)
        return self unless vectors.many?

        self.order = vectors.to_a.rotate(count)
        self
      end

      # Sorts a dataframe (ascending/descending) in the given priority sequence of
      # vectors, with or without a block.
      #
      # @param vector_order [Array] The order of vector names in which the DataFrame
      #   should be sorted.
      # @param opts [Hash] opts The options to sort with.
      # @option opts [TrueClass,FalseClass,Array] :ascending (true) Sort in ascending
      #   or descending order. Specify Array corresponding to *order* for multiple
      #   sort orders.
      # @option opts [Hash] :by (lambda{|a| a }) Specify attributes of objects
      #   to be used for sorting, for each vector name in *order* as a hash of
      #   vector name and lambda expressions. In case a lambda for a vector is not
      #   specified, the default will be used.
      # @option opts [TrueClass,FalseClass,Array] :handle_nils (false) Handle nils
      #   automatically or not when a block is provided.
      #   If set to True, nils will appear at top after sorting.
      # @raise [ArgumentError] when +vector_order+ is empty or an option cannot
      #   be coerced
      # @return [self]
      #
      # @example Sort a dataframe with a vector sequence.
      #
      #
      #   df = DaruLite::DataFrame.new({a: [1,2,1,2,3], b: [5,4,3,2,1]})
      #
      #   df.sort [:a, :b]
      #   # =>
      #   # <DaruLite::DataFrame:30604000 @name = d6a9294e-2c09-418f-b646-aa9244653444 @size = 5>
      #   #                   a          b
      #   #        2          1          3
      #   #        0          1          5
      #   #        3          2          2
      #   #        1          2          4
      #   #        4          3          1
      #
      # @example Sort a dataframe without a block. Here nils will be handled automatically.
      #
      #   df = DaruLite::DataFrame.new({a: [-3,nil,-1,nil,5], b: [4,3,2,1,4]})
      #
      #   df.sort([:a])
      #   # =>
      #   # <DaruLite::DataFrame:14810920 @name = c07fb5c7-2201-458d-b679-6a1f7ebfe49f @size = 5>
      #   #                    a          b
      #   #         1        nil          3
      #   #         3        nil          1
      #   #         0         -3          4
      #   #         2         -1          2
      #   #         4          5          4
      #
      # @example Sort a dataframe with a block with nils handled automatically.
      #
      #   df = DaruLite::DataFrame.new({a: [nil,-1,1,nil,-1,1], b: ['aaa','aa',nil,'baaa','x',nil] })
      #
      #   df.sort [:b], by: {b: lambda { |a| a.length } }
      #   # NoMethodError: undefined method `length' for nil:NilClass
      #   # from (pry):8:in `block in __pry__'
      #   # NOTE(review): sort_build_row rescues errors raised by the :by lambda
      #   # and substitutes nil, so the error shown above may be stale -- confirm.
      #
      #   df.sort [:b], by: {b: lambda { |a| a.length } }, handle_nils: true
      #
      #   # =>
      #   # <DaruLite::DataFrame:28469540 @name = 5f986508-556f-468b-be0c-88cc3534445c @size = 6>
      #   #                    a          b
      #   #         2          1        nil
      #   #         5          1        nil
      #   #         4         -1          x
      #   #         1         -1         aa
      #   #         0        nil        aaa
      #   #         3        nil       baaa
      #
      # @example Sort a dataframe with a block with nils handled manually.
      #
      #   df = DaruLite::DataFrame.new({a: [nil,-1,1,nil,-1,1], b: ['aaa','aa',nil,'baaa','x',nil] })
      #
      #   # To print nils at the bottom one can use lambda { |a| (a.nil?)?[1]:[0,a.length] }
      #   df.sort [:b], by: {b: lambda { |a| (a.nil?)?[1]:[0,a.length] } }, handle_nils: true
      #
      #   # =>
      #   #<DaruLite::DataFrame:22214180 @name = cd7703c7-1dca-4560-840b-5ea51a852ef9 @size = 6>
      #   #                 a          b
      #   #      4         -1          x
      #   #      1         -1         aa
      #   #      0        nil        aaa
      #   #      3        nil       baaa
      #   #      2          1        nil
      #   #      5          1        nil
      def sort!(vector_order, opts = {})
        raise ArgumentError, 'Required atleast one vector name' if vector_order.empty?

        # To enable sorting with categorical data,
        # map categories to integers preserving their order
        old = convert_categorical_vectors vector_order

        begin
          block = sort_prepare_block vector_order, opts

          order = @index.size.times.sort(&block)
          new_index = @index.reorder order
        ensure
          # Undo the categorical -> integer mapping even when an option is
          # rejected or two values turn out not to be comparable; otherwise the
          # frame would be left holding the integer stand-ins.
          restore_categorical_vectors old
        end

        @data.each do |vector|
          vector.reorder! order
        end

        self.index = new_index

        self
      end

      # Non-destructive version of #sort!
      def sort(vector_order, opts = {})
        dup.sort! vector_order, opts
      end

      private

      # Swaps every categorical vector among +names+ for a vector of its
      # +to_ints+ values and returns [name, original_vector] pairs so the swap
      # can be undone.
      def convert_categorical_vectors(names)
        names.filter_map do |n|
          next unless self[n].category?

          old = [n, self[n]]
          self[n] = DaruLite::Vector.new(self[n].to_ints)
          old
        end
      end

      # Puts back the vectors saved by convert_categorical_vectors.
      def restore_categorical_vectors(old)
        old.each { |name, vector| self[name] = vector }
      end

      # Builds one side of the comparison for a pair of row positions.
      # For a descending vector the value is read from the *other* row (r2),
      # which reverses the comparison for that vector only.
      def sort_build_row(vector_locs, by_blocks, ascending, handle_nils, r1, r2) # rubocop:disable Metrics/ParameterLists
        # Create an array to be used for comparison of two rows in sorting
        vector_locs
          .zip(by_blocks, ascending, handle_nils)
          .map do |vector_loc, by, asc, handle_nil|
            value = @data[vector_loc].data[asc ? r1 : r2]

            if by
              # Best effort: a failing :by lambda yields nil for that cell.
              value = begin
                by.call(value)
              rescue StandardError
                nil
              end
            end

            # Without a :by lambda nils are always handled automatically.
            sort_handle_nils value, asc, handle_nil || !by
          end
      end

      # Wraps +value+ in a [rank, value] pair when nil handling is requested,
      # so that nil cells compare by rank instead of by value.
      def sort_handle_nils(value, asc, handle_nil)
        if !handle_nil
          value
        elsif asc
          [value.nil? ? 0 : 1, value]
        else
          [value.nil? ? 1 : 0, value]
        end
      end

      # Expands a boolean-ish option into an Array of +size+ booleans.
      #
      # @raise [ArgumentError] when an Array of the wrong length or a value of
      #   an unsupported class is given
      def sort_coerce_boolean(opts, symbol, default, size)
        val = opts[symbol]
        case val
        when true, false
          Array.new(size, val)
        when nil
          Array.new(size, default)
        when Array
          raise ArgumentError, "Specify same number of vector names and #{symbol}" if
            size != val.size

          val
        else
          raise ArgumentError, "Can't coerce #{symbol} from #{val.class} to boolean option"
        end
      end

      # Returns the two-argument comparator (over row positions) used by #sort!.
      def sort_prepare_block(vector_order, opts)
        ascending   = sort_coerce_boolean opts, :ascending, true, vector_order.size
        handle_nils = sort_coerce_boolean opts, :handle_nils, false, vector_order.size

        by_blocks = vector_order.map { |v| (opts[:by] || {})[v] }
        vector_locs = vector_order.map { |v| @vectors[v] }

        lambda do |index1, index2|
          # Build left and right array to compare two rows
          left  = sort_build_row vector_locs, by_blocks, ascending, handle_nils, index1, index2
          right = sort_build_row vector_locs, by_blocks, ascending, handle_nils, index2, index1

          # Resolve conflict by Index if all attributes are same
          left << index1
          right << index2
          left <=> right
        end
      end
    end
  end
end