daru_lite 0.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. data/.github/workflows/ci.yml +20 -0
  5. data/.rubocop_todo.yml +35 -33
  6. data/README.md +19 -115
  7. data/daru_lite.gemspec +1 -0
  8. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  9. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  10. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  11. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  12. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  13. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  14. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  15. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  16. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  17. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  18. data/lib/daru_lite/data_frame/missable.rb +75 -0
  19. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  20. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  21. data/lib/daru_lite/data_frame/setable.rb +109 -0
  22. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  23. data/lib/daru_lite/dataframe.rb +142 -2355
  24. data/lib/daru_lite/index/index.rb +13 -0
  25. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  26. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  27. data/lib/daru_lite/vector/calculatable.rb +78 -0
  28. data/lib/daru_lite/vector/convertible.rb +77 -0
  29. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  30. data/lib/daru_lite/vector/fetchable.rb +175 -0
  31. data/lib/daru_lite/vector/filterable.rb +128 -0
  32. data/lib/daru_lite/vector/indexable.rb +77 -0
  33. data/lib/daru_lite/vector/iterable.rb +95 -0
  34. data/lib/daru_lite/vector/joinable.rb +17 -0
  35. data/lib/daru_lite/vector/missable.rb +124 -0
  36. data/lib/daru_lite/vector/queryable.rb +45 -0
  37. data/lib/daru_lite/vector/setable.rb +47 -0
  38. data/lib/daru_lite/vector/sortable.rb +113 -0
  39. data/lib/daru_lite/vector.rb +36 -932
  40. data/lib/daru_lite/version.rb +1 -1
  41. data/spec/data_frame/aggregatable_example.rb +65 -0
  42. data/spec/data_frame/buildable_example.rb +109 -0
  43. data/spec/data_frame/calculatable_example.rb +135 -0
  44. data/spec/data_frame/convertible_example.rb +180 -0
  45. data/spec/data_frame/duplicatable_example.rb +111 -0
  46. data/spec/data_frame/fetchable_example.rb +476 -0
  47. data/spec/data_frame/filterable_example.rb +250 -0
  48. data/spec/data_frame/indexable_example.rb +221 -0
  49. data/spec/data_frame/iterable_example.rb +465 -0
  50. data/spec/data_frame/joinable_example.rb +106 -0
  51. data/spec/data_frame/missable_example.rb +47 -0
  52. data/spec/data_frame/pivotable_example.rb +297 -0
  53. data/spec/data_frame/queryable_example.rb +92 -0
  54. data/spec/data_frame/setable_example.rb +482 -0
  55. data/spec/data_frame/sortable_example.rb +350 -0
  56. data/spec/dataframe_spec.rb +181 -3243
  57. data/spec/index/index_spec.rb +8 -0
  58. data/spec/vector/aggregatable_example.rb +27 -0
  59. data/spec/vector/calculatable_example.rb +82 -0
  60. data/spec/vector/convertible_example.rb +126 -0
  61. data/spec/vector/duplicatable_example.rb +48 -0
  62. data/spec/vector/fetchable_example.rb +463 -0
  63. data/spec/vector/filterable_example.rb +165 -0
  64. data/spec/vector/indexable_example.rb +201 -0
  65. data/spec/vector/iterable_example.rb +111 -0
  66. data/spec/vector/joinable_example.rb +25 -0
  67. data/spec/vector/missable_example.rb +88 -0
  68. data/spec/vector/queryable_example.rb +91 -0
  69. data/spec/vector/setable_example.rb +300 -0
  70. data/spec/vector/sortable_example.rb +242 -0
  71. data/spec/vector_spec.rb +111 -1805
  72. metadata +102 -3
  73. data/.github/ISSUE_TEMPLATE.md +0 -18
@@ -0,0 +1,67 @@
1
+ module DaruLite
2
+ class DataFrame
3
+ module Queryable
4
+ # Check if a vector is present
5
+ def has_vector?(vector)
6
+ @vectors.include? vector
7
+ end
8
+
9
+ # Check if any of given values occur in the data frame
10
+ # @param [Array] values to check for
11
+ # @return [true, false] true if any of the given values occur in the
12
+ # dataframe, false otherwise
13
+ # @example
14
+ # df = DaruLite::DataFrame.new({
15
+ # a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
16
+ # b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
17
+ # c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
18
+ # }, index: 11..18)
19
+ # df.include_values? nil
20
+ # # => true
21
+ def include_values?(*values)
22
+ @data.any? { |vec| vec.include_values?(*values) }
23
+ end
24
+
25
+ # Works like Array#any?.
26
+ #
27
+ # @param [Symbol] axis (:vector) The axis to iterate over. Can be :vector or
28
+ # :row. A DaruLite::Vector object is yielded in the block.
29
+ # @example Using any?
30
+ # df = DaruLite::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
31
+ # df.any?(:row) do |row|
32
+ # row[:a] < 3 and row[:b] == 'b'
33
+ # end #=> true
34
+ def any?(axis = :vector, &block)
35
+ if %i[vector column].include?(axis)
36
+ @data.any?(&block)
37
+ elsif axis == :row
38
+ each_row do |row|
39
+ return true if yield(row)
40
+ end
41
+ false
42
+ else
43
+ raise ArgumentError, "Unidentified axis #{axis}"
44
+ end
45
+ end
46
+
47
+ # Works like Array#all?
48
+ #
49
+ # @param [Symbol] axis (:vector) The axis to iterate over. Can be :vector or
50
+ # :row. A DaruLite::Vector object is yielded in the block.
51
+ # @example Using all?
52
+ # df = DaruLite::DataFrame.new({a: [1,2,3,4,5], b: ['a', 'b', 'c', 'd', 'e']})
53
+ # df.all?(:row) do |row|
54
+ # row[:a] < 10
55
+ # end #=> true
56
+ def all?(axis = :vector, &block)
57
+ if %i[vector column].include?(axis)
58
+ @data.all?(&block)
59
+ elsif axis == :row
60
+ each_row.all?(&block)
61
+ else
62
+ raise ArgumentError, "Unidentified axis #{axis}"
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,109 @@
1
+ module DaruLite
2
+ class DataFrame
3
+ module Setable
4
+ # Set rows by positions
5
+ # @param [Array<Integer>] positions positions of rows to set
6
+ # @param [Array, DaruLite::Vector] vector vector to be assigned
7
+ # @example
8
+ # df = DaruLite::DataFrame.new({
9
+ # a: [1, 2, 3],
10
+ # b: ['a', 'b', 'c']
11
+ # })
12
+ # df.set_row_at [0, 1], ['x', 'x']
13
+ # df
14
+ # #=> #<DaruLite::DataFrame(3x2)>
15
+ # # a b
16
+ # # 0 x x
17
+ # # 1 x x
18
+ # # 2 3 c
19
+ def set_row_at(positions, vector)
20
+ validate_positions(*positions, nrows)
21
+ vector =
22
+ if vector.is_a? DaruLite::Vector
23
+ vector.reindex @vectors
24
+ else
25
+ DaruLite::Vector.new vector
26
+ end
27
+
28
+ raise SizeError, 'Vector length should match row length' if
29
+ vector.size != @vectors.size
30
+
31
+ @data.each_with_index do |vec, pos|
32
+ vec.set_at(positions, vector.at(pos))
33
+ end
34
+ @index = @data[0].index
35
+ set_size
36
+ end
37
+
38
+ # Set vectors by positions
39
+ # @param [Array<Integer>] positions positions of vectors to set
40
+ # @param [Array, DaruLite::Vector] vector vector to be assigned
41
+ # @example
42
+ # df = DaruLite::DataFrame.new({
43
+ # a: [1, 2, 3],
44
+ # b: ['a', 'b', 'c']
45
+ # })
46
+ # df.set_at [0], ['x', 'y', 'z']
47
+ # df
48
+ # #=> #<DaruLite::DataFrame(3x2)>
49
+ # # a b
50
+ # # 0 x a
51
+ # # 1 y b
52
+ # # 2 z c
53
+ def set_at(positions, vector)
54
+ if positions.last == :row
55
+ positions.pop
56
+ return set_row_at(positions, vector)
57
+ end
58
+
59
+ validate_positions(*positions, ncols)
60
+ vector =
61
+ if vector.is_a? DaruLite::Vector
62
+ vector.reindex @index
63
+ else
64
+ DaruLite::Vector.new vector
65
+ end
66
+
67
+ raise SizeError, 'Vector length should match index length' if
68
+ vector.size != @index.size
69
+
70
+ positions.each { |pos| @data[pos] = vector }
71
+ end
72
+
73
+ # Insert a new row/vector of the specified name or modify a previous row.
74
+ # Instead of using this method directly, use df.row[:a] = [1,2,3] to set/create
75
+ # a row ':a' to [1,2,3], or df.vector[:vec] = [1,2,3] for vectors.
76
+ #
77
+ # In case a DaruLite::Vector is specified after the equality sign, the indexes
78
+ # of the vector will be matched against the row/vector indexes of the DataFrame
79
+ # before an insertion is performed. Unmatched indexes will be set to nil.
80
+ def []=(*args)
81
+ vector = args.pop
82
+ axis = extract_axis(args)
83
+ names = args
84
+
85
+ dispatch_to_axis axis, :insert_or_modify, names, vector
86
+ end
87
+
88
+ def add_row(row, index = nil)
89
+ self.row[*(index || @size)] = row
90
+ end
91
+
92
+ def add_vector(n, vector)
93
+ self[n] = vector
94
+ end
95
+
96
+ def insert_vector(n, name, source)
97
+ raise ArgumentError unless source.is_a? Array
98
+
99
+ vector = DaruLite::Vector.new(source, index: @index, name: @name)
100
+ @data << vector
101
+ @vectors = @vectors.add name
102
+ ordr = @vectors.dup.to_a
103
+ elmnt = ordr.pop
104
+ ordr.insert n, elmnt
105
+ self.order = ordr
106
+ end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,241 @@
1
+ module DaruLite
2
+ class DataFrame
3
+ module Sortable
4
+ # Reorder the vectors in a dataframe
5
+ # @param [Array] order_array new order of the vectors
6
+ # @example
7
+ # df = DaruLite::DataFrame.new({
8
+ # a: [1, 2, 3],
9
+ # b: [4, 5, 6]
10
+ # }, order: [:a, :b])
11
+ # df.order = [:b, :a]
12
+ # df
13
+ # # => #<DaruLite::DataFrame(3x2)>
14
+ # # b a
15
+ # # 0 4 1
16
+ # # 1 5 2
17
+ # # 2 6 3
18
+ def order=(order_array)
19
+ raise ArgumentError, 'Invalid order' unless order_array.tally == vectors.to_a.tally
20
+
21
+ initialize(to_h, order: order_array)
22
+ end
23
+
24
+ # Return the dataframe with its vectors rotated, so that the vector at position count is now
25
+ # the first vector of the dataframe.
26
+ # If there is only one vector in the dataframe, the dataframe is returned without any change.
27
+ # @param [Integer] count the vector at position count will be the first vector of the dataframe.
28
+ # @example
29
+ # df = DaruLite::DataFrame.new({
30
+ # a: [1, 2, 3],
31
+ # b: [4, 5, 6],
32
+ # total: [5, 7, 9],
33
+ # })
34
+ # df.rotate_vectors(-1)
35
+ # df
36
+ # # => #<DaruLite::DataFrame(3x3)>
37
+ # # total b a
38
+ # # 0 5 4 1
39
+ # # 1 7 5 2
40
+ # # 2 9 6 3
41
+ def rotate_vectors(count = -1)
42
+ return self unless vectors.many?
43
+
44
+ self.order = vectors.to_a.rotate(count)
45
+ self
46
+ end
47
+
48
+ # Sorts a dataframe (ascending/descending) in the given priority sequence of
49
+ # vectors, with or without a block.
50
+ #
51
+ # @param vector_order [Array] The order of vector names in which the DataFrame
52
+ # should be sorted.
53
+ # @param opts [Hash] opts The options to sort with.
54
+ # @option opts [TrueClass,FalseClass,Array] :ascending (true) Sort in ascending
55
+ # or descending order. Specify Array corresponding to *order* for multiple
56
+ # sort orders.
57
+ # @option opts [Hash] :by (lambda{|a| a }) Specify attributes of objects to
58
+ # be used for sorting, for each vector name in *order* as a hash of
59
+ # vector name and lambda expressions. In case a lambda for a vector is not
60
+ # specified, the default will be used.
61
+ # @option opts [TrueClass,FalseClass,Array] :handle_nils (false) Handle nils
62
+ # automatically or not when a block is provided.
63
+ # If set to True, nils will appear at top after sorting.
64
+ #
65
+ # @example Sort a dataframe with a vector sequence.
66
+ #
67
+ #
68
+ # df = DaruLite::DataFrame.new({a: [1,2,1,2,3], b: [5,4,3,2,1]})
69
+ #
70
+ # df.sort [:a, :b]
71
+ # # =>
72
+ # # <DaruLite::DataFrame:30604000 @name = d6a9294e-2c09-418f-b646-aa9244653444 @size = 5>
73
+ # # a b
74
+ # # 2 1 3
75
+ # # 0 1 5
76
+ # # 3 2 2
77
+ # # 1 2 4
78
+ # # 4 3 1
79
+ #
80
+ # @example Sort a dataframe without a block. Here nils will be handled automatically.
81
+ #
82
+ # df = DaruLite::DataFrame.new({a: [-3,nil,-1,nil,5], b: [4,3,2,1,4]})
83
+ #
84
+ # df.sort([:a])
85
+ # # =>
86
+ # # <DaruLite::DataFrame:14810920 @name = c07fb5c7-2201-458d-b679-6a1f7ebfe49f @size = 5>
87
+ # # a b
88
+ # # 1 nil 3
89
+ # # 3 nil 1
90
+ # # 0 -3 4
91
+ # # 2 -1 2
92
+ # # 4 5 4
93
+ #
94
+ # @example Sort a dataframe with a block with nils handled automatically.
95
+ #
96
+ # df = DaruLite::DataFrame.new({a: [nil,-1,1,nil,-1,1], b: ['aaa','aa',nil,'baaa','x',nil] })
97
+ #
98
+ # df.sort [:b], by: {b: lambda { |a| a.length } }
99
+ # # NoMethodError: undefined method `length' for nil:NilClass
100
+ # # from (pry):8:in `block in __pry__'
101
+ #
102
+ # df.sort [:b], by: {b: lambda { |a| a.length } }, handle_nils: true
103
+ #
104
+ # # =>
105
+ # # <DaruLite::DataFrame:28469540 @name = 5f986508-556f-468b-be0c-88cc3534445c @size = 6>
106
+ # # a b
107
+ # # 2 1 nil
108
+ # # 5 1 nil
109
+ # # 4 -1 x
110
+ # # 1 -1 aa
111
+ # # 0 nil aaa
112
+ # # 3 nil baaa
113
+ #
114
+ # @example Sort a dataframe with a block with nils handled manually.
115
+ #
116
+ # df = DaruLite::DataFrame.new({a: [nil,-1,1,nil,-1,1], b: ['aaa','aa',nil,'baaa','x',nil] })
117
+ #
118
+ # # To print nils at the bottom one can use lambda { |a| (a.nil?)?[1]:[0,a.length] }
119
+ # df.sort [:b], by: {b: lambda { |a| (a.nil?)?[1]:[0,a.length] } }, handle_nils: true
120
+ #
121
+ # # =>
122
+ # #<DaruLite::DataFrame:22214180 @name = cd7703c7-1dca-4560-840b-5ea51a852ef9 @size = 6>
123
+ # # a b
124
+ # # 4 -1 x
125
+ # # 1 -1 aa
126
+ # # 0 nil aaa
127
+ # # 3 nil baaa
128
+ # # 2 1 nil
129
+ # # 5 1 nil
130
+
131
+ def sort!(vector_order, opts = {})
132
+ raise ArgumentError, 'Required atleast one vector name' if vector_order.empty?
133
+
134
+ # To enable sorting with categorical data,
135
+ # map categories to integers preserving their order
136
+ old = convert_categorical_vectors vector_order
137
+ block = sort_prepare_block vector_order, opts
138
+
139
+ order = @index.size.times.sort(&block)
140
+ new_index = @index.reorder order
141
+
142
+ # Reverse the mapping of categorical data to integers
143
+ restore_categorical_vectors old
144
+
145
+ @data.each do |vector|
146
+ vector.reorder! order
147
+ end
148
+
149
+ self.index = new_index
150
+
151
+ self
152
+ end
153
+
154
+ # Non-destructive version of #sort!
155
+ def sort(vector_order, opts = {})
156
+ dup.sort! vector_order, opts
157
+ end
158
+
159
+ private
160
+
161
+ def convert_categorical_vectors(names)
162
+ names.filter_map do |n|
163
+ next unless self[n].category?
164
+
165
+ old = [n, self[n]]
166
+ self[n] = DaruLite::Vector.new(self[n].to_ints)
167
+ old
168
+ end
169
+ end
170
+
171
+ def restore_categorical_vectors(old)
172
+ old.each { |name, vector| self[name] = vector }
173
+ end
174
+
175
+ def sort_build_row(vector_locs, by_blocks, ascending, handle_nils, r1, r2) # rubocop:disable Metrics/ParameterLists
176
+ # Create an array to be used for comparison of two rows in sorting
177
+ vector_locs
178
+ .zip(by_blocks, ascending, handle_nils)
179
+ .map do |vector_loc, by, asc, handle_nil|
180
+ value = @data[vector_loc].data[asc ? r1 : r2]
181
+
182
+ if by
183
+ value = begin
184
+ by.call(value)
185
+ rescue StandardError
186
+ nil
187
+ end
188
+ end
189
+
190
+ sort_handle_nils value, asc, handle_nil || !by
191
+ end
192
+ end
193
+
194
+ def sort_handle_nils(value, asc, handle_nil)
195
+ if !handle_nil
196
+ value
197
+ elsif asc
198
+ [value.nil? ? 0 : 1, value]
199
+ else
200
+ [value.nil? ? 1 : 0, value]
201
+ end
202
+ end
203
+
204
+ def sort_coerce_boolean(opts, symbol, default, size)
205
+ val = opts[symbol]
206
+ case val
207
+ when true, false
208
+ Array.new(size, val)
209
+ when nil
210
+ Array.new(size, default)
211
+ when Array
212
+ raise ArgumentError, "Specify same number of vector names and #{symbol}" if
213
+ size != val.size
214
+
215
+ val
216
+ else
217
+ raise ArgumentError, "Can't coerce #{symbol} from #{val.class} to boolean option"
218
+ end
219
+ end
220
+
221
+ def sort_prepare_block(vector_order, opts)
222
+ ascending = sort_coerce_boolean opts, :ascending, true, vector_order.size
223
+ handle_nils = sort_coerce_boolean opts, :handle_nils, false, vector_order.size
224
+
225
+ by_blocks = vector_order.map { |v| (opts[:by] || {})[v] }
226
+ vector_locs = vector_order.map { |v| @vectors[v] }
227
+
228
+ lambda do |index1, index2|
229
+ # Build left and right array to compare two rows
230
+ left = sort_build_row vector_locs, by_blocks, ascending, handle_nils, index1, index2
231
+ right = sort_build_row vector_locs, by_blocks, ascending, handle_nils, index2, index1
232
+
233
+ # Resolve conflict by Index if all attributes are same
234
+ left << index1
235
+ right << index2
236
+ left <=> right
237
+ end
238
+ end
239
+ end
240
+ end
241
+ end