daru_lite 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -246,6 +246,19 @@ module DaruLite
246
246
  DaruLite::Index.new(to_a + indexes)
247
247
  end
248
248
 
249
+ # Takes a positional value and returns a new Index without the element at given position
250
+ # @param position [Integer] positional value
251
+ # @return [object] index object
252
+ # @example
253
+ # idx = DaruLite::Index.new [:a, :b, :c]
254
+ # idx.delete_at(0)
255
+ # # => #<DaruLite::Index(2): {b, c}>
256
# Builds a new index of the same class from this index's keys with the
# key at +position+ removed. The receiver is only read through #to_a.
def delete_at(position)
  remaining_keys = to_a.tap { |keys| keys.delete_at(position) }
  self.class.new(remaining_keys)
end
261
+
249
262
  def _dump(*)
250
263
  Marshal.dump(relation_hash: @relation_hash)
251
264
  end
@@ -842,7 +842,7 @@ module DaruLite
842
842
  def emsd(n = 10, wilder = false)
843
843
  result = []
844
844
  emv_return = emv(n, wilder)
845
- emv_return.each do |d|
845
+ emv_return.each do |d| # rubocop:disable Style/MapIntoArray
846
846
  result << (d.nil? ? nil : Math.sqrt(d))
847
847
  end
848
848
  DaruLite::Vector.new(result, index: @index, name: @name)
@@ -0,0 +1,9 @@
1
module DaruLite
  class Vector
    module Aggregatable
      # Groups the vector by delegating to the result of #to_df.
      #
      # @param args arguments forwarded unchanged to #group_by of the
      #   object returned by #to_df
      # @return whatever that #group_by call returns
      def group_by(*args)
        as_frame = to_df
        as_frame.group_by(*args)
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
module DaruLite
  class Vector
    module Calculatable
      # Counts the elements that match any of the given values.
      #
      # @param values [Array] values to look for
      # @return [Integer] number of positions reported by #positions
      # @example
      #   dv = DaruLite::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
      #   dv.count_values nil
      #   # => 2
      def count_values(*values)
        matching_positions = positions(*values)
        matching_positions.size
      end

      # Builds a plain-text summary of the Vector.
      #
      # A header (name, size, non-missing count) is followed by the object
      # or numeric details depending on #type; any other type gets the
      # header only.
      #
      # @param indent_level [Integer] indent level
      # @return [String] the summary text
      # @example
      #   dv = DaruLite::Vector.new [1, 2, 3]
      #   puts dv.summary
      #
      #   # =
      #   # n :3
      #   # non-missing:3
      #   # median: 2
      #   # mean: 2.0000
      #   # std.dev.: 1.0000
      #   # std.err.: 0.5774
      #   # skew: 0.0000
      #   # kurtosis: -2.3333
      def summary(indent_level = 0)
        present_count = size - count_values(*DaruLite::MISSING_VALUES)
        header = "#{' =' * indent_level}= #{name}\n n :#{size}\n non-missing:#{present_count}"
        details =
          case type
          when :object then object_summary
          when :numeric then numeric_summary
          else ''
          end

        # Split and re-join so every continuation line receives the indent.
        (header + details).split("\n").join("\n#{' ' * indent_level}")
      end

      # Summary section used for object type Vectors: factors, mode and a
      # frequency table.
      #
      # NOTE(review): the percentage column divides each frequency by the
      # number of *missing* values (or uses 1 when there are none) --
      # confirm this denominator is intended.
      #
      # @return [String] String containing object vector summary
      def object_summary
        missing_count = count_values(*DaruLite::MISSING_VALUES)
        heading = "\n factors: #{factors.to_a.join(',')}\n mode: #{mode.to_a.join(',')}\n Distribution\n"

        rows = frequencies.sort.each_with_index.map do |count, key|
          share = missing_count.zero? ? 1 : count.quo(missing_count)
          [key, count, format('%0.2f%%', share * 100)]
        end

        heading + Formatters::Table.format(rows)
      end

      # Summary section used for numeric type Vectors.
      #
      # Standard deviation and standard error are included only when #sd is
      # truthy; skew and kurtosis only when the vector has no missing values.
      #
      # @return [String] String containing numeric vector summary
      def numeric_summary
        sections = ["\n median: #{median}", format("\n mean: %0.4f", mean)]

        if sd
          sections << format("\n std.dev.: %0.4f", sd)
          sections << format("\n std.err.: %0.4f", se)
        end

        if count_values(*DaruLite::MISSING_VALUES).zero?
          sections << format("\n skew: %0.4f", skew)
          sections << format("\n kurtosis: %0.4f", kurtosis)
        end

        sections.join
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
module DaruLite
  class Vector
    module Convertible
      # @return [DaruLite::DataFrame] the vector as a single-vector dataframe
      def to_df
        DaruLite::DataFrame.new({ @name => @data }, name: @name, index: @index)
      end

      # Convert Vector to a horizontal or vertical Ruby Matrix.
      #
      # == Arguments
      #
      # * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix.
      #
      # @raise [ArgumentError] when +axis+ is neither :horizontal nor :vertical
      def to_matrix(axis = :horizontal)
        case axis
        when :horizontal
          Matrix[to_a]
        when :vertical
          Matrix.columns([to_a])
        else
          raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}"
        end
      end

      # Convert to hash (explicit). Hash keys are indexes and values are the
      # corresponding elements.
      def to_h
        @index.to_h { |index| [index, self[index]] }
      end

      # Return the underlying data as an array.
      def to_a
        @data.to_a
      end

      # Convert the hash from #to_h to JSON.
      #
      # Any arguments (for example the generator state handed over by
      # JSON.generate / JSON.pretty_generate when the vector is nested in
      # another structure) are forwarded to Hash#to_json instead of being
      # dropped, so the surrounding formatting options are honoured.
      #
      # @param args arguments forwarded to Hash#to_json
      # @return [String] JSON text
      def to_json(*args)
        to_h.to_json(*args)
      end

      # Convert to html for iruby.
      #
      # @param threshold [Integer] passed on to #to_html_tbody and visible to
      #   the template through the binding
      # @return [String] rendered template
      def to_html(threshold = 30)
        # These locals are exposed to the ERB template via `binding`;
        # presumably the template refers to them by name -- do not rename.
        table_thead = to_html_thead
        table_tbody = to_html_tbody(threshold)
        path = if index.is_a?(MultiIndex)
                 File.expand_path('../iruby/templates/vector_mi.html.erb', __dir__)
               else
                 File.expand_path('../iruby/templates/vector.html.erb', __dir__)
               end
        ERB.new(File.read(path).strip).result(binding)
      end

      # Renders the table header template (a MultiIndex uses its own template).
      #
      # @return [String] rendered template
      def to_html_thead
        table_thead_path =
          if index.is_a?(MultiIndex)
            File.expand_path('../iruby/templates/vector_mi_thead.html.erb', __dir__)
          else
            File.expand_path('../iruby/templates/vector_thead.html.erb', __dir__)
          end
        ERB.new(File.read(table_thead_path).strip).result(binding)
      end

      # Renders the table body template (a MultiIndex uses its own template).
      #
      # @param threshold [Integer] not used directly here; it is reachable by
      #   the template through the binding
      # @return [String] rendered template
      def to_html_tbody(threshold = 30)
        table_tbody_path =
          if index.is_a?(MultiIndex)
            File.expand_path('../iruby/templates/vector_mi_tbody.html.erb', __dir__)
          else
            File.expand_path('../iruby/templates/vector_tbody.html.erb', __dir__)
          end
        ERB.new(File.read(table_tbody_path).strip).result(binding)
      end

      # Short description: class, optional name, size and a category marker.
      def to_s
        "#<#{self.class}#{": #{@name}" if @name}(#{size})#{':category' if category?}>"
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module DaruLite
  class Vector
    module Duplicatable
      # Duplicates the vector: the data and the index are each copied with
      # #dup and the name is carried over.
      # @return [DaruLite::Vector] duplicated vector
      def dup
        copied_data = @data.dup
        copied_index = @index.dup
        DaruLite::Vector.new(copied_data, name: @name, index: copied_index)
      end

      # Copies the structure of the vector (i.e. the index, size, name) and
      # fills all values with nils.
      # @return [DaruLite::Vector] vector of nils with a copy of the index
      def clone_structure
        blanks = Array.new(size)
        DaruLite::Vector.new(blanks, name: @name, index: @index.dup)
      end
    end
  end
end
@@ -0,0 +1,175 @@
1
module DaruLite
  class Vector
    module Fetchable
      # Get one or more elements with specified index or a range.
      #
      # == Usage
      #   # For vectors employing single layer Index
      #
      #   v[:one, :two]   # => DaruLite::Vector with indexes :one and :two
      #   v[:one]         # => Single element
      #   v[:one..:three] # => DaruLite::Vector with indexes :one, :two and :three
      #
      #   # For vectors employing hierarchical multi index
      #
      def [](*input_indexes)
        located = @index.pos(*input_indexes)

        # A single numeric position means exactly one element was asked for.
        return @data[located] if located.is_a?(Numeric)

        # Otherwise assemble a new Vector from the positional lookups.
        picked = located.map { |position| @data[position] }
        sub_index = @index.subset(*input_indexes)
        DaruLite::Vector.new(picked, name: @name, index: sub_index, dtype: @dtype)
      end

      # Returns vector of values given positional values
      # @param positions [Array<object>] positional values
      # @return [object] a single element when the coerced positions are an
      #   Integer, otherwise a vector
      # @example
      #   dv = DaruLite::Vector.new 'a'..'e'
      #   dv.at 0, 1, 2
      #   # => #<DaruLite::Vector(3)>
      #   #   0   a
      #   #   1   b
      #   #   2   c
      def at(*positions)
        coerced = coerce_positions(*positions)
        validate_positions(*coerced)
        return @data[coerced] if coerced.is_a?(Integer)

        values = coerced.map { |pos| @data[pos] }
        # The index is formed from the positions exactly as the caller gave them.
        DaruLite::Vector.new(values, index: @index.at(*positions), dtype: dtype)
      end

      # Fetches self[0..(q - 1)].
      def head(q = 10)
        last_position = q - 1
        self[0..last_position]
      end

      # Fetches the range ending at the final position and starting q
      # positions earlier (never before position 0).
      def tail(q = 10)
        first_position = size > q ? size - q : 0
        self[first_position..(size - 1)]
      end

      # Same as #tail, defaulting to a single element. Defined here because
      # the Enumerable mixin does not provide a #last method.
      def last(q = 1)
        tail(q)
      end

      # Returns a hash of Vectors, one per distinct value found after
      # splitting the fields with #splitted. Each Vector holds 1 where the
      # field contains the value, 0 where it does not and nil where the
      # field itself is nil.
      #
      # Example (assuming #splitted splits every element on +sep+):
      #
      #   a = DaruLite::Vector.new(["a,b", "c,d", "a,b"])
      #   a.split_by_separator
      #   # => {"a" => Vector with data [1, 0, 1],
      #   #     "b" => Vector with data [1, 0, 1],
      #   #     "c" => Vector with data [0, 1, 0],
      #   #     "d" => Vector with data [0, 1, 0]}
      #
      def split_by_separator(sep = ',')
        tokens_per_element = splitted(sep)
        distinct_tokens = tokens_per_element.flatten.uniq.compact

        distinct_tokens.each_with_object({}) do |token, result|
          flags = tokens_per_element.map { |tokens| split_value(token, tokens) }
          result[token] = DaruLite::Vector.new(flags)
        end
      end

      # Like #split_by_separator, but every Vector is replaced by the sum of
      # its elements converted with #to_i.
      def split_by_separator_freq(sep = ',')
        indicators = split_by_separator(sep)
        indicators.transform_values { |indicator| indicator.sum(&:to_i) }
      end

      # @param keys [Array] can be positions (if by_position is true) or indexes (if by_position is false)
      # @return [DaruLite::Vector]
      def get_sub_vector(keys, by_position: true)
        return DaruLite::Vector.new([]) if keys == []

        wanted_positions = by_position ? keys : @index.pos(*keys)
        fetched = at(*wanted_positions)

        # #at may hand back a bare element; wrap it so callers always get a Vector.
        fetched.is_a?(DaruLite::Vector) ? fetched : DaruLite::Vector.new([fetched])
      end

      # Partition a numeric variable into categories.
      # @param [Array<Numeric>] partitions an array whose consecutive elements
      #   provide intervals for categories
      # @param [Hash] opts options to cut the partition
      # @option opts [:left, :right] :close_at specifies whether the interval closes at
      #   the right side or the left side (defaults to :right)
      # @option opts [Array] :labels names of the categories
      # @return [DaruLite::Vector] numeric variable converted to categorical variable
      # @example
      #   heights = DaruLite::Vector.new [30, 35, 32, 50, 42, 51]
      #   height_cat = heights.cut [30, 40, 50, 60], labels: ['low', 'medium', 'high']
      #   # => #<DaruLite::Vector(6)>
      #   #   0   low
      #   #   1   low
      #   #   2   low
      #   #   3   high
      #   #   4   medium
      #   #   5   high
      def cut(partitions, opts = {})
        closed_side = opts[:close_at] || :right
        bounds = partitions.to_a
        assigned = to_a.map { |value| cut_find_category(bounds, value, closed_side) }
        categories = cut_categories(bounds, closed_side)

        categorized = DaruLite::Vector.new(
          assigned,
          index: @index,
          type: :category,
          categories: categories
        )

        labels = opts[:labels]
        return categorized unless labels

        # Rename categories if new labels provided
        categorized.rename_categories(categories.zip(labels).to_h)
      end

      # Positions at which any of the given values occurs. Requests for
      # exactly nil and/or Float::NAN are answered by #nil_positions and
      # #nan_positions; everything else is matched with #include_with_nan?.
      def positions(*values)
        case values
        when [nil] then nil_positions
        when [Float::NAN] then nan_positions
        when [nil, Float::NAN], [Float::NAN, nil] then nil_positions + nan_positions
        else (0...size).select { |position| include_with_nan?(values, @data[position]) }
        end
      end

      private

      # 1 when +v+ includes +key+, 0 when it does not, nil when +v+ is nil.
      def split_value(key, v)
        return nil if v.nil?

        v.include?(key) ? 1 : 0
      end
    end
  end
end
@@ -0,0 +1,128 @@
1
module DaruLite
  class Vector
    module Filterable
      # Return a new vector based on the contents of a boolean array. Use with the
      # comparator methods to obtain meaningful results.
      #
      # @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
      #   collection containing the true or false values. Each element in the Vector
      #   corresponding to a `true` in the bool_array will be returned along with its
      #   index.
      # @example Usage of #where.
      #   vector = DaruLite::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
      #
      #   # Simple logic statement passed to #where.
      #   vector.where(vector.eq(5).or(vector.eq(1)))
      #   # =>
      #   ##<DaruLite::Vector:77626210 @name = nil @size = 7 >
      #   #    nil
      #   #  2   5
      #   #  4   5
      #   #  7   5
      #   # 10   1
      #   # 11   5
      #   # 13   5
      #   # 15   1
      #
      #   # A somewhat more complex logic statement
      #   vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
      #   #=>
      #   ##<DaruLite::Vector:81072310 @name = nil @size = 7 >
      #   #    nil
      #   #  2   5
      #   #  4   5
      #   #  7   5
      #   # 10   1
      #   # 11   5
      #   # 13   5
      #   # 15   1
      def where(bool_array)
        DaruLite::Core::Query.vector_where self, bool_array
      end

      # Return a new vector based on the contents of a boolean array and &block.
      #
      # @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>, &block] The
      #   collection containing the true or false values. Each element in the Vector
      #   corresponding to a `true` in the bool_array will be returned along with its
      #   index. The &block may contain manipulative functions for the Vector elements.
      #
      # @return [DaruLite::Vector]
      #
      # @example Usage of #apply_where.
      #   dv = DaruLite::Vector.new ['3 days', '5 weeks', '2 weeks']
      #   dv = dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" }
      #   # =>
      #   ##<DaruLite::Vector(3)>
      #   #   0   3 days
      #   #   1   35 days
      #   #   2   14 days
      def apply_where(bool_array, &block)
        DaruLite::Core::Query.vector_apply_where self, bool_array, &block
      end

      # Keep only unique elements of the vector along with their indexes.
      def uniq
        uniq_vector = @data.uniq
        new_index = uniq_vector.map { |element| index_of(element) }

        DaruLite::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
      end

      # Delete an element if block returns true. Destructive.
      #
      # @return [Enumerator] when no block is given
      # @return [self] otherwise, after the data and the index were replaced
      def delete_if
        return to_enum(:delete_if) unless block_given?

        survivors = each_with_index.reject { |n, _i| yield(n) }

        # Transposing an empty list gives [], which would destructure into
        # two nils; hand over empty collections instead so that removing
        # every element really leaves empty data and an empty index.
        keep_e, keep_i = survivors.empty? ? [[], []] : survivors.transpose

        @data = cast_vector_to @dtype, keep_e
        @index = DaruLite::Index.new(keep_i)

        update_position_cache

        self
      end

      # Keep an element if block returns true. Destructive.
      #
      # @return [Enumerator] when no block is given
      # @return [self] otherwise
      def keep_if
        return to_enum(:keep_if) unless block_given?

        delete_if { |val| !yield(val) }
      end

      # Return a vector with specified values removed
      # @param values [Array] values to reject from resultant vector
      # @return [DaruLite::Vector] vector with specified values removed
      # @example
      #   dv = DaruLite::Vector.new [1, 2, nil, Float::NAN]
      #   dv.reject_values nil, Float::NAN
      #   # => #<DaruLite::Vector(2)>
      #   #   0   1
      #   #   1   2
      def reject_values(*values)
        resultant_pos = size.times.to_a - positions(*values)
        dv = at(*resultant_pos)
        # Handle the case when number of positions is 1
        # and hence #at doesn't return a vector
        if dv.is_a?(DaruLite::Vector)
          dv
        else
          pos = resultant_pos.first
          at(pos..pos)
        end
      end

      # Returns a Vector with only numerical data. Missing data (nil) is included
      # but non-Numeric objects are excluded. Preserves index.
      def only_numerics
        numeric_indexes =
          each_with_index
          .select { |v, _i| v.is_a?(Numeric) || v.nil? }
          .map(&:last)

        self[*numeric_indexes]
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
module DaruLite
  class Vector
    module Indexable
      # Get the index key of the first occurrence of +element+.
      # For the :array dtype elements are compared with #eql?.
      def index_of(element)
        position =
          if :array == dtype
            @data.index { |candidate| candidate.eql?(element) }
          else
            @data.index(element)
          end

        @index.key(position)
      end

      # Replaces the index with the positions 0...size.
      # @return [self]
      def reset_index!
        @index = DaruLite::Index.new((0...size).to_a)
        self
      end

      # Returns *true* if an index exists
      def has_index?(index) # rubocop:disable Naming/PredicateName
        @index.include?(index)
      end

      # Builds a DataFrame with an +index+ vector holding the index keys and
      # a +values+ vector holding the data.
      def detach_index
        DaruLite::DataFrame.new(
          index: @index.to_a,
          values: @data.to_a
        )
      end

      # Sets new index for vector. Preserves index->value correspondence.
      # Sets nil for new index keys absent from original index.
      # @note Unlike #reorder! which takes positions as input it takes
      #   index as an input to reorder the vector
      # @param [DaruLite::Index, DaruLite::MultiIndex] new_index new index to order with
      # @return [DaruLite::Vector] vector reindexed with new index
      def reindex!(new_index)
        reordered = []
        each_with_index do |value, key|
          next unless new_index.include?(key)

          # new_index[key] is used as the slot of the value in the new order.
          reordered[new_index[key]] = value
        end
        # Pad with nils up to the size of the new index.
        reordered.fill(nil, reordered.size, new_index.size - reordered.size)

        @data = cast_vector_to(@dtype, reordered)
        @index = new_index

        update_position_cache

        self
      end

      # Create a new vector with a different index, and preserve the indexing of
      # current elements.
      def reindex(new_index)
        copy = dup
        copy.reindex!(new_index)
      end

      # Assigns a new index after coercing it with Index.coerce.
      #
      # @raise [ArgumentError] when the sizes differ or the coerced value is
      #   not a DaruLite::Index
      def index=(idx)
        coerced = Index.coerce(idx)

        unless coerced.size == size
          raise ArgumentError, "Size of supplied index #{coerced.size} does not match size of Vector"
        end
        raise ArgumentError, 'Can only assign type Index and its subclasses.' unless coerced.is_a?(DaruLite::Index)

        @index = coerced
      end

      # Return indexes of values specified
      # @param values [Array] values to find indexes for
      # @return [Array] array of indexes of values specified
      # @example
      #   dv = DaruLite::Vector.new [1, 2, nil, Float::NAN], index: 11..14
      #   dv.indexes nil, Float::NAN
      #   # => [13, 14]
      def indexes(*values)
        matching_positions = positions(*values)
        index.to_a.values_at(*matching_positions)
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
module DaruLite
  class Vector
    module Iterable
      # Iterates over the data elements.
      # @return [Enumerator] when no block is given
      # @return [self] otherwise
      def each(&block)
        return to_enum(:each) unless block

        @data.each(&block)
        self
      end

      # Iterates over the index.
      # @return [Enumerator] when no block is given
      # @return [self] otherwise
      def each_index(&block)
        return to_enum(:each_index) unless block

        @index.each(&block)
        self
      end

      # Iterates over [element, index] pairs.
      # @return [Enumerator] when no block is given
      # @return [self] otherwise
      def each_with_index(&block)
        return to_enum(:each_with_index) unless block

        @data.to_a.zip(@index.to_a).each(&block)

        self
      end

      # Replaces every element with the block's result, in place.
      # @return [Enumerator] when no block is given
      # @return [self] otherwise
      def map!(&block)
        return to_enum(:map!) unless block

        @data.map!(&block)
        self
      end

      # Like map, but returns a DaruLite::Vector with the returned values.
      # Works on a duplicate, so the receiver is left as it was.
      #
      # @param dt [Symbol, nil] dtype to cast to; nil keeps the current dtype
      def recode(dt = nil, &block)
        return to_enum(:recode, dt) unless block

        dup.recode! dt, &block
      end

      # Destructive version of #recode.
      #
      # @param dt [Symbol, nil] dtype to cast to; nil keeps the current dtype
      # @return [self]
      def recode!(dt = nil, &block)
        return to_enum(:recode!, dt) unless block

        # NOTE(review): the value of the trailing `.data` is discarded;
        # presumably a leftover -- confirm before removing.
        @data.map!(&block).data
        @data = cast_vector_to(dt || @dtype)
        self
      end

      # Reports all values that don't comply with a condition.
      # Returns a hash mapping the position of each invalid element to the
      # element itself. The condition is the given block.
      def verify
        (0...size)
          .map { |i| [i, @data[i]] }
          .reject { |_i, val| yield(val) }
          .to_h
      end

      # Applies a method to the vector, or to a sub-vector when +keys+ is given.
      #
      # @param method [Symbol, Proc] a Symbol is sent to the (sub-)vector, a
      #   Proc is called with it
      # @param keys [Array, nil] positions or indexes selecting the sub-vector;
      #   nil means the whole vector
      # @param by_position [Boolean] forwarded to #get_sub_vector
      # @raise [RuntimeError] when +method+ is neither a Symbol nor a Proc
      def apply_method(method, keys: nil, by_position: true)
        vect = keys ? get_sub_vector(keys, by_position: by_position) : self

        case method
        when Symbol then vect.send(method)
        when Proc then method.call(vect)
        else
          # Still a RuntimeError (as a bare `raise` was), but now it says why.
          raise "apply_method expects a Symbol or a Proc, got #{method.class}"
        end
      end
      alias apply_method_on_sub_vector apply_method

      # Replaces specified values with a new value
      # @param [Array] old_values array of values to replace (a single
      #   non-Array value is wrapped into an Array)
      # @param [object] new_value new value to replace with
      # @note It performs the replace in place.
      # @return [DaruLite::Vector] Same vector itself with values
      #   replaced with new value
      # @example
      #   dv = DaruLite::Vector.new [1, 2, :a, :b]
      #   dv.replace_values [:a, :b], nil
      #   dv
      #   # =>
      #   # #<DaruLite::Vector:19903200 @name = nil @metadata = {} @size = 4 >
      #   #     nil
      #   #   0   1
      #   #   1   2
      #   #   2 nil
      #   #   3 nil
      def replace_values(old_values, new_value)
        old_values = [old_values] unless old_values.is_a? Array
        size.times do |pos|
          set_at([pos], new_value) if include_with_nan? old_values, at(pos)
        end
        self
      end
    end
  end
end