daru_lite 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +14 -1
  20. data/lib/daru_lite/index/multi_index.rb +9 -0
  21. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  22. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  23. data/lib/daru_lite/vector/calculatable.rb +78 -0
  24. data/lib/daru_lite/vector/convertible.rb +77 -0
  25. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  26. data/lib/daru_lite/vector/fetchable.rb +175 -0
  27. data/lib/daru_lite/vector/filterable.rb +128 -0
  28. data/lib/daru_lite/vector/indexable.rb +77 -0
  29. data/lib/daru_lite/vector/iterable.rb +95 -0
  30. data/lib/daru_lite/vector/joinable.rb +17 -0
  31. data/lib/daru_lite/vector/missable.rb +124 -0
  32. data/lib/daru_lite/vector/queryable.rb +45 -0
  33. data/lib/daru_lite/vector/setable.rb +47 -0
  34. data/lib/daru_lite/vector/sortable.rb +113 -0
  35. data/lib/daru_lite/vector.rb +36 -932
  36. data/lib/daru_lite/version.rb +1 -1
  37. data/spec/data_frame/aggregatable_example.rb +65 -0
  38. data/spec/data_frame/buildable_example.rb +109 -0
  39. data/spec/data_frame/calculatable_example.rb +135 -0
  40. data/spec/data_frame/convertible_example.rb +180 -0
  41. data/spec/data_frame/duplicatable_example.rb +111 -0
  42. data/spec/data_frame/fetchable_example.rb +476 -0
  43. data/spec/data_frame/filterable_example.rb +409 -0
  44. data/spec/data_frame/indexable_example.rb +221 -0
  45. data/spec/data_frame/iterable_example.rb +465 -0
  46. data/spec/data_frame/joinable_example.rb +106 -0
  47. data/spec/data_frame/missable_example.rb +47 -0
  48. data/spec/data_frame/pivotable_example.rb +297 -0
  49. data/spec/data_frame/queryable_example.rb +92 -0
  50. data/spec/data_frame/setable_example.rb +482 -0
  51. data/spec/data_frame/sortable_example.rb +350 -0
  52. data/spec/dataframe_spec.rb +181 -3289
  53. data/spec/index/categorical_index_spec.rb +27 -8
  54. data/spec/index/index_spec.rb +21 -0
  55. data/spec/index/multi_index_spec.rb +85 -76
  56. data/spec/vector/aggregatable_example.rb +27 -0
  57. data/spec/vector/calculatable_example.rb +82 -0
  58. data/spec/vector/convertible_example.rb +126 -0
  59. data/spec/vector/duplicatable_example.rb +48 -0
  60. data/spec/vector/fetchable_example.rb +463 -0
  61. data/spec/vector/filterable_example.rb +165 -0
  62. data/spec/vector/indexable_example.rb +201 -0
  63. data/spec/vector/iterable_example.rb +111 -0
  64. data/spec/vector/joinable_example.rb +25 -0
  65. data/spec/vector/missable_example.rb +88 -0
  66. data/spec/vector/queryable_example.rb +91 -0
  67. data/spec/vector/setable_example.rb +300 -0
  68. data/spec/vector/sortable_example.rb +242 -0
  69. data/spec/vector_spec.rb +111 -1805
  70. metadata +86 -2
@@ -0,0 +1,95 @@
1
module DaruLite
  class Vector
    # Iteration and element-wise transformation helpers for DaruLite::Vector.
    #
    # The methods here read the host object's @data and @index instance
    # variables and call helpers defined outside this module
    # (cast_vector_to, get_sub_vector, set_at, at, include_with_nan?, size).
    module Iterable
      # Yields every value held in @data.
      #
      # @return [Enumerator] when called without a block
      # @return [self] when called with a block
      def each(&block)
        return to_enum(:each) unless block

        @data.each(&block)
        self
      end

      # Yields every entry of @index.
      #
      # @return [Enumerator] when called without a block
      # @return [self] when called with a block
      def each_index(&block)
        return to_enum(:each_index) unless block

        @index.each(&block)
        self
      end

      # Yields [value, index_entry] pairs, pairing @data and @index by position.
      #
      # @return [Enumerator] when called without a block
      # @return [self] when called with a block
      def each_with_index(&block)
        return to_enum(:each_with_index) unless block

        @data.to_a.zip(@index.to_a).each(&block)

        self
      end

      # Replaces every value of @data with the block's result, in place.
      #
      # @return [Enumerator] when called without a block
      # @return [self] when called with a block
      def map!(&block)
        return to_enum(:map!) unless block

        @data.map!(&block)
        self
      end

      # Like map, but returns a DaruLite::Vector with the returned values.
      # Works on a duplicate (dup), so the receiver is left untouched.
      #
      # @param dt [Object, nil] dtype forwarded to #recode!
      # @return [Enumerator] when called without a block
      def recode(dt = nil, &block)
        return to_enum(:recode, dt) unless block

        dup.recode! dt, &block
      end

      # Destructive version of #recode.
      # Maps @data in place, then rebuilds @data through cast_vector_to using
      # +dt+, or the current @dtype when +dt+ is nil.
      #
      # @param dt [Object, nil] dtype to cast to
      # @return [Enumerator] when called without a block
      # @return [self] when called with a block
      def recode!(dt = nil, &block)
        return to_enum(:recode!, dt) unless block

        @data.map!(&block).data
        @data = cast_vector_to(dt || @dtype)
        self
      end

      # Reports all values that don't comply with a condition.
      # The block receives each value; values for which it returns a falsy
      # result are reported.
      #
      # @return [Hash] position (0-based) => offending value
      def verify
        (0...size)
          .map { |i| [i, @data[i]] }
          .reject { |_i, val| yield(val) }
          .to_h
      end

      # Applies +method+ to this vector, or to the sub-vector selected by
      # +keys+ (obtained through get_sub_vector) when +keys+ is given.
      #
      # @param method [Symbol, Proc] a Symbol is sent to the vector; a Proc is
      #   called with the vector as its only argument
      # @param keys [Object, nil] forwarded to get_sub_vector when present
      # @param by_position [Boolean] forwarded to get_sub_vector
      # @return [Object] whatever the Symbol call or the Proc returns
      # @raise [RuntimeError] when +method+ is neither a Symbol nor a Proc
      def apply_method(method, keys: nil, by_position: true)
        vect = keys ? get_sub_vector(keys, by_position: by_position) : self

        case method
        when Symbol then vect.send(method)
        when Proc then method.call(vect)
        else
          # A bare `raise` would re-raise whatever exception is currently being
          # handled (or a message-less RuntimeError), hiding the real problem.
          raise "Expected a Symbol or a Proc, got #{method.class}: #{method.inspect}"
        end
      end
      alias apply_method_on_sub_vector apply_method

      # Replaces specified values with a new value
      # @param [Array] old_values array of values to replace
      #   (a single non-Array value is wrapped in an Array)
      # @param [object] new_value new value to replace with
      # @note It performs the replace in place.
      # @return [DaruLite::Vector] Same vector itself with values
      #   replaced with new value
      # @example
      #   dv = DaruLite::Vector.new [1, 2, :a, :b]
      #   dv.replace_values [:a, :b], nil
      #   dv
      #   # =>
      #   # #<DaruLite::Vector:19903200 @name = nil @metadata = {} @size = 4 >
      #   #     nil
      #   #   0   1
      #   #   1   2
      #   #   2 nil
      #   #   3 nil
      def replace_values(old_values, new_value)
        old_values = [old_values] unless old_values.is_a? Array
        size.times do |pos|
          set_at([pos], new_value) if include_with_nan? old_values, at(pos)
        end
        self
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module DaruLite
  class Vector
    # Appending support for DaruLite::Vector.
    module Joinable
      # Appends +element+ under the new label +index+.
      #
      # The label is merged into @index, the element is stored in @data at the
      # position @index reports for that label, and the position cache is
      # refreshed afterwards.
      #
      # @param element [Object] value to append
      # @param index [Object] label for the new value; must not already exist
      # @raise [IndexError] when +index+ is already present in @index
      # @return whatever update_position_cache returns
      def concat(element, index)
        if @index.include?(index)
          raise IndexError, 'Expected new unique index'
        end

        @index = @index | [index]
        position = @index[index]
        @data[position] = element

        update_position_cache
      end
      alias_method :push, :concat
      alias_method :<<, :concat
    end
  end
end
@@ -0,0 +1,124 @@
1
module DaruLite
  class Vector
    # Helpers for detecting and replacing missing data in a DaruLite::Vector.
    #
    # "Missing" means any value listed in DaruLite::MISSING_VALUES; positions
    # are located through the host's #indexes method. Several methods are
    # wrapped with Gem::Deprecate and emit a warning when called.
    module Missable
      extend Gem::Deprecate

      # Reports whether missing data is present in the Vector.
      def has_missing_data? # rubocop:disable Naming/PredicateName
        !indexes(*DaruLite::MISSING_VALUES).empty?
      end
      alias flawed? has_missing_data?
      deprecate :has_missing_data?, :include_values?, 2016, 10
      deprecate :flawed?, :include_values?, 2016, 10

      # Replace all missing values in the vector with the value passed as an
      # argument. Destructive.
      # See #replace_nils for non-destructive version
      #
      # == Arguments
      #
      # * +replacement+ - The value which should replace all missing values
      #
      # @return [self]
      def replace_nils!(replacement)
        indexes(*DaruLite::MISSING_VALUES).each do |idx|
          self[idx] = replacement
        end

        self
      end

      # Rolling fillna
      # replace all Float::NAN and NIL values with the preceding or following value
      #
      # Missing values met before any valid value has been seen (in the chosen
      # direction) are replaced with 0, the initial fill value.
      #
      # @param direction [Symbol] (:forward, :backward) whether replacement value is preceding or following
      # @return [self]
      #
      # @example
      #  dv = DaruLite::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
      #
      #   2.3.3 :068 > dv.rolling_fillna(:forward)
      #   => #<DaruLite::Vector(9)>
      #   0   1
      #   1   2
      #   2   1
      #   3   4
      #   4   4
      #   5   4
      #   6   3
      #   7   3
      #   8   3
      #
      def rolling_fillna!(direction = :forward)
        # Any direction other than :forward walks the index back to front.
        enum = direction == :forward ? index : index.reverse_each
        last_valid_value = 0
        enum.each do |idx|
          if valid_value?(self[idx])
            last_valid_value = self[idx]
          else
            self[idx] = last_valid_value
          end
        end
        self
      end

      # Non-destructive version of rolling_fillna!
      def rolling_fillna(direction = :forward)
        dup.rolling_fillna!(direction)
      end

      # Non-destructive version of #replace_nils!
      def replace_nils(replacement)
        dup.replace_nils!(replacement)
      end

      # number of non-missing elements
      def n_valid
        size - indexes(*DaruLite::MISSING_VALUES).size
      end
      deprecate :n_valid, :count_values, 2016, 10

      # Creates a new vector consisting only of non-missing data
      #
      # == Arguments
      #
      # @param as_a [Symbol] Passing :vector (the default) returns a
      #   DaruLite::Vector; any other value returns only the elements as an
      #   Array.
      #
      # @param _duplicate [Boolean] Currently ignored (see FIXME below).
      #   Intended meaning: in case no missing data is found in the vector,
      #   setting this to false would return the same vector; otherwise a
      #   duplicate would be returned irrespective of presence of missing data.
      def only_valid(as_a = :vector, _duplicate = true) # rubocop:disable Style/OptionalBooleanParameter
        # FIXME: Now duplicate is just ignored.
        #   There are no spec that fail on this case, so I'll leave it
        #   this way for now - zverok, 2016-05-07

        new_index = @index.to_a - indexes(*DaruLite::MISSING_VALUES)
        new_vector = new_index.map { |idx| self[idx] }

        if as_a == :vector
          DaruLite::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
        else
          new_vector
        end
      end
      deprecate :only_valid, :reject_values, 2016, 10

      # Returns a Vector containing only missing data (preserves indexes).
      #
      # @param as_a [Symbol] :vector returns the result of indexing self with
      #   the missing positions; :array converts that result with #to_a.
      #   Any other value yields nil.
      def only_missing(as_a = :vector)
        case as_a
        when :vector
          self[*indexes(*DaruLite::MISSING_VALUES)]
        when :array
          self[*indexes(*DaruLite::MISSING_VALUES)].to_a
        end
      end
      # :none is Gem::Deprecate's marker for "no replacement"; passing nil
      # produced the malformed hint "use  instead" in the warning.
      deprecate :only_missing, :none, 2016, 10

      private

      # Helper method returning validity of arbitrary value:
      # false for nil and for anything whose #nan? is true, true otherwise.
      def valid_value?(v)
        !((v.respond_to?(:nan?) && v.nan?) || v.nil?)
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
module DaruLite
  class Vector
    # Predicate-style queries over a DaruLite::Vector's contents.
    module Queryable
      # @return [Boolean] whatever @index reports for #empty?
      def empty?
        @index.empty?
      end

      # Check if any one of mentioned values occur in the vector
      # (membership is decided by include_with_nan? against @data).
      # @param values [Array] values to check for
      # @return [true, false] returns true if any one of specified values
      #   occur in the vector
      # @example
      #   dv = DaruLite::Vector.new [1, 2, 3, 4, nil]
      #   dv.include_values? nil, Float::NAN
      #   # => true
      def include_values?(*values)
        values.any? do |candidate|
          include_with_nan?(@data, candidate)
        end
      end

      # Delegates to #any? on the collection returned by @data.data.
      def any?(&block)
        raw_values = @data.data
        raw_values.any?(&block)
      end

      # Delegates to #all? on the collection returned by @data.data.
      def all?(&block)
        raw_values = @data.data
        raw_values.all?(&block)
      end

      # Tests every value against +regexp+ with =~ and reports, per value,
      # whether it matched.
      #
      # @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
      #
      # @return [Array] Containing +true+ or +false+ for each value, according to the match with the given +regexp+
      #
      # @example
      #   dv = DaruLite::Vector.new(['3 days', '5 weeks', '2 weeks'])
      #   dv.match(/weeks/)
      #
      #   # => [false, true, true]
      def match(regexp)
        @data.map do |value|
          (value =~ regexp) ? true : false
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
module DaruLite
  class Vector
    # Element assignment for DaruLite::Vector, by position or by index label.
    module Setable
      # Change value at given positions.
      # Positions are checked with validate_positions first; afterwards the
      # position cache is refreshed.
      # @param positions [Array<object>] positional values
      # @param [object] val value to assign
      # @example
      #   dv = DaruLite::Vector.new 'a'..'e'
      #   dv.set_at [0, 1], 'x'
      #   dv
      #   # => #<DaruLite::Vector(5)>
      #   #   0   x
      #   #   1   x
      #   #   2   c
      #   #   3   d
      #   #   4   e
      def set_at(positions, val)
        validate_positions(*positions)
        positions.each do |position|
          @data[position] = val
        end
        update_position_cache
      end

      # Just like in Hashes, you can specify the index label of the DaruLite::Vector
      # and assign an element at that place in the DaruLite::Vector.
      #
      # Assigning nil to a vector whose dtype is not :array first casts the
      # vector to dtype :array. The value is then passed through
      # guard_type_check and written with modify_vector.
      #
      # == Usage
      #
      #   v = DaruLite::Vector.new([1,2,3], index: [:a, :b, :c])
      #   v[:a] = 999
      #   #=>
      #   ##<DaruLite::Vector:90257920 @name = nil @size = 3 >
      #   #    nil
      #   #  a 999
      #   #  b   2
      #   #  c   3
      def []=(*indexes, val)
        needs_array_dtype = val.nil? && dtype != :array
        cast(dtype: :array) if needs_array_dtype

        guard_type_check(val)
        modify_vector(indexes, val)
        update_position_cache
      end
    end
  end
end
@@ -0,0 +1,113 @@
1
module DaruLite
  class Vector
    # Sorting and reordering for DaruLite::Vector.
    module Sortable
      # Sorts a vector according to its values and returns a new Vector.
      # If a block is specified it is used as the comparator: it receives two
      # values and must return -1, 0 or 1, as with Enumerable#sort. Defaults to
      # ascending order sorting. Preserves indexing (each value keeps its label).
      #
      # Without a block, DEFAULT_SORTER is used: nil values are ordered before
      # all other values when ascending, and therefore after them when
      # descending.
      #
      # == Options
      #
      # * +:ascending+ - if false, will sort in descending order. Defaults to true.
      #
      # No other option is read by this method.
      #
      # == Usage
      #
      #   v = DaruLite::Vector.new ["My first guitar", "jazz", "guitar"]
      #   # Say you want to sort these strings by length.
      #   v.sort(ascending: false) { |a,b| a.length <=> b.length }
      def sort(opts = {}, &block)
        opts = { ascending: true }.merge(opts)

        vector_index = resort_index(@data.each_with_index, opts, &block)
        # For an empty vector `[].transpose` is `[]`, so both halves would be
        # nil; fall back to empty arrays instead of passing nil along.
        vector, order = vector_index.transpose
        vector ||= []
        order ||= []

        index = @index.reorder order

        DaruLite::Vector.new(vector, index: index, name: @name, dtype: @dtype)
      end

      # Sorts the vector according to its `Index` values. Defaults to ascending
      # order sorting.
      #
      # @param [Hash] opts the options for sort_by_index method.
      # @option opts [Boolean] :ascending false, will sort `index` in
      #   descending order.
      #
      # @return [Vector] new sorted `Vector` according to the index values.
      #
      # @example
      #
      #   dv = DaruLite::Vector.new [11, 13, 12], index: [23, 21, 22]
      #   # Say you want to sort index in ascending order
      #   dv.sort_by_index(ascending: true)
      #   #=> DaruLite::Vector.new [13, 12, 11], index: [21, 22, 23]
      #   # Say you want to sort index in descending order
      #   dv.sort_by_index(ascending: false)
      #   #=> DaruLite::Vector.new [11, 12, 13], index: [23, 22, 21]
      def sort_by_index(opts = {})
        opts = { ascending: true }.merge(opts)
        _, new_order = resort_index(@index.each_with_index, opts).transpose

        # new_order is nil when the vector is empty (nothing to transpose).
        reorder(new_order || [])
      end

      # Comparator for [value, position] pairs: two nils are ordered by
      # position, a nil sorts before any non-nil value, and everything else is
      # compared with <=>.
      DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
        next li <=> ri if lv.nil? && rv.nil?
        next -1 if lv.nil?
        next 1 if rv.nil?

        lv <=> rv
      }

      # Just sort the data and get an Array in return using Enumerable#sort.
      # Non-destructive.
      # :nocov:
      def sorted_data(&block)
        @data.to_a.sort(&block)
      end
      # :nocov:

      # Reorder the vector with given positions
      # @note Unlike #reindex! which takes index as input, it takes
      #   positions as an input to reorder the vector
      # @param [Array] order the order to reorder the vector with
      # @return reordered vector (self)
      # @example
      #   dv = DaruLite::Vector.new [3, 2, 1], index: ['c', 'b', 'a']
      #   dv.reorder! [2, 1, 0]
      #   # => #<DaruLite::Vector(3)>
      #   #   a   1
      #   #   b   2
      #   #   c   3
      def reorder!(order)
        @index = @index.reorder order
        data_array = order.map { |i| @data[i] }
        @data = cast_vector_to @dtype, data_array, @nm_dtype
        update_position_cache
        self
      end

      # Non-destructive version of #reorder!
      def reorder(order)
        dup.reorder! order
      end

      private

      # Sorts an enumerable of [value, position] pairs. A given block compares
      # the values only; otherwise DEFAULT_SORTER compares the pairs. The
      # result is reversed unless opts[:ascending] is truthy.
      #
      # @return [Array<Array>] sorted [value, position] pairs
      def resort_index(vector_index, opts)
        sorted =
          if block_given?
            vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
          else
            vector_index.sort(&DEFAULT_SORTER)
          end

        sorted.reverse! unless opts[:ascending]
        sorted
      end
    end
  end
end