daru_lite 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +14 -1
  20. data/lib/daru_lite/index/multi_index.rb +9 -0
  21. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  22. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  23. data/lib/daru_lite/vector/calculatable.rb +78 -0
  24. data/lib/daru_lite/vector/convertible.rb +77 -0
  25. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  26. data/lib/daru_lite/vector/fetchable.rb +175 -0
  27. data/lib/daru_lite/vector/filterable.rb +128 -0
  28. data/lib/daru_lite/vector/indexable.rb +77 -0
  29. data/lib/daru_lite/vector/iterable.rb +95 -0
  30. data/lib/daru_lite/vector/joinable.rb +17 -0
  31. data/lib/daru_lite/vector/missable.rb +124 -0
  32. data/lib/daru_lite/vector/queryable.rb +45 -0
  33. data/lib/daru_lite/vector/setable.rb +47 -0
  34. data/lib/daru_lite/vector/sortable.rb +113 -0
  35. data/lib/daru_lite/vector.rb +36 -932
  36. data/lib/daru_lite/version.rb +1 -1
  37. data/spec/data_frame/aggregatable_example.rb +65 -0
  38. data/spec/data_frame/buildable_example.rb +109 -0
  39. data/spec/data_frame/calculatable_example.rb +135 -0
  40. data/spec/data_frame/convertible_example.rb +180 -0
  41. data/spec/data_frame/duplicatable_example.rb +111 -0
  42. data/spec/data_frame/fetchable_example.rb +476 -0
  43. data/spec/data_frame/filterable_example.rb +409 -0
  44. data/spec/data_frame/indexable_example.rb +221 -0
  45. data/spec/data_frame/iterable_example.rb +465 -0
  46. data/spec/data_frame/joinable_example.rb +106 -0
  47. data/spec/data_frame/missable_example.rb +47 -0
  48. data/spec/data_frame/pivotable_example.rb +297 -0
  49. data/spec/data_frame/queryable_example.rb +92 -0
  50. data/spec/data_frame/setable_example.rb +482 -0
  51. data/spec/data_frame/sortable_example.rb +350 -0
  52. data/spec/dataframe_spec.rb +181 -3289
  53. data/spec/index/categorical_index_spec.rb +27 -8
  54. data/spec/index/index_spec.rb +21 -0
  55. data/spec/index/multi_index_spec.rb +85 -76
  56. data/spec/vector/aggregatable_example.rb +27 -0
  57. data/spec/vector/calculatable_example.rb +82 -0
  58. data/spec/vector/convertible_example.rb +126 -0
  59. data/spec/vector/duplicatable_example.rb +48 -0
  60. data/spec/vector/fetchable_example.rb +463 -0
  61. data/spec/vector/filterable_example.rb +165 -0
  62. data/spec/vector/indexable_example.rb +201 -0
  63. data/spec/vector/iterable_example.rb +111 -0
  64. data/spec/vector/joinable_example.rb +25 -0
  65. data/spec/vector/missable_example.rb +88 -0
  66. data/spec/vector/queryable_example.rb +91 -0
  67. data/spec/vector/setable_example.rb +300 -0
  68. data/spec/vector/sortable_example.rb +242 -0
  69. data/spec/vector_spec.rb +111 -1805
  70. metadata +86 -2
@@ -0,0 +1,95 @@
module DaruLite
  class Vector
    # Iteration and element-wise transformation helpers mixed into
    # DaruLite::Vector.
    #
    # The methods read and write the host's +@data+, +@index+ and +@dtype+
    # instance variables and call helpers that are defined outside this
    # module (+size+, +at+, +set_at+, +dup+, +cast_vector_to+,
    # +get_sub_vector+ and +include_with_nan?+).
    module Iterable
      # Yields every value stored in +@data+.
      #
      # @return [Enumerator] when no block is given
      # @return [DaruLite::Vector] self, when a block is given
      def each(&block)
        return to_enum(:each) unless block

        @data.each(&block)
        self
      end

      # Yields every entry of +@index+.
      #
      # @return [Enumerator] when no block is given
      # @return [DaruLite::Vector] self, when a block is given
      def each_index(&block)
        return to_enum(:each_index) unless block

        @index.each(&block)
        self
      end

      # Yields +[value, index_entry]+ pairs, built by zipping the data with
      # the index. Note the order: the value comes first.
      #
      # @return [Enumerator] when no block is given
      # @return [DaruLite::Vector] self, when a block is given
      def each_with_index(&block)
        return to_enum(:each_with_index) unless block

        @data.to_a.zip(@index.to_a).each(&block)

        self
      end

      # Replaces every value in place with the block's result for it.
      #
      # @return [Enumerator] when no block is given
      # @return [DaruLite::Vector] self, when a block is given
      def map!(&block)
        return to_enum(:map!) unless block

        @data.map!(&block)
        self
      end

      # Like map, but returns a DaruLite::Vector with the returned values.
      # Works on a duplicate, so the receiver is left untouched.
      #
      # @param dt [Object, nil] dtype handed to +cast_vector_to+; the
      #   current +@dtype+ is used when nil
      # @return [Enumerator] when no block is given
      def recode(dt = nil, &block)
        return to_enum(:recode, dt) unless block

        dup.recode! dt, &block
      end

      # Destructive version of #recode.
      #
      # @param dt [Object, nil] dtype handed to +cast_vector_to+; the
      #   current +@dtype+ is used when nil
      # @return [Enumerator] when no block is given
      # @return [DaruLite::Vector] self, when a block is given
      def recode!(dt = nil, &block)
        return to_enum(:recode!, dt) unless block

        @data.map!(&block)
        @data = cast_vector_to(dt || @dtype)
        self
      end

      # Reports all values that don't comply with a condition.
      # The block receives each value; values for which it returns a falsy
      # result are reported.
      #
      # @return [Hash] position in the data => invalid value
      def verify
        (0...size)
          .map { |i| [i, @data[i]] }
          .reject { |_i, val| yield(val) }
          .to_h
      end

      # Applies +method+ to this vector, or to a sub-vector when +keys+ is
      # given.
      #
      # @param method [Symbol, Proc] a Symbol is sent to the vector, a Proc
      #   is called with the vector as its only argument
      # @param keys [Object, nil] forwarded to +get_sub_vector+ when present
      # @param by_position [Boolean] forwarded to +get_sub_vector+
      # @return [Object] whatever the sent method or called Proc returns
      # @raise [RuntimeError] when +method+ is neither a Symbol nor a Proc
      def apply_method(method, keys: nil, by_position: true)
        vect = keys ? get_sub_vector(keys, by_position: by_position) : self

        case method
        when Symbol then vect.send(method)
        when Proc then method.call(vect)
        else
          # Same exception class as the previous bare `raise`, but with a
          # message that says what was wrong.
          raise "Expected a Symbol or a Proc, got #{method.class}"
        end
      end
      alias apply_method_on_sub_vector apply_method

      # Replaces specified values with a new value
      # @param [Array] old_values array of values to replace; a single
      #   non-Array value is wrapped into a one-element array
      # @param [object] new_value new value to replace with
      # @note It performs the replace in place.
      # @return [DaruLite::Vector] Same vector itself with values
      #   replaced with new value
      # @example
      #   dv = DaruLite::Vector.new [1, 2, :a, :b]
      #   dv.replace_values [:a, :b], nil
      #   dv
      #   # =>
      #   # #<DaruLite::Vector:19903200 @name = nil @metadata = {} @size = 4 >
      #   #     nil
      #   #   0   1
      #   #   1   2
      #   #   2 nil
      #   #   3 nil
      def replace_values(old_values, new_value)
        # Not Array(old_values): that would turn nil into [] and make it
        # impossible to ask for nil itself to be replaced.
        old_values = [old_values] unless old_values.is_a? Array
        size.times do |pos|
          set_at([pos], new_value) if include_with_nan? old_values, at(pos)
        end
        self
      end
    end
  end
end
@@ -0,0 +1,17 @@
module DaruLite
  class Vector
    # Appending of single elements to a DaruLite::Vector.
    module Joinable
      # Appends +element+ under the new index entry +index+.
      #
      # The entry is added to +@index+, the element is stored at the
      # position the index reports for that entry, and the position cache
      # is refreshed afterwards.
      #
      # @param element [Object] value to append
      # @param index [Object] index entry for the value; must not be
      #   present in the index yet
      # @raise [IndexError] when +index+ already exists
      def concat(element, index)
        if @index.include?(index)
          raise IndexError, 'Expected new unique index'
        end

        @index |= [index]
        position = @index[index]
        @data[position] = element

        update_position_cache
      end
      alias push concat
      alias << concat
    end
  end
end
@@ -0,0 +1,124 @@
module DaruLite
  class Vector
    # Detection, replacement and filtering of missing values
    # (the members of DaruLite::MISSING_VALUES).
    module Missable
      extend Gem::Deprecate

      # Reports whether missing data is present in the Vector.
      def has_missing_data? # rubocop:disable Naming/PredicateName
        missing = indexes(*DaruLite::MISSING_VALUES)
        !missing.empty?
      end
      alias flawed? has_missing_data?
      deprecate :has_missing_data?, :include_values?, 2016, 10
      deprecate :flawed?, :include_values?, 2016, 10

      # Replace every missing value in the vector with the value passed as
      # an argument. Destructive.
      # See #replace_nils for non-destructive version
      #
      # == Arguments
      #
      # * +replacement+ - The value which should replace all missing values
      def replace_nils!(replacement)
        indexes(*DaruLite::MISSING_VALUES).each { |idx| self[idx] = replacement }

        self
      end

      # Rolling fillna
      # replace all Float::NAN and NIL values with the preceding or following value.
      # Missing values met before any valid value has been seen are
      # replaced with 0.
      #
      # @param direction [Symbol] :forward walks the index front to back,
      #   so the preceding value is used; any other value walks it in
      #   reverse, so the following value is used
      #
      # @example
      #  dv = DaruLite::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
      #
      #   2.3.3 :068 > dv.rolling_fillna(:forward)
      #   => #<DaruLite::Vector(9)>
      #   0   1
      #   1   2
      #   2   1
      #   3   4
      #   4   4
      #   5   4
      #   6   3
      #   7   3
      #   8   3
      #
      def rolling_fillna!(direction = :forward)
        walk = direction == :forward ? index : index.reverse_each
        filler = 0
        walk.each do |idx|
          current = self[idx]
          if valid_value?(current)
            filler = current
          else
            self[idx] = filler
          end
        end
        self
      end

      # Non-destructive version of rolling_fillna!
      def rolling_fillna(direction = :forward)
        copy = dup
        copy.rolling_fillna!(direction)
      end

      # Non-destructive version of #replace_nils!
      def replace_nils(replacement)
        copy = dup
        copy.replace_nils!(replacement)
      end

      # number of non-missing elements
      def n_valid
        missing_count = indexes(*DaruLite::MISSING_VALUES).size
        size - missing_count
      end
      deprecate :n_valid, :count_values, 2016, 10

      # Creates a new vector consisting only of non-missing data
      #
      # == Arguments
      #
      # @param as_a [Symbol] Passing :vector (the default) returns a
      #   DaruLite::Vector. Any other value returns only the elements
      #   as an Array.
      #
      # @param _duplicate [Boolean] Currently ignored (see the FIXME in
      #   the body).

      def only_valid(as_a = :vector, _duplicate = true) # rubocop:disable Style/OptionalBooleanParameter
        # FIXME: Now duplicate is just ignored.
        #   There are no spec that fail on this case, so I'll leave it
        #   this way for now - zverok, 2016-05-07

        kept_index = @index.to_a - indexes(*DaruLite::MISSING_VALUES)
        kept_values = kept_index.map { |idx| self[idx] }
        return kept_values unless as_a == :vector

        DaruLite::Vector.new kept_values, index: kept_index, name: @name, dtype: dtype
      end
      deprecate :only_valid, :reject_values, 2016, 10

      # Returns only the missing data, looked up through the entries
      # reported by +indexes+.
      #
      # @param as_a [Symbol] :vector returns the lookup result as is,
      #   :array converts it with +to_a+; anything else returns nil
      def only_missing(as_a = :vector)
        return unless %i[vector array].include?(as_a)

        missing = self[*indexes(*DaruLite::MISSING_VALUES)]
        as_a == :array ? missing.to_a : missing
      end
      deprecate :only_missing, nil, 2016, 10

      private

      # Helper method returning validity of arbitrary value:
      # false for nil and for anything whose +nan?+ is truthy.
      def valid_value?(value)
        return false if value.nil?

        !(value.respond_to?(:nan?) && value.nan?)
      end
    end
  end
end
@@ -0,0 +1,45 @@
module DaruLite
  class Vector
    # Predicate-style queries over a DaruLite::Vector.
    module Queryable
      # @return [true, false] whether the vector's index is empty
      def empty?
        @index.empty?
      end

      # Check if any one of mentioned values occur in the vector
      # @param values [Array] values to check for
      # @return [true, false] returns true if any one of specified values
      #   occur in the vector
      # @example
      #   dv = DaruLite::Vector.new [1, 2, 3, 4, nil]
      #   dv.include_values? nil, Float::NAN
      #   # => true
      def include_values?(*values)
        values.any? do |candidate|
          include_with_nan?(@data, candidate)
        end
      end

      # Delegates to +any?+ on the underlying data, forwarding the block.
      def any?(&block)
        raw = @data.data
        raw.any?(&block)
      end

      # Delegates to +all?+ on the underlying data, forwarding the block.
      def all?(&block)
        raw = @data.data
        raw.all?(&block)
      end

      # Tests every value against +regexp+ and reports, value by value,
      # whether it matched.
      #
      # @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
      #
      # @return [Array] Containing +true+ for each value that matches the
      #   given +regexp+ and +false+ for each one that does not
      #
      # @example
      #   dv = DaruLite::Vector.new(['3 days', '5 weeks', '2 weeks'])
      #   dv.match(/weeks/)
      #
      #   # => [false, true, true]
      def match(regexp)
        @data.map do |value|
          (value =~ regexp) ? true : false
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
module DaruLite
  class Vector
    # Element assignment for DaruLite::Vector, by position (#set_at) and
    # by index label (#[]=).
    module Setable
      # Change value at given positions
      # @param positions [Array<object>] positional values; they are
      #   checked with +validate_positions+ before anything is written
      # @param [object] val value to assign
      # @return whatever +update_position_cache+ returns
      # @example
      #   dv = DaruLite::Vector.new 'a'..'e'
      #   dv.set_at [0, 1], 'x'
      #   dv
      #   # => #<DaruLite::Vector(5)>
      #   #   0   x
      #   #   1   x
      #   #   2   c
      #   #   3   d
      #   #   4   e
      def set_at(positions, val)
        validate_positions(*positions)
        # each, not map: only the side effect is wanted, so there is no
        # reason to build a result array.
        positions.each { |pos| @data[pos] = val }
        update_position_cache
      end

      # Just like in Hashes, you can specify the index label of the DaruLite::Vector
      # and assign an element at that place in the DaruLite::Vector.
      #
      # Assigning nil to a vector whose dtype is not :array first casts
      # the vector to :array. The value is then passed to
      # +guard_type_check+ and written through +modify_vector+.
      #
      # == Usage
      #
      #   v = DaruLite::Vector.new([1,2,3], index: [:a, :b, :c])
      #   v[:a] = 999
      #   #=>
      #   ##<DaruLite::Vector:90257920 @name = nil @size = 3 >
      #   #    nil
      #   #  a 999
      #   #  b   2
      #   #  c   3
      def []=(*indexes, val)
        cast(dtype: :array) if val.nil? && dtype != :array

        guard_type_check(val)

        modify_vector(indexes, val)

        update_position_cache
      end
    end
  end
end
@@ -0,0 +1,113 @@
module DaruLite
  class Vector
    # Sorting and positional reordering for DaruLite::Vector.
    module Sortable
      # Sorts a vector according to its values and returns a new vector.
      # Preserves indexing: every value keeps its index entry.
      #
      # If a block is specified it is used as the comparator: it receives
      # two values and must return a negative number, zero or a positive
      # number, like <=>. Without a block the values are compared with <=>
      # and nil values are placed first in ascending order (and therefore
      # last in descending order).
      #
      # == Options
      #
      # * +:ascending+ - if false, will sort in descending order. Defaults to true.
      #
      # No other option is read by this method.
      #
      # == Usage
      #
      #   v = DaruLite::Vector.new ["My first guitar", "jazz", "guitar"]
      #   # Say you want to sort these strings by length.
      #   v.sort(ascending: false) { |a,b| a.length <=> b.length }
      def sort(opts = {}, &block)
        opts = { ascending: true }.merge(opts)

        sorted_pairs = resort_index(@data.each_with_index, opts, &block)
        # [].transpose is [], which would destructure into nil/nil, so an
        # empty vector needs its own pair of empty arrays.
        values, order = sorted_pairs.empty? ? [[], []] : sorted_pairs.transpose

        DaruLite::Vector.new(values, index: @index.reorder(order), name: @name, dtype: @dtype)
      end

      # Sorts the vector according to its `Index` values. Defaults to ascending
      # order sorting.
      #
      # @param [Hash] opts the options for sort_by_index method.
      # @option opts [Boolean] :ascending false, will sort `index` in
      #   descending order.
      #
      # @return [Vector] new sorted `Vector` according to the index values.
      #
      # @example
      #
      #   dv = DaruLite::Vector.new [11, 13, 12], index: [23, 21, 22]
      #   # Say you want to sort index in ascending order
      #   dv.sort_by_index(ascending: true)
      #   #=> DaruLite::Vector.new [13, 12, 11], index: [21, 22, 23]
      #   # Say you want to sort index in descending order
      #   dv.sort_by_index(ascending: false)
      #   #=> DaruLite::Vector.new [11, 12, 13], index: [23, 22, 21]
      def sort_by_index(opts = {})
        opts = { ascending: true }.merge(opts)
        _, new_order = resort_index(@index.each_with_index, opts).transpose

        # new_order is nil for an empty vector (see Array#transpose on []).
        reorder(new_order || [])
      end

      # Comparator for [value, position] pairs used when no block is
      # given: nil values sort before everything else, two nils keep
      # their original relative order, anything else is compared with <=>.
      DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
        if lv.nil? && rv.nil?
          li <=> ri
        elsif lv.nil?
          -1
        elsif rv.nil?
          1
        else
          lv <=> rv
        end
      }

      # Just sort the data and get an Array in return using Enumerable#sort.
      # Non-destructive.
      # :nocov:
      def sorted_data(&block)
        @data.to_a.sort(&block)
      end
      # :nocov:

      # Reorder the vector with given positions
      # @note Unlike #reindex! which takes index as input, it takes
      #   positions as an input to reorder the vector
      # @param [Array] order the order to reorder the vector with
      # @return reordered vector
      # @example
      #   dv = DaruLite::Vector.new [3, 2, 1], index: ['c', 'b', 'a']
      #   dv.reorder! [2, 1, 0]
      #   # => #<DaruLite::Vector(3)>
      #   #   a   1
      #   #   b   2
      #   #   c   3
      def reorder!(order)
        @index = @index.reorder order
        data_array = order.map { |i| @data[i] }
        @data = cast_vector_to @dtype, data_array, @nm_dtype
        update_position_cache
        self
      end

      # Non-destructive version of #reorder!
      def reorder(order)
        dup.reorder! order
      end

      private

      # Sorts an enumeration of [value, position] pairs and returns them as
      # an Array, reversed when opts[:ascending] is falsy. A given block
      # compares the values only; otherwise DEFAULT_SORTER is used.
      def resort_index(vector_index, opts)
        sorted =
          if block_given?
            vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
          else
            vector_index.sort(&DEFAULT_SORTER)
          end
        sorted.reverse! unless opts[:ascending]
        sorted
      end
    end
  end
end