daru_lite 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +35 -33
- data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
- data/lib/daru_lite/data_frame/calculatable.rb +140 -0
- data/lib/daru_lite/data_frame/convertible.rb +107 -0
- data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
- data/lib/daru_lite/data_frame/fetchable.rb +301 -0
- data/lib/daru_lite/data_frame/filterable.rb +144 -0
- data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
- data/lib/daru_lite/data_frame/indexable.rb +168 -0
- data/lib/daru_lite/data_frame/iterable.rb +339 -0
- data/lib/daru_lite/data_frame/joinable.rb +152 -0
- data/lib/daru_lite/data_frame/missable.rb +75 -0
- data/lib/daru_lite/data_frame/pivotable.rb +108 -0
- data/lib/daru_lite/data_frame/queryable.rb +67 -0
- data/lib/daru_lite/data_frame/setable.rb +109 -0
- data/lib/daru_lite/data_frame/sortable.rb +241 -0
- data/lib/daru_lite/dataframe.rb +138 -2353
- data/lib/daru_lite/index/index.rb +13 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1 -1
- data/lib/daru_lite/vector/aggregatable.rb +9 -0
- data/lib/daru_lite/vector/calculatable.rb +78 -0
- data/lib/daru_lite/vector/convertible.rb +77 -0
- data/lib/daru_lite/vector/duplicatable.rb +17 -0
- data/lib/daru_lite/vector/fetchable.rb +175 -0
- data/lib/daru_lite/vector/filterable.rb +128 -0
- data/lib/daru_lite/vector/indexable.rb +77 -0
- data/lib/daru_lite/vector/iterable.rb +95 -0
- data/lib/daru_lite/vector/joinable.rb +17 -0
- data/lib/daru_lite/vector/missable.rb +124 -0
- data/lib/daru_lite/vector/queryable.rb +45 -0
- data/lib/daru_lite/vector/setable.rb +47 -0
- data/lib/daru_lite/vector/sortable.rb +113 -0
- data/lib/daru_lite/vector.rb +36 -932
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/aggregatable_example.rb +65 -0
- data/spec/data_frame/buildable_example.rb +109 -0
- data/spec/data_frame/calculatable_example.rb +135 -0
- data/spec/data_frame/convertible_example.rb +180 -0
- data/spec/data_frame/duplicatable_example.rb +111 -0
- data/spec/data_frame/fetchable_example.rb +476 -0
- data/spec/data_frame/filterable_example.rb +250 -0
- data/spec/data_frame/indexable_example.rb +221 -0
- data/spec/data_frame/iterable_example.rb +465 -0
- data/spec/data_frame/joinable_example.rb +106 -0
- data/spec/data_frame/missable_example.rb +47 -0
- data/spec/data_frame/pivotable_example.rb +297 -0
- data/spec/data_frame/queryable_example.rb +92 -0
- data/spec/data_frame/setable_example.rb +482 -0
- data/spec/data_frame/sortable_example.rb +350 -0
- data/spec/dataframe_spec.rb +181 -3289
- data/spec/index/index_spec.rb +8 -0
- data/spec/vector/aggregatable_example.rb +27 -0
- data/spec/vector/calculatable_example.rb +82 -0
- data/spec/vector/convertible_example.rb +126 -0
- data/spec/vector/duplicatable_example.rb +48 -0
- data/spec/vector/fetchable_example.rb +463 -0
- data/spec/vector/filterable_example.rb +165 -0
- data/spec/vector/indexable_example.rb +201 -0
- data/spec/vector/iterable_example.rb +111 -0
- data/spec/vector/joinable_example.rb +25 -0
- data/spec/vector/missable_example.rb +88 -0
- data/spec/vector/queryable_example.rb +91 -0
- data/spec/vector/setable_example.rb +300 -0
- data/spec/vector/sortable_example.rb +242 -0
- data/spec/vector_spec.rb +111 -1805
- metadata +86 -2
@@ -0,0 +1,17 @@
|
|
1
|
+
module DaruLite
  class Vector
    # Operations that grow a vector by attaching new elements.
    module Joinable
      # Adds +element+ to the vector under a brand new +index+ label.
      #
      # The label is merged into the vector's index first; the element is then
      # stored at whatever position the index reports for that label.
      #
      # @param element [Object] value to store
      # @param index [Object] label for the new element; it must not already
      #   be part of the vector's index
      # @raise [IndexError] when +index+ is already present in the index
      # @return whatever +update_position_cache+ returns
      def concat(element, index)
        raise IndexError, 'Expected new unique index' if @index.include?(index)

        @index |= [index]
        position = @index[index]
        @data[position] = element

        update_position_cache
      end
      alias push concat
      alias << concat
    end
  end
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
module DaruLite
  class Vector
    # Helpers for detecting, counting, replacing and extracting missing
    # values, i.e. the values listed in DaruLite::MISSING_VALUES.
    module Missable
      extend Gem::Deprecate

      # Reports whether missing data is present in the Vector.
      #
      # @return [Boolean] true when at least one index holds one of
      #   DaruLite::MISSING_VALUES
      def has_missing_data? # rubocop:disable Naming/PredicateName
        !indexes(*DaruLite::MISSING_VALUES).empty?
      end
      alias flawed? has_missing_data?
      deprecate :has_missing_data?, :include_values?, 2016, 10
      deprecate :flawed?, :include_values?, 2016, 10

      # Replace every missing value (each entry of DaruLite::MISSING_VALUES)
      # in the vector with the value passed as an argument. Destructive.
      # See #replace_nils for non-destructive version
      #
      # == Arguments
      #
      # * +replacement+ - The value which should replace all missing values
      #
      # @return [self]
      def replace_nils!(replacement)
        indexes(*DaruLite::MISSING_VALUES).each do |idx|
          self[idx] = replacement
        end

        self
      end

      # Rolling fillna
      # replace all Float::NAN and NIL values with the preceding or following value
      #
      # Missing values that have no valid value before them (in the chosen
      # direction) are replaced with 0, because that is the seed of the scan.
      #
      # @param direction [Symbol] (:forward, :backward) whether replacement value is preceding or following.
      #   Any value other than :forward walks the index in reverse.
      # @return [self]
      #
      # @example
      #  dv = DaruLite::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
      #
      #   2.3.3 :068 > dv.rolling_fillna(:forward)
      #   => #<DaruLite::Vector(9)>
      #   0   1
      #   1   2
      #   2   1
      #   3   4
      #   4   4
      #   5   4
      #   6   3
      #   7   3
      #   8   3
      #
      def rolling_fillna!(direction = :forward)
        enum = direction == :forward ? index : index.reverse_each
        last_valid_value = 0
        enum.each do |idx|
          # Read the element once; it is either remembered or overwritten.
          value = self[idx]
          if valid_value?(value)
            last_valid_value = value
          else
            self[idx] = last_valid_value
          end
        end
        self
      end

      # Non-destructive version of rolling_fillna!
      def rolling_fillna(direction = :forward)
        dup.rolling_fillna!(direction)
      end

      # Non-destructive version of #replace_nils!
      def replace_nils(replacement)
        dup.replace_nils!(replacement)
      end

      # number of non-missing elements
      #
      # @return [Integer] +size+ minus the number of missing-value indexes
      def n_valid
        size - indexes(*DaruLite::MISSING_VALUES).size
      end
      deprecate :n_valid, :count_values, 2016, 10

      # Creates a new vector consisting only of non-missing data
      #
      # == Arguments
      #
      # @param as_a [Symbol] Passing :vector (the default) returns a
      #   DaruLite::Vector. Any other value returns only the elements
      #   as an Array.
      #
      # @param _duplicate [Symbol] Currently ignored (see the FIXME below).
      #   Originally: in case no missing data is found in the
      #   vector, setting this to false will return the same vector.
      #   Otherwise, a duplicate will be returned irrespective of
      #   presence of missing data.
      def only_valid(as_a = :vector, _duplicate = true) # rubocop:disable Style/OptionalBooleanParameter
        # FIXME: Now duplicate is just ignored.
        #   There are no spec that fail on this case, so I'll leave it
        #   this way for now - zverok, 2016-05-07

        new_index = @index.to_a - indexes(*DaruLite::MISSING_VALUES)
        new_vector = new_index.map { |idx| self[idx] }

        if as_a == :vector
          DaruLite::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
        else
          new_vector
        end
      end
      deprecate :only_valid, :reject_values, 2016, 10

      # Returns only the missing data, looked up by its indexes.
      #
      # @param as_a [Symbol] :vector returns the result of indexing the
      #   vector with the missing-value indexes; :array converts that result
      #   with +to_a+. Any other value returns nil.
      def only_missing(as_a = :vector)
        case as_a
        when :vector
          self[*indexes(*DaruLite::MISSING_VALUES)]
        when :array
          self[*indexes(*DaruLite::MISSING_VALUES)].to_a
        end
      end
      # :none is Gem::Deprecate's marker for "no replacement"; passing nil
      # would render a dangling "; use  instead" in the warning.
      deprecate :only_missing, :none, 2016, 10

      private

      # Helper method returning validity of arbitrary value
      #
      # @return [Boolean] false for nil and for anything whose +nan?+ is true
      def valid_value?(v)
        !((v.respond_to?(:nan?) && v.nan?) || v.nil?)
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module DaruLite
  class Vector
    # Predicates and element-wise queries over a vector's contents.
    module Queryable
      # @return [Boolean] whatever the vector's index reports for +empty?+
      def empty?
        @index.empty?
      end

      # Check if any one of mentioned values occur in the vector
      # @param values [Array] values to check for
      # @return [true, false] returns true if any one of specified values
      #   occur in the vector
      # @example
      #   dv = DaruLite::Vector.new [1, 2, 3, 4, nil]
      #   dv.include_values? nil, Float::NAN
      #   # => true
      def include_values?(*values)
        values.any? { |v| include_with_nan? @data, v }
      end

      # Forwards to +any?+ on the underlying data collection.
      #
      # @yield [value] optional predicate evaluated per element
      # @return [Boolean]
      def any?(&block)
        @data.data.any?(&block)
      end

      # Forwards to +all?+ on the underlying data collection.
      #
      # @yield [value] optional predicate evaluated per element
      # @return [Boolean]
      def all?(&block)
        @data.data.all?(&block)
      end

      # Tests every element of the vector against +regexp+.
      #
      # Elements that do not implement +=~+ (for example Integers on
      # Ruby 3.2 and later, where Object#=~ no longer exists) are reported
      # as non-matching instead of raising NoMethodError.
      #
      # @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
      #
      # @return [Array<Boolean>] one entry per element: +true+ when the
      #   element matches +regexp+, +false+ otherwise
      #
      # @example
      #   dv = DaruLite::Vector.new(['3 days', '5 weeks', '2 weeks'])
      #   dv.match(/weeks/)
      #
      #   # => [false, true, true]
      def match(regexp)
        @data.map { |value| value.respond_to?(:=~) && !!(value =~ regexp) }
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module DaruLite
  class Vector
    # Element assignment, either by position or by index label.
    module Setable
      # Writes the same value into each of the given positions.
      #
      # The positions are validated first; the position cache is refreshed
      # once all writes are done.
      #
      # @param positions [Array<object>] positions to overwrite
      # @param val [object] value stored at every listed position
      # @return whatever +update_position_cache+ returns
      # @example
      #   dv = DaruLite::Vector.new 'a'..'e'
      #   dv.set_at [0, 1], 'x'
      #   dv
      #   # => #<DaruLite::Vector(5)>
      #   #   0   x
      #   #   1   x
      #   #   2   c
      #   #   3   d
      #   #   4   e
      def set_at(positions, val)
        validate_positions(*positions)
        positions.each { |position| @data[position] = val }
        update_position_cache
      end

      # Hash-style assignment: give one or more index labels of the
      # DaruLite::Vector and the element to place at them.
      #
      # Assigning +nil+ to a vector whose dtype is not :array first casts the
      # vector to :array. The value then goes through +guard_type_check+
      # before +modify_vector+ performs the write.
      #
      # == Usage
      #
      #   v = DaruLite::Vector.new([1,2,3], index: [:a, :b, :c])
      #   v[:a] = 999
      #   #=>
      #   ##<DaruLite::Vector:90257920 @name = nil @size = 3 >
      #   #    nil
      #   #  a 999
      #   #  b   2
      #   #  c   3
      def []=(*indexes, val)
        needs_array_dtype = val.nil? && dtype != :array
        cast(dtype: :array) if needs_array_dtype

        guard_type_check(val)
        modify_vector(indexes, val)
        update_position_cache
      end
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module DaruLite
  class Vector
    # Sorting by value or by index label, plus positional reordering.
    module Sortable
      # Returns a new vector sorted by value; every value keeps its index label.
      #
      # Without a block the values are ordered by DEFAULT_SORTER: +<=>+ on
      # the values, with +nil+ ranked before everything else (so nils lead an
      # ascending sort and trail a descending one). With a block, the block is
      # the comparator: it receives two values and must answer like +<=>+.
      #
      # == Options
      #
      # * +:ascending+ - if false, the sorted result is reversed. Defaults to true.
      #
      # == Usage
      #
      #   v = DaruLite::Vector.new ["My first guitar", "jazz", "guitar"]
      #   # Say you want to sort these strings by length.
      #   v.sort(ascending: false) { |a,b| a.length <=> b.length }
      def sort(opts = {}, &block)
        opts = { ascending: true }.merge(opts)

        sorted_pairs = resort_index(@data.each_with_index, opts, &block)
        values, positions = sorted_pairs.transpose

        reordered_index = @index.reorder(positions)

        DaruLite::Vector.new(values, index: reordered_index, name: @name, dtype: @dtype)
      end

      # Returns a new vector ordered by its index labels instead of its values.
      # Ascending order is the default.
      #
      # @param [Hash] opts the options for sort_by_index method.
      # @option opts [Boolean] :ascending pass false to order the index
      #   labels in descending order.
      #
      # @return [Vector] new `Vector` reordered according to the index values.
      #
      # @example
      #
      #   dv = DaruLite::Vector.new [11, 13, 12], index: [23, 21, 22]
      #   # Say you want to sort index in ascending order
      #   dv.sort_by_index(ascending: true)
      #   #=> DaruLite::Vector.new [13, 12, 11], index: [21, 22, 23]
      #   # Say you want to sort index in descending order
      #   dv.sort_by_index(ascending: false)
      #   #=> DaruLite::Vector.new [11, 12, 13], index: [23, 22, 21]
      def sort_by_index(opts = {})
        opts = { ascending: true }.merge(opts)
        sorted_pairs = resort_index(@index.each_with_index, opts)

        # Each pair is [label, position]; only the positions are needed.
        reorder sorted_pairs.transpose.last
      end

      # Comparator over [value, position] pairs used when no block is given.
      # nil values rank before non-nil ones; two nils are ordered by their
      # positions; anything else is compared with +<=>+.
      DEFAULT_SORTER = lambda do |(lv, li), (rv, ri)|
        next lv <=> rv unless lv.nil? || rv.nil?
        next li <=> ri if lv.nil? && rv.nil?

        lv.nil? ? -1 : 1
      end

      # Plain Array of the vector's data sorted with Array#sort; an optional
      # block serves as the comparator. The vector itself is left untouched.
      # :nocov:
      def sorted_data(&block)
        @data.to_a.sort(&block)
      end
      # :nocov:

      # Rearranges the vector in place so that the element currently at
      # order[0] comes first, the one at order[1] second, and so on.
      # @note Unlike #reindex! which takes index as input, it takes
      #   positions as an input to reorder the vector
      # @param [Array] order positions, listed in the desired new order
      # @return [self] the reordered vector
      # @example
      #   dv = DaruLite::Vector.new [3, 2, 1], index: ['c', 'b', 'a']
      #   dv.reorder! [2, 1, 0]
      #   # => #<DaruLite::Vector(3)>
      #   #   a   1
      #   #   b   2
      #   #   c   3
      def reorder!(order)
        @index = @index.reorder(order)
        reordered = order.map { |position| @data[position] }
        @data = cast_vector_to(@dtype, reordered, @nm_dtype)
        update_position_cache
        self
      end

      # Same as #reorder!, but works on a duplicate and leaves the receiver
      # unchanged.
      def reorder(order)
        dup.reorder!(order)
      end

      private

      # Sorts an enumeration of [value, position] pairs, comparing values with
      # the given block when there is one and with DEFAULT_SORTER otherwise.
      # The sorted array is reversed unless opts[:ascending] is truthy.
      def resort_index(vector_index, opts)
        sorted =
          if block_given?
            vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
          else
            vector_index.sort(&DEFAULT_SORTER)
          end

        sorted.reverse! unless opts[:ascending]
        sorted
      end
    end
  end
end
|