daru_lite 0.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- data/.github/workflows/ci.yml +20 -0
- data/.rubocop_todo.yml +35 -33
- data/README.md +19 -115
- data/daru_lite.gemspec +1 -0
- data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
- data/lib/daru_lite/data_frame/calculatable.rb +140 -0
- data/lib/daru_lite/data_frame/convertible.rb +107 -0
- data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
- data/lib/daru_lite/data_frame/fetchable.rb +301 -0
- data/lib/daru_lite/data_frame/filterable.rb +144 -0
- data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
- data/lib/daru_lite/data_frame/indexable.rb +168 -0
- data/lib/daru_lite/data_frame/iterable.rb +339 -0
- data/lib/daru_lite/data_frame/joinable.rb +152 -0
- data/lib/daru_lite/data_frame/missable.rb +75 -0
- data/lib/daru_lite/data_frame/pivotable.rb +108 -0
- data/lib/daru_lite/data_frame/queryable.rb +67 -0
- data/lib/daru_lite/data_frame/setable.rb +109 -0
- data/lib/daru_lite/data_frame/sortable.rb +241 -0
- data/lib/daru_lite/dataframe.rb +142 -2355
- data/lib/daru_lite/index/index.rb +13 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1 -1
- data/lib/daru_lite/vector/aggregatable.rb +9 -0
- data/lib/daru_lite/vector/calculatable.rb +78 -0
- data/lib/daru_lite/vector/convertible.rb +77 -0
- data/lib/daru_lite/vector/duplicatable.rb +17 -0
- data/lib/daru_lite/vector/fetchable.rb +175 -0
- data/lib/daru_lite/vector/filterable.rb +128 -0
- data/lib/daru_lite/vector/indexable.rb +77 -0
- data/lib/daru_lite/vector/iterable.rb +95 -0
- data/lib/daru_lite/vector/joinable.rb +17 -0
- data/lib/daru_lite/vector/missable.rb +124 -0
- data/lib/daru_lite/vector/queryable.rb +45 -0
- data/lib/daru_lite/vector/setable.rb +47 -0
- data/lib/daru_lite/vector/sortable.rb +113 -0
- data/lib/daru_lite/vector.rb +36 -932
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/aggregatable_example.rb +65 -0
- data/spec/data_frame/buildable_example.rb +109 -0
- data/spec/data_frame/calculatable_example.rb +135 -0
- data/spec/data_frame/convertible_example.rb +180 -0
- data/spec/data_frame/duplicatable_example.rb +111 -0
- data/spec/data_frame/fetchable_example.rb +476 -0
- data/spec/data_frame/filterable_example.rb +250 -0
- data/spec/data_frame/indexable_example.rb +221 -0
- data/spec/data_frame/iterable_example.rb +465 -0
- data/spec/data_frame/joinable_example.rb +106 -0
- data/spec/data_frame/missable_example.rb +47 -0
- data/spec/data_frame/pivotable_example.rb +297 -0
- data/spec/data_frame/queryable_example.rb +92 -0
- data/spec/data_frame/setable_example.rb +482 -0
- data/spec/data_frame/sortable_example.rb +350 -0
- data/spec/dataframe_spec.rb +181 -3243
- data/spec/index/index_spec.rb +8 -0
- data/spec/vector/aggregatable_example.rb +27 -0
- data/spec/vector/calculatable_example.rb +82 -0
- data/spec/vector/convertible_example.rb +126 -0
- data/spec/vector/duplicatable_example.rb +48 -0
- data/spec/vector/fetchable_example.rb +463 -0
- data/spec/vector/filterable_example.rb +165 -0
- data/spec/vector/indexable_example.rb +201 -0
- data/spec/vector/iterable_example.rb +111 -0
- data/spec/vector/joinable_example.rb +25 -0
- data/spec/vector/missable_example.rb +88 -0
- data/spec/vector/queryable_example.rb +91 -0
- data/spec/vector/setable_example.rb +300 -0
- data/spec/vector/sortable_example.rb +242 -0
- data/spec/vector_spec.rb +111 -1805
- metadata +102 -3
- data/.github/ISSUE_TEMPLATE.md +0 -18
@@ -0,0 +1,17 @@
|
|
1
|
+
module DaruLite
  class Vector
    # Appending single elements to a Vector.
    module Joinable
      # Adds +element+ to the vector under the new label +index+.
      #
      # @param element [Object] value to store
      # @param index [Object] label for the new element; must not already be
      #   present in the vector's index
      # @raise [IndexError] if +index+ is already part of the index
      # @return whatever +update_position_cache+ returns
      def concat(element, index)
        if @index.include?(index)
          raise IndexError, 'Expected new unique index'
        end

        # Extend the index first so the new label can be resolved through it.
        @index |= [index]
        slot = @index[index]
        @data[slot] = element

        update_position_cache
      end
      alias push concat
      alias << concat
    end
  end
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
module DaruLite
  class Vector
    # Detection, counting, replacement and extraction of missing values.
    # Except for #rolling_fillna!, "missing" means whatever
    # +indexes(*DaruLite::MISSING_VALUES)+ reports for the vector.
    module Missable
      extend Gem::Deprecate

      # Reports whether missing data is present in the Vector.
      #
      # @return [Boolean] true if at least one index holds a missing value
      def has_missing_data? # rubocop:disable Naming/PredicateName
        !indexes(*DaruLite::MISSING_VALUES).empty?
      end
      alias flawed? has_missing_data?
      deprecate :has_missing_data?, :include_values?, 2016, 10
      deprecate :flawed?, :include_values?, 2016, 10

      # Replace all missing values in the vector with the value passed as an
      # argument. Destructive.
      # See #replace_nils for non-destructive version
      #
      # == Arguments
      #
      # * +replacement+ - The value which should replace all missing values
      #
      # @return [self]
      def replace_nils!(replacement)
        indexes(*DaruLite::MISSING_VALUES).each do |idx|
          self[idx] = replacement
        end

        self
      end

      # Rolling fillna
      # replace all Float::NAN and NIL values with the preceding or following value.
      # Destructive.
      #
      # Missing values met before any valid value has been seen (at the start
      # for :forward, at the end otherwise) are replaced with 0.
      #
      # @param direction [Symbol] :forward takes the replacement from the
      #   preceding value; any other value (e.g. :backward) walks the index in
      #   reverse and takes it from the following value
      # @return [self]
      #
      # @example
      #  dv = DaruLite::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
      #
      #   2.3.3 :068 > dv.rolling_fillna(:forward)
      #   => #<DaruLite::Vector(9)>
      #   0   1
      #   1   2
      #   2   1
      #   3   4
      #   4   4
      #   5   4
      #   6   3
      #   7   3
      #   8   3
      #
      def rolling_fillna!(direction = :forward)
        enum = direction == :forward ? index : index.reverse_each
        last_valid_value = 0
        enum.each do |idx|
          if valid_value?(self[idx])
            last_valid_value = self[idx]
          else
            self[idx] = last_valid_value
          end
        end
        self
      end

      # Non-destructive version of rolling_fillna!
      def rolling_fillna(direction = :forward)
        dup.rolling_fillna!(direction)
      end

      # Non-destructive version of #replace_nils!
      def replace_nils(replacement)
        dup.replace_nils!(replacement)
      end

      # number of non-missing elements
      #
      # @return [Integer] +size+ minus the number of missing-value indexes
      def n_valid
        size - indexes(*DaruLite::MISSING_VALUES).size
      end
      deprecate :n_valid, :count_values, 2016, 10

      # Creates a new vector consisting only of non-missing data
      #
      # == Arguments
      #
      # @param as_a [Symbol] Passing :vector (the default) returns a
      #   DaruLite::Vector; any other value (e.g. :array) returns only the
      #   elements as an Array.
      #
      # @param _duplicate [Boolean] Currently ignored (see FIXME below).
      #   Originally: in case no missing data is found in the vector, setting
      #   this to false would return the same vector instead of a duplicate.
      def only_valid(as_a = :vector, _duplicate = true) # rubocop:disable Style/OptionalBooleanParameter
        # FIXME: Now duplicate is just ignored.
        #   There are no spec that fail on this case, so I'll leave it
        #   this way for now - zverok, 2016-05-07

        new_index = @index.to_a - indexes(*DaruLite::MISSING_VALUES)
        new_vector = new_index.map { |idx| self[idx] }

        if as_a == :vector
          DaruLite::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
        else
          new_vector
        end
      end
      deprecate :only_valid, :reject_values, 2016, 10

      # Returns only the missing data (selected through #[] with the missing
      # indexes).
      #
      # @param as_a [Symbol] :vector returns the result of #[] as is, :array
      #   converts it with +to_a+. Any other value yields nil.
      def only_missing(as_a = :vector)
        case as_a
        when :vector
          self[*indexes(*DaruLite::MISSING_VALUES)]
        when :array
          self[*indexes(*DaruLite::MISSING_VALUES)].to_a
        end
      end
      # :none is Gem::Deprecate's marker for "no replacement"; passing nil
      # would render the warning as "use  instead".
      deprecate :only_missing, :none, 2016, 10

      private

      # Helper method returning validity of arbitrary value:
      # false for nil and for anything whose +nan?+ is true.
      def valid_value?(v)
        !((v.respond_to?(:nan?) && v.nan?) || v.nil?)
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module DaruLite
  class Vector
    # Predicates and simple queries over a Vector's contents.
    module Queryable
      # @return [Boolean] true when the vector's index is empty
      def empty?
        @index.empty?
      end

      # Check if any one of mentioned values occur in the vector
      # @param values [Array] values to check for
      # @return [true, false] returns true if any one of specified values
      #   occur in the vector
      # @example
      #   dv = DaruLite::Vector.new [1, 2, 3, 4, nil]
      #   dv.include_values? nil, Float::NAN
      #   # => true
      def include_values?(*values)
        values.any? { |v| include_with_nan? @data, v }
      end

      # Delegates to +any?+ on the underlying data, forwarding the block.
      def any?(&block)
        @data.data.any?(&block)
      end

      # Delegates to +all?+ on the underlying data, forwarding the block.
      def all?(&block)
        @data.data.all?(&block)
      end

      # Returns an array of booleans indicating, element by element, whether
      # the value matches the given +regexp+.
      #
      # Elements that do not implement +=~+ are reported as +false+ instead of
      # raising: Object#=~ was removed in Ruby 3.2, so e.g. an Integer element
      # would otherwise raise NoMethodError.
      #
      # @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
      #
      # @return [Array] Containing +true+ or +false+ for each element,
      #   according to the match with the given +regexp+
      #
      # @example
      #   dv = DaruLite::Vector.new(['3 days', '5 weeks', '2 weeks'])
      #   dv.match(/weeks/)
      #
      #   # => [false, true, true]
      def match(regexp)
        @data.map { |value| value.respond_to?(:=~) ? !!(value =~ regexp) : false }
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module DaruLite
  class Vector
    # Element assignment, by position (#set_at) and by index label (#[]=).
    module Setable
      # Change value at given positions
      # @param positions [Array<object>] positional values
      # @param [object] val value to assign
      # @return whatever +update_position_cache+ returns
      # @example
      #   dv = DaruLite::Vector.new 'a'..'e'
      #   dv.set_at [0, 1], 'x'
      #   dv
      #   # => #<DaruLite::Vector(5)>
      #   #   0   x
      #   #   1   x
      #   #   2   c
      #   #   3   d
      #   #   4   e
      def set_at(positions, val)
        validate_positions(*positions)
        # each, not map: only the assignment side effect is wanted.
        positions.each { |pos| @data[pos] = val }
        update_position_cache
      end

      # Just like in Hashes, you can specify the index label of the DaruLite::Vector
      # and assign an element at that place in the DaruLite::Vector.
      #
      # == Usage
      #
      #   v = DaruLite::Vector.new([1,2,3], index: [:a, :b, :c])
      #   v[:a] = 999
      #   #=>
      #   ##<DaruLite::Vector:90257920 @name = nil @size = 3 >
      #   #    nil
      #   #  a 999
      #   #  b   2
      #   #  c   3
      def []=(*indexes, val)
        # Assigning nil first casts the vector to the :array dtype
        # (presumably because other dtypes cannot store nil - confirm in #cast).
        cast(dtype: :array) if val.nil? && dtype != :array

        guard_type_check(val)

        modify_vector(indexes, val)

        update_position_cache
      end
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module DaruLite
  class Vector
    # Sorting by value or by index, and positional reordering.
    module Sortable
      # Sorts a vector according to its values. If a block is specified, it is
      # used as the comparator: it receives two values and must return a
      # negative number, zero or a positive number, like +<=>+. Defaults to
      # ascending order sorting. Without a block, nil values are placed at the
      # beginning of an ascending sort (and therefore at the end of a
      # descending one). Preserves indexing.
      #
      # == Options
      #
      # * +:ascending+ - if false, will sort in descending order. Defaults to true.
      #
      # Other keys are not read by this method.
      #
      # == Usage
      #
      #   v = DaruLite::Vector.new ["My first guitar", "jazz", "guitar"]
      #   # Say you want to sort these strings by length.
      #   v.sort(ascending: false) { |a,b| a.length <=> b.length }
      #
      # @return [DaruLite::Vector] a new, sorted vector
      def sort(opts = {}, &block)
        opts = { ascending: true }.merge(opts)

        sorted_pairs = resort_index(@data.each_with_index, opts, &block)
        # Split the [value, position] pairs with map instead of
        # Array#transpose: transposing an empty array yields [], which would
        # leave both halves nil for an empty vector.
        values = sorted_pairs.map(&:first)
        positions = sorted_pairs.map(&:last)

        DaruLite::Vector.new(values, index: @index.reorder(positions), name: @name, dtype: @dtype)
      end

      # Sorts the vector according to its `Index` values. Defaults to ascending
      # order sorting.
      #
      # @param [Hash] opts the options for sort_by_index method.
      # @option opts [Boolean] :ascending false, will sort `index` in
      #   descending order.
      #
      # @return [Vector] new sorted `Vector` according to the index values.
      #
      # @example
      #
      #   dv = DaruLite::Vector.new [11, 13, 12], index: [23, 21, 22]
      #   # Say you want to sort index in ascending order
      #   dv.sort_by_index(ascending: true)
      #   #=> DaruLite::Vector.new [13, 12, 11], index: [21, 22, 23]
      #   # Say you want to sort index in descending order
      #   dv.sort_by_index(ascending: false)
      #   #=> DaruLite::Vector.new [11, 12, 13], index: [23, 22, 21]
      def sort_by_index(opts = {})
        opts = { ascending: true }.merge(opts)
        # map(&:last) rather than transpose so an empty vector yields [].
        new_order = resort_index(@index.each_with_index, opts).map(&:last)

        reorder new_order
      end

      # Comparator over [value, position] pairs used when no block is given.
      # nil values sort before everything else; two nils are ordered by their
      # original position; all other values are compared with +<=>+.
      DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
        if lv.nil? && rv.nil?
          li <=> ri
        elsif lv.nil?
          -1
        elsif rv.nil?
          1
        else
          lv <=> rv
        end
      }

      # Just sort the data and get an Array in return using Enumerable#sort.
      # Non-destructive.
      # :nocov:
      def sorted_data(&block)
        @data.to_a.sort(&block)
      end
      # :nocov:

      # Reorder the vector with given positions
      # @note Unlike #reindex! which takes index as input, it takes
      #   positions as an input to reorder the vector
      # @param [Array] order the order to reorder the vector with
      # @return reordered vector
      # @example
      #   dv = DaruLite::Vector.new [3, 2, 1], index: ['c', 'b', 'a']
      #   dv.reorder! [2, 1, 0]
      #   # => #<DaruLite::Vector(3)>
      #   #   a   1
      #   #   b   2
      #   #   c   3
      def reorder!(order)
        @index = @index.reorder order
        data_array = order.map { |i| @data[i] }
        @data = cast_vector_to @dtype, data_array, @nm_dtype
        update_position_cache
        self
      end

      # Non-destructive version of #reorder!
      def reorder(order)
        dup.reorder! order
      end

      private

      # Sorts the given [value, position] pairs and returns them as an Array.
      # With a block, the block compares the values; otherwise DEFAULT_SORTER
      # is used. The result is reversed unless +opts[:ascending]+ is truthy.
      def resort_index(vector_index, opts)
        sorted =
          if block_given?
            vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
          else
            vector_index.sort(&DEFAULT_SORTER)
          end

        sorted.reverse! unless opts[:ascending]
        sorted
      end
    end
  end
end
|