daru_lite 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +35 -33
- data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
- data/lib/daru_lite/data_frame/calculatable.rb +140 -0
- data/lib/daru_lite/data_frame/convertible.rb +107 -0
- data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
- data/lib/daru_lite/data_frame/fetchable.rb +301 -0
- data/lib/daru_lite/data_frame/filterable.rb +144 -0
- data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
- data/lib/daru_lite/data_frame/indexable.rb +168 -0
- data/lib/daru_lite/data_frame/iterable.rb +339 -0
- data/lib/daru_lite/data_frame/joinable.rb +152 -0
- data/lib/daru_lite/data_frame/missable.rb +75 -0
- data/lib/daru_lite/data_frame/pivotable.rb +108 -0
- data/lib/daru_lite/data_frame/queryable.rb +67 -0
- data/lib/daru_lite/data_frame/setable.rb +109 -0
- data/lib/daru_lite/data_frame/sortable.rb +241 -0
- data/lib/daru_lite/dataframe.rb +138 -2353
- data/lib/daru_lite/index/index.rb +13 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1 -1
- data/lib/daru_lite/vector/aggregatable.rb +9 -0
- data/lib/daru_lite/vector/calculatable.rb +78 -0
- data/lib/daru_lite/vector/convertible.rb +77 -0
- data/lib/daru_lite/vector/duplicatable.rb +17 -0
- data/lib/daru_lite/vector/fetchable.rb +175 -0
- data/lib/daru_lite/vector/filterable.rb +128 -0
- data/lib/daru_lite/vector/indexable.rb +77 -0
- data/lib/daru_lite/vector/iterable.rb +95 -0
- data/lib/daru_lite/vector/joinable.rb +17 -0
- data/lib/daru_lite/vector/missable.rb +124 -0
- data/lib/daru_lite/vector/queryable.rb +45 -0
- data/lib/daru_lite/vector/setable.rb +47 -0
- data/lib/daru_lite/vector/sortable.rb +113 -0
- data/lib/daru_lite/vector.rb +36 -932
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/aggregatable_example.rb +65 -0
- data/spec/data_frame/buildable_example.rb +109 -0
- data/spec/data_frame/calculatable_example.rb +135 -0
- data/spec/data_frame/convertible_example.rb +180 -0
- data/spec/data_frame/duplicatable_example.rb +111 -0
- data/spec/data_frame/fetchable_example.rb +476 -0
- data/spec/data_frame/filterable_example.rb +250 -0
- data/spec/data_frame/indexable_example.rb +221 -0
- data/spec/data_frame/iterable_example.rb +465 -0
- data/spec/data_frame/joinable_example.rb +106 -0
- data/spec/data_frame/missable_example.rb +47 -0
- data/spec/data_frame/pivotable_example.rb +297 -0
- data/spec/data_frame/queryable_example.rb +92 -0
- data/spec/data_frame/setable_example.rb +482 -0
- data/spec/data_frame/sortable_example.rb +350 -0
- data/spec/dataframe_spec.rb +181 -3289
- data/spec/index/index_spec.rb +8 -0
- data/spec/vector/aggregatable_example.rb +27 -0
- data/spec/vector/calculatable_example.rb +82 -0
- data/spec/vector/convertible_example.rb +126 -0
- data/spec/vector/duplicatable_example.rb +48 -0
- data/spec/vector/fetchable_example.rb +463 -0
- data/spec/vector/filterable_example.rb +165 -0
- data/spec/vector/indexable_example.rb +201 -0
- data/spec/vector/iterable_example.rb +111 -0
- data/spec/vector/joinable_example.rb +25 -0
- data/spec/vector/missable_example.rb +88 -0
- data/spec/vector/queryable_example.rb +91 -0
- data/spec/vector/setable_example.rb +300 -0
- data/spec/vector/sortable_example.rb +242 -0
- data/spec/vector_spec.rb +111 -1805
- metadata +86 -2
@@ -246,6 +246,19 @@ module DaruLite
|
|
246
246
|
DaruLite::Index.new(to_a + indexes)
|
247
247
|
end
|
248
248
|
|
249
|
+
# Builds a new index that omits the element found at the given position.
# @param position [Integer] positional value of the element to drop
# @return [object] new index (instance of the receiver's class) without that element
# @example
#   idx = DaruLite::Index.new [:a, :b, :c]
#   idx.delete_at(0)
#   # => #<DaruLite::Index(2): {b, c}>
def delete_at(position)
  remaining = to_a.tap { |keys| keys.delete_at(position) }
  self.class.new(remaining)
end
|
261
|
+
|
249
262
|
def _dump(*)
|
250
263
|
Marshal.dump(relation_hash: @relation_hash)
|
251
264
|
end
|
@@ -842,7 +842,7 @@ module DaruLite
|
|
842
842
|
def emsd(n = 10, wilder = false)
|
843
843
|
result = []
|
844
844
|
emv_return = emv(n, wilder)
|
845
|
-
emv_return.each do |d|
|
845
|
+
emv_return.each do |d| # rubocop:disable Style/MapIntoArray
|
846
846
|
result << (d.nil? ? nil : Math.sqrt(d))
|
847
847
|
end
|
848
848
|
DaruLite::Vector.new(result, index: @index, name: @name)
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Calculatable
|
4
|
+
# Counts how many elements match any of the given values.
# The lookup itself is delegated to #positions.
# @param values [Array] values to count for
# @return [Integer] the number of positions holding one of the values
# @example
#   dv = DaruLite::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
#   dv.count_values nil
#   # => 2
def count_values(*values)
  matches = positions(*values)
  matches.size
end
|
14
|
+
|
15
|
+
# Builds a textual summary of the Vector: a header with the name, the total
# size and the non-missing count, followed by a type specific section
# (#object_summary for :object vectors, #numeric_summary for :numeric ones).
# @param indent_level [Integer] indent level applied to the header and to every following line
# @return [String] String containing the summary of the Vector
# @example
#   dv = DaruLite::Vector.new [1, 2, 3]
#   puts dv.summary
#
#   # =
#   # n :3
#   # non-missing:3
#   # median: 2
#   # mean: 2.0000
#   # std.dev.: 1.0000
#   # std.err.: 0.5774
#   # skew: 0.0000
#   # kurtosis: -2.3333
def summary(indent_level = 0)
  present = size - count_values(*DaruLite::MISSING_VALUES)
  report = (' =' * indent_level) + "= #{name}\n n :#{size}\n non-missing:#{present}"

  details =
    case type
    when :object then object_summary
    when :numeric then numeric_summary
    end
  report << details if details

  # Re-join the lines so that every line after the first carries the indent.
  padding = ' ' * indent_level
  report.split("\n").join("\n#{padding}")
end
|
44
|
+
|
45
|
+
# Displays summary for an object type Vector: its factors, its mode and a
# distribution table with one row per entry of #frequencies
# (label, count, share of the total).
#
# The share is computed against the sum of the listed counts, so the
# percentages of the table add up to 100%.
# @return [String] String containing object vector summary
def object_summary
  header = "\n factors: #{factors.to_a.join(',')}" \
           "\n mode: #{mode.to_a.join(',')}" \
           "\n Distribution\n"

  # Each entry is a [count, label] pair.
  counts = frequencies.sort.each_with_index.to_a
  total = counts.sum { |count, _label| count }

  rows = counts.map do |count, label|
    # Guard against a zero total to avoid dividing by zero.
    share = total.zero? ? 0 : count.quo(total)
    [label, count, format('%0.2f%%', share * 100)]
  end

  header + Formatters::Table.format(rows)
end
|
59
|
+
|
60
|
+
# Displays summary for a numeric type Vector: median and mean always,
# std.dev./std.err. only when #sd is available, skew and kurtosis only
# when the vector holds no missing values.
# @return [String] String containing numeric vector summary
def numeric_summary
  sections = ["\n median: #{median}", format("\n mean: %0.4f", mean)]

  if sd
    sections << format("\n std.dev.: %0.4f", sd)
    sections << format("\n std.err.: %0.4f", se)
  end

  if count_values(*DaruLite::MISSING_VALUES).zero?
    sections << format("\n skew: %0.4f", skew)
    sections << format("\n kurtosis: %0.4f", kurtosis)
  end

  sections.join
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Convertible
|
4
|
+
# Wraps the vector in a DataFrame whose only column is keyed by the vector's
# name; the name and the index are passed along as options.
# @return [DaruLite::DataFrame] the vector as a single-vector dataframe
def to_df
  columns = { @name => @data }
  DaruLite::DataFrame.new(columns, name: @name, index: @index)
end
|
8
|
+
|
9
|
+
# Convert Vector to a horizontal or vertical Ruby Matrix.
#
# == Arguments
#
# * +axis+ - Specify whether you want a *:horizontal* (single row) or a
#   *:vertical* (single column) matrix.
#
# Raises ArgumentError for any other axis.
def to_matrix(axis = :horizontal)
  return Matrix[to_a] if axis == :horizontal
  return Matrix.columns([to_a]) if axis == :vertical

  raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}"
end
|
24
|
+
|
25
|
+
# Convert to hash (explicit). Hash keys are indexes and values are the
# corresponding elements, each looked up through #[].
def to_h
  @index.to_h { |label| [label, self[label]] }
end
|
29
|
+
|
30
|
+
# Return the vector's data as an array (delegates to the data store's #to_a).
def to_a
  store = @data
  store.to_a
end
|
34
|
+
|
35
|
+
# Convert the hash from #to_h to json.
#
# Any arguments (the generator state or options hash that the JSON library
# hands to #to_json) are forwarded to the hash, so options such as
# pretty-printing are honoured instead of being silently dropped.
# @return [String] JSON document
def to_json(*args)
  to_h.to_json(*args)
end
|
39
|
+
|
40
|
+
# Convert to html for iruby. Renders the vector template (the MultiIndex
# variant when the index is a MultiIndex) with ERB against this method's binding.
# @param threshold [Integer] passed on to #to_html_tbody
# @return [String] rendered html
def to_html(threshold = 30)
  # NOTE(review): these locals are presumably read by the template through
  # `binding`, so their names are kept as-is.
  table_thead = to_html_thead
  table_tbody = to_html_tbody(threshold)
  path = File.expand_path(
    index.is_a?(MultiIndex) ? '../iruby/templates/vector_mi.html.erb' : '../iruby/templates/vector.html.erb',
    __dir__
  )
  ERB.new(File.read(path).strip).result(binding)
end
|
51
|
+
|
52
|
+
# Renders the table header template (the MultiIndex variant when the index
# is a MultiIndex) with ERB against this method's binding.
# @return [String] rendered html
def to_html_thead
  table_thead_path = File.expand_path(
    index.is_a?(MultiIndex) ? '../iruby/templates/vector_mi_thead.html.erb' : '../iruby/templates/vector_thead.html.erb',
    __dir__
  )
  ERB.new(File.read(table_thead_path).strip).result(binding)
end
|
61
|
+
|
62
|
+
# Renders the table body template (the MultiIndex variant when the index
# is a MultiIndex) with ERB against this method's binding.
# @param threshold [Integer] not used by this method directly;
#   presumably read by the template through `binding` - confirm against the template
# @return [String] rendered html
def to_html_tbody(threshold = 30)
  table_tbody_path = File.expand_path(
    index.is_a?(MultiIndex) ? '../iruby/templates/vector_mi_tbody.html.erb' : '../iruby/templates/vector_tbody.html.erb',
    __dir__
  )
  ERB.new(File.read(table_tbody_path).strip).result(binding)
end
|
71
|
+
|
72
|
+
def to_s
|
73
|
+
"#<#{self.class}#{": #{@name}" if @name}(#{size})#{':category' if category?}>"
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Duplicatable
|
4
|
+
# Duplicates a vector: builds a new Vector from copies of the data and of
# the index, keeping the name.
# @return [DaruLite::Vector] duplicated vector
def dup
  DaruLite::Vector.new(@data.dup, name: @name, index: @index.dup)
end
|
9
|
+
|
10
|
+
# Copies the structure of the vector (i.e. the name, the index and the size)
# and fills all values with nils.
# @return [DaruLite::Vector] vector of nils with a copy of the index
def clone_structure
  blanks = Array.new(size)
  DaruLite::Vector.new(blanks, name: @name, index: @index.dup)
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Fetchable
|
4
|
+
# Get one or more elements with specified index or a range.
#
# == Usage
#   # For vectors employing single layer Index
#
#   v[:one, :two] # => DaruLite::Vector with indexes :one and :two
#   v[:one] # => Single element
#   v[:one..:three] # => DaruLite::Vector with indexes :one, :two and :three
#
#   # Vectors employing a hierarchical MultiIndex go through the same
#   # index lookup (@index.pos / @index.subset).
#
def [](*input_indexes)
  located = @index.pos(*input_indexes)

  # A single numeric position means exactly one element was asked for.
  return @data[located] if located.is_a?(Numeric)

  picked = located.map { |loc| @data[loc] }
  DaruLite::Vector.new(picked, name: @name, index: @index.subset(*input_indexes), dtype: @dtype)
end
|
29
|
+
|
30
|
+
# Returns the value(s) found at the given positional values. The positions
# are coerced and validated first; a single Integer position returns the
# element itself, anything else returns a new vector.
# @param positions [Array<object>] positional values
# @return [object] element or vector
# @example
#   dv = DaruLite::Vector.new 'a'..'e'
#   dv.at 0, 1, 2
#   # => #<DaruLite::Vector(3)>
#   #   0 a
#   #   1 b
#   #   2 c
def at(*positions)
  resolved = coerce_positions(*positions)
  validate_positions(*resolved)
  return @data[resolved] if resolved.is_a?(Integer)

  # The index of the result is built from the positions as originally given.
  picked = resolved.map { |pos| @data[pos] }
  DaruLite::Vector.new picked, index: @index.at(*positions), dtype: dtype
end
|
53
|
+
|
54
|
+
# Returns the slice self[0..(q - 1)], i.e. the leading q elements.
# @param q [Integer] number of elements (defaults to 10)
def head(q = 10)
  upper = q - 1
  self[0..upper]
end
|
57
|
+
|
58
|
+
# Returns the trailing q elements; when q exceeds the size the slice
# starts at 0.
# @param q [Integer] number of elements (defaults to 10)
def tail(q = 10)
  start = [0, size - q].max
  stop = size - 1
  self[start..stop]
end
|
62
|
+
|
63
|
+
# Returns the trailing q elements (defaults to 1) by delegating to #tail.
def last(q = 1)
  # The Enumerable mixin does not provide the last method.
  tail(q)
end
|
67
|
+
|
68
|
+
# Returns a hash of Vectors, one per distinct non-nil value obtained by
# splitting the elements with #splitted. Each Vector holds, per element,
# 1 when the element contains the value, 0 when it does not and nil when
# the element is nil.
# Example:
#
#   a=DaruLite::Vector.new(["a,b","c,d","a,b"])
#   a.split_by_separator
#   => {"a"=>#<DaruLite::Vector @data=[1, 0, 1]>,
#       "b"=>#<DaruLite::Vector @data=[1, 0, 1]>,
#       "c"=>#<DaruLite::Vector @data=[0, 1, 0]>,
#       "d"=>#<DaruLite::Vector @data=[0, 1, 0]>}
#
def split_by_separator(sep = ',')
  rows = splitted(sep)
  distinct = rows.flatten.uniq.compact

  distinct.each_with_object({}) do |key, result|
    flags = rows.map { |row| split_value(key, row) }
    result[key] = DaruLite::Vector.new(flags)
  end
end
|
91
|
+
|
92
|
+
# Like #split_by_separator, but maps every value to the sum of its flags
# (each flag converted with #to_i, so nil counts as 0).
# @return [Hash] value => number of elements containing it
def split_by_separator_freq(sep = ',')
  split_by_separator(sep).transform_values { |flags| flags.sum(&:to_i) }
end
|
97
|
+
|
98
|
+
# Extracts the elements designated by keys as a Vector. An empty key list
# gives an empty Vector; a single extracted element is wrapped in a Vector.
# @param keys [Array] can be positions (if by_position is true) or indexes (if by_position is false)
# @return [DaruLite::Vector]
def get_sub_vector(keys, by_position: true)
  return DaruLite::Vector.new([]) if keys == []

  targets = by_position ? keys : @index.pos(*keys)
  picked = at(*targets)
  picked.is_a?(DaruLite::Vector) ? picked : DaruLite::Vector.new([picked])
end
|
110
|
+
|
111
|
+
# Partition a numeric variable into categories.
# @param [Array<Numeric>] partitions an array whose consecutive elements
#   provide intervals for categories
# @param [Hash] opts options to cut the partition
# @option opts [:left, :right] :close_at specifies whether the interval closes at
#   the right side or left side (defaults to :right)
# @option opts [Array] :labels names of the categories
# @return [DaruLite::Vector] numeric variable converted to categorical variable
# @example
#   heights = DaruLite::Vector.new [30, 35, 32, 50, 42, 51]
#   height_cat = heights.cut [30, 40, 50, 60], labels: ['low', 'medium', 'high']
#   # => #<DaruLite::Vector(6)>
#   #   0 low
#   #   1 low
#   #   2 low
#   #   3 high
#   #   4 medium
#   #   5 high
def cut(partitions, opts = {})
  close_at = opts[:close_at] || :right
  labels = opts[:labels]
  bounds = partitions.to_a

  categorised = to_a.map { |val| cut_find_category(bounds, val, close_at) }
  cats = cut_categories(bounds, close_at)
  dv = DaruLite::Vector.new(categorised, index: @index, type: :category, categories: cats)
  return dv unless labels

  # Rename categories if new labels provided
  dv.rename_categories(cats.zip(labels).to_h)
end
|
148
|
+
|
149
|
+
# Returns the positions whose element matches one of the given values.
# Lookups for nil and/or Float::NAN alone are answered from #nil_positions
# and #nan_positions; every other combination scans the data with
# #include_with_nan?.
# @param values [Array] values to look for
# @return [Array] matching positions
def positions(*values)
  case values
  when [nil] then nil_positions
  when [Float::NAN] then nan_positions
  when [nil, Float::NAN], [Float::NAN, nil] then nil_positions + nan_positions
  else
    (0...size).select { |pos| include_with_nan?(values, @data[pos]) }
  end
end
|
161
|
+
|
162
|
+
private
|
163
|
+
|
164
|
+
# Flags whether v contains key: nil for a nil v, 1 when v includes key,
# 0 otherwise.
def split_value(key, v)
  return nil if v.nil?

  v.include?(key) ? 1 : 0
end
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Filterable
|
4
|
+
# Return a new vector based on the contents of a boolean array. Use with the
# comparator methods to obtain meaningful results.
# The work is delegated to DaruLite::Core::Query.vector_where.
#
# @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
#   collection containing the true or false values. Each element in the Vector
#   corresponding to a `true` in the bool_array will be returned along with its
#   index.
# @example Usage of #where.
#   vector = DaruLite::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
#
#   # Simple logic statement passed to #where.
#   vector.where(vector.eq(5).or(vector.eq(1)))
#   # =>
#   ##<DaruLite::Vector:77626210 @name = nil @size = 7 >
#   #    nil
#   #  2   5
#   #  4   5
#   #  7   5
#   # 10   1
#   # 11   5
#   # 13   5
#   # 15   1
#
#   # A somewhat more complex logic statement
#   vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
#   #=>
#   ##<DaruLite::Vector:81072310 @name = nil @size = 7 >
#   #    nil
#   #  2   5
#   #  4   5
#   #  7   5
#   # 10   1
#   # 11   5
#   # 13   5
#   # 15   1
def where(bool_array)
  DaruLite::Core::Query.vector_where(self, bool_array)
end
|
43
|
+
|
44
|
+
# Return a new vector based on the contents of a boolean array and &block.
# The work is delegated to DaruLite::Core::Query.vector_apply_where.
#
# @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>, &block] The
#   collection containing the true or false values. Each element in the Vector
#   corresponding to a `true` in the bool_array will be returned along with its
#   index. The &block may contain manipulative functions for the Vector elements.
#
# @return [DaruLite::Vector]
#
# @example Usage of #apply_where.
#   dv = DaruLite::Vector.new ['3 days', '5 weeks', '2 weeks']
#   dv = dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" }
#   # =>
#   ##<DaruLite::Vector(3)>
#   #  0   3 days
#   #  1   35 days
#   #  2   14 days
def apply_where(bool_array, &block)
  DaruLite::Core::Query.vector_apply_where(self, bool_array, &block)
end
|
64
|
+
|
65
|
+
# Keep only unique elements of the vector along with their indexes
# (each index is looked up with #index_of). Returns a new Vector that
# keeps the name and dtype.
def uniq
  distinct = @data.uniq
  labels = distinct.map { |value| index_of(value) }

  DaruLite::Vector.new(distinct, name: @name, index: labels, dtype: @dtype)
end
|
72
|
+
|
73
|
+
# Delete an element if block returns true. Destructive: the data and the
# index are rebuilt from the surviving elements.
# @yield [value] each element of the vector
# @return [DaruLite::Vector, Enumerator] self, or an Enumerator when no block is given
def delete_if
  return to_enum(:delete_if) unless block_given?

  survivors = each_with_index.reject { |value, _label| yield(value) }

  # Transposing an empty list gives [], which would destructure into two
  # nils; default both halves to empty arrays so that deleting every
  # element leaves an empty vector rather than passing nil on.
  keep_e, keep_i = survivors.transpose
  keep_e ||= []
  keep_i ||= []

  @data = cast_vector_to @dtype, keep_e
  @index = DaruLite::Index.new(keep_i)

  update_position_cache

  self
end
|
86
|
+
|
87
|
+
# Keep an element if block returns true. Destructive; implemented as
# #delete_if with the block's answer negated.
# @return [DaruLite::Vector, Enumerator] result of #delete_if, or an Enumerator when no block is given
def keep_if
  return to_enum(:keep_if) unless block_given?

  delete_if { |element| !yield(element) }
end
|
93
|
+
|
94
|
+
# Return a vector with specified values removed
# @param values [Array] values to reject from resultant vector
# @return [DaruLite::Vector] vector with specified values removed
# @example
#   dv = DaruLite::Vector.new [1, 2, nil, Float::NAN]
#   dv.reject_values nil, Float::NAN
#   # => #<DaruLite::Vector(2)>
#   #   0   1
#   #   1   2
def reject_values(*values)
  resultant_pos = (0...size).to_a - positions(*values)
  dv = at(*resultant_pos)
  return dv if dv.is_a?(DaruLite::Vector)

  # With exactly one remaining position #at hands back the bare element,
  # so ask again with a one-element range to get a vector.
  pos = resultant_pos.first
  at(pos..pos)
end
|
115
|
+
|
116
|
+
# Returns a Vector with only numerical data. nil elements are included
# but non-Numeric objects are excluded. Preserves index: the surviving
# index labels are fetched back through #[].
def only_numerics
  numeric_indexes = each_with_index.each_with_object([]) do |(value, label), kept|
    kept << label if value.is_a?(Numeric) || value.nil?
  end

  self[*numeric_indexes]
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Indexable
|
4
|
+
# Get index of element: finds the first position holding the element
# (compared with #eql? for :array dtype, with the data store's own #index
# otherwise) and asks the index for the key at that position.
def index_of(element)
  position =
    if dtype == :array
      @data.index { |candidate| candidate.eql?(element) }
    else
      @data.index(element)
    end
  @index.key(position)
end
|
11
|
+
|
12
|
+
# Replaces the index with a fresh DaruLite::Index over 0...size.
# @return [DaruLite::Vector] self
def reset_index!
  @index = DaruLite::Index.new((0...size).to_a)
  self
end
|
16
|
+
|
17
|
+
# Returns *true* if the given index is included in the vector's index.
def has_index?(index) # rubocop:disable Naming/PredicateName
  @index.include?(index)
end
|
21
|
+
|
22
|
+
# Builds a DataFrame from the vector's index and data, passed as arrays
# under the :index and :values keys.
# @return [DaruLite::DataFrame]
def detach_index
  labels = @index.to_a
  data = @data.to_a
  DaruLite::DataFrame.new(index: labels, values: data)
end
|
28
|
+
|
29
|
+
# Sets new index for vector. Preserves index->value correspondence.
# Sets nil for new index keys absent from original index.
# @note Unlike #reorder! which takes positions as input it takes
#   index as an input to reorder the vector
# @param [DaruLite::Index, DaruLite::MultiIndex] new_index new index to order with
# @return [DaruLite::Vector] vector reindexed with new index
def reindex!(new_index)
  values = []
  each_with_index do |val, label|
    next unless new_index.include?(label)

    values[new_index[label]] = val
  end
  # Pad the tail with nils up to the size of the new index.
  values.fill(nil, values.size, new_index.size - values.size)

  @data = cast_vector_to @dtype, values
  @index = new_index

  update_position_cache

  self
end
|
49
|
+
|
50
|
+
# Create a new vector with a different index, and preserve the indexing of
# current elements. Works on a duplicate, so the receiver is not the
# object being reindexed.
def reindex(new_index)
  copy = dup
  copy.reindex!(new_index)
end
|
55
|
+
|
56
|
+
# Assigns a new index to the vector after coercing it with Index.coerce.
# @param idx [object] anything Index.coerce accepts
# @raise [ArgumentError] when the coerced value is not a DaruLite::Index
#   (or subclass), or when its size differs from the vector's size
def index=(idx)
  idx = Index.coerce(idx)

  # Check the type first: the size check below calls #size on the value, so
  # it must only run once we know we are holding an Index.
  raise ArgumentError, 'Can only assign type Index and its subclasses.' unless idx.is_a?(DaruLite::Index)
  raise ArgumentError, "Size of supplied index #{idx.size} does not match size of Vector" if idx.size != size

  @index = idx
end
|
64
|
+
|
65
|
+
# Return indexes of values specified
# @param values [Array] values to find indexes for
# @return [Array] array of indexes of values specified
# @example
#   dv = DaruLite::Vector.new [1, 2, nil, Float::NAN], index: 11..14
#   dv.indexes nil, Float::NAN
#   # => [13, 14]
def indexes(*values)
  labels = index.to_a
  labels.values_at(*positions(*values))
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Iterable
|
4
|
+
# Yields every element of the data to the block.
# @return [DaruLite::Vector, Enumerator] self, or an Enumerator when no block is given
def each(&block)
  return to_enum(:each) if block.nil?

  @data.each(&block)
  self
end
|
10
|
+
|
11
|
+
# Yields every entry of the index to the block.
# @return [DaruLite::Vector, Enumerator] self, or an Enumerator when no block is given
def each_index(&block)
  return to_enum(:each_index) if block.nil?

  @index.each(&block)
  self
end
|
17
|
+
|
18
|
+
# Yields every element paired with its index entry ([value, index]).
# @return [DaruLite::Vector, Enumerator] self, or an Enumerator when no block is given
def each_with_index(&block)
  return to_enum(:each_with_index) if block.nil?

  pairs = @data.to_a.zip(@index.to_a)
  pairs.each(&block)

  self
end
|
25
|
+
|
26
|
+
# Replaces every element, in place, with the block's result for it.
# @return [DaruLite::Vector, Enumerator] self, or an Enumerator when no block is given
def map!(&block)
  return to_enum(:map!) if block.nil?

  @data.map!(&block)
  self
end
|
32
|
+
|
33
|
+
# Like map, but returns a DaruLite::Vector with the returned values:
# the recoding is applied to a duplicate through #recode!.
# @param dt [object] optional dtype handed to #recode!
# @return [DaruLite::Vector, Enumerator] recoded copy, or an Enumerator when no block is given
def recode(dt = nil, &block)
  return to_enum(:recode, dt) if block.nil?

  copy = dup
  copy.recode!(dt, &block)
end
|
39
|
+
|
40
|
+
# Destructive version of #recode: maps the data in place with the block,
# then re-casts it to the requested dtype (or the current one).
# @param dt [object] optional dtype to cast to; falls back to the vector's dtype
# @return [DaruLite::Vector, Enumerator] self, or an Enumerator when no block is given
def recode!(dt = nil, &block)
  return to_enum(:recode!, dt) unless block

  # The mapping mutates @data in place; its return value is not needed.
  @data.map!(&block)
  @data = cast_vector_to(dt || @dtype)
  self
end
|
48
|
+
|
49
|
+
# Reports all values that don't comply with a condition.
# Returns a hash mapping the position (0-based) of each element for which
# the block returned a falsy value to that element.
# @yield [value] each element of the data
# @return [Hash] position => invalid value
def verify
  pairs = (0...size).map { |pos| [pos, @data[pos]] }
  pairs.each_with_object({}) do |(pos, val), invalid|
    invalid[pos] = val unless yield(val)
  end
end
|
57
|
+
|
58
|
+
# Applies a method to the vector, or to a sub vector when keys are given.
# @param method [Symbol, Proc] a Symbol is sent to the vector, a Proc is
#   called with the vector as its argument
# @param keys [Array, nil] when given, the method is applied to
#   get_sub_vector(keys, by_position: by_position) instead of self
# @param by_position [Boolean] forwarded to #get_sub_vector
# @return [object] whatever the applied method returns
# @raise [RuntimeError] when method is neither a Symbol nor a Proc
def apply_method(method, keys: nil, by_position: true)
  vect = keys ? get_sub_vector(keys, by_position: by_position) : self

  case method
  when Symbol then vect.send(method)
  when Proc then method.call(vect)
  else
    # Raise explicitly: a bare `raise` re-raises the exception currently being
    # handled (if any), which would surface an unrelated error from here.
    raise "method must be a Symbol or a Proc, got #{method.class}"
  end
end
|
67
|
+
alias apply_method_on_sub_vector apply_method
|
68
|
+
|
69
|
+
# Replaces specified values with a new value
# @param [Array] old_values array of values to replace (a single non-Array
#   value is treated as a one-element list)
# @param [object] new_value new value to replace with
# @note It performs the replace in place.
# @return [DaruLite::Vector] Same vector itself with values
#   replaced with new value
# @example
#   dv = DaruLite::Vector.new [1, 2, :a, :b]
#   dv.replace_values [:a, :b], nil
#   dv
#   # =>
#   # #<DaruLite::Vector:19903200 @name = nil @metadata = {} @size = 4 >
#   #     nil
#   #   0   1
#   #   1   2
#   #   2 nil
#   #   3 nil
def replace_values(old_values, new_value)
  targets = old_values.is_a?(Array) ? old_values : [old_values]

  (0...size).each do |pos|
    next unless include_with_nan?(targets, at(pos))

    set_at([pos], new_value)
  end
  self
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|