daru_lite 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +35 -33
- data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
- data/lib/daru_lite/data_frame/calculatable.rb +140 -0
- data/lib/daru_lite/data_frame/convertible.rb +107 -0
- data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
- data/lib/daru_lite/data_frame/fetchable.rb +301 -0
- data/lib/daru_lite/data_frame/filterable.rb +144 -0
- data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
- data/lib/daru_lite/data_frame/indexable.rb +168 -0
- data/lib/daru_lite/data_frame/iterable.rb +339 -0
- data/lib/daru_lite/data_frame/joinable.rb +152 -0
- data/lib/daru_lite/data_frame/missable.rb +75 -0
- data/lib/daru_lite/data_frame/pivotable.rb +108 -0
- data/lib/daru_lite/data_frame/queryable.rb +67 -0
- data/lib/daru_lite/data_frame/setable.rb +109 -0
- data/lib/daru_lite/data_frame/sortable.rb +241 -0
- data/lib/daru_lite/dataframe.rb +138 -2353
- data/lib/daru_lite/index/index.rb +13 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1 -1
- data/lib/daru_lite/vector/aggregatable.rb +9 -0
- data/lib/daru_lite/vector/calculatable.rb +78 -0
- data/lib/daru_lite/vector/convertible.rb +77 -0
- data/lib/daru_lite/vector/duplicatable.rb +17 -0
- data/lib/daru_lite/vector/fetchable.rb +175 -0
- data/lib/daru_lite/vector/filterable.rb +128 -0
- data/lib/daru_lite/vector/indexable.rb +77 -0
- data/lib/daru_lite/vector/iterable.rb +95 -0
- data/lib/daru_lite/vector/joinable.rb +17 -0
- data/lib/daru_lite/vector/missable.rb +124 -0
- data/lib/daru_lite/vector/queryable.rb +45 -0
- data/lib/daru_lite/vector/setable.rb +47 -0
- data/lib/daru_lite/vector/sortable.rb +113 -0
- data/lib/daru_lite/vector.rb +36 -932
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/aggregatable_example.rb +65 -0
- data/spec/data_frame/buildable_example.rb +109 -0
- data/spec/data_frame/calculatable_example.rb +135 -0
- data/spec/data_frame/convertible_example.rb +180 -0
- data/spec/data_frame/duplicatable_example.rb +111 -0
- data/spec/data_frame/fetchable_example.rb +476 -0
- data/spec/data_frame/filterable_example.rb +250 -0
- data/spec/data_frame/indexable_example.rb +221 -0
- data/spec/data_frame/iterable_example.rb +465 -0
- data/spec/data_frame/joinable_example.rb +106 -0
- data/spec/data_frame/missable_example.rb +47 -0
- data/spec/data_frame/pivotable_example.rb +297 -0
- data/spec/data_frame/queryable_example.rb +92 -0
- data/spec/data_frame/setable_example.rb +482 -0
- data/spec/data_frame/sortable_example.rb +350 -0
- data/spec/dataframe_spec.rb +181 -3289
- data/spec/index/index_spec.rb +8 -0
- data/spec/vector/aggregatable_example.rb +27 -0
- data/spec/vector/calculatable_example.rb +82 -0
- data/spec/vector/convertible_example.rb +126 -0
- data/spec/vector/duplicatable_example.rb +48 -0
- data/spec/vector/fetchable_example.rb +463 -0
- data/spec/vector/filterable_example.rb +165 -0
- data/spec/vector/indexable_example.rb +201 -0
- data/spec/vector/iterable_example.rb +111 -0
- data/spec/vector/joinable_example.rb +25 -0
- data/spec/vector/missable_example.rb +88 -0
- data/spec/vector/queryable_example.rb +91 -0
- data/spec/vector/setable_example.rb +300 -0
- data/spec/vector/sortable_example.rb +242 -0
- data/spec/vector_spec.rb +111 -1805
- metadata +86 -2
@@ -246,6 +246,19 @@ module DaruLite
|
|
246
246
|
DaruLite::Index.new(to_a + indexes)
|
247
247
|
end
|
248
248
|
|
249
|
+
# Takes a positional value and returns a new Index without the element at given position
|
250
|
+
# @param position [Integer] positional value
|
251
|
+
# @return [object] index object
|
252
|
+
# @example
|
253
|
+
# idx = DaruLite::Index.new [:a, :b, :c]
|
254
|
+
# idx.delete_at(0)
|
255
|
+
# # => #<DaruLite::Index(2): {b, c}>
|
256
|
+
def delete_at(position)
|
257
|
+
indexes = to_a
|
258
|
+
indexes.delete_at(position)
|
259
|
+
self.class.new(indexes)
|
260
|
+
end
|
261
|
+
|
249
262
|
def _dump(*)
|
250
263
|
Marshal.dump(relation_hash: @relation_hash)
|
251
264
|
end
|
@@ -842,7 +842,7 @@ module DaruLite
|
|
842
842
|
def emsd(n = 10, wilder = false)
|
843
843
|
result = []
|
844
844
|
emv_return = emv(n, wilder)
|
845
|
-
emv_return.each do |d|
|
845
|
+
emv_return.each do |d| # rubocop:disable Style/MapIntoArray
|
846
846
|
result << (d.nil? ? nil : Math.sqrt(d))
|
847
847
|
end
|
848
848
|
DaruLite::Vector.new(result, index: @index, name: @name)
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Calculatable
|
4
|
+
# Count the number of values specified
|
5
|
+
# @param values [Array] values to count for
|
6
|
+
# @return [Integer] the number of times the specified values occur
|
7
|
+
# @example
|
8
|
+
# dv = DaruLite::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
|
9
|
+
# dv.count_values nil
|
10
|
+
# # => 2
|
11
|
+
def count_values(*values)
|
12
|
+
positions(*values).size
|
13
|
+
end
|
14
|
+
|
15
|
+
# Create a summary of the Vector
|
16
|
+
# @param indent_level [Integer] indent level
|
17
|
+
# @return [String] String containing the summary of the Vector
|
18
|
+
# @example
|
19
|
+
# dv = DaruLite::Vector.new [1, 2, 3]
|
20
|
+
# puts dv.summary
|
21
|
+
#
|
22
|
+
# # =
|
23
|
+
# # n :3
|
24
|
+
# # non-missing:3
|
25
|
+
# # median: 2
|
26
|
+
# # mean: 2.0000
|
27
|
+
# # std.dev.: 1.0000
|
28
|
+
# # std.err.: 0.5774
|
29
|
+
# # skew: 0.0000
|
30
|
+
# # kurtosis: -2.3333
|
31
|
+
def summary(indent_level = 0)
|
32
|
+
non_missing = size - count_values(*DaruLite::MISSING_VALUES)
|
33
|
+
summary = (' =' * indent_level) + "= #{name}" \
|
34
|
+
"\n n :#{size}" \
|
35
|
+
"\n non-missing:#{non_missing}"
|
36
|
+
case type
|
37
|
+
when :object
|
38
|
+
summary << object_summary
|
39
|
+
when :numeric
|
40
|
+
summary << numeric_summary
|
41
|
+
end
|
42
|
+
summary.split("\n").join("\n#{' ' * indent_level}")
|
43
|
+
end
|
44
|
+
|
45
|
+
# Displays summary for an object type Vector
|
46
|
+
# @return [String] String containing object vector summary
|
47
|
+
def object_summary
|
48
|
+
nval = count_values(*DaruLite::MISSING_VALUES)
|
49
|
+
summary = "\n factors: #{factors.to_a.join(',')}" \
|
50
|
+
"\n mode: #{mode.to_a.join(',')}" \
|
51
|
+
"\n Distribution\n"
|
52
|
+
|
53
|
+
data = frequencies.sort.each_with_index.map do |v, k|
|
54
|
+
[k, v, format('%0.2f%%', ((nval.zero? ? 1 : v.quo(nval)) * 100))]
|
55
|
+
end
|
56
|
+
|
57
|
+
summary + Formatters::Table.format(data)
|
58
|
+
end
|
59
|
+
|
60
|
+
# Displays summary for a numeric type Vector
|
61
|
+
# @return [String] String containing numeric vector summary
|
62
|
+
def numeric_summary
|
63
|
+
summary = "\n median: #{median}" +
|
64
|
+
format("\n mean: %0.4f", mean)
|
65
|
+
if sd
|
66
|
+
summary << (format("\n std.dev.: %0.4f", sd) +
|
67
|
+
format("\n std.err.: %0.4f", se))
|
68
|
+
end
|
69
|
+
|
70
|
+
if count_values(*DaruLite::MISSING_VALUES).zero?
|
71
|
+
summary << (format("\n skew: %0.4f", skew) +
|
72
|
+
format("\n kurtosis: %0.4f", kurtosis))
|
73
|
+
end
|
74
|
+
summary
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Convertible
|
4
|
+
# @return [DaruLite::DataFrame] the vector as a single-vector dataframe
|
5
|
+
def to_df
|
6
|
+
DaruLite::DataFrame.new({ @name => @data }, name: @name, index: @index)
|
7
|
+
end
|
8
|
+
|
9
|
+
# Convert Vector to a horizontal or vertical Ruby Matrix.
|
10
|
+
#
|
11
|
+
# == Arguments
|
12
|
+
#
|
13
|
+
# * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix.
|
14
|
+
def to_matrix(axis = :horizontal)
|
15
|
+
case axis
|
16
|
+
when :horizontal
|
17
|
+
Matrix[to_a]
|
18
|
+
when :vertical
|
19
|
+
Matrix.columns([to_a])
|
20
|
+
else
|
21
|
+
raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Convert to hash (explicit). Hash keys are indexes and values are the corresponding elements
|
26
|
+
def to_h
|
27
|
+
@index.to_h { |index| [index, self[index]] }
|
28
|
+
end
|
29
|
+
|
30
|
+
# Return an array
|
31
|
+
def to_a
|
32
|
+
@data.to_a
|
33
|
+
end
|
34
|
+
|
35
|
+
# Convert the hash from to_h to json
|
36
|
+
def to_json(*)
|
37
|
+
to_h.to_json
|
38
|
+
end
|
39
|
+
|
40
|
+
# Convert to html for iruby
|
41
|
+
def to_html(threshold = 30)
|
42
|
+
table_thead = to_html_thead
|
43
|
+
table_tbody = to_html_tbody(threshold)
|
44
|
+
path = if index.is_a?(MultiIndex)
|
45
|
+
File.expand_path('../iruby/templates/vector_mi.html.erb', __dir__)
|
46
|
+
else
|
47
|
+
File.expand_path('../iruby/templates/vector.html.erb', __dir__)
|
48
|
+
end
|
49
|
+
ERB.new(File.read(path).strip).result(binding)
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_html_thead
|
53
|
+
table_thead_path =
|
54
|
+
if index.is_a?(MultiIndex)
|
55
|
+
File.expand_path('../iruby/templates/vector_mi_thead.html.erb', __dir__)
|
56
|
+
else
|
57
|
+
File.expand_path('../iruby/templates/vector_thead.html.erb', __dir__)
|
58
|
+
end
|
59
|
+
ERB.new(File.read(table_thead_path).strip).result(binding)
|
60
|
+
end
|
61
|
+
|
62
|
+
def to_html_tbody(threshold = 30)
|
63
|
+
table_tbody_path =
|
64
|
+
if index.is_a?(MultiIndex)
|
65
|
+
File.expand_path('../iruby/templates/vector_mi_tbody.html.erb', __dir__)
|
66
|
+
else
|
67
|
+
File.expand_path('../iruby/templates/vector_tbody.html.erb', __dir__)
|
68
|
+
end
|
69
|
+
ERB.new(File.read(table_tbody_path).strip).result(binding)
|
70
|
+
end
|
71
|
+
|
72
|
+
def to_s
|
73
|
+
"#<#{self.class}#{": #{@name}" if @name}(#{size})#{':category' if category?}>"
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Duplicatable
|
4
|
+
# Duplicates a vector
|
5
|
+
# @return [DaruLite::Vector] duplicated vector
|
6
|
+
def dup
|
7
|
+
DaruLite::Vector.new @data.dup, name: @name, index: @index.dup
|
8
|
+
end
|
9
|
+
|
10
|
+
# Copies the structure of the vector (i.e the index, size, etc.) and fills all
|
11
|
+
# values with nils.
|
12
|
+
def clone_structure
|
13
|
+
DaruLite::Vector.new(([nil] * size), name: @name, index: @index.dup)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Fetchable
|
4
|
+
# Get one or more elements with specified index or a range.
|
5
|
+
#
|
6
|
+
# == Usage
|
7
|
+
# # For vectors employing single layer Index
|
8
|
+
#
|
9
|
+
# v[:one, :two] # => DaruLite::Vector with indexes :one and :two
|
10
|
+
# v[:one] # => Single element
|
11
|
+
# v[:one..:three] # => DaruLite::Vector with indexes :one, :two and :three
|
12
|
+
#
|
13
|
+
# # For vectors employing hierarchical multi index
|
14
|
+
#
|
15
|
+
def [](*input_indexes)
|
16
|
+
# Get array of positional indexes
|
17
|
+
positions = @index.pos(*input_indexes)
|
18
|
+
|
19
|
+
# If a single element is requested, return it
|
20
|
+
return @data[positions] if positions.is_a? Numeric
|
21
|
+
|
22
|
+
# Form a new Vector using positional indexes
|
23
|
+
DaruLite::Vector.new(
|
24
|
+
positions.map { |loc| @data[loc] },
|
25
|
+
name: @name,
|
26
|
+
index: @index.subset(*input_indexes), dtype: @dtype
|
27
|
+
)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Returns vector of values given positional values
|
31
|
+
# @param positions [Array<object>] positional values
|
32
|
+
# @return [object] vector
|
33
|
+
# @example
|
34
|
+
# dv = DaruLite::Vector.new 'a'..'e'
|
35
|
+
# dv.at 0, 1, 2
|
36
|
+
# # => #<DaruLite::Vector(3)>
|
37
|
+
# # 0 a
|
38
|
+
# # 1 b
|
39
|
+
# # 2 c
|
40
|
+
def at(*positions)
|
41
|
+
# to be used to form index
|
42
|
+
original_positions = positions
|
43
|
+
positions = coerce_positions(*positions)
|
44
|
+
validate_positions(*positions)
|
45
|
+
|
46
|
+
if positions.is_a? Integer
|
47
|
+
@data[positions]
|
48
|
+
else
|
49
|
+
values = positions.map { |pos| @data[pos] }
|
50
|
+
DaruLite::Vector.new values, index: @index.at(*original_positions), dtype: dtype
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def head(q = 10)
|
55
|
+
self[0..(q - 1)]
|
56
|
+
end
|
57
|
+
|
58
|
+
def tail(q = 10)
|
59
|
+
start = [size - q, 0].max
|
60
|
+
self[start..(size - 1)]
|
61
|
+
end
|
62
|
+
|
63
|
+
def last(q = 1)
|
64
|
+
# The Enumerable mixin does not provide the last method.
|
65
|
+
tail(q)
|
66
|
+
end
|
67
|
+
|
68
|
+
# Returns a hash of Vectors, defined by the different values
|
69
|
+
# defined on the fields
|
70
|
+
# Example:
|
71
|
+
#
|
72
|
+
# a=DaruLite::Vector.new(["a,b","c,d","a,b"])
|
73
|
+
# a.split_by_separator
|
74
|
+
# => {"a"=>#<DaruLite::Vector:0x7f2dbcc09d88
|
75
|
+
# @data=[1, 0, 1]>,
|
76
|
+
# "b"=>#<DaruLite::Vector:0x7f2dbcc09c48
|
77
|
+
# @data=[1, 1, 0]>,
|
78
|
+
# "c"=>#<DaruLite::Vector:0x7f2dbcc09b08
|
79
|
+
# @data=[0, 1, 1]>}
|
80
|
+
#
|
81
|
+
def split_by_separator(sep = ',')
|
82
|
+
split_data = splitted sep
|
83
|
+
split_data
|
84
|
+
.flatten.uniq.compact.to_h do |key|
|
85
|
+
[
|
86
|
+
key,
|
87
|
+
DaruLite::Vector.new(split_data.map { |v| split_value(key, v) })
|
88
|
+
]
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def split_by_separator_freq(sep = ',')
|
93
|
+
split_by_separator(sep).transform_values do |v|
|
94
|
+
v.sum(&:to_i)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
# @param keys [Array] can be positions (if by_position is true) or indexes (if by_position is false)
|
99
|
+
# @return [DaruLite::Vector]
|
100
|
+
def get_sub_vector(keys, by_position: true)
|
101
|
+
return DaruLite::Vector.new([]) if keys == []
|
102
|
+
|
103
|
+
keys = @index.pos(*keys) unless by_position
|
104
|
+
|
105
|
+
sub_vect = at(*keys)
|
106
|
+
sub_vect = DaruLite::Vector.new([sub_vect]) unless sub_vect.is_a?(DaruLite::Vector)
|
107
|
+
|
108
|
+
sub_vect
|
109
|
+
end
|
110
|
+
|
111
|
+
# Partition a numeric variable into categories.
|
112
|
+
# @param [Array<Numeric>] partitions an array whose consecutive elements
|
113
|
+
# provide intervals for categories
|
114
|
+
# @param [Hash] opts options to cut the partition
|
115
|
+
# @option opts [:left, :right] :close_at specifies whether the interval closes at
|
116
|
+
# the right side or the left side
|
117
|
+
# @option opts [Array] :labels names of the categories
|
118
|
+
# @return [DaruLite::Vector] numeric variable converted to categorical variable
|
119
|
+
# @example
|
120
|
+
# heights = DaruLite::Vector.new [30, 35, 32, 50, 42, 51]
|
121
|
+
# height_cat = heights.cut [30, 40, 50, 60], labels: ['low', 'medium', 'high']
|
122
|
+
# # => #<DaruLite::Vector(6)>
|
123
|
+
# # 0 low
|
124
|
+
# # 1 low
|
125
|
+
# # 2 low
|
126
|
+
# # 3 high
|
127
|
+
# # 4 medium
|
128
|
+
# # 5 high
|
129
|
+
def cut(partitions, opts = {})
|
130
|
+
close_at = opts[:close_at] || :right
|
131
|
+
labels = opts[:labels]
|
132
|
+
partitions = partitions.to_a
|
133
|
+
values = to_a.map { |val| cut_find_category partitions, val, close_at }
|
134
|
+
cats = cut_categories(partitions, close_at)
|
135
|
+
|
136
|
+
dv = DaruLite::Vector.new values,
|
137
|
+
index: @index,
|
138
|
+
type: :category,
|
139
|
+
categories: cats
|
140
|
+
|
141
|
+
# Rename categories if new labels provided
|
142
|
+
if labels
|
143
|
+
dv.rename_categories cats.zip(labels).to_h
|
144
|
+
else
|
145
|
+
dv
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
def positions(*values)
|
150
|
+
case values
|
151
|
+
when [nil]
|
152
|
+
nil_positions
|
153
|
+
when [Float::NAN]
|
154
|
+
nan_positions
|
155
|
+
when [nil, Float::NAN], [Float::NAN, nil]
|
156
|
+
nil_positions + nan_positions
|
157
|
+
else
|
158
|
+
size.times.select { |i| include_with_nan? values, @data[i] }
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
private
|
163
|
+
|
164
|
+
def split_value(key, v)
|
165
|
+
if v.nil?
|
166
|
+
nil
|
167
|
+
elsif v.include?(key)
|
168
|
+
1
|
169
|
+
else
|
170
|
+
0
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Filterable
|
4
|
+
# Return a new vector based on the contents of a boolean array. Use with the
|
5
|
+
# comparator methods to obtain meaningful results. See this notebook for
|
6
|
+
# a good overview of using #where.
|
7
|
+
#
|
8
|
+
# @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
|
9
|
+
# collection containing the true or false values. Each element in the Vector
|
10
|
+
# corresponding to a `true` in the bool_array will be returned along with its
|
11
|
+
# index.
|
12
|
+
# @example Usage of #where.
|
13
|
+
# vector = DaruLite::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
|
14
|
+
#
|
15
|
+
# # Simple logic statement passed to #where.
|
16
|
+
# vector.where(vector.eq(5).or(vector.eq(1)))
|
17
|
+
# # =>
|
18
|
+
# ##<DaruLite::Vector:77626210 @name = nil @size = 7 >
|
19
|
+
# # nil
|
20
|
+
# # 2 5
|
21
|
+
# # 4 5
|
22
|
+
# # 7 5
|
23
|
+
# # 10 1
|
24
|
+
# # 11 5
|
25
|
+
# # 13 5
|
26
|
+
# # 15 1
|
27
|
+
#
|
28
|
+
# # A somewhat more complex logic statement
|
29
|
+
# vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
|
30
|
+
# #=>
|
31
|
+
# ##<DaruLite::Vector:81072310 @name = nil @size = 7 >
|
32
|
+
# # nil
|
33
|
+
# # 2 5
|
34
|
+
# # 4 5
|
35
|
+
# # 7 5
|
36
|
+
# # 10 1
|
37
|
+
# # 11 5
|
38
|
+
# # 13 5
|
39
|
+
# # 15 1
|
40
|
+
def where(bool_array)
|
41
|
+
DaruLite::Core::Query.vector_where self, bool_array
|
42
|
+
end
|
43
|
+
|
44
|
+
# Return a new vector based on the contents of a boolean array and &block.
|
45
|
+
#
|
46
|
+
# @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>, &block] The
|
47
|
+
# collection containing the true or false values. Each element in the Vector
|
48
|
+
# corresponding to a `true` in the bool_array will be returned along with its
|
49
|
+
# index. The &block may contain manipulative functions for the Vector elements.
|
50
|
+
#
|
51
|
+
# @return [DaruLite::Vector]
|
52
|
+
#
|
53
|
+
# @example Usage of #apply_where.
|
54
|
+
# dv = DaruLite::Vector.new ['3 days', '5 weeks', '2 weeks']
|
55
|
+
# dv = dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" }
|
56
|
+
# # =>
|
57
|
+
# ##<DaruLite::Vector(3)>
|
58
|
+
# # 0 3 days
|
59
|
+
# # 1 35 days
|
60
|
+
# # 2 14 days
|
61
|
+
def apply_where(bool_array, &block)
|
62
|
+
DaruLite::Core::Query.vector_apply_where self, bool_array, &block
|
63
|
+
end
|
64
|
+
|
65
|
+
# Keep only unique elements of the vector along with their indexes.
|
66
|
+
def uniq
|
67
|
+
uniq_vector = @data.uniq
|
68
|
+
new_index = uniq_vector.map { |element| index_of(element) }
|
69
|
+
|
70
|
+
DaruLite::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
|
71
|
+
end
|
72
|
+
|
73
|
+
# Delete an element if block returns true. Destructive.
|
74
|
+
def delete_if
|
75
|
+
return to_enum(:delete_if) unless block_given?
|
76
|
+
|
77
|
+
keep_e, keep_i = each_with_index.reject { |n, _i| yield(n) }.transpose
|
78
|
+
|
79
|
+
@data = cast_vector_to @dtype, keep_e
|
80
|
+
@index = DaruLite::Index.new(keep_i)
|
81
|
+
|
82
|
+
update_position_cache
|
83
|
+
|
84
|
+
self
|
85
|
+
end
|
86
|
+
|
87
|
+
# Keep an element if block returns true. Destructive.
|
88
|
+
def keep_if
|
89
|
+
return to_enum(:keep_if) unless block_given?
|
90
|
+
|
91
|
+
delete_if { |val| !yield(val) }
|
92
|
+
end
|
93
|
+
|
94
|
+
# Return a vector with specified values removed
|
95
|
+
# @param values [Array] values to reject from resultant vector
|
96
|
+
# @return [DaruLite::Vector] vector with specified values removed
|
97
|
+
# @example
|
98
|
+
# dv = DaruLite::Vector.new [1, 2, nil, Float::NAN]
|
99
|
+
# dv.reject_values nil, Float::NAN
|
100
|
+
# # => #<DaruLite::Vector(2)>
|
101
|
+
# # 0 1
|
102
|
+
# # 1 2
|
103
|
+
def reject_values(*values)
|
104
|
+
resultant_pos = size.times.to_a - positions(*values)
|
105
|
+
dv = at(*resultant_pos)
|
106
|
+
# Handle the case when number of positions is 1
|
107
|
+
# and hence #at doesn't return a vector
|
108
|
+
if dv.is_a?(DaruLite::Vector)
|
109
|
+
dv
|
110
|
+
else
|
111
|
+
pos = resultant_pos.first
|
112
|
+
at(pos..pos)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
# Returns a Vector with only numerical data. Missing data is included
|
117
|
+
# but non-Numeric objects are excluded. Preserves index.
|
118
|
+
def only_numerics
|
119
|
+
numeric_indexes =
|
120
|
+
each_with_index
|
121
|
+
.select { |v, _i| v.is_a?(Numeric) || v.nil? }
|
122
|
+
.map(&:last)
|
123
|
+
|
124
|
+
self[*numeric_indexes]
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Indexable
|
4
|
+
# Get index of element
|
5
|
+
def index_of(element)
|
6
|
+
case dtype
|
7
|
+
when :array then @index.key(@data.index { |x| x.eql? element })
|
8
|
+
else @index.key @data.index(element)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def reset_index!
|
13
|
+
@index = DaruLite::Index.new(Array.new(size) { |i| i })
|
14
|
+
self
|
15
|
+
end
|
16
|
+
|
17
|
+
# Returns *true* if an index exists
|
18
|
+
def has_index?(index) # rubocop:disable Naming/PredicateName
|
19
|
+
@index.include? index
|
20
|
+
end
|
21
|
+
|
22
|
+
def detach_index
|
23
|
+
DaruLite::DataFrame.new(
|
24
|
+
index: @index.to_a,
|
25
|
+
values: @data.to_a
|
26
|
+
)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Sets new index for vector. Preserves index->value correspondence.
|
30
|
+
# Sets nil for new index keys absent from original index.
|
31
|
+
# @note Unlike #reorder! which takes positions as input it takes
|
32
|
+
# index as an input to reorder the vector
|
33
|
+
# @param [DaruLite::Index, DaruLite::MultiIndex] new_index new index to order with
|
34
|
+
# @return [DaruLite::Vector] vector reindexed with new index
|
35
|
+
def reindex!(new_index)
|
36
|
+
values = []
|
37
|
+
each_with_index do |val, i|
|
38
|
+
values[new_index[i]] = val if new_index.include?(i)
|
39
|
+
end
|
40
|
+
values.fill(nil, values.size, new_index.size - values.size)
|
41
|
+
|
42
|
+
@data = cast_vector_to @dtype, values
|
43
|
+
@index = new_index
|
44
|
+
|
45
|
+
update_position_cache
|
46
|
+
|
47
|
+
self
|
48
|
+
end
|
49
|
+
|
50
|
+
# Create a new vector with a different index, and preserve the indexing of
|
51
|
+
# current elements.
|
52
|
+
def reindex(new_index)
|
53
|
+
dup.reindex!(new_index)
|
54
|
+
end
|
55
|
+
|
56
|
+
def index=(idx)
|
57
|
+
idx = Index.coerce(idx)
|
58
|
+
|
59
|
+
raise ArgumentError, "Size of supplied index #{idx.size} does not match size of Vector" if idx.size != size
|
60
|
+
raise ArgumentError, 'Can only assign type Index and its subclasses.' unless idx.is_a?(DaruLite::Index)
|
61
|
+
|
62
|
+
@index = idx
|
63
|
+
end
|
64
|
+
|
65
|
+
# Return indexes of values specified
|
66
|
+
# @param values [Array] values to find indexes for
|
67
|
+
# @return [Array] array of indexes of values specified
|
68
|
+
# @example
|
69
|
+
# dv = DaruLite::Vector.new [1, 2, nil, Float::NAN], index: 11..14
|
70
|
+
# dv.indexes nil, Float::NAN
|
71
|
+
# # => [13, 14]
|
72
|
+
def indexes(*values)
|
73
|
+
index.to_a.values_at(*positions(*values))
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class Vector
|
3
|
+
module Iterable
|
4
|
+
def each(&block)
|
5
|
+
return to_enum(:each) unless block
|
6
|
+
|
7
|
+
@data.each(&block)
|
8
|
+
self
|
9
|
+
end
|
10
|
+
|
11
|
+
def each_index(&block)
|
12
|
+
return to_enum(:each_index) unless block
|
13
|
+
|
14
|
+
@index.each(&block)
|
15
|
+
self
|
16
|
+
end
|
17
|
+
|
18
|
+
def each_with_index(&block)
|
19
|
+
return to_enum(:each_with_index) unless block
|
20
|
+
|
21
|
+
@data.to_a.zip(@index.to_a).each(&block)
|
22
|
+
|
23
|
+
self
|
24
|
+
end
|
25
|
+
|
26
|
+
def map!(&block)
|
27
|
+
return to_enum(:map!) unless block
|
28
|
+
|
29
|
+
@data.map!(&block)
|
30
|
+
self
|
31
|
+
end
|
32
|
+
|
33
|
+
# Like map, but returns a DaruLite::Vector with the returned values.
|
34
|
+
def recode(dt = nil, &block)
|
35
|
+
return to_enum(:recode, dt) unless block
|
36
|
+
|
37
|
+
dup.recode! dt, &block
|
38
|
+
end
|
39
|
+
|
40
|
+
# Destructive version of #recode
|
41
|
+
def recode!(dt = nil, &block)
|
42
|
+
return to_enum(:recode!, dt) unless block
|
43
|
+
|
44
|
+
@data.map!(&block).data
|
45
|
+
@data = cast_vector_to(dt || @dtype)
|
46
|
+
self
|
47
|
+
end
|
48
|
+
|
49
|
+
# Reports all values that don't comply with a condition.
|
50
|
+
# Returns a hash mapping the position of each invalid value to that value.
|
51
|
+
def verify
|
52
|
+
(0...size)
|
53
|
+
.map { |i| [i, @data[i]] }
|
54
|
+
.reject { |_i, val| yield(val) }
|
55
|
+
.to_h
|
56
|
+
end
|
57
|
+
|
58
|
+
def apply_method(method, keys: nil, by_position: true)
|
59
|
+
vect = keys ? get_sub_vector(keys, by_position: by_position) : self
|
60
|
+
|
61
|
+
case method
|
62
|
+
when Symbol then vect.send(method)
|
63
|
+
when Proc then method.call(vect)
|
64
|
+
else raise
|
65
|
+
end
|
66
|
+
end
|
67
|
+
alias apply_method_on_sub_vector apply_method
|
68
|
+
|
69
|
+
# Replaces specified values with a new value
|
70
|
+
# @param [Array] old_values array of values to replace
|
71
|
+
# @param [object] new_value new value to replace with
|
72
|
+
# @note It performs the replace in place.
|
73
|
+
# @return [DaruLite::Vector] Same vector itself with values
|
74
|
+
# replaced with new value
|
75
|
+
# @example
|
76
|
+
# dv = DaruLite::Vector.new [1, 2, :a, :b]
|
77
|
+
# dv.replace_values [:a, :b], nil
|
78
|
+
# dv
|
79
|
+
# # =>
|
80
|
+
# # #<DaruLite::Vector:19903200 @name = nil @metadata = {} @size = 4 >
|
81
|
+
# # nil
|
82
|
+
# # 0 1
|
83
|
+
# # 1 2
|
84
|
+
# # 2 nil
|
85
|
+
# # 3 nil
|
86
|
+
def replace_values(old_values, new_value)
|
87
|
+
old_values = [old_values] unless old_values.is_a? Array
|
88
|
+
size.times do |pos|
|
89
|
+
set_at([pos], new_value) if include_with_nan? old_values, at(pos)
|
90
|
+
end
|
91
|
+
self
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|