daru_lite 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
@@ -0,0 +1,1019 @@
|
|
1
|
+
module DaruLite
|
2
|
+
module Maths
|
3
|
+
# Encapsulates statistics methods for vectors. Most basic stuff like mean, etc.
|
4
|
+
# is done inside the wrapper, so that native methods can be used for most of
|
5
|
+
# the computationally intensive tasks.
|
6
|
+
module Statistics
|
7
|
+
module Vector # rubocop:disable Metrics/ModuleLength
|
8
|
+
extend Gem::Deprecate
|
9
|
+
|
10
|
+
# Mean of the vector, delegated to the underlying data wrapper.
#
# @return [Object] whatever +@data.mean+ returns
def mean
  wrapper = @data
  wrapper.mean
end
|
13
|
+
|
14
|
+
# Sum of the vector, delegated to the underlying data wrapper.
#
# @return [Object] whatever +@data.sum+ returns
def sum
  wrapper = @data
  wrapper.sum
end
|
17
|
+
|
18
|
+
# Product of the vector, delegated to the underlying data wrapper.
#
# @return [Object] whatever +@data.product+ returns
def product
  wrapper = @data
  wrapper.product
end
|
21
|
+
|
22
|
+
# Spread of the vector: the result of #max minus the result of #min.
#
# @return [Object] +max - min+
def range
  highest = max
  lowest = min
  highest - lowest
end
|
25
|
+
|
26
|
+
# Median of the vector.
#
# Uses the data wrapper's own +median+ when it offers one; otherwise falls
# back to the 50th percentile.
def median
  return @data.median if @data.respond_to?(:median)

  percentile(50)
end
|
29
|
+
|
30
|
+
# Most frequent value(s) of the vector.
#
# The frequency table and its maximum are computed once up front; the
# previous form re-evaluated +frequencies.max+ for every distinct value.
#
# @return [Object, DaruLite::Vector] the single most frequent value, or a
#   vector holding every value tied for the highest frequency
def mode
  freqs = frequencies
  highest = freqs.max
  modes = freqs.to_h.select { |_, occurrences| occurrences == highest }.keys
  modes.size > 1 ? DaruLite::Vector.new(modes) : modes.first
end
|
34
|
+
|
35
|
+
# Create a summary of count, mean, standard deviation, min and max of
|
36
|
+
# the vector in one shot.
|
37
|
+
#
|
38
|
+
# == Arguments
|
39
|
+
#
|
40
|
+
# +methods+ - An array with aggregation methods specified as symbols to
|
41
|
+
# be applied to vectors. Default is [:count, :mean, :std, :min,
|
41
|
+
# :max]. Methods will be applied in the specified order.
|
43
|
+
def describe(methods = nil)
  # Fall back to the standard five-number description when none is given.
  aggregations = methods || %i[count mean std min max]
  results = aggregations.map { |aggregation| send(aggregation) }
  DaruLite::Vector.new(results, index: aggregations, name: :statistics)
end
|
48
|
+
|
49
|
+
# Median of the absolute deviations from the vector's median.
def median_absolute_deviation
  centre = median
  deviations = recode { |value| (value - centre).abs }
  deviations.median
end
|
53
|
+
|
54
|
+
alias mad median_absolute_deviation
|
55
|
+
|
56
|
+
# Sample standard deviation divided by the square root of the number of
# non-missing observations.
def standard_error
  deviation = standard_deviation_sample
  valid_count = size - count_values(*DaruLite::MISSING_VALUES)
  deviation / Math.sqrt(valid_count)
end
|
59
|
+
|
60
|
+
# Sum of squared deviations, computed as sum(x^2) - (sum(x))^2 / N, where
# N is the number of non-missing values.
#
# Squares are accumulated over the non-missing values only (the same
# +reject_values(...).data+ access used by #sum_of_squares), so a vector
# containing missing values no longer raises while squaring nil. The
# divisor already excluded missing values.
#
# @return [Float]
def sum_of_squared_deviation
  valid = reject_values(*DaruLite::MISSING_VALUES).data
  squares = valid.inject(0) { |acc, x| (x**2) + acc }
  valid_count = size - count_values(*DaruLite::MISSING_VALUES)

  (squares - (sum**2).quo(valid_count).to_f).to_f
end
|
66
|
+
|
67
|
+
# Retrieve unique values of non-nil data
|
68
|
+
def factors
  present = reject_values(*DaruLite::MISSING_VALUES)
  distinct = present.uniq
  distinct.reset_index!
end
|
71
|
+
|
72
|
+
if RUBY_VERSION >= '2.2'
|
73
|
+
# Returns the maximum value(s) present in the vector, with an optional comparator block.
|
74
|
+
#
|
75
|
+
# @param size [Integer] Number of maximum values to return. Defaults to nil.
|
76
|
+
#
|
77
|
+
# @example
|
78
|
+
#
|
79
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
80
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
81
|
+
# #=>
|
82
|
+
# # #<DaruLite::Vector(3)>
|
83
|
+
# # t Tyrion
|
84
|
+
# # d Daenerys
|
85
|
+
# # j Jon Starkgaryen
|
86
|
+
#
|
87
|
+
# dv.max
|
88
|
+
# #=> "Tyrion"
|
89
|
+
#
|
90
|
+
# dv.max(2) { |a,b| a.size <=> b.size }
|
91
|
+
# #=> ["Jon Starkgaryen","Daenerys"]
|
92
|
+
def max(size = nil, &block)
  # Missing values never take part in the comparison.
  present = reject_values(*DaruLite::MISSING_VALUES).to_a
  present.max(size, &block)
end
|
95
|
+
|
96
|
+
# Returns the maximum value(s) present in the vector, with a compulsory object block.
|
97
|
+
#
|
98
|
+
# @param size [Integer] Number of maximum values to return. Defaults to nil.
|
99
|
+
#
|
100
|
+
# @example
|
101
|
+
#
|
102
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
103
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
104
|
+
# #=>
|
105
|
+
# # #<DaruLite::Vector(3)>
|
106
|
+
# # t Tyrion
|
107
|
+
# # d Daenerys
|
108
|
+
# # j Jon Starkgaryen
|
109
|
+
#
|
110
|
+
# dv.max_by(2) { |i| i.size }
|
111
|
+
# #=> ["Jon Starkgaryen","Daenerys"]
|
112
|
+
def max_by(size = nil, &block)
  # Without a block there is nothing to rank by, so refuse early.
  raise ArgumentError, 'Expected compulsory object block in max_by method' if block.nil?

  present = reject_values(*DaruLite::MISSING_VALUES).to_a
  present.max_by(size, &block)
end
|
117
|
+
|
118
|
+
# Returns the minimum value(s) present in the vector, with an optional comparator block.
|
119
|
+
#
|
120
|
+
# @param size [Integer] Number of minimum values to return. Defaults to nil.
|
121
|
+
#
|
122
|
+
# @example
|
123
|
+
#
|
124
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
125
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
126
|
+
# #=>
|
127
|
+
# # #<DaruLite::Vector(3)>
|
128
|
+
# # t Tyrion
|
129
|
+
# # d Daenerys
|
130
|
+
# # j Jon Starkgaryen
|
131
|
+
#
|
132
|
+
# dv.min
|
133
|
+
# #=> "Daenerys"
|
134
|
+
#
|
135
|
+
# dv.min(2) { |a,b| a.size <=> b.size }
|
136
|
+
# #=> ["Tyrion","Daenerys"]
|
137
|
+
def min(size = nil, &block)
  # Missing values never take part in the comparison.
  present = reject_values(*DaruLite::MISSING_VALUES).to_a
  present.min(size, &block)
end
|
140
|
+
|
141
|
+
# Returns the minimum value(s) present in the vector, with a compulsory object block.
|
142
|
+
#
|
143
|
+
# @param size [Integer] Number of minimum values to return. Defaults to nil.
|
144
|
+
#
|
145
|
+
# @example
|
146
|
+
#
|
147
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
148
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
149
|
+
# #=>
|
150
|
+
# # #<DaruLite::Vector(3)>
|
151
|
+
# # t Tyrion
|
152
|
+
# # d Daenerys
|
153
|
+
# # j Jon Starkgaryen
|
154
|
+
#
|
155
|
+
# dv.min_by(2) { |i| i.size }
|
156
|
+
# #=> ["Tyrion","Daenerys"]
|
157
|
+
def min_by(size = nil, &block)
  # Without a block there is nothing to rank by, so refuse early.
  raise ArgumentError, 'Expected compulsory object block in min_by method' if block.nil?

  present = reject_values(*DaruLite::MISSING_VALUES).to_a
  present.min_by(size, &block)
end
|
162
|
+
else
|
163
|
+
# Returns the maximum value(s) present in the vector, with an optional comparator block.
|
164
|
+
#
|
165
|
+
# @param size [Integer] Number of maximum values to return. Defaults to nil.
|
166
|
+
#
|
167
|
+
# @example
|
168
|
+
#
|
169
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
170
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
171
|
+
# #=>
|
172
|
+
# # #<DaruLite::Vector(3)>
|
173
|
+
# # t Tyrion
|
174
|
+
# # d Daenerys
|
175
|
+
# # j Jon Starkgaryen
|
176
|
+
#
|
177
|
+
# dv.max
|
178
|
+
# #=> "Tyrion"
|
179
|
+
#
|
180
|
+
# dv.max(2) { |a,b| a.size <=> b.size }
|
181
|
+
# #=> ["Jon Starkgaryen","Daenerys"]
|
182
|
+
def max(size = nil, &block)
  # Legacy path: sort ascending, flip, then take either the head element
  # or the first +size+ elements.
  selector = size.nil? ? 0 : (0..size - 1)
  descending = reject_values(*DaruLite::MISSING_VALUES).to_a.sort(&block).reverse
  descending[selector]
end
|
186
|
+
|
187
|
+
# Returns the maximum value(s) present in the vector, with a compulsory object block.
|
188
|
+
#
|
189
|
+
# @param size [Integer] Number of maximum values to return. Defaults to nil.
|
190
|
+
#
|
191
|
+
# @example
|
192
|
+
#
|
193
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
194
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
195
|
+
# #=>
|
196
|
+
# # #<DaruLite::Vector(3)>
|
197
|
+
# # t Tyrion
|
198
|
+
# # d Daenerys
|
199
|
+
# # j Jon Starkgaryen
|
200
|
+
#
|
201
|
+
# dv.max_by(2) { |i| i.size }
|
202
|
+
# #=> ["Jon Starkgaryen","Daenerys"]
|
203
|
+
def max_by(size = nil, &block)
  raise ArgumentError, 'Expected compulsory object block in max_by method' if block.nil?

  # Legacy path: order by the block's key, flip, then slice.
  descending = reject_values(*DaruLite::MISSING_VALUES).to_a.sort_by(&block).reverse
  selector = size.nil? ? 0 : (0..size - 1)
  descending[selector]
end
|
208
|
+
|
209
|
+
# Returns the minimum value(s) present in the vector, with an optional comparator block.
|
210
|
+
#
|
211
|
+
# @param size [Integer] Number of minimum values to return. Defaults to nil.
|
212
|
+
#
|
213
|
+
# @example
|
214
|
+
#
|
215
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
216
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
217
|
+
# #=>
|
218
|
+
# # #<DaruLite::Vector(3)>
|
219
|
+
# # t Tyrion
|
220
|
+
# # d Daenerys
|
221
|
+
# # j Jon Starkgaryen
|
222
|
+
#
|
223
|
+
# dv.min
|
224
|
+
# #=> "Daenerys"
|
225
|
+
#
|
226
|
+
# dv.min(2) { |a,b| a.size <=> b.size }
|
227
|
+
# #=> ["Tyrion","Daenerys"]
|
228
|
+
def min(size = nil, &block)
  # Legacy path: sort ascending, then take either the head element or the
  # first +size+ elements.
  selector = size.nil? ? 0 : (0..size - 1)
  ascending = reject_values(*DaruLite::MISSING_VALUES).to_a.sort(&block)
  ascending[selector]
end
|
232
|
+
|
233
|
+
# Returns the minimum value(s) present in the vector, with a compulsory object block.
|
234
|
+
#
|
235
|
+
# @param size [Integer] Number of minimum values to return. Defaults to nil.
|
236
|
+
#
|
237
|
+
# @example
|
238
|
+
#
|
239
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
240
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
241
|
+
# #=>
|
242
|
+
# # #<DaruLite::Vector(3)>
|
243
|
+
# # t Tyrion
|
244
|
+
# # d Daenerys
|
245
|
+
# # j Jon Starkgaryen
|
246
|
+
#
|
247
|
+
# dv.min_by { |i| i.size }
|
248
|
+
# #=> "Tyrion"
|
249
|
+
#
|
250
|
+
# dv.min_by(2) { |i| i.size }
|
251
|
+
# #=> ["Tyrion","Daenerys"]
|
252
|
+
def min_by(size = nil, &block)
  raise ArgumentError, 'Expected compulsory object block in min_by method' if block.nil?

  # Legacy path: order by the block's key, then slice.
  ascending = reject_values(*DaruLite::MISSING_VALUES).to_a.sort_by(&block)
  selector = size.nil? ? 0 : (0..size - 1)
  ascending[selector]
end
|
257
|
+
end
|
258
|
+
|
259
|
+
# Returns the index of the maximum value(s) present in the vector, with an optional
|
260
|
+
# comparator block.
|
261
|
+
#
|
262
|
+
# @param size [Integer] Number of maximum indices to return. Defaults to nil.
|
263
|
+
#
|
264
|
+
# @example
|
265
|
+
#
|
266
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
267
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
268
|
+
# #=>
|
269
|
+
# # #<DaruLite::Vector(3)>
|
270
|
+
# # t Tyrion
|
271
|
+
# # d Daenerys
|
272
|
+
# # j Jon Starkgaryen
|
273
|
+
#
|
274
|
+
# dv.index_of_max
|
275
|
+
# #=> :t
|
276
|
+
#
|
277
|
+
# dv.index_of_max(2) { |a,b| a.size <=> b.size }
|
278
|
+
# #=> [:j, :d]
|
279
|
+
def index_of_max(size = nil, &block)
  maxima = max(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  # A single value maps to a single index; a list maps element by element.
  return present.index_of(maxima) unless maxima.is_a?(Array)

  maxima.map { |value| present.index_of(value) }
end
|
284
|
+
|
285
|
+
# Returns the index of the maximum value(s) present in the vector, with a compulsory
|
286
|
+
# object block.
|
287
|
+
#
|
288
|
+
# @param size [Integer] Number of maximum indices to return. Defaults to nil.
|
289
|
+
#
|
290
|
+
# @example
|
291
|
+
#
|
292
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
293
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
294
|
+
# #=>
|
295
|
+
# # #<DaruLite::Vector(3)>
|
296
|
+
# # t Tyrion
|
297
|
+
# # d Daenerys
|
298
|
+
# # j Jon Starkgaryen
|
299
|
+
#
|
300
|
+
# dv.index_of_max_by(2) { |i| i.size }
|
301
|
+
# #=> [:j, :d]
|
302
|
+
def index_of_max_by(size = nil, &block)
  maxima = max_by(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  # A single value maps to a single index; a list maps element by element.
  return present.index_of(maxima) unless maxima.is_a?(Array)

  maxima.map { |value| present.index_of(value) }
end
|
307
|
+
|
308
|
+
# Returns the index of the minimum value(s) present in the vector, with an optional
|
309
|
+
# comparator block.
|
310
|
+
#
|
311
|
+
# @param size [Integer] Number of minimum indices to return. Defaults to nil.
|
312
|
+
#
|
313
|
+
# @example
|
314
|
+
#
|
315
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
316
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
317
|
+
# #=>
|
318
|
+
# # #<DaruLite::Vector(3)>
|
319
|
+
# # t Tyrion
|
320
|
+
# # d Daenerys
|
321
|
+
# # j Jon Starkgaryen
|
322
|
+
#
|
323
|
+
# dv.index_of_min
|
324
|
+
# #=> :d
|
325
|
+
#
|
326
|
+
# dv.index_of_min(2) { |a,b| a.size <=> b.size }
|
327
|
+
# #=> [:t, :d]
|
328
|
+
def index_of_min(size = nil, &block)
  minima = min(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  # A single value maps to a single index; a list maps element by element.
  return present.index_of(minima) unless minima.is_a?(Array)

  minima.map { |value| present.index_of(value) }
end
|
333
|
+
|
334
|
+
# Returns the index of the minimum value(s) present in the vector, with a compulsory
|
335
|
+
# object block.
|
336
|
+
#
|
337
|
+
# @param size [Integer] Number of minimum indices to return. Defaults to nil.
|
338
|
+
#
|
339
|
+
# @example
|
340
|
+
#
|
341
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
342
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
343
|
+
# #=>
|
344
|
+
# # #<DaruLite::Vector(3)>
|
345
|
+
# # t Tyrion
|
346
|
+
# # d Daenerys
|
347
|
+
# # j Jon Starkgaryen
|
348
|
+
#
|
349
|
+
# dv.index_of_min_by(2) { |i| i.size }
|
350
|
+
# #=> [:t, :d]
|
351
|
+
def index_of_min_by(size = nil, &block)
  minima = min_by(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  # A single value maps to a single index; a list maps element by element.
  return present.index_of(minima) unless minima.is_a?(Array)

  minima.map { |value| present.index_of(value) }
end
|
356
|
+
|
357
|
+
# Return the maximum element present in the Vector, as a Vector.
|
358
|
+
# @return [DaruLite::Vector]
|
359
|
+
def max_index
  largest = @data.max
  # One-entry hash: the index label of the maximum mapped to the maximum.
  entry = { index_of(largest) => largest }
  DaruLite::Vector.new(entry, name: @name, dtype: @dtype)
end
|
363
|
+
|
364
|
+
# Count how often each non-nil element occurs.
#
# @return [DaruLite::Vector] built from a value => occurrences hash
def frequencies
  tally = Hash.new(0)
  @data.each do |element|
    next if element.nil?

    tally[element] += 1
  end
  DaruLite::Vector.new(tally)
end
|
371
|
+
|
372
|
+
alias freqs frequencies
|
373
|
+
deprecate :freqs, :frequencies, 2016, 10
|
374
|
+
|
375
|
+
# Share of the non-missing observations taken by each distinct value.
#
# @return [Hash] value => fraction (Float)
def proportions
  valid_total = size - count_values(*DaruLite::MISSING_VALUES)
  frequencies.to_h.transform_values { |occurrences| occurrences / valid_total.to_f }
end
|
381
|
+
|
382
|
+
# Replace every element by its rank; tied values share the average of the
# rank positions they occupy.
def ranked
  seen = 0
  ranks = {}
  frequencies.to_h.sort.each do |value, occurrences|
    # Mean of the first and last rank position covered by this value.
    ranks[value] = ((seen + 1) + (seen + occurrences)).quo(2)
    seen += occurrences
  end

  recode { |element| ranks[element] }
end
|
391
|
+
|
392
|
+
# Sample standard deviation divided by the mean.
def coefficient_of_variation
  deviation = standard_deviation_sample
  deviation / mean
end
|
395
|
+
|
396
|
+
# Retrieves the number of cases that satisfy a condition. If block given,
|
397
|
+
# retrieves number of instances where block returns true. If other
|
398
|
+
# values given, retrieves the frequency for this value. If no value
|
399
|
+
# given, counts the number of non-nil elements in the Vector.
|
400
|
+
def count(value = false, &block)
  # A block wins over everything else.
  return @data.count(&block) if block
  # A truthy value is counted by equality, re-entering through the block form.
  return count { |val| val == value } if value

  size - indexes(*DaruLite::MISSING_VALUES).size
end
|
409
|
+
|
410
|
+
# Count number of occurrences of each value in the Vector
|
411
|
+
def value_counts
  occurrences = Hash.new(0)
  # Unlike #frequencies, nil entries are tallied as well.
  @data.each { |datum| occurrences[datum] += 1 }
  DaruLite::Vector.new(occurrences)
end
|
418
|
+
|
419
|
+
# Fraction of the non-missing observations equal to +value+.
#
# @param value [Object] the value to look up in #frequencies (default 1)
# @return [Float]
def proportion(value = 1)
  occurrences = frequencies[value]
  valid_total = size - count_values(*DaruLite::MISSING_VALUES)
  occurrences.quo(valid_total).to_f
end
|
422
|
+
|
423
|
+
# Sample variance with denominator (N-1)
|
424
|
+
def variance_sample(m = nil)
  m ||= mean
  # Prefer the wrapper's own implementation when it has one.
  return @data.variance_sample(m) if @data.respond_to?(:variance_sample)

  sum_of_squares(m).quo(size - count_values(*DaruLite::MISSING_VALUES) - 1)
end
|
432
|
+
|
433
|
+
# Population variance with denominator (N)
|
434
|
+
def variance_population(m = nil)
  m ||= mean
  # Prefer the wrapper's own implementation when it has one.
  return @data.variance_population(m) if @data.respond_to?(:variance_population)

  sum_of_squares(m).quo(size - count_values(*DaruLite::MISSING_VALUES)).to_f
end
|
442
|
+
|
443
|
+
# Sample covariance with denominator (N-1)
|
444
|
+
def covariance_sample(other)
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  degrees_of_freedom = size - count_values(*DaruLite::MISSING_VALUES) - 1
  covariance_sum(other) / degrees_of_freedom
end
|
448
|
+
|
449
|
+
# Population covariance with denominator (N)
|
450
|
+
def covariance_population(other)
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  valid_count = size - count_values(*DaruLite::MISSING_VALUES)
  covariance_sum(other) / valid_count
end
|
454
|
+
|
455
|
+
# Sum of squared differences between each non-missing value and +m+.
#
# @param m [Numeric, nil] centre to measure from; defaults to #mean
def sum_of_squares(m = nil)
  m ||= mean
  valid = reject_values(*DaruLite::MISSING_VALUES).data
  valid.inject(0) { |total, value| total + ((value - m)**2) }
end
|
461
|
+
|
462
|
+
# Population standard deviation: the wrapper's own implementation when it
# has one, otherwise the square root of #variance_population.
def standard_deviation_population(m = nil)
  m ||= mean
  return @data.standard_deviation_population(m) if @data.respond_to?(:standard_deviation_population)

  Math.sqrt(variance_population(m))
end
|
470
|
+
|
471
|
+
# Sample standard deviation: the wrapper's own implementation when it has
# one, otherwise the square root of #variance_sample.
def standard_deviation_sample(m = nil)
  m ||= mean
  return @data.standard_deviation_sample(m) if @data.respond_to?(:standard_deviation_sample)

  Math.sqrt(variance_sample(m))
end
|
479
|
+
|
480
|
+
# Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3)
|
481
|
+
# @param m [Numeric, nil] mean to measure deviations from; defaults to #mean
#
# The cubed deviations are summed over non-missing values only (the same
# +reject_values(...).data+ access used by #sum_of_squares). The divisor
# already excluded missing values, but the numerator used to walk the raw
# data and raised on the first nil.
def skew(m = nil)
  # The wrapper's own implementation, when present, takes precedence.
  return @data.skew if @data.respond_to? :skew

  m ||= mean
  valid = reject_values(*DaruLite::MISSING_VALUES).data
  th = valid.inject(0) { |memo, val| memo + ((val - m)**3) }
  th.quo((size - indexes(*DaruLite::MISSING_VALUES).size) * (standard_deviation_sample(m)**3))
end
|
490
|
+
|
491
|
+
# Excess kurtosis: sum((x - m)^4) / (N * std_dev_sample^4) - 3, with N the
# number of non-missing values.
#
# The fourth-power deviations are summed over non-missing values only (the
# same +reject_values(...).data+ access used by #sum_of_squares). The
# divisor already excluded missing values, but the numerator used to walk
# the raw data and raised on the first nil.
#
# @param m [Numeric, nil] mean to measure deviations from; defaults to #mean
def kurtosis(m = nil)
  # The wrapper's own implementation, when present, takes precedence.
  return @data.kurtosis if @data.respond_to? :kurtosis

  m ||= mean
  valid = reject_values(*DaruLite::MISSING_VALUES).data
  fo = valid.inject(0) { |a, x| a + ((x - m)**4) }
  fo.quo((size - indexes(*DaruLite::MISSING_VALUES).size) * (standard_deviation_sample(m)**4)) - 3
end
|
500
|
+
|
501
|
+
# Mean absolute deviation from +m+ over the non-missing values.
#
# @param m [Numeric, nil] centre to measure from; defaults to #mean
def average_deviation_population(m = nil)
  must_be_numeric!
  m ||= mean
  valid = reject_values(*DaruLite::MISSING_VALUES).data
  total = valid.inject(0) { |acc, value| (value - m).abs + acc }
  total.quo(size - count_values(*DaruLite::MISSING_VALUES))
end
|
508
|
+
|
509
|
+
# Returns the value of the percentile q
|
510
|
+
#
|
511
|
+
# Accepts an optional second argument specifying the strategy to interpolate
|
512
|
+
# when the requested percentile lies between two data points a and b
|
513
|
+
# Valid strategies are:
|
514
|
+
# * :midpoint (Default): (a + b) / 2
|
515
|
+
# * :linear : a + (b - a) * d where d is the decimal part of the index between a and b.
|
516
|
+
# == References
|
517
|
+
#
|
518
|
+
# This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
|
519
|
+
def percentile(q, strategy = :midpoint)
  # Map each supported strategy to the helper that implements it.
  handler = { midpoint: :midpoint_percentile, linear: :linear_percentile }[strategy]
  raise ArgumentError, "Unknown strategy #{strategy}" unless handler

  send(handler, q)
end
|
529
|
+
|
530
|
+
# Dichotomize the vector with 0 and 1, based on lowest value.
|
531
|
+
# If parameter is defined, this value and lower will be 0
|
532
|
+
# and higher, 1.
|
533
|
+
def dichotomize(low = nil)
  low ||= factors.min

  recode do |x|
    # nil entries stay nil rather than being forced into a class.
    next if x.nil?

    x > low ? 1 : 0
  end
end
|
546
|
+
|
547
|
+
# Center data by subtracting the mean from each non-nil value.
|
548
|
+
def center
  average = mean
  self - average
end
|
551
|
+
|
552
|
+
# Standardize data.
|
553
|
+
#
|
554
|
+
# == Arguments
|
555
|
+
#
|
556
|
+
# * use_population - Pass as *true* if you want to use population
|
557
|
+
# standard deviation instead of sample standard deviation.
|
558
|
+
def standardize(use_population = false)
  mean_value = mean
  deviation = use_population ? sdp : sds
  # With no mean or a zero deviation the result is an all-nil vector of
  # the same size.
  degenerate = mean_value.nil? || deviation.to_d == BigDecimal('0.0')
  return DaruLite::Vector.new([nil] * size) if degenerate

  vector_standardized_compute mean_value, deviation
end
|
565
|
+
|
566
|
+
# :nocov:
|
567
|
+
def box_cox_transformation(lambda) # :nodoc:
  must_be_numeric!

  recode do |x|
    # nil entries stay nil.
    next if x.nil?

    lambda.zero? ? Math.log(x) : ((x**lambda) - 1).quo(lambda)
  end
end
|
580
|
+
# :nocov:
|
581
|
+
|
582
|
+
# Replace each non-nil value in the vector with its percentile.
|
583
|
+
def vector_percentile
  valid_count = size - indexes(*DaruLite::MISSING_VALUES).size
  ranked.recode! do |rank|
    # nil ranks stay nil.
    (rank.quo(valid_count) * 100).to_f unless rank.nil?
  end
end
|
587
|
+
|
588
|
+
# Build the standardized vector (x - m) / sd, leaving nil entries as nil.
# Delegates to the data wrapper when it provides its own implementation.
def vector_standardized_compute(m, sd)
  return @data.vector_standardized_compute(m, sd) if @data.respond_to?(:vector_standardized_compute)

  scores = @data.map { |x| x.nil? ? nil : (x.to_f - m).quo(sd) }
  DaruLite::Vector.new scores, index: index, name: name, dtype: dtype
end
|
596
|
+
|
597
|
+
# Build the centered vector x - m, leaving nil entries as nil. Delegates to
# the data wrapper when it provides its own implementation.
def vector_centered_compute(m)
  return @data.vector_centered_compute(m) if @data.respond_to?(:vector_centered_compute)

  shifted = @data.map { |x| x.nil? ? nil : x.to_f - m }
  DaruLite::Vector.new shifted, index: index, name: name, dtype: dtype
end
|
605
|
+
|
606
|
+
# Returns a random sample of size n, with replacement,
|
607
|
+
# only with non-nil data.
|
608
|
+
#
|
609
|
+
# In all the trials, every item has the same probability
|
610
|
+
# of being selected.
|
611
|
+
def sample_with_replacement(sample = 1)
  return @data.sample_with_replacement(sample) if @data.respond_to?(:sample_with_replacement)

  # Only strip missing values when there are any; otherwise draw from self.
  no_missing = indexes(*DaruLite::MISSING_VALUES).empty?
  pool = no_missing ? self : reject_values(*DaruLite::MISSING_VALUES)
  pool_size = pool.size
  (0...sample).map { pool[rand(pool_size)] }
end
|
620
|
+
|
621
|
+
# Returns a random sample of size n, without replacement,
|
622
|
+
# only with valid data.
|
623
|
+
#
|
624
|
+
# Every element could only be selected once.
|
625
|
+
#
|
626
|
+
# A sample of the same size of the vector is the vector itself.
|
627
|
+
def sample_without_replacement(sample = 1)
  # Prefer the wrapper's own implementation when it has one.
  return @data.sample_without_replacement(sample) if @data.respond_to?(:sample_without_replacement)

  raw_sample_without_replacement(sample)
end
|
634
|
+
|
635
|
+
# The percent_change method computes the percent change over
|
636
|
+
# the given number of periods.
|
637
|
+
#
|
638
|
+
# @param [Integer] periods (1) number of nils to insert at the beginning.
|
639
|
+
#
|
640
|
+
# @example
|
641
|
+
#
|
642
|
+
# vector = DaruLite::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
|
643
|
+
# vector.percent_change
|
644
|
+
# #=>
|
645
|
+
# # <DaruLite::Vector:28713060 @name = nil size: 5 >
|
646
|
+
# # nil
|
647
|
+
# # a
|
648
|
+
# # f 0.5
|
649
|
+
# # t 0.0
|
650
|
+
# # i 0.3333333333333333
|
651
|
+
# # k 0.25
|
652
|
+
def percent_change(periods = 1)
  must_be_numeric!

  prev = nil
  changes = @data.each_with_index.map do |cur, i|
    # No change can be reported for the leading +periods+ slots, nor when
    # either side of the comparison is missing.
    blank = i < periods ||
            include_with_nan?(DaruLite::MISSING_VALUES, cur) ||
            include_with_nan?(DaruLite::MISSING_VALUES, prev)
    change = blank ? nil : (cur - prev) / prev.to_f
    # Remember the latest truthy value as the next baseline.
    prev = cur if cur
    change
  end

  DaruLite::Vector.new(changes, index: @index, name: @name)
end
|
668
|
+
|
669
|
+
# Performs the difference of the series.
|
670
|
+
# Note: The first difference of series is X(t) - X(t-1)
|
671
|
+
# But, second difference of series is NOT X(t) - X(t-2)
|
672
|
+
# It is the first difference of the first difference
|
673
|
+
# => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
|
674
|
+
#
|
675
|
+
# == Arguments
|
676
|
+
#
|
677
|
+
# * *max_lags*: integer, (default: 1), number of differences reqd.
|
678
|
+
#
|
679
|
+
# @example Using #diff
|
680
|
+
#
|
681
|
+
# ts = DaruLite::Vector.new((1..10).map { rand })
|
682
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
683
|
+
#
|
684
|
+
# ts.diff # => [nil, -0.46, 0.21, 0.27, ...]
|
685
|
+
#
|
686
|
+
# @return [DaruLite::Vector]
|
687
|
+
def diff(max_lags = 1)
  series = self
  differenced = []
  # Each pass differences the previous pass's result, not the original.
  max_lags.times do
    differenced = series - series.lag
    series = differenced
  end
  differenced
end
|
696
|
+
|
697
|
+
# Calculate the rolling function for a loopback value.
|
698
|
+
#
|
699
|
+
# @param [Symbol] function The rolling function to be applied. Can be
|
700
|
+
# any function applicable to DaruLite::Vector (:mean, :median, :count,
|
701
|
+
# :min, :max, etc.)
|
702
|
+
# @param [Integer] n (10) A non-negative value which serves as the loopback length.
|
703
|
+
# @return [DaruLite::Vector] Vector containing rolling calculations.
|
704
|
+
# @example Using #rolling
|
705
|
+
# ts = DaruLite::Vector.new((1..100).map { rand })
|
706
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
707
|
+
# # first 9 observations are nil
|
708
|
+
# ts.rolling(:mean) # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
|
709
|
+
def rolling(function, n = 10)
  raise ArgumentError, 'Loopback length must be at least 1' if n < 1

  # A window is only complete from position n - 1 onwards, so everything
  # before that is nil. Capping the padding at size keeps the result as long
  # as the index when the loopback is longer than the series.
  padding = [nil] * [n - 1, size].min
  # The range is empty when n > size, so no partial window is ever evaluated.
  windows = (0..(size - n)).map do |i|
    DaruLite::Vector.new(@data[i...(i + n)]).send(function)
  end

  DaruLite::Vector.new(padding + windows, index: @index)
end
|
717
|
+
|
718
|
+
# @!method rolling_mean
|
719
|
+
# Calculate rolling average
|
720
|
+
# @yieldparam [Integer] n (10) Loopback length
|
721
|
+
# @!method rolling_median
|
722
|
+
# Calculate rolling median
|
723
|
+
# @yieldparam [Integer] n (10) Loopback length
|
724
|
+
# @!method rolling_count
|
725
|
+
# Calculate rolling non-missing count
|
726
|
+
# @yieldparam [Integer] n (10) Loopback length
|
727
|
+
# @!method rolling_max
|
728
|
+
# Calculate rolling max value
|
729
|
+
# @yieldparam [Integer] n (10) Loopback length
|
730
|
+
# @!method rolling_min
|
731
|
+
# Calculate rolling min value
|
732
|
+
# @yieldparam [Integer] n (10) Loopback length
|
733
|
+
# @!method rolling_sum
|
734
|
+
# Calculate rolling sum
|
735
|
+
# @yieldparam [Integer] n (10) Loopback length
|
736
|
+
# @!method rolling_std
|
737
|
+
# Calculate rolling standard deviation
|
738
|
+
# @yieldparam [Integer] n (10) Loopback length
|
739
|
+
# @!method rolling_variance
|
740
|
+
# Calculate rolling variance
|
741
|
+
# @yieldparam [Integer] n (10) Loopback length
|
742
|
+
# Generate one rolling_<statistic> convenience method per statistic; each
# simply forwards to #rolling with a default loopback of 10.
%i[count mean median max min sum std variance].each do |statistic|
  define_method("rolling_#{statistic}") { |n = 10| rolling(statistic, n) }
end
|
747
|
+
|
748
|
+
# Exponential Moving Average.
|
749
|
+
# Calculates an exponential moving average of the series using a
|
750
|
+
# specified parameter. If wilder is false (the default) then the EMA
|
751
|
+
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
|
752
|
+
# Welles Wilder smoother of 1 / n.
|
753
|
+
#
|
754
|
+
# Warning for EMA usage: EMAs are unstable for small series, as they
|
755
|
+
# use a lot more than n observations to calculate. The series is stable
|
756
|
+
# if the size of the series is >= 3.45 * (n + 1)
|
757
|
+
#
|
758
|
+
# @param [Integer] n (10) Loopback length.
|
759
|
+
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
760
|
+
# used for smoothing; if false, uses 2/(n+1) value
|
761
|
+
#
|
762
|
+
# @example Using ema
|
763
|
+
#
|
764
|
+
# ts = DaruLite::Vector.new((1..100).map { rand })
|
765
|
+
# # => [0.577..., 0.123..., 0.173..., 0.233..., ...]
|
766
|
+
#
|
767
|
+
# # first 9 observations are nil
|
768
|
+
# ts.ema # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]
|
769
|
+
#
|
770
|
+
# @return [DaruLite::Vector] Contains EMA
|
771
|
+
def ema(n = 10, wilder = false)
  raise ArgumentError, 'Loopback length must be at least 1' if n < 1

  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
  # everything is measured from the first non-nil observation
  start = @data.index { |value| !value.nil? }
  # start is nil when no observation is present at all; and with fewer than
  # n observations from start onwards there is nothing to seed the average
  # with. Either way every position stays nil.
  if start.nil? || start + n > size
    return DaruLite::Vector.new([nil] * size, index: @index, name: @name)
  end

  # positions before the first full window are nil
  base = [nil] * (start + n - 1)
  # the first value is a plain average over the first n observations
  # (nils inside that window are skipped but still counted in the divisor)
  base << (@data[start...(start + n)].inject(0.0) { |sum, value| value.nil? ? sum : sum + value } / n)
  # every later value blends the new observation with the previous average
  (start + n).upto(size - 1) do |i|
    base << ((self[i] * smoother) + ((1 - smoother) * base.last))
  end

  DaruLite::Vector.new(base, index: @index, name: @name)
end
|
785
|
+
|
786
|
+
# Exponential Moving Variance.
|
787
|
+
# Calculates an exponential moving variance of the series using a
|
788
|
+
# specified parameter. If wilder is false (the default) then the EMV
|
789
|
+
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
|
790
|
+
# Welles Wilder smoother of 1 / n.
|
791
|
+
#
|
792
|
+
# @param [Integer] n (10) Loopback length.
|
793
|
+
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
794
|
+
# used for smoothing; if false, uses 2/(n+1) value
|
795
|
+
#
|
796
|
+
# @example Using emv
|
797
|
+
#
|
798
|
+
# ts = DaruLite::Vector.new((1..100).map { rand })
|
799
|
+
# # => [0.047..., 0.23..., 0.836..., 0.845..., ...]
|
800
|
+
#
|
801
|
+
# # first 9 observations are nil
|
802
|
+
# ts.emv # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]
|
803
|
+
#
|
804
|
+
# @return [DaruLite::Vector] contains EMV
|
805
|
+
def emv(n = 10, wilder = false) # rubocop:disable Metrics/AbcSize
  raise ArgumentError, 'Loopback length must be at least 1' if n < 1

  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
  # everything is measured from the first non-nil observation
  start = @data.index { |value| !value.nil? }
  # start is nil when no observation is present at all; and with fewer than
  # n observations from start onwards there is nothing to seed the variance
  # with. Either way every position stays nil.
  if start.nil? || start + n > size
    return DaruLite::Vector.new([nil] * size, index: @index, name: @name)
  end

  # positions before the first full window are nil
  var_base = [nil] * (start + n - 1)
  mean_base = [nil] * (start + n - 1)
  seed_window = @data[start...(start + n)]
  # seed the running mean with a plain average of the first n observations
  mean_base << (seed_window.inject(0.0) { |sum, value| value.nil? ? sum : sum + value } / n)
  # seed the running variance with the population variance of that window
  var_base << (seed_window.inject(0.0) { |sum, value| value.nil? ? sum : sum + ((value - mean_base.last)**2) } / n)
  (start + n).upto(size - 1) do |i|
    previous_mean = mean_base.last
    mean_base << ((self[i] * smoother) + ((1 - smoother) * previous_mean))
    # the increment uses the deviation from the mean before and after the update
    var_base << (((1 - smoother) * var_base.last) + (smoother * (self[i] - previous_mean) * (self[i] - mean_base.last)))
  end

  DaruLite::Vector.new(var_base, index: @index, name: @name)
end
|
822
|
+
|
823
|
+
# Exponential Moving Standard Deviation.
|
824
|
+
# Calculates an exponential moving standard deviation of the series using a
|
825
|
+
# specified parameter. If wilder is false (the default) then the EMSD
|
826
|
+
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
|
827
|
+
# Welles Wilder smoother of 1 / n.
|
828
|
+
#
|
829
|
+
# @param [Integer] n (10) Loopback length.
|
830
|
+
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
831
|
+
# used for smoothing; if false, uses 2/(n+1) value
|
832
|
+
#
|
833
|
+
# @example Using emsd
|
834
|
+
#
|
835
|
+
# ts = DaruLite::Vector.new((1..100).map { rand })
|
836
|
+
# # => [0.400..., 0.727..., 0.862..., 0.013..., ...]
|
837
|
+
#
|
838
|
+
# # first 9 observations are nil
|
839
|
+
# ts.emsd # => [ ... nil, 0.285... , 0.258..., 0.243..., ...]
|
840
|
+
#
|
841
|
+
# @return [DaruLite::Vector] contains EMSD
|
842
|
+
def emsd(n = 10, wilder = false)
  deviations = []
  # the standard deviation is the square root of each moving variance;
  # positions the variance leaves nil stay nil
  emv(n, wilder).each do |variance|
    deviations.push(variance.nil? ? nil : Math.sqrt(variance))
  end

  DaruLite::Vector.new(deviations, index: @index, name: @name)
end
|
850
|
+
|
851
|
+
# Moving Average Convergence-Divergence.
|
852
|
+
# Calculates the MACD (moving average convergence-divergence) of the time
|
853
|
+
# series.
|
854
|
+
# @see https://en.wikipedia.org/wiki/MACD
|
855
|
+
#
|
856
|
+
# @param fast [Integer] fast period of MACD (default 12)
|
857
|
+
# @param slow [Integer] slow period of MACD (default 26)
|
858
|
+
# @param signal [Integer] signal period of MACD (default 9)
|
859
|
+
#
|
860
|
+
# @example Create a series and calculate MACD values
|
861
|
+
# ts = DaruLite::Vector.new((1..100).map { rand })
|
862
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
863
|
+
# macdseries, macdsignal, macdhist = ts.macd
|
864
|
+
# macdseries, macdsignal, macdhist = ts.macd(13)
|
865
|
+
# macdseries, macdsignal, macdhist = ts.macd(12, 26, 5) # arguments are positional: fast, slow, signal
|
866
|
+
#
|
867
|
+
# @return [Array<DaruLite::Vector>] macdseries, macdsignal and macdhist are
|
868
|
+
# returned as an array of three DaruLite::Vectors
|
869
|
+
#
|
870
|
+
def macd(fast = 12, slow = 26, signal = 9)
  # MACD line: fast EMA minus slow EMA
  line = ema(fast) - ema(slow)
  # signal line: EMA of the MACD line itself
  trigger = line.ema(signal)
  # the third element (histogram) is the gap between the two
  [line, trigger, line - trigger]
end
|
876
|
+
|
877
|
+
# Calculates the autocorrelation coefficients of the series.
|
878
|
+
#
|
879
|
+
# The first element is always 1, since that is the correlation
|
880
|
+
# of the series with itself.
|
881
|
+
#
|
882
|
+
# @example
|
883
|
+
# ts = DaruLite::Vector.new((1..100).map { rand })
|
884
|
+
#
|
885
|
+
# ts.acf # => array with first 21 autocorrelations
|
886
|
+
# ts.acf 3 # => array with first 3 autocorrelations
|
887
|
+
def acf(max_lags = nil)
  max_lags ||= (10 * Math.log10(size)).to_i
  # With no positive lag requested only the lag-0 entry (if any) is produced,
  # and none of the statistics below are needed.
  return (0..max_lags).map { 1.0 } if max_lags < 1

  # The mean, the centred series and the sample variance do not depend on the
  # lag, so compute them once instead of once per lag.
  m = mean
  centred = self - m
  spread = variance_sample

  # can't use Pearson coefficient since the mean for the lagged series should
  # be the same as the regular series
  correlations = (1..max_lags).map do |i|
    (centred * (lag(i) - m)).sum / spread / (size - 1)
  end

  # lag 0 is the series against itself, which is always 1
  [1.0] + correlations
end
|
901
|
+
|
902
|
+
# Provides autocovariance.
|
903
|
+
#
|
904
|
+
# == Options
|
905
|
+
#
|
906
|
+
# * *:demean* = true; optional. Supply false if series is not to be demeaned
|
907
|
+
# * *:unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
|
908
|
+
#
|
909
|
+
# == Returns
|
910
|
+
#
|
911
|
+
# Autocovariance value
|
912
|
+
def acvf(demean = true, unbiased = true)
  m = mean
  demeaned_series = demean ? self - m : self

  # Lags at or beyond the series length have no overlapping observations
  # (and previously indexed past the end of the divisor table), so stop there.
  n = [(10 * Math.log10(size)).to_i + 1, size].min

  0.upto(n - 1).map do |i|
    # Honour demean = false on the lagged side too; previously the mean was
    # always subtracted from the lagged series.
    lagged = demean ? lag(i) - m : lag(i)
    # Divisors are kept exactly as before: size for every lag when unbiased is
    # true, size - lag otherwise.
    # NOTE(review): the textbook unbiased estimator divides by (size - lag), so
    # this flag may be inverted; left unchanged because the default output
    # depends on it. Confirm before relying on either form.
    divisor = unbiased ? size : size - i
    # to_f keeps integer data from being truncated by integer division
    (demeaned_series * lagged).sum / divisor.to_f
  end
end
|
927
|
+
|
928
|
+
# Calculate cumulative sum of Vector
|
929
|
+
def cumsum
  running_total = 0
  sums = []
  @data.each do |value|
    # missing entries produce nil and leave the running total untouched
    missing = include_with_nan?(DaruLite::MISSING_VALUES, value)
    running_total += value unless missing
    sums.push(missing ? nil : running_total)
  end

  DaruLite::Vector.new(sums, index: @index)
end
|
943
|
+
|
944
|
+
# Short aliases for the statistics methods. Note that sds, sd and std (via
# sds) all resolve to standard_deviation_sample, and that the unqualified
# variance / covariance names point at the sample forms.
alias sdp standard_deviation_population
|
945
|
+
alias sds standard_deviation_sample
|
946
|
+
alias std sds
|
947
|
+
alias adp average_deviation_population
|
948
|
+
alias cov coefficient_of_variation
|
949
|
+
alias variance variance_sample
|
950
|
+
alias covariance covariance_sample
|
951
|
+
alias sd standard_deviation_sample
|
952
|
+
alias ss sum_of_squares
|
953
|
+
alias percentil percentile
|
954
|
+
alias se standard_error
|
955
|
+
|
956
|
+
private
|
957
|
+
|
958
|
+
# Guard for numeric-only operations: raises TypeError unless numeric? is truthy.
def must_be_numeric!
  numeric? || raise(TypeError, 'Vector must be numeric')
end
|
961
|
+
|
962
|
+
# Sum of the products of paired deviations from each vector's own mean.
# A pair in which either side is nil or false contributes 0.
def covariance_sum(other)
  own_mean = mean
  their_mean = other.mean
  pairs = @data.zip(other.data)

  pairs.inject(0) do |total, (mine, theirs)|
    total + (mine && theirs ? (mine - own_mean) * (theirs - their_mean) : 0)
  end
end
|
974
|
+
|
975
|
+
# Percentile by the midpoint rule over the non-missing values, sorted.
# Returns nil when there are no non-missing values.
def midpoint_percentile(q)
  sorted = reject_values(*DaruLite::MISSING_VALUES).to_a.sort
  # nothing to rank
  return nil if sorted.empty?

  v = ((size - count_values(*DaruLite::MISSING_VALUES)) * q).quo(100)
  if v.to_i == v
    # v falls exactly on a boundary, so average the two neighbouring values.
    # At q = 100 the upper neighbour would lie one past the end of the data;
    # clamp it to the last element so the result is the maximum.
    upper = [(v + 0.5).to_i, sorted.size - 1].min
    (sorted[(v - 0.5).to_i].to_f + sorted[upper]).quo(2)
  else
    sorted[v.to_i]
  end
end
|
985
|
+
|
986
|
+
# Percentile by linear interpolation between the two sorted non-missing
# values that surround the requested rank.
def linear_percentile(q)
  ordered = reject_values(*DaruLite::MISSING_VALUES).to_a.sort
  present = size - count_values(*DaruLite::MISSING_VALUES)
  position = (q / 100.0) * (present + 1)
  rank = position.truncate

  # ranks that fall off either end resolve to the first / last value
  return ordered[0] if rank.zero?
  return ordered[-1] if rank >= ordered.size

  # otherwise move the fractional part of the position from the value below
  # towards the value above
  below = ordered[rank - 1]
  below + ((position % 1) * (ordered[rank] - below))
end
|
1001
|
+
|
1002
|
+
# Draws `sample` values from the non-missing data, never using the same
# position twice. Returns a plain Array.
#
# Raises ArgumentError when more values are requested than are available.
def raw_sample_without_replacement(sample)
  valid = indexes(*DaruLite::MISSING_VALUES).empty? ? self : reject_values(*DaruLite::MISSING_VALUES)
  raise ArgumentError, "Sample size couldn't be greater than n" if
    sample > valid.size

  # Array#sample picks distinct positions directly, replacing the former
  # draw-and-retry loop (which rescanned the picks on every draw and looked
  # values up through the vector's own indexing). A non-positive request
  # still yields an empty result, as the loop did.
  sample.positive? ? valid.to_a.sample(sample) : []
end
|
1016
|
+
end
|
1017
|
+
end
|
1018
|
+
end
|
1019
|
+
end
|