daru_lite 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
|
@@ -0,0 +1,1019 @@
|
|
|
1
|
+
module DaruLite
|
|
2
|
+
module Maths
|
|
3
|
+
# Encapsulates statistics methods for vectors. Most basic stuff like mean, etc.
|
|
4
|
+
# is done inside the wrapper, so that native methods can be used for most of
|
|
5
|
+
# the computationally intensive tasks.
|
|
6
|
+
module Statistics
|
|
7
|
+
module Vector # rubocop:disable Metrics/ModuleLength
|
|
8
|
+
extend Gem::Deprecate
|
|
9
|
+
|
|
10
|
+
# Arithmetic mean of the vector, as computed by the underlying data wrapper.
#
# @return whatever +@data.mean+ returns
def mean
  wrapper = @data
  wrapper.mean
end
|
|
13
|
+
|
|
14
|
+
# Sum of the vector, as computed by the underlying data wrapper.
#
# @return whatever +@data.sum+ returns
def sum
  wrapper = @data
  wrapper.sum
end
|
|
17
|
+
|
|
18
|
+
# Product of the vector, as computed by the underlying data wrapper.
#
# @return whatever +@data.product+ returns
def product
  wrapper = @data
  wrapper.product
end
|
|
21
|
+
|
|
22
|
+
# Spread of the vector: the largest value minus the smallest value.
#
# @return the result of +max - min+
def range
  highest = max
  lowest = min
  highest - lowest
end
|
|
25
|
+
|
|
26
|
+
# Median of the vector. Uses the data wrapper's own +median+ when it offers
# one, otherwise falls back to the 50th percentile.
def median
  return @data.median if @data.respond_to?(:median)

  percentile(50)
end
|
|
29
|
+
|
|
30
|
+
# Most frequent value(s) of the vector.
#
# The frequency table and its maximum are computed once up front; looking
# them up inside the filter block would rebuild the whole table for every
# distinct value.
#
# @return the single most frequent value, or a DaruLite::Vector holding all
#   of them when several values tie for the highest frequency
def mode
  table = frequencies
  highest = table.max
  modes = table.to_h.select { |_, occurrences| occurrences == highest }.keys
  modes.size > 1 ? DaruLite::Vector.new(modes) : modes.first
end
|
|
34
|
+
|
|
35
|
+
# Create a summary of count, mean, standard deviation, min and max of
|
|
36
|
+
# the vector in one shot.
|
|
37
|
+
#
|
|
38
|
+
# == Arguments
|
|
39
|
+
#
|
|
40
|
+
# +methods+ - An array with aggregation methods specified as symbols to
|
|
41
|
+
# be applied to vectors. Default is [:count, :mean, :std, :min,
|
|
42
|
+
# :max]. Methods will be applied in the specified order.
|
|
43
|
+
# Builds a summary vector by calling each named aggregation on this vector.
#
# @param methods [Array<Symbol>, nil] aggregation method names, applied in
#   the given order; defaults to count, mean, std, min, max
# @return [DaruLite::Vector] results indexed by method name, named :statistics
def describe(methods = nil)
  methods ||= %i[count mean std min max]
  results = methods.map { |aggregation| send(aggregation) }
  DaruLite::Vector.new(results, index: methods, name: :statistics)
end
|
|
48
|
+
|
|
49
|
+
# Median absolute deviation: the median of each value's absolute distance
# from the vector's median.
def median_absolute_deviation
  centre = median
  deviations = recode { |value| (value - centre).abs }
  deviations.median
end
|
|
53
|
+
|
|
54
|
+
alias mad median_absolute_deviation
|
|
55
|
+
|
|
56
|
+
# Standard error: the sample standard deviation divided by the square root
# of the number of non-missing values.
def standard_error
  deviation = standard_deviation_sample
  valid_total = size - count_values(*DaruLite::MISSING_VALUES)
  deviation / Math.sqrt(valid_total)
end
|
|
59
|
+
|
|
60
|
+
# Sum of squared deviations, computed as sum(x^2) - (sum(x))^2 / N, where N
# is the number of non-missing values.
#
# Missing values are rejected before squaring: the divisor already excludes
# them, and iterating the raw data would raise on nil (or yield NaN) as soon
# as one missing value is present. This mirrors how sum_of_squares gathers
# its values.
#
# @return [Float]
def sum_of_squared_deviation
  present = reject_values(*DaruLite::MISSING_VALUES).data
  squares = present.inject(0) { |total, value| (value**2) + total }
  valid_total = size - count_values(*DaruLite::MISSING_VALUES)

  (squares - (sum**2).quo(valid_total).to_f).to_f
end
|
|
66
|
+
|
|
67
|
+
# Retrieve unique values of non-nil data
|
|
68
|
+
# Distinct non-missing values of the vector, with the index reset.
def factors
  present = reject_values(*DaruLite::MISSING_VALUES)
  distinct = present.uniq
  distinct.reset_index!
end
|
|
71
|
+
|
|
72
|
+
if RUBY_VERSION >= '2.2'
|
|
73
|
+
# Returns the maximum value(s) present in the vector, with an optional comparator block.
|
|
74
|
+
#
|
|
75
|
+
# @param size [Integer] Number of maximum values to return. Defaults to nil.
|
|
76
|
+
#
|
|
77
|
+
# @example
|
|
78
|
+
#
|
|
79
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
|
80
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
|
81
|
+
# #=>
|
|
82
|
+
# # #<DaruLite::Vector(3)>
|
|
83
|
+
# # t Tyrion
|
|
84
|
+
# # d Daenerys
|
|
85
|
+
# # j Jon Starkgaryen
|
|
86
|
+
#
|
|
87
|
+
# dv.max
|
|
88
|
+
# #=> "Tyrion"
|
|
89
|
+
#
|
|
90
|
+
# dv.max(2) { |a,b| a.size <=> b.size }
|
|
91
|
+
# #=> ["Jon Starkgaryen","Daenerys"]
|
|
92
|
+
# Largest non-missing value(s), optionally ordered by a comparator block.
#
# @param size [Integer, nil] how many maxima to return; nil returns one value
# @return a single value, or an Array when +size+ is given
def max(size = nil, &block)
  candidates = reject_values(*DaruLite::MISSING_VALUES).to_a
  candidates.max(size, &block)
end
|
|
95
|
+
|
|
96
|
+
# Returns the maximum value(s) present in the vector, with a compulsory object block.
|
|
97
|
+
#
|
|
98
|
+
# @param size [Integer] Number of maximum values to return. Defaults to nil.
|
|
99
|
+
#
|
|
100
|
+
# @example
|
|
101
|
+
#
|
|
102
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
|
103
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
|
104
|
+
# #=>
|
|
105
|
+
# # #<DaruLite::Vector(3)>
|
|
106
|
+
# # t Tyrion
|
|
107
|
+
# # d Daenerys
|
|
108
|
+
# # j Jon Starkgaryen
|
|
109
|
+
#
|
|
110
|
+
# dv.max_by(2) { |i| i.size }
|
|
111
|
+
# #=> ["Jon Starkgaryen","Daenerys"]
|
|
112
|
+
# Largest non-missing value(s) ranked by the value the block returns.
#
# @param size [Integer, nil] how many maxima to return; nil returns one value
# @raise [ArgumentError] when no block is given
def max_by(size = nil, &block)
  raise ArgumentError, 'Expected compulsory object block in max_by method' if block.nil?

  candidates = reject_values(*DaruLite::MISSING_VALUES).to_a
  candidates.max_by(size, &block)
end
|
|
117
|
+
|
|
118
|
+
# Returns the minimum value(s) present in the vector, with an optional comparator block.
|
|
119
|
+
#
|
|
120
|
+
# @param size [Integer] Number of minimum values to return. Defaults to nil.
|
|
121
|
+
#
|
|
122
|
+
# @example
|
|
123
|
+
#
|
|
124
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
|
125
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
|
126
|
+
# #=>
|
|
127
|
+
# # #<DaruLite::Vector(3)>
|
|
128
|
+
# # t Tyrion
|
|
129
|
+
# # d Daenerys
|
|
130
|
+
# # j Jon Starkgaryen
|
|
131
|
+
#
|
|
132
|
+
# dv.min
|
|
133
|
+
# #=> "Daenerys"
|
|
134
|
+
#
|
|
135
|
+
# dv.min(2) { |a,b| a.size <=> b.size }
|
|
136
|
+
# #=> ["Tyrion","Daenerys"]
|
|
137
|
+
# Smallest non-missing value(s), optionally ordered by a comparator block.
#
# @param size [Integer, nil] how many minima to return; nil returns one value
# @return a single value, or an Array when +size+ is given
def min(size = nil, &block)
  candidates = reject_values(*DaruLite::MISSING_VALUES).to_a
  candidates.min(size, &block)
end
|
|
140
|
+
|
|
141
|
+
# Returns the minimum value(s) present in the vector, with a compulsory object block.
|
|
142
|
+
#
|
|
143
|
+
# @param size [Integer] Number of minimum values to return. Defaults to nil.
|
|
144
|
+
#
|
|
145
|
+
# @example
|
|
146
|
+
#
|
|
147
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
|
148
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
|
149
|
+
# #=>
|
|
150
|
+
# # #<DaruLite::Vector(3)>
|
|
151
|
+
# # t Tyrion
|
|
152
|
+
# # d Daenerys
|
|
153
|
+
# # j Jon Starkgaryen
|
|
154
|
+
#
|
|
155
|
+
# dv.min_by(2) { |i| i.size }
|
|
156
|
+
# #=> ["Tyrion","Daenerys"]
|
|
157
|
+
# Smallest non-missing value(s) ranked by the value the block returns.
#
# @param size [Integer, nil] how many minima to return; nil returns one value
# @raise [ArgumentError] when no block is given
def min_by(size = nil, &block)
  raise ArgumentError, 'Expected compulsory object block in min_by method' if block.nil?

  candidates = reject_values(*DaruLite::MISSING_VALUES).to_a
  candidates.min_by(size, &block)
end
|
|
162
|
+
else
|
|
163
|
+
# Returns the maximum value(s) present in the vector, with an optional comparator block.
|
|
164
|
+
#
|
|
165
|
+
# @param size [Integer] Number of maximum values to return. Defaults to nil.
|
|
166
|
+
#
|
|
167
|
+
# @example
|
|
168
|
+
#
|
|
169
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
|
170
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
|
171
|
+
# #=>
|
|
172
|
+
# # #<DaruLite::Vector(3)>
|
|
173
|
+
# # t Tyrion
|
|
174
|
+
# # d Daenerys
|
|
175
|
+
# # j Jon Starkgaryen
|
|
176
|
+
#
|
|
177
|
+
# dv.max
|
|
178
|
+
# #=> "Tyrion"
|
|
179
|
+
#
|
|
180
|
+
# dv.max(2) { |a,b| a.size <=> b.size }
|
|
181
|
+
# #=> ["Jon Starkgaryen","Daenerys"]
|
|
182
|
+
# Fallback for older Rubies: largest non-missing value(s), found by sorting
# (optionally with a comparator block) and reading from the top.
#
# @param size [Integer, nil] how many maxima to return; nil returns one value
def max(size = nil, &block)
  # Named +selector+ so the local does not shadow the #range method.
  selector = size.nil? ? 0 : (0..size - 1)
  descending = reject_values(*DaruLite::MISSING_VALUES).to_a.sort(&block).reverse
  descending[selector]
end
|
|
186
|
+
|
|
187
|
+
# Returns the maximum value(s) present in the vector, with a compulsory object block.
|
|
188
|
+
#
|
|
189
|
+
# @param size [Integer] Number of maximum values to return. Defaults to nil.
|
|
190
|
+
#
|
|
191
|
+
# @example
|
|
192
|
+
#
|
|
193
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
|
194
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
|
195
|
+
# #=>
|
|
196
|
+
# # #<DaruLite::Vector(3)>
|
|
197
|
+
# # t Tyrion
|
|
198
|
+
# # d Daenerys
|
|
199
|
+
# # j Jon Starkgaryen
|
|
200
|
+
#
|
|
201
|
+
# dv.max_by(2) { |i| i.size }
|
|
202
|
+
# #=> ["Jon Starkgaryen","Daenerys"]
|
|
203
|
+
# Fallback for older Rubies: largest non-missing value(s) ranked by the
# block's result, found by sorting and reading from the top.
#
# @param size [Integer, nil] how many maxima to return; nil returns one value
# @raise [ArgumentError] when no block is given
def max_by(size = nil, &block)
  raise ArgumentError, 'Expected compulsory object block in max_by method' if block.nil?

  selector = size.nil? ? 0 : (0..size - 1)
  descending = reject_values(*DaruLite::MISSING_VALUES).to_a.sort_by(&block).reverse
  descending[selector]
end
|
|
208
|
+
|
|
209
|
+
# Returns the minimum value(s) present in the vector, with an optional comparator block.
|
|
210
|
+
#
|
|
211
|
+
# @param size [Integer] Number of minimum values to return. Defaults to nil.
|
|
212
|
+
#
|
|
213
|
+
# @example
|
|
214
|
+
#
|
|
215
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
|
216
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
|
217
|
+
# #=>
|
|
218
|
+
# # #<DaruLite::Vector(3)>
|
|
219
|
+
# # t Tyrion
|
|
220
|
+
# # d Daenerys
|
|
221
|
+
# # j Jon Starkgaryen
|
|
222
|
+
#
|
|
223
|
+
# dv.min
|
|
224
|
+
# #=> "Daenerys"
|
|
225
|
+
#
|
|
226
|
+
# dv.min(2) { |a,b| a.size <=> b.size }
|
|
227
|
+
# #=> ["Tyrion","Daenerys"]
|
|
228
|
+
# Fallback for older Rubies: smallest non-missing value(s), found by sorting
# (optionally with a comparator block) and reading from the bottom.
#
# @param size [Integer, nil] how many minima to return; nil returns one value
def min(size = nil, &block)
  # Named +selector+ so the local does not shadow the #range method.
  selector = size.nil? ? 0 : (0..size - 1)
  ascending = reject_values(*DaruLite::MISSING_VALUES).to_a.sort(&block)
  ascending[selector]
end
|
|
232
|
+
|
|
233
|
+
# Returns the minimum value(s) present in the vector, with a compulsory object block.
|
|
234
|
+
#
|
|
235
|
+
# @param size [Integer] Number of minimum values to return. Defaults to nil.
|
|
236
|
+
#
|
|
237
|
+
# @example
|
|
238
|
+
#
|
|
239
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
|
240
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
|
241
|
+
# #=>
|
|
242
|
+
# # #<DaruLite::Vector(3)>
|
|
243
|
+
# # t Tyrion
|
|
244
|
+
# # d Daenerys
|
|
245
|
+
# # j Jon Starkgaryen
|
|
246
|
+
#
|
|
247
|
+
# dv.min_by { |i| i.size }
|
|
248
|
+
# #=> "Tyrion"
|
|
249
|
+
#
|
|
250
|
+
# dv.min_by(2) { |i| i.size }
|
|
251
|
+
# #=> ["Tyrion","Daenerys"]
|
|
252
|
+
# Fallback for older Rubies: smallest non-missing value(s) ranked by the
# block's result, found by sorting and reading from the bottom.
#
# @param size [Integer, nil] how many minima to return; nil returns one value
# @raise [ArgumentError] when no block is given
def min_by(size = nil, &block)
  raise ArgumentError, 'Expected compulsory object block in min_by method' if block.nil?

  selector = size.nil? ? 0 : (0..size - 1)
  ascending = reject_values(*DaruLite::MISSING_VALUES).to_a.sort_by(&block)
  ascending[selector]
end
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
# Returns the index of the maximum value(s) present in the vector, with an optional
|
|
260
|
+
# comparator block.
|
|
261
|
+
#
|
|
262
|
+
# @param size [Integer] Number of maximum indices to return. Defaults to nil.
|
|
263
|
+
#
|
|
264
|
+
# @example
|
|
265
|
+
#
|
|
266
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
|
267
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
|
268
|
+
# #=>
|
|
269
|
+
# # #<DaruLite::Vector(3)>
|
|
270
|
+
# # t Tyrion
|
|
271
|
+
# # d Daenerys
|
|
272
|
+
# # j Jon Starkgaryen
|
|
273
|
+
#
|
|
274
|
+
# dv.index_of_max
|
|
275
|
+
# #=> :t
|
|
276
|
+
#
|
|
277
|
+
# dv.index_of_max(2) { |a,b| a.size <=> b.size }
|
|
278
|
+
# #=> [:j, :d]
|
|
279
|
+
# Index label(s) of the largest value(s), looked up among non-missing values.
#
# @param size [Integer, nil] how many maxima to locate; nil locates one
# @return one index label, or an Array of labels when #max returns an Array
def index_of_max(size = nil, &block)
  winners = max(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  return present.index_of(winners) unless winners.is_a?(Array)

  winners.map { |value| present.index_of(value) }
end
|
|
284
|
+
|
|
285
|
+
# Returns the index of the maximum value(s) present in the vector, with a compulsory
|
|
286
|
+
# object block.
|
|
287
|
+
#
|
|
288
|
+
# @param size [Integer] Number of maximum indices to return. Defaults to nil.
|
|
289
|
+
#
|
|
290
|
+
# @example
|
|
291
|
+
#
|
|
292
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
|
293
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
|
294
|
+
# #=>
|
|
295
|
+
# # #<DaruLite::Vector(3)>
|
|
296
|
+
# # t Tyrion
|
|
297
|
+
# # d Daenerys
|
|
298
|
+
# # j Jon Starkgaryen
|
|
299
|
+
#
|
|
300
|
+
# dv.index_of_max_by(2) { |i| i.size }
|
|
301
|
+
# #=> [:j, :d]
|
|
302
|
+
# Index label(s) of the value(s) #max_by selects, looked up among
# non-missing values.
#
# @param size [Integer, nil] how many maxima to locate; nil locates one
# @return one index label, or an Array of labels when #max_by returns an Array
def index_of_max_by(size = nil, &block)
  winners = max_by(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  return present.index_of(winners) unless winners.is_a?(Array)

  winners.map { |value| present.index_of(value) }
end
|
|
307
|
+
|
|
308
|
+
# Returns the index of the minimum value(s) present in the vector, with an optional
|
|
309
|
+
# comparator block.
|
|
310
|
+
#
|
|
311
|
+
# @param size [Integer] Number of minimum indices to return. Defaults to nil.
|
|
312
|
+
#
|
|
313
|
+
# @example
|
|
314
|
+
#
|
|
315
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
|
316
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
|
317
|
+
# #=>
|
|
318
|
+
# # #<DaruLite::Vector(3)>
|
|
319
|
+
# # t Tyrion
|
|
320
|
+
# # d Daenerys
|
|
321
|
+
# # j Jon Starkgaryen
|
|
322
|
+
#
|
|
323
|
+
# dv.index_of_min
|
|
324
|
+
# #=> :d
|
|
325
|
+
#
|
|
326
|
+
# dv.index_of_min(2) { |a,b| a.size <=> b.size }
|
|
327
|
+
# #=> [:t, :d]
|
|
328
|
+
# Index label(s) of the smallest value(s), looked up among non-missing values.
#
# @param size [Integer, nil] how many minima to locate; nil locates one
# @return one index label, or an Array of labels when #min returns an Array
def index_of_min(size = nil, &block)
  winners = min(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  return present.index_of(winners) unless winners.is_a?(Array)

  winners.map { |value| present.index_of(value) }
end
|
|
333
|
+
|
|
334
|
+
# Returns the index of the minimum value(s) present in the vector, with a compulsory
|
|
335
|
+
# object block.
|
|
336
|
+
#
|
|
337
|
+
# @param size [Integer] Number of minimum indices to return. Defaults to nil.
|
|
338
|
+
#
|
|
339
|
+
# @example
|
|
340
|
+
#
|
|
341
|
+
# dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
|
|
342
|
+
# index: DaruLite::Index.new([:t, :d, :j])
|
|
343
|
+
# #=>
|
|
344
|
+
# # #<DaruLite::Vector(3)>
|
|
345
|
+
# # t Tyrion
|
|
346
|
+
# # d Daenerys
|
|
347
|
+
# # j Jon Starkgaryen
|
|
348
|
+
#
|
|
349
|
+
# dv.index_of_min_by(2) { |i| i.size }
|
|
350
|
+
# #=> [:t, :d]
|
|
351
|
+
# Index label(s) of the value(s) #min_by selects, looked up among
# non-missing values.
#
# @param size [Integer, nil] how many minima to locate; nil locates one
# @return one index label, or an Array of labels when #min_by returns an Array
def index_of_min_by(size = nil, &block)
  winners = min_by(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  return present.index_of(winners) unless winners.is_a?(Array)

  winners.map { |value| present.index_of(value) }
end
|
|
356
|
+
|
|
357
|
+
# Return the maximum element present in the Vector, as a Vector.
|
|
358
|
+
# @return [DaruLite::Vector]
|
|
359
|
+
# Wraps the maximum of the raw data in a one-element vector keyed by that
# value's index label, carrying over this vector's name and dtype.
#
# @return [DaruLite::Vector]
def max_index
  peak = @data.max
  label = index_of(peak)
  DaruLite::Vector.new({ label => peak }, name: @name, dtype: @dtype)
end
|
|
363
|
+
|
|
364
|
+
# Counts how often each non-nil element occurs.
#
# @return [DaruLite::Vector] built from a value => occurrences hash
def frequencies
  tally = @data.each_with_object(Hash.new(0)) do |item, counts|
    next if item.nil?

    counts[item] += 1
  end
  DaruLite::Vector.new(tally)
end
|
|
371
|
+
|
|
372
|
+
alias freqs frequencies
|
|
373
|
+
deprecate :freqs, :frequencies, 2016, 10
|
|
374
|
+
|
|
375
|
+
# Share of the non-missing values taken by each distinct value.
#
# @return [Hash] value => fraction (Float) of the non-missing count
def proportions
  valid_total = (size - count_values(*DaruLite::MISSING_VALUES)).to_f
  frequencies.to_h.transform_values { |occurrences| occurrences / valid_total }
end
|
|
381
|
+
|
|
382
|
+
# Replaces every value with its rank; tied values share the average of the
# positions they occupy in sorted order.
def ranked
  seen = 0 # how many elements sort strictly before the current value
  rank_of = {}
  frequencies.to_h.sort.each do |value, occurrences|
    # Average of the first (seen + 1) and last (seen + occurrences) positions.
    rank_of[value] = ((seen + 1) + (seen + occurrences)).quo(2)
    seen += occurrences
  end

  recode { |value| rank_of[value] }
end
|
|
391
|
+
|
|
392
|
+
# Coefficient of variation: sample standard deviation divided by the mean.
def coefficient_of_variation
  deviation = standard_deviation_sample
  deviation / mean
end
|
|
395
|
+
|
|
396
|
+
# Retrieves number of cases which comply condition. If block given,
|
|
397
|
+
# retrieves number of instances where block returns true. If other
|
|
398
|
+
# values given, retrieves the frequency for this value. If no value
|
|
399
|
+
# given, counts the number of non-nil elements in the Vector.
|
|
400
|
+
# Counts elements three ways: with a block, those for which the block is
# truthy; with a truthy +value+, the elements equal to it; otherwise the
# elements that are not missing values.
#
# @param value [Object] value to count; a falsy value means "not given"
# @return [Integer]
def count(value = false, &block)
  return @data.count(&block) if block
  return count { |element| element == value } if value

  size - indexes(*DaruLite::MISSING_VALUES).size
end
|
|
409
|
+
|
|
410
|
+
# Count number of occurrences of each value in the Vector
|
|
411
|
+
# Counts how often every element occurs, nil included.
#
# @return [DaruLite::Vector] built from a value => occurrences hash
def value_counts
  tally = Hash.new(0)
  @data.each_with_object(tally) { |item, counts| counts[item] += 1 }
  DaruLite::Vector.new(tally)
end
|
|
418
|
+
|
|
419
|
+
# Fraction of the non-missing values that equal +value+.
#
# @param value [Object] value to look up in the frequency table (default 1)
# @return [Float]
def proportion(value = 1)
  occurrences = frequencies[value]
  valid_total = size - count_values(*DaruLite::MISSING_VALUES)
  occurrences.quo(valid_total).to_f
end
|
|
422
|
+
|
|
423
|
+
# Sample variance with denominator (N-1)
|
|
424
|
+
# Sample variance (divisor N - 1, N being the number of non-missing values).
# Delegates to the data wrapper when it implements +variance_sample+.
#
# @param m [Numeric, nil] mean to use; computed when omitted
def variance_sample(m = nil)
  m ||= mean
  return @data.variance_sample(m) if @data.respond_to?(:variance_sample)

  degrees_of_freedom = size - count_values(*DaruLite::MISSING_VALUES) - 1
  sum_of_squares(m).quo(degrees_of_freedom)
end
|
|
432
|
+
|
|
433
|
+
# Population variance with denominator (N)
|
|
434
|
+
# Population variance (divisor N, N being the number of non-missing values).
# Delegates to the data wrapper when it implements +variance_population+.
#
# @param m [Numeric, nil] mean to use; computed when omitted
def variance_population(m = nil)
  m ||= mean
  return @data.variance_population(m) if @data.respond_to?(:variance_population)

  valid_total = size - count_values(*DaruLite::MISSING_VALUES)
  sum_of_squares(m).quo(valid_total).to_f
end
|
|
442
|
+
|
|
443
|
+
# Sample covariance with denominator (N-1)
|
|
444
|
+
# Sample covariance with +other+ (divisor N - 1).
#
# @param other a vector of the same size
# @raise [ArgumentError] when the two sizes differ
def covariance_sample(other)
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  degrees_of_freedom = size - count_values(*DaruLite::MISSING_VALUES) - 1
  covariance_sum(other) / degrees_of_freedom
end
|
|
448
|
+
|
|
449
|
+
# Population covariance with denominator (N)
|
|
450
|
+
# Population covariance with +other+ (divisor N).
#
# @param other a vector of the same size
# @raise [ArgumentError] when the two sizes differ
def covariance_population(other)
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  valid_total = size - count_values(*DaruLite::MISSING_VALUES)
  covariance_sum(other) / valid_total
end
|
|
454
|
+
|
|
455
|
+
# Sum of squared distances between each non-missing value and +m+.
#
# @param m [Numeric, nil] centre to measure from; the mean when omitted
def sum_of_squares(m = nil)
  centre = m || mean
  present = reject_values(*DaruLite::MISSING_VALUES).data
  present.inject(0) { |total, value| total + ((value - centre)**2) }
end
|
|
461
|
+
|
|
462
|
+
# Population standard deviation. Delegates to the data wrapper when it
# implements the method, otherwise takes the square root of the population
# variance.
#
# @param m [Numeric, nil] mean to use; computed when omitted
def standard_deviation_population(m = nil)
  m ||= mean
  return @data.standard_deviation_population(m) if @data.respond_to?(:standard_deviation_population)

  Math.sqrt(variance_population(m))
end
|
|
470
|
+
|
|
471
|
+
# Sample standard deviation. Delegates to the data wrapper when it
# implements the method, otherwise takes the square root of the sample
# variance.
#
# @param m [Numeric, nil] mean to use; computed when omitted
def standard_deviation_sample(m = nil)
  m ||= mean
  return @data.standard_deviation_sample(m) if @data.respond_to?(:standard_deviation_sample)

  Math.sqrt(variance_sample(m))
end
|
|
479
|
+
|
|
480
|
+
# Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3)
|
|
481
|
+
# Skewness, computed as sum((x - mean)^3) / (N * sample_std_dev^3) with N the
# number of non-missing values. Delegates to the data wrapper when it
# implements +skew+ (in which case +m+ is not used).
#
# Missing values are rejected before the cubes are accumulated: the divisor
# already excludes them, and iterating the raw data would raise on nil (or
# yield NaN) as soon as one missing value is present. This mirrors how
# sum_of_squares gathers its values.
#
# @param m [Numeric, nil] mean to use; computed when omitted
def skew(m = nil)
  return @data.skew if @data.respond_to?(:skew)

  m ||= mean
  present = reject_values(*DaruLite::MISSING_VALUES).data
  third_moment_sum = present.inject(0) { |total, value| total + ((value - m)**3) }
  valid_total = size - indexes(*DaruLite::MISSING_VALUES).size
  third_moment_sum.quo(valid_total * (standard_deviation_sample(m)**3))
end
|
|
490
|
+
|
|
491
|
+
# Excess kurtosis, computed as sum((x - mean)^4) / (N * sample_std_dev^4) - 3
# with N the number of non-missing values. Delegates to the data wrapper when
# it implements +kurtosis+ (in which case +m+ is not used).
#
# Missing values are rejected before the fourth powers are accumulated: the
# divisor already excludes them, and iterating the raw data would raise on
# nil (or yield NaN) as soon as one missing value is present. This mirrors
# how sum_of_squares gathers its values.
#
# @param m [Numeric, nil] mean to use; computed when omitted
def kurtosis(m = nil)
  return @data.kurtosis if @data.respond_to?(:kurtosis)

  m ||= mean
  present = reject_values(*DaruLite::MISSING_VALUES).data
  fourth_moment_sum = present.inject(0) { |total, value| total + ((value - m)**4) }
  valid_total = size - indexes(*DaruLite::MISSING_VALUES).size
  fourth_moment_sum.quo(valid_total * (standard_deviation_sample(m)**4)) - 3
end
|
|
500
|
+
|
|
501
|
+
# Mean absolute deviation of the non-missing values around +m+.
#
# @param m [Numeric, nil] centre to measure from; the mean when omitted
def average_deviation_population(m = nil)
  must_be_numeric!
  m ||= mean
  present = reject_values(*DaruLite::MISSING_VALUES).data
  total_deviation = present.inject(0) { |total, value| (value - m).abs + total }
  total_deviation.quo(size - count_values(*DaruLite::MISSING_VALUES))
end
|
|
508
|
+
|
|
509
|
+
# Returns the value of the percentile q
|
|
510
|
+
#
|
|
511
|
+
# Accepts an optional second argument specifying the strategy to interpolate
|
|
512
|
+
# when the requested percentile lies between two data points a and b
|
|
513
|
+
# Valid strategies are:
|
|
514
|
+
# * :midpoint (Default): (a + b) / 2
|
|
515
|
+
# * :linear : a + (b - a) * d where d is the decimal part of the index between a and b.
|
|
516
|
+
# == References
|
|
517
|
+
#
|
|
518
|
+
# This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
|
|
519
|
+
# Value at percentile +q+, using the chosen interpolation strategy.
#
# @param q the percentile to compute
# @param strategy [Symbol] :midpoint (default) or :linear
# @raise [ArgumentError] for any other strategy
def percentile(q, strategy = :midpoint)
  if strategy == :midpoint
    midpoint_percentile(q)
  elsif strategy == :linear
    linear_percentile(q)
  else
    raise ArgumentError, "Unknown strategy #{strategy}"
  end
end
|
|
529
|
+
|
|
530
|
+
# Dichotomize the vector with 0 and 1, based on lowest value.
|
|
531
|
+
# If parameter is defined, this value and lower will be 0
|
|
532
|
+
# and higher, 1.
|
|
533
|
+
# Maps every value to 0 or 1: values greater than +low+ become 1, all other
# non-nil values become 0, and nil stays nil.
#
# @param low threshold; defaults to the smallest factor of the vector
def dichotomize(low = nil)
  low ||= factors.min

  recode do |value|
    next nil if value.nil?

    value > low ? 1 : 0
  end
end
|
|
546
|
+
|
|
547
|
+
# Center data by subtracting the mean from each non-nil value.
|
|
548
|
+
# Centres the data by subtracting the mean from this vector.
def center
  average = mean
  self - average
end
|
|
551
|
+
|
|
552
|
+
# Standardize data.
|
|
553
|
+
#
|
|
554
|
+
# == Arguments
|
|
555
|
+
#
|
|
556
|
+
# * use_population - Pass as *true* if you want to use population
|
|
557
|
+
# standard deviation instead of sample standard deviation.
|
|
558
|
+
# Standardizes the data using the mean and a standard deviation.
#
# @param use_population [Boolean] true to use the population standard
#   deviation instead of the sample standard deviation
# @return [DaruLite::Vector] an all-nil vector of the same size when the mean
#   is nil or the standard deviation is zero; otherwise the result of
#   vector_standardized_compute
def standardize(use_population = false)
  average = mean
  spread = use_population ? sdp : sds
  degenerate = average.nil? || spread.to_d == BigDecimal('0.0')
  return DaruLite::Vector.new([nil] * size) if degenerate

  vector_standardized_compute(average, spread)
end
|
|
565
|
+
|
|
566
|
+
# :nocov:
|
|
567
|
+
# Applies the Box-Cox transform to every non-nil value: log(x) when +lambda+
# is zero, otherwise (x^lambda - 1) / lambda. nil values stay nil.
def box_cox_transformation(lambda) # :nodoc:
  must_be_numeric!

  recode do |value|
    next if value.nil?

    lambda.zero? ? Math.log(value) : ((value**lambda) - 1).quo(lambda)
  end
end
|
|
580
|
+
# :nocov:
|
|
581
|
+
|
|
582
|
+
# Replace each non-nil value in the vector with its percentile.
|
|
583
|
+
# Converts each rank into a percentage of the non-missing count; nil ranks
# stay nil.
def vector_percentile
  valid_total = size - indexes(*DaruLite::MISSING_VALUES).size
  ranked.recode! do |rank|
    next if rank.nil?

    (rank.quo(valid_total) * 100).to_f
  end
end
|
|
587
|
+
|
|
588
|
+
# Computes (x - m) / sd for every non-nil element, delegating to the data
# wrapper when it implements the method itself.
#
# @param m mean to subtract
# @param sd standard deviation to divide by
def vector_standardized_compute(m, sd)
  return @data.vector_standardized_compute(m, sd) if @data.respond_to?(:vector_standardized_compute)

  scaled = @data.map { |value| value.nil? ? nil : (value.to_f - m).quo(sd) }
  DaruLite::Vector.new(scaled, index: index, name: name, dtype: dtype)
end
|
|
596
|
+
|
|
597
|
+
# Computes x - m for every non-nil element, delegating to the data wrapper
# when it implements the method itself.
#
# @param m value to subtract
def vector_centered_compute(m)
  return @data.vector_centered_compute(m) if @data.respond_to?(:vector_centered_compute)

  shifted = @data.map { |value| value.nil? ? nil : value.to_f - m }
  DaruLite::Vector.new(shifted, index: index, name: name, dtype: dtype)
end
|
|
605
|
+
|
|
606
|
+
# Returns a random sample of size n, with replacement,
|
|
607
|
+
# only with non-nil data.
|
|
608
|
+
#
|
|
609
|
+
# In all the trials, every item has the same probability
|
|
610
|
+
# of being selected.
|
|
611
|
+
# Draws +sample+ random elements, with replacement, from the non-missing
# values. Delegates to the data wrapper when it implements the method.
#
# @param sample [Integer] number of draws (default 1)
# @return [Array] the drawn values (fallback path)
def sample_with_replacement(sample = 1)
  return @data.sample_with_replacement(sample) if @data.respond_to?(:sample_with_replacement)

  # Only build a filtered copy when there is something to filter out.
  pool = indexes(*DaruLite::MISSING_VALUES).empty? ? self : reject_values(*DaruLite::MISSING_VALUES)
  pool_size = pool.size
  (0...sample).map { pool[rand(pool_size)] }
end
|
|
620
|
+
|
|
621
|
+
# Returns a random sample of size n, without replacement,
|
|
622
|
+
# only with valid data.
|
|
623
|
+
#
|
|
624
|
+
# Every element could only be selected once.
|
|
625
|
+
#
|
|
626
|
+
# A sample of the same size of the vector is the vector itself.
|
|
627
|
+
# Draws +sample+ random elements without replacement. Delegates to the data
# wrapper when it implements the method, otherwise to
# raw_sample_without_replacement.
#
# @param sample [Integer] number of draws (default 1)
def sample_without_replacement(sample = 1)
  return @data.sample_without_replacement(sample) if @data.respond_to?(:sample_without_replacement)

  raw_sample_without_replacement(sample)
end
|
|
634
|
+
|
|
635
|
+
# The percent_change method computes the percent change over
|
|
636
|
+
# the given number of periods.
|
|
637
|
+
#
|
|
638
|
+
# @param [Integer] periods (1) number of nils to insert at the beginning.
|
|
639
|
+
#
|
|
640
|
+
# @example
|
|
641
|
+
#
|
|
642
|
+
# vector = DaruLite::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
|
|
643
|
+
# vector.percent_change
|
|
644
|
+
# #=>
|
|
645
|
+
# # <DaruLite::Vector:28713060 @name = nil size: 5 >
|
|
646
|
+
# # nil
|
|
647
|
+
# # a
|
|
648
|
+
# # f 0.5
|
|
649
|
+
# # t 0.0
|
|
650
|
+
# # i 0.3333333333333333
|
|
651
|
+
# # k 0.25
|
|
652
|
+
# Relative change of each element against the most recent preceding truthy
# element. The first +periods+ positions, and any position where the current
# or the remembered previous value is missing, are nil.
#
# @param periods [Integer] number of leading positions forced to nil
# @return [DaruLite::Vector] same index and name as this vector
def percent_change(periods = 1)
  must_be_numeric!

  last_seen = nil
  changes = []
  @data.each_with_index do |current, position|
    skip = position < periods ||
           include_with_nan?(DaruLite::MISSING_VALUES, current) ||
           include_with_nan?(DaruLite::MISSING_VALUES, last_seen)
    changes << (skip ? nil : (current - last_seen) / last_seen.to_f)
    # Remember the value only after it has been used for this position.
    last_seen = current if current
  end

  DaruLite::Vector.new(changes, index: @index, name: @name)
end
|
|
668
|
+
|
|
669
|
+
# Performs the difference of the series.
|
|
670
|
+
# Note: The first difference of series is X(t) - X(t-1)
|
|
671
|
+
# But, second difference of series is NOT X(t) - X(t-2)
|
|
672
|
+
# It is the first difference of the first difference
|
|
673
|
+
# => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
|
|
674
|
+
#
|
|
675
|
+
# == Arguments
|
|
676
|
+
#
|
|
677
|
+
# * *max_lags*: integer, (default: 1), number of differences required.
|
|
678
|
+
#
|
|
679
|
+
# @example Using #diff
|
|
680
|
+
#
|
|
681
|
+
# ts = DaruLite::Vector.new((1..10).map { rand })
|
|
682
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
|
683
|
+
#
|
|
684
|
+
# ts.diff # => [nil, -0.46, 0.21, 0.27, ...]
|
|
685
|
+
#
|
|
686
|
+
# @return [DaruLite::Vector]
|
|
687
|
+
def diff(max_lags = 1)
|
|
688
|
+
ts = self
|
|
689
|
+
difference = []
|
|
690
|
+
max_lags.times do
|
|
691
|
+
difference = ts - ts.lag
|
|
692
|
+
ts = difference
|
|
693
|
+
end
|
|
694
|
+
difference
|
|
695
|
+
end
|
|
696
|
+
|
|
697
|
+
# Calculate the rolling function for a loopback value.
|
|
698
|
+
#
|
|
699
|
+
# @param [Symbol] function The rolling function to be applied. Can be
|
|
700
|
+
# any function applicable to DaruLite::Vector (:mean, :median, :count,
|
|
701
|
+
# :min, :max, etc.)
|
|
702
|
+
# @param [Integer] n (10) A positive value which serves as the loopback length.
|
|
703
|
+
# @return [DaruLite::Vector] Vector containing the rolling calculations.
|
|
704
|
+
# @example Using #rolling
|
|
705
|
+
# ts = DaruLite::Vector.new((1..100).map { rand })
|
|
706
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
|
707
|
+
# # first 9 observations are nil
|
|
708
|
+
# ts.rolling(:mean) # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
|
|
709
|
+
def rolling(function, n = 10)
|
|
710
|
+
DaruLite::Vector.new(
|
|
711
|
+
([nil] * (n - 1)) +
|
|
712
|
+
(0..(size - n)).map do |i|
|
|
713
|
+
DaruLite::Vector.new(@data[i...(i + n)]).send(function)
|
|
714
|
+
end, index: @index
|
|
715
|
+
)
|
|
716
|
+
end
|
|
717
|
+
|
|
718
|
+
# @!method rolling_mean
|
|
719
|
+
# Calculate rolling average
|
|
720
|
+
# @param [Integer] n (10) Loopback length
|
|
721
|
+
# @!method rolling_median
|
|
722
|
+
# Calculate rolling median
|
|
723
|
+
# @param [Integer] n (10) Loopback length
|
|
724
|
+
# @!method rolling_count
|
|
725
|
+
# Calculate rolling non-missing count
|
|
726
|
+
# @param [Integer] n (10) Loopback length
|
|
727
|
+
# @!method rolling_max
|
|
728
|
+
# Calculate rolling max value
|
|
729
|
+
# @param [Integer] n (10) Loopback length
|
|
730
|
+
# @!method rolling_min
|
|
731
|
+
# Calculate rolling min value
|
|
732
|
+
# @param [Integer] n (10) Loopback length
|
|
733
|
+
# @!method rolling_sum
|
|
734
|
+
# Calculate rolling sum
|
|
735
|
+
# @param [Integer] n (10) Loopback length
|
|
736
|
+
# @!method rolling_std
|
|
737
|
+
# Calculate rolling standard deviation
|
|
738
|
+
# @param [Integer] n (10) Loopback length
|
|
739
|
+
# @!method rolling_variance
|
|
740
|
+
# Calculate rolling variance
|
|
741
|
+
# @param [Integer] n (10) Loopback length
|
|
742
|
+
%i[count mean median max min sum std variance].each do |meth|
|
|
743
|
+
define_method(:"rolling_#{meth}") do |n = 10|
|
|
744
|
+
rolling(meth, n)
|
|
745
|
+
end
|
|
746
|
+
end
|
|
747
|
+
|
|
748
|
+
# Exponential Moving Average.
# Calculates an exponential moving average of the series using a
# specified parameter. If wilder is false (the default) then the EMA
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
# Welles Wilder smoother of 1 / n.
#
# Warning for EMA usage: EMAs are unstable for small series, as they
# use a lot more than n observations to calculate. The series is stable
# if the size of the series is >= 3.45 * (n + 1)
#
# The average is seeded with the plain mean of the n observations that
# start at the first non-nil value. When the series has no non-nil value,
# or fewer than n observations from that value onwards, nothing can be
# seeded and every element of the result is nil.
#
# @param [Integer] n (10) Loopback length.
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
# used for smoothing; if false, uses 2/(n+1) value
#
# @example Using ema
#
# ts = DaruLite::Vector.new((1..100).map { rand })
# # => [0.577..., 0.123..., 0.173..., 0.233..., ...]
#
# # first 9 observations are nil
# ts.ema # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]
#
# @return [DaruLite::Vector] Contains EMA
def ema(n = 10, wilder = false)
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
  # need to start everything from the first non-nil observation
  start = @data.index { |value| !value.nil? }
  # no seed window available: empty/all-nil series, or a window longer than
  # what is left of the series
  if start.nil? || start + n > size
    return DaruLite::Vector.new([nil] * size, index: @index, name: @name)
  end

  # everything before the end of the seed window is nil
  base = [nil] * (start + n - 1)
  # nth observation is just a moving average
  base << (@data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n)
  (start + n).upto(size - 1) do |i|
    # read by position from @data, like the seed window above; self[i] goes
    # through the vector's index lookup instead
    base << ((@data[i] * smoother) + ((1 - smoother) * base.last))
  end

  DaruLite::Vector.new(base, index: @index, name: @name)
end
|
|
785
|
+
|
|
786
|
+
# Exponential Moving Variance.
# Calculates an exponential moving variance of the series using a
# specified parameter. If wilder is false (the default) then the EMV
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
# Welles Wilder smoother of 1 / n.
#
# The recursion is seeded with the mean and the population variance of the
# n observations that start at the first non-nil value. When the series has
# no non-nil value, or fewer than n observations from that value onwards,
# nothing can be seeded and every element of the result is nil.
#
# @param [Integer] n (10) Loopback length.
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
# used for smoothing; if false, uses 2/(n+1) value
#
# @example Using emv
#
# ts = DaruLite::Vector.new((1..100).map { rand })
# # => [0.047..., 0.23..., 0.836..., 0.845..., ...]
#
# # first 9 observations are nil
# ts.emv # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]
#
# @return [DaruLite::Vector] contains EMV
def emv(n = 10, wilder = false) # rubocop:disable Metrics/AbcSize
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
  # need to start everything from the first non-nil observation
  start = @data.index { |value| !value.nil? }
  # no seed window available: empty/all-nil series, or a window longer than
  # what is left of the series
  if start.nil? || start + n > size
    return DaruLite::Vector.new([nil] * size, index: @index, name: @name)
  end

  seed = @data[start...(start + n)]
  # everything before the end of the seed window is nil
  var_base = [nil] * (start + n - 1)
  mean_base = [nil] * (start + n - 1)
  mean_base << (seed.inject(0.0) { |s, a| a.nil? ? s : s + a } / n)
  # nth observation is just a moving variance_population
  var_base << (seed.inject(0.0) { |s, x| x.nil? ? s : s + ((x - mean_base.last)**2) } / n)
  (start + n).upto(size - 1) do |i|
    # read by position from @data, like the seed window above; self[i] goes
    # through the vector's index lookup instead
    cur = @data[i]
    last = mean_base.last
    mean_base << ((cur * smoother) + ((1 - smoother) * last))
    # uses both the previous mean (last) and the freshly updated one
    var_base << (((1 - smoother) * var_base.last) + (smoother * (cur - last) * (cur - mean_base.last)))
  end
  DaruLite::Vector.new(var_base, index: @index, name: @name)
end
|
|
822
|
+
|
|
823
|
+
# Exponential Moving Standard Deviation.
|
|
824
|
+
# Calculates an exponential moving standard deviation of the series using a
|
|
825
|
+
# specified parameter. If wilder is false (the default) then the EMSD
|
|
826
|
+
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
|
|
827
|
+
# Welles Wilder smoother of 1 / n.
|
|
828
|
+
#
|
|
829
|
+
# @param [Integer] n (10) Loopback length.
|
|
830
|
+
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
|
831
|
+
# used for smoothing; if false, uses 2/(n+1) value
|
|
832
|
+
#
|
|
833
|
+
# @example Using emsd
|
|
834
|
+
#
|
|
835
|
+
# ts = DaruLite::Vector.new((1..100).map { rand })
|
|
836
|
+
# # => [0.400..., 0.727..., 0.862..., 0.013..., ...]
|
|
837
|
+
#
|
|
838
|
+
# # first 9 observations are nil
|
|
839
|
+
# ts.emsd # => [ ... nil, 0.285... , 0.258..., 0.243..., ...]
|
|
840
|
+
#
|
|
841
|
+
# @return [DaruLite::Vector] contains EMSD
|
|
842
|
+
def emsd(n = 10, wilder = false)
|
|
843
|
+
result = []
|
|
844
|
+
emv_return = emv(n, wilder)
|
|
845
|
+
emv_return.each do |d|
|
|
846
|
+
result << (d.nil? ? nil : Math.sqrt(d))
|
|
847
|
+
end
|
|
848
|
+
DaruLite::Vector.new(result, index: @index, name: @name)
|
|
849
|
+
end
|
|
850
|
+
|
|
851
|
+
# Moving Average Convergence-Divergence.
|
|
852
|
+
# Calculates the MACD (moving average convergence-divergence) of the time
|
|
853
|
+
# series.
|
|
854
|
+
# @see https://en.wikipedia.org/wiki/MACD
|
|
855
|
+
#
|
|
856
|
+
# @param fast [Integer] fast period of MACD (default 12)
|
|
857
|
+
# @param slow [Integer] slow period of MACD (default 26)
|
|
858
|
+
# @param signal [Integer] signal period of MACD (default 9)
|
|
859
|
+
#
|
|
860
|
+
# @example Create a series and calculate MACD values
|
|
861
|
+
# ts = DaruLite::Vector.new((1..100).map { rand })
|
|
862
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
|
863
|
+
# macdseries, macdsignal, macdhist = ts.macd
|
|
864
|
+
# macdseries, macdsignal, macdhist = ts.macd(13)
|
|
865
|
+
# macdseries, macdsignal, macdhist = ts.macd(12, 26, 5) # arguments are positional: fast, slow, signal
|
|
866
|
+
#
|
|
867
|
+
# @return [Array<DaruLite::Vector>] macdseries, macdsignal and macdhist are
|
|
868
|
+
# returned as an array of three DaruLite::Vectors
|
|
869
|
+
#
|
|
870
|
+
def macd(fast = 12, slow = 26, signal = 9)
|
|
871
|
+
macdseries = ema(fast) - ema(slow)
|
|
872
|
+
macdsignal = macdseries.ema(signal)
|
|
873
|
+
macdhist = macdseries - macdsignal
|
|
874
|
+
[macdseries, macdsignal, macdhist]
|
|
875
|
+
end
|
|
876
|
+
|
|
877
|
+
# Calculates the autocorrelation coefficients of the series.
|
|
878
|
+
#
|
|
879
|
+
# The first element is always 1, since that is the correlation
|
|
880
|
+
# of the series with itself.
|
|
881
|
+
#
|
|
882
|
+
# @example
|
|
883
|
+
# ts = DaruLite::Vector.new((1..100).map { rand })
|
|
884
|
+
#
|
|
885
|
+
# ts.acf # => array with first 21 autocorrelations
|
|
886
|
+
# ts.acf 3 # => array with 4 autocorrelations (lags 0 through 3)
|
|
887
|
+
def acf(max_lags = nil)
|
|
888
|
+
max_lags ||= (10 * Math.log10(size)).to_i
|
|
889
|
+
|
|
890
|
+
(0..max_lags).map do |i|
|
|
891
|
+
if i.zero?
|
|
892
|
+
1.0
|
|
893
|
+
else
|
|
894
|
+
m = mean
|
|
895
|
+
# can't use Pearson coefficient since the mean for the lagged series should
|
|
896
|
+
# be the same as the regular series
|
|
897
|
+
((self - m) * (lag(i) - m)).sum / variance_sample / (size - 1)
|
|
898
|
+
end
|
|
899
|
+
end
|
|
900
|
+
end
|
|
901
|
+
|
|
902
|
+
# Provides autocovariance.
|
|
903
|
+
#
|
|
904
|
+
# == Options
|
|
905
|
+
#
|
|
906
|
+
# * *:demean* = true; optional. Supply false if series is not to be demeaned
|
|
907
|
+
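# * *:unbiased* = true; optional. When true every lag is divided by the series size; when false lag k is divided by (size - k). NOTE: this is the reverse of the usual unbiased/biased naming.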
|
|
908
|
+
#
|
|
909
|
+
# == Returns
|
|
910
|
+
#
|
|
911
|
+
# Array of autocovariance values, one per lag from 0 up to (10 * log10(size)).to_i
|
|
912
|
+
def acvf(demean = true, unbiased = true)
|
|
913
|
+
demeaned_series = demean ? self - mean : self
|
|
914
|
+
|
|
915
|
+
n = (10 * Math.log10(size)).to_i + 1
|
|
916
|
+
m = mean
|
|
917
|
+
d = if unbiased
|
|
918
|
+
Array.new(size, size)
|
|
919
|
+
else
|
|
920
|
+
(1..size).to_a.reverse[0..n]
|
|
921
|
+
end
|
|
922
|
+
|
|
923
|
+
0.upto(n - 1).map do |i|
|
|
924
|
+
(demeaned_series * (lag(i) - m)).sum / d[i]
|
|
925
|
+
end
|
|
926
|
+
end
|
|
927
|
+
|
|
928
|
+
# Calculate cumulative sum of Vector
|
|
929
|
+
def cumsum
|
|
930
|
+
result = []
|
|
931
|
+
acc = 0
|
|
932
|
+
@data.each do |d|
|
|
933
|
+
if include_with_nan? DaruLite::MISSING_VALUES, d
|
|
934
|
+
result << nil
|
|
935
|
+
else
|
|
936
|
+
acc += d
|
|
937
|
+
result << acc
|
|
938
|
+
end
|
|
939
|
+
end
|
|
940
|
+
|
|
941
|
+
DaruLite::Vector.new(result, index: @index)
|
|
942
|
+
end
|
|
943
|
+
|
|
944
|
+
alias sdp standard_deviation_population
|
|
945
|
+
alias sds standard_deviation_sample
|
|
946
|
+
alias std sds
|
|
947
|
+
alias adp average_deviation_population
|
|
948
|
+
alias cov coefficient_of_variation
|
|
949
|
+
alias variance variance_sample
|
|
950
|
+
alias covariance covariance_sample
|
|
951
|
+
alias sd standard_deviation_sample
|
|
952
|
+
alias ss sum_of_squares
|
|
953
|
+
alias percentil percentile
|
|
954
|
+
alias se standard_error
|
|
955
|
+
|
|
956
|
+
private
|
|
957
|
+
|
|
958
|
+
def must_be_numeric!
|
|
959
|
+
numeric? or raise TypeError, 'Vector must be numeric'
|
|
960
|
+
end
|
|
961
|
+
|
|
962
|
+
def covariance_sum(other)
|
|
963
|
+
self_mean = mean
|
|
964
|
+
other_mean = other.mean
|
|
965
|
+
@data
|
|
966
|
+
.zip(other.data).inject(0) do |res, (d, o)|
|
|
967
|
+
res + if !d || !o
|
|
968
|
+
0
|
|
969
|
+
else
|
|
970
|
+
(d - self_mean) * (o - other_mean)
|
|
971
|
+
end
|
|
972
|
+
end
|
|
973
|
+
end
|
|
974
|
+
|
|
975
|
+
# Percentile of the non-missing values by the midpoint rule.
#
# The rank v = (valid count * q) / 100 is computed exactly. A fractional rank
# selects the single sorted value at position v.to_i. A whole rank averages
# the two sorted values on either side of it; at the top of the range
# (q = 100) there is no upper neighbour, so the last value is used for both.
#
# @param q [Numeric] percentile to compute, expected in 0..100
# @return [Numeric, nil] the percentile, or nil when the vector holds no
#   non-missing value
def midpoint_percentile(q)
  sorted = reject_values(*DaruLite::MISSING_VALUES).to_a.sort
  return nil if sorted.empty?

  v = ((size - count_values(*DaruLite::MISSING_VALUES)) * q).quo(100)
  return sorted[v.to_i] unless v.to_i == v

  lower = (v - 0.5).to_i
  # clamp so that q = 100 does not read one past the end of the array
  upper = [(v + 0.5).to_i, sorted.size - 1].min
  (sorted[lower].to_f + sorted[upper]).quo(2)
end
|
|
985
|
+
|
|
986
|
+
def linear_percentile(q)
|
|
987
|
+
sorted = reject_values(*DaruLite::MISSING_VALUES).to_a.sort
|
|
988
|
+
index = (q / 100.0) * ((size - count_values(*DaruLite::MISSING_VALUES)) + 1)
|
|
989
|
+
|
|
990
|
+
k = index.truncate
|
|
991
|
+
d = index % 1
|
|
992
|
+
|
|
993
|
+
if k.zero?
|
|
994
|
+
sorted[0]
|
|
995
|
+
elsif k >= sorted.size
|
|
996
|
+
sorted[-1]
|
|
997
|
+
else
|
|
998
|
+
sorted[k - 1] + (d * (sorted[k] - sorted[k - 1]))
|
|
999
|
+
end
|
|
1000
|
+
end
|
|
1001
|
+
|
|
1002
|
+
def raw_sample_without_replacement(sample)
|
|
1003
|
+
valid = indexes(*DaruLite::MISSING_VALUES).empty? ? self : reject_values(*DaruLite::MISSING_VALUES)
|
|
1004
|
+
raise ArgumentError, "Sample size couldn't be greater than n" if
|
|
1005
|
+
sample > valid.size
|
|
1006
|
+
|
|
1007
|
+
out = []
|
|
1008
|
+
size = valid.size
|
|
1009
|
+
while out.size < sample
|
|
1010
|
+
value = rand(size)
|
|
1011
|
+
out.push(value) unless out.include?(value)
|
|
1012
|
+
end
|
|
1013
|
+
|
|
1014
|
+
out.collect { |i| valid[i] }
|
|
1015
|
+
end
|
|
1016
|
+
end
|
|
1017
|
+
end
|
|
1018
|
+
end
|
|
1019
|
+
end
|