daru 0.1.3.1 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rspec +2 -1
- data/.rspec_formatter.rb +33 -0
- data/.rubocop.yml +26 -2
- data/History.md +38 -0
- data/README.md +22 -13
- data/Rakefile +50 -2
- data/benchmarks/csv_reading.rb +22 -0
- data/daru.gemspec +9 -2
- data/lib/daru.rb +36 -4
- data/lib/daru/accessors/array_wrapper.rb +6 -1
- data/lib/daru/accessors/dataframe_by_row.rb +10 -2
- data/lib/daru/accessors/gsl_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
- data/lib/daru/category.rb +935 -0
- data/lib/daru/core/group_by.rb +29 -38
- data/lib/daru/core/merge.rb +186 -145
- data/lib/daru/core/query.rb +22 -11
- data/lib/daru/dataframe.rb +976 -885
- data/lib/daru/date_time/index.rb +166 -166
- data/lib/daru/date_time/offsets.rb +66 -77
- data/lib/daru/formatters/table.rb +54 -0
- data/lib/daru/helpers/array.rb +40 -0
- data/lib/daru/index.rb +476 -73
- data/lib/daru/io/io.rb +66 -45
- data/lib/daru/io/sql_data_source.rb +33 -62
- data/lib/daru/iruby/helpers.rb +38 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
- data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru/iruby/templates/vector.html.erb +27 -0
- data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
- data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
- data/lib/daru/maths/arithmetic/vector.rb +4 -6
- data/lib/daru/maths/statistics/dataframe.rb +8 -15
- data/lib/daru/maths/statistics/vector.rb +120 -98
- data/lib/daru/monkeys.rb +12 -40
- data/lib/daru/plotting/gruff.rb +3 -0
- data/lib/daru/plotting/gruff/category.rb +49 -0
- data/lib/daru/plotting/gruff/dataframe.rb +91 -0
- data/lib/daru/plotting/gruff/vector.rb +57 -0
- data/lib/daru/plotting/nyaplot.rb +3 -0
- data/lib/daru/plotting/nyaplot/category.rb +34 -0
- data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
- data/lib/daru/plotting/nyaplot/vector.rb +46 -0
- data/lib/daru/vector.rb +694 -421
- data/lib/daru/version.rb +1 -1
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/spec/accessors/wrappers_spec.rb +2 -4
- data/spec/categorical_spec.rb +1734 -0
- data/spec/core/group_by_spec.rb +52 -2
- data/spec/core/merge_spec.rb +63 -2
- data/spec/core/query_spec.rb +236 -80
- data/spec/dataframe_spec.rb +1373 -79
- data/spec/date_time/data_spec.rb +3 -5
- data/spec/date_time/index_spec.rb +154 -17
- data/spec/date_time/offsets_spec.rb +3 -4
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/formatters/table_formatter_spec.rb +99 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +168 -0
- data/spec/index/index_spec.rb +283 -0
- data/spec/index/multi_index_spec.rb +570 -0
- data/spec/io/io_spec.rb +31 -4
- data/spec/io/sql_data_source_spec.rb +0 -1
- data/spec/iruby/dataframe_spec.rb +172 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +107 -0
- data/spec/math/arithmetic/dataframe_spec.rb +71 -13
- data/spec/math/arithmetic/vector_spec.rb +8 -10
- data/spec/math/statistics/dataframe_spec.rb +3 -5
- data/spec/math/statistics/vector_spec.rb +45 -55
- data/spec/monkeys_spec.rb +32 -9
- data/spec/plotting/dataframe_spec.rb +386 -0
- data/spec/plotting/vector_spec.rb +230 -0
- data/spec/shared/vector_display_spec.rb +215 -0
- data/spec/spec_helper.rb +23 -0
- data/spec/vector_spec.rb +905 -138
- metadata +143 -11
- data/.rubocop_todo.yml +0 -44
- data/lib/daru/plotting/dataframe.rb +0 -104
- data/lib/daru/plotting/vector.rb +0 -38
- data/spec/daru_spec.rb +0 -58
- data/spec/index_spec.rb +0 -375
@@ -0,0 +1,27 @@
|
|
1
|
+
<table>
|
2
|
+
<tr>
|
3
|
+
<th colspan="2">Daru::Vector(<%= size %>)<%= ':category' if category? %></th>
|
4
|
+
</tr>
|
5
|
+
<% if name %>
|
6
|
+
<tr>
|
7
|
+
<th> </th>
|
8
|
+
<th><%= name %></th>
|
9
|
+
</tr>
|
10
|
+
<% end %>
|
11
|
+
|
12
|
+
<% @index.each_with_index.first(threshold).each do |index, pos| %>
|
13
|
+
<tr>
|
14
|
+
<td><%= index %></td>
|
15
|
+
<td><%= self.at(pos) %></td>
|
16
|
+
</tr>
|
17
|
+
<% end %>
|
18
|
+
|
19
|
+
<% if size > threshold %>
|
20
|
+
<% last_index = @index.size-1 %>
|
21
|
+
<tr><td>...</td><td>...</td></tr>
|
22
|
+
<tr>
|
23
|
+
<td><%= last_index %></td>
|
24
|
+
<td><%= self.at last_index %></td>
|
25
|
+
</tr>
|
26
|
+
<% end %>
|
27
|
+
</table>
|
@@ -0,0 +1,36 @@
|
|
1
|
+
<table>
|
2
|
+
<tr>
|
3
|
+
<th colspan="<%= index.width+1 %>">Daru::Vector(<%= size %>)<%= ':category' if category? %></th>
|
4
|
+
</tr>
|
5
|
+
<% if name %>
|
6
|
+
<tr>
|
7
|
+
<th colspan="<%= index.width %>"> </th>
|
8
|
+
<th><%= name %></th>
|
9
|
+
</tr>
|
10
|
+
<% end %>
|
11
|
+
|
12
|
+
<% Daru::IRuby::Helpers.tuples_with_rowspans(@index).first(threshold).zip(to_a).each do |tuple, value| %>
|
13
|
+
<tr>
|
14
|
+
<% tuple.each do |idx, span| %>
|
15
|
+
<th rowspan="<%= span %>"><%= idx %></th>
|
16
|
+
<% end %>
|
17
|
+
<td><%= value %></td>
|
18
|
+
</tr>
|
19
|
+
<% end %>
|
20
|
+
|
21
|
+
<% if size > threshold %>
|
22
|
+
<% last_index = @index.to_a.last %>
|
23
|
+
<tr>
|
24
|
+
<% last_index.size.times do %>
|
25
|
+
<th>...</th>
|
26
|
+
<% end %>
|
27
|
+
<td>...</td>
|
28
|
+
</tr>
|
29
|
+
<tr>
|
30
|
+
<% last_index.each do |idx| %>
|
31
|
+
<th><%= idx %></td>
|
32
|
+
<% end %>
|
33
|
+
<td><%= self[last_index] %></td>
|
34
|
+
</tr>
|
35
|
+
<% end %>
|
36
|
+
</table>
|
@@ -62,30 +62,28 @@ module Daru
|
|
62
62
|
all_vectors = (vectors.to_a | other.vectors.to_a).sort
|
63
63
|
all_indexes = (index.to_a | other.index.to_a).sort
|
64
64
|
|
65
|
-
hsh =
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
if this && that
|
72
|
-
this.send(operation, that)
|
73
|
-
else
|
74
|
-
Daru::Vector.new([], index: all_indexes, name: vector_name)
|
75
|
-
end
|
76
|
-
end
|
65
|
+
hsh =
|
66
|
+
all_vectors.map do |vector_name|
|
67
|
+
vector = dataframe_binary_operation_on_vectors other, vector_name, operation, all_indexes
|
68
|
+
|
69
|
+
[vector_name, vector]
|
70
|
+
end.to_h
|
77
71
|
|
78
72
|
Daru::DataFrame.new(hsh, index: all_indexes, name: @name, dtype: @dtype)
|
79
73
|
end
|
80
74
|
|
81
|
-
def
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
75
|
+
def dataframe_binary_operation_on_vectors other, name, operation, indexes
|
76
|
+
if has_vector?(name) && other.has_vector?(name)
|
77
|
+
self[name].send(operation, other[name])
|
78
|
+
else
|
79
|
+
Daru::Vector.new([], index: indexes, name: name)
|
86
80
|
end
|
81
|
+
end
|
87
82
|
|
88
|
-
|
83
|
+
def scalar_binary_operation operation, other
|
84
|
+
dup.map_vectors! do |vector|
|
85
|
+
vector.numeric? ? vector.send(operation, other) : vector
|
86
|
+
end
|
89
87
|
end
|
90
88
|
end
|
91
89
|
end
|
@@ -63,19 +63,17 @@ module Daru
|
|
63
63
|
end
|
64
64
|
|
65
65
|
def v2v_binary operation, other
|
66
|
-
|
67
|
-
elements = []
|
66
|
+
# FIXME: why the sorting?.. - zverok, 2016-05-18
|
68
67
|
index = (@index.to_a | other.index.to_a).sort
|
69
68
|
|
70
|
-
index.
|
69
|
+
elements = index.map do |idx|
|
71
70
|
this = self.index.include?(idx) ? self[idx] : nil
|
72
71
|
that = other.index.include?(idx) ? other[idx] : nil
|
73
72
|
|
74
|
-
|
75
|
-
common_idxs << idx
|
73
|
+
this && that ? this.send(operation, that) : nil
|
76
74
|
end
|
77
75
|
|
78
|
-
Daru::Vector.new(elements, name: @name, index:
|
76
|
+
Daru::Vector.new(elements, name: @name, index: index)
|
79
77
|
end
|
80
78
|
end
|
81
79
|
end
|
@@ -132,17 +132,15 @@ module Daru
|
|
132
132
|
|
133
133
|
# Calculate sample variance-covariance between the numeric vectors.
|
134
134
|
def covariance
|
135
|
-
cache=
|
135
|
+
cache = Hash.new do |h, (col, row)|
|
136
|
+
h[[col, row]] = vector_cov(self[row],self[col])
|
137
|
+
end
|
136
138
|
vectors = numeric_vectors
|
137
139
|
|
138
140
|
mat_rows = vectors.collect do |row|
|
139
141
|
vectors.collect do |col|
|
140
142
|
if row == col
|
141
143
|
self[row].variance
|
142
|
-
elsif cache[[col,row]].nil?
|
143
|
-
cov = vector_cov(self[row],self[col])
|
144
|
-
cache[[row,col]] = cov
|
145
|
-
cov
|
146
144
|
else
|
147
145
|
cache[[col,row]]
|
148
146
|
end
|
@@ -170,16 +168,11 @@ module Daru
|
|
170
168
|
private
|
171
169
|
|
172
170
|
def apply_method_to_numerics method, *args
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
if v.type == :numeric
|
177
|
-
memo << v.send(method, *args)
|
178
|
-
order << n
|
179
|
-
end
|
180
|
-
end
|
171
|
+
numerics = @vectors.to_a.map { |n| [n, @data[@vectors[n]]] }
|
172
|
+
.select { |_n, v| v.numeric? }
|
173
|
+
computed = numerics.map { |_n, v| v.send(method, *args) }
|
181
174
|
|
182
|
-
Daru::DataFrame.new(computed, index: @index, order:
|
175
|
+
Daru::DataFrame.new(computed, index: @index, order: numerics.map(&:first), clone: false)
|
183
176
|
end
|
184
177
|
|
185
178
|
def vector_cov v1a, v2a
|
@@ -187,7 +180,7 @@ module Daru
|
|
187
180
|
end
|
188
181
|
|
189
182
|
def sum_of_squares v1, v2
|
190
|
-
v1a,v2a = v1.
|
183
|
+
v1a,v2a = v1.reject_values(*Daru::MISSING_VALUES),v2.reject_values(*Daru::MISSING_VALUES)
|
191
184
|
v1a.reset_index!
|
192
185
|
v2a.reset_index!
|
193
186
|
m1 = v1a.mean
|
@@ -4,7 +4,7 @@ module Daru
|
|
4
4
|
# is done inside the wrapper, so that native methods can be used for most of
|
5
5
|
# the computationally intensive tasks.
|
6
6
|
module Statistics
|
7
|
-
module Vector
|
7
|
+
module Vector # rubocop:disable Metrics/ModuleLength
|
8
8
|
def mean
|
9
9
|
@data.mean
|
10
10
|
end
|
@@ -55,16 +55,16 @@ module Daru
|
|
55
55
|
alias :mad :median_absolute_deviation
|
56
56
|
|
57
57
|
def standard_error
|
58
|
-
standard_deviation_sample/Math.sqrt(
|
58
|
+
standard_deviation_sample/Math.sqrt(size - count_values(*Daru::MISSING_VALUES))
|
59
59
|
end
|
60
60
|
|
61
61
|
def sum_of_squared_deviation
|
62
|
-
(@data.inject(0) { |a,x| x
|
62
|
+
(@data.inject(0) { |a,x| x**2 + a } - (sum**2).quo(size - count_values(*Daru::MISSING_VALUES)).to_f).to_f
|
63
63
|
end
|
64
64
|
|
65
65
|
# Retrieve unique values of non-nil data
|
66
66
|
def factors
|
67
|
-
|
67
|
+
reject_values(*Daru::MISSING_VALUES).uniq.reset_index!
|
68
68
|
end
|
69
69
|
|
70
70
|
# Maximum element of the vector.
|
@@ -98,15 +98,17 @@ module Daru
|
|
98
98
|
end
|
99
99
|
|
100
100
|
def proportions
|
101
|
-
len =
|
102
|
-
frequencies.each_with_object({})
|
101
|
+
len = size - count_values(*Daru::MISSING_VALUES)
|
102
|
+
frequencies.each_with_object({}) do |(el, count), hash|
|
103
|
+
hash[el] = count / len
|
104
|
+
end
|
103
105
|
end
|
104
106
|
|
105
107
|
def ranked
|
106
108
|
sum = 0
|
107
|
-
r = frequencies.sort.each_with_object({}) do |
|
108
|
-
memo[
|
109
|
-
sum +=
|
109
|
+
r = frequencies.sort.each_with_object({}) do |(el, count), memo|
|
110
|
+
memo[el] = ((sum + 1) + (sum + count)).quo(2)
|
111
|
+
sum += count
|
110
112
|
end
|
111
113
|
|
112
114
|
recode { |e| r[e] }
|
@@ -120,29 +122,27 @@ module Daru
|
|
120
122
|
# retrieves number of instances where block returns true. If other
|
121
123
|
# values given, retrieves the frequency for this value. If no value
|
122
124
|
# given, counts the number of non-nil elements in the Vector.
|
123
|
-
def count value=false
|
125
|
+
def count value=false, &block
|
124
126
|
if block_given?
|
125
|
-
@data.select
|
127
|
+
@data.select(&block).count
|
126
128
|
elsif value
|
127
|
-
val
|
128
|
-
val.nil? ? 0 : val
|
129
|
+
count { |val| val == value }
|
129
130
|
else
|
130
|
-
size -
|
131
|
+
size - indexes(*Daru::MISSING_VALUES).size
|
131
132
|
end
|
132
133
|
end
|
133
134
|
|
134
135
|
# Count number of occurrences of each value in the Vector
|
135
136
|
def value_counts
|
136
|
-
values =
|
137
|
-
|
138
|
-
values[d] ? values[d] += 1 : values[d] = 1
|
137
|
+
values = @data.each_with_object(Hash.new(0)) do |d, memo|
|
138
|
+
memo[d] += 1
|
139
139
|
end
|
140
140
|
|
141
141
|
Daru::Vector.new(values)
|
142
142
|
end
|
143
143
|
|
144
144
|
def proportion value=1
|
145
|
-
frequencies[value].quo(
|
145
|
+
frequencies[value].quo(size - count_values(*Daru::MISSING_VALUES)).to_f
|
146
146
|
end
|
147
147
|
|
148
148
|
# Sample variance with denominator (N-1)
|
@@ -151,7 +151,7 @@ module Daru
|
|
151
151
|
if @data.respond_to? :variance_sample
|
152
152
|
@data.variance_sample m
|
153
153
|
else
|
154
|
-
sum_of_squares(m).quo(
|
154
|
+
sum_of_squares(m).quo(size - count_values(*Daru::MISSING_VALUES) - 1)
|
155
155
|
end
|
156
156
|
end
|
157
157
|
|
@@ -161,38 +161,26 @@ module Daru
|
|
161
161
|
if @data.respond_to? :variance_population
|
162
162
|
@data.variance_population m
|
163
163
|
else
|
164
|
-
sum_of_squares(m).quo(
|
164
|
+
sum_of_squares(m).quo(size - count_values(*Daru::MISSING_VALUES)).to_f
|
165
165
|
end
|
166
166
|
end
|
167
167
|
|
168
168
|
# Sample covariance with denominator (N-1)
|
169
169
|
def covariance_sample other
|
170
|
-
|
171
|
-
|
172
|
-
mean_y = other.mean
|
173
|
-
sum = 0
|
174
|
-
(0...size).each do |i|
|
175
|
-
sum += ((@missing_values.key?(@data[i]) || other.missing_values.include?(other[i])) ? 0 : (@data[i] - mean_x) * (other.data[i] - mean_y))
|
176
|
-
end
|
177
|
-
sum / (n_valid - 1)
|
170
|
+
size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
|
171
|
+
covariance_sum(other) / (size - count_values(*Daru::MISSING_VALUES) - 1)
|
178
172
|
end
|
179
173
|
|
180
174
|
# Population covariance with denominator (N)
|
181
175
|
def covariance_population other
|
182
|
-
|
183
|
-
|
184
|
-
mean_y = other.mean
|
185
|
-
sum = 0
|
186
|
-
(0...size).each do |i|
|
187
|
-
sum += ((@missing_values.key?(@data[i]) || other.missing_values.include?(other[i])) ? 0 : (@data[i] - mean_x) * (other.data[i] - mean_y))
|
188
|
-
end
|
189
|
-
sum / n_valid
|
176
|
+
size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
|
177
|
+
covariance_sum(other) / (size - count_values(*Daru::MISSING_VALUES))
|
190
178
|
end
|
191
179
|
|
192
180
|
def sum_of_squares(m=nil)
|
193
181
|
m ||= mean
|
194
|
-
|
195
|
-
|
182
|
+
reject_values(*Daru::MISSING_VALUES).data.inject(0) { |memo, val|
|
183
|
+
memo + (val - m)**2
|
196
184
|
}
|
197
185
|
end
|
198
186
|
|
@@ -221,7 +209,7 @@ module Daru
|
|
221
209
|
else
|
222
210
|
m ||= mean
|
223
211
|
th = @data.inject(0) { |memo, val| memo + ((val - m)**3) }
|
224
|
-
th.quo((
|
212
|
+
th.quo((size - indexes(*Daru::MISSING_VALUES).size) * (standard_deviation_sample(m)**3))
|
225
213
|
end
|
226
214
|
end
|
227
215
|
|
@@ -231,16 +219,16 @@ module Daru
|
|
231
219
|
else
|
232
220
|
m ||= mean
|
233
221
|
fo = @data.inject(0) { |a, x| a + ((x - m) ** 4) }
|
234
|
-
fo.quo((
|
222
|
+
fo.quo((size - indexes(*Daru::MISSING_VALUES).size) * standard_deviation_sample(m) ** 4) - 3
|
235
223
|
end
|
236
224
|
end
|
237
225
|
|
238
226
|
def average_deviation_population m=nil
|
239
|
-
|
227
|
+
must_be_numeric!
|
240
228
|
m ||= mean
|
241
|
-
(
|
242
|
-
|
243
|
-
}
|
229
|
+
reject_values(*Daru::MISSING_VALUES).data.inject(0) { |memo, val|
|
230
|
+
(val - m).abs + memo
|
231
|
+
}.quo(size - count_values(*Daru::MISSING_VALUES))
|
244
232
|
end
|
245
233
|
|
246
234
|
# Returns the value of the percentile q
|
@@ -254,31 +242,13 @@ module Daru
|
|
254
242
|
#
|
255
243
|
# This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
|
256
244
|
def percentile(q, strategy=:midpoint)
|
257
|
-
sorted = only_valid(:array).sort
|
258
|
-
|
259
245
|
case strategy
|
260
246
|
when :midpoint
|
261
|
-
|
262
|
-
if v.to_i!=v
|
263
|
-
sorted[v.to_i]
|
264
|
-
else
|
265
|
-
(sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
|
266
|
-
end
|
247
|
+
midpoint_percentile(q)
|
267
248
|
when :linear
|
268
|
-
|
269
|
-
|
270
|
-
k = index.truncate
|
271
|
-
d = index % 1
|
272
|
-
|
273
|
-
if k == 0
|
274
|
-
sorted[0]
|
275
|
-
elsif k >= sorted.size
|
276
|
-
sorted[-1]
|
277
|
-
else
|
278
|
-
sorted[k - 1] + d * (sorted[k] - sorted[k - 1])
|
279
|
-
end
|
249
|
+
linear_percentile(q)
|
280
250
|
else
|
281
|
-
raise
|
251
|
+
raise ArgumentError, "Unknown strategy #{strategy}"
|
282
252
|
end
|
283
253
|
end
|
284
254
|
|
@@ -313,17 +283,18 @@ module Daru
|
|
313
283
|
def standardize use_population=false
|
314
284
|
m ||= mean
|
315
285
|
sd = use_population ? sdp : sds
|
316
|
-
return Daru::Vector.new([nil]
|
286
|
+
return Daru::Vector.new([nil]*size) if m.nil? || sd == 0.0
|
317
287
|
|
318
288
|
vector_standardized_compute m, sd
|
319
289
|
end
|
320
290
|
|
291
|
+
# :nocov:
|
321
292
|
def box_cox_transformation lambda # :nodoc:
|
322
|
-
|
293
|
+
must_be_numeric!
|
323
294
|
|
324
295
|
recode do |x|
|
325
296
|
if !x.nil?
|
326
|
-
if lambda
|
297
|
+
if lambda.zero?
|
327
298
|
Math.log(x)
|
328
299
|
else
|
329
300
|
(x ** lambda - 1).quo(lambda)
|
@@ -333,10 +304,11 @@ module Daru
|
|
333
304
|
end
|
334
305
|
end
|
335
306
|
end
|
307
|
+
# :nocov:
|
336
308
|
|
337
309
|
# Replace each non-nil value in the vector with its percentile.
|
338
310
|
def vector_percentile
|
339
|
-
c = size -
|
311
|
+
c = size - indexes(*Daru::MISSING_VALUES).size
|
340
312
|
ranked.recode! { |i| i.nil? ? nil : (i.quo(c)*100).to_f }
|
341
313
|
end
|
342
314
|
|
@@ -367,7 +339,7 @@ module Daru
|
|
367
339
|
if @data.respond_to? :sample_with_replacement
|
368
340
|
@data.sample_with_replacement sample
|
369
341
|
else
|
370
|
-
valid =
|
342
|
+
valid = indexes(*Daru::MISSING_VALUES).empty? ? self : reject_values(*Daru::MISSING_VALUES)
|
371
343
|
vds = valid.size
|
372
344
|
(0...sample).collect { valid[rand(vds)] }
|
373
345
|
end
|
@@ -383,17 +355,7 @@ module Daru
|
|
383
355
|
if @data.respond_to? :sample_without_replacement
|
384
356
|
@data.sample_without_replacement sample
|
385
357
|
else
|
386
|
-
|
387
|
-
raise ArgumentError, "Sample size couldn't be greater than n" if
|
388
|
-
sample > valid.size
|
389
|
-
out = []
|
390
|
-
size = valid.size
|
391
|
-
while out.size < sample
|
392
|
-
value = rand(size)
|
393
|
-
out.push(value) unless out.include?(value)
|
394
|
-
end
|
395
|
-
|
396
|
-
out.collect { |i| valid[i] }
|
358
|
+
raw_sample_without_replacement(sample)
|
397
359
|
end
|
398
360
|
end
|
399
361
|
|
@@ -407,7 +369,7 @@ module Daru
|
|
407
369
|
# vector = Daru::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
|
408
370
|
# vector.percent_change
|
409
371
|
# #=>
|
410
|
-
# # <Daru::Vector:28713060 @name = nil
|
372
|
+
# # <Daru::Vector:28713060 @name = nil size: 5 >
|
411
373
|
# # nil
|
412
374
|
# # a
|
413
375
|
# # f 0.5
|
@@ -415,19 +377,19 @@ module Daru
|
|
415
377
|
# # i 0.3333333333333333
|
416
378
|
# # k 0.25
|
417
379
|
def percent_change periods=1
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
380
|
+
must_be_numeric!
|
381
|
+
|
382
|
+
prev = nil
|
383
|
+
arr = @data.each_with_index.map do |cur, i|
|
384
|
+
if i < periods ||
|
385
|
+
include_with_nan?(Daru::MISSING_VALUES, cur) ||
|
386
|
+
include_with_nan?(Daru::MISSING_VALUES, prev)
|
387
|
+
nil
|
426
388
|
else
|
427
|
-
|
428
|
-
|
429
|
-
end
|
389
|
+
(cur - prev) / prev.to_f
|
390
|
+
end.tap { prev = cur if cur }
|
430
391
|
end
|
392
|
+
|
431
393
|
Daru::Vector.new(arr, index: @index, name: @name)
|
432
394
|
end
|
433
395
|
|
@@ -533,7 +495,7 @@ module Daru
|
|
533
495
|
# ts.ema # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]
|
534
496
|
#
|
535
497
|
# @return [Daru::Vector] Contains EMA
|
536
|
-
def ema(n=10, wilder=false)
|
498
|
+
def ema(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
|
537
499
|
smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
|
538
500
|
# need to start everything from the first non-nil observation
|
539
501
|
start = @data.index { |i| !i.nil? }
|
@@ -567,7 +529,7 @@ module Daru
|
|
567
529
|
# ts.emv # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]
|
568
530
|
#
|
569
531
|
# @return [Daru::Vector] contains EMV
|
570
|
-
def emv(n=10, wilder=false)
|
532
|
+
def emv(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
|
571
533
|
smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
|
572
534
|
# need to start everything from the first non-nil observation
|
573
535
|
start = @data.index { |i| !i.nil? }
|
@@ -651,7 +613,7 @@ module Daru
|
|
651
613
|
max_lags ||= (10 * Math.log10(size)).to_i
|
652
614
|
|
653
615
|
(0..max_lags).map do |i|
|
654
|
-
if i
|
616
|
+
if i.zero?
|
655
617
|
1.0
|
656
618
|
else
|
657
619
|
m = mean
|
@@ -672,7 +634,7 @@ module Daru
|
|
672
634
|
# == Returns
|
673
635
|
#
|
674
636
|
# Autocovariance value
|
675
|
-
def acvf(demean=true, unbiased=true)
|
637
|
+
def acvf(demean=true, unbiased=true) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
676
638
|
opts = {
|
677
639
|
demean: true,
|
678
640
|
unbaised: true
|
@@ -700,7 +662,7 @@ module Daru
|
|
700
662
|
result = []
|
701
663
|
acc = 0
|
702
664
|
@data.each do |d|
|
703
|
-
if
|
665
|
+
if include_with_nan? Daru::MISSING_VALUES, d
|
704
666
|
result << nil
|
705
667
|
else
|
706
668
|
acc += d
|
@@ -722,6 +684,66 @@ module Daru
|
|
722
684
|
alias :ss :sum_of_squares
|
723
685
|
alias :percentil :percentile
|
724
686
|
alias :se :standard_error
|
687
|
+
|
688
|
+
private
|
689
|
+
|
690
|
+
def must_be_numeric!
|
691
|
+
numeric? or raise TypeError, 'Vector must be numeric'
|
692
|
+
end
|
693
|
+
|
694
|
+
def covariance_sum other
|
695
|
+
self_mean = mean
|
696
|
+
other_mean = other.mean
|
697
|
+
@data
|
698
|
+
.zip(other.data).inject(0) do |res, (d, o)|
|
699
|
+
res + if !d || !o
|
700
|
+
0
|
701
|
+
else
|
702
|
+
(d - self_mean) * (o - other_mean)
|
703
|
+
end
|
704
|
+
end
|
705
|
+
end
|
706
|
+
|
707
|
+
def midpoint_percentile(q) # rubocop:disable Metrics/AbcSize
|
708
|
+
sorted = reject_values(*Daru::MISSING_VALUES).to_a.sort
|
709
|
+
|
710
|
+
v = ((size - count_values(*Daru::MISSING_VALUES)) * q).quo(100)
|
711
|
+
if v.to_i!=v
|
712
|
+
sorted[v.to_i]
|
713
|
+
else
|
714
|
+
(sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
|
715
|
+
end
|
716
|
+
end
|
717
|
+
|
718
|
+
def linear_percentile(q) # rubocop:disable Metrics/AbcSize
|
719
|
+
sorted = reject_values(*Daru::MISSING_VALUES).to_a.sort
|
720
|
+
index = (q / 100.0) * ((size - count_values(*Daru::MISSING_VALUES)) + 1)
|
721
|
+
|
722
|
+
k = index.truncate
|
723
|
+
d = index % 1
|
724
|
+
|
725
|
+
if k.zero?
|
726
|
+
sorted[0]
|
727
|
+
elsif k >= sorted.size
|
728
|
+
sorted[-1]
|
729
|
+
else
|
730
|
+
sorted[k - 1] + d * (sorted[k] - sorted[k - 1])
|
731
|
+
end
|
732
|
+
end
|
733
|
+
|
734
|
+
def raw_sample_without_replacement sample
|
735
|
+
valid = indexes(*Daru::MISSING_VALUES).empty? ? self : reject_values(*Daru::MISSING_VALUES)
|
736
|
+
raise ArgumentError, "Sample size couldn't be greater than n" if
|
737
|
+
sample > valid.size
|
738
|
+
out = []
|
739
|
+
size = valid.size
|
740
|
+
while out.size < sample
|
741
|
+
value = rand(size)
|
742
|
+
out.push(value) unless out.include?(value)
|
743
|
+
end
|
744
|
+
|
745
|
+
out.collect { |i| valid[i] }
|
746
|
+
end
|
725
747
|
end
|
726
748
|
end
|
727
749
|
end
|