daru 0.1.3.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rspec +2 -1
- data/.rspec_formatter.rb +33 -0
- data/.rubocop.yml +26 -2
- data/History.md +38 -0
- data/README.md +22 -13
- data/Rakefile +50 -2
- data/benchmarks/csv_reading.rb +22 -0
- data/daru.gemspec +9 -2
- data/lib/daru.rb +36 -4
- data/lib/daru/accessors/array_wrapper.rb +6 -1
- data/lib/daru/accessors/dataframe_by_row.rb +10 -2
- data/lib/daru/accessors/gsl_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
- data/lib/daru/category.rb +935 -0
- data/lib/daru/core/group_by.rb +29 -38
- data/lib/daru/core/merge.rb +186 -145
- data/lib/daru/core/query.rb +22 -11
- data/lib/daru/dataframe.rb +976 -885
- data/lib/daru/date_time/index.rb +166 -166
- data/lib/daru/date_time/offsets.rb +66 -77
- data/lib/daru/formatters/table.rb +54 -0
- data/lib/daru/helpers/array.rb +40 -0
- data/lib/daru/index.rb +476 -73
- data/lib/daru/io/io.rb +66 -45
- data/lib/daru/io/sql_data_source.rb +33 -62
- data/lib/daru/iruby/helpers.rb +38 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
- data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru/iruby/templates/vector.html.erb +27 -0
- data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
- data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
- data/lib/daru/maths/arithmetic/vector.rb +4 -6
- data/lib/daru/maths/statistics/dataframe.rb +8 -15
- data/lib/daru/maths/statistics/vector.rb +120 -98
- data/lib/daru/monkeys.rb +12 -40
- data/lib/daru/plotting/gruff.rb +3 -0
- data/lib/daru/plotting/gruff/category.rb +49 -0
- data/lib/daru/plotting/gruff/dataframe.rb +91 -0
- data/lib/daru/plotting/gruff/vector.rb +57 -0
- data/lib/daru/plotting/nyaplot.rb +3 -0
- data/lib/daru/plotting/nyaplot/category.rb +34 -0
- data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
- data/lib/daru/plotting/nyaplot/vector.rb +46 -0
- data/lib/daru/vector.rb +694 -421
- data/lib/daru/version.rb +1 -1
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/spec/accessors/wrappers_spec.rb +2 -4
- data/spec/categorical_spec.rb +1734 -0
- data/spec/core/group_by_spec.rb +52 -2
- data/spec/core/merge_spec.rb +63 -2
- data/spec/core/query_spec.rb +236 -80
- data/spec/dataframe_spec.rb +1373 -79
- data/spec/date_time/data_spec.rb +3 -5
- data/spec/date_time/index_spec.rb +154 -17
- data/spec/date_time/offsets_spec.rb +3 -4
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/formatters/table_formatter_spec.rb +99 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +168 -0
- data/spec/index/index_spec.rb +283 -0
- data/spec/index/multi_index_spec.rb +570 -0
- data/spec/io/io_spec.rb +31 -4
- data/spec/io/sql_data_source_spec.rb +0 -1
- data/spec/iruby/dataframe_spec.rb +172 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +107 -0
- data/spec/math/arithmetic/dataframe_spec.rb +71 -13
- data/spec/math/arithmetic/vector_spec.rb +8 -10
- data/spec/math/statistics/dataframe_spec.rb +3 -5
- data/spec/math/statistics/vector_spec.rb +45 -55
- data/spec/monkeys_spec.rb +32 -9
- data/spec/plotting/dataframe_spec.rb +386 -0
- data/spec/plotting/vector_spec.rb +230 -0
- data/spec/shared/vector_display_spec.rb +215 -0
- data/spec/spec_helper.rb +23 -0
- data/spec/vector_spec.rb +905 -138
- metadata +143 -11
- data/.rubocop_todo.yml +0 -44
- data/lib/daru/plotting/dataframe.rb +0 -104
- data/lib/daru/plotting/vector.rb +0 -38
- data/spec/daru_spec.rb +0 -58
- data/spec/index_spec.rb +0 -375
@@ -0,0 +1,27 @@
|
|
1
|
+
<%# Renders a single-index Daru::Vector as an HTML table: a title row, an optional name row, at most `threshold` data rows, and (when truncated) an ellipsis row followed by the last entry. %>
<table>
  <tr>
    <th colspan="2">Daru::Vector(<%= size %>)<%= ':category' if category? %></th>
  </tr>
  <% if name %>
    <tr>
      <th> </th>
      <th><%= name %></th>
    </tr>
  <% end %>

  <% @index.each_with_index.first(threshold).each do |index, pos| %>
    <tr>
      <td><%= index %></td>
      <td><%= self.at(pos) %></td>
    </tr>
  <% end %>

  <% if size > threshold %>
    <%# The value is fetched by position, but the first column must show the index label (as the rows above do), not the position. %>
    <% last_pos = @index.size-1 %>
    <tr><td>...</td><td>...</td></tr>
    <tr>
      <td><%= @index.to_a.last %></td>
      <td><%= self.at last_pos %></td>
    </tr>
  <% end %>
</table>
|
@@ -0,0 +1,36 @@
|
|
1
|
+
<%# Renders a multi-indexed Daru::Vector as an HTML table: a title row, an optional name row, at most `threshold` rows whose index levels are merged via rowspans, and (when truncated) an ellipsis row followed by the last entry. %>
<table>
  <tr>
    <th colspan="<%= index.width+1 %>">Daru::Vector(<%= size %>)<%= ':category' if category? %></th>
  </tr>
  <% if name %>
    <tr>
      <th colspan="<%= index.width %>"> </th>
      <th><%= name %></th>
    </tr>
  <% end %>

  <% Daru::IRuby::Helpers.tuples_with_rowspans(@index).first(threshold).zip(to_a).each do |tuple, value| %>
    <tr>
      <% tuple.each do |idx, span| %>
        <th rowspan="<%= span %>"><%= idx %></th>
      <% end %>
      <td><%= value %></td>
    </tr>
  <% end %>

  <% if size > threshold %>
    <% last_index = @index.to_a.last %>
    <tr>
      <% last_index.size.times do %>
        <th>...</th>
      <% end %>
      <td>...</td>
    </tr>
    <tr>
      <% last_index.each do |idx| %>
        <%# Closing tag must match the opening <th>; it was </td> before. %>
        <th><%= idx %></th>
      <% end %>
      <td><%= self[last_index] %></td>
    </tr>
  <% end %>
</table>
|
@@ -62,30 +62,28 @@ module Daru
|
|
62
62
|
all_vectors = (vectors.to_a | other.vectors.to_a).sort
|
63
63
|
all_indexes = (index.to_a | other.index.to_a).sort
|
64
64
|
|
65
|
-
hsh =
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
if this && that
|
72
|
-
this.send(operation, that)
|
73
|
-
else
|
74
|
-
Daru::Vector.new([], index: all_indexes, name: vector_name)
|
75
|
-
end
|
76
|
-
end
|
65
|
+
hsh =
|
66
|
+
all_vectors.map do |vector_name|
|
67
|
+
vector = dataframe_binary_operation_on_vectors other, vector_name, operation, all_indexes
|
68
|
+
|
69
|
+
[vector_name, vector]
|
70
|
+
end.to_h
|
77
71
|
|
78
72
|
Daru::DataFrame.new(hsh, index: all_indexes, name: @name, dtype: @dtype)
|
79
73
|
end
|
80
74
|
|
81
|
-
def
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
75
|
+
# Applies +operation+ to the vector called +name+ in this frame and in +other+.
#
# When either frame lacks that vector, the result is a Daru::Vector built from
# an empty array, indexed by +indexes+ and carrying +name+.
def dataframe_binary_operation_on_vectors other, name, operation, indexes
  unless has_vector?(name) && other.has_vector?(name)
    return Daru::Vector.new([], index: indexes, name: name)
  end

  self[name].send(operation, other[name])
end
|
87
82
|
|
88
|
-
|
83
|
+
# Applies +operation+ with the scalar +other+ to every numeric vector of a
# duplicate of this frame; non-numeric vectors are passed through unchanged.
# Returns whatever map_vectors! returns for the duplicate.
def scalar_binary_operation operation, other
  copy = dup
  copy.map_vectors! do |vec|
    if vec.numeric?
      vec.send(operation, other)
    else
      vec
    end
  end
end
|
90
88
|
end
|
91
89
|
end
|
@@ -63,19 +63,17 @@ module Daru
|
|
63
63
|
end
|
64
64
|
|
65
65
|
# Element-wise +operation+ between this vector and +other+, evaluated over
# the sorted union of both indexes. An entry is nil when the label is absent
# from either operand or when either operand's value there is nil/false.
def v2v_binary operation, other
  # FIXME: why the sorting?.. - zverok, 2016-05-18
  union_index = (@index.to_a | other.index.to_a).sort

  results = union_index.map do |label|
    mine   = (self[label] if index.include?(label))
    theirs = (other[label] if other.index.include?(label))

    mine.send(operation, theirs) if mine && theirs
  end

  Daru::Vector.new(results, name: @name, index: union_index)
end
|
80
78
|
end
|
81
79
|
end
|
@@ -132,17 +132,15 @@ module Daru
|
|
132
132
|
|
133
133
|
# Calculate sample variance-covariance between the numeric vectors.
|
134
134
|
def covariance
|
135
|
-
cache=
|
135
|
+
cache = Hash.new do |h, (col, row)|
|
136
|
+
h[[col, row]] = vector_cov(self[row],self[col])
|
137
|
+
end
|
136
138
|
vectors = numeric_vectors
|
137
139
|
|
138
140
|
mat_rows = vectors.collect do |row|
|
139
141
|
vectors.collect do |col|
|
140
142
|
if row == col
|
141
143
|
self[row].variance
|
142
|
-
elsif cache[[col,row]].nil?
|
143
|
-
cov = vector_cov(self[row],self[col])
|
144
|
-
cache[[row,col]] = cov
|
145
|
-
cov
|
146
144
|
else
|
147
145
|
cache[[col,row]]
|
148
146
|
end
|
@@ -170,16 +168,11 @@ module Daru
|
|
170
168
|
private
|
171
169
|
|
172
170
|
# Sends +method+ (with +args+) to every numeric vector of the frame and wraps
# the results in a new DataFrame that keeps this frame's index and lists only
# the numeric vector names, in their existing order.
def apply_method_to_numerics method, *args
  named_vectors = @vectors.to_a.map { |vec_name| [vec_name, @data[@vectors[vec_name]]] }
  numeric_pairs = named_vectors.select { |pair| pair.last.numeric? }
  computed      = numeric_pairs.map { |pair| pair.last.send(method, *args) }
  order         = numeric_pairs.map(&:first)

  Daru::DataFrame.new(computed, index: @index, order: order, clone: false)
end
|
184
177
|
|
185
178
|
def vector_cov v1a, v2a
|
@@ -187,7 +180,7 @@ module Daru
|
|
187
180
|
end
|
188
181
|
|
189
182
|
def sum_of_squares v1, v2
|
190
|
-
v1a,v2a = v1.
|
183
|
+
v1a,v2a = v1.reject_values(*Daru::MISSING_VALUES),v2.reject_values(*Daru::MISSING_VALUES)
|
191
184
|
v1a.reset_index!
|
192
185
|
v2a.reset_index!
|
193
186
|
m1 = v1a.mean
|
@@ -4,7 +4,7 @@ module Daru
|
|
4
4
|
# is done inside the wrapper, so that native methods can be used for most of
|
5
5
|
# the computationally intensive tasks.
|
6
6
|
module Statistics
|
7
|
-
module Vector
|
7
|
+
module Vector # rubocop:disable Metrics/ModuleLength
|
8
8
|
# Returns the mean as computed by the underlying data wrapper (@data.mean).
def mean
|
9
9
|
@data.mean
|
10
10
|
end
|
@@ -55,16 +55,16 @@ module Daru
|
|
55
55
|
alias :mad :median_absolute_deviation
|
56
56
|
|
57
57
|
# Sample standard deviation divided by the square root of the number of
# entries that are not in Daru::MISSING_VALUES.
def standard_error
  deviation = standard_deviation_sample
  valid_count = size - count_values(*Daru::MISSING_VALUES)
  deviation / Math.sqrt(valid_count)
end
|
60
60
|
|
61
61
|
# Sum of the squared data values minus (sum ** 2) / (count of entries not in
# Daru::MISSING_VALUES), returned as a Float.
def sum_of_squared_deviation
  squares_total = @data.inject(0) { |acc, val| val**2 + acc }
  correction = (sum**2).quo(size - count_values(*Daru::MISSING_VALUES)).to_f
  (squares_total - correction).to_f
end
|
64
64
|
|
65
65
|
# Retrieve unique values of non-nil data
|
66
66
|
# Drops entries listed in Daru::MISSING_VALUES, de-duplicates what is left
# and resets the index of the result in place.
def factors
  present = reject_values(*Daru::MISSING_VALUES)
  distinct = present.uniq
  distinct.reset_index!
end
|
69
69
|
|
70
70
|
# Maximum element of the vector.
|
@@ -98,15 +98,17 @@ module Daru
|
|
98
98
|
end
|
99
99
|
|
100
100
|
# Relative frequency of every value reported by +frequencies+.
#
# The denominator is the number of entries that are not in
# Daru::MISSING_VALUES (size minus count_values of those).
#
# @return [Hash] value => proportion, as a Float
def proportions
  len = size - count_values(*Daru::MISSING_VALUES)
  frequencies.each_with_object({}) do |(el, count), hash|
    # Plain `count / len` is integer division for integer counts and would
    # truncate every proportion below 1 to 0; use quo + to_f like #proportion.
    hash[el] = count.quo(len).to_f
  end
end
|
104
106
|
|
105
107
|
def ranked
|
106
108
|
sum = 0
|
107
|
-
r = frequencies.sort.each_with_object({}) do |
|
108
|
-
memo[
|
109
|
-
sum +=
|
109
|
+
r = frequencies.sort.each_with_object({}) do |(el, count), memo|
|
110
|
+
memo[el] = ((sum + 1) + (sum + count)).quo(2)
|
111
|
+
sum += count
|
110
112
|
end
|
111
113
|
|
112
114
|
recode { |e| r[e] }
|
@@ -120,29 +122,27 @@ module Daru
|
|
120
122
|
# retrieves number of instances where block returns true. If other
|
121
123
|
# values given, retrieves the frequency for this value. If no value
|
122
124
|
# given, counts the number of non-nil elements in the Vector.
|
123
|
-
def count value=false
|
125
|
+
# With a block: number of data elements for which the block is truthy.
# With a truthy +value+: number of elements equal to it.
# Otherwise: size minus the number of positions holding a Daru::MISSING_VALUES entry.
def count value=false, &block
  return @data.select(&block).count if block_given?
  return count { |val| val == value } if value

  size - indexes(*Daru::MISSING_VALUES).size
end
|
133
134
|
|
134
135
|
# Count number of occurrences of each value in the Vector
|
135
136
|
# Builds a Daru::Vector from a hash mapping each data element to the number
# of times it occurs in @data.
def value_counts
  tally = Hash.new(0)
  @data.each { |element| tally[element] += 1 }

  Daru::Vector.new(tally)
end
|
143
143
|
|
144
144
|
# Frequency of +value+ divided by the number of entries that are not in
# Daru::MISSING_VALUES, as a Float.
def proportion value=1
  occurrences = frequencies[value]
  valid_count = size - count_values(*Daru::MISSING_VALUES)
  occurrences.quo(valid_count).to_f
end
|
147
147
|
|
148
148
|
# Sample variance with denominator (N-1)
|
@@ -151,7 +151,7 @@ module Daru
|
|
151
151
|
if @data.respond_to? :variance_sample
|
152
152
|
@data.variance_sample m
|
153
153
|
else
|
154
|
-
sum_of_squares(m).quo(
|
154
|
+
sum_of_squares(m).quo(size - count_values(*Daru::MISSING_VALUES) - 1)
|
155
155
|
end
|
156
156
|
end
|
157
157
|
|
@@ -161,38 +161,26 @@ module Daru
|
|
161
161
|
if @data.respond_to? :variance_population
|
162
162
|
@data.variance_population m
|
163
163
|
else
|
164
|
-
sum_of_squares(m).quo(
|
164
|
+
sum_of_squares(m).quo(size - count_values(*Daru::MISSING_VALUES)).to_f
|
165
165
|
end
|
166
166
|
end
|
167
167
|
|
168
168
|
# Sample covariance with denominator (N-1)
|
169
169
|
# Raises ArgumentError unless both vectors have the same size; otherwise
# divides covariance_sum(other) by (entries not in Daru::MISSING_VALUES) - 1.
def covariance_sample other
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  total = covariance_sum(other)
  total / (size - count_values(*Daru::MISSING_VALUES) - 1)
end
|
179
173
|
|
180
174
|
# Population covariance with denominator (N)
|
181
175
|
# Raises ArgumentError unless both vectors have the same size; otherwise
# divides covariance_sum(other) by the number of entries not in Daru::MISSING_VALUES.
def covariance_population other
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  total = covariance_sum(other)
  total / (size - count_values(*Daru::MISSING_VALUES))
end
|
191
179
|
|
192
180
|
# Sum of squared differences between each non-missing value and +m+
# (+m+ falls back to the vector's mean when nil or false).
def sum_of_squares(m=nil)
  m ||= mean
  valid_values = reject_values(*Daru::MISSING_VALUES).data
  valid_values.inject(0) do |total, value|
    total + (value - m)**2
  end
end
|
198
186
|
|
@@ -221,7 +209,7 @@ module Daru
|
|
221
209
|
else
|
222
210
|
m ||= mean
|
223
211
|
th = @data.inject(0) { |memo, val| memo + ((val - m)**3) }
|
224
|
-
th.quo((
|
212
|
+
th.quo((size - indexes(*Daru::MISSING_VALUES).size) * (standard_deviation_sample(m)**3))
|
225
213
|
end
|
226
214
|
end
|
227
215
|
|
@@ -231,16 +219,16 @@ module Daru
|
|
231
219
|
else
|
232
220
|
m ||= mean
|
233
221
|
fo = @data.inject(0) { |a, x| a + ((x - m) ** 4) }
|
234
|
-
fo.quo((
|
222
|
+
fo.quo((size - indexes(*Daru::MISSING_VALUES).size) * standard_deviation_sample(m) ** 4) - 3
|
235
223
|
end
|
236
224
|
end
|
237
225
|
|
238
226
|
# Sum of absolute differences between each non-missing value and +m+
# (the mean when +m+ is not given), divided by the count of non-missing
# entries. Calls must_be_numeric! first.
def average_deviation_population m=nil
  must_be_numeric!
  m ||= mean
  abs_total = reject_values(*Daru::MISSING_VALUES).data.inject(0) do |total, value|
    (value - m).abs + total
  end
  abs_total.quo(size - count_values(*Daru::MISSING_VALUES))
end
|
245
233
|
|
246
234
|
# Returns the value of the percentile q
|
@@ -254,31 +242,13 @@ module Daru
|
|
254
242
|
#
|
255
243
|
# This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
|
256
244
|
# Dispatches to midpoint_percentile or linear_percentile depending on
# +strategy+; any other strategy raises ArgumentError.
def percentile(q, strategy=:midpoint)
  case strategy
  when :midpoint then midpoint_percentile(q)
  when :linear   then linear_percentile(q)
  else raise ArgumentError, "Unknown strategy #{strategy}"
  end
end
|
284
254
|
|
@@ -313,17 +283,18 @@ module Daru
|
|
313
283
|
# Standardizes the vector via vector_standardized_compute, using sdp when
# +use_population+ is truthy and sds otherwise. When the mean is nil or the
# chosen deviation equals 0.0, returns a new vector of +size+ nils instead.
def standardize use_population=false
  center = mean
  spread = if use_population
             sdp
           else
             sds
           end

  if center.nil? || spread == 0.0
    Daru::Vector.new([nil]*size)
  else
    vector_standardized_compute center, spread
  end
end
|
320
290
|
|
291
|
+
# :nocov:
|
321
292
|
def box_cox_transformation lambda # :nodoc:
|
322
|
-
|
293
|
+
must_be_numeric!
|
323
294
|
|
324
295
|
recode do |x|
|
325
296
|
if !x.nil?
|
326
|
-
if lambda
|
297
|
+
if lambda.zero?
|
327
298
|
Math.log(x)
|
328
299
|
else
|
329
300
|
(x ** lambda - 1).quo(lambda)
|
@@ -333,10 +304,11 @@ module Daru
|
|
333
304
|
end
|
334
305
|
end
|
335
306
|
end
|
307
|
+
# :nocov:
|
336
308
|
|
337
309
|
# Replace each non-nil value in the vector with its percentile.
|
338
310
|
# Recodes the ranked vector in place: each non-nil rank becomes
# rank / (count of non-missing positions) * 100 as a Float; nil stays nil.
def vector_percentile
  valid_count = size - indexes(*Daru::MISSING_VALUES).size
  ranked.recode! do |rank|
    (rank.quo(valid_count) * 100).to_f unless rank.nil?
  end
end
|
342
314
|
|
@@ -367,7 +339,7 @@ module Daru
|
|
367
339
|
if @data.respond_to? :sample_with_replacement
|
368
340
|
@data.sample_with_replacement sample
|
369
341
|
else
|
370
|
-
valid =
|
342
|
+
valid = indexes(*Daru::MISSING_VALUES).empty? ? self : reject_values(*Daru::MISSING_VALUES)
|
371
343
|
vds = valid.size
|
372
344
|
(0...sample).collect { valid[rand(vds)] }
|
373
345
|
end
|
@@ -383,17 +355,7 @@ module Daru
|
|
383
355
|
if @data.respond_to? :sample_without_replacement
|
384
356
|
@data.sample_without_replacement sample
|
385
357
|
else
|
386
|
-
|
387
|
-
raise ArgumentError, "Sample size couldn't be greater than n" if
|
388
|
-
sample > valid.size
|
389
|
-
out = []
|
390
|
-
size = valid.size
|
391
|
-
while out.size < sample
|
392
|
-
value = rand(size)
|
393
|
-
out.push(value) unless out.include?(value)
|
394
|
-
end
|
395
|
-
|
396
|
-
out.collect { |i| valid[i] }
|
358
|
+
raw_sample_without_replacement(sample)
|
397
359
|
end
|
398
360
|
end
|
399
361
|
|
@@ -407,7 +369,7 @@ module Daru
|
|
407
369
|
# vector = Daru::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
|
408
370
|
# vector.percent_change
|
409
371
|
# #=>
|
410
|
-
# # <Daru::Vector:28713060 @name = nil
|
372
|
+
# # <Daru::Vector:28713060 @name = nil size: 5 >
|
411
373
|
# # nil
|
412
374
|
# # a
|
413
375
|
# # f 0.5
|
@@ -415,19 +377,19 @@ module Daru
|
|
415
377
|
# # i 0.3333333333333333
|
416
378
|
# # k 0.25
|
417
379
|
def percent_change periods=1
  must_be_numeric!

  # `previous` tracks the most recent truthy data value seen so far.
  previous = nil
  changes = []
  @data.each_with_index do |current, position|
    skip = position < periods ||
           include_with_nan?(Daru::MISSING_VALUES, current) ||
           include_with_nan?(Daru::MISSING_VALUES, previous)
    changes << (skip ? nil : (current - previous) / previous.to_f)
    # Updated only after the entry is computed, and only for truthy values.
    previous = current if current
  end

  Daru::Vector.new(changes, index: @index, name: @name)
end
|
433
395
|
|
@@ -533,7 +495,7 @@ module Daru
|
|
533
495
|
# ts.ema # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]
|
534
496
|
#
|
535
497
|
# @return [Daru::Vector] Contains EMA
|
536
|
-
def ema(n=10, wilder=false)
|
498
|
+
def ema(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
|
537
499
|
smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
|
538
500
|
# need to start everything from the first non-nil observation
|
539
501
|
start = @data.index { |i| !i.nil? }
|
@@ -567,7 +529,7 @@ module Daru
|
|
567
529
|
# ts.emv # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]
|
568
530
|
#
|
569
531
|
# @return [Daru::Vector] contains EMV
|
570
|
-
def emv(n=10, wilder=false)
|
532
|
+
def emv(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
|
571
533
|
smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
|
572
534
|
# need to start everything from the first non-nil observation
|
573
535
|
start = @data.index { |i| !i.nil? }
|
@@ -651,7 +613,7 @@ module Daru
|
|
651
613
|
max_lags ||= (10 * Math.log10(size)).to_i
|
652
614
|
|
653
615
|
(0..max_lags).map do |i|
|
654
|
-
if i
|
616
|
+
if i.zero?
|
655
617
|
1.0
|
656
618
|
else
|
657
619
|
m = mean
|
@@ -672,7 +634,7 @@ module Daru
|
|
672
634
|
# == Returns
|
673
635
|
#
|
674
636
|
# Autocovariance value
|
675
|
-
def acvf(demean=true, unbiased=true)
|
637
|
+
def acvf(demean=true, unbiased=true) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
676
638
|
opts = {
|
677
639
|
demean: true,
|
678
640
|
unbaised: true
|
@@ -700,7 +662,7 @@ module Daru
|
|
700
662
|
result = []
|
701
663
|
acc = 0
|
702
664
|
@data.each do |d|
|
703
|
-
if
|
665
|
+
if include_with_nan? Daru::MISSING_VALUES, d
|
704
666
|
result << nil
|
705
667
|
else
|
706
668
|
acc += d
|
@@ -722,6 +684,66 @@ module Daru
|
|
722
684
|
alias :ss :sum_of_squares
|
723
685
|
alias :percentil :percentile
|
724
686
|
alias :se :standard_error
|
687
|
+
|
688
|
+
private
|
689
|
+
|
690
|
+
# Guard used by numeric-only statistics: raises TypeError unless numeric?
# is truthy, otherwise returns the value of numeric?.
def must_be_numeric!
  numeric? || raise(TypeError, 'Vector must be numeric')
end
|
693
|
+
|
694
|
+
# Sums (d - mean) * (o - other.mean) over position-wise pairs of this
# vector's data and +other+'s data; pairs where either value is nil/false
# contribute nothing.
def covariance_sum other
  own_mean = mean
  their_mean = other.mean
  total = 0
  @data.zip(other.data).each do |own, theirs|
    next unless own && theirs

    total += (own - own_mean) * (theirs - their_mean)
  end
  total
end
|
706
|
+
|
707
|
+
# Percentile +q+ (0..100) of the non-missing values, midpoint strategy.
#
# v = (count of non-missing values * q) / 100. When v is not a whole number
# the element at index v.to_i of the sorted values is returned; otherwise the
# result is the average of the two elements around v.
#
# @param q [Numeric] requested percentile
# @return [Numeric, nil] nil when there are no non-missing values
def midpoint_percentile(q) # rubocop:disable Metrics/AbcSize
  sorted = reject_values(*Daru::MISSING_VALUES).to_a.sort
  # Nothing to rank; linear_percentile also yields nil for empty data.
  return nil if sorted.empty?

  v = ((size - count_values(*Daru::MISSING_VALUES)) * q).quo(100)
  if v.to_i!=v
    sorted[v.to_i]
  else
    lower = (v-0.5).to_i
    # At q = 100 the upper neighbour would fall one past the last element
    # (nil, which made the addition raise TypeError); clamp it to the last index.
    upper = [(v+0.5).to_i, sorted.size - 1].min
    (sorted[lower].to_f + sorted[upper]).quo(2)
  end
end
|
717
|
+
|
718
|
+
# Percentile +q+ of the non-missing values by linear interpolation.
# The rank is (q / 100.0) * (count of non-missing values + 1); its integer
# part selects the neighbours and its fractional part the interpolation
# weight. Ranks below 1 give the first sorted value, ranks at or beyond the
# sorted length give the last one.
def linear_percentile(q) # rubocop:disable Metrics/AbcSize
  sorted = reject_values(*Daru::MISSING_VALUES).to_a.sort
  rank = (q / 100.0) * ((size - count_values(*Daru::MISSING_VALUES)) + 1)

  whole = rank.truncate
  fraction = rank % 1

  return sorted.first if whole.zero?
  return sorted.last if whole >= sorted.size

  below = sorted[whole - 1]
  below + fraction * (sorted[whole] - below)
end
|
733
|
+
|
734
|
+
# Draws +sample+ distinct random positions in 0...pool.size and returns
# pool[position] for each, where the pool is this vector itself when it has
# no Daru::MISSING_VALUES entries and reject_values(...) of it otherwise.
# Raises ArgumentError when +sample+ exceeds the pool size.
def raw_sample_without_replacement sample
  pool = indexes(*Daru::MISSING_VALUES).empty? ? self : reject_values(*Daru::MISSING_VALUES)
  raise ArgumentError, "Sample size couldn't be greater than n" if sample > pool.size

  picked = []
  pool_size = pool.size
  # Redraw until enough distinct positions have been collected.
  until picked.size >= sample
    candidate = rand(pool_size)
    picked << candidate unless picked.include?(candidate)
  end

  picked.map { |position| pool[position] }
end
|
725
747
|
end
|
726
748
|
end
|
727
749
|
end
|