runarray 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +11 -0
- data/README +36 -0
- data/Rakefile +149 -0
- data/lib/runarray/auto.rb +8 -0
- data/lib/runarray/narray.rb +1001 -0
- data/lib/runarray.rb +2 -0
- data/spec/runarray/narray_spec.rb +367 -0
- metadata +66 -0
@@ -0,0 +1,1001 @@
|
|
1
|
+
|
2
|
+
module Runarray
  # A one-dimensional numeric vector implemented as a subclass of Array.
  #
  # Arithmetic operators (+, -, *, /, **) are redefined to work element by
  # element (against another NArray) or against a scalar, and a collection of
  # statistics / signal-processing helpers is provided (regression, outlier
  # removal, re-binning, moving averages, SLATEC "chim" derivatives).
  #
  # NOTE: several Array methods are deliberately overridden with different
  # semantics (+, -, *, map, select, sum, min, max, inspect, to_s, ...).
  class NArray < Array

    # The original Array implementations, kept so the overriding versions
    # below can delegate to them.
    alias_method :old_map, :map
    alias_method :old_select, :select
    alias_method :dim, :size

    # Additive identity used as the starting value of accumulations.
    @@zero = 0.0

    # Type names accepted by NArray.build / NArray.new.
    TYPECODES = ['float', 'int'].freeze

    class << self

      # Shortcut for build('float', *dims).
      def float(*dims)
        build('float', *dims)
      end

      # Builds a vector for +typecode+ ('float' or 'int').
      #
      #   build('float')     # => empty vector
      #   build('float', 3)  # => [ 0.0, 0.0, 0.0 ]
      #
      # Raises NotImplementedError when more than one dimension is requested
      # and ArgumentError for an unknown typecode.
      def build(typecode, *dims)
        zero = zero_for(typecode)
        raise NotImplementedError, "dims <= 1 right now" if dims.size > 1
        dims.empty? ? new : new(dims.first, zero)
      end

      # Returns the zero value used to fill a vector of the given typecode.
      # Raises ArgumentError for a typecode that is not supported.
      def zero_for(typecode)
        case typecode
        when 'float' then 0.0
        when 'int'   then 0
        else raise ArgumentError, "unknown typecode: #{typecode.inspect}"
        end
      end

      # NArray[1, 2, 3] => vector whose elements went through #to_rep.
      def [](*ar)
        prep(ar)
      end

      # Accepts anything that responds to #size and #[] (Array, NArray, ...)
      # and returns a new object whose elements are the result of calling
      # #to_rep on each input element.
      def prep(input)
        obj = new
        input.size.times do |i|
          obj[i] = obj.to_rep(input[i])
        end
        obj
      end

      # Returns the larger of the two arguments (+first+ wins a tie).
      def max(first, second)
        first >= second ? first : second
      end

      # Returns the smaller of the two arguments (+first+ wins a tie).
      def min(first, second)
        first <= second ? first : second
      end

    end

    # Accepts everything Array.new accepts.  In addition, when the first
    # argument is one of TYPECODES the vector is zero filled:
    #
    #   NArray.new('float', 3)  # => [ 0.0, 0.0, 0.0 ]
    #   NArray.new('int')       # => empty vector
    #
    # Raises NotImplementedError when more than one dimension is requested.
    def initialize(*args)
      if TYPECODES.include?(args.first)
        typecode, *dims = args
        raise NotImplementedError, "dims <= 1 right now" if dims.size > 1
        if dims.empty?
          super()
        else
          super(dims.first, self.class.zero_for(typecode))
        end
      else
        super(*args)
      end
    end

    ## BASIC METHODS:

    # Yields the natural log of every element (as one vector) to the block
    # and returns a new vector holding exp() of each value the block returns.
    def log_space(&block)
      logged = map { |v| Math.log(v) }
      transformed = block.call(logged)
      self.class.new(transformed.map { |v| Math.exp(v) })
    end

    # e.g. "[ 1.0, 2.0 ]"
    def inspect
      "[ #{join(", ")} ]"
    end

    # Like Array#select but returns an object of the receiver's class.
    def select(&block)
      self.class.new(old_select(&block))
    end

    # Like Array#map but returns an object of the receiver's class.
    def map(&block)
      self.class.new(old_map(&block))
    end

    # Elements joined with a single space.
    def to_s
      join(" ")
    end

    # Returns a plain Array holding the same elements.
    def to_a
      Array.new(self)
    end

    # For each value take a certain fraction and make it random, in place.
    # The random part can be from 0 to 2X the original fraction, so each
    # value ends up within +/- (fraction * value) of where it started.
    # +precision+ is the number of discrete steps per unit used when drawing
    # the random part.  Returns self.
    def noisify!(fraction, precision=1000000)
      collect! do |val|
        part = fraction * val
        span = (2 * part * precision).to_i
        # rand(0) would return a Float in [0, 1) instead of 0, which used to
        # add noise to values whose random span is empty (e.g. zero).
        random = span.zero? ? 0.0 : rand(span).to_f / precision
        base = val - part
        val > 0 ? base + random : base - random
      end
    end

    # Stub: only loads the external 'histogram' library and returns the
    # result of the require.  The arguments are currently ignored.
    # NOTE(review): presumably meant to delegate to that library -- confirm.
    def histogram(*arg)
      require 'histogram'
    end

    # Takes input and converts to whatever internal representation
    # SUBCLASS THIS GUY!
    def to_rep(val)
      val.to_f
    end

    ########################################
    # BREAD AND BUTTER
    ########################################

    # The arithmetic operators below combine the receiver with +other+.
    # When +other+ is an NArray the operation is applied index by index,
    # otherwise +other+ is applied to every element.  A new object of the
    # receiver's class is returned.

    def /(other)
      elementwise(other) { |a, b| a / b }
    end

    def **(other)
      elementwise(other) { |a, b| a ** b }
    end

    def *(other)
      elementwise(other) { |a, b| a * b }
    end

    def +(other)
      elementwise(other) { |a, b| a + b }
    end

    def -(other)
      elementwise(other) { |a, b| a - b }
    end

    # New vector holding the absolute value of every element.
    def abs
      map { |val| val.abs }
    end

    # New vector holding the floor of every element.
    def floor
      map { |val| val.floor }
    end

    # Sum of all elements, accumulated left to right starting from 0.0.
    def sum
      inject(@@zero) { |total, val| total + val }
    end

    # returns a float
    def avg
      sum.to_f / size
    end

    ########################################
    # MORE INVOLVED
    ########################################

    # returns (new_x_coords, new_y_coords) of the same type as self
    # Where:
    #   self      = the current x coordinates
    #   yvec      = the parallel y coords
    #   start     = the initial x point
    #   endp      = the final point
    #   increment = the x coordinate increment
    #   baseline  = the default value if no values lie in a bin
    #   behavior  = response when multiple values fall to the same bin
    #     sum  => sums all values
    #     avg  => avgs the values
    #     high => takes the value at the highest x coordinate
    #     max  => takes the value of the highest y value [need to finalize]
    #     maxb => ?? [need to finalize]
    # An unknown behavior emits a warning and leaves every bin at baseline.
    def inc_x(yvec, start=0, endp=2047, increment=1.0, baseline=0.0, behavior="sum")
      xvec = self

      scale_factor = 1.0 / increment
      end_scaled   = ((endp * scale_factor) + 0.5).to_int
      start_scaled = ((start * scale_factor) + 0.5).to_int

      # the x values of the incremented vector; its size is
      # [start_scaled..end_scaled] = end_scaled - start_scaled + 1
      xvec_new_size = end_scaled - start_scaled + 1
      xvec_new = self.class.new(xvec_new_size)
      # We can't just use the start that is given, because we might have
      # needed to do some rounding on it
      xval = start_scaled / scale_factor
      xvec_new_size.times do |i|
        xvec_new[i] = xval
        xval += increment
      end

      # special case: no data
      if xvec.size == 0
        return [xvec_new, self.class.new(xvec_new.size, baseline)]
      end

      ## SCALE the x values to integer bin indices
      xvec_scaled = xvec.collect { |val| (val * scale_factor).round }

      ## FIND greatest index (max rather than last, so x need not be sorted)
      greatest = xvec_scaled.max

      ## the working arrays must have room for bins beyond end_scaled
      max_ind = greatest > end_scaled ? greatest : end_scaled

      ## CREATE array to hold mapped values and write in the baseline
      arr  = self.class.new(max_ind + 1, baseline)
      # same bins, but accumulated from zero (i.e. without the baseline)
      nobl = self.class.new(max_ind + 1, 0)

      case behavior
      when "sum"
        xvec_scaled.each_with_index do |ind, i|
          val = yvec[i]
          arr[ind] = nobl[ind] + val
          nobl[ind] += val
        end
      when "high"  ## FASTEST BEHAVIOR
        xvec_scaled.each_with_index do |ind, i|
          arr[ind] = yvec[i]
        end
      when "avg"
        count = Hash.new { |s, key| s[key] = 0 }
        xvec_scaled.each_with_index do |ind, i|
          val = yvec[i]
          arr[ind] = nobl[ind] + val
          nobl[ind] += val
          count[ind] += 1
        end
        count.each do |k, co|
          arr[k] /= co if co > 1
        end
      when "max"  # @TODO: finalize behavior of max and maxb
        xvec_scaled.each_with_index do |ind, i|
          val = yvec[i]
          if val > nobl[ind]
            arr[ind] = val
            nobl[ind] = val
          end
        end
      when "maxb"
        xvec_scaled.each_with_index do |ind, i|
          val = yvec[i]
          arr[ind] = val if val > arr[ind]
        end
      else
        warn "Not a valid behavior: #{behavior}, in one_dim\n"
      end

      # Wrap the slice explicitly: since Ruby 3.0 Array#[] returns a plain
      # Array even when called on a subclass.
      trimmed = self.class.new(arr[start_scaled..end_scaled])
      if xvec_new.size != trimmed.size
        # NOTE(review): abort terminates the whole process; kept so callers
        # see the same behavior as before.
        abort "xvec_new.size(#{xvec_new.size}) != trimmed.size(#{trimmed.size})"
      end
      [xvec_new, trimmed]
    end

    # Pearson's correlation coefficient between self (x) and +y+.
    #
    #   'E' is Capital Sigma
    #   r = EXY - (EXEY/N)
    #       -----------------
    #       sqrt( (EX^2 - (EX)^2/N) * (EY^2 - (EY)^2/N) )
    def pearsons_r(y)
      x = self
      sum_xy = @@zero
      sum_x  = @@zero
      sum_y  = @@zero
      sum_x2 = @@zero
      sum_y2 = @@zero
      n = x.size

      x.each_with_index do |xval, i|
        yval = y[i]
        sum_xy += xval * yval
        sum_x  += xval
        sum_y  += yval
        sum_x2 += xval**2
        sum_y2 += yval**2
      end

      top  = sum_xy.to_f - ((sum_x * sum_y).to_f / n)
      fbot = sum_x2.to_f - ((sum_x**2).to_f / n)
      sbot = sum_y2.to_f - ((sum_y**2).to_f / n)
      top / Math.sqrt(fbot * sbot)
    end

    # Returns (rsq, slope, y_intercept) of the least squares line through
    # the points (self[i], y[i]).
    #
    # Raises ArgumentError if y differs in size from self, if there are
    # fewer than 2 points, or if there are exactly two points with the same
    # x value.
    def rsq_slope_intercept(y)
      x = self
      raise ArgumentError, "y must have same size as self!" if y.size != x.size
      raise ArgumentError, "vectors must have 2 or more data points!" if x.size < 2

      if x.size == 2
        # Two points define the line exactly.  Values are converted to
        # floats so integer data is not subject to integer division.
        l  = x[1].to_f
        fl = y[1].to_f
        s  = x[0].to_f
        fs = y[0].to_f
        if s > l
          l, s = s, l
          fl, fs = fs, fl
        end
        raise ArgumentError, "two points same x" if l - s == 0
        slope = (fl - fs) / (l - s)
        # y = mx + b  =>  b = y - mx
        y_intercept = fl - (slope * l)
        return 1.0, slope, y_intercept
      end

      mean_x = x.avg
      mean_y = y.avg
      sum_sq_res_xx = @@zero
      sum_sq_res_yy = @@zero
      sum_sq_res_xy = @@zero
      x.each_index do |i|
        x_minus_mean_i = x[i].to_f - mean_x
        y_minus_mean_i = y[i].to_f - mean_y
        sum_sq_res_xx += x_minus_mean_i * x_minus_mean_i
        sum_sq_res_yy += y_minus_mean_i * y_minus_mean_i
        sum_sq_res_xy += x_minus_mean_i * y_minus_mean_i
      end
      slope = sum_sq_res_xy / sum_sq_res_xx
      y_intercept = mean_y - (slope * mean_x)
      rsq = (sum_sq_res_xy * sum_sq_res_xy) / (sum_sq_res_xx * sum_sq_res_yy)
      return rsq, slope, y_intercept
    end

    # Returns (mean, standard_dev) where standard_dev is the sample standard
    # deviation (n - 1 in the denominator; plain n when size == 1).
    # if size == 0 returns [nil, nil]
    def sample_stats
      _len = size
      return [nil, nil] if _len == 0
      _sum = 0.0
      _sum_sq = 0.0
      each do |val|
        _sum += val
        _sum_sq += val * val
      end
      variance = _sum_sq - ((_sum * _sum) / _len)
      variance /= (_len > 1 ? _len - 1 : 1)
      # Floating point cancellation can leave a tiny negative variance for
      # (nearly) constant data, which would make Math.sqrt raise.
      variance = 0.0 if variance < 0
      return _sum.to_f / _len, Math.sqrt(variance)
    end

    # moving average (slow, simple implementation)
    # Each output value is the average of the window reaching +pre+ elements
    # before and +post+ elements after the current index; the window is
    # truncated at both ends of the vector.
    # TODO: a rolling sum would avoid re-averaging every window.
    def moving_avg(pre=1, post=1)
      last_index = size - 1
      averaged = self.class.new(size)
      each_index do |index|
        first = index - pre
        first = 0 if first < 0
        last = index + post
        last = last_index if last > last_index
        averaged[index] = window(first, last).avg
      end
      averaged
    end

    # Returns (x, y) where any data points
    # in cartesion coords(self,y) that are > 'deviations' from the
    # least squares regression line are deleted.
    # The least squares line is recalculated and outliers tossed out
    # iteratively until no further points are tossed.
    # Without y, returns just x with the outliers of self removed.
    # In the future this may be for multiple vecs...
    def delete_outliers_iteratively(deviations, y=nil)
      x = self
      ln = x.size
      nx = nil
      ny = nil
      loop do
        answ = x.delete_outliers(deviations, y)
        if y
          nx, ny = answ
        else
          nx = answ
        end
        break if nx.size == ln   # nothing was tossed this round
        ln = nx.size
        x = nx
        y = ny  ## still nil if only x
      end
      y ? [nx, ny] : nx
    end

    # Like #delete_outliers_iteratively but, instead of the surviving data,
    # returns the sorted indices (relative to the original vector) of every
    # point that was tossed.  The receiver and +y+ are not modified.
    def outliers_iteratively(deviations, y=nil)
      xdup = dup
      ydup = y.dup if y
      all_indices = []
      loop do
        indices = xdup.outliers(deviations, ydup)
        all_indices << indices.dup
        break if indices.size == 0
        # delete from the back so earlier indices stay valid
        indices.reverse.each do |i|
          xdup.delete_at(i)
          ydup.delete_at(i) if y
        end
      end
      _correct_indices(all_indices)
    end

    # given indices that were deleted in sequential order, reconstruct
    # the original indices
    # e.g. ( '*' indicates that the index was deleted in that round )
    #     [ 0][  ][ 2][ 3][  ][*5][ 6][  ][ 8]
    #       |       |   |       /    /
    #     [*0][  ][ 2][*3][  ][ 5][  ][ 7][  ]
    #           /  _____/  _____/
    #     [  ][ 1][  ][*3][  ][ 5][  ][  ][  ]
    #           |        /
    #     [  ][*1][  ][  ][*4][  ][  ][  ][  ]
    #   ### -> ANSWER: [0,2,3,5,6,8]
    #
    # +indices+ is an array with one array of deleted indices per round,
    # in the order the rounds happened.
    def _correct_indices(indices)
      ## need to correct the indices based on what was deleted before
      corrected = indices.reverse.inject do |final, ind_ar|
        earlier = ind_ar.sort
        shifted = final.collect do |fi|
          # Walk the earlier deletions in ascending order and compare against
          # the *running* position: every deletion at or before it pushes the
          # index one slot to the right.
          earlier.each do |ind|
            fi += 1 if ind <= fi
          end
          fi
        end
        shifted.push(*ind_ar)
      end
      corrected.sort
    end

    # returns an ary of indices to outliers
    # if y is given, the residuals from the least squares between self and y are
    # calculated before finding outliers
    # An outlier is a value further than deviations * std_dev from the mean.
    def outliers(deviations, y=nil)
      distribution = y ? residuals_from_least_squares(y) : self
      mean, std_dev = distribution.sample_stats
      cutoff = deviations.to_f * std_dev
      indices = []
      distribution.each_with_index do |res, i|
        indices << i if (res - mean).abs > cutoff
      end
      indices
    end

    # Returns (x, y) where any data points
    # in cartesion coords(self,y) that are > 'deviations' from the
    # least squares regression line are deleted
    # (deviations will be converted to float)
    # Without y, returns just x with the outliers of self removed.
    # In the future this may be for multiple vecs...
    def delete_outliers(deviations, y=nil)
      nx = self.class.new
      ny = self.class.new if y
      distribution = y ? residuals_from_least_squares(y) : self
      mean, std_dev = distribution.sample_stats
      cutoff = deviations.to_f * std_dev
      distribution.each_with_index do |res, i|
        next if (res - mean).abs > cutoff
        nx << self[i]
        ny << y[i] if y
      end
      y ? [nx, ny] : nx
    end

    # Returns a NArray object (of doubles) with one residual per point,
    # measured against the least squares line of (self, y).
    # A point exactly on the line gets 0.0.
    # NOTE(review): the non-zero case evaluates
    # (ydiff/slope) / sin(atan(slope)); a slope of zero yields non-finite
    # values.  The formula is reproduced unchanged -- confirm the intent.
    def residuals_from_least_squares(y)
      _rsq, slope, intercept = rsq_slope_intercept(y)
      residuals = Runarray::NArray.float
      each_with_index do |val, i|
        expected_y = (slope * val) + intercept
        ydiff = y[i].to_f - expected_y
        if ydiff == 0.0
          residuals << 0.0
        else
          run = ydiff / slope
          residuals << run / (Math.sin(Math.atan(ydiff / run)))
        end
      end
      residuals
    end

    # Smallest element (nil when empty).
    def min
      mn = first
      each { |val| mn = val if val < mn }
      mn
    end

    # Largest element (nil when empty).
    def max
      mx = first
      each { |val| mx = val if val > mx }
      mx
    end

    # Shuffles the elements in place (Fisher-Yates, driven by Kernel#rand)
    # and returns self.
    def shuffle!
      (size - 1).downto(1) do |i|
        j = rand(i + 1)
        self[i], self[j] = self[j], self[i]
      end
      self
    end

    # Returns a shuffled copy.
    def shuffle
      dup.shuffle!
    end

    # returns an array of indices
    def max_indices
      indices_equal(max)
    end

    # returns the indices as an NArray which indicate the ascending order of
    # the values; tie goes to the value closest to the front of the list
    def order
      positions = Hash.new { |h, k| h[k] = [] }
      each_with_index do |val, i|
        positions[val] << i
      end
      ord = sort.map { |val| positions[val].shift }
      Runarray::NArray.new('int').replace(ord)
    end

    # returns an Array of indices where val == member
    def indices_equal(val)
      indices = []
      each_with_index do |v, i|
        indices << i if val == v
      end
      indices
    end

    # returns an array of indices
    def min_indices
      indices_equal(min)
    end

    # Returns (min, max)
    def min_max
      mn = first
      mx = first
      each do |val|
        mn = val if val < mn
        mx = val if val > mx
      end
      return mn, mx
    end

    # difference between max and min
    def spread
      (max - min).abs
    end

    # Always false.
    def nil?
      false
    end

    # THIS MUST BE FOR FLOAT AND DOUBLE ONLY!!!
    # This is a fairly precise Fortran->C translation of the SLATEC chim code
    # Evaluate the deriv at each x point
    # ASSUMES monotonicity of the X data points !!!!!
    # If length == 1 then derivs[0] is set to 0
    # If length == 0 then raises an ArgumentError
    # If length == 2 both derivatives are the slope between the two points
    # returns a new array of derivatives
    # Assumes that y values are Floats
    # if y is not given, the indices 0...size are used as the y values.
    # (The SLATEC error count of monotonicity switches is not tracked, since
    # this method never reported it.)
    def chim(y=nil)
      y = self.class.new((0...size).to_a) if y.nil?

      x = self
      derivs = Runarray::NArray.new(x.size)
      length = x.size
      last = length - 1

      if length < 2
        raise ArgumentError, "trying to chim with 0 data points!" unless length == 1
        derivs[0] = 0
        return derivs
      end

      h1 = x[1] - x[0]
      del1 = (y[1] - y[0]) / h1

      # special case length=2 --use linear interpolation
      if last < 2
        derivs[0] = del1
        derivs[1] = del1
        return derivs
      end

      # Normal case (length >= 3)

      h2 = x[2] - x[1]
      del2 = (y[2] - y[1]) / h2

      # SET D(1) VIA NON-CENTERED THREE-POINT FORMULA, ADJUSTED TO BE
      # SHAPE-PRESERVING.
      hsum = h1 + h2
      w1 = (h1 + hsum) / hsum
      w2 = (h1 * -1.0) / hsum
      derivs[0] = (w1 * del1) + (w2 * del2)
      if pchst(derivs[0], del1) <= 0
        derivs[0] = @@zero
      elsif pchst(del1, del2) < 0
        # need to do this check only if monotonicity switches
        dmax = del1 * 3.0
        derivs[0] = dmax if derivs[0].abs > dmax.abs
      end

      # interior points
      (1...last).each do |ind|
        if ind != 1
          # slide the pair of intervals one point to the right
          h1 = h2
          h2 = x[ind + 1] - x[ind]
          hsum = h1 + h2
          del1 = del2
          del2 = (y[ind + 1] - y[ind]) / h2
        end

        derivs[ind] = @@zero

        # the derivative stays zero unless both slopes share a sign
        next unless pchst(del1, del2) > 0

        hsumt3 = hsum + hsum + hsum
        w1 = (hsum + h1) / hsumt3
        w2 = (hsum + h2) / hsumt3
        dmax = self.class.max(del1.abs, del2.abs)
        dmin = self.class.min(del1.abs, del2.abs)
        drat1 = del1 / dmax
        drat2 = del2 / dmax
        derivs[ind] = dmin / (w1 * drat1 + w2 * drat2)
      end

      # last point: same non-centered formula as the first, mirrored
      w1 = (h2 * -1.0) / hsum
      w2 = (h2 + hsum) / hsum
      derivs[last] = (w1 * del1) + (w2 * del2)
      if pchst(derivs[last], del2) <= 0
        derivs[last] = @@zero
      elsif pchst(del1, del2) < 0
        # NEED DO THIS CHECK ONLY IF MONOTONICITY SWITCHES.
        dmax = 3.0 * del2
        derivs[last] = dmax if derivs[last].abs > dmax.abs
      end
      derivs
    end

    # called as (points, &block)
    # or (pre, post, &block)
    # points e.g. 3, means one before, current, and one after)
    # pre = 1 and post = 1 is 3 points
    # pre = 2 and post = 2 is 5 points
    # yields an object of the receiver's class with the objects to be acted
    # on and sets the value to the return value of the block.
    # Windows are truncated at both ends of the vector.
    # Raises ArgumentError unless given one or two arguments.
    def transform(*args, &block)
      pre, post =
        case args.size
        when 1
          half = (args[0] - 1) / 2
          [half, half]
        when 2
          args
        else
          raise(ArgumentError, "accepts (pre, post, &block), or (points, &block)")
        end
      trans = self.class.new(size)
      last_i = size - 1
      # TODO: could implement with rolling yielded array and be much faster...
      each_index do |i|
        start = i - pre
        stop = i + post
        start = 0 if start < 0
        stop = last_i if stop > last_i
        trans[i] = block.call(window(start, stop))
      end
      trans
    end

    private

    # Applies the block to (element, other[i]) when other is an NArray and
    # to (element, other) otherwise; collects the results in a new object
    # of the receiver's class.
    def elementwise(other)
      result = self.class.new
      if other.kind_of?(Runarray::NArray)
        each_with_index do |val, i|
          result << yield(val, other[i])
        end
      else
        each do |val|
          result << yield(val, other)
        end
      end
      result
    end

    # Elements first..last as an object of the receiver's class.
    # (Array#[] returns a plain Array for subclasses since Ruby 3.0.)
    def window(first, last)
      self.class.new(self[first..last])
    end

    # Sign of arg1 * arg2: 1, -1, or 0.
    def pchst(arg1, arg2)
      product = arg1 * arg2
      if product > 0
        1
      elsif product < 0
        -1
      else
        0
      end
    end

    # returns float
    def avg_ints(one, two)
      (one.to_f + two.to_f) / 2.0
    end
  end
end
|