runarray 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +11 -0
- data/README +36 -0
- data/Rakefile +149 -0
- data/lib/runarray/auto.rb +8 -0
- data/lib/runarray/narray.rb +1001 -0
- data/lib/runarray.rb +2 -0
- data/spec/runarray/narray_spec.rb +367 -0
- metadata +66 -0
@@ -0,0 +1,1001 @@
|
|
1
|
+
|
2
|
+
module Runarray
|
3
|
+
class NArray < Array
|
4
|
+
|
5
|
+
# Keep handles on Array's own map/select so that the overriding versions
# defined in this class can delegate to the originals.
alias old_map map
alias old_select select
|
7
|
+
|
8
|
+
class << self
|
9
|
+
|
10
|
+
# Convenience constructor: forwards to build with the 'float' typecode.
# dims - dimension sizes, passed through untouched.
def float(*dims)
  typecode = 'float'
  build(typecode, *dims)
end
|
13
|
+
|
14
|
+
# Builds a new array from the listed values, e.g. NArray[1, 2, 3].
# This used to be an empty stub returning nil; it now delegates to prep so
# that it agrees with the `self.[]` definition further down in the class.
def [](*args)
  prep(args)
end
|
16
|
+
|
17
|
+
# Creates a zero-filled array for the given typecode.
#
# typecode - 'float' (fill value 0.0) or 'int' (fill value 0); any other
#            value leaves the fill value nil.
# dims     - zero or one dimension size. With no size an empty array is
#            returned; with one size an array of that many zeros.
#
# Raises NotImplementedError when more than one dimension is requested
# (previously two dimensions slipped through the guard and returned nil,
# as did a call without any dimension).
def build(typecode, *dims)
  zero =
    case typecode
    when 'float' then 0.0
    when 'int' then 0
    end
  raise NotImplementedError, "dims <= 1 right now" if dims.size > 1
  dims.empty? ? self.new : self.new(dims.first, zero)
end
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
# Returns the larger of the two arguments (the first one on a tie).
def self.max(first, second)
  first >= second ? first : second
end
|
37
|
+
|
38
|
+
# Returns the smaller of the two arguments (the first one on a tie).
def self.min(first, second)
  first <= second ? first : second
end
|
43
|
+
|
44
|
+
# `dim` is a synonym for `size`.
alias_method :dim, :size
# NOTE: old_map / old_select are already aliased at the top of the class;
# the redundant second pair of alias_method calls was removed here.

# Zero value used to seed numeric accumulators throughout the class.
@@zero = 0.0

# Typecodes recognised by NArray.new / NArray.build.
TYPECODES = ['float', 'int'].freeze
|
50
|
+
|
51
|
+
# Accepts either the regular Array.new arguments or a leading typecode
# ('float' / 'int') followed by the arguments understood by NArray.build.
#
# The typecode form used to call build and throw the result away, leaving
# the new object empty; the built contents are now copied into self.
def initialize(*args)
  if TYPECODES.include?(args[0])
    built = self.class.build(*args)
    # build may return nil (e.g. no dimension given); then stay empty
    replace(built) if built
  else
    super(*args)
  end
end
|
58
|
+
|
59
|
+
## BASIC METHODS:
|
60
|
+
|
61
|
+
# Runs the block in log space: every element is replaced by its natural
# logarithm, the block is called with that array, and the block's result
# is exponentiated element-wise and returned as a new object of this class.
def log_space(&block)
  in_log = self.map { |value| Math.log(value) }
  transformed = block.call(in_log)
  restored = transformed.map { |value| Math.exp(value) }
  self.class.new(restored)
end
|
66
|
+
|
67
|
+
# Human readable form: the elements joined by ", " inside "[ ... ]".
def inspect
  format("[ %s ]", join(", "))
end
|
70
|
+
|
71
|
+
# Like Array#select, but the result is wrapped in an object of this class.
def select(&block)
  chosen = old_select(&block)
  self.class.new(chosen)
end
|
74
|
+
|
75
|
+
# Like Array#map, but the result is wrapped in an object of this class.
def map(&block)
  mapped = old_map(&block)
  self.class.new(mapped)
end
|
78
|
+
|
79
|
+
# NArray[1, 2, 3] style constructor: hands the listed values to prep.
def self.[](*ar)
  prep(ar)
end
|
82
|
+
|
83
|
+
# The elements joined by a single space.
def to_s
  join(" ")
end
|
86
|
+
|
87
|
+
#def dup
|
88
|
+
# self.class.new(self)
|
89
|
+
#end
|
90
|
+
|
91
|
+
#def ==(other)
|
92
|
+
# if other == nil
|
93
|
+
# return false
|
94
|
+
# end
|
95
|
+
# self.each_index do |i|
|
96
|
+
# if self[i] != other[i]
|
97
|
+
# return false
|
98
|
+
# end
|
99
|
+
# end
|
100
|
+
# true
|
101
|
+
#end
|
102
|
+
|
103
|
+
# Copies the elements, in order, into a plain Array.
def to_a
  each_with_object([]) { |item, plain| plain << item }
end
|
110
|
+
|
111
|
+
# for each value in mat, take a certain fraction and make it random
|
112
|
+
# random fraction can be from 0 to 2X the original fraction.
|
113
|
+
# Replaces every value, in place, by the value with a random share of it
# removed or added: `fraction * value` is subtracted and a random amount
# between 0 and twice that is added back (mirrored for negative values).
#
# fraction  - share of each value that is randomised.
# precision - number of random steps per unit.
#
# Returns self.
#
# When the scaled span truncates to zero (value 0, or a very small value)
# no noise is added. Previously rand(0) was called, which yields a Float in
# [0, 1) and therefore noise unrelated to the requested fraction.
def noisify!(fraction, precision=1000000)
  collect! do |val|
    part = fraction * val
    span = (2 * part * precision).to_i
    random = span.zero? ? 0.0 : rand(span).to_f / precision
    base = val - part
    val > 0 ? base + random : base - random
  end
end
|
126
|
+
|
127
|
+
# Placeholder: only loads the external 'histogram' library and returns the
# result of that require call; the arguments are currently ignored.
# NOTE(review): presumably meant to forward to that library - confirm.
def histogram(*arg)
  require('histogram')
end
|
130
|
+
|
131
|
+
# Takes input and converts to whatever internal representation
|
132
|
+
# SUBCLASS THIS GUY!
|
133
|
+
# Converts an incoming value to the internal element representation
# (a Float here). Subclasses override this to store another type.
def to_rep(val)
  val.to_f
end
|
136
|
+
|
137
|
+
# Accepts an Array, Vector, or list
|
138
|
+
# Returns a new Object
|
139
|
+
# Basically just does a to_rep on each element
|
140
|
+
# Builds a new object of this class from anything that answers `size` and
# integer `[]`, converting each element with to_rep.
def self.prep(input)
  fresh = self.new
  input.size.times do |idx|
    fresh[idx] = fresh.to_rep(input[idx])
  end
  fresh
end
|
147
|
+
|
148
|
+
########################################
|
149
|
+
# BREAD AND BUTTER
|
150
|
+
########################################
|
151
|
+
|
152
|
+
# the operator
|
153
|
+
#def operator(sym, other)
|
154
|
+
# nw = self.class.new
|
155
|
+
# if other.kind_of?(Vec)
|
156
|
+
# self.each_with_index do |val,i|
|
157
|
+
# nw << val.send(sym, other[i])
|
158
|
+
# end
|
159
|
+
# else
|
160
|
+
# self.each do |val|
|
161
|
+
# nw << val.send(sym, other)
|
162
|
+
# end
|
163
|
+
# end
|
164
|
+
# nw
|
165
|
+
#end
|
166
|
+
|
167
|
+
#def /(other); send('/'.to_sym, other) end
|
168
|
+
#def +(other); send('+'.to_sym, other) end
|
169
|
+
#def -(other); send('-'.to_sym, other) end
|
170
|
+
#def *(other); send('*'.to_sym, other) end
|
171
|
+
|
172
|
+
# Element-wise division. When other is a Runarray::NArray each element is
# divided by the element at the same index; otherwise every element is
# divided by other itself. Returns a new object of this class.
def /(other)
  quotients = self.class.new
  pairwise = other.kind_of?(Runarray::NArray)
  each_index do |idx|
    divisor = pairwise ? other[idx] : other
    quotients << self[idx] / divisor
  end
  quotients
end
|
185
|
+
|
186
|
+
# Element-wise exponentiation. When other is a Runarray::NArray each
# element is raised to the element at the same index; otherwise every
# element is raised to other. Returns a new object of this class.
def **(other)
  powers = self.class.new
  pairwise = other.kind_of?(Runarray::NArray)
  each_index do |idx|
    exponent = pairwise ? other[idx] : other
    powers << (self[idx] ** exponent)
  end
  powers
end
|
199
|
+
|
200
|
+
# Element-wise multiplication. When other is a Runarray::NArray each
# element is multiplied by the element at the same index; otherwise every
# element is multiplied by other. Returns a new object of this class.
def *(other)
  products = self.class.new
  pairwise = other.kind_of?(Runarray::NArray)
  each_index do |idx|
    factor = pairwise ? other[idx] : other
    products << self[idx] * factor
  end
  products
end
|
213
|
+
|
214
|
+
# Element-wise addition (not concatenation). When other is a
# Runarray::NArray the element at the same index is added; otherwise other
# is added to every element. Returns a new object of this class.
def +(other)
  sums = self.class.new
  pairwise = other.kind_of?(Runarray::NArray)
  each_index do |idx|
    addend = pairwise ? other[idx] : other
    sums << self[idx] + addend
  end
  sums
end
|
227
|
+
|
228
|
+
# Element-wise subtraction (not set difference). When other is a
# Runarray::NArray the element at the same index is subtracted; otherwise
# other is subtracted from every element. Returns a new object of this
# class.
def -(other)
  differences = self.class.new
  pairwise = other.kind_of?(Runarray::NArray)
  each_index do |idx|
    subtrahend = pairwise ? other[idx] : other
    differences << self[idx] - subtrahend
  end
  differences
end
|
241
|
+
|
242
|
+
# Returns a new object of this class holding the absolute value of each
# element.
def abs
  each_with_object(self.class.new) { |val, out| out << val.abs }
end
|
249
|
+
|
250
|
+
# Returns a new object of this class holding the floor of each element.
def floor
  each_with_object(self.class.new) { |val, out| out << val.floor }
end
|
257
|
+
|
258
|
+
# Adds up all elements, starting from the class-wide zero value.
def sum
  inject(@@zero) { |total, val| total + val }
end
|
265
|
+
|
266
|
+
# returns a float
|
267
|
+
# Arithmetic mean as a Float (sum divided by the number of elements).
def avg
  total = sum
  total.to_f / size
end
|
270
|
+
|
271
|
+
########################################
|
272
|
+
# MORE INVOLVED
|
273
|
+
########################################
|
274
|
+
|
275
|
+
# returns (new_x_coords, new_y_coords) of the same type as self
|
276
|
+
# Where:
|
277
|
+
# self = the current x coordinates
|
278
|
+
# yvec = the parallel y coords
|
279
|
+
# start = the initial x point
|
280
|
+
# endp = the final point
|
281
|
+
# increment = the x coordinate increment
|
282
|
+
# baseline = the default value if no values lie in a bin
|
283
|
+
# behavior = response when multiple values fall to the same bin
|
284
|
+
# sum => sums all values
|
285
|
+
# avg => avgs the values
|
286
|
+
# high => takes the value at the highest x coordinate
|
287
|
+
# max => takes the value of the highest y value [need to finalize]
|
288
|
+
# maxb => ?? [need to finalize]
|
289
|
+
# Re-bins (self, yvec) onto an evenly spaced x grid.
#
# yvec      - y values parallel to self (the x values).
# start     - first x value of the grid.
# endp      - last x value of the grid.
# increment - grid spacing.
# baseline  - value given to bins that receive no data.
# behavior  - what to do when several points fall into one bin:
#             "sum"  adds the values,
#             "avg"  averages them,
#             "high" keeps the value that comes last in self,
#             "max"  keeps the largest value above zero,
#             "maxb" keeps the largest value above the baseline.
#             Any other value emits a warning and leaves every bin at the
#             baseline.
#
# Returns [new_x, new_y], both objects of this class.
#
# Only the bins between start and endp are allocated and points outside
# that range are ignored. Earlier versions indexed a scratch array with the
# raw scaled x value: a negative index wrapped around and overwrote the
# last bin, and an out-of-order x larger than the final element crashed in
# "sum"/"avg"/"max". In-range results are unchanged.
def inc_x(yvec, start=0, endp=2047, increment=1.0, baseline=0.0, behavior="sum")
  scale_factor = 1.0 / increment
  end_scaled   = ((endp * scale_factor) + 0.5).to_int
  start_scaled = ((start * scale_factor) + 0.5).to_int

  # x grid; built from the rounded start because start/endp may have moved
  nbins = end_scaled - start_scaled + 1
  xvec_new = self.class.new(nbins)
  grid_x = start_scaled / scale_factor
  nbins.times do |i|
    xvec_new[i] = grid_x
    grid_x += increment
  end

  # special case: no data
  return [xvec_new, self.class.new(nbins, baseline)] if size == 0

  bins = self.class.new(nbins, baseline)
  nobl = self.class.new(nbins, 0)   # accumulators that ignore the baseline

  # [bin slot, y value] for every point that lands inside the grid
  placed = []
  each_with_index do |xval, i|
    slot = (xval * scale_factor).round - start_scaled
    placed << [slot, yvec[i]] if slot >= 0 && slot < nbins
  end

  case behavior
  when "sum"
    placed.each do |slot, val|
      nobl[slot] += val
      bins[slot] = nobl[slot]
    end
  when "high"
    placed.each { |slot, val| bins[slot] = val }
  when "avg"
    counts = Hash.new(0)
    placed.each do |slot, val|
      nobl[slot] += val
      bins[slot] = nobl[slot]
      counts[slot] += 1
    end
    counts.each { |slot, n| bins[slot] /= n if n > 1 }
  when "max" # @TODO: finalize behavior of max and maxb
    placed.each do |slot, val|
      if val > nobl[slot]
        bins[slot] = val
        nobl[slot] = val
      end
    end
  when "maxb"
    placed.each { |slot, val| bins[slot] = val if val > bins[slot] }
  else
    warn "Not a valid behavior: #{behavior}, in one_dim\n"
  end

  [xvec_new, bins]
end
|
378
|
+
|
379
|
+
# Pearson's correlation coefficient between self (x) and y:
#
#   r = (Sxy - Sx*Sy/n) / sqrt( (Sxx - Sx^2/n) * (Syy - Sy^2/n) )
#
# where S denotes a sum over all points and n is the size of self.
def pearsons_r(y)
  n = size
  sxy = sx = sy = sxx = syy = @@zero

  each_with_index do |xv, idx|
    yv = y[idx]
    sxy += xv * yv
    sx  += xv
    sy  += yv
    sxx += xv**2
    syy += yv**2
  end

  numerator = sxy.to_f - ((sx * sy).to_f/n)
  x_term = sxx.to_f - ((sx**2).to_f/n)
  y_term = syy.to_f - ((sy**2).to_f/n)
  numerator / Math.sqrt(x_term * y_term)
end
|
408
|
+
|
409
|
+
# Returns (rsq, slope, y_intercept)
|
410
|
+
# Least squares line through the points (self, y).
#
# Returns [rsq, slope, y_intercept].
#
# Raises ArgumentError when y differs in size from self, when there are
# fewer than two points, or when exactly two points share the same x.
#
# With exactly two points the line passes through both (rsq is 1.0). The
# slope is now computed in floating point; integer inputs used to be
# truncated by integer division, unlike the general branch.
def rsq_slope_intercept(y)
  x = self
  raise ArgumentError, "y must have same size as self!" if y.size != x.size
  raise ArgumentError, "vectors must have 2 or more data points!" if x.size < 2

  if x.size == 2
    lo_x, lo_y, hi_x, hi_y = x[0], y[0], x[1], y[1]
    lo_x, lo_y, hi_x, hi_y = hi_x, hi_y, lo_x, lo_y if lo_x > hi_x
    raise ArgumentError, "two points same x" if hi_x - lo_x == 0
    slope = (hi_y - lo_y).to_f / (hi_x - lo_x)
    # y = mx + b  =>  b = y - mx
    y_intercept = hi_y - (slope * hi_x)
    return 1.0, slope, y_intercept
  end

  mean_x = x.avg
  mean_y = y.avg
  # every addend below is a Float, so plain 0.0 seeds are sufficient
  ss_xx = 0.0
  ss_yy = 0.0
  ss_xy = 0.0
  x.each_index do |i|
    dx = x[i].to_f - mean_x
    dy = y[i].to_f - mean_y
    ss_xx += dx * dx
    ss_yy += dy * dy
    ss_xy += dx * dy
  end
  slope = ss_xy / ss_xx
  y_intercept = mean_y - (slope * mean_x)
  rsq = (ss_xy * ss_xy) / (ss_xx * ss_yy)
  return rsq, slope, y_intercept
end
|
445
|
+
|
446
|
+
# Returns (mean, standard_dev)
|
447
|
+
# if size == 0 returns [nil, nil]
|
448
|
+
def sample_stats
|
449
|
+
_len = size
|
450
|
+
return [nil, nil] if _len == 0
|
451
|
+
_sum = 0.0
|
452
|
+
_sum_sq = 0.0
|
453
|
+
self.each do |val|
|
454
|
+
_sum += val
|
455
|
+
_sum_sq += val * val
|
456
|
+
end
|
457
|
+
std_dev = _sum_sq - ((_sum * _sum)/_len)
|
458
|
+
std_dev /= ( _len > 1 ? _len-1 : 1 )
|
459
|
+
std_dev = Math.sqrt(std_dev)
|
460
|
+
mean = _sum.to_f/_len
|
461
|
+
return mean, std_dev
|
462
|
+
end
|
463
|
+
|
464
|
+
# moving average (slow, simple implementation)
|
465
|
+
# Moving average (simple, slow implementation).
#
# pre  - number of elements before the current one to include.
# post - number of elements after the current one to include.
#
# The window is clipped at both ends of the array. Returns a new object of
# this class.
#
# The window is wrapped in self.class before calling avg: since Ruby 3.0
# slicing an Array subclass returns a plain Array, which has no avg.
def moving_avg(pre=1, post=1)
  last_index = size - 1
  averaged = self.class.new(size)
  each_index do |index|
    first_i = [index - pre, 0].max
    final_i = [index + post, last_index].min
    window = self.class.new(self[first_i..final_i])
    averaged[index] = window.avg
  end
  averaged
end
|
477
|
+
|
478
|
+
=begin
|
479
|
+
# in progress on this guy: moving average
|
480
|
+
def moving_avg(pre=1, post=1)
|
481
|
+
ar_size = size
|
482
|
+
mv_avg = self.class.new(size)
|
483
|
+
window_size = pre + post + 1
|
484
|
+
window_size_f = window_size.to_f
|
485
|
+
sum = 0.0
|
486
|
+
|
487
|
+
# do the first bit
|
488
|
+
if post + 1 > ar_size
|
489
|
+
post = ar_size
|
490
|
+
end
|
491
|
+
|
492
|
+
post_p1 = post+1
|
493
|
+
(0...(post_p1)).each do |i|
|
494
|
+
sum += self[i]
|
495
|
+
end
|
496
|
+
self[0] = sum / (post_p1)
|
497
|
+
|
498
|
+
((post+1)...window_size).each do |add_i|
|
499
|
+
sum += self[add_i]
|
500
|
+
self[add_i - (post_p1)] = sum /
|
501
|
+
end
|
502
|
+
|
503
|
+
# the middle bit
|
504
|
+
(window_size...(size - window_size)).each do |i|
|
505
|
+
sum -= self[i - pre]
|
506
|
+
sum += self[i + post]
|
507
|
+
mv_avg[i] = sum / window_size_f
|
508
|
+
end
|
509
|
+
|
510
|
+
# do the last bit
|
511
|
+
((size - window_size)...size).each do |i|
|
512
|
+
window_size -= 1
|
513
|
+
mv_avg[i] = sum / window_size
|
514
|
+
end
|
515
|
+
|
516
|
+
mv_avg
|
517
|
+
end
|
518
|
+
|
519
|
+
=end
|
520
|
+
|
521
|
+
# Returns (x, y) where any data points
|
522
|
+
# in Cartesian coords (self, y) that are > 'deviations' from the
|
523
|
+
# least squares regression line are deleted.
|
524
|
+
# The least squares line is recalculated and outliers tossed out
|
525
|
+
# iteratively until no further points are tossed.
|
526
|
+
# In the future this may be for multiple vecs...
|
527
|
+
# Calls delete_outliers repeatedly, each time on the previous result,
# until a pass removes nothing.
#
# Returns [x, y] when y is given, otherwise just x.
def delete_outliers_iteratively(deviations, y=nil)
  cur_x = self
  cur_y = y
  known_size = cur_x.size
  kept_x = nil
  kept_y = nil
  loop do
    result = cur_x.delete_outliers(deviations, cur_y)
    if cur_y
      kept_x, kept_y = result
    else
      kept_x = result
    end
    break if kept_x.size == known_size
    known_size = kept_x.size
    cur_x = kept_x
    cur_y = kept_y ## still nil if only x
  end
  cur_y ? [kept_x, kept_y] : kept_x
end
|
553
|
+
|
554
|
+
# Finds outliers round after round on working copies of self (and y),
# deleting each round's outliers before the next, until a round finds
# none. The per-round indices are then translated back to positions in the
# original array by _correct_indices.
def outliers_iteratively(deviations, y=nil)
  work_x = dup
  work_y = y ? y.dup : nil
  rounds = []
  loop do
    found = work_x.outliers(deviations, work_y)
    rounds << found.dup
    break if found.size == 0
    # delete from the back so earlier indices stay valid
    found.reverse.each do |i|
      work_x.delete_at(i)
      work_y.delete_at(i) if y
    end
  end
  _correct_indices(rounds)
end
|
573
|
+
|
574
|
+
# given indices that were deleted in sequential order, reconstruct
|
575
|
+
# the original indices
|
576
|
+
# e.g. ( '*' indicates that the index was deleted in that round )
|
577
|
+
# [ 0][ ][ 2][ 3][ ][*5][ 6][ ][ 8]
|
578
|
+
# | | | / /
|
579
|
+
# [*0][ ][ 2][*3][ ][ 5][ ][ 7][ ]
|
580
|
+
# / _____/ _____/
|
581
|
+
# [ ][ 1][ ][*3][ ][ 5][ ][ ][ ]
|
582
|
+
# | /
|
583
|
+
# [ ][*1][ ][ ][*4][ ][ ][ ][ ]
|
584
|
+
# ### -> ANSWER: [0,2,3,5,6,8]
|
585
|
+
# Translates indices that were deleted round after round back to indices
# into the original array.
#
# indices - Array of Arrays; element k holds the indices deleted in round k,
#           expressed relative to the array as it looked in that round.
#
# Returns a sorted Array of original indices ([] when no rounds are given).
#
# Working backwards from the last round, every later index is shifted up
# by one for each earlier deletion at or below it. The comparison uses the
# running (already shifted) index and the earlier deletions in ascending
# order; comparing against the unshifted index undercounted whenever
# several earlier deletions preceded it (rounds [[0, 1], [0]] gave
# [0, 1, 1] instead of [0, 1, 2]).
def _correct_indices(indices)
  return [] if indices.empty?
  restored = indices.reverse.inject do |later, removed|
    ascending = removed.sort
    shifted = later.map do |idx|
      ascending.each { |gone| idx += 1 if gone <= idx }
      idx
    end
    shifted + removed
  end
  restored.sort
end
|
602
|
+
|
603
|
+
# returns an ary of indices to outliers
|
604
|
+
# if y is given, the residuals from the least squares between self and y are
|
605
|
+
# calculated before finding outliers
|
606
|
+
# Indices of the elements lying more than `deviations` sample standard
# deviations from the mean.
#
# deviations - cutoff in standard deviations (converted with to_f).
# y          - optional; when given, the residuals from the least squares
#              fit of (self, y) are examined instead of self.
#
# Returns a plain Array of indices. An empty receiver without y yields []
# (it used to fail with a TypeError because there is no standard deviation).
def outliers(deviations, y=nil)
  return [] if y.nil? && empty?
  distribution = y ? residuals_from_least_squares(y) : self
  mean, std_dev = distribution.sample_stats
  cutoff = deviations.to_f * std_dev
  distribution.each_index.select do |i|
    (distribution[i] - mean).abs > cutoff
  end
end
|
623
|
+
|
624
|
+
# Returns (x, y) where any data points
|
625
|
+
# in Cartesian coords (self, y) that are > 'deviations' from the
|
626
|
+
# least squares regression line are deleted
|
627
|
+
# (deviations will be converted to float)
|
628
|
+
# In the future this may be for multiple vecs...
|
629
|
+
# Copy of self (and of y) without the points lying more than `deviations`
# sample standard deviations from the mean.
#
# deviations - cutoff in standard deviations (converted with to_f).
# y          - optional; when given, the residuals from the least squares
#              fit of (self, y) decide which points are dropped.
#
# Returns [x, y] when y is given, otherwise x; all are objects of this
# class. An empty receiver without y yields an empty object (it used to
# fail with a TypeError because there is no standard deviation).
def delete_outliers(deviations, y=nil)
  return self.class.new if y.nil? && empty?
  distribution = y ? residuals_from_least_squares(y) : self
  mean, std_dev = distribution.sample_stats
  cutoff = deviations.to_f * std_dev

  kept_x = self.class.new
  kept_y = y ? self.class.new : nil
  distribution.each_with_index do |res, i|
    next if (res - mean).abs > cutoff
    kept_x << self[i]
    kept_y << y[i] if y
  end
  y ? [kept_x, kept_y] : kept_x
end
|
656
|
+
|
657
|
+
# Returns a NArray object (of doubles)
|
658
|
+
# Residual of every point (self[i], y[i]) relative to the least squares
# line through (self, y).
#
# Returns a Runarray::NArray of Floats; a point lying exactly on the line
# contributes 0.0.
#
# The result container is created with Runarray::NArray.new; the former
# Runarray::NArray.float call (no dimension) evaluated to nil, so the first
# append raised NoMethodError.
#
# NOTE(review): the residual is computed as
#   run / sin(atan(ydiff / run))   with run = ydiff / slope,
# which is not finite for a slope of 0 - confirm the intended distance
# measure before relying on horizontal fits.
def residuals_from_least_squares(y)
  _rsq, slope, intercept = rsq_slope_intercept(y)
  residuals = Runarray::NArray.new
  each_with_index do |val, i|
    expected_y = (slope * val) + intercept
    ydiff = y[i].to_f - expected_y
    if ydiff == 0.0
      residuals << 0.0
    else
      run = ydiff / slope
      residuals << run / Math.sin(Math.atan(ydiff / run))
    end
  end
  residuals
end
|
673
|
+
|
674
|
+
# Smallest element (the earliest one on ties); nil for an empty array.
def min
  inject { |lowest, val| val < lowest ? val : lowest }
end
|
681
|
+
|
682
|
+
# Largest element (the earliest one on ties); nil for an empty array.
def max
  inject { |highest, val| val > highest ? val : highest }
end
|
689
|
+
|
690
|
+
# Shuffles the elements in place by walking from the last index down to 1
# and swapping each position with a randomly chosen position at or below
# it. Returns self.
def shuffle!
  (1...size).reverse_each do |upper|
    pick = rand(upper + 1)
    self[upper], self[pick] = self[pick], self[upper]
  end
  self
end
|
702
|
+
|
703
|
+
# Shuffled copy; the receiver is left untouched.
def shuffle
  copy = dup
  copy.shuffle!
end
|
706
|
+
|
707
|
+
# returns an array of indices
|
708
|
+
# Indices of every element equal to the maximum, as a plain Array.
def max_indices
  peak = max
  each_index.select { |i| peak == self[i] }
end
|
711
|
+
|
712
|
+
# returns the indices, as an 'int' NArray object, which indicate the ascending order of
|
713
|
+
# the values tie goes to the value closest to the front of the list
|
714
|
+
# For every value in ascending order, the index it occupies in self; equal
# values are listed front to back. The result is a Runarray::NArray
# created with the 'int' typecode.
def order
  positions = Hash.new { |hash, key| hash[key] = [] }
  each_with_index { |value, i| positions[value] << i }
  ranked = sort.map { |value| positions[value].shift }
  Runarray::NArray.new('int').replace(ranked)
end
|
725
|
+
|
726
|
+
# returns an Array of indices where val == member
|
727
|
+
# Indices of every element that val is == to, as a plain Array.
def indices_equal(val)
  each_index.select { |i| val == self[i] }
end
|
736
|
+
|
737
|
+
# returns an array of indices
|
738
|
+
# Indices of every element equal to the minimum, as a plain Array.
def min_indices
  trough = min
  each_index.select { |i| trough == self[i] }
end
|
741
|
+
|
742
|
+
|
743
|
+
# Returns (min, max)
|
744
|
+
# Smallest and largest element found in a single pass.
# Returns [min, max] ([nil, nil] for an empty array).
def min_max
  lowest = highest = first
  each do |val|
    lowest = val if val < lowest
    highest = val if val > highest
  end
  [lowest, highest]
end
|
753
|
+
|
754
|
+
=begin
|
755
|
+
# complete rewrite of the
|
756
|
+
# returns empty derivs for size == 0
|
757
|
+
# returns 0 for size == 1
|
758
|
+
# does linear interpolation for size == 2
|
759
|
+
# does three point derivative for everything else
|
760
|
+
# zero_for_inflection is the "adjusted to be shape-preserving" spoken of in
|
761
|
+
# the SLATEC chim code.
|
762
|
+
def derivs(y, zero_for_inflection=true)
|
763
|
+
dvs = self.class.new(size)
|
764
|
+
x = self
|
765
|
+
case size
|
766
|
+
when 0
|
767
|
+
dvs
|
768
|
+
when 1
|
769
|
+
dvs[0] = 0
|
770
|
+
dvs
|
771
|
+
when 2
|
772
|
+
slope = (y[1] - y[0])/(x[1] - x[0])
|
773
|
+
dvs[0], dvs[1] = slope, slope
|
774
|
+
else
|
775
|
+
dvs[0] = (y[1] - y[0])/(x[1] - x[0])
|
776
|
+
cnt = 1
|
777
|
+
x.zip(y).each_cons(3) do |pre,cur,post|
|
778
|
+
pre_x, pre_y = pre
|
779
|
+
post_x, post_y = post
|
780
|
+
cur_x, cur_y = cur
|
781
|
+
dvs[cnt] =
|
782
|
+
if zero_for_inflection
|
783
|
+
r_post_slope = post_y <=> cur_y
|
784
|
+
r_pre_slope = pre_y <=> cur_y
|
785
|
+
if r_post_slope != 0 and r_pre_slope != 0 and (r_post_slope * -1) == r_pre_slope
|
786
|
+
0
|
787
|
+
else
|
788
|
+
three_point_deriv(pre, cur, post)
|
789
|
+
end
|
790
|
+
else
|
791
|
+
three_point_deriv(pre, cur, post)
|
792
|
+
end
|
793
|
+
cnt += 1
|
794
|
+
end
|
795
|
+
dvs
|
796
|
+
end
|
797
|
+
end
|
798
|
+
=end
|
799
|
+
|
800
|
+
# difference between max and min
|
801
|
+
# Absolute difference between the largest and the smallest element.
def spread
  highest = max
  lowest = min
  (highest - lowest).abs
end
|
804
|
+
|
805
|
+
# An array object is never nil.
def nil?; false; end
|
808
|
+
|
809
|
+
# Class functions:
|
810
|
+
# THIS MUST BE FOR FLOAT AND DOUBLE ONLY!!!
|
811
|
+
# This is a fairly precise Fortran->C translation of the SLATEC chim code
|
812
|
+
# Evaluate the deriv at each x point
|
813
|
+
# return 1 if less than 2 data points
|
814
|
+
# return 0 if no errors
|
815
|
+
# ASSUMES monotonicity of the X data points !!!!!
|
816
|
+
# ASSUMES that this->length() >= 2
|
817
|
+
# If length == 1 then derivs[0] is set to 0
|
818
|
+
# If length == 0 then raises an ArgumentError
|
819
|
+
# returns a new array of derivatives
|
820
|
+
# Assumes that y values are Floats
|
821
|
+
# if y is not given, then values are assumed to be evenly spaced.
|
822
|
+
# Derivative estimate at every x point, following the SLATEC chim routine
# (three-point formulas adjusted to be shape-preserving).
#
# y - values parallel to self (the x coordinates); when omitted, an object
#     of this class holding 0, 1, 2, ... is used in its place.
#
# Returns a Runarray::NArray of derivatives: [0] for a single point, the
# slope of the connecting line (twice) for two points.
# Raises ArgumentError for an empty receiver.
def chim(y=nil)
  y = self.class.new((0...(self.size)).to_a) if y.nil?

  x = self
  npts = x.size
  derivs = Runarray::NArray.new(npts)
  last = npts - 1
  three = 3.0
  ierr = 0   # monotonicity-switch counter from the original routine (not returned)

  if npts < 2
    raise ArgumentError, "trying to chim with 0 data points!" unless npts == 1
    derivs[0] = 0
    return derivs
  end

  h1 = x[1] - x[0]
  del1 = (y[1] - y[0]) / h1
  dsave = del1

  # special case: two points -- use linear interpolation
  if last < 2
    derivs[0] = del1
    derivs[1] = del1
    return derivs
  end

  # Normal case (three or more points)
  h2 = x[2] - x[1]
  del2 = (y[2] - y[1]) / h2

  # first point: non-centered three-point formula, shape-preserving
  hsum = h1 + h2
  w1 = (h1 + hsum)/hsum
  w2 = (h1*-1.0)/hsum
  derivs[0] = (w1*del1) + (w2*del2)
  if pchst(derivs[0], del1) <= 0
    derivs[0] = @@zero
  elsif pchst(del1, del2) < 0
    # only needed when monotonicity switches
    dmax = del1 * three
    derivs[0] = dmax if derivs[0].abs > dmax.abs
  end

  # interior points
  1.upto(last - 1) do |ind|
    unless ind == 1
      h1 = h2
      h2 = x[ind+1] - x[ind]
      hsum = h1 + h2
      del1 = del2
      del2 = (y[ind+1] - y[ind])/h2
    end

    derivs[ind] = @@zero
    sign = pchst(del1, del2)

    if sign > 0
      # same direction on both sides
      hsumt3 = hsum+hsum+hsum
      w1 = (hsum + h1)/hsumt3
      w2 = (hsum + h2)/hsumt3
      dmax = self.class.max( del1.abs, del2.abs )
      dmin = self.class.min( del1.abs, del2.abs )
      drat1 = del1/dmax
      drat2 = del2/dmax
      derivs[ind] = dmin/(w1*drat1 + w2*drat2)
    elsif sign < 0
      # direction changes: derivative stays zero
      ierr = ierr + 1
      dsave = del2
    else
      # at least one side is flat
      unless del2 == @@zero
        ierr = ierr + 1 if pchst(dsave, del2) < 0
        dsave = del2
      end
    end
  end

  # last point: non-centered three-point formula, shape-preserving
  w1 = (h2*-1.0)/hsum
  w2 = (h2 + hsum)/hsum
  derivs[last] = (w1*del1) + (w2*del2)
  if pchst(derivs[last], del2) <= 0
    derivs[last] = @@zero
  elsif pchst(del1, del2) < 0
    # only needed when monotonicity switches
    dmax = three*del2
    derivs[last] = dmax if derivs[last].abs > dmax.abs
  end
  derivs
end
|
932
|
+
|
933
|
+
|
934
|
+
# called as (points, &block)
|
935
|
+
# or (pre, post, &block)
|
936
|
+
# points e.g. 3, means one before, current, and one after)
|
937
|
+
# pre = 1 and post = 1 is 3 points
|
938
|
+
# pre = 2 and post = 2 is 5 points
|
939
|
+
# yields a Vec object with the objects to be acted on and sets the value to
|
940
|
+
# the return value of the block.
|
941
|
+
# Sliding-window transformation.
#
# Called as transform(points, &block) or transform(pre, post, &block):
#   points - window width; 3 means one element before, the current one and
#            one after (pre = post = (points - 1) / 2).
#   pre    - number of elements before the current one.
#   post   - number of elements after the current one.
#
# For every position the block receives the window (clipped at the array
# ends) as an object of this class, and the block's return value becomes
# the new element. Returns a new object of this class.
#
# Raises ArgumentError unless one or two arguments are given.
#
# The window is wrapped in self.class explicitly: since Ruby 3.0 slicing an
# Array subclass returns a plain Array, so blocks relying on this class's
# methods stopped working.
def transform(*args, &block)
  pre, post =
    case args.size
    when 1
      half = (args[0] - 1) / 2
      [half, half]
    when 2
      args
    else
      raise(ArgumentError, "accepts (pre, post, &block), or (points, &block)")
    end

  result = self.class.new(size)
  last_i = size - 1
  # TODO: could implement with rolling yielded array and be much faster...
  each_index do |i|
    first_i = [i - pre, 0].max
    final_i = [i + post, last_i].min
    result[i] = block.call(self.class.new(self[first_i..final_i]))
  end
  result
end
|
964
|
+
|
965
|
+
|
966
|
+
private
|
967
|
+
|
968
|
+
# Sign test used by chim: 1 when the product of the two arguments is
# positive, -1 when it is negative, 0 otherwise.
def pchst(arg1, arg2)
  product = arg1 * arg2
  return 1 if product > 0
  return -1 if product < 0
  0
end
|
977
|
+
|
978
|
+
# returns float
|
979
|
+
# Mean of two numbers, always returned as a Float.
def avg_ints(one, two)
  total = one.to_f + two.to_f
  total / 2.0
end
|
982
|
+
end
|
983
|
+
|
984
|
+
|
985
|
+
|
986
|
+
|
987
|
+
#class VecD < Vec
|
988
|
+
#end
|
989
|
+
|
990
|
+
#class VecI < Vec
|
991
|
+
#tmp = $VERBOSE ; $VERBOSE = nil
|
992
|
+
#@@zero = 0
|
993
|
+
#$VERBOSE = tmp
|
994
|
+
|
995
|
+
#def to_rep(val)
|
996
|
+
#val.to_i
|
997
|
+
#end
|
998
|
+
#end
|
999
|
+
|
1000
|
+
|
1001
|
+
end
|