daru 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +6 -6
- data/.gitignore +2 -0
- data/CONTRIBUTING.md +7 -3
- data/History.md +36 -0
- data/README.md +21 -13
- data/Rakefile +16 -1
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +44 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru.gemspec +29 -5
- data/lib/daru.rb +30 -1
- data/lib/daru/accessors/array_wrapper.rb +2 -2
- data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
- data/lib/daru/core/group_by.rb +112 -31
- data/lib/daru/core/merge.rb +170 -0
- data/lib/daru/core/query.rb +95 -0
- data/lib/daru/dataframe.rb +335 -223
- data/lib/daru/date_time/index.rb +550 -0
- data/lib/daru/date_time/offsets.rb +397 -0
- data/lib/daru/index.rb +266 -54
- data/lib/daru/io/io.rb +1 -2
- data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
- data/lib/daru/maths/arithmetic/vector.rb +2 -2
- data/lib/daru/maths/statistics/dataframe.rb +58 -8
- data/lib/daru/maths/statistics/vector.rb +229 -0
- data/lib/daru/vector.rb +230 -80
- data/lib/daru/version.rb +1 -1
- data/spec/core/group_by_spec.rb +16 -16
- data/spec/core/merge_spec.rb +52 -0
- data/spec/core/query_spec.rb +171 -0
- data/spec/dataframe_spec.rb +278 -280
- data/spec/date_time/data_spec.rb +199 -0
- data/spec/date_time/index_spec.rb +433 -0
- data/spec/date_time/offsets_spec.rb +371 -0
- data/spec/fixtures/stock_data.csv +500 -0
- data/spec/index_spec.rb +317 -11
- data/spec/io/io_spec.rb +18 -17
- data/spec/math/arithmetic/dataframe_spec.rb +3 -3
- data/spec/math/statistics/dataframe_spec.rb +39 -1
- data/spec/math/statistics/vector_spec.rb +163 -1
- data/spec/monkeys_spec.rb +4 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/vector_spec.rb +125 -60
- metadata +71 -14
- data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
- data/lib/daru/multi_index.rb +0 -216
- data/spec/multi_index_spec.rb +0 -216
data/lib/daru/io/io.rb
CHANGED
@@ -58,7 +58,6 @@ module Daru
|
|
58
58
|
end
|
59
59
|
|
60
60
|
# Functions for loading/writing CSV files
|
61
|
-
|
62
61
|
def from_csv path, opts={}
|
63
62
|
opts[:col_sep] ||= ','
|
64
63
|
opts[:converters] ||= :numeric
|
@@ -79,7 +78,7 @@ module Daru
|
|
79
78
|
yield csv if block_given?
|
80
79
|
|
81
80
|
csv_as_arrays = csv.to_a
|
82
|
-
headers = csv_as_arrays[0].recode_repeated.map
|
81
|
+
headers = csv_as_arrays[0].recode_repeated.map
|
83
82
|
csv_as_arrays.delete_at 0
|
84
83
|
csv_as_arrays = csv_as_arrays.transpose
|
85
84
|
|
@@ -64,8 +64,8 @@ module Daru
|
|
64
64
|
|
65
65
|
hsh = {}
|
66
66
|
all_vectors.each do |vector_name|
|
67
|
-
this = self .has_vector?(vector_name) ? self
|
68
|
-
that = other.has_vector?(vector_name) ? other
|
67
|
+
this = self .has_vector?(vector_name) ? self[vector_name] : nil
|
68
|
+
that = other.has_vector?(vector_name) ? other[vector_name] : nil
|
69
69
|
|
70
70
|
if this and that
|
71
71
|
hsh[vector_name] = this.send(operation, that)
|
@@ -68,8 +68,8 @@ module Daru
|
|
68
68
|
index = (@index.to_a | other.index.to_a).sort
|
69
69
|
|
70
70
|
index.each do |idx|
|
71
|
-
this = self[idx]
|
72
|
-
that = other[idx]
|
71
|
+
this = self.index.include?(idx) ? self[idx] : nil
|
72
|
+
that = other.index.include?(idx) ? other[idx] : nil
|
73
73
|
|
74
74
|
if this and that
|
75
75
|
elements << this.send(operation ,that)
|
@@ -23,8 +23,12 @@ module Daru
|
|
23
23
|
end
|
24
24
|
|
25
25
|
# Calculate the maximum value of each numeric vector.
|
26
|
-
def max
|
27
|
-
|
26
|
+
def max opts={}
|
27
|
+
if opts[:vector]
|
28
|
+
self.row[*self[opts[:vector]].max_index.index.to_a]
|
29
|
+
else
|
30
|
+
compute_stats :max
|
31
|
+
end
|
28
32
|
end
|
29
33
|
|
30
34
|
# Calculate the minimum value of each numeric vector.
|
@@ -37,13 +41,45 @@ module Daru
|
|
37
41
|
compute_stats :product
|
38
42
|
end
|
39
43
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
+
# @!method cumsum
|
45
|
+
# Calculate cumulative sum of each numeric Vector
|
46
|
+
# @!method standardize
|
47
|
+
# Standardize each Vector
|
48
|
+
# @!method acf
|
49
|
+
# Calculate Autocorrelation coefficient
|
50
|
+
# @param [Integer] max_lags (nil) Number of initial lags
|
51
|
+
# @!method ema
|
52
|
+
# Calculate exponential moving average.
|
53
|
+
# @param [Integer] n (10) Loopback length.
|
54
|
+
# @param [TrueClass, FalseClass, NilClass] wilder (false) If true,
|
55
|
+
# 1/n value is used for smoothing; if false, uses 2/(n+1) value.
|
56
|
+
# @!method rolling_mean
|
57
|
+
# Calculate moving averages
|
58
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
59
|
+
# @!method rolling_median
|
60
|
+
# Calculate moving median
|
61
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
62
|
+
# @!method rolling_max
|
63
|
+
# Calculate moving max
|
64
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
65
|
+
# @!method rolling_min
|
66
|
+
# Calculate moving min
|
67
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
68
|
+
# @!method rolling_count
|
69
|
+
# Calculate moving non-missing count
|
70
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
71
|
+
# @!method rolling_std
|
72
|
+
# Calculate moving standard deviation
|
73
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
74
|
+
# @!method rolling_variance
|
75
|
+
# Calculate moving variance
|
76
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
77
|
+
[:cumsum,:standardize,:acf,:ema,:rolling_mean,:rolling_median,:rolling_max,
|
78
|
+
:rolling_min,:rolling_count,:rolling_std,:rolling_variance, :rolling_sum
|
79
|
+
].each do |meth|
|
80
|
+
define_method(meth) do |*args|
|
81
|
+
apply_method_to_numerics meth, *args
|
44
82
|
end
|
45
|
-
|
46
|
-
df
|
47
83
|
end
|
48
84
|
|
49
85
|
# Create a summary of mean, standard deviation, count, max and min of
|
@@ -105,6 +141,20 @@ module Daru
|
|
105
141
|
|
106
142
|
private
|
107
143
|
|
144
|
+
def apply_method_to_numerics method, *args
|
145
|
+
order = []
|
146
|
+
computed = @vectors.to_a.inject([]) do |memo, n|
|
147
|
+
v = @data[@vectors[n]]
|
148
|
+
if v.type == :numeric
|
149
|
+
memo << v.send(method, *args)
|
150
|
+
order << n
|
151
|
+
end
|
152
|
+
memo
|
153
|
+
end
|
154
|
+
|
155
|
+
Daru::DataFrame.new(computed, index: @index, order: order ,clone: false)
|
156
|
+
end
|
157
|
+
|
108
158
|
def vector_cov v1a, v2a
|
109
159
|
sum_of_squares(v1a,v2a) / (v1a.size - 1)
|
110
160
|
end
|
@@ -122,6 +122,16 @@ module Daru
|
|
122
122
|
end
|
123
123
|
end
|
124
124
|
|
125
|
+
# Count number of occurrences of each value in the Vector
|
126
|
+
def value_counts
|
127
|
+
values = {}
|
128
|
+
@data.each do |d|
|
129
|
+
values[d] ? values[d] += 1 : values[d] = 1
|
130
|
+
end
|
131
|
+
|
132
|
+
Daru::Vector.new(values)
|
133
|
+
end
|
134
|
+
|
125
135
|
def proportion value=1
|
126
136
|
frequencies[value].quo(n_valid).to_f
|
127
137
|
end
|
@@ -354,6 +364,225 @@ module Daru
|
|
354
364
|
end
|
355
365
|
end
|
356
366
|
|
367
|
+
# Performs the difference of the series.
|
368
|
+
# Note: The first difference of series is X(t) - X(t-1)
|
369
|
+
# But, second difference of series is NOT X(t) - X(t-2)
|
370
|
+
# It is the first difference of the first difference
|
371
|
+
# => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
|
372
|
+
#
|
373
|
+
# == Arguments
|
374
|
+
#
|
375
|
+
#* *max_lags*: integer, (default: 1), number of differences required.
|
376
|
+
#
|
377
|
+
# @example Using #diff
|
378
|
+
#
|
379
|
+
# ts = Daru::Vector.new((1..10).map { rand })
|
380
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
381
|
+
#
|
382
|
+
# ts.diff # => [nil, -0.46, 0.21, 0.27, ...]
|
383
|
+
#
|
384
|
+
# @return [Daru::Vector]
|
385
|
+
def diff(max_lags = 1)
|
386
|
+
ts = self
|
387
|
+
difference = []
|
388
|
+
max_lags.times do
|
389
|
+
difference = ts - ts.lag
|
390
|
+
ts = difference
|
391
|
+
end
|
392
|
+
difference
|
393
|
+
end
|
394
|
+
|
395
|
+
# Calculate the rolling function for a loopback value.
|
396
|
+
#
|
397
|
+
# @param [Symbol] function The rolling function to be applied. Can be
|
398
|
+
# any function applicable to Daru::Vector (:mean, :median, :count,
|
399
|
+
# :min, :max, etc.)
|
400
|
+
# @param [Integer] n (10) A non-negative value which serves as the loopback length.
|
401
|
+
# @return [Daru::Vector] Vector containing rolling calculations.
|
402
|
+
# @example Using #rolling
|
403
|
+
# ts = Daru::Vector.new((1..100).map { rand })
|
404
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
405
|
+
# # first 9 observations are nil
|
406
|
+
# ts.rolling(:mean) # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
|
407
|
+
def rolling function, n=10
|
408
|
+
Daru::Vector.new(
|
409
|
+
[nil] * (n - 1) +
|
410
|
+
(0..(size - n)).map do |i|
|
411
|
+
Daru::Vector.new(@data[i...(i + n)]).send(function)
|
412
|
+
end, index: @index
|
413
|
+
)
|
414
|
+
end
|
415
|
+
|
416
|
+
# @!method rolling_mean
|
417
|
+
# Calculate rolling average
|
418
|
+
# @param [Integer] n (10) Loopback length
|
419
|
+
# @!method rolling_median
|
420
|
+
# Calculate rolling median
|
421
|
+
# @param [Integer] n (10) Loopback length
|
422
|
+
# @!method rolling_count
|
423
|
+
# Calculate rolling non-missing count
|
424
|
+
# @param [Integer] n (10) Loopback length
|
425
|
+
# @!method rolling_max
|
426
|
+
# Calculate rolling max value
|
427
|
+
# @param [Integer] n (10) Loopback length
|
428
|
+
# @!method rolling_min
|
429
|
+
# Calculate rolling min value
|
430
|
+
# @param [Integer] n (10) Loopback length
|
431
|
+
# @!method rolling_sum
|
432
|
+
# Calculate rolling sum
|
433
|
+
# @param [Integer] n (10) Loopback length
|
434
|
+
# @!method rolling_std
|
435
|
+
# Calculate rolling standard deviation
|
436
|
+
# @param [Integer] n (10) Loopback length
|
437
|
+
# @!method rolling_variance
|
438
|
+
# Calculate rolling variance
|
439
|
+
# @param [Integer] n (10) Loopback length
|
440
|
+
[:count, :mean, :median, :max, :min, :sum, :std, :variance].each do |meth|
|
441
|
+
define_method("rolling_#{meth}".to_sym) do |n=10|
|
442
|
+
rolling(meth, n)
|
443
|
+
end
|
444
|
+
end
|
445
|
+
|
446
|
+
# Exponential Moving Average.
|
447
|
+
# Calculates an exponential moving average of the series using a
|
448
|
+
# specified parameter. If wilder is false (the default) then the EMA
|
449
|
+
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
|
450
|
+
# Welles Wilder smoother of 1 / n.
|
451
|
+
#
|
452
|
+
# Warning for EMA usage: EMAs are unstable for small series, as they
|
453
|
+
# use a lot more than n observations to calculate. The series is stable
|
454
|
+
# if the size of the series is >= 3.45 * (n + 1)
|
455
|
+
#
|
456
|
+
# @param [Integer] n (10) Loopback length.
|
457
|
+
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
458
|
+
# used for smoothing; if false, uses 2/(n+1) value
|
459
|
+
#
|
460
|
+
# @example Using ema
|
461
|
+
#
|
462
|
+
# ts = (1..100).map { rand }.to_ts
|
463
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
464
|
+
#
|
465
|
+
# # first 9 observations are nil
|
466
|
+
# ts.ema # => [ ... nil, 0.509... , 0.433..., ... ]
|
467
|
+
#
|
468
|
+
# @return [Daru::Vector] Contains EMA
|
469
|
+
def ema(n = 10, wilder = false)
|
470
|
+
smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
|
471
|
+
# need to start everything from the first non-nil observation
|
472
|
+
start = @data.index { |i| i != nil }
|
473
|
+
# first n - 1 observations are nil
|
474
|
+
base = [nil] * (start + n - 1)
|
475
|
+
# nth observation is just a moving average
|
476
|
+
base << @data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n
|
477
|
+
(start + n).upto size - 1 do |i|
|
478
|
+
base << self[i] * smoother + (1 - smoother) * base.last
|
479
|
+
end
|
480
|
+
|
481
|
+
Daru::Vector.new(base, index: @index)
|
482
|
+
end
|
483
|
+
|
484
|
+
# Moving Average Convergence-Divergence.
|
485
|
+
# Calculates the MACD (moving average convergence-divergence) of the time
|
486
|
+
# series - this is a comparison of a fast EMA with a slow EMA.
|
487
|
+
#
|
488
|
+
# == Arguments
|
489
|
+
#* *fast*: integer, (default = 12) - fast component of MACD
|
490
|
+
#* *slow*: integer, (default = 26) - slow component of MACD
|
491
|
+
#* *signal*: integer, (default = 9) - signal component of MACD
|
492
|
+
#
|
493
|
+
# == Usage
|
494
|
+
#
|
495
|
+
# ts = Daru::Vector.new((1..100).map { rand })
|
496
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
497
|
+
# ts.macd(13)
|
498
|
+
#
|
499
|
+
# == Returns
|
500
|
+
#
|
501
|
+
# Array of two Daru::Vectors - comparison of fast EMA with slow and EMA with
|
502
|
+
# signal value
|
503
|
+
def macd(fast = 12, slow = 26, signal = 9)
|
504
|
+
series = ema(fast) - ema(slow)
|
505
|
+
[series, series.ema(signal)]
|
506
|
+
end
|
507
|
+
|
508
|
+
# Calculates the autocorrelation coefficients of the series.
|
509
|
+
#
|
510
|
+
# The first element is always 1, since that is the correlation
|
511
|
+
# of the series with itself.
|
512
|
+
#
|
513
|
+
# @example
|
514
|
+
# ts = Daru::Vector.new((1..100).map { rand })
|
515
|
+
#
|
516
|
+
# ts.acf # => array with first 21 autocorrelations
|
517
|
+
# ts.acf 3 # => array with first 4 autocorrelations (lags 0 to 3)
|
518
|
+
def acf(max_lags = nil)
|
519
|
+
max_lags ||= (10 * Math.log10(size)).to_i
|
520
|
+
|
521
|
+
(0..max_lags).map do |i|
|
522
|
+
if i == 0
|
523
|
+
1.0
|
524
|
+
else
|
525
|
+
m = self.mean
|
526
|
+
# can't use Pearson coefficient since the mean for the lagged series should
|
527
|
+
# be the same as the regular series
|
528
|
+
((self - m) * (self.lag(i) - m)).sum / self.variance_sample / (self.size - 1)
|
529
|
+
end
|
530
|
+
end
|
531
|
+
end
|
532
|
+
|
533
|
+
# Provides autocovariance.
|
534
|
+
#
|
535
|
+
# == Options
|
536
|
+
#
|
537
|
+
#* *:demean* = true; optional. Supply false if series is not to be demeaned
|
538
|
+
#* *:unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
|
539
|
+
#
|
540
|
+
# == Returns
|
541
|
+
#
|
542
|
+
# Autocovariance value
|
543
|
+
def acvf(demean = true, unbiased = true)
|
544
|
+
opts = {
|
545
|
+
demean: true,
|
546
|
+
unbaised: true
|
547
|
+
}.merge(opts)
|
548
|
+
|
549
|
+
demean = opts[:demean]
|
550
|
+
unbiased = opts[:unbiased]
|
551
|
+
if demean
|
552
|
+
demeaned_series = self - self.mean
|
553
|
+
else
|
554
|
+
demeaned_series = self
|
555
|
+
end
|
556
|
+
|
557
|
+
n = (10 * Math.log10(size)).to_i + 1
|
558
|
+
m = self.mean
|
559
|
+
if unbiased
|
560
|
+
d = Array.new(self.size, self.size)
|
561
|
+
else
|
562
|
+
d = ((1..self.size).to_a.reverse)[0..n]
|
563
|
+
end
|
564
|
+
|
565
|
+
0.upto(n - 1).map do |i|
|
566
|
+
(demeaned_series * (self.lag(i) - m)).sum / d[i]
|
567
|
+
end
|
568
|
+
end
|
569
|
+
|
570
|
+
# Calculate cumulative sum of Vector
|
571
|
+
def cumsum
|
572
|
+
result = []
|
573
|
+
acc = 0
|
574
|
+
@data.each do |d|
|
575
|
+
if @missing_values.has_key?(d)
|
576
|
+
result << nil
|
577
|
+
else
|
578
|
+
acc += d
|
579
|
+
result << acc
|
580
|
+
end
|
581
|
+
end
|
582
|
+
|
583
|
+
Daru::Vector.new(result, index: @index)
|
584
|
+
end
|
585
|
+
|
357
586
|
alias :sdp :standard_deviation_population
|
358
587
|
alias :sds :standard_deviation_sample
|
359
588
|
alias :std :sds
|
data/lib/daru/vector.rb
CHANGED
@@ -103,7 +103,7 @@ module Daru
|
|
103
103
|
set_name name
|
104
104
|
|
105
105
|
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
|
106
|
-
@index =
|
106
|
+
@index = try_create_index(index || @data.size)
|
107
107
|
|
108
108
|
if @index.size > @data.size
|
109
109
|
cast(dtype: :array) # NM with nils seg faults
|
@@ -200,32 +200,22 @@ module Daru
|
|
200
200
|
# # For vectors employing hierarchical multi index
|
201
201
|
#
|
202
202
|
def [](*indexes)
|
203
|
-
indexes.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
|
204
203
|
location = indexes[0]
|
205
204
|
if @index.is_a?(MultiIndex)
|
205
|
+
sub_index = @index[indexes]
|
206
206
|
result =
|
207
|
-
if
|
208
|
-
|
209
|
-
elsif location.is_a?(Range)
|
210
|
-
arry = location.inject([]) do |memo, num|
|
211
|
-
memo << element_from_numeric_index(num)
|
212
|
-
memo
|
213
|
-
end
|
214
|
-
|
215
|
-
new_index = Daru::MultiIndex.new(@index.to_a[location])
|
216
|
-
Daru::Vector.new(arry, index: new_index, name: @name, dtype: dtype)
|
207
|
+
if sub_index.is_a?(Integer)
|
208
|
+
@data[sub_index]
|
217
209
|
else
|
218
|
-
|
210
|
+
elements = sub_index.map do |tuple|
|
211
|
+
@data[@index[tuple]]
|
212
|
+
end
|
219
213
|
|
220
|
-
if
|
221
|
-
|
222
|
-
else
|
223
|
-
elements = sub_index.map do |tuple|
|
224
|
-
@data[@index[(indexes + tuple)]]
|
225
|
-
end
|
226
|
-
Daru::Vector.new(elements, index: Daru::MultiIndex.new(sub_index.to_a),
|
227
|
-
name: @name, dtype: @dtype)
|
214
|
+
if !indexes[0].is_a?(Range) and indexes.size < @index.width
|
215
|
+
sub_index = sub_index.drop_left_level indexes.size
|
228
216
|
end
|
217
|
+
Daru::Vector.new(
|
218
|
+
elements, index: sub_index, name: @name, dtype: @dtype)
|
229
219
|
end
|
230
220
|
|
231
221
|
return result
|
@@ -233,23 +223,24 @@ module Daru
|
|
233
223
|
unless indexes[1]
|
234
224
|
case location
|
235
225
|
when Range
|
236
|
-
|
237
|
-
|
238
|
-
|
226
|
+
first = location.first
|
227
|
+
last = location.last
|
228
|
+
indexes = @index.slice first, last
|
229
|
+
else
|
230
|
+
pos = @index[location]
|
231
|
+
if pos.is_a?(Numeric)
|
232
|
+
return @data[pos]
|
239
233
|
else
|
240
|
-
|
241
|
-
last = location.last
|
242
|
-
|
243
|
-
(first..last)
|
234
|
+
indexes = pos
|
244
235
|
end
|
245
|
-
indexes = @index[range]
|
246
|
-
else
|
247
|
-
return element_from_numeric_index(location)
|
248
236
|
end
|
237
|
+
else
|
238
|
+
indexes = indexes.map { |e| named_index_for(e) }
|
249
239
|
end
|
250
240
|
|
251
|
-
Daru::Vector.new
|
252
|
-
|
241
|
+
Daru::Vector.new(
|
242
|
+
indexes.map { |loc| @data[@index[loc]] },
|
243
|
+
name: @name, index: indexes, dtype: @dtype)
|
253
244
|
end
|
254
245
|
end
|
255
246
|
|
@@ -274,19 +265,13 @@ module Daru
|
|
274
265
|
@possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and
|
275
266
|
!value.nil?)
|
276
267
|
|
277
|
-
|
278
|
-
|
279
|
-
index_for location
|
280
|
-
else
|
281
|
-
index_for location[0]
|
282
|
-
end
|
268
|
+
location = location[0] unless @index.is_a?(MultiIndex)
|
269
|
+
pos = @index[location]
|
283
270
|
|
284
|
-
if pos.is_a?(
|
285
|
-
pos.each do |sub_tuple|
|
286
|
-
self[*(location + sub_tuple)] = value
|
287
|
-
end
|
288
|
-
else
|
271
|
+
if pos.is_a?(Numeric)
|
289
272
|
@data[pos] = value
|
273
|
+
else
|
274
|
+
pos.each { |tuple| self[tuple] = value }
|
290
275
|
end
|
291
276
|
|
292
277
|
set_size
|
@@ -334,16 +319,132 @@ module Daru
|
|
334
319
|
self[index] == other[index]
|
335
320
|
end
|
336
321
|
else
|
337
|
-
|
322
|
+
super
|
323
|
+
end
|
324
|
+
end
|
325
|
+
|
326
|
+
# @!method eq
|
327
|
+
# Uses `==` and returns `true` for each **equal** entry
|
328
|
+
# @param [#==, Daru::Vector] If scalar object, compares it with each
|
329
|
+
# element in self. If Daru::Vector, compares elements with same indexes.
|
330
|
+
# @example (see #where)
|
331
|
+
# @!method not_eq
|
332
|
+
# Uses `!=` and returns `true` for each **unequal** entry
|
333
|
+
# @param [#!=, Daru::Vector] If scalar object, compares it with each
|
334
|
+
# element in self. If Daru::Vector, compares elements with same indexes.
|
335
|
+
# @example (see #where)
|
336
|
+
# @!method lt
|
337
|
+
# Uses `<` and returns `true` for each entry **less than** the supplied object
|
338
|
+
# @param [#<, Daru::Vector] If scalar object, compares it with each
|
339
|
+
# element in self. If Daru::Vector, compares elements with same indexes.
|
340
|
+
# @example (see #where)
|
341
|
+
# @!method lteq
|
342
|
+
# Uses `<=` and returns `true` for each entry **less than or equal to** the supplied object
|
343
|
+
# @param [#<=, Daru::Vector] If scalar object, compares it with each
|
344
|
+
# element in self. If Daru::Vector, compares elements with same indexes.
|
345
|
+
# @example (see #where)
|
346
|
+
# @!method mt
|
347
|
+
# Uses `>` and returns `true` for each entry **more than** the supplied object
|
348
|
+
# @param [#>, Daru::Vector] If scalar object, compares it with each
|
349
|
+
# element in self. If Daru::Vector, compares elements with same indexes.
|
350
|
+
# @example (see #where)
|
351
|
+
# @!method mteq
|
352
|
+
# Uses `>=` and returns `true` for each entry **more than or equal to** the supplied object
|
353
|
+
# @param [#>=, Daru::Vector] If scalar object, compares it with each
|
354
|
+
# element in self. If Daru::Vector, compares elements with same indexes.
|
355
|
+
# @example (see #where)
|
356
|
+
|
357
|
+
# Define the comparator methods with metaprogramming. See documentation
|
358
|
+
# written above for functionality of each method. Use these methods with the
|
359
|
+
# `where` method to obtain the corresponding Vector/DataFrame.
|
360
|
+
{
|
361
|
+
:eq => :==,
|
362
|
+
:not_eq => :!=,
|
363
|
+
:lt => :<,
|
364
|
+
:lteq => :<=,
|
365
|
+
:mt => :>,
|
366
|
+
:mteq => :>=,
|
367
|
+
}.each do |method, operator|
|
368
|
+
define_method(method) do |other|
|
369
|
+
mod = Daru::Core::Query
|
370
|
+
if other.is_a?(Daru::Vector)
|
371
|
+
mod.apply_vector_operator operator, self, other
|
372
|
+
else
|
373
|
+
mod.apply_scalar_operator operator, @data,other
|
374
|
+
end
|
338
375
|
end
|
339
376
|
end
|
377
|
+
alias :gt :mt
|
378
|
+
alias :gteq :mteq
|
379
|
+
|
380
|
+
# Comparator for checking if any of the elements in *other* exist in self.
|
381
|
+
#
|
382
|
+
# @param [Array, Daru::Vector] other A collection which has elements that
|
383
|
+
# need to be checked for in self.
|
384
|
+
# @example Usage of `in`.
|
385
|
+
# vector = Daru::Vector.new([1,2,3,4,5])
|
386
|
+
# vector.where(vector.in([3,5]))
|
387
|
+
# #=>
|
388
|
+
# ##<Daru::Vector:82215960 @name = nil @size = 2 >
|
389
|
+
# # nil
|
390
|
+
# # 2 3
|
391
|
+
# # 4 5
|
392
|
+
def in other
|
393
|
+
other = Hash[other.zip(Array.new(other.size, 0))]
|
394
|
+
Daru::Core::Query::BoolArray.new(
|
395
|
+
@data.inject([]) do |memo, d|
|
396
|
+
memo << (other.has_key?(d) ? true : false)
|
397
|
+
memo
|
398
|
+
end
|
399
|
+
)
|
400
|
+
end
|
401
|
+
|
402
|
+
# Return a new vector based on the contents of a boolean array. Use with the
|
403
|
+
# comparator methods to obtain meaningful results. See this notebook for
|
404
|
+
# a good overview of using #where.
|
405
|
+
#
|
406
|
+
# @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The
|
407
|
+
# collection containing the true or false values. Each element in the Vector
|
408
|
+
# corresponding to a `true` in the bool_arry will be returned along with its
|
409
|
+
# index.
|
410
|
+
# @example Usage of #where.
|
411
|
+
# vector = Daru::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
|
412
|
+
#
|
413
|
+
# # Simple logic statement passed to #where.
|
414
|
+
# vector.where(vector.eq(5).or(vector.eq(1)))
|
415
|
+
# # =>
|
416
|
+
# ##<Daru::Vector:77626210 @name = nil @size = 7 >
|
417
|
+
# # nil
|
418
|
+
# # 2 5
|
419
|
+
# # 4 5
|
420
|
+
# # 7 5
|
421
|
+
# # 10 1
|
422
|
+
# # 11 5
|
423
|
+
# # 13 5
|
424
|
+
# # 15 1
|
425
|
+
#
|
426
|
+
# # A somewhat more complex logic statement
|
427
|
+
# vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
|
428
|
+
# #=>
|
429
|
+
# ##<Daru::Vector:81072310 @name = nil @size = 7 >
|
430
|
+
# # nil
|
431
|
+
# # 2 5
|
432
|
+
# # 4 5
|
433
|
+
# # 7 5
|
434
|
+
# # 10 1
|
435
|
+
# # 11 5
|
436
|
+
# # 13 5
|
437
|
+
# # 15 1
|
438
|
+
def where bool_arry
|
439
|
+
Daru::Core::Query.vector_where @data.to_a, @index.to_a, bool_arry, self.dtype
|
440
|
+
end
|
340
441
|
|
341
442
|
def head q=10
|
342
443
|
self[0..(q-1)]
|
343
444
|
end
|
344
445
|
|
345
446
|
def tail q=10
|
346
|
-
self[(@size - q
|
447
|
+
self[(@size - q)..(@size-1)]
|
347
448
|
end
|
348
449
|
|
349
450
|
# Reports whether missing data is present in the Vector.
|
@@ -354,20 +455,12 @@ module Daru
|
|
354
455
|
|
355
456
|
|
356
457
|
# Append an element to the vector by specifying the element and index
|
357
|
-
def concat element, index
|
458
|
+
def concat element, index
|
358
459
|
raise IndexError, "Expected new unique index" if @index.include? index
|
359
460
|
|
360
|
-
|
361
|
-
@index = create_index(@size + 1)
|
362
|
-
index = @size
|
363
|
-
else
|
364
|
-
begin
|
365
|
-
@index = create_index(@index + index)
|
366
|
-
rescue StandardError => e
|
367
|
-
raise e, "Expected valid index."
|
368
|
-
end
|
369
|
-
end
|
461
|
+
@index = @index | [index]
|
370
462
|
@data[@index[index]] = element
|
463
|
+
|
371
464
|
set_size
|
372
465
|
set_missing_positions unless Daru.lazy_update
|
373
466
|
end
|
@@ -394,14 +487,8 @@ module Daru
|
|
394
487
|
|
395
488
|
# Delete element by index
|
396
489
|
def delete_at index
|
397
|
-
|
398
|
-
@
|
399
|
-
|
400
|
-
if @index.index_class == Integer
|
401
|
-
@index = Daru::Index.new @size-1
|
402
|
-
else
|
403
|
-
@index = Daru::Index.new (@index.to_a - [idx])
|
404
|
-
end
|
490
|
+
@data.delete_at @index[index]
|
491
|
+
@index = Daru::Index.new(@index.to_a - [index])
|
405
492
|
|
406
493
|
set_size
|
407
494
|
set_missing_positions unless Daru.lazy_update
|
@@ -482,9 +569,9 @@ module Daru
|
|
482
569
|
|
483
570
|
order = opts[:ascending] ? :ascending : :descending
|
484
571
|
vector, index = send(opts[:type], @data.to_a.dup, @index.to_a, order, &block)
|
485
|
-
index =
|
572
|
+
index = Daru::Index.new index
|
486
573
|
|
487
|
-
Daru::Vector.new(vector, index:
|
574
|
+
Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
|
488
575
|
end
|
489
576
|
|
490
577
|
# Just sort the data and get an Array in return using Enumerable#sort.
|
@@ -515,9 +602,31 @@ module Daru
|
|
515
602
|
self
|
516
603
|
end
|
517
604
|
|
605
|
+
# Delete an element if block returns true. Destructive.
|
518
606
|
def delete_if &block
|
519
607
|
return to_enum(:delete_if) unless block_given?
|
520
608
|
|
609
|
+
keep_e = []
|
610
|
+
keep_i = []
|
611
|
+
each_with_index do |n, i|
|
612
|
+
unless yield(n)
|
613
|
+
keep_e << n
|
614
|
+
keep_i << i
|
615
|
+
end
|
616
|
+
end
|
617
|
+
|
618
|
+
@data = cast_vector_to @dtype, keep_e
|
619
|
+
@index = Daru::Index.new(keep_i)
|
620
|
+
set_missing_positions unless Daru.lazy_update
|
621
|
+
set_size
|
622
|
+
|
623
|
+
self
|
624
|
+
end
|
625
|
+
|
626
|
+
# Keep an element if block returns true. Destructive.
|
627
|
+
def keep_if &block
|
628
|
+
return to_enum(:keep_if) unless block_given?
|
629
|
+
|
521
630
|
keep_e = []
|
522
631
|
keep_i = []
|
523
632
|
each_with_index do |n, i|
|
@@ -528,7 +637,7 @@ module Daru
|
|
528
637
|
end
|
529
638
|
|
530
639
|
@data = cast_vector_to @dtype, keep_e
|
531
|
-
@index =
|
640
|
+
@index = Daru::Index.new(keep_i)
|
532
641
|
set_missing_positions unless Daru.lazy_update
|
533
642
|
set_size
|
534
643
|
|
@@ -664,10 +773,33 @@ module Daru
|
|
664
773
|
self
|
665
774
|
end
|
666
775
|
|
776
|
+
# Lags the series by k periods.
|
777
|
+
#
|
778
|
+
# The convention is to set the oldest observations (the first ones
|
779
|
+
# in the series) to nil so that the size of the lagged series is the
|
780
|
+
# same as the original.
|
781
|
+
#
|
782
|
+
# Usage:
|
783
|
+
#
|
784
|
+
# ts = Daru::Vector.new((1..10).map { rand })
|
785
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
786
|
+
#
|
787
|
+
# ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
|
788
|
+
# ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
|
789
|
+
def lag k=1
|
790
|
+
return self.dup if k == 0
|
791
|
+
|
792
|
+
dat = @data.to_a.dup
|
793
|
+
(dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
|
794
|
+
(0...k).each { |i| dat[i] = nil }
|
795
|
+
|
796
|
+
Daru::Vector.new(dat, index: @index, name: @name)
|
797
|
+
end
|
798
|
+
|
667
799
|
def detach_index
|
668
800
|
Daru::DataFrame.new({
|
669
|
-
index: @index.to_a
|
670
|
-
|
801
|
+
index: @index.to_a,
|
802
|
+
values: @data.to_a
|
671
803
|
})
|
672
804
|
end
|
673
805
|
|
@@ -826,13 +958,31 @@ module Daru
|
|
826
958
|
content
|
827
959
|
end
|
828
960
|
|
829
|
-
# Create a new vector with a different index
|
830
|
-
#
|
831
|
-
# @param new_index [Symbol, Array, Daru::Index] The new index. Passing *:seq*
|
832
|
-
# will reindex with sequential numbers from 0 to (n-1).
|
961
|
+
# Create a new vector with a different index, and preserve the indexing of
|
962
|
+
# current elements.
|
833
963
|
def reindex new_index
|
834
|
-
|
835
|
-
|
964
|
+
vector = Daru::Vector.new([], index: new_index, name: @name)
|
965
|
+
|
966
|
+
new_index.each do |idx|
|
967
|
+
if @index.include?(idx)
|
968
|
+
vector[idx] = self[idx]
|
969
|
+
else
|
970
|
+
vector[idx] = nil
|
971
|
+
end
|
972
|
+
end
|
973
|
+
|
974
|
+
vector
|
975
|
+
end
|
976
|
+
|
977
|
+
def index= idx
|
978
|
+
raise ArgumentError,
|
979
|
+
"Size of supplied index #{index.size} does not match size of DataFrame" if
|
980
|
+
idx.size != self.size
|
981
|
+
raise ArgumentError, "Can only assign type Index and its subclasses." unless
|
982
|
+
idx.kind_of?(Daru::Index)
|
983
|
+
|
984
|
+
@index = idx
|
985
|
+
self
|
836
986
|
end
|
837
987
|
|
838
988
|
# Give the vector a new name
|
@@ -844,7 +994,7 @@ module Daru
|
|
844
994
|
return
|
845
995
|
end
|
846
996
|
|
847
|
-
@name = new_name
|
997
|
+
@name = new_name
|
848
998
|
end
|
849
999
|
|
850
1000
|
# Duplicate elements and indexes
|
@@ -1166,8 +1316,8 @@ module Daru
|
|
1166
1316
|
def set_name name
|
1167
1317
|
@name =
|
1168
1318
|
if name.is_a?(Numeric) then name
|
1169
|
-
elsif name.is_a?(Array) then name.join
|
1170
|
-
elsif name then name
|
1319
|
+
elsif name.is_a?(Array) then name.join # in case of MultiIndex tuple
|
1320
|
+
elsif name then name # anything but Numeric or nil
|
1171
1321
|
else
|
1172
1322
|
nil
|
1173
1323
|
end
|
@@ -1180,7 +1330,7 @@ module Daru
|
|
1180
1330
|
end
|
1181
1331
|
end
|
1182
1332
|
|
1183
|
-
def
|
1333
|
+
def try_create_index potential_index
|
1184
1334
|
if potential_index.is_a?(Daru::MultiIndex) or potential_index.is_a?(Daru::Index)
|
1185
1335
|
potential_index
|
1186
1336
|
else
|