daru 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +6 -6
- data/.gitignore +2 -0
- data/CONTRIBUTING.md +7 -3
- data/History.md +36 -0
- data/README.md +21 -13
- data/Rakefile +16 -1
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +44 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru.gemspec +29 -5
- data/lib/daru.rb +30 -1
- data/lib/daru/accessors/array_wrapper.rb +2 -2
- data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
- data/lib/daru/core/group_by.rb +112 -31
- data/lib/daru/core/merge.rb +170 -0
- data/lib/daru/core/query.rb +95 -0
- data/lib/daru/dataframe.rb +335 -223
- data/lib/daru/date_time/index.rb +550 -0
- data/lib/daru/date_time/offsets.rb +397 -0
- data/lib/daru/index.rb +266 -54
- data/lib/daru/io/io.rb +1 -2
- data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
- data/lib/daru/maths/arithmetic/vector.rb +2 -2
- data/lib/daru/maths/statistics/dataframe.rb +58 -8
- data/lib/daru/maths/statistics/vector.rb +229 -0
- data/lib/daru/vector.rb +230 -80
- data/lib/daru/version.rb +1 -1
- data/spec/core/group_by_spec.rb +16 -16
- data/spec/core/merge_spec.rb +52 -0
- data/spec/core/query_spec.rb +171 -0
- data/spec/dataframe_spec.rb +278 -280
- data/spec/date_time/data_spec.rb +199 -0
- data/spec/date_time/index_spec.rb +433 -0
- data/spec/date_time/offsets_spec.rb +371 -0
- data/spec/fixtures/stock_data.csv +500 -0
- data/spec/index_spec.rb +317 -11
- data/spec/io/io_spec.rb +18 -17
- data/spec/math/arithmetic/dataframe_spec.rb +3 -3
- data/spec/math/statistics/dataframe_spec.rb +39 -1
- data/spec/math/statistics/vector_spec.rb +163 -1
- data/spec/monkeys_spec.rb +4 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/vector_spec.rb +125 -60
- metadata +71 -14
- data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
- data/lib/daru/multi_index.rb +0 -216
- data/spec/multi_index_spec.rb +0 -216
data/lib/daru/io/io.rb
CHANGED
@@ -58,7 +58,6 @@ module Daru
|
|
58
58
|
end
|
59
59
|
|
60
60
|
# Functions for loading/writing CSV files
|
61
|
-
|
62
61
|
def from_csv path, opts={}
|
63
62
|
opts[:col_sep] ||= ','
|
64
63
|
opts[:converters] ||= :numeric
|
@@ -79,7 +78,7 @@ module Daru
|
|
79
78
|
yield csv if block_given?
|
80
79
|
|
81
80
|
csv_as_arrays = csv.to_a
|
82
|
-
headers = csv_as_arrays[0].recode_repeated.map
|
81
|
+
headers = csv_as_arrays[0].recode_repeated.map
|
83
82
|
csv_as_arrays.delete_at 0
|
84
83
|
csv_as_arrays = csv_as_arrays.transpose
|
85
84
|
|
@@ -64,8 +64,8 @@ module Daru
|
|
64
64
|
|
65
65
|
hsh = {}
|
66
66
|
all_vectors.each do |vector_name|
|
67
|
-
this = self .has_vector?(vector_name) ? self
|
68
|
-
that = other.has_vector?(vector_name) ? other
|
67
|
+
this = self .has_vector?(vector_name) ? self[vector_name] : nil
|
68
|
+
that = other.has_vector?(vector_name) ? other[vector_name] : nil
|
69
69
|
|
70
70
|
if this and that
|
71
71
|
hsh[vector_name] = this.send(operation, that)
|
@@ -68,8 +68,8 @@ module Daru
|
|
68
68
|
index = (@index.to_a | other.index.to_a).sort
|
69
69
|
|
70
70
|
index.each do |idx|
|
71
|
-
this = self[idx]
|
72
|
-
that = other[idx]
|
71
|
+
this = self.index.include?(idx) ? self[idx] : nil
|
72
|
+
that = other.index.include?(idx) ? other[idx] : nil
|
73
73
|
|
74
74
|
if this and that
|
75
75
|
elements << this.send(operation ,that)
|
@@ -23,8 +23,12 @@ module Daru
|
|
23
23
|
end
|
24
24
|
|
25
25
|
# Calculate the maximum value of each numeric vector.
|
26
|
-
def max
|
27
|
-
|
26
|
+
def max opts={}
|
27
|
+
if opts[:vector]
|
28
|
+
self.row[*self[opts[:vector]].max_index.index.to_a]
|
29
|
+
else
|
30
|
+
compute_stats :max
|
31
|
+
end
|
28
32
|
end
|
29
33
|
|
30
34
|
# Calculate the minimum value of each numeric vector.
|
@@ -37,13 +41,45 @@ module Daru
|
|
37
41
|
compute_stats :product
|
38
42
|
end
|
39
43
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
+
# @!method cumsum
|
45
|
+
# Calculate cumulative sum of each numeric Vector
|
46
|
+
# @!method standardize
|
47
|
+
# Standardize each Vector
|
48
|
+
# @!method acf
|
49
|
+
# Calculate Autocorrelation coefficient
|
50
|
+
# @param [Integer] max_lags (nil) Number of initial lags
|
51
|
+
# @!method ema
|
52
|
+
# Calculate exponential moving average.
|
53
|
+
# @param [Integer] n (10) Loopback length.
|
54
|
+
# @param [TrueClass, FalseClass, NilClass] wilder (false) If true,
|
55
|
+
# 1/n value is used for smoothing; if false, uses 2/(n+1) value.
|
56
|
+
# @!method rolling_mean
|
57
|
+
# Calculate moving averages
|
58
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
59
|
+
# @!method rolling_median
|
60
|
+
# Calculate moving median
|
61
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
62
|
+
# @!method rolling_max
|
63
|
+
# Calculate moving max
|
64
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
65
|
+
# @!method rolling_min
|
66
|
+
# Calculate moving min
|
67
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
68
|
+
# @!method rolling_count
|
69
|
+
# Calculate moving non-missing count
|
70
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
71
|
+
# @!method rolling_std
|
72
|
+
# Calculate moving standard deviation
|
73
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
74
|
+
# @!method rolling_variance
|
75
|
+
# Calculate moving variance
|
76
|
+
# @param [Integer] n (10) Loopback length. Default to 10.
|
77
|
+
[:cumsum,:standardize,:acf,:ema,:rolling_mean,:rolling_median,:rolling_max,
|
78
|
+
:rolling_min,:rolling_count,:rolling_std,:rolling_variance, :rolling_sum
|
79
|
+
].each do |meth|
|
80
|
+
define_method(meth) do |*args|
|
81
|
+
apply_method_to_numerics meth, *args
|
44
82
|
end
|
45
|
-
|
46
|
-
df
|
47
83
|
end
|
48
84
|
|
49
85
|
# Create a summary of mean, standard deviation, count, max and min of
|
@@ -105,6 +141,20 @@ module Daru
|
|
105
141
|
|
106
142
|
private
|
107
143
|
|
144
|
+
def apply_method_to_numerics method, *args
|
145
|
+
order = []
|
146
|
+
computed = @vectors.to_a.inject([]) do |memo, n|
|
147
|
+
v = @data[@vectors[n]]
|
148
|
+
if v.type == :numeric
|
149
|
+
memo << v.send(method, *args)
|
150
|
+
order << n
|
151
|
+
end
|
152
|
+
memo
|
153
|
+
end
|
154
|
+
|
155
|
+
Daru::DataFrame.new(computed, index: @index, order: order ,clone: false)
|
156
|
+
end
|
157
|
+
|
108
158
|
def vector_cov v1a, v2a
|
109
159
|
sum_of_squares(v1a,v2a) / (v1a.size - 1)
|
110
160
|
end
|
@@ -122,6 +122,16 @@ module Daru
|
|
122
122
|
end
|
123
123
|
end
|
124
124
|
|
125
|
+
# Count number of occurrences of each value in the Vector
|
126
|
+
def value_counts
|
127
|
+
values = {}
|
128
|
+
@data.each do |d|
|
129
|
+
values[d] ? values[d] += 1 : values[d] = 1
|
130
|
+
end
|
131
|
+
|
132
|
+
Daru::Vector.new(values)
|
133
|
+
end
|
134
|
+
|
125
135
|
def proportion value=1
|
126
136
|
frequencies[value].quo(n_valid).to_f
|
127
137
|
end
|
@@ -354,6 +364,225 @@ module Daru
|
|
354
364
|
end
|
355
365
|
end
|
356
366
|
|
367
|
+
# Performs the difference of the series.
|
368
|
+
# Note: The first difference of series is X(t) - X(t-1)
|
369
|
+
# But, second difference of series is NOT X(t) - X(t-2)
|
370
|
+
# It is the first difference of the first difference
|
371
|
+
# => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
|
372
|
+
#
|
373
|
+
# == Arguments
|
374
|
+
#
|
375
|
+
#* *max_lags*: integer, (default: 1), number of differences required.
|
376
|
+
#
|
377
|
+
# @example Using #diff
|
378
|
+
#
|
379
|
+
# ts = Daru::Vector.new((1..10).map { rand })
|
380
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
381
|
+
#
|
382
|
+
# ts.diff # => [nil, -0.46, 0.21, 0.27, ...]
|
383
|
+
#
|
384
|
+
# @return [Daru::Vector]
|
385
|
+
def diff(max_lags = 1)
|
386
|
+
ts = self
|
387
|
+
difference = []
|
388
|
+
max_lags.times do
|
389
|
+
difference = ts - ts.lag
|
390
|
+
ts = difference
|
391
|
+
end
|
392
|
+
difference
|
393
|
+
end
|
394
|
+
|
395
|
+
# Calculate the rolling function for a loopback value.
|
396
|
+
#
|
397
|
+
# @param [Symbol] function The rolling function to be applied. Can be
|
398
|
+
# any function applicable to Daru::Vector (:mean, :median, :count,
|
399
|
+
# :min, :max, etc.)
|
400
|
+
# @param [Integer] n (10) A non-negative value which serves as the loopback length.
|
401
|
+
# @return [Daru::Vector] Vector containing rolling calculations.
|
402
|
+
# @example Using #rolling
|
403
|
+
# ts = Daru::Vector.new((1..100).map { rand })
|
404
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
405
|
+
# # first 9 observations are nil
|
406
|
+
# ts.rolling(:mean) # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
|
407
|
+
def rolling function, n=10
|
408
|
+
Daru::Vector.new(
|
409
|
+
[nil] * (n - 1) +
|
410
|
+
(0..(size - n)).map do |i|
|
411
|
+
Daru::Vector.new(@data[i...(i + n)]).send(function)
|
412
|
+
end, index: @index
|
413
|
+
)
|
414
|
+
end
|
415
|
+
|
416
|
+
# @!method rolling_mean
|
417
|
+
# Calculate rolling average
|
418
|
+
# @param [Integer] n (10) Loopback length
|
419
|
+
# @!method rolling_median
|
420
|
+
# Calculate rolling median
|
421
|
+
# @param [Integer] n (10) Loopback length
|
422
|
+
# @!method rolling_count
|
423
|
+
# Calculate rolling non-missing count
|
424
|
+
# @param [Integer] n (10) Loopback length
|
425
|
+
# @!method rolling_max
|
426
|
+
# Calculate rolling max value
|
427
|
+
# @param [Integer] n (10) Loopback length
|
428
|
+
# @!method rolling_min
|
429
|
+
# Calculate rolling min value
|
430
|
+
# @param [Integer] n (10) Loopback length
|
431
|
+
# @!method rolling_sum
|
432
|
+
# Calculate rolling sum
|
433
|
+
# @param [Integer] n (10) Loopback length
|
434
|
+
# @!method rolling_std
|
435
|
+
# Calculate rolling standard deviation
|
436
|
+
# @param [Integer] n (10) Loopback length
|
437
|
+
# @!method rolling_variance
|
438
|
+
# Calculate rolling variance
|
439
|
+
# @param [Integer] n (10) Loopback length
|
440
|
+
[:count, :mean, :median, :max, :min, :sum, :std, :variance].each do |meth|
|
441
|
+
define_method("rolling_#{meth}".to_sym) do |n=10|
|
442
|
+
rolling(meth, n)
|
443
|
+
end
|
444
|
+
end
|
445
|
+
|
446
|
+
# Exponential Moving Average.
|
447
|
+
# Calculates an exponential moving average of the series using a
|
448
|
+
# specified parameter. If wilder is false (the default) then the EMA
|
449
|
+
# uses a smoothing value of 2 / (n + 1), if it is true then it uses the
|
450
|
+
# Welles Wilder smoother of 1 / n.
|
451
|
+
#
|
452
|
+
# Warning for EMA usage: EMAs are unstable for small series, as they
|
453
|
+
# use a lot more than n observations to calculate. The series is stable
|
454
|
+
# if the size of the series is >= 3.45 * (n + 1)
|
455
|
+
#
|
456
|
+
# @param [Integer] n (10) Loopback length.
|
457
|
+
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
|
458
|
+
# used for smoothing; if false, uses 2/(n+1) value
|
459
|
+
#
|
460
|
+
# @example Using ema
|
461
|
+
#
|
462
|
+
# ts = (1..100).map { rand }.to_ts
|
463
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
464
|
+
#
|
465
|
+
# # first 9 observations are nil
|
466
|
+
# ts.ema # => [ ... nil, 0.509... , 0.433..., ... ]
|
467
|
+
#
|
468
|
+
# @return [Daru::Vector] Contains EMA
|
469
|
+
def ema(n = 10, wilder = false)
|
470
|
+
smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
|
471
|
+
# need to start everything from the first non-nil observation
|
472
|
+
start = @data.index { |i| i != nil }
|
473
|
+
# first n - 1 observations are nil
|
474
|
+
base = [nil] * (start + n - 1)
|
475
|
+
# nth observation is just a moving average
|
476
|
+
base << @data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n
|
477
|
+
(start + n).upto size - 1 do |i|
|
478
|
+
base << self[i] * smoother + (1 - smoother) * base.last
|
479
|
+
end
|
480
|
+
|
481
|
+
Daru::Vector.new(base, index: @index)
|
482
|
+
end
|
483
|
+
|
484
|
+
# Moving Average Convergence-Divergence.
|
485
|
+
# Calculates the MACD (moving average convergence-divergence) of the time
|
486
|
+
# series - this is a comparison of a fast EMA with a slow EMA.
|
487
|
+
#
|
488
|
+
# == Arguments
|
489
|
+
#* *fast*: integer, (default = 12) - fast component of MACD
|
490
|
+
#* *slow*: integer, (default = 26) - slow component of MACD
|
491
|
+
#* *signal*: integer, (default = 9) - signal component of MACD
|
492
|
+
#
|
493
|
+
# == Usage
|
494
|
+
#
|
495
|
+
# ts = Daru::Vector.new((1..100).map { rand })
|
496
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
497
|
+
# ts.macd(13)
|
498
|
+
#
|
499
|
+
# == Returns
|
500
|
+
#
|
501
|
+
# Array of two Daru::Vectors - comparison of fast EMA with slow and EMA with
|
502
|
+
# signal value
|
503
|
+
def macd(fast = 12, slow = 26, signal = 9)
|
504
|
+
series = ema(fast) - ema(slow)
|
505
|
+
[series, series.ema(signal)]
|
506
|
+
end
|
507
|
+
|
508
|
+
# Calculates the autocorrelation coefficients of the series.
|
509
|
+
#
|
510
|
+
# The first element is always 1, since that is the correlation
|
511
|
+
# of the series with itself.
|
512
|
+
#
|
513
|
+
# @example
|
514
|
+
# ts = Daru::Vector.new((1..100).map { rand })
|
515
|
+
#
|
516
|
+
# ts.acf # => array with first 21 autocorrelations
|
517
|
+
# ts.acf 3 # => array with first 4 autocorrelations (lags 0 through 3)
|
518
|
+
def acf(max_lags = nil)
|
519
|
+
max_lags ||= (10 * Math.log10(size)).to_i
|
520
|
+
|
521
|
+
(0..max_lags).map do |i|
|
522
|
+
if i == 0
|
523
|
+
1.0
|
524
|
+
else
|
525
|
+
m = self.mean
|
526
|
+
# can't use Pearson coefficient since the mean for the lagged series should
|
527
|
+
# be the same as the regular series
|
528
|
+
((self - m) * (self.lag(i) - m)).sum / self.variance_sample / (self.size - 1)
|
529
|
+
end
|
530
|
+
end
|
531
|
+
end
|
532
|
+
|
533
|
+
# Provides autocovariance.
|
534
|
+
#
|
535
|
+
# == Options
|
536
|
+
#
|
537
|
+
#* *:demean* = true; optional. Supply false if series is not to be demeaned
|
538
|
+
#* *:unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
|
539
|
+
#
|
540
|
+
# == Returns
|
541
|
+
#
|
542
|
+
# Autocovariance value
|
543
|
+
def acvf(demean = true, unbiased = true)
|
544
|
+
opts = {
|
545
|
+
demean: true,
|
546
|
+
unbaised: true
|
547
|
+
}.merge(opts)
|
548
|
+
|
549
|
+
demean = opts[:demean]
|
550
|
+
unbiased = opts[:unbiased]
|
551
|
+
if demean
|
552
|
+
demeaned_series = self - self.mean
|
553
|
+
else
|
554
|
+
demeaned_series = self
|
555
|
+
end
|
556
|
+
|
557
|
+
n = (10 * Math.log10(size)).to_i + 1
|
558
|
+
m = self.mean
|
559
|
+
if unbiased
|
560
|
+
d = Array.new(self.size, self.size)
|
561
|
+
else
|
562
|
+
d = ((1..self.size).to_a.reverse)[0..n]
|
563
|
+
end
|
564
|
+
|
565
|
+
0.upto(n - 1).map do |i|
|
566
|
+
(demeaned_series * (self.lag(i) - m)).sum / d[i]
|
567
|
+
end
|
568
|
+
end
|
569
|
+
|
570
|
+
# Calculate cumulative sum of Vector
|
571
|
+
def cumsum
|
572
|
+
result = []
|
573
|
+
acc = 0
|
574
|
+
@data.each do |d|
|
575
|
+
if @missing_values.has_key?(d)
|
576
|
+
result << nil
|
577
|
+
else
|
578
|
+
acc += d
|
579
|
+
result << acc
|
580
|
+
end
|
581
|
+
end
|
582
|
+
|
583
|
+
Daru::Vector.new(result, index: @index)
|
584
|
+
end
|
585
|
+
|
357
586
|
alias :sdp :standard_deviation_population
|
358
587
|
alias :sds :standard_deviation_sample
|
359
588
|
alias :std :sds
|
data/lib/daru/vector.rb
CHANGED
@@ -103,7 +103,7 @@ module Daru
|
|
103
103
|
set_name name
|
104
104
|
|
105
105
|
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
|
106
|
-
@index =
|
106
|
+
@index = try_create_index(index || @data.size)
|
107
107
|
|
108
108
|
if @index.size > @data.size
|
109
109
|
cast(dtype: :array) # NM with nils seg faults
|
@@ -200,32 +200,22 @@ module Daru
|
|
200
200
|
# # For vectors employing hierarchical multi index
|
201
201
|
#
|
202
202
|
def [](*indexes)
|
203
|
-
indexes.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
|
204
203
|
location = indexes[0]
|
205
204
|
if @index.is_a?(MultiIndex)
|
205
|
+
sub_index = @index[indexes]
|
206
206
|
result =
|
207
|
-
if
|
208
|
-
|
209
|
-
elsif location.is_a?(Range)
|
210
|
-
arry = location.inject([]) do |memo, num|
|
211
|
-
memo << element_from_numeric_index(num)
|
212
|
-
memo
|
213
|
-
end
|
214
|
-
|
215
|
-
new_index = Daru::MultiIndex.new(@index.to_a[location])
|
216
|
-
Daru::Vector.new(arry, index: new_index, name: @name, dtype: dtype)
|
207
|
+
if sub_index.is_a?(Integer)
|
208
|
+
@data[sub_index]
|
217
209
|
else
|
218
|
-
|
210
|
+
elements = sub_index.map do |tuple|
|
211
|
+
@data[@index[tuple]]
|
212
|
+
end
|
219
213
|
|
220
|
-
if
|
221
|
-
|
222
|
-
else
|
223
|
-
elements = sub_index.map do |tuple|
|
224
|
-
@data[@index[(indexes + tuple)]]
|
225
|
-
end
|
226
|
-
Daru::Vector.new(elements, index: Daru::MultiIndex.new(sub_index.to_a),
|
227
|
-
name: @name, dtype: @dtype)
|
214
|
+
if !indexes[0].is_a?(Range) and indexes.size < @index.width
|
215
|
+
sub_index = sub_index.drop_left_level indexes.size
|
228
216
|
end
|
217
|
+
Daru::Vector.new(
|
218
|
+
elements, index: sub_index, name: @name, dtype: @dtype)
|
229
219
|
end
|
230
220
|
|
231
221
|
return result
|
@@ -233,23 +223,24 @@ module Daru
|
|
233
223
|
unless indexes[1]
|
234
224
|
case location
|
235
225
|
when Range
|
236
|
-
|
237
|
-
|
238
|
-
|
226
|
+
first = location.first
|
227
|
+
last = location.last
|
228
|
+
indexes = @index.slice first, last
|
229
|
+
else
|
230
|
+
pos = @index[location]
|
231
|
+
if pos.is_a?(Numeric)
|
232
|
+
return @data[pos]
|
239
233
|
else
|
240
|
-
|
241
|
-
last = location.last
|
242
|
-
|
243
|
-
(first..last)
|
234
|
+
indexes = pos
|
244
235
|
end
|
245
|
-
indexes = @index[range]
|
246
|
-
else
|
247
|
-
return element_from_numeric_index(location)
|
248
236
|
end
|
237
|
+
else
|
238
|
+
indexes = indexes.map { |e| named_index_for(e) }
|
249
239
|
end
|
250
240
|
|
251
|
-
Daru::Vector.new
|
252
|
-
|
241
|
+
Daru::Vector.new(
|
242
|
+
indexes.map { |loc| @data[@index[loc]] },
|
243
|
+
name: @name, index: indexes, dtype: @dtype)
|
253
244
|
end
|
254
245
|
end
|
255
246
|
|
@@ -274,19 +265,13 @@ module Daru
|
|
274
265
|
@possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and
|
275
266
|
!value.nil?)
|
276
267
|
|
277
|
-
|
278
|
-
|
279
|
-
index_for location
|
280
|
-
else
|
281
|
-
index_for location[0]
|
282
|
-
end
|
268
|
+
location = location[0] unless @index.is_a?(MultiIndex)
|
269
|
+
pos = @index[location]
|
283
270
|
|
284
|
-
if pos.is_a?(
|
285
|
-
pos.each do |sub_tuple|
|
286
|
-
self[*(location + sub_tuple)] = value
|
287
|
-
end
|
288
|
-
else
|
271
|
+
if pos.is_a?(Numeric)
|
289
272
|
@data[pos] = value
|
273
|
+
else
|
274
|
+
pos.each { |tuple| self[tuple] = value }
|
290
275
|
end
|
291
276
|
|
292
277
|
set_size
|
@@ -334,16 +319,132 @@ module Daru
|
|
334
319
|
self[index] == other[index]
|
335
320
|
end
|
336
321
|
else
|
337
|
-
|
322
|
+
super
|
323
|
+
end
|
324
|
+
end
|
325
|
+
|
326
|
+
# @!method eq
|
327
|
+
# Uses `==` and returns `true` for each **equal** entry
|
328
|
+
# @param [#==, Daru::Vector] If scalar object, compares it with each
|
329
|
+
# element in self. If Daru::Vector, compares elements with same indexes.
|
330
|
+
# @example (see #where)
|
331
|
+
# @!method not_eq
|
332
|
+
# Uses `!=` and returns `true` for each **unequal** entry
|
333
|
+
# @param [#!=, Daru::Vector] If scalar object, compares it with each
|
334
|
+
# element in self. If Daru::Vector, compares elements with same indexes.
|
335
|
+
# @example (see #where)
|
336
|
+
# @!method lt
|
337
|
+
# Uses `<` and returns `true` for each entry **less than** the supplied object
|
338
|
+
# @param [#<, Daru::Vector] If scalar object, compares it with each
|
339
|
+
# element in self. If Daru::Vector, compares elements with same indexes.
|
340
|
+
# @example (see #where)
|
341
|
+
# @!method lteq
|
342
|
+
# Uses `<=` and returns `true` for each entry **less than or equal to** the supplied object
|
343
|
+
# @param [#<=, Daru::Vector] If scalar object, compares it with each
|
344
|
+
# element in self. If Daru::Vector, compares elements with same indexes.
|
345
|
+
# @example (see #where)
|
346
|
+
# @!method mt
|
347
|
+
# Uses `>` and returns `true` for each entry **more than** the supplied object
|
348
|
+
# @param [#>, Daru::Vector] If scalar object, compares it with each
|
349
|
+
# element in self. If Daru::Vector, compares elements with same indexes.
|
350
|
+
# @example (see #where)
|
351
|
+
# @!method mteq
|
352
|
+
# Uses `>=` and returns `true` for each entry **more than or equal to** the supplied object
|
353
|
+
# @param [#>=, Daru::Vector] If scalar object, compares it with each
|
354
|
+
# element in self. If Daru::Vector, compares elements with same indexes.
|
355
|
+
# @example (see #where)
|
356
|
+
|
357
|
+
# Define the comparator methods with metaprogramming. See documentation
|
358
|
+
# written above for functionality of each method. Use these methods with the
|
359
|
+
# `where` method to obtain the corresponding Vector/DataFrame.
|
360
|
+
{
|
361
|
+
:eq => :==,
|
362
|
+
:not_eq => :!=,
|
363
|
+
:lt => :<,
|
364
|
+
:lteq => :<=,
|
365
|
+
:mt => :>,
|
366
|
+
:mteq => :>=,
|
367
|
+
}.each do |method, operator|
|
368
|
+
define_method(method) do |other|
|
369
|
+
mod = Daru::Core::Query
|
370
|
+
if other.is_a?(Daru::Vector)
|
371
|
+
mod.apply_vector_operator operator, self, other
|
372
|
+
else
|
373
|
+
mod.apply_scalar_operator operator, @data,other
|
374
|
+
end
|
338
375
|
end
|
339
376
|
end
|
377
|
+
alias :gt :mt
|
378
|
+
alias :gteq :mteq
|
379
|
+
|
380
|
+
# Comparator for checking if any of the elements in *other* exist in self.
|
381
|
+
#
|
382
|
+
# @param [Array, Daru::Vector] other A collection which has elements that
|
383
|
+
# need to be checked for in self.
|
384
|
+
# @example Usage of `in`.
|
385
|
+
# vector = Daru::Vector.new([1,2,3,4,5])
|
386
|
+
# vector.where(vector.in([3,5]))
|
387
|
+
# #=>
|
388
|
+
# ##<Daru::Vector:82215960 @name = nil @size = 2 >
|
389
|
+
# # nil
|
390
|
+
# # 2 3
|
391
|
+
# # 4 5
|
392
|
+
def in other
|
393
|
+
other = Hash[other.zip(Array.new(other.size, 0))]
|
394
|
+
Daru::Core::Query::BoolArray.new(
|
395
|
+
@data.inject([]) do |memo, d|
|
396
|
+
memo << (other.has_key?(d) ? true : false)
|
397
|
+
memo
|
398
|
+
end
|
399
|
+
)
|
400
|
+
end
|
401
|
+
|
402
|
+
# Return a new vector based on the contents of a boolean array. Use with the
|
403
|
+
# comparator methods to obtain meaningful results. See this notebook for
|
404
|
+
# a good overview of using #where.
|
405
|
+
#
|
406
|
+
# @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The
|
407
|
+
# collection containing the true or false values. Each element in the Vector
|
408
|
+
# corresponding to a `true` in the bool_arry will be returned along with its
|
409
|
+
# index.
|
410
|
+
# @example Usage of #where.
|
411
|
+
# vector = Daru::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
|
412
|
+
#
|
413
|
+
# # Simple logic statement passed to #where.
|
414
|
+
# vector.where(vector.eq(5).or(vector.eq(1)))
|
415
|
+
# # =>
|
416
|
+
# ##<Daru::Vector:77626210 @name = nil @size = 7 >
|
417
|
+
# # nil
|
418
|
+
# # 2 5
|
419
|
+
# # 4 5
|
420
|
+
# # 7 5
|
421
|
+
# # 10 1
|
422
|
+
# # 11 5
|
423
|
+
# # 13 5
|
424
|
+
# # 15 1
|
425
|
+
#
|
426
|
+
# # A somewhat more complex logic statement
|
427
|
+
# vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
|
428
|
+
# #=>
|
429
|
+
# ##<Daru::Vector:81072310 @name = nil @size = 7 >
|
430
|
+
# # nil
|
431
|
+
# # 2 5
|
432
|
+
# # 4 5
|
433
|
+
# # 7 5
|
434
|
+
# # 10 1
|
435
|
+
# # 11 5
|
436
|
+
# # 13 5
|
437
|
+
# # 15 1
|
438
|
+
def where bool_arry
|
439
|
+
Daru::Core::Query.vector_where @data.to_a, @index.to_a, bool_arry, self.dtype
|
440
|
+
end
|
340
441
|
|
341
442
|
def head q=10
|
342
443
|
self[0..(q-1)]
|
343
444
|
end
|
344
445
|
|
345
446
|
def tail q=10
|
346
|
-
self[(@size - q
|
447
|
+
self[(@size - q)..(@size-1)]
|
347
448
|
end
|
348
449
|
|
349
450
|
# Reports whether missing data is present in the Vector.
|
@@ -354,20 +455,12 @@ module Daru
|
|
354
455
|
|
355
456
|
|
356
457
|
# Append an element to the vector by specifying the element and index
|
357
|
-
def concat element, index
|
458
|
+
def concat element, index
|
358
459
|
raise IndexError, "Expected new unique index" if @index.include? index
|
359
460
|
|
360
|
-
|
361
|
-
@index = create_index(@size + 1)
|
362
|
-
index = @size
|
363
|
-
else
|
364
|
-
begin
|
365
|
-
@index = create_index(@index + index)
|
366
|
-
rescue StandardError => e
|
367
|
-
raise e, "Expected valid index."
|
368
|
-
end
|
369
|
-
end
|
461
|
+
@index = @index | [index]
|
370
462
|
@data[@index[index]] = element
|
463
|
+
|
371
464
|
set_size
|
372
465
|
set_missing_positions unless Daru.lazy_update
|
373
466
|
end
|
@@ -394,14 +487,8 @@ module Daru
|
|
394
487
|
|
395
488
|
# Delete element by index
|
396
489
|
def delete_at index
|
397
|
-
|
398
|
-
@
|
399
|
-
|
400
|
-
if @index.index_class == Integer
|
401
|
-
@index = Daru::Index.new @size-1
|
402
|
-
else
|
403
|
-
@index = Daru::Index.new (@index.to_a - [idx])
|
404
|
-
end
|
490
|
+
@data.delete_at @index[index]
|
491
|
+
@index = Daru::Index.new(@index.to_a - [index])
|
405
492
|
|
406
493
|
set_size
|
407
494
|
set_missing_positions unless Daru.lazy_update
|
@@ -482,9 +569,9 @@ module Daru
|
|
482
569
|
|
483
570
|
order = opts[:ascending] ? :ascending : :descending
|
484
571
|
vector, index = send(opts[:type], @data.to_a.dup, @index.to_a, order, &block)
|
485
|
-
index =
|
572
|
+
index = Daru::Index.new index
|
486
573
|
|
487
|
-
Daru::Vector.new(vector, index:
|
574
|
+
Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
|
488
575
|
end
|
489
576
|
|
490
577
|
# Just sort the data and get an Array in return using Enumerable#sort.
|
@@ -515,9 +602,31 @@ module Daru
|
|
515
602
|
self
|
516
603
|
end
|
517
604
|
|
605
|
+
# Delete an element if block returns true. Destructive.
|
518
606
|
def delete_if &block
|
519
607
|
return to_enum(:delete_if) unless block_given?
|
520
608
|
|
609
|
+
keep_e = []
|
610
|
+
keep_i = []
|
611
|
+
each_with_index do |n, i|
|
612
|
+
unless yield(n)
|
613
|
+
keep_e << n
|
614
|
+
keep_i << i
|
615
|
+
end
|
616
|
+
end
|
617
|
+
|
618
|
+
@data = cast_vector_to @dtype, keep_e
|
619
|
+
@index = Daru::Index.new(keep_i)
|
620
|
+
set_missing_positions unless Daru.lazy_update
|
621
|
+
set_size
|
622
|
+
|
623
|
+
self
|
624
|
+
end
|
625
|
+
|
626
|
+
# Keep an element if block returns true. Destructive.
|
627
|
+
def keep_if &block
|
628
|
+
return to_enum(:keep_if) unless block_given?
|
629
|
+
|
521
630
|
keep_e = []
|
522
631
|
keep_i = []
|
523
632
|
each_with_index do |n, i|
|
@@ -528,7 +637,7 @@ module Daru
|
|
528
637
|
end
|
529
638
|
|
530
639
|
@data = cast_vector_to @dtype, keep_e
|
531
|
-
@index =
|
640
|
+
@index = Daru::Index.new(keep_i)
|
532
641
|
set_missing_positions unless Daru.lazy_update
|
533
642
|
set_size
|
534
643
|
|
@@ -664,10 +773,33 @@ module Daru
|
|
664
773
|
self
|
665
774
|
end
|
666
775
|
|
776
|
+
# Lags the series by k periods.
|
777
|
+
#
|
778
|
+
# The convention is to set the oldest observations (the first ones
|
779
|
+
# in the series) to nil so that the size of the lagged series is the
|
780
|
+
# same as the original.
|
781
|
+
#
|
782
|
+
# Usage:
|
783
|
+
#
|
784
|
+
# ts = Daru::Vector.new((1..10).map { rand })
|
785
|
+
# # => [0.69, 0.23, 0.44, 0.71, ...]
|
786
|
+
#
|
787
|
+
# ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
|
788
|
+
# ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
|
789
|
+
def lag k=1
|
790
|
+
return self.dup if k == 0
|
791
|
+
|
792
|
+
dat = @data.to_a.dup
|
793
|
+
(dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
|
794
|
+
(0...k).each { |i| dat[i] = nil }
|
795
|
+
|
796
|
+
Daru::Vector.new(dat, index: @index, name: @name)
|
797
|
+
end
|
798
|
+
|
667
799
|
def detach_index
|
668
800
|
Daru::DataFrame.new({
|
669
|
-
index: @index.to_a
|
670
|
-
|
801
|
+
index: @index.to_a,
|
802
|
+
values: @data.to_a
|
671
803
|
})
|
672
804
|
end
|
673
805
|
|
@@ -826,13 +958,31 @@ module Daru
|
|
826
958
|
content
|
827
959
|
end
|
828
960
|
|
829
|
-
# Create a new vector with a different index
|
830
|
-
#
|
831
|
-
# @param new_index [Symbol, Array, Daru::Index] The new index. Passing *:seq*
|
832
|
-
# will reindex with sequential numbers from 0 to (n-1).
|
961
|
+
# Create a new vector with a different index, and preserve the indexing of
|
962
|
+
# current elements.
|
833
963
|
def reindex new_index
|
834
|
-
|
835
|
-
|
964
|
+
vector = Daru::Vector.new([], index: new_index, name: @name)
|
965
|
+
|
966
|
+
new_index.each do |idx|
|
967
|
+
if @index.include?(idx)
|
968
|
+
vector[idx] = self[idx]
|
969
|
+
else
|
970
|
+
vector[idx] = nil
|
971
|
+
end
|
972
|
+
end
|
973
|
+
|
974
|
+
vector
|
975
|
+
end
|
976
|
+
|
977
|
+
def index= idx
|
978
|
+
raise ArgumentError,
|
979
|
+
"Size of supplied index #{index.size} does not match size of DataFrame" if
|
980
|
+
idx.size != self.size
|
981
|
+
raise ArgumentError, "Can only assign type Index and its subclasses." unless
|
982
|
+
idx.kind_of?(Daru::Index)
|
983
|
+
|
984
|
+
@index = idx
|
985
|
+
self
|
836
986
|
end
|
837
987
|
|
838
988
|
# Give the vector a new name
|
@@ -844,7 +994,7 @@ module Daru
|
|
844
994
|
return
|
845
995
|
end
|
846
996
|
|
847
|
-
@name = new_name
|
997
|
+
@name = new_name
|
848
998
|
end
|
849
999
|
|
850
1000
|
# Duplicate elements and indexes
|
@@ -1166,8 +1316,8 @@ module Daru
|
|
1166
1316
|
def set_name name
|
1167
1317
|
@name =
|
1168
1318
|
if name.is_a?(Numeric) then name
|
1169
|
-
elsif name.is_a?(Array) then name.join
|
1170
|
-
elsif name then name
|
1319
|
+
elsif name.is_a?(Array) then name.join # in case of MultiIndex tuple
|
1320
|
+
elsif name then name # anything but Numeric or nil
|
1171
1321
|
else
|
1172
1322
|
nil
|
1173
1323
|
end
|
@@ -1180,7 +1330,7 @@ module Daru
|
|
1180
1330
|
end
|
1181
1331
|
end
|
1182
1332
|
|
1183
|
-
def
|
1333
|
+
def try_create_index potential_index
|
1184
1334
|
if potential_index.is_a?(Daru::MultiIndex) or potential_index.is_a?(Daru::Index)
|
1185
1335
|
potential_index
|
1186
1336
|
else
|