daru 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +6 -6
  3. data/.gitignore +2 -0
  4. data/CONTRIBUTING.md +7 -3
  5. data/History.md +36 -0
  6. data/README.md +21 -13
  7. data/Rakefile +16 -1
  8. data/benchmarks/TradeoffData.csv +65 -0
  9. data/benchmarks/dataframe_creation.rb +39 -0
  10. data/benchmarks/group_by.rb +32 -0
  11. data/benchmarks/row_access.rb +41 -0
  12. data/benchmarks/row_assign.rb +36 -0
  13. data/benchmarks/sorting.rb +44 -0
  14. data/benchmarks/vector_access.rb +31 -0
  15. data/benchmarks/vector_assign.rb +42 -0
  16. data/benchmarks/where_clause.rb +48 -0
  17. data/benchmarks/where_vs_filter.rb +28 -0
  18. data/daru.gemspec +29 -5
  19. data/lib/daru.rb +30 -1
  20. data/lib/daru/accessors/array_wrapper.rb +2 -2
  21. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
  22. data/lib/daru/core/group_by.rb +112 -31
  23. data/lib/daru/core/merge.rb +170 -0
  24. data/lib/daru/core/query.rb +95 -0
  25. data/lib/daru/dataframe.rb +335 -223
  26. data/lib/daru/date_time/index.rb +550 -0
  27. data/lib/daru/date_time/offsets.rb +397 -0
  28. data/lib/daru/index.rb +266 -54
  29. data/lib/daru/io/io.rb +1 -2
  30. data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
  31. data/lib/daru/maths/arithmetic/vector.rb +2 -2
  32. data/lib/daru/maths/statistics/dataframe.rb +58 -8
  33. data/lib/daru/maths/statistics/vector.rb +229 -0
  34. data/lib/daru/vector.rb +230 -80
  35. data/lib/daru/version.rb +1 -1
  36. data/spec/core/group_by_spec.rb +16 -16
  37. data/spec/core/merge_spec.rb +52 -0
  38. data/spec/core/query_spec.rb +171 -0
  39. data/spec/dataframe_spec.rb +278 -280
  40. data/spec/date_time/data_spec.rb +199 -0
  41. data/spec/date_time/index_spec.rb +433 -0
  42. data/spec/date_time/offsets_spec.rb +371 -0
  43. data/spec/fixtures/stock_data.csv +500 -0
  44. data/spec/index_spec.rb +317 -11
  45. data/spec/io/io_spec.rb +18 -17
  46. data/spec/math/arithmetic/dataframe_spec.rb +3 -3
  47. data/spec/math/statistics/dataframe_spec.rb +39 -1
  48. data/spec/math/statistics/vector_spec.rb +163 -1
  49. data/spec/monkeys_spec.rb +4 -0
  50. data/spec/spec_helper.rb +3 -0
  51. data/spec/vector_spec.rb +125 -60
  52. metadata +71 -14
  53. data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
  54. data/lib/daru/multi_index.rb +0 -216
  55. data/spec/multi_index_spec.rb +0 -216
@@ -58,7 +58,6 @@ module Daru
58
58
  end
59
59
 
60
60
  # Functions for loading/writing CSV files
61
-
62
61
  def from_csv path, opts={}
63
62
  opts[:col_sep] ||= ','
64
63
  opts[:converters] ||= :numeric
@@ -79,7 +78,7 @@ module Daru
79
78
  yield csv if block_given?
80
79
 
81
80
  csv_as_arrays = csv.to_a
82
- headers = csv_as_arrays[0].recode_repeated.map(&:to_sym)
81
+ headers = csv_as_arrays[0].recode_repeated.map
83
82
  csv_as_arrays.delete_at 0
84
83
  csv_as_arrays = csv_as_arrays.transpose
85
84
 
@@ -64,8 +64,8 @@ module Daru
64
64
 
65
65
  hsh = {}
66
66
  all_vectors.each do |vector_name|
67
- this = self .has_vector?(vector_name) ? self .vector[vector_name] : nil
68
- that = other.has_vector?(vector_name) ? other.vector[vector_name] : nil
67
+ this = self .has_vector?(vector_name) ? self[vector_name] : nil
68
+ that = other.has_vector?(vector_name) ? other[vector_name] : nil
69
69
 
70
70
  if this and that
71
71
  hsh[vector_name] = this.send(operation, that)
@@ -68,8 +68,8 @@ module Daru
68
68
  index = (@index.to_a | other.index.to_a).sort
69
69
 
70
70
  index.each do |idx|
71
- this = self[idx]
72
- that = other[idx]
71
+ this = self.index.include?(idx) ? self[idx] : nil
72
+ that = other.index.include?(idx) ? other[idx] : nil
73
73
 
74
74
  if this and that
75
75
  elements << this.send(operation ,that)
@@ -23,8 +23,12 @@ module Daru
23
23
  end
24
24
 
25
25
  # Calculate the maximum value of each numeric vector.
26
- def max
27
- compute_stats :max
26
+ def max opts={}
27
+ if opts[:vector]
28
+ self.row[*self[opts[:vector]].max_index.index.to_a]
29
+ else
30
+ compute_stats :max
31
+ end
28
32
  end
29
33
 
30
34
  # Calculate the minimum value of each numeric vector.
@@ -37,13 +41,45 @@ module Daru
37
41
  compute_stats :product
38
42
  end
39
43
 
40
- def standardize
41
- df = self.only_numerics clone: true
42
- df.map! do |v|
43
- v.standardize
44
+ # @!method cumsum
45
+ # Calculate cumulative sum of each numeric Vector
46
+ # @!method standardize
47
+ # Standardize each Vector
48
+ # @!method acf
49
+ # Calculate Autocorrelation coefficient
50
+ # @param [Integer] max_lags (nil) Number of initial lags
51
+ # @!method ema
52
+ # Calculate exponential moving average.
53
+ # @param [Integer] n (10) Loopback length.
54
+ # @param [TrueClass, FalseClass, NilClass] wilder (false) If true,
55
+ # 1/n value is used for smoothing; if false, uses 2/(n+1) value.
56
+ # @!method rolling_mean
57
+ # Calculate moving averages
58
+ # @param [Integer] n (10) Loopback length. Default to 10.
59
+ # @!method rolling_median
60
+ # Calculate moving median
61
+ # @param [Integer] n (10) Loopback length. Default to 10.
62
+ # @!method rolling_max
63
+ # Calculate moving max
64
+ # @param [Integer] n (10) Loopback length. Default to 10.
65
+ # @!method rolling_min
66
+ # Calculate moving min
67
+ # @param [Integer] n (10) Loopback length. Default to 10.
68
+ # @!method rolling_count
69
+ # Calculate moving non-missing count
70
+ # @param [Integer] n (10) Loopback length. Default to 10.
71
+ # @!method rolling_std
72
+ # Calculate moving standard deviation
73
+ # @param [Integer] n (10) Loopback length. Default to 10.
74
+ # @!method rolling_variance
75
+ # Calculate moving variance
76
+ # @param [Integer] n (10) Loopback length. Default to 10.
77
+ [:cumsum,:standardize,:acf,:ema,:rolling_mean,:rolling_median,:rolling_max,
78
+ :rolling_min,:rolling_count,:rolling_std,:rolling_variance, :rolling_sum
79
+ ].each do |meth|
80
+ define_method(meth) do |*args|
81
+ apply_method_to_numerics meth, *args
44
82
  end
45
-
46
- df
47
83
  end
48
84
 
49
85
  # Create a summary of mean, standard deviation, count, max and min of
@@ -105,6 +141,20 @@ module Daru
105
141
 
106
142
  private
107
143
 
144
+ def apply_method_to_numerics method, *args
145
+ order = []
146
+ computed = @vectors.to_a.inject([]) do |memo, n|
147
+ v = @data[@vectors[n]]
148
+ if v.type == :numeric
149
+ memo << v.send(method, *args)
150
+ order << n
151
+ end
152
+ memo
153
+ end
154
+
155
+ Daru::DataFrame.new(computed, index: @index, order: order ,clone: false)
156
+ end
157
+
108
158
  def vector_cov v1a, v2a
109
159
  sum_of_squares(v1a,v2a) / (v1a.size - 1)
110
160
  end
@@ -122,6 +122,16 @@ module Daru
122
122
  end
123
123
  end
124
124
 
125
+ # Count number of occurrences of each value in the Vector
126
+ def value_counts
127
+ values = {}
128
+ @data.each do |d|
129
+ values[d] ? values[d] += 1 : values[d] = 1
130
+ end
131
+
132
+ Daru::Vector.new(values)
133
+ end
134
+
125
135
  def proportion value=1
126
136
  frequencies[value].quo(n_valid).to_f
127
137
  end
@@ -354,6 +364,225 @@ module Daru
354
364
  end
355
365
  end
356
366
 
367
+ # Performs the difference of the series.
368
+ # Note: The first difference of series is X(t) - X(t-1)
369
+ # But, second difference of series is NOT X(t) - X(t-2)
370
+ # It is the first difference of the first difference
371
+ # => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
372
+ #
373
+ # == Arguments
374
+ #
375
+ #* *max_lags*: integer, (default: 1), number of differences required.
376
+ #
377
+ # @example Using #diff
378
+ #
379
+ # ts = Daru::Vector.new((1..10).map { rand })
380
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
381
+ #
382
+ # ts.diff # => [nil, -0.46, 0.21, 0.27, ...]
383
+ #
384
+ # @return [Daru::Vector]
385
+ def diff(max_lags = 1)
386
+ ts = self
387
+ difference = []
388
+ max_lags.times do
389
+ difference = ts - ts.lag
390
+ ts = difference
391
+ end
392
+ difference
393
+ end
394
+
395
+ # Calculate the rolling function for a loopback value.
396
+ #
397
+ # @param [Symbol] function The rolling function to be applied. Can be
398
+ # any function applicable to Daru::Vector (:mean, :median, :count,
399
+ # :min, :max, etc.)
400
+ # @param [Integer] n (10) A non-negative value which serves as the loopback length.
401
+ # @return [Daru::Vector] Vector containing rolling calculations.
402
+ # @example Using #rolling
403
+ # ts = Daru::Vector.new((1..100).map { rand })
404
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
405
+ # # first 9 observations are nil
406
+ # ts.rolling(:mean) # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
407
+ def rolling function, n=10
408
+ Daru::Vector.new(
409
+ [nil] * (n - 1) +
410
+ (0..(size - n)).map do |i|
411
+ Daru::Vector.new(@data[i...(i + n)]).send(function)
412
+ end, index: @index
413
+ )
414
+ end
415
+
416
+ # @!method rolling_mean
417
+ # Calculate rolling average
418
+ # @param [Integer] n (10) Loopback length
419
+ # @!method rolling_median
420
+ # Calculate rolling median
421
+ # @param [Integer] n (10) Loopback length
422
+ # @!method rolling_count
423
+ # Calculate rolling non-missing count
424
+ # @param [Integer] n (10) Loopback length
425
+ # @!method rolling_max
426
+ # Calculate rolling max value
427
+ # @param [Integer] n (10) Loopback length
428
+ # @!method rolling_min
429
+ # Calculate rolling min value
430
+ # @param [Integer] n (10) Loopback length
431
+ # @!method rolling_sum
432
+ # Calculate rolling sum
433
+ # @param [Integer] n (10) Loopback length
434
+ # @!method rolling_std
435
+ # Calculate rolling standard deviation
436
+ # @param [Integer] n (10) Loopback length
437
+ # @!method rolling_variance
438
+ # Calculate rolling variance
439
+ # @param [Integer] n (10) Loopback length
440
+ [:count, :mean, :median, :max, :min, :sum, :std, :variance].each do |meth|
441
+ define_method("rolling_#{meth}".to_sym) do |n=10|
442
+ rolling(meth, n)
443
+ end
444
+ end
445
+
446
+ # Exponential Moving Average.
447
+ # Calculates an exponential moving average of the series using a
448
+ # specified parameter. If wilder is false (the default) then the EMA
449
+ # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
450
+ # Welles Wilder smoother of 1 / n.
451
+ #
452
+ # Warning for EMA usage: EMAs are unstable for small series, as they
453
+ # use a lot more than n observations to calculate. The series is stable
454
+ # if the size of the series is >= 3.45 * (n + 1)
455
+ #
456
+ # @param [Integer] n (10) Loopback length.
457
+ # @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
458
+ # used for smoothing; if false, uses 2/(n+1) value
459
+ #
460
+ # @example Using ema
461
+ #
462
+ # ts = (1..100).map { rand }.to_ts
463
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
464
+ #
465
+ # # first 9 observations are nil
466
+ # ts.ema # => [ ... nil, 0.509... , 0.433..., ... ]
467
+ #
468
+ # @return [Daru::Vector] Contains EMA
469
+ def ema(n = 10, wilder = false)
470
+ smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
471
+ # need to start everything from the first non-nil observation
472
+ start = @data.index { |i| i != nil }
473
+ # first n - 1 observations are nil
474
+ base = [nil] * (start + n - 1)
475
+ # nth observation is just a moving average
476
+ base << @data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n
477
+ (start + n).upto size - 1 do |i|
478
+ base << self[i] * smoother + (1 - smoother) * base.last
479
+ end
480
+
481
+ Daru::Vector.new(base, index: @index)
482
+ end
483
+
484
+ # Moving Average Convergence-Divergence.
485
+ # Calculates the MACD (moving average convergence-divergence) of the time
486
+ # series - this is a comparison of a fast EMA with a slow EMA.
487
+ #
488
+ # == Arguments
489
+ #* *fast*: integer, (default = 12) - fast component of MACD
490
+ #* *slow*: integer, (default = 26) - slow component of MACD
491
+ #* *signal*: integer, (default = 9) - signal component of MACD
492
+ #
493
+ # == Usage
494
+ #
495
+ # ts = Daru::Vector.new((1..100).map { rand })
496
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
497
+ # ts.macd(13)
498
+ #
499
+ # == Returns
500
+ #
501
+ # Array of two Daru::Vectors - comparison of fast EMA with slow and EMA with
502
+ # signal value
503
+ def macd(fast = 12, slow = 26, signal = 9)
504
+ series = ema(fast) - ema(slow)
505
+ [series, series.ema(signal)]
506
+ end
507
+
508
+ # Calculates the autocorrelation coefficients of the series.
509
+ #
510
+ # The first element is always 1, since that is the correlation
511
+ # of the series with itself.
512
+ #
513
+ # @example
514
+ # ts = Daru::Vector.new((1..100).map { rand })
515
+ #
516
+ # ts.acf # => array with first 21 autocorrelations
517
+ # ts.acf 3 # => array with first 3 autocorrelations
518
+ def acf(max_lags = nil)
519
+ max_lags ||= (10 * Math.log10(size)).to_i
520
+
521
+ (0..max_lags).map do |i|
522
+ if i == 0
523
+ 1.0
524
+ else
525
+ m = self.mean
526
+ # can't use Pearson coefficient since the mean for the lagged series should
527
+ # be the same as the regular series
528
+ ((self - m) * (self.lag(i) - m)).sum / self.variance_sample / (self.size - 1)
529
+ end
530
+ end
531
+ end
532
+
533
+ # Provides autocovariance.
534
+ #
535
+ # == Options
536
+ #
537
+ #* *:demean* = true; optional. Supply false if series is not to be demeaned
538
+ #* *:unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
539
+ #
540
+ # == Returns
541
+ #
542
+ # Autocovariance value
543
+ def acvf(demean = true, unbiased = true)
544
+ opts = {
545
+ demean: true,
546
+ unbaised: true
547
+ }.merge(opts)
548
+
549
+ demean = opts[:demean]
550
+ unbiased = opts[:unbiased]
551
+ if demean
552
+ demeaned_series = self - self.mean
553
+ else
554
+ demeaned_series = self
555
+ end
556
+
557
+ n = (10 * Math.log10(size)).to_i + 1
558
+ m = self.mean
559
+ if unbiased
560
+ d = Array.new(self.size, self.size)
561
+ else
562
+ d = ((1..self.size).to_a.reverse)[0..n]
563
+ end
564
+
565
+ 0.upto(n - 1).map do |i|
566
+ (demeaned_series * (self.lag(i) - m)).sum / d[i]
567
+ end
568
+ end
569
+
570
+ # Calculate cumulative sum of Vector
571
+ def cumsum
572
+ result = []
573
+ acc = 0
574
+ @data.each do |d|
575
+ if @missing_values.has_key?(d)
576
+ result << nil
577
+ else
578
+ acc += d
579
+ result << acc
580
+ end
581
+ end
582
+
583
+ Daru::Vector.new(result, index: @index)
584
+ end
585
+
357
586
  alias :sdp :standard_deviation_population
358
587
  alias :sds :standard_deviation_sample
359
588
  alias :std :sds
@@ -103,7 +103,7 @@ module Daru
103
103
  set_name name
104
104
 
105
105
  @data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
106
- @index = create_index(index || @data.size)
106
+ @index = try_create_index(index || @data.size)
107
107
 
108
108
  if @index.size > @data.size
109
109
  cast(dtype: :array) # NM with nils seg faults
@@ -200,32 +200,22 @@ module Daru
200
200
  # # For vectors employing hierarchical multi index
201
201
  #
202
202
  def [](*indexes)
203
- indexes.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
204
203
  location = indexes[0]
205
204
  if @index.is_a?(MultiIndex)
205
+ sub_index = @index[indexes]
206
206
  result =
207
- if location.is_a?(Integer)
208
- element_from_numeric_index(location)
209
- elsif location.is_a?(Range)
210
- arry = location.inject([]) do |memo, num|
211
- memo << element_from_numeric_index(num)
212
- memo
213
- end
214
-
215
- new_index = Daru::MultiIndex.new(@index.to_a[location])
216
- Daru::Vector.new(arry, index: new_index, name: @name, dtype: dtype)
207
+ if sub_index.is_a?(Integer)
208
+ @data[sub_index]
217
209
  else
218
- sub_index = @index[indexes]
210
+ elements = sub_index.map do |tuple|
211
+ @data[@index[tuple]]
212
+ end
219
213
 
220
- if sub_index.is_a?(Integer)
221
- element_from_numeric_index(sub_index)
222
- else
223
- elements = sub_index.map do |tuple|
224
- @data[@index[(indexes + tuple)]]
225
- end
226
- Daru::Vector.new(elements, index: Daru::MultiIndex.new(sub_index.to_a),
227
- name: @name, dtype: @dtype)
214
+ if !indexes[0].is_a?(Range) and indexes.size < @index.width
215
+ sub_index = sub_index.drop_left_level indexes.size
228
216
  end
217
+ Daru::Vector.new(
218
+ elements, index: sub_index, name: @name, dtype: @dtype)
229
219
  end
230
220
 
231
221
  return result
@@ -233,23 +223,24 @@ module Daru
233
223
  unless indexes[1]
234
224
  case location
235
225
  when Range
236
- range =
237
- if location.first.is_a?(Numeric)
238
- location
226
+ first = location.first
227
+ last = location.last
228
+ indexes = @index.slice first, last
229
+ else
230
+ pos = @index[location]
231
+ if pos.is_a?(Numeric)
232
+ return @data[pos]
239
233
  else
240
- first = location.first
241
- last = location.last
242
-
243
- (first..last)
234
+ indexes = pos
244
235
  end
245
- indexes = @index[range]
246
- else
247
- return element_from_numeric_index(location)
248
236
  end
237
+ else
238
+ indexes = indexes.map { |e| named_index_for(e) }
249
239
  end
250
240
 
251
- Daru::Vector.new indexes.map { |loc| @data[index_for(loc)] }, name: @name,
252
- index: indexes.map { |e| named_index_for(e) }, dtype: @dtype
241
+ Daru::Vector.new(
242
+ indexes.map { |loc| @data[@index[loc]] },
243
+ name: @name, index: indexes, dtype: @dtype)
253
244
  end
254
245
  end
255
246
 
@@ -274,19 +265,13 @@ module Daru
274
265
  @possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and
275
266
  !value.nil?)
276
267
 
277
- pos =
278
- if @index.is_a?(MultiIndex) and !location[0].is_a?(Integer)
279
- index_for location
280
- else
281
- index_for location[0]
282
- end
268
+ location = location[0] unless @index.is_a?(MultiIndex)
269
+ pos = @index[location]
283
270
 
284
- if pos.is_a?(MultiIndex)
285
- pos.each do |sub_tuple|
286
- self[*(location + sub_tuple)] = value
287
- end
288
- else
271
+ if pos.is_a?(Numeric)
289
272
  @data[pos] = value
273
+ else
274
+ pos.each { |tuple| self[tuple] = value }
290
275
  end
291
276
 
292
277
  set_size
@@ -334,16 +319,132 @@ module Daru
334
319
  self[index] == other[index]
335
320
  end
336
321
  else
337
- # TODO: Compare against some other obj (string, number, etc.)
322
+ super
323
+ end
324
+ end
325
+
326
+ # @!method eq
327
+ # Uses `==` and returns `true` for each **equal** entry
328
+ # @param [#==, Daru::Vector] If scalar object, compares it with each
329
+ # element in self. If Daru::Vector, compares elements with same indexes.
330
+ # @example (see #where)
331
+ # @!method not_eq
332
+ # Uses `!=` and returns `true` for each **unequal** entry
333
+ # @param [#!=, Daru::Vector] If scalar object, compares it with each
334
+ # element in self. If Daru::Vector, compares elements with same indexes.
335
+ # @example (see #where)
336
+ # @!method lt
337
+ # Uses `<` and returns `true` for each entry **less than** the supplied object
338
+ # @param [#<, Daru::Vector] If scalar object, compares it with each
339
+ # element in self. If Daru::Vector, compares elements with same indexes.
340
+ # @example (see #where)
341
+ # @!method lteq
342
+ # Uses `<=` and returns `true` for each entry **less than or equal to** the supplied object
343
+ # @param [#<=, Daru::Vector] If scalar object, compares it with each
344
+ # element in self. If Daru::Vector, compares elements with same indexes.
345
+ # @example (see #where)
346
+ # @!method mt
347
+ # Uses `>` and returns `true` for each entry **more than** the supplied object
348
+ # @param [#>, Daru::Vector] If scalar object, compares it with each
349
+ # element in self. If Daru::Vector, compares elements with same indexes.
350
+ # @example (see #where)
351
+ # @!method mteq
352
+ # Uses `>=` and returns `true` for each entry **more than or equal to** the supplied object
353
+ # @param [#>=, Daru::Vector] If scalar object, compares it with each
354
+ # element in self. If Daru::Vector, compares elements with same indexes.
355
+ # @example (see #where)
356
+
357
+ # Define the comparator methods with metaprogramming. See documentation
358
+ # written above for functionality of each method. Use these methods with the
359
+ # `where` method to obtain the corresponding Vector/DataFrame.
360
+ {
361
+ :eq => :==,
362
+ :not_eq => :!=,
363
+ :lt => :<,
364
+ :lteq => :<=,
365
+ :mt => :>,
366
+ :mteq => :>=,
367
+ }.each do |method, operator|
368
+ define_method(method) do |other|
369
+ mod = Daru::Core::Query
370
+ if other.is_a?(Daru::Vector)
371
+ mod.apply_vector_operator operator, self, other
372
+ else
373
+ mod.apply_scalar_operator operator, @data,other
374
+ end
338
375
  end
339
376
  end
377
+ alias :gt :mt
378
+ alias :gteq :mteq
379
+
380
+ # Comparator for checking if any of the elements in *other* exist in self.
381
+ #
382
+ # @param [Array, Daru::Vector] other A collection which has elements that
383
+ # need to be checked for in self.
384
+ # @example Usage of `in`.
385
+ # vector = Daru::Vector.new([1,2,3,4,5])
386
+ # vector.where(vector.in([3,5]))
387
+ # #=>
388
+ # ##<Daru::Vector:82215960 @name = nil @size = 2 >
389
+ # # nil
390
+ # # 2 3
391
+ # # 4 5
392
+ def in other
393
+ other = Hash[other.zip(Array.new(other.size, 0))]
394
+ Daru::Core::Query::BoolArray.new(
395
+ @data.inject([]) do |memo, d|
396
+ memo << (other.has_key?(d) ? true : false)
397
+ memo
398
+ end
399
+ )
400
+ end
401
+
402
+ # Return a new vector based on the contents of a boolean array. Use with the
403
+ # comparator methods to obtain meaningful results. See this notebook for
404
+ # a good overview of using #where.
405
+ #
406
+ # @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The
407
+ # collection containing the true or false values. Each element in the Vector
408
+ # corresponding to a `true` in the bool_arry will be returned along with its
409
+ # index.
410
+ # @example Usage of #where.
411
+ # vector = Daru::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
412
+ #
413
+ # # Simple logic statement passed to #where.
414
+ # vector.where(vector.eq(5).or(vector.eq(1)))
415
+ # # =>
416
+ # ##<Daru::Vector:77626210 @name = nil @size = 7 >
417
+ # # nil
418
+ # # 2 5
419
+ # # 4 5
420
+ # # 7 5
421
+ # # 10 1
422
+ # # 11 5
423
+ # # 13 5
424
+ # # 15 1
425
+ #
426
+ # # A somewhat more complex logic statement
427
+ # vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
428
+ # #=>
429
+ # ##<Daru::Vector:81072310 @name = nil @size = 7 >
430
+ # # nil
431
+ # # 2 5
432
+ # # 4 5
433
+ # # 7 5
434
+ # # 10 1
435
+ # # 11 5
436
+ # # 13 5
437
+ # # 15 1
438
+ def where bool_arry
439
+ Daru::Core::Query.vector_where @data.to_a, @index.to_a, bool_arry, self.dtype
440
+ end
340
441
 
341
442
  def head q=10
342
443
  self[0..(q-1)]
343
444
  end
344
445
 
345
446
  def tail q=10
346
- self[(@size - q - 1)..(@size-1)]
447
+ self[(@size - q)..(@size-1)]
347
448
  end
348
449
 
349
450
  # Reports whether missing data is present in the Vector.
@@ -354,20 +455,12 @@ module Daru
354
455
 
355
456
 
356
457
  # Append an element to the vector by specifying the element and index
357
- def concat element, index=nil
458
+ def concat element, index
358
459
  raise IndexError, "Expected new unique index" if @index.include? index
359
460
 
360
- if index.nil? and @index.index_class == Integer
361
- @index = create_index(@size + 1)
362
- index = @size
363
- else
364
- begin
365
- @index = create_index(@index + index)
366
- rescue StandardError => e
367
- raise e, "Expected valid index."
368
- end
369
- end
461
+ @index = @index | [index]
370
462
  @data[@index[index]] = element
463
+
371
464
  set_size
372
465
  set_missing_positions unless Daru.lazy_update
373
466
  end
@@ -394,14 +487,8 @@ module Daru
394
487
 
395
488
  # Delete element by index
396
489
  def delete_at index
397
- idx = named_index_for index
398
- @data.delete_at @index[idx]
399
-
400
- if @index.index_class == Integer
401
- @index = Daru::Index.new @size-1
402
- else
403
- @index = Daru::Index.new (@index.to_a - [idx])
404
- end
490
+ @data.delete_at @index[index]
491
+ @index = Daru::Index.new(@index.to_a - [index])
405
492
 
406
493
  set_size
407
494
  set_missing_positions unless Daru.lazy_update
@@ -482,9 +569,9 @@ module Daru
482
569
 
483
570
  order = opts[:ascending] ? :ascending : :descending
484
571
  vector, index = send(opts[:type], @data.to_a.dup, @index.to_a, order, &block)
485
- index = @index.is_a?(MultiIndex) ? Daru::MultiIndex.new(index) : index
572
+ index = Daru::Index.new index
486
573
 
487
- Daru::Vector.new(vector, index: create_index(index), name: @name, dtype: @dtype)
574
+ Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
488
575
  end
489
576
 
490
577
  # Just sort the data and get an Array in return using Enumerable#sort.
@@ -515,9 +602,31 @@ module Daru
515
602
  self
516
603
  end
517
604
 
605
+ # Delete an element if block returns true. Destructive.
518
606
  def delete_if &block
519
607
  return to_enum(:delete_if) unless block_given?
520
608
 
609
+ keep_e = []
610
+ keep_i = []
611
+ each_with_index do |n, i|
612
+ unless yield(n)
613
+ keep_e << n
614
+ keep_i << i
615
+ end
616
+ end
617
+
618
+ @data = cast_vector_to @dtype, keep_e
619
+ @index = Daru::Index.new(keep_i)
620
+ set_missing_positions unless Daru.lazy_update
621
+ set_size
622
+
623
+ self
624
+ end
625
+
626
+ # Keep an element if block returns true. Destructive.
627
+ def keep_if &block
628
+ return to_enum(:keep_if) unless block_given?
629
+
521
630
  keep_e = []
522
631
  keep_i = []
523
632
  each_with_index do |n, i|
@@ -528,7 +637,7 @@ module Daru
528
637
  end
529
638
 
530
639
  @data = cast_vector_to @dtype, keep_e
531
- @index = @index.is_a?(MultiIndex) ? MultiIndex.new(keep_i) : Index.new(keep_i)
640
+ @index = Daru::Index.new(keep_i)
532
641
  set_missing_positions unless Daru.lazy_update
533
642
  set_size
534
643
 
@@ -664,10 +773,33 @@ module Daru
664
773
  self
665
774
  end
666
775
 
776
+ # Lags the series by k periods.
777
+ #
778
+ # The convention is to set the oldest observations (the first ones
779
+ # in the series) to nil so that the size of the lagged series is the
780
+ # same as the original.
781
+ #
782
+ # Usage:
783
+ #
784
+ # ts = Daru::Vector.new((1..10).map { rand })
785
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
786
+ #
787
+ # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
788
+ # ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
789
+ def lag k=1
790
+ return self.dup if k == 0
791
+
792
+ dat = @data.to_a.dup
793
+ (dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
794
+ (0...k).each { |i| dat[i] = nil }
795
+
796
+ Daru::Vector.new(dat, index: @index, name: @name)
797
+ end
798
+
667
799
  def detach_index
668
800
  Daru::DataFrame.new({
669
- index: @index.to_a.map(&:to_s),
670
- vector: @data.to_a
801
+ index: @index.to_a,
802
+ values: @data.to_a
671
803
  })
672
804
  end
673
805
 
@@ -826,13 +958,31 @@ module Daru
826
958
  content
827
959
  end
828
960
 
829
- # Create a new vector with a different index.
830
- #
831
- # @param new_index [Symbol, Array, Daru::Index] The new index. Passing *:seq*
832
- # will reindex with sequential numbers from 0 to (n-1).
961
+ # Create a new vector with a different index, and preserve the indexing of
962
+ # current elements.
833
963
  def reindex new_index
834
- index = create_index(new_index == :seq ? @size : new_index)
835
- Daru::Vector.new @data.to_a, index: index, name: name, dtype: @dtype
964
+ vector = Daru::Vector.new([], index: new_index, name: @name)
965
+
966
+ new_index.each do |idx|
967
+ if @index.include?(idx)
968
+ vector[idx] = self[idx]
969
+ else
970
+ vector[idx] = nil
971
+ end
972
+ end
973
+
974
+ vector
975
+ end
976
+
977
+ def index= idx
978
+ raise ArgumentError,
979
+ "Size of supplied index #{index.size} does not match size of DataFrame" if
980
+ idx.size != self.size
981
+ raise ArgumentError, "Can only assign type Index and its subclasses." unless
982
+ idx.kind_of?(Daru::Index)
983
+
984
+ @index = idx
985
+ self
836
986
  end
837
987
 
838
988
  # Give the vector a new name
@@ -844,7 +994,7 @@ module Daru
844
994
  return
845
995
  end
846
996
 
847
- @name = new_name.to_sym
997
+ @name = new_name
848
998
  end
849
999
 
850
1000
  # Duplicate elements and indexes
@@ -1166,8 +1316,8 @@ module Daru
1166
1316
  def set_name name
1167
1317
  @name =
1168
1318
  if name.is_a?(Numeric) then name
1169
- elsif name.is_a?(Array) then name.join.to_sym # in case of MultiIndex tuple
1170
- elsif name then name.to_sym # anything but Numeric or nil
1319
+ elsif name.is_a?(Array) then name.join # in case of MultiIndex tuple
1320
+ elsif name then name # anything but Numeric or nil
1171
1321
  else
1172
1322
  nil
1173
1323
  end
@@ -1180,7 +1330,7 @@ module Daru
1180
1330
  end
1181
1331
  end
1182
1332
 
1183
- def create_index potential_index
1333
+ def try_create_index potential_index
1184
1334
  if potential_index.is_a?(Daru::MultiIndex) or potential_index.is_a?(Daru::Index)
1185
1335
  potential_index
1186
1336
  else