daru 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +6 -6
  3. data/.gitignore +2 -0
  4. data/CONTRIBUTING.md +7 -3
  5. data/History.md +36 -0
  6. data/README.md +21 -13
  7. data/Rakefile +16 -1
  8. data/benchmarks/TradeoffData.csv +65 -0
  9. data/benchmarks/dataframe_creation.rb +39 -0
  10. data/benchmarks/group_by.rb +32 -0
  11. data/benchmarks/row_access.rb +41 -0
  12. data/benchmarks/row_assign.rb +36 -0
  13. data/benchmarks/sorting.rb +44 -0
  14. data/benchmarks/vector_access.rb +31 -0
  15. data/benchmarks/vector_assign.rb +42 -0
  16. data/benchmarks/where_clause.rb +48 -0
  17. data/benchmarks/where_vs_filter.rb +28 -0
  18. data/daru.gemspec +29 -5
  19. data/lib/daru.rb +30 -1
  20. data/lib/daru/accessors/array_wrapper.rb +2 -2
  21. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
  22. data/lib/daru/core/group_by.rb +112 -31
  23. data/lib/daru/core/merge.rb +170 -0
  24. data/lib/daru/core/query.rb +95 -0
  25. data/lib/daru/dataframe.rb +335 -223
  26. data/lib/daru/date_time/index.rb +550 -0
  27. data/lib/daru/date_time/offsets.rb +397 -0
  28. data/lib/daru/index.rb +266 -54
  29. data/lib/daru/io/io.rb +1 -2
  30. data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
  31. data/lib/daru/maths/arithmetic/vector.rb +2 -2
  32. data/lib/daru/maths/statistics/dataframe.rb +58 -8
  33. data/lib/daru/maths/statistics/vector.rb +229 -0
  34. data/lib/daru/vector.rb +230 -80
  35. data/lib/daru/version.rb +1 -1
  36. data/spec/core/group_by_spec.rb +16 -16
  37. data/spec/core/merge_spec.rb +52 -0
  38. data/spec/core/query_spec.rb +171 -0
  39. data/spec/dataframe_spec.rb +278 -280
  40. data/spec/date_time/data_spec.rb +199 -0
  41. data/spec/date_time/index_spec.rb +433 -0
  42. data/spec/date_time/offsets_spec.rb +371 -0
  43. data/spec/fixtures/stock_data.csv +500 -0
  44. data/spec/index_spec.rb +317 -11
  45. data/spec/io/io_spec.rb +18 -17
  46. data/spec/math/arithmetic/dataframe_spec.rb +3 -3
  47. data/spec/math/statistics/dataframe_spec.rb +39 -1
  48. data/spec/math/statistics/vector_spec.rb +163 -1
  49. data/spec/monkeys_spec.rb +4 -0
  50. data/spec/spec_helper.rb +3 -0
  51. data/spec/vector_spec.rb +125 -60
  52. metadata +71 -14
  53. data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
  54. data/lib/daru/multi_index.rb +0 -216
  55. data/spec/multi_index_spec.rb +0 -216
@@ -58,7 +58,6 @@ module Daru
58
58
  end
59
59
 
60
60
  # Functions for loading/writing CSV files
61
-
62
61
  def from_csv path, opts={}
63
62
  opts[:col_sep] ||= ','
64
63
  opts[:converters] ||= :numeric
@@ -79,7 +78,7 @@ module Daru
79
78
  yield csv if block_given?
80
79
 
81
80
  csv_as_arrays = csv.to_a
82
- headers = csv_as_arrays[0].recode_repeated.map(&:to_sym)
81
+ headers = csv_as_arrays[0].recode_repeated.map
83
82
  csv_as_arrays.delete_at 0
84
83
  csv_as_arrays = csv_as_arrays.transpose
85
84
 
@@ -64,8 +64,8 @@ module Daru
64
64
 
65
65
  hsh = {}
66
66
  all_vectors.each do |vector_name|
67
- this = self .has_vector?(vector_name) ? self .vector[vector_name] : nil
68
- that = other.has_vector?(vector_name) ? other.vector[vector_name] : nil
67
+ this = self .has_vector?(vector_name) ? self[vector_name] : nil
68
+ that = other.has_vector?(vector_name) ? other[vector_name] : nil
69
69
 
70
70
  if this and that
71
71
  hsh[vector_name] = this.send(operation, that)
@@ -68,8 +68,8 @@ module Daru
68
68
  index = (@index.to_a | other.index.to_a).sort
69
69
 
70
70
  index.each do |idx|
71
- this = self[idx]
72
- that = other[idx]
71
+ this = self.index.include?(idx) ? self[idx] : nil
72
+ that = other.index.include?(idx) ? other[idx] : nil
73
73
 
74
74
  if this and that
75
75
  elements << this.send(operation ,that)
@@ -23,8 +23,12 @@ module Daru
23
23
  end
24
24
 
25
25
  # Calculate the maximum value of each numeric vector.
26
- def max
27
- compute_stats :max
26
+ def max opts={}
27
+ if opts[:vector]
28
+ self.row[*self[opts[:vector]].max_index.index.to_a]
29
+ else
30
+ compute_stats :max
31
+ end
28
32
  end
29
33
 
30
34
  # Calculate the minimum value of each numeric vector.
@@ -37,13 +41,45 @@ module Daru
37
41
  compute_stats :product
38
42
  end
39
43
 
40
- def standardize
41
- df = self.only_numerics clone: true
42
- df.map! do |v|
43
- v.standardize
44
+ # @!method cumsum
45
+ # Calculate cumulative sum of each numeric Vector
46
+ # @!method standardize
47
+ # Standardize each Vector
48
+ # @!method acf
49
+ # Calculate Autocorrelation coefficient
50
+ # @param [Integer] max_lags (nil) Number of initial lags
51
+ # @!method ema
52
+ # Calculate exponential moving average.
53
+ # @param [Integer] n (10) Loopback length.
54
+ # @param [TrueClass, FalseClass, NilClass] wilder (false) If true,
55
+ # 1/n value is used for smoothing; if false, uses 2/(n+1) value.
56
+ # @!method rolling_mean
57
+ # Calculate moving averages
58
+ # @param [Integer] n (10) Loopback length. Default to 10.
59
+ # @!method rolling_median
60
+ # Calculate moving median
61
+ # @param [Integer] n (10) Loopback length. Default to 10.
62
+ # @!method rolling_max
63
+ # Calculate moving max
64
+ # @param [Integer] n (10) Loopback length. Default to 10.
65
+ # @!method rolling_min
66
+ # Calculate moving min
67
+ # @param [Integer] n (10) Loopback length. Default to 10.
68
+ # @!method rolling_count
69
+ # Calculate moving non-missing count
70
+ # @param [Integer] n (10) Loopback length. Default to 10.
71
+ # @!method rolling_std
72
+ # Calculate moving standard deviation
73
+ # @param [Integer] n (10) Loopback length. Default to 10.
74
+ # @!method rolling_variance
75
+ # Calculate moving variance
76
+ # @param [Integer] n (10) Loopback length. Default to 10.
77
+ [:cumsum,:standardize,:acf,:ema,:rolling_mean,:rolling_median,:rolling_max,
78
+ :rolling_min,:rolling_count,:rolling_std,:rolling_variance, :rolling_sum
79
+ ].each do |meth|
80
+ define_method(meth) do |*args|
81
+ apply_method_to_numerics meth, *args
44
82
  end
45
-
46
- df
47
83
  end
48
84
 
49
85
  # Create a summary of mean, standard deviation, count, max and min of
@@ -105,6 +141,20 @@ module Daru
105
141
 
106
142
  private
107
143
 
144
+ def apply_method_to_numerics method, *args
145
+ order = []
146
+ computed = @vectors.to_a.inject([]) do |memo, n|
147
+ v = @data[@vectors[n]]
148
+ if v.type == :numeric
149
+ memo << v.send(method, *args)
150
+ order << n
151
+ end
152
+ memo
153
+ end
154
+
155
+ Daru::DataFrame.new(computed, index: @index, order: order ,clone: false)
156
+ end
157
+
108
158
  def vector_cov v1a, v2a
109
159
  sum_of_squares(v1a,v2a) / (v1a.size - 1)
110
160
  end
@@ -122,6 +122,16 @@ module Daru
122
122
  end
123
123
  end
124
124
 
125
+ # Count number of occurrences of each value in the Vector
126
+ def value_counts
127
+ values = {}
128
+ @data.each do |d|
129
+ values[d] ? values[d] += 1 : values[d] = 1
130
+ end
131
+
132
+ Daru::Vector.new(values)
133
+ end
134
+
125
135
  def proportion value=1
126
136
  frequencies[value].quo(n_valid).to_f
127
137
  end
@@ -354,6 +364,225 @@ module Daru
354
364
  end
355
365
  end
356
366
 
367
+ # Performs the difference of the series.
368
+ # Note: The first difference of series is X(t) - X(t-1)
369
+ # But, second difference of series is NOT X(t) - X(t-2)
370
+ # It is the first difference of the first difference
371
+ # => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
372
+ #
373
+ # == Arguments
374
+ #
375
+ #* *max_lags*: integer, (default: 1), number of differences required.
376
+ #
377
+ # @example Using #diff
378
+ #
379
+ # ts = Daru::Vector.new((1..10).map { rand })
380
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
381
+ #
382
+ # ts.diff # => [nil, -0.46, 0.21, 0.27, ...]
383
+ #
384
+ # @return [Daru::Vector]
385
+ def diff(max_lags = 1)
386
+ ts = self
387
+ difference = []
388
+ max_lags.times do
389
+ difference = ts - ts.lag
390
+ ts = difference
391
+ end
392
+ difference
393
+ end
394
+
395
+ # Calculate the rolling function for a loopback value.
396
+ #
397
+ # @param [Symbol] function The rolling function to be applied. Can be
398
+ # any function applicable to Daru::Vector (:mean, :median, :count,
399
+ # :min, :max, etc.)
400
+ # @param [Integer] n (10) A non-negative value which serves as the loopback length.
401
+ # @return [Daru::Vector] Vector containing rolling calculations.
402
+ # @example Using #rolling
403
+ # ts = Daru::Vector.new((1..100).map { rand })
404
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
405
+ # # first 9 observations are nil
406
+ # ts.rolling(:mean) # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
407
+ def rolling function, n=10
408
+ Daru::Vector.new(
409
+ [nil] * (n - 1) +
410
+ (0..(size - n)).map do |i|
411
+ Daru::Vector.new(@data[i...(i + n)]).send(function)
412
+ end, index: @index
413
+ )
414
+ end
415
+
416
+ # @!method rolling_mean
417
+ # Calculate rolling average
418
+ # @param [Integer] n (10) Loopback length
419
+ # @!method rolling_median
420
+ # Calculate rolling median
421
+ # @param [Integer] n (10) Loopback length
422
+ # @!method rolling_count
423
+ # Calculate rolling non-missing count
424
+ # @param [Integer] n (10) Loopback length
425
+ # @!method rolling_max
426
+ # Calculate rolling max value
427
+ # @param [Integer] n (10) Loopback length
428
+ # @!method rolling_min
429
+ # Calculate rolling min value
430
+ # @param [Integer] n (10) Loopback length
431
+ # @!method rolling_sum
432
+ # Calculate rolling sum
433
+ # @param [Integer] n (10) Loopback length
434
+ # @!method rolling_std
435
+ # Calculate rolling standard deviation
436
+ # @param [Integer] n (10) Loopback length
437
+ # @!method rolling_variance
438
+ # Calculate rolling variance
439
+ # @param [Integer] n (10) Loopback length
440
+ [:count, :mean, :median, :max, :min, :sum, :std, :variance].each do |meth|
441
+ define_method("rolling_#{meth}".to_sym) do |n=10|
442
+ rolling(meth, n)
443
+ end
444
+ end
445
+
446
+ # Exponential Moving Average.
447
+ # Calculates an exponential moving average of the series using a
448
+ # specified parameter. If wilder is false (the default) then the EMA
449
+ # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
450
+ # Welles Wilder smoother of 1 / n.
451
+ #
452
+ # Warning for EMA usage: EMAs are unstable for small series, as they
453
+ # use a lot more than n observations to calculate. The series is stable
454
+ # if the size of the series is >= 3.45 * (n + 1)
455
+ #
456
+ # @param [Integer] n (10) Loopback length.
457
+ # @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
458
+ # used for smoothing; if false, uses 2/(n+1) value
459
+ #
460
+ # @example Using ema
461
+ #
462
+ # ts = (1..100).map { rand }.to_ts
463
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
464
+ #
465
+ # # first 9 observations are nil
466
+ # ts.ema # => [ ... nil, 0.509... , 0.433..., ... ]
467
+ #
468
+ # @return [Daru::Vector] Contains EMA
469
+ def ema(n = 10, wilder = false)
470
+ smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
471
+ # need to start everything from the first non-nil observation
472
+ start = @data.index { |i| i != nil }
473
+ # first n - 1 observations are nil
474
+ base = [nil] * (start + n - 1)
475
+ # nth observation is just a moving average
476
+ base << @data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n
477
+ (start + n).upto size - 1 do |i|
478
+ base << self[i] * smoother + (1 - smoother) * base.last
479
+ end
480
+
481
+ Daru::Vector.new(base, index: @index)
482
+ end
483
+
484
+ # Moving Average Convergence-Divergence.
485
+ # Calculates the MACD (moving average convergence-divergence) of the time
486
+ # series - this is a comparison of a fast EMA with a slow EMA.
487
+ #
488
+ # == Arguments
489
+ #* *fast*: integer, (default = 12) - fast component of MACD
490
+ #* *slow*: integer, (default = 26) - slow component of MACD
491
+ #* *signal*: integer, (default = 9) - signal component of MACD
492
+ #
493
+ # == Usage
494
+ #
495
+ # ts = Daru::Vector.new((1..100).map { rand })
496
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
497
+ # ts.macd(13)
498
+ #
499
+ # == Returns
500
+ #
501
+ # Array of two Daru::Vectors - comparison of fast EMA with slow and EMA with
502
+ # signal value
503
+ def macd(fast = 12, slow = 26, signal = 9)
504
+ series = ema(fast) - ema(slow)
505
+ [series, series.ema(signal)]
506
+ end
507
+
508
+ # Calculates the autocorrelation coefficients of the series.
509
+ #
510
+ # The first element is always 1, since that is the correlation
511
+ # of the series with itself.
512
+ #
513
+ # @example
514
+ # ts = Daru::Vector.new((1..100).map { rand })
515
+ #
516
+ # ts.acf # => array with first 21 autocorrelations
517
+ # ts.acf 3 # => array with 4 autocorrelations (lags 0 through 3)
518
+ def acf(max_lags = nil)
519
+ max_lags ||= (10 * Math.log10(size)).to_i
520
+
521
+ (0..max_lags).map do |i|
522
+ if i == 0
523
+ 1.0
524
+ else
525
+ m = self.mean
526
+ # can't use Pearson coefficient since the mean for the lagged series should
527
+ # be the same as the regular series
528
+ ((self - m) * (self.lag(i) - m)).sum / self.variance_sample / (self.size - 1)
529
+ end
530
+ end
531
+ end
532
+
533
+ # Provides autocovariance.
534
+ #
535
+ # == Options
536
+ #
537
+ #* *:demean* = true; optional. Supply false if series is not to be demeaned
538
+ #* *:unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
539
+ #
540
+ # == Returns
541
+ #
542
+ # Autocovariance value
543
+ def acvf(demean = true, unbiased = true)
544
+ opts = {
545
+ demean: true,
546
+ unbaised: true
547
+ }.merge(opts)
548
+
549
+ demean = opts[:demean]
550
+ unbiased = opts[:unbiased]
551
+ if demean
552
+ demeaned_series = self - self.mean
553
+ else
554
+ demeaned_series = self
555
+ end
556
+
557
+ n = (10 * Math.log10(size)).to_i + 1
558
+ m = self.mean
559
+ if unbiased
560
+ d = Array.new(self.size, self.size)
561
+ else
562
+ d = ((1..self.size).to_a.reverse)[0..n]
563
+ end
564
+
565
+ 0.upto(n - 1).map do |i|
566
+ (demeaned_series * (self.lag(i) - m)).sum / d[i]
567
+ end
568
+ end
569
+
570
+ # Calculate cumulative sum of Vector
571
+ def cumsum
572
+ result = []
573
+ acc = 0
574
+ @data.each do |d|
575
+ if @missing_values.has_key?(d)
576
+ result << nil
577
+ else
578
+ acc += d
579
+ result << acc
580
+ end
581
+ end
582
+
583
+ Daru::Vector.new(result, index: @index)
584
+ end
585
+
357
586
  alias :sdp :standard_deviation_population
358
587
  alias :sds :standard_deviation_sample
359
588
  alias :std :sds
@@ -103,7 +103,7 @@ module Daru
103
103
  set_name name
104
104
 
105
105
  @data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
106
- @index = create_index(index || @data.size)
106
+ @index = try_create_index(index || @data.size)
107
107
 
108
108
  if @index.size > @data.size
109
109
  cast(dtype: :array) # NM with nils seg faults
@@ -200,32 +200,22 @@ module Daru
200
200
  # # For vectors employing hierarchical multi index
201
201
  #
202
202
  def [](*indexes)
203
- indexes.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
204
203
  location = indexes[0]
205
204
  if @index.is_a?(MultiIndex)
205
+ sub_index = @index[indexes]
206
206
  result =
207
- if location.is_a?(Integer)
208
- element_from_numeric_index(location)
209
- elsif location.is_a?(Range)
210
- arry = location.inject([]) do |memo, num|
211
- memo << element_from_numeric_index(num)
212
- memo
213
- end
214
-
215
- new_index = Daru::MultiIndex.new(@index.to_a[location])
216
- Daru::Vector.new(arry, index: new_index, name: @name, dtype: dtype)
207
+ if sub_index.is_a?(Integer)
208
+ @data[sub_index]
217
209
  else
218
- sub_index = @index[indexes]
210
+ elements = sub_index.map do |tuple|
211
+ @data[@index[tuple]]
212
+ end
219
213
 
220
- if sub_index.is_a?(Integer)
221
- element_from_numeric_index(sub_index)
222
- else
223
- elements = sub_index.map do |tuple|
224
- @data[@index[(indexes + tuple)]]
225
- end
226
- Daru::Vector.new(elements, index: Daru::MultiIndex.new(sub_index.to_a),
227
- name: @name, dtype: @dtype)
214
+ if !indexes[0].is_a?(Range) and indexes.size < @index.width
215
+ sub_index = sub_index.drop_left_level indexes.size
228
216
  end
217
+ Daru::Vector.new(
218
+ elements, index: sub_index, name: @name, dtype: @dtype)
229
219
  end
230
220
 
231
221
  return result
@@ -233,23 +223,24 @@ module Daru
233
223
  unless indexes[1]
234
224
  case location
235
225
  when Range
236
- range =
237
- if location.first.is_a?(Numeric)
238
- location
226
+ first = location.first
227
+ last = location.last
228
+ indexes = @index.slice first, last
229
+ else
230
+ pos = @index[location]
231
+ if pos.is_a?(Numeric)
232
+ return @data[pos]
239
233
  else
240
- first = location.first
241
- last = location.last
242
-
243
- (first..last)
234
+ indexes = pos
244
235
  end
245
- indexes = @index[range]
246
- else
247
- return element_from_numeric_index(location)
248
236
  end
237
+ else
238
+ indexes = indexes.map { |e| named_index_for(e) }
249
239
  end
250
240
 
251
- Daru::Vector.new indexes.map { |loc| @data[index_for(loc)] }, name: @name,
252
- index: indexes.map { |e| named_index_for(e) }, dtype: @dtype
241
+ Daru::Vector.new(
242
+ indexes.map { |loc| @data[@index[loc]] },
243
+ name: @name, index: indexes, dtype: @dtype)
253
244
  end
254
245
  end
255
246
 
@@ -274,19 +265,13 @@ module Daru
274
265
  @possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and
275
266
  !value.nil?)
276
267
 
277
- pos =
278
- if @index.is_a?(MultiIndex) and !location[0].is_a?(Integer)
279
- index_for location
280
- else
281
- index_for location[0]
282
- end
268
+ location = location[0] unless @index.is_a?(MultiIndex)
269
+ pos = @index[location]
283
270
 
284
- if pos.is_a?(MultiIndex)
285
- pos.each do |sub_tuple|
286
- self[*(location + sub_tuple)] = value
287
- end
288
- else
271
+ if pos.is_a?(Numeric)
289
272
  @data[pos] = value
273
+ else
274
+ pos.each { |tuple| self[tuple] = value }
290
275
  end
291
276
 
292
277
  set_size
@@ -334,16 +319,132 @@ module Daru
334
319
  self[index] == other[index]
335
320
  end
336
321
  else
337
- # TODO: Compare against some other obj (string, number, etc.)
322
+ super
323
+ end
324
+ end
325
+
326
+ # @!method eq
327
+ # Uses `==` and returns `true` for each **equal** entry
328
+ # @param [#==, Daru::Vector] If scalar object, compares it with each
329
+ # element in self. If Daru::Vector, compares elements with same indexes.
330
+ # @example (see #where)
331
+ # @!method not_eq
332
+ # Uses `!=` and returns `true` for each **unequal** entry
333
+ # @param [#!=, Daru::Vector] If scalar object, compares it with each
334
+ # element in self. If Daru::Vector, compares elements with same indexes.
335
+ # @example (see #where)
336
+ # @!method lt
337
+ # Uses `<` and returns `true` for each entry **less than** the supplied object
338
+ # @param [#<, Daru::Vector] If scalar object, compares it with each
339
+ # element in self. If Daru::Vector, compares elements with same indexes.
340
+ # @example (see #where)
341
+ # @!method lteq
342
+ # Uses `<=` and returns `true` for each entry **less than or equal to** the supplied object
343
+ # @param [#<=, Daru::Vector] If scalar object, compares it with each
344
+ # element in self. If Daru::Vector, compares elements with same indexes.
345
+ # @example (see #where)
346
+ # @!method mt
347
+ # Uses `>` and returns `true` for each entry **more than** the supplied object
348
+ # @param [#>, Daru::Vector] If scalar object, compares it with each
349
+ # element in self. If Daru::Vector, compares elements with same indexes.
350
+ # @example (see #where)
351
+ # @!method mteq
352
+ # Uses `>=` and returns `true` for each entry **more than or equal to** the supplied object
353
+ # @param [#>=, Daru::Vector] If scalar object, compares it with each
354
+ # element in self. If Daru::Vector, compares elements with same indexes.
355
+ # @example (see #where)
356
+
357
+ # Define the comparator methods with metaprogramming. See documentation
358
+ # written above for functionality of each method. Use these methods with the
359
+ # `where` method to obtain the corresponding Vector/DataFrame.
360
+ {
361
+ :eq => :==,
362
+ :not_eq => :!=,
363
+ :lt => :<,
364
+ :lteq => :<=,
365
+ :mt => :>,
366
+ :mteq => :>=,
367
+ }.each do |method, operator|
368
+ define_method(method) do |other|
369
+ mod = Daru::Core::Query
370
+ if other.is_a?(Daru::Vector)
371
+ mod.apply_vector_operator operator, self, other
372
+ else
373
+ mod.apply_scalar_operator operator, @data,other
374
+ end
338
375
  end
339
376
  end
377
+ alias :gt :mt
378
+ alias :gteq :mteq
379
+
380
+ # Comparator for checking if any of the elements in *other* exist in self.
381
+ #
382
+ # @param [Array, Daru::Vector] other A collection which has elements that
383
+ # need to be checked for in self.
384
+ # @example Usage of `in`.
385
+ # vector = Daru::Vector.new([1,2,3,4,5])
386
+ # vector.where(vector.in([3,5]))
387
+ # #=>
388
+ # ##<Daru::Vector:82215960 @name = nil @size = 2 >
389
+ # # nil
390
+ # # 2 3
391
+ # # 4 5
392
+ def in other
393
+ other = Hash[other.zip(Array.new(other.size, 0))]
394
+ Daru::Core::Query::BoolArray.new(
395
+ @data.inject([]) do |memo, d|
396
+ memo << (other.has_key?(d) ? true : false)
397
+ memo
398
+ end
399
+ )
400
+ end
401
+
402
+ # Return a new vector based on the contents of a boolean array. Use with the
403
+ # comparator methods to obtain meaningful results. See this notebook for
404
+ # a good overview of using #where.
405
+ #
406
+ # @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The
407
+ # collection containing the true or false values. Each element in the Vector
408
+ # corresponding to a `true` in the bool_arry will be returned along with its
409
+ # index.
410
+ # @example Usage of #where.
411
+ # vector = Daru::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
412
+ #
413
+ # # Simple logic statement passed to #where.
414
+ # vector.where(vector.eq(5).or(vector.eq(1)))
415
+ # # =>
416
+ # ##<Daru::Vector:77626210 @name = nil @size = 7 >
417
+ # # nil
418
+ # # 2 5
419
+ # # 4 5
420
+ # # 7 5
421
+ # # 10 1
422
+ # # 11 5
423
+ # # 13 5
424
+ # # 15 1
425
+ #
426
+ # # A somewhat more complex logic statement
427
+ # vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
428
+ # #=>
429
+ # ##<Daru::Vector:81072310 @name = nil @size = 7 >
430
+ # # nil
431
+ # # 2 5
432
+ # # 4 5
433
+ # # 7 5
434
+ # # 10 1
435
+ # # 11 5
436
+ # # 13 5
437
+ # # 15 1
438
+ def where bool_arry
439
+ Daru::Core::Query.vector_where @data.to_a, @index.to_a, bool_arry, self.dtype
440
+ end
340
441
 
341
442
  def head q=10
342
443
  self[0..(q-1)]
343
444
  end
344
445
 
345
446
  def tail q=10
346
- self[(@size - q - 1)..(@size-1)]
447
+ self[(@size - q)..(@size-1)]
347
448
  end
348
449
 
349
450
  # Reports whether missing data is present in the Vector.
@@ -354,20 +455,12 @@ module Daru
354
455
 
355
456
 
356
457
  # Append an element to the vector by specifying the element and index
357
- def concat element, index=nil
458
+ def concat element, index
358
459
  raise IndexError, "Expected new unique index" if @index.include? index
359
460
 
360
- if index.nil? and @index.index_class == Integer
361
- @index = create_index(@size + 1)
362
- index = @size
363
- else
364
- begin
365
- @index = create_index(@index + index)
366
- rescue StandardError => e
367
- raise e, "Expected valid index."
368
- end
369
- end
461
+ @index = @index | [index]
370
462
  @data[@index[index]] = element
463
+
371
464
  set_size
372
465
  set_missing_positions unless Daru.lazy_update
373
466
  end
@@ -394,14 +487,8 @@ module Daru
394
487
 
395
488
  # Delete element by index
396
489
  def delete_at index
397
- idx = named_index_for index
398
- @data.delete_at @index[idx]
399
-
400
- if @index.index_class == Integer
401
- @index = Daru::Index.new @size-1
402
- else
403
- @index = Daru::Index.new (@index.to_a - [idx])
404
- end
490
+ @data.delete_at @index[index]
491
+ @index = Daru::Index.new(@index.to_a - [index])
405
492
 
406
493
  set_size
407
494
  set_missing_positions unless Daru.lazy_update
@@ -482,9 +569,9 @@ module Daru
482
569
 
483
570
  order = opts[:ascending] ? :ascending : :descending
484
571
  vector, index = send(opts[:type], @data.to_a.dup, @index.to_a, order, &block)
485
- index = @index.is_a?(MultiIndex) ? Daru::MultiIndex.new(index) : index
572
+ index = Daru::Index.new index
486
573
 
487
- Daru::Vector.new(vector, index: create_index(index), name: @name, dtype: @dtype)
574
+ Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
488
575
  end
489
576
 
490
577
  # Just sort the data and get an Array in return using Enumerable#sort.
@@ -515,9 +602,31 @@ module Daru
515
602
  self
516
603
  end
517
604
 
605
+ # Delete an element if block returns true. Destructive.
518
606
  def delete_if &block
519
607
  return to_enum(:delete_if) unless block_given?
520
608
 
609
+ keep_e = []
610
+ keep_i = []
611
+ each_with_index do |n, i|
612
+ unless yield(n)
613
+ keep_e << n
614
+ keep_i << i
615
+ end
616
+ end
617
+
618
+ @data = cast_vector_to @dtype, keep_e
619
+ @index = Daru::Index.new(keep_i)
620
+ set_missing_positions unless Daru.lazy_update
621
+ set_size
622
+
623
+ self
624
+ end
625
+
626
+ # Keep an element if block returns true. Destructive.
627
+ def keep_if &block
628
+ return to_enum(:keep_if) unless block_given?
629
+
521
630
  keep_e = []
522
631
  keep_i = []
523
632
  each_with_index do |n, i|
@@ -528,7 +637,7 @@ module Daru
528
637
  end
529
638
 
530
639
  @data = cast_vector_to @dtype, keep_e
531
- @index = @index.is_a?(MultiIndex) ? MultiIndex.new(keep_i) : Index.new(keep_i)
640
+ @index = Daru::Index.new(keep_i)
532
641
  set_missing_positions unless Daru.lazy_update
533
642
  set_size
534
643
 
@@ -664,10 +773,33 @@ module Daru
664
773
  self
665
774
  end
666
775
 
776
+ # Lags the series by k periods.
777
+ #
778
+ # The convention is to set the oldest observations (the first ones
779
+ # in the series) to nil so that the size of the lagged series is the
780
+ # same as the original.
781
+ #
782
+ # Usage:
783
+ #
784
+ # ts = Daru::Vector.new((1..10).map { rand })
785
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
786
+ #
787
+ # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
788
+ # ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
789
+ def lag k=1
790
+ return self.dup if k == 0
791
+
792
+ dat = @data.to_a.dup
793
+ (dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
794
+ (0...k).each { |i| dat[i] = nil }
795
+
796
+ Daru::Vector.new(dat, index: @index, name: @name)
797
+ end
798
+
667
799
  def detach_index
668
800
  Daru::DataFrame.new({
669
- index: @index.to_a.map(&:to_s),
670
- vector: @data.to_a
801
+ index: @index.to_a,
802
+ values: @data.to_a
671
803
  })
672
804
  end
673
805
 
@@ -826,13 +958,31 @@ module Daru
826
958
  content
827
959
  end
828
960
 
829
- # Create a new vector with a different index.
830
- #
831
- # @param new_index [Symbol, Array, Daru::Index] The new index. Passing *:seq*
832
- # will reindex with sequential numbers from 0 to (n-1).
961
+ # Create a new vector with a different index, and preserve the indexing of
962
+ # current elements.
833
963
  def reindex new_index
834
- index = create_index(new_index == :seq ? @size : new_index)
835
- Daru::Vector.new @data.to_a, index: index, name: name, dtype: @dtype
964
+ vector = Daru::Vector.new([], index: new_index, name: @name)
965
+
966
+ new_index.each do |idx|
967
+ if @index.include?(idx)
968
+ vector[idx] = self[idx]
969
+ else
970
+ vector[idx] = nil
971
+ end
972
+ end
973
+
974
+ vector
975
+ end
976
+
977
+ def index= idx
978
+ raise ArgumentError,
979
+ "Size of supplied index #{index.size} does not match size of DataFrame" if
980
+ idx.size != self.size
981
+ raise ArgumentError, "Can only assign type Index and its subclasses." unless
982
+ idx.kind_of?(Daru::Index)
983
+
984
+ @index = idx
985
+ self
836
986
  end
837
987
 
838
988
  # Give the vector a new name
@@ -844,7 +994,7 @@ module Daru
844
994
  return
845
995
  end
846
996
 
847
- @name = new_name.to_sym
997
+ @name = new_name
848
998
  end
849
999
 
850
1000
  # Duplicate elements and indexes
@@ -1166,8 +1316,8 @@ module Daru
1166
1316
  def set_name name
1167
1317
  @name =
1168
1318
  if name.is_a?(Numeric) then name
1169
- elsif name.is_a?(Array) then name.join.to_sym # in case of MultiIndex tuple
1170
- elsif name then name.to_sym # anything but Numeric or nil
1319
+ elsif name.is_a?(Array) then name.join # in case of MultiIndex tuple
1320
+ elsif name then name # anything but Numeric or nil
1171
1321
  else
1172
1322
  nil
1173
1323
  end
@@ -1180,7 +1330,7 @@ module Daru
1180
1330
  end
1181
1331
  end
1182
1332
 
1183
- def create_index potential_index
1333
+ def try_create_index potential_index
1184
1334
  if potential_index.is_a?(Daru::MultiIndex) or potential_index.is_a?(Daru::Index)
1185
1335
  potential_index
1186
1336
  else