daru 0.1.5 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (106) hide show
  1. checksums.yaml +5 -5
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +21 -7
  5. data/.travis.yml +10 -5
  6. data/CONTRIBUTING.md +15 -10
  7. data/History.md +124 -2
  8. data/README.md +37 -9
  9. data/ReleasePolicy.md +20 -0
  10. data/benchmarks/db_loading.rb +34 -0
  11. data/benchmarks/statistics.rb +6 -6
  12. data/benchmarks/where_clause.rb +1 -1
  13. data/benchmarks/where_vs_filter.rb +1 -1
  14. data/daru.gemspec +17 -41
  15. data/lib/daru.rb +10 -13
  16. data/lib/daru/accessors/gsl_wrapper.rb +1 -1
  17. data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
  18. data/lib/daru/category.rb +29 -15
  19. data/lib/daru/configuration.rb +34 -0
  20. data/lib/daru/core/group_by.rb +158 -77
  21. data/lib/daru/core/merge.rb +12 -3
  22. data/lib/daru/core/query.rb +20 -4
  23. data/lib/daru/dataframe.rb +692 -118
  24. data/lib/daru/date_time/index.rb +14 -11
  25. data/lib/daru/date_time/offsets.rb +9 -1
  26. data/lib/daru/extensions/which_dsl.rb +55 -0
  27. data/lib/daru/formatters/table.rb +3 -5
  28. data/lib/daru/index/categorical_index.rb +4 -4
  29. data/lib/daru/index/index.rb +131 -42
  30. data/lib/daru/index/multi_index.rb +118 -10
  31. data/lib/daru/io/csv/converters.rb +21 -0
  32. data/lib/daru/io/io.rb +105 -33
  33. data/lib/daru/io/sql_data_source.rb +10 -0
  34. data/lib/daru/iruby/templates/dataframe.html.erb +4 -51
  35. data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
  36. data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  37. data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  38. data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
  39. data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
  40. data/lib/daru/iruby/templates/vector.html.erb +3 -25
  41. data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
  42. data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
  43. data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
  44. data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
  45. data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
  46. data/lib/daru/maths/arithmetic/vector.rb +38 -2
  47. data/lib/daru/maths/statistics/dataframe.rb +28 -30
  48. data/lib/daru/maths/statistics/vector.rb +295 -41
  49. data/lib/daru/plotting/gruff/dataframe.rb +13 -15
  50. data/lib/daru/plotting/nyaplot/category.rb +1 -1
  51. data/lib/daru/plotting/nyaplot/dataframe.rb +15 -4
  52. data/lib/daru/plotting/nyaplot/vector.rb +1 -2
  53. data/lib/daru/vector.rb +308 -96
  54. data/lib/daru/version.rb +1 -1
  55. data/profile/vector_new.rb +9 -0
  56. data/spec/accessors/gsl_wrapper_spec.rb +38 -35
  57. data/spec/accessors/nmatrix_wrapper_spec.rb +25 -22
  58. data/spec/category_spec.rb +24 -20
  59. data/spec/core/group_by_spec.rb +238 -4
  60. data/spec/core/merge_spec.rb +1 -1
  61. data/spec/core/query_spec.rb +65 -50
  62. data/spec/daru_spec.rb +22 -0
  63. data/spec/dataframe_spec.rb +473 -16
  64. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  65. data/spec/date_time/index_spec.rb +34 -16
  66. data/spec/date_time/offsets_spec.rb +14 -0
  67. data/spec/extensions/rserve_spec.rb +1 -1
  68. data/spec/extensions/which_dsl_spec.rb +38 -0
  69. data/spec/fixtures/boolean_converter_test.csv +5 -0
  70. data/spec/fixtures/duplicates.csv +32 -0
  71. data/spec/fixtures/eciresults.html +394 -0
  72. data/spec/fixtures/empty_rows_test.csv +17 -0
  73. data/spec/fixtures/macau.html +3691 -0
  74. data/spec/fixtures/macd_data.csv +150 -0
  75. data/spec/fixtures/matrix_test.csv +55 -55
  76. data/spec/fixtures/moneycontrol.html +6812 -0
  77. data/spec/fixtures/string_converter_test.csv +5 -0
  78. data/spec/fixtures/test_xls.xls +0 -0
  79. data/spec/fixtures/test_xls_2.xls +0 -0
  80. data/spec/fixtures/url_test.txt~ +0 -0
  81. data/spec/fixtures/valid_markup.html +62 -0
  82. data/spec/fixtures/wiki_climate.html +1243 -0
  83. data/spec/fixtures/wiki_table_info.html +631 -0
  84. data/spec/formatters/table_formatter_spec.rb +29 -0
  85. data/spec/index/categorical_index_spec.rb +33 -33
  86. data/spec/index/index_spec.rb +160 -41
  87. data/spec/index/multi_index_spec.rb +143 -33
  88. data/spec/io/io_spec.rb +246 -2
  89. data/spec/io/sql_data_source_spec.rb +31 -41
  90. data/spec/iruby/dataframe_spec.rb +17 -19
  91. data/spec/iruby/vector_spec.rb +26 -28
  92. data/spec/maths/arithmetic/dataframe_spec.rb +1 -1
  93. data/spec/maths/arithmetic/vector_spec.rb +18 -0
  94. data/spec/maths/statistics/vector_spec.rb +153 -15
  95. data/spec/plotting/gruff/category_spec.rb +3 -3
  96. data/spec/plotting/gruff/dataframe_spec.rb +14 -4
  97. data/spec/plotting/gruff/vector_spec.rb +9 -9
  98. data/spec/plotting/nyaplot/category_spec.rb +5 -9
  99. data/spec/plotting/nyaplot/dataframe_spec.rb +95 -47
  100. data/spec/plotting/nyaplot/vector_spec.rb +5 -11
  101. data/spec/shared/vector_display_spec.rb +12 -14
  102. data/spec/spec_helper.rb +30 -7
  103. data/spec/support/matchers.rb +5 -0
  104. data/spec/vector_spec.rb +306 -72
  105. metadata +96 -55
  106. data/spec/fixtures/stock_data.csv +0 -500
@@ -7,12 +7,12 @@ module Daru
7
7
  size = opts[:size] || 500
8
8
  x = extract_x_vector opts[:x]
9
9
  y = extract_y_vectors opts[:y]
10
- if opts[:categorized]
11
- return plot_with_category(size, type, x, y, opts[:categorized])
12
- end
10
+ type = process_type type, opts[:categorized]
13
11
  case type
14
12
  when :line, :bar, :scatter
15
13
  plot = send("#{type}_plot", size, x, y)
14
+ when :scatter_categorized
15
+ plot = scatter_with_category(size, x, y, opts[:categorized])
16
16
  # TODO: hist, box
17
17
  # It turns out hist and box are not supported in Gruff yet
18
18
  else
@@ -24,6 +24,10 @@ module Daru
24
24
 
25
25
  private
26
26
 
27
+ def process_type type, categorized
28
+ type == :scatter && categorized ? :scatter_categorized : type
29
+ end
30
+
27
31
  def line_plot size, x, y
28
32
  plot = Gruff::Line.new size
29
33
  plot.labels = size.times.to_a.zip(x).to_h
@@ -50,21 +54,15 @@ module Daru
50
54
  plot
51
55
  end
52
56
 
53
- def plot_with_category size, type, x, y, opts
57
+ def scatter_with_category size, x, y, opts
54
58
  x = Daru::Vector.new x
55
59
  y = y.first
56
- case type
57
- when :scatter
58
- plot = Gruff::Scatter.new size
59
- cat_dv = self[opts[:by]]
60
- cat_dv.categories.each do |cat|
61
- bools = cat_dv.eq cat
62
- plot.data cat, x.where(bools).to_a, y.where(bools).to_a
63
- end
64
- else
65
- raise ArgumentError, "Type #{type} is not supported."
60
+ plot = Gruff::Scatter.new size
61
+ cat_dv = self[opts[:by]]
62
+ cat_dv.categories.each do |cat|
63
+ bools = cat_dv.eq cat
64
+ plot.data cat, x.where(bools).to_a, y.where(bools).to_a
66
65
  end
67
- yield plot if block_given?
68
66
  plot
69
67
  end
70
68
 
@@ -12,7 +12,7 @@ module Daru
12
12
  # Set yrange for good view
13
13
  set_yrange plot, opts[:method]
14
14
  yield plot, diagram if block_given?
15
- plot.show
15
+ plot
16
16
  else
17
17
  raise ArgumentError, "#{type} type is not supported."
18
18
  end
@@ -21,6 +21,8 @@ module Daru
21
21
  # df = Daru::DataFrame.new({a:['A', 'B', 'C', 'D', 'E'], b:[10,20,30,40,50]})
22
22
  # df.plot type: :bar, x: :a, y: :b
23
23
  def plot opts={}, &block
24
+ index_as_default_x_axis(opts) unless x_axis_defined?(opts)
25
+
24
26
  if opts[:categorized]
25
27
  plot_with_category(opts, &block)
26
28
  else
@@ -30,6 +32,15 @@ module Daru
30
32
 
31
33
  private
32
34
 
35
+ def x_axis_defined?(opts)
36
+ opts[:x] || opts.keys.any? { |k| k.to_s.match(/x\d+/) }
37
+ end
38
+
39
+ def index_as_default_x_axis(opts)
40
+ opts[:x] = :_index
41
+ self[:_index] = @index.to_a
42
+ end
43
+
33
44
  def plot_without_category opts
34
45
  options = {type: :scatter}.merge(opts)
35
46
 
@@ -38,7 +49,7 @@ module Daru
38
49
 
39
50
  diagram =
40
51
  case
41
- when !([:scatter, :bar, :line, :histogram] & types).empty?
52
+ when !(%i[scatter bar line histogram] & types).empty?
42
53
  plot_regular_diagrams plot, opts
43
54
  when types.include?(:box)
44
55
  plot_box_diagram plot
@@ -48,7 +59,7 @@ module Daru
48
59
 
49
60
  yield(plot, diagram) if block_given?
50
61
 
51
- plot.show
62
+ plot
52
63
  end
53
64
 
54
65
  def plot_with_category opts
@@ -66,8 +77,8 @@ module Daru
66
77
 
67
78
  plot.legend true
68
79
  yield plot, *diagrams if block_given?
69
- plot.show
70
80
 
81
+ plot
71
82
  else
72
83
  raise ArgumentError, "Unsupported type #{type}"
73
84
  end
@@ -102,7 +113,7 @@ module Daru
102
113
  end
103
114
  end
104
115
 
105
- SHAPES = %w(circle triangle-up diamond square triangle-down cross).freeze
116
+ SHAPES = %w[circle triangle-up diamond square triangle-down cross].freeze
106
117
  def get_shape type
107
118
  validate_type type, :scatter
108
119
  SHAPES.cycle
@@ -26,8 +26,7 @@ module Daru
26
26
  diagram = create_diagram plot, options[:type], x_axis
27
27
 
28
28
  yield plot, diagram if block_given?
29
-
30
- plot.show
29
+ plot
31
30
  end
32
31
 
33
32
  private
@@ -122,6 +122,17 @@ module Daru
122
122
  self
123
123
  end
124
124
 
125
+ def apply_method(method, keys: nil, by_position: true)
126
+ vect = keys ? get_sub_vector(keys, by_position: by_position) : self
127
+
128
+ case method
129
+ when Symbol then vect.send(method)
130
+ when Proc then method.call(vect)
131
+ else raise
132
+ end
133
+ end
134
+ alias :apply_method_on_sub_vector :apply_method
135
+
125
136
  # The name of the Daru::Vector. String.
126
137
  attr_reader :name
127
138
  # The row index. Can be either Daru::Index or Daru::MultiIndex.
@@ -140,8 +151,6 @@ module Daru
140
151
  attr_accessor :labels
141
152
  # Store vector data in an array
142
153
  attr_reader :data
143
- # Ploting library being used for this vector
144
- attr_reader :plotting_library
145
154
  # TODO: Make private.
146
155
  attr_reader :nil_positions, :nan_positions
147
156
 
@@ -186,6 +195,13 @@ module Daru
186
195
  end
187
196
  end
188
197
 
198
+ # attr_reader for :plotting_library
199
+ def plotting_library
200
+ init_plotting_library
201
+
202
+ @plotting_library
203
+ end
204
+
189
205
  def plotting_library= lib
190
206
  case lib
191
207
  when :gruff, :nyaplot
@@ -196,11 +212,18 @@ module Daru
196
212
  )
197
213
  end
198
214
  else
199
- raise ArguementError, "Plotting library #{lib} not supported. "\
215
+ raise ArgumentError, "Plotting library #{lib} not supported. "\
200
216
  'Supported libraries are :nyaplot and :gruff'
201
217
  end
202
218
  end
203
219
 
220
+ # this method is overwritten: see Daru::Vector#plotting_library=
221
+ def plot(*args, **options, &b)
222
+ init_plotting_library
223
+
224
+ plot(*args, **options, &b)
225
+ end
226
+
204
227
  # Get one or more elements with specified index or a range.
205
228
  #
206
229
  # == Usage
@@ -228,7 +251,7 @@ module Daru
228
251
  end
229
252
 
230
253
  # Returns vector of values given positional values
231
- # @param [Array<object>] *positions positional values
254
+ # @param positions [Array<object>] positional values
232
255
  # @return [object] vector
233
256
  # @example
234
257
  # dv = Daru::Vector.new 'a'..'e'
@@ -252,7 +275,7 @@ module Daru
252
275
  end
253
276
 
254
277
  # Change value at given positions
255
- # @param [Array<object>] *positions positional values
278
+ # @param positions [Array<object>] positional values
256
279
  # @param [object] val value to assign
257
280
  # @example
258
281
  # dv = Daru::Vector.new 'a'..'e'
@@ -352,7 +375,7 @@ module Daru
352
375
  if other.is_a?(Daru::Vector)
353
376
  mod.apply_vector_operator operator, self, other
354
377
  else
355
- mod.apply_scalar_operator operator, @data,other
378
+ mod.apply_scalar_operator operator, @data, other
356
379
  end
357
380
  end
358
381
  alias_method operator, method if operator != :== && operator != :!=
@@ -385,11 +408,11 @@ module Daru
385
408
  # comparator methods to obtain meaningful results. See this notebook for
386
409
  # a good overview of using #where.
387
410
  #
388
- # @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The
411
+ # @param bool_array [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
389
412
  # collection containing the true of false values. Each element in the Vector
390
413
  # corresponding to a `true` in the bool_arry will be returned alongwith it's
391
414
  # index.
392
- # @exmaple Usage of #where.
415
+ # @example Usage of #where.
393
416
  # vector = Daru::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
394
417
  #
395
418
  # # Simple logic statement passed to #where.
@@ -421,6 +444,27 @@ module Daru
421
444
  Daru::Core::Query.vector_where self, bool_array
422
445
  end
423
446
 
447
+ # Return a new vector based on the contents of a boolean array and &block.
448
+ #
449
+ # @param bool_array [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>, &block] The
450
+ # collection containing the true or false values. Each element in the Vector
451
+ # corresponding to a `true` in the bool_array will be returned along with its
452
+ # index. The &block may contain manipulative functions for the Vector elements.
453
+ #
454
+ # @return [Daru::Vector]
455
+ #
456
+ # @example Usage of #apply_where.
457
+ # dv = Daru::Vector.new ['3 days', '5 weeks', '2 weeks']
458
+ # dv = dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" }
459
+ # # =>
460
+ # ##<Daru::Vector(3)>
461
+ # # 0 3 days
462
+ # # 1 35 days
463
+ # # 2 14 days
464
+ def apply_where bool_array, &block
465
+ Daru::Core::Query.vector_apply_where self, bool_array, &block
466
+ end
467
+
424
468
  def head q=10
425
469
  self[0..(q-1)]
426
470
  end
@@ -430,6 +474,11 @@ module Daru
430
474
  self[start..(size-1)]
431
475
  end
432
476
 
477
+ def last q=1
478
+ # The Enumerable mixin does not provide the last method.
479
+ tail(q)
480
+ end
481
+
433
482
  def empty?
434
483
  @index.empty?
435
484
  end
@@ -451,7 +500,7 @@ module Daru
451
500
  deprecate :flawed?, :include_values?, 2016, 10
452
501
 
453
502
  # Check if any one of mentioned values occur in the vector
454
- # @param [Array] *values values to check for
503
+ # @param values [Array] values to check for
455
504
  # @return [true, false] returns true if any one of specified values
456
505
  # occur in the vector
457
506
  # @example
@@ -462,6 +511,26 @@ module Daru
462
511
  values.any? { |v| include_with_nan? @data, v }
463
512
  end
464
513
 
514
+ # @note Do not use it to check for Float::NAN as
515
+ # Float::NAN == Float::NAN is false
516
+ # Return vector of booleans with value at ith position is either
517
+ # true or false depending upon whether value at position i is equal to
518
+ # any of the values passed in the argument or not
519
+ # @param values [Array] values to equate with
520
+ # @return [Daru::Vector] vector of boolean values
521
+ # @example
522
+ # dv = Daru::Vector.new [1, 2, 3, 2, 1]
523
+ # dv.is_values 1, 2
524
+ # # => #<Daru::Vector(5)>
525
+ # # 0 true
526
+ # # 1 true
527
+ # # 2 false
528
+ # # 3 true
529
+ # # 4 true
530
+ def is_values(*values)
531
+ Daru::Vector.new values.map { |v| eq(v) }.inject(:|)
532
+ end
533
+
465
534
  # Append an element to the vector by specifying the element and index
466
535
  def concat element, index
467
536
  raise IndexError, 'Expected new unique index' if @index.include? index
@@ -481,8 +550,7 @@ module Daru
481
550
  # * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
482
551
  def cast opts={}
483
552
  dt = opts[:dtype]
484
- raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
485
- dt == :array || dt == :nmatrix || dt == :gsl
553
+ raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless %i[array nmatrix gsl].include?(dt)
486
554
 
487
555
  @data = cast_vector_to dt unless @dtype == dt
488
556
  end
@@ -535,7 +603,7 @@ module Daru
535
603
  # Get index of element
536
604
  def index_of element
537
605
  case dtype
538
- when :array then @index.key @data.index { |x| x.eql? element }
606
+ when :array then @index.key(@data.index { |x| x.eql? element })
539
607
  else @index.key @data.index(element)
540
608
  end
541
609
  end
@@ -583,6 +651,31 @@ module Daru
583
651
  Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
584
652
  end
585
653
 
654
+ # Sorts the vector according to its `Index` values. Defaults to ascending
655
+ # order sorting.
656
+ #
657
+ # @param [Hash] opts the options for sort_by_index method.
658
+ # @option opts [Boolean] :ascending false, will sort `index` in
659
+ # descending order.
660
+ #
661
+ # @return [Vector] new sorted `Vector` according to the index values.
662
+ #
663
+ # @example
664
+ #
665
+ # dv = Daru::Vector.new [11, 13, 12], index: [23, 21, 22]
666
+ # # Say you want to sort index in ascending order
667
+ # dv.sort_by_index(ascending: true)
668
+ # #=> Daru::Vector.new [13, 12, 11], index: [21, 22, 23]
669
+ # # Say you want to sort index in descending order
670
+ # dv.sort_by_index(ascending: false)
671
+ # #=> Daru::Vector.new [11, 12, 13], index: [23, 22, 21]
672
+ def sort_by_index opts={}
673
+ opts = {ascending: true}.merge(opts)
674
+ _, new_order = resort_index(@index.each_with_index, opts).transpose
675
+
676
+ reorder new_order
677
+ end
678
+
586
679
  DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
587
680
  case
588
681
  when lv.nil? && rv.nil?
@@ -624,7 +717,7 @@ module Daru
624
717
  def delete_if
625
718
  return to_enum(:delete_if) unless block_given?
626
719
 
627
- keep_e, keep_i = each_with_index.select { |n, _i| !yield(n) }.transpose
720
+ keep_e, keep_i = each_with_index.reject { |n, _i| yield(n) }.transpose
628
721
 
629
722
  @data = cast_vector_to @dtype, keep_e
630
723
  @index = Daru::Index.new(keep_i)
@@ -702,31 +795,6 @@ module Daru
702
795
  self
703
796
  end
704
797
 
705
- # Returns a vector which has *true* in the position where the element in self
706
- # is nil, and false otherwise.
707
- #
708
- # == Usage
709
- #
710
- # v = Daru::Vector.new([1,2,4,nil])
711
- # v.is_nil?
712
- # # =>
713
- # #<Daru::Vector:89421000 @name = nil @size = 4 >
714
- # # nil
715
- # # 0 false
716
- # # 1 false
717
- # # 2 false
718
- # # 3 true
719
- #
720
- def is_nil?
721
- # FIXME: EXTREMELY bad name for method not returning boolean - zverok, 2016-05-18
722
- recode(&:nil?)
723
- end
724
-
725
- # Opposite of #is_nil?
726
- def not_nil?
727
- recode { |v| !v.nil? }
728
- end
729
-
730
798
  # Replace all nils in the vector with the value passed as an argument. Destructive.
731
799
  # See #replace_nils for non-destructive version
732
800
  #
@@ -741,27 +809,81 @@ module Daru
741
809
  self
742
810
  end
743
811
 
744
- # Lags the series by k periods.
812
+ # Rolling fillna
813
+ # replace all Float::NAN and nil values with the preceding or following value
745
814
  #
746
- # The convention is to set the oldest observations (the first ones
747
- # in the series) to nil so that the size of the lagged series is the
748
- # same as the original.
815
+ # @param direction [Symbol] (:forward, :backward) whether replacement value is preceding or following
749
816
  #
750
- # Usage:
817
+ # @example
818
+ # dv = Daru::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
751
819
  #
752
- # ts = Daru::Vector.new((1..10).map { rand })
753
- # # => [0.69, 0.23, 0.44, 0.71, ...]
820
+ # 2.3.3 :068 > dv.rolling_fillna(:forward)
821
+ # => #<Daru::Vector(9)>
822
+ # 0 1
823
+ # 1 2
824
+ # 2 1
825
+ # 3 4
826
+ # 4 4
827
+ # 5 4
828
+ # 6 3
829
+ # 7 3
830
+ # 8 3
754
831
  #
755
- # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
756
- # ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
757
- def lag k=1
758
- return dup if k.zero?
832
+ def rolling_fillna!(direction=:forward)
833
+ enum = direction == :forward ? index : index.reverse_each
834
+ last_valid_value = 0
835
+ enum.each do |idx|
836
+ if valid_value?(self[idx])
837
+ last_valid_value = self[idx]
838
+ else
839
+ self[idx] = last_valid_value
840
+ end
841
+ end
842
+ self
843
+ end
759
844
 
760
- dat = @data.to_a.dup
761
- (dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
762
- (0...k).each { |i| dat[i] = nil }
845
+ # Non-destructive version of rolling_fillna!
846
+ def rolling_fillna(direction=:forward)
847
+ dup.rolling_fillna!(direction)
848
+ end
763
849
 
764
- Daru::Vector.new(dat, index: @index, name: @name)
850
+ # Lags the series by `k` periods.
851
+ #
852
+ # Lags the series by `k` periods, "shifting" data and inserting `nil`s
853
+ # from beginning or end of a vector, while preserving original vector's
854
+ # size.
855
+ #
856
+ # `k` can be positive or negative integer. If `k` is positive, `nil`s
857
+ # are inserted at the beginning of the vector, otherwise they are
858
+ # inserted at the end.
859
+ #
860
+ # @param [Integer] k "shift" the series by `k` periods. `k` can be
861
+ # positive or negative. (default = 1)
862
+ #
863
+ # @return [Daru::Vector] a new vector with "shifted" initial values
864
+ # and `nil` values inserted. The return vector is the same length
865
+ # as the original vector.
866
+ #
867
+ # @example Lag a vector with different periods `k`
868
+ #
869
+ # ts = Daru::Vector.new(1..5)
870
+ # # => [1, 2, 3, 4, 5]
871
+ #
872
+ # ts.lag # => [nil, 1, 2, 3, 4]
873
+ # ts.lag(1) # => [nil, 1, 2, 3, 4]
874
+ # ts.lag(2) # => [nil, nil, 1, 2, 3]
875
+ # ts.lag(-1) # => [2, 3, 4, 5, nil]
876
+ #
877
+ def lag k=1
878
+ case k
879
+ when 0 then dup
880
+ when 1...size
881
+ copy([nil] * k + data.to_a)
882
+ when -size..-1
883
+ copy(data.to_a[k.abs...size])
884
+ else
885
+ copy([])
886
+ end
765
887
  end
766
888
 
767
889
  def detach_index
@@ -783,7 +905,7 @@ module Daru
783
905
  deprecate :n_valid, :count_values, 2016, 10
784
906
 
785
907
  # Count the number of values specified
786
- # @param [Array] *values values to count for
908
+ # @param values [Array] values to count for
787
909
  # @return [Integer] the number of times the values mentioned occurs
788
910
  # @example
789
911
  # dv = Daru::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
@@ -798,6 +920,19 @@ module Daru
798
920
  @index.include? index
799
921
  end
800
922
 
923
+ # @param keys [Array] can be positions (if by_position is true) or indexes (if by_position is false)
924
+ # @return [Daru::Vector]
925
+ def get_sub_vector(keys, by_position: true)
926
+ return Daru::Vector.new([]) if keys == []
927
+
928
+ keys = @index.pos(*keys) unless by_position
929
+
930
+ sub_vect = at(*keys)
931
+ sub_vect = Daru::Vector.new([sub_vect]) unless sub_vect.is_a?(Daru::Vector)
932
+
933
+ sub_vect
934
+ end
935
+
801
936
  # @return [Daru::DataFrame] the vector as a single-vector dataframe
802
937
  def to_df
803
938
  Daru::DataFrame.new({@name => @data}, name: @name, index: @index)
@@ -871,7 +1006,9 @@ module Daru
871
1006
  end
872
1007
 
873
1008
  # Convert to html for iruby
874
- def to_html threshold=30
1009
+ def to_html(threshold=30)
1010
+ table_thead = to_html_thead
1011
+ table_tbody = to_html_tbody(threshold)
875
1012
  path = if index.is_a?(MultiIndex)
876
1013
  File.expand_path('../iruby/templates/vector_mi.html.erb', __FILE__)
877
1014
  else
@@ -880,51 +1017,97 @@ module Daru
880
1017
  ERB.new(File.read(path).strip).result(binding)
881
1018
  end
882
1019
 
1020
+ def to_html_thead
1021
+ table_thead_path =
1022
+ if index.is_a?(MultiIndex)
1023
+ File.expand_path('../iruby/templates/vector_mi_thead.html.erb', __FILE__)
1024
+ else
1025
+ File.expand_path('../iruby/templates/vector_thead.html.erb', __FILE__)
1026
+ end
1027
+ ERB.new(File.read(table_thead_path).strip).result(binding)
1028
+ end
1029
+
1030
+ def to_html_tbody(threshold=30)
1031
+ table_tbody_path =
1032
+ if index.is_a?(MultiIndex)
1033
+ File.expand_path('../iruby/templates/vector_mi_tbody.html.erb', __FILE__)
1034
+ else
1035
+ File.expand_path('../iruby/templates/vector_tbody.html.erb', __FILE__)
1036
+ end
1037
+ ERB.new(File.read(table_tbody_path).strip).result(binding)
1038
+ end
1039
+
883
1040
  def to_s
884
- to_html
1041
+ "#<#{self.class}#{': ' + @name.to_s if @name}(#{size})#{':category' if category?}>"
885
1042
  end
886
1043
 
887
- # Create a summary of the Vector using Report Builder.
888
- def summary(method=:to_text)
889
- ReportBuilder.new(no_title: true).add(self).send(method)
1044
+ # Create a summary of the Vector
1045
+ # @param indent_level [Fixnum] indent level
1046
+ # @return [String] String containing the summary of the Vector
1047
+ # @example
1048
+ # dv = Daru::Vector.new [1, 2, 3]
1049
+ # puts dv.summary
1050
+ #
1051
+ # # =
1052
+ # # n :3
1053
+ # # non-missing:3
1054
+ # # median: 2
1055
+ # # mean: 2.0000
1056
+ # # std.dev.: 1.0000
1057
+ # # std.err.: 0.5774
1058
+ # # skew: 0.0000
1059
+ # # kurtosis: -2.3333
1060
+ def summary(indent_level=0)
1061
+ non_missing = size - count_values(*Daru::MISSING_VALUES)
1062
+ summary = ' =' * indent_level + "= #{name}" \
1063
+ "\n n :#{size}" \
1064
+ "\n non-missing:#{non_missing}"
1065
+ case type
1066
+ when :object
1067
+ summary << object_summary
1068
+ when :numeric
1069
+ summary << numeric_summary
1070
+ end
1071
+ summary.split("\n").join("\n" + ' ' * indent_level)
890
1072
  end
891
1073
 
892
- # :nocov:
893
- def report_building b # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
894
- b.section(name: name) do |s|
895
- s.text "n :#{size}"
896
- s.text "n valid:#{count_values(*Daru::MISSING_VALUES)}"
897
- if @type == :object
898
- s.text "factors: #{factors.to_a.join(',')}"
899
- s.text "mode: #{mode}"
900
-
901
- s.table(name: 'Distribution') do |t|
902
- frequencies.sort_by(&:to_s).each do |k,v|
903
- key = @index.include?(k) ? @index[k] : k
904
- t.row [key, v, ('%0.2f%%' % (v.quo(count_values(*Daru::MISSING_VALUES))*100))]
905
- end
906
- end
907
- end
1074
+ # Displays summary for an object type Vector
1075
+ # @return [String] String containing object vector summary
1076
+ def object_summary
1077
+ nval = count_values(*Daru::MISSING_VALUES)
1078
+ summary = "\n factors: #{factors.to_a.join(',')}" \
1079
+ "\n mode: #{mode.to_a.join(',')}" \
1080
+ "\n Distribution\n"
908
1081
 
909
- s.text "median: #{median}" if @type==:numeric || @type==:numeric
910
- if @type==:numeric
911
- s.text 'mean: %0.4f' % mean
912
- if sd
913
- s.text 'std.dev.: %0.4f' % sd
914
- s.text 'std.err.: %0.4f' % se
915
- s.text 'skew: %0.4f' % skew
916
- s.text 'kurtosis: %0.4f' % kurtosis
917
- end
918
- end
1082
+ data = frequencies.sort.each_with_index.map do |v, k|
1083
+ [k, v, '%0.2f%%' % ((nval.zero? ? 1 : v.quo(nval))*100)]
919
1084
  end
1085
+
1086
+ summary + Formatters::Table.format(data)
1087
+ end
1088
+
1089
+ # Displays summary for a numeric type Vector
1090
+ # @return [String] String containing numeric vector summary
1091
+ def numeric_summary
1092
+ summary = "\n median: #{median}" +
1093
+ "\n mean: %0.4f" % mean
1094
+ if sd
1095
+ summary << "\n std.dev.: %0.4f" % sd +
1096
+ "\n std.err.: %0.4f" % se
1097
+ end
1098
+
1099
+ if count_values(*Daru::MISSING_VALUES).zero?
1100
+ summary << "\n skew: %0.4f" % skew +
1101
+ "\n kurtosis: %0.4f" % kurtosis
1102
+ end
1103
+ summary
920
1104
  end
921
- # :nocov:
922
1105
 
923
1106
  # Over rides original inspect for pretty printing in irb
924
1107
  def inspect spacing=20, threshold=15
925
1108
  row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
926
1109
 
927
- "#<#{self.class}(#{size})#{':cataegory' if category?}>\n" +
1110
+ "#<#{self.class}(#{size})#{':category' if category?}>\n" +
928
1111
  Formatters::Table.format(
929
1112
  to_a.lazy.map { |v| [v] },
930
1113
  headers: @name && [@name],
@@ -1095,14 +1278,30 @@ module Daru
1095
1278
  Daru::DataFrame.new ps
1096
1279
  end
1097
1280
 
1281
+ # Returns an array of booleans indicating, for each element of the
1282
+ # vector, whether it matches the given +regexp+.
1283
+ #
1284
+ # @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
1285
+ #
1286
+ # @return [Array<true, false>] +true+ for each element that matches the given +regexp+, +false+ otherwise
1287
+ #
1288
+ # @example
1289
+ # dv = Daru::Vector.new(['3 days', '5 weeks', '2 weeks'])
1290
+ # dv.match(/weeks/)
1291
+ #
1292
+ # # => [false, true, true]
1293
+ def match(regexp)
1294
+ @data.map { |value| !!(value =~ regexp) }
1295
+ end
1296
+
1098
1297
  # Creates a new vector consisting only of non-nil data
1099
1298
  #
1100
1299
  # == Arguments
1101
1300
  #
1102
- # @as_a [Symbol] Passing :array will return only the elements
1301
+ # @param as_a [Symbol] Passing :array will return only the elements
1103
1302
  # as an Array. Otherwise will return a Daru::Vector.
1104
1303
  #
1105
- # @duplicate [Symbol] In case no missing data is found in the
1304
+ # @param _duplicate [Symbol] In case no missing data is found in the
1106
1305
  # vector, setting this to false will return the same vector.
1107
1306
  # Otherwise, a duplicate will be returned irrespective of
1108
1307
  # presence of missing data.
@@ -1124,7 +1323,7 @@ module Daru
1124
1323
  deprecate :only_valid, :reject_values, 2016, 10
1125
1324
 
1126
1325
  # Return a vector with specified values removed
1127
- # @param [Array] *values values to reject from resultant vector
1326
+ # @param values [Array] values to reject from resultant vector
1128
1327
  # @return [Daru::Vector] vector with specified values removed
1129
1328
  # @example
1130
1329
  # dv = Daru::Vector.new [1, 2, nil, Float::NAN]
@@ -1146,7 +1345,7 @@ module Daru
1146
1345
  end
1147
1346
 
1148
1347
  # Return indexes of values specified
1149
- # @param [Array] *values values to find indexes for
1348
+ # @param values [Array] values to find indexes for
1150
1349
  # @return [Array] array of indexes of values specified
1151
1350
  # @example
1152
1351
  # dv = Daru::Vector.new [1, 2, nil, Float::NAN], index: 11..14
@@ -1336,6 +1535,17 @@ module Daru
1336
1535
 
1337
1536
  private
1338
1537
 
1538
+ # Will lazily load the plotting library being used for this vector
1539
+ def init_plotting_library
1540
+ self.plotting_library = Daru.plotting_library
1541
+ end
1542
+
1543
+ def copy(values)
1544
+ # Make sure values is right-justified to the size of the vector
1545
+ values.concat([nil] * (size-values.size)) if values.size < size
1546
+ Daru::Vector.new(values[0...size], index: @index, name: @name)
1547
+ end
1548
+
1339
1549
  def nil_positions
1340
1550
  @nil_positions ||
1341
1551
  @nil_positions = size.times.select { |i| @data[i].nil? }
@@ -1348,6 +1558,11 @@ module Daru
1348
1558
  end
1349
1559
  end
1350
1560
 
1561
+ # Helper method returning validity of arbitrary value
1562
+ def valid_value?(v)
1563
+ v.respond_to?(:nan?) && v.nan? || v.nil? ? false : true
1564
+ end
1565
+
1351
1566
  def initialize_vector source, opts
1352
1567
  index, source = parse_source(source, opts)
1353
1568
  set_name opts[:name]
@@ -1358,8 +1573,6 @@ module Daru
1358
1573
  guard_sizes!
1359
1574
 
1360
1575
  @possibly_changed_type = true
1361
- # Include plotting functionality
1362
- self.plotting_library = Daru.plotting_library
1363
1576
  end
1364
1577
 
1365
1578
  def parse_source source, opts
@@ -1413,7 +1626,7 @@ module Daru
1413
1626
  end
1414
1627
 
1415
1628
  # Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
1416
- # @dtype variable is set and the underlying data type of vector changed.
1629
+ # instance variable @dtype is set and the underlying data type of vector changed.
1417
1630
  def cast_vector_to dtype, source=nil, nm_dtype=nil
1418
1631
  source = @data.to_a if source.nil?
1419
1632
 
@@ -1426,7 +1639,7 @@ module Daru
1426
1639
  else raise ArgumentError, "Unknown dtype #{dtype}"
1427
1640
  end
1428
1641
 
1429
- @dtype = dtype || :array
1642
+ @dtype = dtype
1430
1643
  new_vector
1431
1644
  end
1432
1645
 
@@ -1442,7 +1655,6 @@ module Daru
1442
1655
 
1443
1656
  # Raises IndexError when one of the positions is an invalid position
1444
1657
  def validate_positions *positions
1445
- positions = [positions] if positions.is_a? Integer
1446
1658
  positions.each do |pos|
1447
1659
  raise IndexError, "#{pos} is not a valid position." if pos >= size
1448
1660
  end