daru 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +20 -7
  3. data/CONTRIBUTING.md +1 -1
  4. data/History.md +48 -1
  5. data/README.md +3 -3
  6. data/benchmarks/statistics.rb +6 -6
  7. data/benchmarks/where_clause.rb +1 -1
  8. data/benchmarks/where_vs_filter.rb +1 -1
  9. data/daru.gemspec +3 -2
  10. data/lib/daru.rb +14 -6
  11. data/lib/daru/accessors/gsl_wrapper.rb +1 -1
  12. data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
  13. data/lib/daru/category.rb +1 -1
  14. data/lib/daru/core/group_by.rb +32 -15
  15. data/lib/daru/core/query.rb +4 -4
  16. data/lib/daru/dataframe.rb +196 -48
  17. data/lib/daru/date_time/index.rb +7 -5
  18. data/lib/daru/formatters/table.rb +1 -0
  19. data/lib/daru/index/index.rb +121 -33
  20. data/lib/daru/index/multi_index.rb +83 -3
  21. data/lib/daru/io/csv/converters.rb +18 -0
  22. data/lib/daru/io/io.rb +80 -11
  23. data/lib/daru/io/sql_data_source.rb +10 -0
  24. data/lib/daru/iruby/templates/dataframe.html.erb +3 -50
  25. data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
  26. data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  27. data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  28. data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
  29. data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
  30. data/lib/daru/iruby/templates/vector.html.erb +3 -25
  31. data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
  32. data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
  33. data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
  34. data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
  35. data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
  36. data/lib/daru/maths/statistics/dataframe.rb +9 -11
  37. data/lib/daru/maths/statistics/vector.rb +139 -32
  38. data/lib/daru/plotting/gruff/dataframe.rb +13 -15
  39. data/lib/daru/plotting/nyaplot/category.rb +1 -1
  40. data/lib/daru/plotting/nyaplot/dataframe.rb +4 -4
  41. data/lib/daru/plotting/nyaplot/vector.rb +1 -2
  42. data/lib/daru/vector.rb +169 -80
  43. data/lib/daru/version.rb +1 -1
  44. data/spec/category_spec.rb +19 -19
  45. data/spec/core/group_by_spec.rb +47 -0
  46. data/spec/core/query_spec.rb +55 -50
  47. data/spec/daru_spec.rb +22 -0
  48. data/spec/dataframe_spec.rb +118 -6
  49. data/spec/date_time/index_spec.rb +34 -16
  50. data/spec/extensions/rserve_spec.rb +1 -1
  51. data/spec/fixtures/boolean_converter_test.csv +5 -0
  52. data/spec/fixtures/eciresults.html +394 -0
  53. data/spec/fixtures/empty_rows_test.csv +17 -0
  54. data/spec/fixtures/macau.html +3691 -0
  55. data/spec/fixtures/macd_data.csv +150 -0
  56. data/spec/fixtures/moneycontrol.html +6812 -0
  57. data/spec/fixtures/url_test.txt~ +0 -0
  58. data/spec/fixtures/valid_markup.html +62 -0
  59. data/spec/fixtures/wiki_climate.html +1243 -0
  60. data/spec/fixtures/wiki_table_info.html +631 -0
  61. data/spec/formatters/table_formatter_spec.rb +29 -0
  62. data/spec/index/categorical_index_spec.rb +33 -33
  63. data/spec/index/index_spec.rb +134 -41
  64. data/spec/index/multi_index_spec.rb +115 -31
  65. data/spec/io/io_spec.rb +201 -0
  66. data/spec/io/sql_data_source_spec.rb +31 -41
  67. data/spec/iruby/dataframe_spec.rb +17 -19
  68. data/spec/iruby/vector_spec.rb +26 -28
  69. data/spec/maths/statistics/vector_spec.rb +136 -14
  70. data/spec/plotting/gruff/category_spec.rb +3 -3
  71. data/spec/plotting/gruff/dataframe_spec.rb +14 -4
  72. data/spec/plotting/gruff/vector_spec.rb +9 -9
  73. data/spec/plotting/nyaplot/category_spec.rb +5 -9
  74. data/spec/plotting/nyaplot/dataframe_spec.rb +72 -47
  75. data/spec/plotting/nyaplot/vector_spec.rb +5 -11
  76. data/spec/shared/vector_display_spec.rb +12 -14
  77. data/spec/spec_helper.rb +21 -0
  78. data/spec/support/matchers.rb +5 -0
  79. data/spec/vector_spec.rb +222 -72
  80. metadata +68 -23
  81. data/spec/fixtures/stock_data.csv +0 -500
@@ -7,12 +7,12 @@ module Daru
7
7
  size = opts[:size] || 500
8
8
  x = extract_x_vector opts[:x]
9
9
  y = extract_y_vectors opts[:y]
10
- if opts[:categorized]
11
- return plot_with_category(size, type, x, y, opts[:categorized])
12
- end
10
+ type = process_type type, opts[:categorized]
13
11
  case type
14
12
  when :line, :bar, :scatter
15
13
  plot = send("#{type}_plot", size, x, y)
14
+ when :scatter_categorized
15
+ plot = scatter_with_category(size, x, y, opts[:categorized])
16
16
  # TODO: hist, box
17
17
  # It turns out hist and box are not supported in Gruff yet
18
18
  else
@@ -24,6 +24,10 @@ module Daru
24
24
 
25
25
  private
26
26
 
27
+ def process_type type, categorized
28
+ type == :scatter && categorized ? :scatter_categorized : type
29
+ end
30
+
27
31
  def line_plot size, x, y
28
32
  plot = Gruff::Line.new size
29
33
  plot.labels = size.times.to_a.zip(x).to_h
@@ -50,21 +54,15 @@ module Daru
50
54
  plot
51
55
  end
52
56
 
53
- def plot_with_category size, type, x, y, opts
57
+ def scatter_with_category size, x, y, opts
54
58
  x = Daru::Vector.new x
55
59
  y = y.first
56
- case type
57
- when :scatter
58
- plot = Gruff::Scatter.new size
59
- cat_dv = self[opts[:by]]
60
- cat_dv.categories.each do |cat|
61
- bools = cat_dv.eq cat
62
- plot.data cat, x.where(bools).to_a, y.where(bools).to_a
63
- end
64
- else
65
- raise ArgumentError, "Type #{type} is not supported."
60
+ plot = Gruff::Scatter.new size
61
+ cat_dv = self[opts[:by]]
62
+ cat_dv.categories.each do |cat|
63
+ bools = cat_dv.eq cat
64
+ plot.data cat, x.where(bools).to_a, y.where(bools).to_a
66
65
  end
67
- yield plot if block_given?
68
66
  plot
69
67
  end
70
68
 
@@ -12,7 +12,7 @@ module Daru
12
12
  # Set yrange for good view
13
13
  set_yrange plot, opts[:method]
14
14
  yield plot, diagram if block_given?
15
- plot.show
15
+ plot
16
16
  else
17
17
  raise ArgumentError, "#{type} type is not supported."
18
18
  end
@@ -38,7 +38,7 @@ module Daru
38
38
 
39
39
  diagram =
40
40
  case
41
- when !([:scatter, :bar, :line, :histogram] & types).empty?
41
+ when !(%i[scatter bar line histogram] & types).empty?
42
42
  plot_regular_diagrams plot, opts
43
43
  when types.include?(:box)
44
44
  plot_box_diagram plot
@@ -48,7 +48,7 @@ module Daru
48
48
 
49
49
  yield(plot, diagram) if block_given?
50
50
 
51
- plot.show
51
+ plot
52
52
  end
53
53
 
54
54
  def plot_with_category opts
@@ -66,8 +66,8 @@ module Daru
66
66
 
67
67
  plot.legend true
68
68
  yield plot, *diagrams if block_given?
69
- plot.show
70
69
 
70
+ plot
71
71
  else
72
72
  raise ArgumentError, "Unsupported type #{type}"
73
73
  end
@@ -102,7 +102,7 @@ module Daru
102
102
  end
103
103
  end
104
104
 
105
- SHAPES = %w(circle triangle-up diamond square triangle-down cross).freeze
105
+ SHAPES = %w[circle triangle-up diamond square triangle-down cross].freeze
106
106
  def get_shape type
107
107
  validate_type type, :scatter
108
108
  SHAPES.cycle
@@ -26,8 +26,7 @@ module Daru
26
26
  diagram = create_diagram plot, options[:type], x_axis
27
27
 
28
28
  yield plot, diagram if block_given?
29
-
30
- plot.show
29
+ plot
31
30
  end
32
31
 
33
32
  private
@@ -352,7 +352,7 @@ module Daru
352
352
  if other.is_a?(Daru::Vector)
353
353
  mod.apply_vector_operator operator, self, other
354
354
  else
355
- mod.apply_scalar_operator operator, @data,other
355
+ mod.apply_scalar_operator operator, @data, other
356
356
  end
357
357
  end
358
358
  alias_method operator, method if operator != :== && operator != :!=
@@ -462,6 +462,26 @@ module Daru
462
462
  values.any? { |v| include_with_nan? @data, v }
463
463
  end
464
464
 
465
+ # @note Do not use it to check for Float::NAN as
466
+ # Float::NAN == Float::NAN is false
467
+ # Returns a vector of booleans in which the value at position i is
468
+ # true or false depending upon whether the value at position i is equal to
469
+ # any of the values passed as arguments
470
+ # @param [Array] *values values to equate with
471
+ # @return [Daru::Vector] vector of boolean values
472
+ # @example
473
+ # dv = Daru::Vector.new [1, 2, 3, 2, 1]
474
+ # dv.is_values 1, 2
475
+ # # => #<Daru::Vector(5)>
476
+ # # 0 true
477
+ # # 1 true
478
+ # # 2 false
479
+ # # 3 true
480
+ # # 4 true
481
+ def is_values(*values)
482
+ Daru::Vector.new values.map { |v| eq(v) }.inject(:|)
483
+ end
484
+
465
485
  # Append an element to the vector by specifying the element and index
466
486
  def concat element, index
467
487
  raise IndexError, 'Expected new unique index' if @index.include? index
@@ -481,8 +501,7 @@ module Daru
481
501
  # * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
482
502
  def cast opts={}
483
503
  dt = opts[:dtype]
484
- raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
485
- dt == :array || dt == :nmatrix || dt == :gsl
504
+ raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless %i[array nmatrix gsl].include?(dt)
486
505
 
487
506
  @data = cast_vector_to dt unless @dtype == dt
488
507
  end
@@ -535,7 +554,7 @@ module Daru
535
554
  # Get index of element
536
555
  def index_of element
537
556
  case dtype
538
- when :array then @index.key @data.index { |x| x.eql? element }
557
+ when :array then @index.key(@data.index { |x| x.eql? element })
539
558
  else @index.key @data.index(element)
540
559
  end
541
560
  end
@@ -583,6 +602,31 @@ module Daru
583
602
  Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
584
603
  end
585
604
 
605
+ # Sorts the vector according to its `Index` values. Defaults to ascending
606
+ # order sorting.
607
+ #
608
+ # @param [Hash] opts the options for sort_by_index method.
609
+ # @option opts [Boolean] :ascending false, will sort `index` in
610
+ # descending order.
611
+ #
612
+ # @return [Vector] new sorted `Vector` according to the index values.
613
+ #
614
+ # @example
615
+ #
616
+ # dv = Daru::Vector.new [11, 13, 12], index: [23, 21, 22]
617
+ # # Say you want to sort index in ascending order
618
+ # dv.sort_by_index(ascending: true)
619
+ # #=> Daru::Vector.new [13, 12, 11], index: [21, 22, 23]
620
+ # # Say you want to sort index in descending order
621
+ # dv.sort_by_index(ascending: false)
622
+ # #=> Daru::Vector.new [11, 12, 13], index: [23, 22, 21]
623
+ def sort_by_index opts={}
624
+ opts = {ascending: true}.merge(opts)
625
+ _, new_order = resort_index(@index.each_with_index, opts).transpose
626
+
627
+ reorder new_order
628
+ end
629
+
586
630
  DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
587
631
  case
588
632
  when lv.nil? && rv.nil?
@@ -624,7 +668,7 @@ module Daru
624
668
  def delete_if
625
669
  return to_enum(:delete_if) unless block_given?
626
670
 
627
- keep_e, keep_i = each_with_index.select { |n, _i| !yield(n) }.transpose
671
+ keep_e, keep_i = each_with_index.reject { |n, _i| yield(n) }.transpose
628
672
 
629
673
  @data = cast_vector_to @dtype, keep_e
630
674
  @index = Daru::Index.new(keep_i)
@@ -702,31 +746,6 @@ module Daru
702
746
  self
703
747
  end
704
748
 
705
- # Returns a vector which has *true* in the position where the element in self
706
- # is nil, and false otherwise.
707
- #
708
- # == Usage
709
- #
710
- # v = Daru::Vector.new([1,2,4,nil])
711
- # v.is_nil?
712
- # # =>
713
- # #<Daru::Vector:89421000 @name = nil @size = 4 >
714
- # # nil
715
- # # 0 false
716
- # # 1 false
717
- # # 2 false
718
- # # 3 true
719
- #
720
- def is_nil?
721
- # FIXME: EXTREMELY bad name for method not returning boolean - zverok, 2016-05-18
722
- recode(&:nil?)
723
- end
724
-
725
- # Opposite of #is_nil?
726
- def not_nil?
727
- recode { |v| !v.nil? }
728
- end
729
-
730
749
  # Replace all nils in the vector with the value passed as an argument. Destructive.
731
750
  # See #replace_nils for non-destructive version
732
751
  #
@@ -741,27 +760,43 @@ module Daru
741
760
  self
742
761
  end
743
762
 
744
- # Lags the series by k periods.
763
+ # Lags the series by `k` periods.
764
+ #
765
+ # Lags the series by `k` periods, "shifting" data and inserting `nil`s
766
+ # from beginning or end of a vector, while preserving original vector's
767
+ # size.
768
+ #
769
+ # `k` can be positive or negative integer. If `k` is positive, `nil`s
770
+ # are inserted at the beginning of the vector, otherwise they are
771
+ # inserted at the end.
745
772
  #
746
- # The convention is to set the oldest observations (the first ones
747
- # in the series) to nil so that the size of the lagged series is the
748
- # same as the original.
773
+ # @param [Integer] k "shift" the series by `k` periods. `k` can be
774
+ # positive or negative. (default = 1)
749
775
  #
750
- # Usage:
776
+ # @return [Daru::Vector] a new vector with "shifted" initial values
777
+ # and `nil` values inserted. The return vector is the same length
778
+ # as the original vector.
751
779
  #
752
- # ts = Daru::Vector.new((1..10).map { rand })
753
- # # => [0.69, 0.23, 0.44, 0.71, ...]
780
+ # @example Lag a vector with different periods `k`
781
+ #
782
+ # ts = Daru::Vector.new(1..5)
783
+ # # => [1, 2, 3, 4, 5]
784
+ #
785
+ # ts.lag # => [nil, 1, 2, 3, 4]
786
+ # ts.lag(1) # => [nil, 1, 2, 3, 4]
787
+ # ts.lag(2) # => [nil, nil, 1, 2, 3]
788
+ # ts.lag(-1) # => [2, 3, 4, 5, nil]
754
789
  #
755
- # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
756
- # ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
757
790
  def lag k=1
758
- return dup if k.zero?
759
-
760
- dat = @data.to_a.dup
761
- (dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
762
- (0...k).each { |i| dat[i] = nil }
763
-
764
- Daru::Vector.new(dat, index: @index, name: @name)
791
+ case k
792
+ when 0 then dup
793
+ when 1...size
794
+ copy([nil] * k + data.to_a)
795
+ when -size..-1
796
+ copy(data.to_a[k.abs...size])
797
+ else
798
+ copy([])
799
+ end
765
800
  end
766
801
 
767
802
  def detach_index
@@ -871,7 +906,9 @@ module Daru
871
906
  end
872
907
 
873
908
  # Convert to html for iruby
874
- def to_html threshold=30
909
+ def to_html(threshold=30)
910
+ table_thead = to_html_thead
911
+ table_tbody = to_html_tbody(threshold)
875
912
  path = if index.is_a?(MultiIndex)
876
913
  File.expand_path('../iruby/templates/vector_mi.html.erb', __FILE__)
877
914
  else
@@ -880,51 +917,97 @@ module Daru
880
917
  ERB.new(File.read(path).strip).result(binding)
881
918
  end
882
919
 
920
+ def to_html_thead
921
+ table_thead_path =
922
+ if index.is_a?(MultiIndex)
923
+ File.expand_path('../iruby/templates/vector_mi_thead.html.erb', __FILE__)
924
+ else
925
+ File.expand_path('../iruby/templates/vector_thead.html.erb', __FILE__)
926
+ end
927
+ ERB.new(File.read(table_thead_path).strip).result(binding)
928
+ end
929
+
930
+ def to_html_tbody(threshold=30)
931
+ table_tbody_path =
932
+ if index.is_a?(MultiIndex)
933
+ File.expand_path('../iruby/templates/vector_mi_tbody.html.erb', __FILE__)
934
+ else
935
+ File.expand_path('../iruby/templates/vector_tbody.html.erb', __FILE__)
936
+ end
937
+ ERB.new(File.read(table_tbody_path).strip).result(binding)
938
+ end
939
+
883
940
  def to_s
884
- to_html
941
+ "#<#{self.class}#{': ' + @name.to_s if @name}(#{size})#{':category' if category?}>"
885
942
  end
886
943
 
887
- # Create a summary of the Vector using Report Builder.
888
- def summary(method=:to_text)
889
- ReportBuilder.new(no_title: true).add(self).send(method)
944
+ # Create a summary of the Vector
945
+ # @param [Fixnum] indent_level
946
+ # @return [String] String containing the summary of the Vector
947
+ # @example
948
+ # dv = Daru::Vector.new [1, 2, 3]
949
+ # puts dv.summary
950
+ #
951
+ # # =
952
+ # # n :3
953
+ # # non-missing:3
954
+ # # median: 2
955
+ # # mean: 2.0000
956
+ # # std.dev.: 1.0000
957
+ # # std.err.: 0.5774
958
+ # # skew: 0.0000
959
+ # # kurtosis: -2.3333
960
+ def summary(indent_level=0)
961
+ non_missing = size - count_values(*Daru::MISSING_VALUES)
962
+ summary = ' =' * indent_level + "= #{name}" \
963
+ "\n n :#{size}" \
964
+ "\n non-missing:#{non_missing}"
965
+ case type
966
+ when :object
967
+ summary << object_summary
968
+ when :numeric
969
+ summary << numeric_summary
970
+ end
971
+ summary.split("\n").join("\n" + ' ' * indent_level)
890
972
  end
891
973
 
892
- # :nocov:
893
- def report_building b # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
894
- b.section(name: name) do |s|
895
- s.text "n :#{size}"
896
- s.text "n valid:#{count_values(*Daru::MISSING_VALUES)}"
897
- if @type == :object
898
- s.text "factors: #{factors.to_a.join(',')}"
899
- s.text "mode: #{mode}"
900
-
901
- s.table(name: 'Distribution') do |t|
902
- frequencies.sort_by(&:to_s).each do |k,v|
903
- key = @index.include?(k) ? @index[k] : k
904
- t.row [key, v, ('%0.2f%%' % (v.quo(count_values(*Daru::MISSING_VALUES))*100))]
905
- end
906
- end
907
- end
974
+ # Displays summary for an object type Vector
975
+ # @return [String] String containing object vector summary
976
+ def object_summary
977
+ nval = count_values(*Daru::MISSING_VALUES)
978
+ summary = "\n factors: #{factors.to_a.join(',')}" \
979
+ "\n mode: #{mode.to_a.join(',')}" \
980
+ "\n Distribution\n"
908
981
 
909
- s.text "median: #{median}" if @type==:numeric || @type==:numeric
910
- if @type==:numeric
911
- s.text 'mean: %0.4f' % mean
912
- if sd
913
- s.text 'std.dev.: %0.4f' % sd
914
- s.text 'std.err.: %0.4f' % se
915
- s.text 'skew: %0.4f' % skew
916
- s.text 'kurtosis: %0.4f' % kurtosis
917
- end
918
- end
982
+ data = frequencies.sort.each_with_index.map do |v, k|
983
+ [k, v, '%0.2f%%' % ((nval.zero? ? 1 : v.quo(nval))*100)]
919
984
  end
985
+
986
+ summary + Formatters::Table.format(data)
987
+ end
988
+
989
+ # Displays summary for a numeric type Vector
990
+ # @return [String] String containing numeric vector summary
991
+ def numeric_summary
992
+ summary = "\n median: #{median}" +
993
+ "\n mean: %0.4f" % mean
994
+ if sd
995
+ summary << "\n std.dev.: %0.4f" % sd +
996
+ "\n std.err.: %0.4f" % se
997
+ end
998
+
999
+ if count_values(*Daru::MISSING_VALUES).zero?
1000
+ summary << "\n skew: %0.4f" % skew +
1001
+ "\n kurtosis: %0.4f" % kurtosis
1002
+ end
1003
+ summary
920
1004
  end
921
- # :nocov:
922
1005
 
923
1006
  # Overrides original inspect for pretty printing in irb
924
1007
  def inspect spacing=20, threshold=15
925
1008
  row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
926
1009
 
927
- "#<#{self.class}(#{size})#{':cataegory' if category?}>\n" +
1010
+ "#<#{self.class}(#{size})#{':category' if category?}>\n" +
928
1011
  Formatters::Table.format(
929
1012
  to_a.lazy.map { |v| [v] },
930
1013
  headers: @name && [@name],
@@ -1336,6 +1419,12 @@ module Daru
1336
1419
 
1337
1420
  private
1338
1421
 
1422
+ def copy(values)
1423
+ # Make sure values is right-padded with nils to the size of the vector
1424
+ values.concat([nil] * (size-values.size)) if values.size < size
1425
+ Daru::Vector.new(values[0...size], index: @index, name: @name)
1426
+ end
1427
+
1339
1428
  def nil_positions
1340
1429
  @nil_positions ||
1341
1430
  @nil_positions = size.times.select { |i| @data[i].nil? }
@@ -1426,7 +1515,7 @@ module Daru
1426
1515
  else raise ArgumentError, "Unknown dtype #{dtype}"
1427
1516
  end
1428
1517
 
1429
- @dtype = dtype || :array
1518
+ @dtype = dtype
1430
1519
  new_vector
1431
1520
  end
1432
1521