daru 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +20 -7
  3. data/CONTRIBUTING.md +1 -1
  4. data/History.md +48 -1
  5. data/README.md +3 -3
  6. data/benchmarks/statistics.rb +6 -6
  7. data/benchmarks/where_clause.rb +1 -1
  8. data/benchmarks/where_vs_filter.rb +1 -1
  9. data/daru.gemspec +3 -2
  10. data/lib/daru.rb +14 -6
  11. data/lib/daru/accessors/gsl_wrapper.rb +1 -1
  12. data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
  13. data/lib/daru/category.rb +1 -1
  14. data/lib/daru/core/group_by.rb +32 -15
  15. data/lib/daru/core/query.rb +4 -4
  16. data/lib/daru/dataframe.rb +196 -48
  17. data/lib/daru/date_time/index.rb +7 -5
  18. data/lib/daru/formatters/table.rb +1 -0
  19. data/lib/daru/index/index.rb +121 -33
  20. data/lib/daru/index/multi_index.rb +83 -3
  21. data/lib/daru/io/csv/converters.rb +18 -0
  22. data/lib/daru/io/io.rb +80 -11
  23. data/lib/daru/io/sql_data_source.rb +10 -0
  24. data/lib/daru/iruby/templates/dataframe.html.erb +3 -50
  25. data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
  26. data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  27. data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  28. data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
  29. data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
  30. data/lib/daru/iruby/templates/vector.html.erb +3 -25
  31. data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
  32. data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
  33. data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
  34. data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
  35. data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
  36. data/lib/daru/maths/statistics/dataframe.rb +9 -11
  37. data/lib/daru/maths/statistics/vector.rb +139 -32
  38. data/lib/daru/plotting/gruff/dataframe.rb +13 -15
  39. data/lib/daru/plotting/nyaplot/category.rb +1 -1
  40. data/lib/daru/plotting/nyaplot/dataframe.rb +4 -4
  41. data/lib/daru/plotting/nyaplot/vector.rb +1 -2
  42. data/lib/daru/vector.rb +169 -80
  43. data/lib/daru/version.rb +1 -1
  44. data/spec/category_spec.rb +19 -19
  45. data/spec/core/group_by_spec.rb +47 -0
  46. data/spec/core/query_spec.rb +55 -50
  47. data/spec/daru_spec.rb +22 -0
  48. data/spec/dataframe_spec.rb +118 -6
  49. data/spec/date_time/index_spec.rb +34 -16
  50. data/spec/extensions/rserve_spec.rb +1 -1
  51. data/spec/fixtures/boolean_converter_test.csv +5 -0
  52. data/spec/fixtures/eciresults.html +394 -0
  53. data/spec/fixtures/empty_rows_test.csv +17 -0
  54. data/spec/fixtures/macau.html +3691 -0
  55. data/spec/fixtures/macd_data.csv +150 -0
  56. data/spec/fixtures/moneycontrol.html +6812 -0
  57. data/spec/fixtures/url_test.txt~ +0 -0
  58. data/spec/fixtures/valid_markup.html +62 -0
  59. data/spec/fixtures/wiki_climate.html +1243 -0
  60. data/spec/fixtures/wiki_table_info.html +631 -0
  61. data/spec/formatters/table_formatter_spec.rb +29 -0
  62. data/spec/index/categorical_index_spec.rb +33 -33
  63. data/spec/index/index_spec.rb +134 -41
  64. data/spec/index/multi_index_spec.rb +115 -31
  65. data/spec/io/io_spec.rb +201 -0
  66. data/spec/io/sql_data_source_spec.rb +31 -41
  67. data/spec/iruby/dataframe_spec.rb +17 -19
  68. data/spec/iruby/vector_spec.rb +26 -28
  69. data/spec/maths/statistics/vector_spec.rb +136 -14
  70. data/spec/plotting/gruff/category_spec.rb +3 -3
  71. data/spec/plotting/gruff/dataframe_spec.rb +14 -4
  72. data/spec/plotting/gruff/vector_spec.rb +9 -9
  73. data/spec/plotting/nyaplot/category_spec.rb +5 -9
  74. data/spec/plotting/nyaplot/dataframe_spec.rb +72 -47
  75. data/spec/plotting/nyaplot/vector_spec.rb +5 -11
  76. data/spec/shared/vector_display_spec.rb +12 -14
  77. data/spec/spec_helper.rb +21 -0
  78. data/spec/support/matchers.rb +5 -0
  79. data/spec/vector_spec.rb +222 -72
  80. metadata +68 -23
  81. data/spec/fixtures/stock_data.csv +0 -500
@@ -7,12 +7,12 @@ module Daru
7
7
  size = opts[:size] || 500
8
8
  x = extract_x_vector opts[:x]
9
9
  y = extract_y_vectors opts[:y]
10
- if opts[:categorized]
11
- return plot_with_category(size, type, x, y, opts[:categorized])
12
- end
10
+ type = process_type type, opts[:categorized]
13
11
  case type
14
12
  when :line, :bar, :scatter
15
13
  plot = send("#{type}_plot", size, x, y)
14
+ when :scatter_categorized
15
+ plot = scatter_with_category(size, x, y, opts[:categorized])
16
16
  # TODO: hist, box
17
17
  # It turns out hist and box are not supported in Gruff yet
18
18
  else
@@ -24,6 +24,10 @@ module Daru
24
24
 
25
25
  private
26
26
 
27
+ def process_type type, categorized
28
+ type == :scatter && categorized ? :scatter_categorized : type
29
+ end
30
+
27
31
  def line_plot size, x, y
28
32
  plot = Gruff::Line.new size
29
33
  plot.labels = size.times.to_a.zip(x).to_h
@@ -50,21 +54,15 @@ module Daru
50
54
  plot
51
55
  end
52
56
 
53
- def plot_with_category size, type, x, y, opts
57
+ def scatter_with_category size, x, y, opts
54
58
  x = Daru::Vector.new x
55
59
  y = y.first
56
- case type
57
- when :scatter
58
- plot = Gruff::Scatter.new size
59
- cat_dv = self[opts[:by]]
60
- cat_dv.categories.each do |cat|
61
- bools = cat_dv.eq cat
62
- plot.data cat, x.where(bools).to_a, y.where(bools).to_a
63
- end
64
- else
65
- raise ArgumentError, "Type #{type} is not supported."
60
+ plot = Gruff::Scatter.new size
61
+ cat_dv = self[opts[:by]]
62
+ cat_dv.categories.each do |cat|
63
+ bools = cat_dv.eq cat
64
+ plot.data cat, x.where(bools).to_a, y.where(bools).to_a
66
65
  end
67
- yield plot if block_given?
68
66
  plot
69
67
  end
70
68
 
@@ -12,7 +12,7 @@ module Daru
12
12
  # Set yrange for good view
13
13
  set_yrange plot, opts[:method]
14
14
  yield plot, diagram if block_given?
15
- plot.show
15
+ plot
16
16
  else
17
17
  raise ArgumentError, "#{type} type is not supported."
18
18
  end
@@ -38,7 +38,7 @@ module Daru
38
38
 
39
39
  diagram =
40
40
  case
41
- when !([:scatter, :bar, :line, :histogram] & types).empty?
41
+ when !(%i[scatter bar line histogram] & types).empty?
42
42
  plot_regular_diagrams plot, opts
43
43
  when types.include?(:box)
44
44
  plot_box_diagram plot
@@ -48,7 +48,7 @@ module Daru
48
48
 
49
49
  yield(plot, diagram) if block_given?
50
50
 
51
- plot.show
51
+ plot
52
52
  end
53
53
 
54
54
  def plot_with_category opts
@@ -66,8 +66,8 @@ module Daru
66
66
 
67
67
  plot.legend true
68
68
  yield plot, *diagrams if block_given?
69
- plot.show
70
69
 
70
+ plot
71
71
  else
72
72
  raise ArgumentError, "Unsupported type #{type}"
73
73
  end
@@ -102,7 +102,7 @@ module Daru
102
102
  end
103
103
  end
104
104
 
105
- SHAPES = %w(circle triangle-up diamond square triangle-down cross).freeze
105
+ SHAPES = %w[circle triangle-up diamond square triangle-down cross].freeze
106
106
  def get_shape type
107
107
  validate_type type, :scatter
108
108
  SHAPES.cycle
@@ -26,8 +26,7 @@ module Daru
26
26
  diagram = create_diagram plot, options[:type], x_axis
27
27
 
28
28
  yield plot, diagram if block_given?
29
-
30
- plot.show
29
+ plot
31
30
  end
32
31
 
33
32
  private
@@ -352,7 +352,7 @@ module Daru
352
352
  if other.is_a?(Daru::Vector)
353
353
  mod.apply_vector_operator operator, self, other
354
354
  else
355
- mod.apply_scalar_operator operator, @data,other
355
+ mod.apply_scalar_operator operator, @data, other
356
356
  end
357
357
  end
358
358
  alias_method operator, method if operator != :== && operator != :!=
@@ -462,6 +462,26 @@ module Daru
462
462
  values.any? { |v| include_with_nan? @data, v }
463
463
  end
464
464
 
465
+ # @note Do not use it to check for Float::NAN as
466
+ # Float::NAN == Float::NAN is false
467
+ # Return vector of booleans where the value at ith position is either
468
+ # true or false depending upon whether value at position i is equal to
469
+ # any of the values passed in the argument or not
470
+ # @param [Array] *values values to equate with
471
+ # @return [Daru::Vector] vector of boolean values
472
+ # @example
473
+ # dv = Daru::Vector.new [1, 2, 3, 2, 1]
474
+ # dv.is_values 1, 2
475
+ # # => #<Daru::Vector(5)>
476
+ # # 0 true
477
+ # # 1 true
478
+ # # 2 false
479
+ # # 3 true
480
+ # # 4 true
481
+ def is_values(*values)
482
+ Daru::Vector.new values.map { |v| eq(v) }.inject(:|)
483
+ end
484
+
465
485
  # Append an element to the vector by specifying the element and index
466
486
  def concat element, index
467
487
  raise IndexError, 'Expected new unique index' if @index.include? index
@@ -481,8 +501,7 @@ module Daru
481
501
  # * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
482
502
  def cast opts={}
483
503
  dt = opts[:dtype]
484
- raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
485
- dt == :array || dt == :nmatrix || dt == :gsl
504
+ raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless %i[array nmatrix gsl].include?(dt)
486
505
 
487
506
  @data = cast_vector_to dt unless @dtype == dt
488
507
  end
@@ -535,7 +554,7 @@ module Daru
535
554
  # Get index of element
536
555
  def index_of element
537
556
  case dtype
538
- when :array then @index.key @data.index { |x| x.eql? element }
557
+ when :array then @index.key(@data.index { |x| x.eql? element })
539
558
  else @index.key @data.index(element)
540
559
  end
541
560
  end
@@ -583,6 +602,31 @@ module Daru
583
602
  Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
584
603
  end
585
604
 
605
+ # Sorts the vector according to its `Index` values. Defaults to ascending
606
+ # order sorting.
607
+ #
608
+ # @param [Hash] opts the options for sort_by_index method.
609
+ # @option opts [Boolean] :ascending false, will sort `index` in
610
+ # descending order.
611
+ #
612
+ # @return [Vector] new sorted `Vector` according to the index values.
613
+ #
614
+ # @example
615
+ #
616
+ # dv = Daru::Vector.new [11, 13, 12], index: [23, 21, 22]
617
+ # # Say you want to sort index in ascending order
618
+ # dv.sort_by_index(ascending: true)
619
+ # #=> Daru::Vector.new [13, 12, 11], index: [21, 22, 23]
620
+ # # Say you want to sort index in descending order
621
+ # dv.sort_by_index(ascending: false)
622
+ # #=> Daru::Vector.new [11, 12, 13], index: [23, 22, 21]
623
+ def sort_by_index opts={}
624
+ opts = {ascending: true}.merge(opts)
625
+ _, new_order = resort_index(@index.each_with_index, opts).transpose
626
+
627
+ reorder new_order
628
+ end
629
+
586
630
  DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
587
631
  case
588
632
  when lv.nil? && rv.nil?
@@ -624,7 +668,7 @@ module Daru
624
668
  def delete_if
625
669
  return to_enum(:delete_if) unless block_given?
626
670
 
627
- keep_e, keep_i = each_with_index.select { |n, _i| !yield(n) }.transpose
671
+ keep_e, keep_i = each_with_index.reject { |n, _i| yield(n) }.transpose
628
672
 
629
673
  @data = cast_vector_to @dtype, keep_e
630
674
  @index = Daru::Index.new(keep_i)
@@ -702,31 +746,6 @@ module Daru
702
746
  self
703
747
  end
704
748
 
705
- # Returns a vector which has *true* in the position where the element in self
706
- # is nil, and false otherwise.
707
- #
708
- # == Usage
709
- #
710
- # v = Daru::Vector.new([1,2,4,nil])
711
- # v.is_nil?
712
- # # =>
713
- # #<Daru::Vector:89421000 @name = nil @size = 4 >
714
- # # nil
715
- # # 0 false
716
- # # 1 false
717
- # # 2 false
718
- # # 3 true
719
- #
720
- def is_nil?
721
- # FIXME: EXTREMELY bad name for method not returning boolean - zverok, 2016-05-18
722
- recode(&:nil?)
723
- end
724
-
725
- # Opposite of #is_nil?
726
- def not_nil?
727
- recode { |v| !v.nil? }
728
- end
729
-
730
749
  # Replace all nils in the vector with the value passed as an argument. Destructive.
731
750
  # See #replace_nils for non-destructive version
732
751
  #
@@ -741,27 +760,43 @@ module Daru
741
760
  self
742
761
  end
743
762
 
744
- # Lags the series by k periods.
763
+ # Lags the series by `k` periods.
764
+ #
765
+ # Lags the series by `k` periods, "shifting" data and inserting `nil`s
766
+ # from beginning or end of a vector, while preserving original vector's
767
+ # size.
768
+ #
769
+ # `k` can be positive or negative integer. If `k` is positive, `nil`s
770
+ # are inserted at the beginning of the vector, otherwise they are
771
+ # inserted at the end.
745
772
  #
746
- # The convention is to set the oldest observations (the first ones
747
- # in the series) to nil so that the size of the lagged series is the
748
- # same as the original.
773
+ # @param [Integer] k "shift" the series by `k` periods. `k` can be
774
+ # positive or negative. (default = 1)
749
775
  #
750
- # Usage:
776
+ # @return [Daru::Vector] a new vector with "shifted" initial values
777
+ # and `nil` values inserted. The return vector is the same length
778
+ # as the original vector.
751
779
  #
752
- # ts = Daru::Vector.new((1..10).map { rand })
753
- # # => [0.69, 0.23, 0.44, 0.71, ...]
780
+ # @example Lag a vector with different periods `k`
781
+ #
782
+ # ts = Daru::Vector.new(1..5)
783
+ # # => [1, 2, 3, 4, 5]
784
+ #
785
+ # ts.lag # => [nil, 1, 2, 3, 4]
786
+ # ts.lag(1) # => [nil, 1, 2, 3, 4]
787
+ # ts.lag(2) # => [nil, nil, 1, 2, 3]
788
+ # ts.lag(-1) # => [2, 3, 4, 5, nil]
754
789
  #
755
- # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
756
- # ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
757
790
  def lag k=1
758
- return dup if k.zero?
759
-
760
- dat = @data.to_a.dup
761
- (dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
762
- (0...k).each { |i| dat[i] = nil }
763
-
764
- Daru::Vector.new(dat, index: @index, name: @name)
791
+ case k
792
+ when 0 then dup
793
+ when 1...size
794
+ copy([nil] * k + data.to_a)
795
+ when -size..-1
796
+ copy(data.to_a[k.abs...size])
797
+ else
798
+ copy([])
799
+ end
765
800
  end
766
801
 
767
802
  def detach_index
@@ -871,7 +906,9 @@ module Daru
871
906
  end
872
907
 
873
908
  # Convert to html for iruby
874
- def to_html threshold=30
909
+ def to_html(threshold=30)
910
+ table_thead = to_html_thead
911
+ table_tbody = to_html_tbody(threshold)
875
912
  path = if index.is_a?(MultiIndex)
876
913
  File.expand_path('../iruby/templates/vector_mi.html.erb', __FILE__)
877
914
  else
@@ -880,51 +917,97 @@ module Daru
880
917
  ERB.new(File.read(path).strip).result(binding)
881
918
  end
882
919
 
920
+ def to_html_thead
921
+ table_thead_path =
922
+ if index.is_a?(MultiIndex)
923
+ File.expand_path('../iruby/templates/vector_mi_thead.html.erb', __FILE__)
924
+ else
925
+ File.expand_path('../iruby/templates/vector_thead.html.erb', __FILE__)
926
+ end
927
+ ERB.new(File.read(table_thead_path).strip).result(binding)
928
+ end
929
+
930
+ def to_html_tbody(threshold=30)
931
+ table_tbody_path =
932
+ if index.is_a?(MultiIndex)
933
+ File.expand_path('../iruby/templates/vector_mi_tbody.html.erb', __FILE__)
934
+ else
935
+ File.expand_path('../iruby/templates/vector_tbody.html.erb', __FILE__)
936
+ end
937
+ ERB.new(File.read(table_tbody_path).strip).result(binding)
938
+ end
939
+
883
940
  def to_s
884
- to_html
941
+ "#<#{self.class}#{': ' + @name.to_s if @name}(#{size})#{':category' if category?}>"
885
942
  end
886
943
 
887
- # Create a summary of the Vector using Report Builder.
888
- def summary(method=:to_text)
889
- ReportBuilder.new(no_title: true).add(self).send(method)
944
+ # Create a summary of the Vector
945
+ # @param [Fixnum] indent_level
946
+ # @return [String] String containing the summary of the Vector
947
+ # @example
948
+ # dv = Daru::Vector.new [1, 2, 3]
949
+ # puts dv.summary
950
+ #
951
+ # # =
952
+ # # n :3
953
+ # # non-missing:3
954
+ # # median: 2
955
+ # # mean: 2.0000
956
+ # # std.dev.: 1.0000
957
+ # # std.err.: 0.5774
958
+ # # skew: 0.0000
959
+ # # kurtosis: -2.3333
960
+ def summary(indent_level=0)
961
+ non_missing = size - count_values(*Daru::MISSING_VALUES)
962
+ summary = ' =' * indent_level + "= #{name}" \
963
+ "\n n :#{size}" \
964
+ "\n non-missing:#{non_missing}"
965
+ case type
966
+ when :object
967
+ summary << object_summary
968
+ when :numeric
969
+ summary << numeric_summary
970
+ end
971
+ summary.split("\n").join("\n" + ' ' * indent_level)
890
972
  end
891
973
 
892
- # :nocov:
893
- def report_building b # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
894
- b.section(name: name) do |s|
895
- s.text "n :#{size}"
896
- s.text "n valid:#{count_values(*Daru::MISSING_VALUES)}"
897
- if @type == :object
898
- s.text "factors: #{factors.to_a.join(',')}"
899
- s.text "mode: #{mode}"
900
-
901
- s.table(name: 'Distribution') do |t|
902
- frequencies.sort_by(&:to_s).each do |k,v|
903
- key = @index.include?(k) ? @index[k] : k
904
- t.row [key, v, ('%0.2f%%' % (v.quo(count_values(*Daru::MISSING_VALUES))*100))]
905
- end
906
- end
907
- end
974
+ # Displays summary for an object type Vector
975
+ # @return [String] String containing object vector summary
976
+ def object_summary
977
+ nval = count_values(*Daru::MISSING_VALUES)
978
+ summary = "\n factors: #{factors.to_a.join(',')}" \
979
+ "\n mode: #{mode.to_a.join(',')}" \
980
+ "\n Distribution\n"
908
981
 
909
- s.text "median: #{median}" if @type==:numeric || @type==:numeric
910
- if @type==:numeric
911
- s.text 'mean: %0.4f' % mean
912
- if sd
913
- s.text 'std.dev.: %0.4f' % sd
914
- s.text 'std.err.: %0.4f' % se
915
- s.text 'skew: %0.4f' % skew
916
- s.text 'kurtosis: %0.4f' % kurtosis
917
- end
918
- end
982
+ data = frequencies.sort.each_with_index.map do |v, k|
983
+ [k, v, '%0.2f%%' % ((nval.zero? ? 1 : v.quo(nval))*100)]
919
984
  end
985
+
986
+ summary + Formatters::Table.format(data)
987
+ end
988
+
989
+ # Displays summary for a numeric type Vector
990
+ # @return [String] String containing numeric vector summary
991
+ def numeric_summary
992
+ summary = "\n median: #{median}" +
993
+ "\n mean: %0.4f" % mean
994
+ if sd
995
+ summary << "\n std.dev.: %0.4f" % sd +
996
+ "\n std.err.: %0.4f" % se
997
+ end
998
+
999
+ if count_values(*Daru::MISSING_VALUES).zero?
1000
+ summary << "\n skew: %0.4f" % skew +
1001
+ "\n kurtosis: %0.4f" % kurtosis
1002
+ end
1003
+ summary
920
1004
  end
921
- # :nocov:
922
1005
 
923
1006
  # Overrides original inspect for pretty printing in irb
924
1007
  def inspect spacing=20, threshold=15
925
1008
  row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
926
1009
 
927
- "#<#{self.class}(#{size})#{':cataegory' if category?}>\n" +
1010
+ "#<#{self.class}(#{size})#{':category' if category?}>\n" +
928
1011
  Formatters::Table.format(
929
1012
  to_a.lazy.map { |v| [v] },
930
1013
  headers: @name && [@name],
@@ -1336,6 +1419,12 @@ module Daru
1336
1419
 
1337
1420
  private
1338
1421
 
1422
+ def copy(values)
1423
+ # Make sure values is right-justified to the size of the vector
1424
+ values.concat([nil] * (size-values.size)) if values.size < size
1425
+ Daru::Vector.new(values[0...size], index: @index, name: @name)
1426
+ end
1427
+
1339
1428
  def nil_positions
1340
1429
  @nil_positions ||
1341
1430
  @nil_positions = size.times.select { |i| @data[i].nil? }
@@ -1426,7 +1515,7 @@ module Daru
1426
1515
  else raise ArgumentError, "Unknown dtype #{dtype}"
1427
1516
  end
1428
1517
 
1429
- @dtype = dtype || :array
1518
+ @dtype = dtype
1430
1519
  new_vector
1431
1520
  end
1432
1521