daru 0.1.3.1 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -1,28 +1,4 @@
1
1
  class Array
2
- # Recode repeated values on an array, adding the number of repetition
3
- # at the end
4
- # Example:
5
- # a=%w{a b c c d d d e}
6
- # a.recode_repeated
7
- # => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
8
- def recode_repeated
9
- return self if size == uniq.size
10
-
11
- duplicated = group_by { |n| n }
12
- .select { |_, g| g.size > 1 }.map(&:first)
13
-
14
- counter = duplicated.collect { |n| [n, 0] }.to_h
15
-
16
- collect do |n|
17
- if counter.key?(n)
18
- counter[n] += 1
19
- '%s_%d' % [n, counter[n]]
20
- else
21
- n
22
- end
23
- end
24
- end
25
-
26
2
  def daru_vector name=nil, index=nil, dtype=:array
27
3
  Daru::Vector.new self, name: name, index: index, dtype: dtype
28
4
  end
@@ -54,6 +30,7 @@ class Hash
54
30
  alias_method :dv, :daru_vector
55
31
  end
56
32
 
33
+ # :nocov:
57
34
  class NMatrix
58
35
  def daru_vector(name=nil, index=nil, *)
59
36
  Daru::Vector.new self, name: name, index: index, dtype: :nmatrix
@@ -70,12 +47,6 @@ class MDArray
70
47
  alias_method :dv, :daru_vector
71
48
  end
72
49
 
73
- class Numeric
74
- def square
75
- self * self
76
- end
77
- end
78
-
79
50
  class Matrix
80
51
  def elementwise_division other
81
52
  map.with_index do |e, index|
@@ -84,17 +55,18 @@ class Matrix
84
55
  end
85
56
  end
86
57
 
87
- class String
88
- def is_number?
89
- if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
90
- true
91
- else
92
- false
58
+ class Object
59
+ if RUBY_VERSION < '2.2'
60
+ def itself
61
+ self
93
62
  end
94
63
  end
95
64
  end
96
65
 
97
- class Daru::DataFrame
98
- # NOTE: This alias will soon be removed. Use to_h in all future work.
99
- alias :to_hash :to_h
100
- end
66
+ module Daru
67
+ class DataFrame
68
+ # NOTE: This alias will soon be removed. Use to_h in all future work.
69
+ alias :to_hash :to_h
70
+ end
71
+ end
72
+ # :nocov:
@@ -0,0 +1,3 @@
1
+ require_relative 'gruff/category.rb'
2
+ require_relative 'gruff/vector.rb'
3
+ require_relative 'gruff/dataframe.rb'
@@ -0,0 +1,49 @@
1
module Daru
  module Plotting
    module Category
      # Gruff-backed plotting for categorical data.
      #
      # The including object is expected to respond to +name+ and to
      # +frequencies(method)+; the result of +frequencies+ is used through
      # +index+, +to_a+ and +each_with_index+.
      module GruffLibrary
        # Builds a Gruff plot of the category frequencies.
        #
        # == Options
        #
        # * +:type+ - :bar (default), :pie or :sidebar.
        # * +:size+ - Passed to the Gruff constructor. Defaults to 500.
        # * +:method+ - Passed to +frequencies+. Defaults to :count.
        #
        # Yields the plot to the block, if one is given, and returns the plot.
        # Raises ArgumentError when +:type+ is not one of the supported types.
        def plot opts={}
          type = opts[:type] || :bar
          size = opts[:size] || 500
          case type
          when :bar, :pie, :sidebar
            plot = send("category_#{type}_plot".to_sym, size, opts[:method])
          else
            raise ArgumentError, 'This type of plot is not supported.'
          end
          yield plot if block_given?
          plot
        end

        private

        # Bar chart: one data series holding every frequency, with the
        # category labels attached to positions 0...size.
        def category_bar_plot size, method
          plot = Gruff::Bar.new size
          method ||= :count
          dv = frequencies(method)
          plot.labels = size.times.to_a.zip(dv.index.to_a).to_h
          plot.data name || :vector, dv.to_a
          plot
        end

        # Pie chart: one data series (slice) per category.
        def category_pie_plot size, method
          plot = Gruff::Pie.new size
          method ||= :count
          frequencies(method).each_with_index do |data, index|
            plot.data index, data
          end
          plot
        end

        # Side bar chart: one data series per category under a single label.
        def category_sidebar_plot size, method
          plot = Gruff::SideBar.new size
          # Default the method like the bar and pie helpers do; previously a
          # missing :method option reached +frequencies+ as nil.
          method ||= :count
          # Apply the fallback before #to_s: nil.to_s is "" (truthy), so the
          # old `name.to_s || 'vector'` could never yield 'vector'.
          plot.labels = {0 => (name || 'vector').to_s}
          frequencies(method).each_with_index do |data, index|
            plot.data index, data
          end
          plot
        end
      end
    end
  end
end
@@ -0,0 +1,91 @@
1
module Daru
  module Plotting
    module DataFrame
      # Gruff-backed plotting for data frames.
      #
      # The including object is expected to respond to +[]+ (vector lookup by
      # name), +vectors+ and +index+.
      module GruffLibrary
        # Builds a Gruff plot from the data frame.
        #
        # == Options
        #
        # * +:type+ - :line, :bar (default) or :scatter.
        # * +:size+ - Passed to the Gruff constructor. Defaults to 500.
        # * +:x+ - Name of the vector used for X; the index is used when absent.
        # * +:y+ - Name, or Array of names, of the vectors to plot; all vectors
        #   when absent. Non-numeric vectors are dropped.
        # * +:categorized+ - Hash with a +:by+ key naming the vector whose
        #   categories split the data. Only :scatter is supported.
        #
        # Yields the plot to the block, if one is given, and returns the plot.
        # Raises ArgumentError for an unsupported +:type+.
        def plot opts={}
          type = opts[:type] || :bar
          size = opts[:size] || 500
          x = extract_x_vector opts[:x]
          y = extract_y_vectors opts[:y]
          # Both paths now fall through to the single yield below. Previously
          # the categorized path returned early and the caller's block, which
          # was never forwarded to plot_with_category, was silently ignored.
          plot =
            if opts[:categorized]
              plot_with_category size, type, x, y, opts[:categorized]
            else
              case type
              when :line, :bar, :scatter
                send("#{type}_plot", size, x, y)
                # TODO: hist, box
                # It turns out hist and box are not supported in Gruff yet
              else
                raise ArgumentError, 'This type of plot is not supported.'
              end
            end
          yield plot if block_given?
          plot
        end

        private

        # Line chart: one series per Y vector, X values used as labels for
        # positions 0...size.
        def line_plot size, x, y
          plot = Gruff::Line.new size
          plot.labels = size.times.to_a.zip(x).to_h
          y.each do |vec|
            plot.data vec.name || :vector, vec.to_a
          end
          plot
        end

        # Bar chart: one series per Y vector, X values used as labels for
        # positions 0...size.
        def bar_plot size, x, y
          plot = Gruff::Bar.new size
          plot.labels = size.times.to_a.zip(x).to_h
          y.each do |vec|
            plot.data vec.name || :vector, vec.to_a
          end
          plot
        end

        # Scatter chart: one series per Y vector, all sharing the same X values.
        def scatter_plot size, x, y
          plot = Gruff::Scatter.new size
          y.each do |vec|
            plot.data vec.name || :vector, x, vec.to_a
          end
          plot
        end

        # Scatter chart with one series per category of the vector named by
        # opts[:by]. Only the first Y vector is plotted.
        # Raises ArgumentError for any type other than :scatter.
        def plot_with_category size, type, x, y, opts
          x = Daru::Vector.new x
          y = y.first
          case type
          when :scatter
            plot = Gruff::Scatter.new size
            cat_dv = self[opts[:by]]
            cat_dv.categories.each do |cat|
              bools = cat_dv.eq cat
              plot.data cat, x.where(bools).to_a, y.where(bools).to_a
            end
          else
            raise ArgumentError, "Type #{type} is not supported."
          end
          plot
        end

        # X values as an Array: the named vector, or the index when no name
        # is given.
        def extract_x_vector x_name
          x_name && self[x_name].to_a || index.to_a
        end

        # Y vectors to plot: resolves nil / single name / Array of names to
        # vectors and keeps only the numeric ones.
        def extract_y_vectors y_names
          y_names =
            case y_names
            when nil
              vectors.to_a
            when Array
              y_names
            else
              [y_names]
            end

          y_names.map { |y| self[y] }.select(&:numeric?)
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
module Daru
  module Plotting
    module Vector
      # Gruff-backed plotting for vectors.
      #
      # The including object is expected to respond to +name+, +index+, +to_a+
      # and +each_with_index+.
      module GruffLibrary
        # Builds a Gruff plot of the vector.
        #
        # == Options
        #
        # * +:type+ - :line, :bar (default), :pie, :scatter or :sidebar.
        # * +:size+ - Passed to the Gruff constructor. Defaults to 500.
        #
        # Yields the plot to the block, if one is given, and returns the plot.
        # Raises ArgumentError for an unsupported +:type+.
        def plot opts={}
          type = opts[:type] || :bar
          size = opts[:size] || 500
          case type
          when :line, :bar, :pie, :scatter, :sidebar
            plot = send("#{type}_plot", size)
            # TODO: hist, box
            # It turns out hist and box are not supported in Gruff yet
          else
            raise ArgumentError, 'This type of plot is not supported.'
          end
          yield plot if block_given?
          plot
        end

        private

        # Line chart: a single series, index entries used as labels for
        # positions 0...size.
        def line_plot size
          plot = Gruff::Line.new size
          plot.labels = size.times.to_a.zip(index.to_a).to_h
          plot.data name || :vector, to_a
          plot
        end

        # Bar chart: a single series, index entries used as labels for
        # positions 0...size.
        def bar_plot size
          plot = Gruff::Bar.new size
          plot.labels = size.times.to_a.zip(index.to_a).to_h
          plot.data name || :vector, to_a
          plot
        end

        # Pie chart: one series (slice) per element, named by its index entry.
        def pie_plot size
          plot = Gruff::Pie.new size
          each_with_index { |data, index| plot.data index, data }
          plot
        end

        # Scatter chart: index entries on X, values on Y.
        def scatter_plot size
          plot = Gruff::Scatter.new size
          plot.data name || :vector, index.to_a, to_a
          plot
        end

        # Side bar chart: one series per element under a single label.
        def sidebar_plot size
          plot = Gruff::SideBar.new size
          # Apply the fallback before #to_s: nil.to_s is "" (truthy), so the
          # old `name.to_s || 'vector'` could never yield 'vector'.
          plot.labels = {0 => (name || 'vector').to_s}
          each_with_index { |data, index| plot.data index, data }
          plot
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require_relative 'nyaplot/category.rb'
2
+ require_relative 'nyaplot/vector.rb'
3
+ require_relative 'nyaplot/dataframe.rb'
@@ -0,0 +1,34 @@
1
module Daru
  module Plotting
    module Category
      # Nyaplot-backed plotting for categorical data.
      #
      # The including object is expected to respond to +frequencies(method)+;
      # the result is used through +index+ and +to_a+.
      module NyaplotLibrary
        # Plots the category frequencies as a Nyaplot bar diagram.
        #
        # == Options
        #
        # * +:type+ - Only :bar is supported.
        # * +:method+ - Passed to +frequencies+. Defaults to :count.
        #
        # Yields the Nyaplot::Plot and the added diagram to the block, if one
        # is given, then returns the result of +plot.show+.
        # Raises ArgumentError for any other +:type+.
        def plot opts
          case type = opts[:type]
          when :bar
            plot = Nyaplot::Plot.new
            # Read the default into a local instead of writing it back with
            # `opts[:method] ||= :count`, which mutated the caller's hash
            # (and raised on a frozen one).
            method = opts[:method] || :count
            values = frequencies method
            diagram = plot.add :bar, values.index.to_a, values.to_a
            # Set yrange for good view
            set_yrange plot, method
            yield plot, diagram if block_given?
            plot.show
          else
            raise ArgumentError, "#{type} type is not supported."
          end
        end

        private

        # Fixes the Y axis range for relative measures; other methods are
        # left with whatever range the plot already has.
        def set_yrange plot, method
          case method
          when :percentage
            plot.yrange [0, 100]
          when :fraction
            plot.yrange [0, 1]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,187 @@
1
module Daru
  module Plotting
    module DataFrame
      module NyaplotLibrary
        # Plots a DataFrame with Nyaplot on IRuby using the given options. Yields
        # the corresponding Nyaplot::Plot object and the Nyaplot::Diagram object
        # to the block, if it is specified. See the nyaplot docs for info on how to
        # further use these objects.
        #
        # Detailed instructions on use of the plotting API can be found in the
        # notebooks whose links you can find in the README.
        #
        # == Options
        #
        # * +:type+ - Type of plot. Can be :scatter, :bar, :histogram, :line or :box.
        #   Defaults to :scatter for non-categorized plots.
        # * +:x+ - Vector to be used for X co-ordinates.
        # * +:y+ - Vector to be used for Y co-ordinates.
        # * +:categorized+ - Hash with +:by+ (vector supplying the categories)
        #   and +:method+ (diagram attribute varied per category). Only
        #   :scatter and :line are supported in this mode.
        #
        # == Usage
        #   # Simple bar chart
        #   df = Daru::DataFrame.new({a:['A', 'B', 'C', 'D', 'E'], b:[10,20,30,40,50]})
        #   df.plot type: :bar, x: :a, y: :b
        def plot opts={}, &block
          if opts[:categorized]
            plot_with_category(opts, &block)
          else
            plot_without_category(opts, &block)
          end
        end

        private

        # Regular (non-categorized) plot. Yields the plot and the diagram(s)
        # to the block, if given, and returns the result of +plot.show+.
        # Raises ArgumentError when no requested type is recognised.
        def plot_without_category opts
          options = {type: :scatter}.merge(opts)

          plot = Nyaplot::Plot.new
          types = extract_option :type, options

          diagram =
            if ([:scatter, :bar, :line, :histogram] & types).any?
              # Forward the merged options, not the raw opts: otherwise the
              # default :scatter type never reaches the diagram builders.
              plot_regular_diagrams plot, options
            elsif types.include?(:box)
              plot_box_diagram plot
            else
              raise ArgumentError, "Unidentified plot types: #{types}"
            end

          yield(plot, diagram) if block_given?

          plot.show
        end

        # Categorized plot: one diagram per category of the vector named by
        # opts[:categorized][:by]. Yields the plot followed by every diagram
        # and returns the result of +plot.show+.
        # Raises ArgumentError unless the type is :scatter or :line.
        def plot_with_category opts
          case type = opts[:type]
          when :scatter, :line
            plot = Nyaplot::Plot.new
            category_opts = opts[:categorized]
            x = opts[:x]
            y = opts[:y]
            cat_dv = self[category_opts[:by]]

            diagrams = create_categorized_diagrams plot, cat_dv, x, y, type

            apply_variant_to_diagrams diagrams, category_opts, type

            plot.legend true
            yield plot, *diagrams if block_given?
            plot.show
          else
            raise ArgumentError, "Unsupported type #{type}"
          end
        end

        # Adds one diagram per category, each built from the rows of the x and
        # y vectors that belong to that category. Returns the diagrams.
        def create_categorized_diagrams plot, cat_dv, x, y, type
          cat_dv.categories.map do |cat|
            x_vec = self[x].where(cat_dv.eq cat)
            y_vec = self[y].where(cat_dv.eq cat)
            df = Daru::DataFrame.new [x_vec, y_vec], order: [x, y]
            nyaplot_df = df.to_nyaplotdf

            plot.add_with_df(nyaplot_df, type, x, y)
          end
        end

        # Titles each diagram with its category and sets the attribute named by
        # category_opts[:method] to the next value of a repeating sequence.
        def apply_variant_to_diagrams diagrams, category_opts, type
          method = category_opts[:method]
          cat_dv = self[category_opts[:by]]
          # If user has mentioned custom color, size, shape use them
          variant =
            if category_opts[method]
              category_opts[method].cycle
            else
              send("get_#{method}".to_sym, type)
            end

          diagrams.zip(cat_dv.categories) do |d, cat|
            d.title cat
            d.send(method, variant.next)
            d.tooltip_contents [cat]*cat_dv.count(cat) if type == :scatter
          end
        end

        SHAPES = %w(circle triangle-up diamond square triangle-down cross).freeze

        # Default repeating shapes; only valid for :scatter.
        def get_shape type
          validate_type type, :scatter
          SHAPES.cycle
        end

        # Default repeating sizes (50, 150, ..., 550); only valid for :scatter.
        def get_size type
          validate_type type, :scatter
          (50..550).step(100).cycle
        end

        # Default repeating colors; valid for every type.
        def get_color(*)
          Nyaplot::Colors.qual.cycle
        end

        # Default repeating stroke widths (2, 4, ..., 16); only valid for :line.
        def get_stroke_width type
          validate_type type, :line
          (2..16).step(2).cycle
        end

        # Raises ArgumentError unless +type+ is one of +types+.
        def validate_type type, *types
          raise ArgumentError, "Invalid option for #{type} type" unless
            types.include? type
        end

        # True when a single X vector is given as a Symbol.
        def single_diagram? options
          options[:x].is_a?(Symbol)
        end

        # Adds either one diagram or several, depending on the shape of :x.
        def plot_regular_diagrams plot, opts
          if single_diagram? opts
            add_single_diagram plot, opts
          else
            add_multiple_diagrams plot, opts
          end
        end

        # Box plot over every numeric vector, with rows holding missing values
        # rejected first.
        def plot_box_diagram plot
          numeric = only_numerics(clone: false).reject_values(*Daru::MISSING_VALUES)
          plot.add_with_df(numeric.to_nyaplotdf, :box, *numeric.vectors.to_a)
        end

        # Adds a single diagram for options[:x] (and options[:y] when present).
        def add_single_diagram plot, options
          args = [
            to_nyaplotdf,
            options[:type],
            options[:x]
          ]

          args << options[:y] if options[:y]

          plot.add_with_df(*args)
        end

        # Adds one diagram per (type, x, y) triple. When fewer types than X
        # vectors are given the type list is repeated.
        # NOTE(review): the repeated list can be longer than x_vecs when more
        # than one type is supplied; zip then pairs the extras with nil.
        def add_multiple_diagrams plot, options
          types  = extract_option :type, options
          x_vecs = extract_option :x, options
          y_vecs = extract_option :y, options

          nyaplot_df = to_nyaplotdf
          total      = x_vecs.size
          types = types.size < total ? types*total : types

          types.zip(x_vecs, y_vecs).map do |t, xv, yv|
            plot.add_with_df(nyaplot_df, t, xv, yv)
          end
        end

        # Returns the values for +opt+ as an Array. When options[opt] is set
        # it is used (wrapped in an Array if needed); otherwise the values of
        # every key starting with +opt+ (e.g. :x1, :x2) are collected in key
        # order.
        def extract_option opt, options
          if options[opt]
            o = options[opt]
            o.is_a?(Array) ? o : [o]
          else
            prefix = opt.to_s
            # Plain prefix check: same matches as the former \A regexp, without
            # building a Regexp per key or relying on =~ for arbitrary keys.
            options.keys
                   .select { |a| a.to_s.start_with?(prefix) }
                   .sort
                   .map { |a| options[a] }
          end
        end
      end
    end
  end
end