daru 0.1.3.1 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -1,28 +1,4 @@
1
1
  class Array
2
- # Recode repeated values on an array, adding the number of repetition
3
- # at the end
4
- # Example:
5
- # a=%w{a b c c d d d e}
6
- # a.recode_repeated
7
- # => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
8
- def recode_repeated
9
- return self if size == uniq.size
10
-
11
- duplicated = group_by { |n| n }
12
- .select { |_, g| g.size > 1 }.map(&:first)
13
-
14
- counter = duplicated.collect { |n| [n, 0] }.to_h
15
-
16
- collect do |n|
17
- if counter.key?(n)
18
- counter[n] += 1
19
- '%s_%d' % [n, counter[n]]
20
- else
21
- n
22
- end
23
- end
24
- end
25
-
26
2
  def daru_vector name=nil, index=nil, dtype=:array
27
3
  Daru::Vector.new self, name: name, index: index, dtype: dtype
28
4
  end
@@ -54,6 +30,7 @@ class Hash
54
30
  alias_method :dv, :daru_vector
55
31
  end
56
32
 
33
+ # :nocov:
57
34
  class NMatrix
58
35
  def daru_vector(name=nil, index=nil, *)
59
36
  Daru::Vector.new self, name: name, index: index, dtype: :nmatrix
@@ -70,12 +47,6 @@ class MDArray
70
47
  alias_method :dv, :daru_vector
71
48
  end
72
49
 
73
- class Numeric
74
- def square
75
- self * self
76
- end
77
- end
78
-
79
50
  class Matrix
80
51
  def elementwise_division other
81
52
  map.with_index do |e, index|
@@ -84,17 +55,18 @@ class Matrix
84
55
  end
85
56
  end
86
57
 
87
- class String
88
- def is_number?
89
- if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
90
- true
91
- else
92
- false
58
+ class Object
59
+ if RUBY_VERSION < '2.2'
60
+ def itself
61
+ self
93
62
  end
94
63
  end
95
64
  end
96
65
 
97
- class Daru::DataFrame
98
- # NOTE: This alias will soon be removed. Use to_h in all future work.
99
- alias :to_hash :to_h
100
- end
66
+ module Daru
67
+ class DataFrame
68
+ # NOTE: This alias will soon be removed. Use to_h in all future work.
69
+ alias :to_hash :to_h
70
+ end
71
+ end
72
+ # :nocov:
@@ -0,0 +1,3 @@
1
+ require_relative 'gruff/category.rb'
2
+ require_relative 'gruff/vector.rb'
3
+ require_relative 'gruff/dataframe.rb'
@@ -0,0 +1,49 @@
1
module Daru
  module Plotting
    module Category
      # Gruff plotting backend for categorical vectors. The including object
      # is expected to respond to +frequencies+ and +name+.
      module GruffLibrary
        # Draws the category frequencies as a Gruff chart.
        #
        # == Options
        #
        # * +:type+ - :bar (default), :pie or :sidebar.
        # * +:size+ - Handed to the Gruff constructor. Defaults to 500.
        # * +:method+ - Handed to +frequencies+. Defaults to :count.
        #
        # Yields the Gruff object to the block, if one is given, and returns
        # it. Raises ArgumentError for any other +:type+.
        def plot opts={}
          type = opts[:type] || :bar
          size = opts[:size] || 500
          case type
          when :bar, :pie, :sidebar
            plot = send("category_#{type}_plot", size, opts[:method])
          else
            raise ArgumentError, 'This type of plot is not supported.'
          end
          yield plot if block_given?
          plot
        end

        private

        def category_bar_plot size, method
          plot = Gruff::Bar.new size
          dv = frequencies(method || :count)
          # NOTE(review): positions are enumerated up to +size+ (the value
          # given to the Gruff constructor), not up to the number of
          # categories, so trailing positions map to nil.
          plot.labels = size.times.to_a.zip(dv.index.to_a).to_h
          plot.data name || :vector, dv.to_a
          plot
        end

        def category_pie_plot size, method
          plot = Gruff::Pie.new size
          frequencies(method || :count).each_with_index do |data, index|
            plot.data index, data
          end
          plot
        end

        def category_sidebar_plot size, method
          plot = Gruff::SideBar.new size
          # Apply the fallback before converting: nil.to_s is "" (truthy), so
          # a fallback placed after to_s would never be used.
          plot.labels = {0 => (name || 'vector').to_s}
          # Same :count default as the bar and pie variants.
          frequencies(method || :count).each_with_index do |data, index|
            plot.data index, data
          end
          plot
        end
      end
    end
  end
end
@@ -0,0 +1,91 @@
1
module Daru
  module Plotting
    module DataFrame
      # Gruff plotting backend for data frames. The including object is
      # expected to respond to +[]+, +index+ and +vectors+.
      module GruffLibrary
        # Draws the frame with Gruff.
        #
        # == Options
        #
        # * +:type+ - :line, :bar or :scatter. Defaults to :bar.
        # * +:size+ - Handed to the Gruff constructor. Defaults to 500.
        # * +:x+ - Name of the vector used for X; the index is used when
        #   omitted.
        # * +:y+ - Name, or Array of names, of the vectors to draw; all
        #   vectors when omitted. Non-numeric vectors are dropped.
        # * +:categorized+ - Hash with a +:by+ key naming the vector whose
        #   categories split the data (only :scatter is supported here).
        #
        # Yields the Gruff object to the block, if one is given, and returns
        # it. Raises ArgumentError for an unsupported +:type+.
        def plot opts={}, &block
          type = opts[:type] || :bar
          size = opts[:size] || 500
          x = extract_x_vector opts[:x]
          y = extract_y_vectors opts[:y]
          if opts[:categorized]
            # The block has to be forwarded explicitly, otherwise the
            # +yield+ inside plot_with_category can never run.
            return plot_with_category(size, type, x, y, opts[:categorized], &block)
          end

          case type
          when :line, :bar, :scatter
            plot = send("#{type}_plot", size, x, y)
          # TODO: hist, box
          # It turns out hist and box are not supported in Gruff yet
          else
            raise ArgumentError, 'This type of plot is not supported.'
          end
          yield plot if block_given?
          plot
        end

        private

        def line_plot size, x, y
          gruff_labelled_plot Gruff::Line, size, x, y
        end

        def bar_plot size, x, y
          gruff_labelled_plot Gruff::Bar, size, x, y
        end

        # Shared body of line_plot and bar_plot: labels taken from +x+ and
        # one data series per vector in +y+.
        def gruff_labelled_plot klass, size, x, y
          plot = klass.new size
          plot.labels = size.times.to_a.zip(x).to_h
          y.each do |vec|
            plot.data vec.name || :vector, vec.to_a
          end
          plot
        end

        def scatter_plot size, x, y
          plot = Gruff::Scatter.new size
          y.each do |vec|
            plot.data vec.name || :vector, x, vec.to_a
          end
          plot
        end

        # Draws one series per category of the vector named by opts[:by].
        # Only the first Y vector is used.
        def plot_with_category size, type, x, y, opts
          x = Daru::Vector.new x
          y = y.first
          case type
          when :scatter
            plot = Gruff::Scatter.new size
            cat_dv = self[opts[:by]]
            cat_dv.categories.each do |cat|
              bools = cat_dv.eq cat
              plot.data cat, x.where(bools).to_a, y.where(bools).to_a
            end
          else
            raise ArgumentError, "Type #{type} is not supported."
          end
          yield plot if block_given?
          plot
        end

        def extract_x_vector x_name
          x_name && self[x_name].to_a || index.to_a
        end

        def extract_y_vectors y_names
          y_names =
            case y_names
            when nil
              vectors.to_a
            when Array
              y_names
            else
              [y_names]
            end

          y_names.map { |y| self[y] }.select(&:numeric?)
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
module Daru
  module Plotting
    module Vector
      # Gruff plotting backend for vectors. The including object is expected
      # to respond to +name+, +index+, +to_a+ and +each_with_index+.
      module GruffLibrary
        # Draws the vector with Gruff.
        #
        # == Options
        #
        # * +:type+ - :line, :bar, :pie, :scatter or :sidebar. Defaults to :bar.
        # * +:size+ - Handed to the Gruff constructor. Defaults to 500.
        #
        # Yields the Gruff object to the block, if one is given, and returns
        # it. Raises ArgumentError for any other +:type+.
        def plot opts={}
          type = opts[:type] || :bar
          size = opts[:size] || 500
          case type
          when :line, :bar, :pie, :scatter, :sidebar
            plot = send("#{type}_plot", size)
          # TODO: hist, box
          # It turns out hist and box are not supported in Gruff yet
          else
            raise ArgumentError, 'This type of plot is not supported.'
          end
          yield plot if block_given?
          plot
        end

        private

        def line_plot size
          plot = Gruff::Line.new size
          plot.labels = size.times.to_a.zip(index.to_a).to_h
          plot.data name || :vector, to_a
          plot
        end

        def bar_plot size
          plot = Gruff::Bar.new size
          plot.labels = size.times.to_a.zip(index.to_a).to_h
          plot.data name || :vector, to_a
          plot
        end

        def pie_plot size
          plot = Gruff::Pie.new size
          each_with_index { |data, index| plot.data index, data }
          plot
        end

        def scatter_plot size
          plot = Gruff::Scatter.new size
          plot.data name || :vector, index.to_a, to_a
          plot
        end

        def sidebar_plot size
          plot = Gruff::SideBar.new size
          # Apply the fallback before converting: nil.to_s is "" (truthy), so
          # a fallback placed after to_s would never be used.
          plot.labels = {0 => (name || 'vector').to_s}
          each_with_index { |data, index| plot.data index, data }
          plot
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require_relative 'nyaplot/category.rb'
2
+ require_relative 'nyaplot/vector.rb'
3
+ require_relative 'nyaplot/dataframe.rb'
@@ -0,0 +1,34 @@
1
module Daru
  module Plotting
    module Category
      # Nyaplot plotting backend for categorical vectors. The including
      # object is expected to respond to +frequencies+.
      module NyaplotLibrary
        # Draws the category frequencies with Nyaplot.
        #
        # == Options
        #
        # * +:type+ - Only :bar is supported.
        # * +:method+ - Handed to +frequencies+. Defaults to :count.
        #
        # Yields the Nyaplot::Plot and the added diagram to the block, if one
        # is given, then returns the result of +plot.show+. Raises
        # ArgumentError for any other +:type+ (including a missing one).
        # The +opts+ hash is not modified.
        def plot opts={}
          case type = opts[:type]
          when :bar
            plot = Nyaplot::Plot.new
            # Keep the default in a local so the caller's hash (possibly
            # frozen or reused) is left untouched.
            method = opts[:method] || :count
            values = frequencies method
            diagram = plot.add :bar, values.index.to_a, values.to_a
            # Set yrange for good view
            set_yrange plot, method
            yield plot, diagram if block_given?
            plot.show
          else
            raise ArgumentError, "#{type} type is not supported."
          end
        end

        private

        # Fixes the Y range for the relative methods; other methods keep
        # whatever range the plot already has.
        def set_yrange plot, method
          case method
          when :percentage
            plot.yrange [0, 100]
          when :fraction
            plot.yrange [0, 1]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,187 @@
1
module Daru
  module Plotting
    module DataFrame
      module NyaplotLibrary
        # Plots a DataFrame with Nyaplot on IRuby using the given options. Yields
        # the corresponding Nyaplot::Plot object and the Nyaplot::Diagram object
        # to the block, if it is specified. See the nyaplot docs for info on how to
        # further use these objects.
        #
        # Detailed instructions on use of the plotting API can be found in the
        # notebooks whose links you can find in the README.
        #
        # == Options
        #
        # * +:type+ - Type of plot. Can be :scatter, :bar, :histogram, :line or :box.
        #   Defaults to :scatter when no type is given.
        # * +:x+ - Vector to be used for X co-ordinates.
        # * +:y+ - Vector to be used for Y co-ordinates.
        # * +:categorized+ - Hash describing a categorized plot (:scatter or
        #   :line only). Its +:by+ key names the categorical vector and its
        #   +:method+ key names the diagram attribute varied per category.
        #
        # == Usage
        #   # Simple bar chart
        #   df = Daru::DataFrame.new({a:['A', 'B', 'C', 'D', 'E'], b:[10,20,30,40,50]})
        #   df.plot type: :bar, x: :a, y: :b
        def plot opts={}, &block
          if opts[:categorized]
            plot_with_category(opts, &block)
          else
            plot_without_category(opts, &block)
          end
        end

        private

        def plot_without_category opts
          options = {type: :scatter}.merge(opts)

          plot = Nyaplot::Plot.new
          types = extract_option :type, options
          # When the caller supplied no type information at all, the
          # diagrams must be built from the defaulted options; otherwise the
          # :scatter default chosen above would never reach them. Callers
          # that did supply type information keep their own hash as before.
          diagram_opts = extract_option(:type, opts).empty? ? options : opts

          diagram =
            if !([:scatter, :bar, :line, :histogram] & types).empty?
              plot_regular_diagrams plot, diagram_opts
            elsif types.include?(:box)
              plot_box_diagram plot
            else
              raise ArgumentError, "Unidentified plot types: #{types}"
            end

          yield(plot, diagram) if block_given?

          plot.show
        end

        def plot_with_category opts
          case type = opts[:type]
          when :scatter, :line
            plot = Nyaplot::Plot.new
            category_opts = opts[:categorized]
            x, y = opts[:x], opts[:y]
            cat_dv = self[category_opts[:by]]

            diagrams = create_categorized_diagrams plot, cat_dv, x, y, type

            apply_variant_to_diagrams diagrams, category_opts, type

            plot.legend true
            yield plot, *diagrams if block_given?
            plot.show

          else
            raise ArgumentError, "Unsupported type #{type}"
          end
        end

        # Adds one diagram per category of +cat_dv+, each built from the
        # rows of the +x+ and +y+ vectors belonging to that category.
        def create_categorized_diagrams plot, cat_dv, x, y, type
          cat_dv.categories.map do |cat|
            x_vec = self[x].where(cat_dv.eq cat)
            y_vec = self[y].where(cat_dv.eq cat)
            df = Daru::DataFrame.new [x_vec, y_vec], order: [x, y]
            nyaplot_df = df.to_nyaplotdf

            plot.add_with_df(nyaplot_df, type, x, y)
          end
        end

        # Titles each diagram with its category and sets the attribute named
        # by category_opts[:method] to the next value of the variant cycle.
        def apply_variant_to_diagrams diagrams, category_opts, type
          method = category_opts[:method]
          cat_dv = self[category_opts[:by]]
          # If user has mentioned custom color, size, shape use them
          variant =
            if category_opts[method]
              category_opts[method].cycle
            else
              send("get_#{method}".to_sym, type)
            end

          diagrams.zip(cat_dv.categories) do |d, cat|
            d.title cat
            d.send(method, variant.next)
            d.tooltip_contents [cat]*cat_dv.count(cat) if type == :scatter
          end
        end

        SHAPES = %w(circle triangle-up diamond square triangle-down cross).freeze
        def get_shape type
          validate_type type, :scatter
          SHAPES.cycle
        end

        def get_size type
          validate_type type, :scatter
          (50..550).step(100).cycle
        end

        def get_color(*)
          Nyaplot::Colors.qual.cycle
        end

        def get_stroke_width type
          validate_type type, :line
          (2..16).step(2).cycle
        end

        def validate_type type, *types
          raise ArgumentError, "Invalid option for #{type} type" unless
            types.include? type
        end

        def single_diagram? options
          options[:x] && options[:x].is_a?(Symbol)
        end

        def plot_regular_diagrams plot, opts
          if single_diagram? opts
            add_single_diagram plot, opts
          else
            add_multiple_diagrams plot, opts
          end
        end

        def plot_box_diagram plot
          numeric = only_numerics(clone: false).reject_values(*Daru::MISSING_VALUES)
          plot.add_with_df(numeric.to_nyaplotdf, :box, *numeric.vectors.to_a)
        end

        def add_single_diagram plot, options
          args = [
            to_nyaplotdf,
            options[:type],
            options[:x]
          ]

          args << options[:y] if options[:y]

          plot.add_with_df(*args)
        end

        def add_multiple_diagrams plot, options
          types  = extract_option :type, options
          x_vecs = extract_option :x, options
          y_vecs = extract_option :y, options

          nyaplot_df = to_nyaplotdf
          total      = x_vecs.size
          # Repeat the type list when there are fewer types than X vectors.
          types = types.size < total ? types*total : types

          types.zip(x_vecs, y_vecs).map do |t, xv, yv|
            plot.add_with_df(nyaplot_df, t, xv, yv)
          end
        end

        # Returns the value(s) for +opt+ as an Array. When options[opt] is
        # unset, collects the values of every key whose name starts with
        # +opt+, ordered by key.
        def extract_option opt, options
          if options[opt]
            o = options[opt]
            o.is_a?(Array) ? o : [o]
          else
            options.keys
                   .select { |a| a =~ Regexp.new("\\A#{opt}") }
                   .sort
                   .map { |a| options[a] }
          end
        end
      end
    end
  end
end