charty 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +56 -23
  3. data/.github/workflows/nmatrix.yml +67 -0
  4. data/.github/workflows/pycall.yml +86 -0
  5. data/Gemfile +18 -0
  6. data/README.md +123 -4
  7. data/Rakefile +4 -5
  8. data/charty.gemspec +1 -3
  9. data/examples/sample_images/hist_gruff.png +0 -0
  10. data/images/penguins_body_mass_g_flipper_length_mm_scatter_plot.png +0 -0
  11. data/images/penguins_body_mass_g_flipper_length_mm_species_scatter_plot.png +0 -0
  12. data/images/penguins_body_mass_g_flipper_length_mm_species_sex_scatter_plot.png +0 -0
  13. data/images/penguins_species_body_mass_g_bar_plot_h.png +0 -0
  14. data/images/penguins_species_body_mass_g_bar_plot_v.png +0 -0
  15. data/images/penguins_species_body_mass_g_box_plot_h.png +0 -0
  16. data/images/penguins_species_body_mass_g_box_plot_v.png +0 -0
  17. data/images/penguins_species_body_mass_g_sex_bar_plot_v.png +0 -0
  18. data/images/penguins_species_body_mass_g_sex_box_plot_v.png +0 -0
  19. data/lib/charty.rb +4 -0
  20. data/lib/charty/backends/gruff.rb +13 -2
  21. data/lib/charty/backends/plotly.rb +322 -20
  22. data/lib/charty/backends/pyplot.rb +416 -64
  23. data/lib/charty/index.rb +213 -0
  24. data/lib/charty/linspace.rb +1 -1
  25. data/lib/charty/missing_value_support.rb +14 -0
  26. data/lib/charty/plot_methods.rb +173 -8
  27. data/lib/charty/plotters.rb +7 -0
  28. data/lib/charty/plotters/abstract_plotter.rb +87 -12
  29. data/lib/charty/plotters/bar_plotter.rb +200 -3
  30. data/lib/charty/plotters/box_plotter.rb +75 -7
  31. data/lib/charty/plotters/categorical_plotter.rb +272 -40
  32. data/lib/charty/plotters/count_plotter.rb +7 -0
  33. data/lib/charty/plotters/estimation_support.rb +84 -0
  34. data/lib/charty/plotters/random_support.rb +25 -0
  35. data/lib/charty/plotters/relational_plotter.rb +518 -0
  36. data/lib/charty/plotters/scatter_plotter.rb +115 -0
  37. data/lib/charty/plotters/vector_plotter.rb +6 -0
  38. data/lib/charty/statistics.rb +87 -2
  39. data/lib/charty/table.rb +50 -15
  40. data/lib/charty/table_adapters.rb +2 -0
  41. data/lib/charty/table_adapters/active_record_adapter.rb +17 -9
  42. data/lib/charty/table_adapters/base_adapter.rb +69 -0
  43. data/lib/charty/table_adapters/daru_adapter.rb +37 -3
  44. data/lib/charty/table_adapters/datasets_adapter.rb +6 -2
  45. data/lib/charty/table_adapters/hash_adapter.rb +130 -16
  46. data/lib/charty/table_adapters/narray_adapter.rb +25 -6
  47. data/lib/charty/table_adapters/nmatrix_adapter.rb +15 -5
  48. data/lib/charty/table_adapters/pandas_adapter.rb +81 -0
  49. data/lib/charty/vector.rb +69 -0
  50. data/lib/charty/vector_adapters.rb +183 -0
  51. data/lib/charty/vector_adapters/array_adapter.rb +109 -0
  52. data/lib/charty/vector_adapters/daru_adapter.rb +171 -0
  53. data/lib/charty/vector_adapters/narray_adapter.rb +187 -0
  54. data/lib/charty/vector_adapters/nmatrix_adapter.rb +37 -0
  55. data/lib/charty/vector_adapters/numpy_adapter.rb +168 -0
  56. data/lib/charty/vector_adapters/pandas_adapter.rb +200 -0
  57. data/lib/charty/version.rb +1 -1
  58. metadata +33 -45
@@ -0,0 +1,115 @@
module Charty
  module Plotters
    # Plotter that hands x/y data, optionally accompanied by color, style and
    # size variables, to the +scatter+ method of the current backend.
    class ScatterPlotter < RelationalPlotter
      # @param data [Object, nil] forwarded unchanged to RelationalPlotter
      # @param variables [Hash] may contain :x, :y, :color, :style and :size;
      #   keys that are absent are passed to the superclass as nil
      # @param options [Hash] forwarded unchanged to RelationalPlotter
      def initialize(data: nil, variables: {}, **options, &block)
        x, y, color, style, size = variables.values_at(:x, :y, :color, :style, :size)
        super(x, y, color, style, size, data: data, **options, &block)
      end

      attr_reader :alpha, :legend

      # Sets the alpha value.
      #
      # @param val [nil, Symbol, String, Numeric] nil, :auto, "auto"
      #   (stored as :auto), or a number within 0..1
      # @raise [ArgumentError] for a number outside 0..1 or any other value
      def alpha=(val)
        case val
        when nil, :auto, 0..1
          @alpha = val
        when "auto"
          @alpha = val.to_sym
        when Numeric
          # Reaching this branch means the number did not match 0..1 above.
          raise ArgumentError,
                format("the given alpha is out of bounds " \
                       "(%p for nil, :auto, or number 0..1)", val)
        else
          # format receives val as a single argument, so an Array value is
          # inspected as a whole instead of being spread over the directives.
          raise ArgumentError,
                format("invalid value of alpha " \
                       "(%p for nil, :auto, or number in 0..1)", val)
        end
      end

      # Sets the legend mode.
      #
      # @param val [Symbol, String, false] :auto, :brief, :full (or their
      #   String spellings, which are stored as Symbols), or false
      # @raise [ArgumentError] for any other value
      def legend=(val)
        case val
        when :auto, :brief, :full, false
          @legend = val
        when "auto", "brief", "full"
          @legend = val.to_sym
        else
          raise ArgumentError,
                format("invalid value of legend (%p for :auto, :brief, :full, or false)", val)
        end
      end

      attr_reader :line_width, :edge_color

      # Stores the value returned by check_number (defined outside this file).
      def line_width=(val)
        @line_width = check_number(val, :line_width, allow_nil: true)
      end

      # Stores the value returned by check_color (defined outside this file).
      # The result belongs in @edge_color; writing it to @line_width would
      # leave the edge_color reader nil and overwrite the line width.
      def edge_color=(val)
        @edge_color = check_color(val, :edge_color, allow_nil: true)
      end

      # Draws the plot on the current backend and returns the result of the
      # backend's +show+.
      def render
        backend = Backends.current
        backend.begin_figure
        draw_points(backend)
        annotate_axes(backend)
        backend.show
      end

      # Draws the plot on the current backend and passes +filename+ and
      # +opts+ to the backend's +save+.
      def save(filename, **opts)
        backend = Backends.current
        backend.begin_figure
        draw_points(backend)
        annotate_axes(backend)
        backend.save(filename, **opts)
      end

      # Sets up the color, size and style mappings, drops rows with missing
      # values, and calls the backend's +scatter+.
      private def draw_points(backend)
        map_color(palette: palette, order: color_order, norm: color_norm)
        map_size(sizes: sizes, order: size_order, norm: size_norm)
        map_style(markers: markers, order: marker_order)

        data = @plot_data.drop_na

        # TODO: should pass key_color to backend's scatter method.
        #       In pyplot backend, it is passed as color parameter.

        x = data[:x]
        y = data[:y]
        # These locals stay nil when the corresponding variable was not given.
        color = data[:color] if @variables.key?(:color)
        style = data[:style] if @variables.key?(:style)
        size = data[:size] if @variables.key?(:size)

        # TODO: key_color
        backend.scatter(
          x, y, @variables,
          color: color, color_mapper: @color_mapper,
          style: style, style_mapper: @style_mapper,
          size: size, size_mapper: @size_mapper,
          legend: legend
        )
      end

      # Labels the axes with the x and y variables when they are set, then
      # runs the legend step if a legend was requested.
      private def annotate_axes(backend)
        xlabel = self.variables[:x]
        ylabel = self.variables[:y]
        backend.set_xlabel(xlabel) unless xlabel.nil?
        backend.set_ylabel(ylabel) unless ylabel.nil?

        if legend
          add_legend_data(backend)
        end
      end

      # Placeholder: works out the legend verbosity and title but does not
      # pass anything to the backend yet.
      private def add_legend_data(backend)
        # TODO: Legend Support
        verbosity = legend
        verbosity = :auto if verbosity == true

        titles = [:color, :size, :style].filter_map do |v|
          variables[v] if variables.key?(v)
        end
        # A title is used only when exactly one semantic variable is present.
        legend_title = titles.length == 1 ? titles[0] : ""
      end
    end
  end
end
@@ -0,0 +1,6 @@
module Charty
  module Plotters
    # Subclass of AbstractPlotter that adds no behavior of its own here.
    class VectorPlotter < AbstractPlotter
    end
  end
end
@@ -7,8 +7,8 @@ module Charty
7
7
  enum.mean
8
8
  end
9
9
 
10
- def self.stdev(enum)
11
- enum.stdev
10
+ def self.stdev(enum, population: false)
11
+ enum.stdev(population: population)
12
12
  end
13
13
  rescue LoadError
14
14
  def self.mean(enum)
@@ -25,5 +25,90 @@ module Charty
25
25
  Math.sqrt(var)
26
26
  end
27
27
  end
28
+
# Resamples +vector+ with replacement +n_boot+ times and returns the
# estimator value of each resample.
#
# +random+ and +func+ are first passed through RandomSupport.check_random and
# EstimationSupport.check_estimator. When +units+ is given the work is handed
# to structured_bootstrap. When Pandas or Numpy is loaded, the pycall-specific
# path is tried first and its result is used unless it returns nil.
#
# In the generic path only the :mean estimator produces a value; any other
# estimator yields nil for every resample.
def self.bootstrap(vector, n_boot: 2000, func: :mean, units: nil, random: nil)
  sample_size = vector.size
  random = Charty::Plotters::RandomSupport.check_random(random)
  func = Charty::Plotters::EstimationSupport.check_estimator(func)

  return structured_bootstrap(vector, n_boot, units, func, random) if units

  if defined?(Pandas::Series) || defined?(Numpy::NDArray)
    fast_result = bootstrap_optimized_for_pycall(vector, n_boot, random, func)
    return fast_result if fast_result
  end

  Array.new(n_boot) do
    # One random position per element, drawn with replacement.
    picks = Array.new(sample_size) { random.rand(sample_size) }
    resampled = vector.values_at(*picks)

    case func
    when :mean
      mean(resampled)
    end
  end
end
56
+
# Bootstrap path for data held in Pandas::Series or Numpy::NDArray objects.
#
# A Charty::Vector is unwrapped and its +data+ is processed recursively.
# Returns nil when +vector+ is none of the handled types, which the caller
# uses as the signal to fall back to the generic implementation.
private_class_method def self.bootstrap_optimized_for_pycall(vector, n_boot, random, func)
  if vector.is_a?(Charty::Vector)
    return bootstrap_optimized_for_pycall(vector.data, n_boot, random, func)
  end

  # numpy is also available when pandas is available
  pycall_backed = (defined?(Pandas::Series) && vector.is_a?(Pandas::Series)) ||
                  vector.is_a?(Numpy::NDArray)
  return nil unless pycall_backed

  n = vector.size
  # A single index buffer is allocated once and refilled for every resample.
  resampler = Numpy.empty(n, dtype: Numpy.intp)
  Array.new(n_boot) do
    # TODO: Use Numo and MemoryView to reduce execution time
    # resampler = Numo::Int64.new(n).rand(n)
    # w = Numpy.take(vector, resampler)
    n.times { |slot| resampler[slot] = random.rand(n) }
    resampled = vector.take(resampler)

    case func
    when :mean
      resampled.mean
    end
  end
end
80
+
# Placeholder for bootstrapping with sampling units; always raises.
#
# @raise [NotImplementedError] unconditionally
private_class_method def self.structured_bootstrap(vector, n_boot, units, func, random)
  message = "structured bootstrapping has not been supported yet"
  raise NotImplementedError, message
end
85
+
# Returns the lower and upper percentile of the bootstrap distribution that
# bound a central interval of width +which+ percent.
#
# The last positional argument is +which+ (for example 95); everything before
# it, together with the keyword arguments, is forwarded to bootstrap.
# If the bootstrap result responds to +percentile+ that method is used,
# otherwise the module-level percentile is applied.
def self.bootstrap_ci(*vectors, which, n_boot: 2000, func: :mean, units: nil, random: nil)
  boot = bootstrap(*vectors, n_boot: n_boot, func: func, units: units, random: random)

  # Divide as a float: with integer division a 95% interval would be
  # computed from the 3rd and 97th percentiles instead of 2.5 and 97.5.
  half_width = which / 2.0
  q = [50 - half_width, 50 + half_width]

  if boot.respond_to?(:percentile)
    boot.percentile(q)
  else
    percentile(boot, q)
  end
end
95
+
# Computes percentiles of +a+ by linear interpolation between the two
# nearest order statistics.
#
# @param a [Array] values; a sorted copy is made, the argument is not mutated
# @param q [Array<Numeric>] percentiles, expected within 0..100
# @return [Array] one value per entry of +q+; for an empty +a+ the result of
#   mean(a) is returned instead
#
# TODO: optimize with introselect algorithm
def self.percentile(a, q)
  return mean(a) if a.size == 0

  sorted = a.sort
  last = sorted.size - 1
  q.map do |pct|
    # Map 0..100 onto the index range 0..last so that 0 selects the minimum
    # and 100 the maximum. Scaling by the element count instead would run
    # past the end of the array for pct == 100.
    position = last * (pct / 100.0)
    lower = position.floor
    if lower >= last
      sorted[last]
    else
      weight = position - lower
      (1 - weight) * sorted[lower] + weight * sorted[lower + 1]
    end
  end
end
28
113
  end
29
114
  end
data/lib/charty/table.rb CHANGED
@@ -13,6 +13,7 @@ module Charty
13
13
 
14
14
  class Table
15
15
  extend Forwardable
16
+ include MissingValueSupport
16
17
 
17
18
  def initialize(data, **kwargs)
18
19
  adapter_class = TableAdapters.find_adapter_class(data)
@@ -21,31 +22,46 @@ module Charty
21
22
  else
22
23
  @adapter = adapter_class.new(data, **kwargs)
23
24
  end
25
+
26
+ @column_cache = {}
24
27
  end
25
28
 
26
29
  attr_reader :adapter
27
30
 
31
+ def_delegators :adapter, :length, :column_length
32
+
33
+ def_delegators :adapter, :columns, :columns=
34
+ def_delegators :adapter, :index, :index=
35
+
28
36
  def_delegator :@adapter, :column_names
29
37
  def_delegator :@adapter, :data, :raw_data
30
38
 
31
- def columns
32
- @column_accessor ||= ColumnAccessor.new(@adapter)
# Two tables are equal when they are the same object or when their adapters
# compare equal; for anything that is not a Charty::Table the inherited
# comparison decides.
def ==(other)
  return true if equal?(other)
  return super unless other.is_a?(Charty::Table)

  adapter == other.adapter
end
49
+
50
+ def empty?
51
+ length == 0
33
52
  end
34
53
 
35
- def [](*args)
36
- n_args = args.length
37
- case n_args
38
- when 1
39
- row = nil
40
- column = args[0]
41
- @adapter[row, column]
42
- when 2
43
- row = args[0]
44
- column = args[1]
45
- @adapter[row, column]
54
+ def [](key)
55
+ key = case key
56
+ when Symbol
57
+ key
58
+ else
59
+ String.try_convert(key).to_sym
60
+ end
61
+ if @column_cache.key?(key)
62
+ @column_cache[key]
46
63
  else
47
- message = "wrong number of arguments (given #{n_args}, expected 1..2)"
48
- raise ArgumentError, message
64
+ @column_cache[key] = @adapter[nil, key]
49
65
  end
50
66
  end
51
67
 
@@ -81,5 +97,24 @@ module Charty
81
97
  i += 1
82
98
  end
83
99
  end
100
+
# Returns a table without the rows that hold a missing value in any column.
# When no row has a missing value the receiver itself is returned; otherwise
# a new Charty::Table is built from the remaining rows and their index
# entries.
def drop_na
  # TODO: Must implement this method in each adapter
  rows_with_gaps = index.select do |row|
    column_names.any? { |name| missing_value?(self[name][row]) }
  end
  return self if rows_with_gaps.empty?

  kept_rows = index.to_a - rows_with_gaps
  cleaned = column_names.each_with_object({}) do |name, columns_by_name|
    columns_by_name[name] = kept_rows.map { |row| self[name][row] }
  end
  Charty::Table.new(cleaned, index: kept_rows)
end
84
119
  end
85
120
  end
@@ -15,9 +15,11 @@ module Charty
15
15
  end
16
16
  end
17
17
 
18
+ require_relative 'table_adapters/base_adapter'
18
19
  require_relative 'table_adapters/hash_adapter'
19
20
  require_relative 'table_adapters/narray_adapter'
20
21
  require_relative 'table_adapters/datasets_adapter'
21
22
  require_relative 'table_adapters/daru_adapter'
22
23
  require_relative 'table_adapters/active_record_adapter'
23
24
  require_relative 'table_adapters/nmatrix_adapter'
25
+ require_relative 'table_adapters/pandas_adapter'
@@ -1,10 +1,8 @@
1
1
  module Charty
2
2
  module TableAdapters
3
- class ActiveRecordAdapter
3
+ class ActiveRecordAdapter < BaseAdapter
4
4
  TableAdapters.register(:active_record, self)
5
5
 
6
- include Enumerable
7
-
8
6
  def self.supported?(data)
9
7
  defined?(ActiveRecord::Relation) && data.is_a?(ActiveRecord::Relation)
10
8
  end
@@ -12,17 +10,27 @@ module Charty
12
10
  def initialize(data)
13
11
  @data = check_type(ActiveRecord::Relation, data, :data)
14
12
  @column_names = @data.column_names.freeze
15
- @columns = nil
13
+ self.columns = Index.new(@column_names)
14
+ self.index = RangeIndex.new(0 ... length)
16
15
  end
17
16
 
18
- attr_reader :column_names, :data
17
+ attr_reader :data, :column_names
18
+
19
+ def_delegators :data, :size
20
+
21
+ alias length size
22
+
23
+ def column_length
24
+ column_names.length
25
+ end
19
26
 
20
27
  def [](row, column)
21
- fetch_records unless @columns
28
+ fetch_records unless @columns_cache
22
29
  if row
23
- @columns[resolve_column_index(column)][row]
30
+ @columns_cache[resolve_column_index(column)][row]
24
31
  else
25
- @columns[resolve_column_index(column)]
32
+ column_data = @columns_cache[resolve_column_index(column)]
33
+ Vector.new(column_data, index: index, name: column)
26
34
  end
27
35
  end
28
36
 
@@ -43,7 +51,7 @@ module Charty
43
51
  end
44
52
 
45
53
  private def fetch_records
46
- @columns = @data.pluck(*column_names).transpose
54
+ @columns_cache = @data.pluck(*column_names).transpose
47
55
  end
48
56
 
49
57
  private def check_type(type, data, name)
@@ -0,0 +1,69 @@
require "forwardable"

module Charty
  module TableAdapters
    # Common base for table adapters: keeps the column and row indexes and
    # defines equality between adapters.
    #
    # The code here calls +length+, +column_length+ and +[](row, column)+ on
    # itself without defining them, so subclasses have to supply them.
    class BaseAdapter
      extend Forwardable
      include Enumerable

      attr_reader :columns, :index

      # Validates +values+ against +column_length+ and stores the resulting
      # index object.
      def columns=(values)
        @columns = check_and_convert_index(values, :columns, column_length)
      end

      # Column labels as an Array.
      def column_names
        columns.to_a
      end

      # Validates +values+ against +length+ and stores the resulting index
      # object.
      def index=(values)
        @index = check_and_convert_index(values, :index, length)
      end

      # Adapters are equal when the other object is a BaseAdapter with equal
      # columns, equal index and equal data in every column.
      def ==(other)
        return false unless other.is_a?(BaseAdapter)
        return false if columns != other.columns || index != other.index

        compare_data_equality(other)
      end

      # Compares the two adapters column by column, stopping at the first
      # column that differs.
      def compare_data_equality(other)
        columns.each do |name|
          return false if self[nil, name] != other[nil, name]
        end
        true
      end

      # Normalizes +values+ into an index object.
      #
      # Index instances are returned as they are, Ranges are wrapped in a
      # RangeIndex, and anything Array.try_convert accepts is wrapped in an
      # Index.
      #
      # @raise [ArgumentError] when +values+ cannot be converted to an Array
      #   or its size differs from +expected_length+
      private def check_and_convert_index(values, name, expected_length)
        unless values.is_a?(Index) || values.is_a?(Range)
          converted = Array.try_convert(values)
          unless converted
            raise ArgumentError, "invalid object for %s: %p" % [name, values]
          end
          values = converted
        end

        if expected_length != values.size
          raise ArgumentError,
                "invalid length for %s (%d for %d)" % [name, values.size, expected_length]
        end

        case values
        when Index then values
        when Range then RangeIndex.new(values)
        when Array then Index.new(values)
        end
      end
    end
  end
end