charty 0.2.3 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +56 -23
- data/.github/workflows/nmatrix.yml +67 -0
- data/.github/workflows/pycall.yml +86 -0
- data/Gemfile +18 -0
- data/README.md +172 -4
- data/Rakefile +4 -5
- data/charty.gemspec +10 -6
- data/examples/sample_images/hist_gruff.png +0 -0
- data/images/penguins_body_mass_g_flipper_length_mm_scatter_plot.png +0 -0
- data/images/penguins_body_mass_g_flipper_length_mm_species_scatter_plot.png +0 -0
- data/images/penguins_body_mass_g_flipper_length_mm_species_sex_scatter_plot.png +0 -0
- data/images/penguins_species_body_mass_g_bar_plot_h.png +0 -0
- data/images/penguins_species_body_mass_g_bar_plot_v.png +0 -0
- data/images/penguins_species_body_mass_g_box_plot_h.png +0 -0
- data/images/penguins_species_body_mass_g_box_plot_v.png +0 -0
- data/images/penguins_species_body_mass_g_sex_bar_plot_v.png +0 -0
- data/images/penguins_species_body_mass_g_sex_box_plot_v.png +0 -0
- data/lib/charty.rb +8 -1
- data/lib/charty/backends/bokeh.rb +2 -2
- data/lib/charty/backends/google_charts.rb +1 -1
- data/lib/charty/backends/gruff.rb +14 -3
- data/lib/charty/backends/plotly.rb +731 -32
- data/lib/charty/backends/plotly_helpers/html_renderer.rb +203 -0
- data/lib/charty/backends/plotly_helpers/notebook_renderer.rb +87 -0
- data/lib/charty/backends/plotly_helpers/plotly_renderer.rb +121 -0
- data/lib/charty/backends/pyplot.rb +514 -66
- data/lib/charty/backends/rubyplot.rb +1 -1
- data/lib/charty/cache_dir.rb +27 -0
- data/lib/charty/dash_pattern_generator.rb +57 -0
- data/lib/charty/index.rb +213 -0
- data/lib/charty/iruby_helper.rb +18 -0
- data/lib/charty/linspace.rb +1 -1
- data/lib/charty/plot_methods.rb +283 -8
- data/lib/charty/plotter.rb +2 -2
- data/lib/charty/plotters.rb +11 -0
- data/lib/charty/plotters/abstract_plotter.rb +186 -16
- data/lib/charty/plotters/bar_plotter.rb +189 -7
- data/lib/charty/plotters/box_plotter.rb +64 -11
- data/lib/charty/plotters/categorical_plotter.rb +272 -40
- data/lib/charty/plotters/count_plotter.rb +7 -0
- data/lib/charty/plotters/distribution_plotter.rb +143 -0
- data/lib/charty/plotters/estimation_support.rb +84 -0
- data/lib/charty/plotters/histogram_plotter.rb +186 -0
- data/lib/charty/plotters/line_plotter.rb +300 -0
- data/lib/charty/plotters/random_support.rb +25 -0
- data/lib/charty/plotters/relational_plotter.rb +635 -0
- data/lib/charty/plotters/scatter_plotter.rb +80 -0
- data/lib/charty/plotters/vector_plotter.rb +6 -0
- data/lib/charty/statistics.rb +96 -2
- data/lib/charty/table.rb +160 -15
- data/lib/charty/table_adapters.rb +2 -0
- data/lib/charty/table_adapters/active_record_adapter.rb +17 -9
- data/lib/charty/table_adapters/base_adapter.rb +166 -0
- data/lib/charty/table_adapters/daru_adapter.rb +39 -3
- data/lib/charty/table_adapters/datasets_adapter.rb +13 -2
- data/lib/charty/table_adapters/hash_adapter.rb +141 -16
- data/lib/charty/table_adapters/narray_adapter.rb +25 -6
- data/lib/charty/table_adapters/nmatrix_adapter.rb +15 -5
- data/lib/charty/table_adapters/pandas_adapter.rb +163 -0
- data/lib/charty/util.rb +28 -0
- data/lib/charty/vector.rb +69 -0
- data/lib/charty/vector_adapters.rb +187 -0
- data/lib/charty/vector_adapters/array_adapter.rb +101 -0
- data/lib/charty/vector_adapters/daru_adapter.rb +163 -0
- data/lib/charty/vector_adapters/narray_adapter.rb +182 -0
- data/lib/charty/vector_adapters/nmatrix_adapter.rb +37 -0
- data/lib/charty/vector_adapters/numpy_adapter.rb +168 -0
- data/lib/charty/vector_adapters/pandas_adapter.rb +199 -0
- data/lib/charty/version.rb +1 -1
- metadata +92 -25
@@ -0,0 +1,80 @@
|
|
1
|
+
module Charty
|
2
|
+
module Plotters
|
3
|
+
class ScatterPlotter < RelationalPlotter
|
4
|
+
def initialize(data: nil, variables: {}, **options, &block)
|
5
|
+
x, y, color, style, size = variables.values_at(:x, :y, :color, :style, :size)
|
6
|
+
super(x, y, color, style, size, data: data, **options, &block)
|
7
|
+
end
|
8
|
+
|
9
|
+
attr_reader :alpha
|
10
|
+
|
11
|
+
# Sets the opacity used for the scatter markers.
#
# Accepts nil, :auto (or the string "auto", which is stored as the symbol),
# or a number within 0..1; the accepted value is stored in @alpha.
#
# Raises ArgumentError for a number outside 0..1 and for any other kind of
# value.
def alpha=(val)
  case val
  when nil, :auto, 0..1
    @alpha = val
  when "auto"
    @alpha = val.to_sym
  when Numeric
    raise ArgumentError,
          "the given alpha is out of bounds " \
          "(#{val.inspect} for nil, :auto, or number 0..1)"
  else
    # Interpolate val.inspect rather than using `"%p" % val`: String#% spreads
    # an Array argument and would report only its first element.
    raise ArgumentError,
          "invalid value of alpha " \
          "(#{val.inspect} for nil, :auto, or number in 0..1)"
  end
end
|
27
|
+
|
28
|
+
attr_reader :line_width, :edge_color
|
29
|
+
|
30
|
+
def line_width=(val)
|
31
|
+
@line_width = check_number(val, :line_width, allow_nil: true)
|
32
|
+
end
|
33
|
+
|
34
|
+
# Sets the color used for marker edges.
#
# The value is passed through check_color (nil is allowed) and stored in
# @edge_color, the instance variable read by the edge_color reader.
def edge_color=(val)
  # Store into @edge_color: assigning @line_width here would overwrite the
  # line width and leave edge_color unset.
  @edge_color = check_color(val, :edge_color, allow_nil: true)
end
|
37
|
+
|
38
|
+
private def render_plot(backend, **)
|
39
|
+
draw_points(backend)
|
40
|
+
annotate_axes(backend)
|
41
|
+
end
|
42
|
+
|
43
|
+
private def draw_points(backend)
|
44
|
+
map_color(palette: palette, order: color_order, norm: color_norm)
|
45
|
+
map_size(sizes: sizes, order: size_order, norm: size_norm)
|
46
|
+
map_style(markers: markers, order: style_order)
|
47
|
+
|
48
|
+
data = @plot_data.drop_na
|
49
|
+
|
50
|
+
# TODO: should pass key_color to backend's scatter method.
|
51
|
+
# In pyplot backend, it is passed as color parameter.
|
52
|
+
|
53
|
+
x = data[:x]
|
54
|
+
y = data[:y]
|
55
|
+
color = data[:color] if @variables.key?(:color)
|
56
|
+
style = data[:style] if @variables.key?(:style)
|
57
|
+
size = data[:size] if @variables.key?(:size)
|
58
|
+
|
59
|
+
# TODO: key_color
|
60
|
+
backend.scatter(
|
61
|
+
x, y, @variables,
|
62
|
+
color: color, color_mapper: @color_mapper,
|
63
|
+
style: style, style_mapper: @style_mapper,
|
64
|
+
size: size, size_mapper: @size_mapper
|
65
|
+
)
|
66
|
+
|
67
|
+
if legend
|
68
|
+
backend.add_scatter_plot_legend(@variables, @color_mapper, @size_mapper, @style_mapper, legend)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
private def annotate_axes(backend)
|
73
|
+
xlabel = self.variables[:x]
|
74
|
+
ylabel = self.variables[:y]
|
75
|
+
backend.set_xlabel(xlabel) unless xlabel.nil?
|
76
|
+
backend.set_ylabel(ylabel) unless ylabel.nil?
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
data/lib/charty/statistics.rb
CHANGED
@@ -7,8 +7,12 @@ module Charty
|
|
7
7
|
enum.mean
|
8
8
|
end
|
9
9
|
|
10
|
-
def self.stdev(enum)
|
11
|
-
enum.stdev
|
10
|
+
def self.stdev(enum, population: false)
|
11
|
+
enum.stdev(population: population)
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.histogram(ary, *args, **kwargs)
|
15
|
+
ary.histogram(*args, **kwargs)
|
12
16
|
end
|
13
17
|
rescue LoadError
|
14
18
|
def self.mean(enum)
|
@@ -24,6 +28,96 @@ module Charty
|
|
24
28
|
var = xs.map {|x| (x - mean)**2 }.sum / (n - ddof)
|
25
29
|
Math.sqrt(var)
|
26
30
|
end
|
31
|
+
|
32
|
+
def self.histogram(ary, *args, **kwargs)
|
33
|
+
raise NotImplementedError,
|
34
|
+
"histogram is currently supported only with enumerable-statistics"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.bootstrap(vector, n_boot: 2000, func: :mean, units: nil, random: nil)
|
39
|
+
n = vector.size
|
40
|
+
random = Charty::Plotters::RandomSupport.check_random(random)
|
41
|
+
func = Charty::Plotters::EstimationSupport.check_estimator(func)
|
42
|
+
|
43
|
+
if units
|
44
|
+
return structured_bootstrap(vector, n_boot, units, func, random)
|
45
|
+
end
|
46
|
+
|
47
|
+
if defined?(Pandas::Series) || defined?(Numpy::NDArray)
|
48
|
+
boot_dist = bootstrap_optimized_for_pycall(vector, n_boot, random, func)
|
49
|
+
return boot_dist if boot_dist
|
50
|
+
end
|
51
|
+
|
52
|
+
boot_dist = Array.new(n_boot) do |i|
|
53
|
+
resampler = Array.new(n) { random.rand(n) }
|
54
|
+
|
55
|
+
w ||= vector.values_at(*resampler)
|
56
|
+
|
57
|
+
case func
|
58
|
+
when :mean
|
59
|
+
mean(w)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
boot_dist
|
64
|
+
end
|
65
|
+
|
66
|
+
private_class_method def self.bootstrap_optimized_for_pycall(vector, n_boot, random, func)
|
67
|
+
case
|
68
|
+
when vector.is_a?(Charty::Vector)
|
69
|
+
bootstrap_optimized_for_pycall(vector.data, n_boot, random, func)
|
70
|
+
|
71
|
+
when defined?(Pandas::Series) && vector.is_a?(Pandas::Series) || vector.is_a?(Numpy::NDArray)
|
72
|
+
# numpy is also available when pandas is available
|
73
|
+
n = vector.size
|
74
|
+
resampler = Numpy.empty(n, dtype: Numpy.intp)
|
75
|
+
Array.new(n_boot) do |i|
|
76
|
+
# TODO: Use Numo and MemoryView to reduce execution time
|
77
|
+
# resampler = Numo::Int64.new(n).rand(n)
|
78
|
+
# w = Numpy.take(vector, resampler)
|
79
|
+
n.times {|i| resampler[i] = random.rand(n) }
|
80
|
+
w = vector.take(resampler)
|
81
|
+
|
82
|
+
case func
|
83
|
+
when :mean
|
84
|
+
w.mean
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
private_class_method def self.structured_bootstrap(vector, n_boot, units, func, random)
|
91
|
+
raise NotImplementedError,
|
92
|
+
"structured bootstrapping has not been supported yet"
|
93
|
+
end
|
94
|
+
|
95
|
+
# Computes a bootstrap confidence interval.
#
# vectors - the data forwarded to bootstrap.
# width   - the width of the interval in percent (e.g. 95).
# n_boot, func, units, random - forwarded unchanged to bootstrap.
#
# Returns the [50 - width/2, 50 + width/2] percentiles of the bootstrap
# distribution, using the distribution's own percentile method when it has
# one and the percentile helper otherwise.
def self.bootstrap_ci(*vectors, width, n_boot: 2000, func: :mean, units: nil, random: nil)
  boot = bootstrap(*vectors, n_boot: n_boot, func: func, units: units, random: random)
  # Halve in floating point: with an Integer width such as 95, `width / 2`
  # truncates to 47 and selects the 3rd/97th percentiles instead of 2.5/97.5.
  half_width = width / 2.0
  q = [50 - half_width, 50 + half_width]
  if boot.respond_to?(:percentile)
    boot.percentile(q)
  else
    percentile(boot, q)
  end
end
|
104
|
+
|
105
|
+
# TODO: optimize with introselect algorithm
|
106
|
+
# Computes percentiles of a by linear interpolation between neighboring
# sorted values.
#
# a - a collection responding to size and sort.
# q - an enumerable of percentages.
#
# Returns an Array with one value per entry of q. For an empty a, returns
# mean(a) instead.
#
# NOTE(review): the position is computed as n * q/100 rather than
# (n - 1) * q/100 — confirm this is the intended percentile definition.
def self.percentile(a, q)
  return mean(a) if a.size == 0

  sorted = a.sort
  last = sorted.size - 1
  q.map do |pct|
    pos = sorted.size * (pct / 100.0)
    i = pos.floor
    if i >= last
      # At or beyond the last element (e.g. pct == 100) there is no right
      # neighbor to interpolate with; return the maximum instead of reading
      # past the end of the array.
      sorted[last]
    else
      t = pos - i
      (1 - t) * sorted[i] + t * sorted[i + 1]
    end
  end
end
|
28
122
|
end
|
29
123
|
end
|
data/lib/charty/table.rb
CHANGED
@@ -21,34 +21,67 @@ module Charty
|
|
21
21
|
else
|
22
22
|
@adapter = adapter_class.new(data, **kwargs)
|
23
23
|
end
|
24
|
+
|
25
|
+
@column_cache = {}
|
24
26
|
end
|
25
27
|
|
26
28
|
attr_reader :adapter
|
27
29
|
|
30
|
+
def_delegators :adapter, :length, :column_length
|
31
|
+
|
32
|
+
def_delegators :adapter, :columns, :columns=
|
33
|
+
def_delegators :adapter, :index, :index=
|
34
|
+
|
28
35
|
def_delegator :@adapter, :column_names
|
36
|
+
|
37
|
+
def column?(name)
|
38
|
+
return true if column_names.include?(name)
|
39
|
+
|
40
|
+
case name
|
41
|
+
when String
|
42
|
+
column_names.include?(name.to_sym)
|
43
|
+
when Symbol
|
44
|
+
column_names.include?(name.to_s)
|
45
|
+
else
|
46
|
+
false
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
29
50
|
def_delegator :@adapter, :data, :raw_data
|
30
51
|
|
31
|
-
def
|
32
|
-
|
52
|
+
def ==(other)
|
53
|
+
return true if equal?(other)
|
54
|
+
|
55
|
+
case other
|
56
|
+
when Charty::Table
|
57
|
+
adapter == other.adapter
|
58
|
+
else
|
59
|
+
super
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def empty?
|
64
|
+
length == 0
|
33
65
|
end
|
34
66
|
|
35
|
-
def [](
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
column = args[1]
|
45
|
-
@adapter[row, column]
|
67
|
+
def [](key)
|
68
|
+
key = case key
|
69
|
+
when Symbol
|
70
|
+
key
|
71
|
+
else
|
72
|
+
String.try_convert(key).to_sym
|
73
|
+
end
|
74
|
+
if @column_cache.key?(key)
|
75
|
+
@column_cache[key]
|
46
76
|
else
|
47
|
-
|
48
|
-
raise ArgumentError, message
|
77
|
+
@column_cache[key] = @adapter[nil, key]
|
49
78
|
end
|
50
79
|
end
|
51
80
|
|
81
|
+
def group_by(grouper, sort: true, drop_na: true)
|
82
|
+
adapter.group_by(self, grouper, sort, drop_na)
|
83
|
+
end
|
84
|
+
|
52
85
|
def to_a(x=nil, y=nil, z=nil)
|
53
86
|
case
|
54
87
|
when defined?(Daru::DataFrame) && table.kind_of?(Daru::DataFrame)
|
@@ -81,5 +114,117 @@ module Charty
|
|
81
114
|
i += 1
|
82
115
|
end
|
83
116
|
end
|
117
|
+
|
118
|
+
def drop_na
|
119
|
+
@adapter.drop_na || self
|
120
|
+
end
|
121
|
+
|
122
|
+
def_delegator :adapter, :sort_values
|
123
|
+
|
124
|
+
def_delegator :adapter, :reset_index
|
125
|
+
|
126
|
+
class GroupByBase
|
127
|
+
end
|
128
|
+
|
129
|
+
class HashGroupBy < GroupByBase
|
130
|
+
def initialize(table, grouper, sort, drop_na)
|
131
|
+
@table = table
|
132
|
+
@grouper = check_grouper(grouper)
|
133
|
+
init_groups(sort, drop_na)
|
134
|
+
end
|
135
|
+
|
136
|
+
# Validates the grouper and returns it unchanged.
#
# A Symbol, String, or Array is accepted as-is (column existence is not
# checked yet). A Charty::Vector must have the same length as @table.
#
# Raises ArgumentError for a Charty::Vector of the wrong length or for an
# unrecognized value, and NotImplementedError for a callable grouper.
private def check_grouper(grouper)
  case grouper
  when Symbol, String, Array
    # TODO check column existence
    return grouper
  when Charty::Vector
    if @table.length != grouper.length
      # Report the grouper's own length; the former reference to an
      # undefined local raised NameError instead of this ArgumentError.
      raise ArgumentError,
            "Wrong number of items in grouper array " +
            "(%p for %p)" % [grouper.length, @table.length]
    end
    return grouper
  when ->(x) { x.respond_to?(:call) }
    raise NotImplementedError,
          "A callable grouper is unsupported"
  else
    # Wrap the value in an Array so String#% always formats the value itself.
    raise ArgumentError,
          "Unable to recognize the value for `grouper`: %p" % [grouper]
  end
end
|
156
|
+
|
157
|
+
private def init_groups(sort, drop_na)
|
158
|
+
case @grouper
|
159
|
+
when Symbol, String
|
160
|
+
column = @table[@grouper]
|
161
|
+
@indices = (0 ... @table.length).group_by do |i|
|
162
|
+
column.data[i]
|
163
|
+
end
|
164
|
+
when Array
|
165
|
+
@indices = (0 ... @table.length).group_by { |i|
|
166
|
+
@grouper.map {|j| @table[j].data[i] }
|
167
|
+
}
|
168
|
+
when Charty::Vector
|
169
|
+
@indices = (0 ... @table.length).group_by do |i|
|
170
|
+
@grouper.data[i]
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
if drop_na
|
175
|
+
case @grouper
|
176
|
+
when Array
|
177
|
+
@indices.reject! {|key, | key.any? {|k| Util.missing?(k) } }
|
178
|
+
else
|
179
|
+
@indices.reject! {|key, | Util.missing?(key) }
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
if sort
|
184
|
+
@indices = @indices.sort_by {|key, | key }.to_h
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
def indices
|
189
|
+
@indices.dup
|
190
|
+
end
|
191
|
+
|
192
|
+
def group_keys
|
193
|
+
@indices.keys
|
194
|
+
end
|
195
|
+
|
196
|
+
def each_group_key(&block)
|
197
|
+
@indices.each_key(&block)
|
198
|
+
end
|
199
|
+
|
200
|
+
def apply(*args, &block)
|
201
|
+
Charty::Table.new(
|
202
|
+
each_group.map { |_key, table|
|
203
|
+
block.call(table, *args)
|
204
|
+
},
|
205
|
+
index: Charty::Index.new(@indices.keys, name: @grouper)
|
206
|
+
)
|
207
|
+
end
|
208
|
+
|
209
|
+
def each_group
|
210
|
+
return enum_for(__method__) unless block_given?
|
211
|
+
|
212
|
+
@indices.each_key do |key|
|
213
|
+
yield(key, self[key])
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
def [](key)
|
218
|
+
return nil unless @indices.key?(key)
|
219
|
+
|
220
|
+
index = @indices[key]
|
221
|
+
Charty::Table.new(
|
222
|
+
@table.column_names.map {|col|
|
223
|
+
[col, @table[col].values_at(*index)]
|
224
|
+
}.to_h,
|
225
|
+
index: index
|
226
|
+
)
|
227
|
+
end
|
228
|
+
end
|
84
229
|
end
|
85
230
|
end
|
@@ -15,9 +15,11 @@ module Charty
|
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
+
require_relative 'table_adapters/base_adapter'
|
18
19
|
require_relative 'table_adapters/hash_adapter'
|
19
20
|
require_relative 'table_adapters/narray_adapter'
|
20
21
|
require_relative 'table_adapters/datasets_adapter'
|
21
22
|
require_relative 'table_adapters/daru_adapter'
|
22
23
|
require_relative 'table_adapters/active_record_adapter'
|
23
24
|
require_relative 'table_adapters/nmatrix_adapter'
|
25
|
+
require_relative 'table_adapters/pandas_adapter'
|
@@ -1,10 +1,8 @@
|
|
1
1
|
module Charty
|
2
2
|
module TableAdapters
|
3
|
-
class ActiveRecordAdapter
|
3
|
+
class ActiveRecordAdapter < BaseAdapter
|
4
4
|
TableAdapters.register(:active_record, self)
|
5
5
|
|
6
|
-
include Enumerable
|
7
|
-
|
8
6
|
def self.supported?(data)
|
9
7
|
defined?(ActiveRecord::Relation) && data.is_a?(ActiveRecord::Relation)
|
10
8
|
end
|
@@ -12,17 +10,27 @@ module Charty
|
|
12
10
|
def initialize(data)
|
13
11
|
@data = check_type(ActiveRecord::Relation, data, :data)
|
14
12
|
@column_names = @data.column_names.freeze
|
15
|
-
|
13
|
+
self.columns = Index.new(@column_names)
|
14
|
+
self.index = RangeIndex.new(0 ... length)
|
16
15
|
end
|
17
16
|
|
18
|
-
attr_reader :
|
17
|
+
attr_reader :data, :column_names
|
18
|
+
|
19
|
+
def_delegators :data, :size
|
20
|
+
|
21
|
+
alias length size
|
22
|
+
|
23
|
+
def column_length
|
24
|
+
column_names.length
|
25
|
+
end
|
19
26
|
|
20
27
|
def [](row, column)
|
21
|
-
fetch_records unless @
|
28
|
+
fetch_records unless @columns_cache
|
22
29
|
if row
|
23
|
-
@
|
30
|
+
@columns_cache[resolve_column_index(column)][row]
|
24
31
|
else
|
25
|
-
@
|
32
|
+
column_data = @columns_cache[resolve_column_index(column)]
|
33
|
+
Vector.new(column_data, index: index, name: column)
|
26
34
|
end
|
27
35
|
end
|
28
36
|
|
@@ -43,7 +51,7 @@ module Charty
|
|
43
51
|
end
|
44
52
|
|
45
53
|
private def fetch_records
|
46
|
-
@
|
54
|
+
@columns_cache = @data.pluck(*column_names).transpose
|
47
55
|
end
|
48
56
|
|
49
57
|
private def check_type(type, data, name)
|