charty 0.2.0 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +71 -0
- data/.github/workflows/nmatrix.yml +67 -0
- data/.github/workflows/pycall.yml +86 -0
- data/Dockerfile.dev +9 -1
- data/Gemfile +18 -0
- data/README.md +177 -9
- data/Rakefile +4 -5
- data/charty.gemspec +10 -4
- data/examples/Gemfile +1 -0
- data/examples/active_record.ipynb +1 -1
- data/examples/daru.ipynb +1 -1
- data/examples/iris_dataset.ipynb +1 -1
- data/examples/nmatrix.ipynb +1 -1
- data/examples/{numo-narray.ipynb → numo_narray.ipynb} +1 -1
- data/examples/palette.rb +71 -0
- data/examples/sample.png +0 -0
- data/examples/sample_images/hist_gruff.png +0 -0
- data/examples/sample_pyplot.ipynb +40 -38
- data/images/penguins_body_mass_g_flipper_length_mm_scatter_plot.png +0 -0
- data/images/penguins_body_mass_g_flipper_length_mm_species_scatter_plot.png +0 -0
- data/images/penguins_body_mass_g_flipper_length_mm_species_sex_scatter_plot.png +0 -0
- data/images/penguins_species_body_mass_g_bar_plot_h.png +0 -0
- data/images/penguins_species_body_mass_g_bar_plot_v.png +0 -0
- data/images/penguins_species_body_mass_g_box_plot_h.png +0 -0
- data/images/penguins_species_body_mass_g_box_plot_v.png +0 -0
- data/images/penguins_species_body_mass_g_sex_bar_plot_v.png +0 -0
- data/images/penguins_species_body_mass_g_sex_box_plot_v.png +0 -0
- data/lib/charty.rb +13 -1
- data/lib/charty/backend_methods.rb +8 -0
- data/lib/charty/backends.rb +26 -1
- data/lib/charty/backends/bokeh.rb +31 -31
- data/lib/charty/backends/{google_chart.rb → google_charts.rb} +75 -33
- data/lib/charty/backends/gruff.rb +14 -3
- data/lib/charty/backends/plotly.rb +774 -9
- data/lib/charty/backends/pyplot.rb +611 -34
- data/lib/charty/backends/rubyplot.rb +2 -2
- data/lib/charty/backends/unicode_plot.rb +79 -0
- data/lib/charty/dash_pattern_generator.rb +57 -0
- data/lib/charty/index.rb +213 -0
- data/lib/charty/linspace.rb +1 -1
- data/lib/charty/plot_methods.rb +254 -0
- data/lib/charty/plotter.rb +10 -10
- data/lib/charty/plotters.rb +12 -0
- data/lib/charty/plotters/abstract_plotter.rb +243 -0
- data/lib/charty/plotters/bar_plotter.rb +201 -0
- data/lib/charty/plotters/box_plotter.rb +79 -0
- data/lib/charty/plotters/categorical_plotter.rb +380 -0
- data/lib/charty/plotters/count_plotter.rb +7 -0
- data/lib/charty/plotters/estimation_support.rb +84 -0
- data/lib/charty/plotters/line_plotter.rb +300 -0
- data/lib/charty/plotters/random_support.rb +25 -0
- data/lib/charty/plotters/relational_plotter.rb +635 -0
- data/lib/charty/plotters/scatter_plotter.rb +80 -0
- data/lib/charty/plotters/vector_plotter.rb +6 -0
- data/lib/charty/statistics.rb +114 -0
- data/lib/charty/table.rb +161 -15
- data/lib/charty/table_adapters.rb +2 -0
- data/lib/charty/table_adapters/active_record_adapter.rb +17 -9
- data/lib/charty/table_adapters/base_adapter.rb +166 -0
- data/lib/charty/table_adapters/daru_adapter.rb +41 -3
- data/lib/charty/table_adapters/datasets_adapter.rb +17 -2
- data/lib/charty/table_adapters/hash_adapter.rb +143 -16
- data/lib/charty/table_adapters/narray_adapter.rb +25 -6
- data/lib/charty/table_adapters/nmatrix_adapter.rb +15 -5
- data/lib/charty/table_adapters/pandas_adapter.rb +163 -0
- data/lib/charty/util.rb +28 -0
- data/lib/charty/vector.rb +69 -0
- data/lib/charty/vector_adapters.rb +187 -0
- data/lib/charty/vector_adapters/array_adapter.rb +101 -0
- data/lib/charty/vector_adapters/daru_adapter.rb +163 -0
- data/lib/charty/vector_adapters/narray_adapter.rb +182 -0
- data/lib/charty/vector_adapters/nmatrix_adapter.rb +37 -0
- data/lib/charty/vector_adapters/numpy_adapter.rb +168 -0
- data/lib/charty/vector_adapters/pandas_adapter.rb +199 -0
- data/lib/charty/version.rb +1 -1
- metadata +121 -22
- data/.travis.yml +0 -10
@@ -0,0 +1,80 @@
|
|
1
|
+
module Charty
  module Plotters
    # Relational plotter that renders its :x / :y variables as a scatter plot,
    # optionally passing :color, :style and :size variables (with their
    # mappers) to the backend.
    class ScatterPlotter < RelationalPlotter
      # @param data the data source, forwarded to RelationalPlotter
      # @param variables [Hash] may contain :x, :y, :color, :style and :size
      # @param options remaining keyword options, forwarded to the superclass
      def initialize(data: nil, variables: {}, **options, &block)
        x, y, color, style, size = variables.values_at(:x, :y, :color, :style, :size)
        super(x, y, color, style, size, data: data, **options, &block)
      end

      attr_reader :alpha

      # Sets the alpha setting.
      #
      # Accepts nil, :auto, "auto" (stored as :auto) or a number within 0..1.
      #
      # @raise [ArgumentError] for a number outside 0..1 or any other value
      def alpha=(val)
        case val
        when nil, :auto, 0..1
          @alpha = val
        when "auto"
          @alpha = val.to_sym
        when Numeric
          # The value is wrapped in an array so that String#% never treats it
          # as a list of format arguments.
          raise ArgumentError,
                "the given alpha is out of bounds " +
                "(%p for nil, :auto, or number 0..1)" % [val]
        else
          raise ArgumentError,
                "invalid value of alpha " +
                "(%p for nil, :auto, or number in 0..1)" % [val]
        end
      end

      attr_reader :line_width, :edge_color

      # Stores the value returned by check_number (nil is allowed).
      def line_width=(val)
        @line_width = check_number(val, :line_width, allow_nil: true)
      end

      # Stores the value returned by check_color (nil is allowed).
      def edge_color=(val)
        # Must be stored in @edge_color: writing it to @line_width left
        # edge_color permanently nil and overwrote the configured line width.
        @edge_color = check_color(val, :edge_color, allow_nil: true)
      end

      # Draws the points first, then labels the axes.
      private def render_plot(backend, **)
        draw_points(backend)
        annotate_axes(backend)
      end

      # Sets up the color/size/style mappers, drops rows with missing values
      # and hands the remaining data to the backend's scatter method.
      private def draw_points(backend)
        map_color(palette: palette, order: color_order, norm: color_norm)
        map_size(sizes: sizes, order: size_order, norm: size_norm)
        map_style(markers: markers, order: style_order)

        data = @plot_data.drop_na

        # TODO: should pass key_color to backend's scatter method.
        #       In pyplot backend, it is passed as color parameter.

        x = data[:x]
        y = data[:y]
        # Semantic columns are only read when the variable was assigned;
        # otherwise the corresponding local stays nil.
        color = data[:color] if @variables.key?(:color)
        style = data[:style] if @variables.key?(:style)
        size = data[:size] if @variables.key?(:size)

        # TODO: key_color
        backend.scatter(
          x, y, @variables,
          color: color, color_mapper: @color_mapper,
          style: style, style_mapper: @style_mapper,
          size: size, size_mapper: @size_mapper
        )

        if legend
          backend.add_scatter_plot_legend(@variables, @color_mapper, @size_mapper, @style_mapper, legend)
        end
      end

      # Uses the :x and :y variable entries as axis labels when they are set.
      private def annotate_axes(backend)
        xlabel = self.variables[:x]
        ylabel = self.variables[:y]
        backend.set_xlabel(xlabel) unless xlabel.nil?
        backend.set_ylabel(ylabel) unless ylabel.nil?
      end
    end
  end
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
module Charty
  # Statistics helpers: mean, standard deviation, bootstrap resampling,
  # bootstrap confidence intervals and percentiles.
  module Statistics
    begin
      require "enumerable/statistics"

      # Mean of +enum+, delegated to the enumerable-statistics extension.
      def self.mean(enum)
        enum.mean
      end

      # Standard deviation of +enum+ (sample by default, population when
      # +population+ is true), delegated to enumerable-statistics.
      def self.stdev(enum, population: false)
        enum.stdev(population: population)
      end
    rescue LoadError
      # Pure-Ruby fallbacks used when enumerable-statistics cannot be loaded.

      # Arithmetic mean of +enum+ as a Float.
      def self.mean(enum)
        xs = enum.to_a
        xs.sum / xs.length.to_f
      end

      # Standard deviation of +enum+; divides by n for the population form
      # and by n - 1 otherwise.
      def self.stdev(enum, population: false)
        xs = enum.to_a
        n = xs.length
        mean = xs.sum.to_f / n
        ddof = population ? 0 : 1
        var = xs.map {|x| (x - mean)**2 }.sum / (n - ddof)
        Math.sqrt(var)
      end
    end

    # Draws +n_boot+ resamples (with replacement) of +vector+ and returns the
    # estimator value of each resample.
    #
    # @param vector object responding to +size+ and +values_at+
    # @param n_boot [Integer] number of resamples
    # @param func estimator, validated by EstimationSupport.check_estimator;
    #   only :mean is evaluated here, any other value yields nil entries
    # @param units when given, structured bootstrapping is requested
    #   (currently raises NotImplementedError)
    # @param random validated by RandomSupport.check_random
    # @return [Array] the bootstrap distribution
    def self.bootstrap(vector, n_boot: 2000, func: :mean, units: nil, random: nil)
      n = vector.size
      random = Charty::Plotters::RandomSupport.check_random(random)
      func = Charty::Plotters::EstimationSupport.check_estimator(func)

      if units
        return structured_bootstrap(vector, n_boot, units, func, random)
      end

      if defined?(Pandas::Series) || defined?(Numpy::NDArray)
        boot_dist = bootstrap_optimized_for_pycall(vector, n_boot, random, func)
        return boot_dist if boot_dist
      end

      Array.new(n_boot) do
        resampler = Array.new(n) { random.rand(n) }
        w = vector.values_at(*resampler)

        case func
        when :mean
          mean(w)
        end
      end
    end

    # Bootstrap variant for pandas / numpy backed data.  Returns nil when
    # +vector+ is not one of the supported types so the caller can fall back
    # to the generic implementation.
    private_class_method def self.bootstrap_optimized_for_pycall(vector, n_boot, random, func)
      case
      when vector.is_a?(Charty::Vector)
        bootstrap_optimized_for_pycall(vector.data, n_boot, random, func)

      when (defined?(Pandas::Series) && vector.is_a?(Pandas::Series)) ||
           (defined?(Numpy::NDArray) && vector.is_a?(Numpy::NDArray))
        # numpy is also available when pandas is available
        n = vector.size
        resampler = Numpy.empty(n, dtype: Numpy.intp)
        Array.new(n_boot) do
          # TODO: Use Numo and MemoryView to reduce execution time
          # resampler = Numo::Int64.new(n).rand(n)
          # w = Numpy.take(vector, resampler)
          n.times {|j| resampler[j] = random.rand(n) }
          w = vector.take(resampler)

          case func
          when :mean
            w.mean
          end
        end
      end
    end

    # Placeholder for bootstrapping that respects sampling units.
    #
    # @raise [NotImplementedError] always
    private_class_method def self.structured_bootstrap(vector, n_boot, units, func, random)
      raise NotImplementedError,
            "structured bootstrapping has not been supported yet"
    end

    # Returns the bounds of the central +width+ percent interval of the
    # bootstrap distribution (e.g. width 95 uses percentiles 2.5 and 97.5).
    #
    # The leading positional arguments are forwarded unchanged to
    # Statistics.bootstrap, which accepts a single vector.
    def self.bootstrap_ci(*vectors, width, n_boot: 2000, func: :mean, units: nil, random: nil)
      boot = bootstrap(*vectors, n_boot: n_boot, func: func, units: units, random: random)
      # Divide by a Float: with integer division an odd Integer width such as
      # 95 would produce the percentiles [3, 97] instead of [2.5, 97.5].
      half = width / 2.0
      q = [50 - half, 50 + half]
      if boot.respond_to?(:percentile)
        boot.percentile(q)
      else
        percentile(boot, q)
      end
    end

    # Computes the percentiles listed in +q+ (numbers in 0..100) of +a+ by
    # linear interpolation between neighbouring sorted values.
    #
    # TODO: optimize with introselect algorithm
    def self.percentile(a, q)
      return mean(a) if a.size == 0

      a = a.sort
      n = a.size
      q.map do |x|
        x = n * (x / 100.0)
        i = x.floor
        # Positions at or beyond the last element map to the maximum; using
        # >= keeps q = 100 from indexing past the end of the array.
        if i >= n - 1
          a[n - 1]
        else
          t = x - i
          (1-t)*a[i] + t*a[i+1]
        end
      end
    end
  end
end
|
data/lib/charty/table.rb
CHANGED
@@ -21,33 +21,67 @@ module Charty
|
|
21
21
|
else
|
22
22
|
@adapter = adapter_class.new(data, **kwargs)
|
23
23
|
end
|
24
|
+
|
25
|
+
@column_cache = {}
|
24
26
|
end
|
25
27
|
|
26
28
|
attr_reader :adapter
|
27
29
|
|
30
|
+
def_delegators :adapter, :length, :column_length
|
31
|
+
|
32
|
+
def_delegators :adapter, :columns, :columns=
|
33
|
+
def_delegators :adapter, :index, :index=
|
34
|
+
|
28
35
|
def_delegator :@adapter, :column_names
|
29
36
|
|
30
|
-
def
|
31
|
-
|
37
|
+
# Tells whether +name+ identifies a column.  A String also matches the
# Symbol spelling of a column name and vice versa; any other kind of
# object only matches when it is literally present in column_names.
def column?(name)
  return true if column_names.include?(name)

  if name.is_a?(String)
    column_names.include?(name.to_sym)
  elsif name.is_a?(Symbol)
    column_names.include?(name.to_s)
  else
    false
  end
end
|
49
|
+
|
50
|
+
def_delegator :@adapter, :data, :raw_data
|
51
|
+
|
52
|
+
# Equality: identical objects are equal; two Charty::Table instances are
# equal when their adapters are equal; everything else is deferred to the
# inherited implementation.
def ==(other)
  return true if equal?(other)
  return adapter == other.adapter if other.is_a?(Charty::Table)

  super
end
|
62
|
+
|
63
|
+
def empty?
|
64
|
+
length == 0
|
32
65
|
end
|
33
66
|
|
34
|
-
def [](
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
column = args[1]
|
44
|
-
@adapter[row, column]
|
67
|
+
# Returns the column identified by +key+, as produced by the adapter's
# [nil, key] lookup.  The result is memoized in @column_cache under the
# Symbol form of the key, so the adapter is asked only once per column.
#
# @param key [Symbol, String, #to_str] the column name
# @raise [ArgumentError] when +key+ is neither a Symbol nor convertible to
#   a String
def [](key)
  column_key =
    if key.is_a?(Symbol)
      key
    else
      name = String.try_convert(key)
      if name.nil?
        # Without this check the failure surfaced as NoMethodError on nil.
        raise ArgumentError,
              "column name must be a Symbol or a String (%p given)" % [key]
      end
      name.to_sym
    end

  @column_cache.fetch(column_key) do
    @column_cache[column_key] = @adapter[nil, column_key]
  end
end
|
50
80
|
|
81
|
+
def group_by(grouper, sort: true, drop_na: true)
|
82
|
+
adapter.group_by(self, grouper, sort, drop_na)
|
83
|
+
end
|
84
|
+
|
51
85
|
def to_a(x=nil, y=nil, z=nil)
|
52
86
|
case
|
53
87
|
when defined?(Daru::DataFrame) && table.kind_of?(Daru::DataFrame)
|
@@ -80,5 +114,117 @@ module Charty
|
|
80
114
|
i += 1
|
81
115
|
end
|
82
116
|
end
|
117
|
+
|
118
|
+
def drop_na
|
119
|
+
@adapter.drop_na || self
|
120
|
+
end
|
121
|
+
|
122
|
+
def_delegator :adapter, :sort_values
|
123
|
+
|
124
|
+
def_delegator :adapter, :reset_index
|
125
|
+
|
126
|
+
# Common superclass of the group-by implementations.
class GroupByBase
end

# Group-by over a table: rows are bucketed by the value(s) of the grouper,
# and each bucket stores the positions of its rows.
class HashGroupBy < GroupByBase
  # @param table the table to group; must respond to +length+ and +[]+
  # @param grouper [Symbol, String, Array, Charty::Vector] a column name, a
  #   list of column names, or a vector with one entry per row
  # @param sort when truthy, groups are ordered by key
  # @param drop_na when truthy, groups with a missing key are discarded
  def initialize(table, grouper, sort, drop_na)
    @table = table
    @grouper = check_grouper(grouper)
    init_groups(sort, drop_na)
  end

  # Validates +grouper+ and returns it unchanged.
  #
  # @raise [ArgumentError] for a vector whose length differs from the
  #   table's, or for an unsupported kind of object
  # @raise [NotImplementedError] for callable groupers
  private def check_grouper(grouper)
    case grouper
    when Symbol, String, Array
      # TODO check column existence
      return grouper
    when Charty::Vector
      if @table.length != grouper.length
        # The messages below must reference +grouper+; the previous code
        # used an undefined local and raised NameError instead.
        raise ArgumentError,
              "Wrong number of items in grouper array " +
              "(%p for %p)" % [grouper.length, @table.length]
      end
      return grouper
    when ->(x) { x.respond_to?(:call) }
      raise NotImplementedError,
            "A callable grouper is unsupported"
    else
      raise ArgumentError,
            "Unable to recognize the value for `grouper`: %p" % [grouper]
    end
  end

  # Builds @indices, a Hash mapping each group key to the row positions
  # belonging to it.  With an Array grouper the key is an Array holding one
  # value per listed column.
  private def init_groups(sort, drop_na)
    case @grouper
    when Symbol, String
      column = @table[@grouper]
      @indices = (0 ... @table.length).group_by do |i|
        column.data[i]
      end
    when Array
      @indices = (0 ... @table.length).group_by { |i|
        @grouper.map {|j| @table[j].data[i] }
      }
    when Charty::Vector
      @indices = (0 ... @table.length).group_by do |i|
        @grouper.data[i]
      end
    end

    if drop_na
      case @grouper
      when Array
        # A composite key is dropped as soon as one component is missing.
        @indices.reject! {|key, _| key.any? {|k| Util.missing?(k) } }
      else
        @indices.reject! {|key, _| Util.missing?(key) }
      end
    end

    if sort
      @indices = @indices.sort_by {|key, _| key }.to_h
    end
  end

  # Returns a shallow copy of the key => row positions mapping.
  def indices
    @indices.dup
  end

  # Returns the group keys in iteration order.
  def group_keys
    @indices.keys
  end

  # Yields every group key.
  def each_group_key(&block)
    @indices.each_key(&block)
  end

  # Calls +block+ with each group's table followed by +args+, and builds a
  # new Charty::Table from the results, indexed by the group keys.
  def apply(*args, &block)
    Charty::Table.new(
      each_group.map { |_key, table|
        block.call(table, *args)
      },
      index: Charty::Index.new(@indices.keys, name: @grouper)
    )
  end

  # Yields each group key together with the table of that group; returns an
  # enumerator when no block is given.
  def each_group
    return enum_for(__method__) unless block_given?

    @indices.each_key do |key|
      yield(key, self[key])
    end
  end

  # Returns the rows of group +key+ as a new Charty::Table indexed by their
  # original row positions, or nil when there is no such group.
  def [](key)
    return nil unless @indices.key?(key)

    index = @indices[key]
    Charty::Table.new(
      @table.column_names.map {|col|
        [col, @table[col].values_at(*index)]
      }.to_h,
      index: index
    )
  end
end
|
83
229
|
end
|
84
230
|
end
|
@@ -15,9 +15,11 @@ module Charty
|
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
+
require_relative 'table_adapters/base_adapter'
|
18
19
|
require_relative 'table_adapters/hash_adapter'
|
19
20
|
require_relative 'table_adapters/narray_adapter'
|
20
21
|
require_relative 'table_adapters/datasets_adapter'
|
21
22
|
require_relative 'table_adapters/daru_adapter'
|
22
23
|
require_relative 'table_adapters/active_record_adapter'
|
23
24
|
require_relative 'table_adapters/nmatrix_adapter'
|
25
|
+
require_relative 'table_adapters/pandas_adapter'
|
@@ -1,10 +1,8 @@
|
|
1
1
|
module Charty
|
2
2
|
module TableAdapters
|
3
|
-
class ActiveRecordAdapter
|
3
|
+
class ActiveRecordAdapter < BaseAdapter
|
4
4
|
TableAdapters.register(:active_record, self)
|
5
5
|
|
6
|
-
include Enumerable
|
7
|
-
|
8
6
|
def self.supported?(data)
|
9
7
|
defined?(ActiveRecord::Relation) && data.is_a?(ActiveRecord::Relation)
|
10
8
|
end
|
@@ -12,17 +10,27 @@ module Charty
|
|
12
10
|
def initialize(data)
|
13
11
|
@data = check_type(ActiveRecord::Relation, data, :data)
|
14
12
|
@column_names = @data.column_names.freeze
|
15
|
-
|
13
|
+
self.columns = Index.new(@column_names)
|
14
|
+
self.index = RangeIndex.new(0 ... length)
|
16
15
|
end
|
17
16
|
|
18
|
-
attr_reader :column_names
|
17
|
+
attr_reader :data, :column_names
|
18
|
+
|
19
|
+
def_delegators :data, :size
|
20
|
+
|
21
|
+
alias length size
|
22
|
+
|
23
|
+
def column_length
|
24
|
+
column_names.length
|
25
|
+
end
|
19
26
|
|
20
27
|
def [](row, column)
|
21
|
-
fetch_records unless @
|
28
|
+
fetch_records unless @columns_cache
|
22
29
|
if row
|
23
|
-
@
|
30
|
+
@columns_cache[resolve_column_index(column)][row]
|
24
31
|
else
|
25
|
-
@
|
32
|
+
column_data = @columns_cache[resolve_column_index(column)]
|
33
|
+
Vector.new(column_data, index: index, name: column)
|
26
34
|
end
|
27
35
|
end
|
28
36
|
|
@@ -43,7 +51,7 @@ module Charty
|
|
43
51
|
end
|
44
52
|
|
45
53
|
private def fetch_records
|
46
|
-
@
|
54
|
+
@columns_cache = @data.pluck(*column_names).transpose
|
47
55
|
end
|
48
56
|
|
49
57
|
private def check_type(type, data, name)
|