polars-df 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +20 -0
- data/README.md +93 -0
- data/ext/polars/Cargo.toml +35 -0
- data/ext/polars/extconf.rb +4 -0
- data/ext/polars/src/conversion.rs +115 -0
- data/ext/polars/src/dataframe.rs +304 -0
- data/ext/polars/src/error.rs +24 -0
- data/ext/polars/src/file.rs +28 -0
- data/ext/polars/src/lazy/dataframe.rs +123 -0
- data/ext/polars/src/lazy/dsl.rs +298 -0
- data/ext/polars/src/lazy/mod.rs +3 -0
- data/ext/polars/src/lazy/utils.rs +13 -0
- data/ext/polars/src/lib.rs +256 -0
- data/ext/polars/src/series.rs +475 -0
- data/lib/polars/data_frame.rb +315 -0
- data/lib/polars/expr.rb +233 -0
- data/lib/polars/functions.rb +45 -0
- data/lib/polars/io.rb +39 -0
- data/lib/polars/lazy_frame.rb +139 -0
- data/lib/polars/lazy_functions.rb +121 -0
- data/lib/polars/lazy_group_by.rb +13 -0
- data/lib/polars/series.rb +261 -0
- data/lib/polars/string_expr.rb +17 -0
- data/lib/polars/utils.rb +47 -0
- data/lib/polars/version.rb +3 -0
- data/lib/polars/when.rb +15 -0
- data/lib/polars/when_then.rb +18 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +25 -0
- metadata +87 -0
@@ -0,0 +1,139 @@
|
|
1
|
+
module Polars
  # Ruby wrapper around a native lazy frame object stored in `_ldf`.
  # Every query method forwards to `_ldf` and wraps the native result again.
  class LazyFrame
    attr_accessor :_ldf

    # Builds a LazyFrame around an existing native lazy frame without
    # running `initialize`.
    def self._from_rbldf(rb_ldf)
      ldf = LazyFrame.allocate
      ldf._ldf = rb_ldf
      ldf
    end

    # Applies the optimization flags to the native plan, collects it and
    # wraps the result with `Utils.wrap_df`.
    #
    # `no_optimization: true` switches off predicate, projection and slice
    # pushdown as well as common subplan elimination. `allow_streaming: true`
    # also switches off common subplan elimination. `string_cache` is accepted
    # but not read by this method.
    def collect(
      type_coercion: true,
      predicate_pushdown: true,
      projection_pushdown: true,
      simplify_expression: true,
      string_cache: false,
      no_optimization: false,
      slice_pushdown: true,
      common_subplan_elimination: true,
      allow_streaming: false
    )
      if no_optimization
        predicate_pushdown = false
        projection_pushdown = false
        slice_pushdown = false
        common_subplan_elimination = false
      end

      common_subplan_elimination = false if allow_streaming

      ldf = _ldf.optimization_toggle(
        type_coercion,
        predicate_pushdown,
        projection_pushdown,
        simplify_expression,
        slice_pushdown,
        common_subplan_elimination,
        allow_streaming
      )
      Utils.wrap_df(ldf.collect)
    end

    # Returns a new LazyFrame filtered by `predicate`. A String predicate is
    # converted with `str_to_lit: false`, i.e. treated as a column name.
    def filter(predicate)
      rbexpr = Utils.expr_to_lit_or_expr(predicate, str_to_lit: false)._rbexpr
      self.class._from_rbldf(_ldf.filter(rbexpr))
    end

    # Returns a new LazyFrame with the given selection applied.
    def select(exprs)
      exprs = Utils.selection_to_rbexpr_list(exprs)
      self.class._from_rbldf(_ldf.select(exprs))
    end

    # Starts a lazy group-by over `by`; the returned LazyGroupBy produces
    # frames of the receiver's class.
    def groupby(by, maintain_order: false)
      rbexprs_by = Utils.selection_to_rbexpr_list(by)
      lgb = _ldf.groupby(rbexprs_by, maintain_order)
      LazyGroupBy.new(lgb, self.class)
    end

    # Joins this frame with `other`.
    #
    # Raises ArgumentError when `other` is not a LazyFrame, or when `how` is
    # not "cross" and neither `on` nor both `left_on` and `right_on` are given.
    def join(
      other,
      left_on: nil,
      right_on: nil,
      on: nil,
      how: "inner",
      suffix: "_right",
      allow_parallel: true,
      force_parallel: false
    )
      unless other.is_a?(LazyFrame)
        raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
      end

      # A cross join is issued with empty key lists.
      if how == "cross"
        return self.class._from_rbldf(
          _ldf.join(
            other._ldf, [], [], allow_parallel, force_parallel, how, suffix
          )
        )
      end

      if !on.nil?
        rbexprs = Utils.selection_to_rbexpr_list(on)
        rbexprs_left = rbexprs
        rbexprs_right = rbexprs
      elsif !left_on.nil? && !right_on.nil?
        rbexprs_left = Utils.selection_to_rbexpr_list(left_on)
        rbexprs_right = Utils.selection_to_rbexpr_list(right_on)
      else
        raise ArgumentError, "must specify `on` OR `left_on` and `right_on`"
      end

      self.class._from_rbldf(
        _ldf.join(
          other._ldf,
          rbexprs_left,
          rbexprs_right,
          allow_parallel,
          force_parallel,
          how,
          suffix
        )
      )
    end

    # Returns a new LazyFrame with the given columns added.
    #
    # `exprs` may be nil (no columns), a single Expr, or anything responding
    # to `to_a` whose elements are Expr or Series objects. Any other element
    # raises ArgumentError.
    def with_columns(exprs)
      exprs =
        if exprs.nil?
          []
        elsif exprs.is_a?(Expr)
          [exprs]
        else
          exprs.to_a
        end

      rbexprs = exprs.map do |e|
        case e
        when Expr
          e._rbexpr
        when Series
          # A Series becomes a literal expression and is appended like any
          # other entry, so the list built so far is kept.
          Utils.lit(e)._rbexpr
        else
          raise ArgumentError, "Expected an expression, got #{e}"
        end
      end

      self.class._from_rbldf(_ldf.with_columns(rbexprs))
    end

    # Single-column convenience wrapper around `with_columns`.
    def with_column(column)
      with_columns([column])
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
module Polars
  # Expression-building helpers. Aggregation helpers accept either a Series
  # (computed immediately by calling the Series method) or a column name
  # (turned into an expression through `col`).
  module LazyFunctions
    # Expression referring to the column `name`; a Symbol is converted to a
    # String first.
    def col(name)
      column_name = name.is_a?(Symbol) ? name.to_s : name
      Utils.wrap_expr(RbExpr.col(column_name))
    end

    # Standard deviation of a Series or of the named column.
    def std(column, ddof: 1)
      target = column.is_a?(Series) ? column : col(column)
      target.std(ddof: ddof)
    end

    # Variance of a Series or of the named column.
    def var(column, ddof: 1)
      target = column.is_a?(Series) ? column : col(column)
      target.var(ddof: ddof)
    end

    # Maximum of a Series, of a named column, or across a selection.
    def max(column)
      case column
      when Series
        column.max
      when String, Symbol
        col(column).max
      else
        exprs = Utils.selection_to_rbexpr_list(column)
        # TODO
        # NOTE(review): `_max_exprs` is not defined in this module; confirm
        # it is provided elsewhere before relying on this branch.
        Utils.wrap_expr(_max_exprs(exprs))
      end
    end

    # Minimum of a Series, of a named column, or across a selection.
    def min(column)
      case column
      when Series
        column.min
      when String, Symbol
        col(column).min
      else
        exprs = Utils.selection_to_rbexpr_list(column)
        # TODO
        # NOTE(review): `_min_exprs` is not defined in this module; confirm
        # it is provided elsewhere before relying on this branch.
        Utils.wrap_expr(_min_exprs(exprs))
      end
    end

    # Sum of a Series, of a named column, or across an Array selection.
    # Any other argument raises RuntimeError ("todo").
    def sum(column)
      case column
      when Series
        column.sum
      when String, Symbol
        col(column.to_s).sum
      when Array
        exprs = Utils.selection_to_rbexpr_list(column)
        # TODO
        # NOTE(review): `_sum_exprs` is not defined in this module; confirm
        # it is provided elsewhere before relying on this branch.
        Utils.wrap_expr(_sum_exprs(exprs))
      else
        raise "todo"
      end
    end

    # Mean of a Series or of the named column.
    def mean(column)
      return column.mean if column.is_a?(Series)

      col(column).mean
    end

    # Alias for `mean`.
    def avg(column)
      mean(column)
    end

    # Median of a Series or of the named column.
    def median(column)
      return column.median if column.is_a?(Series)

      col(column).median
    end

    # Literal expression for `value`.
    def lit(value)
      Utils.wrap_expr(RbExpr.lit(value))
    end

    # Range expression from `low` to `high` with the given `step`.
    #
    # The expression is cast when `dtype` is given and differs from "i64".
    # With `eager: true` the expression is evaluated on an empty DataFrame and
    # returned as a Series named "arange".
    def arange(low, high, step: 1, eager: false, dtype: nil)
      low_expr = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
      high_expr = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
      range_expr = Utils.wrap_expr(RbExpr.arange(low_expr._rbexpr, high_expr._rbexpr, step))

      range_expr = range_expr.cast(dtype) unless dtype.nil? || dtype == "i64"

      return range_expr unless eager

      DataFrame.new
        .select(range_expr)
        .to_series
        .rename("arange", in_place: true)
    end

    # Without an argument: expression selecting every column (`col("*")`).
    # With a String or Symbol: calls `all` on that column expression.
    # Any other argument raises RuntimeError ("todo").
    def all(name = nil)
      return col("*") if name.nil?
      raise "todo" unless name.is_a?(String) || name.is_a?(Symbol)

      col(name).all
    end

    # Starts a when/then chain from the given predicate.
    def when(expr)
      predicate = Utils.expr_to_lit_or_expr(expr)
      When.new(RbExpr.when(predicate._rbexpr))
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Polars
  # Pending lazy group-by: holds the native group-by object together with the
  # frame class used to wrap the aggregated result.
  class LazyGroupBy
    def initialize(lgb, lazyframe_class)
      @lazyframe_class = lazyframe_class
      @lgb = lgb
    end

    # Runs the aggregations on the native group-by and wraps the result with
    # the frame class's `_from_rbldf`.
    def agg(aggs)
      aggregated = @lgb.agg(Utils.selection_to_rbexpr_list(aggs))
      @lazyframe_class._from_rbldf(aggregated)
    end
  end
end
|
@@ -0,0 +1,261 @@
|
|
1
|
+
module Polars
  # Ruby wrapper around a native series object stored in `_s`.
  class Series
    attr_accessor :_s

    # Builds a series from `values`, which may be nil (empty series), a Range
    # or an Array. A non-String first argument is treated as `values` when no
    # second argument is given.
    #
    # Raises ArgumentError when both arguments are given and the name is not a
    # String, or when `values` has an unsupported type. `nan_to_null` is
    # accepted but not read by this method.
    def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
      # Handle case where values are passed as the first argument
      if !name.nil? && !name.is_a?(String)
        raise ArgumentError, "Series name must be a string." unless values.nil?

        values = name
        name = nil
      end

      name = "" if name.nil?

      self._s =
        case values
        when nil
          sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
        when Range
          # Upper bound is shifted by one for inclusive ranges.
          Polars.arange(
            values.first,
            values.last + (values.exclude_end? ? 0 : 1),
            step: 1,
            eager: true,
            dtype: dtype
          )
            .rename(name, in_place: true)
            ._s
        when Array
          sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
        else
          raise ArgumentError, "Series constructor called with unsupported type; got #{values.class.name}"
        end
    end

    # Wraps an existing native series without running `initialize`.
    def self._from_rbseries(s)
      series = Series.allocate
      series._s = s
      series
    end

    # Native dtype converted to a Symbol.
    def dtype
      _s.dtype.to_sym
    end

    def name
      _s.name
    end

    # One-element array holding the series length.
    def shape
      [_s.len]
    end

    def to_s
      _s.to_s
    end
    alias_method :inspect, :to_s

    # Binary operators: `other` must expose a native series through `_s`.

    def &(other)
      Utils.wrap_s(_s.bitand(other._s))
    end

    def |(other)
      Utils.wrap_s(_s.bitor(other._s))
    end

    def ^(other)
      Utils.wrap_s(_s.bitxor(other._s))
    end

    def +(other)
      Utils.wrap_s(_s.add(other._s))
    end

    def -(other)
      Utils.wrap_s(_s.sub(other._s))
    end

    def *(other)
      Utils.wrap_s(_s.mul(other._s))
    end

    def /(other)
      Utils.wrap_s(_s.div(other._s))
    end

    # Single-column DataFrame built from this series.
    def to_frame
      Utils.wrap_df(RbDataFrame.new([_s]))
    end

    def sum
      _s.sum
    end

    def mean
      _s.mean
    end

    def min
      _s.min
    end

    def max
      _s.max
    end

    # Returns a renamed copy and leaves the receiver's name untouched.
    def alias(name)
      # `dup` copies only the Ruby wrapper, so the copy would share `_s` and
      # the in-place native rename below would also rename the receiver.
      # A full-length slice is wrapped around its own native object instead.
      # NOTE(review): assumes the native `slice` returns a new handle rather
      # than the receiver itself - confirm against the extension.
      s = slice(0)
      s._s.rename(name)
      s
    end

    # Renames the series: in place (returning self) when `in_place` is true,
    # otherwise through `alias`.
    def rename(name, in_place: false)
      return self.alias(name) unless in_place

      _s.rename(name)
      self
    end

    def chunk_lengths
      _s.chunk_lengths
    end

    def n_chunks
      _s.n_chunks
    end

    def cumsum(reverse: false)
      Utils.wrap_s(_s.cumsum(reverse))
    end

    def cummin(reverse: false)
      Utils.wrap_s(_s.cummin(reverse))
    end

    def cummax(reverse: false)
      Utils.wrap_s(_s.cummax(reverse))
    end

    # Evaluates `limit(n)` on this column through a temporary frame.
    def limit(n = 10)
      to_frame.select(Utils.col(name).limit(n)).to_series
    end

    # Slice starting at `offset`; `length` defaults to the series length.
    def slice(offset, length = nil)
      length = len if length.nil?
      Utils.wrap_s(_s.slice(offset, length))
    end

    # Appends `other` to the native series and returns self.
    def append(other)
      _s.append(other._s)
      self
    end

    # Filters by `predicate`, which must expose a native series through `_s`.
    def filter(predicate)
      Utils.wrap_s(_s.filter(predicate._s))
    end

    # Evaluates `head(n)` on this column through a temporary frame.
    def head(n = 10)
      to_frame.select(Utils.col(name).head(n)).to_series
    end

    # Evaluates `tail(n)` on this column through a temporary frame.
    def tail(n = 10)
      to_frame.select(Utils.col(name).tail(n)).to_series
    end

    # Sorts the series; with `in_place: true` the receiver's native series is
    # replaced and self is returned, otherwise a new Series is returned.
    def sort(reverse: false, in_place: false)
      sorted = _s.sort(reverse)
      return Utils.wrap_s(sorted) unless in_place

      self._s = sorted
      self
    end

    def to_a
      _s.to_a
    end

    def len
      _s.len
    end

    # Rechunks the native series; returns self when `in_place` is true,
    # otherwise wraps the native return value.
    def rechunk(in_place: false)
      opt_s = _s.rechunk(in_place)
      in_place ? self : Utils.wrap_s(opt_s)
    end

    private

    # Picks a native constructor for `values` and calls it with
    # `(name, values, strict)`.
    #
    # An explicit `dtype` selects the constructor directly. Without one, an
    # empty sequence uses `dtype_if_empty` (or "f32"), and a non-empty
    # sequence is typed by the class of its first non-nil element.
    def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
      if (values.nil? || values.empty?) && dtype.nil?
        # Use the caller's guess for an empty sequence (e.g. comparisons
        # between self and other); otherwise default to Float32.
        dtype = dtype_if_empty || "f32"
      end

      if !dtype.nil? && is_polars_dtype(dtype)
        return polars_type_to_constructor(dtype).call(name, values, strict)
      end

      first_value = values.find { |v| !v.nil? }
      # An all-nil sequence carries no type information; fall back to Float
      # instead of failing on NilClass.
      ruby_type = first_value.nil? ? Float : first_value.class
      rb_type_to_constructor(ruby_type).call(name, values, strict)
    end

    POLARS_TYPE_TO_CONSTRUCTOR = {
      f32: RbSeries.method(:new_opt_f32),
      f64: RbSeries.method(:new_opt_f64),
      i8: RbSeries.method(:new_opt_i8),
      i16: RbSeries.method(:new_opt_i16),
      i32: RbSeries.method(:new_opt_i32),
      i64: RbSeries.method(:new_opt_i64),
      u8: RbSeries.method(:new_opt_u8),
      u16: RbSeries.method(:new_opt_u16),
      u32: RbSeries.method(:new_opt_u32),
      u64: RbSeries.method(:new_opt_u64),
      bool: RbSeries.method(:new_opt_bool),
      str: RbSeries.method(:new_str)
    }.freeze

    # Native constructor for a dtype name; raises ArgumentError when the
    # dtype is not in the table.
    def polars_type_to_constructor(dtype)
      POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
    rescue KeyError
      raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
    end

    RB_TYPE_TO_CONSTRUCTOR = {
      Float => RbSeries.method(:new_opt_f64),
      Integer => RbSeries.method(:new_opt_i64),
      String => RbSeries.method(:new_str),
      TrueClass => RbSeries.method(:new_opt_bool),
      FalseClass => RbSeries.method(:new_opt_bool)
    }.freeze

    # Native constructor for a Ruby class; raises ArgumentError when the
    # class is not in the table.
    def rb_type_to_constructor(dtype)
      RB_TYPE_TO_CONSTRUCTOR.fetch(dtype)
    rescue KeyError
      # RbSeries.method(:new_object)
      raise ArgumentError, "Cannot determine type"
    end

    # Currently accepts every value.
    def is_polars_dtype(data_type)
      true
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Polars
  # String-specific operations on an expression; holds the native expression
  # taken from the Expr it was built from.
  class StringExpr
    attr_accessor :_rbexpr

    def initialize(expr)
      @_rbexpr = expr._rbexpr
    end

    # Wraps the native `str_lengths` expression.
    def lengths
      native = _rbexpr.str_lengths
      Utils.wrap_expr(native)
    end

    # Wraps the native `str_contains` expression; `literal` is passed through
    # unchanged.
    def contains(pattern, literal: false)
      native = _rbexpr.str_contains(pattern, literal)
      Utils.wrap_expr(native)
    end
  end
end
|
data/lib/polars/utils.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
module Polars
  # Internal helpers for wrapping native objects and normalizing user input
  # into expressions.
  module Utils
    # Wraps a native series in a Series.
    def self.wrap_s(s)
      Series._from_rbseries(s)
    end

    # Wraps a native data frame in a DataFrame.
    def self.wrap_df(df)
      DataFrame._from_rbdf(df)
    end

    # Wraps a native expression in an Expr.
    def self.wrap_expr(rbexpr)
      Expr._from_rbexpr(rbexpr)
    end

    # Column expression; delegates to `Polars.col`.
    def self.col(name)
      Polars.col(name)
    end

    # Normalizes a selection into an array of native expressions.
    #
    # A single String, Symbol, Expr or Series is treated as a one-element
    # selection. Strings and Symbols are interpreted as column names.
    def self.selection_to_rbexpr_list(exprs)
      if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
        exprs = [exprs]
      end

      exprs.map { |e| expr_to_lit_or_expr(e, str_to_lit: false)._rbexpr }
    end

    # Converts `expr` into an Expr.
    #
    # With `str_to_lit: false`, Strings and Symbols become column expressions.
    # Integers, Floats, Strings, Series and nil become literals; an Expr is
    # returned unchanged. Anything else (including a Symbol when `str_to_lit`
    # is true) raises ArgumentError.
    def self.expr_to_lit_or_expr(expr, str_to_lit: true)
      if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
        col(expr)
      elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Series) || expr.nil?
        lit(expr)
      elsif expr.is_a?(Expr)
        expr
      else
        raise ArgumentError, "did not expect value #{expr} of type #{expr.class.name}, maybe disambiguate with Polars.lit or Polars.col"
      end
    end

    # Literal expression; delegates to `Polars.lit`.
    def self.lit(value)
      Polars.lit(value)
    end

    # Expands `path` with `File.expand_path`.
    def self.format_path(path)
      File.expand_path(path)
    end
  end
end
|
data/lib/polars/when.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Polars
  # First step of a when/then chain; holds the native `when` object.
  class When
    attr_accessor :_rbwhen

    def initialize(rbwhen)
      @_rbwhen = rbwhen
    end

    # Converts `expr` with `Utils.expr_to_lit_or_expr`, passes it to the
    # native `_then` and wraps the result in a WhenThen.
    def then(expr)
      value = Utils.expr_to_lit_or_expr(expr)
      WhenThen.new(_rbwhen._then(value._rbexpr))
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Polars
  # Second step of a when/then chain; holds the native when-then object.
  class WhenThen
    attr_accessor :_rbwhenthen

    def initialize(rbwhenthen)
      @_rbwhenthen = rbwhenthen
    end

    # Adds another branch. `predicate` must already expose `_rbexpr`; it is
    # not converted here.
    # NOTE(review): `WhenThenThen` is not defined in any Ruby file visible
    # alongside this class - confirm it exists before relying on this method.
    def when(predicate)
      chained = _rbwhenthen.when(predicate._rbexpr)
      WhenThenThen.new(chained)
    end

    # Ends the chain: converts `expr` with `Utils.expr_to_lit_or_expr` and
    # wraps the native `otherwise` result as an expression.
    def otherwise(expr)
      fallback = Utils.expr_to_lit_or_expr(expr)
      Utils.wrap_expr(_rbwhenthen.otherwise(fallback._rbexpr))
    end
  end
end
|
data/lib/polars-df.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "polars"
|
data/lib/polars.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# ext
|
2
|
+
require "polars/polars"
|
3
|
+
|
4
|
+
# modules
|
5
|
+
require "polars/data_frame"
|
6
|
+
require "polars/expr"
|
7
|
+
require "polars/functions"
|
8
|
+
require "polars/lazy_frame"
|
9
|
+
require "polars/lazy_functions"
|
10
|
+
require "polars/lazy_group_by"
|
11
|
+
require "polars/io"
|
12
|
+
require "polars/series"
|
13
|
+
require "polars/string_expr"
|
14
|
+
require "polars/utils"
|
15
|
+
require "polars/version"
|
16
|
+
require "polars/when"
|
17
|
+
require "polars/when_then"
|
18
|
+
|
19
|
+
module Polars
  # Library-specific error class; a plain StandardError subclass.
  class Error < StandardError
  end

  # Make the helper modules' methods callable directly on `Polars`.
  # NOTE(review): `IO` here presumably resolves to the module loaded from
  # "polars/io" rather than Ruby's core ::IO class - confirm.
  extend Functions
  extend IO
  extend LazyFunctions
end
|