polars-df 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +20 -0
- data/README.md +93 -0
- data/ext/polars/Cargo.toml +35 -0
- data/ext/polars/extconf.rb +4 -0
- data/ext/polars/src/conversion.rs +115 -0
- data/ext/polars/src/dataframe.rs +304 -0
- data/ext/polars/src/error.rs +24 -0
- data/ext/polars/src/file.rs +28 -0
- data/ext/polars/src/lazy/dataframe.rs +123 -0
- data/ext/polars/src/lazy/dsl.rs +298 -0
- data/ext/polars/src/lazy/mod.rs +3 -0
- data/ext/polars/src/lazy/utils.rs +13 -0
- data/ext/polars/src/lib.rs +256 -0
- data/ext/polars/src/series.rs +475 -0
- data/lib/polars/data_frame.rb +315 -0
- data/lib/polars/expr.rb +233 -0
- data/lib/polars/functions.rb +45 -0
- data/lib/polars/io.rb +39 -0
- data/lib/polars/lazy_frame.rb +139 -0
- data/lib/polars/lazy_functions.rb +121 -0
- data/lib/polars/lazy_group_by.rb +13 -0
- data/lib/polars/series.rb +261 -0
- data/lib/polars/string_expr.rb +17 -0
- data/lib/polars/utils.rb +47 -0
- data/lib/polars/version.rb +3 -0
- data/lib/polars/when.rb +15 -0
- data/lib/polars/when_then.rb +18 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +25 -0
- metadata +87 -0
@@ -0,0 +1,139 @@
|
|
1
|
+
module Polars
  # Ruby wrapper around a native lazy frame handle (`_ldf`). Every query
  # method delegates to the handle and wraps whatever it returns.
  class LazyFrame
    # Native lazy frame handle that all methods delegate to.
    attr_accessor :_ldf

    # Wraps an existing native handle. Uses `allocate`, so `initialize`
    # is not run on the new wrapper.
    def self._from_rbldf(rb_ldf)
      ldf = LazyFrame.allocate
      ldf._ldf = rb_ldf
      ldf
    end

    # Applies the optimization toggles to the native handle, collects it and
    # wraps the result with `Utils.wrap_df`.
    #
    # `no_optimization: true` turns off predicate, projection and slice
    # pushdown as well as common subplan elimination. `allow_streaming: true`
    # also turns off common subplan elimination. `string_cache` is accepted
    # but not used in this method body.
    def collect(
      type_coercion: true,
      predicate_pushdown: true,
      projection_pushdown: true,
      simplify_expression: true,
      string_cache: false,
      no_optimization: false,
      slice_pushdown: true,
      common_subplan_elimination: true,
      allow_streaming: false
    )
      if no_optimization
        predicate_pushdown = false
        projection_pushdown = false
        slice_pushdown = false
        common_subplan_elimination = false
      end

      common_subplan_elimination = false if allow_streaming

      ldf = _ldf.optimization_toggle(
        type_coercion,
        predicate_pushdown,
        projection_pushdown,
        simplify_expression,
        slice_pushdown,
        common_subplan_elimination,
        allow_streaming
      )
      Utils.wrap_df(ldf.collect)
    end

    # Returns a new lazy frame filtered by `predicate`. A String predicate is
    # converted with `str_to_lit: false`, i.e. treated as a column name.
    def filter(predicate)
      self.class._from_rbldf(
        _ldf.filter(
          Utils.expr_to_lit_or_expr(predicate, str_to_lit: false)._rbexpr
        )
      )
    end

    # Returns a new lazy frame selecting `exprs` (a single selection or a
    # list; conversion is done by `Utils.selection_to_rbexpr_list`).
    def select(exprs)
      exprs = Utils.selection_to_rbexpr_list(exprs)
      self.class._from_rbldf(_ldf.select(exprs))
    end

    # Starts a group-by on `by` and returns a `LazyGroupBy` that will wrap
    # its aggregation result using this frame's class.
    def groupby(by, maintain_order: false)
      rbexprs_by = Utils.selection_to_rbexpr_list(by)
      lgb = _ldf.groupby(rbexprs_by, maintain_order)
      LazyGroupBy.new(lgb, self.class)
    end

    # Joins this lazy frame with `other`.
    #
    # For `how: "cross"` no join keys are passed. Otherwise either `on`, or
    # both `left_on` and `right_on`, must be given.
    #
    # Raises ArgumentError when `other` is not a LazyFrame or when the join
    # keys are missing.
    def join(
      other,
      left_on: nil,
      right_on: nil,
      on: nil,
      how: "inner",
      suffix: "_right",
      allow_parallel: true,
      force_parallel: false
    )
      if !other.is_a?(LazyFrame)
        raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
      end

      if how == "cross"
        return self.class._from_rbldf(
          _ldf.join(
            other._ldf, [], [], allow_parallel, force_parallel, how, suffix
          )
        )
      end

      if !on.nil?
        # The same key expressions are used for both sides.
        rbexprs = Utils.selection_to_rbexpr_list(on)
        rbexprs_left = rbexprs
        rbexprs_right = rbexprs
      elsif !left_on.nil? && !right_on.nil?
        rbexprs_left = Utils.selection_to_rbexpr_list(left_on)
        rbexprs_right = Utils.selection_to_rbexpr_list(right_on)
      else
        raise ArgumentError, "must specify `on` OR `left_on` and `right_on`"
      end

      self.class._from_rbldf(
        _ldf.join(
          other._ldf,
          rbexprs_left,
          rbexprs_right,
          allow_parallel,
          force_parallel,
          how,
          suffix
        )
      )
    end

    # Returns a new lazy frame with the given columns added.
    #
    # `exprs` may be nil (no columns), a single Expr, or anything responding
    # to `to_a` whose elements are Expr or Series objects. A Series element
    # is turned into a literal expression via `Utils.lit`.
    #
    # Raises ArgumentError for any other element type.
    def with_columns(exprs)
      exprs =
        if exprs.nil?
          []
        elsif exprs.is_a?(Expr)
          [exprs]
        else
          exprs.to_a
        end

      rbexprs = exprs.map do |e|
        case e
        when Expr
          e._rbexpr
        when Series
          # Append the literal like every other element; assigning here
          # would discard the expressions collected so far.
          Utils.lit(e)._rbexpr
        else
          raise ArgumentError, "Expected an expression, got #{e}"
        end
      end

      self.class._from_rbldf(_ldf.with_columns(rbexprs))
    end

    # Convenience wrapper: adds a single column via `with_columns`.
    def with_column(column)
      with_columns([column])
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
module Polars
  # Helper functions that build expressions, or delegate straight to a
  # Series when one is passed in.
  module LazyFunctions
    # Expression for the column called `name`. Symbols are converted to
    # strings before being handed to the native layer.
    def col(name)
      column_name = name.is_a?(Symbol) ? name.to_s : name
      Utils.wrap_expr(RbExpr.col(column_name))
    end

    # Standard deviation of a Series, or of the column named `column`.
    def std(column, ddof: 1)
      target = column.is_a?(Series) ? column : col(column)
      target.std(ddof: ddof)
    end

    # Variance of a Series, or of the column named `column`.
    def var(column, ddof: 1)
      target = column.is_a?(Series) ? column : col(column)
      target.var(ddof: ddof)
    end

    # Maximum of a Series, of a named column, or across several selections.
    def max(column)
      return column.max if column.is_a?(Series)
      return col(column).max if column.is_a?(String) || column.is_a?(Symbol)

      rbexprs = Utils.selection_to_rbexpr_list(column)
      # TODO
      # NOTE(review): `_max_exprs` is not defined in this module - confirm
      # where it is expected to come from.
      Utils.wrap_expr(_max_exprs(rbexprs))
    end

    # Minimum of a Series, of a named column, or across several selections.
    def min(column)
      return column.min if column.is_a?(Series)
      return col(column).min if column.is_a?(String) || column.is_a?(Symbol)

      rbexprs = Utils.selection_to_rbexpr_list(column)
      # TODO
      # NOTE(review): `_min_exprs` is not defined in this module - confirm
      # where it is expected to come from.
      Utils.wrap_expr(_min_exprs(rbexprs))
    end

    # Sum of a Series, of a named column, or across an Array of selections.
    # Any other argument raises a RuntimeError ("todo").
    def sum(column)
      return column.sum if column.is_a?(Series)
      return col(column.to_s).sum if column.is_a?(String) || column.is_a?(Symbol)
      raise "todo" unless column.is_a?(Array)

      rbexprs = Utils.selection_to_rbexpr_list(column)
      # TODO
      # NOTE(review): `_sum_exprs` is not defined in this module - confirm
      # where it is expected to come from.
      Utils.wrap_expr(_sum_exprs(rbexprs))
    end

    # Mean of a Series, or of the column named `column`.
    def mean(column)
      target = column.is_a?(Series) ? column : col(column)
      target.mean
    end

    # Alias for `mean`.
    def avg(column)
      mean(column)
    end

    # Median of a Series, or of the column named `column`.
    def median(column)
      target = column.is_a?(Series) ? column : col(column)
      target.median
    end

    # Literal expression for `value`.
    def lit(value)
      Utils.wrap_expr(RbExpr.lit(value))
    end

    # Range expression from `low` to `high` with the given `step`. String
    # bounds are treated as column names. The expression is cast to `dtype`
    # unless `dtype` is nil or "i64". With `eager: true` the expression is
    # evaluated on an empty DataFrame and returned as a Series named "arange".
    def arange(low, high, step: 1, eager: false, dtype: nil)
      low_expr = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
      high_expr = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
      range_expr = Utils.wrap_expr(RbExpr.arange(low_expr._rbexpr, high_expr._rbexpr, step))

      range_expr = range_expr.cast(dtype) if !dtype.nil? && dtype != "i64"

      return range_expr unless eager

      DataFrame.new
        .select(range_expr)
        .to_series
        .rename("arange", in_place: true)
    end

    # Without an argument: expression selecting every column (`col("*")`).
    # With a String/Symbol: `all` applied to that column.
    # Any other argument raises a RuntimeError ("todo").
    def all(name = nil)
      return col("*") if name.nil?
      raise "todo" unless name.is_a?(String) || name.is_a?(Symbol)

      col(name).all
    end

    # Starts a when/then chain. `expr` goes through the default
    # `Utils.expr_to_lit_or_expr` conversion.
    def when(expr)
      condition = Utils.expr_to_lit_or_expr(expr)
      When.new(RbExpr.when(condition._rbexpr))
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Polars
  # Pending group-by on a lazy frame; created by `LazyFrame#groupby`.
  class LazyGroupBy
    # lgb             - native lazy group-by handle
    # lazyframe_class - class whose `_from_rbldf` wraps the aggregated result
    def initialize(lgb, lazyframe_class)
      @lgb, @lazyframe_class = lgb, lazyframe_class
    end

    # Runs the aggregations `aggs` on the group-by and returns the result
    # wrapped by the stored lazy frame class.
    def agg(aggs)
      agg_exprs = Utils.selection_to_rbexpr_list(aggs)
      aggregated = @lgb.agg(agg_exprs)
      @lazyframe_class._from_rbldf(aggregated)
    end
  end
end
|
@@ -0,0 +1,261 @@
|
|
1
|
+
module Polars
  # Ruby wrapper around a native series handle (`_s`).
  class Series
    # Native series handle that all methods delegate to.
    attr_accessor :_s

    # Builds a series from `values` (nil, a Range or an Array).
    #
    # When the first argument is not a String and no `values` are given, the
    # first argument is taken as the values and the name defaults to "".
    # `nan_to_null` is accepted but not used in this method body.
    #
    # Raises ArgumentError for a non-String name passed together with values,
    # and for unsupported `values` types.
    def initialize(name = nil, values = nil, dtype: nil, strict: true, nan_to_null: false, dtype_if_empty: nil)
      # Handle case where values are passed as the first argument
      unless name.nil? || name.is_a?(String)
        raise ArgumentError, "Series name must be a string." unless values.nil?

        values = name
        name = nil
      end

      name = "" if name.nil?

      self._s =
        if values.nil?
          sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
        elsif values.is_a?(Range)
          range_start = values.first
          # `arange` gets an exclusive upper bound, so inclusive ranges need +1.
          range_stop = values.last + (values.exclude_end? ? 0 : 1)
          Polars
            .arange(range_start, range_stop, step: 1, eager: true, dtype: dtype)
            .rename(name, in_place: true)
            ._s
        elsif values.is_a?(Array)
          sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
        else
          raise ArgumentError, "Series constructor called with unsupported type; got #{values.class.name}"
        end
    end

    # Wraps an existing native handle. Uses `allocate`, so `initialize`
    # is not run on the new wrapper.
    def self._from_rbseries(s)
      wrapper = Series.allocate
      wrapper._s = s
      wrapper
    end

    # Data type reported by the native handle, as a Symbol.
    def dtype
      _s.dtype.to_sym
    end

    # Name reported by the native handle.
    def name
      _s.name
    end

    # One-element array holding the series length.
    def shape
      [_s.len]
    end

    # String form produced by the native handle; also used for `inspect`.
    def to_s
      _s.to_s
    end
    alias_method :inspect, :to_s

    # Element-wise operators. `other` must expose a native handle via `_s`.

    def &(other)
      Utils.wrap_s(_s.bitand(other._s))
    end

    def |(other)
      Utils.wrap_s(_s.bitor(other._s))
    end

    def ^(other)
      Utils.wrap_s(_s.bitxor(other._s))
    end

    def +(other)
      Utils.wrap_s(_s.add(other._s))
    end

    def -(other)
      Utils.wrap_s(_s.sub(other._s))
    end

    def *(other)
      Utils.wrap_s(_s.mul(other._s))
    end

    def /(other)
      Utils.wrap_s(_s.div(other._s))
    end

    # Single-column DataFrame built from this series.
    def to_frame
      Utils.wrap_df(RbDataFrame.new([_s]))
    end

    # Aggregations delegated directly to the native handle.

    def sum
      _s.sum
    end

    def mean
      _s.mean
    end

    def min
      _s.min
    end

    def max
      _s.max
    end

    # Returns a `dup` of this wrapper after renaming its handle to `name`.
    # NOTE(review): `dup` is a shallow copy, so the copy appears to share
    # `_s` with the receiver and the rename may affect both - confirm.
    def alias(name)
      dup.tap { |copy| copy._s.rename(name) }
    end

    # Renames the series. With `in_place: true` the handle is renamed and
    # `self` is returned; otherwise the work is delegated to `alias`.
    def rename(name, in_place: false)
      return self.alias(name) unless in_place

      _s.rename(name)
      self
    end

    def chunk_lengths
      _s.chunk_lengths
    end

    def n_chunks
      _s.n_chunks
    end

    # Cumulative operations; `reverse` is passed through to the handle.

    def cumsum(reverse: false)
      Utils.wrap_s(_s.cumsum(reverse))
    end

    def cummin(reverse: false)
      Utils.wrap_s(_s.cummin(reverse))
    end

    def cummax(reverse: false)
      Utils.wrap_s(_s.cummax(reverse))
    end

    # Applies `limit(n)` to this column through a temporary frame.
    def limit(n = 10)
      column = Utils.col(name).limit(n)
      to_frame.select(column).to_series
    end

    # Slice starting at `offset`; `length` defaults to the series length.
    def slice(offset, length = nil)
      span = length.nil? ? len : length
      Utils.wrap_s(_s.slice(offset, span))
    end

    # Appends `other` to the native handle and returns `self`.
    def append(other)
      _s.append(other._s)
      self
    end

    # Filters by `predicate`, which must expose a native handle via `_s`.
    def filter(predicate)
      Utils.wrap_s(_s.filter(predicate._s))
    end

    # Applies `head(n)` to this column through a temporary frame.
    def head(n = 10)
      column = Utils.col(name).head(n)
      to_frame.select(column).to_series
    end

    # Applies `tail(n)` to this column through a temporary frame.
    def tail(n = 10)
      column = Utils.col(name).tail(n)
      to_frame.select(column).to_series
    end

    # Sorts the series. With `in_place: true` the sorted handle replaces
    # `_s` and `self` is returned; otherwise a new Series is returned.
    def sort(reverse: false, in_place: false)
      sorted = _s.sort(reverse)
      return Utils.wrap_s(sorted) unless in_place

      self._s = sorted
      self
    end

    def to_a
      _s.to_a
    end

    def len
      _s.len
    end

    # Rechunks the handle; returns `self` when `in_place`, otherwise wraps
    # the value returned by the handle.
    def rechunk(in_place: false)
      rechunked = _s.rechunk(in_place)
      return self if in_place

      Utils.wrap_s(rechunked)
    end

    private

    # Builds a native series from `values`.
    #
    # With an explicit (or defaulted) dtype the constructor comes from
    # POLARS_TYPE_TO_CONSTRUCTOR; otherwise it is picked from the class of
    # the first non-nil value via RB_TYPE_TO_CONSTRUCTOR.
    def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
      if (values.nil? || values.empty?) && dtype.nil?
        # Prefer the caller's hint for empty input (e.g. comparisons between
        # self and other); otherwise default to Float32.
        dtype = dtype_if_empty || "f32"
      end

      # _get_first_non_none
      first_present = values.find { |v| !v.nil? }

      if !dtype.nil? && is_polars_dtype(dtype)
        return polars_type_to_constructor(dtype).call(name, values, strict)
      end

      rb_type_to_constructor(first_present.class).call(name, values, strict)
    end

    # dtype symbol => native constructor
    POLARS_TYPE_TO_CONSTRUCTOR = {
      f32: RbSeries.method(:new_opt_f32),
      f64: RbSeries.method(:new_opt_f64),
      i8: RbSeries.method(:new_opt_i8),
      i16: RbSeries.method(:new_opt_i16),
      i32: RbSeries.method(:new_opt_i32),
      i64: RbSeries.method(:new_opt_i64),
      u8: RbSeries.method(:new_opt_u8),
      u16: RbSeries.method(:new_opt_u16),
      u32: RbSeries.method(:new_opt_u32),
      u64: RbSeries.method(:new_opt_u64),
      bool: RbSeries.method(:new_opt_bool),
      str: RbSeries.method(:new_str)
    }

    # Looks up the native constructor for `dtype` (converted with `to_sym`).
    # Raises ArgumentError for unknown dtypes.
    def polars_type_to_constructor(dtype)
      POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
    rescue KeyError
      raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
    end

    # Ruby class => native constructor
    RB_TYPE_TO_CONSTRUCTOR = {
      Float => RbSeries.method(:new_opt_f64),
      Integer => RbSeries.method(:new_opt_i64),
      String => RbSeries.method(:new_str),
      TrueClass => RbSeries.method(:new_opt_bool),
      FalseClass => RbSeries.method(:new_opt_bool)
    }

    # Looks up the native constructor for a Ruby class.
    # Raises ArgumentError when the class has no entry.
    def rb_type_to_constructor(dtype)
      RB_TYPE_TO_CONSTRUCTOR.fetch(dtype)
    rescue KeyError
      # RbSeries.method(:new_object)
      raise ArgumentError, "Cannot determine type"
    end

    # Currently accepts every value.
    def is_polars_dtype(data_type)
      true
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Polars
  # String-specific operations on an expression.
  class StringExpr
    # Native expression handle taken from the wrapped expression.
    attr_accessor :_rbexpr

    # expr - object exposing its native handle via `_rbexpr`
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Expression produced by the native `str_lengths` call.
    def lengths
      native = _rbexpr.str_lengths
      Utils.wrap_expr(native)
    end

    # Expression produced by the native `str_contains` call; `literal` is
    # passed through unchanged.
    def contains(pattern, literal: false)
      native = _rbexpr.str_contains(pattern, literal)
      Utils.wrap_expr(native)
    end
  end
end
|
data/lib/polars/utils.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
module Polars
  # Internal helpers for wrapping native handles and normalising the
  # arguments users pass to query methods.
  module Utils
    # Wraps a native series handle in a Series.
    def self.wrap_s(s)
      Series._from_rbseries(s)
    end

    # Wraps a native data frame handle in a DataFrame.
    def self.wrap_df(df)
      DataFrame._from_rbdf(df)
    end

    # Wraps a native expression handle in an Expr.
    def self.wrap_expr(rbexpr)
      Expr._from_rbexpr(rbexpr)
    end

    # Column expression; delegates to `Polars.col`.
    def self.col(name)
      Polars.col(name)
    end

    # Converts a selection into an array of native expression handles.
    #
    # A single String, Symbol, Expr or Series is treated as a one-element
    # list. Strings and Symbols are interpreted as column names.
    def self.selection_to_rbexpr_list(exprs)
      if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
        exprs = [exprs]
      end

      exprs.map { |e| expr_to_lit_or_expr(e, str_to_lit: false)._rbexpr }
    end

    # Converts `expr` into an Expr.
    #
    # - String: a literal by default, a column when `str_to_lit` is false.
    # - Symbol: a column when `str_to_lit` is false (matching `Polars.col`,
    #   which accepts Symbol names); otherwise rejected like any other
    #   unsupported value.
    # - Integer, Float, Series, nil: a literal.
    # - Expr: returned unchanged.
    #
    # Raises ArgumentError for anything else.
    def self.expr_to_lit_or_expr(expr, str_to_lit: true)
      if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
        col(expr)
      elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Series) || expr.nil?
        lit(expr)
      elsif expr.is_a?(Expr)
        expr
      else
        raise ArgumentError, "did not expect value #{expr} of type #{expr.class.name}, maybe disambiguate with Polars.lit or Polars.col"
      end
    end

    # Literal expression; delegates to `Polars.lit`.
    def self.lit(value)
      Polars.lit(value)
    end

    # Expands `path` with `File.expand_path`.
    def self.format_path(path)
      File.expand_path(path)
    end
  end
end
|
data/lib/polars/when.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Polars
  # First step of a when/then chain.
  class When
    # Native "when" handle.
    attr_accessor :_rbwhen

    def initialize(rbwhen)
      self._rbwhen = rbwhen
    end

    # Attaches the value to use when the condition holds and returns a
    # WhenThen. `expr` goes through the default
    # `Utils.expr_to_lit_or_expr` conversion.
    def then(expr)
      value_expr = Utils.expr_to_lit_or_expr(expr)
      WhenThen.new(_rbwhen._then(value_expr._rbexpr))
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Polars
  # Second step of a when/then chain, produced by `When#then`.
  class WhenThen
    # Native "when-then" handle.
    attr_accessor :_rbwhenthen

    def initialize(rbwhenthen)
      self._rbwhenthen = rbwhenthen
    end

    # Adds a further condition. `predicate` must expose its native handle
    # via `_rbexpr`; no conversion is applied here.
    # NOTE(review): `WhenThenThen` is not defined in the files shown here -
    # confirm it exists before relying on chained conditions.
    def when(predicate)
      chained = _rbwhenthen.when(predicate._rbexpr)
      WhenThenThen.new(chained)
    end

    # Finishes the chain with a fallback value and returns the resulting
    # expression. `expr` goes through the default
    # `Utils.expr_to_lit_or_expr` conversion.
    def otherwise(expr)
      fallback = Utils.expr_to_lit_or_expr(expr)
      Utils.wrap_expr(_rbwhenthen.otherwise(fallback._rbexpr))
    end
  end
end
|
data/lib/polars-df.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "polars"
|
data/lib/polars.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# ext
|
2
|
+
require "polars/polars"
|
3
|
+
|
4
|
+
# modules
|
5
|
+
require "polars/data_frame"
|
6
|
+
require "polars/expr"
|
7
|
+
require "polars/functions"
|
8
|
+
require "polars/lazy_frame"
|
9
|
+
require "polars/lazy_functions"
|
10
|
+
require "polars/lazy_group_by"
|
11
|
+
require "polars/io"
|
12
|
+
require "polars/series"
|
13
|
+
require "polars/string_expr"
|
14
|
+
require "polars/utils"
|
15
|
+
require "polars/version"
|
16
|
+
require "polars/when"
|
17
|
+
require "polars/when_then"
|
18
|
+
|
19
|
+
module Polars
  # Library-specific error class.
  class Error < StandardError
  end

  # Make the helper modules' methods callable directly on `Polars`.
  extend Functions
  extend IO
  extend LazyFunctions
end
|