polars-df 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +468 -538
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +17 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +121 -93
- data/ext/polars/src/dataframe.rs +116 -71
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +68 -284
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +85 -58
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +47 -42
- data/ext/polars/src/lib.rs +156 -72
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +3 -3
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/construction.rs +4 -4
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +7 -27
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +115 -82
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +5 -25
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +177 -94
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +93 -66
- data/lib/polars/lazy_functions.rb +36 -48
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +26 -13
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/utils.rb +12 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +18 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/lib/polars/list_expr.rb
CHANGED
@@ -27,8 +27,9 @@ module Polars
|
|
27
27
|
# # │ 1 │
|
28
28
|
# # └─────┘
|
29
29
|
def lengths
|
30
|
-
Utils.wrap_expr(_rbexpr.
|
30
|
+
Utils.wrap_expr(_rbexpr.list_len)
|
31
31
|
end
|
32
|
+
alias_method :len, :lengths
|
32
33
|
|
33
34
|
# Sum all the lists in the array.
|
34
35
|
#
|
@@ -379,6 +380,7 @@ module Polars
|
|
379
380
|
# # │ x y │
|
380
381
|
# # └───────┘
|
381
382
|
def join(separator)
|
383
|
+
separator = Utils.parse_as_expression(separator, str_as_lit: true)
|
382
384
|
Utils.wrap_expr(_rbexpr.list_join(separator))
|
383
385
|
end
|
384
386
|
|
@@ -457,7 +459,7 @@ module Polars
|
|
457
459
|
|
458
460
|
# Shift values by the given period.
|
459
461
|
#
|
460
|
-
# @param
|
462
|
+
# @param n [Integer]
|
461
463
|
# Number of places to shift (may be negative).
|
462
464
|
#
|
463
465
|
# @return [Expr]
|
@@ -472,8 +474,9 @@ module Polars
|
|
472
474
|
# # [null, 1, … 3]
|
473
475
|
# # [null, 10, 2]
|
474
476
|
# # ]
|
475
|
-
def shift(
|
476
|
-
Utils.
|
477
|
+
def shift(n = 1)
|
478
|
+
n = Utils.parse_as_expression(n)
|
479
|
+
Utils.wrap_expr(_rbexpr.list_shift(n))
|
477
480
|
end
|
478
481
|
|
479
482
|
# Slice every sublist.
|
@@ -568,9 +571,10 @@ module Polars
|
|
568
571
|
# # │ 1 │
|
569
572
|
# # │ 0 │
|
570
573
|
# # └────────────────┘
|
571
|
-
def
|
572
|
-
Utils.wrap_expr(_rbexpr.
|
574
|
+
def count_matches(element)
|
575
|
+
Utils.wrap_expr(_rbexpr.list_count_matches(Utils.expr_to_lit_or_expr(element)._rbexpr))
|
573
576
|
end
|
577
|
+
alias_method :count_match, :count_matches
|
574
578
|
|
575
579
|
# Convert the series of type `List` to a series of type `Struct`.
|
576
580
|
#
|
@@ -609,7 +613,7 @@ module Polars
|
|
609
613
|
# Run all expression parallel. Don't activate this blindly.
|
610
614
|
# Parallelism is worth it if there is enough work to do per thread.
|
611
615
|
#
|
612
|
-
# This likely should not be use in the
|
616
|
+
# This likely should not be used in the group by context, because we already
|
613
617
|
# have parallel execution per group
|
614
618
|
#
|
615
619
|
# @return [Expr]
|
@@ -624,7 +628,7 @@ module Polars
|
|
624
628
|
# # ┌─────┬─────┬────────────┐
|
625
629
|
# # │ a ┆ b ┆ rank │
|
626
630
|
# # │ --- ┆ --- ┆ --- │
|
627
|
-
# # │ i64 ┆ i64 ┆ list[
|
631
|
+
# # │ i64 ┆ i64 ┆ list[f64] │
|
628
632
|
# # ╞═════╪═════╪════════════╡
|
629
633
|
# # │ 1 ┆ 4 ┆ [1.0, 2.0] │
|
630
634
|
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
@@ -315,7 +315,7 @@ module Polars
|
|
315
315
|
# Run all expression parallel. Don't activate this blindly.
|
316
316
|
# Parallelism is worth it if there is enough work to do per thread.
|
317
317
|
#
|
318
|
-
# This likely should not be use in the
|
318
|
+
# This likely should not be used in the group by context, because we already
|
319
319
|
# have parallel execution per group
|
320
320
|
#
|
321
321
|
# @return [Series]
|
@@ -330,7 +330,7 @@ module Polars
|
|
330
330
|
# # ┌─────┬─────┬────────────┐
|
331
331
|
# # │ a ┆ b ┆ rank │
|
332
332
|
# # │ --- ┆ --- ┆ --- │
|
333
|
-
# # │ i64 ┆ i64 ┆ list[
|
333
|
+
# # │ i64 ┆ i64 ┆ list[f64] │
|
334
334
|
# # ╞═════╪═════╪════════════╡
|
335
335
|
# # │ 1 ┆ 4 ┆ [1.0, 2.0] │
|
336
336
|
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
@@ -0,0 +1,198 @@
|
|
1
|
+
module Polars
  # Namespace for expressions that operate on expression names.
  #
  # Every method forwards to the matching `name_*` call on the wrapped native
  # expression and re-wraps the result with `Utils.wrap_expr`.
  class NameExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    #
    # @param expr [Object]
    #   Expression whose `_rbexpr` handle this namespace operates on.
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Keep the original root name of the expression.
    #
    # @note
    #   Due to implementation constraints, this method can only be called as the last
    #   expression in a chain.
    #
    # @return [Expr]
    #
    # @example Prevent errors due to potential duplicate column names.
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2],
    #       "b" => [3, 4]
    #     }
    #   )
    #   df.select((Polars.lit(10) / Polars.all).name.keep)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌──────┬──────────┐
    #   # │ a    ┆ b        │
    #   # │ ---  ┆ ---      │
    #   # │ f64  ┆ f64      │
    #   # ╞══════╪══════════╡
    #   # │ 10.0 ┆ 3.333333 │
    #   # │ 5.0  ┆ 2.5      │
    #   # └──────┴──────────┘
    #
    # @example Undo an alias operation.
    #   df.with_columns((Polars.col("a") * 9).alias("c").name.keep)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ a   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 9   ┆ 3   │
    #   # │ 18  ┆ 4   │
    #   # └─────┴─────┘
    def keep
      Utils.wrap_expr(_rbexpr.name_keep)
    end

    # Rename the output of an expression by mapping a function over the root name.
    #
    # @yield [name] Block that receives a root column name and returns the new name.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError] If no block is given.
    #
    # @example Remove a common suffix and convert to lower case.
    #   df = Polars::DataFrame.new(
    #     {
    #       "A_reverse" => [3, 2, 1],
    #       "B_reverse" => ["z", "y", "x"]
    #     }
    #   )
    #   df.with_columns(
    #     Polars.all.reverse.name.map { |c| c.delete_suffix("_reverse").downcase }
    #   )
    #   # =>
    #   # shape: (3, 4)
    #   # ┌───────────┬───────────┬─────┬─────┐
    #   # │ A_reverse ┆ B_reverse ┆ a   ┆ b   │
    #   # │ ---       ┆ ---       ┆ --- ┆ --- │
    #   # │ i64       ┆ str       ┆ i64 ┆ str │
    #   # ╞═══════════╪═══════════╪═════╪═════╡
    #   # │ 3         ┆ z         ┆ 1   ┆ x   │
    #   # │ 2         ┆ y         ┆ 2   ┆ y   │
    #   # │ 1         ┆ x         ┆ 3   ┆ z   │
    #   # └───────────┴───────────┴─────┴─────┘
    def map(&f)
      # Fail here with a clear message instead of handing `nil` to the native layer.
      raise ArgumentError, "no block given" unless f

      Utils.wrap_expr(_rbexpr.name_map(f))
    end

    # Add a prefix to the root column name of the expression.
    #
    # @param prefix [Object]
    #   Prefix to add to the root column name.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2, 3],
    #       "b" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
    #   # =>
    #   # shape: (3, 4)
    #   # ┌─────┬─────┬───────────┬───────────┐
    #   # │ a   ┆ b   ┆ reverse_a ┆ reverse_b │
    #   # │ --- ┆ --- ┆ ---       ┆ ---       │
    #   # │ i64 ┆ str ┆ i64       ┆ str       │
    #   # ╞═════╪═════╪═══════════╪═══════════╡
    #   # │ 1   ┆ x   ┆ 3         ┆ z         │
    #   # │ 2   ┆ y   ┆ 2         ┆ y         │
    #   # │ 3   ┆ z   ┆ 1         ┆ x         │
    #   # └─────┴─────┴───────────┴───────────┘
    def prefix(prefix)
      Utils.wrap_expr(_rbexpr.name_prefix(prefix))
    end

    # Add a suffix to the root column name of the expression.
    #
    # @param suffix [Object]
    #   Suffix to add to the root column name.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2, 3],
    #       "b" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
    #   # =>
    #   # shape: (3, 4)
    #   # ┌─────┬─────┬───────────┬───────────┐
    #   # │ a   ┆ b   ┆ a_reverse ┆ b_reverse │
    #   # │ --- ┆ --- ┆ ---       ┆ ---       │
    #   # │ i64 ┆ str ┆ i64       ┆ str       │
    #   # ╞═════╪═════╪═══════════╪═══════════╡
    #   # │ 1   ┆ x   ┆ 3         ┆ z         │
    #   # │ 2   ┆ y   ┆ 2         ┆ y         │
    #   # │ 3   ┆ z   ┆ 1         ┆ x         │
    #   # └─────┴─────┴───────────┴───────────┘
    def suffix(suffix)
      Utils.wrap_expr(_rbexpr.name_suffix(suffix))
    end

    # Make the root column name lowercase.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "ColX" => [1, 2, 3],
    #       "ColY" => ["x", "y", "z"],
    #     }
    #   )
    #   df.with_columns(Polars.all.name.to_lowercase)
    #   # =>
    #   # shape: (3, 4)
    #   # ┌──────┬──────┬──────┬──────┐
    #   # │ ColX ┆ ColY ┆ colx ┆ coly │
    #   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
    #   # │ i64  ┆ str  ┆ i64  ┆ str  │
    #   # ╞══════╪══════╪══════╪══════╡
    #   # │ 1    ┆ x    ┆ 1    ┆ x    │
    #   # │ 2    ┆ y    ┆ 2    ┆ y    │
    #   # │ 3    ┆ z    ┆ 3    ┆ z    │
    #   # └──────┴──────┴──────┴──────┘
    def to_lowercase
      Utils.wrap_expr(_rbexpr.name_to_lowercase)
    end

    # Make the root column name uppercase.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "ColX" => [1, 2, 3],
    #       "ColY" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.name.to_uppercase)
    #   # =>
    #   # shape: (3, 4)
    #   # ┌──────┬──────┬──────┬──────┐
    #   # │ ColX ┆ ColY ┆ COLX ┆ COLY │
    #   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
    #   # │ i64  ┆ str  ┆ i64  ┆ str  │
    #   # ╞══════╪══════╪══════╪══════╡
    #   # │ 1    ┆ x    ┆ 1    ┆ x    │
    #   # │ 2    ┆ y    ┆ 2    ┆ y    │
    #   # │ 3    ┆ z    ┆ 3    ┆ z    │
    #   # └──────┴──────┴──────┴──────┘
    def to_uppercase
      Utils.wrap_expr(_rbexpr.name_to_uppercase)
    end
  end
end
|
@@ -2,7 +2,7 @@ module Polars
|
|
2
2
|
# A rolling grouper.
|
3
3
|
#
|
4
4
|
# This has an `.agg` method which will allow you to run all polars expressions in a
|
5
|
-
#
|
5
|
+
# group by context.
|
6
6
|
class RollingGroupBy
|
7
7
|
def initialize(
|
8
8
|
df,
|
@@ -27,7 +27,7 @@ module Polars
|
|
27
27
|
|
28
28
|
def agg(aggs)
|
29
29
|
@df.lazy
|
30
|
-
.
|
30
|
+
.group_by_rolling(
|
31
31
|
index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
|
32
32
|
)
|
33
33
|
.agg(aggs)
|
data/lib/polars/series.rb
CHANGED
@@ -432,6 +432,18 @@ module Polars
|
|
432
432
|
end
|
433
433
|
alias_method :all, :all?
|
434
434
|
|
435
|
+
# Check if all boolean values in the column are `false`.
|
436
|
+
#
|
437
|
+
# @return [Boolean]
|
438
|
+
def none?(&block)
|
439
|
+
if block_given?
|
440
|
+
apply(&block).none?
|
441
|
+
else
|
442
|
+
to_frame.select(Polars.col(name).is_not.all).to_series[0]
|
443
|
+
end
|
444
|
+
end
|
445
|
+
alias_method :none, :none?
|
446
|
+
|
435
447
|
# Compute the logarithm to a given base.
|
436
448
|
#
|
437
449
|
# @param base [Float]
|
@@ -799,7 +811,7 @@ module Polars
|
|
799
811
|
# Number of valid values there should be in the window before the expression
|
800
812
|
# is evaluated. valid values = `length - null_count`
|
801
813
|
# @param parallel [Boolean]
|
802
|
-
# Run in parallel. Don't do this in a
|
814
|
+
# Run in parallel. Don't do this in a group by or another operation that
|
803
815
|
# already has much parallelization.
|
804
816
|
#
|
805
817
|
# @return [Series]
|
@@ -3097,7 +3109,7 @@ module Polars
|
|
3097
3109
|
# s.peak_max
|
3098
3110
|
# # =>
|
3099
3111
|
# # shape: (5,)
|
3100
|
-
# # Series: '' [bool]
|
3112
|
+
# # Series: 'a' [bool]
|
3101
3113
|
# # [
|
3102
3114
|
# # false
|
3103
3115
|
# # false
|
@@ -3106,7 +3118,7 @@ module Polars
|
|
3106
3118
|
# # true
|
3107
3119
|
# # ]
|
3108
3120
|
def peak_max
|
3109
|
-
|
3121
|
+
super
|
3110
3122
|
end
|
3111
3123
|
|
3112
3124
|
# Get a boolean mask of the local minimum peaks.
|
@@ -3118,7 +3130,7 @@ module Polars
|
|
3118
3130
|
# s.peak_min
|
3119
3131
|
# # =>
|
3120
3132
|
# # shape: (5,)
|
3121
|
-
# # Series: '' [bool]
|
3133
|
+
# # Series: 'a' [bool]
|
3122
3134
|
# # [
|
3123
3135
|
# # false
|
3124
3136
|
# # true
|
@@ -3127,7 +3139,7 @@ module Polars
|
|
3127
3139
|
# # false
|
3128
3140
|
# # ]
|
3129
3141
|
def peak_min
|
3130
|
-
|
3142
|
+
super
|
3131
3143
|
end
|
3132
3144
|
|
3133
3145
|
# Count the number of unique values in this Series.
|
@@ -3211,13 +3223,13 @@ module Polars
|
|
3211
3223
|
# s.interpolate
|
3212
3224
|
# # =>
|
3213
3225
|
# # shape: (5,)
|
3214
|
-
# # Series: 'a' [
|
3226
|
+
# # Series: 'a' [f64]
|
3215
3227
|
# # [
|
3216
|
-
# # 1
|
3217
|
-
# # 2
|
3218
|
-
# # 3
|
3219
|
-
# # 4
|
3220
|
-
# # 5
|
3228
|
+
# # 1.0
|
3229
|
+
# # 2.0
|
3230
|
+
# # 3.0
|
3231
|
+
# # 4.0
|
3232
|
+
# # 5.0
|
3221
3233
|
# # ]
|
3222
3234
|
def interpolate(method: "linear")
|
3223
3235
|
super
|
@@ -3260,7 +3272,7 @@ module Polars
|
|
3260
3272
|
# s.rank
|
3261
3273
|
# # =>
|
3262
3274
|
# # shape: (5,)
|
3263
|
-
# # Series: 'a' [
|
3275
|
+
# # Series: 'a' [f64]
|
3264
3276
|
# # [
|
3265
3277
|
# # 3.0
|
3266
3278
|
# # 4.5
|
@@ -3998,7 +4010,8 @@ module Polars
|
|
3998
4010
|
Integer => RbSeries.method(:new_opt_i64),
|
3999
4011
|
TrueClass => RbSeries.method(:new_opt_bool),
|
4000
4012
|
FalseClass => RbSeries.method(:new_opt_bool),
|
4001
|
-
BigDecimal => RbSeries.method(:new_decimal)
|
4013
|
+
BigDecimal => RbSeries.method(:new_decimal),
|
4014
|
+
NilClass => RbSeries.method(:new_null)
|
4002
4015
|
}
|
4003
4016
|
|
4004
4017
|
def rb_type_to_constructor(dtype)
|
@@ -0,0 +1,194 @@
|
|
1
|
+
module Polars
  # Run SQL queries against DataFrame/LazyFrame data.
  class SQLContext
    # @private
    attr_accessor :_ctxt, :_eager_execution

    # Initialize a new `SQLContext`.
    #
    # @param frames [Hash, nil]
    #   A `{name:frame, ...}` mapping of frames to register.
    # @param eager_execution [Boolean]
    #   Default for the `eager` option of {#execute}.
    # @param named_frames [Object]
    #   Named eager/lazy frames, provided as kwargs.
    def initialize(frames = nil, eager_execution: false, **named_frames)
      self._ctxt = RbSQLContext.new
      self._eager_execution = eager_execution

      frames = (frames || {}).to_h

      if frames.any? || named_frames.any?
        register_many(frames, **named_frames)
      end
    end

    # Parse the given SQL query and execute it against the registered frame data.
    #
    # @param query [String]
    #   A valid string SQL query.
    # @param eager [Boolean]
    #   Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
    #   If unset, the value of the init-time parameter "eager_execution" will be
    #   used. (Note that the query itself is always executed in lazy-mode; this
    #   parameter only impacts the type of the returned frame).
    #
    # @return [Object]
    #
    # @example Execute a SQL query against the registered frame data:
    #   df = Polars::DataFrame.new(
    #     [
    #       ["The Godfather", 1972, 6_000_000, 134_821_952, 9.2],
    #       ["The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0],
    #       ["Schindler's List", 1993, 22_000_000, 96_067_179, 8.9],
    #       ["Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9],
    #       ["The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3],
    #     ],
    #     schema: ["title", "release_year", "budget", "gross", "imdb_score"]
    #   )
    #   ctx = Polars::SQLContext.new(films: df)
    #   ctx.execute(
    #     "
    #     SELECT title, release_year, imdb_score
    #     FROM films
    #     WHERE release_year > 1990
    #     ORDER BY imdb_score DESC
    #     ",
    #     eager: true
    #   )
    #   # =>
    #   # shape: (4, 3)
    #   # ┌──────────────────────────┬──────────────┬────────────┐
    #   # │ title                    ┆ release_year ┆ imdb_score │
    #   # │ ---                      ┆ ---          ┆ ---        │
    #   # │ str                      ┆ i64          ┆ f64        │
    #   # ╞══════════════════════════╪══════════════╪════════════╡
    #   # │ The Shawshank Redemption ┆ 1994         ┆ 9.3        │
    #   # │ The Dark Knight          ┆ 2008         ┆ 9.0        │
    #   # │ Schindler's List         ┆ 1993         ┆ 8.9        │
    #   # │ Pulp Fiction             ┆ 1994         ┆ 8.9        │
    #   # └──────────────────────────┴──────────────┴────────────┘
    #
    # @example Execute a GROUP BY query:
    #   ctx.execute(
    #     "
    #     SELECT
    #       MAX(release_year / 10) * 10 AS decade,
    #       SUM(gross) AS total_gross,
    #       COUNT(title) AS n_films,
    #     FROM films
    #     GROUP BY (release_year / 10) -- decade
    #     ORDER BY total_gross DESC
    #     ",
    #     eager: true
    #   )
    #   # =>
    #   # shape: (3, 3)
    #   # ┌────────┬─────────────┬─────────┐
    #   # │ decade ┆ total_gross ┆ n_films │
    #   # │ ---    ┆ ---         ┆ ---     │
    #   # │ i64    ┆ i64         ┆ u32     │
    #   # ╞════════╪═════════════╪═════════╡
    #   # │ 2000   ┆ 533316061   ┆ 1       │
    #   # │ 1990   ┆ 232338648   ┆ 3       │
    #   # │ 1970   ┆ 134821952   ┆ 1       │
    #   # └────────┴─────────────┴─────────┘
    def execute(query, eager: nil)
      res = Utils.wrap_ldf(_ctxt.execute(query))
      # Only fall back to the init-time default when `eager` was left unset, so
      # an explicit `eager: false` is honoured even if `eager_execution` is true.
      eager = _eager_execution if eager.nil?
      eager ? res.collect : res
    end

    # Register a single frame as a table, using the given name.
    #
    # @param name [String]
    #   Name of the table.
    # @param frame [Object]
    #   eager/lazy frame to associate with this table name.
    #
    # @return [SQLContext]
    #
    # @example
    #   df = Polars::DataFrame.new({"hello" => ["world"]})
    #   ctx = Polars::SQLContext.new
    #   ctx.register("frame_data", df).execute("SELECT * FROM frame_data").collect
    #   # =>
    #   # shape: (1, 1)
    #   # ┌───────┐
    #   # │ hello │
    #   # │ ---   │
    #   # │ str   │
    #   # ╞═══════╡
    #   # │ world │
    #   # └───────┘
    def register(name, frame)
      # Eager frames are converted to lazy ones; the `_ldf` handle is what gets registered.
      frame = frame.lazy if frame.is_a?(DataFrame)
      _ctxt.register(name.to_s, frame._ldf)
      self
    end

    # Register multiple eager/lazy frames as tables, using the associated names.
    #
    # @param frames [Hash, nil]
    #   A `{name:frame, ...}` mapping.
    # @param named_frames [Object]
    #   Named eager/lazy frames, provided as kwargs.
    #
    # @return [SQLContext]
    def register_many(frames = nil, **named_frames)
      # On a duplicate name, `merge` keeps the kwargs entry over the `frames` entry.
      (frames || {}).to_h.merge(named_frames).each do |name, frame|
        register(name, frame)
      end
      self
    end

    # Unregister one or more eager/lazy frames by name.
    #
    # @param names [Object]
    #   Names of the tables to unregister.
    #
    # @return [SQLContext]
    #
    # @example Register with a SQLContext object:
    #   df0 = Polars::DataFrame.new({"ints" => [9, 8, 7, 6, 5]})
    #   lf1 = Polars::LazyFrame.new({"text" => ["a", "b", "c"]})
    #   lf2 = Polars::LazyFrame.new({"misc" => ["testing1234"]})
    #   ctx = Polars::SQLContext.new(test1: df0, test2: lf1, test3: lf2)
    #   ctx.tables
    #   # => ["test1", "test2", "test3"]
    #
    # @example Unregister one or more of the tables:
    #   ctx.unregister(["test1", "test3"]).tables
    #   # => ["test2"]
    def unregister(names)
      # A single String or Symbol is treated as a one-element list.
      names = [names] if names.is_a?(String) || names.is_a?(Symbol)
      # Stringify each name, as `register` does, so names round-trip.
      names.each do |nm|
        _ctxt.unregister(nm.to_s)
      end
      self
    end

    # Return a list of the registered table names.
    #
    # @return [Array]
    #
    # @example Executing as SQL:
    #   frame_data = Polars::DataFrame.new({"hello" => ["world"]})
    #   ctx = Polars::SQLContext.new(hello_world: frame_data)
    #   ctx.execute("SHOW TABLES", eager: true)
    #   # =>
    #   # shape: (1, 1)
    #   # ┌─────────────┐
    #   # │ name        │
    #   # │ ---         │
    #   # │ str         │
    #   # ╞═════════════╡
    #   # │ hello_world │
    #   # └─────────────┘
    #
    # @example Calling the method:
    #   ctx.tables
    #   # => ["hello_world"]
    def tables
      _ctxt.get_tables.sort
    end
  end
end
|