polars-df 0.6.0-x86_64-darwin → 0.7.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +468 -538
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +4896 -5867
- data/README.md +8 -7
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +115 -82
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +5 -25
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +177 -94
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +93 -66
- data/lib/polars/lazy_functions.rb +36 -48
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +26 -13
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/utils.rb +12 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +5 -2
data/lib/polars/list_expr.rb
CHANGED
@@ -27,8 +27,9 @@ module Polars
|
|
27
27
|
# # │ 1 │
|
28
28
|
# # └─────┘
|
29
29
|
def lengths
  # Fetch the native `list_len` expression first, then hand it to Utils.wrap_expr.
  len_rbexpr = _rbexpr.list_len
  Utils.wrap_expr(len_rbexpr)
end
|
32
|
+
alias_method :len, :lengths
|
32
33
|
|
33
34
|
# Sum all the lists in the array.
|
34
35
|
#
|
@@ -379,6 +380,7 @@ module Polars
|
|
379
380
|
# # │ x y │
|
380
381
|
# # └───────┘
|
381
382
|
def join(separator)
  # Strings are parsed with `str_as_lit: true` before being passed to the native join.
  sep_rbexpr = Utils.parse_as_expression(separator, str_as_lit: true)
  joined = _rbexpr.list_join(sep_rbexpr)
  Utils.wrap_expr(joined)
end
|
384
386
|
|
@@ -457,7 +459,7 @@ module Polars
|
|
457
459
|
|
458
460
|
# Shift values by the given period.
|
459
461
|
#
|
460
|
-
# @param
|
462
|
+
# @param n [Integer]
|
461
463
|
# Number of places to shift (may be negative).
|
462
464
|
#
|
463
465
|
# @return [Expr]
|
@@ -472,8 +474,9 @@ module Polars
|
|
472
474
|
# # [null, 1, … 3]
|
473
475
|
# # [null, 10, 2]
|
474
476
|
# # ]
|
475
|
-
def shift(
|
476
|
-
Utils.
|
477
|
+
def shift(n = 1)
  # `n` goes through Utils.parse_as_expression before reaching the native shift.
  periods = Utils.parse_as_expression(n)
  shifted = _rbexpr.list_shift(periods)
  Utils.wrap_expr(shifted)
end
|
478
481
|
|
479
482
|
# Slice every sublist.
|
@@ -568,9 +571,10 @@ module Polars
|
|
568
571
|
# # │ 1 │
|
569
572
|
# # │ 0 │
|
570
573
|
# # └────────────────┘
|
571
|
-
def
|
572
|
-
Utils.wrap_expr(_rbexpr.
|
574
|
+
def count_matches(element)
  # Convert `element` via Utils.expr_to_lit_or_expr and unwrap its native handle.
  element_rbexpr = Utils.expr_to_lit_or_expr(element)._rbexpr
  counted = _rbexpr.list_count_matches(element_rbexpr)
  Utils.wrap_expr(counted)
end
|
577
|
+
alias_method :count_match, :count_matches
|
574
578
|
|
575
579
|
# Convert the series of type `List` to a series of type `Struct`.
|
576
580
|
#
|
@@ -609,7 +613,7 @@ module Polars
|
|
609
613
|
# Run all expressions in parallel. Don't activate this blindly.
|
610
614
|
# Parallelism is worth it if there is enough work to do per thread.
|
611
615
|
#
|
612
|
-
# This likely should not be use in the
|
616
|
+
# This likely should not be used in the group by context, because we already
|
613
617
|
# have parallel execution per group
|
614
618
|
#
|
615
619
|
# @return [Expr]
|
@@ -624,7 +628,7 @@ module Polars
|
|
624
628
|
# # ┌─────┬─────┬────────────┐
|
625
629
|
# # │ a ┆ b ┆ rank │
|
626
630
|
# # │ --- ┆ --- ┆ --- │
|
627
|
-
# # │ i64 ┆ i64 ┆ list[
|
631
|
+
# # │ i64 ┆ i64 ┆ list[f64] │
|
628
632
|
# # ╞═════╪═════╪════════════╡
|
629
633
|
# # │ 1 ┆ 4 ┆ [1.0, 2.0] │
|
630
634
|
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
@@ -315,7 +315,7 @@ module Polars
|
|
315
315
|
# Run all expressions in parallel. Don't activate this blindly.
|
316
316
|
# Parallelism is worth it if there is enough work to do per thread.
|
317
317
|
#
|
318
|
-
# This likely should not be use in the
|
318
|
+
# This likely should not be used in the group by context, because we already
|
319
319
|
# have parallel execution per group
|
320
320
|
#
|
321
321
|
# @return [Series]
|
@@ -330,7 +330,7 @@ module Polars
|
|
330
330
|
# # ┌─────┬─────┬────────────┐
|
331
331
|
# # │ a ┆ b ┆ rank │
|
332
332
|
# # │ --- ┆ --- ┆ --- │
|
333
|
-
# # │ i64 ┆ i64 ┆ list[
|
333
|
+
# # │ i64 ┆ i64 ┆ list[f64] │
|
334
334
|
# # ╞═════╪═════╪════════════╡
|
335
335
|
# # │ 1 ┆ 4 ┆ [1.0, 2.0] │
|
336
336
|
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
@@ -0,0 +1,198 @@
|
|
1
|
+
module Polars
|
2
|
+
# Namespace for expressions that operate on expression names.
|
3
|
+
class NameExpr
|
4
|
+
# @private
|
5
|
+
attr_accessor :_rbexpr
|
6
|
+
|
7
|
+
# @private
|
8
|
+
def initialize(expr)
  # Keep only the native handle of the given expression.
  native = expr._rbexpr
  self._rbexpr = native
end
|
11
|
+
|
12
|
+
# Keep the original root name of the expression.
|
13
|
+
#
|
14
|
+
# @note
|
15
|
+
# Due to implementation constraints, this method can only be called as the last
|
16
|
+
# expression in a chain.
|
17
|
+
#
|
18
|
+
# @return [Expr]
|
19
|
+
#
|
20
|
+
# @example Prevent errors due to potential duplicate column names.
|
21
|
+
# df = Polars::DataFrame.new(
|
22
|
+
# {
|
23
|
+
# "a" => [1, 2],
|
24
|
+
# "b" => [3, 4]
|
25
|
+
# }
|
26
|
+
# )
|
27
|
+
# df.select((Polars.lit(10) / Polars.all).name.keep)
|
28
|
+
# # =>
|
29
|
+
# # shape: (2, 2)
|
30
|
+
# # ┌──────┬──────────┐
|
31
|
+
# # │ a ┆ b │
|
32
|
+
# # │ --- ┆ --- │
|
33
|
+
# # │ f64 ┆ f64 │
|
34
|
+
# # ╞══════╪══════════╡
|
35
|
+
# # │ 10.0 ┆ 3.333333 │
|
36
|
+
# # │ 5.0 ┆ 2.5 │
|
37
|
+
# # └──────┴──────────┘
|
38
|
+
#
|
39
|
+
# @example Undo an alias operation.
|
40
|
+
# df.with_columns((Polars.col("a") * 9).alias("c").name.keep)
|
41
|
+
# # =>
|
42
|
+
# # shape: (2, 2)
|
43
|
+
# # ┌─────┬─────┐
|
44
|
+
# # │ a ┆ b │
|
45
|
+
# # │ --- ┆ --- │
|
46
|
+
# # │ i64 ┆ i64 │
|
47
|
+
# # ╞═════╪═════╡
|
48
|
+
# # │ 9 ┆ 3 │
|
49
|
+
# # │ 18 ┆ 4 │
|
50
|
+
# # └─────┴─────┘
|
51
|
+
def keep
  # Build the native `name_keep` expression, then pass it to Utils.wrap_expr.
  kept = _rbexpr.name_keep
  Utils.wrap_expr(kept)
end
|
54
|
+
|
55
|
+
# Rename the output of an expression by mapping a function over the root name.
|
56
|
+
#
|
57
|
+
# @return [Expr]
|
58
|
+
#
|
59
|
+
# @example Remove a common suffix and convert to lower case.
|
60
|
+
# df = Polars::DataFrame.new(
|
61
|
+
# {
|
62
|
+
# "A_reverse" => [3, 2, 1],
|
63
|
+
# "B_reverse" => ["z", "y", "x"]
|
64
|
+
# }
|
65
|
+
# )
|
66
|
+
# df.with_columns(
|
67
|
+
# Polars.all.reverse.name.map { |c| c.delete_suffix("_reverse").downcase }
|
68
|
+
# )
|
69
|
+
# # =>
|
70
|
+
# # shape: (3, 4)
|
71
|
+
# # ┌───────────┬───────────┬─────┬─────┐
|
72
|
+
# # │ A_reverse ┆ B_reverse ┆ a ┆ b │
|
73
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
74
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
75
|
+
# # ╞═══════════╪═══════════╪═════╪═════╡
|
76
|
+
# # │ 3 ┆ z ┆ 1 ┆ x │
|
77
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
78
|
+
# # │ 1 ┆ x ┆ 3 ┆ z │
|
79
|
+
# # └───────────┴───────────┴─────┴─────┘
|
80
|
+
def map(&f)
  # The captured block is forwarded as a plain argument to the native `name_map`.
  mapped = _rbexpr.name_map(f)
  Utils.wrap_expr(mapped)
end
|
83
|
+
|
84
|
+
# Add a prefix to the root column name of the expression.
|
85
|
+
#
|
86
|
+
# @param prefix [Object]
|
87
|
+
# Prefix to add to the root column name.
|
88
|
+
#
|
89
|
+
# @return [Expr]
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# df = Polars::DataFrame.new(
|
93
|
+
# {
|
94
|
+
# "a" => [1, 2, 3],
|
95
|
+
# "b" => ["x", "y", "z"]
|
96
|
+
# }
|
97
|
+
# )
|
98
|
+
# df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
|
99
|
+
# # =>
|
100
|
+
# # shape: (3, 4)
|
101
|
+
# # ┌─────┬─────┬───────────┬───────────┐
|
102
|
+
# # │ a ┆ b ┆ reverse_a ┆ reverse_b │
|
103
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
104
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
105
|
+
# # ╞═════╪═════╪═══════════╪═══════════╡
|
106
|
+
# # │ 1 ┆ x ┆ 3 ┆ z │
|
107
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
108
|
+
# # │ 3 ┆ z ┆ 1 ┆ x │
|
109
|
+
# # └─────┴─────┴───────────┴───────────┘
|
110
|
+
def prefix(prefix)
  # Pass the prefix to the native `name_prefix`, then wrap what comes back.
  prefixed = _rbexpr.name_prefix(prefix)
  Utils.wrap_expr(prefixed)
end
|
113
|
+
|
114
|
+
# Add a suffix to the root column name of the expression.
|
115
|
+
#
|
116
|
+
# @param suffix [Object]
|
117
|
+
# Suffix to add to the root column name.
|
118
|
+
#
|
119
|
+
# @return [Expr]
|
120
|
+
#
|
121
|
+
# @example
|
122
|
+
# df = Polars::DataFrame.new(
|
123
|
+
# {
|
124
|
+
# "a" => [1, 2, 3],
|
125
|
+
# "b" => ["x", "y", "z"]
|
126
|
+
# }
|
127
|
+
# )
|
128
|
+
# df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
|
129
|
+
# # =>
|
130
|
+
# # shape: (3, 4)
|
131
|
+
# # ┌─────┬─────┬───────────┬───────────┐
|
132
|
+
# # │ a ┆ b ┆ a_reverse ┆ b_reverse │
|
133
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
134
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
135
|
+
# # ╞═════╪═════╪═══════════╪═══════════╡
|
136
|
+
# # │ 1 ┆ x ┆ 3 ┆ z │
|
137
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
138
|
+
# # │ 3 ┆ z ┆ 1 ┆ x │
|
139
|
+
# # └─────┴─────┴───────────┴───────────┘
|
140
|
+
def suffix(suffix)
  # Pass the suffix to the native `name_suffix`, then wrap what comes back.
  suffixed = _rbexpr.name_suffix(suffix)
  Utils.wrap_expr(suffixed)
end
|
143
|
+
|
144
|
+
# Make the root column name lowercase.
|
145
|
+
#
|
146
|
+
# @return [Expr]
|
147
|
+
#
|
148
|
+
# @example
|
149
|
+
# df = Polars::DataFrame.new(
|
150
|
+
# {
|
151
|
+
# "ColX" => [1, 2, 3],
|
152
|
+
# "ColY" => ["x", "y", "z"],
|
153
|
+
# }
|
154
|
+
# )
|
155
|
+
# df.with_columns(Polars.all.name.to_lowercase)
|
156
|
+
# # =>
|
157
|
+
# # shape: (3, 4)
|
158
|
+
# # ┌──────┬──────┬──────┬──────┐
|
159
|
+
# # │ ColX ┆ ColY ┆ colx ┆ coly │
|
160
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
161
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
162
|
+
# # ╞══════╪══════╪══════╪══════╡
|
163
|
+
# # │ 1 ┆ x ┆ 1 ┆ x │
|
164
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
165
|
+
# # │ 3 ┆ z ┆ 3 ┆ z │
|
166
|
+
# # └──────┴──────┴──────┴──────┘
|
167
|
+
def to_lowercase
  # Build the native `name_to_lowercase` expression, then pass it to Utils.wrap_expr.
  lowered = _rbexpr.name_to_lowercase
  Utils.wrap_expr(lowered)
end
|
170
|
+
|
171
|
+
# Make the root column name uppercase.
|
172
|
+
#
|
173
|
+
# @return [Expr]
|
174
|
+
#
|
175
|
+
# @example
|
176
|
+
# df = Polars::DataFrame.new(
|
177
|
+
# {
|
178
|
+
# "ColX" => [1, 2, 3],
|
179
|
+
# "ColY" => ["x", "y", "z"]
|
180
|
+
# }
|
181
|
+
# )
|
182
|
+
# df.with_columns(Polars.all.name.to_uppercase)
|
183
|
+
# # =>
|
184
|
+
# # shape: (3, 4)
|
185
|
+
# # ┌──────┬──────┬──────┬──────┐
|
186
|
+
# # │ ColX ┆ ColY ┆ COLX ┆ COLY │
|
187
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
188
|
+
# # │ i64 ┆ str ┆ i64 ┆ str │
|
189
|
+
# # ╞══════╪══════╪══════╪══════╡
|
190
|
+
# # │ 1 ┆ x ┆ 1 ┆ x │
|
191
|
+
# # │ 2 ┆ y ┆ 2 ┆ y │
|
192
|
+
# # │ 3 ┆ z ┆ 3 ┆ z │
|
193
|
+
# # └──────┴──────┴──────┴──────┘
|
194
|
+
def to_uppercase
  # Build the native `name_to_uppercase` expression, then pass it to Utils.wrap_expr.
  raised = _rbexpr.name_to_uppercase
  Utils.wrap_expr(raised)
end
|
197
|
+
end
|
198
|
+
end
|
@@ -2,7 +2,7 @@ module Polars
|
|
2
2
|
# A rolling grouper.
|
3
3
|
#
|
4
4
|
# This has an `.agg` method which will allow you to run all polars expressions in a
|
5
|
-
#
|
5
|
+
# group by context.
|
6
6
|
class RollingGroupBy
|
7
7
|
def initialize(
|
8
8
|
df,
|
@@ -27,7 +27,7 @@ module Polars
|
|
27
27
|
|
28
28
|
def agg(aggs)
|
29
29
|
@df.lazy
|
30
|
-
.
|
30
|
+
.group_by_rolling(
|
31
31
|
index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
|
32
32
|
)
|
33
33
|
.agg(aggs)
|
data/lib/polars/series.rb
CHANGED
@@ -432,6 +432,18 @@ module Polars
|
|
432
432
|
end
|
433
433
|
alias_method :all, :all?
|
434
434
|
|
435
|
+
# Check if all boolean values in the column are `false`.
|
436
|
+
#
|
437
|
+
# @return [Boolean]
|
438
|
+
def none?(&block)
  # With a block: apply it first and ask the result whether none are true.
  return apply(&block).none? if block_given?

  # Without a block: negate the column, then check that every negated value holds.
  frame = to_frame
  all_negated = Polars.col(name).is_not.all
  frame.select(all_negated).to_series[0]
end
|
445
|
+
alias_method :none, :none?
|
446
|
+
|
435
447
|
# Compute the logarithm to a given base.
|
436
448
|
#
|
437
449
|
# @param base [Float]
|
@@ -799,7 +811,7 @@ module Polars
|
|
799
811
|
# Number of valid values there should be in the window before the expression
|
800
812
|
# is evaluated. valid values = `length - null_count`
|
801
813
|
# @param parallel [Boolean]
|
802
|
-
# Run in parallel. Don't do this in a
|
814
|
+
# Run in parallel. Don't do this in a group by or another operation that
|
803
815
|
# already has much parallelization.
|
804
816
|
#
|
805
817
|
# @return [Series]
|
@@ -3097,7 +3109,7 @@ module Polars
|
|
3097
3109
|
# s.peak_max
|
3098
3110
|
# # =>
|
3099
3111
|
# # shape: (5,)
|
3100
|
-
# # Series: '' [bool]
|
3112
|
+
# # Series: 'a' [bool]
|
3101
3113
|
# # [
|
3102
3114
|
# # false
|
3103
3115
|
# # false
|
@@ -3106,7 +3118,7 @@ module Polars
|
|
3106
3118
|
# # true
|
3107
3119
|
# # ]
|
3108
3120
|
def peak_max
  # No logic of its own: the call is passed up to the next `peak_max` in the ancestor chain.
  super
end
|
3111
3123
|
|
3112
3124
|
# Get a boolean mask of the local minimum peaks.
|
@@ -3118,7 +3130,7 @@ module Polars
|
|
3118
3130
|
# s.peak_min
|
3119
3131
|
# # =>
|
3120
3132
|
# # shape: (5,)
|
3121
|
-
# # Series: '' [bool]
|
3133
|
+
# # Series: 'a' [bool]
|
3122
3134
|
# # [
|
3123
3135
|
# # false
|
3124
3136
|
# # true
|
@@ -3127,7 +3139,7 @@ module Polars
|
|
3127
3139
|
# # false
|
3128
3140
|
# # ]
|
3129
3141
|
def peak_min
  # No logic of its own: the call is passed up to the next `peak_min` in the ancestor chain.
  super
end
|
3132
3144
|
|
3133
3145
|
# Count the number of unique values in this Series.
|
@@ -3211,13 +3223,13 @@ module Polars
|
|
3211
3223
|
# s.interpolate
|
3212
3224
|
# # =>
|
3213
3225
|
# # shape: (5,)
|
3214
|
-
# # Series: 'a' [
|
3226
|
+
# # Series: 'a' [f64]
|
3215
3227
|
# # [
|
3216
|
-
# # 1
|
3217
|
-
# # 2
|
3218
|
-
# # 3
|
3219
|
-
# # 4
|
3220
|
-
# # 5
|
3228
|
+
# # 1.0
|
3229
|
+
# # 2.0
|
3230
|
+
# # 3.0
|
3231
|
+
# # 4.0
|
3232
|
+
# # 5.0
|
3221
3233
|
# # ]
|
3222
3234
|
def interpolate(method: "linear")
|
3223
3235
|
super
|
@@ -3260,7 +3272,7 @@ module Polars
|
|
3260
3272
|
# s.rank
|
3261
3273
|
# # =>
|
3262
3274
|
# # shape: (5,)
|
3263
|
-
# # Series: 'a' [
|
3275
|
+
# # Series: 'a' [f64]
|
3264
3276
|
# # [
|
3265
3277
|
# # 3.0
|
3266
3278
|
# # 4.5
|
@@ -3998,7 +4010,8 @@ module Polars
|
|
3998
4010
|
Integer => RbSeries.method(:new_opt_i64),
|
3999
4011
|
TrueClass => RbSeries.method(:new_opt_bool),
|
4000
4012
|
FalseClass => RbSeries.method(:new_opt_bool),
|
4001
|
-
BigDecimal => RbSeries.method(:new_decimal)
|
4013
|
+
BigDecimal => RbSeries.method(:new_decimal),
|
4014
|
+
NilClass => RbSeries.method(:new_null)
|
4002
4015
|
}
|
4003
4016
|
|
4004
4017
|
def rb_type_to_constructor(dtype)
|
@@ -0,0 +1,194 @@
|
|
1
|
+
module Polars
|
2
|
+
# Run SQL queries against DataFrame/LazyFrame data.
|
3
|
+
class SQLContext
|
4
|
+
# @private
|
5
|
+
attr_accessor :_ctxt, :_eager_execution
|
6
|
+
|
7
|
+
# Initialize a new `SQLContext`.
|
8
|
+
def initialize(frames = nil, eager_execution: false, **named_frames)
  self._ctxt = RbSQLContext.new
  self._eager_execution = eager_execution

  # Nothing to register when neither a mapping nor keyword frames were given.
  positional = (frames || {}).to_h
  return if positional.empty? && named_frames.empty?

  register_many(positional, **named_frames)
end
|
18
|
+
|
19
|
+
# Parse the given SQL query and execute it against the registered frame data.
|
20
|
+
#
|
21
|
+
# @param query [String]
|
22
|
+
# A valid string SQL query.
|
23
|
+
# @param eager [Boolean]
|
24
|
+
# Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
|
25
|
+
# If unset, the value of the init-time parameter "eager_execution" will be
|
26
|
+
# used. (Note that the query itself is always executed in lazy-mode; this
|
27
|
+
# parameter only impacts the type of the returned frame).
|
28
|
+
#
|
29
|
+
# @return [Object]
|
30
|
+
#
|
31
|
+
# @example Execute a SQL query against the registered frame data:
|
32
|
+
# df = Polars::DataFrame.new(
|
33
|
+
# [
|
34
|
+
# ["The Godfather", 1972, 6_000_000, 134_821_952, 9.2],
|
35
|
+
# ["The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0],
|
36
|
+
# ["Schindler's List", 1993, 22_000_000, 96_067_179, 8.9],
|
37
|
+
# ["Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9],
|
38
|
+
# ["The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3],
|
39
|
+
# ],
|
40
|
+
# schema: ["title", "release_year", "budget", "gross", "imdb_score"]
|
41
|
+
# )
|
42
|
+
# ctx = Polars::SQLContext.new(films: df)
|
43
|
+
# ctx.execute(
|
44
|
+
# "
|
45
|
+
# SELECT title, release_year, imdb_score
|
46
|
+
# FROM films
|
47
|
+
# WHERE release_year > 1990
|
48
|
+
# ORDER BY imdb_score DESC
|
49
|
+
# ",
|
50
|
+
# eager: true
|
51
|
+
# )
|
52
|
+
# # =>
|
53
|
+
# # shape: (4, 3)
|
54
|
+
# # ┌──────────────────────────┬──────────────┬────────────┐
|
55
|
+
# # │ title ┆ release_year ┆ imdb_score │
|
56
|
+
# # │ --- ┆ --- ┆ --- │
|
57
|
+
# # │ str ┆ i64 ┆ f64 │
|
58
|
+
# # ╞══════════════════════════╪══════════════╪════════════╡
|
59
|
+
# # │ The Shawshank Redemption ┆ 1994 ┆ 9.3 │
|
60
|
+
# # │ The Dark Knight ┆ 2008 ┆ 9.0 │
|
61
|
+
# # │ Schindler's List ┆ 1993 ┆ 8.9 │
|
62
|
+
# # │ Pulp Fiction ┆ 1994 ┆ 8.9 │
|
63
|
+
# # └──────────────────────────┴──────────────┴────────────┘
|
64
|
+
#
|
65
|
+
# @example Execute a GROUP BY query:
|
66
|
+
# ctx.execute(
|
67
|
+
# "
|
68
|
+
# SELECT
|
69
|
+
# MAX(release_year / 10) * 10 AS decade,
|
70
|
+
# SUM(gross) AS total_gross,
|
71
|
+
# COUNT(title) AS n_films,
|
72
|
+
# FROM films
|
73
|
+
# GROUP BY (release_year / 10) -- decade
|
74
|
+
# ORDER BY total_gross DESC
|
75
|
+
# ",
|
76
|
+
# eager: true
|
77
|
+
# )
|
78
|
+
# # =>
|
79
|
+
# # shape: (3, 3)
|
80
|
+
# # ┌────────┬─────────────┬─────────┐
|
81
|
+
# # │ decade ┆ total_gross ┆ n_films │
|
82
|
+
# # │ --- ┆ --- ┆ --- │
|
83
|
+
# # │ i64 ┆ i64 ┆ u32 │
|
84
|
+
# # ╞════════╪═════════════╪═════════╡
|
85
|
+
# # │ 2000 ┆ 533316061 ┆ 1 │
|
86
|
+
# # │ 1990 ┆ 232338648 ┆ 3 │
|
87
|
+
# # │ 1970 ┆ 134821952 ┆ 1 │
|
88
|
+
# # └────────┴─────────────┴─────────┘
|
89
|
+
def execute(query, eager: nil)
  # The query result is always wrapped as a lazy frame first.
  res = Utils.wrap_ldf(_ctxt.execute(query))

  # Fall back to the init-time setting only when `eager` was left unset, so an
  # explicit `eager: false` still returns the lazy result on an eager context.
  eager = _eager_execution if eager.nil?
  eager ? res.collect : res
end
|
93
|
+
|
94
|
+
# Register a single frame as a table, using the given name.
|
95
|
+
#
|
96
|
+
# @param name [String]
|
97
|
+
# Name of the table.
|
98
|
+
# @param frame [Object]
|
99
|
+
# eager/lazy frame to associate with this table name.
|
100
|
+
#
|
101
|
+
# @return [SQLContext]
|
102
|
+
#
|
103
|
+
# @example
|
104
|
+
# df = Polars::DataFrame.new({"hello" => ["world"]})
|
105
|
+
# ctx = Polars::SQLContext.new
|
106
|
+
# ctx.register("frame_data", df).execute("SELECT * FROM frame_data").collect
|
107
|
+
# # =>
|
108
|
+
# # shape: (1, 1)
|
109
|
+
# # ┌───────┐
|
110
|
+
# # │ hello │
|
111
|
+
# # │ --- │
|
112
|
+
# # │ str │
|
113
|
+
# # ╞═══════╡
|
114
|
+
# # │ world │
|
115
|
+
# # └───────┘
|
116
|
+
def register(name, frame)
  # A DataFrame is switched to its lazy form; anything else is used as given.
  lazy = frame.is_a?(DataFrame) ? frame.lazy : frame
  _ctxt.register(name.to_s, lazy._ldf)
  self
end
|
123
|
+
|
124
|
+
# Register multiple eager/lazy frames as tables, using the associated names.
|
125
|
+
#
|
126
|
+
# @param frames [Hash]
|
127
|
+
# A `{name:frame, ...}` mapping.
|
128
|
+
# @param named_frames [Object]
|
129
|
+
# Named eager/lazy frames, provided as kwargs.
|
130
|
+
#
|
131
|
+
# @return [SQLContext]
|
132
|
+
def register_many(frames, **named_frames)
  # Keyword frames are merged over the positional mapping (nil counts as empty).
  combined = (frames || {}).to_h.merge(named_frames)
  combined.each_pair { |name, frame| register(name, frame) }
  self
end
|
140
|
+
|
141
|
+
# Unregister one or more eager/lazy frames by name.
|
142
|
+
#
|
143
|
+
# @param names [Object]
|
144
|
+
# Names of the tables to unregister.
|
145
|
+
#
|
146
|
+
# @return [SQLContext]
|
147
|
+
#
|
148
|
+
# @example Register with a SQLContext object:
|
149
|
+
# df0 = Polars::DataFrame.new({"ints" => [9, 8, 7, 6, 5]})
|
150
|
+
# lf1 = Polars::LazyFrame.new({"text" => ["a", "b", "c"]})
|
151
|
+
# lf2 = Polars::LazyFrame.new({"misc" => ["testing1234"]})
|
152
|
+
# ctx = Polars::SQLContext.new(test1: df0, test2: lf1, test3: lf2)
|
153
|
+
# ctx.tables
|
154
|
+
# # => ["test1", "test2", "test3"]
|
155
|
+
#
|
156
|
+
# @example Unregister one or more of the tables:
|
157
|
+
# ctx.unregister(["test1", "test3"]).tables
|
158
|
+
# # => ["test2"]
|
159
|
+
def unregister(names)
  # Array() turns a lone String or Symbol into a one-element list and leaves
  # arrays untouched, so a single name of either kind is accepted.
  Array(names).each do |nm|
    # Stringify each name, matching the `name.to_s` that `register` applies.
    _ctxt.unregister(nm.to_s)
  end
  self
end
|
168
|
+
|
169
|
+
# Return a list of the registered table names.
|
170
|
+
#
|
171
|
+
# @return [Array]
|
172
|
+
#
|
173
|
+
# @example Executing as SQL:
|
174
|
+
# frame_data = Polars::DataFrame.new({"hello" => ["world"]})
|
175
|
+
# ctx = Polars::SQLContext.new(hello_world: frame_data)
|
176
|
+
# ctx.execute("SHOW TABLES", eager: true)
|
177
|
+
# # =>
|
178
|
+
# # shape: (1, 1)
|
179
|
+
# # ┌─────────────┐
|
180
|
+
# # │ name │
|
181
|
+
# # │ --- │
|
182
|
+
# # │ str │
|
183
|
+
# # ╞═════════════╡
|
184
|
+
# # │ hello_world │
|
185
|
+
# # └─────────────┘
|
186
|
+
#
|
187
|
+
# @example Calling the method:
|
188
|
+
# ctx.tables
|
189
|
+
# # => ["hello_world"]
|
190
|
+
def tables
  # Names come back from the context and are returned sorted.
  registered = _ctxt.get_tables
  registered.sort
end
|
193
|
+
end
|
194
|
+
end
|