polars-df 0.17.0-aarch64-linux-musl → 0.18.0-aarch64-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +668 -377
- data/LICENSE-THIRD-PARTY.txt +6123 -2972
- data/README.md +2 -2
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/3.4/polars.so +0 -0
- data/lib/polars/data_frame.rb +304 -6
- data/lib/polars/expr.rb +58 -19
- data/lib/polars/functions/eager.rb +145 -16
- data/lib/polars/io/database.rb +17 -0
- data/lib/polars/lazy_frame.rb +135 -18
- data/lib/polars/list_expr.rb +4 -7
- data/lib/polars/schema.rb +29 -0
- data/lib/polars/series.rb +36 -32
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -0
- metadata +4 -3
@@ -6,8 +6,7 @@ module Polars
|
|
6
6
|
# DataFrames/Series/LazyFrames to concatenate.
|
7
7
|
# @param rechunk [Boolean]
|
8
8
|
# Make sure that all data is in contiguous memory.
|
9
|
-
# @param how ["vertical", "vertical_relaxed", "diagonal", "horizontal"]
|
10
|
-
# LazyFrames do not support the `horizontal` strategy.
|
9
|
+
# @param how ["vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal"]
|
11
10
|
#
|
12
11
|
# - Vertical: applies multiple `vstack` operations.
|
13
12
|
# - Diagonal: finds a union between the column schemas and fills missing column values with null.
|
@@ -21,7 +20,7 @@ module Polars
|
|
21
20
|
# @example
|
22
21
|
# df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
|
23
22
|
# df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
|
24
|
-
# Polars.concat([df1, df2])
|
23
|
+
# Polars.concat([df1, df2]) # default is 'vertical' strategy
|
25
24
|
# # =>
|
26
25
|
# # shape: (2, 2)
|
27
26
|
# # ┌─────┬─────┐
|
@@ -32,38 +31,168 @@ module Polars
|
|
32
31
|
# # │ 1 ┆ 3 │
|
33
32
|
# # │ 2 ┆ 4 │
|
34
33
|
# # └─────┴─────┘
|
34
|
+
#
|
35
|
+
# @example
|
36
|
+
# df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
|
37
|
+
# df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
|
38
|
+
# Polars.concat([df1, df2], how: "vertical_relaxed") # 'a' coerced into f64
|
39
|
+
# # =>
|
40
|
+
# # shape: (2, 2)
|
41
|
+
# # ┌─────┬─────┐
|
42
|
+
# # │ a ┆ b │
|
43
|
+
# # │ --- ┆ --- │
|
44
|
+
# # │ f64 ┆ i64 │
|
45
|
+
# # ╞═════╪═════╡
|
46
|
+
# # │ 1.0 ┆ 3 │
|
47
|
+
# # │ 2.5 ┆ 4 │
|
48
|
+
# # └─────┴─────┘
|
49
|
+
#
|
50
|
+
# @example
|
51
|
+
# df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
|
52
|
+
# df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
|
53
|
+
# Polars.concat([df_h1, df_h2], how: "horizontal")
|
54
|
+
# # =>
|
55
|
+
# # shape: (2, 5)
|
56
|
+
# # ┌─────┬─────┬─────┬─────┬─────┐
|
57
|
+
# # │ l1 ┆ l2 ┆ r1 ┆ r2 ┆ r3 │
|
58
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
59
|
+
# # │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
|
60
|
+
# # ╞═════╪═════╪═════╪═════╪═════╡
|
61
|
+
# # │ 1 ┆ 3 ┆ 5 ┆ 7 ┆ 9 │
|
62
|
+
# # │ 2 ┆ 4 ┆ 6 ┆ 8 ┆ 10 │
|
63
|
+
# # └─────┴─────┴─────┴─────┴─────┘
|
64
|
+
#
|
65
|
+
# @example
|
66
|
+
# df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
|
67
|
+
# df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
|
68
|
+
# Polars.concat([df_d1, df_d2], how: "diagonal")
|
69
|
+
# # =>
|
70
|
+
# # shape: (2, 3)
|
71
|
+
# # ┌─────┬──────┬──────┐
|
72
|
+
# # │ a ┆ b ┆ c │
|
73
|
+
# # │ --- ┆ --- ┆ --- │
|
74
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
75
|
+
# # ╞═════╪══════╪══════╡
|
76
|
+
# # │ 1 ┆ 3 ┆ null │
|
77
|
+
# # │ 2 ┆ null ┆ 4 │
|
78
|
+
# # └─────┴──────┴──────┘
|
79
|
+
#
|
80
|
+
# @example
|
81
|
+
# df_a1 = Polars::DataFrame.new({"id" => [1, 2], "x" => [3, 4]})
|
82
|
+
# df_a2 = Polars::DataFrame.new({"id" => [2, 3], "y" => [5, 6]})
|
83
|
+
# df_a3 = Polars::DataFrame.new({"id" => [1, 3], "z" => [7, 8]})
|
84
|
+
# Polars.concat([df_a1, df_a2, df_a3], how: "align")
|
85
|
+
# # =>
|
86
|
+
# # shape: (3, 4)
|
87
|
+
# # ┌─────┬──────┬──────┬──────┐
|
88
|
+
# # │ id ┆ x ┆ y ┆ z │
|
89
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
90
|
+
# # │ i64 ┆ i64 ┆ i64 ┆ i64 │
|
91
|
+
# # ╞═════╪══════╪══════╪══════╡
|
92
|
+
# # │ 1 ┆ 3 ┆ null ┆ 7 │
|
93
|
+
# # │ 2 ┆ 4 ┆ 5 ┆ null │
|
94
|
+
# # │ 3 ┆ null ┆ 6 ┆ 8 │
|
95
|
+
# # └─────┴──────┴──────┴──────┘
|
35
96
|
def concat(items, rechunk: true, how: "vertical", parallel: true)
|
36
|
-
|
97
|
+
elems = items.to_a
|
98
|
+
|
99
|
+
if elems.empty?
|
37
100
|
raise ArgumentError, "cannot concat empty list"
|
38
101
|
end
|
39
102
|
|
40
|
-
|
103
|
+
if how == "align"
|
104
|
+
if !elems[0].is_a?(DataFrame) && !elems[0].is_a?(LazyFrame)
|
105
|
+
msg = "'align' strategy is not supported for #{elems[0].class.name}"
|
106
|
+
raise TypeError, msg
|
107
|
+
end
|
108
|
+
|
109
|
+
# establish common columns, maintaining the order in which they appear
|
110
|
+
all_columns = elems.flat_map { |e| e.collect_schema.names }
|
111
|
+
key = all_columns.uniq.map.with_index.to_h
|
112
|
+
common_cols = elems.map { |e| e.collect_schema.names }
|
113
|
+
.reduce { |x, y| Set.new(x) & Set.new(y) }
|
114
|
+
.sort_by { |k| key[k] }
|
115
|
+
# we require at least one key column for 'align'
|
116
|
+
if common_cols.empty?
|
117
|
+
msg = "'align' strategy requires at least one common column"
|
118
|
+
raise InvalidOperationError, msg
|
119
|
+
end
|
120
|
+
|
121
|
+
# align the frame data using a full outer join with no suffix-resolution
|
122
|
+
# (so we raise an error in case of column collision, like "horizontal")
|
123
|
+
lf = elems.map { |df| df.lazy }.reduce do |x, y|
|
124
|
+
x.join(
|
125
|
+
y,
|
126
|
+
how: "full",
|
127
|
+
on: common_cols,
|
128
|
+
suffix: "_PL_CONCAT_RIGHT",
|
129
|
+
maintain_order: "right_left"
|
130
|
+
)
|
131
|
+
# Coalesce full outer join columns
|
132
|
+
.with_columns(
|
133
|
+
common_cols.map { |name| F.coalesce([name, "#{name}_PL_CONCAT_RIGHT"]) }
|
134
|
+
)
|
135
|
+
.drop(common_cols.map { |name| "#{name}_PL_CONCAT_RIGHT" })
|
136
|
+
end.sort(common_cols)
|
137
|
+
|
138
|
+
eager = elems[0].is_a?(DataFrame)
|
139
|
+
return eager ? lf.collect : lf
|
140
|
+
end
|
141
|
+
|
142
|
+
first = elems[0]
|
143
|
+
|
41
144
|
if first.is_a?(DataFrame)
|
42
145
|
if how == "vertical"
|
43
|
-
out = Utils.wrap_df(Plr.concat_df(
|
146
|
+
out = Utils.wrap_df(Plr.concat_df(elems))
|
147
|
+
elsif how == "vertical_relaxed"
|
148
|
+
out = Utils.wrap_ldf(
|
149
|
+
Plr.concat_lf(
|
150
|
+
elems.map { |df| df.lazy },
|
151
|
+
rechunk,
|
152
|
+
parallel,
|
153
|
+
true
|
154
|
+
)
|
155
|
+
).collect(no_optimization: true)
|
44
156
|
elsif how == "diagonal"
|
45
|
-
out = Utils.wrap_df(Plr.concat_df_diagonal(
|
157
|
+
out = Utils.wrap_df(Plr.concat_df_diagonal(elems))
|
158
|
+
elsif how == "diagonal_relaxed"
|
159
|
+
out = Utils.wrap_ldf(
|
160
|
+
Plr.concat_lf_diagonal(
|
161
|
+
elems.map { |df| df.lazy },
|
162
|
+
rechunk,
|
163
|
+
parallel,
|
164
|
+
true
|
165
|
+
)
|
166
|
+
).collect(no_optimization: true)
|
46
167
|
elsif how == "horizontal"
|
47
|
-
out = Utils.wrap_df(Plr.concat_df_horizontal(
|
168
|
+
out = Utils.wrap_df(Plr.concat_df_horizontal(elems))
|
48
169
|
else
|
49
|
-
raise ArgumentError, "how must be one of {{'vertical', 'diagonal', 'horizontal'}}, got #{how}"
|
170
|
+
raise ArgumentError, "how must be one of {{'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal'}}, got #{how}"
|
50
171
|
end
|
51
172
|
elsif first.is_a?(LazyFrame)
|
52
173
|
if how == "vertical"
|
53
|
-
return Utils.wrap_ldf(Plr.concat_lf(
|
174
|
+
return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, false))
|
54
175
|
elsif how == "vertical_relaxed"
|
55
|
-
return Utils.wrap_ldf(Plr.concat_lf(
|
176
|
+
return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, true))
|
56
177
|
elsif how == "diagonal"
|
57
|
-
return Utils.wrap_ldf(Plr.concat_lf_diagonal(
|
178
|
+
return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, false))
|
179
|
+
elsif how == "diagonal_relaxed"
|
180
|
+
return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, true))
|
181
|
+
elsif how == "horizontal"
|
182
|
+
return Utils.wrap_ldf(Plr.concat_lf_horizontal(elems, parallel))
|
58
183
|
else
|
59
|
-
raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', and '
|
184
|
+
raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', 'diagonal', and 'diagonal_relaxed' concat strategy."
|
60
185
|
end
|
61
186
|
elsif first.is_a?(Series)
|
62
|
-
|
63
|
-
|
187
|
+
if how == "vertical"
|
188
|
+
out = Utils.wrap_s(Plr.concat_series(elems))
|
189
|
+
else
|
190
|
+
msg = "Series only supports 'vertical' concat strategy"
|
191
|
+
raise ArgumentError, msg
|
192
|
+
end
|
64
193
|
elsif first.is_a?(Expr)
|
65
194
|
out = first
|
66
|
-
|
195
|
+
elems[1..-1].each do |e|
|
67
196
|
out = out.append(e)
|
68
197
|
end
|
69
198
|
else
|
data/lib/polars/io/database.rb
CHANGED
@@ -51,8 +51,25 @@ module Polars
|
|
51
51
|
when :decimal
|
52
52
|
Decimal
|
53
53
|
when :float
|
54
|
+
# TODO uncomment in future release
|
55
|
+
# if column_type.limit && column_type.limit <= 24
|
56
|
+
# Float32
|
57
|
+
# else
|
58
|
+
# Float64
|
59
|
+
# end
|
54
60
|
Float64
|
55
61
|
when :integer
|
62
|
+
# TODO uncomment in future release
|
63
|
+
# case column_type.limit
|
64
|
+
# when 1
|
65
|
+
# Int8
|
66
|
+
# when 2
|
67
|
+
# Int16
|
68
|
+
# when 4
|
69
|
+
# Int32
|
70
|
+
# else
|
71
|
+
# Int64
|
72
|
+
# end
|
56
73
|
Int64
|
57
74
|
when :string, :text
|
58
75
|
String
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -433,7 +433,10 @@ module Polars
|
|
433
433
|
no_optimization: false,
|
434
434
|
slice_pushdown: true,
|
435
435
|
storage_options: nil,
|
436
|
-
retries: 2
|
436
|
+
retries: 2,
|
437
|
+
sync_on_close: nil,
|
438
|
+
mkdir: false,
|
439
|
+
lazy: false
|
437
440
|
)
|
438
441
|
lf = _set_sink_optimizations(
|
439
442
|
type_coercion: type_coercion,
|
@@ -468,17 +471,30 @@ module Polars
|
|
468
471
|
storage_options = nil
|
469
472
|
end
|
470
473
|
|
471
|
-
|
474
|
+
sink_options = {
|
475
|
+
"sync_on_close" => sync_on_close || "none",
|
476
|
+
"maintain_order" => maintain_order,
|
477
|
+
"mkdir" => mkdir
|
478
|
+
}
|
479
|
+
|
480
|
+
lf = lf.sink_parquet(
|
472
481
|
path,
|
473
482
|
compression,
|
474
483
|
compression_level,
|
475
484
|
statistics,
|
476
485
|
row_group_size,
|
477
486
|
data_pagesize_limit,
|
478
|
-
maintain_order,
|
479
487
|
storage_options,
|
480
|
-
retries
|
488
|
+
retries,
|
489
|
+
sink_options
|
481
490
|
)
|
491
|
+
lf = LazyFrame._from_rbldf(lf)
|
492
|
+
|
493
|
+
if !lazy
|
494
|
+
lf.collect
|
495
|
+
return nil
|
496
|
+
end
|
497
|
+
lf
|
482
498
|
end
|
483
499
|
|
484
500
|
# Evaluate the query in streaming mode and write to an IPC file.
|
@@ -520,7 +536,10 @@ module Polars
|
|
520
536
|
projection_pushdown: true,
|
521
537
|
simplify_expression: true,
|
522
538
|
slice_pushdown: true,
|
523
|
-
no_optimization: false
|
539
|
+
no_optimization: false,
|
540
|
+
sync_on_close: nil,
|
541
|
+
mkdir: false,
|
542
|
+
lazy: false
|
524
543
|
)
|
525
544
|
# TODO support storage options in Rust
|
526
545
|
storage_options = nil
|
@@ -541,13 +560,26 @@ module Polars
|
|
541
560
|
storage_options = nil
|
542
561
|
end
|
543
562
|
|
544
|
-
|
563
|
+
sink_options = {
|
564
|
+
"sync_on_close" => sync_on_close || "none",
|
565
|
+
"maintain_order" => maintain_order,
|
566
|
+
"mkdir" => mkdir
|
567
|
+
}
|
568
|
+
|
569
|
+
lf = lf.sink_ipc(
|
545
570
|
path,
|
546
571
|
compression,
|
547
|
-
maintain_order,
|
548
572
|
storage_options,
|
549
|
-
retries
|
573
|
+
retries,
|
574
|
+
sink_options
|
550
575
|
)
|
576
|
+
lf = LazyFrame._from_rbldf(lf)
|
577
|
+
|
578
|
+
if !lazy
|
579
|
+
lf.collect
|
580
|
+
return nil
|
581
|
+
end
|
582
|
+
lf
|
551
583
|
end
|
552
584
|
|
553
585
|
# Evaluate the query in streaming mode and write to a CSV file.
|
@@ -619,6 +651,10 @@ module Polars
|
|
619
651
|
# Slice pushdown optimization.
|
620
652
|
# @param no_optimization [Boolean]
|
621
653
|
# Turn off (certain) optimizations.
|
654
|
+
# @param storage_options [Object]
|
655
|
+
# Options that indicate how to connect to a cloud provider.
|
656
|
+
# @param retries [Integer]
|
657
|
+
# Number of retries if accessing a cloud instance fails.
|
622
658
|
#
|
623
659
|
# @return [DataFrame]
|
624
660
|
#
|
@@ -646,7 +682,12 @@ module Polars
|
|
646
682
|
projection_pushdown: true,
|
647
683
|
simplify_expression: true,
|
648
684
|
slice_pushdown: true,
|
649
|
-
no_optimization: false
|
685
|
+
no_optimization: false,
|
686
|
+
storage_options: nil,
|
687
|
+
retries: 2,
|
688
|
+
sync_on_close: nil,
|
689
|
+
mkdir: false,
|
690
|
+
lazy: false
|
650
691
|
)
|
651
692
|
Utils._check_arg_is_1byte("separator", separator, false)
|
652
693
|
Utils._check_arg_is_1byte("quote_char", quote_char, false)
|
@@ -660,7 +701,19 @@ module Polars
|
|
660
701
|
no_optimization: no_optimization
|
661
702
|
)
|
662
703
|
|
663
|
-
|
704
|
+
if storage_options&.any?
|
705
|
+
storage_options = storage_options.to_a
|
706
|
+
else
|
707
|
+
storage_options = nil
|
708
|
+
end
|
709
|
+
|
710
|
+
sink_options = {
|
711
|
+
"sync_on_close" => sync_on_close || "none",
|
712
|
+
"maintain_order" => maintain_order,
|
713
|
+
"mkdir" => mkdir
|
714
|
+
}
|
715
|
+
|
716
|
+
lf = lf.sink_csv(
|
664
717
|
path,
|
665
718
|
include_bom,
|
666
719
|
include_header,
|
@@ -675,8 +728,17 @@ module Polars
|
|
675
728
|
float_precision,
|
676
729
|
null_value,
|
677
730
|
quote_style,
|
678
|
-
|
731
|
+
storage_options,
|
732
|
+
retries,
|
733
|
+
sink_options
|
679
734
|
)
|
735
|
+
lf = LazyFrame._from_rbldf(lf)
|
736
|
+
|
737
|
+
if !lazy
|
738
|
+
lf.collect
|
739
|
+
return nil
|
740
|
+
end
|
741
|
+
lf
|
680
742
|
end
|
681
743
|
|
682
744
|
# Evaluate the query in streaming mode and write to an NDJSON file.
|
@@ -716,7 +778,10 @@ module Polars
|
|
716
778
|
slice_pushdown: true,
|
717
779
|
no_optimization: false,
|
718
780
|
storage_options: nil,
|
719
|
-
retries: 2
|
781
|
+
retries: 2,
|
782
|
+
sync_on_close: nil,
|
783
|
+
mkdir: false,
|
784
|
+
lazy: false
|
720
785
|
)
|
721
786
|
lf = _set_sink_optimizations(
|
722
787
|
type_coercion: type_coercion,
|
@@ -733,7 +798,20 @@ module Polars
|
|
733
798
|
storage_options = nil
|
734
799
|
end
|
735
800
|
|
736
|
-
|
801
|
+
sink_options = {
|
802
|
+
"sync_on_close" => sync_on_close || "none",
|
803
|
+
"maintain_order" => maintain_order,
|
804
|
+
"mkdir" => mkdir
|
805
|
+
}
|
806
|
+
|
807
|
+
lf = lf.sink_json(path, storage_options, retries, sink_options)
|
808
|
+
lf = LazyFrame._from_rbldf(lf)
|
809
|
+
|
810
|
+
if !lazy
|
811
|
+
lf.collect
|
812
|
+
return nil
|
813
|
+
end
|
814
|
+
lf
|
737
815
|
end
|
738
816
|
|
739
817
|
# @private
|
@@ -1923,6 +2001,24 @@ module Polars
|
|
1923
2001
|
# - true: -> Always coalesce join columns.
|
1924
2002
|
# - false: -> Never coalesce join columns.
|
1925
2003
|
# Note that joining on any other expressions than `col` will turn off coalescing.
|
2004
|
+
# @param maintain_order ['none', 'left', 'right', 'left_right', 'right_left']
|
2005
|
+
# Which DataFrame row order to preserve, if any.
|
2006
|
+
# Do not rely on any observed ordering without explicitly
|
2007
|
+
# setting this parameter, as your code may break in a future release.
|
2008
|
+
# Not specifying any ordering can improve performance
|
2009
|
+
# Supported for inner, left, right and full joins
|
2010
|
+
#
|
2011
|
+
# * *none*
|
2012
|
+
# No specific ordering is desired. The ordering might differ across
|
2013
|
+
# Polars versions or even between different runs.
|
2014
|
+
# * *left*
|
2015
|
+
# Preserves the order of the left DataFrame.
|
2016
|
+
# * *right*
|
2017
|
+
# Preserves the order of the right DataFrame.
|
2018
|
+
# * *left_right*
|
2019
|
+
# First preserves the order of the left DataFrame, then the right.
|
2020
|
+
# * *right_left*
|
2021
|
+
# First preserves the order of the right DataFrame, then the left.
|
1926
2022
|
#
|
1927
2023
|
# @return [LazyFrame]
|
1928
2024
|
#
|
@@ -2016,18 +2112,33 @@ module Polars
|
|
2016
2112
|
join_nulls: false,
|
2017
2113
|
allow_parallel: true,
|
2018
2114
|
force_parallel: false,
|
2019
|
-
coalesce: nil
|
2115
|
+
coalesce: nil,
|
2116
|
+
maintain_order: nil
|
2020
2117
|
)
|
2021
2118
|
if !other.is_a?(LazyFrame)
|
2022
2119
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
2023
2120
|
end
|
2024
2121
|
|
2122
|
+
if maintain_order.nil?
|
2123
|
+
maintain_order = "none"
|
2124
|
+
end
|
2125
|
+
|
2025
2126
|
if how == "outer"
|
2026
2127
|
how = "full"
|
2027
2128
|
elsif how == "cross"
|
2028
2129
|
return _from_rbldf(
|
2029
2130
|
_ldf.join(
|
2030
|
-
other._ldf,
|
2131
|
+
other._ldf,
|
2132
|
+
[],
|
2133
|
+
[],
|
2134
|
+
allow_parallel,
|
2135
|
+
join_nulls,
|
2136
|
+
force_parallel,
|
2137
|
+
how,
|
2138
|
+
suffix,
|
2139
|
+
validate,
|
2140
|
+
maintain_order,
|
2141
|
+
coalesce
|
2031
2142
|
)
|
2032
2143
|
)
|
2033
2144
|
end
|
@@ -2054,6 +2165,7 @@ module Polars
|
|
2054
2165
|
how,
|
2055
2166
|
suffix,
|
2056
2167
|
validate,
|
2168
|
+
maintain_order,
|
2057
2169
|
coalesce
|
2058
2170
|
)
|
2059
2171
|
)
|
@@ -3347,12 +3459,17 @@ module Polars
|
|
3347
3459
|
_from_rbldf(_ldf.merge_sorted(other._ldf, key))
|
3348
3460
|
end
|
3349
3461
|
|
3350
|
-
#
|
3462
|
+
# Flag a column as sorted.
|
3463
|
+
#
|
3464
|
+
# This can speed up future operations.
|
3465
|
+
#
|
3466
|
+
# @note
|
3467
|
+
# This can lead to incorrect results if the data is NOT sorted! Use with care!
|
3351
3468
|
#
|
3352
3469
|
# @param column [Object]
|
3353
|
-
#
|
3470
|
+
# Column that is sorted.
|
3354
3471
|
# @param descending [Boolean]
|
3355
|
-
# Whether the
|
3472
|
+
# Whether the column is sorted in descending order.
|
3356
3473
|
#
|
3357
3474
|
# @return [LazyFrame]
|
3358
3475
|
def set_sorted(
|
data/lib/polars/list_expr.rb
CHANGED
@@ -403,7 +403,7 @@ module Polars
|
|
403
403
|
# The indices may be defined in a single column, or by sublists in another
|
404
404
|
# column of dtype `List`.
|
405
405
|
#
|
406
|
-
# @param
|
406
|
+
# @param indices [Object]
|
407
407
|
# Indices to return per sublist
|
408
408
|
# @param null_on_oob [Boolean]
|
409
409
|
# Behavior if an index is out of bounds:
|
@@ -427,12 +427,9 @@ module Polars
|
|
427
427
|
# # │ [] ┆ [null, null] │
|
428
428
|
# # │ [1, 2, … 5] ┆ [1, 5] │
|
429
429
|
# # └─────────────┴──────────────┘
|
430
|
-
def gather(
|
431
|
-
|
432
|
-
|
433
|
-
end
|
434
|
-
index = Utils.parse_into_expression(index, str_as_lit: false)
|
435
|
-
Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
|
430
|
+
def gather(indices, null_on_oob: false)
|
431
|
+
indices = Utils.parse_into_expression(indices)
|
432
|
+
Utils.wrap_expr(_rbexpr.list_gather(indices, null_on_oob))
|
436
433
|
end
|
437
434
|
alias_method :take, :gather
|
438
435
|
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Polars
|
2
|
+
class Schema
|
3
|
+
def initialize(schema, check_dtypes: true)
|
4
|
+
raise Todo if check_dtypes
|
5
|
+
@schema = schema.to_h
|
6
|
+
end
|
7
|
+
|
8
|
+
def [](key)
|
9
|
+
@schema[key]
|
10
|
+
end
|
11
|
+
|
12
|
+
def names
|
13
|
+
@schema.keys
|
14
|
+
end
|
15
|
+
|
16
|
+
def dtypes
|
17
|
+
@schema.values
|
18
|
+
end
|
19
|
+
|
20
|
+
def length
|
21
|
+
@schema.length
|
22
|
+
end
|
23
|
+
|
24
|
+
def to_s
|
25
|
+
"#{self.class.name}(#{@schema})"
|
26
|
+
end
|
27
|
+
alias_method :inspect, :to_s
|
28
|
+
end
|
29
|
+
end
|
data/lib/polars/series.rb
CHANGED
@@ -2554,29 +2554,7 @@ module Polars
|
|
2554
2554
|
# # Numo::Int64#shape=[3]
|
2555
2555
|
# # [1, 2, 3]
|
2556
2556
|
def to_numo
|
2557
|
-
if
|
2558
|
-
if is_datelike
|
2559
|
-
Numo::RObject.cast(to_a)
|
2560
|
-
elsif is_numeric
|
2561
|
-
# TODO make more efficient
|
2562
|
-
{
|
2563
|
-
UInt8 => Numo::UInt8,
|
2564
|
-
UInt16 => Numo::UInt16,
|
2565
|
-
UInt32 => Numo::UInt32,
|
2566
|
-
UInt64 => Numo::UInt64,
|
2567
|
-
Int8 => Numo::Int8,
|
2568
|
-
Int16 => Numo::Int16,
|
2569
|
-
Int32 => Numo::Int32,
|
2570
|
-
Int64 => Numo::Int64,
|
2571
|
-
Float32 => Numo::SFloat,
|
2572
|
-
Float64 => Numo::DFloat
|
2573
|
-
}.fetch(dtype.class).cast(to_a)
|
2574
|
-
elsif is_boolean
|
2575
|
-
Numo::Bit.cast(to_a)
|
2576
|
-
else
|
2577
|
-
_s.to_numo
|
2578
|
-
end
|
2579
|
-
elsif is_datelike
|
2557
|
+
if is_datelike
|
2580
2558
|
Numo::RObject.cast(to_a)
|
2581
2559
|
else
|
2582
2560
|
_s.to_numo
|
@@ -3599,24 +3577,26 @@ module Polars
|
|
3599
3577
|
# Integer size of the rolling window.
|
3600
3578
|
# @param bias [Boolean]
|
3601
3579
|
# If false, the calculations are corrected for statistical bias.
|
3580
|
+
# @param min_samples [Integer]
|
3581
|
+
# The number of values in the window that should be non-null before computing
|
3582
|
+
# a result. If set to `nil` (default), it will be set equal to `window_size`.
|
3583
|
+
# @param center [Boolean]
|
3584
|
+
# Set the labels at the center of the window.
|
3602
3585
|
#
|
3603
3586
|
# @return [Series]
|
3604
3587
|
#
|
3605
3588
|
# @example
|
3606
|
-
#
|
3607
|
-
# s.rolling_skew(3)
|
3589
|
+
# Polars::Series.new([1, 4, 2, 9]).rolling_skew(3)
|
3608
3590
|
# # =>
|
3609
|
-
# # shape: (
|
3610
|
-
# # Series: '
|
3591
|
+
# # shape: (4,)
|
3592
|
+
# # Series: '' [f64]
|
3611
3593
|
# # [
|
3612
3594
|
# # null
|
3613
3595
|
# # null
|
3614
|
-
# # 0.0
|
3615
|
-
# # 0.0
|
3616
3596
|
# # 0.381802
|
3617
|
-
# # 0.
|
3597
|
+
# # 0.47033
|
3618
3598
|
# # ]
|
3619
|
-
def rolling_skew(window_size, bias: true)
|
3599
|
+
def rolling_skew(window_size, bias: true, min_samples: nil, center: false)
|
3620
3600
|
super
|
3621
3601
|
end
|
3622
3602
|
|
@@ -3815,6 +3795,30 @@ module Polars
|
|
3815
3795
|
super
|
3816
3796
|
end
|
3817
3797
|
|
3798
|
+
# Fill null values using interpolation based on another column.
|
3799
|
+
#
|
3800
|
+
# @param by [Expr]
|
3801
|
+
# Column to interpolate values based on.
|
3802
|
+
#
|
3803
|
+
# @return [Series]
|
3804
|
+
#
|
3805
|
+
# @example Fill null values using linear interpolation.
|
3806
|
+
# s = Polars::Series.new("a", [1, nil, nil, 3])
|
3807
|
+
# by = Polars::Series.new("b", [1, 2, 7, 8])
|
3808
|
+
# s.interpolate_by(by)
|
3809
|
+
# # =>
|
3810
|
+
# # shape: (4,)
|
3811
|
+
# # Series: 'a' [f64]
|
3812
|
+
# # [
|
3813
|
+
# # 1.0
|
3814
|
+
# # 1.285714
|
3815
|
+
# # 2.714286
|
3816
|
+
# # 3.0
|
3817
|
+
# # ]
|
3818
|
+
def interpolate_by(by)
|
3819
|
+
super
|
3820
|
+
end
|
3821
|
+
|
3818
3822
|
# Compute absolute values.
|
3819
3823
|
#
|
3820
3824
|
# @return [Series]
|
@@ -4041,7 +4045,7 @@ module Polars
|
|
4041
4045
|
#
|
4042
4046
|
# @example
|
4043
4047
|
# s.kurtosis(fisher: false, bias: false)
|
4044
|
-
# # => 2.
|
4048
|
+
# # => 2.1040361802642717
|
4045
4049
|
def kurtosis(fisher: true, bias: true)
|
4046
4050
|
_s.kurtosis(fisher, bias)
|
4047
4051
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -61,6 +61,7 @@ require_relative "polars/list_name_space"
|
|
61
61
|
require_relative "polars/meta_expr"
|
62
62
|
require_relative "polars/name_expr"
|
63
63
|
require_relative "polars/rolling_group_by"
|
64
|
+
require_relative "polars/schema"
|
64
65
|
require_relative "polars/selectors"
|
65
66
|
require_relative "polars/series"
|
66
67
|
require_relative "polars/slice"
|