polars-df 0.10.0-arm64-darwin → 0.11.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +90 -48
- data/LICENSE-THIRD-PARTY.txt +152 -79
- data/README.md +6 -6
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/batched_csv_reader.rb +9 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +83 -302
- data/lib/polars/date_time_expr.rb +1 -0
- data/lib/polars/date_time_name_space.rb +5 -1
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1134 -20
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +296 -490
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +23 -166
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +2 -2
- data/lib/polars/string_expr.rb +37 -36
- data/lib/polars/utils.rb +35 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +12 -4
data/lib/polars/lazy_frame.rb
CHANGED
@@ -27,149 +27,6 @@ module Polars
|
|
27
27
|
ldf
|
28
28
|
end
|
29
29
|
|
30
|
-
# @private
|
31
|
-
def self._scan_csv(
|
32
|
-
file,
|
33
|
-
has_header: true,
|
34
|
-
sep: ",",
|
35
|
-
comment_char: nil,
|
36
|
-
quote_char: '"',
|
37
|
-
skip_rows: 0,
|
38
|
-
dtypes: nil,
|
39
|
-
null_values: nil,
|
40
|
-
ignore_errors: false,
|
41
|
-
cache: true,
|
42
|
-
with_column_names: nil,
|
43
|
-
infer_schema_length: 100,
|
44
|
-
n_rows: nil,
|
45
|
-
encoding: "utf8",
|
46
|
-
low_memory: false,
|
47
|
-
rechunk: true,
|
48
|
-
skip_rows_after_header: 0,
|
49
|
-
row_count_name: nil,
|
50
|
-
row_count_offset: 0,
|
51
|
-
parse_dates: false,
|
52
|
-
eol_char: "\n",
|
53
|
-
truncate_ragged_lines: true
|
54
|
-
)
|
55
|
-
dtype_list = nil
|
56
|
-
if !dtypes.nil?
|
57
|
-
dtype_list = []
|
58
|
-
dtypes.each do |k, v|
|
59
|
-
dtype_list << [k, Utils.rb_type_to_dtype(v)]
|
60
|
-
end
|
61
|
-
end
|
62
|
-
processed_null_values = Utils._process_null_values(null_values)
|
63
|
-
|
64
|
-
_from_rbldf(
|
65
|
-
RbLazyFrame.new_from_csv(
|
66
|
-
file,
|
67
|
-
sep,
|
68
|
-
has_header,
|
69
|
-
ignore_errors,
|
70
|
-
skip_rows,
|
71
|
-
n_rows,
|
72
|
-
cache,
|
73
|
-
dtype_list,
|
74
|
-
low_memory,
|
75
|
-
comment_char,
|
76
|
-
quote_char,
|
77
|
-
processed_null_values,
|
78
|
-
infer_schema_length,
|
79
|
-
with_column_names,
|
80
|
-
rechunk,
|
81
|
-
skip_rows_after_header,
|
82
|
-
encoding,
|
83
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
84
|
-
parse_dates,
|
85
|
-
eol_char,
|
86
|
-
truncate_ragged_lines
|
87
|
-
)
|
88
|
-
)
|
89
|
-
end
|
90
|
-
|
91
|
-
# @private
|
92
|
-
def self._scan_parquet(
|
93
|
-
file,
|
94
|
-
n_rows: nil,
|
95
|
-
cache: true,
|
96
|
-
parallel: "auto",
|
97
|
-
rechunk: true,
|
98
|
-
row_count_name: nil,
|
99
|
-
row_count_offset: 0,
|
100
|
-
storage_options: nil,
|
101
|
-
low_memory: false,
|
102
|
-
use_statistics: true,
|
103
|
-
hive_partitioning: true
|
104
|
-
)
|
105
|
-
_from_rbldf(
|
106
|
-
RbLazyFrame.new_from_parquet(
|
107
|
-
file,
|
108
|
-
[],
|
109
|
-
n_rows,
|
110
|
-
cache,
|
111
|
-
parallel,
|
112
|
-
rechunk,
|
113
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
114
|
-
low_memory,
|
115
|
-
use_statistics,
|
116
|
-
hive_partitioning,
|
117
|
-
nil
|
118
|
-
)
|
119
|
-
)
|
120
|
-
end
|
121
|
-
|
122
|
-
# @private
|
123
|
-
def self._scan_ipc(
|
124
|
-
file,
|
125
|
-
n_rows: nil,
|
126
|
-
cache: true,
|
127
|
-
rechunk: true,
|
128
|
-
row_count_name: nil,
|
129
|
-
row_count_offset: 0,
|
130
|
-
storage_options: nil,
|
131
|
-
memory_map: true
|
132
|
-
)
|
133
|
-
if Utils.pathlike?(file)
|
134
|
-
file = Utils.normalise_filepath(file)
|
135
|
-
end
|
136
|
-
|
137
|
-
_from_rbldf(
|
138
|
-
RbLazyFrame.new_from_ipc(
|
139
|
-
file,
|
140
|
-
n_rows,
|
141
|
-
cache,
|
142
|
-
rechunk,
|
143
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
144
|
-
memory_map
|
145
|
-
)
|
146
|
-
)
|
147
|
-
end
|
148
|
-
|
149
|
-
# @private
|
150
|
-
def self._scan_ndjson(
|
151
|
-
file,
|
152
|
-
infer_schema_length: nil,
|
153
|
-
batch_size: nil,
|
154
|
-
n_rows: nil,
|
155
|
-
low_memory: false,
|
156
|
-
rechunk: true,
|
157
|
-
row_count_name: nil,
|
158
|
-
row_count_offset: 0
|
159
|
-
)
|
160
|
-
_from_rbldf(
|
161
|
-
RbLazyFrame.new_from_ndjson(
|
162
|
-
file,
|
163
|
-
infer_schema_length,
|
164
|
-
batch_size,
|
165
|
-
n_rows,
|
166
|
-
low_memory,
|
167
|
-
rechunk,
|
168
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset)
|
169
|
-
)
|
170
|
-
)
|
171
|
-
end
|
172
|
-
|
173
30
|
# def self.from_json
|
174
31
|
# end
|
175
32
|
|
@@ -181,7 +38,7 @@ module Polars
|
|
181
38
|
# @return [LazyFrame]
|
182
39
|
def self.read_json(file)
|
183
40
|
if Utils.pathlike?(file)
|
184
|
-
file = Utils.normalise_filepath(file)
|
41
|
+
file = Utils.normalize_filepath(file)
|
185
42
|
end
|
186
43
|
|
187
44
|
Utils.wrap_ldf(RbLazyFrame.read_json(file))
|
@@ -288,7 +145,7 @@ module Polars
|
|
288
145
|
# @return [nil]
|
289
146
|
def write_json(file)
|
290
147
|
if Utils.pathlike?(file)
|
291
|
-
file = Utils.normalise_filepath(file)
|
148
|
+
file = Utils.normalize_filepath(file)
|
292
149
|
end
|
293
150
|
_ldf.write_json(file)
|
294
151
|
nil
|
@@ -1137,7 +994,7 @@ module Polars
|
|
1137
994
|
# # ┌─────────┐
|
1138
995
|
# # │ literal │
|
1139
996
|
# # │ --- │
|
1140
|
-
# # │
|
997
|
+
# # │ i32 │
|
1141
998
|
# # ╞═════════╡
|
1142
999
|
# # │ 0 │
|
1143
1000
|
# # │ 0 │
|
@@ -1449,16 +1306,16 @@ module Polars
|
|
1449
1306
|
# )
|
1450
1307
|
# # =>
|
1451
1308
|
# # shape: (4, 3)
|
1452
|
-
# #
|
1453
|
-
# # │ time ┆ time_count ┆ time_agg_list
|
1454
|
-
# # │ --- ┆ --- ┆ ---
|
1455
|
-
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
|
1456
|
-
# #
|
1457
|
-
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12
|
1458
|
-
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12
|
1459
|
-
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12
|
1460
|
-
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
|
1461
|
-
# #
|
1309
|
+
# # ┌─────────────────────┬────────────┬─────────────────────────────────┐
|
1310
|
+
# # │ time ┆ time_count ┆ time_agg_list │
|
1311
|
+
# # │ --- ┆ --- ┆ --- │
|
1312
|
+
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
|
1313
|
+
# # ╞═════════════════════╪════════════╪═════════════════════════════════╡
|
1314
|
+
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-… │
|
1315
|
+
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-… │
|
1316
|
+
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-… │
|
1317
|
+
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
|
1318
|
+
# # └─────────────────────┴────────────┴─────────────────────────────────┘
|
1462
1319
|
#
|
1463
1320
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
1464
1321
|
# df.group_by_dynamic("time", every: "1h", closed: "both").agg(
|
@@ -2236,16 +2093,16 @@ module Polars
|
|
2236
2093
|
# df.with_row_index.collect
|
2237
2094
|
# # =>
|
2238
2095
|
# # shape: (3, 3)
|
2239
|
-
# #
|
2240
|
-
# # │
|
2241
|
-
# # │ ---
|
2242
|
-
# # │ u32
|
2243
|
-
# #
|
2244
|
-
# # │ 0
|
2245
|
-
# # │ 1
|
2246
|
-
# # │ 2
|
2247
|
-
# #
|
2248
|
-
def with_row_index(name: "
|
2096
|
+
# # ┌───────┬─────┬─────┐
|
2097
|
+
# # │ index ┆ a ┆ b │
|
2098
|
+
# # │ --- ┆ --- ┆ --- │
|
2099
|
+
# # │ u32 ┆ i64 ┆ i64 │
|
2100
|
+
# # ╞═══════╪═════╪═════╡
|
2101
|
+
# # │ 0 ┆ 1 ┆ 2 │
|
2102
|
+
# # │ 1 ┆ 3 ┆ 4 │
|
2103
|
+
# # │ 2 ┆ 5 ┆ 6 │
|
2104
|
+
# # └───────┴─────┴─────┘
|
2105
|
+
def with_row_index(name: "index", offset: 0)
|
2249
2106
|
_from_rbldf(_ldf.with_row_index(name, offset))
|
2250
2107
|
end
|
2251
2108
|
alias_method :with_row_count, :with_row_index
|
data/lib/polars/lazy_group_by.rb
CHANGED
@@ -6,11 +6,108 @@ module Polars
|
|
6
6
|
@lgb = lgb
|
7
7
|
end
|
8
8
|
|
9
|
-
#
|
9
|
+
# Compute aggregations for each group of a group by operation.
|
10
|
+
#
|
11
|
+
# @param aggs [Array]
|
12
|
+
# Aggregations to compute for each group of the group by operation,
|
13
|
+
# specified as positional arguments.
|
14
|
+
# Accepts expression input. Strings are parsed as column names.
|
15
|
+
# @param named_aggs [Hash]
|
16
|
+
# Additional aggregations, specified as keyword arguments.
|
17
|
+
# The resulting columns will be renamed to the keyword used.
|
10
18
|
#
|
11
19
|
# @return [LazyFrame]
|
12
|
-
|
13
|
-
|
20
|
+
#
|
21
|
+
# @example Compute the aggregation of the columns for each group.
|
22
|
+
# ldf = Polars::DataFrame.new(
|
23
|
+
# {
|
24
|
+
# "a" => ["a", "b", "a", "b", "c"],
|
25
|
+
# "b" => [1, 2, 1, 3, 3],
|
26
|
+
# "c" => [5, 4, 3, 2, 1]
|
27
|
+
# }
|
28
|
+
# ).lazy
|
29
|
+
# ldf.group_by("a").agg(
|
30
|
+
# [Polars.col("b"), Polars.col("c")]
|
31
|
+
# ).collect
|
32
|
+
# # =>
|
33
|
+
# # shape: (3, 3)
|
34
|
+
# # ┌─────┬───────────┬───────────┐
|
35
|
+
# # │ a ┆ b ┆ c │
|
36
|
+
# # │ --- ┆ --- ┆ --- │
|
37
|
+
# # │ str ┆ list[i64] ┆ list[i64] │
|
38
|
+
# # ╞═════╪═══════════╪═══════════╡
|
39
|
+
# # │ a ┆ [1, 1] ┆ [5, 3] │
|
40
|
+
# # │ b ┆ [2, 3] ┆ [4, 2] │
|
41
|
+
# # │ c ┆ [3] ┆ [1] │
|
42
|
+
# # └─────┴───────────┴───────────┘
|
43
|
+
#
|
44
|
+
# @example Compute the sum of a column for each group.
|
45
|
+
# ldf.group_by("a").agg(
|
46
|
+
# Polars.col("b").sum
|
47
|
+
# ).collect
|
48
|
+
# # =>
|
49
|
+
# # shape: (3, 2)
|
50
|
+
# # ┌─────┬─────┐
|
51
|
+
# # │ a ┆ b │
|
52
|
+
# # │ --- ┆ --- │
|
53
|
+
# # │ str ┆ i64 │
|
54
|
+
# # ╞═════╪═════╡
|
55
|
+
# # │ a ┆ 2 │
|
56
|
+
# # │ b ┆ 5 │
|
57
|
+
# # │ c ┆ 3 │
|
58
|
+
# # └─────┴─────┘
|
59
|
+
#
|
60
|
+
# @example Compute multiple aggregates at once by passing a list of expressions.
|
61
|
+
# ldf.group_by("a").agg(
|
62
|
+
# [Polars.sum("b"), Polars.mean("c")]
|
63
|
+
# ).collect
|
64
|
+
# # =>
|
65
|
+
# # shape: (3, 3)
|
66
|
+
# # ┌─────┬─────┬─────┐
|
67
|
+
# # │ a ┆ b ┆ c │
|
68
|
+
# # │ --- ┆ --- ┆ --- │
|
69
|
+
# # │ str ┆ i64 ┆ f64 │
|
70
|
+
# # ╞═════╪═════╪═════╡
|
71
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
72
|
+
# # │ a ┆ 2 ┆ 4.0 │
|
73
|
+
# # │ b ┆ 5 ┆ 3.0 │
|
74
|
+
# # └─────┴─────┴─────┘
|
75
|
+
#
|
76
|
+
# @example Or use positional arguments to compute multiple aggregations in the same way.
|
77
|
+
# ldf.group_by("a").agg(
|
78
|
+
# Polars.sum("b").name.suffix("_sum"),
|
79
|
+
# (Polars.col("c") ** 2).mean.name.suffix("_mean_squared")
|
80
|
+
# ).collect
|
81
|
+
# # =>
|
82
|
+
# # shape: (3, 3)
|
83
|
+
# # ┌─────┬───────┬────────────────┐
|
84
|
+
# # │ a ┆ b_sum ┆ c_mean_squared │
|
85
|
+
# # │ --- ┆ --- ┆ --- │
|
86
|
+
# # │ str ┆ i64 ┆ f64 │
|
87
|
+
# # ╞═════╪═══════╪════════════════╡
|
88
|
+
# # │ a ┆ 2 ┆ 17.0 │
|
89
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
90
|
+
# # │ b ┆ 5 ┆ 10.0 │
|
91
|
+
# # └─────┴───────┴────────────────┘
|
92
|
+
#
|
93
|
+
# @example Use keyword arguments to easily name your expression inputs.
|
94
|
+
# ldf.group_by("a").agg(
|
95
|
+
# b_sum: Polars.sum("b"),
|
96
|
+
# c_mean_squared: (Polars.col("c") ** 2).mean
|
97
|
+
# ).collect
|
98
|
+
# # =>
|
99
|
+
# # shape: (3, 3)
|
100
|
+
# # ┌─────┬───────┬────────────────┐
|
101
|
+
# # │ a ┆ b_sum ┆ c_mean_squared │
|
102
|
+
# # │ --- ┆ --- ┆ --- │
|
103
|
+
# # │ str ┆ i64 ┆ f64 │
|
104
|
+
# # ╞═════╪═══════╪════════════════╡
|
105
|
+
# # │ a ┆ 2 ┆ 17.0 │
|
106
|
+
# # │ c ┆ 3 ┆ 1.0 │
|
107
|
+
# # │ b ┆ 5 ┆ 10.0 │
|
108
|
+
# # └─────┴───────┴────────────────┘
|
109
|
+
def agg(*aggs, **named_aggs)
|
110
|
+
rbexprs = Utils.parse_as_list_of_expressions(*aggs, **named_aggs)
|
14
111
|
Utils.wrap_ldf(@lgb.agg(rbexprs))
|
15
112
|
end
|
16
113
|
|
@@ -25,12 +25,12 @@ module Polars
|
|
25
25
|
@check_sorted = check_sorted
|
26
26
|
end
|
27
27
|
|
28
|
-
def agg(aggs)
|
28
|
+
def agg(*aggs, **named_aggs)
|
29
29
|
@df.lazy
|
30
30
|
.group_by_rolling(
|
31
31
|
index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
|
32
32
|
)
|
33
|
-
.agg(aggs)
|
33
|
+
.agg(*aggs, **named_aggs)
|
34
34
|
.collect(no_optimization: true, string_cache: false)
|
35
35
|
end
|
36
36
|
end
|
data/lib/polars/series.rb
CHANGED
@@ -1594,7 +1594,7 @@ module Polars
|
|
1594
1594
|
# # 4
|
1595
1595
|
# # 3
|
1596
1596
|
# # ]
|
1597
|
-
def top_k(k: 5)
|
1597
|
+
def top_k(k: 5, nulls_last: false, multithreaded: true)
|
1598
1598
|
super
|
1599
1599
|
end
|
1600
1600
|
|
@@ -1616,7 +1616,7 @@ module Polars
|
|
1616
1616
|
# # 2
|
1617
1617
|
# # 3
|
1618
1618
|
# # ]
|
1619
|
-
def bottom_k(k: 5)
|
1619
|
+
def bottom_k(k: 5, nulls_last: false, multithreaded: true)
|
1620
1620
|
super
|
1621
1621
|
end
|
1622
1622
|
|
data/lib/polars/string_expr.rb
CHANGED
@@ -840,6 +840,7 @@ module Polars
|
|
840
840
|
# # │ true │
|
841
841
|
# # └──────────┘
|
842
842
|
def json_path_match(json_path)
|
843
|
+
json_path = Utils.parse_as_expression(json_path, str_as_lit: true)
|
843
844
|
Utils.wrap_expr(_rbexpr.str_json_path_match(json_path))
|
844
845
|
end
|
845
846
|
|
@@ -1018,15 +1019,15 @@ module Polars
|
|
1018
1019
|
# )
|
1019
1020
|
# # =>
|
1020
1021
|
# # shape: (3, 3)
|
1021
|
-
# #
|
1022
|
-
# # │ url
|
1023
|
-
# # │ ---
|
1024
|
-
# # │ str
|
1025
|
-
# #
|
1026
|
-
# # │ http://vote.com/ballon_dor?
|
1027
|
-
# # │ http://vote.com/ballon_dor?
|
1028
|
-
# # │ http://vote.com/ballon_dor?
|
1029
|
-
# #
|
1022
|
+
# # ┌─────────────────────────────────┬───────────────────────┬──────────┐
|
1023
|
+
# # │ url ┆ captures ┆ name │
|
1024
|
+
# # │ --- ┆ --- ┆ --- │
|
1025
|
+
# # │ str ┆ struct[2] ┆ str │
|
1026
|
+
# # ╞═════════════════════════════════╪═══════════════════════╪══════════╡
|
1027
|
+
# # │ http://vote.com/ballon_dor?can… ┆ {"messi","python"} ┆ MESSI │
|
1028
|
+
# # │ http://vote.com/ballon_dor?can… ┆ {"weghorst","polars"} ┆ WEGHORST │
|
1029
|
+
# # │ http://vote.com/ballon_dor?err… ┆ {null,null} ┆ null │
|
1030
|
+
# # └─────────────────────────────────┴───────────────────────┴──────────┘
|
1030
1031
|
def extract_groups(pattern)
|
1031
1032
|
Utils.wrap_expr(_rbexpr.str_extract_groups(pattern))
|
1032
1033
|
end
|
@@ -1418,15 +1419,15 @@ module Polars
|
|
1418
1419
|
# )
|
1419
1420
|
# # =>
|
1420
1421
|
# # shape: (3, 2)
|
1421
|
-
# #
|
1422
|
-
# # │ lyrics
|
1423
|
-
# # │ ---
|
1424
|
-
# # │ str
|
1425
|
-
# #
|
1426
|
-
# # │ Everybody wants to rule the
|
1427
|
-
# # │ Tell me what you want, what
|
1428
|
-
# # │ Can you feel the love tonight
|
1429
|
-
# #
|
1422
|
+
# # ┌─────────────────────────────────┬──────────────┐
|
1423
|
+
# # │ lyrics ┆ contains_any │
|
1424
|
+
# # │ --- ┆ --- │
|
1425
|
+
# # │ str ┆ bool │
|
1426
|
+
# # ╞═════════════════════════════════╪══════════════╡
|
1427
|
+
# # │ Everybody wants to rule the wo… ┆ false │
|
1428
|
+
# # │ Tell me what you want, what yo… ┆ true │
|
1429
|
+
# # │ Can you feel the love tonight ┆ true │
|
1430
|
+
# # └─────────────────────────────────┴──────────────┘
|
1430
1431
|
def contains_any(patterns, ascii_case_insensitive: false)
|
1431
1432
|
patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
|
1432
1433
|
Utils.wrap_expr(
|
@@ -1468,15 +1469,15 @@ module Polars
|
|
1468
1469
|
# )
|
1469
1470
|
# # =>
|
1470
1471
|
# # shape: (3, 2)
|
1471
|
-
# #
|
1472
|
-
# # │ lyrics
|
1473
|
-
# # │ ---
|
1474
|
-
# # │ str
|
1475
|
-
# #
|
1476
|
-
# # │ Everybody wants to rule the
|
1477
|
-
# # │ Tell me what you want, what
|
1478
|
-
# # │ Can you feel the love tonight
|
1479
|
-
# #
|
1472
|
+
# # ┌─────────────────────────────────┬─────────────────────────────────┐
|
1473
|
+
# # │ lyrics ┆ removes_pronouns │
|
1474
|
+
# # │ --- ┆ --- │
|
1475
|
+
# # │ str ┆ str │
|
1476
|
+
# # ╞═════════════════════════════════╪═════════════════════════════════╡
|
1477
|
+
# # │ Everybody wants to rule the wo… ┆ Everybody wants to rule the wo… │
|
1478
|
+
# # │ Tell me what you want, what yo… ┆ Tell what want, what really… │
|
1479
|
+
# # │ Can you feel the love tonight ┆ Can feel the love tonight │
|
1480
|
+
# # └─────────────────────────────────┴─────────────────────────────────┘
|
1480
1481
|
#
|
1481
1482
|
# @example
|
1482
1483
|
# df.with_columns(
|
@@ -1489,15 +1490,15 @@ module Polars
|
|
1489
1490
|
# )
|
1490
1491
|
# # =>
|
1491
1492
|
# # shape: (3, 2)
|
1492
|
-
# #
|
1493
|
-
# # │ lyrics
|
1494
|
-
# # │ ---
|
1495
|
-
# # │ str
|
1496
|
-
# #
|
1497
|
-
# # │ Everybody wants to rule the
|
1498
|
-
# # │ Tell me what you want, what
|
1499
|
-
# # │ Can you feel the love tonight
|
1500
|
-
# #
|
1493
|
+
# # ┌─────────────────────────────────┬─────────────────────────────────┐
|
1494
|
+
# # │ lyrics ┆ confusing │
|
1495
|
+
# # │ --- ┆ --- │
|
1496
|
+
# # │ str ┆ str │
|
1497
|
+
# # ╞═════════════════════════════════╪═════════════════════════════════╡
|
1498
|
+
# # │ Everybody wants to rule the wo… ┆ Everybody wants to rule the wo… │
|
1499
|
+
# # │ Tell me what you want, what yo… ┆ Tell you what me want, what me… │
|
1500
|
+
# # │ Can you feel the love tonight ┆ Can me feel the love tonight │
|
1501
|
+
# # └─────────────────────────────────┴─────────────────────────────────┘
|
1501
1502
|
def replace_many(patterns, replace_with, ascii_case_insensitive: false)
|
1502
1503
|
patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
|
1503
1504
|
replace_with = Utils.parse_as_expression(
|
data/lib/polars/utils.rb
CHANGED
@@ -139,7 +139,7 @@ module Polars
|
|
139
139
|
Polars.lit(value)
|
140
140
|
end
|
141
141
|
|
142
|
-
def self.normalise_filepath(path, check_not_directory: true)
|
142
|
+
def self.normalize_filepath(path, check_not_directory: true)
|
143
143
|
path = File.expand_path(path)
|
144
144
|
if check_not_directory && File.exist?(path) && Dir.exist?(path)
|
145
145
|
raise ArgumentError, "Expected a file path; #{path} is a directory"
|
@@ -418,5 +418,39 @@ module Polars
|
|
418
418
|
def self.parse_when_inputs(*predicates, **constraints)
|
419
419
|
parse_predicates_constraints_as_expression(*predicates, **constraints)
|
420
420
|
end
|
421
|
+
|
422
|
+
def self.parse_interval_argument(interval)
|
423
|
+
if interval.include?(" ")
|
424
|
+
interval = interval.gsub(" ", "")
|
425
|
+
end
|
426
|
+
interval.downcase
|
427
|
+
end
|
428
|
+
|
429
|
+
def self.validate_rolling_by_aggs_arguments(weights, center:)
|
430
|
+
if !weights.nil?
|
431
|
+
msg = "`weights` is not supported in `rolling_*(..., by=...)` expression"
|
432
|
+
raise InvalidOperationError, msg
|
433
|
+
end
|
434
|
+
if center
|
435
|
+
msg = "`center=True` is not supported in `rolling_*(..., by=...)` expression"
|
436
|
+
raise InvalidOperationError, msg
|
437
|
+
end
|
438
|
+
end
|
439
|
+
|
440
|
+
def self.validate_rolling_aggs_arguments(window_size, closed)
|
441
|
+
if window_size.is_a?(::String)
|
442
|
+
begin
|
443
|
+
window_size = window_size.delete_suffix("i").to_i
|
444
|
+
rescue
|
445
|
+
msg = "Expected a string of the form 'ni', where `n` is a positive integer, got: #{window_size}"
|
446
|
+
raise InvalidOperationError, msg
|
447
|
+
end
|
448
|
+
end
|
449
|
+
if !closed.nil?
|
450
|
+
msg = "`closed` is not supported in `rolling_*(...)` expression"
|
451
|
+
raise InvalidOperationError, msg
|
452
|
+
end
|
453
|
+
window_size
|
454
|
+
end
|
421
455
|
end
|
422
456
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -42,9 +42,17 @@ require_relative "polars/functions/whenthen"
|
|
42
42
|
require_relative "polars/functions/aggregation/horizontal"
|
43
43
|
require_relative "polars/functions/aggregation/vertical"
|
44
44
|
require_relative "polars/functions/range/date_range"
|
45
|
+
require_relative "polars/functions/range/datetime_range"
|
45
46
|
require_relative "polars/functions/range/int_range"
|
47
|
+
require_relative "polars/functions/range/time_range"
|
46
48
|
require_relative "polars/group_by"
|
47
|
-
require_relative "polars/io"
|
49
|
+
require_relative "polars/io/avro"
|
50
|
+
require_relative "polars/io/csv"
|
51
|
+
require_relative "polars/io/database"
|
52
|
+
require_relative "polars/io/ipc"
|
53
|
+
require_relative "polars/io/json"
|
54
|
+
require_relative "polars/io/ndjson"
|
55
|
+
require_relative "polars/io/parquet"
|
48
56
|
require_relative "polars/lazy_frame"
|
49
57
|
require_relative "polars/lazy_group_by"
|
50
58
|
require_relative "polars/list_expr"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.0
|
4
|
+
version: 0.11.0
|
5
5
|
platform: arm64-darwin
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-06-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|
@@ -71,11 +71,19 @@ files:
|
|
71
71
|
- lib/polars/functions/lit.rb
|
72
72
|
- lib/polars/functions/random.rb
|
73
73
|
- lib/polars/functions/range/date_range.rb
|
74
|
+
- lib/polars/functions/range/datetime_range.rb
|
74
75
|
- lib/polars/functions/range/int_range.rb
|
76
|
+
- lib/polars/functions/range/time_range.rb
|
75
77
|
- lib/polars/functions/repeat.rb
|
76
78
|
- lib/polars/functions/whenthen.rb
|
77
79
|
- lib/polars/group_by.rb
|
78
|
-
- lib/polars/io.rb
|
80
|
+
- lib/polars/io/avro.rb
|
81
|
+
- lib/polars/io/csv.rb
|
82
|
+
- lib/polars/io/database.rb
|
83
|
+
- lib/polars/io/ipc.rb
|
84
|
+
- lib/polars/io/json.rb
|
85
|
+
- lib/polars/io/ndjson.rb
|
86
|
+
- lib/polars/io/parquet.rb
|
79
87
|
- lib/polars/lazy_frame.rb
|
80
88
|
- lib/polars/lazy_group_by.rb
|
81
89
|
- lib/polars/list_expr.rb
|
@@ -96,7 +104,7 @@ files:
|
|
96
104
|
- lib/polars/utils.rb
|
97
105
|
- lib/polars/version.rb
|
98
106
|
- lib/polars/whenthen.rb
|
99
|
-
homepage: https://github.com/ankane/polars
|
107
|
+
homepage: https://github.com/ankane/ruby-polars
|
100
108
|
licenses:
|
101
109
|
- MIT
|
102
110
|
metadata: {}
|