polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
module Polars
  # Series.plot namespace.
  #
  # Holds a one-column frame built from a Series and produces Vega-Lite
  # charts (histogram, density, line) from that frame's rows.
  class SeriesPlot
    # @private
    #
    # @param s [Series]
    #   Series to plot. Must respond to `name` and `to_frame(name)`.
    def initialize(s)
      # Required here rather than at file load, so the vega gem is only
      # needed once plotting is actually used.
      require "vega"

      # `||` alone only catches nil; an empty string is truthy in Ruby and
      # would otherwise be used as the chart's field name. Treat both nil
      # and "" as "unnamed" and fall back to "value".
      name = s.name
      name = "value" if name.to_s.empty?

      @df = s.to_frame(name)
      @series_name = name
    end

    # Draw histogram.
    #
    # Bins the Series values on the x axis and counts rows per bin on y.
    #
    # @return [Vega::LiteChart]
    def hist
      encoding = {
        x: {field: @series_name, bin: true},
        y: {aggregate: "count"}
      }

      Vega.lite
        .data(@df.rows(named: true))
        .mark(type: "bar", tooltip: true)
        .encoding(encoding)
        .config(axis: {labelFontSize: 12})
    end

    # Draw kernel density estimate plot.
    #
    # @return [Vega::LiteChart]
    #
    # @raise [ArgumentError]
    #   If the Series is named "density": the density transform writes its
    #   output under the names `[series_name, "density"]`, so the two
    #   fields would collide.
    def kde
      if @series_name == "density"
        msg = "cannot use `plot.kde` when Series name is `'density'`"
        raise ArgumentError, msg
      end

      encoding = {
        x: {field: @series_name, type: "quantitative"},
        y: {field: "density", type: "quantitative"}
      }

      Vega.lite
        .data(@df.rows(named: true))
        .transform(density: @series_name, as: [@series_name, "density"])
        .mark(type: "area", tooltip: true)
        .encoding(encoding)
        .config(axis: {labelFontSize: 12})
    end

    # Draw line plot.
    #
    # Plots the Series values (y) against the "index" field (x).
    #
    # @return [Vega::LiteChart]
    #
    # @raise [ArgumentError]
    #   If the Series is named "index", which is the field name used for
    #   the x axis.
    def line
      if @series_name == "index"
        msg = "cannot call `plot.line` when Series name is 'index'"
        raise ArgumentError, msg
      end

      encoding = {
        x: {field: "index", type: "quantitative"},
        y: {field: @series_name, type: "quantitative"}
      }

      # NOTE(review): `with_row_index` presumably adds the "index" column
      # that the x encoding reads; confirm its default column name.
      Vega.lite
        .data(@df.with_row_index.rows(named: true))
        .mark(type: "line", tooltip: true)
        .encoding(encoding)
        .config(axis: {labelFontSize: 12})
    end
  end
end
|
data/lib/polars/slice.rb
CHANGED
|
@@ -12,7 +12,7 @@ module Polars
|
|
|
12
12
|
|
|
13
13
|
# check for fast-paths / single-operation calls
|
|
14
14
|
if @slice_length == 0
|
|
15
|
-
@obj.
|
|
15
|
+
@obj.clear
|
|
16
16
|
elsif @is_unbounded && [-1, 1].include?(@stride)
|
|
17
17
|
@stride < 0 ? @obj.reverse : @obj.clone
|
|
18
18
|
elsif @start >= 0 && @stop >= 0 && @stride == 1
|
data/lib/polars/sql_context.rb
CHANGED
|
@@ -5,11 +5,17 @@ module Polars
|
|
|
5
5
|
attr_accessor :_ctxt, :_eager_execution
|
|
6
6
|
|
|
7
7
|
# Initialize a new `SQLContext`.
|
|
8
|
-
def initialize(frames = nil,
|
|
8
|
+
def initialize(frames = nil, register_globals: false, eager: false, **named_frames)
|
|
9
|
+
Utils.issue_unstable_warning(
|
|
10
|
+
"`SQLContext` is considered **unstable**, although it is close to being considered stable."
|
|
11
|
+
)
|
|
9
12
|
self._ctxt = RbSQLContext.new
|
|
10
|
-
self._eager_execution =
|
|
13
|
+
self._eager_execution = eager
|
|
11
14
|
|
|
12
15
|
frames = (frames || {}).to_h
|
|
16
|
+
if register_globals
|
|
17
|
+
raise Todo
|
|
18
|
+
end
|
|
13
19
|
|
|
14
20
|
if frames.any? || named_frames.any?
|
|
15
21
|
register_many(frames, **named_frames)
|
|
@@ -37,7 +43,8 @@ module Polars
|
|
|
37
43
|
# ["Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9],
|
|
38
44
|
# ["The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3],
|
|
39
45
|
# ],
|
|
40
|
-
# schema: ["title", "release_year", "budget", "gross", "imdb_score"]
|
|
46
|
+
# schema: ["title", "release_year", "budget", "gross", "imdb_score"],
|
|
47
|
+
# orient: "row"
|
|
41
48
|
# )
|
|
42
49
|
# ctx = Polars::SQLContext.new(films: df)
|
|
43
50
|
# ctx.execute(
|
|
@@ -129,7 +136,7 @@ module Polars
|
|
|
129
136
|
# Named eager/lazy frames, provided as kwargs.
|
|
130
137
|
#
|
|
131
138
|
# @return [SQLContext]
|
|
132
|
-
def register_many(frames, **named_frames)
|
|
139
|
+
def register_many(frames = nil, **named_frames)
|
|
133
140
|
frames = (frames || {}).to_h
|
|
134
141
|
frames = frames.merge(named_frames)
|
|
135
142
|
frames.each do |name, frame|
|
data/lib/polars/string_expr.rb
CHANGED
|
@@ -154,9 +154,13 @@ module Polars
|
|
|
154
154
|
# - If false, allow the format to match anywhere in the target string.
|
|
155
155
|
# @param cache [Boolean]
|
|
156
156
|
# Use a cache of unique, converted dates to apply the datetime conversion.
|
|
157
|
-
# @param
|
|
158
|
-
#
|
|
159
|
-
#
|
|
157
|
+
# @param ambiguous ['raise', 'earliest', 'latest', 'null']
|
|
158
|
+
# Determine how to deal with ambiguous datetimes:
|
|
159
|
+
#
|
|
160
|
+
# - `'raise'` (default): raise an error
|
|
161
|
+
# - `'earliest'`: use the earliest datetime
|
|
162
|
+
# - `'latest'`: use the latest datetime
|
|
163
|
+
# - `'null'`: set to null
|
|
160
164
|
#
|
|
161
165
|
# @return [Expr]
|
|
162
166
|
#
|
|
@@ -203,7 +207,14 @@ module Polars
|
|
|
203
207
|
# # 2022-01-31
|
|
204
208
|
# # 2001-07-08
|
|
205
209
|
# # ]
|
|
206
|
-
def strptime(
|
|
210
|
+
def strptime(
|
|
211
|
+
dtype,
|
|
212
|
+
format = nil,
|
|
213
|
+
strict: true,
|
|
214
|
+
exact: true,
|
|
215
|
+
cache: true,
|
|
216
|
+
ambiguous: "raise"
|
|
217
|
+
)
|
|
207
218
|
_validate_format_argument(format)
|
|
208
219
|
|
|
209
220
|
if dtype == Date
|
|
@@ -212,7 +223,15 @@ module Polars
|
|
|
212
223
|
dtype = Datetime.new if dtype == Datetime
|
|
213
224
|
time_unit = dtype.time_unit
|
|
214
225
|
time_zone = dtype.time_zone
|
|
215
|
-
to_datetime(
|
|
226
|
+
to_datetime(
|
|
227
|
+
format,
|
|
228
|
+
time_unit: time_unit,
|
|
229
|
+
time_zone: time_zone,
|
|
230
|
+
strict: strict,
|
|
231
|
+
exact: exact,
|
|
232
|
+
cache: cache,
|
|
233
|
+
ambiguous: ambiguous
|
|
234
|
+
)
|
|
216
235
|
elsif dtype == Time
|
|
217
236
|
to_time(format, strict: strict, cache: cache)
|
|
218
237
|
else
|
|
@@ -292,7 +311,6 @@ module Polars
|
|
|
292
311
|
def len_bytes
|
|
293
312
|
Utils.wrap_expr(_rbexpr.str_len_bytes)
|
|
294
313
|
end
|
|
295
|
-
alias_method :lengths, :len_bytes
|
|
296
314
|
|
|
297
315
|
# Get length of the strings as `:u32` (as number of chars).
|
|
298
316
|
#
|
|
@@ -325,7 +343,6 @@ module Polars
|
|
|
325
343
|
def len_chars
|
|
326
344
|
Utils.wrap_expr(_rbexpr.str_len_chars)
|
|
327
345
|
end
|
|
328
|
-
alias_method :n_chars, :len_chars
|
|
329
346
|
|
|
330
347
|
# Vertically concat the values in the Series to a single string value.
|
|
331
348
|
#
|
|
@@ -361,10 +378,15 @@ module Polars
|
|
|
361
378
|
# # ╞══════╡
|
|
362
379
|
# # │ null │
|
|
363
380
|
# # └──────┘
|
|
364
|
-
def join(delimiter =
|
|
381
|
+
def join(delimiter = nil, ignore_nulls: true)
  # TODO update
  # An omitted delimiter still means "-" for now; callers relying on the
  # default are warned because that default is announced to change.
  separator =
    if delimiter.nil?
      warn "The default `delimiter` for `join` method will change from `-` to empty string in a future version"
      "-"
    else
      delimiter
    end

  Utils.wrap_expr(_rbexpr.str_join(separator, ignore_nulls))
end
|
|
367
|
-
alias_method :concat, :join
|
|
368
390
|
|
|
369
391
|
# Returns string values with all regular expression meta characters escaped.
|
|
370
392
|
#
|
|
@@ -505,7 +527,7 @@ module Polars
|
|
|
505
527
|
#
|
|
506
528
|
# @example
|
|
507
529
|
# df = Polars::DataFrame.new({"foo" => [" lead", "trail ", " both "]})
|
|
508
|
-
# df.select(Polars.col("foo").str.
|
|
530
|
+
# df.select(Polars.col("foo").str.strip_chars)
|
|
509
531
|
# # =>
|
|
510
532
|
# # shape: (3, 1)
|
|
511
533
|
# # ┌───────┐
|
|
@@ -521,7 +543,6 @@ module Polars
|
|
|
521
543
|
characters = Utils.parse_into_expression(characters, str_as_lit: true)
|
|
522
544
|
Utils.wrap_expr(_rbexpr.str_strip_chars(characters))
|
|
523
545
|
end
|
|
524
|
-
alias_method :strip, :strip_chars
|
|
525
546
|
|
|
526
547
|
# Remove leading whitespace.
|
|
527
548
|
#
|
|
@@ -532,7 +553,7 @@ module Polars
|
|
|
532
553
|
#
|
|
533
554
|
# @example
|
|
534
555
|
# df = Polars::DataFrame.new({"foo" => [" lead", "trail ", " both "]})
|
|
535
|
-
# df.select(Polars.col("foo").str.
|
|
556
|
+
# df.select(Polars.col("foo").str.strip_chars_start)
|
|
536
557
|
# # =>
|
|
537
558
|
# # shape: (3, 1)
|
|
538
559
|
# # ┌────────┐
|
|
@@ -548,7 +569,6 @@ module Polars
|
|
|
548
569
|
characters = Utils.parse_into_expression(characters, str_as_lit: true)
|
|
549
570
|
Utils.wrap_expr(_rbexpr.str_strip_chars_start(characters))
|
|
550
571
|
end
|
|
551
|
-
alias_method :lstrip, :strip_chars_start
|
|
552
572
|
|
|
553
573
|
# Remove trailing whitespace.
|
|
554
574
|
#
|
|
@@ -559,7 +579,7 @@ module Polars
|
|
|
559
579
|
#
|
|
560
580
|
# @example
|
|
561
581
|
# df = Polars::DataFrame.new({"foo" => [" lead", "trail ", " both "]})
|
|
562
|
-
# df.select(Polars.col("foo").str.
|
|
582
|
+
# df.select(Polars.col("foo").str.strip_chars_end)
|
|
563
583
|
# # =>
|
|
564
584
|
# # shape: (3, 1)
|
|
565
585
|
# # ┌───────┐
|
|
@@ -575,7 +595,6 @@ module Polars
|
|
|
575
595
|
characters = Utils.parse_into_expression(characters, str_as_lit: true)
|
|
576
596
|
Utils.wrap_expr(_rbexpr.str_strip_chars_end(characters))
|
|
577
597
|
end
|
|
578
|
-
alias_method :rstrip, :strip_chars_end
|
|
579
598
|
|
|
580
599
|
# Remove prefix.
|
|
581
600
|
#
|
|
@@ -665,7 +684,6 @@ module Polars
|
|
|
665
684
|
length = Utils.parse_into_expression(length)
|
|
666
685
|
Utils.wrap_expr(_rbexpr.str_pad_start(length, fill_char))
|
|
667
686
|
end
|
|
668
|
-
alias_method :rjust, :pad_start
|
|
669
687
|
|
|
670
688
|
# Pad the end of the string until it reaches the given length.
|
|
671
689
|
#
|
|
@@ -696,7 +714,6 @@ module Polars
|
|
|
696
714
|
length = Utils.parse_into_expression(length)
|
|
697
715
|
Utils.wrap_expr(_rbexpr.str_pad_end(length, fill_char))
|
|
698
716
|
end
|
|
699
|
-
alias_method :ljust, :pad_end
|
|
700
717
|
|
|
701
718
|
# Fills the string with zeroes.
|
|
702
719
|
#
|
|
@@ -834,14 +851,14 @@ module Polars
|
|
|
834
851
|
|
|
835
852
|
# Check if string values end with a substring.
|
|
836
853
|
#
|
|
837
|
-
# @param
|
|
854
|
+
# @param suffix [String]
|
|
838
855
|
# Suffix substring.
|
|
839
856
|
#
|
|
840
857
|
# @return [Expr]
|
|
841
858
|
#
|
|
842
859
|
# @example
|
|
843
860
|
# df = Polars::DataFrame.new({"fruits" => ["apple", "mango", nil]})
|
|
844
|
-
# df.
|
|
861
|
+
# df.with_columns(
|
|
845
862
|
# Polars.col("fruits").str.ends_with("go").alias("has_suffix")
|
|
846
863
|
# )
|
|
847
864
|
# # =>
|
|
@@ -867,21 +884,21 @@ module Polars
|
|
|
867
884
|
# # ╞════════╡
|
|
868
885
|
# # │ mango │
|
|
869
886
|
# # └────────┘
|
|
870
|
-
def ends_with(
|
|
871
|
-
|
|
872
|
-
Utils.wrap_expr(_rbexpr.str_ends_with(
|
|
887
|
+
def ends_with(suffix)
  # Plain strings are parsed as literals (str_as_lit), then handed to the
  # native suffix check and wrapped back into an expression.
  Utils
    .parse_into_expression(suffix, str_as_lit: true)
    .then { |needle| Utils.wrap_expr(_rbexpr.str_ends_with(needle)) }
end
|
|
874
891
|
|
|
875
892
|
# Check if string values start with a substring.
|
|
876
893
|
#
|
|
877
|
-
# @param
|
|
894
|
+
# @param prefix [String]
|
|
878
895
|
# Prefix substring.
|
|
879
896
|
#
|
|
880
897
|
# @return [Expr]
|
|
881
898
|
#
|
|
882
899
|
# @example
|
|
883
900
|
# df = Polars::DataFrame.new({"fruits" => ["apple", "mango", nil]})
|
|
884
|
-
# df.
|
|
901
|
+
# df.with_columns(
|
|
885
902
|
# Polars.col("fruits").str.starts_with("app").alias("has_prefix")
|
|
886
903
|
# )
|
|
887
904
|
# # =>
|
|
@@ -907,9 +924,9 @@ module Polars
|
|
|
907
924
|
# # ╞════════╡
|
|
908
925
|
# # │ apple │
|
|
909
926
|
# # └────────┘
|
|
910
|
-
def starts_with(
|
|
911
|
-
|
|
912
|
-
Utils.wrap_expr(_rbexpr.str_starts_with(
|
|
927
|
+
def starts_with(prefix)
  # Plain strings are parsed as literals (str_as_lit), then handed to the
  # native prefix check and wrapped back into an expression.
  Utils
    .parse_into_expression(prefix, str_as_lit: true)
    .then { |needle| Utils.wrap_expr(_rbexpr.str_starts_with(needle)) }
end
|
|
914
931
|
|
|
915
932
|
# Parse string values as JSON.
|
|
@@ -949,7 +966,6 @@ module Polars
|
|
|
949
966
|
dtype_expr = Utils.parse_into_datatype_expr(dtype)._rbdatatype_expr
|
|
950
967
|
Utils.wrap_expr(_rbexpr.str_json_decode(dtype_expr))
|
|
951
968
|
end
|
|
952
|
-
alias_method :json_extract, :json_decode
|
|
953
969
|
|
|
954
970
|
# Extract the first match of json string with provided JSONPath expression.
|
|
955
971
|
#
|
|
@@ -1188,7 +1204,7 @@ module Polars
|
|
|
1188
1204
|
# df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
|
|
1189
1205
|
# df.select(
|
|
1190
1206
|
# [
|
|
1191
|
-
# Polars.col("foo").str.
|
|
1207
|
+
# Polars.col("foo").str.count_matches('\d').alias("count_digits")
|
|
1192
1208
|
# ]
|
|
1193
1209
|
# )
|
|
1194
1210
|
# # =>
|
|
@@ -1205,7 +1221,6 @@ module Polars
|
|
|
1205
1221
|
pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
|
|
1206
1222
|
Utils.wrap_expr(_rbexpr.str_count_matches(pattern, literal))
|
|
1207
1223
|
end
|
|
1208
|
-
alias_method :count_match, :count_matches
|
|
1209
1224
|
|
|
1210
1225
|
# Split the string by a substring.
|
|
1211
1226
|
#
|
|
@@ -1329,7 +1344,7 @@ module Polars
|
|
|
1329
1344
|
#
|
|
1330
1345
|
# @example
|
|
1331
1346
|
# df = Polars::DataFrame.new({"id" => [1, 2], "text" => ["123abc", "abc456"]})
|
|
1332
|
-
# df.
|
|
1347
|
+
# df.with_columns(
|
|
1333
1348
|
# Polars.col("text").str.replace('abc\b', "ABC")
|
|
1334
1349
|
# )
|
|
1335
1350
|
# # =>
|
|
@@ -1361,7 +1376,7 @@ module Polars
|
|
|
1361
1376
|
#
|
|
1362
1377
|
# @example
|
|
1363
1378
|
# df = Polars::DataFrame.new({"id" => [1, 2], "text" => ["abcabc", "123a123"]})
|
|
1364
|
-
# df.
|
|
1379
|
+
# df.with_columns(Polars.col("text").str.replace_all("a", "-"))
|
|
1365
1380
|
# # =>
|
|
1366
1381
|
# # shape: (2, 2)
|
|
1367
1382
|
# # ┌─────┬─────────┐
|
|
@@ -1412,7 +1427,7 @@ module Polars
|
|
|
1412
1427
|
#
|
|
1413
1428
|
# @example
|
|
1414
1429
|
# df = Polars::DataFrame.new({"s" => ["pear", nil, "papaya", "dragonfruit"]})
|
|
1415
|
-
# df.
|
|
1430
|
+
# df.with_columns(
|
|
1416
1431
|
# Polars.col("s").str.slice(-3).alias("s_sliced")
|
|
1417
1432
|
# )
|
|
1418
1433
|
# # =>
|
|
@@ -1604,38 +1619,6 @@ module Polars
|
|
|
1604
1619
|
Utils.wrap_expr(_rbexpr.str_to_integer(base, dtype, strict))
|
|
1605
1620
|
end
|
|
1606
1621
|
|
|
1607
|
-
# Parse integers with base radix from strings.
|
|
1608
|
-
#
|
|
1609
|
-
# By default base 2. ParseError/Overflows become Nulls.
|
|
1610
|
-
#
|
|
1611
|
-
# @param radix [Integer]
|
|
1612
|
-
# Positive integer which is the base of the string we are parsing.
|
|
1613
|
-
# Default: 2.
|
|
1614
|
-
# @param strict [Boolean]
|
|
1615
|
-
# Bool, Default=true will raise any ParseError or overflow as ComputeError.
|
|
1616
|
-
# False silently convert to Null.
|
|
1617
|
-
#
|
|
1618
|
-
# @return [Expr]
|
|
1619
|
-
#
|
|
1620
|
-
# @example
|
|
1621
|
-
# df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
|
|
1622
|
-
# df.select(Polars.col("bin").str.parse_int(2, strict: false))
|
|
1623
|
-
# # =>
|
|
1624
|
-
# # shape: (4, 1)
|
|
1625
|
-
# # ┌──────┐
|
|
1626
|
-
# # │ bin │
|
|
1627
|
-
# # │ --- │
|
|
1628
|
-
# # │ i32 │
|
|
1629
|
-
# # ╞══════╡
|
|
1630
|
-
# # │ 6 │
|
|
1631
|
-
# # │ 5 │
|
|
1632
|
-
# # │ 2 │
|
|
1633
|
-
# # │ null │
|
|
1634
|
-
# # └──────┘
|
|
1635
|
-
def parse_int(radix = 2, strict: true)
|
|
1636
|
-
to_integer(base: 2, strict: strict).cast(Int32, strict: strict)
|
|
1637
|
-
end
|
|
1638
|
-
|
|
1639
1622
|
# Use the aho-corasick algorithm to find matches.
|
|
1640
1623
|
#
|
|
1641
1624
|
# This version determines if any of the patterns find a match.
|
|
@@ -1744,8 +1727,8 @@ module Polars
|
|
|
1744
1727
|
# # │ Tell me what you want, what yo… ┆ Tell you what me want, what me… │
|
|
1745
1728
|
# # │ Can you feel the love tonight ┆ Can me feel the love tonight │
|
|
1746
1729
|
# # └─────────────────────────────────┴─────────────────────────────────┘
|
|
1747
|
-
def replace_many(patterns, replace_with =
|
|
1748
|
-
if replace_with ==
|
|
1730
|
+
def replace_many(patterns, replace_with = NO_DEFAULT, ascii_case_insensitive: false)
|
|
1731
|
+
if replace_with == NO_DEFAULT
|
|
1749
1732
|
if !patterns.is_a?(Hash)
|
|
1750
1733
|
msg = "`replace_with` argument is required if `patterns` argument is not a Hash type"
|
|
1751
1734
|
raise TypeError, msg
|
|
@@ -1913,7 +1896,15 @@ module Polars
|
|
|
1913
1896
|
private
|
|
1914
1897
|
|
|
1915
1898
|
# Warn when a chrono format string contains `.%f`.
#
# Does nothing when `format` is nil or does not contain that pattern.
# Never raises on a match; it only emits a warning that recommends
# `%.f` instead.
#
# @param format [String, nil]
#   Format string to inspect.
#
# @return [nil]
def _validate_format_argument(format)
  return if format.nil?
  return unless format.include?(".%f")

  warn(
    "Detected the pattern `.%f` in the chrono format string." \
    " This pattern should not be used to parse values after a decimal point." \
    " Use `%.f` instead." \
    " See the full specification: https://docs.rs/chrono/latest/chrono/format/strftime"
  )
end
|
|
1918
1909
|
end
|
|
1919
1910
|
end
|