polars-df 0.5.0-arm64-darwin → 0.7.0-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +4572 -5214
- data/README.md +11 -9
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +7 -2
@@ -0,0 +1,194 @@
|
|
1
|
+
module Polars
|
2
|
+
# Run SQL queries against DataFrame/LazyFrame data.
|
3
|
+
class SQLContext
|
4
|
+
# @private
|
5
|
+
attr_accessor :_ctxt, :_eager_execution
|
6
|
+
|
7
|
+
# Initialize a new `SQLContext`.
|
8
|
+
def initialize(frames = nil, eager_execution: false, **named_frames)
  # Fresh native context, plus the default result mode read by #execute.
  self._ctxt = RbSQLContext.new
  self._eager_execution = eager_execution

  # Accept nil or anything hash-like for the positional mapping.
  table_map = (frames || {}).to_h
  return if table_map.empty? && named_frames.empty?

  register_many(table_map, **named_frames)
end
|
18
|
+
|
19
|
+
# Parse the given SQL query and execute it against the registered frame data.
|
20
|
+
#
|
21
|
+
# @param query [String]
|
22
|
+
# A valid string SQL query.
|
23
|
+
# @param eager [Boolean]
|
24
|
+
# Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
|
25
|
+
# If unset, the value of the init-time parameter "eager_execution" will be
|
26
|
+
# used. (Note that the query itself is always executed in lazy-mode; this
|
27
|
+
# parameter only impacts the type of the returned frame).
|
28
|
+
#
|
29
|
+
# @return [Object]
|
30
|
+
#
|
31
|
+
# @example Execute a SQL query against the registered frame data:
|
32
|
+
# df = Polars::DataFrame.new(
|
33
|
+
# [
|
34
|
+
# ["The Godfather", 1972, 6_000_000, 134_821_952, 9.2],
|
35
|
+
# ["The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0],
|
36
|
+
# ["Schindler's List", 1993, 22_000_000, 96_067_179, 8.9],
|
37
|
+
# ["Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9],
|
38
|
+
# ["The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3],
|
39
|
+
# ],
|
40
|
+
# schema: ["title", "release_year", "budget", "gross", "imdb_score"]
|
41
|
+
# )
|
42
|
+
# ctx = Polars::SQLContext.new(films: df)
|
43
|
+
# ctx.execute(
|
44
|
+
# "
|
45
|
+
# SELECT title, release_year, imdb_score
|
46
|
+
# FROM films
|
47
|
+
# WHERE release_year > 1990
|
48
|
+
# ORDER BY imdb_score DESC
|
49
|
+
# ",
|
50
|
+
# eager: true
|
51
|
+
# )
|
52
|
+
# # =>
|
53
|
+
# # shape: (4, 3)
|
54
|
+
# # ┌──────────────────────────┬──────────────┬────────────┐
|
55
|
+
# # │ title ┆ release_year ┆ imdb_score │
|
56
|
+
# # │ --- ┆ --- ┆ --- │
|
57
|
+
# # │ str ┆ i64 ┆ f64 │
|
58
|
+
# # ╞══════════════════════════╪══════════════╪════════════╡
|
59
|
+
# # │ The Shawshank Redemption ┆ 1994 ┆ 9.3 │
|
60
|
+
# # │ The Dark Knight ┆ 2008 ┆ 9.0 │
|
61
|
+
# # │ Schindler's List ┆ 1993 ┆ 8.9 │
|
62
|
+
# # │ Pulp Fiction ┆ 1994 ┆ 8.9 │
|
63
|
+
# # └──────────────────────────┴──────────────┴────────────┘
|
64
|
+
#
|
65
|
+
# @example Execute a GROUP BY query:
|
66
|
+
# ctx.execute(
|
67
|
+
# "
|
68
|
+
# SELECT
|
69
|
+
# MAX(release_year / 10) * 10 AS decade,
|
70
|
+
# SUM(gross) AS total_gross,
|
71
|
+
# COUNT(title) AS n_films,
|
72
|
+
# FROM films
|
73
|
+
# GROUP BY (release_year / 10) -- decade
|
74
|
+
# ORDER BY total_gross DESC
|
75
|
+
# ",
|
76
|
+
# eager: true
|
77
|
+
# )
|
78
|
+
# # =>
|
79
|
+
# # shape: (3, 3)
|
80
|
+
# # ┌────────┬─────────────┬─────────┐
|
81
|
+
# # │ decade ┆ total_gross ┆ n_films │
|
82
|
+
# # │ --- ┆ --- ┆ --- │
|
83
|
+
# # │ i64 ┆ i64 ┆ u32 │
|
84
|
+
# # ╞════════╪═════════════╪═════════╡
|
85
|
+
# # │ 2000 ┆ 533316061 ┆ 1 │
|
86
|
+
# # │ 1990 ┆ 232338648 ┆ 3 │
|
87
|
+
# # │ 1970 ┆ 134821952 ┆ 1 │
|
88
|
+
# # └────────┴─────────────┴─────────┘
|
89
|
+
def execute(query, eager: nil)
  # The native context returns a lazy plan; wrap it before deciding
  # which frame type to hand back.
  lazy_result = Utils.wrap_ldf(_ctxt.execute(query))

  # An explicit `eager:` value (true or false) wins. Only nil (unset) falls
  # back to the init-time `eager_execution` setting, so `eager: false` can
  # override a context created with `eager_execution: true`.
  collect_now = eager.nil? ? _eager_execution : eager
  collect_now ? lazy_result.collect : lazy_result
end
|
93
|
+
|
94
|
+
# Register a single frame as a table, using the given name.
|
95
|
+
#
|
96
|
+
# @param name [String]
|
97
|
+
# Name of the table.
|
98
|
+
# @param frame [Object]
|
99
|
+
# eager/lazy frame to associate with this table name.
|
100
|
+
#
|
101
|
+
# @return [SQLContext]
|
102
|
+
#
|
103
|
+
# @example
|
104
|
+
# df = Polars::DataFrame.new({"hello" => ["world"]})
|
105
|
+
# ctx = Polars::SQLContext.new
|
106
|
+
# ctx.register("frame_data", df).execute("SELECT * FROM frame_data").collect
|
107
|
+
# # =>
|
108
|
+
# # shape: (1, 1)
|
109
|
+
# # ┌───────┐
|
110
|
+
# # │ hello │
|
111
|
+
# # │ --- │
|
112
|
+
# # │ str │
|
113
|
+
# # ╞═══════╡
|
114
|
+
# # │ world │
|
115
|
+
# # └───────┘
|
116
|
+
def register(name, frame)
  # Eager frames are converted to lazy first; the native context receives `_ldf`.
  lazy_frame = frame.is_a?(DataFrame) ? frame.lazy : frame
  _ctxt.register(name.to_s, lazy_frame._ldf)
  self
end
|
123
|
+
|
124
|
+
# Register multiple eager/lazy frames as tables, using the associated names.
|
125
|
+
#
|
126
|
+
# @param frames [Hash]
|
127
|
+
# A `{name:frame, ...}` mapping.
|
128
|
+
# @param named_frames [Object]
|
129
|
+
# Named eager/lazy frames, provided as kwargs.
|
130
|
+
#
|
131
|
+
# @return [SQLContext]
|
132
|
+
def register_many(frames = nil, **named_frames)
  # `frames` is optional (as in `initialize`), so tables may be supplied
  # purely as keyword arguments. nil and hash-like values are both accepted.
  table_map = (frames || {}).to_h.merge(named_frames)

  # Keyword entries override positional entries that share the same key.
  table_map.each { |name, frame| register(name, frame) }
  self
end
|
140
|
+
|
141
|
+
# Unregister one or more eager/lazy frames by name.
|
142
|
+
#
|
143
|
+
# @param names [Object]
|
144
|
+
# Names of the tables to unregister.
|
145
|
+
#
|
146
|
+
# @return [SQLContext]
|
147
|
+
#
|
148
|
+
# @example Register with a SQLContext object:
|
149
|
+
# df0 = Polars::DataFrame.new({"ints" => [9, 8, 7, 6, 5]})
|
150
|
+
# lf1 = Polars::LazyFrame.new({"text" => ["a", "b", "c"]})
|
151
|
+
# lf2 = Polars::LazyFrame.new({"misc" => ["testing1234"]})
|
152
|
+
# ctx = Polars::SQLContext.new(test1: df0, test2: lf1, test3: lf2)
|
153
|
+
# ctx.tables
|
154
|
+
# # => ["test1", "test2", "test3"]
|
155
|
+
#
|
156
|
+
# @example Unregister one or more of the tables:
|
157
|
+
# ctx.unregister(["test1", "test3"]).tables
|
158
|
+
# # => ["test2"]
|
159
|
+
def unregister(names)
  # `Array()` wraps a lone String or Symbol and leaves arrays untouched.
  # `to_s` mirrors `register`, which passes every table name as a String.
  Array(names).each { |table_name| _ctxt.unregister(table_name.to_s) }
  self
end
|
168
|
+
|
169
|
+
# Return a list of the registered table names.
|
170
|
+
#
|
171
|
+
# @return [Array]
|
172
|
+
#
|
173
|
+
# @example Executing as SQL:
|
174
|
+
# frame_data = Polars::DataFrame.new({"hello" => ["world"]})
|
175
|
+
# ctx = Polars::SQLContext.new(hello_world: frame_data)
|
176
|
+
# ctx.execute("SHOW TABLES", eager: true)
|
177
|
+
# # =>
|
178
|
+
# # shape: (1, 1)
|
179
|
+
# # ┌─────────────┐
|
180
|
+
# # │ name │
|
181
|
+
# # │ --- │
|
182
|
+
# # │ str │
|
183
|
+
# # ╞═════════════╡
|
184
|
+
# # │ hello_world │
|
185
|
+
# # └─────────────┘
|
186
|
+
#
|
187
|
+
# @example Calling the method:
|
188
|
+
# ctx.tables
|
189
|
+
# # => ["hello_world"]
|
190
|
+
def tables
  # Sorted copy of whatever names the native context reports.
  registered = _ctxt.get_tables
  registered.sort
end
|
193
|
+
end
|
194
|
+
end
|
data/lib/polars/string_expr.rb
CHANGED
@@ -9,11 +9,134 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
+
# Convert a Utf8 column into a Date column.
|
13
|
+
#
|
14
|
+
# @param format [String]
|
15
|
+
# Format to use for conversion. Refer to the
|
16
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
17
|
+
# for the full specification. Example: `"%Y-%m-%d"`.
|
18
|
+
# If set to nil (default), the format is inferred from the data.
|
19
|
+
# @param strict [Boolean]
|
20
|
+
# Raise an error if any conversion fails.
|
21
|
+
# @param exact [Boolean]
|
22
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
23
|
+
# in the target string.
|
24
|
+
# @param cache [Boolean]
|
25
|
+
# Use a cache of unique, converted dates to apply the conversion.
|
26
|
+
#
|
27
|
+
# @return [Expr]
|
28
|
+
#
|
29
|
+
# @example
|
30
|
+
# s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
|
31
|
+
# s.str.to_date
|
32
|
+
# # =>
|
33
|
+
# # shape: (3,)
|
34
|
+
# # Series: '' [date]
|
35
|
+
# # [
|
36
|
+
# # 2020-01-01
|
37
|
+
# # 2020-02-01
|
38
|
+
# # 2020-03-01
|
39
|
+
# # ]
|
40
|
+
def to_date(format = nil, strict: true, exact: true, cache: true)
  _validate_format_argument(format)
  parsed = _rbexpr.str_to_date(format, strict, exact, cache)
  Utils.wrap_expr(parsed)
end
|
44
|
+
|
45
|
+
# Convert a Utf8 column into a Datetime column.
|
46
|
+
#
|
47
|
+
# @param format [String]
|
48
|
+
# Format to use for conversion. Refer to the
|
49
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
50
|
+
# for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
|
51
|
+
# If set to nil (default), the format is inferred from the data.
|
52
|
+
# @param time_unit ["us", "ns", "ms"]
|
53
|
+
# Unit of time for the resulting Datetime column. If set to nil (default),
|
54
|
+
# the time unit is inferred from the format string if given, eg:
|
55
|
+
# `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
|
56
|
+
# found, the default is `"us"`.
|
57
|
+
# @param time_zone [String]
|
58
|
+
# Time zone for the resulting Datetime column.
|
59
|
+
# @param strict [Boolean]
|
60
|
+
# Raise an error if any conversion fails.
|
61
|
+
# @param exact [Boolean]
|
62
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
63
|
+
# in the target string.
|
64
|
+
# @param cache [Boolean]
|
65
|
+
# Use a cache of unique, converted datetimes to apply the conversion.
|
66
|
+
#
|
67
|
+
# @return [Expr]
|
68
|
+
#
|
69
|
+
# @example
|
70
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
71
|
+
# s.str.to_datetime("%Y-%m-%d %H:%M%#z")
|
72
|
+
# # =>
|
73
|
+
# # shape: (2,)
|
74
|
+
# # Series: '' [datetime[μs, UTC]]
|
75
|
+
# # [
|
76
|
+
# # 2020-01-01 01:00:00 UTC
|
77
|
+
# # 2020-01-01 02:00:00 UTC
|
78
|
+
# # ]
|
79
|
+
def to_datetime(
  format = nil,
  time_unit: nil,
  time_zone: nil,
  strict: true,
  exact: true,
  cache: true,
  use_earliest: nil,
  ambiguous: "raise"
)
  _validate_format_argument(format)

  # Let the helper reconcile `use_earliest` with `ambiguous`, then make sure
  # the result is an expression before handing its native handle over.
  ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
  ambiguous_expr = ambiguous.is_a?(Expr) ? ambiguous : Polars.lit(ambiguous)

  native = _rbexpr.str_to_datetime(
    format, time_unit, time_zone, strict, exact, cache, ambiguous_expr._rbexpr
  )
  Utils.wrap_expr(native)
end
|
104
|
+
|
105
|
+
# Convert a Utf8 column into a Time column.
|
106
|
+
#
|
107
|
+
# @param format [String]
|
108
|
+
# Format to use for conversion. Refer to the
|
109
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
110
|
+
# for the full specification. Example: `"%H:%M:%S"`.
|
111
|
+
# If set to nil (default), the format is inferred from the data.
|
112
|
+
# @param strict [Boolean]
|
113
|
+
# Raise an error if any conversion fails.
|
114
|
+
# @param cache [Boolean]
|
115
|
+
# Use a cache of unique, converted times to apply the conversion.
|
116
|
+
#
|
117
|
+
# @return [Expr]
|
118
|
+
#
|
119
|
+
# @example
|
120
|
+
# s = Polars::Series.new(["01:00", "02:00", "03:00"])
|
121
|
+
# s.str.to_time("%H:%M")
|
122
|
+
# # =>
|
123
|
+
# # shape: (3,)
|
124
|
+
# # Series: '' [time]
|
125
|
+
# # [
|
126
|
+
# # 01:00:00
|
127
|
+
# # 02:00:00
|
128
|
+
# # 03:00:00
|
129
|
+
# # ]
|
130
|
+
def to_time(format = nil, strict: true, cache: true)
  _validate_format_argument(format)
  parsed = _rbexpr.str_to_time(format, strict, cache)
  Utils.wrap_expr(parsed)
end
|
134
|
+
|
12
135
|
# Parse a Utf8 expression to a Date/Datetime/Time type.
|
13
136
|
#
|
14
137
|
# @param dtype [Object]
|
15
138
|
# The data type to convert into. Can be either Date, Datetime, or Time.
|
16
|
-
# @param
|
139
|
+
# @param format [String]
|
17
140
|
# Format to use, refer to the
|
18
141
|
# [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
19
142
|
# for specification. Example: `"%y-%m-%d"`.
|
@@ -38,10 +161,10 @@ module Polars
|
|
38
161
|
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
39
162
|
# # =>
|
40
163
|
# # shape: (2,)
|
41
|
-
# # Series: '' [datetime[μs,
|
164
|
+
# # Series: '' [datetime[μs, UTC]]
|
42
165
|
# # [
|
43
|
-
# # 2020-01-01 01:00:00
|
44
|
-
# # 2020-01-01 02:00:00
|
166
|
+
# # 2020-01-01 01:00:00 UTC
|
167
|
+
# # 2020-01-01 02:00:00 UTC
|
45
168
|
# # ]
|
46
169
|
#
|
47
170
|
# @example Dealing with different formats.
|
@@ -71,16 +194,18 @@ module Polars
|
|
71
194
|
# # 2022-01-31
|
72
195
|
# # 2001-07-08
|
73
196
|
# # ]
|
74
|
-
def strptime(dtype,
|
197
|
+
def strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false)
|
198
|
+
_validate_format_argument(format)
|
199
|
+
|
75
200
|
if dtype == Date
|
76
|
-
|
201
|
+
to_date(format, strict: strict, exact: exact, cache: cache)
|
77
202
|
elsif dtype == Datetime || dtype.is_a?(Datetime)
|
78
203
|
dtype = Datetime.new if dtype == Datetime
|
79
204
|
time_unit = dtype.time_unit
|
80
205
|
time_zone = dtype.time_zone
|
81
|
-
|
206
|
+
to_datetime(format, time_unit: time_unit, time_zone: time_zone, strict: strict, exact: exact, cache: cache)
|
82
207
|
elsif dtype == Time
|
83
|
-
|
208
|
+
to_time(format, strict: strict, cache: cache)
|
84
209
|
else
|
85
210
|
raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
|
86
211
|
end
|
@@ -115,7 +240,7 @@ module Polars
|
|
115
240
|
# # │ 東京 ┆ 6 ┆ 2 │
|
116
241
|
# # └──────┴────────┴────────┘
|
117
242
|
def lengths
|
118
|
-
Utils.wrap_expr(_rbexpr.
|
243
|
+
Utils.wrap_expr(_rbexpr.str_len_bytes)
|
119
244
|
end
|
120
245
|
|
121
246
|
# Get length of the strings as `:u32` (as number of chars).
|
@@ -147,13 +272,15 @@ module Polars
|
|
147
272
|
# # │ 東京 ┆ 6 ┆ 2 │
|
148
273
|
# # └──────┴────────┴────────┘
|
149
274
|
def n_chars
|
150
|
-
Utils.wrap_expr(_rbexpr.
|
275
|
+
Utils.wrap_expr(_rbexpr.str_len_chars)
|
151
276
|
end
|
152
277
|
|
153
278
|
# Vertically concat the values in the Series to a single string value.
|
154
279
|
#
|
155
280
|
# @param delimiter [String]
|
156
281
|
# The delimiter to insert between consecutive string values.
|
282
|
+
# @param ignore_nulls [Boolean]
|
283
|
+
# Ignore null values (default).
|
157
284
|
#
|
158
285
|
# @return [Expr]
|
159
286
|
#
|
@@ -162,15 +289,28 @@ module Polars
|
|
162
289
|
# df.select(Polars.col("foo").str.concat("-"))
|
163
290
|
# # =>
|
164
291
|
# # shape: (1, 1)
|
165
|
-
# #
|
166
|
-
# # │ foo
|
167
|
-
# # │ ---
|
168
|
-
# # │ str
|
169
|
-
# #
|
170
|
-
# # │ 1-
|
171
|
-
# #
|
172
|
-
|
173
|
-
|
292
|
+
# # ┌─────┐
|
293
|
+
# # │ foo │
|
294
|
+
# # │ --- │
|
295
|
+
# # │ str │
|
296
|
+
# # ╞═════╡
|
297
|
+
# # │ 1-2 │
|
298
|
+
# # └─────┘
|
299
|
+
#
|
300
|
+
# @example
|
301
|
+
# df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
|
302
|
+
# df.select(Polars.col("foo").str.concat("-", ignore_nulls: false))
|
303
|
+
# # =>
|
304
|
+
# # shape: (1, 1)
|
305
|
+
# # ┌──────┐
|
306
|
+
# # │ foo │
|
307
|
+
# # │ --- │
|
308
|
+
# # │ str │
|
309
|
+
# # ╞══════╡
|
310
|
+
# # │ null │
|
311
|
+
# # └──────┘
|
312
|
+
def concat(delimiter = "-", ignore_nulls: true)
  joined = _rbexpr.str_concat(delimiter, ignore_nulls)
  Utils.wrap_expr(joined)
end
|
175
315
|
|
176
316
|
# Transform to uppercase variant.
|
@@ -217,7 +357,7 @@ module Polars
|
|
217
357
|
|
218
358
|
# Remove leading and trailing whitespace.
|
219
359
|
#
|
220
|
-
# @param
|
360
|
+
# @param characters [String, nil]
|
221
361
|
# An optional single character that should be trimmed.
|
222
362
|
#
|
223
363
|
# @return [Expr]
|
@@ -236,16 +376,15 @@ module Polars
|
|
236
376
|
# # │ trail │
|
237
377
|
# # │ both │
|
238
378
|
# # └───────┘
|
239
|
-
def
|
240
|
-
|
241
|
-
|
242
|
-
end
|
243
|
-
Utils.wrap_expr(_rbexpr.str_strip(matches))
|
379
|
+
def strip_chars(characters = nil)
  # Plain strings are parsed as literals (`str_as_lit: true`).
  chars_expr = Utils.parse_as_expression(characters, str_as_lit: true)
  stripped = _rbexpr.str_strip_chars(chars_expr)
  Utils.wrap_expr(stripped)
end
|
383
|
+
alias_method :strip, :strip_chars
|
245
384
|
|
246
385
|
# Remove leading whitespace.
|
247
386
|
#
|
248
|
-
# @param
|
387
|
+
# @param characters [String, nil]
|
249
388
|
# An optional single character that should be trimmed.
|
250
389
|
#
|
251
390
|
# @return [Expr]
|
@@ -264,16 +403,15 @@ module Polars
|
|
264
403
|
# # │ trail │
|
265
404
|
# # │ both │
|
266
405
|
# # └────────┘
|
267
|
-
def
|
268
|
-
|
269
|
-
|
270
|
-
end
|
271
|
-
Utils.wrap_expr(_rbexpr.str_lstrip(matches))
|
406
|
+
def strip_chars_start(characters = nil)
  # Plain strings are parsed as literals (`str_as_lit: true`).
  chars_expr = Utils.parse_as_expression(characters, str_as_lit: true)
  stripped = _rbexpr.str_strip_chars_start(chars_expr)
  Utils.wrap_expr(stripped)
end
|
410
|
+
alias_method :lstrip, :strip_chars_start
|
273
411
|
|
274
412
|
# Remove trailing whitespace.
|
275
413
|
#
|
276
|
-
# @param
|
414
|
+
# @param characters [String, nil]
|
277
415
|
# An optional single character that should be trimmed.
|
278
416
|
#
|
279
417
|
# @return [Expr]
|
@@ -292,12 +430,11 @@ module Polars
|
|
292
430
|
# # │ trail │
|
293
431
|
# # │ both │
|
294
432
|
# # └───────┘
|
295
|
-
def
|
296
|
-
|
297
|
-
|
298
|
-
end
|
299
|
-
Utils.wrap_expr(_rbexpr.str_rstrip(matches))
|
433
|
+
def strip_chars_end(characters = nil)
  # Plain strings are parsed as literals (`str_as_lit: true`).
  chars_expr = Utils.parse_as_expression(characters, str_as_lit: true)
  stripped = _rbexpr.str_strip_chars_end(chars_expr)
  Utils.wrap_expr(stripped)
end
|
437
|
+
alias_method :rstrip, :strip_chars_end
|
301
438
|
|
302
439
|
# Fills the string with zeroes.
|
303
440
|
#
|
@@ -341,13 +478,13 @@ module Polars
|
|
341
478
|
Utils.wrap_expr(_rbexpr.str_zfill(alignment))
|
342
479
|
end
|
343
480
|
|
344
|
-
# Return the string left justified in a string of length `
|
481
|
+
# Return the string left justified in a string of length `length`.
|
345
482
|
#
|
346
483
|
# Padding is done using the specified `fillchar`.
|
347
|
-
# The original string is returned if `
|
484
|
+
# The original string is returned if `length` is less than or equal to
|
348
485
|
# `s.length`.
|
349
486
|
#
|
350
|
-
# @param
|
487
|
+
# @param length [Integer]
|
351
488
|
# Justify left to this length.
|
352
489
|
# @param fillchar [String]
|
353
490
|
# Fill with this ASCII character.
|
@@ -369,17 +506,18 @@ module Polars
|
|
369
506
|
# # │ null │
|
370
507
|
# # │ hippopotamus │
|
371
508
|
# # └──────────────┘
|
372
|
-
def ljust(
|
373
|
-
Utils.wrap_expr(_rbexpr.
|
509
|
+
def ljust(length, fillchar = " ")
  # Delegates to the native `str_pad_end`.
  padded = _rbexpr.str_pad_end(length, fillchar)
  Utils.wrap_expr(padded)
end
|
512
|
+
alias_method :pad_end, :ljust
|
375
513
|
|
376
|
-
# Return the string right justified in a string of length `
|
514
|
+
# Return the string right justified in a string of length `length`.
|
377
515
|
#
|
378
516
|
# Padding is done using the specified `fillchar`.
|
379
|
-
# The original string is returned if `
|
517
|
+
# The original string is returned if `length` is less than or equal to
|
380
518
|
# `s.length`.
|
381
519
|
#
|
382
|
-
# @param
|
520
|
+
# @param length [Integer]
|
383
521
|
# Justify right to this length.
|
384
522
|
# @param fillchar [String]
|
385
523
|
# Fill with this ASCII character.
|
@@ -401,9 +539,10 @@ module Polars
|
|
401
539
|
# # │ null │
|
402
540
|
# # │ hippopotamus │
|
403
541
|
# # └──────────────┘
|
404
|
-
def rjust(
|
405
|
-
Utils.wrap_expr(_rbexpr.
|
542
|
+
def rjust(length, fillchar = " ")
  # Delegates to the native `str_pad_start`.
  padded = _rbexpr.str_pad_start(length, fillchar)
  Utils.wrap_expr(padded)
end
|
545
|
+
alias_method :pad_start, :rjust
|
407
546
|
|
408
547
|
# Check if string contains a substring that matches a regex.
|
409
548
|
#
|
@@ -547,11 +686,11 @@ module Polars
|
|
547
686
|
# # │ {null,null} │
|
548
687
|
# # │ {2,false} │
|
549
688
|
# # └─────────────┘
|
550
|
-
def json_extract(dtype = nil)
|
689
|
+
def json_extract(dtype = nil, infer_schema_length: 100)
|
551
690
|
if !dtype.nil?
|
552
691
|
dtype = Utils.rb_type_to_dtype(dtype)
|
553
692
|
end
|
554
|
-
Utils.wrap_expr(_rbexpr.str_json_extract(dtype))
|
693
|
+
Utils.wrap_expr(_rbexpr.str_json_extract(dtype, infer_schema_length))
|
555
694
|
end
|
556
695
|
|
557
696
|
# Extract the first match of json string with provided JSONPath expression.
|
@@ -744,9 +883,11 @@ module Polars
|
|
744
883
|
# # │ 5 │
|
745
884
|
# # │ 6 │
|
746
885
|
# # └──────────────┘
|
747
|
-
def
|
748
|
-
Utils.
|
886
|
+
def count_matches(pattern, literal: false)
  # Plain strings are parsed as literals (`str_as_lit: true`).
  pattern_expr = Utils.parse_as_expression(pattern, str_as_lit: true)
  counted = _rbexpr.str_count_matches(pattern_expr, literal)
  Utils.wrap_expr(counted)
end
|
890
|
+
alias_method :count_match, :count_matches
|
750
891
|
|
751
892
|
# Split the string by a substring.
|
752
893
|
#
|
@@ -772,6 +913,7 @@ module Polars
|
|
772
913
|
# # │ ["foo", "bar", "baz"] │
|
773
914
|
# # └───────────────────────┘
|
774
915
|
def split(by, inclusive: false)
|
916
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
775
917
|
if inclusive
|
776
918
|
Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
|
777
919
|
else
|
@@ -814,6 +956,7 @@ module Polars
|
|
814
956
|
# # │ {"d","4"} │
|
815
957
|
# # └─────────────┘
|
816
958
|
def split_exact(by, n, inclusive: false)
|
959
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
817
960
|
if inclusive
|
818
961
|
Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
|
819
962
|
else
|
@@ -850,6 +993,7 @@ module Polars
|
|
850
993
|
# # │ {"foo","bar baz"} │
|
851
994
|
# # └───────────────────┘
|
852
995
|
def splitn(by, n)
  # Plain strings are parsed as literals (`str_as_lit: true`).
  separator = Utils.parse_as_expression(by, str_as_lit: true)
  pieces = _rbexpr.str_splitn(separator, n)
  Utils.wrap_expr(pieces)
end
|
855
999
|
|
@@ -968,7 +1112,53 @@ module Polars
|
|
968
1112
|
# # │ r │
|
969
1113
|
# # └─────┘
|
970
1114
|
def explode
|
971
|
-
Utils.wrap_expr(_rbexpr.
|
1115
|
+
Utils.wrap_expr(_rbexpr.str_explode)
|
1116
|
+
end
|
1117
|
+
|
1118
|
+
# Convert a Utf8 column into an Int64 column, parsing the strings in the given base (radix).
|
1119
|
+
#
|
1120
|
+
# @param base [Integer]
|
1121
|
+
# Positive integer which is the base of the string we are parsing.
|
1122
|
+
# Default: 10.
|
1123
|
+
# @param strict [Boolean]
|
1124
|
+
# If true (the default), any parse error or overflow is raised as a ComputeError.
|
1125
|
+
# If false, values that cannot be parsed are silently converted to null.
|
1126
|
+
#
|
1127
|
+
# @return [Expr]
|
1128
|
+
#
|
1129
|
+
# @example
|
1130
|
+
# df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
|
1131
|
+
# df.with_columns(Polars.col("bin").str.to_integer(base: 2, strict: false).alias("parsed"))
|
1132
|
+
# # =>
|
1133
|
+
# # shape: (4, 2)
|
1134
|
+
# # ┌─────────┬────────┐
|
1135
|
+
# # │ bin ┆ parsed │
|
1136
|
+
# # │ --- ┆ --- │
|
1137
|
+
# # │ str ┆ i64 │
|
1138
|
+
# # ╞═════════╪════════╡
|
1139
|
+
# # │ 110 ┆ 6 │
|
1140
|
+
# # │ 101 ┆ 5 │
|
1141
|
+
# # │ 010 ┆ 2 │
|
1142
|
+
# # │ invalid ┆ null │
|
1143
|
+
# # └─────────┴────────┘
|
1144
|
+
#
|
1145
|
+
# @example
|
1146
|
+
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1147
|
+
# df.with_columns(Polars.col("hex").str.to_integer(base: 16, strict: true).alias("parsed"))
|
1148
|
+
# # =>
|
1149
|
+
# # shape: (4, 2)
|
1150
|
+
# # ┌──────┬────────┐
|
1151
|
+
# # │ hex ┆ parsed │
|
1152
|
+
# # │ --- ┆ --- │
|
1153
|
+
# # │ str ┆ i64 │
|
1154
|
+
# # ╞══════╪════════╡
|
1155
|
+
# # │ fa1e ┆ 64030 │
|
1156
|
+
# # │ ff00 ┆ 65280 │
|
1157
|
+
# # │ cafe ┆ 51966 │
|
1158
|
+
# # │ null ┆ null │
|
1159
|
+
# # └──────┴────────┘
|
1160
|
+
def to_integer(base: 10, strict: true)
  parsed = _rbexpr.str_to_integer(base, strict)
  Utils.wrap_expr(parsed)
end
|
973
1163
|
|
974
1164
|
# Parse integers with base radix from strings.
|
@@ -999,24 +1189,14 @@ module Polars
|
|
999
1189
|
# # │ 2 │
|
1000
1190
|
# # │ null │
|
1001
1191
|
# # └──────┘
|
1002
|
-
#
|
1003
|
-
# @example
|
1004
|
-
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1005
|
-
# df.select(Polars.col("hex").str.parse_int(16, strict: true))
|
1006
|
-
# # =>
|
1007
|
-
# # shape: (4, 1)
|
1008
|
-
# # ┌───────┐
|
1009
|
-
# # │ hex │
|
1010
|
-
# # │ --- │
|
1011
|
-
# # │ i32 │
|
1012
|
-
# # ╞═══════╡
|
1013
|
-
# # │ 64030 │
|
1014
|
-
# # │ 65280 │
|
1015
|
-
# # │ 51966 │
|
1016
|
-
# # │ null │
|
1017
|
-
# # └───────┘
|
1018
1192
|
def parse_int(radix = 2, strict: true)
  # Forward the caller's radix (it used to be hard-coded to 2, so every other
  # base was silently parsed as binary). The cast to Int32 is unchanged.
  to_integer(base: radix, strict: strict).cast(Int32, strict: strict)
end
|
1195
|
+
|
1196
|
+
private
|
1197
|
+
|
1198
|
+
def _validate_format_argument(format)
  # TODO: no validation is performed yet; every format value is accepted as-is.
  nil
end
|
1021
1201
|
end
|
1022
1202
|
end
|