polars-df 0.5.0-x86_64-darwin → 0.7.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +4572 -5214
- data/README.md +11 -9
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +7 -2
@@ -0,0 +1,194 @@
|
|
1
|
+
module Polars
|
2
|
+
# Run SQL queries against DataFrame/LazyFrame data.
|
3
|
+
class SQLContext
|
4
|
+
# @private
|
5
|
+
attr_accessor :_ctxt, :_eager_execution
|
6
|
+
|
7
|
+
# Initialize a new `SQLContext`.
|
8
|
+
def initialize(frames = nil, eager_execution: false, **named_frames)
  # Backing RbSQLContext instance plus the default result mode used by #execute.
  self._ctxt = RbSQLContext.new
  self._eager_execution = eager_execution

  # The positional mapping may be nil; normalise it to a Hash first.
  table_map = (frames || {}).to_h
  return if table_map.empty? && named_frames.empty?

  register_many(table_map, **named_frames)
end
|
18
|
+
|
19
|
+
# Parse the given SQL query and execute it against the registered frame data.
|
20
|
+
#
|
21
|
+
# @param query [String]
|
22
|
+
# A valid string SQL query.
|
23
|
+
# @param eager [Boolean]
|
24
|
+
# Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
|
25
|
+
# If unset, the value of the init-time parameter "eager_execution" will be
|
26
|
+
# used. (Note that the query itself is always executed in lazy-mode; this
|
27
|
+
# parameter only impacts the type of the returned frame).
|
28
|
+
#
|
29
|
+
# @return [Object]
|
30
|
+
#
|
31
|
+
# @example Execute a SQL query against the registered frame data:
|
32
|
+
# df = Polars::DataFrame.new(
|
33
|
+
# [
|
34
|
+
# ["The Godfather", 1972, 6_000_000, 134_821_952, 9.2],
|
35
|
+
# ["The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0],
|
36
|
+
# ["Schindler's List", 1993, 22_000_000, 96_067_179, 8.9],
|
37
|
+
# ["Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9],
|
38
|
+
# ["The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3],
|
39
|
+
# ],
|
40
|
+
# schema: ["title", "release_year", "budget", "gross", "imdb_score"]
|
41
|
+
# )
|
42
|
+
# ctx = Polars::SQLContext.new(films: df)
|
43
|
+
# ctx.execute(
|
44
|
+
# "
|
45
|
+
# SELECT title, release_year, imdb_score
|
46
|
+
# FROM films
|
47
|
+
# WHERE release_year > 1990
|
48
|
+
# ORDER BY imdb_score DESC
|
49
|
+
# ",
|
50
|
+
# eager: true
|
51
|
+
# )
|
52
|
+
# # =>
|
53
|
+
# # shape: (4, 3)
|
54
|
+
# # ┌──────────────────────────┬──────────────┬────────────┐
|
55
|
+
# # │ title ┆ release_year ┆ imdb_score │
|
56
|
+
# # │ --- ┆ --- ┆ --- │
|
57
|
+
# # │ str ┆ i64 ┆ f64 │
|
58
|
+
# # ╞══════════════════════════╪══════════════╪════════════╡
|
59
|
+
# # │ The Shawshank Redemption ┆ 1994 ┆ 9.3 │
|
60
|
+
# # │ The Dark Knight ┆ 2008 ┆ 9.0 │
|
61
|
+
# # │ Schindler's List ┆ 1993 ┆ 8.9 │
|
62
|
+
# # │ Pulp Fiction ┆ 1994 ┆ 8.9 │
|
63
|
+
# # └──────────────────────────┴──────────────┴────────────┘
|
64
|
+
#
|
65
|
+
# @example Execute a GROUP BY query:
|
66
|
+
# ctx.execute(
|
67
|
+
# "
|
68
|
+
# SELECT
|
69
|
+
# MAX(release_year / 10) * 10 AS decade,
|
70
|
+
# SUM(gross) AS total_gross,
|
71
|
+
# COUNT(title) AS n_films,
|
72
|
+
# FROM films
|
73
|
+
# GROUP BY (release_year / 10) -- decade
|
74
|
+
# ORDER BY total_gross DESC
|
75
|
+
# ",
|
76
|
+
# eager: true
|
77
|
+
# )
|
78
|
+
# # =>
|
79
|
+
# # shape: (3, 3)
|
80
|
+
# # ┌────────┬─────────────┬─────────┐
|
81
|
+
# # │ decade ┆ total_gross ┆ n_films │
|
82
|
+
# # │ --- ┆ --- ┆ --- │
|
83
|
+
# # │ i64 ┆ i64 ┆ u32 │
|
84
|
+
# # ╞════════╪═════════════╪═════════╡
|
85
|
+
# # │ 2000 ┆ 533316061 ┆ 1 │
|
86
|
+
# # │ 1990 ┆ 232338648 ┆ 3 │
|
87
|
+
# # │ 1970 ┆ 134821952 ┆ 1 │
|
88
|
+
# # └────────┴─────────────┴─────────┘
|
89
|
+
# Run +query+ against the registered frames.
#
# @param query [String] SQL text handed to the underlying context.
# @param eager [Boolean, nil]
#   true  - collect the result before returning it;
#   false - return the lazy result, even if the context was created with
#           `eager_execution: true`;
#   nil   - (default) fall back to the init-time `eager_execution` value.
# @return [Object] the collected result when eager, otherwise the lazy frame.
def execute(query, eager: nil)
  res = Utils.wrap_ldf(_ctxt.execute(query))
  # Only consult the init-time default when the caller left `eager` unset,
  # so that an explicit `eager: false` is honoured.
  eager = _eager_execution if eager.nil?
  eager ? res.collect : res
end
|
93
|
+
|
94
|
+
# Register a single frame as a table, using the given name.
|
95
|
+
#
|
96
|
+
# @param name [String]
|
97
|
+
# Name of the table.
|
98
|
+
# @param frame [Object]
|
99
|
+
# eager/lazy frame to associate with this table name.
|
100
|
+
#
|
101
|
+
# @return [SQLContext]
|
102
|
+
#
|
103
|
+
# @example
|
104
|
+
# df = Polars::DataFrame.new({"hello" => ["world"]})
|
105
|
+
# ctx = Polars::SQLContext.new
|
106
|
+
# ctx.register("frame_data", df).execute("SELECT * FROM frame_data").collect
|
107
|
+
# # =>
|
108
|
+
# # shape: (1, 1)
|
109
|
+
# # ┌───────┐
|
110
|
+
# # │ hello │
|
111
|
+
# # │ --- │
|
112
|
+
# # │ str │
|
113
|
+
# # ╞═══════╡
|
114
|
+
# # │ world │
|
115
|
+
# # └───────┘
|
116
|
+
def register(name, frame)
  # A DataFrame is converted with #lazy first; anything else is used as given.
  lazy_frame = frame.is_a?(DataFrame) ? frame.lazy : frame
  # The table name is always passed down as a String.
  _ctxt.register(name.to_s, lazy_frame._ldf)
  self
end
|
123
|
+
|
124
|
+
# Register multiple eager/lazy frames as tables, using the associated names.
|
125
|
+
#
|
126
|
+
# @param frames [Hash]
|
127
|
+
# A `{name:frame, ...}` mapping.
|
128
|
+
# @param named_frames [Object]
|
129
|
+
# Named eager/lazy frames, provided as kwargs.
|
130
|
+
#
|
131
|
+
# @return [SQLContext]
|
132
|
+
def register_many(frames, **named_frames)
  # Keyword entries are merged over the positional mapping, so they win on
  # duplicate keys.
  combined = (frames || {}).to_h.merge(named_frames)
  combined.each { |name, frame| register(name, frame) }
  self
end
|
140
|
+
|
141
|
+
# Unregister one or more eager/lazy frames by name.
|
142
|
+
#
|
143
|
+
# @param names [Object]
|
144
|
+
# Names of the tables to unregister.
|
145
|
+
#
|
146
|
+
# @return [SQLContext]
|
147
|
+
#
|
148
|
+
# @example Register with a SQLContext object:
|
149
|
+
# df0 = Polars::DataFrame.new({"ints" => [9, 8, 7, 6, 5]})
|
150
|
+
# lf1 = Polars::LazyFrame.new({"text" => ["a", "b", "c"]})
|
151
|
+
# lf2 = Polars::LazyFrame.new({"misc" => ["testing1234"]})
|
152
|
+
# ctx = Polars::SQLContext.new(test1: df0, test2: lf1, test3: lf2)
|
153
|
+
# ctx.tables
|
154
|
+
# # => ["test1", "test2", "test3"]
|
155
|
+
#
|
156
|
+
# @example Unregister one or more of the tables:
|
157
|
+
# ctx.unregister(["test1", "test3"]).tables
|
158
|
+
# # => ["test2"]
|
159
|
+
# Remove one or more tables from the context.
#
# @param names [String, Symbol, Array<String, Symbol>]
#   A single table name or a collection of names. Names are converted with
#   #to_s, mirroring how #register stores them.
# @return [SQLContext] self, to allow chaining.
def unregister(names)
  # Array() wraps a lone String/Symbol and leaves arrays untouched.
  Array(names).each { |nm| _ctxt.unregister(nm.to_s) }
  self
end
|
168
|
+
|
169
|
+
# Return a list of the registered table names.
|
170
|
+
#
|
171
|
+
# @return [Array]
|
172
|
+
#
|
173
|
+
# @example Executing as SQL:
|
174
|
+
# frame_data = Polars::DataFrame.new({"hello" => ["world"]})
|
175
|
+
# ctx = Polars::SQLContext.new(hello_world: frame_data)
|
176
|
+
# ctx.execute("SHOW TABLES", eager: true)
|
177
|
+
# # =>
|
178
|
+
# # shape: (1, 1)
|
179
|
+
# # ┌─────────────┐
|
180
|
+
# # │ name │
|
181
|
+
# # │ --- │
|
182
|
+
# # │ str │
|
183
|
+
# # ╞═════════════╡
|
184
|
+
# # │ hello_world │
|
185
|
+
# # └─────────────┘
|
186
|
+
#
|
187
|
+
# @example Calling the method:
|
188
|
+
# ctx.tables
|
189
|
+
# # => ["hello_world"]
|
190
|
+
def tables
  # Return the context's table names in sorted order.
  table_names = _ctxt.get_tables
  table_names.sort
end
|
193
|
+
end
|
194
|
+
end
|
data/lib/polars/string_expr.rb
CHANGED
@@ -9,11 +9,134 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
+
# Convert a Utf8 column into a Date column.
|
13
|
+
#
|
14
|
+
# @param format [String]
|
15
|
+
# Format to use for conversion. Refer to the
|
16
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
17
|
+
# for the full specification. Example: `"%Y-%m-%d"`.
|
18
|
+
# If set to nil (default), the format is inferred from the data.
|
19
|
+
# @param strict [Boolean]
|
20
|
+
# Raise an error if any conversion fails.
|
21
|
+
# @param exact [Boolean]
|
22
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
23
|
+
# in the target string.
|
24
|
+
# @param cache [Boolean]
|
25
|
+
# Use a cache of unique, converted dates to apply the conversion.
|
26
|
+
#
|
27
|
+
# @return [Expr]
|
28
|
+
#
|
29
|
+
# @example
|
30
|
+
# s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
|
31
|
+
# s.str.to_date
|
32
|
+
# # =>
|
33
|
+
# # shape: (3,)
|
34
|
+
# # Series: '' [date]
|
35
|
+
# # [
|
36
|
+
# # 2020-01-01
|
37
|
+
# # 2020-02-01
|
38
|
+
# # 2020-03-01
|
39
|
+
# # ]
|
40
|
+
def to_date(format = nil, strict: true, exact: true, cache: true)
  _validate_format_argument(format)
  # Forward the options positionally to the underlying expression.
  date_rbexpr = _rbexpr.str_to_date(format, strict, exact, cache)
  Utils.wrap_expr(date_rbexpr)
end
|
44
|
+
|
45
|
+
# Convert a Utf8 column into a Datetime column.
|
46
|
+
#
|
47
|
+
# @param format [String]
|
48
|
+
# Format to use for conversion. Refer to the
|
49
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
50
|
+
# for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
|
51
|
+
# If set to nil (default), the format is inferred from the data.
|
52
|
+
# @param time_unit ["us", "ns", "ms"]
|
53
|
+
# Unit of time for the resulting Datetime column. If set to nil (default),
|
54
|
+
# the time unit is inferred from the format string if given, eg:
|
55
|
+
# `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
|
56
|
+
# found, the default is `"us"`.
|
57
|
+
# @param time_zone [String]
|
58
|
+
# Time zone for the resulting Datetime column.
|
59
|
+
# @param strict [Boolean]
|
60
|
+
# Raise an error if any conversion fails.
|
61
|
+
# @param exact [Boolean]
|
62
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
63
|
+
# in the target string.
|
64
|
+
# @param cache [Boolean]
|
65
|
+
# Use a cache of unique, converted datetimes to apply the conversion.
|
66
|
+
#
|
67
|
+
# @return [Expr]
|
68
|
+
#
|
69
|
+
# @example
|
70
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
71
|
+
# s.str.to_datetime("%Y-%m-%d %H:%M%#z")
|
72
|
+
# # =>
|
73
|
+
# # shape: (2,)
|
74
|
+
# # Series: '' [datetime[μs, UTC]]
|
75
|
+
# # [
|
76
|
+
# # 2020-01-01 01:00:00 UTC
|
77
|
+
# # 2020-01-01 02:00:00 UTC
|
78
|
+
# # ]
|
79
|
+
def to_datetime(
  format = nil,
  time_unit: nil,
  time_zone: nil,
  strict: true,
  exact: true,
  cache: true,
  use_earliest: nil,
  ambiguous: "raise"
)
  _validate_format_argument(format)

  # Fold `use_earliest` into `ambiguous` via the Utils helper, then make sure
  # the result is an Expr before its _rbexpr is read.
  ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
  ambiguous_expr = ambiguous.is_a?(Expr) ? ambiguous : Polars.lit(ambiguous)

  datetime_rbexpr = _rbexpr.str_to_datetime(
    format, time_unit, time_zone, strict, exact, cache, ambiguous_expr._rbexpr
  )
  Utils.wrap_expr(datetime_rbexpr)
end
|
104
|
+
|
105
|
+
# Convert a Utf8 column into a Time column.
|
106
|
+
#
|
107
|
+
# @param format [String]
|
108
|
+
# Format to use for conversion. Refer to the
|
109
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
110
|
+
# for the full specification. Example: `"%H:%M:%S"`.
|
111
|
+
# If set to nil (default), the format is inferred from the data.
|
112
|
+
# @param strict [Boolean]
|
113
|
+
# Raise an error if any conversion fails.
|
114
|
+
# @param cache [Boolean]
|
115
|
+
# Use a cache of unique, converted times to apply the conversion.
|
116
|
+
#
|
117
|
+
# @return [Expr]
|
118
|
+
#
|
119
|
+
# @example
|
120
|
+
# s = Polars::Series.new(["01:00", "02:00", "03:00"])
|
121
|
+
# s.str.to_time("%H:%M")
|
122
|
+
# # =>
|
123
|
+
# # shape: (3,)
|
124
|
+
# # Series: '' [time]
|
125
|
+
# # [
|
126
|
+
# # 01:00:00
|
127
|
+
# # 02:00:00
|
128
|
+
# # 03:00:00
|
129
|
+
# # ]
|
130
|
+
def to_time(format = nil, strict: true, cache: true)
  _validate_format_argument(format)
  # Forward the options positionally to the underlying expression.
  time_rbexpr = _rbexpr.str_to_time(format, strict, cache)
  Utils.wrap_expr(time_rbexpr)
end
|
134
|
+
|
12
135
|
# Parse a Utf8 expression to a Date/Datetime/Time type.
|
13
136
|
#
|
14
137
|
# @param dtype [Object]
|
15
138
|
# The data type to convert into. Can be either Date, Datetime, or Time.
|
16
|
-
# @param
|
139
|
+
# @param format [String]
|
17
140
|
# Format to use, refer to the
|
18
141
|
# [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
19
142
|
# for specification. Example: `"%y-%m-%d"`.
|
@@ -38,10 +161,10 @@ module Polars
|
|
38
161
|
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
39
162
|
# # =>
|
40
163
|
# # shape: (2,)
|
41
|
-
# # Series: '' [datetime[μs,
|
164
|
+
# # Series: '' [datetime[μs, UTC]]
|
42
165
|
# # [
|
43
|
-
# # 2020-01-01 01:00:00
|
44
|
-
# # 2020-01-01 02:00:00
|
166
|
+
# # 2020-01-01 01:00:00 UTC
|
167
|
+
# # 2020-01-01 02:00:00 UTC
|
45
168
|
# # ]
|
46
169
|
#
|
47
170
|
# @example Dealing with different formats.
|
@@ -71,16 +194,18 @@ module Polars
|
|
71
194
|
# # 2022-01-31
|
72
195
|
# # 2001-07-08
|
73
196
|
# # ]
|
74
|
-
def strptime(dtype,
|
197
|
+
def strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false)
|
198
|
+
_validate_format_argument(format)
|
199
|
+
|
75
200
|
if dtype == Date
|
76
|
-
|
201
|
+
to_date(format, strict: strict, exact: exact, cache: cache)
|
77
202
|
elsif dtype == Datetime || dtype.is_a?(Datetime)
|
78
203
|
dtype = Datetime.new if dtype == Datetime
|
79
204
|
time_unit = dtype.time_unit
|
80
205
|
time_zone = dtype.time_zone
|
81
|
-
|
206
|
+
to_datetime(format, time_unit: time_unit, time_zone: time_zone, strict: strict, exact: exact, cache: cache)
|
82
207
|
elsif dtype == Time
|
83
|
-
|
208
|
+
to_time(format, strict: strict, cache: cache)
|
84
209
|
else
|
85
210
|
raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
|
86
211
|
end
|
@@ -115,7 +240,7 @@ module Polars
|
|
115
240
|
# # │ 東京 ┆ 6 ┆ 2 │
|
116
241
|
# # └──────┴────────┴────────┘
|
117
242
|
def lengths
|
118
|
-
Utils.wrap_expr(_rbexpr.
|
243
|
+
Utils.wrap_expr(_rbexpr.str_len_bytes)
|
119
244
|
end
|
120
245
|
|
121
246
|
# Get length of the strings as `:u32` (as number of chars).
|
@@ -147,13 +272,15 @@ module Polars
|
|
147
272
|
# # │ 東京 ┆ 6 ┆ 2 │
|
148
273
|
# # └──────┴────────┴────────┘
|
149
274
|
def n_chars
|
150
|
-
Utils.wrap_expr(_rbexpr.
|
275
|
+
Utils.wrap_expr(_rbexpr.str_len_chars)
|
151
276
|
end
|
152
277
|
|
153
278
|
# Vertically concat the values in the Series to a single string value.
|
154
279
|
#
|
155
280
|
# @param delimiter [String]
|
156
281
|
# The delimiter to insert between consecutive string values.
|
282
|
+
# @param ignore_nulls [Boolean]
|
283
|
+
# Ignore null values (default).
|
157
284
|
#
|
158
285
|
# @return [Expr]
|
159
286
|
#
|
@@ -162,15 +289,28 @@ module Polars
|
|
162
289
|
# df.select(Polars.col("foo").str.concat("-"))
|
163
290
|
# # =>
|
164
291
|
# # shape: (1, 1)
|
165
|
-
# #
|
166
|
-
# # │ foo
|
167
|
-
# # │ ---
|
168
|
-
# # │ str
|
169
|
-
# #
|
170
|
-
# # │ 1-
|
171
|
-
# #
|
172
|
-
|
173
|
-
|
292
|
+
# # ┌─────┐
|
293
|
+
# # │ foo │
|
294
|
+
# # │ --- │
|
295
|
+
# # │ str │
|
296
|
+
# # ╞═════╡
|
297
|
+
# # │ 1-2 │
|
298
|
+
# # └─────┘
|
299
|
+
#
|
300
|
+
# @example
|
301
|
+
# df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
|
302
|
+
# df.select(Polars.col("foo").str.concat("-", ignore_nulls: false))
|
303
|
+
# # =>
|
304
|
+
# # shape: (1, 1)
|
305
|
+
# # ┌──────┐
|
306
|
+
# # │ foo │
|
307
|
+
# # │ --- │
|
308
|
+
# # │ str │
|
309
|
+
# # ╞══════╡
|
310
|
+
# # │ null │
|
311
|
+
# # └──────┘
|
312
|
+
def concat(delimiter = "-", ignore_nulls: true)
  # Delegate to the underlying expression and wrap the result.
  joined = _rbexpr.str_concat(delimiter, ignore_nulls)
  Utils.wrap_expr(joined)
end
|
175
315
|
|
176
316
|
# Transform to uppercase variant.
|
@@ -217,7 +357,7 @@ module Polars
|
|
217
357
|
|
218
358
|
# Remove leading and trailing whitespace.
|
219
359
|
#
|
220
|
-
# @param
|
360
|
+
# @param characters [String, nil]
|
221
361
|
# An optional single character that should be trimmed.
|
222
362
|
#
|
223
363
|
# @return [Expr]
|
@@ -236,16 +376,15 @@ module Polars
|
|
236
376
|
# # │ trail │
|
237
377
|
# # │ both │
|
238
378
|
# # └───────┘
|
239
|
-
def
|
240
|
-
|
241
|
-
|
242
|
-
end
|
243
|
-
Utils.wrap_expr(_rbexpr.str_strip(matches))
|
379
|
+
def strip_chars(characters = nil)
  # Strings are parsed as literals (str_as_lit: true), not as column names.
  chars_arg = Utils.parse_as_expression(characters, str_as_lit: true)
  stripped = _rbexpr.str_strip_chars(chars_arg)
  Utils.wrap_expr(stripped)
end
|
383
|
+
alias_method :strip, :strip_chars
|
245
384
|
|
246
385
|
# Remove leading whitespace.
|
247
386
|
#
|
248
|
-
# @param
|
387
|
+
# @param characters [String, nil]
|
249
388
|
# An optional single character that should be trimmed.
|
250
389
|
#
|
251
390
|
# @return [Expr]
|
@@ -264,16 +403,15 @@ module Polars
|
|
264
403
|
# # │ trail │
|
265
404
|
# # │ both │
|
266
405
|
# # └────────┘
|
267
|
-
def
|
268
|
-
|
269
|
-
|
270
|
-
end
|
271
|
-
Utils.wrap_expr(_rbexpr.str_lstrip(matches))
|
406
|
+
def strip_chars_start(characters = nil)
  # Strings are parsed as literals (str_as_lit: true), not as column names.
  chars_arg = Utils.parse_as_expression(characters, str_as_lit: true)
  stripped = _rbexpr.str_strip_chars_start(chars_arg)
  Utils.wrap_expr(stripped)
end
|
410
|
+
alias_method :lstrip, :strip_chars_start
|
273
411
|
|
274
412
|
# Remove trailing whitespace.
|
275
413
|
#
|
276
|
-
# @param
|
414
|
+
# @param characters [String, nil]
|
277
415
|
# An optional single character that should be trimmed.
|
278
416
|
#
|
279
417
|
# @return [Expr]
|
@@ -292,12 +430,11 @@ module Polars
|
|
292
430
|
# # │ trail │
|
293
431
|
# # │ both │
|
294
432
|
# # └───────┘
|
295
|
-
def
|
296
|
-
|
297
|
-
|
298
|
-
end
|
299
|
-
Utils.wrap_expr(_rbexpr.str_rstrip(matches))
|
433
|
+
def strip_chars_end(characters = nil)
  # Strings are parsed as literals (str_as_lit: true), not as column names.
  chars_arg = Utils.parse_as_expression(characters, str_as_lit: true)
  stripped = _rbexpr.str_strip_chars_end(chars_arg)
  Utils.wrap_expr(stripped)
end
|
437
|
+
alias_method :rstrip, :strip_chars_end
|
301
438
|
|
302
439
|
# Fills the string with zeroes.
|
303
440
|
#
|
@@ -341,13 +478,13 @@ module Polars
|
|
341
478
|
Utils.wrap_expr(_rbexpr.str_zfill(alignment))
|
342
479
|
end
|
343
480
|
|
344
|
-
# Return the string left justified in a string of length `
|
481
|
+
# Return the string left justified in a string of length `length`.
|
345
482
|
#
|
346
483
|
# Padding is done using the specified `fillchar`.
|
347
|
-
# The original string is returned if `
|
484
|
+
# The original string is returned if `length` is less than or equal to
|
348
485
|
# `s.length`.
|
349
486
|
#
|
350
|
-
# @param
|
487
|
+
# @param length [Integer]
|
351
488
|
# Justify left to this length.
|
352
489
|
# @param fillchar [String]
|
353
490
|
# Fill with this ASCII character.
|
@@ -369,17 +506,18 @@ module Polars
|
|
369
506
|
# # │ null │
|
370
507
|
# # │ hippopotamus │
|
371
508
|
# # └──────────────┘
|
372
|
-
def ljust(
|
373
|
-
Utils.wrap_expr(_rbexpr.
|
509
|
+
def ljust(length, fillchar = " ")
  # Left-justifying is done by padding at the end of the string.
  padded = _rbexpr.str_pad_end(length, fillchar)
  Utils.wrap_expr(padded)
end
|
512
|
+
alias_method :pad_end, :ljust
|
375
513
|
|
376
|
-
# Return the string right justified in a string of length `
|
514
|
+
# Return the string right justified in a string of length `length`.
|
377
515
|
#
|
378
516
|
# Padding is done using the specified `fillchar`.
|
379
|
-
# The original string is returned if `
|
517
|
+
# The original string is returned if `length` is less than or equal to
|
380
518
|
# `s.length`.
|
381
519
|
#
|
382
|
-
# @param
|
520
|
+
# @param length [Integer]
|
383
521
|
# Justify right to this length.
|
384
522
|
# @param fillchar [String]
|
385
523
|
# Fill with this ASCII character.
|
@@ -401,9 +539,10 @@ module Polars
|
|
401
539
|
# # │ null │
|
402
540
|
# # │ hippopotamus │
|
403
541
|
# # └──────────────┘
|
404
|
-
def rjust(
|
405
|
-
Utils.wrap_expr(_rbexpr.
|
542
|
+
def rjust(length, fillchar = " ")
  # Right-justifying is done by padding at the start of the string.
  padded = _rbexpr.str_pad_start(length, fillchar)
  Utils.wrap_expr(padded)
end
|
545
|
+
alias_method :pad_start, :rjust
|
407
546
|
|
408
547
|
# Check if string contains a substring that matches a regex.
|
409
548
|
#
|
@@ -547,11 +686,11 @@ module Polars
|
|
547
686
|
# # │ {null,null} │
|
548
687
|
# # │ {2,false} │
|
549
688
|
# # └─────────────┘
|
550
|
-
def json_extract(dtype = nil)
|
689
|
+
def json_extract(dtype = nil, infer_schema_length: 100)
|
551
690
|
if !dtype.nil?
|
552
691
|
dtype = Utils.rb_type_to_dtype(dtype)
|
553
692
|
end
|
554
|
-
Utils.wrap_expr(_rbexpr.str_json_extract(dtype))
|
693
|
+
Utils.wrap_expr(_rbexpr.str_json_extract(dtype, infer_schema_length))
|
555
694
|
end
|
556
695
|
|
557
696
|
# Extract the first match of json string with provided JSONPath expression.
|
@@ -744,9 +883,11 @@ module Polars
|
|
744
883
|
# # │ 5 │
|
745
884
|
# # │ 6 │
|
746
885
|
# # └──────────────┘
|
747
|
-
def
|
748
|
-
Utils.
|
886
|
+
def count_matches(pattern, literal: false)
  # A String pattern is parsed as a literal value (str_as_lit: true).
  pattern_arg = Utils.parse_as_expression(pattern, str_as_lit: true)
  counted = _rbexpr.str_count_matches(pattern_arg, literal)
  Utils.wrap_expr(counted)
end
|
890
|
+
alias_method :count_match, :count_matches
|
750
891
|
|
751
892
|
# Split the string by a substring.
|
752
893
|
#
|
@@ -772,6 +913,7 @@ module Polars
|
|
772
913
|
# # │ ["foo", "bar", "baz"] │
|
773
914
|
# # └───────────────────────┘
|
774
915
|
def split(by, inclusive: false)
|
916
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
775
917
|
if inclusive
|
776
918
|
Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
|
777
919
|
else
|
@@ -814,6 +956,7 @@ module Polars
|
|
814
956
|
# # │ {"d","4"} │
|
815
957
|
# # └─────────────┘
|
816
958
|
def split_exact(by, n, inclusive: false)
|
959
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
817
960
|
if inclusive
|
818
961
|
Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
|
819
962
|
else
|
@@ -850,6 +993,7 @@ module Polars
|
|
850
993
|
# # │ {"foo","bar baz"} │
|
851
994
|
# # └───────────────────┘
|
852
995
|
def splitn(by, n)
  # A String separator is parsed as a literal value (str_as_lit: true).
  separator = Utils.parse_as_expression(by, str_as_lit: true)
  pieces = _rbexpr.str_splitn(separator, n)
  Utils.wrap_expr(pieces)
end
|
855
999
|
|
@@ -968,7 +1112,53 @@ module Polars
|
|
968
1112
|
# # │ r │
|
969
1113
|
# # └─────┘
|
970
1114
|
def explode
|
971
|
-
Utils.wrap_expr(_rbexpr.
|
1115
|
+
Utils.wrap_expr(_rbexpr.str_explode)
|
1116
|
+
end
|
1117
|
+
|
1118
|
+
# Convert a Utf8 column into an Int64 column with base radix.
|
1119
|
+
#
|
1120
|
+
# @param base [Integer]
|
1121
|
+
# Positive integer which is the base of the string we are parsing.
|
1122
|
+
# Default: 10.
|
1123
|
+
# @param strict [Boolean]
|
1124
|
+
# If true (default), raise a ComputeError on any parse error or overflow.
|
1125
|
+
# If false, silently convert unparseable or overflowing values to null.
|
1126
|
+
#
|
1127
|
+
# @return [Expr]
|
1128
|
+
#
|
1129
|
+
# @example
|
1130
|
+
# df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
|
1131
|
+
# df.with_columns(Polars.col("bin").str.to_integer(base: 2, strict: false).alias("parsed"))
|
1132
|
+
# # =>
|
1133
|
+
# # shape: (4, 2)
|
1134
|
+
# # ┌─────────┬────────┐
|
1135
|
+
# # │ bin ┆ parsed │
|
1136
|
+
# # │ --- ┆ --- │
|
1137
|
+
# # │ str ┆ i64 │
|
1138
|
+
# # ╞═════════╪════════╡
|
1139
|
+
# # │ 110 ┆ 6 │
|
1140
|
+
# # │ 101 ┆ 5 │
|
1141
|
+
# # │ 010 ┆ 2 │
|
1142
|
+
# # │ invalid ┆ null │
|
1143
|
+
# # └─────────┴────────┘
|
1144
|
+
#
|
1145
|
+
# @example
|
1146
|
+
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1147
|
+
# df.with_columns(Polars.col("hex").str.to_integer(base: 16, strict: true).alias("parsed"))
|
1148
|
+
# # =>
|
1149
|
+
# # shape: (4, 2)
|
1150
|
+
# # ┌──────┬────────┐
|
1151
|
+
# # │ hex ┆ parsed │
|
1152
|
+
# # │ --- ┆ --- │
|
1153
|
+
# # │ str ┆ i64 │
|
1154
|
+
# # ╞══════╪════════╡
|
1155
|
+
# # │ fa1e ┆ 64030 │
|
1156
|
+
# # │ ff00 ┆ 65280 │
|
1157
|
+
# # │ cafe ┆ 51966 │
|
1158
|
+
# # │ null ┆ null │
|
1159
|
+
# # └──────┴────────┘
|
1160
|
+
def to_integer(base: 10, strict: true)
  # Delegate to the underlying expression and wrap the result.
  parsed = _rbexpr.str_to_integer(base, strict)
  Utils.wrap_expr(parsed)
end
|
973
1163
|
|
974
1164
|
# Parse integers with base radix from strings.
|
@@ -999,24 +1189,14 @@ module Polars
|
|
999
1189
|
# # │ 2 │
|
1000
1190
|
# # │ null │
|
1001
1191
|
# # └──────┘
|
1002
|
-
#
|
1003
|
-
# @example
|
1004
|
-
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1005
|
-
# df.select(Polars.col("hex").str.parse_int(16, strict: true))
|
1006
|
-
# # =>
|
1007
|
-
# # shape: (4, 1)
|
1008
|
-
# # ┌───────┐
|
1009
|
-
# # │ hex │
|
1010
|
-
# # │ --- │
|
1011
|
-
# # │ i32 │
|
1012
|
-
# # ╞═══════╡
|
1013
|
-
# # │ 64030 │
|
1014
|
-
# # │ 65280 │
|
1015
|
-
# # │ 51966 │
|
1016
|
-
# # │ null │
|
1017
|
-
# # └───────┘
|
1018
1192
|
# Parse integers from strings using the given radix, returning Int32 values.
#
# Implemented on top of #to_integer, with the result cast to Int32.
#
# @param radix [Integer] base of the textual representation (default 2).
# @param strict [Boolean] forwarded to both #to_integer and the Int32 cast.
# @return [Expr]
def parse_int(radix = 2, strict: true)
  # Pass the caller's radix through; a hard-coded base of 2 would ignore it.
  to_integer(base: radix, strict: strict).cast(Int32, strict: strict)
end
|
1195
|
+
|
1196
|
+
private
|
1197
|
+
|
1198
|
+
def _validate_format_argument(format)
  # TODO: validation is not implemented yet; this is currently a no-op that
  # accepts any value and returns nil.
end
|
1021
1201
|
end
|
1022
1202
|
end
|