polars-df 0.5.0-arm64-darwin → 0.7.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,194 @@
1
module Polars
  # Run SQL queries against DataFrame/LazyFrame data.
  class SQLContext
    # @private
    attr_accessor :_ctxt, :_eager_execution

    # Initialize a new `SQLContext`.
    #
    # @param frames [Hash, nil]
    #   A `{name: frame, ...}` mapping of tables to register immediately.
    # @param eager_execution [Boolean]
    #   Default for the `eager` option of {#execute}.
    # @param named_frames [Object]
    #   Named eager/lazy frames, provided as kwargs.
    def initialize(frames = nil, eager_execution: false, **named_frames)
      self._ctxt = RbSQLContext.new
      self._eager_execution = eager_execution

      frames = (frames || {}).to_h
      register_many(frames, **named_frames) if frames.any? || named_frames.any?
    end

    # Parse the given SQL query and execute it against the registered frame data.
    #
    # @param query [String]
    #   A valid string SQL query.
    # @param eager [Boolean, nil]
    #   Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
    #   If unset (nil), the value of the init-time parameter "eager_execution" will be
    #   used; an explicit `true` or `false` always takes precedence over it.
    #   (Note that the query itself is always executed in lazy-mode; this
    #   parameter only impacts the type of the returned frame).
    #
    # @return [DataFrame, LazyFrame]
    #
    # @example Execute a SQL query against the registered frame data:
    #   df = Polars::DataFrame.new(
    #     [
    #       ["The Godfather", 1972, 6_000_000, 134_821_952, 9.2],
    #       ["The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0],
    #       ["Schindler's List", 1993, 22_000_000, 96_067_179, 8.9],
    #       ["Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9],
    #       ["The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3],
    #     ],
    #     schema: ["title", "release_year", "budget", "gross", "imdb_score"]
    #   )
    #   ctx = Polars::SQLContext.new(films: df)
    #   ctx.execute(
    #     "
    #     SELECT title, release_year, imdb_score
    #     FROM films
    #     WHERE release_year > 1990
    #     ORDER BY imdb_score DESC
    #     ",
    #     eager: true
    #   )
    #   # =>
    #   # shape: (4, 3)
    #   # ┌──────────────────────────┬──────────────┬────────────┐
    #   # │ title                    ┆ release_year ┆ imdb_score │
    #   # │ ---                      ┆ ---          ┆ ---        │
    #   # │ str                      ┆ i64          ┆ f64        │
    #   # ╞══════════════════════════╪══════════════╪════════════╡
    #   # │ The Shawshank Redemption ┆ 1994         ┆ 9.3        │
    #   # │ The Dark Knight          ┆ 2008         ┆ 9.0        │
    #   # │ Schindler's List         ┆ 1993         ┆ 8.9        │
    #   # │ Pulp Fiction             ┆ 1994         ┆ 8.9        │
    #   # └──────────────────────────┴──────────────┴────────────┘
    #
    # @example Execute a GROUP BY query:
    #   ctx.execute(
    #     "
    #     SELECT
    #       MAX(release_year / 10) * 10 AS decade,
    #       SUM(gross) AS total_gross,
    #       COUNT(title) AS n_films,
    #     FROM films
    #     GROUP BY (release_year / 10) -- decade
    #     ORDER BY total_gross DESC
    #     ",
    #     eager: true
    #   )
    #   # =>
    #   # shape: (3, 3)
    #   # ┌────────┬─────────────┬─────────┐
    #   # │ decade ┆ total_gross ┆ n_films │
    #   # │ ---    ┆ ---         ┆ ---     │
    #   # │ i64    ┆ i64         ┆ u32     │
    #   # ╞════════╪═════════════╪═════════╡
    #   # │ 2000   ┆ 533316061   ┆ 1       │
    #   # │ 1990   ┆ 232338648   ┆ 3       │
    #   # │ 1970   ┆ 134821952   ┆ 1       │
    #   # └────────┴─────────────┴─────────┘
    def execute(query, eager: nil)
      # Only fall back to the context-wide default when the caller left `eager`
      # unset; `eager || default` would make an explicit `eager: false`
      # impossible to honour on a context created with `eager_execution: true`.
      eager = _eager_execution if eager.nil?
      res = Utils.wrap_ldf(_ctxt.execute(query))
      eager ? res.collect : res
    end

    # Register a single frame as a table, using the given name.
    #
    # @param name [String, Symbol]
    #   Name of the table (converted with `to_s`).
    # @param frame [Object]
    #   eager/lazy frame to associate with this table name.
    #
    # @return [SQLContext]
    #
    # @example
    #   df = Polars::DataFrame.new({"hello" => ["world"]})
    #   ctx = Polars::SQLContext.new
    #   ctx.register("frame_data", df).execute("SELECT * FROM frame_data").collect
    #   # =>
    #   # shape: (1, 1)
    #   # ┌───────┐
    #   # │ hello │
    #   # │ ---   │
    #   # │ str   │
    #   # ╞═══════╡
    #   # │ world │
    #   # └───────┘
    def register(name, frame)
      # Eager frames are registered through their lazy counterpart.
      frame = frame.lazy if frame.is_a?(DataFrame)
      _ctxt.register(name.to_s, frame._ldf)
      self
    end

    # Register multiple eager/lazy frames as tables, using the associated names.
    #
    # @param frames [Hash, nil]
    #   A `{name:frame, ...}` mapping.
    # @param named_frames [Object]
    #   Named eager/lazy frames, provided as kwargs. On a name clash these
    #   take precedence over entries in `frames`.
    #
    # @return [SQLContext]
    def register_many(frames = nil, **named_frames)
      (frames || {}).to_h.merge(named_frames).each do |name, frame|
        register(name, frame)
      end
      self
    end

    # Unregister one or more eager/lazy frames by name.
    #
    # @param names [String, Symbol, Array]
    #   Name, or list of names, of the tables to unregister. Each name is
    #   converted with `to_s`, mirroring {#register}.
    #
    # @return [SQLContext]
    #
    # @example Register with a SQLContext object:
    #   df0 = Polars::DataFrame.new({"ints" => [9, 8, 7, 6, 5]})
    #   lf1 = Polars::LazyFrame.new({"text" => ["a", "b", "c"]})
    #   lf2 = Polars::LazyFrame.new({"misc" => ["testing1234"]})
    #   ctx = Polars::SQLContext.new(test1: df0, test2: lf1, test3: lf2)
    #   ctx.tables
    #   # => ["test1", "test2", "test3"]
    #
    # @example Unregister one or more of the tables:
    #   ctx.unregister(["test1", "test3"]).tables
    #   # => ["test2"]
    def unregister(names)
      # Array() wraps a lone String/Symbol and leaves arrays untouched.
      Array(names).each { |nm| _ctxt.unregister(nm.to_s) }
      self
    end

    # Return a list of the registered table names.
    #
    # @return [Array]
    #
    # @example Executing as SQL:
    #   frame_data = Polars::DataFrame.new({"hello" => ["world"]})
    #   ctx = Polars::SQLContext.new(hello_world: frame_data)
    #   ctx.execute("SHOW TABLES", eager: true)
    #   # =>
    #   # shape: (1, 1)
    #   # ┌─────────────┐
    #   # │ name        │
    #   # │ ---         │
    #   # │ str         │
    #   # ╞═════════════╡
    #   # │ hello_world │
    #   # └─────────────┘
    #
    # @example Calling the method:
    #   ctx.tables
    #   # => ["hello_world"]
    def tables
      _ctxt.get_tables.sort
    end
  end
end
@@ -9,11 +9,134 @@ module Polars
9
9
  self._rbexpr = expr._rbexpr
10
10
  end
11
11
 
12
+ # Convert a Utf8 column into a Date column.
13
+ #
14
+ # @param format [String]
15
+ # Format to use for conversion. Refer to the
16
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
17
+ # for the full specification. Example: `"%Y-%m-%d"`.
18
+ # If set to nil (default), the format is inferred from the data.
19
+ # @param strict [Boolean]
20
+ # Raise an error if any conversion fails.
21
+ # @param exact [Boolean]
22
+ # Require an exact format match. If false, allow the format to match anywhere
23
+ # in the target string.
24
+ # @param cache [Boolean]
25
+ # Use a cache of unique, converted dates to apply the conversion.
26
+ #
27
+ # @return [Expr]
28
+ #
29
+ # @example
30
+ # s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
31
+ # s.str.to_date
32
+ # # =>
33
+ # # shape: (3,)
34
+ # # Series: '' [date]
35
+ # # [
36
+ # # 2020-01-01
37
+ # # 2020-02-01
38
+ # # 2020-03-01
39
+ # # ]
40
def to_date(format = nil, strict: true, exact: true, cache: true)
  # Run the shared format check before building the native expression.
  _validate_format_argument(format)
  date_rbexpr = _rbexpr.str_to_date(format, strict, exact, cache)
  Utils.wrap_expr(date_rbexpr)
end
44
+
45
+ # Convert a Utf8 column into a Datetime column.
46
+ #
47
+ # @param format [String]
48
+ # Format to use for conversion. Refer to the
49
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
50
+ # for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
51
+ # If set to nil (default), the format is inferred from the data.
52
+ # @param time_unit ["us", "ns", "ms"]
53
+ # Unit of time for the resulting Datetime column. If set to nil (default),
54
+ # the time unit is inferred from the format string if given, eg:
55
+ # `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
56
+ # found, the default is `"us"`.
57
+ # @param time_zone [String]
58
+ # Time zone for the resulting Datetime column.
59
+ # @param strict [Boolean]
60
+ # Raise an error if any conversion fails.
61
+ # @param exact [Boolean]
62
+ # Require an exact format match. If false, allow the format to match anywhere
63
+ # in the target string.
64
+ # @param cache [Boolean]
65
+ # Use a cache of unique, converted datetimes to apply the conversion.
66
+ #
67
+ # @return [Expr]
68
+ #
69
+ # @example
70
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
71
+ # s.str.to_datetime("%Y-%m-%d %H:%M%#z")
72
+ # # =>
73
+ # # shape: (2,)
74
+ # # Series: '' [datetime[μs, UTC]]
75
+ # # [
76
+ # # 2020-01-01 01:00:00 UTC
77
+ # # 2020-01-01 02:00:00 UTC
78
+ # # ]
79
def to_datetime(
  format = nil,
  time_unit: nil,
  time_zone: nil,
  strict: true,
  exact: true,
  cache: true,
  use_earliest: nil,
  ambiguous: "raise"
)
  _validate_format_argument(format)

  # NOTE(review): the helper presumably maps the older `use_earliest` flag onto
  # the `ambiguous` option - confirm against Utils.
  ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
  # The native call takes `ambiguous` as an expression, so wrap plain values.
  ambiguous_expr = ambiguous.is_a?(Expr) ? ambiguous : Polars.lit(ambiguous)

  datetime_rbexpr = _rbexpr.str_to_datetime(
    format, time_unit, time_zone, strict, exact, cache, ambiguous_expr._rbexpr
  )
  Utils.wrap_expr(datetime_rbexpr)
end
104
+
105
+ # Convert a Utf8 column into a Time column.
106
+ #
107
+ # @param format [String]
108
+ # Format to use for conversion. Refer to the
109
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
110
+ # for the full specification. Example: `"%H:%M:%S"`.
111
+ # If set to nil (default), the format is inferred from the data.
112
+ # @param strict [Boolean]
113
+ # Raise an error if any conversion fails.
114
+ # @param cache [Boolean]
115
+ # Use a cache of unique, converted times to apply the conversion.
116
+ #
117
+ # @return [Expr]
118
+ #
119
+ # @example
120
+ # s = Polars::Series.new(["01:00", "02:00", "03:00"])
121
+ # s.str.to_time("%H:%M")
122
+ # # =>
123
+ # # shape: (3,)
124
+ # # Series: '' [time]
125
+ # # [
126
+ # # 01:00:00
127
+ # # 02:00:00
128
+ # # 03:00:00
129
+ # # ]
130
def to_time(format = nil, strict: true, cache: true)
  # Run the shared format check before building the native expression.
  _validate_format_argument(format)
  time_rbexpr = _rbexpr.str_to_time(format, strict, cache)
  Utils.wrap_expr(time_rbexpr)
end
134
+
12
135
  # Parse a Utf8 expression to a Date/Datetime/Time type.
13
136
  #
14
137
  # @param dtype [Object]
15
138
  # The data type to convert into. Can be either Date, Datetime, or Time.
16
- # @param fmt [String]
139
+ # @param format [String]
17
140
  # Format to use, refer to the
18
141
  # [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
19
142
  # for specification. Example: `"%y-%m-%d"`.
@@ -38,10 +161,10 @@ module Polars
38
161
  # s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
39
162
  # # =>
40
163
  # # shape: (2,)
41
- # # Series: '' [datetime[μs, +00:00]]
164
+ # # Series: '' [datetime[μs, UTC]]
42
165
  # # [
43
- # # 2020-01-01 01:00:00 +00:00
44
- # # 2020-01-01 02:00:00 +00:00
166
+ # # 2020-01-01 01:00:00 UTC
167
+ # # 2020-01-01 02:00:00 UTC
45
168
  # # ]
46
169
  #
47
170
  # @example Dealing with different formats.
@@ -71,16 +194,18 @@ module Polars
71
194
  # # 2022-01-31
72
195
  # # 2001-07-08
73
196
  # # ]
74
- def strptime(dtype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
197
+ def strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false)
198
+ _validate_format_argument(format)
199
+
75
200
  if dtype == Date
76
- Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
201
+ to_date(format, strict: strict, exact: exact, cache: cache)
77
202
  elsif dtype == Datetime || dtype.is_a?(Datetime)
78
203
  dtype = Datetime.new if dtype == Datetime
79
204
  time_unit = dtype.time_unit
80
205
  time_zone = dtype.time_zone
81
- Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, time_unit, time_zone, strict, exact, cache, tz_aware, utc))
206
+ to_datetime(format, time_unit: time_unit, time_zone: time_zone, strict: strict, exact: exact, cache: cache)
82
207
  elsif dtype == Time
83
- Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
208
+ to_time(format, strict: strict, cache: cache)
84
209
  else
85
210
  raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
86
211
  end
@@ -115,7 +240,7 @@ module Polars
115
240
  # # │ 東京 ┆ 6 ┆ 2 │
116
241
  # # └──────┴────────┴────────┘
117
242
  def lengths
118
- Utils.wrap_expr(_rbexpr.str_lengths)
243
+ Utils.wrap_expr(_rbexpr.str_len_bytes)
119
244
  end
120
245
 
121
246
  # Get length of the strings as `:u32` (as number of chars).
@@ -147,13 +272,15 @@ module Polars
147
272
  # # │ 東京 ┆ 6 ┆ 2 │
148
273
  # # └──────┴────────┴────────┘
149
274
  def n_chars
150
- Utils.wrap_expr(_rbexpr.str_n_chars)
275
+ Utils.wrap_expr(_rbexpr.str_len_chars)
151
276
  end
152
277
 
153
278
  # Vertically concat the values in the Series to a single string value.
154
279
  #
155
280
  # @param delimiter [String]
156
281
  # The delimiter to insert between consecutive string values.
282
+ # @param ignore_nulls [Boolean]
283
+ # Ignore null values (default).
157
284
  #
158
285
  # @return [Expr]
159
286
  #
@@ -162,15 +289,28 @@ module Polars
162
289
  # df.select(Polars.col("foo").str.concat("-"))
163
290
  # # =>
164
291
  # # shape: (1, 1)
165
- # # ┌──────────┐
166
- # # │ foo
167
- # # │ ---
168
- # # │ str
169
- # # ╞══════════╡
170
- # # │ 1-null-2 │
171
- # # └──────────┘
172
- def concat(delimiter = "-")
173
- Utils.wrap_expr(_rbexpr.str_concat(delimiter))
292
+ # # ┌─────┐
293
+ # # │ foo
294
+ # # │ ---
295
+ # # │ str
296
+ # # ╞═════╡
297
+ # # │ 1-2 │
298
+ # # └─────┘
299
+ #
300
+ # @example
301
+ # df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
302
+ # df.select(Polars.col("foo").str.concat("-", ignore_nulls: false))
303
+ # # =>
304
+ # # shape: (1, 1)
305
+ # # ┌──────┐
306
+ # # │ foo │
307
+ # # │ --- │
308
+ # # │ str │
309
+ # # ╞══════╡
310
+ # # │ null │
311
+ # # └──────┘
312
+ def concat(delimiter = "-", ignore_nulls: true)
313
+ Utils.wrap_expr(_rbexpr.str_concat(delimiter, ignore_nulls))
174
314
  end
175
315
 
176
316
  # Transform to uppercase variant.
@@ -217,7 +357,7 @@ module Polars
217
357
 
218
358
  # Remove leading and trailing whitespace.
219
359
  #
220
- # @param matches [String, nil]
360
+ # @param characters [String, nil]
221
361
  # The set of characters to trim. If nil (default), whitespace is trimmed instead.
222
362
  #
223
363
  # @return [Expr]
@@ -236,16 +376,15 @@ module Polars
236
376
  # # │ trail │
237
377
  # # │ both │
238
378
  # # └───────┘
239
- def strip(matches = nil)
240
- if !matches.nil? && matches.length > 1
241
- raise ArgumentError, "matches should contain a single character"
242
- end
243
- Utils.wrap_expr(_rbexpr.str_strip(matches))
379
+ def strip_chars(characters = nil)
380
+ characters = Utils.parse_as_expression(characters, str_as_lit: true)
381
+ Utils.wrap_expr(_rbexpr.str_strip_chars(characters))
244
382
  end
383
+ alias_method :strip, :strip_chars
245
384
 
246
385
  # Remove leading whitespace.
247
386
  #
248
- # @param matches [String, nil]
387
+ # @param characters [String, nil]
249
388
  # The set of characters to trim. If nil (default), whitespace is trimmed instead.
250
389
  #
251
390
  # @return [Expr]
@@ -264,16 +403,15 @@ module Polars
264
403
  # # │ trail │
265
404
  # # │ both │
266
405
  # # └────────┘
267
- def lstrip(matches = nil)
268
- if !matches.nil? && matches.length > 1
269
- raise ArgumentError, "matches should contain a single character"
270
- end
271
- Utils.wrap_expr(_rbexpr.str_lstrip(matches))
406
+ def strip_chars_start(characters = nil)
407
+ characters = Utils.parse_as_expression(characters, str_as_lit: true)
408
+ Utils.wrap_expr(_rbexpr.str_strip_chars_start(characters))
272
409
  end
410
+ alias_method :lstrip, :strip_chars_start
273
411
 
274
412
  # Remove trailing whitespace.
275
413
  #
276
- # @param matches [String, nil]
414
+ # @param characters [String, nil]
277
415
  # The set of characters to trim. If nil (default), whitespace is trimmed instead.
278
416
  #
279
417
  # @return [Expr]
@@ -292,12 +430,11 @@ module Polars
292
430
  # # │ trail │
293
431
  # # │ both │
294
432
  # # └───────┘
295
- def rstrip(matches = nil)
296
- if !matches.nil? && matches.length > 1
297
- raise ArgumentError, "matches should contain a single character"
298
- end
299
- Utils.wrap_expr(_rbexpr.str_rstrip(matches))
433
+ def strip_chars_end(characters = nil)
434
+ characters = Utils.parse_as_expression(characters, str_as_lit: true)
435
+ Utils.wrap_expr(_rbexpr.str_strip_chars_end(characters))
300
436
  end
437
+ alias_method :rstrip, :strip_chars_end
301
438
 
302
439
  # Fills the string with zeroes.
303
440
  #
@@ -341,13 +478,13 @@ module Polars
341
478
  Utils.wrap_expr(_rbexpr.str_zfill(alignment))
342
479
  end
343
480
 
344
- # Return the string left justified in a string of length `width`.
481
+ # Return the string left justified in a string of length `length`.
345
482
  #
346
483
  # Padding is done using the specified `fillchar`.
347
- # The original string is returned if `width` is less than or equal to
484
+ # The original string is returned if `length` is less than or equal to
348
485
  # `s.length`.
349
486
  #
350
- # @param width [Integer]
487
+ # @param length [Integer]
351
488
  # Justify left to this length.
352
489
  # @param fillchar [String]
353
490
  # Fill with this ASCII character.
@@ -369,17 +506,18 @@ module Polars
369
506
  # # │ null │
370
507
  # # │ hippopotamus │
371
508
  # # └──────────────┘
372
- def ljust(width, fillchar = " ")
373
- Utils.wrap_expr(_rbexpr.str_ljust(width, fillchar))
509
+ def ljust(length, fillchar = " ")
510
+ Utils.wrap_expr(_rbexpr.str_pad_end(length, fillchar))
374
511
  end
512
+ alias_method :pad_end, :ljust
375
513
 
376
- # Return the string right justified in a string of length `width`.
514
+ # Return the string right justified in a string of length `length`.
377
515
  #
378
516
  # Padding is done using the specified `fillchar`.
379
- # The original string is returned if `width` is less than or equal to
517
+ # The original string is returned if `length` is less than or equal to
380
518
  # `s.length`.
381
519
  #
382
- # @param width [Integer]
520
+ # @param length [Integer]
383
521
  # Justify right to this length.
384
522
  # @param fillchar [String]
385
523
  # Fill with this ASCII character.
@@ -401,9 +539,10 @@ module Polars
401
539
  # # │ null │
402
540
  # # │ hippopotamus │
403
541
  # # └──────────────┘
404
- def rjust(width, fillchar = " ")
405
- Utils.wrap_expr(_rbexpr.str_rjust(width, fillchar))
542
+ def rjust(length, fillchar = " ")
543
+ Utils.wrap_expr(_rbexpr.str_pad_start(length, fillchar))
406
544
  end
545
+ alias_method :pad_start, :rjust
407
546
 
408
547
  # Check if string contains a substring that matches a regex.
409
548
  #
@@ -547,11 +686,11 @@ module Polars
547
686
  # # │ {null,null} │
548
687
  # # │ {2,false} │
549
688
  # # └─────────────┘
550
- def json_extract(dtype = nil)
689
+ def json_extract(dtype = nil, infer_schema_length: 100)
551
690
  if !dtype.nil?
552
691
  dtype = Utils.rb_type_to_dtype(dtype)
553
692
  end
554
- Utils.wrap_expr(_rbexpr.str_json_extract(dtype))
693
+ Utils.wrap_expr(_rbexpr.str_json_extract(dtype, infer_schema_length))
555
694
  end
556
695
 
557
696
  # Extract the first match of json string with provided JSONPath expression.
@@ -744,9 +883,11 @@ module Polars
744
883
  # # │ 5 │
745
884
  # # │ 6 │
746
885
  # # └──────────────┘
747
- def count_match(pattern)
748
- Utils.wrap_expr(_rbexpr.count_match(pattern))
886
+ def count_matches(pattern, literal: false)
887
+ pattern = Utils.parse_as_expression(pattern, str_as_lit: true)
888
+ Utils.wrap_expr(_rbexpr.str_count_matches(pattern, literal))
749
889
  end
890
+ alias_method :count_match, :count_matches
750
891
 
751
892
  # Split the string by a substring.
752
893
  #
@@ -772,6 +913,7 @@ module Polars
772
913
  # # │ ["foo", "bar", "baz"] │
773
914
  # # └───────────────────────┘
774
915
  def split(by, inclusive: false)
916
+ by = Utils.parse_as_expression(by, str_as_lit: true)
775
917
  if inclusive
776
918
  Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
777
919
  else
@@ -814,6 +956,7 @@ module Polars
814
956
  # # │ {"d","4"} │
815
957
  # # └─────────────┘
816
958
  def split_exact(by, n, inclusive: false)
959
+ by = Utils.parse_as_expression(by, str_as_lit: true)
817
960
  if inclusive
818
961
  Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
819
962
  else
@@ -850,6 +993,7 @@ module Polars
850
993
  # # │ {"foo","bar baz"} │
851
994
  # # └───────────────────┘
852
995
  def splitn(by, n)
996
+ by = Utils.parse_as_expression(by, str_as_lit: true)
853
997
  Utils.wrap_expr(_rbexpr.str_splitn(by, n))
854
998
  end
855
999
 
@@ -968,7 +1112,53 @@ module Polars
968
1112
  # # │ r │
969
1113
  # # └─────┘
970
1114
  def explode
971
- Utils.wrap_expr(_rbexpr.explode)
1115
+ Utils.wrap_expr(_rbexpr.str_explode)
1116
+ end
1117
+
1118
+ # Convert a Utf8 column into an Int64 column with base radix.
1119
+ #
1120
+ # @param base [Integer]
1121
+ # Positive integer which is the base of the string we are parsing.
1122
+ # Default: 10.
1123
+ # @param strict [Boolean]
1124
+ # If true (default), raise a ComputeError on any parse error or overflow.
1125
+ # If false, silently convert such values to null.
1126
+ #
1127
+ # @return [Expr]
1128
+ #
1129
+ # @example
1130
+ # df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
1131
+ # df.with_columns(Polars.col("bin").str.to_integer(base: 2, strict: false).alias("parsed"))
1132
+ # # =>
1133
+ # # shape: (4, 2)
1134
+ # # ┌─────────┬────────┐
1135
+ # # │ bin ┆ parsed │
1136
+ # # │ --- ┆ --- │
1137
+ # # │ str ┆ i64 │
1138
+ # # ╞═════════╪════════╡
1139
+ # # │ 110 ┆ 6 │
1140
+ # # │ 101 ┆ 5 │
1141
+ # # │ 010 ┆ 2 │
1142
+ # # │ invalid ┆ null │
1143
+ # # └─────────┴────────┘
1144
+ #
1145
+ # @example
1146
+ # df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
1147
+ # df.with_columns(Polars.col("hex").str.to_integer(base: 16, strict: true).alias("parsed"))
1148
+ # # =>
1149
+ # # shape: (4, 2)
1150
+ # # ┌──────┬────────┐
1151
+ # # │ hex ┆ parsed │
1152
+ # # │ --- ┆ --- │
1153
+ # # │ str ┆ i64 │
1154
+ # # ╞══════╪════════╡
1155
+ # # │ fa1e ┆ 64030 │
1156
+ # # │ ff00 ┆ 65280 │
1157
+ # # │ cafe ┆ 51966 │
1158
+ # # │ null ┆ null │
1159
+ # # └──────┴────────┘
1160
def to_integer(base: 10, strict: true)
  # Delegate the parsing to the native expression, then wrap it back into an Expr.
  integer_rbexpr = _rbexpr.str_to_integer(base, strict)
  Utils.wrap_expr(integer_rbexpr)
end
973
1163
 
974
1164
  # Parse integers with base radix from strings.
@@ -999,24 +1189,14 @@ module Polars
999
1189
  # # │ 2 │
1000
1190
  # # │ null │
1001
1191
  # # └──────┘
1002
- #
1003
- # @example
1004
- # df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
1005
- # df.select(Polars.col("hex").str.parse_int(16, strict: true))
1006
- # # =>
1007
- # # shape: (4, 1)
1008
- # # ┌───────┐
1009
- # # │ hex │
1010
- # # │ --- │
1011
- # # │ i32 │
1012
- # # ╞═══════╡
1013
- # # │ 64030 │
1014
- # # │ 65280 │
1015
- # # │ 51966 │
1016
- # # │ null │
1017
- # # └───────┘
1018
1192
  def parse_int(radix = 2, strict: true)
1019
- Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
1193
+ to_integer(base: 2, strict: strict).cast(Int32, strict: strict)
1194
+ end
1195
+
1196
+ private
1197
+
1198
+ def _validate_format_argument(format)
1199
+ # TODO: validate `format`; this is currently a no-op, so the format string is accepted here unchecked.
1020
1200
  end
1021
1201
  end
1022
1202
  end