polars-df 0.6.0-x86_64-darwin → 0.8.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +5523 -6947
- data/README.md +8 -7
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.bundle +0 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +8 -5
@@ -0,0 +1,194 @@
|
|
1
|
+
module Polars
|
2
|
+
# Run SQL queries against DataFrame/LazyFrame data.
|
3
|
+
class SQLContext
|
4
|
+
# @private
|
5
|
+
attr_accessor :_ctxt, :_eager_execution
|
6
|
+
|
7
|
+
# Initialize a new `SQLContext`.
|
8
|
+
def initialize(frames = nil, eager_execution: false, **named_frames)
|
9
|
+
self._ctxt = RbSQLContext.new
|
10
|
+
self._eager_execution = eager_execution
|
11
|
+
|
12
|
+
frames = (frames || {}).to_h
|
13
|
+
|
14
|
+
if frames.any? || named_frames.any?
|
15
|
+
register_many(frames, **named_frames)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# Parse the given SQL query and execute it against the registered frame data.
|
20
|
+
#
|
21
|
+
# @param query [String]
|
22
|
+
# A valid string SQL query.
|
23
|
+
# @param eager [Boolean]
|
24
|
+
# Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
|
25
|
+
# If unset, the value of the init-time parameter "eager_execution" will be
|
26
|
+
# used. (Note that the query itself is always executed in lazy-mode; this
|
27
|
+
# parameter only impacts the type of the returned frame).
|
28
|
+
#
|
29
|
+
# @return [Object]
|
30
|
+
#
|
31
|
+
# @example Execute a SQL query against the registered frame data:
|
32
|
+
# df = Polars::DataFrame.new(
|
33
|
+
# [
|
34
|
+
# ["The Godfather", 1972, 6_000_000, 134_821_952, 9.2],
|
35
|
+
# ["The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0],
|
36
|
+
# ["Schindler's List", 1993, 22_000_000, 96_067_179, 8.9],
|
37
|
+
# ["Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9],
|
38
|
+
# ["The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3],
|
39
|
+
# ],
|
40
|
+
# schema: ["title", "release_year", "budget", "gross", "imdb_score"]
|
41
|
+
# )
|
42
|
+
# ctx = Polars::SQLContext.new(films: df)
|
43
|
+
# ctx.execute(
|
44
|
+
# "
|
45
|
+
# SELECT title, release_year, imdb_score
|
46
|
+
# FROM films
|
47
|
+
# WHERE release_year > 1990
|
48
|
+
# ORDER BY imdb_score DESC
|
49
|
+
# ",
|
50
|
+
# eager: true
|
51
|
+
# )
|
52
|
+
# # =>
|
53
|
+
# # shape: (4, 3)
|
54
|
+
# # ┌──────────────────────────┬──────────────┬────────────┐
|
55
|
+
# # │ title ┆ release_year ┆ imdb_score │
|
56
|
+
# # │ --- ┆ --- ┆ --- │
|
57
|
+
# # │ str ┆ i64 ┆ f64 │
|
58
|
+
# # ╞══════════════════════════╪══════════════╪════════════╡
|
59
|
+
# # │ The Shawshank Redemption ┆ 1994 ┆ 9.3 │
|
60
|
+
# # │ The Dark Knight ┆ 2008 ┆ 9.0 │
|
61
|
+
# # │ Schindler's List ┆ 1993 ┆ 8.9 │
|
62
|
+
# # │ Pulp Fiction ┆ 1994 ┆ 8.9 │
|
63
|
+
# # └──────────────────────────┴──────────────┴────────────┘
|
64
|
+
#
|
65
|
+
# @example Execute a GROUP BY query:
|
66
|
+
# ctx.execute(
|
67
|
+
# "
|
68
|
+
# SELECT
|
69
|
+
# MAX(release_year / 10) * 10 AS decade,
|
70
|
+
# SUM(gross) AS total_gross,
|
71
|
+
# COUNT(title) AS n_films,
|
72
|
+
# FROM films
|
73
|
+
# GROUP BY (release_year / 10) -- decade
|
74
|
+
# ORDER BY total_gross DESC
|
75
|
+
# ",
|
76
|
+
# eager: true
|
77
|
+
# )
|
78
|
+
# # =>
|
79
|
+
# # shape: (3, 3)
|
80
|
+
# # ┌────────┬─────────────┬─────────┐
|
81
|
+
# # │ decade ┆ total_gross ┆ n_films │
|
82
|
+
# # │ --- ┆ --- ┆ --- │
|
83
|
+
# # │ i64 ┆ i64 ┆ u32 │
|
84
|
+
# # ╞════════╪═════════════╪═════════╡
|
85
|
+
# # │ 2000 ┆ 533316061 ┆ 1 │
|
86
|
+
# # │ 1990 ┆ 232338648 ┆ 3 │
|
87
|
+
# # │ 1970 ┆ 134821952 ┆ 1 │
|
88
|
+
# # └────────┴─────────────┴─────────┘
|
89
|
+
def execute(query, eager: nil)
|
90
|
+
res = Utils.wrap_ldf(_ctxt.execute(query))
|
91
|
+
eager || _eager_execution ? res.collect : res
|
92
|
+
end
|
93
|
+
|
94
|
+
# Register a single frame as a table, using the given name.
|
95
|
+
#
|
96
|
+
# @param name [String]
|
97
|
+
# Name of the table.
|
98
|
+
# @param frame [Object]
|
99
|
+
# eager/lazy frame to associate with this table name.
|
100
|
+
#
|
101
|
+
# @return [SQLContext]
|
102
|
+
#
|
103
|
+
# @example
|
104
|
+
# df = Polars::DataFrame.new({"hello" => ["world"]})
|
105
|
+
# ctx = Polars::SQLContext.new
|
106
|
+
# ctx.register("frame_data", df).execute("SELECT * FROM frame_data").collect
|
107
|
+
# # =>
|
108
|
+
# # shape: (1, 1)
|
109
|
+
# # ┌───────┐
|
110
|
+
# # │ hello │
|
111
|
+
# # │ --- │
|
112
|
+
# # │ str │
|
113
|
+
# # ╞═══════╡
|
114
|
+
# # │ world │
|
115
|
+
# # └───────┘
|
116
|
+
def register(name, frame)
|
117
|
+
if frame.is_a?(DataFrame)
|
118
|
+
frame = frame.lazy
|
119
|
+
end
|
120
|
+
_ctxt.register(name.to_s, frame._ldf)
|
121
|
+
self
|
122
|
+
end
|
123
|
+
|
124
|
+
# Register multiple eager/lazy frames as tables, using the associated names.
|
125
|
+
#
|
126
|
+
# @param frames [Hash]
|
127
|
+
# A `{name:frame, ...}` mapping.
|
128
|
+
# @param named_frames [Object]
|
129
|
+
# Named eager/lazy frames, provided as kwargs.
|
130
|
+
#
|
131
|
+
# @return [SQLContext]
|
132
|
+
def register_many(frames, **named_frames)
|
133
|
+
frames = (frames || {}).to_h
|
134
|
+
frames = frames.merge(named_frames)
|
135
|
+
frames.each do |name, frame|
|
136
|
+
register(name, frame)
|
137
|
+
end
|
138
|
+
self
|
139
|
+
end
|
140
|
+
|
141
|
+
# Unregister one or more eager/lazy frames by name.
|
142
|
+
#
|
143
|
+
# @param names [Object]
|
144
|
+
# Names of the tables to unregister.
|
145
|
+
#
|
146
|
+
# @return [SQLContext]
|
147
|
+
#
|
148
|
+
# @example Register with a SQLContext object:
|
149
|
+
# df0 = Polars::DataFrame.new({"ints" => [9, 8, 7, 6, 5]})
|
150
|
+
# lf1 = Polars::LazyFrame.new({"text" => ["a", "b", "c"]})
|
151
|
+
# lf2 = Polars::LazyFrame.new({"misc" => ["testing1234"]})
|
152
|
+
# ctx = Polars::SQLContext.new(test1: df0, test2: lf1, test3: lf2)
|
153
|
+
# ctx.tables
|
154
|
+
# # => ["test1", "test2", "test3"]
|
155
|
+
#
|
156
|
+
# @example Unregister one or more of the tables:
|
157
|
+
# ctx.unregister(["test1", "test3"]).tables
|
158
|
+
# # => ["test2"]
|
159
|
+
def unregister(names)
|
160
|
+
if names.is_a?(::String)
|
161
|
+
names = [names]
|
162
|
+
end
|
163
|
+
names.each do |nm|
|
164
|
+
_ctxt.unregister(nm)
|
165
|
+
end
|
166
|
+
self
|
167
|
+
end
|
168
|
+
|
169
|
+
# Return a list of the registered table names.
|
170
|
+
#
|
171
|
+
# @return [Array]
|
172
|
+
#
|
173
|
+
# @example Executing as SQL:
|
174
|
+
# frame_data = Polars::DataFrame.new({"hello" => ["world"]})
|
175
|
+
# ctx = Polars::SQLContext.new(hello_world: frame_data)
|
176
|
+
# ctx.execute("SHOW TABLES", eager: true)
|
177
|
+
# # =>
|
178
|
+
# # shape: (1, 1)
|
179
|
+
# # ┌─────────────┐
|
180
|
+
# # │ name │
|
181
|
+
# # │ --- │
|
182
|
+
# # │ str │
|
183
|
+
# # ╞═════════════╡
|
184
|
+
# # │ hello_world │
|
185
|
+
# # └─────────────┘
|
186
|
+
#
|
187
|
+
# @example Calling the method:
|
188
|
+
# ctx.tables
|
189
|
+
# # => ["hello_world"]
|
190
|
+
def tables
|
191
|
+
_ctxt.get_tables.sort
|
192
|
+
end
|
193
|
+
end
|
194
|
+
end
|
data/lib/polars/string_expr.rb
CHANGED
@@ -82,9 +82,13 @@ module Polars
|
|
82
82
|
time_zone: nil,
|
83
83
|
strict: true,
|
84
84
|
exact: true,
|
85
|
-
cache: true
|
85
|
+
cache: true,
|
86
|
+
use_earliest: nil,
|
87
|
+
ambiguous: "raise"
|
86
88
|
)
|
87
89
|
_validate_format_argument(format)
|
90
|
+
ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
|
91
|
+
ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
|
88
92
|
Utils.wrap_expr(
|
89
93
|
self._rbexpr.str_to_datetime(
|
90
94
|
format,
|
@@ -92,7 +96,8 @@ module Polars
|
|
92
96
|
time_zone,
|
93
97
|
strict,
|
94
98
|
exact,
|
95
|
-
cache
|
99
|
+
cache,
|
100
|
+
ambiguous._rbexpr
|
96
101
|
)
|
97
102
|
)
|
98
103
|
end
|
@@ -235,7 +240,7 @@ module Polars
|
|
235
240
|
# # │ 東京 ┆ 6 ┆ 2 │
|
236
241
|
# # └──────┴────────┴────────┘
|
237
242
|
def lengths
|
238
|
-
Utils.wrap_expr(_rbexpr.
|
243
|
+
Utils.wrap_expr(_rbexpr.str_len_bytes)
|
239
244
|
end
|
240
245
|
|
241
246
|
# Get length of the strings as `:u32` (as number of chars).
|
@@ -267,13 +272,15 @@ module Polars
|
|
267
272
|
# # │ 東京 ┆ 6 ┆ 2 │
|
268
273
|
# # └──────┴────────┴────────┘
|
269
274
|
def n_chars
|
270
|
-
Utils.wrap_expr(_rbexpr.
|
275
|
+
Utils.wrap_expr(_rbexpr.str_len_chars)
|
271
276
|
end
|
272
277
|
|
273
278
|
# Vertically concat the values in the Series to a single string value.
|
274
279
|
#
|
275
280
|
# @param delimiter [String]
|
276
281
|
# The delimiter to insert between consecutive string values.
|
282
|
+
# @param ignore_nulls [Boolean]
|
283
|
+
# Ignore null values (default).
|
277
284
|
#
|
278
285
|
# @return [Expr]
|
279
286
|
#
|
@@ -282,15 +289,28 @@ module Polars
|
|
282
289
|
# df.select(Polars.col("foo").str.concat("-"))
|
283
290
|
# # =>
|
284
291
|
# # shape: (1, 1)
|
285
|
-
# #
|
286
|
-
# # │ foo
|
287
|
-
# # │ ---
|
288
|
-
# # │ str
|
289
|
-
# #
|
290
|
-
# # │ 1-
|
291
|
-
# #
|
292
|
-
|
293
|
-
|
292
|
+
# # ┌─────┐
|
293
|
+
# # │ foo │
|
294
|
+
# # │ --- │
|
295
|
+
# # │ str │
|
296
|
+
# # ╞═════╡
|
297
|
+
# # │ 1-2 │
|
298
|
+
# # └─────┘
|
299
|
+
#
|
300
|
+
# @example
|
301
|
+
# df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
|
302
|
+
# df.select(Polars.col("foo").str.concat("-", ignore_nulls: false))
|
303
|
+
# # =>
|
304
|
+
# # shape: (1, 1)
|
305
|
+
# # ┌──────┐
|
306
|
+
# # │ foo │
|
307
|
+
# # │ --- │
|
308
|
+
# # │ str │
|
309
|
+
# # ╞══════╡
|
310
|
+
# # │ null │
|
311
|
+
# # └──────┘
|
312
|
+
def concat(delimiter = "-", ignore_nulls: true)
|
313
|
+
Utils.wrap_expr(_rbexpr.str_concat(delimiter, ignore_nulls))
|
294
314
|
end
|
295
315
|
|
296
316
|
# Transform to uppercase variant.
|
@@ -337,7 +357,7 @@ module Polars
|
|
337
357
|
|
338
358
|
# Remove leading and trailing whitespace.
|
339
359
|
#
|
340
|
-
# @param
|
360
|
+
# @param characters [String, nil]
|
341
361
|
# An optional single character that should be trimmed.
|
342
362
|
#
|
343
363
|
# @return [Expr]
|
@@ -356,16 +376,15 @@ module Polars
|
|
356
376
|
# # │ trail │
|
357
377
|
# # │ both │
|
358
378
|
# # └───────┘
|
359
|
-
def
|
360
|
-
|
361
|
-
|
362
|
-
end
|
363
|
-
Utils.wrap_expr(_rbexpr.str_strip(matches))
|
379
|
+
def strip_chars(characters = nil)
|
380
|
+
characters = Utils.parse_as_expression(characters, str_as_lit: true)
|
381
|
+
Utils.wrap_expr(_rbexpr.str_strip_chars(characters))
|
364
382
|
end
|
383
|
+
alias_method :strip, :strip_chars
|
365
384
|
|
366
385
|
# Remove leading whitespace.
|
367
386
|
#
|
368
|
-
# @param
|
387
|
+
# @param characters [String, nil]
|
369
388
|
# An optional single character that should be trimmed.
|
370
389
|
#
|
371
390
|
# @return [Expr]
|
@@ -384,16 +403,15 @@ module Polars
|
|
384
403
|
# # │ trail │
|
385
404
|
# # │ both │
|
386
405
|
# # └────────┘
|
387
|
-
def
|
388
|
-
|
389
|
-
|
390
|
-
end
|
391
|
-
Utils.wrap_expr(_rbexpr.str_lstrip(matches))
|
406
|
+
def strip_chars_start(characters = nil)
|
407
|
+
characters = Utils.parse_as_expression(characters, str_as_lit: true)
|
408
|
+
Utils.wrap_expr(_rbexpr.str_strip_chars_start(characters))
|
392
409
|
end
|
410
|
+
alias_method :lstrip, :strip_chars_start
|
393
411
|
|
394
412
|
# Remove trailing whitespace.
|
395
413
|
#
|
396
|
-
# @param
|
414
|
+
# @param characters [String, nil]
|
397
415
|
# An optional single character that should be trimmed.
|
398
416
|
#
|
399
417
|
# @return [Expr]
|
@@ -412,12 +430,11 @@ module Polars
|
|
412
430
|
# # │ trail │
|
413
431
|
# # │ both │
|
414
432
|
# # └───────┘
|
415
|
-
def
|
416
|
-
|
417
|
-
|
418
|
-
end
|
419
|
-
Utils.wrap_expr(_rbexpr.str_rstrip(matches))
|
433
|
+
def strip_chars_end(characters = nil)
|
434
|
+
characters = Utils.parse_as_expression(characters, str_as_lit: true)
|
435
|
+
Utils.wrap_expr(_rbexpr.str_strip_chars_end(characters))
|
420
436
|
end
|
437
|
+
alias_method :rstrip, :strip_chars_end
|
421
438
|
|
422
439
|
# Fills the string with zeroes.
|
423
440
|
#
|
@@ -461,13 +478,13 @@ module Polars
|
|
461
478
|
Utils.wrap_expr(_rbexpr.str_zfill(alignment))
|
462
479
|
end
|
463
480
|
|
464
|
-
# Return the string left justified in a string of length `
|
481
|
+
# Return the string left justified in a string of length `length`.
|
465
482
|
#
|
466
483
|
# Padding is done using the specified `fillchar`.
|
467
|
-
# The original string is returned if `
|
484
|
+
# The original string is returned if `length` is less than or equal to
|
468
485
|
# `s.length`.
|
469
486
|
#
|
470
|
-
# @param
|
487
|
+
# @param length [Integer]
|
471
488
|
# Justify left to this length.
|
472
489
|
# @param fillchar [String]
|
473
490
|
# Fill with this ASCII character.
|
@@ -489,17 +506,18 @@ module Polars
|
|
489
506
|
# # │ null │
|
490
507
|
# # │ hippopotamus │
|
491
508
|
# # └──────────────┘
|
492
|
-
def ljust(
|
493
|
-
Utils.wrap_expr(_rbexpr.
|
509
|
+
def ljust(length, fillchar = " ")
|
510
|
+
Utils.wrap_expr(_rbexpr.str_pad_end(length, fillchar))
|
494
511
|
end
|
512
|
+
alias_method :pad_end, :ljust
|
495
513
|
|
496
|
-
# Return the string right justified in a string of length `
|
514
|
+
# Return the string right justified in a string of length `length`.
|
497
515
|
#
|
498
516
|
# Padding is done using the specified `fillchar`.
|
499
|
-
# The original string is returned if `
|
517
|
+
# The original string is returned if `length` is less than or equal to
|
500
518
|
# `s.length`.
|
501
519
|
#
|
502
|
-
# @param
|
520
|
+
# @param length [Integer]
|
503
521
|
# Justify right to this length.
|
504
522
|
# @param fillchar [String]
|
505
523
|
# Fill with this ASCII character.
|
@@ -521,9 +539,10 @@ module Polars
|
|
521
539
|
# # │ null │
|
522
540
|
# # │ hippopotamus │
|
523
541
|
# # └──────────────┘
|
524
|
-
def rjust(
|
525
|
-
Utils.wrap_expr(_rbexpr.
|
542
|
+
def rjust(length, fillchar = " ")
|
543
|
+
Utils.wrap_expr(_rbexpr.str_pad_start(length, fillchar))
|
526
544
|
end
|
545
|
+
alias_method :pad_start, :rjust
|
527
546
|
|
528
547
|
# Check if string contains a substring that matches a regex.
|
529
548
|
#
|
@@ -864,9 +883,11 @@ module Polars
|
|
864
883
|
# # │ 5 │
|
865
884
|
# # │ 6 │
|
866
885
|
# # └──────────────┘
|
867
|
-
def
|
868
|
-
Utils.
|
886
|
+
def count_matches(pattern, literal: false)
|
887
|
+
pattern = Utils.parse_as_expression(pattern, str_as_lit: true)
|
888
|
+
Utils.wrap_expr(_rbexpr.str_count_matches(pattern, literal))
|
869
889
|
end
|
890
|
+
alias_method :count_match, :count_matches
|
870
891
|
|
871
892
|
# Split the string by a substring.
|
872
893
|
#
|
@@ -892,6 +913,7 @@ module Polars
|
|
892
913
|
# # │ ["foo", "bar", "baz"] │
|
893
914
|
# # └───────────────────────┘
|
894
915
|
def split(by, inclusive: false)
|
916
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
895
917
|
if inclusive
|
896
918
|
Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
|
897
919
|
else
|
@@ -934,6 +956,7 @@ module Polars
|
|
934
956
|
# # │ {"d","4"} │
|
935
957
|
# # └─────────────┘
|
936
958
|
def split_exact(by, n, inclusive: false)
|
959
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
937
960
|
if inclusive
|
938
961
|
Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
|
939
962
|
else
|
@@ -970,6 +993,7 @@ module Polars
|
|
970
993
|
# # │ {"foo","bar baz"} │
|
971
994
|
# # └───────────────────┘
|
972
995
|
def splitn(by, n)
|
996
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
973
997
|
Utils.wrap_expr(_rbexpr.str_splitn(by, n))
|
974
998
|
end
|
975
999
|
|
@@ -1091,6 +1115,52 @@ module Polars
|
|
1091
1115
|
Utils.wrap_expr(_rbexpr.str_explode)
|
1092
1116
|
end
|
1093
1117
|
|
1118
|
+
# Convert a Utf8 column into an Int64 column, parsing each string as an integer in the given base (radix).
|
1119
|
+
#
|
1120
|
+
# @param base [Integer]
|
1121
|
+
# Positive integer which is the base of the string we are parsing.
|
1122
|
+
# Default: 10.
|
1123
|
+
# @param strict [Boolean]
|
1124
|
+
# If true (the default), any parse error or overflow is raised as a ComputeError.
|
1125
|
+
# If false, values that fail to parse or that overflow are silently converted to null.
|
1126
|
+
#
|
1127
|
+
# @return [Expr]
|
1128
|
+
#
|
1129
|
+
# @example
|
1130
|
+
# df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
|
1131
|
+
# df.with_columns(Polars.col("bin").str.to_integer(base: 2, strict: false).alias("parsed"))
|
1132
|
+
# # =>
|
1133
|
+
# # shape: (4, 2)
|
1134
|
+
# # ┌─────────┬────────┐
|
1135
|
+
# # │ bin ┆ parsed │
|
1136
|
+
# # │ --- ┆ --- │
|
1137
|
+
# # │ str ┆ i64 │
|
1138
|
+
# # ╞═════════╪════════╡
|
1139
|
+
# # │ 110 ┆ 6 │
|
1140
|
+
# # │ 101 ┆ 5 │
|
1141
|
+
# # │ 010 ┆ 2 │
|
1142
|
+
# # │ invalid ┆ null │
|
1143
|
+
# # └─────────┴────────┘
|
1144
|
+
#
|
1145
|
+
# @example
|
1146
|
+
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1147
|
+
# df.with_columns(Polars.col("hex").str.to_integer(base: 16, strict: true).alias("parsed"))
|
1148
|
+
# # =>
|
1149
|
+
# # shape: (4, 2)
|
1150
|
+
# # ┌──────┬────────┐
|
1151
|
+
# # │ hex ┆ parsed │
|
1152
|
+
# # │ --- ┆ --- │
|
1153
|
+
# # │ str ┆ i64 │
|
1154
|
+
# # ╞══════╪════════╡
|
1155
|
+
# # │ fa1e ┆ 64030 │
|
1156
|
+
# # │ ff00 ┆ 65280 │
|
1157
|
+
# # │ cafe ┆ 51966 │
|
1158
|
+
# # │ null ┆ null │
|
1159
|
+
# # └──────┴────────┘
|
1160
|
+
def to_integer(base: 10, strict: true)
|
1161
|
+
Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
|
1162
|
+
end
|
1163
|
+
|
1094
1164
|
# Parse integers with base radix from strings.
|
1095
1165
|
#
|
1096
1166
|
# By default base 2. ParseError/Overflows become Nulls.
|
@@ -1119,24 +1189,8 @@ module Polars
|
|
1119
1189
|
# # │ 2 │
|
1120
1190
|
# # │ null │
|
1121
1191
|
# # └──────┘
|
1122
|
-
#
|
1123
|
-
# @example
|
1124
|
-
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1125
|
-
# df.select(Polars.col("hex").str.parse_int(16, strict: true))
|
1126
|
-
# # =>
|
1127
|
-
# # shape: (4, 1)
|
1128
|
-
# # ┌───────┐
|
1129
|
-
# # │ hex │
|
1130
|
-
# # │ --- │
|
1131
|
-
# # │ i32 │
|
1132
|
-
# # ╞═══════╡
|
1133
|
-
# # │ 64030 │
|
1134
|
-
# # │ 65280 │
|
1135
|
-
# # │ 51966 │
|
1136
|
-
# # │ null │
|
1137
|
-
# # └───────┘
|
1138
1192
|
def parse_int(radix = 2, strict: true)
|
1139
|
-
|
1193
|
+
to_integer(base: 2, strict: strict).cast(Int32, strict: strict)
|
1140
1194
|
end
|
1141
1195
|
|
1142
1196
|
private
|
@@ -82,7 +82,8 @@ module Polars
|
|
82
82
|
time_zone: nil,
|
83
83
|
strict: true,
|
84
84
|
exact: true,
|
85
|
-
cache: true
|
85
|
+
cache: true,
|
86
|
+
use_earliest: nil
|
86
87
|
)
|
87
88
|
super
|
88
89
|
end
|
@@ -232,9 +233,23 @@ module Polars
|
|
232
233
|
# @return [Series]
|
233
234
|
#
|
234
235
|
# @example
|
235
|
-
# Polars::Series.new([1, nil, 2]).str.concat("-")
|
236
|
-
# # =>
|
237
|
-
|
236
|
+
# Polars::Series.new([1, nil, 2]).str.concat("-")
|
237
|
+
# # =>
|
238
|
+
# # shape: (1,)
|
239
|
+
# # Series: '' [str]
|
240
|
+
# # [
|
241
|
+
# # "1-2"
|
242
|
+
# # ]
|
243
|
+
#
|
244
|
+
# @example
|
245
|
+
# Polars::Series.new([1, nil, 2]).str.concat("-", ignore_nulls: false)
|
246
|
+
# # =>
|
247
|
+
# # shape: (1,)
|
248
|
+
# # Series: '' [str]
|
249
|
+
# # [
|
250
|
+
# # null
|
251
|
+
# # ]
|
252
|
+
def concat(delimiter = "-", ignore_nulls: true)
|
238
253
|
super
|
239
254
|
end
|
240
255
|
|
data/lib/polars/struct_expr.rb
CHANGED
data/lib/polars/utils.rb
CHANGED
@@ -27,7 +27,7 @@ module Polars
|
|
27
27
|
if obj.is_a?(Range)
|
28
28
|
# size only works for numeric ranges
|
29
29
|
obj.to_a.length
|
30
|
-
elsif obj.is_a?(String)
|
30
|
+
elsif obj.is_a?(::String)
|
31
31
|
nil
|
32
32
|
else
|
33
33
|
obj.length
|
@@ -116,7 +116,7 @@ module Polars
|
|
116
116
|
end
|
117
117
|
|
118
118
|
def self.selection_to_rbexpr_list(exprs)
|
119
|
-
if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
119
|
+
if exprs.is_a?(::String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
120
120
|
exprs = [exprs]
|
121
121
|
end
|
122
122
|
|
@@ -124,9 +124,9 @@ module Polars
|
|
124
124
|
end
|
125
125
|
|
126
126
|
def self.expr_to_lit_or_expr(expr, str_to_lit: true)
|
127
|
-
if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
|
127
|
+
if (expr.is_a?(::String) || expr.is_a?(Symbol)) && !str_to_lit
|
128
128
|
col(expr)
|
129
|
-
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
129
|
+
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(::String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
130
130
|
lit(expr)
|
131
131
|
elsif expr.is_a?(Expr)
|
132
132
|
expr
|
@@ -152,7 +152,7 @@ module Polars
|
|
152
152
|
if data_type == Unknown
|
153
153
|
return include_unknown
|
154
154
|
end
|
155
|
-
data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
|
155
|
+
data_type.is_a?(Symbol) || data_type.is_a?(::String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
|
156
156
|
end
|
157
157
|
|
158
158
|
def self.map_rb_type_to_dtype(ruby_dtype)
|
@@ -160,7 +160,7 @@ module Polars
|
|
160
160
|
Float64
|
161
161
|
elsif ruby_dtype == Integer
|
162
162
|
Int64
|
163
|
-
elsif ruby_dtype == String
|
163
|
+
elsif ruby_dtype == ::String
|
164
164
|
Utf8
|
165
165
|
elsif ruby_dtype == TrueClass || ruby_dtype == FalseClass
|
166
166
|
Boolean
|
@@ -211,7 +211,7 @@ module Polars
|
|
211
211
|
projection = nil
|
212
212
|
if columns
|
213
213
|
raise Todo
|
214
|
-
# if columns.is_a?(String) || columns.is_a?(Symbol)
|
214
|
+
# if columns.is_a?(::String) || columns.is_a?(Symbol)
|
215
215
|
# columns = [columns]
|
216
216
|
# elsif is_int_sequence(columns)
|
217
217
|
# projection = columns.to_a
|
@@ -243,11 +243,11 @@ module Polars
|
|
243
243
|
end
|
244
244
|
|
245
245
|
def self.strlike?(value)
|
246
|
-
value.is_a?(String) || value.is_a?(Symbol)
|
246
|
+
value.is_a?(::String) || value.is_a?(Symbol)
|
247
247
|
end
|
248
248
|
|
249
249
|
def self.pathlike?(value)
|
250
|
-
value.is_a?(String) || (defined?(Pathname) && value.is_a?(Pathname))
|
250
|
+
value.is_a?(::String) || (defined?(Pathname) && value.is_a?(Pathname))
|
251
251
|
end
|
252
252
|
|
253
253
|
def self._is_iterable_of(val, eltype)
|
@@ -275,10 +275,10 @@ module Polars
|
|
275
275
|
end
|
276
276
|
|
277
277
|
def self.is_str_sequence(val, allow_str: false)
|
278
|
-
if allow_str == false && val.is_a?(String)
|
278
|
+
if allow_str == false && val.is_a?(::String)
|
279
279
|
false
|
280
280
|
else
|
281
|
-
val.is_a?(::Array) && _is_iterable_of(val, String)
|
281
|
+
val.is_a?(::Array) && _is_iterable_of(val, ::String)
|
282
282
|
end
|
283
283
|
end
|
284
284
|
|
@@ -289,10 +289,10 @@ module Polars
|
|
289
289
|
def self.parse_as_expression(input, str_as_lit: false, structify: false)
|
290
290
|
if input.is_a?(Expr)
|
291
291
|
expr = input
|
292
|
-
elsif input.is_a?(String) && !str_as_lit
|
292
|
+
elsif input.is_a?(::String) && !str_as_lit
|
293
293
|
expr = Polars.col(input)
|
294
294
|
structify = false
|
295
|
-
elsif [Integer, Float, String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
|
295
|
+
elsif [Integer, Float, ::String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
|
296
296
|
expr = Polars.lit(input)
|
297
297
|
structify = false
|
298
298
|
elsif input.is_a?(Array)
|
@@ -308,5 +308,17 @@ module Polars
|
|
308
308
|
|
309
309
|
expr._rbexpr
|
310
310
|
end
|
311
|
+
|
312
|
+
USE_EARLIEST_TO_AMBIGUOUS = {
|
313
|
+
true => "earliest",
|
314
|
+
false => "latest"
|
315
|
+
}
|
316
|
+
|
317
|
+
def self.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
|
318
|
+
unless use_earliest.nil?
|
319
|
+
ambiguous = USE_EARLIEST_TO_AMBIGUOUS.fetch(use_earliest)
|
320
|
+
end
|
321
|
+
ambiguous
|
322
|
+
end
|
311
323
|
end
|
312
324
|
end
|
data/lib/polars/version.rb
CHANGED