polars-df 0.6.0-x86_64-darwin → 0.8.0-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +5523 -6947
- data/README.md +8 -7
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.bundle +0 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +8 -5
@@ -0,0 +1,194 @@
|
|
1
|
+
module Polars
|
2
|
+
# Run SQL queries against DataFrame/LazyFrame data.
|
3
|
+
class SQLContext
|
4
|
+
# @private
|
5
|
+
attr_accessor :_ctxt, :_eager_execution
|
6
|
+
|
7
|
+
# Initialize a new `SQLContext`.
|
8
|
+
def initialize(frames = nil, eager_execution: false, **named_frames)
  # Create the native context and remember the default result mode that
  # #execute falls back to.
  self._ctxt = RbSQLContext.new
  self._eager_execution = eager_execution

  # The positional mapping may be nil or anything that responds to #to_h.
  positional = (frames || {}).to_h

  # Nothing to register: leave the context empty.
  return if positional.empty? && named_frames.empty?

  register_many(positional, **named_frames)
end
|
18
|
+
|
19
|
+
# Parse the given SQL query and execute it against the registered frame data.
|
20
|
+
#
|
21
|
+
# @param query [String]
|
22
|
+
# A valid string SQL query.
|
23
|
+
# @param eager [Boolean]
|
24
|
+
# Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
|
25
|
+
# If unset, the value of the init-time parameter "eager_execution" will be
|
26
|
+
# used. (Note that the query itself is always executed in lazy-mode; this
|
27
|
+
# parameter only impacts the type of the returned frame).
|
28
|
+
#
|
29
|
+
# @return [Object]
|
30
|
+
#
|
31
|
+
# @example Execute a SQL query against the registered frame data:
|
32
|
+
# df = Polars::DataFrame.new(
|
33
|
+
# [
|
34
|
+
# ["The Godfather", 1972, 6_000_000, 134_821_952, 9.2],
|
35
|
+
# ["The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0],
|
36
|
+
# ["Schindler's List", 1993, 22_000_000, 96_067_179, 8.9],
|
37
|
+
# ["Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9],
|
38
|
+
# ["The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3],
|
39
|
+
# ],
|
40
|
+
# schema: ["title", "release_year", "budget", "gross", "imdb_score"]
|
41
|
+
# )
|
42
|
+
# ctx = Polars::SQLContext.new(films: df)
|
43
|
+
# ctx.execute(
|
44
|
+
# "
|
45
|
+
# SELECT title, release_year, imdb_score
|
46
|
+
# FROM films
|
47
|
+
# WHERE release_year > 1990
|
48
|
+
# ORDER BY imdb_score DESC
|
49
|
+
# ",
|
50
|
+
# eager: true
|
51
|
+
# )
|
52
|
+
# # =>
|
53
|
+
# # shape: (4, 3)
|
54
|
+
# # ┌──────────────────────────┬──────────────┬────────────┐
|
55
|
+
# # │ title ┆ release_year ┆ imdb_score │
|
56
|
+
# # │ --- ┆ --- ┆ --- │
|
57
|
+
# # │ str ┆ i64 ┆ f64 │
|
58
|
+
# # ╞══════════════════════════╪══════════════╪════════════╡
|
59
|
+
# # │ The Shawshank Redemption ┆ 1994 ┆ 9.3 │
|
60
|
+
# # │ The Dark Knight ┆ 2008 ┆ 9.0 │
|
61
|
+
# # │ Schindler's List ┆ 1993 ┆ 8.9 │
|
62
|
+
# # │ Pulp Fiction ┆ 1994 ┆ 8.9 │
|
63
|
+
# # └──────────────────────────┴──────────────┴────────────┘
|
64
|
+
#
|
65
|
+
# @example Execute a GROUP BY query:
|
66
|
+
# ctx.execute(
|
67
|
+
# "
|
68
|
+
# SELECT
|
69
|
+
# MAX(release_year / 10) * 10 AS decade,
|
70
|
+
# SUM(gross) AS total_gross,
|
71
|
+
# COUNT(title) AS n_films,
|
72
|
+
# FROM films
|
73
|
+
# GROUP BY (release_year / 10) -- decade
|
74
|
+
# ORDER BY total_gross DESC
|
75
|
+
# ",
|
76
|
+
# eager: true
|
77
|
+
# )
|
78
|
+
# # =>
|
79
|
+
# # shape: (3, 3)
|
80
|
+
# # ┌────────┬─────────────┬─────────┐
|
81
|
+
# # │ decade ┆ total_gross ┆ n_films │
|
82
|
+
# # │ --- ┆ --- ┆ --- │
|
83
|
+
# # │ i64 ┆ i64 ┆ u32 │
|
84
|
+
# # ╞════════╪═════════════╪═════════╡
|
85
|
+
# # │ 2000 ┆ 533316061 ┆ 1 │
|
86
|
+
# # │ 1990 ┆ 232338648 ┆ 3 │
|
87
|
+
# # │ 1970 ┆ 134821952 ┆ 1 │
|
88
|
+
# # └────────┴─────────────┴─────────┘
|
89
|
+
def execute(query, eager: nil)
  # The native context returns a lazy frame handle; wrap it first.
  lazy_result = Utils.wrap_ldf(_ctxt.execute(query))

  # `eager: nil` means "unset": only then fall back to the init-time
  # `eager_execution` value. An explicit `eager: false` must win over an
  # init-time `true`, which a plain `eager || _eager_execution` cannot express.
  eager = _eager_execution if eager.nil?

  eager ? lazy_result.collect : lazy_result
end
|
93
|
+
|
94
|
+
# Register a single frame as a table, using the given name.
|
95
|
+
#
|
96
|
+
# @param name [String]
|
97
|
+
# Name of the table.
|
98
|
+
# @param frame [Object]
|
99
|
+
# eager/lazy frame to associate with this table name.
|
100
|
+
#
|
101
|
+
# @return [SQLContext]
|
102
|
+
#
|
103
|
+
# @example
|
104
|
+
# df = Polars::DataFrame.new({"hello" => ["world"]})
|
105
|
+
# ctx = Polars::SQLContext.new
|
106
|
+
# ctx.register("frame_data", df).execute("SELECT * FROM frame_data").collect
|
107
|
+
# # =>
|
108
|
+
# # shape: (1, 1)
|
109
|
+
# # ┌───────┐
|
110
|
+
# # │ hello │
|
111
|
+
# # │ --- │
|
112
|
+
# # │ str │
|
113
|
+
# # ╞═══════╡
|
114
|
+
# # │ world │
|
115
|
+
# # └───────┘
|
116
|
+
def register(name, frame)
  # Eager frames are converted with #lazy first; the native context is
  # always handed the frame's `_ldf`.
  lazy_frame = frame.is_a?(DataFrame) ? frame.lazy : frame
  _ctxt.register(name.to_s, lazy_frame._ldf)

  # Return the context itself so calls can be chained.
  self
end
|
123
|
+
|
124
|
+
# Register multiple eager/lazy frames as tables, using the associated names.
|
125
|
+
#
|
126
|
+
# @param frames [Hash]
|
127
|
+
# A `{name:frame, ...}` mapping.
|
128
|
+
# @param named_frames [Object]
|
129
|
+
# Named eager/lazy frames, provided as kwargs.
|
130
|
+
#
|
131
|
+
# @return [SQLContext]
|
132
|
+
def register_many(frames = nil, **named_frames)
  # `frames` may be nil or anything responding to #to_h. It defaults to nil
  # so that a kwargs-only call (`register_many(a: df)`) works; the body
  # already tolerated nil, but the required positional made that call raise
  # ArgumentError.
  combined = (frames || {}).to_h.merge(named_frames)

  # On a name clash the keyword entry wins, because it is merged last.
  combined.each { |name, frame| register(name, frame) }

  # Return the context itself so calls can be chained.
  self
end
|
140
|
+
|
141
|
+
# Unregister one or more eager/lazy frames by name.
|
142
|
+
#
|
143
|
+
# @param names [Object]
|
144
|
+
# Names of the tables to unregister.
|
145
|
+
#
|
146
|
+
# @return [SQLContext]
|
147
|
+
#
|
148
|
+
# @example Register with a SQLContext object:
|
149
|
+
# df0 = Polars::DataFrame.new({"ints" => [9, 8, 7, 6, 5]})
|
150
|
+
# lf1 = Polars::LazyFrame.new({"text" => ["a", "b", "c"]})
|
151
|
+
# lf2 = Polars::LazyFrame.new({"misc" => ["testing1234"]})
|
152
|
+
# ctx = Polars::SQLContext.new(test1: df0, test2: lf1, test3: lf2)
|
153
|
+
# ctx.tables
|
154
|
+
# # => ["test1", "test2", "test3"]
|
155
|
+
#
|
156
|
+
# @example Unregister one or more of the tables:
|
157
|
+
# ctx.unregister(["test1", "test3"]).tables
|
158
|
+
# # => ["test2"]
|
159
|
+
def unregister(names)
  # Accept a single name or a list of names. Kernel#Array wraps a lone
  # String or Symbol and leaves an Array untouched.
  # Names are stringified to mirror #register, which stores `name.to_s`;
  # tables registered under symbol names can then be removed the same way.
  Array(names).each { |table_name| _ctxt.unregister(table_name.to_s) }

  # Return the context itself so calls can be chained.
  self
end
|
168
|
+
|
169
|
+
# Return a list of the registered table names.
|
170
|
+
#
|
171
|
+
# @return [Array]
|
172
|
+
#
|
173
|
+
# @example Executing as SQL:
|
174
|
+
# frame_data = Polars::DataFrame.new({"hello" => ["world"]})
|
175
|
+
# ctx = Polars::SQLContext.new(hello_world: frame_data)
|
176
|
+
# ctx.execute("SHOW TABLES", eager: true)
|
177
|
+
# # =>
|
178
|
+
# # shape: (1, 1)
|
179
|
+
# # ┌─────────────┐
|
180
|
+
# # │ name │
|
181
|
+
# # │ --- │
|
182
|
+
# # │ str │
|
183
|
+
# # ╞═════════════╡
|
184
|
+
# # │ hello_world │
|
185
|
+
# # └─────────────┘
|
186
|
+
#
|
187
|
+
# @example Calling the method:
|
188
|
+
# ctx.tables
|
189
|
+
# # => ["hello_world"]
|
190
|
+
def tables
  # Fetch the names from the native context and return them sorted.
  registered = _ctxt.get_tables
  registered.sort
end
|
193
|
+
end
|
194
|
+
end
|
data/lib/polars/string_expr.rb
CHANGED
@@ -82,9 +82,13 @@ module Polars
|
|
82
82
|
time_zone: nil,
|
83
83
|
strict: true,
|
84
84
|
exact: true,
|
85
|
-
cache: true
|
85
|
+
cache: true,
|
86
|
+
use_earliest: nil,
|
87
|
+
ambiguous: "raise"
|
86
88
|
)
|
87
89
|
_validate_format_argument(format)
|
90
|
+
ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
|
91
|
+
ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
|
88
92
|
Utils.wrap_expr(
|
89
93
|
self._rbexpr.str_to_datetime(
|
90
94
|
format,
|
@@ -92,7 +96,8 @@ module Polars
|
|
92
96
|
time_zone,
|
93
97
|
strict,
|
94
98
|
exact,
|
95
|
-
cache
|
99
|
+
cache,
|
100
|
+
ambiguous._rbexpr
|
96
101
|
)
|
97
102
|
)
|
98
103
|
end
|
@@ -235,7 +240,7 @@ module Polars
|
|
235
240
|
# # │ 東京 ┆ 6 ┆ 2 │
|
236
241
|
# # └──────┴────────┴────────┘
|
237
242
|
def lengths
|
238
|
-
Utils.wrap_expr(_rbexpr.
|
243
|
+
Utils.wrap_expr(_rbexpr.str_len_bytes)
|
239
244
|
end
|
240
245
|
|
241
246
|
# Get length of the strings as `:u32` (as number of chars).
|
@@ -267,13 +272,15 @@ module Polars
|
|
267
272
|
# # │ 東京 ┆ 6 ┆ 2 │
|
268
273
|
# # └──────┴────────┴────────┘
|
269
274
|
def n_chars
|
270
|
-
Utils.wrap_expr(_rbexpr.
|
275
|
+
Utils.wrap_expr(_rbexpr.str_len_chars)
|
271
276
|
end
|
272
277
|
|
273
278
|
# Vertically concat the values in the Series to a single string value.
|
274
279
|
#
|
275
280
|
# @param delimiter [String]
|
276
281
|
# The delimiter to insert between consecutive string values.
|
282
|
+
# @param ignore_nulls [Boolean]
|
283
|
+
# Ignore null values (default).
|
277
284
|
#
|
278
285
|
# @return [Expr]
|
279
286
|
#
|
@@ -282,15 +289,28 @@ module Polars
|
|
282
289
|
# df.select(Polars.col("foo").str.concat("-"))
|
283
290
|
# # =>
|
284
291
|
# # shape: (1, 1)
|
285
|
-
# #
|
286
|
-
# # │ foo
|
287
|
-
# # │ ---
|
288
|
-
# # │ str
|
289
|
-
# #
|
290
|
-
# # │ 1-
|
291
|
-
# #
|
292
|
-
|
293
|
-
|
292
|
+
# # ┌─────┐
|
293
|
+
# # │ foo │
|
294
|
+
# # │ --- │
|
295
|
+
# # │ str │
|
296
|
+
# # ╞═════╡
|
297
|
+
# # │ 1-2 │
|
298
|
+
# # └─────┘
|
299
|
+
#
|
300
|
+
# @example
|
301
|
+
# df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
|
302
|
+
# df.select(Polars.col("foo").str.concat("-", ignore_nulls: false))
|
303
|
+
# # =>
|
304
|
+
# # shape: (1, 1)
|
305
|
+
# # ┌──────┐
|
306
|
+
# # │ foo │
|
307
|
+
# # │ --- │
|
308
|
+
# # │ str │
|
309
|
+
# # ╞══════╡
|
310
|
+
# # │ null │
|
311
|
+
# # └──────┘
|
312
|
+
def concat(delimiter = "-", ignore_nulls: true)
  # Delegate to the native expression, then wrap the result as an Expr.
  joined = _rbexpr.str_concat(delimiter, ignore_nulls)
  Utils.wrap_expr(joined)
end
|
295
315
|
|
296
316
|
# Transform to uppercase variant.
|
@@ -337,7 +357,7 @@ module Polars
|
|
337
357
|
|
338
358
|
# Remove leading and trailing whitespace.
|
339
359
|
#
|
340
|
-
# @param
|
360
|
+
# @param characters [String, nil]
|
341
361
|
# An optional single character that should be trimmed.
|
342
362
|
#
|
343
363
|
# @return [Expr]
|
@@ -356,16 +376,15 @@ module Polars
|
|
356
376
|
# # │ trail │
|
357
377
|
# # │ both │
|
358
378
|
# # └───────┘
|
359
|
-
def
|
360
|
-
|
361
|
-
|
362
|
-
end
|
363
|
-
Utils.wrap_expr(_rbexpr.str_strip(matches))
|
379
|
+
def strip_chars(characters = nil)
  # Plain strings are treated as literals (not column names) when parsed.
  strip_set = Utils.parse_as_expression(characters, str_as_lit: true)
  stripped = _rbexpr.str_strip_chars(strip_set)
  Utils.wrap_expr(stripped)
end
|
383
|
+
alias_method :strip, :strip_chars
|
365
384
|
|
366
385
|
# Remove leading whitespace.
|
367
386
|
#
|
368
|
-
# @param
|
387
|
+
# @param characters [String, nil]
|
369
388
|
# An optional single character that should be trimmed.
|
370
389
|
#
|
371
390
|
# @return [Expr]
|
@@ -384,16 +403,15 @@ module Polars
|
|
384
403
|
# # │ trail │
|
385
404
|
# # │ both │
|
386
405
|
# # └────────┘
|
387
|
-
def
|
388
|
-
|
389
|
-
|
390
|
-
end
|
391
|
-
Utils.wrap_expr(_rbexpr.str_lstrip(matches))
|
406
|
+
def strip_chars_start(characters = nil)
  # Plain strings are treated as literals (not column names) when parsed.
  strip_set = Utils.parse_as_expression(characters, str_as_lit: true)
  stripped = _rbexpr.str_strip_chars_start(strip_set)
  Utils.wrap_expr(stripped)
end
|
410
|
+
alias_method :lstrip, :strip_chars_start
|
393
411
|
|
394
412
|
# Remove trailing whitespace.
|
395
413
|
#
|
396
|
-
# @param
|
414
|
+
# @param characters [String, nil]
|
397
415
|
# An optional single character that should be trimmed.
|
398
416
|
#
|
399
417
|
# @return [Expr]
|
@@ -412,12 +430,11 @@ module Polars
|
|
412
430
|
# # │ trail │
|
413
431
|
# # │ both │
|
414
432
|
# # └───────┘
|
415
|
-
def
|
416
|
-
|
417
|
-
|
418
|
-
end
|
419
|
-
Utils.wrap_expr(_rbexpr.str_rstrip(matches))
|
433
|
+
def strip_chars_end(characters = nil)
  # Plain strings are treated as literals (not column names) when parsed.
  strip_set = Utils.parse_as_expression(characters, str_as_lit: true)
  stripped = _rbexpr.str_strip_chars_end(strip_set)
  Utils.wrap_expr(stripped)
end
|
437
|
+
alias_method :rstrip, :strip_chars_end
|
421
438
|
|
422
439
|
# Fills the string with zeroes.
|
423
440
|
#
|
@@ -461,13 +478,13 @@ module Polars
|
|
461
478
|
Utils.wrap_expr(_rbexpr.str_zfill(alignment))
|
462
479
|
end
|
463
480
|
|
464
|
-
# Return the string left justified in a string of length `
|
481
|
+
# Return the string left justified in a string of length `length`.
|
465
482
|
#
|
466
483
|
# Padding is done using the specified `fillchar`.
|
467
|
-
# The original string is returned if `
|
484
|
+
# The original string is returned if `length` is less than or equal to
|
468
485
|
# `s.length`.
|
469
486
|
#
|
470
|
-
# @param
|
487
|
+
# @param length [Integer]
|
471
488
|
# Justify left to this length.
|
472
489
|
# @param fillchar [String]
|
473
490
|
# Fill with this ASCII character.
|
@@ -489,17 +506,18 @@ module Polars
|
|
489
506
|
# # │ null │
|
490
507
|
# # │ hippopotamus │
|
491
508
|
# # └──────────────┘
|
492
|
-
def ljust(
|
493
|
-
Utils.wrap_expr(_rbexpr.
|
509
|
+
def ljust(length, fillchar = " ")
  # Left-justifying is implemented by the native `str_pad_end` call.
  padded = _rbexpr.str_pad_end(length, fillchar)
  Utils.wrap_expr(padded)
end
|
512
|
+
alias_method :pad_end, :ljust
|
495
513
|
|
496
|
-
# Return the string right justified in a string of length `
|
514
|
+
# Return the string right justified in a string of length `length`.
|
497
515
|
#
|
498
516
|
# Padding is done using the specified `fillchar`.
|
499
|
-
# The original string is returned if `
|
517
|
+
# The original string is returned if `length` is less than or equal to
|
500
518
|
# `s.length`.
|
501
519
|
#
|
502
|
-
# @param
|
520
|
+
# @param length [Integer]
|
503
521
|
# Justify right to this length.
|
504
522
|
# @param fillchar [String]
|
505
523
|
# Fill with this ASCII character.
|
@@ -521,9 +539,10 @@ module Polars
|
|
521
539
|
# # │ null │
|
522
540
|
# # │ hippopotamus │
|
523
541
|
# # └──────────────┘
|
524
|
-
def rjust(
|
525
|
-
Utils.wrap_expr(_rbexpr.
|
542
|
+
def rjust(length, fillchar = " ")
  # Right-justifying is implemented by the native `str_pad_start` call.
  padded = _rbexpr.str_pad_start(length, fillchar)
  Utils.wrap_expr(padded)
end
|
545
|
+
alias_method :pad_start, :rjust
|
527
546
|
|
528
547
|
# Check if string contains a substring that matches a regex.
|
529
548
|
#
|
@@ -864,9 +883,11 @@ module Polars
|
|
864
883
|
# # │ 5 │
|
865
884
|
# # │ 6 │
|
866
885
|
# # └──────────────┘
|
867
|
-
def
|
868
|
-
Utils.
|
886
|
+
def count_matches(pattern, literal: false)
  # A plain string pattern is parsed as a literal value, not a column name.
  pattern_expr = Utils.parse_as_expression(pattern, str_as_lit: true)
  counted = _rbexpr.str_count_matches(pattern_expr, literal)
  Utils.wrap_expr(counted)
end
|
890
|
+
alias_method :count_match, :count_matches
|
870
891
|
|
871
892
|
# Split the string by a substring.
|
872
893
|
#
|
@@ -892,6 +913,7 @@ module Polars
|
|
892
913
|
# # │ ["foo", "bar", "baz"] │
|
893
914
|
# # └───────────────────────┘
|
894
915
|
def split(by, inclusive: false)
|
916
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
895
917
|
if inclusive
|
896
918
|
Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
|
897
919
|
else
|
@@ -934,6 +956,7 @@ module Polars
|
|
934
956
|
# # │ {"d","4"} │
|
935
957
|
# # └─────────────┘
|
936
958
|
def split_exact(by, n, inclusive: false)
|
959
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
937
960
|
if inclusive
|
938
961
|
Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
|
939
962
|
else
|
@@ -970,6 +993,7 @@ module Polars
|
|
970
993
|
# # │ {"foo","bar baz"} │
|
971
994
|
# # └───────────────────┘
|
972
995
|
def splitn(by, n)
  # A plain string separator is parsed as a literal value, not a column name.
  separator = Utils.parse_as_expression(by, str_as_lit: true)
  pieces = _rbexpr.str_splitn(separator, n)
  Utils.wrap_expr(pieces)
end
|
975
999
|
|
@@ -1091,6 +1115,52 @@ module Polars
|
|
1091
1115
|
Utils.wrap_expr(_rbexpr.str_explode)
|
1092
1116
|
end
|
1093
1117
|
|
1118
|
+
# Convert an Utf8 column into an Int64 column with base radix.
|
1119
|
+
#
|
1120
|
+
# @param base [Integer]
|
1121
|
+
# Positive integer which is the base of the string we are parsing.
|
1122
|
+
# Default: 10.
|
1123
|
+
# @param strict [Boolean]
|
1124
|
+
# When true (the default), any parse error or overflow is raised as a ComputeError.
|
1125
|
+
# When false, such values are silently converted to null.
|
1126
|
+
#
|
1127
|
+
# @return [Expr]
|
1128
|
+
#
|
1129
|
+
# @example
|
1130
|
+
# df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
|
1131
|
+
# df.with_columns(Polars.col("bin").str.to_integer(base: 2, strict: false).alias("parsed"))
|
1132
|
+
# # =>
|
1133
|
+
# # shape: (4, 2)
|
1134
|
+
# # ┌─────────┬────────┐
|
1135
|
+
# # │ bin ┆ parsed │
|
1136
|
+
# # │ --- ┆ --- │
|
1137
|
+
# # │ str ┆ i64 │
|
1138
|
+
# # ╞═════════╪════════╡
|
1139
|
+
# # │ 110 ┆ 6 │
|
1140
|
+
# # │ 101 ┆ 5 │
|
1141
|
+
# # │ 010 ┆ 2 │
|
1142
|
+
# # │ invalid ┆ null │
|
1143
|
+
# # └─────────┴────────┘
|
1144
|
+
#
|
1145
|
+
# @example
|
1146
|
+
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1147
|
+
# df.with_columns(Polars.col("hex").str.to_integer(base: 16, strict: true).alias("parsed"))
|
1148
|
+
# # =>
|
1149
|
+
# # shape: (4, 2)
|
1150
|
+
# # ┌──────┬────────┐
|
1151
|
+
# # │ hex ┆ parsed │
|
1152
|
+
# # │ --- ┆ --- │
|
1153
|
+
# # │ str ┆ i64 │
|
1154
|
+
# # ╞══════╪════════╡
|
1155
|
+
# # │ fa1e ┆ 64030 │
|
1156
|
+
# # │ ff00 ┆ 65280 │
|
1157
|
+
# # │ cafe ┆ 51966 │
|
1158
|
+
# # │ null ┆ null │
|
1159
|
+
# # └──────┴────────┘
|
1160
|
+
def to_integer(base: 10, strict: true)
  # Delegate to the native expression, then wrap the result as an Expr.
  parsed = _rbexpr.str_to_integer(base, strict)
  Utils.wrap_expr(parsed)
end
|
1163
|
+
|
1094
1164
|
# Parse integers with base radix from strings.
|
1095
1165
|
#
|
1096
1166
|
# By default base 2. ParseError/Overflows become Nulls.
|
@@ -1119,24 +1189,8 @@ module Polars
|
|
1119
1189
|
# # │ 2 │
|
1120
1190
|
# # │ null │
|
1121
1191
|
# # └──────┘
|
1122
|
-
#
|
1123
|
-
# @example
|
1124
|
-
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1125
|
-
# df.select(Polars.col("hex").str.parse_int(16, strict: true))
|
1126
|
-
# # =>
|
1127
|
-
# # shape: (4, 1)
|
1128
|
-
# # ┌───────┐
|
1129
|
-
# # │ hex │
|
1130
|
-
# # │ --- │
|
1131
|
-
# # │ i32 │
|
1132
|
-
# # ╞═══════╡
|
1133
|
-
# # │ 64030 │
|
1134
|
-
# # │ 65280 │
|
1135
|
-
# # │ 51966 │
|
1136
|
-
# # │ null │
|
1137
|
-
# # └───────┘
|
1138
1192
|
def parse_int(radix = 2, strict: true)
  # Parse with #to_integer, then cast the result to Int32.
  # The caller's `radix` must be forwarded as the base: a hard-coded
  # `base: 2` silently ignored the argument, so e.g. `parse_int(16)` parsed
  # hexadecimal input as binary.
  to_integer(base: radix, strict: strict).cast(Int32, strict: strict)
end
|
1141
1195
|
|
1142
1196
|
private
|
@@ -82,7 +82,8 @@ module Polars
|
|
82
82
|
time_zone: nil,
|
83
83
|
strict: true,
|
84
84
|
exact: true,
|
85
|
-
cache: true
|
85
|
+
cache: true,
|
86
|
+
use_earliest: nil
|
86
87
|
)
|
87
88
|
super
|
88
89
|
end
|
@@ -232,9 +233,23 @@ module Polars
|
|
232
233
|
# @return [Series]
|
233
234
|
#
|
234
235
|
# @example
|
235
|
-
# Polars::Series.new([1, nil, 2]).str.concat("-")
|
236
|
-
# # =>
|
237
|
-
|
236
|
+
# Polars::Series.new([1, nil, 2]).str.concat("-")
|
237
|
+
# # =>
|
238
|
+
# # shape: (1,)
|
239
|
+
# # Series: '' [str]
|
240
|
+
# # [
|
241
|
+
# # "1-2"
|
242
|
+
# # ]
|
243
|
+
#
|
244
|
+
# @example
|
245
|
+
# Polars::Series.new([1, nil, 2]).str.concat("-", ignore_nulls: false)
|
246
|
+
# # =>
|
247
|
+
# # shape: (1,)
|
248
|
+
# # Series: '' [str]
|
249
|
+
# # [
|
250
|
+
# # null
|
251
|
+
# # ]
|
252
|
+
def concat(delimiter = "-", ignore_nulls: true)
  # Forward both arguments explicitly to the inherited implementation.
  super(delimiter, ignore_nulls: ignore_nulls)
end
|
240
255
|
|
data/lib/polars/struct_expr.rb
CHANGED
data/lib/polars/utils.rb
CHANGED
@@ -27,7 +27,7 @@ module Polars
|
|
27
27
|
if obj.is_a?(Range)
|
28
28
|
# size only works for numeric ranges
|
29
29
|
obj.to_a.length
|
30
|
-
elsif obj.is_a?(String)
|
30
|
+
elsif obj.is_a?(::String)
|
31
31
|
nil
|
32
32
|
else
|
33
33
|
obj.length
|
@@ -116,7 +116,7 @@ module Polars
|
|
116
116
|
end
|
117
117
|
|
118
118
|
def self.selection_to_rbexpr_list(exprs)
|
119
|
-
if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
119
|
+
if exprs.is_a?(::String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
120
120
|
exprs = [exprs]
|
121
121
|
end
|
122
122
|
|
@@ -124,9 +124,9 @@ module Polars
|
|
124
124
|
end
|
125
125
|
|
126
126
|
def self.expr_to_lit_or_expr(expr, str_to_lit: true)
|
127
|
-
if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
|
127
|
+
if (expr.is_a?(::String) || expr.is_a?(Symbol)) && !str_to_lit
|
128
128
|
col(expr)
|
129
|
-
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
129
|
+
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(::String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
130
130
|
lit(expr)
|
131
131
|
elsif expr.is_a?(Expr)
|
132
132
|
expr
|
@@ -152,7 +152,7 @@ module Polars
|
|
152
152
|
if data_type == Unknown
|
153
153
|
return include_unknown
|
154
154
|
end
|
155
|
-
data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
|
155
|
+
data_type.is_a?(Symbol) || data_type.is_a?(::String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
|
156
156
|
end
|
157
157
|
|
158
158
|
def self.map_rb_type_to_dtype(ruby_dtype)
|
@@ -160,7 +160,7 @@ module Polars
|
|
160
160
|
Float64
|
161
161
|
elsif ruby_dtype == Integer
|
162
162
|
Int64
|
163
|
-
elsif ruby_dtype == String
|
163
|
+
elsif ruby_dtype == ::String
|
164
164
|
Utf8
|
165
165
|
elsif ruby_dtype == TrueClass || ruby_dtype == FalseClass
|
166
166
|
Boolean
|
@@ -211,7 +211,7 @@ module Polars
|
|
211
211
|
projection = nil
|
212
212
|
if columns
|
213
213
|
raise Todo
|
214
|
-
# if columns.is_a?(String) || columns.is_a?(Symbol)
|
214
|
+
# if columns.is_a?(::String) || columns.is_a?(Symbol)
|
215
215
|
# columns = [columns]
|
216
216
|
# elsif is_int_sequence(columns)
|
217
217
|
# projection = columns.to_a
|
@@ -243,11 +243,11 @@ module Polars
|
|
243
243
|
end
|
244
244
|
|
245
245
|
def self.strlike?(value)
|
246
|
-
value.is_a?(String) || value.is_a?(Symbol)
|
246
|
+
value.is_a?(::String) || value.is_a?(Symbol)
|
247
247
|
end
|
248
248
|
|
249
249
|
def self.pathlike?(value)
|
250
|
-
value.is_a?(String) || (defined?(Pathname) && value.is_a?(Pathname))
|
250
|
+
value.is_a?(::String) || (defined?(Pathname) && value.is_a?(Pathname))
|
251
251
|
end
|
252
252
|
|
253
253
|
def self._is_iterable_of(val, eltype)
|
@@ -275,10 +275,10 @@ module Polars
|
|
275
275
|
end
|
276
276
|
|
277
277
|
def self.is_str_sequence(val, allow_str: false)
|
278
|
-
if allow_str == false && val.is_a?(String)
|
278
|
+
if allow_str == false && val.is_a?(::String)
|
279
279
|
false
|
280
280
|
else
|
281
|
-
val.is_a?(::Array) && _is_iterable_of(val, String)
|
281
|
+
val.is_a?(::Array) && _is_iterable_of(val, ::String)
|
282
282
|
end
|
283
283
|
end
|
284
284
|
|
@@ -289,10 +289,10 @@ module Polars
|
|
289
289
|
def self.parse_as_expression(input, str_as_lit: false, structify: false)
|
290
290
|
if input.is_a?(Expr)
|
291
291
|
expr = input
|
292
|
-
elsif input.is_a?(String) && !str_as_lit
|
292
|
+
elsif input.is_a?(::String) && !str_as_lit
|
293
293
|
expr = Polars.col(input)
|
294
294
|
structify = false
|
295
|
-
elsif [Integer, Float, String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
|
295
|
+
elsif [Integer, Float, ::String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
|
296
296
|
expr = Polars.lit(input)
|
297
297
|
structify = false
|
298
298
|
elsif input.is_a?(Array)
|
@@ -308,5 +308,17 @@ module Polars
|
|
308
308
|
|
309
309
|
expr._rbexpr
|
310
310
|
end
|
311
|
+
|
312
|
+
# Maps the boolean `use_earliest` argument to the `ambiguous` value that
# replaces it. Frozen so the shared lookup table cannot be mutated at runtime.
USE_EARLIEST_TO_AMBIGUOUS = {
  true => "earliest",
  false => "latest"
}.freeze
|
316
|
+
|
317
|
+
def self.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
  # When `use_earliest` is not given, `ambiguous` passes through unchanged.
  return ambiguous if use_earliest.nil?

  # Otherwise the mapped value replaces `ambiguous`; #fetch raises KeyError
  # for any key missing from the table.
  USE_EARLIEST_TO_AMBIGUOUS.fetch(use_earliest)
end
|
311
323
|
end
|
312
324
|
end
|
data/lib/polars/version.rb
CHANGED