polars-df 0.6.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/Cargo.lock +597 -599
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +20 -10
  7. data/ext/polars/src/batched_csv.rs +27 -28
  8. data/ext/polars/src/conversion.rs +135 -106
  9. data/ext/polars/src/dataframe.rs +140 -131
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/categorical.rs +8 -1
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +129 -286
  15. data/ext/polars/src/expr/list.rs +17 -9
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +201 -0
  19. data/ext/polars/src/expr/string.rs +94 -67
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +66 -41
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +41 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +74 -60
  33. data/ext/polars/src/lib.rs +175 -91
  34. data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
  35. data/ext/polars/src/{apply → map}/mod.rs +5 -5
  36. data/ext/polars/src/{apply → map}/series.rs +18 -22
  37. data/ext/polars/src/object.rs +0 -30
  38. data/ext/polars/src/on_startup.rs +32 -0
  39. data/ext/polars/src/rb_modules.rs +22 -7
  40. data/ext/polars/src/series/aggregation.rs +3 -0
  41. data/ext/polars/src/series/construction.rs +5 -5
  42. data/ext/polars/src/series/export.rs +4 -4
  43. data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
  44. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
  45. data/ext/polars/src/sql.rs +46 -0
  46. data/ext/polars/src/utils.rs +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +182 -145
  49. data/lib/polars/data_types.rb +4 -1
  50. data/lib/polars/date_time_expr.rb +23 -28
  51. data/lib/polars/date_time_name_space.rb +17 -37
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +398 -110
  54. data/lib/polars/functions.rb +29 -37
  55. data/lib/polars/group_by.rb +38 -55
  56. data/lib/polars/io.rb +40 -5
  57. data/lib/polars/lazy_frame.rb +116 -89
  58. data/lib/polars/lazy_functions.rb +40 -68
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +12 -8
  61. data/lib/polars/list_name_space.rb +2 -2
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +2 -2
  64. data/lib/polars/series.rb +315 -43
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +114 -60
  67. data/lib/polars/string_name_space.rb +19 -4
  68. data/lib/polars/struct_expr.rb +1 -1
  69. data/lib/polars/struct_name_space.rb +1 -1
  70. data/lib/polars/utils.rb +25 -13
  71. data/lib/polars/version.rb +1 -1
  72. data/lib/polars.rb +3 -0
  73. metadata +23 -11
  74. /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -0,0 +1,194 @@
1
+ module Polars
2
+ # Run SQL queries against DataFrame/LazyFrame data.
3
+ class SQLContext
4
+ # @private
5
+ attr_accessor :_ctxt, :_eager_execution
6
+
7
+ # Initialize a new `SQLContext`.
8
+ def initialize(frames = nil, eager_execution: false, **named_frames)
9
+ self._ctxt = RbSQLContext.new
10
+ self._eager_execution = eager_execution
11
+
12
+ frames = (frames || {}).to_h
13
+
14
+ if frames.any? || named_frames.any?
15
+ register_many(frames, **named_frames)
16
+ end
17
+ end
18
+
19
+ # Parse the given SQL query and execute it against the registered frame data.
20
+ #
21
+ # @param query [String]
22
+ # A valid string SQL query.
23
+ # @param eager [Boolean]
24
+ # Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
25
+ # If nil or false, the value of the init-time parameter "eager_execution" will be
26
+ # used. (Note that the query itself is always executed in lazy-mode; this
27
+ # parameter only impacts the type of the returned frame).
28
+ #
29
+ # @return [Object]
30
+ #
31
+ # @example Execute a SQL query against the registered frame data:
32
+ # df = Polars::DataFrame.new(
33
+ # [
34
+ # ["The Godfather", 1972, 6_000_000, 134_821_952, 9.2],
35
+ # ["The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0],
36
+ # ["Schindler's List", 1993, 22_000_000, 96_067_179, 8.9],
37
+ # ["Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9],
38
+ # ["The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3],
39
+ # ],
40
+ # schema: ["title", "release_year", "budget", "gross", "imdb_score"]
41
+ # )
42
+ # ctx = Polars::SQLContext.new(films: df)
43
+ # ctx.execute(
44
+ # "
45
+ # SELECT title, release_year, imdb_score
46
+ # FROM films
47
+ # WHERE release_year > 1990
48
+ # ORDER BY imdb_score DESC
49
+ # ",
50
+ # eager: true
51
+ # )
52
+ # # =>
53
+ # # shape: (4, 3)
54
+ # # ┌──────────────────────────┬──────────────┬────────────┐
55
+ # # │ title ┆ release_year ┆ imdb_score │
56
+ # # │ --- ┆ --- ┆ --- │
57
+ # # │ str ┆ i64 ┆ f64 │
58
+ # # ╞══════════════════════════╪══════════════╪════════════╡
59
+ # # │ The Shawshank Redemption ┆ 1994 ┆ 9.3 │
60
+ # # │ The Dark Knight ┆ 2008 ┆ 9.0 │
61
+ # # │ Schindler's List ┆ 1993 ┆ 8.9 │
62
+ # # │ Pulp Fiction ┆ 1994 ┆ 8.9 │
63
+ # # └──────────────────────────┴──────────────┴────────────┘
64
+ #
65
+ # @example Execute a GROUP BY query:
66
+ # ctx.execute(
67
+ # "
68
+ # SELECT
69
+ # MAX(release_year / 10) * 10 AS decade,
70
+ # SUM(gross) AS total_gross,
71
+ # COUNT(title) AS n_films,
72
+ # FROM films
73
+ # GROUP BY (release_year / 10) -- decade
74
+ # ORDER BY total_gross DESC
75
+ # ",
76
+ # eager: true
77
+ # )
78
+ # # =>
79
+ # # shape: (3, 3)
80
+ # # ┌────────┬─────────────┬─────────┐
81
+ # # │ decade ┆ total_gross ┆ n_films │
82
+ # # │ --- ┆ --- ┆ --- │
83
+ # # │ i64 ┆ i64 ┆ u32 │
84
+ # # ╞════════╪═════════════╪═════════╡
85
+ # # │ 2000 ┆ 533316061 ┆ 1 │
86
+ # # │ 1990 ┆ 232338648 ┆ 3 │
87
+ # # │ 1970 ┆ 134821952 ┆ 1 │
88
+ # # └────────┴─────────────┴─────────┘
89
+ def execute(query, eager: nil)
90
+ res = Utils.wrap_ldf(_ctxt.execute(query))
91
+ eager || _eager_execution ? res.collect : res
92
+ end
93
+
94
+ # Register a single frame as a table, using the given name.
95
+ #
96
+ # @param name [String]
97
+ # Name of the table.
98
+ # @param frame [Object]
99
+ # eager/lazy frame to associate with this table name.
100
+ #
101
+ # @return [SQLContext]
102
+ #
103
+ # @example
104
+ # df = Polars::DataFrame.new({"hello" => ["world"]})
105
+ # ctx = Polars::SQLContext.new
106
+ # ctx.register("frame_data", df).execute("SELECT * FROM frame_data").collect
107
+ # # =>
108
+ # # shape: (1, 1)
109
+ # # ┌───────┐
110
+ # # │ hello │
111
+ # # │ --- │
112
+ # # │ str │
113
+ # # ╞═══════╡
114
+ # # │ world │
115
+ # # └───────┘
116
+ def register(name, frame)
117
+ if frame.is_a?(DataFrame)
118
+ frame = frame.lazy
119
+ end
120
+ _ctxt.register(name.to_s, frame._ldf)
121
+ self
122
+ end
123
+
124
+ # Register multiple eager/lazy frames as tables, using the associated names.
125
+ #
126
+ # @param frames [Hash]
127
+ # A `{name:frame, ...}` mapping.
128
+ # @param named_frames [Object]
129
+ # Named eager/lazy frames, provided as kwargs.
130
+ #
131
+ # @return [SQLContext]
132
+ def register_many(frames, **named_frames)
133
+ frames = (frames || {}).to_h
134
+ frames = frames.merge(named_frames)
135
+ frames.each do |name, frame|
136
+ register(name, frame)
137
+ end
138
+ self
139
+ end
140
+
141
+ # Unregister one or more eager/lazy frames by name.
142
+ #
143
+ # @param names [Object]
144
+ # Names of the tables to unregister.
145
+ #
146
+ # @return [SQLContext]
147
+ #
148
+ # @example Register with a SQLContext object:
149
+ # df0 = Polars::DataFrame.new({"ints" => [9, 8, 7, 6, 5]})
150
+ # lf1 = Polars::LazyFrame.new({"text" => ["a", "b", "c"]})
151
+ # lf2 = Polars::LazyFrame.new({"misc" => ["testing1234"]})
152
+ # ctx = Polars::SQLContext.new(test1: df0, test2: lf1, test3: lf2)
153
+ # ctx.tables
154
+ # # => ["test1", "test2", "test3"]
155
+ #
156
+ # @example Unregister one or more of the tables:
157
+ # ctx.unregister(["test1", "test3"]).tables
158
+ # # => ["test2"]
159
+ def unregister(names)
160
+ if names.is_a?(::String)
161
+ names = [names]
162
+ end
163
+ names.each do |nm|
164
+ _ctxt.unregister(nm)
165
+ end
166
+ self
167
+ end
168
+
169
+ # Return a list of the registered table names.
170
+ #
171
+ # @return [Array]
172
+ #
173
+ # @example Executing as SQL:
174
+ # frame_data = Polars::DataFrame.new({"hello" => ["world"]})
175
+ # ctx = Polars::SQLContext.new(hello_world: frame_data)
176
+ # ctx.execute("SHOW TABLES", eager: true)
177
+ # # =>
178
+ # # shape: (1, 1)
179
+ # # ┌─────────────┐
180
+ # # │ name │
181
+ # # │ --- │
182
+ # # │ str │
183
+ # # ╞═════════════╡
184
+ # # │ hello_world │
185
+ # # └─────────────┘
186
+ #
187
+ # @example Calling the method:
188
+ # ctx.tables
189
+ # # => ["hello_world"]
190
+ def tables
191
+ _ctxt.get_tables.sort
192
+ end
193
+ end
194
+ end
@@ -82,9 +82,13 @@ module Polars
82
82
  time_zone: nil,
83
83
  strict: true,
84
84
  exact: true,
85
- cache: true
85
+ cache: true,
86
+ use_earliest: nil,
87
+ ambiguous: "raise"
86
88
  )
87
89
  _validate_format_argument(format)
90
+ ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
91
+ ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
88
92
  Utils.wrap_expr(
89
93
  self._rbexpr.str_to_datetime(
90
94
  format,
@@ -92,7 +96,8 @@ module Polars
92
96
  time_zone,
93
97
  strict,
94
98
  exact,
95
- cache
99
+ cache,
100
+ ambiguous._rbexpr
96
101
  )
97
102
  )
98
103
  end
@@ -235,7 +240,7 @@ module Polars
235
240
  # # │ 東京 ┆ 6 ┆ 2 │
236
241
  # # └──────┴────────┴────────┘
237
242
  def lengths
238
- Utils.wrap_expr(_rbexpr.str_lengths)
243
+ Utils.wrap_expr(_rbexpr.str_len_bytes)
239
244
  end
240
245
 
241
246
  # Get length of the strings as `:u32` (as number of chars).
@@ -267,13 +272,15 @@ module Polars
267
272
  # # │ 東京 ┆ 6 ┆ 2 │
268
273
  # # └──────┴────────┴────────┘
269
274
  def n_chars
270
- Utils.wrap_expr(_rbexpr.str_n_chars)
275
+ Utils.wrap_expr(_rbexpr.str_len_chars)
271
276
  end
272
277
 
273
278
  # Vertically concat the values in the Series to a single string value.
274
279
  #
275
280
  # @param delimiter [String]
276
281
  # The delimiter to insert between consecutive string values.
282
+ # @param ignore_nulls [Boolean]
283
+ # If true (default), null values are skipped. If false, any null in the input makes the result null.
277
284
  #
278
285
  # @return [Expr]
279
286
  #
@@ -282,15 +289,28 @@ module Polars
282
289
  # df.select(Polars.col("foo").str.concat("-"))
283
290
  # # =>
284
291
  # # shape: (1, 1)
285
- # # ┌──────────┐
286
- # # │ foo
287
- # # │ ---
288
- # # │ str
289
- # # ╞══════════╡
290
- # # │ 1-null-2 │
291
- # # └──────────┘
292
- def concat(delimiter = "-")
293
- Utils.wrap_expr(_rbexpr.str_concat(delimiter))
292
+ # # ┌─────┐
293
+ # # │ foo
294
+ # # │ ---
295
+ # # │ str
296
+ # # ╞═════╡
297
+ # # │ 1-2 │
298
+ # # └─────┘
299
+ #
300
+ # @example
301
+ # df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
302
+ # df.select(Polars.col("foo").str.concat("-", ignore_nulls: false))
303
+ # # =>
304
+ # # shape: (1, 1)
305
+ # # ┌──────┐
306
+ # # │ foo │
307
+ # # │ --- │
308
+ # # │ str │
309
+ # # ╞══════╡
310
+ # # │ null │
311
+ # # └──────┘
312
+ def concat(delimiter = "-", ignore_nulls: true)
313
+ Utils.wrap_expr(_rbexpr.str_concat(delimiter, ignore_nulls))
294
314
  end
295
315
 
296
316
  # Transform to uppercase variant.
@@ -337,7 +357,7 @@ module Polars
337
357
 
338
358
  # Remove leading and trailing whitespace.
339
359
  #
340
- # @param matches [String, nil]
360
+ # @param characters [String, nil]
341
361
  # Characters to trim from both ends (a string or an expression). If nil (default), whitespace is trimmed.
342
362
  #
343
363
  # @return [Expr]
@@ -356,16 +376,15 @@ module Polars
356
376
  # # │ trail │
357
377
  # # │ both │
358
378
  # # └───────┘
359
- def strip(matches = nil)
360
- if !matches.nil? && matches.length > 1
361
- raise ArgumentError, "matches should contain a single character"
362
- end
363
- Utils.wrap_expr(_rbexpr.str_strip(matches))
379
+ def strip_chars(characters = nil)
380
+ characters = Utils.parse_as_expression(characters, str_as_lit: true)
381
+ Utils.wrap_expr(_rbexpr.str_strip_chars(characters))
364
382
  end
383
+ alias_method :strip, :strip_chars
365
384
 
366
385
  # Remove leading whitespace.
367
386
  #
368
- # @param matches [String, nil]
387
+ # @param characters [String, nil]
369
388
  # Characters to trim from the start (a string or an expression). If nil (default), leading whitespace is trimmed.
370
389
  #
371
390
  # @return [Expr]
@@ -384,16 +403,15 @@ module Polars
384
403
  # # │ trail │
385
404
  # # │ both │
386
405
  # # └────────┘
387
- def lstrip(matches = nil)
388
- if !matches.nil? && matches.length > 1
389
- raise ArgumentError, "matches should contain a single character"
390
- end
391
- Utils.wrap_expr(_rbexpr.str_lstrip(matches))
406
+ def strip_chars_start(characters = nil)
407
+ characters = Utils.parse_as_expression(characters, str_as_lit: true)
408
+ Utils.wrap_expr(_rbexpr.str_strip_chars_start(characters))
392
409
  end
410
+ alias_method :lstrip, :strip_chars_start
393
411
 
394
412
  # Remove trailing whitespace.
395
413
  #
396
- # @param matches [String, nil]
414
+ # @param characters [String, nil]
397
415
  # Characters to trim from the end (a string or an expression). If nil (default), trailing whitespace is trimmed.
398
416
  #
399
417
  # @return [Expr]
@@ -412,12 +430,11 @@ module Polars
412
430
  # # │ trail │
413
431
  # # │ both │
414
432
  # # └───────┘
415
- def rstrip(matches = nil)
416
- if !matches.nil? && matches.length > 1
417
- raise ArgumentError, "matches should contain a single character"
418
- end
419
- Utils.wrap_expr(_rbexpr.str_rstrip(matches))
433
+ def strip_chars_end(characters = nil)
434
+ characters = Utils.parse_as_expression(characters, str_as_lit: true)
435
+ Utils.wrap_expr(_rbexpr.str_strip_chars_end(characters))
420
436
  end
437
+ alias_method :rstrip, :strip_chars_end
421
438
 
422
439
  # Fills the string with zeroes.
423
440
  #
@@ -461,13 +478,13 @@ module Polars
461
478
  Utils.wrap_expr(_rbexpr.str_zfill(alignment))
462
479
  end
463
480
 
464
- # Return the string left justified in a string of length `width`.
481
+ # Return the string left justified in a string of length `length`.
465
482
  #
466
483
  # Padding is done using the specified `fillchar`.
467
- # The original string is returned if `width` is less than or equal to
484
+ # The original string is returned if `length` is less than or equal to
468
485
  # `s.length`.
469
486
  #
470
- # @param width [Integer]
487
+ # @param length [Integer]
471
488
  # Justify left to this length.
472
489
  # @param fillchar [String]
473
490
  # Fill with this ASCII character.
@@ -489,17 +506,18 @@ module Polars
489
506
  # # │ null │
490
507
  # # │ hippopotamus │
491
508
  # # └──────────────┘
492
- def ljust(width, fillchar = " ")
493
- Utils.wrap_expr(_rbexpr.str_ljust(width, fillchar))
509
+ def ljust(length, fillchar = " ")
510
+ Utils.wrap_expr(_rbexpr.str_pad_end(length, fillchar))
494
511
  end
512
+ alias_method :pad_end, :ljust
495
513
 
496
- # Return the string right justified in a string of length `width`.
514
+ # Return the string right justified in a string of length `length`.
497
515
  #
498
516
  # Padding is done using the specified `fillchar`.
499
- # The original string is returned if `width` is less than or equal to
517
+ # The original string is returned if `length` is less than or equal to
500
518
  # `s.length`.
501
519
  #
502
- # @param width [Integer]
520
+ # @param length [Integer]
503
521
  # Justify right to this length.
504
522
  # @param fillchar [String]
505
523
  # Fill with this ASCII character.
@@ -521,9 +539,10 @@ module Polars
521
539
  # # │ null │
522
540
  # # │ hippopotamus │
523
541
  # # └──────────────┘
524
- def rjust(width, fillchar = " ")
525
- Utils.wrap_expr(_rbexpr.str_rjust(width, fillchar))
542
+ def rjust(length, fillchar = " ")
543
+ Utils.wrap_expr(_rbexpr.str_pad_start(length, fillchar))
526
544
  end
545
+ alias_method :pad_start, :rjust
527
546
 
528
547
  # Check if string contains a substring that matches a regex.
529
548
  #
@@ -864,9 +883,11 @@ module Polars
864
883
  # # │ 5 │
865
884
  # # │ 6 │
866
885
  # # └──────────────┘
867
- def count_match(pattern)
868
- Utils.wrap_expr(_rbexpr.count_match(pattern))
886
+ def count_matches(pattern, literal: false)
887
+ pattern = Utils.parse_as_expression(pattern, str_as_lit: true)
888
+ Utils.wrap_expr(_rbexpr.str_count_matches(pattern, literal))
869
889
  end
890
+ alias_method :count_match, :count_matches
870
891
 
871
892
  # Split the string by a substring.
872
893
  #
@@ -892,6 +913,7 @@ module Polars
892
913
  # # │ ["foo", "bar", "baz"] │
893
914
  # # └───────────────────────┘
894
915
  def split(by, inclusive: false)
916
+ by = Utils.parse_as_expression(by, str_as_lit: true)
895
917
  if inclusive
896
918
  Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
897
919
  else
@@ -934,6 +956,7 @@ module Polars
934
956
  # # │ {"d","4"} │
935
957
  # # └─────────────┘
936
958
  def split_exact(by, n, inclusive: false)
959
+ by = Utils.parse_as_expression(by, str_as_lit: true)
937
960
  if inclusive
938
961
  Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
939
962
  else
@@ -970,6 +993,7 @@ module Polars
970
993
  # # │ {"foo","bar baz"} │
971
994
  # # └───────────────────┘
972
995
  def splitn(by, n)
996
+ by = Utils.parse_as_expression(by, str_as_lit: true)
973
997
  Utils.wrap_expr(_rbexpr.str_splitn(by, n))
974
998
  end
975
999
 
@@ -1091,6 +1115,52 @@ module Polars
1091
1115
  Utils.wrap_expr(_rbexpr.str_explode)
1092
1116
  end
1093
1117
 
1118
+ # Convert a Utf8 (string) column into an Int64 column by parsing each value as an integer in the given base.
1119
+ #
1120
+ # @param base [Integer]
1121
+ # Positive integer which is the base of the string we are parsing.
1122
+ # Default: 10.
1123
+ # @param strict [Boolean]
1124
+ # If true (default), raise a ComputeError on any parse error or overflow.
1125
+ # If false, silently convert such values to null.
1126
+ #
1127
+ # @return [Expr]
1128
+ #
1129
+ # @example
1130
+ # df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
1131
+ # df.with_columns(Polars.col("bin").str.to_integer(base: 2, strict: false).alias("parsed"))
1132
+ # # =>
1133
+ # # shape: (4, 2)
1134
+ # # ┌─────────┬────────┐
1135
+ # # │ bin ┆ parsed │
1136
+ # # │ --- ┆ --- │
1137
+ # # │ str ┆ i64 │
1138
+ # # ╞═════════╪════════╡
1139
+ # # │ 110 ┆ 6 │
1140
+ # # │ 101 ┆ 5 │
1141
+ # # │ 010 ┆ 2 │
1142
+ # # │ invalid ┆ null │
1143
+ # # └─────────┴────────┘
1144
+ #
1145
+ # @example
1146
+ # df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
1147
+ # df.with_columns(Polars.col("hex").str.to_integer(base: 16, strict: true).alias("parsed"))
1148
+ # # =>
1149
+ # # shape: (4, 2)
1150
+ # # ┌──────┬────────┐
1151
+ # # │ hex ┆ parsed │
1152
+ # # │ --- ┆ --- │
1153
+ # # │ str ┆ i64 │
1154
+ # # ╞══════╪════════╡
1155
+ # # │ fa1e ┆ 64030 │
1156
+ # # │ ff00 ┆ 65280 │
1157
+ # # │ cafe ┆ 51966 │
1158
+ # # │ null ┆ null │
1159
+ # # └──────┴────────┘
1160
+ def to_integer(base: 10, strict: true)
1161
+ Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
1162
+ end
1163
+
1094
1164
  # Parse integers with base radix from strings.
1095
1165
  #
1096
1166
  # By default base 2. ParseError/Overflows become Nulls.
@@ -1119,24 +1189,8 @@ module Polars
1119
1189
  # # │ 2 │
1120
1190
  # # │ null │
1121
1191
  # # └──────┘
1122
- #
1123
- # @example
1124
- # df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
1125
- # df.select(Polars.col("hex").str.parse_int(16, strict: true))
1126
- # # =>
1127
- # # shape: (4, 1)
1128
- # # ┌───────┐
1129
- # # │ hex │
1130
- # # │ --- │
1131
- # # │ i32 │
1132
- # # ╞═══════╡
1133
- # # │ 64030 │
1134
- # # │ 65280 │
1135
- # # │ 51966 │
1136
- # # │ null │
1137
- # # └───────┘
1138
1192
  def parse_int(radix = 2, strict: true)
1139
- Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
1193
+ to_integer(base: 2, strict: strict).cast(Int32, strict: strict)
1140
1194
  end
1141
1195
 
1142
1196
  private
@@ -82,7 +82,8 @@ module Polars
82
82
  time_zone: nil,
83
83
  strict: true,
84
84
  exact: true,
85
- cache: true
85
+ cache: true,
86
+ use_earliest: nil
86
87
  )
87
88
  super
88
89
  end
@@ -232,9 +233,23 @@ module Polars
232
233
  # @return [Series]
233
234
  #
234
235
  # @example
235
- # Polars::Series.new([1, nil, 2]).str.concat("-")[0]
236
- # # => "1-null-2"
237
- def concat(delimiter = "-")
236
+ # Polars::Series.new([1, nil, 2]).str.concat("-")
237
+ # # =>
238
+ # # shape: (1,)
239
+ # # Series: '' [str]
240
+ # # [
241
+ # # "1-2"
242
+ # # ]
243
+ #
244
+ # @example
245
+ # Polars::Series.new([1, nil, 2]).str.concat("-", ignore_nulls: false)
246
+ # # =>
247
+ # # shape: (1,)
248
+ # # Series: '' [str]
249
+ # # [
250
+ # # null
251
+ # # ]
252
+ def concat(delimiter = "-", ignore_nulls: true)
238
253
  super
239
254
  end
240
255
 
@@ -13,7 +13,7 @@ module Polars
13
13
  #
14
14
  # @return [Expr]
15
15
  def [](item)
16
- if item.is_a?(String)
16
+ if item.is_a?(::String)
17
17
  field(item)
18
18
  elsif item.is_a?(Integer)
19
19
  Utils.wrap_expr(_rbexpr.struct_field_by_index(item))
@@ -16,7 +16,7 @@ module Polars
16
16
  def [](item)
17
17
  if item.is_a?(Integer)
18
18
  field(fields[item])
19
- elsif item.is_a?(String)
19
+ elsif item.is_a?(::String)
20
20
  field(item)
21
21
  else
22
22
  raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
data/lib/polars/utils.rb CHANGED
@@ -27,7 +27,7 @@ module Polars
27
27
  if obj.is_a?(Range)
28
28
  # size only works for numeric ranges
29
29
  obj.to_a.length
30
- elsif obj.is_a?(String)
30
+ elsif obj.is_a?(::String)
31
31
  nil
32
32
  else
33
33
  obj.length
@@ -116,7 +116,7 @@ module Polars
116
116
  end
117
117
 
118
118
  def self.selection_to_rbexpr_list(exprs)
119
- if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
119
+ if exprs.is_a?(::String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
120
120
  exprs = [exprs]
121
121
  end
122
122
 
@@ -124,9 +124,9 @@ module Polars
124
124
  end
125
125
 
126
126
  def self.expr_to_lit_or_expr(expr, str_to_lit: true)
127
- if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
127
+ if (expr.is_a?(::String) || expr.is_a?(Symbol)) && !str_to_lit
128
128
  col(expr)
129
- elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
129
+ elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(::String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
130
130
  lit(expr)
131
131
  elsif expr.is_a?(Expr)
132
132
  expr
@@ -152,7 +152,7 @@ module Polars
152
152
  if data_type == Unknown
153
153
  return include_unknown
154
154
  end
155
- data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
155
+ data_type.is_a?(Symbol) || data_type.is_a?(::String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
156
156
  end
157
157
 
158
158
  def self.map_rb_type_to_dtype(ruby_dtype)
@@ -160,7 +160,7 @@ module Polars
160
160
  Float64
161
161
  elsif ruby_dtype == Integer
162
162
  Int64
163
- elsif ruby_dtype == String
163
+ elsif ruby_dtype == ::String
164
164
  Utf8
165
165
  elsif ruby_dtype == TrueClass || ruby_dtype == FalseClass
166
166
  Boolean
@@ -211,7 +211,7 @@ module Polars
211
211
  projection = nil
212
212
  if columns
213
213
  raise Todo
214
- # if columns.is_a?(String) || columns.is_a?(Symbol)
214
+ # if columns.is_a?(::String) || columns.is_a?(Symbol)
215
215
  # columns = [columns]
216
216
  # elsif is_int_sequence(columns)
217
217
  # projection = columns.to_a
@@ -243,11 +243,11 @@ module Polars
243
243
  end
244
244
 
245
245
  def self.strlike?(value)
246
- value.is_a?(String) || value.is_a?(Symbol)
246
+ value.is_a?(::String) || value.is_a?(Symbol)
247
247
  end
248
248
 
249
249
  def self.pathlike?(value)
250
- value.is_a?(String) || (defined?(Pathname) && value.is_a?(Pathname))
250
+ value.is_a?(::String) || (defined?(Pathname) && value.is_a?(Pathname))
251
251
  end
252
252
 
253
253
  def self._is_iterable_of(val, eltype)
@@ -275,10 +275,10 @@ module Polars
275
275
  end
276
276
 
277
277
  def self.is_str_sequence(val, allow_str: false)
278
- if allow_str == false && val.is_a?(String)
278
+ if allow_str == false && val.is_a?(::String)
279
279
  false
280
280
  else
281
- val.is_a?(::Array) && _is_iterable_of(val, String)
281
+ val.is_a?(::Array) && _is_iterable_of(val, ::String)
282
282
  end
283
283
  end
284
284
 
@@ -289,10 +289,10 @@ module Polars
289
289
  def self.parse_as_expression(input, str_as_lit: false, structify: false)
290
290
  if input.is_a?(Expr)
291
291
  expr = input
292
- elsif input.is_a?(String) && !str_as_lit
292
+ elsif input.is_a?(::String) && !str_as_lit
293
293
  expr = Polars.col(input)
294
294
  structify = false
295
- elsif [Integer, Float, String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
295
+ elsif [Integer, Float, ::String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
296
296
  expr = Polars.lit(input)
297
297
  structify = false
298
298
  elsif input.is_a?(Array)
@@ -308,5 +308,17 @@ module Polars
308
308
 
309
309
  expr._rbexpr
310
310
  end
311
+
312
+ USE_EARLIEST_TO_AMBIGUOUS = {
313
+ true => "earliest",
314
+ false => "latest"
315
+ }
316
+
317
+ def self.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
318
+ unless use_earliest.nil?
319
+ ambiguous = USE_EARLIEST_TO_AMBIGUOUS.fetch(use_earliest)
320
+ end
321
+ ambiguous
322
+ end
311
323
  end
312
324
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.6.0"
3
+ VERSION = "0.8.0"
4
4
  end