polars-df 0.6.0-x86_64-linux → 0.7.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
@@ -82,9 +82,13 @@ module Polars
82
82
  time_zone: nil,
83
83
  strict: true,
84
84
  exact: true,
85
- cache: true
85
+ cache: true,
86
+ use_earliest: nil,
87
+ ambiguous: "raise"
86
88
  )
87
89
  _validate_format_argument(format)
90
+ ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
91
+ ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
88
92
  Utils.wrap_expr(
89
93
  self._rbexpr.str_to_datetime(
90
94
  format,
@@ -92,7 +96,8 @@ module Polars
92
96
  time_zone,
93
97
  strict,
94
98
  exact,
95
- cache
99
+ cache,
100
+ ambiguous._rbexpr
96
101
  )
97
102
  )
98
103
  end
@@ -235,7 +240,7 @@ module Polars
235
240
  # # │ 東京 ┆ 6 ┆ 2 │
236
241
  # # └──────┴────────┴────────┘
237
242
  def lengths
238
- Utils.wrap_expr(_rbexpr.str_lengths)
243
+ Utils.wrap_expr(_rbexpr.str_len_bytes)
239
244
  end
240
245
 
241
246
  # Get length of the strings as `:u32` (as number of chars).
@@ -267,13 +272,15 @@ module Polars
267
272
  # # │ 東京 ┆ 6 ┆ 2 │
268
273
  # # └──────┴────────┴────────┘
269
274
  def n_chars
270
- Utils.wrap_expr(_rbexpr.str_n_chars)
275
+ Utils.wrap_expr(_rbexpr.str_len_chars)
271
276
  end
272
277
 
273
278
  # Vertically concat the values in the Series to a single string value.
274
279
  #
275
280
  # @param delimiter [String]
276
281
  # The delimiter to insert between consecutive string values.
282
+ # @param ignore_nulls [Boolean]
283
+ # Ignore null values (default).
277
284
  #
278
285
  # @return [Expr]
279
286
  #
@@ -282,15 +289,28 @@ module Polars
282
289
  # df.select(Polars.col("foo").str.concat("-"))
283
290
  # # =>
284
291
  # # shape: (1, 1)
285
- # # ┌──────────┐
286
- # # │ foo
287
- # # │ ---
288
- # # │ str
289
- # # ╞══════════╡
290
- # # │ 1-null-2 │
291
- # # └──────────┘
292
- def concat(delimiter = "-")
293
- Utils.wrap_expr(_rbexpr.str_concat(delimiter))
292
+ # # ┌─────┐
293
+ # # │ foo
294
+ # # │ ---
295
+ # # │ str
296
+ # # ╞═════╡
297
+ # # │ 1-2 │
298
+ # # └─────┘
299
+ #
300
+ # @example
301
+ # df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
302
+ # df.select(Polars.col("foo").str.concat("-", ignore_nulls: false))
303
+ # # =>
304
+ # # shape: (1, 1)
305
+ # # ┌──────┐
306
+ # # │ foo │
307
+ # # │ --- │
308
+ # # │ str │
309
+ # # ╞══════╡
310
+ # # │ null │
311
+ # # └──────┘
312
+ def concat(delimiter = "-", ignore_nulls: true)
313
+ Utils.wrap_expr(_rbexpr.str_concat(delimiter, ignore_nulls))
294
314
  end
295
315
 
296
316
  # Transform to uppercase variant.
@@ -337,7 +357,7 @@ module Polars
337
357
 
338
358
  # Remove leading and trailing whitespace.
339
359
  #
340
- # @param matches [String, nil]
360
+ # @param characters [String, nil]
341
361
  # An optional single character that should be trimmed.
342
362
  #
343
363
  # @return [Expr]
@@ -356,16 +376,15 @@ module Polars
356
376
  # # │ trail │
357
377
  # # │ both │
358
378
  # # └───────┘
359
- def strip(matches = nil)
360
- if !matches.nil? && matches.length > 1
361
- raise ArgumentError, "matches should contain a single character"
362
- end
363
- Utils.wrap_expr(_rbexpr.str_strip(matches))
379
+ def strip_chars(characters = nil)
380
+ characters = Utils.parse_as_expression(characters, str_as_lit: true)
381
+ Utils.wrap_expr(_rbexpr.str_strip_chars(characters))
364
382
  end
383
+ alias_method :strip, :strip_chars
365
384
 
366
385
  # Remove leading whitespace.
367
386
  #
368
- # @param matches [String, nil]
387
+ # @param characters [String, nil]
369
388
  # An optional single character that should be trimmed.
370
389
  #
371
390
  # @return [Expr]
@@ -384,16 +403,15 @@ module Polars
384
403
  # # │ trail │
385
404
  # # │ both │
386
405
  # # └────────┘
387
- def lstrip(matches = nil)
388
- if !matches.nil? && matches.length > 1
389
- raise ArgumentError, "matches should contain a single character"
390
- end
391
- Utils.wrap_expr(_rbexpr.str_lstrip(matches))
406
+ def strip_chars_start(characters = nil)
407
+ characters = Utils.parse_as_expression(characters, str_as_lit: true)
408
+ Utils.wrap_expr(_rbexpr.str_strip_chars_start(characters))
392
409
  end
410
+ alias_method :lstrip, :strip_chars_start
393
411
 
394
412
  # Remove trailing whitespace.
395
413
  #
396
- # @param matches [String, nil]
414
+ # @param characters [String, nil]
397
415
  # An optional single character that should be trimmed.
398
416
  #
399
417
  # @return [Expr]
@@ -412,12 +430,11 @@ module Polars
412
430
  # # │ trail │
413
431
  # # │ both │
414
432
  # # └───────┘
415
- def rstrip(matches = nil)
416
- if !matches.nil? && matches.length > 1
417
- raise ArgumentError, "matches should contain a single character"
418
- end
419
- Utils.wrap_expr(_rbexpr.str_rstrip(matches))
433
+ def strip_chars_end(characters = nil)
434
+ characters = Utils.parse_as_expression(characters, str_as_lit: true)
435
+ Utils.wrap_expr(_rbexpr.str_strip_chars_end(characters))
420
436
  end
437
+ alias_method :rstrip, :strip_chars_end
421
438
 
422
439
  # Fills the string with zeroes.
423
440
  #
@@ -461,13 +478,13 @@ module Polars
461
478
  Utils.wrap_expr(_rbexpr.str_zfill(alignment))
462
479
  end
463
480
 
464
- # Return the string left justified in a string of length `width`.
481
+ # Return the string left justified in a string of length `length`.
465
482
  #
466
483
  # Padding is done using the specified `fillchar`.
467
- # The original string is returned if `width` is less than or equal to
484
+ # The original string is returned if `length` is less than or equal to
468
485
  # `s.length`.
469
486
  #
470
- # @param width [Integer]
487
+ # @param length [Integer]
471
488
  # Justify left to this length.
472
489
  # @param fillchar [String]
473
490
  # Fill with this ASCII character.
@@ -489,17 +506,18 @@ module Polars
489
506
  # # │ null │
490
507
  # # │ hippopotamus │
491
508
  # # └──────────────┘
492
- def ljust(width, fillchar = " ")
493
- Utils.wrap_expr(_rbexpr.str_ljust(width, fillchar))
509
+ def ljust(length, fillchar = " ")
510
+ Utils.wrap_expr(_rbexpr.str_pad_end(length, fillchar))
494
511
  end
512
+ alias_method :pad_end, :ljust
495
513
 
496
- # Return the string right justified in a string of length `width`.
514
+ # Return the string right justified in a string of length `length`.
497
515
  #
498
516
  # Padding is done using the specified `fillchar`.
499
- # The original string is returned if `width` is less than or equal to
517
+ # The original string is returned if `length` is less than or equal to
500
518
  # `s.length`.
501
519
  #
502
- # @param width [Integer]
520
+ # @param length [Integer]
503
521
  # Justify right to this length.
504
522
  # @param fillchar [String]
505
523
  # Fill with this ASCII character.
@@ -521,9 +539,10 @@ module Polars
521
539
  # # │ null │
522
540
  # # │ hippopotamus │
523
541
  # # └──────────────┘
524
- def rjust(width, fillchar = " ")
525
- Utils.wrap_expr(_rbexpr.str_rjust(width, fillchar))
542
+ def rjust(length, fillchar = " ")
543
+ Utils.wrap_expr(_rbexpr.str_pad_start(length, fillchar))
526
544
  end
545
+ alias_method :pad_start, :rjust
527
546
 
528
547
  # Check if string contains a substring that matches a regex.
529
548
  #
@@ -864,9 +883,11 @@ module Polars
864
883
  # # │ 5 │
865
884
  # # │ 6 │
866
885
  # # └──────────────┘
867
- def count_match(pattern)
868
- Utils.wrap_expr(_rbexpr.count_match(pattern))
886
+ def count_matches(pattern, literal: false)
887
+ pattern = Utils.parse_as_expression(pattern, str_as_lit: true)
888
+ Utils.wrap_expr(_rbexpr.str_count_matches(pattern, literal))
869
889
  end
890
+ alias_method :count_match, :count_matches
870
891
 
871
892
  # Split the string by a substring.
872
893
  #
@@ -892,6 +913,7 @@ module Polars
892
913
  # # │ ["foo", "bar", "baz"] │
893
914
  # # └───────────────────────┘
894
915
  def split(by, inclusive: false)
916
+ by = Utils.parse_as_expression(by, str_as_lit: true)
895
917
  if inclusive
896
918
  Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
897
919
  else
@@ -934,6 +956,7 @@ module Polars
934
956
  # # │ {"d","4"} │
935
957
  # # └─────────────┘
936
958
  def split_exact(by, n, inclusive: false)
959
+ by = Utils.parse_as_expression(by, str_as_lit: true)
937
960
  if inclusive
938
961
  Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
939
962
  else
@@ -970,6 +993,7 @@ module Polars
970
993
  # # │ {"foo","bar baz"} │
971
994
  # # └───────────────────┘
972
995
  def splitn(by, n)
996
+ by = Utils.parse_as_expression(by, str_as_lit: true)
973
997
  Utils.wrap_expr(_rbexpr.str_splitn(by, n))
974
998
  end
975
999
 
@@ -1091,6 +1115,52 @@ module Polars
1091
1115
  Utils.wrap_expr(_rbexpr.str_explode)
1092
1116
  end
1093
1117
 
1118
+ # Convert an Utf8 column into an Int64 column with base radix.
1119
+ #
1120
+ # @param base [Integer]
1121
+ # Positive integer which is the base of the string we are parsing.
1122
+ # Default: 10.
1123
+ # @param strict [Boolean]
1124
+ # Bool, default=true will raise any ParseError or overflow as ComputeError.
1125
+ # false silently convert to Null.
1126
+ #
1127
+ # @return [Expr]
1128
+ #
1129
+ # @example
1130
+ # df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
1131
+ # df.with_columns(Polars.col("bin").str.to_integer(base: 2, strict: false).alias("parsed"))
1132
+ # # =>
1133
+ # # shape: (4, 2)
1134
+ # # ┌─────────┬────────┐
1135
+ # # │ bin ┆ parsed │
1136
+ # # │ --- ┆ --- │
1137
+ # # │ str ┆ i64 │
1138
+ # # ╞═════════╪════════╡
1139
+ # # │ 110 ┆ 6 │
1140
+ # # │ 101 ┆ 5 │
1141
+ # # │ 010 ┆ 2 │
1142
+ # # │ invalid ┆ null │
1143
+ # # └─────────┴────────┘
1144
+ #
1145
+ # @example
1146
+ # df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
1147
+ # df.with_columns(Polars.col("hex").str.to_integer(base: 16, strict: true).alias("parsed"))
1148
+ # # =>
1149
+ # # shape: (4, 2)
1150
+ # # ┌──────┬────────┐
1151
+ # # │ hex ┆ parsed │
1152
+ # # │ --- ┆ --- │
1153
+ # # │ str ┆ i64 │
1154
+ # # ╞══════╪════════╡
1155
+ # # │ fa1e ┆ 64030 │
1156
+ # # │ ff00 ┆ 65280 │
1157
+ # # │ cafe ┆ 51966 │
1158
+ # # │ null ┆ null │
1159
+ # # └──────┴────────┘
1160
+ def to_integer(base: 10, strict: true)
1161
+ Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
1162
+ end
1163
+
1094
1164
  # Parse integers with base radix from strings.
1095
1165
  #
1096
1166
  # By default base 2. ParseError/Overflows become Nulls.
@@ -1119,24 +1189,8 @@ module Polars
1119
1189
  # # │ 2 │
1120
1190
  # # │ null │
1121
1191
  # # └──────┘
1122
- #
1123
- # @example
1124
- # df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
1125
- # df.select(Polars.col("hex").str.parse_int(16, strict: true))
1126
- # # =>
1127
- # # shape: (4, 1)
1128
- # # ┌───────┐
1129
- # # │ hex │
1130
- # # │ --- │
1131
- # # │ i32 │
1132
- # # ╞═══════╡
1133
- # # │ 64030 │
1134
- # # │ 65280 │
1135
- # # │ 51966 │
1136
- # # │ null │
1137
- # # └───────┘
1138
1192
  def parse_int(radix = 2, strict: true)
1139
- Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
1193
+ to_integer(base: 2, strict: strict).cast(Int32, strict: strict)
1140
1194
  end
1141
1195
 
1142
1196
  private
@@ -82,7 +82,8 @@ module Polars
82
82
  time_zone: nil,
83
83
  strict: true,
84
84
  exact: true,
85
- cache: true
85
+ cache: true,
86
+ use_earliest: nil
86
87
  )
87
88
  super
88
89
  end
@@ -232,9 +233,23 @@ module Polars
232
233
  # @return [Series]
233
234
  #
234
235
  # @example
235
- # Polars::Series.new([1, nil, 2]).str.concat("-")[0]
236
- # # => "1-null-2"
237
- def concat(delimiter = "-")
236
+ # Polars::Series.new([1, nil, 2]).str.concat("-")
237
+ # # =>
238
+ # # shape: (1,)
239
+ # # Series: '' [str]
240
+ # # [
241
+ # # "1-2"
242
+ # # ]
243
+ #
244
+ # @example
245
+ # Polars::Series.new([1, nil, 2]).str.concat("-", ignore_nulls: false)
246
+ # # =>
247
+ # # shape: (1,)
248
+ # # Series: '' [str]
249
+ # # [
250
+ # # null
251
+ # # ]
252
+ def concat(delimiter = "-", ignore_nulls: true)
238
253
  super
239
254
  end
240
255
 
data/lib/polars/utils.rb CHANGED
@@ -308,5 +308,17 @@ module Polars
308
308
 
309
309
  expr._rbexpr
310
310
  end
311
+
312
+ USE_EARLIEST_TO_AMBIGUOUS = {
313
+ true => "earliest",
314
+ false => "latest"
315
+ }
316
+
317
+ def self.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
318
+ unless use_earliest.nil?
319
+ ambiguous = USE_EARLIEST_TO_AMBIGUOUS.fetch(use_earliest)
320
+ end
321
+ ambiguous
322
+ end
311
323
  end
312
324
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.6.0"
3
+ VERSION = "0.7.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -19,6 +19,7 @@ require_relative "polars/binary_expr"
19
19
  require_relative "polars/binary_name_space"
20
20
  require_relative "polars/cat_expr"
21
21
  require_relative "polars/cat_name_space"
22
+ require_relative "polars/config"
22
23
  require_relative "polars/convert"
23
24
  require_relative "polars/plot"
24
25
  require_relative "polars/data_frame"
@@ -37,9 +38,11 @@ require_relative "polars/lazy_group_by"
37
38
  require_relative "polars/list_expr"
38
39
  require_relative "polars/list_name_space"
39
40
  require_relative "polars/meta_expr"
41
+ require_relative "polars/name_expr"
40
42
  require_relative "polars/rolling_group_by"
41
43
  require_relative "polars/series"
42
44
  require_relative "polars/slice"
45
+ require_relative "polars/sql_context"
43
46
  require_relative "polars/string_expr"
44
47
  require_relative "polars/string_name_space"
45
48
  require_relative "polars/struct_expr"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-07-24 00:00:00.000000000 Z
11
+ date: 2023-11-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -35,6 +35,7 @@ files:
35
35
  - lib/polars/binary_name_space.rb
36
36
  - lib/polars/cat_expr.rb
37
37
  - lib/polars/cat_name_space.rb
38
+ - lib/polars/config.rb
38
39
  - lib/polars/convert.rb
39
40
  - lib/polars/data_frame.rb
40
41
  - lib/polars/data_types.rb
@@ -53,10 +54,12 @@ files:
53
54
  - lib/polars/list_expr.rb
54
55
  - lib/polars/list_name_space.rb
55
56
  - lib/polars/meta_expr.rb
57
+ - lib/polars/name_expr.rb
56
58
  - lib/polars/plot.rb
57
59
  - lib/polars/rolling_group_by.rb
58
60
  - lib/polars/series.rb
59
61
  - lib/polars/slice.rb
62
+ - lib/polars/sql_context.rb
60
63
  - lib/polars/string_expr.rb
61
64
  - lib/polars/string_name_space.rb
62
65
  - lib/polars/struct_expr.rb