polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -126,9 +126,9 @@ module Polars
126
126
 
127
127
  # Parse a Series of dtype Utf8 to a Date/Datetime Series.
128
128
  #
129
- # @param datatype [Symbol]
129
+ # @param dtype [Symbol]
130
130
  # `:date`, `:datetime`, or `:time`.
131
- # @param fmt [String]
131
+ # @param format [String]
132
132
  # Format to use, refer to the
133
133
  # [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
134
134
  # for specification. Example: `"%y-%m-%d"`.
@@ -139,6 +139,13 @@ module Polars
139
139
  # - If false, allow the format to match anywhere in the target string.
140
140
  # @param cache [Boolean]
141
141
  # Use a cache of unique, converted dates to apply the datetime conversion.
142
+ # @param ambiguous ['raise', 'earliest', 'latest', 'null']
143
+ # Determine how to deal with ambiguous datetimes:
144
+ #
145
+ # - `'raise'` (default): raise
146
+ # - `'earliest'`: use the earliest datetime
147
+ # - `'latest'`: use the latest datetime
148
+ # - `'null'`: set to null
142
149
  #
143
150
  # @return [Series]
144
151
  #
@@ -180,7 +187,7 @@ module Polars
180
187
  # # 2022-01-31
181
188
  # # 2001-07-08
182
189
  # # ]
183
- def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true)
190
+ def strptime(dtype, format = nil, strict: true, exact: true, cache: true, ambiguous: "raise")
184
191
  super
185
192
  end
186
193
 
@@ -237,7 +244,6 @@ module Polars
237
244
  def len_bytes
238
245
  super
239
246
  end
240
- alias_method :lengths, :len_bytes
241
247
 
242
248
  # Return the length of each string as the number of characters.
243
249
  #
@@ -258,7 +264,6 @@ module Polars
258
264
  def len_chars
259
265
  super
260
266
  end
261
- alias_method :n_chars, :len_chars
262
267
 
263
268
  # Check if strings in Series contain a substring that matches a regex.
264
269
  #
@@ -266,6 +271,9 @@ module Polars
266
271
  # A valid regex pattern.
267
272
  # @param literal [Boolean]
268
273
  # Treat pattern as a literal string.
274
+ # @param strict [Boolean]
275
+ # Raise an error if the underlying pattern is not a valid regex,
276
+ # otherwise mask out with a null value.
269
277
  #
270
278
  # @return [Series]
271
279
  #
@@ -293,7 +301,7 @@ module Polars
293
301
  # # true
294
302
  # # null
295
303
  # # ]
296
- def contains(pattern, literal: false)
304
+ def contains(pattern, literal: false, strict: true)
297
305
  super
298
306
  end
299
307
 
@@ -358,7 +366,7 @@ module Polars
358
366
 
359
367
  # Check if string values end with a substring.
360
368
  #
361
- # @param sub [String]
369
+ # @param suffix [String]
362
370
  # Suffix substring.
363
371
  #
364
372
  # @return [Series]
@@ -374,13 +382,13 @@ module Polars
374
382
  # # true
375
383
  # # null
376
384
  # # ]
377
- def ends_with(sub)
385
+ def ends_with(suffix)
378
386
  super
379
387
  end
380
388
 
381
389
  # Check if string values start with a substring.
382
390
  #
383
- # @param sub [String]
391
+ # @param prefix [String]
384
392
  # Prefix substring.
385
393
  #
386
394
  # @return [Series]
@@ -396,7 +404,7 @@ module Polars
396
404
  # # false
397
405
  # # null
398
406
  # # ]
399
- def starts_with(sub)
407
+ def starts_with(prefix)
400
408
  super
401
409
  end
402
410
 
@@ -423,7 +431,7 @@ module Polars
423
431
  # # b"bar"
424
432
  # # null
425
433
  # # ]
426
- def decode(encoding, strict: false)
434
+ def decode(encoding, strict: true)
427
435
  super
428
436
  end
429
437
 
@@ -473,7 +481,7 @@ module Polars
473
481
  # # null
474
482
  # # {2,false}
475
483
  # # ]
476
- def json_decode(dtype = nil, infer_schema_length: 100)
484
+ def json_decode(dtype = nil, infer_schema_length: N_INFER_DEFAULT)
477
485
  if !dtype.nil?
478
486
  s = Utils.wrap_s(_s)
479
487
  return (
@@ -607,6 +615,8 @@ module Polars
607
615
  #
608
616
  # @param pattern [String]
609
617
  # A valid regex pattern
618
+ # @param literal [Boolean]
619
+ # Treat `pattern` as a literal string, not as a regular expression.
610
620
  #
611
621
  # @return [Series]
612
622
  #
@@ -620,10 +630,9 @@ module Polars
620
630
  # # 5
621
631
  # # 6
622
632
  # # ]
623
- def count_matches(pattern)
633
+ def count_matches(pattern, literal: false)
624
634
  super
625
635
  end
626
- alias_method :count_match, :count_matches
627
636
 
628
637
  # Split the string by a substring.
629
638
  #
@@ -746,6 +755,8 @@ module Polars
746
755
  # Substring to replace.
747
756
  # @param literal [Boolean]
748
757
  # Treat pattern as a literal string.
758
+ # @param n [Integer]
759
+ # Number of matches to replace.
749
760
  #
750
761
  # @return [Series]
751
762
  #
@@ -759,7 +770,7 @@ module Polars
759
770
  # # "123ABC"
760
771
  # # "abc456"
761
772
  # # ]
762
- def replace(pattern, value, literal: false)
773
+ def replace(pattern, value, literal: false, n: 1)
763
774
  super
764
775
  end
765
776
 
@@ -790,8 +801,10 @@ module Polars
790
801
 
791
802
  # Remove leading and trailing whitespace.
792
803
  #
793
- # @param matches [String, nil]
794
- # An optional single character that should be trimmed
804
+ # @param characters [String, nil]
805
+ # The set of characters to be removed. All combinations of this set of
806
+ # characters will be stripped from the start and end of the string. If set to
807
+ # nil (default), all leading and trailing whitespace is removed instead.
795
808
  #
796
809
  # @return [Series]
797
810
  #
@@ -805,14 +818,16 @@ module Polars
805
818
  # # "hello"
806
819
  # # "world"
807
820
  # # ]
808
- def strip_chars(matches = nil)
821
+ def strip_chars(characters = nil)
809
822
  super
810
823
  end
811
824
 
812
825
  # Remove leading whitespace.
813
826
  #
814
- # @param matches [String, nil]
815
- # An optional single character that should be trimmed
827
+ # @param characters [String, nil]
828
+ # The set of characters to be removed. All combinations of this set of
829
+ # characters will be stripped from the start of the string. If set to
830
+ # nil (default), all leading whitespace is removed instead.
816
831
  #
817
832
  # @return [Series]
818
833
  #
@@ -826,15 +841,16 @@ module Polars
826
841
  # # "hello "
827
842
  # # "world"
828
843
  # # ]
829
- def strip_chars_start(matches = nil)
844
+ def strip_chars_start(characters = nil)
830
845
  super
831
846
  end
832
- alias_method :lstrip, :strip_chars_start
833
847
 
834
848
  # Remove trailing whitespace.
835
849
  #
836
- # @param matches [String, nil]
837
- # An optional single character that should be trimmed
850
+ # @param characters [String, nil]
851
+ # The set of characters to be removed. All combinations of this set of
852
+ # characters will be stripped from the end of the string. If set to
853
+ # nil (default), all trailing whitespace is removed instead.
838
854
  #
839
855
  # @return [Series]
840
856
  #
@@ -848,10 +864,9 @@ module Polars
848
864
  # # " hello"
849
865
  # # "world"
850
866
  # # ]
851
- def strip_chars_end(matches = nil)
867
+ def strip_chars_end(characters = nil)
852
868
  super
853
869
  end
854
- alias_method :rstrip, :strip_chars_end
855
870
 
856
871
  # Remove prefix.
857
872
  #
@@ -985,62 +1000,6 @@ module Polars
985
1000
  super
986
1001
  end
987
1002
 
988
- # Return the string left justified in a string of length `width`.
989
- #
990
- # Padding is done using the specified `fillchar`. The original string is
991
- # returned if `width` is less than or equal to `s.length`.
992
- #
993
- # @param width [Integer]
994
- # Justify left to this length.
995
- # @param fillchar [String]
996
- # Fill with this ASCII character.
997
- #
998
- # @return [Series]
999
- #
1000
- # @example
1001
- # s = Polars::Series.new("a", ["cow", "monkey", nil, "hippopotamus"])
1002
- # s.str.ljust(8, "*")
1003
- # # =>
1004
- # # shape: (4,)
1005
- # # Series: 'a' [str]
1006
- # # [
1007
- # # "cow*****"
1008
- # # "monkey**"
1009
- # # null
1010
- # # "hippopotamus"
1011
- # # ]
1012
- def ljust(width, fillchar = " ")
1013
- super
1014
- end
1015
-
1016
- # Return the string right justified in a string of length `width`.
1017
- #
1018
- # Padding is done using the specified `fillchar`. The original string is
1019
- # returned if `width` is less than or equal to `s.length`.
1020
- #
1021
- # @param width [Integer]
1022
- # Justify right to this length.
1023
- # @param fillchar [String]
1024
- # Fill with this ASCII character.
1025
- #
1026
- # @return [Series]
1027
- #
1028
- # @example
1029
- # s = Polars::Series.new("a", ["cow", "monkey", nil, "hippopotamus"])
1030
- # s.str.rjust(8, "*")
1031
- # # =>
1032
- # # shape: (4,)
1033
- # # Series: 'a' [str]
1034
- # # [
1035
- # # "*****cow"
1036
- # # "**monkey"
1037
- # # null
1038
- # # "hippopotamus"
1039
- # # ]
1040
- def rjust(width, fillchar = " ")
1041
- super
1042
- end
1043
-
1044
1003
  # Modify the strings to their lowercase equivalent.
1045
1004
  #
1046
1005
  # @return [Series]
@@ -1339,7 +1298,7 @@ module Polars
1339
1298
  # # "Can me feel the love tonight"
1340
1299
  # # ]
1341
1300
  #
1342
- # @example Broadcast a replacement for many patterns by passing a sequence of length 1 to the `replace_with` parameter.
1301
+ # @example Broadcast a replacement for many patterns by passing an array of length 1 to the `replace_with` parameter.
1343
1302
  # s = Polars::Series.new(
1344
1303
  # "lyrics",
1345
1304
  # [
@@ -1379,7 +1338,7 @@ module Polars
1379
1338
  # # ]
1380
1339
  def replace_many(
1381
1340
  patterns,
1382
- replace_with = Expr::NO_DEFAULT,
1341
+ replace_with = NO_DEFAULT,
1383
1342
  ascii_case_insensitive: false
1384
1343
  )
1385
1344
  super
@@ -1496,7 +1455,7 @@ module Polars
1496
1455
  # The delimiter to insert between consecutive string values.
1497
1456
  # @param ignore_nulls [Boolean]
1498
1457
  # Ignore null values (default).
1499
- # If set to `False`, null values will be propagated. This means that
1458
+ # If set to `false`, null values will be propagated. This means that
1500
1459
  # if the column contains any null values, the output is null.
1501
1460
  #
1502
1461
  # @return [Series]
@@ -1518,10 +1477,15 @@ module Polars
1518
1477
  # # [
1519
1478
  # # null
1520
1479
  # # ]
1521
- def join(delimiter = "-", ignore_nulls: true)
1480
+ def join(delimiter = nil, ignore_nulls: true)
1481
+ # TODO update
1482
+ if delimiter.nil?
1483
+ warn "The default `delimiter` for `join` method will change from `-` to empty string in a future version"
1484
+ delimiter = "-"
1485
+ end
1486
+
1522
1487
  super
1523
1488
  end
1524
- alias_method :concat, :join
1525
1489
 
1526
1490
  # Returns string values with all regular expression meta characters escaped.
1527
1491
  #
@@ -26,34 +26,52 @@ module Polars
26
26
  #
27
27
  # @param name [String]
28
28
  # Name of the field
29
+ # @param more_names [Array]
30
+ # Additional struct field names.
29
31
  #
30
32
  # @return [Expr]
31
33
  #
32
34
  # @example
33
- # df = (
34
- # Polars::DataFrame.new(
35
- # {
36
- # "int" => [1, 2],
37
- # "str" => ["a", "b"],
38
- # "bool" => [true, nil],
39
- # "list" => [[1, 2], [3]]
40
- # }
41
- # )
42
- # .to_struct("my_struct")
43
- # .to_frame
44
- # )
45
- # df.select(Polars.col("my_struct").struct.field("str"))
35
+ # df = Polars::DataFrame.new(
36
+ # {
37
+ # "aaa" => [1, 2],
38
+ # "bbb" => ["ab", "cd"],
39
+ # "ccc" => [true, nil],
40
+ # "ddd" => [[1, 2], [3]]
41
+ # }
42
+ # ).select(Polars.struct("aaa", "bbb", "ccc", "ddd").alias("struct_col"))
43
+ # df.select(Polars.col("struct_col").struct.field("bbb"))
46
44
  # # =>
47
45
  # # shape: (2, 1)
48
46
  # # ┌─────┐
49
- # # │ str
47
+ # # │ bbb
50
48
  # # │ --- │
51
49
  # # │ str │
52
50
  # # ╞═════╡
53
- # # │ a
54
- # # │ b
51
+ # # │ ab
52
+ # # │ cd
55
53
  # # └─────┘
56
- def field(name)
54
+ #
55
+ # @example
56
+ # df.select(Polars.col("struct_col").struct.field("aaa", "bbb"))
57
+ # # =>
58
+ # # shape: (2, 2)
59
+ # # ┌─────┬─────┐
60
+ # # │ aaa ┆ bbb │
61
+ # # │ --- ┆ --- │
62
+ # # │ i64 ┆ str │
63
+ # # ╞═════╪═════╡
64
+ # # │ 1 ┆ ab │
65
+ # # │ 2 ┆ cd │
66
+ # # └─────┴─────┘
67
+ def field(name, *more_names)
68
+ if more_names.any?
69
+ name = (name.is_a?(::String) ? [name] : name) + more_names
70
+ end
71
+ if name.is_a?(::Array)
72
+ return Utils.wrap_expr(_rbexpr.struct_multiple_fields(name))
73
+ end
74
+
57
75
  Utils.wrap_expr(_rbexpr.struct_field_by_name(name))
58
76
  end
59
77
 
@@ -107,7 +125,7 @@ module Polars
107
125
  # .to_struct("my_struct")
108
126
  # .to_frame
109
127
  # )
110
- # df = df.with_column(
128
+ # df = df.with_columns(
111
129
  # Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
112
130
  # )
113
131
  # df.select(Polars.col("my_struct").struct.field("INT"))