polars-df 0.11.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +360 -361
  4. data/ext/polars/Cargo.toml +10 -7
  5. data/ext/polars/src/batched_csv.rs +1 -1
  6. data/ext/polars/src/conversion/any_value.rs +261 -0
  7. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  8. data/ext/polars/src/conversion/mod.rs +51 -10
  9. data/ext/polars/src/dataframe/construction.rs +6 -8
  10. data/ext/polars/src/dataframe/general.rs +19 -29
  11. data/ext/polars/src/dataframe/io.rs +43 -33
  12. data/ext/polars/src/error.rs +26 -4
  13. data/ext/polars/src/expr/categorical.rs +0 -10
  14. data/ext/polars/src/expr/datetime.rs +4 -12
  15. data/ext/polars/src/expr/general.rs +123 -110
  16. data/ext/polars/src/expr/mod.rs +2 -2
  17. data/ext/polars/src/expr/rolling.rs +17 -9
  18. data/ext/polars/src/expr/string.rs +2 -6
  19. data/ext/polars/src/functions/eager.rs +10 -10
  20. data/ext/polars/src/functions/lazy.rs +21 -21
  21. data/ext/polars/src/functions/range.rs +6 -12
  22. data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
  23. data/ext/polars/src/lazyframe/mod.rs +81 -98
  24. data/ext/polars/src/lib.rs +55 -45
  25. data/ext/polars/src/map/dataframe.rs +2 -2
  26. data/ext/polars/src/rb_modules.rs +25 -1
  27. data/ext/polars/src/series/aggregation.rs +4 -2
  28. data/ext/polars/src/series/arithmetic.rs +21 -11
  29. data/ext/polars/src/series/construction.rs +56 -38
  30. data/ext/polars/src/series/export.rs +1 -1
  31. data/ext/polars/src/series/mod.rs +31 -10
  32. data/ext/polars/src/sql.rs +3 -1
  33. data/lib/polars/array_expr.rb +4 -4
  34. data/lib/polars/batched_csv_reader.rb +2 -2
  35. data/lib/polars/cat_expr.rb +0 -36
  36. data/lib/polars/cat_name_space.rb +0 -37
  37. data/lib/polars/data_frame.rb +93 -101
  38. data/lib/polars/data_types.rb +1 -1
  39. data/lib/polars/date_time_expr.rb +525 -573
  40. data/lib/polars/date_time_name_space.rb +263 -464
  41. data/lib/polars/dynamic_group_by.rb +3 -3
  42. data/lib/polars/exceptions.rb +3 -0
  43. data/lib/polars/expr.rb +367 -330
  44. data/lib/polars/expr_dispatch.rb +1 -1
  45. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  46. data/lib/polars/functions/as_datatype.rb +63 -40
  47. data/lib/polars/functions/lazy.rb +63 -14
  48. data/lib/polars/functions/lit.rb +1 -1
  49. data/lib/polars/functions/range/date_range.rb +18 -77
  50. data/lib/polars/functions/range/datetime_range.rb +4 -4
  51. data/lib/polars/functions/range/int_range.rb +2 -2
  52. data/lib/polars/functions/range/time_range.rb +4 -4
  53. data/lib/polars/functions/repeat.rb +1 -1
  54. data/lib/polars/functions/whenthen.rb +1 -1
  55. data/lib/polars/io/csv.rb +8 -8
  56. data/lib/polars/io/ipc.rb +3 -3
  57. data/lib/polars/io/json.rb +13 -2
  58. data/lib/polars/io/ndjson.rb +15 -4
  59. data/lib/polars/io/parquet.rb +5 -4
  60. data/lib/polars/lazy_frame.rb +120 -106
  61. data/lib/polars/lazy_group_by.rb +1 -1
  62. data/lib/polars/list_expr.rb +11 -11
  63. data/lib/polars/list_name_space.rb +5 -1
  64. data/lib/polars/rolling_group_by.rb +5 -7
  65. data/lib/polars/series.rb +105 -189
  66. data/lib/polars/string_expr.rb +42 -67
  67. data/lib/polars/string_name_space.rb +5 -4
  68. data/lib/polars/testing.rb +2 -2
  69. data/lib/polars/utils/constants.rb +9 -0
  70. data/lib/polars/utils/convert.rb +97 -0
  71. data/lib/polars/utils/parse.rb +89 -0
  72. data/lib/polars/utils/various.rb +76 -0
  73. data/lib/polars/utils/wrap.rb +19 -0
  74. data/lib/polars/utils.rb +4 -330
  75. data/lib/polars/version.rb +1 -1
  76. data/lib/polars/whenthen.rb +6 -6
  77. data/lib/polars.rb +11 -0
  78. metadata +9 -4
  79. data/ext/polars/src/conversion/anyvalue.rs +0 -186
@@ -39,7 +39,7 @@ module Polars
39
39
  # # ]
40
40
  def to_date(format = nil, strict: true, exact: true, cache: true)
41
41
  _validate_format_argument(format)
42
- Utils.wrap_expr(self._rbexpr.str_to_date(format, strict, exact, cache))
42
+ Utils.wrap_expr(_rbexpr.str_to_date(format, strict, exact, cache))
43
43
  end
44
44
 
45
45
  # Convert a Utf8 column into a Datetime column.
@@ -83,14 +83,14 @@ module Polars
83
83
  strict: true,
84
84
  exact: true,
85
85
  cache: true,
86
- use_earliest: nil,
87
86
  ambiguous: "raise"
88
87
  )
89
88
  _validate_format_argument(format)
90
- ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
91
- ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
89
+ unless ambiguous.is_a?(Expr)
90
+ ambiguous = Polars.lit(ambiguous)
91
+ end
92
92
  Utils.wrap_expr(
93
- self._rbexpr.str_to_datetime(
93
+ _rbexpr.str_to_datetime(
94
94
  format,
95
95
  time_unit,
96
96
  time_zone,
@@ -331,7 +331,7 @@ module Polars
331
331
  #
332
332
  # @example
333
333
  # df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
334
- # df.select(Polars.col("foo").str.concat("-"))
334
+ # df.select(Polars.col("foo").str.join("-"))
335
335
  # # =>
336
336
  # # shape: (1, 1)
337
337
  # # ┌─────┐
@@ -344,7 +344,7 @@ module Polars
344
344
  #
345
345
  # @example
346
346
  # df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
347
- # df.select(Polars.col("foo").str.concat("-", ignore_nulls: false))
347
+ # df.select(Polars.col("foo").str.join("-", ignore_nulls: false))
348
348
  # # =>
349
349
  # # shape: (1, 1)
350
350
  # # ┌──────┐
@@ -354,9 +354,10 @@ module Polars
354
354
  # # ╞══════╡
355
355
  # # │ null │
356
356
  # # └──────┘
357
- def concat(delimiter = "-", ignore_nulls: true)
358
- Utils.wrap_expr(_rbexpr.str_concat(delimiter, ignore_nulls))
357
+ def join(delimiter = "-", ignore_nulls: true)
358
+ Utils.wrap_expr(_rbexpr.str_join(delimiter, ignore_nulls))
359
359
  end
360
+ alias_method :concat, :join
360
361
 
361
362
  # Transform to uppercase variant.
362
363
  #
@@ -446,7 +447,7 @@ module Polars
446
447
  # # │ both │
447
448
  # # └───────┘
448
449
  def strip_chars(characters = nil)
449
- characters = Utils.parse_as_expression(characters, str_as_lit: true)
450
+ characters = Utils.parse_into_expression(characters, str_as_lit: true)
450
451
  Utils.wrap_expr(_rbexpr.str_strip_chars(characters))
451
452
  end
452
453
  alias_method :strip, :strip_chars
@@ -473,7 +474,7 @@ module Polars
473
474
  # # │ both │
474
475
  # # └────────┘
475
476
  def strip_chars_start(characters = nil)
476
- characters = Utils.parse_as_expression(characters, str_as_lit: true)
477
+ characters = Utils.parse_into_expression(characters, str_as_lit: true)
477
478
  Utils.wrap_expr(_rbexpr.str_strip_chars_start(characters))
478
479
  end
479
480
  alias_method :lstrip, :strip_chars_start
@@ -500,7 +501,7 @@ module Polars
500
501
  # # │ both │
501
502
  # # └───────┘
502
503
  def strip_chars_end(characters = nil)
503
- characters = Utils.parse_as_expression(characters, str_as_lit: true)
504
+ characters = Utils.parse_into_expression(characters, str_as_lit: true)
504
505
  Utils.wrap_expr(_rbexpr.str_strip_chars_end(characters))
505
506
  end
506
507
  alias_method :rstrip, :strip_chars_end
@@ -530,7 +531,7 @@ module Polars
530
531
  # # │ bar ┆ bar │
531
532
  # # └───────────┴──────────┘
532
533
  def strip_prefix(prefix)
533
- prefix = Utils.parse_as_expression(prefix, str_as_lit: true)
534
+ prefix = Utils.parse_into_expression(prefix, str_as_lit: true)
534
535
  Utils.wrap_expr(_rbexpr.str_strip_prefix(prefix))
535
536
  end
536
537
 
@@ -560,7 +561,7 @@ module Polars
560
561
  # # │ bar ┆ │
561
562
  # # └───────────┴──────────┘
562
563
  def strip_suffix(suffix)
563
- suffix = Utils.parse_as_expression(suffix, str_as_lit: true)
564
+ suffix = Utils.parse_into_expression(suffix, str_as_lit: true)
564
565
  Utils.wrap_expr(_rbexpr.str_strip_suffix(suffix))
565
566
  end
566
567
 
@@ -654,7 +655,7 @@ module Polars
654
655
  # # │ null ┆ null │
655
656
  # # └────────┴────────┘
656
657
  def zfill(length)
657
- length = Utils.parse_as_expression(length)
658
+ length = Utils.parse_into_expression(length)
658
659
  Utils.wrap_expr(_rbexpr.str_zfill(length))
659
660
  end
660
661
 
@@ -689,7 +690,7 @@ module Polars
689
690
  # # │ null ┆ null ┆ null │
690
691
  # # └─────────────┴───────┴─────────┘
691
692
  def contains(pattern, literal: false, strict: true)
692
- pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)._rbexpr
693
+ pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
693
694
  Utils.wrap_expr(_rbexpr.str_contains(pattern, literal, strict))
694
695
  end
695
696
 
@@ -729,7 +730,7 @@ module Polars
729
730
  # # │ mango │
730
731
  # # └────────┘
731
732
  def ends_with(sub)
732
- sub = Utils.expr_to_lit_or_expr(sub, str_to_lit: true)._rbexpr
733
+ sub = Utils.parse_into_expression(sub, str_as_lit: true)
733
734
  Utils.wrap_expr(_rbexpr.str_ends_with(sub))
734
735
  end
735
736
 
@@ -769,7 +770,7 @@ module Polars
769
770
  # # │ apple │
770
771
  # # └────────┘
771
772
  def starts_with(sub)
772
- sub = Utils.expr_to_lit_or_expr(sub, str_to_lit: true)._rbexpr
773
+ sub = Utils.parse_into_expression(sub, str_as_lit: true)
773
774
  Utils.wrap_expr(_rbexpr.str_starts_with(sub))
774
775
  end
775
776
 
@@ -840,7 +841,7 @@ module Polars
840
841
  # # │ true │
841
842
  # # └──────────┘
842
843
  def json_path_match(json_path)
843
- json_path = Utils.parse_as_expression(json_path, str_as_lit: true)
844
+ json_path = Utils.parse_into_expression(json_path, str_as_lit: true)
844
845
  Utils.wrap_expr(_rbexpr.str_json_path_match(json_path))
845
846
  end
846
847
 
@@ -940,7 +941,7 @@ module Polars
940
941
  # # │ 678 │
941
942
  # # └─────┘
942
943
  def extract(pattern, group_index: 1)
943
- pattern = Utils.parse_as_expression(pattern, str_as_lit: true)
944
+ pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
944
945
  Utils.wrap_expr(_rbexpr.str_extract(pattern, group_index))
945
946
  end
946
947
 
@@ -972,8 +973,8 @@ module Polars
972
973
  # # │ ["678", "910"] │
973
974
  # # └────────────────┘
974
975
  def extract_all(pattern)
975
- pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
976
- Utils.wrap_expr(_rbexpr.str_extract_all(pattern._rbexpr))
976
+ pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
977
+ Utils.wrap_expr(_rbexpr.str_extract_all(pattern))
977
978
  end
978
979
 
979
980
  # Extract all capture groups for the given regex pattern.
@@ -1057,7 +1058,7 @@ module Polars
1057
1058
  # # │ 6 │
1058
1059
  # # └──────────────┘
1059
1060
  def count_matches(pattern, literal: false)
1060
- pattern = Utils.parse_as_expression(pattern, str_as_lit: true)
1061
+ pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
1061
1062
  Utils.wrap_expr(_rbexpr.str_count_matches(pattern, literal))
1062
1063
  end
1063
1064
  alias_method :count_match, :count_matches
@@ -1086,12 +1087,11 @@ module Polars
1086
1087
  # # │ ["foo", "bar", "baz"] │
1087
1088
  # # └───────────────────────┘
1088
1089
  def split(by, inclusive: false)
1089
- by = Utils.parse_as_expression(by, str_as_lit: true)
1090
+ by = Utils.parse_into_expression(by, str_as_lit: true)
1090
1091
  if inclusive
1091
- Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
1092
- else
1093
- Utils.wrap_expr(_rbexpr.str_split(by))
1092
+ return Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
1094
1093
  end
1094
+ Utils.wrap_expr(_rbexpr.str_split(by))
1095
1095
  end
1096
1096
 
1097
1097
  # Split the string by a substring using `n` splits.
@@ -1129,7 +1129,7 @@ module Polars
1129
1129
  # # │ {"d","4"} │
1130
1130
  # # └─────────────┘
1131
1131
  def split_exact(by, n, inclusive: false)
1132
- by = Utils.parse_as_expression(by, str_as_lit: true)
1132
+ by = Utils.parse_into_expression(by, str_as_lit: true)
1133
1133
  if inclusive
1134
1134
  Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
1135
1135
  else
@@ -1166,7 +1166,7 @@ module Polars
1166
1166
  # # │ {"foo","bar baz"} │
1167
1167
  # # └───────────────────┘
1168
1168
  def splitn(by, n)
1169
- by = Utils.parse_as_expression(by, str_as_lit: true)
1169
+ by = Utils.parse_into_expression(by, str_as_lit: true)
1170
1170
  Utils.wrap_expr(_rbexpr.str_splitn(by, n))
1171
1171
  end
1172
1172
 
@@ -1197,9 +1197,9 @@ module Polars
1197
1197
  # # │ 2 ┆ abc456 │
1198
1198
  # # └─────┴────────┘
1199
1199
  def replace(pattern, value, literal: false, n: 1)
1200
- pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
1201
- value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
1202
- Utils.wrap_expr(_rbexpr.str_replace_n(pattern._rbexpr, value._rbexpr, literal, n))
1200
+ pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
1201
+ value = Utils.parse_into_expression(value, str_as_lit: true)
1202
+ Utils.wrap_expr(_rbexpr.str_replace_n(pattern, value, literal, n))
1203
1203
  end
1204
1204
 
1205
1205
  # Replace all matching regex/literal substrings with a new string value.
@@ -1227,9 +1227,9 @@ module Polars
1227
1227
  # # │ 2 ┆ 123-123 │
1228
1228
  # # └─────┴─────────┘
1229
1229
  def replace_all(pattern, value, literal: false)
1230
- pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
1231
- value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
1232
- Utils.wrap_expr(_rbexpr.str_replace_all(pattern._rbexpr, value._rbexpr, literal))
1230
+ pattern = Utils.parse_into_expression(pattern, str_as_lit: true)
1231
+ value = Utils.parse_into_expression(value, str_as_lit: true)
1232
+ Utils.wrap_expr(_rbexpr.str_replace_all(pattern, value, literal))
1233
1233
  end
1234
1234
 
1235
1235
  # Returns string values in reversed order.
@@ -1282,36 +1282,11 @@ module Polars
1282
1282
  # # │ dragonfruit ┆ uit │
1283
1283
  # # └─────────────┴──────────┘
1284
1284
  def slice(offset, length = nil)
1285
- offset = Utils.parse_as_expression(offset)
1286
- length = Utils.parse_as_expression(length)
1285
+ offset = Utils.parse_into_expression(offset)
1286
+ length = Utils.parse_into_expression(length)
1287
1287
  Utils.wrap_expr(_rbexpr.str_slice(offset, length))
1288
1288
  end
1289
1289
 
1290
- # Returns a column with a separate row for every string character.
1291
- #
1292
- # @return [Expr]
1293
- #
1294
- # @example
1295
- # df = Polars::DataFrame.new({"a": ["foo", "bar"]})
1296
- # df.select(Polars.col("a").str.explode)
1297
- # # =>
1298
- # # shape: (6, 1)
1299
- # # ┌─────┐
1300
- # # │ a │
1301
- # # │ --- │
1302
- # # │ str │
1303
- # # ╞═════╡
1304
- # # │ f │
1305
- # # │ o │
1306
- # # │ o │
1307
- # # │ b │
1308
- # # │ a │
1309
- # # │ r │
1310
- # # └─────┘
1311
- def explode
1312
- Utils.wrap_expr(_rbexpr.str_explode)
1313
- end
1314
-
1315
1290
  # Convert an Utf8 column into an Int64 column with base radix.
1316
1291
  #
1317
1292
  # @param base [Integer]
@@ -1355,7 +1330,7 @@ module Polars
1355
1330
  # # │ null ┆ null │
1356
1331
  # # └──────┴────────┘
1357
1332
  def to_integer(base: 10, strict: true)
1358
- base = Utils.parse_as_expression(base, str_as_lit: false)
1333
+ base = Utils.parse_into_expression(base, str_as_lit: false)
1359
1334
  Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
1360
1335
  end
1361
1336
 
@@ -1429,7 +1404,7 @@ module Polars
1429
1404
  # # │ Can you feel the love tonight ┆ true │
1430
1405
  # # └─────────────────────────────────┴──────────────┘
1431
1406
  def contains_any(patterns, ascii_case_insensitive: false)
1432
- patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
1407
+ patterns = Utils.parse_into_expression(patterns, str_as_lit: false, list_as_series: true)
1433
1408
  Utils.wrap_expr(
1434
1409
  _rbexpr.str_contains_any(patterns, ascii_case_insensitive)
1435
1410
  )
@@ -1500,9 +1475,9 @@ module Polars
1500
1475
  # # │ Can you feel the love tonight ┆ Can me feel the love tonight │
1501
1476
  # # └─────────────────────────────────┴─────────────────────────────────┘
1502
1477
  def replace_many(patterns, replace_with, ascii_case_insensitive: false)
1503
- patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
1504
- replace_with = Utils.parse_as_expression(
1505
- replace_with, str_as_lit: true, list_as_lit: false
1478
+ patterns = Utils.parse_into_expression(patterns, str_as_lit: false, list_as_series: true)
1479
+ replace_with = Utils.parse_into_expression(
1480
+ replace_with, str_as_lit: true, list_as_series: true
1506
1481
  )
1507
1482
  Utils.wrap_expr(
1508
1483
  _rbexpr.str_replace_many(
@@ -83,7 +83,7 @@ module Polars
83
83
  strict: true,
84
84
  exact: true,
85
85
  cache: true,
86
- use_earliest: nil
86
+ ambiguous: "raise"
87
87
  )
88
88
  super
89
89
  end
@@ -233,7 +233,7 @@ module Polars
233
233
  # @return [Series]
234
234
  #
235
235
  # @example
236
- # Polars::Series.new([1, nil, 2]).str.concat("-")
236
+ # Polars::Series.new([1, nil, 2]).str.join("-")
237
237
  # # =>
238
238
  # # shape: (1,)
239
239
  # # Series: '' [str]
@@ -242,16 +242,17 @@ module Polars
242
242
  # # ]
243
243
  #
244
244
  # @example
245
- # Polars::Series.new([1, nil, 2]).str.concat("-", ignore_nulls: false)
245
+ # Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
246
246
  # # =>
247
247
  # # shape: (1,)
248
248
  # # Series: '' [str]
249
249
  # # [
250
250
  # # null
251
251
  # # ]
252
- def concat(delimiter = "-", ignore_nulls: true)
252
+ def join(delimiter = "-", ignore_nulls: true)
253
253
  super
254
254
  end
255
+ alias_method :concat, :join
255
256
 
256
257
  # Check if strings in Series contain a substring that matches a regex.
257
258
  #
@@ -271,9 +271,9 @@ module Polars
271
271
 
272
272
  def _assert_correct_input_type(left, right)
273
273
  if left.is_a?(DataFrame) && right.is_a?(DataFrame)
274
- return false
274
+ false
275
275
  elsif left.is_a?(LazyFrame) && right.is_a?(DataFrame)
276
- return true
276
+ true
277
277
  else
278
278
  raise_assertion_error(
279
279
  "inputs",
@@ -0,0 +1,9 @@
1
module Polars
  module Utils
    # Unit-conversion factors for time arithmetic.

    # Number of seconds in one hour (60 minutes of 60 seconds).
    SECONDS_PER_HOUR = 60 * 60
    # Number of seconds in one 24-hour day.
    SECONDS_PER_DAY = 24 * SECONDS_PER_HOUR

    # Milliseconds, microseconds and nanoseconds contained in one second.
    MS_PER_SECOND = 10**3
    US_PER_SECOND = 10**6
    NS_PER_SECOND = 10**9
  end
end
@@ -0,0 +1,97 @@
1
module Polars
  module Utils
    # Normalize a duration argument.
    #
    # @param td [String, Object, nil] duration given as a string, nil, or another object
    # @return [Object] nil and strings are returned unchanged; anything else is
    #   passed to `_timedelta_to_duration_string`
    def self.parse_as_duration_string(td)
      return td if td.nil? || td.is_a?(::String)

      # NOTE(review): `_timedelta_to_duration_string` is not defined in this file
      # (only `_timedelta_to_pl_duration` is) - confirm it is provided elsewhere.
      _timedelta_to_duration_string(td)
    end

    # Identity conversion: returns the given duration unchanged.
    #
    # @param td [Object]
    # @return [Object] the same object
    def self._timedelta_to_pl_duration(td)
      td
    end

    # Flip the sign of a duration string by toggling a leading "-".
    #
    # @param duration [String] e.g. "1d" or "-1d"
    # @return [String] e.g. "-1d" or "1d"
    def self.negate_duration_string(duration)
      duration.start_with?("-") ? duration[1..] : "-#{duration}"
    end

    # Convert a date-like object to whole days since the Unix epoch.
    #
    # @param d [#to_datetime]
    # @return [Integer]
    def self.date_to_int(d)
      d.to_datetime.to_time.to_i / SECONDS_PER_DAY
    end

    # Convert a datetime-like object to an integer count of `time_unit`s since
    # the Unix epoch.
    #
    # @param dt [#to_datetime]
    # @param time_unit [String, nil] "ns", "us" or "ms"; nil is treated as "ns"
    # @return [Integer]
    # @raise [ArgumentError] if `time_unit` is not one of the supported units
    def self.datetime_to_int(dt, time_unit)
      dt = dt.to_datetime.to_time
      case time_unit
      when "ns", nil
        # Ruby has ns precision, so a missing unit falls back to nanoseconds
        dt.to_i * NS_PER_SECOND + dt.nsec
      when "us"
        dt.to_i * US_PER_SECOND + dt.usec
      when "ms"
        dt.to_i * MS_PER_SECOND + dt.usec / 1000
      else
        # The message previously interpolated an undefined local (`tu`), which
        # raised NameError instead of this ArgumentError.
        raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
      end
    end

    # Convert a day count since the Unix epoch to a Date.
    #
    # @param value [Integer] days since 1970-01-01
    # @return [Date]
    def self._to_ruby_date(value)
      # days to seconds
      # important to create from utc. Not doing this leads
      # to inconsistencies dependent on the timezone you are in.
      ::Time.at(value * SECONDS_PER_DAY).utc.to_date
    end

    # Convert a nanosecond offset from midnight into a UTC Time on 2000-01-01.
    #
    # @param value [Integer] nanoseconds since midnight
    # @return [Time]
    def self._to_ruby_time(value)
      return ::Time.utc(2000, 1, 1) if value == 0

      seconds, nanoseconds = value.divmod(1_000_000_000)
      minutes, seconds = seconds.divmod(60)
      hours, minutes = minutes.divmod(60)
      # the final argument of Time.utc is in microseconds
      ::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
    end

    # Convert an integer epoch offset into a UTC Time.
    #
    # @param value [Integer] count of `time_unit`s since the Unix epoch
    # @param time_unit [String] "ns", "us" or "ms"
    # @param time_zone [String, nil] only nil, "" and "UTC" are handled
    # @return [Time]
    # @raise [ArgumentError] if `time_unit` is not one of the supported units
    # @raise [Todo] for any other time zone
    def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
      raise Todo unless time_zone.nil? || time_zone == "" || time_zone == "UTC"

      case time_unit
      when "ns"
        ::Time.at(value / 1_000_000_000, value % 1_000_000_000, :nsec).utc
      when "us"
        ::Time.at(value / 1_000_000, value % 1_000_000, :usec).utc
      when "ms"
        ::Time.at(value / 1_000, value % 1_000, :millisecond).utc
      else
        raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
      end
    end

    # Convert an integer duration into (floating point) seconds.
    #
    # @param value [Integer] count of `time_unit`s
    # @param time_unit [String] "ns", "us" or "ms"
    # @return [Float]
    # @raise [ArgumentError] if `time_unit` is not one of the supported units
    def self._to_ruby_duration(value, time_unit = "ns")
      case time_unit
      when "ns"
        value / 1e9
      when "us"
        value / 1e6
      when "ms"
        value / 1e3
      else
        raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
      end
    end

    # Build a BigDecimal from a mantissa and a base-10 exponent.
    #
    # @param digits [Integer, String] mantissa
    # @param scale [Integer] exponent placed after the "e"
    # @return [BigDecimal]
    def self._to_ruby_decimal(digits, scale)
      BigDecimal("#{digits}e#{scale}")
    end
  end
end
@@ -0,0 +1,89 @@
1
module Polars
  module Utils
    # Coerce an arbitrary input into the `_rbexpr` of an expression.
    #
    # @param input [Object] an Expr, a String/Symbol, an Array, or any other value
    # @param str_as_lit [Boolean] when false, strings and symbols are passed to
    #   `F.col`; when true they are passed to `F.lit`
    # @param list_as_series [Boolean] when true, an Array is wrapped in a Series
    #   before being passed to `F.lit`
    # @param structify [Boolean] when true, an Expr input is passed through
    #   `_structify_expression`
    # @param dtype [Object, nil] forwarded to `F.lit` as `dtype:`
    # @return [Object] the `_rbexpr` of the resulting expression
    def self.parse_into_expression(
      input,
      str_as_lit: false,
      list_as_series: false,
      structify: false,
      dtype: nil
    )
      parsed =
        if input.is_a?(Expr)
          structify ? _structify_expression(input) : input
        elsif !str_as_lit && (input.is_a?(::String) || input.is_a?(Symbol))
          F.col(input)
        elsif list_as_series && input.is_a?(::Array)
          F.lit(Series.new(input), dtype: dtype)
        else
          F.lit(input, dtype: dtype)
        end

      parsed._rbexpr
    end

    # Parse positional and keyword inputs into one flat list of expressions.
    # Keyword inputs are aliased to their key and appended after the
    # positional ones.
    #
    # @param inputs [Array<Object>] positional inputs (or a single Array of them)
    # @param __structify [Boolean] forwarded as `structify:` to the parsers
    # @param named_inputs [Hash] inputs keyed by their alias
    # @return [Array]
    def self.parse_into_list_of_expressions(*inputs, __structify: false, **named_inputs)
      parsed = _parse_positional_inputs(inputs, structify: __structify)
      return parsed unless named_inputs.any?

      parsed.concat(_parse_named_inputs(named_inputs, structify: __structify))
    end

    # Parse every positional input with `parse_into_expression`.
    #
    # @return [Array]
    def self._parse_positional_inputs(inputs, structify: false)
      _parse_inputs_as_iterable(inputs).map do |item|
        parse_into_expression(item, structify: structify)
      end
    end

    # Unwrap the inputs when they consist of exactly one Array.
    #
    # @param inputs [Array]
    # @return [Array] a new empty array, the single nested array, or `inputs`
    def self._parse_inputs_as_iterable(inputs)
      return [] if inputs.empty?

      sole_array = inputs.length == 1 && inputs[0].is_a?(::Array)
      sole_array ? inputs[0] : inputs
    end

    # Parse each keyword input and alias it to the stringified key.
    #
    # @return [Array]
    def self._parse_named_inputs(named_inputs, structify: false)
      named_inputs.map do |name, input|
        rbexpr = parse_into_expression(input, structify: structify)
        rbexpr._alias(name.to_s)
      end
    end

    # Combine positional predicates and keyword equality constraints into a
    # single predicate.
    #
    # @raise [TypeError] when neither predicates nor constraints are given
    def self.parse_predicates_constraints_into_expression(*predicates, **constraints)
      combined = _parse_positional_inputs(predicates)
      combined.concat(_parse_constraints(constraints)) if constraints.any?
      _combine_predicates(combined)
    end

    # Turn each `name => value` pair into the `_rbexpr` of `col(name).eq(value)`.
    #
    # @return [Array]
    def self._parse_constraints(constraints)
      constraints.map { |name, value| Polars.col(name).eq(value)._rbexpr }
    end

    # Reduce a list of predicates to one: a lone predicate is returned as is,
    # several are handed to `Plr.all_horizontal`.
    #
    # @raise [TypeError] when the list holds no (truthy) predicate
    def self._combine_predicates(predicates)
      raise TypeError, "at least one predicate or constraint must be provided" if predicates.none?

      predicates.length == 1 ? predicates[0] : Plr.all_horizontal(predicates)
    end
  end
end
@@ -0,0 +1,76 @@
1
module Polars
  module Utils
    # Convert a Hash of null values into an array of `[key, value]` pairs;
    # any other input is returned unchanged.
    #
    # @param null_values [Hash, Object]
    # @return [Array, Object]
    def self._process_null_values(null_values)
      null_values.is_a?(Hash) ? null_values.to_a : null_values
    end

    # Whether every element of `val` is an instance of `eltype`.
    #
    # @param val [#all?]
    # @param eltype [Class, Module]
    # @return [Boolean]
    def self._is_iterable_of(val, eltype)
      val.all? { |x| x.is_a?(eltype) }
    end

    # Whether `val` is an Array containing only `true` / `false`.
    #
    # @return [Boolean]
    def self.is_bool_sequence(val)
      val.is_a?(::Array) && val.all? { |x| x == true || x == false }
    end

    # Whether `val` is an Array containing only Integers.
    #
    # @return [Boolean]
    def self.is_int_sequence(val)
      val.is_a?(::Array) && _is_iterable_of(val, Integer)
    end

    # Whether `val` is an Array containing only Strings.
    #
    # @param allow_str [Boolean]
    # @return [Boolean]
    def self.is_str_sequence(val, allow_str: false)
      return false if allow_str == false && val.is_a?(::String)

      # NOTE(review): a bare String is not an Array, so it yields false here
      # even with `allow_str: true` - confirm that is intended.
      val.is_a?(::Array) && _is_iterable_of(val, ::String)
    end

    # Best-effort length of an object.
    #
    # @param obj [Object]
    # @return [Integer, nil] nil for Strings and for anything whose length
    #   cannot be determined (any StandardError is swallowed on purpose)
    def self.arrlen(obj)
      case obj
      when Range
        # size only works for numeric ranges
        obj.to_a.length
      when ::String
        nil
      else
        obj.length
      end
    rescue
      nil
    end

    # Expand a path and optionally reject directories.
    #
    # @param path [String]
    # @param check_not_directory [Boolean]
    # @return [String] the expanded path
    # @raise [ArgumentError] if the path is an existing directory and
    #   `check_not_directory` is true
    def self.normalize_filepath(path, check_not_directory: true)
      path = File.expand_path(path)
      if check_not_directory && File.directory?(path)
        raise ArgumentError, "Expected a file path; #{path} is a directory"
      end
      path
    end

    # Scale a byte count to another unit, selected by the first character of `to`.
    #
    # @param sz [Numeric] size in bytes
    # @param to [String] unit starting with "b", "k", "m", "g" or "t"
    # @return [Numeric] `sz` unchanged for bytes, otherwise a Float
    # @raise [ArgumentError] if the unit is not recognized (previously this
    #   surfaced as a NoMethodError on nil)
    def self.scale_bytes(sz, to:)
      factors = {
        "b" => 1,
        "k" => 1024,
        "m" => 1024 ** 2,
        "g" => 1024 ** 3,
        "t" => 1024 ** 4
      }
      scaling_factor = factors.fetch(to[0]) do
        raise ArgumentError, "`to` must start with one of 'b', 'k', 'm', 'g', 't', got #{to.inspect}"
      end
      scaling_factor > 1 ? sz / scaling_factor.to_f : sz
    end

    # Broadcast a single boolean to `n_match` entries, or validate that a
    # given collection already has that length.
    #
    # @param value [Boolean, Array]
    # @param n_match [Integer] required length
    # @param value_name [String] name used for `value` in the error message
    # @param match_name [String] name used for the matched argument in the error message
    # @return [Array]
    # @raise [ValueError] if the lengths differ
    def self.extend_bool(value, n_match, value_name, match_name)
      values = bool?(value) ? [value] * n_match : value
      if n_match != values.length
        msg = "the length of `#{value_name}` (#{values.length}) does not match the length of `#{match_name}` (#{n_match})"
        raise ValueError, msg
      end
      values
    end
  end
end
@@ -0,0 +1,19 @@
1
module Polars
  module Utils
    # Helpers that hand a low-level handle to the matching class constructor.
    class << self
      # @param df [Object] handle passed to `DataFrame._from_rbdf`
      # @return [Object] whatever `DataFrame._from_rbdf` returns
      def wrap_df(df)
        DataFrame._from_rbdf(df)
      end

      # @param ldf [Object] handle passed to `LazyFrame._from_rbldf`
      # @return [Object] whatever `LazyFrame._from_rbldf` returns
      def wrap_ldf(ldf)
        LazyFrame._from_rbldf(ldf)
      end

      # @param s [Object] handle passed to `Series._from_rbseries`
      # @return [Object] whatever `Series._from_rbseries` returns
      def wrap_s(s)
        Series._from_rbseries(s)
      end

      # @param rbexpr [Object] handle passed to `Expr._from_rbexpr`
      # @return [Object] whatever `Expr._from_rbexpr` returns
      def wrap_expr(rbexpr)
        Expr._from_rbexpr(rbexpr)
      end
    end
  end
end