polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -39,7 +39,7 @@ module Polars
39
39
  #
40
40
  # @return [Selector]
41
41
  #
42
- # @example
42
+ # @example Select all columns, casting them to string:
43
43
  # df = Polars::DataFrame.new(
44
44
  # {
45
45
  # "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1)],
@@ -47,8 +47,6 @@ module Polars
47
47
  # },
48
48
  # schema_overrides: {"value" => Polars::Int32}
49
49
  # )
50
- #
51
- # @example Select all columns, casting them to string:
52
50
  # df.select(Polars.cs.all.cast(Polars::String))
53
51
  # # =>
54
52
  # # shape: (2, 2)
@@ -1036,13 +1034,31 @@ module Polars
1036
1034
 
1037
1035
  # Select all datetime columns, optionally filtering by time unit/zone.
1038
1036
  #
1037
+ # @param time_unit ['ms', 'us', 'ns']
1038
+ # One (or more) of the allowed timeunit precision strings, "ms", "us", and "ns".
1039
+ # Omit to select columns with any valid timeunit.
1040
+ # @param time_zone [String]
1041
+ # * One or more timezone strings, as defined in zoneinfo (to see valid options
1042
+ # run `import zoneinfo; zoneinfo.available_timezones()` for a full list).
1043
+ # * Set `nil` to select Datetime columns that do not have a timezone.
1044
+ # * Set "*" to select Datetime columns that have *any* timezone.
1045
+ #
1039
1046
  # @return [Selector]
1040
- def self.datetime
1041
- time_unit = ["ms", "us", "ns"]
1047
+ def self.datetime(time_unit = nil, time_zone: ["*", nil])
1048
+ if time_unit.nil?
1049
+ time_unit_lst = ["ms", "us", "ns"]
1050
+ else
1051
+ time_unit_lst = time_unit.is_a?(::String) ? [time_unit] : time_unit.to_a
1052
+ end
1042
1053
 
1043
- time_zone = [nil]
1054
+ if time_zone.nil?
1055
+ time_zone_lst = [nil]
1056
+ elsif time_zone
1057
+ # TODO improve
1058
+ time_zone_lst = time_zone.to_a
1059
+ end
1044
1060
 
1045
- Selector._from_rbselector(RbSelector.datetime(time_unit, time_zone))
1061
+ Selector._from_rbselector(RbSelector.datetime(time_unit_lst, time_zone_lst))
1046
1062
  end
1047
1063
 
1048
1064
  # Select all decimal columns.
@@ -1180,9 +1196,17 @@ module Polars
1180
1196
 
1181
1197
  # Select all duration columns, optionally filtering by time unit.
1182
1198
  #
1199
+ # @param time_unit ['ms', 'us', 'ns']
1200
+ # One (or more) of the allowed timeunit precision strings, "ms", "us", and "ns".
1201
+ # Omit to select columns with any valid timeunit.
1202
+ #
1183
1203
  # @return [Selector]
1184
- def self.duration
1185
- time_unit = ["ms", "us", "ns"]
1204
+ def self.duration(time_unit = nil)
1205
+ if time_unit.nil?
1206
+ time_unit = ["ms", "us", "ns"]
1207
+ else
1208
+ time_unit = time_unit.is_a?(::String) ? [time_unit] : time_unit.to_a
1209
+ end
1186
1210
 
1187
1211
  Selector._from_rbselector(RbSelector.duration(time_unit))
1188
1212
  end
@@ -1693,6 +1717,18 @@ module Polars
1693
1717
  # Select all object columns.
1694
1718
  #
1695
1719
  # @return [Selector]
1720
+ #
1721
+ # @example
1722
+ # df = Polars::DataFrame.new(
1723
+ # {
1724
+ # "idx" => [0, 1],
1725
+ # "uuid_obj" => ["6be063cf-c9c6-43be-878e-e446cfd42981", "7849d8f9-2cac-48e7-96d3-63cf81c14869"],
1726
+ # "uuid_str" => ["acab9fea-c05d-4b91-b639-418004a63f33", "28c65415-8b7d-4857-a4ce-300dca14b12b"]
1727
+ # },
1728
+ # schema_overrides: {"idx" => Polars::Int32, "uuid_obj" => Polars::Object}
1729
+ # )
1730
+ # df.select(Polars.cs.object).to_h(as_series: false)
1731
+ # # => {"uuid_obj"=>["6be063cf-c9c6-43be-878e-e446cfd42981", "7849d8f9-2cac-48e7-96d3-63cf81c14869"]}
1696
1732
  def self.object
1697
1733
  Selector._from_rbselector(RbSelector.object)
1698
1734
  end