polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
data/lib/polars/selectors.rb
CHANGED
|
@@ -39,7 +39,7 @@ module Polars
|
|
|
39
39
|
#
|
|
40
40
|
# @return [Selector]
|
|
41
41
|
#
|
|
42
|
-
# @example
|
|
42
|
+
# @example Select all columns, casting them to string:
|
|
43
43
|
# df = Polars::DataFrame.new(
|
|
44
44
|
# {
|
|
45
45
|
# "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1)],
|
|
@@ -47,8 +47,6 @@ module Polars
|
|
|
47
47
|
# },
|
|
48
48
|
# schema_overrides: {"value" => Polars::Int32}
|
|
49
49
|
# )
|
|
50
|
-
#
|
|
51
|
-
# @example Select all columns, casting them to string:
|
|
52
50
|
# df.select(Polars.cs.all.cast(Polars::String))
|
|
53
51
|
# # =>
|
|
54
52
|
# # shape: (2, 2)
|
|
@@ -1036,13 +1034,31 @@ module Polars
|
|
|
1036
1034
|
|
|
1037
1035
|
# Select all datetime columns, optionally filtering by time unit/zone.
|
|
1038
1036
|
#
|
|
1037
|
+
# @param time_unit ['ms', 'us', 'ns']
|
|
1038
|
+
# One (or more) of the allowed timeunit precision strings, "ms", "us", and "ns".
|
|
1039
|
+
# Omit to select columns with any valid timeunit.
|
|
1040
|
+
# @param time_zone [String]
|
|
1041
|
+
# * One or more timezone strings, as defined in zoneinfo (to see valid options
|
|
1042
|
+
# consult the IANA time zone database, e.g. "UTC" or "Europe/Amsterdam", for a full list).
|
|
1043
|
+
# * Set `nil` to select Datetime columns that do not have a timezone.
|
|
1044
|
+
# * Set "*" to select Datetime columns that have *any* timezone.
|
|
1045
|
+
#
|
|
1039
1046
|
# @return [Selector]
|
|
1040
|
-
def self.datetime
|
|
1041
|
-
time_unit
|
|
1047
|
+
def self.datetime(time_unit = nil, time_zone: ["*", nil])
|
|
1048
|
+
if time_unit.nil?
|
|
1049
|
+
time_unit_lst = ["ms", "us", "ns"]
|
|
1050
|
+
else
|
|
1051
|
+
time_unit_lst = time_unit.is_a?(::String) ? [time_unit] : time_unit.to_a
|
|
1052
|
+
end
|
|
1042
1053
|
|
|
1043
|
-
time_zone
|
|
1054
|
+
if time_zone.nil?
|
|
1055
|
+
time_zone_lst = [nil]
|
|
1056
|
+
elsif time_zone
|
|
1057
|
+
# TODO improve
|
|
1058
|
+
time_zone_lst = time_zone.to_a
|
|
1059
|
+
end
|
|
1044
1060
|
|
|
1045
|
-
Selector._from_rbselector(RbSelector.datetime(
|
|
1061
|
+
Selector._from_rbselector(RbSelector.datetime(time_unit_lst, time_zone_lst))
|
|
1046
1062
|
end
|
|
1047
1063
|
|
|
1048
1064
|
# Select all decimal columns.
|
|
@@ -1180,9 +1196,17 @@ module Polars
|
|
|
1180
1196
|
|
|
1181
1197
|
# Select all duration columns, optionally filtering by time unit.
|
|
1182
1198
|
#
|
|
1199
|
+
# @param time_unit ['ms', 'us', 'ns']
|
|
1200
|
+
# One (or more) of the allowed timeunit precision strings, "ms", "us", and "ns".
|
|
1201
|
+
# Omit to select columns with any valid timeunit.
|
|
1202
|
+
#
|
|
1183
1203
|
# @return [Selector]
|
|
1184
|
-
def self.duration
|
|
1185
|
-
time_unit
|
|
1204
|
+
def self.duration(time_unit = nil)
|
|
1205
|
+
if time_unit.nil?
|
|
1206
|
+
time_unit = ["ms", "us", "ns"]
|
|
1207
|
+
else
|
|
1208
|
+
time_unit = time_unit.is_a?(::String) ? [time_unit] : time_unit.to_a
|
|
1209
|
+
end
|
|
1186
1210
|
|
|
1187
1211
|
Selector._from_rbselector(RbSelector.duration(time_unit))
|
|
1188
1212
|
end
|
|
@@ -1693,6 +1717,18 @@ module Polars
|
|
|
1693
1717
|
# Select all object columns.
|
|
1694
1718
|
#
|
|
1695
1719
|
# @return [Selector]
|
|
1720
|
+
#
|
|
1721
|
+
# @example
|
|
1722
|
+
# df = Polars::DataFrame.new(
|
|
1723
|
+
# {
|
|
1724
|
+
# "idx" => [0, 1],
|
|
1725
|
+
# "uuid_obj" => ["6be063cf-c9c6-43be-878e-e446cfd42981", "7849d8f9-2cac-48e7-96d3-63cf81c14869"],
|
|
1726
|
+
# "uuid_str" => ["acab9fea-c05d-4b91-b639-418004a63f33", "28c65415-8b7d-4857-a4ce-300dca14b12b"]
|
|
1727
|
+
# },
|
|
1728
|
+
# schema_overrides: {"idx" => Polars::Int32, "uuid_obj" => Polars::Object}
|
|
1729
|
+
# )
|
|
1730
|
+
# df.select(Polars.cs.object).to_h(as_series: false)
|
|
1731
|
+
# # => {"uuid_obj"=>["6be063cf-c9c6-43be-878e-e446cfd42981", "7849d8f9-2cac-48e7-96d3-63cf81c14869"]}
|
|
1696
1732
|
def self.object
|
|
1697
1733
|
Selector._from_rbselector(RbSelector.object)
|
|
1698
1734
|
end
|