polars-df 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/Cargo.lock +211 -320
  4. data/LICENSE.txt +1 -1
  5. data/ext/polars/Cargo.toml +13 -9
  6. data/ext/polars/src/batched_csv.rs +2 -2
  7. data/ext/polars/src/catalog/mod.rs +1 -0
  8. data/ext/polars/src/catalog/unity.rs +450 -0
  9. data/ext/polars/src/conversion/any_value.rs +9 -19
  10. data/ext/polars/src/conversion/categorical.rs +30 -0
  11. data/ext/polars/src/conversion/chunked_array.rs +8 -8
  12. data/ext/polars/src/conversion/mod.rs +187 -109
  13. data/ext/polars/src/dataframe/construction.rs +2 -2
  14. data/ext/polars/src/dataframe/export.rs +2 -2
  15. data/ext/polars/src/dataframe/general.rs +4 -2
  16. data/ext/polars/src/dataframe/io.rs +2 -2
  17. data/ext/polars/src/exceptions.rs +1 -1
  18. data/ext/polars/src/expr/datatype.rs +14 -0
  19. data/ext/polars/src/expr/general.rs +36 -44
  20. data/ext/polars/src/expr/list.rs +27 -17
  21. data/ext/polars/src/expr/meta.rs +18 -41
  22. data/ext/polars/src/expr/mod.rs +3 -1
  23. data/ext/polars/src/expr/name.rs +2 -2
  24. data/ext/polars/src/expr/rolling.rs +1 -1
  25. data/ext/polars/src/expr/selector.rs +219 -0
  26. data/ext/polars/src/expr/string.rs +14 -7
  27. data/ext/polars/src/file.rs +12 -6
  28. data/ext/polars/src/functions/io.rs +2 -11
  29. data/ext/polars/src/functions/lazy.rs +22 -54
  30. data/ext/polars/src/functions/meta.rs +2 -2
  31. data/ext/polars/src/functions/misc.rs +1 -1
  32. data/ext/polars/src/functions/range.rs +14 -10
  33. data/ext/polars/src/functions/string_cache.rs +4 -5
  34. data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
  35. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  36. data/ext/polars/src/io/mod.rs +102 -0
  37. data/ext/polars/src/lazyframe/general.rs +75 -113
  38. data/ext/polars/src/lazyframe/serde.rs +1 -1
  39. data/ext/polars/src/lazyframe/sink.rs +6 -6
  40. data/ext/polars/src/lib.rs +104 -26
  41. data/ext/polars/src/map/dataframe.rs +7 -7
  42. data/ext/polars/src/map/lazy.rs +1 -1
  43. data/ext/polars/src/map/mod.rs +31 -19
  44. data/ext/polars/src/map/series.rs +8 -8
  45. data/ext/polars/src/on_startup.rs +5 -2
  46. data/ext/polars/src/rb_modules.rs +1 -1
  47. data/ext/polars/src/series/construction.rs +11 -7
  48. data/ext/polars/src/series/export.rs +6 -4
  49. data/ext/polars/src/series/general.rs +12 -207
  50. data/ext/polars/src/series/import.rs +2 -2
  51. data/ext/polars/src/series/map.rs +227 -0
  52. data/ext/polars/src/series/mod.rs +2 -1
  53. data/ext/polars/src/series/scatter.rs +1 -1
  54. data/ext/polars/src/utils.rs +10 -2
  55. data/lib/polars/cat_name_space.rb +3 -43
  56. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  57. data/lib/polars/catalog/unity/column_info.rb +31 -0
  58. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  59. data/lib/polars/catalog/unity/table_info.rb +50 -0
  60. data/lib/polars/catalog.rb +448 -0
  61. data/lib/polars/convert.rb +10 -0
  62. data/lib/polars/data_frame.rb +151 -30
  63. data/lib/polars/data_types.rb +47 -3
  64. data/lib/polars/exceptions.rb +7 -2
  65. data/lib/polars/expr.rb +48 -39
  66. data/lib/polars/functions/col.rb +6 -5
  67. data/lib/polars/functions/eager.rb +1 -1
  68. data/lib/polars/functions/lazy.rb +114 -15
  69. data/lib/polars/functions/repeat.rb +4 -0
  70. data/lib/polars/io/csv.rb +18 -0
  71. data/lib/polars/io/json.rb +16 -0
  72. data/lib/polars/io/ndjson.rb +13 -0
  73. data/lib/polars/io/parquet.rb +45 -63
  74. data/lib/polars/io/scan_options.rb +47 -0
  75. data/lib/polars/lazy_frame.rb +163 -75
  76. data/lib/polars/list_expr.rb +213 -17
  77. data/lib/polars/list_name_space.rb +121 -8
  78. data/lib/polars/meta_expr.rb +14 -29
  79. data/lib/polars/scan_cast_options.rb +64 -0
  80. data/lib/polars/schema.rb +6 -1
  81. data/lib/polars/selector.rb +138 -0
  82. data/lib/polars/selectors.rb +931 -202
  83. data/lib/polars/series.rb +46 -19
  84. data/lib/polars/string_expr.rb +24 -3
  85. data/lib/polars/string_name_space.rb +12 -1
  86. data/lib/polars/utils/parse.rb +40 -0
  87. data/lib/polars/utils.rb +5 -1
  88. data/lib/polars/version.rb +1 -1
  89. data/lib/polars.rb +8 -0
  90. metadata +17 -2
@@ -0,0 +1,227 @@
1
+ use magnus::Value;
2
+
3
+ use super::RbSeries;
4
+ use crate::map::check_nested_object;
5
+ use crate::map::series::{ApplyLambda, call_lambda_and_extract};
6
+ use crate::prelude::*;
7
+ use crate::{RbPolarsErr, RbResult};
8
+ use crate::{apply_method_all_arrow_series2, raise_err};
9
+
10
+ impl RbSeries {
11
+ pub fn map_elements(
12
+ &self,
13
+ function: Value,
14
+ return_dtype: Option<Wrap<DataType>>,
15
+ skip_nulls: bool,
16
+ ) -> RbResult<Self> {
17
+ let series = &self.series.borrow();
18
+
19
+ if return_dtype.is_none() {
20
+ polars_warn!(
21
+ MapWithoutReturnDtypeWarning,
22
+ "Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. \
23
+ Specify `return_dtype` to silence this warning."
24
+ )
25
+ }
26
+
27
+ if skip_nulls && (series.null_count() == series.len()) {
28
+ if let Some(return_dtype) = return_dtype {
29
+ return Ok(
30
+ Series::full_null(series.name().clone(), series.len(), &return_dtype.0).into(),
31
+ );
32
+ }
33
+ let msg = "The output type of the 'map_elements' function cannot be determined.\n\
34
+ The function was never called because 'skip_nulls: true' and all values are null.\n\
35
+ Consider setting 'skip_nulls: false' or setting the 'return_dtype'.";
36
+ raise_err!(msg, ComputeError)
37
+ }
38
+
39
+ let return_dtype = return_dtype.map(|dt| dt.0);
40
+
41
+ macro_rules! dispatch_apply {
42
+ ($self:expr, $method:ident, $($args:expr),*) => {
43
+ match $self.dtype() {
44
+ DataType::Object(_) => {
45
+ // let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
46
+ // ca.$method($($args),*)
47
+ todo!()
48
+ }
49
+ _ => {
50
+ apply_method_all_arrow_series2!(
51
+ $self,
52
+ $method,
53
+ $($args),*
54
+ )
55
+ }
56
+ }
57
+ }
58
+
59
+ }
60
+
61
+ if matches!(
62
+ series.dtype(),
63
+ DataType::Datetime(_, _)
64
+ | DataType::Date
65
+ | DataType::Duration(_)
66
+ | DataType::Categorical(_, _)
67
+ | DataType::Enum(_, _)
68
+ | DataType::Binary
69
+ | DataType::Array(_, _)
70
+ | DataType::Time
71
+ | DataType::Decimal(_, _)
72
+ ) || !skip_nulls
73
+ {
74
+ let mut avs = Vec::with_capacity(series.len());
75
+ let s = series.rechunk();
76
+
77
+ for av in s.iter() {
78
+ let out = match (skip_nulls, av) {
79
+ (true, AnyValue::Null) => AnyValue::Null,
80
+ (_, av) => {
81
+ let av: Option<Wrap<AnyValue>> =
82
+ call_lambda_and_extract(function, Wrap(av))?;
83
+ match av {
84
+ None => AnyValue::Null,
85
+ Some(av) => av.0,
86
+ }
87
+ }
88
+ };
89
+ avs.push(out)
90
+ }
91
+ let out = Series::new(series.name().clone(), &avs);
92
+ let dtype = out.dtype();
93
+ if dtype.is_nested() {
94
+ check_nested_object(dtype)?;
95
+ }
96
+
97
+ return Ok(out.into());
98
+ }
99
+
100
+ let out = match return_dtype {
101
+ Some(DataType::Int8) => {
102
+ let ca: Int8Chunked = dispatch_apply!(
103
+ series,
104
+ apply_lambda_with_primitive_out_type,
105
+ function,
106
+ 0,
107
+ None
108
+ )?;
109
+ ca.into_series()
110
+ }
111
+ Some(DataType::Int16) => {
112
+ let ca: Int16Chunked = dispatch_apply!(
113
+ series,
114
+ apply_lambda_with_primitive_out_type,
115
+ function,
116
+ 0,
117
+ None
118
+ )?;
119
+ ca.into_series()
120
+ }
121
+ Some(DataType::Int32) => {
122
+ let ca: Int32Chunked = dispatch_apply!(
123
+ series,
124
+ apply_lambda_with_primitive_out_type,
125
+ function,
126
+ 0,
127
+ None
128
+ )?;
129
+ ca.into_series()
130
+ }
131
+ Some(DataType::Int64) => {
132
+ let ca: Int64Chunked = dispatch_apply!(
133
+ series,
134
+ apply_lambda_with_primitive_out_type,
135
+ function,
136
+ 0,
137
+ None
138
+ )?;
139
+ ca.into_series()
140
+ }
141
+ Some(DataType::UInt8) => {
142
+ let ca: UInt8Chunked = dispatch_apply!(
143
+ series,
144
+ apply_lambda_with_primitive_out_type,
145
+ function,
146
+ 0,
147
+ None
148
+ )?;
149
+ ca.into_series()
150
+ }
151
+ Some(DataType::UInt16) => {
152
+ let ca: UInt16Chunked = dispatch_apply!(
153
+ series,
154
+ apply_lambda_with_primitive_out_type,
155
+ function,
156
+ 0,
157
+ None
158
+ )?;
159
+ ca.into_series()
160
+ }
161
+ Some(DataType::UInt32) => {
162
+ let ca: UInt32Chunked = dispatch_apply!(
163
+ series,
164
+ apply_lambda_with_primitive_out_type,
165
+ function,
166
+ 0,
167
+ None
168
+ )?;
169
+ ca.into_series()
170
+ }
171
+ Some(DataType::UInt64) => {
172
+ let ca: UInt64Chunked = dispatch_apply!(
173
+ series,
174
+ apply_lambda_with_primitive_out_type,
175
+ function,
176
+ 0,
177
+ None
178
+ )?;
179
+ ca.into_series()
180
+ }
181
+ Some(DataType::Float32) => {
182
+ let ca: Float32Chunked = dispatch_apply!(
183
+ series,
184
+ apply_lambda_with_primitive_out_type,
185
+ function,
186
+ 0,
187
+ None
188
+ )?;
189
+ ca.into_series()
190
+ }
191
+ Some(DataType::Float64) => {
192
+ let ca: Float64Chunked = dispatch_apply!(
193
+ series,
194
+ apply_lambda_with_primitive_out_type,
195
+ function,
196
+ 0,
197
+ None
198
+ )?;
199
+ ca.into_series()
200
+ }
201
+ Some(DataType::Boolean) => {
202
+ let ca: BooleanChunked =
203
+ dispatch_apply!(series, apply_lambda_with_bool_out_type, function, 0, None)?;
204
+ ca.into_series()
205
+ }
206
+ Some(DataType::String) => {
207
+ let ca =
208
+ dispatch_apply!(series, apply_lambda_with_utf8_out_type, function, 0, None)?;
209
+
210
+ ca.into_series()
211
+ }
212
+ Some(DataType::List(_inner)) => {
213
+ todo!()
214
+ }
215
+ Some(DataType::Object(_)) => {
216
+ let ca =
217
+ dispatch_apply!(series, apply_lambda_with_object_out_type, function, 0, None)?;
218
+ ca.into_series()
219
+ }
220
+ None => return dispatch_apply!(series, apply_lambda_unknown, function),
221
+
222
+ _ => return dispatch_apply!(series, apply_lambda_unknown, function),
223
+ };
224
+
225
+ Ok(RbSeries::new(out))
226
+ }
227
+ }
@@ -5,9 +5,10 @@ mod construction;
5
5
  mod export;
6
6
  mod general;
7
7
  mod import;
8
+ mod map;
8
9
  mod scatter;
9
10
 
10
- use magnus::{prelude::*, RArray};
11
+ use magnus::{RArray, prelude::*};
11
12
  use polars::prelude::*;
12
13
  use std::cell::RefCell;
13
14
 
@@ -100,7 +100,7 @@ fn scatter(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series>
100
100
  let values = values.str()?;
101
101
  ca.scatter(idx, values)
102
102
  }
103
- _ => panic!("not yet implemented for dtype: {}", logical_dtype),
103
+ _ => panic!("not yet implemented for dtype: {logical_dtype}"),
104
104
  };
105
105
 
106
106
  s.and_then(|s| s.cast(&logical_dtype))
@@ -1,3 +1,5 @@
1
+ use crate::{RbErr, RbPolarsErr};
2
+
1
3
  #[macro_export]
2
4
  macro_rules! apply_method_all_arrow_series2 {
3
5
  ($self:expr, $method:ident, $($args:expr),*) => {
@@ -14,11 +16,17 @@ macro_rules! apply_method_all_arrow_series2 {
14
16
  DataType::Int64 => $self.i64().unwrap().$method($($args),*),
15
17
  DataType::Float32 => $self.f32().unwrap().$method($($args),*),
16
18
  DataType::Float64 => $self.f64().unwrap().$method($($args),*),
17
- DataType::Date => $self.date().unwrap().$method($($args),*),
18
- DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
19
+ DataType::Date => $self.date().unwrap().physical().$method($($args),*),
20
+ DataType::Datetime(_, _) => $self.datetime().unwrap().physical().$method($($args),*),
19
21
  // DataType::List(_) => $self.list().unwrap().$method($($args),*),
20
22
  DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
21
23
  dt => panic!("dtype {:?} not supported", dt)
22
24
  }
23
25
  }
24
26
  }
27
+
28
+ /// Boilerplate for `|e| RbPolarsErr::from(e).into()`
29
+ #[allow(unused)]
30
+ pub(crate) fn to_rb_err<E: Into<RbPolarsErr>>(e: E) -> RbErr {
31
+ e.into().into()
32
+ }
@@ -31,56 +31,16 @@ module Polars
31
31
 
32
32
  # Return whether or not the column is a local categorical.
33
33
  #
34
- # @return [Boolean]
35
- #
36
- # @example Categoricals constructed without a string cache are considered local.
37
- # s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
38
- # s.cat.is_local
39
- # # => true
34
+ # Always returns false.
40
35
  #
41
- # @example Categoricals constructed with a string cache are considered global.
42
- # s = nil
43
- # Polars::StringCache.new do
44
- # s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
45
- # end
46
- # s.cat.is_local
47
- # # => false
36
+ # @return [Boolean]
48
37
  def is_local
49
38
  _s.cat_is_local
50
39
  end
51
40
 
52
- # Convert a categorical column to its local representation.
53
- #
54
- # This may change the underlying physical representation of the column.
41
+ # Simply returns the column as-is, local representations are deprecated.
55
42
  #
56
43
  # @return [Series]
57
- #
58
- # @example Compare the global and local representations of a categorical.
59
- # s = nil
60
- # Polars::StringCache.new do
61
- # _ = Polars::Series.new("x", ["a", "b", "a"], dtype: Polars::Categorical)
62
- # s = Polars::Series.new("y", ["c", "b", "d"], dtype: Polars::Categorical)
63
- # end
64
- # s.to_physical
65
- # # =>
66
- # # shape: (3,)
67
- # # Series: 'y' [u32]
68
- # # [
69
- # # 2
70
- # # 1
71
- # # 3
72
- # # ]
73
- #
74
- # @example
75
- # s.cat.to_local.to_physical
76
- # # =>
77
- # # shape: (3,)
78
- # # Series: 'y' [u32]
79
- # # [
80
- # # 0
81
- # # 1
82
- # # 2
83
- # # ]
84
44
  def to_local
85
45
  Utils.wrap_s(_s.cat_to_local)
86
46
  end
@@ -0,0 +1,20 @@
1
module Polars
  class Catalog
    module Unity
      # Metadata describing one catalog within a metastore.
      #
      # Built with keyword arguments; members that are not supplied are `nil`.
      CatalogInfo = ::Struct.new(
        *%i[
          name
          comment
          properties
          options
          storage_location
          created_at
          created_by
          updated_at
          updated_by
        ],
        keyword_init: true
      )
    end
  end
end
@@ -0,0 +1,31 @@
1
module Polars
  class Catalog
    module Unity
      # Metadata describing one column of a catalog table.
      #
      # Built with keyword arguments; members that are not supplied are `nil`.
      ColumnInfo = ::Struct.new(
        *%i[
          name
          type_name
          type_text
          type_json
          position
          comment
          partition_index
        ],
        keyword_init: true
      )

      class ColumnInfo
        # Resolve this column's `type_json` to a native polars datatype.
        #
        # @note
        #   This functionality is considered **unstable**. It may be changed
        #   at any point without it being considered a breaking change.
        #
        # @return [Object]
        def get_polars_dtype
          json = type_json
          RbCatalogClient.type_json_to_polars_type(json)
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module Polars
  class Catalog
    module Unity
      # Metadata describing one namespace within a catalog.
      #
      # Unity catalog terminology calls this a "schema".
      #
      # Built with keyword arguments; members that are not supplied are `nil`.
      NamespaceInfo = ::Struct.new(
        *%i[
          name
          comment
          properties
          storage_location
          created_at
          created_by
          updated_at
          updated_by
        ],
        keyword_init: true
      )
    end
  end
end
@@ -0,0 +1,50 @@
1
module Polars
  class Catalog
    module Unity
      # Metadata describing one catalog table.
      #
      # Built with keyword arguments; members that are not supplied are `nil`.
      TableInfo = ::Struct.new(
        *%i[
          name
          comment
          table_id
          table_type
          storage_location
          data_source_format
          columns
          properties
          created_at
          created_by
          updated_at
          updated_by
        ],
        keyword_init: true
      )

      class TableInfo
        # Build the native polars schema of this table from its columns.
        #
        # @note
        #   This functionality is considered **unstable**. It may be changed
        #   at any point without it being considered a breaking change.
        #
        # @return [Schema, nil]
        #   The schema, or `nil` when the table has no column information.
        #
        # @raise [DuplicateError]
        #   If a column name is already present in the schema being built.
        def get_polars_schema
          return nil if columns.nil?

          result = Schema.new(check_dtypes: false)

          columns.each do |col|
            col_name = col.name
            # An existing entry under this name means the column was listed twice.
            raise DuplicateError, "duplicate column name: #{col_name}" if result[col_name]

            result[col_name] = col.get_polars_dtype
          end

          result
        end
      end
    end
  end
end