polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -77,21 +77,22 @@ pub(crate) fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
77
77
 
78
78
  pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
79
79
  let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
80
- Ok(rbdf.df.borrow().clone())
80
+ Ok(rbdf.df.read().clone())
81
81
  }
82
82
 
83
83
  pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
84
84
  let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
85
- Ok(rbdf.ldf.borrow().clone())
85
+ Ok(rbdf.ldf.read().clone())
86
86
  }
87
87
 
88
88
  pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
89
89
  let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?;
90
- Ok(rbs.series.borrow().clone())
90
+ Ok(rbs.series.read().clone())
91
91
  }
92
92
 
93
93
  pub(crate) fn to_series(s: RbSeries) -> Value {
94
- let series = pl_series();
94
+ let ruby = Ruby::get().unwrap();
95
+ let series = pl_series(&ruby);
95
96
  series
96
97
  .funcall::<_, _, Value>("_from_rbseries", (s,))
97
98
  .unwrap()
@@ -136,7 +137,7 @@ fn struct_dict<'a>(ruby: &Ruby, vals: impl Iterator<Item = AnyValue<'a>>, flds:
136
137
 
137
138
  impl IntoValue for Wrap<DataType> {
138
139
  fn into_value_with(self, ruby: &Ruby) -> Value {
139
- let pl = crate::rb_modules::polars();
140
+ let pl = crate::rb_modules::polars(ruby);
140
141
 
141
142
  match self.0 {
142
143
  DataType::Int8 => {
@@ -338,10 +339,12 @@ impl TryConvert for Wrap<DataType> {
338
339
  "Polars::Int16" => DataType::Int16,
339
340
  "Polars::Int32" => DataType::Int32,
340
341
  "Polars::Int64" => DataType::Int64,
342
+ "Polars::Int128" => DataType::Int64,
341
343
  "Polars::UInt8" => DataType::UInt8,
342
344
  "Polars::UInt16" => DataType::UInt16,
343
345
  "Polars::UInt32" => DataType::UInt32,
344
346
  "Polars::UInt64" => DataType::UInt64,
347
+ "Polars::UInt128" => DataType::UInt128,
345
348
  "Polars::Float32" => DataType::Float32,
346
349
  "Polars::Float64" => DataType::Float64,
347
350
  "Polars::Boolean" => DataType::Boolean,
@@ -367,12 +370,12 @@ impl TryConvert for Wrap<DataType> {
367
370
  "Polars::Object" => DataType::Object(OBJECT_NAME),
368
371
  "Polars::Unknown" => DataType::Unknown(Default::default()),
369
372
  dt => {
370
- return Err(RbValueError::new_err(format!(
371
- "{dt} is not a correct polars DataType.",
373
+ return Err(RbTypeError::new_err(format!(
374
+ "'{dt}' is not a Polars data type",
372
375
  )));
373
376
  }
374
377
  }
375
- } else if String::try_convert(ob).is_err() {
378
+ } else {
376
379
  let cls = ob.class();
377
380
  let name = unsafe { cls.name() }.into_owned();
378
381
  match name.as_str() {
@@ -380,10 +383,12 @@ impl TryConvert for Wrap<DataType> {
380
383
  "Polars::Int16" => DataType::Int16,
381
384
  "Polars::Int32" => DataType::Int32,
382
385
  "Polars::Int64" => DataType::Int64,
386
+ "Polars::Int128" => DataType::Int128,
383
387
  "Polars::UInt8" => DataType::UInt8,
384
388
  "Polars::UInt16" => DataType::UInt16,
385
389
  "Polars::UInt32" => DataType::UInt32,
386
390
  "Polars::UInt64" => DataType::UInt64,
391
+ "Polars::UInt128" => DataType::UInt128,
387
392
  "Polars::Float32" => DataType::Float32,
388
393
  "Polars::Float64" => DataType::Float64,
389
394
  "Polars::Boolean" => DataType::Boolean,
@@ -448,42 +453,11 @@ impl TryConvert for Wrap<DataType> {
448
453
  DataType::Struct(fields)
449
454
  }
450
455
  "Polars::Null" => DataType::Null,
451
- "Object" => DataType::Object(OBJECT_NAME),
456
+ "Polars::Object" => DataType::Object(OBJECT_NAME),
452
457
  "Polars::Unknown" => DataType::Unknown(Default::default()),
453
458
  dt => {
454
459
  return Err(RbTypeError::new_err(format!(
455
- "A {dt} object is not a correct polars DataType. \
456
- Hint: use the class without instantiating it.",
457
- )));
458
- }
459
- }
460
- } else {
461
- match String::try_convert(ob)?.as_str() {
462
- "u8" => DataType::UInt8,
463
- "u16" => DataType::UInt16,
464
- "u32" => DataType::UInt32,
465
- "u64" => DataType::UInt64,
466
- "i8" => DataType::Int8,
467
- "i16" => DataType::Int16,
468
- "i32" => DataType::Int32,
469
- "i64" => DataType::Int64,
470
- "str" => DataType::String,
471
- "bin" => DataType::Binary,
472
- "bool" => DataType::Boolean,
473
- "cat" => DataType::from_categories(Categories::global()),
474
- "date" => DataType::Date,
475
- "datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
476
- "f32" => DataType::Float32,
477
- "time" => DataType::Time,
478
- "dur" => DataType::Duration(TimeUnit::Microseconds),
479
- "f64" => DataType::Float64,
480
- "obj" => DataType::Object(OBJECT_NAME),
481
- "list" => DataType::List(Box::new(DataType::Boolean)),
482
- "null" => DataType::Null,
483
- "unk" => DataType::Unknown(Default::default()),
484
- _ => {
485
- return Err(RbValueError::new_err(format!(
486
- "{ob} is not a supported DataType."
460
+ "'{dt}' is not a Polars data type",
487
461
  )));
488
462
  }
489
463
  }
@@ -531,7 +505,7 @@ impl<'s> TryConvert for Wrap<Row<'s>> {
531
505
 
532
506
  impl TryConvert for Wrap<Schema> {
533
507
  fn try_convert(ob: Value) -> RbResult<Self> {
534
- let dict = RHash::try_convert(ob)?;
508
+ let dict: RHash = ob.funcall("to_h", ())?;
535
509
 
536
510
  let mut schema = Vec::new();
537
511
  dict.foreach(|key: String, val: Wrap<DataType>| {
@@ -628,7 +602,7 @@ impl TryConvert for Wrap<ScanSources> {
628
602
  }
629
603
  RubyScanSourceInput::File(file) => {
630
604
  let mut sources = Vec::with_capacity(num_items);
631
- sources.push(file);
605
+ sources.push(file.into());
632
606
  MutableSources::Files(sources)
633
607
  }
634
608
  RubyScanSourceInput::Buffer(buffer) => {
@@ -641,7 +615,7 @@ impl TryConvert for Wrap<ScanSources> {
641
615
  for source in iter {
642
616
  match (&mut sources, source?) {
643
617
  (MutableSources::Paths(v), RubyScanSourceInput::Path(p)) => v.push(p),
644
- (MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f),
618
+ (MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f.into()),
645
619
  (MutableSources::Buffers(v), RubyScanSourceInput::Buffer(f)) => v.push(f),
646
620
  _ => {
647
621
  return Err(RbTypeError::new_err(
@@ -1052,6 +1026,24 @@ impl TryConvert for Wrap<RankMethod> {
1052
1026
  }
1053
1027
  }
1054
1028
 
1029
+ impl TryConvert for Wrap<RollingRankMethod> {
1030
+ fn try_convert(ob: Value) -> RbResult<Self> {
1031
+ let parsed = match String::try_convert(ob)?.as_str() {
1032
+ "min" => RollingRankMethod::Min,
1033
+ "max" => RollingRankMethod::Max,
1034
+ "average" => RollingRankMethod::Average,
1035
+ "dense" => RollingRankMethod::Dense,
1036
+ "random" => RollingRankMethod::Random,
1037
+ v => {
1038
+ return Err(RbValueError::new_err(format!(
1039
+ "rank `method` must be one of {{'min', 'max', 'average', 'dense', 'random'}}, got {v}",
1040
+ )));
1041
+ }
1042
+ };
1043
+ Ok(Wrap(parsed))
1044
+ }
1045
+ }
1046
+
1055
1047
  impl TryConvert for Wrap<Roll> {
1056
1048
  fn try_convert(ob: Value) -> RbResult<Self> {
1057
1049
  let parsed = match String::try_convert(ob)?.as_str() {
@@ -1091,9 +1083,11 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
1091
1083
  let parsed = match String::try_convert(ob)?.as_str() {
1092
1084
  "first" => UniqueKeepStrategy::First,
1093
1085
  "last" => UniqueKeepStrategy::Last,
1086
+ "none" => UniqueKeepStrategy::None,
1087
+ "any" => UniqueKeepStrategy::Any,
1094
1088
  v => {
1095
1089
  return Err(RbValueError::new_err(format!(
1096
- "keep must be one of {{'first', 'last'}}, got {v}"
1090
+ "`keep` must be one of {{'first', 'last', 'any', 'none'}}, got {v}",
1097
1091
  )));
1098
1092
  }
1099
1093
  };
@@ -1467,6 +1461,17 @@ impl TryConvert for Wrap<UnicodeForm> {
1467
1461
  }
1468
1462
  }
1469
1463
 
1464
+ impl TryConvert for Wrap<Option<KeyValueMetadata>> {
1465
+ fn try_convert(ob: Value) -> RbResult<Self> {
1466
+ if ob.is_nil() {
1467
+ return Ok(Wrap(None));
1468
+ }
1469
+ todo!();
1470
+ }
1471
+ }
1472
+
1473
+ unsafe impl TryConvertOwned for Wrap<Option<KeyValueMetadata>> {}
1474
+
1470
1475
  impl TryConvert for Wrap<Option<TimeZone>> {
1471
1476
  fn try_convert(ob: Value) -> RbResult<Self> {
1472
1477
  let tz = Option::<Wrap<PlSmallStr>>::try_convert(ob)?;
@@ -3,19 +3,19 @@ use magnus::{IntoValue, Ruby, Value, prelude::*};
3
3
  use super::*;
4
4
  use crate::RbResult;
5
5
  use crate::conversion::{ObjectValue, Wrap};
6
- use crate::interop::arrow::to_ruby::dataframe_to_stream;
6
+ use crate::interop::arrow::to_rb::dataframe_to_stream;
7
7
 
8
8
  impl RbDataFrame {
9
- pub fn row_tuple(ruby: &Ruby, rb_self: &Self, idx: i64) -> Value {
9
+ pub fn row_tuple(ruby: &Ruby, self_: &Self, idx: i64) -> Value {
10
10
  let idx = if idx < 0 {
11
- (rb_self.df.borrow().height() as i64 + idx) as usize
11
+ (self_.df.read().height() as i64 + idx) as usize
12
12
  } else {
13
13
  idx as usize
14
14
  };
15
15
  ruby.ary_from_iter(
16
- rb_self
16
+ self_
17
17
  .df
18
- .borrow()
18
+ .read()
19
19
  .get_columns()
20
20
  .iter()
21
21
  .map(|s| match s.dtype() {
@@ -29,13 +29,13 @@ impl RbDataFrame {
29
29
  .as_value()
30
30
  }
31
31
 
32
- pub fn row_tuples(ruby: &Ruby, rb_self: &Self) -> Value {
33
- let df = &rb_self.df;
34
- ruby.ary_from_iter((0..df.borrow().height()).map(|idx| {
32
+ pub fn row_tuples(ruby: &Ruby, self_: &Self) -> Value {
33
+ let df = &self_.df;
34
+ ruby.ary_from_iter((0..df.read().height()).map(|idx| {
35
35
  ruby.ary_from_iter(
36
- rb_self
36
+ self_
37
37
  .df
38
- .borrow()
38
+ .read()
39
39
  .get_columns()
40
40
  .iter()
41
41
  .map(|s| match s.dtype() {
@@ -50,8 +50,8 @@ impl RbDataFrame {
50
50
  .as_value()
51
51
  }
52
52
 
53
- pub fn __arrow_c_stream__(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
54
- rb_self.df.borrow_mut().align_chunks();
55
- dataframe_to_stream(&rb_self.df.borrow(), ruby)
53
+ pub fn __arrow_c_stream__(ruby: &Ruby, self_: &Self) -> RbResult<Value> {
54
+ self_.df.write().align_chunks();
55
+ dataframe_to_stream(&self_.df.read(), ruby)
56
56
  }
57
57
  }