polars-df 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE.txt +1 -0
  5. data/README.md +38 -4
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +155 -48
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +15 -57
  13. data/ext/polars/src/dataframe/io.rs +77 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +16 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/meta.rs +6 -2
  23. data/ext/polars/src/expr/rolling.rs +6 -7
  24. data/ext/polars/src/expr/string.rs +9 -36
  25. data/ext/polars/src/file.rs +78 -23
  26. data/ext/polars/src/functions/aggregation.rs +4 -4
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +34 -13
  29. data/ext/polars/src/functions/lazy.rs +22 -12
  30. data/ext/polars/src/functions/meta.rs +1 -1
  31. data/ext/polars/src/functions/mod.rs +1 -0
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +920 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -827
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +54 -27
  39. data/ext/polars/src/map/dataframe.rs +10 -6
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +9 -8
  42. data/ext/polars/src/on_startup.rs +1 -1
  43. data/ext/polars/src/series/aggregation.rs +1 -5
  44. data/ext/polars/src/series/arithmetic.rs +10 -10
  45. data/ext/polars/src/series/construction.rs +2 -2
  46. data/ext/polars/src/series/export.rs +1 -1
  47. data/ext/polars/src/series/general.rs +631 -0
  48. data/ext/polars/src/series/import.rs +55 -0
  49. data/ext/polars/src/series/mod.rs +11 -638
  50. data/ext/polars/src/series/scatter.rs +2 -2
  51. data/ext/polars/src/utils.rs +0 -20
  52. data/lib/polars/batched_csv_reader.rb +0 -2
  53. data/lib/polars/binary_expr.rb +133 -9
  54. data/lib/polars/binary_name_space.rb +101 -6
  55. data/lib/polars/config.rb +4 -0
  56. data/lib/polars/data_frame.rb +452 -101
  57. data/lib/polars/data_type_group.rb +28 -0
  58. data/lib/polars/data_types.rb +3 -1
  59. data/lib/polars/date_time_expr.rb +244 -0
  60. data/lib/polars/date_time_name_space.rb +87 -0
  61. data/lib/polars/expr.rb +103 -2
  62. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +95 -13
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/delta.rb +126 -0
  71. data/lib/polars/io/ipc.rb +14 -4
  72. data/lib/polars/io/ndjson.rb +10 -0
  73. data/lib/polars/io/parquet.rb +168 -111
  74. data/lib/polars/lazy_frame.rb +684 -20
  75. data/lib/polars/list_name_space.rb +169 -0
  76. data/lib/polars/selectors.rb +1226 -0
  77. data/lib/polars/series.rb +465 -35
  78. data/lib/polars/string_cache.rb +27 -1
  79. data/lib/polars/string_expr.rb +0 -1
  80. data/lib/polars/string_name_space.rb +73 -3
  81. data/lib/polars/struct_name_space.rb +31 -7
  82. data/lib/polars/utils/various.rb +5 -1
  83. data/lib/polars/utils.rb +45 -10
  84. data/lib/polars/version.rb +1 -1
  85. data/lib/polars.rb +17 -1
  86. metadata +16 -9
  87. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,83 @@
1
+ use arrow::datatypes::ArrowDataType;
2
+ use arrow::ffi;
3
+ use magnus::{IntoValue, Value};
4
+ use polars::datatypes::CompatLevel;
5
+ use polars::frame::DataFrame;
6
+ use polars::prelude::{ArrayRef, ArrowField, PlSmallStr, PolarsResult, SchemaExt};
7
+ use polars::series::Series;
8
+ use polars_core::utils::arrow;
9
+
10
+ use crate::RbResult;
11
+
12
+ #[magnus::wrap(class = "Polars::ArrowArrayStream")]
13
+ pub struct RbArrowArrayStream {
14
+ stream: ffi::ArrowArrayStream,
15
+ }
16
+
17
+ impl RbArrowArrayStream {
18
+ pub fn to_i(&self) -> usize {
19
+ (&self.stream as *const _) as usize
20
+ }
21
+ }
22
+
23
+ pub(crate) fn dataframe_to_stream(df: &DataFrame) -> RbResult<Value> {
24
+ let iter = Box::new(DataFrameStreamIterator::new(df));
25
+ let field = iter.field();
26
+ let stream = ffi::export_iterator(iter, field);
27
+ Ok(RbArrowArrayStream { stream }.into_value())
28
+ }
29
+
30
+ pub struct DataFrameStreamIterator {
31
+ columns: Vec<Series>,
32
+ dtype: ArrowDataType,
33
+ idx: usize,
34
+ n_chunks: usize,
35
+ }
36
+
37
+ impl DataFrameStreamIterator {
38
+ fn new(df: &DataFrame) -> Self {
39
+ let schema = df.schema().to_arrow(CompatLevel::newest());
40
+ let dtype = ArrowDataType::Struct(schema.into_iter_values().collect());
41
+
42
+ Self {
43
+ columns: df
44
+ .get_columns()
45
+ .iter()
46
+ .map(|v| v.as_materialized_series().clone())
47
+ .collect(),
48
+ dtype,
49
+ idx: 0,
50
+ n_chunks: df.first_col_n_chunks(),
51
+ }
52
+ }
53
+
54
+ fn field(&self) -> ArrowField {
55
+ ArrowField::new(PlSmallStr::EMPTY, self.dtype.clone(), false)
56
+ }
57
+ }
58
+
59
+ impl Iterator for DataFrameStreamIterator {
60
+ type Item = PolarsResult<ArrayRef>;
61
+
62
+ fn next(&mut self) -> Option<Self::Item> {
63
+ if self.idx >= self.n_chunks {
64
+ None
65
+ } else {
66
+ // create a batch of the columns with the same chunk no.
67
+ let batch_cols = self
68
+ .columns
69
+ .iter()
70
+ .map(|s| s.to_arrow(self.idx, CompatLevel::newest()))
71
+ .collect::<Vec<_>>();
72
+ self.idx += 1;
73
+
74
+ let array = arrow::array::StructArray::new(
75
+ self.dtype.clone(),
76
+ batch_cols[0].len(),
77
+ batch_cols,
78
+ None,
79
+ );
80
+ Some(Ok(Box::new(array)))
81
+ }
82
+ }
83
+ }
@@ -1 +1,2 @@
1
+ pub mod arrow;
1
2
  pub mod numo;