polars-df 0.14.0 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE.txt +1 -0
  5. data/README.md +38 -4
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +155 -48
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +15 -57
  13. data/ext/polars/src/dataframe/io.rs +77 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +16 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/meta.rs +6 -2
  23. data/ext/polars/src/expr/rolling.rs +6 -7
  24. data/ext/polars/src/expr/string.rs +9 -36
  25. data/ext/polars/src/file.rs +78 -23
  26. data/ext/polars/src/functions/aggregation.rs +4 -4
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +34 -13
  29. data/ext/polars/src/functions/lazy.rs +22 -12
  30. data/ext/polars/src/functions/meta.rs +1 -1
  31. data/ext/polars/src/functions/mod.rs +1 -0
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +920 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -827
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +54 -27
  39. data/ext/polars/src/map/dataframe.rs +10 -6
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +9 -8
  42. data/ext/polars/src/on_startup.rs +1 -1
  43. data/ext/polars/src/series/aggregation.rs +1 -5
  44. data/ext/polars/src/series/arithmetic.rs +10 -10
  45. data/ext/polars/src/series/construction.rs +2 -2
  46. data/ext/polars/src/series/export.rs +1 -1
  47. data/ext/polars/src/series/general.rs +631 -0
  48. data/ext/polars/src/series/import.rs +55 -0
  49. data/ext/polars/src/series/mod.rs +11 -638
  50. data/ext/polars/src/series/scatter.rs +2 -2
  51. data/ext/polars/src/utils.rs +0 -20
  52. data/lib/polars/batched_csv_reader.rb +0 -2
  53. data/lib/polars/binary_expr.rb +133 -9
  54. data/lib/polars/binary_name_space.rb +101 -6
  55. data/lib/polars/config.rb +4 -0
  56. data/lib/polars/data_frame.rb +452 -101
  57. data/lib/polars/data_type_group.rb +28 -0
  58. data/lib/polars/data_types.rb +3 -1
  59. data/lib/polars/date_time_expr.rb +244 -0
  60. data/lib/polars/date_time_name_space.rb +87 -0
  61. data/lib/polars/expr.rb +103 -2
  62. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +95 -13
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/delta.rb +126 -0
  71. data/lib/polars/io/ipc.rb +14 -4
  72. data/lib/polars/io/ndjson.rb +10 -0
  73. data/lib/polars/io/parquet.rb +168 -111
  74. data/lib/polars/lazy_frame.rb +684 -20
  75. data/lib/polars/list_name_space.rb +169 -0
  76. data/lib/polars/selectors.rb +1226 -0
  77. data/lib/polars/series.rb +465 -35
  78. data/lib/polars/string_cache.rb +27 -1
  79. data/lib/polars/string_expr.rb +0 -1
  80. data/lib/polars/string_name_space.rb +73 -3
  81. data/lib/polars/struct_name_space.rb +31 -7
  82. data/lib/polars/utils/various.rb +5 -1
  83. data/lib/polars/utils.rb +45 -10
  84. data/lib/polars/version.rb +1 -1
  85. data/lib/polars.rb +17 -1
  86. metadata +16 -9
  87. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,83 @@
1
+ use arrow::datatypes::ArrowDataType;
2
+ use arrow::ffi;
3
+ use magnus::{IntoValue, Value};
4
+ use polars::datatypes::CompatLevel;
5
+ use polars::frame::DataFrame;
6
+ use polars::prelude::{ArrayRef, ArrowField, PlSmallStr, PolarsResult, SchemaExt};
7
+ use polars::series::Series;
8
+ use polars_core::utils::arrow;
9
+
10
+ use crate::RbResult;
11
+
12
+ #[magnus::wrap(class = "Polars::ArrowArrayStream")]
13
+ pub struct RbArrowArrayStream {
14
+ stream: ffi::ArrowArrayStream,
15
+ }
16
+
17
+ impl RbArrowArrayStream {
18
+ pub fn to_i(&self) -> usize {
19
+ (&self.stream as *const _) as usize
20
+ }
21
+ }
22
+
23
+ pub(crate) fn dataframe_to_stream(df: &DataFrame) -> RbResult<Value> {
24
+ let iter = Box::new(DataFrameStreamIterator::new(df));
25
+ let field = iter.field();
26
+ let stream = ffi::export_iterator(iter, field);
27
+ Ok(RbArrowArrayStream { stream }.into_value())
28
+ }
29
+
30
+ pub struct DataFrameStreamIterator {
31
+ columns: Vec<Series>,
32
+ dtype: ArrowDataType,
33
+ idx: usize,
34
+ n_chunks: usize,
35
+ }
36
+
37
+ impl DataFrameStreamIterator {
38
+ fn new(df: &DataFrame) -> Self {
39
+ let schema = df.schema().to_arrow(CompatLevel::newest());
40
+ let dtype = ArrowDataType::Struct(schema.into_iter_values().collect());
41
+
42
+ Self {
43
+ columns: df
44
+ .get_columns()
45
+ .iter()
46
+ .map(|v| v.as_materialized_series().clone())
47
+ .collect(),
48
+ dtype,
49
+ idx: 0,
50
+ n_chunks: df.first_col_n_chunks(),
51
+ }
52
+ }
53
+
54
+ fn field(&self) -> ArrowField {
55
+ ArrowField::new(PlSmallStr::EMPTY, self.dtype.clone(), false)
56
+ }
57
+ }
58
+
59
+ impl Iterator for DataFrameStreamIterator {
60
+ type Item = PolarsResult<ArrayRef>;
61
+
62
+ fn next(&mut self) -> Option<Self::Item> {
63
+ if self.idx >= self.n_chunks {
64
+ None
65
+ } else {
66
+ // create a batch of the columns with the same chunk no.
67
+ let batch_cols = self
68
+ .columns
69
+ .iter()
70
+ .map(|s| s.to_arrow(self.idx, CompatLevel::newest()))
71
+ .collect::<Vec<_>>();
72
+ self.idx += 1;
73
+
74
+ let array = arrow::array::StructArray::new(
75
+ self.dtype.clone(),
76
+ batch_cols[0].len(),
77
+ batch_cols,
78
+ None,
79
+ );
80
+ Some(Ok(Box::new(array)))
81
+ }
82
+ }
83
+ }
@@ -1 +1,2 @@
1
+ pub mod arrow;
1
2
  pub mod numo;