polars-df 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/README.md +6 -6
  5. data/ext/polars/Cargo.toml +12 -7
  6. data/ext/polars/src/batched_csv.rs +53 -52
  7. data/ext/polars/src/conversion/any_value.rs +261 -0
  8. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  9. data/ext/polars/src/conversion/mod.rs +60 -66
  10. data/ext/polars/src/dataframe/construction.rs +184 -0
  11. data/ext/polars/src/dataframe/export.rs +48 -0
  12. data/ext/polars/src/dataframe/general.rs +597 -0
  13. data/ext/polars/src/dataframe/io.rs +473 -0
  14. data/ext/polars/src/dataframe/mod.rs +26 -0
  15. data/ext/polars/src/error.rs +26 -4
  16. data/ext/polars/src/expr/categorical.rs +0 -10
  17. data/ext/polars/src/expr/datetime.rs +4 -8
  18. data/ext/polars/src/expr/general.rs +129 -94
  19. data/ext/polars/src/expr/mod.rs +2 -2
  20. data/ext/polars/src/expr/rolling.rs +201 -77
  21. data/ext/polars/src/expr/string.rs +11 -36
  22. data/ext/polars/src/functions/eager.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +23 -21
  24. data/ext/polars/src/functions/range.rs +69 -1
  25. data/ext/polars/src/interop/mod.rs +1 -0
  26. data/ext/polars/src/interop/numo/mod.rs +2 -0
  27. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  28. data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
  29. data/ext/polars/src/lazyframe/mod.rs +135 -136
  30. data/ext/polars/src/lib.rs +94 -59
  31. data/ext/polars/src/map/dataframe.rs +2 -2
  32. data/ext/polars/src/map/lazy.rs +5 -25
  33. data/ext/polars/src/map/series.rs +7 -1
  34. data/ext/polars/src/rb_modules.rs +25 -1
  35. data/ext/polars/src/series/aggregation.rs +49 -30
  36. data/ext/polars/src/series/arithmetic.rs +21 -11
  37. data/ext/polars/src/series/construction.rs +56 -38
  38. data/ext/polars/src/series/export.rs +131 -49
  39. data/ext/polars/src/series/mod.rs +32 -141
  40. data/ext/polars/src/sql.rs +3 -1
  41. data/lib/polars/array_expr.rb +4 -4
  42. data/lib/polars/batched_csv_reader.rb +11 -5
  43. data/lib/polars/cat_expr.rb +0 -36
  44. data/lib/polars/cat_name_space.rb +0 -37
  45. data/lib/polars/convert.rb +6 -1
  46. data/lib/polars/data_frame.rb +176 -403
  47. data/lib/polars/data_types.rb +1 -1
  48. data/lib/polars/date_time_expr.rb +525 -572
  49. data/lib/polars/date_time_name_space.rb +263 -460
  50. data/lib/polars/dynamic_group_by.rb +5 -5
  51. data/lib/polars/exceptions.rb +7 -0
  52. data/lib/polars/expr.rb +1394 -243
  53. data/lib/polars/expr_dispatch.rb +1 -1
  54. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  55. data/lib/polars/functions/as_datatype.rb +63 -40
  56. data/lib/polars/functions/lazy.rb +63 -14
  57. data/lib/polars/functions/lit.rb +1 -1
  58. data/lib/polars/functions/range/date_range.rb +90 -57
  59. data/lib/polars/functions/range/datetime_range.rb +149 -0
  60. data/lib/polars/functions/range/int_range.rb +2 -2
  61. data/lib/polars/functions/range/time_range.rb +141 -0
  62. data/lib/polars/functions/repeat.rb +1 -1
  63. data/lib/polars/functions/whenthen.rb +1 -1
  64. data/lib/polars/group_by.rb +88 -23
  65. data/lib/polars/io/avro.rb +24 -0
  66. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  67. data/lib/polars/io/database.rb +73 -0
  68. data/lib/polars/io/ipc.rb +247 -0
  69. data/lib/polars/io/json.rb +29 -0
  70. data/lib/polars/io/ndjson.rb +80 -0
  71. data/lib/polars/io/parquet.rb +227 -0
  72. data/lib/polars/lazy_frame.rb +143 -272
  73. data/lib/polars/lazy_group_by.rb +100 -3
  74. data/lib/polars/list_expr.rb +11 -11
  75. data/lib/polars/list_name_space.rb +5 -1
  76. data/lib/polars/rolling_group_by.rb +7 -9
  77. data/lib/polars/series.rb +103 -187
  78. data/lib/polars/string_expr.rb +78 -102
  79. data/lib/polars/string_name_space.rb +5 -4
  80. data/lib/polars/testing.rb +2 -2
  81. data/lib/polars/utils/constants.rb +9 -0
  82. data/lib/polars/utils/convert.rb +97 -0
  83. data/lib/polars/utils/parse.rb +89 -0
  84. data/lib/polars/utils/various.rb +76 -0
  85. data/lib/polars/utils/wrap.rb +19 -0
  86. data/lib/polars/utils.rb +8 -300
  87. data/lib/polars/version.rb +1 -1
  88. data/lib/polars/whenthen.rb +6 -6
  89. data/lib/polars.rb +20 -1
  90. metadata +28 -7
  91. data/ext/polars/src/conversion/anyvalue.rs +0 -186
  92. data/ext/polars/src/dataframe.rs +0 -1208
@@ -1,186 +0,0 @@
1
- use magnus::encoding::{EncodingCapable, Index};
2
- use magnus::{
3
- class, prelude::*, r_hash::ForEach, Float, Integer, IntoValue, RArray, RHash, RString, Ruby,
4
- TryConvert, Value,
5
- };
6
- use polars::prelude::*;
7
- use polars_core::utils::any_values_to_supertype_and_n_dtypes;
8
-
9
- use super::{struct_dict, ObjectValue, Wrap};
10
-
11
- use crate::rb_modules::utils;
12
- use crate::{RbPolarsErr, RbResult, RbSeries};
13
-
14
- impl IntoValue for Wrap<AnyValue<'_>> {
15
- fn into_value_with(self, ruby: &Ruby) -> Value {
16
- match self.0 {
17
- AnyValue::UInt8(v) => ruby.into_value(v),
18
- AnyValue::UInt16(v) => ruby.into_value(v),
19
- AnyValue::UInt32(v) => ruby.into_value(v),
20
- AnyValue::UInt64(v) => ruby.into_value(v),
21
- AnyValue::Int8(v) => ruby.into_value(v),
22
- AnyValue::Int16(v) => ruby.into_value(v),
23
- AnyValue::Int32(v) => ruby.into_value(v),
24
- AnyValue::Int64(v) => ruby.into_value(v),
25
- AnyValue::Float32(v) => ruby.into_value(v),
26
- AnyValue::Float64(v) => ruby.into_value(v),
27
- AnyValue::Null => ruby.qnil().as_value(),
28
- AnyValue::Boolean(v) => ruby.into_value(v),
29
- AnyValue::String(v) => ruby.into_value(v),
30
- AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
31
- AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
32
- let s = if arr.is_null() {
33
- rev.get(idx)
34
- } else {
35
- unsafe { arr.deref_unchecked().value(idx as usize) }
36
- };
37
- s.into_value()
38
- }
39
- AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
40
- AnyValue::Datetime(v, time_unit, time_zone) => {
41
- let time_unit = time_unit.to_ascii();
42
- utils()
43
- .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
44
- .unwrap()
45
- }
46
- AnyValue::Duration(v, time_unit) => {
47
- let time_unit = time_unit.to_ascii();
48
- utils()
49
- .funcall("_to_ruby_duration", (v, time_unit))
50
- .unwrap()
51
- }
52
- AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
53
- AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
54
- ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
55
- AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
56
- AnyValue::Object(v) => {
57
- let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
58
- object.to_object()
59
- }
60
- AnyValue::ObjectOwned(v) => {
61
- let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
62
- object.to_object()
63
- }
64
- AnyValue::Binary(v) => RString::from_slice(v).into_value(),
65
- AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
66
- AnyValue::Decimal(v, scale) => utils()
67
- .funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
68
- .unwrap(),
69
- }
70
- }
71
- }
72
-
73
- impl<'s> TryConvert for Wrap<AnyValue<'s>> {
74
- fn try_convert(ob: Value) -> RbResult<Self> {
75
- if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
76
- Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
77
- } else if let Some(v) = Integer::from_value(ob) {
78
- Ok(AnyValue::Int64(v.to_i64()?).into())
79
- } else if let Some(v) = Float::from_value(ob) {
80
- Ok(AnyValue::Float64(v.to_f64()).into())
81
- } else if let Some(v) = RString::from_value(ob) {
82
- if v.enc_get() == Index::utf8() {
83
- Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
84
- } else {
85
- Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
86
- }
87
- // call is_a? for ActiveSupport::TimeWithZone
88
- } else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
89
- let sec = ob.funcall::<_, _, i64>("to_i", ())?;
90
- let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
91
- let v = sec * 1_000_000_000 + nsec;
92
- // TODO support time zone when possible
93
- // https://github.com/pola-rs/polars/issues/9103
94
- Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
95
- } else if ob.is_nil() {
96
- Ok(AnyValue::Null.into())
97
- } else if let Some(dict) = RHash::from_value(ob) {
98
- let len = dict.len();
99
- let mut keys = Vec::with_capacity(len);
100
- let mut vals = Vec::with_capacity(len);
101
- dict.foreach(|k: Value, v: Value| {
102
- let key = String::try_convert(k)?;
103
- let val = Wrap::<AnyValue>::try_convert(v)?.0;
104
- let dtype = DataType::from(&val);
105
- keys.push(Field::new(&key, dtype));
106
- vals.push(val);
107
- Ok(ForEach::Continue)
108
- })?;
109
- Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
110
- } else if let Some(v) = RArray::from_value(ob) {
111
- if v.is_empty() {
112
- Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
113
- } else {
114
- let list = v;
115
-
116
- let mut avs = Vec::with_capacity(25);
117
- let mut iter = list.each();
118
-
119
- for item in (&mut iter).take(25) {
120
- avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
121
- }
122
-
123
- let (dtype, _n_types) =
124
- any_values_to_supertype_and_n_dtypes(&avs).map_err(RbPolarsErr::from)?;
125
-
126
- // push the rest
127
- avs.reserve(list.len());
128
- for item in iter {
129
- avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
130
- }
131
-
132
- let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
133
- .map_err(RbPolarsErr::from)?;
134
- Ok(Wrap(AnyValue::List(s)))
135
- }
136
- } else if ob.is_kind_of(crate::rb_modules::datetime()) {
137
- let sec: i64 = ob.funcall("to_i", ())?;
138
- let nsec: i64 = ob.funcall("nsec", ())?;
139
- Ok(Wrap(AnyValue::Datetime(
140
- sec * 1_000_000_000 + nsec,
141
- TimeUnit::Nanoseconds,
142
- &None,
143
- )))
144
- } else if ob.is_kind_of(crate::rb_modules::date()) {
145
- // convert to DateTime for UTC
146
- let v = ob
147
- .funcall::<_, _, Value>("to_datetime", ())?
148
- .funcall::<_, _, Value>("to_time", ())?
149
- .funcall::<_, _, i64>("to_i", ())?;
150
- Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
151
- } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
152
- let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
153
- let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
154
- RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
155
- })?;
156
- if sign < 0 {
157
- // TODO better error
158
- v = v.checked_neg().unwrap();
159
- }
160
- Ok(Wrap(AnyValue::Decimal(v, scale)))
161
- } else {
162
- Err(RbPolarsErr::other(format!(
163
- "object type not supported {:?}",
164
- ob
165
- )))
166
- }
167
- }
168
- }
169
-
170
/// Builds an unsigned 128-bit decimal mantissa and scale from a digit string
/// and an exponent.
///
/// `digits` is a string of decimal digits and `exp` the exponent that goes
/// with it; the caller passes the parts returned by Ruby's `BigDecimal#split`.
/// The exponent is first reduced by the number of digits. If the result is
/// positive, the mantissa is multiplied by that power of ten and the scale is
/// `0`; otherwise the mantissa is left as parsed and the scale is the negated
/// result.
///
/// Returns `None` when `digits` does not parse as an `i128`, or when any step
/// of the arithmetic would overflow.
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
    // Checked arithmetic: an extreme exponent must yield `None`, not a panic
    // (debug builds) or a silently wrapped scale (release builds).
    let digit_count = i32::try_from(digits.len()).ok()?;
    let shifted = exp.checked_sub(digit_count)?;
    let mantissa = digits.parse::<i128>().ok()?;

    if shifted > 0 {
        let factor = 10_i128.checked_pow(shifted.unsigned_abs())?;
        Some((mantissa.checked_mul(factor)?, 0))
    } else {
        // `unsigned_abs` is well defined even for `i32::MIN`, unlike `-shifted`.
        let scale = usize::try_from(shifted.unsigned_abs()).ok()?;
        Some((mantissa, scale))
    }
}