polars-df 0.1.5 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +374 -222
- data/Cargo.toml +2 -2
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +4 -3
- data/ext/polars/src/apply/dataframe.rs +24 -13
- data/ext/polars/src/apply/mod.rs +3 -4
- data/ext/polars/src/conversion.rs +155 -31
- data/ext/polars/src/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dataframe.rs +8 -2
- data/ext/polars/src/lazy/dsl.rs +54 -18
- data/ext/polars/src/lib.rs +19 -11
- data/ext/polars/src/series.rs +32 -10
- data/lib/polars/data_frame.rb +25 -23
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +5 -2
- data/lib/polars/expr.rb +4 -3
- data/lib/polars/functions.rb +2 -2
- data/lib/polars/group_by.rb +33 -33
- data/lib/polars/lazy_frame.rb +8 -5
- data/lib/polars/lazy_functions.rb +8 -3
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/series.rb +64 -21
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +9 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +34 -33
- metadata +5 -4
data/ext/polars/src/lib.rs
CHANGED
@@ -56,6 +56,7 @@ fn series() -> RClass {
|
|
56
56
|
#[magnus::init]
|
57
57
|
fn init() -> RbResult<()> {
|
58
58
|
let module = module();
|
59
|
+
module.define_singleton_method("_dtype_cols", function!(dtype_cols, 1))?;
|
59
60
|
module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
|
60
61
|
module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
|
61
62
|
module.define_singleton_method("_concat_lf", function!(concat_lf, 3))?;
|
@@ -289,9 +290,9 @@ fn init() -> RbResult<()> {
|
|
289
290
|
class.define_method("cumprod", method!(RbExpr::cumprod, 1))?;
|
290
291
|
class.define_method("product", method!(RbExpr::product, 0))?;
|
291
292
|
class.define_method("shrink_dtype", method!(RbExpr::shrink_dtype, 0))?;
|
292
|
-
class.define_method("str_parse_date", method!(RbExpr::str_parse_date,
|
293
|
-
class.define_method("str_parse_datetime", method!(RbExpr::str_parse_datetime,
|
294
|
-
class.define_method("str_parse_time", method!(RbExpr::str_parse_time,
|
293
|
+
class.define_method("str_parse_date", method!(RbExpr::str_parse_date, 4))?;
|
294
|
+
class.define_method("str_parse_datetime", method!(RbExpr::str_parse_datetime, 5))?;
|
295
|
+
class.define_method("str_parse_time", method!(RbExpr::str_parse_time, 4))?;
|
295
296
|
class.define_method("str_strip", method!(RbExpr::str_strip, 1))?;
|
296
297
|
class.define_method("str_rstrip", method!(RbExpr::str_rstrip, 1))?;
|
297
298
|
class.define_method("str_lstrip", method!(RbExpr::str_lstrip, 1))?;
|
@@ -382,7 +383,7 @@ fn init() -> RbResult<()> {
|
|
382
383
|
class.define_method("suffix", method!(RbExpr::suffix, 1))?;
|
383
384
|
class.define_method("map_alias", method!(RbExpr::map_alias, 1))?;
|
384
385
|
class.define_method("exclude", method!(RbExpr::exclude, 1))?;
|
385
|
-
class.define_method("interpolate", method!(RbExpr::interpolate,
|
386
|
+
class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
|
386
387
|
class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 6))?;
|
387
388
|
class.define_method("rolling_min", method!(RbExpr::rolling_min, 6))?;
|
388
389
|
class.define_method("rolling_max", method!(RbExpr::rolling_max, 6))?;
|
@@ -410,7 +411,7 @@ fn init() -> RbResult<()> {
|
|
410
411
|
class.define_method("lst_slice", method!(RbExpr::lst_slice, 2))?;
|
411
412
|
class.define_method("lst_eval", method!(RbExpr::lst_eval, 2))?;
|
412
413
|
class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
|
413
|
-
class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct,
|
414
|
+
class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 3))?;
|
414
415
|
class.define_method("rank", method!(RbExpr::rank, 2))?;
|
415
416
|
class.define_method("diff", method!(RbExpr::diff, 2))?;
|
416
417
|
class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
|
@@ -507,7 +508,7 @@ fn init() -> RbResult<()> {
|
|
507
508
|
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
508
509
|
class.define_method("groupby", method!(RbLazyFrame::groupby, 2))?;
|
509
510
|
class.define_method("groupby_rolling", method!(RbLazyFrame::groupby_rolling, 5))?;
|
510
|
-
class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic,
|
511
|
+
class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 9))?;
|
511
512
|
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
512
513
|
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
|
513
514
|
class.define_method("join", method!(RbLazyFrame::join, 7))?;
|
@@ -792,6 +793,15 @@ fn init() -> RbResult<()> {
|
|
792
793
|
Ok(())
|
793
794
|
}
|
794
795
|
|
796
|
+
fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
|
797
|
+
let dtypes = dtypes
|
798
|
+
.each()
|
799
|
+
.map(|v| v?.try_convert::<Wrap<DataType>>())
|
800
|
+
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
801
|
+
let dtypes = vec_extract_wrapped(dtypes);
|
802
|
+
Ok(crate::lazy::dsl::dtype_cols(dtypes))
|
803
|
+
}
|
804
|
+
|
795
805
|
#[allow(clippy::too_many_arguments)]
|
796
806
|
fn rb_duration(
|
797
807
|
days: Option<&RbExpr>,
|
@@ -927,7 +937,7 @@ fn collect_all(lfs: RArray) -> RbResult<Vec<RbDataFrame>> {
|
|
927
937
|
.map(|v| v?.try_convert::<&RbLazyFrame>())
|
928
938
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
929
939
|
|
930
|
-
|
940
|
+
polars_core::POOL.install(|| {
|
931
941
|
lfs.par_iter()
|
932
942
|
.map(|lf| {
|
933
943
|
let df = lf.ldf.clone().collect()?;
|
@@ -935,9 +945,7 @@ fn collect_all(lfs: RArray) -> RbResult<Vec<RbDataFrame>> {
|
|
935
945
|
})
|
936
946
|
.collect::<polars_core::error::PolarsResult<Vec<_>>>()
|
937
947
|
.map_err(RbPolarsErr::from)
|
938
|
-
})
|
939
|
-
|
940
|
-
Ok(out?)
|
948
|
+
})
|
941
949
|
}
|
942
950
|
|
943
951
|
fn rb_date_range(
|
@@ -956,7 +964,7 @@ fn rb_date_range(
|
|
956
964
|
Duration::parse(&every),
|
957
965
|
closed.0,
|
958
966
|
tu.0,
|
959
|
-
tz,
|
967
|
+
tz.as_ref(),
|
960
968
|
)
|
961
969
|
.into_series()
|
962
970
|
.into()
|
data/ext/polars/src/series.rs
CHANGED
@@ -144,7 +144,7 @@ impl RbSeries {
|
|
144
144
|
}
|
145
145
|
|
146
146
|
pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
|
147
|
-
let val = format!("{}", self.series.borrow().get(index));
|
147
|
+
let val = format!("{}", self.series.borrow().get(index).unwrap());
|
148
148
|
if let DataType::Utf8 | DataType::Categorical(_) = self.series.borrow().dtype() {
|
149
149
|
let v_trunc = &val[..val
|
150
150
|
.char_indices()
|
@@ -172,8 +172,8 @@ impl RbSeries {
|
|
172
172
|
}
|
173
173
|
}
|
174
174
|
|
175
|
-
pub fn get_idx(&self, idx: usize) -> Value {
|
176
|
-
Wrap(self.series.borrow().get(idx)).into()
|
175
|
+
pub fn get_idx(&self, idx: usize) -> RbResult<Value> {
|
176
|
+
Ok(Wrap(self.series.borrow().get(idx).map_err(RbPolarsErr::from)?).into())
|
177
177
|
}
|
178
178
|
|
179
179
|
pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
|
@@ -247,16 +247,37 @@ impl RbSeries {
|
|
247
247
|
}
|
248
248
|
}
|
249
249
|
|
250
|
-
pub fn max(&self) -> Value {
|
251
|
-
Wrap(
|
250
|
+
pub fn max(&self) -> RbResult<Value> {
|
251
|
+
Ok(Wrap(
|
252
|
+
self.series
|
253
|
+
.borrow()
|
254
|
+
.max_as_series()
|
255
|
+
.get(0)
|
256
|
+
.map_err(RbPolarsErr::from)?,
|
257
|
+
)
|
258
|
+
.into())
|
252
259
|
}
|
253
260
|
|
254
|
-
pub fn min(&self) -> Value {
|
255
|
-
Wrap(
|
261
|
+
pub fn min(&self) -> RbResult<Value> {
|
262
|
+
Ok(Wrap(
|
263
|
+
self.series
|
264
|
+
.borrow()
|
265
|
+
.min_as_series()
|
266
|
+
.get(0)
|
267
|
+
.map_err(RbPolarsErr::from)?,
|
268
|
+
)
|
269
|
+
.into())
|
256
270
|
}
|
257
271
|
|
258
|
-
pub fn sum(&self) -> Value {
|
259
|
-
Wrap(
|
272
|
+
pub fn sum(&self) -> RbResult<Value> {
|
273
|
+
Ok(Wrap(
|
274
|
+
self.series
|
275
|
+
.borrow()
|
276
|
+
.sum_as_series()
|
277
|
+
.get(0)
|
278
|
+
.map_err(RbPolarsErr::from)?,
|
279
|
+
)
|
280
|
+
.into())
|
260
281
|
}
|
261
282
|
|
262
283
|
pub fn n_chunks(&self) -> usize {
|
@@ -522,7 +543,8 @@ impl RbSeries {
|
|
522
543
|
.borrow()
|
523
544
|
.quantile_as_series(quantile, interpolation.0)
|
524
545
|
.map_err(|_| RbValueError::new_err("invalid quantile".into()))?
|
525
|
-
.get(0)
|
546
|
+
.get(0)
|
547
|
+
.unwrap_or(AnyValue::Null),
|
526
548
|
)
|
527
549
|
.into())
|
528
550
|
}
|
data/lib/polars/data_frame.rb
CHANGED
@@ -354,7 +354,7 @@ module Polars
|
|
354
354
|
# }
|
355
355
|
# )
|
356
356
|
# df.dtypes
|
357
|
-
# # => [
|
357
|
+
# # => [Polars::Int64, Polars::Float64, Polars::Utf8]
|
358
358
|
def dtypes
|
359
359
|
_df.dtypes
|
360
360
|
end
|
@@ -372,7 +372,7 @@ module Polars
|
|
372
372
|
# }
|
373
373
|
# )
|
374
374
|
# df.schema
|
375
|
-
# # => {"foo"
|
375
|
+
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::Utf8}
|
376
376
|
def schema
|
377
377
|
columns.zip(dtypes).to_h
|
378
378
|
end
|
@@ -1178,25 +1178,25 @@ module Polars
|
|
1178
1178
|
# df.describe
|
1179
1179
|
# # =>
|
1180
1180
|
# # shape: (7, 6)
|
1181
|
-
# #
|
1182
|
-
# # │ describe ┆ a ┆ b ┆ c
|
1183
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
1184
|
-
# # │ str ┆ f64 ┆ f64 ┆ f64
|
1185
|
-
# #
|
1186
|
-
# # │ count ┆ 3.0 ┆ 3.0 ┆ 3.0
|
1187
|
-
# #
|
1188
|
-
# # │ null_count ┆ 0.0 ┆ 1.0 ┆ 0.0
|
1189
|
-
# #
|
1190
|
-
# # │ mean ┆ 2.266667 ┆ 4.5 ┆
|
1191
|
-
# #
|
1192
|
-
# # │ std ┆ 1.101514 ┆ 0.707107 ┆
|
1193
|
-
# #
|
1194
|
-
# # │ min ┆ 1.0 ┆ 4.0 ┆ 0.0
|
1195
|
-
# #
|
1196
|
-
# # │ max ┆ 3.0 ┆ 5.0 ┆ 1.0
|
1197
|
-
# #
|
1198
|
-
# # │ median ┆ 2.8 ┆ 4.5 ┆
|
1199
|
-
# #
|
1181
|
+
# # ┌────────────┬──────────┬──────────┬──────────┬──────┬──────┐
|
1182
|
+
# # │ describe ┆ a ┆ b ┆ c ┆ d ┆ e │
|
1183
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
1184
|
+
# # │ str ┆ f64 ┆ f64 ┆ f64 ┆ str ┆ str │
|
1185
|
+
# # ╞════════════╪══════════╪══════════╪══════════╪══════╪══════╡
|
1186
|
+
# # │ count ┆ 3.0 ┆ 3.0 ┆ 3.0 ┆ 3 ┆ 3 │
|
1187
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1188
|
+
# # │ null_count ┆ 0.0 ┆ 1.0 ┆ 0.0 ┆ 1 ┆ 1 │
|
1189
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1190
|
+
# # │ mean ┆ 2.266667 ┆ 4.5 ┆ 0.666667 ┆ null ┆ null │
|
1191
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1192
|
+
# # │ std ┆ 1.101514 ┆ 0.707107 ┆ 0.57735 ┆ null ┆ null │
|
1193
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1194
|
+
# # │ min ┆ 1.0 ┆ 4.0 ┆ 0.0 ┆ b ┆ eur │
|
1195
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1196
|
+
# # │ max ┆ 3.0 ┆ 5.0 ┆ 1.0 ┆ c ┆ usd │
|
1197
|
+
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
1198
|
+
# # │ median ┆ 2.8 ┆ 4.5 ┆ 1.0 ┆ null ┆ null │
|
1199
|
+
# # └────────────┴──────────┴──────────┴──────────┴──────┴──────┘
|
1200
1200
|
def describe
|
1201
1201
|
describe_cast = lambda do |stat|
|
1202
1202
|
columns = []
|
@@ -2074,7 +2074,8 @@ module Polars
|
|
2074
2074
|
truncate: true,
|
2075
2075
|
include_boundaries: false,
|
2076
2076
|
closed: "left",
|
2077
|
-
by: nil
|
2077
|
+
by: nil,
|
2078
|
+
start_by: "window"
|
2078
2079
|
)
|
2079
2080
|
DynamicGroupBy.new(
|
2080
2081
|
self,
|
@@ -2085,7 +2086,8 @@ module Polars
|
|
2085
2086
|
truncate,
|
2086
2087
|
include_boundaries,
|
2087
2088
|
closed,
|
2088
|
-
by
|
2089
|
+
by,
|
2090
|
+
start_by
|
2089
2091
|
)
|
2090
2092
|
end
|
2091
2093
|
|
@@ -0,0 +1,122 @@
|
|
1
|
+
module Polars
|
2
|
+
# Base class for all Polars data types.
|
3
|
+
class DataType
|
4
|
+
end
|
5
|
+
|
6
|
+
# 8-bit signed integer type.
|
7
|
+
class Int8 < DataType
|
8
|
+
end
|
9
|
+
|
10
|
+
# 16-bit signed integer type.
|
11
|
+
class Int16 < DataType
|
12
|
+
end
|
13
|
+
|
14
|
+
# 32-bit signed integer type.
|
15
|
+
class Int32 < DataType
|
16
|
+
end
|
17
|
+
|
18
|
+
# 64-bit signed integer type.
|
19
|
+
class Int64 < DataType
|
20
|
+
end
|
21
|
+
|
22
|
+
# 8-bit unsigned integer type.
|
23
|
+
class UInt8 < DataType
|
24
|
+
end
|
25
|
+
|
26
|
+
# 16-bit unsigned integer type.
|
27
|
+
class UInt16 < DataType
|
28
|
+
end
|
29
|
+
|
30
|
+
# 32-bit unsigned integer type.
|
31
|
+
class UInt32 < DataType
|
32
|
+
end
|
33
|
+
|
34
|
+
# 64-bit unsigned integer type.
|
35
|
+
class UInt64 < DataType
|
36
|
+
end
|
37
|
+
|
38
|
+
# 32-bit floating point type.
|
39
|
+
class Float32 < DataType
|
40
|
+
end
|
41
|
+
|
42
|
+
# 64-bit floating point type.
|
43
|
+
class Float64 < DataType
|
44
|
+
end
|
45
|
+
|
46
|
+
# Boolean type.
|
47
|
+
class Boolean < DataType
|
48
|
+
end
|
49
|
+
|
50
|
+
# UTF-8 encoded string type.
|
51
|
+
class Utf8 < DataType
|
52
|
+
end
|
53
|
+
|
54
|
+
# Binary type.
|
55
|
+
class Binary < DataType
|
56
|
+
end
|
57
|
+
|
58
|
+
# Type representing Null / nil values.
|
59
|
+
class Null < DataType
|
60
|
+
end
|
61
|
+
|
62
|
+
# Type representing Datatype values that could not be determined statically.
|
63
|
+
class Unknown < DataType
|
64
|
+
end
|
65
|
+
|
66
|
+
# Nested list/array type.
|
67
|
+
class List < DataType
|
68
|
+
def initialize(inner)
|
69
|
+
@inner = Utils.rb_type_to_dtype(inner)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
# Calendar date type.
|
74
|
+
class Date < DataType
|
75
|
+
end
|
76
|
+
|
77
|
+
# Calendar date and time type.
|
78
|
+
class Datetime < DataType
|
79
|
+
def initialize(time_unit = "us", time_zone = nil)
|
80
|
+
@tu = time_unit || "us"
|
81
|
+
@time_zone = time_zone
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# Time duration/delta type.
|
86
|
+
class Duration < DataType
|
87
|
+
def initialize(time_unit = "us")
|
88
|
+
@tu = time_unit
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Time of day type.
|
93
|
+
class Time < DataType
|
94
|
+
end
|
95
|
+
|
96
|
+
# Type for wrapping arbitrary Ruby objects.
|
97
|
+
class Object < DataType
|
98
|
+
end
|
99
|
+
|
100
|
+
# A categorical encoding of a set of strings.
|
101
|
+
class Categorical < DataType
|
102
|
+
end
|
103
|
+
|
104
|
+
# Definition of a single field within a `Struct` DataType.
|
105
|
+
class Field < DataType
|
106
|
+
def initialize(name, dtype)
|
107
|
+
@name = name
|
108
|
+
@dtype = Utils.rb_type_to_dtype(dtype)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
# Struct composite type.
|
113
|
+
class Struct < DataType
|
114
|
+
def initialize(fields)
|
115
|
+
if fields.is_a?(Hash)
|
116
|
+
@fields = fields.map { |n, d| Field.new(n, d) }
|
117
|
+
else
|
118
|
+
@fields = fields
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
@@ -465,7 +465,7 @@ module Polars
|
|
465
465
|
#
|
466
466
|
# Applies to Date and Datetime columns.
|
467
467
|
#
|
468
|
-
# Returns the weekday number where monday =
|
468
|
+
# Returns the ISO weekday number, where Monday = 1 and Sunday = 7.
|
469
469
|
#
|
470
470
|
# @return [Expr]
|
471
471
|
#
|
@@ -502,11 +502,11 @@ module Polars
|
|
502
502
|
# # │ --- ┆ --- ┆ --- │
|
503
503
|
# # │ u32 ┆ u32 ┆ u32 │
|
504
504
|
# # ╞═════════╪══════════════╪═════════════╡
|
505
|
-
# # │
|
505
|
+
# # │ 1 ┆ 1 ┆ 1 │
|
506
506
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
507
|
-
# # │
|
507
|
+
# # │ 4 ┆ 4 ┆ 4 │
|
508
508
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
509
|
-
# # │
|
509
|
+
# # │ 7 ┆ 7 ┆ 7 │
|
510
510
|
# # └─────────┴──────────────┴─────────────┘
|
511
511
|
def weekday
|
512
512
|
Utils.wrap_expr(_rbexpr.weekday)
|
@@ -554,11 +554,11 @@ module Polars
|
|
554
554
|
# # │ --- ┆ --- ┆ --- │
|
555
555
|
# # │ u32 ┆ u32 ┆ u32 │
|
556
556
|
# # ╞═════════╪══════════════╪═════════════╡
|
557
|
-
# # │
|
557
|
+
# # │ 1 ┆ 1 ┆ 1 │
|
558
558
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
559
|
-
# # │
|
559
|
+
# # │ 4 ┆ 4 ┆ 4 │
|
560
560
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
561
|
-
# # │
|
561
|
+
# # │ 7 ┆ 7 ┆ 7 │
|
562
562
|
# # └─────────┴──────────────┴─────────────┘
|
563
563
|
def day
|
564
564
|
Utils.wrap_expr(_rbexpr.day)
|
@@ -606,11 +606,11 @@ module Polars
|
|
606
606
|
# # │ --- ┆ --- ┆ --- │
|
607
607
|
# # │ u32 ┆ u32 ┆ u32 │
|
608
608
|
# # ╞═════════╪══════════════╪═════════════╡
|
609
|
-
# # │
|
609
|
+
# # │ 1 ┆ 1 ┆ 1 │
|
610
610
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
611
|
-
# # │
|
611
|
+
# # │ 4 ┆ 4 ┆ 4 │
|
612
612
|
# # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
613
|
-
# # │
|
613
|
+
# # │ 7 ┆ 7 ┆ 7 │
|
614
614
|
# # └─────────┴──────────────┴─────────────┘
|
615
615
|
def ordinal_day
|
616
616
|
Utils.wrap_expr(_rbexpr.ordinal_day)
|
@@ -317,7 +317,7 @@ module Polars
|
|
317
317
|
#
|
318
318
|
# Applies to Date and Datetime columns.
|
319
319
|
#
|
320
|
-
# Returns the weekday number where monday =
|
320
|
+
# Returns the ISO weekday number, where Monday = 1 and Sunday = 7.
|
321
321
|
#
|
322
322
|
# @return [Series]
|
323
323
|
#
|
@@ -344,13 +344,13 @@ module Polars
|
|
344
344
|
# # shape: (7,)
|
345
345
|
# # Series: '' [u32]
|
346
346
|
# # [
|
347
|
-
# # 0
|
348
347
|
# # 1
|
349
348
|
# # 2
|
350
349
|
# # 3
|
351
350
|
# # 4
|
352
351
|
# # 5
|
353
352
|
# # 6
|
353
|
+
# # 7
|
354
354
|
# # ]
|
355
355
|
def weekday
|
356
356
|
super
|
@@ -973,9 +973,9 @@ module Polars
|
|
973
973
|
# # shape: (3,)
|
974
974
|
# # Series: 'NYC' [datetime[μs, America/New_York]]
|
975
975
|
# # [
|
976
|
-
# # 2020-
|
977
|
-
# # 2020-
|
978
|
-
# # 2020-
|
976
|
+
# # 2020-03-01 00:00:00 EST
|
977
|
+
# # 2020-04-01 01:00:00 EDT
|
978
|
+
# # 2020-05-01 01:00:00 EDT
|
979
979
|
# # ]
|
980
980
|
#
|
981
981
|
# @example Timestamps have changed after cast_time_zone
|
@@ -984,9 +984,9 @@ module Polars
|
|
984
984
|
# # shape: (3,)
|
985
985
|
# # Series: 'NYC' [i64]
|
986
986
|
# # [
|
987
|
-
# #
|
988
|
-
# #
|
989
|
-
# #
|
987
|
+
# # 1583038800
|
988
|
+
# # 1585717200
|
989
|
+
# # 1588309200
|
990
990
|
# # ]
|
991
991
|
def cast_time_zone(tz)
|
992
992
|
super
|
@@ -13,7 +13,8 @@ module Polars
|
|
13
13
|
truncate,
|
14
14
|
include_boundaries,
|
15
15
|
closed,
|
16
|
-
by
|
16
|
+
by,
|
17
|
+
start_by
|
17
18
|
)
|
18
19
|
period = Utils._timedelta_to_pl_duration(period)
|
19
20
|
offset = Utils._timedelta_to_pl_duration(offset)
|
@@ -28,6 +29,7 @@ module Polars
|
|
28
29
|
@include_boundaries = include_boundaries
|
29
30
|
@closed = closed
|
30
31
|
@by = by
|
32
|
+
@start_by = start_by
|
31
33
|
end
|
32
34
|
|
33
35
|
def agg(aggs)
|
@@ -40,7 +42,8 @@ module Polars
|
|
40
42
|
truncate: @truncate,
|
41
43
|
include_boundaries: @include_boundaries,
|
42
44
|
closed: @closed,
|
43
|
-
by: @by
|
45
|
+
by: @by,
|
46
|
+
start_by: @start_by
|
44
47
|
)
|
45
48
|
.agg(aggs)
|
46
49
|
.collect(no_optimization: true, string_cache: false)
|
data/lib/polars/expr.rb
CHANGED
@@ -2486,7 +2486,8 @@ module Polars
|
|
2486
2486
|
# # │ 1.5 │
|
2487
2487
|
# # └─────┘
|
2488
2488
|
def quantile(quantile, interpolation: "nearest")
|
2489
|
-
|
2489
|
+
quantile = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
|
2490
|
+
wrap_expr(_rbexpr.quantile(quantile._rbexpr, interpolation))
|
2490
2491
|
end
|
2491
2492
|
|
2492
2493
|
# Filter a single column.
|
@@ -3151,8 +3152,8 @@ module Polars
|
|
3151
3152
|
# # ├╌╌╌╌╌┼╌╌╌╌╌┤
|
3152
3153
|
# # │ 3 ┆ 3.0 │
|
3153
3154
|
# # └─────┴─────┘
|
3154
|
-
def interpolate
|
3155
|
-
wrap_expr(_rbexpr.interpolate)
|
3155
|
+
def interpolate(method: "linear")
|
3156
|
+
wrap_expr(_rbexpr.interpolate(method))
|
3156
3157
|
end
|
3157
3158
|
|
3158
3159
|
# Apply a rolling min (moving min) over the values in this array.
|
data/lib/polars/functions.rb
CHANGED
@@ -438,8 +438,8 @@ module Polars
|
|
438
438
|
|
439
439
|
def _ensure_datetime(value)
|
440
440
|
is_date_type = false
|
441
|
-
if !value.is_a?(DateTime)
|
442
|
-
value = DateTime.new(value.year, value.month, value.day)
|
441
|
+
if !value.is_a?(::DateTime)
|
442
|
+
value = ::DateTime.new(value.year, value.month, value.day)
|
443
443
|
is_date_type = true
|
444
444
|
end
|
445
445
|
[value, is_date_type]
|
data/lib/polars/group_by.rb
CHANGED
@@ -334,17 +334,17 @@ module Polars
|
|
334
334
|
# df.groupby("d", maintain_order: true).min
|
335
335
|
# # =>
|
336
336
|
# # shape: (3, 4)
|
337
|
-
# #
|
338
|
-
# # │ d ┆ a ┆ b ┆ c
|
339
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
340
|
-
# # │ str ┆ i64 ┆ f64 ┆
|
341
|
-
# #
|
342
|
-
# # │ Apple ┆ 1 ┆ 0.5 ┆
|
343
|
-
# #
|
344
|
-
# # │ Orange ┆ 2 ┆ 0.5 ┆
|
345
|
-
# #
|
346
|
-
# # │ Banana ┆ 4 ┆ 13.0 ┆
|
347
|
-
# #
|
337
|
+
# # ┌────────┬─────┬──────┬───────┐
|
338
|
+
# # │ d ┆ a ┆ b ┆ c │
|
339
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
340
|
+
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
341
|
+
# # ╞════════╪═════╪══════╪═══════╡
|
342
|
+
# # │ Apple ┆ 1 ┆ 0.5 ┆ false │
|
343
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
344
|
+
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
345
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
346
|
+
# # │ Banana ┆ 4 ┆ 13.0 ┆ false │
|
347
|
+
# # └────────┴─────┴──────┴───────┘
|
348
348
|
def min
|
349
349
|
agg(Polars.all.min)
|
350
350
|
end
|
@@ -365,17 +365,17 @@ module Polars
|
|
365
365
|
# df.groupby("d", maintain_order: true).max
|
366
366
|
# # =>
|
367
367
|
# # shape: (3, 4)
|
368
|
-
# #
|
369
|
-
# # │ d ┆ a ┆ b ┆ c
|
370
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
371
|
-
# # │ str ┆ i64 ┆ f64 ┆
|
372
|
-
# #
|
373
|
-
# # │ Apple ┆ 3 ┆ 10.0 ┆
|
374
|
-
# #
|
375
|
-
# # │ Orange ┆ 2 ┆ 0.5 ┆
|
376
|
-
# #
|
377
|
-
# # │ Banana ┆ 5 ┆ 14.0 ┆
|
378
|
-
# #
|
368
|
+
# # ┌────────┬─────┬──────┬──────┐
|
369
|
+
# # │ d ┆ a ┆ b ┆ c │
|
370
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
371
|
+
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
372
|
+
# # ╞════════╪═════╪══════╪══════╡
|
373
|
+
# # │ Apple ┆ 3 ┆ 10.0 ┆ true │
|
374
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
375
|
+
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
376
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
377
|
+
# # │ Banana ┆ 5 ┆ 14.0 ┆ true │
|
378
|
+
# # └────────┴─────┴──────┴──────┘
|
379
379
|
def max
|
380
380
|
agg(Polars.all.max)
|
381
381
|
end
|
@@ -427,17 +427,17 @@ module Polars
|
|
427
427
|
# df.groupby("d", maintain_order: true).mean
|
428
428
|
# # =>
|
429
429
|
# # shape: (3, 4)
|
430
|
-
# #
|
431
|
-
# # │ d ┆ a ┆ b ┆ c
|
432
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
433
|
-
# # │ str ┆ f64 ┆ f64 ┆
|
434
|
-
# #
|
435
|
-
# # │ Apple ┆ 2.0 ┆ 4.833333 ┆
|
436
|
-
# #
|
437
|
-
# # │ Orange ┆ 2.0 ┆ 0.5 ┆
|
438
|
-
# #
|
439
|
-
# # │ Banana ┆ 4.5 ┆ 13.5 ┆
|
440
|
-
# #
|
430
|
+
# # ┌────────┬─────┬──────────┬──────────┐
|
431
|
+
# # │ d ┆ a ┆ b ┆ c │
|
432
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
433
|
+
# # │ str ┆ f64 ┆ f64 ┆ f64 │
|
434
|
+
# # ╞════════╪═════╪══════════╪══════════╡
|
435
|
+
# # │ Apple ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
|
436
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
437
|
+
# # │ Orange ┆ 2.0 ┆ 0.5 ┆ 1.0 │
|
438
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
439
|
+
# # │ Banana ┆ 4.5 ┆ 13.5 ┆ 0.5 │
|
440
|
+
# # └────────┴─────┴──────────┴──────────┘
|
441
441
|
def mean
|
442
442
|
agg(Polars.all.mean)
|
443
443
|
end
|