polars-df 0.18.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.18.0"
3
+ version = "0.19.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -12,22 +12,22 @@ crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
14
  ahash = "0.8"
15
- arrow = { package = "polars-arrow", version = "=0.47.1" }
15
+ arrow = { package = "polars-arrow", version = "=0.48.0" }
16
16
  bytes = "1"
17
17
  chrono = "0.4"
18
18
  either = "1.8"
19
19
  magnus = "0.7"
20
20
  num-traits = "0.2"
21
- polars-core = "=0.47.1"
22
- polars-plan = "=0.47.1"
23
- polars-parquet = "=0.47.1"
24
- polars-utils = "=0.47.1"
21
+ polars-core = "=0.48.0"
22
+ polars-plan = "=0.48.0"
23
+ polars-parquet = "=0.48.0"
24
+ polars-utils = "=0.48.0"
25
25
  rayon = "1.9"
26
26
  regex = "1"
27
27
  serde_json = "1"
28
28
 
29
29
  [dependencies.polars]
30
- version = "=0.47.1"
30
+ version = "=0.48.0"
31
31
  features = [
32
32
  "abs",
33
33
  "approx_unique",
@@ -173,7 +173,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
173
173
  }
174
174
 
175
175
  fn get_struct(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
176
- let dict = RHash::from_value(ob).unwrap();
176
+ let dict = RHash::try_convert(ob)?;
177
177
  let len = dict.len();
178
178
  let mut keys = Vec::with_capacity(len);
179
179
  let mut vals = Vec::with_capacity(len);
@@ -210,7 +210,10 @@ impl IntoValue for Wrap<DataType> {
210
210
  DataType::Datetime(tu, tz) => {
211
211
  let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
212
212
  datetime_class
213
- .funcall::<_, _, Value>("new", (tu.to_ascii(), tz.as_deref()))
213
+ .funcall::<_, _, Value>(
214
+ "new",
215
+ (tu.to_ascii(), tz.as_deref().map(|x| x.as_str())),
216
+ )
214
217
  .unwrap()
215
218
  }
216
219
  DataType::Duration(tu) => {
@@ -375,7 +378,10 @@ impl TryConvert for Wrap<DataType> {
375
378
  let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
376
379
  let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
377
380
  let time_zone: Option<String> = ob.funcall("time_zone", ())?;
378
- DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
381
+ DataType::Datetime(
382
+ time_unit,
383
+ TimeZone::opt_try_new(time_zone.as_deref()).map_err(RbPolarsErr::from)?,
384
+ )
379
385
  }
380
386
  "Polars::Duration" => {
381
387
  let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
@@ -1229,3 +1235,13 @@ impl TryConvert for RbCompatLevel {
1229
1235
  }))
1230
1236
  }
1231
1237
  }
1238
+
1239
+ impl TryConvert for Wrap<Option<TimeZone>> {
1240
+ fn try_convert(ob: Value) -> RbResult<Self> {
1241
+ let tz = Option::<Wrap<PlSmallStr>>::try_convert(ob)?;
1242
+
1243
+ let tz = tz.map(|x| x.0);
1244
+
1245
+ Ok(Wrap(TimeZone::opt_try_new(tz).map_err(RbPolarsErr::from)?))
1246
+ }
1247
+ }
@@ -75,11 +75,11 @@ impl RbExpr {
75
75
  .into()
76
76
  }
77
77
 
78
- pub fn arr_contains(&self, other: &RbExpr) -> Self {
78
+ pub fn arr_contains(&self, other: &RbExpr, nulls_equal: bool) -> Self {
79
79
  self.inner
80
80
  .clone()
81
81
  .arr()
82
- .contains(other.inner.clone())
82
+ .contains(other.inner.clone(), nulls_equal)
83
83
  .into()
84
84
  }
85
85
 
@@ -1,7 +1,7 @@
1
1
  use polars::prelude::*;
2
2
 
3
3
  use crate::conversion::Wrap;
4
- use crate::RbExpr;
4
+ use crate::{RbExpr, RbPolarsErr, RbResult};
5
5
 
6
6
  impl RbExpr {
7
7
  pub fn dt_to_string(&self, format: String) -> Self {
@@ -30,12 +30,17 @@ impl RbExpr {
30
30
  self.inner.clone().dt().with_time_unit(tu.0).into()
31
31
  }
32
32
 
33
- pub fn dt_convert_time_zone(&self, time_zone: String) -> Self {
34
- self.inner
33
+ pub fn dt_convert_time_zone(&self, time_zone: String) -> RbResult<Self> {
34
+ Ok(self
35
+ .inner
35
36
  .clone()
36
37
  .dt()
37
- .convert_time_zone(time_zone.into())
38
- .into()
38
+ .convert_time_zone(
39
+ TimeZone::opt_try_new(Some(PlSmallStr::from(time_zone)))
40
+ .map_err(RbPolarsErr::from)?
41
+ .unwrap_or(TimeZone::UTC),
42
+ )
43
+ .into())
39
44
  }
40
45
 
41
46
  pub fn dt_cast_time_unit(&self, tu: Wrap<TimeUnit>) -> Self {
@@ -47,16 +52,18 @@ impl RbExpr {
47
52
  time_zone: Option<String>,
48
53
  ambiguous: &Self,
49
54
  non_existent: Wrap<NonExistent>,
50
- ) -> Self {
51
- self.inner
55
+ ) -> RbResult<Self> {
56
+ Ok(self
57
+ .inner
52
58
  .clone()
53
59
  .dt()
54
60
  .replace_time_zone(
55
- time_zone.map(|x| x.into()),
61
+ TimeZone::opt_try_new(time_zone.map(PlSmallStr::from_string))
62
+ .map_err(RbPolarsErr::from)?,
56
63
  ambiguous.inner.clone(),
57
64
  non_existent.0,
58
65
  )
59
- .into()
66
+ .into())
60
67
  }
61
68
 
62
69
  pub fn dt_truncate(&self, every: &Self) -> Self {
@@ -23,11 +23,11 @@ impl RbExpr {
23
23
  self.inner.clone().list().arg_min().into()
24
24
  }
25
25
 
26
- pub fn list_contains(&self, other: &RbExpr) -> Self {
26
+ pub fn list_contains(&self, other: &RbExpr, nulls_equal: bool) -> Self {
27
27
  self.inner
28
28
  .clone()
29
29
  .list()
30
- .contains(other.inner.clone())
30
+ .contains(other.inner.clone(), nulls_equal)
31
31
  .into()
32
32
  }
33
33
 
@@ -180,7 +180,7 @@ impl RbExpr {
180
180
  &self,
181
181
  width_strat: Wrap<ListToStructWidthStrategy>,
182
182
  name_gen: Option<Value>,
183
- upper_bound: usize,
183
+ upper_bound: Option<usize>,
184
184
  ) -> RbResult<Self> {
185
185
  let name_gen = name_gen.map(|lambda| {
186
186
  let lambda = Opaque::from(lambda);
@@ -35,14 +35,14 @@ impl RbExpr {
35
35
  &self,
36
36
  format: Option<String>,
37
37
  time_unit: Option<Wrap<TimeUnit>>,
38
- time_zone: Option<Wrap<TimeZone>>,
38
+ time_zone: Wrap<Option<TimeZone>>,
39
39
  strict: bool,
40
40
  exact: bool,
41
41
  cache: bool,
42
42
  ambiguous: &Self,
43
43
  ) -> Self {
44
44
  let format = format.map(|x| x.into());
45
- let time_zone = time_zone.map(|x| x.0);
45
+ let time_zone = time_zone.0;
46
46
 
47
47
  let options = StrptimeOptions {
48
48
  format,
@@ -310,7 +310,7 @@ pub fn last() -> RbExpr {
310
310
  dsl::last().into()
311
311
  }
312
312
 
313
- pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
313
+ pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr> {
314
314
  if value.is_kind_of(class::true_class()) || value.is_kind_of(class::false_class()) {
315
315
  Ok(dsl::lit(bool::try_convert(value)?).into())
316
316
  } else if let Some(v) = Integer::from_value(value) {
@@ -336,7 +336,16 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
336
336
  Ok(dsl::lit(unsafe { v.as_slice() }).into())
337
337
  }
338
338
  } else if let Ok(series) = Obj::<RbSeries>::try_convert(value) {
339
- Ok(dsl::lit(series.series.borrow().clone()).into())
339
+ let s = series.series.borrow();
340
+ if is_scalar {
341
+ let av = s
342
+ .get(0)
343
+ .map_err(|_| RbValueError::new_err("expected at least 1 value"))?;
344
+ let av = av.into_static();
345
+ Ok(dsl::lit(Scalar::new(s.dtype().clone(), av)).into())
346
+ } else {
347
+ Ok(dsl::lit(s.clone()).into())
348
+ }
340
349
  } else if value.is_nil() {
341
350
  Ok(dsl::lit(Null {}).into())
342
351
  } else if allow_object {
@@ -56,14 +56,14 @@ pub fn datetime_range(
56
56
  every: String,
57
57
  closed: Wrap<ClosedWindow>,
58
58
  time_unit: Option<Wrap<TimeUnit>>,
59
- time_zone: Option<Wrap<TimeZone>>,
59
+ time_zone: Wrap<Option<TimeZone>>,
60
60
  ) -> RbExpr {
61
61
  let start = start.inner.clone();
62
62
  let end = end.inner.clone();
63
63
  let every = Duration::parse(&every);
64
64
  let closed = closed.0;
65
65
  let time_unit = time_unit.map(|x| x.0);
66
- let time_zone = time_zone.map(|x| x.0);
66
+ let time_zone = time_zone.0;
67
67
  dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
68
68
  }
69
69
 
@@ -73,14 +73,14 @@ pub fn datetime_ranges(
73
73
  every: String,
74
74
  closed: Wrap<ClosedWindow>,
75
75
  time_unit: Option<Wrap<TimeUnit>>,
76
- time_zone: Option<Wrap<TimeZone>>,
76
+ time_zone: Wrap<Option<TimeZone>>,
77
77
  ) -> RbExpr {
78
78
  let start = start.inner.clone();
79
79
  let end = end.inner.clone();
80
80
  let every = Duration::parse(&every);
81
81
  let closed = closed.0;
82
82
  let time_unit = time_unit.map(|x| x.0);
83
- let time_zone = time_zone.map(|x| x.0);
83
+ let time_zone = time_zone.0;
84
84
  dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
85
85
  }
86
86
 
@@ -392,6 +392,8 @@ impl RbLazyFrame {
392
392
  statistics: statistics.0,
393
393
  row_group_size,
394
394
  data_page_size,
395
+ key_value_metadata: None,
396
+ field_overwrites: Vec::new(),
395
397
  };
396
398
 
397
399
  let cloud_options = match target.base_path() {
@@ -320,7 +320,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
320
320
  class.define_method("arr_arg_max", method!(RbExpr::arr_arg_max, 0))?;
321
321
  class.define_method("arr_get", method!(RbExpr::arr_get, 2))?;
322
322
  class.define_method("arr_join", method!(RbExpr::arr_join, 2))?;
323
- class.define_method("arr_contains", method!(RbExpr::arr_contains, 1))?;
323
+ class.define_method("arr_contains", method!(RbExpr::arr_contains, 2))?;
324
324
  class.define_method("arr_count_matches", method!(RbExpr::arr_count_matches, 1))?;
325
325
  class.define_method("binary_contains", method!(RbExpr::bin_contains, 1))?;
326
326
  class.define_method("binary_ends_with", method!(RbExpr::bin_ends_with, 1))?;
@@ -365,7 +365,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
365
365
  class.define_method("str_contains_any", method!(RbExpr::str_contains_any, 2))?;
366
366
  class.define_method("str_replace_many", method!(RbExpr::str_replace_many, 3))?;
367
367
  class.define_method("list_len", method!(RbExpr::list_len, 0))?;
368
- class.define_method("list_contains", method!(RbExpr::list_contains, 1))?;
368
+ class.define_method("list_contains", method!(RbExpr::list_contains, 2))?;
369
369
  class.define_method("list_count_matches", method!(RbExpr::list_count_matches, 1))?;
370
370
  class.define_method("dt_year", method!(RbExpr::dt_year, 0))?;
371
371
  class.define_method("dt_is_leap_year", method!(RbExpr::dt_is_leap_year, 0))?;
@@ -559,7 +559,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
559
559
  class.define_singleton_method("cols", function!(functions::lazy::cols, 1))?;
560
560
  class.define_singleton_method("fold", function!(functions::lazy::fold, 5))?;
561
561
  class.define_singleton_method("cum_fold", function!(functions::lazy::cum_fold, 4))?;
562
- class.define_singleton_method("lit", function!(functions::lazy::lit, 2))?;
562
+ class.define_singleton_method("lit", function!(functions::lazy::lit, 3))?;
563
563
  class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
564
564
  class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?;
565
565
  class.define_singleton_method("repeat", function!(functions::lazy::repeat, 3))?;
@@ -481,6 +481,8 @@ module Polars
481
481
  #
482
482
  # @param item [Object]
483
483
  # Item that will be checked for membership
484
+ # @param nulls_equal [Boolean]
485
+ # If true, treat null as a distinct value. Null values will not propagate.
484
486
  #
485
487
  # @return [Expr]
486
488
  #
@@ -501,9 +503,9 @@ module Polars
501
503
  # # │ ["x", "y"] ┆ false │
502
504
  # # │ ["a", "c"] ┆ true │
503
505
  # # └───────────────┴──────────┘
504
- def contains(item)
506
+ def contains(item, nulls_equal: true)
505
507
  item = Utils.parse_into_expression(item, str_as_lit: true)
506
- Utils.wrap_expr(_rbexpr.arr_contains(item))
508
+ Utils.wrap_expr(_rbexpr.arr_contains(item, nulls_equal))
507
509
  end
508
510
 
509
511
  # Count how often the value produced by `element` occurs.
data/lib/polars/expr.rb CHANGED
@@ -3734,15 +3734,7 @@ module Polars
3734
3734
  # # │ [9, 10] ┆ 3 ┆ false │
3735
3735
  # # └───────────┴──────────────────┴──────────┘
3736
3736
  def is_in(other, nulls_equal: false)
3737
- if other.is_a?(::Array)
3738
- if other.length == 0
3739
- other = Polars.lit(nil)._rbexpr
3740
- else
3741
- other = Polars.lit(Series.new(other))._rbexpr
3742
- end
3743
- else
3744
- other = Utils.parse_into_expression(other, str_as_lit: false)
3745
- end
3737
+ other = Utils.parse_into_expression(other)
3746
3738
  _from_rbexpr(_rbexpr.is_in(other, nulls_equal))
3747
3739
  end
3748
3740
  alias_method :in?, :is_in
@@ -16,20 +16,15 @@ module Polars
16
16
  elsif value.is_a?(::Date)
17
17
  return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
18
18
  elsif value.is_a?(Polars::Series)
19
- name = value.name
20
19
  value = value._s
21
- e = Utils.wrap_expr(Plr.lit(value, allow_object))
22
- if name == ""
23
- return e
24
- end
25
- return e.alias(name)
20
+ return Utils.wrap_expr(Plr.lit(value, allow_object, false))
26
21
  elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
27
- return lit(Series.new("", value))
22
+ return Utils.wrap_expr(Plr.lit(Series.new("literal", [value.to_a], dtype: dtype)._s, allow_object, true))
28
23
  elsif dtype
29
- return Utils.wrap_expr(Plr.lit(value, allow_object)).cast(dtype)
24
+ return Utils.wrap_expr(Plr.lit(value, allow_object, true)).cast(dtype)
30
25
  end
31
26
 
32
- Utils.wrap_expr(Plr.lit(value, allow_object))
27
+ Utils.wrap_expr(Plr.lit(value, allow_object, true))
33
28
  end
34
29
  end
35
30
  end
@@ -481,6 +481,8 @@ module Polars
481
481
  #
482
482
  # @param item [Object]
483
483
  # Item that will be checked for membership
484
+ # @param nulls_equal [Boolean]
485
+ # If true, treat null as a distinct value. Null values will not propagate.
484
486
  #
485
487
  # @return [Expr]
486
488
  #
@@ -498,8 +500,8 @@ module Polars
498
500
  # # │ false │
499
501
  # # │ true │
500
502
  # # └───────┘
501
- def contains(item)
502
- Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
503
+ def contains(item, nulls_equal: true)
504
+ Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item), nulls_equal))
503
505
  end
504
506
 
505
507
  # Join all string items in a sublist and place a separator between them.
@@ -746,9 +748,9 @@ module Polars
746
748
  # # │ {1,2,3} │
747
749
  # # │ {1,2,null} │
748
750
  # # └────────────┘
749
- def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
751
+ def to_struct(n_field_strategy: "first_non_null", name_generator: nil, upper_bound: nil)
750
752
  raise Todo if name_generator
751
- Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, 0))
753
+ Utils.wrap_expr(_rbexpr.list_to_struct(n_field_strategy, name_generator, nil))
752
754
  end
753
755
 
754
756
  # Run any polars expression against the lists' elements.
data/lib/polars/series.rb CHANGED
@@ -2144,18 +2144,33 @@ module Polars
2144
2144
 
2145
2145
  # Check if elements of this Series are in the other Series.
2146
2146
  #
2147
+ # @param nulls_equal [Boolean]
2148
+ # If true, treat null as a distinct value. Null values will not propagate.
2149
+ #
2147
2150
  # @return [Series]
2148
2151
  #
2149
2152
  # @example
2150
2153
  # s = Polars::Series.new("a", [1, 2, 3])
2151
- # s2 = Polars::Series.new("b", [2, 4])
2154
+ # s2 = Polars::Series.new("b", [2, 4, nil])
2152
2155
  # s2.is_in(s)
2153
2156
  # # =>
2154
- # # shape: (2,)
2157
+ # # shape: (3,)
2158
+ # # Series: 'b' [bool]
2159
+ # # [
2160
+ # # true
2161
+ # # false
2162
+ # # null
2163
+ # # ]
2164
+ #
2165
+ # @example
2166
+ # s2.is_in(s, nulls_equal: true)
2167
+ # # =>
2168
+ # # shape: (3,)
2155
2169
  # # Series: 'b' [bool]
2156
2170
  # # [
2157
2171
  # # true
2158
2172
  # # false
2173
+ # # false
2159
2174
  # # ]
2160
2175
  #
2161
2176
  # @example
@@ -2190,7 +2205,7 @@ module Polars
2190
2205
  # # true
2191
2206
  # # false
2192
2207
  # # ]
2193
- def is_in(other)
2208
+ def is_in(other, nulls_equal: false)
2194
2209
  super
2195
2210
  end
2196
2211
  alias_method :in?, :is_in
@@ -1403,7 +1403,7 @@ module Polars
1403
1403
  # # │ Can you feel the love tonight ┆ true │
1404
1404
  # # └─────────────────────────────────┴──────────────┘
1405
1405
  def contains_any(patterns, ascii_case_insensitive: false)
1406
- patterns = Utils.parse_into_expression(patterns, str_as_lit: false, list_as_series: true)
1406
+ patterns = Utils.parse_into_expression(patterns, str_as_lit: false)
1407
1407
  Utils.wrap_expr(
1408
1408
  _rbexpr.str_contains_any(patterns, ascii_case_insensitive)
1409
1409
  )
@@ -1474,9 +1474,9 @@ module Polars
1474
1474
  # # │ Can you feel the love tonight ┆ Can me feel the love tonight │
1475
1475
  # # └─────────────────────────────────┴─────────────────────────────────┘
1476
1476
  def replace_many(patterns, replace_with, ascii_case_insensitive: false)
1477
- patterns = Utils.parse_into_expression(patterns, str_as_lit: false, list_as_series: true)
1477
+ patterns = Utils.parse_into_expression(patterns, str_as_lit: false)
1478
1478
  replace_with = Utils.parse_into_expression(
1479
- replace_with, str_as_lit: true, list_as_series: true
1479
+ replace_with, str_as_lit: true
1480
1480
  )
1481
1481
  Utils.wrap_expr(
1482
1482
  _rbexpr.str_replace_many(
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.18.0"
3
+ VERSION = "0.19.0"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.18.0
4
+ version: 0.19.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane