polars-df 0.16.0 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  Copyright (c) 2020 Ritchie Vink
2
- Copyright (c) 2022-2024 Andrew Kane
2
+ Copyright (c) 2022-2025 Andrew Kane
3
3
  Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
data/README.md CHANGED
@@ -88,7 +88,7 @@ From Avro
88
88
  Polars.read_avro("file.avro")
89
89
  ```
90
90
 
91
- From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental, unreleased]
91
+ From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental]
92
92
 
93
93
  ```ruby
94
94
  Polars.read_delta("./table")
@@ -365,7 +365,7 @@ Avro
365
365
  df.write_avro("file.avro")
366
366
  ```
367
367
 
368
- Delta Lake [experimental, unreleased]
368
+ Delta Lake [experimental]
369
369
 
370
370
  ```ruby
371
371
  df.write_delta("./table")
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.16.0"
3
+ version = "0.17.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -12,20 +12,21 @@ crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
14
  ahash = "0.8"
15
- arrow = { package = "polars-arrow", version = "=0.45.1" }
15
+ arrow = { package = "polars-arrow", version = "=0.46.0" }
16
16
  bytes = "1"
17
17
  chrono = "0.4"
18
18
  either = "1.8"
19
19
  magnus = "0.7"
20
- polars-core = "=0.45.1"
21
- polars-plan = "=0.45.1"
22
- polars-parquet = "=0.45.1"
23
- polars-utils = "=0.45.1"
20
+ polars-core = "=0.46.0"
21
+ polars-plan = "=0.46.0"
22
+ polars-parquet = "=0.46.0"
23
+ polars-utils = "=0.46.0"
24
+ rayon = "1.9"
24
25
  regex = "1"
25
26
  serde_json = "1"
26
27
 
27
28
  [dependencies.polars]
28
- version = "=0.45.1"
29
+ version = "=0.46.0"
29
30
  features = [
30
31
  "abs",
31
32
  "approx_unique",
@@ -33,6 +33,7 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
33
33
  AnyValue::Int16(v) => ruby.into_value(v),
34
34
  AnyValue::Int32(v) => ruby.into_value(v),
35
35
  AnyValue::Int64(v) => ruby.into_value(v),
36
+ AnyValue::Int128(_v) => todo!(),
36
37
  AnyValue::Float32(v) => ruby.into_value(v),
37
38
  AnyValue::Float64(v) => ruby.into_value(v),
38
39
  AnyValue::Null => ruby.qnil().as_value(),
@@ -146,6 +146,10 @@ impl IntoValue for Wrap<DataType> {
146
146
  let class = pl.const_get::<_, Value>("Int64").unwrap();
147
147
  class.funcall("new", ()).unwrap()
148
148
  }
149
+ DataType::Int128 => {
150
+ let class = pl.const_get::<_, Value>("Int128").unwrap();
151
+ class.funcall("new", ()).unwrap()
152
+ }
149
153
  DataType::UInt8 => {
150
154
  let class = pl.const_get::<_, Value>("UInt8").unwrap();
151
155
  class.funcall("new", ()).unwrap()
@@ -296,30 +296,24 @@ impl RbDataFrame {
296
296
  Ok(())
297
297
  }
298
298
 
299
- pub fn write_json(&self, rb_f: Value, pretty: bool, row_oriented: bool) -> RbResult<()> {
299
+ pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
300
300
  let file = BufWriter::new(get_file_like(rb_f, true)?);
301
301
 
302
- let r = match (pretty, row_oriented) {
303
- (_, true) => JsonWriter::new(file)
304
- .with_json_format(JsonFormat::Json)
305
- .finish(&mut self.df.borrow_mut()),
306
- (true, _) => serde_json::to_writer_pretty(file, &*self.df.borrow())
307
- .map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
308
- (false, _) => serde_json::to_writer(file, &*self.df.borrow())
309
- .map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
310
- };
311
- r.map_err(|e| RbPolarsErr::Other(format!("{:?}", e)))?;
302
+ JsonWriter::new(file)
303
+ .with_json_format(JsonFormat::Json)
304
+ .finish(&mut self.df.borrow_mut())
305
+ .map_err(RbPolarsErr::from)?;
312
306
  Ok(())
313
307
  }
314
308
 
315
309
  pub fn write_ndjson(&self, rb_f: Value) -> RbResult<()> {
316
310
  let file = BufWriter::new(get_file_like(rb_f, true)?);
317
311
 
318
- let r = JsonWriter::new(file)
312
+ JsonWriter::new(file)
319
313
  .with_json_format(JsonFormat::JsonLines)
320
- .finish(&mut self.df.borrow_mut());
314
+ .finish(&mut self.df.borrow_mut())
315
+ .map_err(RbPolarsErr::from)?;
321
316
 
322
- r.map_err(|e| RbPolarsErr::Other(format!("{:?}", e)))?;
323
317
  Ok(())
324
318
  }
325
319
 
@@ -1,9 +1,9 @@
1
1
  use std::io::BufReader;
2
2
 
3
+ use arrow::array::Utf8ViewArray;
3
4
  use magnus::{RHash, Value};
4
5
  use polars::prelude::ArrowSchema;
5
6
  use polars_core::datatypes::create_enum_dtype;
6
- use polars_core::export::arrow::array::Utf8ViewArray;
7
7
 
8
8
  use crate::conversion::Wrap;
9
9
  use crate::file::{get_either_file, EitherRustRubyFile};
@@ -11,7 +11,7 @@ use crate::prelude::ArrowDataType;
11
11
  use crate::{RbPolarsErr, RbResult};
12
12
 
13
13
  pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
14
- use polars_core::export::arrow::io::ipc::read::read_file_metadata;
14
+ use arrow::io::ipc::read::read_file_metadata;
15
15
  let metadata = match get_either_file(rb_f, false)? {
16
16
  EitherRustRubyFile::Rust(r) => {
17
17
  read_file_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
@@ -23,7 +23,7 @@ impl RbSeries {
23
23
  .const_get::<_, RClass>("RObject")?
24
24
  .funcall("cast", (np_arr,))
25
25
  }
26
- dt if dt.is_numeric() => {
26
+ dt if dt.is_primitive_numeric() => {
27
27
  if let Some(BitRepr::Large(_)) = s.bit_repr() {
28
28
  let s = s.cast(&DataType::Float64).unwrap();
29
29
  let ca = s.f64().unwrap();
@@ -633,6 +633,8 @@ impl RbLazyFrame {
633
633
  tolerance: Option<Wrap<AnyValue<'_>>>,
634
634
  tolerance_str: Option<String>,
635
635
  coalesce: bool,
636
+ allow_eq: bool,
637
+ check_sortedness: bool,
636
638
  ) -> RbResult<Self> {
637
639
  let coalesce = if coalesce {
638
640
  JoinCoalesce::CoalesceColumns
@@ -657,6 +659,8 @@ impl RbLazyFrame {
657
659
  right_by: right_by.map(strings_to_pl_smallstr),
658
660
  tolerance: tolerance.map(|t| t.0.into_static()),
659
661
  tolerance_str: tolerance_str.map(|s| s.into()),
662
+ allow_eq,
663
+ check_sortedness,
660
664
  }))
661
665
  .suffix(suffix)
662
666
  .finish()
@@ -69,7 +69,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
69
69
  class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
70
70
  class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
71
71
  class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
72
- class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
72
+ class.define_method("write_json", method!(RbDataFrame::write_json, 1))?;
73
73
  class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
74
74
  class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
75
75
  class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 5))?;
@@ -758,7 +758,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
758
758
  method!(RbLazyFrame::group_by_dynamic, 9),
759
759
  )?;
760
760
  class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
761
- class.define_method("join_asof", method!(RbLazyFrame::join_asof, 12))?;
761
+ class.define_method("join_asof", method!(RbLazyFrame::join_asof, 14))?;
762
762
  class.define_method("join", method!(RbLazyFrame::join, 10))?;
763
763
  class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
764
764
  class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
@@ -5,9 +5,9 @@ pub mod series;
5
5
  use magnus::{prelude::*, RHash, Value};
6
6
  use polars::chunked_array::builder::get_list_builder;
7
7
  use polars::prelude::*;
8
- use polars_core::export::rayon::prelude::*;
9
8
  use polars_core::utils::CustomIterTools;
10
9
  use polars_core::POOL;
10
+ use rayon::prelude::*;
11
11
 
12
12
  use crate::{ObjectValue, RbPolarsErr, RbResult, RbSeries, Wrap};
13
13
 
@@ -21,6 +21,7 @@ impl RbSeries {
21
21
  DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
22
22
  DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
23
23
  DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
24
+ DataType::Int128 => todo!(),
24
25
  DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
25
26
  DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
26
27
  DataType::Categorical(_, _) | DataType::Enum(_, _) => {
@@ -1,7 +1,7 @@
1
+ use arrow::array::Array;
2
+ use arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
1
3
  use magnus::prelude::*;
2
4
  use magnus::Value;
3
- use polars::export::arrow::array::Array;
4
- use polars::export::arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
5
5
  use polars::prelude::*;
6
6
 
7
7
  use super::RbSeries;
@@ -1,4 +1,4 @@
1
- use polars::export::arrow::array::Array;
1
+ use arrow::array::Array;
2
2
  use polars::prelude::*;
3
3
 
4
4
  use crate::error::RbPolarsErr;
@@ -604,10 +604,6 @@ module Polars
604
604
  #
605
605
  # @param file [String]
606
606
  # File path to which the result should be written.
607
- # @param pretty [Boolean]
608
- # Pretty serialize json.
609
- # @param row_oriented [Boolean]
610
- # Write to row oriented json. This is slower, but more common.
611
607
  #
612
608
  # @return [nil]
613
609
  #
@@ -619,16 +615,8 @@ module Polars
619
615
  # }
620
616
  # )
621
617
  # df.write_json
622
- # # => "{\"columns\":[{\"name\":\"foo\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[1,2,3]},{\"name\":\"bar\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[6,7,8]}]}"
623
- #
624
- # @example
625
- # df.write_json(row_oriented: true)
626
618
  # # => "[{\"foo\":1,\"bar\":6},{\"foo\":2,\"bar\":7},{\"foo\":3,\"bar\":8}]"
627
- def write_json(
628
- file = nil,
629
- pretty: false,
630
- row_oriented: false
631
- )
619
+ def write_json(file = nil)
632
620
  if Utils.pathlike?(file)
633
621
  file = Utils.normalize_filepath(file)
634
622
  end
@@ -636,7 +624,7 @@ module Polars
636
624
  if file.nil? || to_string_io
637
625
  buf = StringIO.new
638
626
  buf.set_encoding(Encoding::BINARY)
639
- _df.write_json(buf, pretty, row_oriented)
627
+ _df.write_json(buf)
640
628
  json_bytes = buf.string
641
629
 
642
630
  json_str = json_bytes.force_encoding(Encoding::UTF_8)
@@ -646,7 +634,7 @@ module Polars
646
634
  return json_str
647
635
  end
648
636
  else
649
- _df.write_json(file, pretty, row_oriented)
637
+ _df.write_json(file)
650
638
  end
651
639
  nil
652
640
  end
@@ -2294,6 +2282,14 @@ module Polars
2294
2282
  # keys are within this distance. If an asof join is done on columns of dtype
2295
2283
  # "Date", "Datetime", "Duration" or "Time" you use the following string
2296
2284
  # language:
2285
+ # @param allow_exact_matches [Boolean]
2286
+ # Whether exact matches are valid join predicates.
2287
+ # - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
2288
+ # - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
2289
+ # @param check_sortedness [Boolean]
2290
+ # Check the sortedness of the asof keys. If the keys are not sorted, Polars
2291
+ # raises an error, or, when a `by` argument is given, only emits a warning.
2292
+ # That warning might become a hard error in the future.
2297
2293
  #
2298
2294
  # - 1ns (1 nanosecond)
2299
2295
  # - 1us (1 microsecond)
@@ -2375,7 +2371,9 @@ module Polars
2375
2371
  tolerance: nil,
2376
2372
  allow_parallel: true,
2377
2373
  force_parallel: false,
2378
- coalesce: true
2374
+ coalesce: true,
2375
+ allow_exact_matches: true,
2376
+ check_sortedness: true
2379
2377
  )
2380
2378
  lazy
2381
2379
  .join_asof(
@@ -2391,7 +2389,9 @@ module Polars
2391
2389
  tolerance: tolerance,
2392
2390
  allow_parallel: allow_parallel,
2393
2391
  force_parallel: force_parallel,
2394
- coalesce: coalesce
2392
+ coalesce: coalesce,
2393
+ allow_exact_matches: allow_exact_matches,
2394
+ check_sortedness: check_sortedness
2395
2395
  )
2396
2396
  .collect(no_optimization: true)
2397
2397
  end
@@ -167,6 +167,10 @@ module Polars
167
167
  class Int64 < SignedIntegerType
168
168
  end
169
169
 
170
+ # 128-bit signed integer type.
171
+ class Int128 < SignedIntegerType
172
+ end
173
+
170
174
  # 8-bit unsigned integer type.
171
175
  class UInt8 < UnsignedIntegerType
172
176
  end
@@ -1616,6 +1616,14 @@ module Polars
1616
1616
  # - true: -> Always coalesce join columns.
1617
1617
  # - false: -> Never coalesce join columns.
1618
1618
  # Note that joining on any other expressions than `col` will turn off coalescing.
1619
+ # @param allow_exact_matches [Boolean]
1620
+ # Whether exact matches are valid join predicates.
1621
+ # - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
1622
+ # - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
1623
+ # @param check_sortedness [Boolean]
1624
+ # Check the sortedness of the asof keys. If the keys are not sorted, Polars
1625
+ # raises an error, or, when a `by` argument is given, only emits a warning.
1626
+ # That warning might become a hard error in the future.
1619
1627
  #
1620
1628
  # @return [LazyFrame]
1621
1629
  #
@@ -1815,7 +1823,9 @@ module Polars
1815
1823
  tolerance: nil,
1816
1824
  allow_parallel: true,
1817
1825
  force_parallel: false,
1818
- coalesce: true
1826
+ coalesce: true,
1827
+ allow_exact_matches: true,
1828
+ check_sortedness: true
1819
1829
  )
1820
1830
  if !other.is_a?(LazyFrame)
1821
1831
  raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
@@ -1871,7 +1881,9 @@ module Polars
1871
1881
  strategy,
1872
1882
  tolerance_num,
1873
1883
  tolerance_str,
1874
- coalesce
1884
+ coalesce,
1885
+ allow_exact_matches,
1886
+ check_sortedness
1875
1887
  )
1876
1888
  )
1877
1889
  end
data/lib/polars/series.rb CHANGED
@@ -4696,7 +4696,12 @@ module Polars
4696
4696
  end
4697
4697
 
4698
4698
  constructor = polars_type_to_constructor(dtype)
4699
- rbseries = constructor.call(name, values, strict)
4699
+ rbseries =
4700
+ if dtype == Array
4701
+ constructor.call(name, values, strict)
4702
+ else
4703
+ construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
4704
+ end
4700
4705
 
4701
4706
  base_type = dtype.is_a?(DataType) ? dtype.class : dtype
4702
4707
  if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum, Decimal].include?(base_type)
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.16.0"
3
+ VERSION = "0.17.0"
4
4
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.0
4
+ version: 0.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-12-29 00:00:00.000000000 Z
10
+ date: 2025-01-28 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: bigdecimal