polars-df 0.16.0 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +222 -229
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +8 -7
- data/ext/polars/src/conversion/any_value.rs +1 -0
- data/ext/polars/src/conversion/mod.rs +4 -0
- data/ext/polars/src/dataframe/io.rs +8 -14
- data/ext/polars/src/functions/io.rs +2 -2
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/general.rs +4 -0
- data/ext/polars/src/lib.rs +2 -2
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/series/export.rs +1 -0
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/scatter.rs +1 -1
- data/lib/polars/data_frame.rb +17 -17
- data/lib/polars/data_types.rb +4 -0
- data/lib/polars/lazy_frame.rb +14 -2
- data/lib/polars/series.rb +6 -1
- data/lib/polars/version.rb +1 -1
- metadata +2 -2
data/LICENSE.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
Copyright (c) 2020 Ritchie Vink
|
2
|
-
Copyright (c) 2022-
|
2
|
+
Copyright (c) 2022-2025 Andrew Kane
|
3
3
|
Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
data/README.md
CHANGED
@@ -88,7 +88,7 @@ From Avro
|
|
88
88
|
Polars.read_avro("file.avro")
|
89
89
|
```
|
90
90
|
|
91
|
-
From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental
|
91
|
+
From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental]
|
92
92
|
|
93
93
|
```ruby
|
94
94
|
Polars.read_delta("./table")
|
@@ -365,7 +365,7 @@ Avro
|
|
365
365
|
df.write_avro("file.avro")
|
366
366
|
```
|
367
367
|
|
368
|
-
Delta Lake [experimental
|
368
|
+
Delta Lake [experimental]
|
369
369
|
|
370
370
|
```ruby
|
371
371
|
df.write_delta("./table")
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.16.0"
|
3
|
+
version = "0.17.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -12,20 +12,21 @@ crate-type = ["cdylib"]
|
|
12
12
|
|
13
13
|
[dependencies]
|
14
14
|
ahash = "0.8"
|
15
|
-
arrow = { package = "polars-arrow", version = "=0.
|
15
|
+
arrow = { package = "polars-arrow", version = "=0.46.0" }
|
16
16
|
bytes = "1"
|
17
17
|
chrono = "0.4"
|
18
18
|
either = "1.8"
|
19
19
|
magnus = "0.7"
|
20
|
-
polars-core = "=0.
|
21
|
-
polars-plan = "=0.
|
22
|
-
polars-parquet = "=0.
|
23
|
-
polars-utils = "=0.
|
20
|
+
polars-core = "=0.46.0"
|
21
|
+
polars-plan = "=0.46.0"
|
22
|
+
polars-parquet = "=0.46.0"
|
23
|
+
polars-utils = "=0.46.0"
|
24
|
+
rayon = "1.9"
|
24
25
|
regex = "1"
|
25
26
|
serde_json = "1"
|
26
27
|
|
27
28
|
[dependencies.polars]
|
28
|
-
version = "=0.
|
29
|
+
version = "=0.46.0"
|
29
30
|
features = [
|
30
31
|
"abs",
|
31
32
|
"approx_unique",
|
@@ -33,6 +33,7 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
|
33
33
|
AnyValue::Int16(v) => ruby.into_value(v),
|
34
34
|
AnyValue::Int32(v) => ruby.into_value(v),
|
35
35
|
AnyValue::Int64(v) => ruby.into_value(v),
|
36
|
+
AnyValue::Int128(_v) => todo!(),
|
36
37
|
AnyValue::Float32(v) => ruby.into_value(v),
|
37
38
|
AnyValue::Float64(v) => ruby.into_value(v),
|
38
39
|
AnyValue::Null => ruby.qnil().as_value(),
|
@@ -146,6 +146,10 @@ impl IntoValue for Wrap<DataType> {
|
|
146
146
|
let class = pl.const_get::<_, Value>("Int64").unwrap();
|
147
147
|
class.funcall("new", ()).unwrap()
|
148
148
|
}
|
149
|
+
DataType::Int128 => {
|
150
|
+
let class = pl.const_get::<_, Value>("Int128").unwrap();
|
151
|
+
class.funcall("new", ()).unwrap()
|
152
|
+
}
|
149
153
|
DataType::UInt8 => {
|
150
154
|
let class = pl.const_get::<_, Value>("UInt8").unwrap();
|
151
155
|
class.funcall("new", ()).unwrap()
|
@@ -296,30 +296,24 @@ impl RbDataFrame {
|
|
296
296
|
Ok(())
|
297
297
|
}
|
298
298
|
|
299
|
-
pub fn write_json(&self, rb_f: Value, pretty: bool, row_oriented: bool) -> RbResult<()> {
|
299
|
+
pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
|
300
300
|
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
301
301
|
|
302
|
-
|
303
|
-
(
|
304
|
-
|
305
|
-
|
306
|
-
(true, _) => serde_json::to_writer_pretty(file, &*self.df.borrow())
|
307
|
-
.map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
|
308
|
-
(false, _) => serde_json::to_writer(file, &*self.df.borrow())
|
309
|
-
.map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
|
310
|
-
};
|
311
|
-
r.map_err(|e| RbPolarsErr::Other(format!("{:?}", e)))?;
|
302
|
+
JsonWriter::new(file)
|
303
|
+
.with_json_format(JsonFormat::Json)
|
304
|
+
.finish(&mut self.df.borrow_mut())
|
305
|
+
.map_err(RbPolarsErr::from)?;
|
312
306
|
Ok(())
|
313
307
|
}
|
314
308
|
|
315
309
|
pub fn write_ndjson(&self, rb_f: Value) -> RbResult<()> {
|
316
310
|
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
317
311
|
|
318
|
-
|
312
|
+
JsonWriter::new(file)
|
319
313
|
.with_json_format(JsonFormat::JsonLines)
|
320
|
-
.finish(&mut self.df.borrow_mut())
|
314
|
+
.finish(&mut self.df.borrow_mut())
|
315
|
+
.map_err(RbPolarsErr::from)?;
|
321
316
|
|
322
|
-
r.map_err(|e| RbPolarsErr::Other(format!("{:?}", e)))?;
|
323
317
|
Ok(())
|
324
318
|
}
|
325
319
|
|
@@ -1,9 +1,9 @@
|
|
1
1
|
use std::io::BufReader;
|
2
2
|
|
3
|
+
use arrow::array::Utf8ViewArray;
|
3
4
|
use magnus::{RHash, Value};
|
4
5
|
use polars::prelude::ArrowSchema;
|
5
6
|
use polars_core::datatypes::create_enum_dtype;
|
6
|
-
use polars_core::export::arrow::array::Utf8ViewArray;
|
7
7
|
|
8
8
|
use crate::conversion::Wrap;
|
9
9
|
use crate::file::{get_either_file, EitherRustRubyFile};
|
@@ -11,7 +11,7 @@ use crate::prelude::ArrowDataType;
|
|
11
11
|
use crate::{RbPolarsErr, RbResult};
|
12
12
|
|
13
13
|
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
14
|
-
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
|
14
|
+
use arrow::io::ipc::read::read_file_metadata;
|
15
15
|
let metadata = match get_either_file(rb_f, false)? {
|
16
16
|
EitherRustRubyFile::Rust(r) => {
|
17
17
|
read_file_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
@@ -23,7 +23,7 @@ impl RbSeries {
|
|
23
23
|
.const_get::<_, RClass>("RObject")?
|
24
24
|
.funcall("cast", (np_arr,))
|
25
25
|
}
|
26
|
-
dt if dt.
|
26
|
+
dt if dt.is_primitive_numeric() => {
|
27
27
|
if let Some(BitRepr::Large(_)) = s.bit_repr() {
|
28
28
|
let s = s.cast(&DataType::Float64).unwrap();
|
29
29
|
let ca = s.f64().unwrap();
|
@@ -633,6 +633,8 @@ impl RbLazyFrame {
|
|
633
633
|
tolerance: Option<Wrap<AnyValue<'_>>>,
|
634
634
|
tolerance_str: Option<String>,
|
635
635
|
coalesce: bool,
|
636
|
+
allow_eq: bool,
|
637
|
+
check_sortedness: bool,
|
636
638
|
) -> RbResult<Self> {
|
637
639
|
let coalesce = if coalesce {
|
638
640
|
JoinCoalesce::CoalesceColumns
|
@@ -657,6 +659,8 @@ impl RbLazyFrame {
|
|
657
659
|
right_by: right_by.map(strings_to_pl_smallstr),
|
658
660
|
tolerance: tolerance.map(|t| t.0.into_static()),
|
659
661
|
tolerance_str: tolerance_str.map(|s| s.into()),
|
662
|
+
allow_eq,
|
663
|
+
check_sortedness,
|
660
664
|
}))
|
661
665
|
.suffix(suffix)
|
662
666
|
.finish()
|
data/ext/polars/src/lib.rs
CHANGED
@@ -69,7 +69,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
69
69
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
70
70
|
class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
|
71
71
|
class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
|
72
|
-
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
72
|
+
class.define_method("write_json", method!(RbDataFrame::write_json, 1))?;
|
73
73
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
74
74
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
75
75
|
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 5))?;
|
@@ -758,7 +758,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
758
758
|
method!(RbLazyFrame::group_by_dynamic, 9),
|
759
759
|
)?;
|
760
760
|
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
761
|
-
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 12))?;
|
761
|
+
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 14))?;
|
762
762
|
class.define_method("join", method!(RbLazyFrame::join, 10))?;
|
763
763
|
class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
|
764
764
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
data/ext/polars/src/map/mod.rs
CHANGED
@@ -5,9 +5,9 @@ pub mod series;
|
|
5
5
|
use magnus::{prelude::*, RHash, Value};
|
6
6
|
use polars::chunked_array::builder::get_list_builder;
|
7
7
|
use polars::prelude::*;
|
8
|
-
use polars_core::export::rayon::prelude::*;
|
9
8
|
use polars_core::utils::CustomIterTools;
|
10
9
|
use polars_core::POOL;
|
10
|
+
use rayon::prelude::*;
|
11
11
|
|
12
12
|
use crate::{ObjectValue, RbPolarsErr, RbResult, RbSeries, Wrap};
|
13
13
|
|
@@ -21,6 +21,7 @@ impl RbSeries {
|
|
21
21
|
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
22
22
|
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
23
23
|
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
24
|
+
DataType::Int128 => todo!(),
|
24
25
|
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
25
26
|
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
26
27
|
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
@@ -1,7 +1,7 @@
|
|
1
|
+
use arrow::array::Array;
|
2
|
+
use arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
|
1
3
|
use magnus::prelude::*;
|
2
4
|
use magnus::Value;
|
3
|
-
use polars::export::arrow::array::Array;
|
4
|
-
use polars::export::arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
|
5
5
|
use polars::prelude::*;
|
6
6
|
|
7
7
|
use super::RbSeries;
|
data/lib/polars/data_frame.rb
CHANGED
@@ -604,10 +604,6 @@ module Polars
|
|
604
604
|
#
|
605
605
|
# @param file [String]
|
606
606
|
# File path to which the result should be written.
|
607
|
-
# @param pretty [Boolean]
|
608
|
-
# Pretty serialize json.
|
609
|
-
# @param row_oriented [Boolean]
|
610
|
-
# Write to row oriented json. This is slower, but more common.
|
611
607
|
#
|
612
608
|
# @return [nil]
|
613
609
|
#
|
@@ -619,16 +615,8 @@ module Polars
|
|
619
615
|
# }
|
620
616
|
# )
|
621
617
|
# df.write_json
|
622
|
-
# # => "{\"columns\":[{\"name\":\"foo\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[1,2,3]},{\"name\":\"bar\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[6,7,8]}]}"
|
623
|
-
#
|
624
|
-
# @example
|
625
|
-
# df.write_json(row_oriented: true)
|
626
618
|
# # => "[{\"foo\":1,\"bar\":6},{\"foo\":2,\"bar\":7},{\"foo\":3,\"bar\":8}]"
|
627
|
-
def write_json(
|
628
|
-
file = nil,
|
629
|
-
pretty: false,
|
630
|
-
row_oriented: false
|
631
|
-
)
|
619
|
+
def write_json(file = nil)
|
632
620
|
if Utils.pathlike?(file)
|
633
621
|
file = Utils.normalize_filepath(file)
|
634
622
|
end
|
@@ -636,7 +624,7 @@ module Polars
|
|
636
624
|
if file.nil? || to_string_io
|
637
625
|
buf = StringIO.new
|
638
626
|
buf.set_encoding(Encoding::BINARY)
|
639
|
-
_df.write_json(buf
|
627
|
+
_df.write_json(buf)
|
640
628
|
json_bytes = buf.string
|
641
629
|
|
642
630
|
json_str = json_bytes.force_encoding(Encoding::UTF_8)
|
@@ -646,7 +634,7 @@ module Polars
|
|
646
634
|
return json_str
|
647
635
|
end
|
648
636
|
else
|
649
|
-
_df.write_json(file
|
637
|
+
_df.write_json(file)
|
650
638
|
end
|
651
639
|
nil
|
652
640
|
end
|
@@ -2294,6 +2282,14 @@ module Polars
|
|
2294
2282
|
# keys are within this distance. If an asof join is done on columns of dtype
|
2295
2283
|
# "Date", "Datetime", "Duration" or "Time" you use the following string
|
2296
2284
|
# language:
|
2285
|
+
# @param allow_exact_matches [Boolean]
|
2286
|
+
# Whether exact matches are valid join predicates.
|
2287
|
+
# - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
|
2288
|
+
# - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
|
2289
|
+
# @param check_sortedness [Boolean]
|
2290
|
+
# Check the sortedness of the asof keys. If the keys are not sorted Polars
|
2291
|
+
# will error, or in case of 'by' argument raise a warning. This might become
|
2292
|
+
# a hard error in the future.
|
2297
2293
|
#
|
2298
2294
|
# - 1ns (1 nanosecond)
|
2299
2295
|
# - 1us (1 microsecond)
|
@@ -2375,7 +2371,9 @@ module Polars
|
|
2375
2371
|
tolerance: nil,
|
2376
2372
|
allow_parallel: true,
|
2377
2373
|
force_parallel: false,
|
2378
|
-
coalesce: true
|
2374
|
+
coalesce: true,
|
2375
|
+
allow_exact_matches: true,
|
2376
|
+
check_sortedness: true
|
2379
2377
|
)
|
2380
2378
|
lazy
|
2381
2379
|
.join_asof(
|
@@ -2391,7 +2389,9 @@ module Polars
|
|
2391
2389
|
tolerance: tolerance,
|
2392
2390
|
allow_parallel: allow_parallel,
|
2393
2391
|
force_parallel: force_parallel,
|
2394
|
-
coalesce: coalesce
|
2392
|
+
coalesce: coalesce,
|
2393
|
+
allow_exact_matches: allow_exact_matches,
|
2394
|
+
check_sortedness: check_sortedness
|
2395
2395
|
)
|
2396
2396
|
.collect(no_optimization: true)
|
2397
2397
|
end
|
data/lib/polars/data_types.rb
CHANGED
data/lib/polars/lazy_frame.rb
CHANGED
@@ -1616,6 +1616,14 @@ module Polars
|
|
1616
1616
|
# - true: -> Always coalesce join columns.
|
1617
1617
|
# - false: -> Never coalesce join columns.
|
1618
1618
|
# Note that joining on any other expressions than `col` will turn off coalescing.
|
1619
|
+
# @param allow_exact_matches [Boolean]
|
1620
|
+
# Whether exact matches are valid join predicates.
|
1621
|
+
# - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
|
1622
|
+
# - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
|
1623
|
+
# @param check_sortedness [Boolean]
|
1624
|
+
# Check the sortedness of the asof keys. If the keys are not sorted Polars
|
1625
|
+
# will error, or in case of 'by' argument raise a warning. This might become
|
1626
|
+
# a hard error in the future.
|
1619
1627
|
#
|
1620
1628
|
# @return [LazyFrame]
|
1621
1629
|
#
|
@@ -1815,7 +1823,9 @@ module Polars
|
|
1815
1823
|
tolerance: nil,
|
1816
1824
|
allow_parallel: true,
|
1817
1825
|
force_parallel: false,
|
1818
|
-
coalesce: true
|
1826
|
+
coalesce: true,
|
1827
|
+
allow_exact_matches: true,
|
1828
|
+
check_sortedness: true
|
1819
1829
|
)
|
1820
1830
|
if !other.is_a?(LazyFrame)
|
1821
1831
|
raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
|
@@ -1871,7 +1881,9 @@ module Polars
|
|
1871
1881
|
strategy,
|
1872
1882
|
tolerance_num,
|
1873
1883
|
tolerance_str,
|
1874
|
-
coalesce
|
1884
|
+
coalesce,
|
1885
|
+
allow_exact_matches,
|
1886
|
+
check_sortedness
|
1875
1887
|
)
|
1876
1888
|
)
|
1877
1889
|
end
|
data/lib/polars/series.rb
CHANGED
@@ -4696,7 +4696,12 @@ module Polars
|
|
4696
4696
|
end
|
4697
4697
|
|
4698
4698
|
constructor = polars_type_to_constructor(dtype)
|
4699
|
-
rbseries =
|
4699
|
+
rbseries =
|
4700
|
+
if dtype == Array
|
4701
|
+
constructor.call(name, values, strict)
|
4702
|
+
else
|
4703
|
+
construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
|
4704
|
+
end
|
4700
4705
|
|
4701
4706
|
base_type = dtype.is_a?(DataType) ? dtype.class : dtype
|
4702
4707
|
if [Date, Datetime, Duration, Time, Categorical, Boolean, Enum, Decimal].include?(base_type)
|
data/lib/polars/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.16.0
|
4
|
+
version: 0.17.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 2025-01-28 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: bigdecimal
|