polars-df 0.2.3 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +7 -7
- data/README.md +10 -0
- data/ext/polars/Cargo.toml +1 -1
- data/ext/polars/src/conversion.rs +51 -12
- data/ext/polars/src/lib.rs +2 -3
- data/ext/polars/src/rb_modules.rs +7 -3
- data/ext/polars/src/series.rs +6 -4
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/data_frame.rb +45 -13
- data/lib/polars/io.rb +6 -6
- data/lib/polars/lazy_frame.rb +3 -3
- data/lib/polars/lazy_functions.rb +14 -14
- data/lib/polars/utils.rb +8 -0
- data/lib/polars/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 122f88f6a702c0252a98404b8ecccddcd0195cadd79b7062f451cb23f5d3b8e3
|
4
|
+
data.tar.gz: 67588b6a0aa9829af179c8e3d5339b125b50dd6941e02cea4fc29aedd2e29217
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6393dcab24e4001c7d1b9149ef441317d8ba76571850071ed829b569e055c9ad4d4ffcbdb256ede8225922b56378984ebb113570051d27b2761a50f1b23fa168
|
7
|
+
data.tar.gz: 4356270d7f15d964eedd84bf37c05054323a19128fa5840563829af88f46107e1277534ae976ea078808807b7c80433196eb351f7160118d1251d1dd4ddbbc4b
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 0.2.5 (2023-02-01)
|
2
|
+
|
3
|
+
- Added support for glob patterns to `read_csv` method
|
4
|
+
- Added support for symbols to more methods
|
5
|
+
|
6
|
+
## 0.2.4 (2023-01-29)
|
7
|
+
|
8
|
+
- Added support for more types when creating a data frame from an array of hashes
|
9
|
+
|
1
10
|
## 0.2.3 (2023-01-22)
|
2
11
|
|
3
12
|
- Fixed error with precompiled gem on Mac ARM
|
data/Cargo.lock
CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
|
|
1367
1367
|
|
1368
1368
|
[[package]]
|
1369
1369
|
name = "polars"
|
1370
|
-
version = "0.2.
|
1370
|
+
version = "0.2.4"
|
1371
1371
|
dependencies = [
|
1372
1372
|
"ahash",
|
1373
1373
|
"jemallocator",
|
@@ -1653,18 +1653,18 @@ dependencies = [
|
|
1653
1653
|
|
1654
1654
|
[[package]]
|
1655
1655
|
name = "rb-sys"
|
1656
|
-
version = "0.9.
|
1656
|
+
version = "0.9.58"
|
1657
1657
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1658
|
-
checksum = "
|
1658
|
+
checksum = "0158f5115e1ad04a2ee231f597e86306af96f36a8b93ac0c01f8852d0ba89278"
|
1659
1659
|
dependencies = [
|
1660
1660
|
"rb-sys-build",
|
1661
1661
|
]
|
1662
1662
|
|
1663
1663
|
[[package]]
|
1664
1664
|
name = "rb-sys-build"
|
1665
|
-
version = "0.9.
|
1665
|
+
version = "0.9.58"
|
1666
1666
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1667
|
-
checksum = "
|
1667
|
+
checksum = "6c27b779db4a2863db74ddad0011f0d0c55c528e9601126d4613ad688063bc05"
|
1668
1668
|
dependencies = [
|
1669
1669
|
"bindgen",
|
1670
1670
|
"regex",
|
@@ -1673,9 +1673,9 @@ dependencies = [
|
|
1673
1673
|
|
1674
1674
|
[[package]]
|
1675
1675
|
name = "rb-sys-env"
|
1676
|
-
version = "0.1.
|
1676
|
+
version = "0.1.2"
|
1677
1677
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1678
|
-
checksum = "
|
1678
|
+
checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
1679
1679
|
|
1680
1680
|
[[package]]
|
1681
1681
|
name = "redox_syscall"
|
data/README.md
CHANGED
data/ext/polars/Cargo.toml
CHANGED
data/ext/polars/src/conversion.rs
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{
|
2
|
+
class, r_hash::ForEach, Integer, Module, RArray, RFloat, RHash, RString, Symbol, TryConvert,
|
3
|
+
Value, QNIL,
|
4
|
+
};
|
2
5
|
use polars::chunked_array::object::PolarsObjectSafe;
|
3
6
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
4
7
|
use polars::datatypes::AnyValue;
|
5
|
-
use polars::frame::row::Row;
|
8
|
+
use polars::frame::row::{any_values_to_dtype, Row};
|
6
9
|
use polars::frame::NullStrategy;
|
7
10
|
use polars::io::avro::AvroCompression;
|
8
11
|
use polars::prelude::*;
|
@@ -273,15 +276,24 @@ impl TryConvert for Wrap<DataType> {
|
|
273
276
|
|
274
277
|
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
275
278
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
279
|
+
if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
280
|
+
Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
|
281
|
+
} else if let Some(v) = Integer::from_value(ob) {
|
282
|
+
Ok(AnyValue::Int64(v.to_i64()?).into())
|
283
|
+
} else if let Some(v) = RFloat::from_value(ob) {
|
284
|
+
Ok(AnyValue::Float64(v.to_f64()).into())
|
285
|
+
} else if let Some(v) = RString::from_value(ob) {
|
286
|
+
Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
|
287
|
+
// call is_a? for ActiveSupport::TimeWithZone
|
288
|
+
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
289
|
+
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
290
|
+
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
291
|
+
let v = sec * 1_000_000_000 + nsec;
|
292
|
+
// TODO support time zone
|
293
|
+
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
281
294
|
} else if ob.is_nil() {
|
282
295
|
Ok(AnyValue::Null.into())
|
283
|
-
} else if
|
284
|
-
let dict = ob.try_convert::<RHash>().unwrap();
|
296
|
+
} else if let Some(dict) = RHash::from_value(ob) {
|
285
297
|
let len = dict.len();
|
286
298
|
let mut keys = Vec::with_capacity(len);
|
287
299
|
let mut vals = Vec::with_capacity(len);
|
@@ -294,6 +306,28 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
294
306
|
Ok(ForEach::Continue)
|
295
307
|
})?;
|
296
308
|
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
|
309
|
+
} else if let Some(v) = RArray::from_value(ob) {
|
310
|
+
if v.is_empty() {
|
311
|
+
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
312
|
+
} else {
|
313
|
+
let avs = v.try_convert::<Wrap<Row>>()?.0 .0;
|
314
|
+
// use first `n` values to infer datatype
|
315
|
+
// this value is not too large as this will be done with every
|
316
|
+
// anyvalue that has to be converted, which can be many
|
317
|
+
let n = 25;
|
318
|
+
let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
|
319
|
+
.map_err(RbPolarsErr::from)?;
|
320
|
+
let s = Series::from_any_values_and_dtype("", &avs, &dtype)
|
321
|
+
.map_err(RbPolarsErr::from)?;
|
322
|
+
Ok(Wrap(AnyValue::List(s)))
|
323
|
+
}
|
324
|
+
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
325
|
+
// convert to DateTime for UTC
|
326
|
+
let v = ob
|
327
|
+
.funcall::<_, _, Value>("to_datetime", ())?
|
328
|
+
.funcall::<_, _, Value>("to_time", ())?
|
329
|
+
.funcall::<_, _, i64>("to_i", ())?;
|
330
|
+
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
297
331
|
} else {
|
298
332
|
Err(RbPolarsErr::other(format!(
|
299
333
|
"object type not supported {:?}",
|
@@ -412,8 +446,12 @@ pub(crate) fn dicts_to_rows(
|
|
412
446
|
let d = d?;
|
413
447
|
let d = d.try_convert::<RHash>()?;
|
414
448
|
|
415
|
-
d.foreach(|name:
|
416
|
-
|
449
|
+
d.foreach(|name: Value, _value: Value| {
|
450
|
+
if let Some(v) = Symbol::from_value(name) {
|
451
|
+
key_names.insert(v.name()?.into());
|
452
|
+
} else {
|
453
|
+
key_names.insert(name.try_convert::<String>()?);
|
454
|
+
};
|
417
455
|
Ok(ForEach::Continue)
|
418
456
|
})?;
|
419
457
|
}
|
@@ -427,7 +465,8 @@ pub(crate) fn dicts_to_rows(
|
|
427
465
|
let mut row = Vec::with_capacity(key_names.len());
|
428
466
|
|
429
467
|
for k in key_names.iter() {
|
430
|
-
|
468
|
+
// TODO improve performance
|
469
|
+
let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
|
431
470
|
None => AnyValue::Null,
|
432
471
|
Some(val) => val.try_convert::<Wrap<AnyValue>>()?.0,
|
433
472
|
};
|
data/ext/polars/src/lib.rs
CHANGED
@@ -21,13 +21,12 @@ use file::get_file_like;
|
|
21
21
|
use lazy::dataframe::{RbLazyFrame, RbLazyGroupBy};
|
22
22
|
use lazy::dsl::{RbExpr, RbWhen, RbWhenThen};
|
23
23
|
use lazy::utils::rb_exprs_to_exprs;
|
24
|
-
use magnus::{function, method, prelude::*, Error, RArray, RHash, Value};
|
24
|
+
use magnus::{define_module, function, method, prelude::*, Error, RArray, RHash, Value};
|
25
25
|
use polars::datatypes::{DataType, TimeUnit, IDX_DTYPE};
|
26
26
|
use polars::error::PolarsResult;
|
27
27
|
use polars::frame::DataFrame;
|
28
28
|
use polars::functions::{diag_concat_df, hor_concat_df};
|
29
29
|
use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
|
30
|
-
use rb_modules::polars;
|
31
30
|
use series::RbSeries;
|
32
31
|
|
33
32
|
#[cfg(target_os = "linux")]
|
@@ -48,7 +47,7 @@ type RbResult<T> = Result<T, Error>;
|
|
48
47
|
|
49
48
|
#[magnus::init]
|
50
49
|
fn init() -> RbResult<()> {
|
51
|
-
let module =
|
50
|
+
let module = define_module("Polars")?;
|
52
51
|
module.define_singleton_method("_dtype_cols", function!(dtype_cols, 1))?;
|
53
52
|
module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
|
54
53
|
module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
|
data/ext/polars/src/rb_modules.rs
CHANGED
@@ -1,9 +1,13 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{class, memoize, Module, RClass, RModule};
|
2
2
|
|
3
3
|
pub(crate) fn polars() -> RModule {
|
4
|
-
*memoize!(RModule:
|
4
|
+
*memoize!(RModule: class::object().const_get("Polars").unwrap())
|
5
5
|
}
|
6
6
|
|
7
7
|
pub(crate) fn series() -> RClass {
|
8
|
-
*memoize!(RClass: polars().
|
8
|
+
*memoize!(RClass: polars().const_get("Series").unwrap())
|
9
|
+
}
|
10
|
+
|
11
|
+
pub(crate) fn date() -> RClass {
|
12
|
+
*memoize!(RClass: class::object().const_get("Date").unwrap())
|
9
13
|
}
|
data/ext/polars/src/series.rs
CHANGED
@@ -1109,11 +1109,13 @@ impl RbSeries {
|
|
1109
1109
|
builder.append_null();
|
1110
1110
|
} else {
|
1111
1111
|
// convert to DateTime for UTC
|
1112
|
-
let v
|
1113
|
-
|
1114
|
-
|
1112
|
+
let v = v
|
1113
|
+
.funcall::<_, _, Value>("to_datetime", ())?
|
1114
|
+
.funcall::<_, _, Value>("to_time", ())?
|
1115
|
+
.funcall::<_, _, i64>("to_i", ())?;
|
1116
|
+
|
1115
1117
|
// TODO use strict
|
1116
|
-
builder.append_value(v
|
1118
|
+
builder.append_value((v / 86400) as i32);
|
1117
1119
|
}
|
1118
1120
|
}
|
1119
1121
|
let ca: ChunkedArray<Int32Type> = builder.finish();
|
data/lib/polars/data_frame.rb
CHANGED
@@ -94,7 +94,7 @@ module Polars
|
|
94
94
|
sample_size: 1024,
|
95
95
|
eol_char: "\n"
|
96
96
|
)
|
97
|
-
if
|
97
|
+
if Utils.pathlike?(file)
|
98
98
|
path = Utils.format_path(file)
|
99
99
|
else
|
100
100
|
path = nil
|
@@ -124,7 +124,39 @@ module Polars
|
|
124
124
|
columns = [columns]
|
125
125
|
end
|
126
126
|
if file.is_a?(String) && file.include?("*")
|
127
|
-
|
127
|
+
dtypes_dict = nil
|
128
|
+
if !dtype_list.nil?
|
129
|
+
dtypes_dict = dtype_list.to_h
|
130
|
+
end
|
131
|
+
if !dtype_slice.nil?
|
132
|
+
raise ArgumentError, "cannot use glob patterns and unnamed dtypes as `dtypes` argument; Use dtypes: Mapping[str, Type[DataType]"
|
133
|
+
end
|
134
|
+
scan = Polars.scan_csv(
|
135
|
+
file,
|
136
|
+
has_header: has_header,
|
137
|
+
sep: sep,
|
138
|
+
comment_char: comment_char,
|
139
|
+
quote_char: quote_char,
|
140
|
+
skip_rows: skip_rows,
|
141
|
+
dtypes: dtypes_dict,
|
142
|
+
null_values: null_values,
|
143
|
+
ignore_errors: ignore_errors,
|
144
|
+
infer_schema_length: infer_schema_length,
|
145
|
+
n_rows: n_rows,
|
146
|
+
low_memory: low_memory,
|
147
|
+
rechunk: rechunk,
|
148
|
+
skip_rows_after_header: skip_rows_after_header,
|
149
|
+
row_count_name: row_count_name,
|
150
|
+
row_count_offset: row_count_offset,
|
151
|
+
eol_char: eol_char
|
152
|
+
)
|
153
|
+
if columns.nil?
|
154
|
+
return _from_rbdf(scan.collect._df)
|
155
|
+
elsif is_str_sequence(columns, allow_str: false)
|
156
|
+
return _from_rbdf(scan.select(columns).collect._df)
|
157
|
+
else
|
158
|
+
raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: List[str]"
|
159
|
+
end
|
128
160
|
end
|
129
161
|
|
130
162
|
projection, columns = Utils.handle_projection_columns(columns)
|
@@ -170,7 +202,7 @@ module Polars
|
|
170
202
|
row_count_offset: 0,
|
171
203
|
low_memory: false
|
172
204
|
)
|
173
|
-
if
|
205
|
+
if Utils.pathlike?(file)
|
174
206
|
file = Utils.format_path(file)
|
175
207
|
end
|
176
208
|
|
@@ -194,7 +226,7 @@ module Polars
|
|
194
226
|
|
195
227
|
# @private
|
196
228
|
def self._read_avro(file, columns: nil, n_rows: nil)
|
197
|
-
if
|
229
|
+
if Utils.pathlike?(file)
|
198
230
|
file = Utils.format_path(file)
|
199
231
|
end
|
200
232
|
projection, columns = Utils.handle_projection_columns(columns)
|
@@ -211,7 +243,7 @@ module Polars
|
|
211
243
|
rechunk: true,
|
212
244
|
memory_map: true
|
213
245
|
)
|
214
|
-
if
|
246
|
+
if Utils.pathlike?(file)
|
215
247
|
file = Utils.format_path(file)
|
216
248
|
end
|
217
249
|
if columns.is_a?(String)
|
@@ -237,7 +269,7 @@ module Polars
|
|
237
269
|
|
238
270
|
# @private
|
239
271
|
def self._read_json(file)
|
240
|
-
if
|
272
|
+
if Utils.pathlike?(file)
|
241
273
|
file = Utils.format_path(file)
|
242
274
|
end
|
243
275
|
|
@@ -246,7 +278,7 @@ module Polars
|
|
246
278
|
|
247
279
|
# @private
|
248
280
|
def self._read_ndjson(file)
|
249
|
-
if
|
281
|
+
if Utils.pathlike?(file)
|
250
282
|
file = Utils.format_path(file)
|
251
283
|
end
|
252
284
|
|
@@ -698,7 +730,7 @@ module Polars
|
|
698
730
|
pretty: false,
|
699
731
|
row_oriented: false
|
700
732
|
)
|
701
|
-
if
|
733
|
+
if Utils.pathlike?(file)
|
702
734
|
file = Utils.format_path(file)
|
703
735
|
end
|
704
736
|
|
@@ -713,7 +745,7 @@ module Polars
|
|
713
745
|
#
|
714
746
|
# @return [nil]
|
715
747
|
def write_ndjson(file)
|
716
|
-
if
|
748
|
+
if Utils.pathlike?(file)
|
717
749
|
file = Utils.format_path(file)
|
718
750
|
end
|
719
751
|
|
@@ -803,7 +835,7 @@ module Polars
|
|
803
835
|
return buffer.string.force_encoding(Encoding::UTF_8)
|
804
836
|
end
|
805
837
|
|
806
|
-
if
|
838
|
+
if Utils.pathlike?(file)
|
807
839
|
file = Utils.format_path(file)
|
808
840
|
end
|
809
841
|
|
@@ -841,7 +873,7 @@ module Polars
|
|
841
873
|
if compression.nil?
|
842
874
|
compression = "uncompressed"
|
843
875
|
end
|
844
|
-
if
|
876
|
+
if Utils.pathlike?(file)
|
845
877
|
file = Utils.format_path(file)
|
846
878
|
end
|
847
879
|
|
@@ -860,7 +892,7 @@ module Polars
|
|
860
892
|
if compression.nil?
|
861
893
|
compression = "uncompressed"
|
862
894
|
end
|
863
|
-
if
|
895
|
+
if Utils.pathlike?(file)
|
864
896
|
file = Utils.format_path(file)
|
865
897
|
end
|
866
898
|
|
@@ -902,7 +934,7 @@ module Polars
|
|
902
934
|
if compression.nil?
|
903
935
|
compression = "uncompressed"
|
904
936
|
end
|
905
|
-
if
|
937
|
+
if Utils.pathlike?(file)
|
906
938
|
file = Utils.format_path(file)
|
907
939
|
end
|
908
940
|
|
data/lib/polars/io.rb
CHANGED
@@ -268,7 +268,7 @@ module Polars
|
|
268
268
|
_check_arg_is_1byte("comment_char", comment_char, false)
|
269
269
|
_check_arg_is_1byte("quote_char", quote_char, true)
|
270
270
|
|
271
|
-
if
|
271
|
+
if Utils.pathlike?(file)
|
272
272
|
file = Utils.format_path(file)
|
273
273
|
end
|
274
274
|
|
@@ -384,7 +384,7 @@ module Polars
|
|
384
384
|
storage_options: nil,
|
385
385
|
low_memory: false
|
386
386
|
)
|
387
|
-
if
|
387
|
+
if Utils.pathlike?(file)
|
388
388
|
file = Utils.format_path(file)
|
389
389
|
end
|
390
390
|
|
@@ -435,7 +435,7 @@ module Polars
|
|
435
435
|
row_count_name: nil,
|
436
436
|
row_count_offset: 0
|
437
437
|
)
|
438
|
-
if
|
438
|
+
if Utils.pathlike?(file)
|
439
439
|
file = Utils.format_path(file)
|
440
440
|
end
|
441
441
|
|
@@ -463,7 +463,7 @@ module Polars
|
|
463
463
|
#
|
464
464
|
# @return [DataFrame]
|
465
465
|
def read_avro(file, columns: nil, n_rows: nil)
|
466
|
-
if
|
466
|
+
if Utils.pathlike?(file)
|
467
467
|
file = Utils.format_path(file)
|
468
468
|
end
|
469
469
|
|
@@ -786,7 +786,7 @@ module Polars
|
|
786
786
|
#
|
787
787
|
# @return [Hash]
|
788
788
|
def read_ipc_schema(file)
|
789
|
-
if
|
789
|
+
if Utils.pathlike?(file)
|
790
790
|
file = Utils.format_path(file)
|
791
791
|
end
|
792
792
|
|
@@ -800,7 +800,7 @@ module Polars
|
|
800
800
|
#
|
801
801
|
# @return [Hash]
|
802
802
|
def read_parquet_schema(file)
|
803
|
-
if
|
803
|
+
if Utils.pathlike?(file)
|
804
804
|
file = Utils.format_path(file)
|
805
805
|
end
|
806
806
|
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -106,7 +106,7 @@ module Polars
|
|
106
106
|
storage_options: nil,
|
107
107
|
memory_map: true
|
108
108
|
)
|
109
|
-
if
|
109
|
+
if Utils.pathlike?(file)
|
110
110
|
file = Utils.format_path(file)
|
111
111
|
end
|
112
112
|
|
@@ -156,7 +156,7 @@ module Polars
|
|
156
156
|
#
|
157
157
|
# @return [LazyFrame]
|
158
158
|
def self.read_json(file)
|
159
|
-
if
|
159
|
+
if Utils.pathlike?(file)
|
160
160
|
file = Utils.format_path(file)
|
161
161
|
end
|
162
162
|
|
@@ -263,7 +263,7 @@ module Polars
|
|
263
263
|
#
|
264
264
|
# @return [nil]
|
265
265
|
def write_json(file)
|
266
|
-
if
|
266
|
+
if Utils.pathlike?(file)
|
267
267
|
file = Utils.format_path(file)
|
268
268
|
end
|
269
269
|
_ldf.write_json(file)
|
data/lib/polars/lazy_functions.rb
CHANGED
@@ -15,7 +15,7 @@ module Polars
|
|
15
15
|
if name.is_a?(DataType)
|
16
16
|
Utils.wrap_expr(_dtype_cols([name]))
|
17
17
|
elsif name.is_a?(Array)
|
18
|
-
if name.length == 0 ||
|
18
|
+
if name.length == 0 || Utils.strlike?(name[0])
|
19
19
|
name = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
|
20
20
|
Utils.wrap_expr(RbExpr.cols(name))
|
21
21
|
elsif Utils.is_polars_dtype(name[0])
|
@@ -119,7 +119,7 @@ module Polars
|
|
119
119
|
def max(column)
|
120
120
|
if column.is_a?(Series)
|
121
121
|
column.max
|
122
|
-
elsif
|
122
|
+
elsif Utils.strlike?(column)
|
123
123
|
col(column).max
|
124
124
|
else
|
125
125
|
exprs = Utils.selection_to_rbexpr_list(column)
|
@@ -141,7 +141,7 @@ module Polars
|
|
141
141
|
def min(column)
|
142
142
|
if column.is_a?(Series)
|
143
143
|
column.min
|
144
|
-
elsif
|
144
|
+
elsif Utils.strlike?(column)
|
145
145
|
col(column).min
|
146
146
|
else
|
147
147
|
exprs = Utils.selection_to_rbexpr_list(column)
|
@@ -156,7 +156,7 @@ module Polars
|
|
156
156
|
def sum(column)
|
157
157
|
if column.is_a?(Series)
|
158
158
|
column.sum
|
159
|
-
elsif
|
159
|
+
elsif Utils.strlike?(column)
|
160
160
|
col(column.to_s).sum
|
161
161
|
elsif column.is_a?(Array)
|
162
162
|
exprs = Utils.selection_to_rbexpr_list(column)
|
@@ -356,7 +356,7 @@ module Polars
|
|
356
356
|
def cumsum(column)
|
357
357
|
if column.is_a?(Series)
|
358
358
|
column.cumsum
|
359
|
-
elsif
|
359
|
+
elsif Utils.strlike?(column)
|
360
360
|
col(column).cumsum
|
361
361
|
else
|
362
362
|
cumfold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("cumsum")
|
@@ -380,10 +380,10 @@ module Polars
|
|
380
380
|
#
|
381
381
|
# @return [Expr]
|
382
382
|
def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
|
383
|
-
if
|
383
|
+
if Utils.strlike?(a)
|
384
384
|
a = col(a)
|
385
385
|
end
|
386
|
-
if
|
386
|
+
if Utils.strlike?(b)
|
387
387
|
b = col(b)
|
388
388
|
end
|
389
389
|
Utils.wrap_expr(RbExpr.spearman_rank_corr(a._rbexpr, b._rbexpr, ddof, propagate_nans))
|
@@ -400,10 +400,10 @@ module Polars
|
|
400
400
|
#
|
401
401
|
# @return [Expr]
|
402
402
|
def pearson_corr(a, b, ddof: 1)
|
403
|
-
if
|
403
|
+
if Utils.strlike?(a)
|
404
404
|
a = col(a)
|
405
405
|
end
|
406
|
-
if
|
406
|
+
if Utils.strlike?(b)
|
407
407
|
b = col(b)
|
408
408
|
end
|
409
409
|
Utils.wrap_expr(RbExpr.pearson_corr(a._rbexpr, b._rbexpr, ddof))
|
@@ -418,10 +418,10 @@ module Polars
|
|
418
418
|
#
|
419
419
|
# @return [Expr]
|
420
420
|
def cov(a, b)
|
421
|
-
if
|
421
|
+
if Utils.strlike?(a)
|
422
422
|
a = col(a)
|
423
423
|
end
|
424
|
-
if
|
424
|
+
if Utils.strlike?(b)
|
425
425
|
b = col(b)
|
426
426
|
end
|
427
427
|
Utils.wrap_expr(RbExpr.cov(a._rbexpr, b._rbexpr))
|
@@ -486,7 +486,7 @@ module Polars
|
|
486
486
|
#
|
487
487
|
# @return [Expr]
|
488
488
|
def any(name)
|
489
|
-
if
|
489
|
+
if Utils.strlike?(name)
|
490
490
|
col(name).any
|
491
491
|
else
|
492
492
|
fold(lit(false), ->(a, b) { a.cast(:bool) | b.cast(:bool) }, name).alias("any")
|
@@ -589,7 +589,7 @@ module Polars
|
|
589
589
|
def all(name = nil)
|
590
590
|
if name.nil?
|
591
591
|
col("*")
|
592
|
-
elsif
|
592
|
+
elsif Utils.strlike?(name)
|
593
593
|
col(name).all
|
594
594
|
else
|
595
595
|
raise Todo
|
@@ -1137,7 +1137,7 @@ module Polars
|
|
1137
1137
|
# # │ 2022-10-25 07:31:39 │
|
1138
1138
|
# # └─────────────────────┘
|
1139
1139
|
def from_epoch(column, unit: "s", eager: false)
|
1140
|
-
if
|
1140
|
+
if Utils.strlike?(column)
|
1141
1141
|
column = col(column)
|
1142
1142
|
elsif !column.is_a?(Series) && !column.is_a?(Expr)
|
1143
1143
|
column = Series.new(column)
|
data/lib/polars/utils.rb
CHANGED
@@ -177,6 +177,14 @@ module Polars
|
|
177
177
|
value == true || value == false
|
178
178
|
end
|
179
179
|
|
180
|
+
def self.strlike?(value)
|
181
|
+
value.is_a?(String) || value.is_a?(Symbol)
|
182
|
+
end
|
183
|
+
|
184
|
+
def self.pathlike?(value)
|
185
|
+
value.is_a?(String) || (defined?(Pathname) && value.is_a?(Pathname))
|
186
|
+
end
|
187
|
+
|
180
188
|
def self._is_iterable_of(val, eltype)
|
181
189
|
val.all? { |x| x.is_a?(eltype) }
|
182
190
|
end
|
data/lib/polars/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|