polars-df 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +1 -1
- data/README.md +6 -3
- data/ext/polars/Cargo.toml +1 -1
- data/ext/polars/src/lib.rs +6 -1
- data/lib/polars/data_frame.rb +66 -3
- data/lib/polars/io.rb +3 -1
- data/lib/polars/series.rb +71 -11
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/utils.rb +20 -0
- data/lib/polars/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: df03134e7edf09e86b5a4f4f9ae9a926bac4c9c0804a29c3422c32675f478825
|
|
4
|
+
data.tar.gz: e0338be1aa96d0ad082ebf8fe27e608b2906b243dd49fa837aceb7f8186947d8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 75a139d30f9fdebaa84a21fa45cec8a199da76eb295e7099ceb849646a93fbc7ed80ffed18aaa8eb7bbfc53a32792b2e47101485ad31d727a47ed67d8d7e8110
|
|
7
|
+
data.tar.gz: 589f7fbc1300aadc05568308700f6a94b934e63c40bd1be0a3e7b6f564c0d55f256e2e45e926c128d80453d0e7d200b057f640b02cd6fb9aaddf5bf55dd89754
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
data/README.md
CHANGED
|
@@ -41,6 +41,9 @@ From a CSV
|
|
|
41
41
|
|
|
42
42
|
```ruby
|
|
43
43
|
Polars.read_csv("file.csv")
|
|
44
|
+
|
|
45
|
+
# or lazily with
|
|
46
|
+
Polars.scan_csv("file.csv")
|
|
44
47
|
```
|
|
45
48
|
|
|
46
49
|
From Parquet
|
|
@@ -135,9 +138,9 @@ df[Polars.col("a") <= 2]
|
|
|
135
138
|
And, or, and exclusive or
|
|
136
139
|
|
|
137
140
|
```ruby
|
|
138
|
-
df[(Polars.col("a") >
|
|
139
|
-
df[(Polars.col("a") >
|
|
140
|
-
df[(Polars.col("a") >
|
|
141
|
+
df[(Polars.col("a") > 1) & (Polars.col("b") == "two")] # and
|
|
142
|
+
df[(Polars.col("a") > 1) | (Polars.col("b") == "two")] # or
|
|
143
|
+
df[(Polars.col("a") > 1) ^ (Polars.col("b") == "two")] # xor
|
|
141
144
|
```
|
|
142
145
|
|
|
143
146
|
## Operations
|
data/ext/polars/Cargo.toml
CHANGED
data/ext/polars/src/lib.rs
CHANGED
|
@@ -22,7 +22,7 @@ use magnus::{
|
|
|
22
22
|
define_module, function, memoize, method, prelude::*, Error, RArray, RClass, RHash, RModule,
|
|
23
23
|
Value,
|
|
24
24
|
};
|
|
25
|
-
use polars::datatypes::{DataType, TimeUnit};
|
|
25
|
+
use polars::datatypes::{DataType, TimeUnit, IDX_DTYPE};
|
|
26
26
|
use polars::error::PolarsResult;
|
|
27
27
|
use polars::frame::DataFrame;
|
|
28
28
|
use polars::functions::{diag_concat_df, hor_concat_df};
|
|
@@ -71,6 +71,7 @@ fn init() -> RbResult<()> {
|
|
|
71
71
|
module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
|
|
72
72
|
module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
|
|
73
73
|
module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
|
|
74
|
+
module.define_singleton_method("_get_idx_type", function!(get_idx_type, 0))?;
|
|
74
75
|
|
|
75
76
|
let class = module.define_class("RbBatchedCsv", Default::default())?;
|
|
76
77
|
class.define_singleton_method("new", function!(RbBatchedCsv::new, -1))?;
|
|
@@ -988,3 +989,7 @@ fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
|
|
988
989
|
fn arg_where(condition: &RbExpr) -> RbExpr {
|
|
989
990
|
polars::lazy::dsl::arg_where(condition.inner.clone()).into()
|
|
990
991
|
}
|
|
992
|
+
|
|
993
|
+
fn get_idx_type() -> Value {
|
|
994
|
+
Wrap(IDX_DTYPE).into()
|
|
995
|
+
}
|
data/lib/polars/data_frame.rb
CHANGED
|
@@ -277,6 +277,7 @@ module Polars
|
|
|
277
277
|
_df.height
|
|
278
278
|
end
|
|
279
279
|
alias_method :count, :height
|
|
280
|
+
alias_method :length, :height
|
|
280
281
|
|
|
281
282
|
# Get the width of the DataFrame.
|
|
282
283
|
#
|
|
@@ -541,7 +542,7 @@ module Polars
|
|
|
541
542
|
|
|
542
543
|
if col_selection.is_a?(Array)
|
|
543
544
|
# df[.., [1, 2]]
|
|
544
|
-
if is_int_sequence(col_selection)
|
|
545
|
+
if Utils.is_int_sequence(col_selection)
|
|
545
546
|
series_list = col_selection.map { |i| to_series(i) }
|
|
546
547
|
df = self.class.new(series_list)
|
|
547
548
|
return df[row_selection]
|
|
@@ -574,6 +575,23 @@ module Polars
|
|
|
574
575
|
# df[["foo", "bar"]]
|
|
575
576
|
return _from_rbdf(_df.select(item))
|
|
576
577
|
end
|
|
578
|
+
|
|
579
|
+
if Utils.is_int_sequence(item)
|
|
580
|
+
item = Series.new("", item)
|
|
581
|
+
end
|
|
582
|
+
|
|
583
|
+
if item.is_a?(Series)
|
|
584
|
+
dtype = item.dtype
|
|
585
|
+
if dtype == Utf8
|
|
586
|
+
return _from_rbdf(_df.select(item))
|
|
587
|
+
elsif dtype == UInt32
|
|
588
|
+
return _from_rbdf(_df.take_with_series(item._s))
|
|
589
|
+
elsif [UInt8, UInt16, UInt64, Int8, Int16, Int32, Int64].include?(dtype)
|
|
590
|
+
return _from_rbdf(
|
|
591
|
+
_df.take_with_series(_pos_idxs(item, 0)._s)
|
|
592
|
+
)
|
|
593
|
+
end
|
|
594
|
+
end
|
|
577
595
|
end
|
|
578
596
|
|
|
579
597
|
# Ruby-specific
|
|
@@ -4662,8 +4680,53 @@ module Polars
|
|
|
4662
4680
|
end
|
|
4663
4681
|
end
|
|
4664
4682
|
|
|
4665
|
-
|
|
4666
|
-
|
|
4683
|
+
def _pos_idxs(idxs, dim)
|
|
4684
|
+
idx_type = Polars._get_idx_type
|
|
4685
|
+
|
|
4686
|
+
if idxs.is_a?(Series)
|
|
4687
|
+
if idxs.dtype == idx_type
|
|
4688
|
+
return idxs
|
|
4689
|
+
end
|
|
4690
|
+
if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
|
4691
|
+
if idx_type == UInt32
|
|
4692
|
+
if [Int64, UInt64].include?(idxs.dtype)
|
|
4693
|
+
if idxs.max >= 2**32
|
|
4694
|
+
raise ArgumentError, "Index positions should be smaller than 2^32."
|
|
4695
|
+
end
|
|
4696
|
+
end
|
|
4697
|
+
if idxs.dtype == Int64
|
|
4698
|
+
if idxs.min < -(2**32)
|
|
4699
|
+
raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
|
|
4700
|
+
end
|
|
4701
|
+
end
|
|
4702
|
+
end
|
|
4703
|
+
if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
|
4704
|
+
if idxs.min < 0
|
|
4705
|
+
if idx_type == UInt32
|
|
4706
|
+
if [Int8, Int16].include?(idxs.dtype)
|
|
4707
|
+
idxs = idxs.cast(Int32)
|
|
4708
|
+
end
|
|
4709
|
+
else
|
|
4710
|
+
if [Int8, Int16, Int32].include?(idxs.dtype)
|
|
4711
|
+
idxs = idxs.cast(Int64)
|
|
4712
|
+
end
|
|
4713
|
+
end
|
|
4714
|
+
|
|
4715
|
+
idxs =
|
|
4716
|
+
Polars.select(
|
|
4717
|
+
Polars.when(Polars.lit(idxs) < 0)
|
|
4718
|
+
.then(shape[dim] + Polars.lit(idxs))
|
|
4719
|
+
.otherwise(Polars.lit(idxs))
|
|
4720
|
+
).to_series
|
|
4721
|
+
end
|
|
4722
|
+
end
|
|
4723
|
+
|
|
4724
|
+
return idxs.cast(idx_type)
|
|
4725
|
+
end
|
|
4726
|
+
end
|
|
4727
|
+
|
|
4728
|
+
raise ArgumentError, "Unsupported idxs datatype."
|
|
4729
|
+
end
|
|
4667
4730
|
|
|
4668
4731
|
# @private
|
|
4669
4732
|
def self.hash_to_rbdf(data, columns: nil)
|
data/lib/polars/io.rb
CHANGED
|
@@ -606,8 +606,10 @@ module Polars
|
|
|
606
606
|
sql
|
|
607
607
|
elsif sql.is_a?(ActiveRecord::Relation)
|
|
608
608
|
sql.connection.select_all(sql.to_sql)
|
|
609
|
+
elsif sql.is_a?(String)
|
|
610
|
+
ActiveRecord::Base.connection.select_all(sql)
|
|
609
611
|
else
|
|
610
|
-
raise ArgumentError, "Expected ActiveRecord::Relation
|
|
612
|
+
raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
|
|
611
613
|
end
|
|
612
614
|
data = {}
|
|
613
615
|
result.columns.each_with_index do |k, i|
|
data/lib/polars/series.rb
CHANGED
|
@@ -263,6 +263,10 @@ module Polars
|
|
|
263
263
|
#
|
|
264
264
|
# @return [Object]
|
|
265
265
|
def [](item)
|
|
266
|
+
if item.is_a?(Series) && [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
|
|
267
|
+
return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
|
|
268
|
+
end
|
|
269
|
+
|
|
266
270
|
if item.is_a?(Integer)
|
|
267
271
|
return _s.get_idx(item)
|
|
268
272
|
end
|
|
@@ -271,6 +275,10 @@ module Polars
|
|
|
271
275
|
return Slice.new(self).apply(item)
|
|
272
276
|
end
|
|
273
277
|
|
|
278
|
+
if Utils.is_int_sequence(item)
|
|
279
|
+
return Utils.wrap_s(_s.take_with_series(_pos_idxs(Series.new("", item))._s))
|
|
280
|
+
end
|
|
281
|
+
|
|
274
282
|
raise ArgumentError, "Cannot get item of type: #{item.class.name}"
|
|
275
283
|
end
|
|
276
284
|
|
|
@@ -287,24 +295,23 @@ module Polars
|
|
|
287
295
|
end
|
|
288
296
|
|
|
289
297
|
if key.is_a?(Series)
|
|
290
|
-
if key.dtype ==
|
|
298
|
+
if key.dtype == Boolean
|
|
291
299
|
self._s = set(key, value)._s
|
|
292
|
-
elsif key.dtype ==
|
|
293
|
-
self._s = set_at_idx(key.cast(
|
|
294
|
-
elsif key.dtype ==
|
|
300
|
+
elsif key.dtype == UInt64
|
|
301
|
+
self._s = set_at_idx(key.cast(UInt32), value)._s
|
|
302
|
+
elsif key.dtype == UInt32
|
|
295
303
|
self._s = set_at_idx(key, value)._s
|
|
296
304
|
else
|
|
297
305
|
raise Todo
|
|
298
306
|
end
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
307
|
+
elsif key.is_a?(Array)
|
|
308
|
+
s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
|
|
309
|
+
self[s] = value
|
|
310
|
+
elsif key.is_a?(Range)
|
|
311
|
+
s = Series.new("", key, dtype: UInt32)
|
|
303
312
|
self[s] = value
|
|
304
313
|
elsif key.is_a?(Integer)
|
|
305
|
-
|
|
306
|
-
# self[[key]] = value
|
|
307
|
-
set_at_idx(key, value)
|
|
314
|
+
self[[key]] = value
|
|
308
315
|
else
|
|
309
316
|
raise ArgumentError, "cannot use #{key} for indexing"
|
|
310
317
|
end
|
|
@@ -3527,6 +3534,59 @@ module Polars
|
|
|
3527
3534
|
end
|
|
3528
3535
|
end
|
|
3529
3536
|
|
|
3537
|
+
def _pos_idxs(idxs)
|
|
3538
|
+
idx_type = Polars._get_idx_type
|
|
3539
|
+
|
|
3540
|
+
if idxs.is_a?(Series)
|
|
3541
|
+
if idxs.dtype == idx_type
|
|
3542
|
+
return idxs
|
|
3543
|
+
end
|
|
3544
|
+
if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
|
3545
|
+
if idx_type == UInt32
|
|
3546
|
+
if [Int64, UInt64].include?(idxs.dtype)
|
|
3547
|
+
if idxs.max >= 2**32
|
|
3548
|
+
raise ArgumentError, "Index positions should be smaller than 2^32."
|
|
3549
|
+
end
|
|
3550
|
+
end
|
|
3551
|
+
if idxs.dtype == Int64
|
|
3552
|
+
if idxs.min < -(2**32)
|
|
3553
|
+
raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
|
|
3554
|
+
end
|
|
3555
|
+
end
|
|
3556
|
+
end
|
|
3557
|
+
if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
|
3558
|
+
if idxs.min < 0
|
|
3559
|
+
if idx_type == UInt32
|
|
3560
|
+
if [Int8, Int16].include?(idxs.dtype)
|
|
3561
|
+
idxs = idxs.cast(Int32)
|
|
3562
|
+
end
|
|
3563
|
+
else
|
|
3564
|
+
if [Int8, Int16, Int32].include?(idxs.dtype)
|
|
3565
|
+
idxs = idxs.cast(Int64)
|
|
3566
|
+
end
|
|
3567
|
+
end
|
|
3568
|
+
|
|
3569
|
+
# Update negative indexes to absolute indexes.
|
|
3570
|
+
return (
|
|
3571
|
+
idxs.to_frame
|
|
3572
|
+
.select(
|
|
3573
|
+
Polars.when(Polars.col(idxs.name) < 0)
|
|
3574
|
+
.then(len + Polars.col(idxs.name))
|
|
3575
|
+
.otherwise(Polars.col(idxs.name))
|
|
3576
|
+
.cast(idx_type)
|
|
3577
|
+
)
|
|
3578
|
+
.to_series(0)
|
|
3579
|
+
)
|
|
3580
|
+
end
|
|
3581
|
+
end
|
|
3582
|
+
|
|
3583
|
+
return idxs.cast(idx_type)
|
|
3584
|
+
end
|
|
3585
|
+
end
|
|
3586
|
+
|
|
3587
|
+
raise ArgumentError, "Unsupported idxs datatype."
|
|
3588
|
+
end
|
|
3589
|
+
|
|
3530
3590
|
def _comp(other, op)
|
|
3531
3591
|
if other.is_a?(Series)
|
|
3532
3592
|
return Utils.wrap_s(_s.send(op, other._s))
|
data/lib/polars/slice.rb
CHANGED
|
@@ -56,7 +56,7 @@ module Polars
|
|
|
56
56
|
# Normalize slice bounds, identify unbounded and/or zero-length slices.
|
|
57
57
|
def _slice_setup(s)
|
|
58
58
|
# can normalize slice indices as we know object size
|
|
59
|
-
obj_len = @obj.
|
|
59
|
+
obj_len = @obj.length
|
|
60
60
|
start = if s.begin
|
|
61
61
|
if s.begin < 0
|
|
62
62
|
[s.begin + obj_len, 0].max
|
data/lib/polars/utils.rb
CHANGED
|
@@ -181,6 +181,26 @@ module Polars
|
|
|
181
181
|
val.all? { |x| x.is_a?(eltype) }
|
|
182
182
|
end
|
|
183
183
|
|
|
184
|
+
def self.is_bool_sequence(val)
|
|
185
|
+
val.is_a?(Array) && val.all? { |x| x == true || x == false }
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
def self.is_dtype_sequence(val)
|
|
189
|
+
val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
def self.is_int_sequence(val)
|
|
193
|
+
val.is_a?(Array) && _is_iterable_of(val, Integer)
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
def self.is_expr_sequence(val)
|
|
197
|
+
val.is_a?(Array) && _is_iterable_of(val, Expr)
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
def self.is_rbexpr_sequence(val)
|
|
201
|
+
val.is_a?(Array) && _is_iterable_of(val, RbExpr)
|
|
202
|
+
end
|
|
203
|
+
|
|
184
204
|
def self.is_str_sequence(val, allow_str: false)
|
|
185
205
|
if allow_str == false && val.is_a?(String)
|
|
186
206
|
false
|
data/lib/polars/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: polars-df
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-01-
|
|
11
|
+
date: 2023-01-21 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rb_sys
|