polars-df 0.2.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +1 -1
- data/README.md +266 -2
- data/ext/polars/Cargo.toml +1 -1
- data/ext/polars/src/lib.rs +6 -1
- data/lib/polars/data_frame.rb +84 -7
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/io.rb +27 -2
- data/lib/polars/series.rb +78 -11
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/utils.rb +23 -3
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: df03134e7edf09e86b5a4f4f9ae9a926bac4c9c0804a29c3422c32675f478825
|
4
|
+
data.tar.gz: e0338be1aa96d0ad082ebf8fe27e608b2906b243dd49fa837aceb7f8186947d8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 75a139d30f9fdebaa84a21fa45cec8a199da76eb295e7099ceb849646a93fbc7ed80ffed18aaa8eb7bbfc53a32792b2e47101485ad31d727a47ed67d8d7e8110
|
7
|
+
data.tar.gz: 589f7fbc1300aadc05568308700f6a94b934e63c40bd1be0a3e7b6f564c0d55f256e2e45e926c128d80453d0e7d200b057f640b02cd6fb9aaddf5bf55dd89754
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
## 0.2.2 (2023-01-20)
|
2
|
+
|
3
|
+
- Added support for strings to `read_sql` method
|
4
|
+
- Improved indexing
|
5
|
+
- Fixed error with precompiled gem on Mac ARM
|
6
|
+
|
7
|
+
## 0.2.1 (2023-01-18)
|
8
|
+
|
9
|
+
- Added `read_sql` method
|
10
|
+
- Added `to_csv` method
|
11
|
+
- Added support for symbol keys
|
12
|
+
|
1
13
|
## 0.2.0 (2023-01-14)
|
2
14
|
|
3
15
|
- Updated Polars to 0.26.1
|
data/Cargo.lock
CHANGED
data/README.md
CHANGED
@@ -25,7 +25,13 @@ Polars.read_csv("iris.csv")
|
|
25
25
|
.collect
|
26
26
|
```
|
27
27
|
|
28
|
-
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
28
|
+
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
29
|
+
|
30
|
+
## Reference
|
31
|
+
|
32
|
+
- [Series](https://www.rubydoc.info/gems/polars-df/Polars/Series)
|
33
|
+
- [DataFrame](https://www.rubydoc.info/gems/polars-df/Polars/DataFrame)
|
34
|
+
- [LazyFrame](https://www.rubydoc.info/gems/polars-df/Polars/LazyFrame)
|
29
35
|
|
30
36
|
## Examples
|
31
37
|
|
@@ -35,6 +41,9 @@ From a CSV
|
|
35
41
|
|
36
42
|
```ruby
|
37
43
|
Polars.read_csv("file.csv")
|
44
|
+
|
45
|
+
# or lazily with
|
46
|
+
Polars.scan_csv("file.csv")
|
38
47
|
```
|
39
48
|
|
40
49
|
From Parquet
|
@@ -46,7 +55,7 @@ Polars.read_parquet("file.parquet")
|
|
46
55
|
From Active Record
|
47
56
|
|
48
57
|
```ruby
|
49
|
-
Polars::DataFrame.new(User.all)
|
58
|
+
Polars.read_sql(User.all)
|
50
59
|
```
|
51
60
|
|
52
61
|
From a hash
|
@@ -67,6 +76,261 @@ Polars::DataFrame.new([
|
|
67
76
|
])
|
68
77
|
```
|
69
78
|
|
79
|
+
## Attributes
|
80
|
+
|
81
|
+
Get number of rows
|
82
|
+
|
83
|
+
```ruby
|
84
|
+
df.height
|
85
|
+
```
|
86
|
+
|
87
|
+
Get column names
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
df.columns
|
91
|
+
```
|
92
|
+
|
93
|
+
Check if a column exists
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
df.include?(name)
|
97
|
+
```
|
98
|
+
|
99
|
+
## Selecting Data
|
100
|
+
|
101
|
+
Select a column
|
102
|
+
|
103
|
+
```ruby
|
104
|
+
df["a"]
|
105
|
+
```
|
106
|
+
|
107
|
+
Select multiple columns
|
108
|
+
|
109
|
+
```ruby
|
110
|
+
df[["a", "b"]]
|
111
|
+
```
|
112
|
+
|
113
|
+
Select first rows
|
114
|
+
|
115
|
+
```ruby
|
116
|
+
df.head
|
117
|
+
```
|
118
|
+
|
119
|
+
Select last rows
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
df.tail
|
123
|
+
```
|
124
|
+
|
125
|
+
## Filtering
|
126
|
+
|
127
|
+
Filter on a condition
|
128
|
+
|
129
|
+
```ruby
|
130
|
+
df[Polars.col("a") == 2]
|
131
|
+
df[Polars.col("a") != 2]
|
132
|
+
df[Polars.col("a") > 2]
|
133
|
+
df[Polars.col("a") >= 2]
|
134
|
+
df[Polars.col("a") < 2]
|
135
|
+
df[Polars.col("a") <= 2]
|
136
|
+
```
|
137
|
+
|
138
|
+
And, or, and exclusive or
|
139
|
+
|
140
|
+
```ruby
|
141
|
+
df[(Polars.col("a") > 1) & (Polars.col("b") == "two")] # and
|
142
|
+
df[(Polars.col("a") > 1) | (Polars.col("b") == "two")] # or
|
143
|
+
df[(Polars.col("a") > 1) ^ (Polars.col("b") == "two")] # xor
|
144
|
+
```
|
145
|
+
|
146
|
+
## Operations
|
147
|
+
|
148
|
+
Basic operations
|
149
|
+
|
150
|
+
```ruby
|
151
|
+
df["a"] + 5
|
152
|
+
df["a"] - 5
|
153
|
+
df["a"] * 5
|
154
|
+
df["a"] / 5
|
155
|
+
df["a"] % 5
|
156
|
+
df["a"] ** 2
|
157
|
+
df["a"].sqrt
|
158
|
+
df["a"].abs
|
159
|
+
```
|
160
|
+
|
161
|
+
Rounding
|
162
|
+
|
163
|
+
```ruby
|
164
|
+
df["a"].round(2)
|
165
|
+
df["a"].ceil
|
166
|
+
df["a"].floor
|
167
|
+
```
|
168
|
+
|
169
|
+
Logarithm
|
170
|
+
|
171
|
+
```ruby
|
172
|
+
df["a"].log # natural log
|
173
|
+
df["a"].log(10)
|
174
|
+
```
|
175
|
+
|
176
|
+
Exponentiation
|
177
|
+
|
178
|
+
```ruby
|
179
|
+
df["a"].exp
|
180
|
+
```
|
181
|
+
|
182
|
+
Trigonometric functions
|
183
|
+
|
184
|
+
```ruby
|
185
|
+
df["a"].sin
|
186
|
+
df["a"].cos
|
187
|
+
df["a"].tan
|
188
|
+
df["a"].asin
|
189
|
+
df["a"].acos
|
190
|
+
df["a"].atan
|
191
|
+
```
|
192
|
+
|
193
|
+
Hyperbolic functions
|
194
|
+
|
195
|
+
```ruby
|
196
|
+
df["a"].sinh
|
197
|
+
df["a"].cosh
|
198
|
+
df["a"].tanh
|
199
|
+
df["a"].asinh
|
200
|
+
df["a"].acosh
|
201
|
+
df["a"].atanh
|
202
|
+
```
|
203
|
+
|
204
|
+
Summary statistics
|
205
|
+
|
206
|
+
```ruby
|
207
|
+
df["a"].sum
|
208
|
+
df["a"].mean
|
209
|
+
df["a"].median
|
210
|
+
df["a"].quantile(0.90)
|
211
|
+
df["a"].min
|
212
|
+
df["a"].max
|
213
|
+
df["a"].std
|
214
|
+
df["a"].var
|
215
|
+
```
|
216
|
+
|
217
|
+
## Grouping
|
218
|
+
|
219
|
+
Group
|
220
|
+
|
221
|
+
```ruby
|
222
|
+
df.groupby("a").count
|
223
|
+
```
|
224
|
+
|
225
|
+
Works with all summary statistics
|
226
|
+
|
227
|
+
```ruby
|
228
|
+
df.groupby("a").max
|
229
|
+
```
|
230
|
+
|
231
|
+
Multiple groups
|
232
|
+
|
233
|
+
```ruby
|
234
|
+
df.groupby(["a", "b"]).count
|
235
|
+
```
|
236
|
+
|
237
|
+
## Combining Data Frames
|
238
|
+
|
239
|
+
Add rows
|
240
|
+
|
241
|
+
```ruby
|
242
|
+
df.vstack(other_df)
|
243
|
+
```
|
244
|
+
|
245
|
+
Add columns
|
246
|
+
|
247
|
+
```ruby
|
248
|
+
df.hstack(other_df)
|
249
|
+
```
|
250
|
+
|
251
|
+
Inner join
|
252
|
+
|
253
|
+
```ruby
|
254
|
+
df.join(other_df, on: "a")
|
255
|
+
```
|
256
|
+
|
257
|
+
Left join
|
258
|
+
|
259
|
+
```ruby
|
260
|
+
df.join(other_df, on: "a", how: "left")
|
261
|
+
```
|
262
|
+
|
263
|
+
## Encoding
|
264
|
+
|
265
|
+
One-hot encoding
|
266
|
+
|
267
|
+
```ruby
|
268
|
+
df.to_dummies
|
269
|
+
```
|
270
|
+
|
271
|
+
## Conversion
|
272
|
+
|
273
|
+
Array of rows
|
274
|
+
|
275
|
+
```ruby
|
276
|
+
df.rows
|
277
|
+
```
|
278
|
+
|
279
|
+
Hash of series
|
280
|
+
|
281
|
+
```ruby
|
282
|
+
df.to_h
|
283
|
+
```
|
284
|
+
|
285
|
+
CSV
|
286
|
+
|
287
|
+
```ruby
|
288
|
+
df.to_csv
|
289
|
+
# or
|
290
|
+
df.write_csv("data.csv")
|
291
|
+
```
|
292
|
+
|
293
|
+
Parquet
|
294
|
+
|
295
|
+
```ruby
|
296
|
+
df.write_parquet("data.parquet")
|
297
|
+
```
|
298
|
+
|
299
|
+
## Types
|
300
|
+
|
301
|
+
You can specify column types when creating a data frame
|
302
|
+
|
303
|
+
```ruby
|
304
|
+
Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
|
305
|
+
```
|
306
|
+
|
307
|
+
Supported types are:
|
308
|
+
|
309
|
+
- boolean - `Boolean`
|
310
|
+
- float - `Float64`, `Float32`
|
311
|
+
- integer - `Int64`, `Int32`, `Int16`, `Int8`
|
312
|
+
- unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
|
313
|
+
- string - `Utf8`, `Categorical`
|
314
|
+
- temporal - `Date`, `Datetime`, `Time`, `Duration`
|
315
|
+
|
316
|
+
Get column types
|
317
|
+
|
318
|
+
```ruby
|
319
|
+
df.schema
|
320
|
+
```
|
321
|
+
|
322
|
+
For a specific column
|
323
|
+
|
324
|
+
```ruby
|
325
|
+
df["a"].dtype
|
326
|
+
```
|
327
|
+
|
328
|
+
Cast a column
|
329
|
+
|
330
|
+
```ruby
|
331
|
+
df["a"].cast(Polars::Int32)
|
332
|
+
```
|
333
|
+
|
70
334
|
## History
|
71
335
|
|
72
336
|
View the [changelog](CHANGELOG.md)
|
data/ext/polars/Cargo.toml
CHANGED
data/ext/polars/src/lib.rs
CHANGED
@@ -22,7 +22,7 @@ use magnus::{
|
|
22
22
|
define_module, function, memoize, method, prelude::*, Error, RArray, RClass, RHash, RModule,
|
23
23
|
Value,
|
24
24
|
};
|
25
|
-
use polars::datatypes::{DataType, TimeUnit};
|
25
|
+
use polars::datatypes::{DataType, TimeUnit, IDX_DTYPE};
|
26
26
|
use polars::error::PolarsResult;
|
27
27
|
use polars::frame::DataFrame;
|
28
28
|
use polars::functions::{diag_concat_df, hor_concat_df};
|
@@ -71,6 +71,7 @@ fn init() -> RbResult<()> {
|
|
71
71
|
module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
|
72
72
|
module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
|
73
73
|
module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
|
74
|
+
module.define_singleton_method("_get_idx_type", function!(get_idx_type, 0))?;
|
74
75
|
|
75
76
|
let class = module.define_class("RbBatchedCsv", Default::default())?;
|
76
77
|
class.define_singleton_method("new", function!(RbBatchedCsv::new, -1))?;
|
@@ -988,3 +989,7 @@ fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
|
988
989
|
fn arg_where(condition: &RbExpr) -> RbExpr {
|
989
990
|
polars::lazy::dsl::arg_where(condition.inner.clone()).into()
|
990
991
|
}
|
992
|
+
|
993
|
+
fn get_idx_type() -> Value {
|
994
|
+
Wrap(IDX_DTYPE).into()
|
995
|
+
}
|
data/lib/polars/data_frame.rb
CHANGED
@@ -17,6 +17,7 @@ module Polars
|
|
17
17
|
# the orientation is inferred by matching the columns and data dimensions. If
|
18
18
|
# this does not yield conclusive results, column orientation is used.
|
19
19
|
def initialize(data = nil, columns: nil, orient: nil)
|
20
|
+
# TODO deprecate in favor of read_sql
|
20
21
|
if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
|
21
22
|
result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
|
22
23
|
data = {}
|
@@ -275,6 +276,8 @@ module Polars
|
|
275
276
|
def height
|
276
277
|
_df.height
|
277
278
|
end
|
279
|
+
alias_method :count, :height
|
280
|
+
alias_method :length, :height
|
278
281
|
|
279
282
|
# Get the width of the DataFrame.
|
280
283
|
#
|
@@ -521,13 +524,13 @@ module Polars
|
|
521
524
|
return df.slice(row_selection, 1)
|
522
525
|
end
|
523
526
|
# df[2, "a"]
|
524
|
-
if col_selection.is_a?(String)
|
527
|
+
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
525
528
|
return self[col_selection][row_selection]
|
526
529
|
end
|
527
530
|
end
|
528
531
|
|
529
532
|
# column selection can be "a" and ["a", "b"]
|
530
|
-
if col_selection.is_a?(String)
|
533
|
+
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
531
534
|
col_selection = [col_selection]
|
532
535
|
end
|
533
536
|
|
@@ -539,7 +542,7 @@ module Polars
|
|
539
542
|
|
540
543
|
if col_selection.is_a?(Array)
|
541
544
|
# df[.., [1, 2]]
|
542
|
-
if is_int_sequence(col_selection)
|
545
|
+
if Utils.is_int_sequence(col_selection)
|
543
546
|
series_list = col_selection.map { |i| to_series(i) }
|
544
547
|
df = self.class.new(series_list)
|
545
548
|
return df[row_selection]
|
@@ -553,8 +556,8 @@ module Polars
|
|
553
556
|
|
554
557
|
# select single column
|
555
558
|
# df["foo"]
|
556
|
-
if item.is_a?(String)
|
557
|
-
return Utils.wrap_s(_df.column(item))
|
559
|
+
if item.is_a?(String) || item.is_a?(Symbol)
|
560
|
+
return Utils.wrap_s(_df.column(item.to_s))
|
558
561
|
end
|
559
562
|
|
560
563
|
# df[idx]
|
@@ -572,6 +575,28 @@ module Polars
|
|
572
575
|
# df[["foo", "bar"]]
|
573
576
|
return _from_rbdf(_df.select(item))
|
574
577
|
end
|
578
|
+
|
579
|
+
if Utils.is_int_sequence(item)
|
580
|
+
item = Series.new("", item)
|
581
|
+
end
|
582
|
+
|
583
|
+
if item.is_a?(Series)
|
584
|
+
dtype = item.dtype
|
585
|
+
if dtype == Utf8
|
586
|
+
return _from_rbdf(_df.select(item))
|
587
|
+
elsif dtype == UInt32
|
588
|
+
return _from_rbdf(_df.take_with_series(item._s))
|
589
|
+
elsif [UInt8, UInt16, UInt64, Int8, Int16, Int32, Int64].include?(dtype)
|
590
|
+
return _from_rbdf(
|
591
|
+
_df.take_with_series(_pos_idxs(item, 0)._s)
|
592
|
+
)
|
593
|
+
end
|
594
|
+
end
|
595
|
+
end
|
596
|
+
|
597
|
+
# Ruby-specific
|
598
|
+
if item.is_a?(Expr)
|
599
|
+
return filter(item)
|
575
600
|
end
|
576
601
|
|
577
602
|
raise ArgumentError, "Cannot get item of type: #{item.class.name}"
|
@@ -797,6 +822,13 @@ module Polars
|
|
797
822
|
nil
|
798
823
|
end
|
799
824
|
|
825
|
+
# Write to comma-separated values (CSV) string.
|
826
|
+
#
|
827
|
+
# @return [String]
|
828
|
+
def to_csv(**options)
|
829
|
+
write_csv(**options)
|
830
|
+
end
|
831
|
+
|
800
832
|
# Write to Apache Avro file.
|
801
833
|
#
|
802
834
|
# @param file [String]
|
@@ -4648,8 +4680,53 @@ module Polars
|
|
4648
4680
|
end
|
4649
4681
|
end
|
4650
4682
|
|
4651
|
-
|
4652
|
-
|
4683
|
+
def _pos_idxs(idxs, dim)
|
4684
|
+
idx_type = Polars._get_idx_type
|
4685
|
+
|
4686
|
+
if idxs.is_a?(Series)
|
4687
|
+
if idxs.dtype == idx_type
|
4688
|
+
return idxs
|
4689
|
+
end
|
4690
|
+
if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
4691
|
+
if idx_type == UInt32
|
4692
|
+
if [Int64, UInt64].include?(idxs.dtype)
|
4693
|
+
if idxs.max >= 2**32
|
4694
|
+
raise ArgumentError, "Index positions should be smaller than 2^32."
|
4695
|
+
end
|
4696
|
+
end
|
4697
|
+
if idxs.dtype == Int64
|
4698
|
+
if idxs.min < -(2**32)
|
4699
|
+
raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
|
4700
|
+
end
|
4701
|
+
end
|
4702
|
+
end
|
4703
|
+
if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
4704
|
+
if idxs.min < 0
|
4705
|
+
if idx_type == UInt32
|
4706
|
+
if [Int8, Int16].include?(idxs.dtype)
|
4707
|
+
idxs = idxs.cast(Int32)
|
4708
|
+
end
|
4709
|
+
else
|
4710
|
+
if [Int8, Int16, Int32].include?(idxs.dtype)
|
4711
|
+
idxs = idxs.cast(Int64)
|
4712
|
+
end
|
4713
|
+
end
|
4714
|
+
|
4715
|
+
idxs =
|
4716
|
+
Polars.select(
|
4717
|
+
Polars.when(Polars.lit(idxs) < 0)
|
4718
|
+
.then(shape[dim] + Polars.lit(idxs))
|
4719
|
+
.otherwise(Polars.lit(idxs))
|
4720
|
+
).to_series
|
4721
|
+
end
|
4722
|
+
end
|
4723
|
+
|
4724
|
+
return idxs.cast(idx_type)
|
4725
|
+
end
|
4726
|
+
end
|
4727
|
+
|
4728
|
+
raise ArgumentError, "Unsupported idxs datatype."
|
4729
|
+
end
|
4653
4730
|
|
4654
4731
|
# @private
|
4655
4732
|
def self.hash_to_rbdf(data, columns: nil)
|
data/lib/polars/data_types.rb
CHANGED
data/lib/polars/io.rb
CHANGED
@@ -590,8 +590,33 @@ module Polars
|
|
590
590
|
DataFrame._read_ndjson(file)
|
591
591
|
end
|
592
592
|
|
593
|
-
#
|
594
|
-
#
|
593
|
+
# Read a SQL query into a DataFrame.
|
594
|
+
#
|
595
|
+
# @param sql [Object]
|
596
|
+
# ActiveRecord::Relation or ActiveRecord::Result.
|
597
|
+
#
|
598
|
+
# @return [DataFrame]
|
599
|
+
def read_sql(sql)
|
600
|
+
if !defined?(ActiveRecord)
|
601
|
+
raise Error, "Active Record not available"
|
602
|
+
end
|
603
|
+
|
604
|
+
result =
|
605
|
+
if sql.is_a?(ActiveRecord::Result)
|
606
|
+
sql
|
607
|
+
elsif sql.is_a?(ActiveRecord::Relation)
|
608
|
+
sql.connection.select_all(sql.to_sql)
|
609
|
+
elsif sql.is_a?(String)
|
610
|
+
ActiveRecord::Base.connection.select_all(sql)
|
611
|
+
else
|
612
|
+
raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
|
613
|
+
end
|
614
|
+
data = {}
|
615
|
+
result.columns.each_with_index do |k, i|
|
616
|
+
data[k] = result.rows.map { |r| r[i] }
|
617
|
+
end
|
618
|
+
DataFrame.new(data)
|
619
|
+
end
|
595
620
|
|
596
621
|
# def read_excel
|
597
622
|
# end
|
data/lib/polars/series.rb
CHANGED
@@ -263,6 +263,10 @@ module Polars
|
|
263
263
|
#
|
264
264
|
# @return [Object]
|
265
265
|
def [](item)
|
266
|
+
if item.is_a?(Series) && [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
|
267
|
+
return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
|
268
|
+
end
|
269
|
+
|
266
270
|
if item.is_a?(Integer)
|
267
271
|
return _s.get_idx(item)
|
268
272
|
end
|
@@ -271,6 +275,10 @@ module Polars
|
|
271
275
|
return Slice.new(self).apply(item)
|
272
276
|
end
|
273
277
|
|
278
|
+
if Utils.is_int_sequence(item)
|
279
|
+
return Utils.wrap_s(_s.take_with_series(_pos_idxs(Series.new("", item))._s))
|
280
|
+
end
|
281
|
+
|
274
282
|
raise ArgumentError, "Cannot get item of type: #{item.class.name}"
|
275
283
|
end
|
276
284
|
|
@@ -287,24 +295,23 @@ module Polars
|
|
287
295
|
end
|
288
296
|
|
289
297
|
if key.is_a?(Series)
|
290
|
-
if key.dtype ==
|
298
|
+
if key.dtype == Boolean
|
291
299
|
self._s = set(key, value)._s
|
292
|
-
elsif key.dtype ==
|
293
|
-
self._s = set_at_idx(key.cast(
|
294
|
-
elsif key.dtype ==
|
300
|
+
elsif key.dtype == UInt64
|
301
|
+
self._s = set_at_idx(key.cast(UInt32), value)._s
|
302
|
+
elsif key.dtype == UInt32
|
295
303
|
self._s = set_at_idx(key, value)._s
|
296
304
|
else
|
297
305
|
raise Todo
|
298
306
|
end
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
307
|
+
elsif key.is_a?(Array)
|
308
|
+
s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
|
309
|
+
self[s] = value
|
310
|
+
elsif key.is_a?(Range)
|
311
|
+
s = Series.new("", key, dtype: UInt32)
|
303
312
|
self[s] = value
|
304
313
|
elsif key.is_a?(Integer)
|
305
|
-
|
306
|
-
# self[[key]] = value
|
307
|
-
set_at_idx(key, value)
|
314
|
+
self[[key]] = value
|
308
315
|
else
|
309
316
|
raise ArgumentError, "cannot use #{key} for indexing"
|
310
317
|
end
|
@@ -1647,6 +1654,7 @@ module Polars
|
|
1647
1654
|
def len
|
1648
1655
|
_s.len
|
1649
1656
|
end
|
1657
|
+
alias_method :count, :len
|
1650
1658
|
alias_method :length, :len
|
1651
1659
|
|
1652
1660
|
# Cast between data types.
|
@@ -2183,6 +2191,7 @@ module Polars
|
|
2183
2191
|
def arcsin
|
2184
2192
|
super
|
2185
2193
|
end
|
2194
|
+
alias_method :asin, :arcsin
|
2186
2195
|
|
2187
2196
|
# Compute the element-wise value for the inverse cosine.
|
2188
2197
|
#
|
@@ -2202,6 +2211,7 @@ module Polars
|
|
2202
2211
|
def arccos
|
2203
2212
|
super
|
2204
2213
|
end
|
2214
|
+
alias_method :acos, :arccos
|
2205
2215
|
|
2206
2216
|
# Compute the element-wise value for the inverse tangent.
|
2207
2217
|
#
|
@@ -2221,6 +2231,7 @@ module Polars
|
|
2221
2231
|
def arctan
|
2222
2232
|
super
|
2223
2233
|
end
|
2234
|
+
alias_method :atan, :arctan
|
2224
2235
|
|
2225
2236
|
# Compute the element-wise value for the inverse hyperbolic sine.
|
2226
2237
|
#
|
@@ -2240,6 +2251,7 @@ module Polars
|
|
2240
2251
|
def arcsinh
|
2241
2252
|
super
|
2242
2253
|
end
|
2254
|
+
alias_method :asinh, :arcsinh
|
2243
2255
|
|
2244
2256
|
# Compute the element-wise value for the inverse hyperbolic cosine.
|
2245
2257
|
#
|
@@ -2260,6 +2272,7 @@ module Polars
|
|
2260
2272
|
def arccosh
|
2261
2273
|
super
|
2262
2274
|
end
|
2275
|
+
alias_method :acosh, :arccosh
|
2263
2276
|
|
2264
2277
|
# Compute the element-wise value for the inverse hyperbolic tangent.
|
2265
2278
|
#
|
@@ -2283,6 +2296,7 @@ module Polars
|
|
2283
2296
|
def arctanh
|
2284
2297
|
super
|
2285
2298
|
end
|
2299
|
+
alias_method :atanh, :arctanh
|
2286
2300
|
|
2287
2301
|
# Compute the element-wise value for the hyperbolic sine.
|
2288
2302
|
#
|
@@ -3520,6 +3534,59 @@ module Polars
|
|
3520
3534
|
end
|
3521
3535
|
end
|
3522
3536
|
|
3537
|
+
def _pos_idxs(idxs)
|
3538
|
+
idx_type = Polars._get_idx_type
|
3539
|
+
|
3540
|
+
if idxs.is_a?(Series)
|
3541
|
+
if idxs.dtype == idx_type
|
3542
|
+
return idxs
|
3543
|
+
end
|
3544
|
+
if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
3545
|
+
if idx_type == UInt32
|
3546
|
+
if [Int64, UInt64].include?(idxs.dtype)
|
3547
|
+
if idxs.max >= 2**32
|
3548
|
+
raise ArgumentError, "Index positions should be smaller than 2^32."
|
3549
|
+
end
|
3550
|
+
end
|
3551
|
+
if idxs.dtype == Int64
|
3552
|
+
if idxs.min < -(2**32)
|
3553
|
+
raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
|
3554
|
+
end
|
3555
|
+
end
|
3556
|
+
end
|
3557
|
+
if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
3558
|
+
if idxs.min < 0
|
3559
|
+
if idx_type == UInt32
|
3560
|
+
if [Int8, Int16].include?(idxs.dtype)
|
3561
|
+
idxs = idxs.cast(Int32)
|
3562
|
+
end
|
3563
|
+
else
|
3564
|
+
if [Int8, Int16, Int32].include?(idxs.dtype)
|
3565
|
+
idxs = idxs.cast(Int64)
|
3566
|
+
end
|
3567
|
+
end
|
3568
|
+
|
3569
|
+
# Update negative indexes to absolute indexes.
|
3570
|
+
return (
|
3571
|
+
idxs.to_frame
|
3572
|
+
.select(
|
3573
|
+
Polars.when(Polars.col(idxs.name) < 0)
|
3574
|
+
.then(len + Polars.col(idxs.name))
|
3575
|
+
.otherwise(Polars.col(idxs.name))
|
3576
|
+
.cast(idx_type)
|
3577
|
+
)
|
3578
|
+
.to_series(0)
|
3579
|
+
)
|
3580
|
+
end
|
3581
|
+
end
|
3582
|
+
|
3583
|
+
return idxs.cast(idx_type)
|
3584
|
+
end
|
3585
|
+
end
|
3586
|
+
|
3587
|
+
raise ArgumentError, "Unsupported idxs datatype."
|
3588
|
+
end
|
3589
|
+
|
3523
3590
|
def _comp(other, op)
|
3524
3591
|
if other.is_a?(Series)
|
3525
3592
|
return Utils.wrap_s(_s.send(op, other._s))
|
data/lib/polars/slice.rb
CHANGED
@@ -56,7 +56,7 @@ module Polars
|
|
56
56
|
# Normalize slice bounds, identify unbounded and/or zero-length slices.
|
57
57
|
def _slice_setup(s)
|
58
58
|
# can normalize slice indices as we know object size
|
59
|
-
obj_len = @obj.len
|
59
|
+
obj_len = @obj.length
|
60
60
|
start = if s.begin
|
61
61
|
if s.begin < 0
|
62
62
|
[s.begin + obj_len, 0].max
|
data/lib/polars/utils.rb
CHANGED
@@ -70,7 +70,7 @@ module Polars
|
|
70
70
|
end
|
71
71
|
|
72
72
|
def self.selection_to_rbexpr_list(exprs)
|
73
|
-
if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
73
|
+
if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
74
74
|
exprs = [exprs]
|
75
75
|
end
|
76
76
|
|
@@ -78,9 +78,9 @@ module Polars
|
|
78
78
|
end
|
79
79
|
|
80
80
|
def self.expr_to_lit_or_expr(expr, str_to_lit: true)
|
81
|
-
if expr.is_a?(String) && !str_to_lit
|
81
|
+
if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
|
82
82
|
col(expr)
|
83
|
-
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Series) || expr.nil?
|
83
|
+
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
84
84
|
lit(expr)
|
85
85
|
elsif expr.is_a?(Expr)
|
86
86
|
expr
|
@@ -181,6 +181,26 @@ module Polars
|
|
181
181
|
val.all? { |x| x.is_a?(eltype) }
|
182
182
|
end
|
183
183
|
|
184
|
+
def self.is_bool_sequence(val)
|
185
|
+
val.is_a?(Array) && val.all? { |x| x == true || x == false }
|
186
|
+
end
|
187
|
+
|
188
|
+
def self.is_dtype_sequence(val)
|
189
|
+
val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
|
190
|
+
end
|
191
|
+
|
192
|
+
def self.is_int_sequence(val)
|
193
|
+
val.is_a?(Array) && _is_iterable_of(val, Integer)
|
194
|
+
end
|
195
|
+
|
196
|
+
def self.is_expr_sequence(val)
|
197
|
+
val.is_a?(Array) && _is_iterable_of(val, Expr)
|
198
|
+
end
|
199
|
+
|
200
|
+
def self.is_rbexpr_sequence(val)
|
201
|
+
val.is_a?(Array) && _is_iterable_of(val, RbExpr)
|
202
|
+
end
|
203
|
+
|
184
204
|
def self.is_str_sequence(val, allow_str: false)
|
185
205
|
if allow_str == false && val.is_a?(String)
|
186
206
|
false
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|