polars-df 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4a06a5c0bdca450318f76aaf5e810795e663d07ccf58d94e903307abef87a964
4
- data.tar.gz: 2589b519a564b46acc2bffb8c15c8f0abadd24ef374c1e2746b6711faf3e0c48
3
+ metadata.gz: df03134e7edf09e86b5a4f4f9ae9a926bac4c9c0804a29c3422c32675f478825
4
+ data.tar.gz: e0338be1aa96d0ad082ebf8fe27e608b2906b243dd49fa837aceb7f8186947d8
5
5
  SHA512:
6
- metadata.gz: bdf428c391a3d31b98021a080194b7e4dfb34266a0e7634d3676e903e732f3328123b3711c9b36a037a97b1ed18df2b129cf2d980703ec2d39cc6dfae2279eac
7
- data.tar.gz: '0749f2c7fe1f5fb7b10954ae16db47b15e54a1d042c91aae8dcdf3515f5db00ed68a1e82fa9a46546af2f4051356dac1dfb886dc7ac0852e58e92b68d09983ef'
6
+ metadata.gz: 75a139d30f9fdebaa84a21fa45cec8a199da76eb295e7099ceb849646a93fbc7ed80ffed18aaa8eb7bbfc53a32792b2e47101485ad31d727a47ed67d8d7e8110
7
+ data.tar.gz: 589f7fbc1300aadc05568308700f6a94b934e63c40bd1be0a3e7b6f564c0d55f256e2e45e926c128d80453d0e7d200b057f640b02cd6fb9aaddf5bf55dd89754
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.2.2 (2023-01-20)
2
+
3
+ - Added support for strings to `read_sql` method
4
+ - Improved indexing
5
+ - Fixed error with precompiled gem on Mac ARM
6
+
1
7
  ## 0.2.1 (2023-01-18)
2
8
 
3
9
  - Added `read_sql` method
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.1"
1370
+ version = "0.2.2"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
data/README.md CHANGED
@@ -41,6 +41,9 @@ From a CSV
41
41
 
42
42
  ```ruby
43
43
  Polars.read_csv("file.csv")
44
+
45
+ # or lazily with
46
+ Polars.scan_csv("file.csv")
44
47
  ```
45
48
 
46
49
  From Parquet
@@ -135,9 +138,9 @@ df[Polars.col("a") <= 2]
135
138
  And, or, and exclusive or
136
139
 
137
140
  ```ruby
138
- df[(Polars.col("a") > 100) & (Polars.col("b") == "one")] # and
139
- df[(Polars.col("a") > 100) | (Polars.col("b") == "one")] # or
140
- df[(Polars.col("a") > 100) ^ (Polars.col("b") == "one")] # xor
141
+ df[(Polars.col("a") > 1) & (Polars.col("b") == "two")] # and
142
+ df[(Polars.col("a") > 1) | (Polars.col("b") == "two")] # or
143
+ df[(Polars.col("a") > 1) ^ (Polars.col("b") == "two")] # xor
141
144
  ```
142
145
 
143
146
  ## Operations
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.2.1"
3
+ version = "0.2.2"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -22,7 +22,7 @@ use magnus::{
22
22
  define_module, function, memoize, method, prelude::*, Error, RArray, RClass, RHash, RModule,
23
23
  Value,
24
24
  };
25
- use polars::datatypes::{DataType, TimeUnit};
25
+ use polars::datatypes::{DataType, TimeUnit, IDX_DTYPE};
26
26
  use polars::error::PolarsResult;
27
27
  use polars::frame::DataFrame;
28
28
  use polars::functions::{diag_concat_df, hor_concat_df};
@@ -71,6 +71,7 @@ fn init() -> RbResult<()> {
71
71
  module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
72
72
  module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
73
73
  module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
74
+ module.define_singleton_method("_get_idx_type", function!(get_idx_type, 0))?;
74
75
 
75
76
  let class = module.define_class("RbBatchedCsv", Default::default())?;
76
77
  class.define_singleton_method("new", function!(RbBatchedCsv::new, -1))?;
@@ -988,3 +989,7 @@ fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
988
989
  fn arg_where(condition: &RbExpr) -> RbExpr {
989
990
  polars::lazy::dsl::arg_where(condition.inner.clone()).into()
990
991
  }
992
+
993
+ fn get_idx_type() -> Value {
994
+ Wrap(IDX_DTYPE).into()
995
+ }
@@ -277,6 +277,7 @@ module Polars
277
277
  _df.height
278
278
  end
279
279
  alias_method :count, :height
280
+ alias_method :length, :height
280
281
 
281
282
  # Get the width of the DataFrame.
282
283
  #
@@ -541,7 +542,7 @@ module Polars
541
542
 
542
543
  if col_selection.is_a?(Array)
543
544
  # df[.., [1, 2]]
544
- if is_int_sequence(col_selection)
545
+ if Utils.is_int_sequence(col_selection)
545
546
  series_list = col_selection.map { |i| to_series(i) }
546
547
  df = self.class.new(series_list)
547
548
  return df[row_selection]
@@ -574,6 +575,23 @@ module Polars
574
575
  # df[["foo", "bar"]]
575
576
  return _from_rbdf(_df.select(item))
576
577
  end
578
+
579
+ if Utils.is_int_sequence(item)
580
+ item = Series.new("", item)
581
+ end
582
+
583
+ if item.is_a?(Series)
584
+ dtype = item.dtype
585
+ if dtype == Utf8
586
+ return _from_rbdf(_df.select(item))
587
+ elsif dtype == UInt32
588
+ return _from_rbdf(_df.take_with_series(item._s))
589
+ elsif [UInt8, UInt16, UInt64, Int8, Int16, Int32, Int64].include?(dtype)
590
+ return _from_rbdf(
591
+ _df.take_with_series(_pos_idxs(item, 0)._s)
592
+ )
593
+ end
594
+ end
577
595
  end
578
596
 
579
597
  # Ruby-specific
@@ -4662,8 +4680,53 @@ module Polars
4662
4680
  end
4663
4681
  end
4664
4682
 
4665
- # def _pos_idxs
4666
- # end
4683
+ def _pos_idxs(idxs, dim)
4684
+ idx_type = Polars._get_idx_type
4685
+
4686
+ if idxs.is_a?(Series)
4687
+ if idxs.dtype == idx_type
4688
+ return idxs
4689
+ end
4690
+ if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
4691
+ if idx_type == UInt32
4692
+ if [Int64, UInt64].include?(idxs.dtype)
4693
+ if idxs.max >= 2**32
4694
+ raise ArgumentError, "Index positions should be smaller than 2^32."
4695
+ end
4696
+ end
4697
+ if idxs.dtype == Int64
4698
+ if idxs.min < -(2**32)
4699
+ raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
4700
+ end
4701
+ end
4702
+ end
4703
+ if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
4704
+ if idxs.min < 0
4705
+ if idx_type == UInt32
4706
+ if [Int8, Int16].include?(idxs.dtype)
4707
+ idxs = idxs.cast(Int32)
4708
+ end
4709
+ else
4710
+ if [Int8, Int16, Int32].include?(idxs.dtype)
4711
+ idxs = idxs.cast(Int64)
4712
+ end
4713
+ end
4714
+
4715
+ idxs =
4716
+ Polars.select(
4717
+ Polars.when(Polars.lit(idxs) < 0)
4718
+ .then(shape[dim] + Polars.lit(idxs))
4719
+ .otherwise(Polars.lit(idxs))
4720
+ ).to_series
4721
+ end
4722
+ end
4723
+
4724
+ return idxs.cast(idx_type)
4725
+ end
4726
+ end
4727
+
4728
+ raise ArgumentError, "Unsupported idxs datatype."
4729
+ end
4667
4730
 
4668
4731
  # @private
4669
4732
  def self.hash_to_rbdf(data, columns: nil)
data/lib/polars/io.rb CHANGED
@@ -606,8 +606,10 @@ module Polars
606
606
  sql
607
607
  elsif sql.is_a?(ActiveRecord::Relation)
608
608
  sql.connection.select_all(sql.to_sql)
609
+ elsif sql.is_a?(String)
610
+ ActiveRecord::Base.connection.select_all(sql)
609
611
  else
610
- raise ArgumentError, "Expected ActiveRecord::Relation or ActiveRecord::Result"
612
+ raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
611
613
  end
612
614
  data = {}
613
615
  result.columns.each_with_index do |k, i|
data/lib/polars/series.rb CHANGED
@@ -263,6 +263,10 @@ module Polars
263
263
  #
264
264
  # @return [Object]
265
265
  def [](item)
266
+ if item.is_a?(Series) && [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
267
+ return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
268
+ end
269
+
266
270
  if item.is_a?(Integer)
267
271
  return _s.get_idx(item)
268
272
  end
@@ -271,6 +275,10 @@ module Polars
271
275
  return Slice.new(self).apply(item)
272
276
  end
273
277
 
278
+ if Utils.is_int_sequence(item)
279
+ return Utils.wrap_s(_s.take_with_series(_pos_idxs(Series.new("", item))._s))
280
+ end
281
+
274
282
  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
275
283
  end
276
284
 
@@ -287,24 +295,23 @@ module Polars
287
295
  end
288
296
 
289
297
  if key.is_a?(Series)
290
- if key.dtype == :bool
298
+ if key.dtype == Boolean
291
299
  self._s = set(key, value)._s
292
- elsif key.dtype == :u64
293
- self._s = set_at_idx(key.cast(:u32), value)._s
294
- elsif key.dtype == :u32
300
+ elsif key.dtype == UInt64
301
+ self._s = set_at_idx(key.cast(UInt32), value)._s
302
+ elsif key.dtype == UInt32
295
303
  self._s = set_at_idx(key, value)._s
296
304
  else
297
305
  raise Todo
298
306
  end
299
- end
300
-
301
- if key.is_a?(Array)
302
- s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: :u32))
307
+ elsif key.is_a?(Array)
308
+ s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
309
+ self[s] = value
310
+ elsif key.is_a?(Range)
311
+ s = Series.new("", key, dtype: UInt32)
303
312
  self[s] = value
304
313
  elsif key.is_a?(Integer)
305
- # TODO fix
306
- # self[[key]] = value
307
- set_at_idx(key, value)
314
+ self[[key]] = value
308
315
  else
309
316
  raise ArgumentError, "cannot use #{key} for indexing"
310
317
  end
@@ -3527,6 +3534,59 @@ module Polars
3527
3534
  end
3528
3535
  end
3529
3536
 
3537
+ def _pos_idxs(idxs)
3538
+ idx_type = Polars._get_idx_type
3539
+
3540
+ if idxs.is_a?(Series)
3541
+ if idxs.dtype == idx_type
3542
+ return idxs
3543
+ end
3544
+ if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
3545
+ if idx_type == UInt32
3546
+ if [Int64, UInt64].include?(idxs.dtype)
3547
+ if idxs.max >= 2**32
3548
+ raise ArgumentError, "Index positions should be smaller than 2^32."
3549
+ end
3550
+ end
3551
+ if idxs.dtype == Int64
3552
+ if idxs.min < -(2**32)
3553
+ raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
3554
+ end
3555
+ end
3556
+ end
3557
+ if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
3558
+ if idxs.min < 0
3559
+ if idx_type == UInt32
3560
+ if [Int8, Int16].include?(idxs.dtype)
3561
+ idxs = idxs.cast(Int32)
3562
+ end
3563
+ else
3564
+ if [Int8, Int16, Int32].include?(idxs.dtype)
3565
+ idxs = idxs.cast(Int64)
3566
+ end
3567
+ end
3568
+
3569
+ # Update negative indexes to absolute indexes.
3570
+ return (
3571
+ idxs.to_frame
3572
+ .select(
3573
+ Polars.when(Polars.col(idxs.name) < 0)
3574
+ .then(len + Polars.col(idxs.name))
3575
+ .otherwise(Polars.col(idxs.name))
3576
+ .cast(idx_type)
3577
+ )
3578
+ .to_series(0)
3579
+ )
3580
+ end
3581
+ end
3582
+
3583
+ return idxs.cast(idx_type)
3584
+ end
3585
+ end
3586
+
3587
+ raise ArgumentError, "Unsupported idxs datatype."
3588
+ end
3589
+
3530
3590
  def _comp(other, op)
3531
3591
  if other.is_a?(Series)
3532
3592
  return Utils.wrap_s(_s.send(op, other._s))
data/lib/polars/slice.rb CHANGED
@@ -56,7 +56,7 @@ module Polars
56
56
  # Normalize slice bounds, identify unbounded and/or zero-length slices.
57
57
  def _slice_setup(s)
58
58
  # can normalize slice indices as we know object size
59
- obj_len = @obj.len
59
+ obj_len = @obj.length
60
60
  start = if s.begin
61
61
  if s.begin < 0
62
62
  [s.begin + obj_len, 0].max
data/lib/polars/utils.rb CHANGED
@@ -181,6 +181,26 @@ module Polars
181
181
  val.all? { |x| x.is_a?(eltype) }
182
182
  end
183
183
 
184
+ def self.is_bool_sequence(val)
185
+ val.is_a?(Array) && val.all? { |x| x == true || x == false }
186
+ end
187
+
188
+ def self.is_dtype_sequence(val)
189
+ val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
190
+ end
191
+
192
+ def self.is_int_sequence(val)
193
+ val.is_a?(Array) && _is_iterable_of(val, Integer)
194
+ end
195
+
196
+ def self.is_expr_sequence(val)
197
+ val.is_a?(Array) && _is_iterable_of(val, Expr)
198
+ end
199
+
200
+ def self.is_rbexpr_sequence(val)
201
+ val.is_a?(Array) && _is_iterable_of(val, RbExpr)
202
+ end
203
+
184
204
  def self.is_str_sequence(val, allow_str: false)
185
205
  if allow_str == false && val.is_a?(String)
186
206
  false
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.1"
3
+ VERSION = "0.2.2"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-18 00:00:00.000000000 Z
11
+ date: 2023-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys