polars-df 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4a06a5c0bdca450318f76aaf5e810795e663d07ccf58d94e903307abef87a964
4
- data.tar.gz: 2589b519a564b46acc2bffb8c15c8f0abadd24ef374c1e2746b6711faf3e0c48
3
+ metadata.gz: df03134e7edf09e86b5a4f4f9ae9a926bac4c9c0804a29c3422c32675f478825
4
+ data.tar.gz: e0338be1aa96d0ad082ebf8fe27e608b2906b243dd49fa837aceb7f8186947d8
5
5
  SHA512:
6
- metadata.gz: bdf428c391a3d31b98021a080194b7e4dfb34266a0e7634d3676e903e732f3328123b3711c9b36a037a97b1ed18df2b129cf2d980703ec2d39cc6dfae2279eac
7
- data.tar.gz: '0749f2c7fe1f5fb7b10954ae16db47b15e54a1d042c91aae8dcdf3515f5db00ed68a1e82fa9a46546af2f4051356dac1dfb886dc7ac0852e58e92b68d09983ef'
6
+ metadata.gz: 75a139d30f9fdebaa84a21fa45cec8a199da76eb295e7099ceb849646a93fbc7ed80ffed18aaa8eb7bbfc53a32792b2e47101485ad31d727a47ed67d8d7e8110
7
+ data.tar.gz: 589f7fbc1300aadc05568308700f6a94b934e63c40bd1be0a3e7b6f564c0d55f256e2e45e926c128d80453d0e7d200b057f640b02cd6fb9aaddf5bf55dd89754
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.2.2 (2023-01-20)
2
+
3
+ - Added support for strings to `read_sql` method
4
+ - Improved indexing
5
+ - Fixed error with precompiled gem on Mac ARM
6
+
1
7
  ## 0.2.1 (2023-01-18)
2
8
 
3
9
  - Added `read_sql` method
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.1"
1370
+ version = "0.2.2"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
data/README.md CHANGED
@@ -41,6 +41,9 @@ From a CSV
41
41
 
42
42
  ```ruby
43
43
  Polars.read_csv("file.csv")
44
+
45
+ # or lazily with
46
+ Polars.scan_csv("file.csv")
44
47
  ```
45
48
 
46
49
  From Parquet
@@ -135,9 +138,9 @@ df[Polars.col("a") <= 2]
135
138
  And, or, and exclusive or
136
139
 
137
140
  ```ruby
138
- df[(Polars.col("a") > 100) & (Polars.col("b") == "one")] # and
139
- df[(Polars.col("a") > 100) | (Polars.col("b") == "one")] # or
140
- df[(Polars.col("a") > 100) ^ (Polars.col("b") == "one")] # xor
141
+ df[(Polars.col("a") > 1) & (Polars.col("b") == "two")] # and
142
+ df[(Polars.col("a") > 1) | (Polars.col("b") == "two")] # or
143
+ df[(Polars.col("a") > 1) ^ (Polars.col("b") == "two")] # xor
141
144
  ```
142
145
 
143
146
  ## Operations
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.2.1"
3
+ version = "0.2.2"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -22,7 +22,7 @@ use magnus::{
22
22
  define_module, function, memoize, method, prelude::*, Error, RArray, RClass, RHash, RModule,
23
23
  Value,
24
24
  };
25
- use polars::datatypes::{DataType, TimeUnit};
25
+ use polars::datatypes::{DataType, TimeUnit, IDX_DTYPE};
26
26
  use polars::error::PolarsResult;
27
27
  use polars::frame::DataFrame;
28
28
  use polars::functions::{diag_concat_df, hor_concat_df};
@@ -71,6 +71,7 @@ fn init() -> RbResult<()> {
71
71
  module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
72
72
  module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
73
73
  module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
74
+ module.define_singleton_method("_get_idx_type", function!(get_idx_type, 0))?;
74
75
 
75
76
  let class = module.define_class("RbBatchedCsv", Default::default())?;
76
77
  class.define_singleton_method("new", function!(RbBatchedCsv::new, -1))?;
@@ -988,3 +989,7 @@ fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
988
989
  fn arg_where(condition: &RbExpr) -> RbExpr {
989
990
  polars::lazy::dsl::arg_where(condition.inner.clone()).into()
990
991
  }
992
+
993
+ fn get_idx_type() -> Value {
994
+ Wrap(IDX_DTYPE).into()
995
+ }
@@ -277,6 +277,7 @@ module Polars
277
277
  _df.height
278
278
  end
279
279
  alias_method :count, :height
280
+ alias_method :length, :height
280
281
 
281
282
  # Get the width of the DataFrame.
282
283
  #
@@ -541,7 +542,7 @@ module Polars
541
542
 
542
543
  if col_selection.is_a?(Array)
543
544
  # df[.., [1, 2]]
544
- if is_int_sequence(col_selection)
545
+ if Utils.is_int_sequence(col_selection)
545
546
  series_list = col_selection.map { |i| to_series(i) }
546
547
  df = self.class.new(series_list)
547
548
  return df[row_selection]
@@ -574,6 +575,23 @@ module Polars
574
575
  # df[["foo", "bar"]]
575
576
  return _from_rbdf(_df.select(item))
576
577
  end
578
+
579
+ if Utils.is_int_sequence(item)
580
+ item = Series.new("", item)
581
+ end
582
+
583
+ if item.is_a?(Series)
584
+ dtype = item.dtype
585
+ if dtype == Utf8
586
+ return _from_rbdf(_df.select(item))
587
+ elsif dtype == UInt32
588
+ return _from_rbdf(_df.take_with_series(item._s))
589
+ elsif [UInt8, UInt16, UInt64, Int8, Int16, Int32, Int64].include?(dtype)
590
+ return _from_rbdf(
591
+ _df.take_with_series(_pos_idxs(item, 0)._s)
592
+ )
593
+ end
594
+ end
577
595
  end
578
596
 
579
597
  # Ruby-specific
@@ -4662,8 +4680,53 @@ module Polars
4662
4680
  end
4663
4681
  end
4664
4682
 
4665
- # def _pos_idxs
4666
- # end
4683
+ def _pos_idxs(idxs, dim)
4684
+ idx_type = Polars._get_idx_type
4685
+
4686
+ if idxs.is_a?(Series)
4687
+ if idxs.dtype == idx_type
4688
+ return idxs
4689
+ end
4690
+ if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
4691
+ if idx_type == UInt32
4692
+ if [Int64, UInt64].include?(idxs.dtype)
4693
+ if idxs.max >= 2**32
4694
+ raise ArgumentError, "Index positions should be smaller than 2^32."
4695
+ end
4696
+ end
4697
+ if idxs.dtype == Int64
4698
+ if idxs.min < -(2**32)
4699
+ raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
4700
+ end
4701
+ end
4702
+ end
4703
+ if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
4704
+ if idxs.min < 0
4705
+ if idx_type == UInt32
4706
+ if [Int8, Int16].include?(idxs.dtype)
4707
+ idxs = idxs.cast(Int32)
4708
+ end
4709
+ else
4710
+ if [Int8, Int16, Int32].include?(idxs.dtype)
4711
+ idxs = idxs.cast(Int64)
4712
+ end
4713
+ end
4714
+
4715
+ idxs =
4716
+ Polars.select(
4717
+ Polars.when(Polars.lit(idxs) < 0)
4718
+ .then(shape[dim] + Polars.lit(idxs))
4719
+ .otherwise(Polars.lit(idxs))
4720
+ ).to_series
4721
+ end
4722
+ end
4723
+
4724
+ return idxs.cast(idx_type)
4725
+ end
4726
+ end
4727
+
4728
+ raise ArgumentError, "Unsupported idxs datatype."
4729
+ end
4667
4730
 
4668
4731
  # @private
4669
4732
  def self.hash_to_rbdf(data, columns: nil)
data/lib/polars/io.rb CHANGED
@@ -606,8 +606,10 @@ module Polars
606
606
  sql
607
607
  elsif sql.is_a?(ActiveRecord::Relation)
608
608
  sql.connection.select_all(sql.to_sql)
609
+ elsif sql.is_a?(String)
610
+ ActiveRecord::Base.connection.select_all(sql)
609
611
  else
610
- raise ArgumentError, "Expected ActiveRecord::Relation or ActiveRecord::Result"
612
+ raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
611
613
  end
612
614
  data = {}
613
615
  result.columns.each_with_index do |k, i|
data/lib/polars/series.rb CHANGED
@@ -263,6 +263,10 @@ module Polars
263
263
  #
264
264
  # @return [Object]
265
265
  def [](item)
266
+ if item.is_a?(Series) && [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
267
+ return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
268
+ end
269
+
266
270
  if item.is_a?(Integer)
267
271
  return _s.get_idx(item)
268
272
  end
@@ -271,6 +275,10 @@ module Polars
271
275
  return Slice.new(self).apply(item)
272
276
  end
273
277
 
278
+ if Utils.is_int_sequence(item)
279
+ return Utils.wrap_s(_s.take_with_series(_pos_idxs(Series.new("", item))._s))
280
+ end
281
+
274
282
  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
275
283
  end
276
284
 
@@ -287,24 +295,23 @@ module Polars
287
295
  end
288
296
 
289
297
  if key.is_a?(Series)
290
- if key.dtype == :bool
298
+ if key.dtype == Boolean
291
299
  self._s = set(key, value)._s
292
- elsif key.dtype == :u64
293
- self._s = set_at_idx(key.cast(:u32), value)._s
294
- elsif key.dtype == :u32
300
+ elsif key.dtype == UInt64
301
+ self._s = set_at_idx(key.cast(UInt32), value)._s
302
+ elsif key.dtype == UInt32
295
303
  self._s = set_at_idx(key, value)._s
296
304
  else
297
305
  raise Todo
298
306
  end
299
- end
300
-
301
- if key.is_a?(Array)
302
- s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: :u32))
307
+ elsif key.is_a?(Array)
308
+ s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
309
+ self[s] = value
310
+ elsif key.is_a?(Range)
311
+ s = Series.new("", key, dtype: UInt32)
303
312
  self[s] = value
304
313
  elsif key.is_a?(Integer)
305
- # TODO fix
306
- # self[[key]] = value
307
- set_at_idx(key, value)
314
+ self[[key]] = value
308
315
  else
309
316
  raise ArgumentError, "cannot use #{key} for indexing"
310
317
  end
@@ -3527,6 +3534,59 @@ module Polars
3527
3534
  end
3528
3535
  end
3529
3536
 
3537
+ def _pos_idxs(idxs)
3538
+ idx_type = Polars._get_idx_type
3539
+
3540
+ if idxs.is_a?(Series)
3541
+ if idxs.dtype == idx_type
3542
+ return idxs
3543
+ end
3544
+ if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
3545
+ if idx_type == UInt32
3546
+ if [Int64, UInt64].include?(idxs.dtype)
3547
+ if idxs.max >= 2**32
3548
+ raise ArgumentError, "Index positions should be smaller than 2^32."
3549
+ end
3550
+ end
3551
+ if idxs.dtype == Int64
3552
+ if idxs.min < -(2**32)
3553
+ raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
3554
+ end
3555
+ end
3556
+ end
3557
+ if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
3558
+ if idxs.min < 0
3559
+ if idx_type == UInt32
3560
+ if [Int8, Int16].include?(idxs.dtype)
3561
+ idxs = idxs.cast(Int32)
3562
+ end
3563
+ else
3564
+ if [Int8, Int16, Int32].include?(idxs.dtype)
3565
+ idxs = idxs.cast(Int64)
3566
+ end
3567
+ end
3568
+
3569
+ # Update negative indexes to absolute indexes.
3570
+ return (
3571
+ idxs.to_frame
3572
+ .select(
3573
+ Polars.when(Polars.col(idxs.name) < 0)
3574
+ .then(len + Polars.col(idxs.name))
3575
+ .otherwise(Polars.col(idxs.name))
3576
+ .cast(idx_type)
3577
+ )
3578
+ .to_series(0)
3579
+ )
3580
+ end
3581
+ end
3582
+
3583
+ return idxs.cast(idx_type)
3584
+ end
3585
+ end
3586
+
3587
+ raise ArgumentError, "Unsupported idxs datatype."
3588
+ end
3589
+
3530
3590
  def _comp(other, op)
3531
3591
  if other.is_a?(Series)
3532
3592
  return Utils.wrap_s(_s.send(op, other._s))
data/lib/polars/slice.rb CHANGED
@@ -56,7 +56,7 @@ module Polars
56
56
  # Normalize slice bounds, identify unbounded and/or zero-length slices.
57
57
  def _slice_setup(s)
58
58
  # can normalize slice indices as we know object size
59
- obj_len = @obj.len
59
+ obj_len = @obj.length
60
60
  start = if s.begin
61
61
  if s.begin < 0
62
62
  [s.begin + obj_len, 0].max
data/lib/polars/utils.rb CHANGED
@@ -181,6 +181,26 @@ module Polars
181
181
  val.all? { |x| x.is_a?(eltype) }
182
182
  end
183
183
 
184
+ def self.is_bool_sequence(val)
185
+ val.is_a?(Array) && val.all? { |x| x == true || x == false }
186
+ end
187
+
188
+ def self.is_dtype_sequence(val)
189
+ val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
190
+ end
191
+
192
+ def self.is_int_sequence(val)
193
+ val.is_a?(Array) && _is_iterable_of(val, Integer)
194
+ end
195
+
196
+ def self.is_expr_sequence(val)
197
+ val.is_a?(Array) && _is_iterable_of(val, Expr)
198
+ end
199
+
200
+ def self.is_rbexpr_sequence(val)
201
+ val.is_a?(Array) && _is_iterable_of(val, RbExpr)
202
+ end
203
+
184
204
  def self.is_str_sequence(val, allow_str: false)
185
205
  if allow_str == false && val.is_a?(String)
186
206
  false
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.1"
3
+ VERSION = "0.2.2"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-18 00:00:00.000000000 Z
11
+ date: 2023-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys