polars-df 0.2.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ca8491538991cd918a2706c572c43994ba00558937636d7d90bb98b51f27cb76
4
- data.tar.gz: a8d6667309db4c17c807d0b2a398c6acdbcb2a9c4282d07a313f0de57d10b6b7
3
+ metadata.gz: df03134e7edf09e86b5a4f4f9ae9a926bac4c9c0804a29c3422c32675f478825
4
+ data.tar.gz: e0338be1aa96d0ad082ebf8fe27e608b2906b243dd49fa837aceb7f8186947d8
5
5
  SHA512:
6
- metadata.gz: b26a1722981f2616f979d8a46590cba04d3fd4f420f95923693fd3796787153bc5197b3308ee93b83a06a14cc4bcbba52f9c3046f9f366e6599c987dc8d0fd4c
7
- data.tar.gz: 0da65a6f3543d500a4d62e2d5816043060156d6b94f02750d50036ecc406adbaa037e3ecefe5d23e667ce814db6580fc8336e8a4f52beceda85551c6dd4a0c96
6
+ metadata.gz: 75a139d30f9fdebaa84a21fa45cec8a199da76eb295e7099ceb849646a93fbc7ed80ffed18aaa8eb7bbfc53a32792b2e47101485ad31d727a47ed67d8d7e8110
7
+ data.tar.gz: 589f7fbc1300aadc05568308700f6a94b934e63c40bd1be0a3e7b6f564c0d55f256e2e45e926c128d80453d0e7d200b057f640b02cd6fb9aaddf5bf55dd89754
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ## 0.2.2 (2023-01-20)
2
+
3
+ - Added support for strings to `read_sql` method
4
+ - Improved indexing
5
+ - Fixed error with precompiled gem on Mac ARM
6
+
7
+ ## 0.2.1 (2023-01-18)
8
+
9
+ - Added `read_sql` method
10
+ - Added `to_csv` method
11
+ - Added support for symbol keys
12
+
1
13
  ## 0.2.0 (2023-01-14)
2
14
 
3
15
  - Updated Polars to 0.26.1
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.0"
1370
+ version = "0.2.2"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
data/README.md CHANGED
@@ -25,7 +25,13 @@ Polars.read_csv("iris.csv")
25
25
  .collect
26
26
  ```
27
27
 
28
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems. Some methods are missing at the moment.
28
+ You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
29
+
30
+ ## Reference
31
+
32
+ - [Series](https://www.rubydoc.info/gems/polars-df/Polars/Series)
33
+ - [DataFrame](https://www.rubydoc.info/gems/polars-df/Polars/DataFrame)
34
+ - [LazyFrame](https://www.rubydoc.info/gems/polars-df/Polars/LazyFrame)
29
35
 
30
36
  ## Examples
31
37
 
@@ -35,6 +41,9 @@ From a CSV
35
41
 
36
42
  ```ruby
37
43
  Polars.read_csv("file.csv")
44
+
45
+ # or lazily with
46
+ Polars.scan_csv("file.csv")
38
47
  ```
39
48
 
40
49
  From Parquet
@@ -46,7 +55,7 @@ Polars.read_parquet("file.parquet")
46
55
  From Active Record
47
56
 
48
57
  ```ruby
49
- Polars::DataFrame.new(User.all)
58
+ Polars.read_sql(User.all)
50
59
  ```
51
60
 
52
61
  From a hash
@@ -67,6 +76,261 @@ Polars::DataFrame.new([
67
76
  ])
68
77
  ```
69
78
 
79
+ ## Attributes
80
+
81
+ Get number of rows
82
+
83
+ ```ruby
84
+ df.height
85
+ ```
86
+
87
+ Get column names
88
+
89
+ ```ruby
90
+ df.columns
91
+ ```
92
+
93
+ Check if a column exists
94
+
95
+ ```ruby
96
+ df.include?(name)
97
+ ```
98
+
99
+ ## Selecting Data
100
+
101
+ Select a column
102
+
103
+ ```ruby
104
+ df["a"]
105
+ ```
106
+
107
+ Select multiple columns
108
+
109
+ ```ruby
110
+ df[["a", "b"]]
111
+ ```
112
+
113
+ Select first rows
114
+
115
+ ```ruby
116
+ df.head
117
+ ```
118
+
119
+ Select last rows
120
+
121
+ ```ruby
122
+ df.tail
123
+ ```
124
+
125
+ ## Filtering
126
+
127
+ Filter on a condition
128
+
129
+ ```ruby
130
+ df[Polars.col("a") == 2]
131
+ df[Polars.col("a") != 2]
132
+ df[Polars.col("a") > 2]
133
+ df[Polars.col("a") >= 2]
134
+ df[Polars.col("a") < 2]
135
+ df[Polars.col("a") <= 2]
136
+ ```
137
+
138
+ And, or, and exclusive or
139
+
140
+ ```ruby
141
+ df[(Polars.col("a") > 1) & (Polars.col("b") == "two")] # and
142
+ df[(Polars.col("a") > 1) | (Polars.col("b") == "two")] # or
143
+ df[(Polars.col("a") > 1) ^ (Polars.col("b") == "two")] # xor
144
+ ```
145
+
146
+ ## Operations
147
+
148
+ Basic operations
149
+
150
+ ```ruby
151
+ df["a"] + 5
152
+ df["a"] - 5
153
+ df["a"] * 5
154
+ df["a"] / 5
155
+ df["a"] % 5
156
+ df["a"] ** 2
157
+ df["a"].sqrt
158
+ df["a"].abs
159
+ ```
160
+
161
+ Rounding
162
+
163
+ ```ruby
164
+ df["a"].round(2)
165
+ df["a"].ceil
166
+ df["a"].floor
167
+ ```
168
+
169
+ Logarithm
170
+
171
+ ```ruby
172
+ df["a"].log # natural log
173
+ df["a"].log(10)
174
+ ```
175
+
176
+ Exponentiation
177
+
178
+ ```ruby
179
+ df["a"].exp
180
+ ```
181
+
182
+ Trigonometric functions
183
+
184
+ ```ruby
185
+ df["a"].sin
186
+ df["a"].cos
187
+ df["a"].tan
188
+ df["a"].asin
189
+ df["a"].acos
190
+ df["a"].atan
191
+ ```
192
+
193
+ Hyperbolic functions
194
+
195
+ ```ruby
196
+ df["a"].sinh
197
+ df["a"].cosh
198
+ df["a"].tanh
199
+ df["a"].asinh
200
+ df["a"].acosh
201
+ df["a"].atanh
202
+ ```
203
+
204
+ Summary statistics
205
+
206
+ ```ruby
207
+ df["a"].sum
208
+ df["a"].mean
209
+ df["a"].median
210
+ df["a"].quantile(0.90)
211
+ df["a"].min
212
+ df["a"].max
213
+ df["a"].std
214
+ df["a"].var
215
+ ```
216
+
217
+ ## Grouping
218
+
219
+ Group
220
+
221
+ ```ruby
222
+ df.groupby("a").count
223
+ ```
224
+
225
+ Works with all summary statistics
226
+
227
+ ```ruby
228
+ df.groupby("a").max
229
+ ```
230
+
231
+ Multiple groups
232
+
233
+ ```ruby
234
+ df.groupby(["a", "b"]).count
235
+ ```
236
+
237
+ ## Combining Data Frames
238
+
239
+ Add rows
240
+
241
+ ```ruby
242
+ df.vstack(other_df)
243
+ ```
244
+
245
+ Add columns
246
+
247
+ ```ruby
248
+ df.hstack(other_df)
249
+ ```
250
+
251
+ Inner join
252
+
253
+ ```ruby
254
+ df.join(other_df, on: "a")
255
+ ```
256
+
257
+ Left join
258
+
259
+ ```ruby
260
+ df.join(other_df, on: "a", how: "left")
261
+ ```
262
+
263
+ ## Encoding
264
+
265
+ One-hot encoding
266
+
267
+ ```ruby
268
+ df.to_dummies
269
+ ```
270
+
271
+ ## Conversion
272
+
273
+ Array of rows
274
+
275
+ ```ruby
276
+ df.rows
277
+ ```
278
+
279
+ Hash of series
280
+
281
+ ```ruby
282
+ df.to_h
283
+ ```
284
+
285
+ CSV
286
+
287
+ ```ruby
288
+ df.to_csv
289
+ # or
290
+ df.write_csv("data.csv")
291
+ ```
292
+
293
+ Parquet
294
+
295
+ ```ruby
296
+ df.write_parquet("data.parquet")
297
+ ```
298
+
299
+ ## Types
300
+
301
+ You can specify column types when creating a data frame
302
+
303
+ ```ruby
304
+ Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
305
+ ```
306
+
307
+ Supported types are:
308
+
309
+ - boolean - `Boolean`
310
+ - float - `Float64`, `Float32`
311
+ - integer - `Int64`, `Int32`, `Int16`, `Int8`
312
+ - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
313
+ - string - `Utf8`, `Categorical`
314
+ - temporal - `Date`, `Datetime`, `Time`, `Duration`
315
+
316
+ Get column types
317
+
318
+ ```ruby
319
+ df.schema
320
+ ```
321
+
322
+ For a specific column
323
+
324
+ ```ruby
325
+ df["a"].dtype
326
+ ```
327
+
328
+ Cast a column
329
+
330
+ ```ruby
331
+ df["a"].cast(Polars::Int32)
332
+ ```
333
+
70
334
  ## History
71
335
 
72
336
  View the [changelog](CHANGELOG.md)
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.2.0"
3
+ version = "0.2.2"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -22,7 +22,7 @@ use magnus::{
22
22
  define_module, function, memoize, method, prelude::*, Error, RArray, RClass, RHash, RModule,
23
23
  Value,
24
24
  };
25
- use polars::datatypes::{DataType, TimeUnit};
25
+ use polars::datatypes::{DataType, TimeUnit, IDX_DTYPE};
26
26
  use polars::error::PolarsResult;
27
27
  use polars::frame::DataFrame;
28
28
  use polars::functions::{diag_concat_df, hor_concat_df};
@@ -71,6 +71,7 @@ fn init() -> RbResult<()> {
71
71
  module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
72
72
  module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
73
73
  module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
74
+ module.define_singleton_method("_get_idx_type", function!(get_idx_type, 0))?;
74
75
 
75
76
  let class = module.define_class("RbBatchedCsv", Default::default())?;
76
77
  class.define_singleton_method("new", function!(RbBatchedCsv::new, -1))?;
@@ -988,3 +989,7 @@ fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
988
989
  fn arg_where(condition: &RbExpr) -> RbExpr {
989
990
  polars::lazy::dsl::arg_where(condition.inner.clone()).into()
990
991
  }
992
+
993
+ fn get_idx_type() -> Value {
994
+ Wrap(IDX_DTYPE).into()
995
+ }
@@ -17,6 +17,7 @@ module Polars
17
17
  # the orientation is inferred by matching the columns and data dimensions. If
18
18
  # this does not yield conclusive results, column orientation is used.
19
19
  def initialize(data = nil, columns: nil, orient: nil)
20
+ # TODO deprecate in favor of read_sql
20
21
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
21
22
  result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
22
23
  data = {}
@@ -275,6 +276,8 @@ module Polars
275
276
  def height
276
277
  _df.height
277
278
  end
279
+ alias_method :count, :height
280
+ alias_method :length, :height
278
281
 
279
282
  # Get the width of the DataFrame.
280
283
  #
@@ -521,13 +524,13 @@ module Polars
521
524
  return df.slice(row_selection, 1)
522
525
  end
523
526
  # df[2, "a"]
524
- if col_selection.is_a?(String)
527
+ if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
525
528
  return self[col_selection][row_selection]
526
529
  end
527
530
  end
528
531
 
529
532
  # column selection can be "a", :a, or ["a", "b"]
530
- if col_selection.is_a?(String)
533
+ if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
531
534
  col_selection = [col_selection]
532
535
  end
533
536
 
@@ -539,7 +542,7 @@ module Polars
539
542
 
540
543
  if col_selection.is_a?(Array)
541
544
  # df[.., [1, 2]]
542
- if is_int_sequence(col_selection)
545
+ if Utils.is_int_sequence(col_selection)
543
546
  series_list = col_selection.map { |i| to_series(i) }
544
547
  df = self.class.new(series_list)
545
548
  return df[row_selection]
@@ -553,8 +556,8 @@ module Polars
553
556
 
554
557
  # select single column
555
558
  # df["foo"]
556
- if item.is_a?(String)
557
- return Utils.wrap_s(_df.column(item))
559
+ if item.is_a?(String) || item.is_a?(Symbol)
560
+ return Utils.wrap_s(_df.column(item.to_s))
558
561
  end
559
562
 
560
563
  # df[idx]
@@ -572,6 +575,28 @@ module Polars
572
575
  # df[["foo", "bar"]]
573
576
  return _from_rbdf(_df.select(item))
574
577
  end
578
+
579
+ if Utils.is_int_sequence(item)
580
+ item = Series.new("", item)
581
+ end
582
+
583
+ if item.is_a?(Series)
584
+ dtype = item.dtype
585
+ if dtype == Utf8
586
+ return _from_rbdf(_df.select(item))
587
+ elsif dtype == UInt32
588
+ return _from_rbdf(_df.take_with_series(item._s))
589
+ elsif [UInt8, UInt16, UInt64, Int8, Int16, Int32, Int64].include?(dtype)
590
+ return _from_rbdf(
591
+ _df.take_with_series(_pos_idxs(item, 0)._s)
592
+ )
593
+ end
594
+ end
595
+ end
596
+
597
+ # Ruby-specific
598
+ if item.is_a?(Expr)
599
+ return filter(item)
575
600
  end
576
601
 
577
602
  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
@@ -797,6 +822,13 @@ module Polars
797
822
  nil
798
823
  end
799
824
 
825
+ # Write to comma-separated values (CSV) string.
826
+ #
827
+ # @return [String]
828
+ def to_csv(**options)
829
+ write_csv(**options)
830
+ end
831
+
800
832
  # Write to Apache Avro file.
801
833
  #
802
834
  # @param file [String]
@@ -4648,8 +4680,53 @@ module Polars
4648
4680
  end
4649
4681
  end
4650
4682
 
4651
- # def _pos_idxs
4652
- # end
4683
+ def _pos_idxs(idxs, dim)
4684
+ idx_type = Polars._get_idx_type
4685
+
4686
+ if idxs.is_a?(Series)
4687
+ if idxs.dtype == idx_type
4688
+ return idxs
4689
+ end
4690
+ if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
4691
+ if idx_type == UInt32
4692
+ if [Int64, UInt64].include?(idxs.dtype)
4693
+ if idxs.max >= 2**32
4694
+ raise ArgumentError, "Index positions should be smaller than 2^32."
4695
+ end
4696
+ end
4697
+ if idxs.dtype == Int64
4698
+ if idxs.min < -(2**32)
4699
+ raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
4700
+ end
4701
+ end
4702
+ end
4703
+ if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
4704
+ if idxs.min < 0
4705
+ if idx_type == UInt32
4706
+ if [Int8, Int16].include?(idxs.dtype)
4707
+ idxs = idxs.cast(Int32)
4708
+ end
4709
+ else
4710
+ if [Int8, Int16, Int32].include?(idxs.dtype)
4711
+ idxs = idxs.cast(Int64)
4712
+ end
4713
+ end
4714
+
4715
+ idxs =
4716
+ Polars.select(
4717
+ Polars.when(Polars.lit(idxs) < 0)
4718
+ .then(shape[dim] + Polars.lit(idxs))
4719
+ .otherwise(Polars.lit(idxs))
4720
+ ).to_series
4721
+ end
4722
+ end
4723
+
4724
+ return idxs.cast(idx_type)
4725
+ end
4726
+ end
4727
+
4728
+ raise ArgumentError, "Unsupported idxs datatype."
4729
+ end
4653
4730
 
4654
4731
  # @private
4655
4732
  def self.hash_to_rbdf(data, columns: nil)
@@ -93,7 +93,7 @@ module Polars
93
93
  class Time < DataType
94
94
  end
95
95
 
96
- # Type for wrapping arbitrary Python objects.
96
+ # Type for wrapping arbitrary Ruby objects.
97
97
  class Object < DataType
98
98
  end
99
99
 
data/lib/polars/io.rb CHANGED
@@ -590,8 +590,33 @@ module Polars
590
590
  DataFrame._read_ndjson(file)
591
591
  end
592
592
 
593
- # def read_sql
594
- # end
593
+ # Read a SQL query into a DataFrame.
594
+ #
595
+ # @param sql [Object]
596
+ # ActiveRecord::Relation, ActiveRecord::Result, or a String containing a SQL query.
597
+ #
598
+ # @return [DataFrame]
599
+ def read_sql(sql)
600
+ if !defined?(ActiveRecord)
601
+ raise Error, "Active Record not available"
602
+ end
603
+
604
+ result =
605
+ if sql.is_a?(ActiveRecord::Result)
606
+ sql
607
+ elsif sql.is_a?(ActiveRecord::Relation)
608
+ sql.connection.select_all(sql.to_sql)
609
+ elsif sql.is_a?(String)
610
+ ActiveRecord::Base.connection.select_all(sql)
611
+ else
612
+ raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
613
+ end
614
+ data = {}
615
+ result.columns.each_with_index do |k, i|
616
+ data[k] = result.rows.map { |r| r[i] }
617
+ end
618
+ DataFrame.new(data)
619
+ end
595
620
 
596
621
  # def read_excel
597
622
  # end
data/lib/polars/series.rb CHANGED
@@ -263,6 +263,10 @@ module Polars
263
263
  #
264
264
  # @return [Object]
265
265
  def [](item)
266
+ if item.is_a?(Series) && [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
267
+ return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
268
+ end
269
+
266
270
  if item.is_a?(Integer)
267
271
  return _s.get_idx(item)
268
272
  end
@@ -271,6 +275,10 @@ module Polars
271
275
  return Slice.new(self).apply(item)
272
276
  end
273
277
 
278
+ if Utils.is_int_sequence(item)
279
+ return Utils.wrap_s(_s.take_with_series(_pos_idxs(Series.new("", item))._s))
280
+ end
281
+
274
282
  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
275
283
  end
276
284
 
@@ -287,24 +295,23 @@ module Polars
287
295
  end
288
296
 
289
297
  if key.is_a?(Series)
290
- if key.dtype == :bool
298
+ if key.dtype == Boolean
291
299
  self._s = set(key, value)._s
292
- elsif key.dtype == :u64
293
- self._s = set_at_idx(key.cast(:u32), value)._s
294
- elsif key.dtype == :u32
300
+ elsif key.dtype == UInt64
301
+ self._s = set_at_idx(key.cast(UInt32), value)._s
302
+ elsif key.dtype == UInt32
295
303
  self._s = set_at_idx(key, value)._s
296
304
  else
297
305
  raise Todo
298
306
  end
299
- end
300
-
301
- if key.is_a?(Array)
302
- s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: :u32))
307
+ elsif key.is_a?(Array)
308
+ s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
309
+ self[s] = value
310
+ elsif key.is_a?(Range)
311
+ s = Series.new("", key, dtype: UInt32)
303
312
  self[s] = value
304
313
  elsif key.is_a?(Integer)
305
- # TODO fix
306
- # self[[key]] = value
307
- set_at_idx(key, value)
314
+ self[[key]] = value
308
315
  else
309
316
  raise ArgumentError, "cannot use #{key} for indexing"
310
317
  end
@@ -1647,6 +1654,7 @@ module Polars
1647
1654
  def len
1648
1655
  _s.len
1649
1656
  end
1657
+ alias_method :count, :len
1650
1658
  alias_method :length, :len
1651
1659
 
1652
1660
  # Cast between data types.
@@ -2183,6 +2191,7 @@ module Polars
2183
2191
  def arcsin
2184
2192
  super
2185
2193
  end
2194
+ alias_method :asin, :arcsin
2186
2195
 
2187
2196
  # Compute the element-wise value for the inverse cosine.
2188
2197
  #
@@ -2202,6 +2211,7 @@ module Polars
2202
2211
  def arccos
2203
2212
  super
2204
2213
  end
2214
+ alias_method :acos, :arccos
2205
2215
 
2206
2216
  # Compute the element-wise value for the inverse tangent.
2207
2217
  #
@@ -2221,6 +2231,7 @@ module Polars
2221
2231
  def arctan
2222
2232
  super
2223
2233
  end
2234
+ alias_method :atan, :arctan
2224
2235
 
2225
2236
  # Compute the element-wise value for the inverse hyperbolic sine.
2226
2237
  #
@@ -2240,6 +2251,7 @@ module Polars
2240
2251
  def arcsinh
2241
2252
  super
2242
2253
  end
2254
+ alias_method :asinh, :arcsinh
2243
2255
 
2244
2256
  # Compute the element-wise value for the inverse hyperbolic cosine.
2245
2257
  #
@@ -2260,6 +2272,7 @@ module Polars
2260
2272
  def arccosh
2261
2273
  super
2262
2274
  end
2275
+ alias_method :acosh, :arccosh
2263
2276
 
2264
2277
  # Compute the element-wise value for the inverse hyperbolic tangent.
2265
2278
  #
@@ -2283,6 +2296,7 @@ module Polars
2283
2296
  def arctanh
2284
2297
  super
2285
2298
  end
2299
+ alias_method :atanh, :arctanh
2286
2300
 
2287
2301
  # Compute the element-wise value for the hyperbolic sine.
2288
2302
  #
@@ -3520,6 +3534,59 @@ module Polars
3520
3534
  end
3521
3535
  end
3522
3536
 
3537
+ def _pos_idxs(idxs)
3538
+ idx_type = Polars._get_idx_type
3539
+
3540
+ if idxs.is_a?(Series)
3541
+ if idxs.dtype == idx_type
3542
+ return idxs
3543
+ end
3544
+ if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
3545
+ if idx_type == UInt32
3546
+ if [Int64, UInt64].include?(idxs.dtype)
3547
+ if idxs.max >= 2**32
3548
+ raise ArgumentError, "Index positions should be smaller than 2^32."
3549
+ end
3550
+ end
3551
+ if idxs.dtype == Int64
3552
+ if idxs.min < -(2**32)
3553
+ raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
3554
+ end
3555
+ end
3556
+ end
3557
+ if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
3558
+ if idxs.min < 0
3559
+ if idx_type == UInt32
3560
+ if [Int8, Int16].include?(idxs.dtype)
3561
+ idxs = idxs.cast(Int32)
3562
+ end
3563
+ else
3564
+ if [Int8, Int16, Int32].include?(idxs.dtype)
3565
+ idxs = idxs.cast(Int64)
3566
+ end
3567
+ end
3568
+
3569
+ # Update negative indexes to absolute indexes.
3570
+ return (
3571
+ idxs.to_frame
3572
+ .select(
3573
+ Polars.when(Polars.col(idxs.name) < 0)
3574
+ .then(len + Polars.col(idxs.name))
3575
+ .otherwise(Polars.col(idxs.name))
3576
+ .cast(idx_type)
3577
+ )
3578
+ .to_series(0)
3579
+ )
3580
+ end
3581
+ end
3582
+
3583
+ return idxs.cast(idx_type)
3584
+ end
3585
+ end
3586
+
3587
+ raise ArgumentError, "Unsupported idxs datatype."
3588
+ end
3589
+
3523
3590
  def _comp(other, op)
3524
3591
  if other.is_a?(Series)
3525
3592
  return Utils.wrap_s(_s.send(op, other._s))
data/lib/polars/slice.rb CHANGED
@@ -56,7 +56,7 @@ module Polars
56
56
  # Normalize slice bounds, identify unbounded and/or zero-length slices.
57
57
  def _slice_setup(s)
58
58
  # can normalize slice indices as we know object size
59
- obj_len = @obj.len
59
+ obj_len = @obj.length
60
60
  start = if s.begin
61
61
  if s.begin < 0
62
62
  [s.begin + obj_len, 0].max
data/lib/polars/utils.rb CHANGED
@@ -70,7 +70,7 @@ module Polars
70
70
  end
71
71
 
72
72
  def self.selection_to_rbexpr_list(exprs)
73
- if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
73
+ if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
74
74
  exprs = [exprs]
75
75
  end
76
76
 
@@ -78,9 +78,9 @@ module Polars
78
78
  end
79
79
 
80
80
  def self.expr_to_lit_or_expr(expr, str_to_lit: true)
81
- if expr.is_a?(String) && !str_to_lit
81
+ if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
82
82
  col(expr)
83
- elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Series) || expr.nil?
83
+ elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
84
84
  lit(expr)
85
85
  elsif expr.is_a?(Expr)
86
86
  expr
@@ -181,6 +181,26 @@ module Polars
181
181
  val.all? { |x| x.is_a?(eltype) }
182
182
  end
183
183
 
184
+ def self.is_bool_sequence(val)
185
+ val.is_a?(Array) && val.all? { |x| x == true || x == false }
186
+ end
187
+
188
+ def self.is_dtype_sequence(val)
189
+ val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
190
+ end
191
+
192
+ def self.is_int_sequence(val)
193
+ val.is_a?(Array) && _is_iterable_of(val, Integer)
194
+ end
195
+
196
+ def self.is_expr_sequence(val)
197
+ val.is_a?(Array) && _is_iterable_of(val, Expr)
198
+ end
199
+
200
+ def self.is_rbexpr_sequence(val)
201
+ val.is_a?(Array) && _is_iterable_of(val, RbExpr)
202
+ end
203
+
184
204
  def self.is_str_sequence(val, allow_str: false)
185
205
  if allow_str == false && val.is_a?(String)
186
206
  false
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.0"
3
+ VERSION = "0.2.2"
4
4
  end
data/lib/polars.rb CHANGED
@@ -7,6 +7,7 @@ end
7
7
 
8
8
  # stdlib
9
9
  require "date"
10
+ require "stringio"
10
11
 
11
12
  # modules
12
13
  require_relative "polars/expr_dispatch"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-15 00:00:00.000000000 Z
11
+ date: 2023-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys