polars-df 0.1.4 → 0.1.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/series.rb CHANGED
@@ -23,7 +23,7 @@ module Polars
23
23
  # @example Constructing a Series by specifying name and values positionally:
24
24
  # s = Polars::Series.new("a", [1, 2, 3])
25
25
  #
26
- # @example Notice that the dtype is automatically inferred as a polars Int64:
26
+ # @example Notice that the dtype is automatically inferred as a polars `:i64`:
27
27
  # s.dtype
28
28
  # # => :i64
29
29
  #
@@ -45,6 +45,11 @@ module Polars
45
45
 
46
46
  name = "" if name.nil?
47
47
 
48
+ # TODO improve
49
+ if values.is_a?(Range) && values.begin.is_a?(String)
50
+ values = values.to_a
51
+ end
52
+
48
53
  if values.nil?
49
54
  self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
50
55
  elsif values.is_a?(Series)
@@ -1668,8 +1673,32 @@ module Polars
1668
1673
  super
1669
1674
  end
1670
1675
 
1671
- # def to_physical
1672
- # end
1676
+ # Cast to physical representation of the logical dtype.
1677
+ #
1678
+ # - `:date` -> `:i32`
1679
+ # - `:datetime` -> `:i64`
1680
+ # - `:time` -> `:i64`
1681
+ # - `:duration` -> `:i64`
1682
+ # - `:cat` -> `:u32`
1683
+ # - other data types will be left unchanged.
1684
+ #
1685
+ # @return [Series]
1686
+ #
1687
+ # @example
1688
+ # s = Polars::Series.new("values", ["a", nil, "x", "a"])
1689
+ # s.cast(:cat).to_physical
1690
+ # # =>
1691
+ # # shape: (4,)
1692
+ # # Series: 'values' [u32]
1693
+ # # [
1694
+ # # 0
1695
+ # # null
1696
+ # # 1
1697
+ # # 0
1698
+ # # ]
1699
+ def to_physical
1700
+ super
1701
+ end
1673
1702
 
1674
1703
  # Convert this Series to a Ruby Array. This operation clones data.
1675
1704
  #
@@ -1785,8 +1814,34 @@ module Polars
1785
1814
  # def to_numo
1786
1815
  # end
1787
1816
 
1788
- # def set
1789
- # end
1817
+ # Set masked values.
1818
+ #
1819
+ # @param filter [Series]
1820
+ # Boolean mask.
1821
+ # @param value [Object]
1822
+ # Value with which to replace the masked values.
1823
+ #
1824
+ # @return [Series]
1825
+ #
1826
+ # @note
1827
+ # Use of this function is frequently an anti-pattern, as it can
1828
+ # block optimization (predicate pushdown, etc). Consider using
1829
+ # `Polars.when(predicate).then(value).otherwise(self)` instead.
1830
+ #
1831
+ # @example
1832
+ # s = Polars::Series.new("a", [1, 2, 3])
1833
+ # s.set(s == 2, 10)
1834
+ # # =>
1835
+ # # shape: (3,)
1836
+ # # Series: 'a' [i64]
1837
+ # # [
1838
+ # # 1
1839
+ # # 10
1840
+ # # 3
1841
+ # # ]
1842
+ def set(filter, value)
1843
+ Utils.wrap_s(_s.send("set_with_mask_#{dtype}", filter._s, value))
1844
+ end
1790
1845
 
1791
1846
  # Set values at the index locations.
1792
1847
  #
@@ -2286,8 +2341,41 @@ module Polars
2286
2341
  super
2287
2342
  end
2288
2343
 
2289
- # def apply
2290
- # end
2344
+ # Apply a custom/user-defined function (UDF) over elements in this Series and
2345
+ # return a new Series.
2346
+ #
2347
+ # If the function returns another datatype, the return_dtype arg should be set,
2348
+ # otherwise the method will fail.
2349
+ #
2350
+ # @param return_dtype [Symbol]
2351
+ # Output datatype. If none is given, the same datatype as this Series will be
2352
+ # used.
2353
+ # @param skip_nulls [Boolean]
2354
+ # Nulls will be skipped and not passed to the Ruby function.
2355
+ # This is faster because Ruby can be skipped and because we call
2356
+ # more specialized functions.
2357
+ #
2358
+ # @return [Series]
2359
+ #
2360
+ # @example
2361
+ # s = Polars::Series.new("a", [1, 2, 3])
2362
+ # s.apply { |x| x + 10 }
2363
+ # # =>
2364
+ # # shape: (3,)
2365
+ # # Series: 'a' [i64]
2366
+ # # [
2367
+ # # 11
2368
+ # # 12
2369
+ # # 13
2370
+ # # ]
2371
+ def apply(return_dtype: nil, skip_nulls: true, &func)
2372
+ if return_dtype.nil?
2373
+ pl_return_dtype = nil
2374
+ else
2375
+ pl_return_dtype = Utils.rb_type_to_dtype(return_dtype)
2376
+ end
2377
+ Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
2378
+ end
2291
2379
 
2292
2380
  # Shift the values by a given period.
2293
2381
  #
@@ -2953,8 +3041,35 @@ module Polars
2953
3041
  end
2954
3042
  end
2955
3043
 
2956
- # def _hash
2957
- # end
3044
+ # Hash the Series.
3045
+ #
3046
+ # The hash value is of type `:u64`.
3047
+ #
3048
+ # @param seed [Integer]
3049
+ # Random seed parameter. Defaults to 0.
3050
+ # @param seed_1 [Integer]
3051
+ # Random seed parameter. Defaults to `seed` if not set.
3052
+ # @param seed_2 [Integer]
3053
+ # Random seed parameter. Defaults to `seed` if not set.
3054
+ # @param seed_3 [Integer]
3055
+ # Random seed parameter. Defaults to `seed` if not set.
3056
+ #
3057
+ # @return [Series]
3058
+ #
3059
+ # @example
3060
+ # s = Polars::Series.new("a", [1, 2, 3])
3061
+ # s._hash(42)
3062
+ # # =>
3063
+ # # shape: (3,)
3064
+ # # Series: 'a' [u64]
3065
+ # # [
3066
+ # # 2374023516666777365
3067
+ # # 10386026231460783898
3068
+ # # 17796317186427479491
3069
+ # # ]
3070
+ def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
3071
+ super
3072
+ end
2958
3073
 
2959
3074
  # Reinterpret the underlying bits as a signed/unsigned integer.
2960
3075
  #
@@ -3424,7 +3539,14 @@ module Polars
3424
3539
  return Utils.wrap_s(_s.send(op, other._s))
3425
3540
  end
3426
3541
 
3427
- raise Todo
3542
+ if other.is_a?(Date) || other.is_a?(DateTime) || other.is_a?(Time) || other.is_a?(String)
3543
+ raise Todo
3544
+ end
3545
+ if other.is_a?(Float) && !is_float
3546
+ raise Todo
3547
+ end
3548
+
3549
+ Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
3428
3550
  end
3429
3551
 
3430
3552
  def series_to_rbseries(name, values)
data/lib/polars/utils.rb CHANGED
@@ -11,6 +11,10 @@ module Polars
11
11
  DataFrame._from_rbdf(df)
12
12
  end
13
13
 
14
+ def self.wrap_ldf(ldf)
15
+ LazyFrame._from_rbldf(ldf)
16
+ end
17
+
14
18
  def self.wrap_expr(rbexpr)
15
19
  Expr._from_rbexpr(rbexpr)
16
20
  end
@@ -171,5 +175,17 @@ module Polars
171
175
  def self.bool?(value)
172
176
  value == true || value == false
173
177
  end
178
+
179
+ def self._is_iterable_of(val, eltype)
180
+ val.all? { |x| x.is_a?(eltype) }
181
+ end
182
+
183
+ def self.is_str_sequence(val, allow_str: false)
184
+ if allow_str == false && val.is_a?(String)
185
+ false
186
+ else
187
+ val.is_a?(Array) && _is_iterable_of(val, String)
188
+ end
189
+ end
174
190
  end
175
191
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.1.4"
3
+ VERSION = "0.1.5"
4
4
  end
data/lib/polars.rb CHANGED
@@ -1,5 +1,9 @@
1
1
  # ext
2
- require "polars/polars"
2
+ begin
3
+ require "polars/#{RUBY_VERSION.to_f}/polars"
4
+ rescue LoadError
5
+ require "polars/polars"
6
+ end
3
7
 
4
8
  # stdlib
5
9
  require "date"
@@ -9,9 +13,11 @@ require "polars/expr_dispatch"
9
13
  require "polars/batched_csv_reader"
10
14
  require "polars/cat_expr"
11
15
  require "polars/cat_name_space"
16
+ require "polars/convert"
12
17
  require "polars/data_frame"
13
18
  require "polars/date_time_expr"
14
19
  require "polars/date_time_name_space"
20
+ require "polars/dynamic_group_by"
15
21
  require "polars/exceptions"
16
22
  require "polars/expr"
17
23
  require "polars/functions"
@@ -23,6 +29,7 @@ require "polars/lazy_group_by"
23
29
  require "polars/list_expr"
24
30
  require "polars/list_name_space"
25
31
  require "polars/meta_expr"
32
+ require "polars/rolling_group_by"
26
33
  require "polars/series"
27
34
  require "polars/slice"
28
35
  require "polars/string_expr"
@@ -35,6 +42,7 @@ require "polars/when"
35
42
  require "polars/when_then"
36
43
 
37
44
  module Polars
45
+ extend Convert
38
46
  extend Functions
39
47
  extend IO
40
48
  extend LazyFunctions
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-02 00:00:00.000000000 Z
11
+ date: 2022-12-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -39,6 +39,9 @@ files:
39
39
  - README.md
40
40
  - ext/polars/Cargo.toml
41
41
  - ext/polars/extconf.rb
42
+ - ext/polars/src/apply/dataframe.rs
43
+ - ext/polars/src/apply/mod.rs
44
+ - ext/polars/src/apply/series.rs
42
45
  - ext/polars/src/batched_csv.rs
43
46
  - ext/polars/src/conversion.rs
44
47
  - ext/polars/src/dataframe.rs
@@ -60,9 +63,11 @@ files:
60
63
  - lib/polars/batched_csv_reader.rb
61
64
  - lib/polars/cat_expr.rb
62
65
  - lib/polars/cat_name_space.rb
66
+ - lib/polars/convert.rb
63
67
  - lib/polars/data_frame.rb
64
68
  - lib/polars/date_time_expr.rb
65
69
  - lib/polars/date_time_name_space.rb
70
+ - lib/polars/dynamic_group_by.rb
66
71
  - lib/polars/exceptions.rb
67
72
  - lib/polars/expr.rb
68
73
  - lib/polars/expr_dispatch.rb
@@ -75,6 +80,7 @@ files:
75
80
  - lib/polars/list_expr.rb
76
81
  - lib/polars/list_name_space.rb
77
82
  - lib/polars/meta_expr.rb
83
+ - lib/polars/rolling_group_by.rb
78
84
  - lib/polars/series.rb
79
85
  - lib/polars/slice.rb
80
86
  - lib/polars/string_expr.rb
@@ -104,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
110
  - !ruby/object:Gem::Version
105
111
  version: '0'
106
112
  requirements: []
107
- rubygems_version: 3.3.7
113
+ rubygems_version: 3.3.26
108
114
  signing_key:
109
115
  specification_version: 4
110
116
  summary: Blazingly fast DataFrames for Ruby