polars-df 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +70 -9
- data/Cargo.toml +2 -0
- data/ext/polars/Cargo.toml +6 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +100 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +72 -1
- data/ext/polars/src/lazy/dsl.rs +38 -0
- data/ext/polars/src/lib.rs +165 -1
- data/ext/polars/src/series.rs +296 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1457 -56
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +258 -9
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +43 -3
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +792 -22
- data/lib/polars/lazy_functions.rb +561 -27
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +132 -10
- data/lib/polars/utils.rb +16 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +9 -3
data/lib/polars/series.rb
CHANGED
@@ -23,7 +23,7 @@ module Polars
|
|
23
23
|
# @example Constructing a Series by specifying name and values positionally:
|
24
24
|
# s = Polars::Series.new("a", [1, 2, 3])
|
25
25
|
#
|
26
|
-
# @example Notice that the dtype is automatically inferred as a polars
|
26
|
+
# @example Notice that the dtype is automatically inferred as a polars `:i64`:
|
27
27
|
# s.dtype
|
28
28
|
# # => :i64
|
29
29
|
#
|
@@ -45,6 +45,11 @@ module Polars
|
|
45
45
|
|
46
46
|
name = "" if name.nil?
|
47
47
|
|
48
|
+
# TODO improve
|
49
|
+
if values.is_a?(Range) && values.begin.is_a?(String)
|
50
|
+
values = values.to_a
|
51
|
+
end
|
52
|
+
|
48
53
|
if values.nil?
|
49
54
|
self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
|
50
55
|
elsif values.is_a?(Series)
|
@@ -1668,8 +1673,32 @@ module Polars
|
|
1668
1673
|
super
|
1669
1674
|
end
|
1670
1675
|
|
1671
|
-
#
|
1672
|
-
#
|
1676
|
+
# Cast to physical representation of the logical dtype.
|
1677
|
+
#
|
1678
|
+
# - `:date` -> `:i32`
|
1679
|
+
# - `:datetime` -> `:i64`
|
1680
|
+
# - `:time` -> `:i64`
|
1681
|
+
# - `:duration` -> `:i64`
|
1682
|
+
# - `:cat` -> `:u32`
|
1683
|
+
# - other data types will be left unchanged.
|
1684
|
+
#
|
1685
|
+
# @return [Series]
|
1686
|
+
#
|
1687
|
+
# @example
|
1688
|
+
# s = Polars::Series.new("values", ["a", nil, "x", "a"])
|
1689
|
+
# s.cast(:cat).to_physical
|
1690
|
+
# # =>
|
1691
|
+
# # shape: (4,)
|
1692
|
+
# # Series: 'values' [u32]
|
1693
|
+
# # [
|
1694
|
+
# # 0
|
1695
|
+
# # null
|
1696
|
+
# # 1
|
1697
|
+
# # 0
|
1698
|
+
# # ]
|
1699
|
+
def to_physical
|
1700
|
+
super
|
1701
|
+
end
|
1673
1702
|
|
1674
1703
|
# Convert this Series to a Ruby Array. This operation clones data.
|
1675
1704
|
#
|
@@ -1785,8 +1814,34 @@ module Polars
|
|
1785
1814
|
# def to_numo
|
1786
1815
|
# end
|
1787
1816
|
|
1788
|
-
#
|
1789
|
-
#
|
1817
|
+
# Set masked values.
|
1818
|
+
#
|
1819
|
+
# @param filter [Series]
|
1820
|
+
# Boolean mask.
|
1821
|
+
# @param value [Object]
|
1822
|
+
# Value with which to replace the masked values.
|
1823
|
+
#
|
1824
|
+
# @return [Series]
|
1825
|
+
#
|
1826
|
+
# @note
|
1827
|
+
# Use of this function is frequently an anti-pattern, as it can
|
1828
|
+
# block optimization (predicate pushdown, etc). Consider using
|
1829
|
+
# `Polars.when(predicate).then(value).otherwise(self)` instead.
|
1830
|
+
#
|
1831
|
+
# @example
|
1832
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1833
|
+
# s.set(s == 2, 10)
|
1834
|
+
# # =>
|
1835
|
+
# # shape: (3,)
|
1836
|
+
# # Series: 'a' [i64]
|
1837
|
+
# # [
|
1838
|
+
# # 1
|
1839
|
+
# # 10
|
1840
|
+
# # 3
|
1841
|
+
# # ]
|
1842
|
+
def set(filter, value)
|
1843
|
+
Utils.wrap_s(_s.send("set_with_mask_#{dtype}", filter._s, value))
|
1844
|
+
end
|
1790
1845
|
|
1791
1846
|
# Set values at the index locations.
|
1792
1847
|
#
|
@@ -2286,8 +2341,41 @@ module Polars
|
|
2286
2341
|
super
|
2287
2342
|
end
|
2288
2343
|
|
2289
|
-
#
|
2290
|
-
#
|
2344
|
+
# Apply a custom/user-defined function (UDF) over elements in this Series and
|
2345
|
+
# return a new Series.
|
2346
|
+
#
|
2347
|
+
# If the function returns another datatype, the return_dtype arg should be set,
|
2348
|
+
# otherwise the method will fail.
|
2349
|
+
#
|
2350
|
+
# @param return_dtype [Symbol]
|
2351
|
+
# Output datatype. If none is given, the same datatype as this Series will be
|
2352
|
+
# used.
|
2353
|
+
# @param skip_nulls [Boolean]
|
2354
|
+
# Nulls will be skipped and not passed to the Ruby function.
|
2355
|
+
# This is faster because Ruby can be skipped and because we call
|
2356
|
+
# more specialized functions.
|
2357
|
+
#
|
2358
|
+
# @return [Series]
|
2359
|
+
#
|
2360
|
+
# @example
|
2361
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
2362
|
+
# s.apply { |x| x + 10 }
|
2363
|
+
# # =>
|
2364
|
+
# # shape: (3,)
|
2365
|
+
# # Series: 'a' [i64]
|
2366
|
+
# # [
|
2367
|
+
# # 11
|
2368
|
+
# # 12
|
2369
|
+
# # 13
|
2370
|
+
# # ]
|
2371
|
+
def apply(return_dtype: nil, skip_nulls: true, &func)
|
2372
|
+
if return_dtype.nil?
|
2373
|
+
pl_return_dtype = nil
|
2374
|
+
else
|
2375
|
+
pl_return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
2376
|
+
end
|
2377
|
+
Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
|
2378
|
+
end
|
2291
2379
|
|
2292
2380
|
# Shift the values by a given period.
|
2293
2381
|
#
|
@@ -2953,8 +3041,35 @@ module Polars
|
|
2953
3041
|
end
|
2954
3042
|
end
|
2955
3043
|
|
2956
|
-
#
|
2957
|
-
#
|
3044
|
+
# Hash the Series.
|
3045
|
+
#
|
3046
|
+
# The hash value is of type `:u64`.
|
3047
|
+
#
|
3048
|
+
# @param seed [Integer]
|
3049
|
+
# Random seed parameter. Defaults to 0.
|
3050
|
+
# @param seed_1 [Integer]
|
3051
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3052
|
+
# @param seed_2 [Integer]
|
3053
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3054
|
+
# @param seed_3 [Integer]
|
3055
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3056
|
+
#
|
3057
|
+
# @return [Series]
|
3058
|
+
#
|
3059
|
+
# @example
|
3060
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
3061
|
+
# s._hash(42)
|
3062
|
+
# # =>
|
3063
|
+
# # shape: (3,)
|
3064
|
+
# # Series: 'a' [u64]
|
3065
|
+
# # [
|
3066
|
+
# # 2374023516666777365
|
3067
|
+
# # 10386026231460783898
|
3068
|
+
# # 17796317186427479491
|
3069
|
+
# # ]
|
3070
|
+
def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
|
3071
|
+
super
|
3072
|
+
end
|
2958
3073
|
|
2959
3074
|
# Reinterpret the underlying bits as a signed/unsigned integer.
|
2960
3075
|
#
|
@@ -3424,7 +3539,14 @@ module Polars
|
|
3424
3539
|
return Utils.wrap_s(_s.send(op, other._s))
|
3425
3540
|
end
|
3426
3541
|
|
3427
|
-
|
3542
|
+
if other.is_a?(Date) || other.is_a?(DateTime) || other.is_a?(Time) || other.is_a?(String)
|
3543
|
+
raise Todo
|
3544
|
+
end
|
3545
|
+
if other.is_a?(Float) && !is_float
|
3546
|
+
raise Todo
|
3547
|
+
end
|
3548
|
+
|
3549
|
+
Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
|
3428
3550
|
end
|
3429
3551
|
|
3430
3552
|
def series_to_rbseries(name, values)
|
data/lib/polars/utils.rb
CHANGED
@@ -11,6 +11,10 @@ module Polars
|
|
11
11
|
DataFrame._from_rbdf(df)
|
12
12
|
end
|
13
13
|
|
14
|
+
def self.wrap_ldf(ldf)
|
15
|
+
LazyFrame._from_rbldf(ldf)
|
16
|
+
end
|
17
|
+
|
14
18
|
def self.wrap_expr(rbexpr)
|
15
19
|
Expr._from_rbexpr(rbexpr)
|
16
20
|
end
|
@@ -171,5 +175,17 @@ module Polars
|
|
171
175
|
def self.bool?(value)
|
172
176
|
value == true || value == false
|
173
177
|
end
|
178
|
+
|
179
|
+
def self._is_iterable_of(val, eltype)
|
180
|
+
val.all? { |x| x.is_a?(eltype) }
|
181
|
+
end
|
182
|
+
|
183
|
+
def self.is_str_sequence(val, allow_str: false)
|
184
|
+
if allow_str == false && val.is_a?(String)
|
185
|
+
false
|
186
|
+
else
|
187
|
+
val.is_a?(Array) && _is_iterable_of(val, String)
|
188
|
+
end
|
189
|
+
end
|
174
190
|
end
|
175
191
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
# ext
|
2
|
-
|
2
|
+
begin
|
3
|
+
require "polars/#{RUBY_VERSION.to_f}/polars"
|
4
|
+
rescue LoadError
|
5
|
+
require "polars/polars"
|
6
|
+
end
|
3
7
|
|
4
8
|
# stdlib
|
5
9
|
require "date"
|
@@ -9,9 +13,11 @@ require "polars/expr_dispatch"
|
|
9
13
|
require "polars/batched_csv_reader"
|
10
14
|
require "polars/cat_expr"
|
11
15
|
require "polars/cat_name_space"
|
16
|
+
require "polars/convert"
|
12
17
|
require "polars/data_frame"
|
13
18
|
require "polars/date_time_expr"
|
14
19
|
require "polars/date_time_name_space"
|
20
|
+
require "polars/dynamic_group_by"
|
15
21
|
require "polars/exceptions"
|
16
22
|
require "polars/expr"
|
17
23
|
require "polars/functions"
|
@@ -23,6 +29,7 @@ require "polars/lazy_group_by"
|
|
23
29
|
require "polars/list_expr"
|
24
30
|
require "polars/list_name_space"
|
25
31
|
require "polars/meta_expr"
|
32
|
+
require "polars/rolling_group_by"
|
26
33
|
require "polars/series"
|
27
34
|
require "polars/slice"
|
28
35
|
require "polars/string_expr"
|
@@ -35,6 +42,7 @@ require "polars/when"
|
|
35
42
|
require "polars/when_then"
|
36
43
|
|
37
44
|
module Polars
|
45
|
+
extend Convert
|
38
46
|
extend Functions
|
39
47
|
extend IO
|
40
48
|
extend LazyFunctions
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -39,6 +39,9 @@ files:
|
|
39
39
|
- README.md
|
40
40
|
- ext/polars/Cargo.toml
|
41
41
|
- ext/polars/extconf.rb
|
42
|
+
- ext/polars/src/apply/dataframe.rs
|
43
|
+
- ext/polars/src/apply/mod.rs
|
44
|
+
- ext/polars/src/apply/series.rs
|
42
45
|
- ext/polars/src/batched_csv.rs
|
43
46
|
- ext/polars/src/conversion.rs
|
44
47
|
- ext/polars/src/dataframe.rs
|
@@ -60,9 +63,11 @@ files:
|
|
60
63
|
- lib/polars/batched_csv_reader.rb
|
61
64
|
- lib/polars/cat_expr.rb
|
62
65
|
- lib/polars/cat_name_space.rb
|
66
|
+
- lib/polars/convert.rb
|
63
67
|
- lib/polars/data_frame.rb
|
64
68
|
- lib/polars/date_time_expr.rb
|
65
69
|
- lib/polars/date_time_name_space.rb
|
70
|
+
- lib/polars/dynamic_group_by.rb
|
66
71
|
- lib/polars/exceptions.rb
|
67
72
|
- lib/polars/expr.rb
|
68
73
|
- lib/polars/expr_dispatch.rb
|
@@ -75,6 +80,7 @@ files:
|
|
75
80
|
- lib/polars/list_expr.rb
|
76
81
|
- lib/polars/list_name_space.rb
|
77
82
|
- lib/polars/meta_expr.rb
|
83
|
+
- lib/polars/rolling_group_by.rb
|
78
84
|
- lib/polars/series.rb
|
79
85
|
- lib/polars/slice.rb
|
80
86
|
- lib/polars/string_expr.rb
|
@@ -104,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
110
|
- !ruby/object:Gem::Version
|
105
111
|
version: '0'
|
106
112
|
requirements: []
|
107
|
-
rubygems_version: 3.3.
|
113
|
+
rubygems_version: 3.3.26
|
108
114
|
signing_key:
|
109
115
|
specification_version: 4
|
110
116
|
summary: Blazingly fast DataFrames for Ruby
|