polars-df 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +70 -9
- data/Cargo.toml +2 -0
- data/ext/polars/Cargo.toml +6 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +100 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +72 -1
- data/ext/polars/src/lazy/dsl.rs +38 -0
- data/ext/polars/src/lib.rs +165 -1
- data/ext/polars/src/series.rs +296 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1457 -56
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +258 -9
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +43 -3
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +792 -22
- data/lib/polars/lazy_functions.rb +561 -27
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +132 -10
- data/lib/polars/utils.rb +16 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +9 -3
data/lib/polars/series.rb
CHANGED
@@ -23,7 +23,7 @@ module Polars
|
|
23
23
|
# @example Constructing a Series by specifying name and values positionally:
|
24
24
|
# s = Polars::Series.new("a", [1, 2, 3])
|
25
25
|
#
|
26
|
-
# @example Notice that the dtype is automatically inferred as a polars
|
26
|
+
# @example Notice that the dtype is automatically inferred as a polars `:i64`:
|
27
27
|
# s.dtype
|
28
28
|
# # => :i64
|
29
29
|
#
|
@@ -45,6 +45,11 @@ module Polars
|
|
45
45
|
|
46
46
|
name = "" if name.nil?
|
47
47
|
|
48
|
+
# TODO improve
|
49
|
+
if values.is_a?(Range) && values.begin.is_a?(String)
|
50
|
+
values = values.to_a
|
51
|
+
end
|
52
|
+
|
48
53
|
if values.nil?
|
49
54
|
self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
|
50
55
|
elsif values.is_a?(Series)
|
@@ -1668,8 +1673,32 @@ module Polars
|
|
1668
1673
|
super
|
1669
1674
|
end
|
1670
1675
|
|
1671
|
-
#
|
1672
|
-
#
|
1676
|
+
# Cast to physical representation of the logical dtype.
|
1677
|
+
#
|
1678
|
+
# - `:date` -> `:i32`
|
1679
|
+
# - `:datetime` -> `:i64`
|
1680
|
+
# - `:time` -> `:i64`
|
1681
|
+
# - `:duration` -> `:i64`
|
1682
|
+
# - `:cat` -> `:u32`
|
1683
|
+
# - other data types will be left unchanged.
|
1684
|
+
#
|
1685
|
+
# @return [Series]
|
1686
|
+
#
|
1687
|
+
# @example
|
1688
|
+
# s = Polars::Series.new("values", ["a", nil, "x", "a"])
|
1689
|
+
# s.cast(:cat).to_physical
|
1690
|
+
# # =>
|
1691
|
+
# # shape: (4,)
|
1692
|
+
# # Series: 'values' [u32]
|
1693
|
+
# # [
|
1694
|
+
# # 0
|
1695
|
+
# # null
|
1696
|
+
# # 1
|
1697
|
+
# # 0
|
1698
|
+
# # ]
|
1699
|
+
def to_physical
|
1700
|
+
super
|
1701
|
+
end
|
1673
1702
|
|
1674
1703
|
# Convert this Series to a Ruby Array. This operation clones data.
|
1675
1704
|
#
|
@@ -1785,8 +1814,34 @@ module Polars
|
|
1785
1814
|
# def to_numo
|
1786
1815
|
# end
|
1787
1816
|
|
1788
|
-
#
|
1789
|
-
#
|
1817
|
+
# Set masked values.
|
1818
|
+
#
|
1819
|
+
# @param filter [Series]
|
1820
|
+
# Boolean mask.
|
1821
|
+
# @param value [Object]
|
1822
|
+
# Value with which to replace the masked values.
|
1823
|
+
#
|
1824
|
+
# @return [Series]
|
1825
|
+
#
|
1826
|
+
# @note
|
1827
|
+
# Use of this function is frequently an anti-pattern, as it can
|
1828
|
+
# block optimization (predicate pushdown, etc). Consider using
|
1829
|
+
# `Polars.when(predicate).then(value).otherwise(self)` instead.
|
1830
|
+
#
|
1831
|
+
# @example
|
1832
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1833
|
+
# s.set(s == 2, 10)
|
1834
|
+
# # =>
|
1835
|
+
# # shape: (3,)
|
1836
|
+
# # Series: 'a' [i64]
|
1837
|
+
# # [
|
1838
|
+
# # 1
|
1839
|
+
# # 10
|
1840
|
+
# # 3
|
1841
|
+
# # ]
|
1842
|
+
def set(filter, value)
|
1843
|
+
Utils.wrap_s(_s.send("set_with_mask_#{dtype}", filter._s, value))
|
1844
|
+
end
|
1790
1845
|
|
1791
1846
|
# Set values at the index locations.
|
1792
1847
|
#
|
@@ -2286,8 +2341,41 @@ module Polars
|
|
2286
2341
|
super
|
2287
2342
|
end
|
2288
2343
|
|
2289
|
-
#
|
2290
|
-
#
|
2344
|
+
# Apply a custom/user-defined function (UDF) over elements in this Series and
|
2345
|
+
# return a new Series.
|
2346
|
+
#
|
2347
|
+
# If the function returns another datatype, the return_dtype arg should be set,
|
2348
|
+
# otherwise the method will fail.
|
2349
|
+
#
|
2350
|
+
# @param return_dtype [Symbol]
|
2351
|
+
# Output datatype. If none is given, the same datatype as this Series will be
|
2352
|
+
# used.
|
2353
|
+
# @param skip_nulls [Boolean]
|
2354
|
+
# Nulls will be skipped and not passed to the Ruby function.
|
2355
|
+
# This is faster because Ruby can be skipped and because we call
|
2356
|
+
# more specialized functions.
|
2357
|
+
#
|
2358
|
+
# @return [Series]
|
2359
|
+
#
|
2360
|
+
# @example
|
2361
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
2362
|
+
# s.apply { |x| x + 10 }
|
2363
|
+
# # =>
|
2364
|
+
# # shape: (3,)
|
2365
|
+
# # Series: 'a' [i64]
|
2366
|
+
# # [
|
2367
|
+
# # 11
|
2368
|
+
# # 12
|
2369
|
+
# # 13
|
2370
|
+
# # ]
|
2371
|
+
def apply(return_dtype: nil, skip_nulls: true, &func)
|
2372
|
+
if return_dtype.nil?
|
2373
|
+
pl_return_dtype = nil
|
2374
|
+
else
|
2375
|
+
pl_return_dtype = Utils.rb_type_to_dtype(return_dtype)
|
2376
|
+
end
|
2377
|
+
Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
|
2378
|
+
end
|
2291
2379
|
|
2292
2380
|
# Shift the values by a given period.
|
2293
2381
|
#
|
@@ -2953,8 +3041,35 @@ module Polars
|
|
2953
3041
|
end
|
2954
3042
|
end
|
2955
3043
|
|
2956
|
-
#
|
2957
|
-
#
|
3044
|
+
# Hash the Series.
|
3045
|
+
#
|
3046
|
+
# The hash value is of type `:u64`.
|
3047
|
+
#
|
3048
|
+
# @param seed [Integer]
|
3049
|
+
# Random seed parameter. Defaults to 0.
|
3050
|
+
# @param seed_1 [Integer]
|
3051
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3052
|
+
# @param seed_2 [Integer]
|
3053
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3054
|
+
# @param seed_3 [Integer]
|
3055
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3056
|
+
#
|
3057
|
+
# @return [Series]
|
3058
|
+
#
|
3059
|
+
# @example
|
3060
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
3061
|
+
# s._hash(42)
|
3062
|
+
# # =>
|
3063
|
+
# # shape: (3,)
|
3064
|
+
# # Series: 'a' [u64]
|
3065
|
+
# # [
|
3066
|
+
# # 2374023516666777365
|
3067
|
+
# # 10386026231460783898
|
3068
|
+
# # 17796317186427479491
|
3069
|
+
# # ]
|
3070
|
+
def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
|
3071
|
+
super
|
3072
|
+
end
|
2958
3073
|
|
2959
3074
|
# Reinterpret the underlying bits as a signed/unsigned integer.
|
2960
3075
|
#
|
@@ -3424,7 +3539,14 @@ module Polars
|
|
3424
3539
|
return Utils.wrap_s(_s.send(op, other._s))
|
3425
3540
|
end
|
3426
3541
|
|
3427
|
-
|
3542
|
+
if other.is_a?(Date) || other.is_a?(DateTime) || other.is_a?(Time) || other.is_a?(String)
|
3543
|
+
raise Todo
|
3544
|
+
end
|
3545
|
+
if other.is_a?(Float) && !is_float
|
3546
|
+
raise Todo
|
3547
|
+
end
|
3548
|
+
|
3549
|
+
Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
|
3428
3550
|
end
|
3429
3551
|
|
3430
3552
|
def series_to_rbseries(name, values)
|
data/lib/polars/utils.rb
CHANGED
@@ -11,6 +11,10 @@ module Polars
|
|
11
11
|
DataFrame._from_rbdf(df)
|
12
12
|
end
|
13
13
|
|
14
|
+
def self.wrap_ldf(ldf)
|
15
|
+
LazyFrame._from_rbldf(ldf)
|
16
|
+
end
|
17
|
+
|
14
18
|
def self.wrap_expr(rbexpr)
|
15
19
|
Expr._from_rbexpr(rbexpr)
|
16
20
|
end
|
@@ -171,5 +175,17 @@ module Polars
|
|
171
175
|
def self.bool?(value)
|
172
176
|
value == true || value == false
|
173
177
|
end
|
178
|
+
|
179
|
+
def self._is_iterable_of(val, eltype)
|
180
|
+
val.all? { |x| x.is_a?(eltype) }
|
181
|
+
end
|
182
|
+
|
183
|
+
def self.is_str_sequence(val, allow_str: false)
|
184
|
+
if allow_str == false && val.is_a?(String)
|
185
|
+
false
|
186
|
+
else
|
187
|
+
val.is_a?(Array) && _is_iterable_of(val, String)
|
188
|
+
end
|
189
|
+
end
|
174
190
|
end
|
175
191
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
# ext
|
2
|
-
|
2
|
+
begin
|
3
|
+
require "polars/#{RUBY_VERSION.to_f}/polars"
|
4
|
+
rescue LoadError
|
5
|
+
require "polars/polars"
|
6
|
+
end
|
3
7
|
|
4
8
|
# stdlib
|
5
9
|
require "date"
|
@@ -9,9 +13,11 @@ require "polars/expr_dispatch"
|
|
9
13
|
require "polars/batched_csv_reader"
|
10
14
|
require "polars/cat_expr"
|
11
15
|
require "polars/cat_name_space"
|
16
|
+
require "polars/convert"
|
12
17
|
require "polars/data_frame"
|
13
18
|
require "polars/date_time_expr"
|
14
19
|
require "polars/date_time_name_space"
|
20
|
+
require "polars/dynamic_group_by"
|
15
21
|
require "polars/exceptions"
|
16
22
|
require "polars/expr"
|
17
23
|
require "polars/functions"
|
@@ -23,6 +29,7 @@ require "polars/lazy_group_by"
|
|
23
29
|
require "polars/list_expr"
|
24
30
|
require "polars/list_name_space"
|
25
31
|
require "polars/meta_expr"
|
32
|
+
require "polars/rolling_group_by"
|
26
33
|
require "polars/series"
|
27
34
|
require "polars/slice"
|
28
35
|
require "polars/string_expr"
|
@@ -35,6 +42,7 @@ require "polars/when"
|
|
35
42
|
require "polars/when_then"
|
36
43
|
|
37
44
|
module Polars
|
45
|
+
extend Convert
|
38
46
|
extend Functions
|
39
47
|
extend IO
|
40
48
|
extend LazyFunctions
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -39,6 +39,9 @@ files:
|
|
39
39
|
- README.md
|
40
40
|
- ext/polars/Cargo.toml
|
41
41
|
- ext/polars/extconf.rb
|
42
|
+
- ext/polars/src/apply/dataframe.rs
|
43
|
+
- ext/polars/src/apply/mod.rs
|
44
|
+
- ext/polars/src/apply/series.rs
|
42
45
|
- ext/polars/src/batched_csv.rs
|
43
46
|
- ext/polars/src/conversion.rs
|
44
47
|
- ext/polars/src/dataframe.rs
|
@@ -60,9 +63,11 @@ files:
|
|
60
63
|
- lib/polars/batched_csv_reader.rb
|
61
64
|
- lib/polars/cat_expr.rb
|
62
65
|
- lib/polars/cat_name_space.rb
|
66
|
+
- lib/polars/convert.rb
|
63
67
|
- lib/polars/data_frame.rb
|
64
68
|
- lib/polars/date_time_expr.rb
|
65
69
|
- lib/polars/date_time_name_space.rb
|
70
|
+
- lib/polars/dynamic_group_by.rb
|
66
71
|
- lib/polars/exceptions.rb
|
67
72
|
- lib/polars/expr.rb
|
68
73
|
- lib/polars/expr_dispatch.rb
|
@@ -75,6 +80,7 @@ files:
|
|
75
80
|
- lib/polars/list_expr.rb
|
76
81
|
- lib/polars/list_name_space.rb
|
77
82
|
- lib/polars/meta_expr.rb
|
83
|
+
- lib/polars/rolling_group_by.rb
|
78
84
|
- lib/polars/series.rb
|
79
85
|
- lib/polars/slice.rb
|
80
86
|
- lib/polars/string_expr.rb
|
@@ -104,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
110
|
- !ruby/object:Gem::Version
|
105
111
|
version: '0'
|
106
112
|
requirements: []
|
107
|
-
rubygems_version: 3.3.
|
113
|
+
rubygems_version: 3.3.26
|
108
114
|
signing_key:
|
109
115
|
specification_version: 4
|
110
116
|
summary: Blazingly fast DataFrames for Ruby
|