polars-df 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +74 -3
- data/Cargo.toml +3 -0
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +18 -1
- data/ext/polars/src/conversion.rs +115 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dsl.rs +157 -2
- data/ext/polars/src/lib.rs +185 -10
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +217 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +2384 -140
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +4374 -53
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +518 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1267 -69
- data/lib/polars/lazy_functions.rb +412 -24
- data/lib/polars/lazy_group_by.rb +80 -0
- data/lib/polars/list_expr.rb +507 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2256 -242
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +847 -10
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +71 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +12 -10
- metadata +15 -2
data/lib/polars/struct_expr.rb
CHANGED
@@ -1,11 +1,17 @@
|
|
1
1
|
module Polars
|
2
|
+
# Namespace for struct related expressions.
|
2
3
|
class StructExpr
|
4
|
+
# @private
|
3
5
|
attr_accessor :_rbexpr
|
4
6
|
|
7
|
+
# @private
|
5
8
|
def initialize(expr)
  # Keep only the underlying native expression handle of the wrapped `expr`.
  rbexpr = expr._rbexpr
  self._rbexpr = rbexpr
end
|
8
11
|
|
12
|
+
# Retrieve one of the fields of this `Struct` as a new Series.
|
13
|
+
#
|
14
|
+
# @return [Expr]
|
9
15
|
def [](item)
|
10
16
|
if item.is_a?(String)
|
11
17
|
field(item)
|
@@ -16,10 +22,77 @@ module Polars
|
|
16
22
|
end
|
17
23
|
end
|
18
24
|
|
25
|
+
# Retrieve one of the fields of this `Struct` as a new Series.
|
26
|
+
#
|
27
|
+
# @param name [String]
|
28
|
+
# Name of the field
|
29
|
+
#
|
30
|
+
# @return [Expr]
|
31
|
+
#
|
32
|
+
# @example
|
33
|
+
# df = (
|
34
|
+
# Polars::DataFrame.new(
|
35
|
+
# {
|
36
|
+
# "int" => [1, 2],
|
37
|
+
# "str" => ["a", "b"],
|
38
|
+
# "bool" => [true, nil],
|
39
|
+
# "list" => [[1, 2], [3]]
|
40
|
+
# }
|
41
|
+
# )
|
42
|
+
# .to_struct("my_struct")
|
43
|
+
# .to_frame
|
44
|
+
# )
|
45
|
+
# df.select(Polars.col("my_struct").struct.field("str"))
|
46
|
+
# # =>
|
47
|
+
# # shape: (2, 1)
|
48
|
+
# # ┌─────┐
|
49
|
+
# # │ str │
|
50
|
+
# # │ --- │
|
51
|
+
# # │ str │
|
52
|
+
# # ╞═════╡
|
53
|
+
# # │ a │
|
54
|
+
# # ├╌╌╌╌╌┤
|
55
|
+
# # │ b │
|
56
|
+
# # └─────┘
|
19
57
|
def field(name)
  # Ask the native expression for the named field, then wrap the result
  # with `Utils.wrap_expr` before handing it back.
  field_rbexpr = _rbexpr.struct_field_by_name(name)
  Utils.wrap_expr(field_rbexpr)
end
|
22
60
|
|
61
|
+
# Rename the fields of the struct.
|
62
|
+
#
|
63
|
+
# @param names [Array]
|
64
|
+
# New names in the order of the struct's fields
|
65
|
+
#
|
66
|
+
# @return [Expr]
|
67
|
+
#
|
68
|
+
# @example
|
69
|
+
# df = (
|
70
|
+
# Polars::DataFrame.new(
|
71
|
+
# {
|
72
|
+
# "int" => [1, 2],
|
73
|
+
# "str" => ["a", "b"],
|
74
|
+
# "bool" => [true, nil],
|
75
|
+
# "list" => [[1, 2], [3]]
|
76
|
+
# }
|
77
|
+
# )
|
78
|
+
# .to_struct("my_struct")
|
79
|
+
# .to_frame
|
80
|
+
# )
|
81
|
+
# df = df.with_column(
|
82
|
+
# Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
|
83
|
+
# )
|
84
|
+
# df.select(Polars.col("my_struct").struct.field("INT"))
|
85
|
+
# # =>
|
86
|
+
# # shape: (2, 1)
|
87
|
+
# # ┌─────┐
|
88
|
+
# # │ INT │
|
89
|
+
# # │ --- │
|
90
|
+
# # │ i64 │
|
91
|
+
# # ╞═════╡
|
92
|
+
# # │ 1 │
|
93
|
+
# # ├╌╌╌╌╌┤
|
94
|
+
# # │ 2 │
|
95
|
+
# # └─────┘
|
23
96
|
def rename_fields(names)
  # Rename on the native expression, then wrap the result with
  # `Utils.wrap_expr` before handing it back.
  renamed_rbexpr = _rbexpr.struct_rename_fields(names)
  Utils.wrap_expr(renamed_rbexpr)
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module Polars
  # Series.struct namespace.
  class StructNameSpace
    include ExprDispatch

    self._accessor = "struct"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Retrieve one of the fields of this `Struct` as a new Series.
    #
    # @param item [Integer, String]
    #   Position of the field within {#fields} (negative positions count from
    #   the end), or the name of the field.
    #
    # @return [Series]
    #
    # @raise [IndexError] if an Integer `item` is outside the range of fields.
    # @raise [ArgumentError] if `item` is neither an Integer nor a String.
    def [](item)
      case item
      when Integer
        # `fetch` raises IndexError for an out-of-range position; plain `[]`
        # would return nil and silently forward nil as the field name.
        field(fields.fetch(item))
      when String
        field(item)
      else
        raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
      end
    end

    # Convert this Struct Series to a DataFrame.
    #
    # @return [DataFrame]
    def to_frame
      Utils.wrap_df(_s.struct_to_frame)
    end

    # Get the names of the fields.
    #
    # @return [Array]
    #   Empty when there is no underlying series.
    def fields
      return [] if _s.nil?

      _s.struct_fields
    end

    # Retrieve one of the fields of this `Struct` as a new Series.
    #
    # @param name [String]
    #   Name of the field
    #
    # @return [Series]
    def field(name)
      # NOTE(review): the actual work happens in whatever `super` resolves to
      # (presumably the ExprDispatch mixin) - confirm.
      super
    end

    # Rename the fields of the struct.
    #
    # @param names [Array]
    #   New names in the order of the struct's fields
    #
    # @return [Series]
    def rename_fields(names)
      # NOTE(review): the actual work happens in whatever `super` resolves to
      # (presumably the ExprDispatch mixin) - confirm.
      super
    end
  end
end
|
data/lib/polars/utils.rb
CHANGED
@@ -19,6 +19,52 @@ module Polars
|
|
19
19
|
Polars.col(name)
|
20
20
|
end
|
21
21
|
|
22
|
+
# Convert a duration value to the form Polars expects.
#
# Currently a pass-through: the argument is returned unchanged.
#
# @param td [Object]
#   Duration value.
#
# @return [Object] the same object that was passed in.
def self._timedelta_to_pl_duration(td)
  td
end
|
25
|
+
|
26
|
+
# Convert a date/time value to an integer timestamp counted from the Unix epoch.
#
# The result is computed with integer arithmetic only, so nanosecond
# timestamps keep their full precision (a Float cannot represent them exactly).
#
# @param dt [Time, DateTime, Date]
#   Value to convert. Anything that is not a `Time` goes through
#   `to_datetime`, so a plain `Date` is taken as midnight UTC.
# @param tu [String, nil]
#   Time unit of the result: `"ns"`, `"us"` or `"ms"`.
#   `nil` falls back to microseconds.
#
# @return [Integer]
#
# @raise [ArgumentError] if `tu` is not one of the supported units.
def self._datetime_to_pl_timestamp(dt, tu)
  # Resolve the unit first so that an unsupported `tu` is reported as such
  # before `dt` is touched.
  units_per_second =
    case tu
    when "ns" then 1_000_000_000
    when "us", nil then 1_000_000
    when "ms" then 1_000
    else
      raise ArgumentError, "tu must be one of {'ns', 'us', 'ms'}, got #{tu}"
    end

  # The epoch offset does not depend on the zone, so no UTC conversion is needed.
  time = dt.is_a?(Time) ? dt : dt.to_datetime.to_time
  time.to_i * units_per_second + time.nsec / (1_000_000_000 / units_per_second)
end
|
39
|
+
|
40
|
+
# Convert a raw Polars temporal value into a Ruby `Date` or `Time`.
#
# @param value [Integer]
#   For `:date`, the number of days since the Unix epoch; for datetime
#   dtypes, the number of `tu` units since the Unix epoch.
# @param dtype [Symbol, String]
#   `:date`, or a dtype whose string form starts with `"datetime["`.
# @param tu [String]
#   Time unit of `value` for datetime dtypes: `"ns"`, `"us"` or `"ms"`.
# @param tz [String, nil]
#   Time zone. Only `nil` / `""` is supported at the moment.
#
# @return [Date, Time] a `Date` for `:date`, otherwise a UTC `Time`.
#
# @raise [Todo] if a non-empty `tz` is given.
# @raise [ArgumentError] if `tu` is not one of the supported units.
# @raise [NotImplementedError] for any other `dtype`.
def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
  if dtype == :date
    # days to seconds
    # important to create from utc. Not doing this leads
    # to inconsistencies dependent on the timezone you are in.
    Time.at(value * 86400).utc.to_date
  # TODO fix dtype
  elsif dtype.to_s.start_with?("datetime[")
    # time zone aware values are not supported yet
    raise Todo unless tz.nil? || tz == ""

    # Integer division and modulo both round towards negative infinity, so
    # values before the epoch split into a whole second plus a non-negative
    # sub-second part as `Time.at` expects.
    case tu
    when "ns"
      Time.at(value / 1_000_000_000, value % 1_000_000_000, :nsec).utc
    when "us"
      Time.at(value / 1_000_000, value % 1_000_000, :usec).utc
    when "ms"
      Time.at(value / 1_000, value % 1_000, :millisecond).utc
    else
      raise ArgumentError, "tu must be one of {'ns', 'us', 'ms'}, got #{tu}"
    end
  else
    raise NotImplementedError
  end
end
|
67
|
+
|
22
68
|
def self.selection_to_rbexpr_list(exprs)
|
23
69
|
if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
24
70
|
exprs = [exprs]
|
@@ -49,12 +95,30 @@ module Polars
|
|
49
95
|
|
50
96
|
# TODO fix
|
51
97
|
# Tell whether `data_type` is given as a Symbol or a String.
#
# @param data_type [Object]
#
# @return [Boolean]
def self.is_polars_dtype(data_type)
  case data_type
  when Symbol, String then true
  else false
  end
end
|
54
100
|
|
101
|
+
# Polars data type used for values of each supported Ruby class.
# Frozen so the shared lookup table cannot be modified by accident.
RB_TYPE_TO_DTYPE = {
  Float => :f64,
  Integer => :i64,
  String => :str,
  TrueClass => :bool,
  FalseClass => :bool,
  Date => :date,
  DateTime => :datetime
}.freeze

# TODO fix
# Convert a Ruby class, or an already given dtype, to a Polars dtype name.
#
# @param data_type [Class, Symbol, String]
#   A value accepted by `is_polars_dtype` is returned as a String unchanged;
#   otherwise it must be one of the keys of `RB_TYPE_TO_DTYPE`.
#
# @return [String]
#
# @raise [ArgumentError] if there is no mapping for `data_type`.
def self.rb_type_to_dtype(data_type)
  return data_type.to_s if is_polars_dtype(data_type)

  dtype = RB_TYPE_TO_DTYPE.fetch(data_type) do
    raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
  end
  dtype.to_s
end
|
59
123
|
|
60
124
|
def self._process_null_values(null_values)
|
@@ -103,5 +167,9 @@ module Polars
|
|
103
167
|
sz
|
104
168
|
end
|
105
169
|
end
|
170
|
+
|
171
|
+
# Tell whether `value` is exactly `true` or `false`.
#
# The check is made on the class of `value` rather than with `==`, because
# `value == true` dispatches to the operand's own `==`; an object that
# overloads `==` to return something other than a boolean would otherwise
# make this predicate return a truthy non-boolean.
#
# @param value [Object]
#
# @return [Boolean]
def self.bool?(value)
  value.is_a?(TrueClass) || value.is_a?(FalseClass)
end
|
106
174
|
end
|
107
175
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars/when.rb
CHANGED
data/lib/polars/when_then.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -1,38 +1,40 @@
|
|
1
1
|
# ext
|
2
2
|
require "polars/polars"
|
3
3
|
|
4
|
+
# stdlib
|
5
|
+
require "date"
|
6
|
+
|
4
7
|
# modules
|
8
|
+
require "polars/expr_dispatch"
|
5
9
|
require "polars/batched_csv_reader"
|
6
10
|
require "polars/cat_expr"
|
11
|
+
require "polars/cat_name_space"
|
7
12
|
require "polars/data_frame"
|
8
13
|
require "polars/date_time_expr"
|
14
|
+
require "polars/date_time_name_space"
|
15
|
+
require "polars/exceptions"
|
9
16
|
require "polars/expr"
|
10
17
|
require "polars/functions"
|
18
|
+
require "polars/group_by"
|
11
19
|
require "polars/io"
|
12
20
|
require "polars/lazy_frame"
|
13
21
|
require "polars/lazy_functions"
|
14
22
|
require "polars/lazy_group_by"
|
15
23
|
require "polars/list_expr"
|
24
|
+
require "polars/list_name_space"
|
16
25
|
require "polars/meta_expr"
|
17
26
|
require "polars/series"
|
27
|
+
require "polars/slice"
|
18
28
|
require "polars/string_expr"
|
29
|
+
require "polars/string_name_space"
|
19
30
|
require "polars/struct_expr"
|
31
|
+
require "polars/struct_name_space"
|
20
32
|
require "polars/utils"
|
21
33
|
require "polars/version"
|
22
34
|
require "polars/when"
|
23
35
|
require "polars/when_then"
|
24
36
|
|
25
37
|
module Polars
|
26
|
-
# @private
|
27
|
-
class Error < StandardError; end
|
28
|
-
|
29
|
-
# @private
|
30
|
-
class Todo < Error
|
31
|
-
def message
|
32
|
-
"not implemented yet"
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
38
|
extend Functions
|
37
39
|
extend IO
|
38
40
|
extend LazyFunctions
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -31,6 +31,7 @@ extensions:
|
|
31
31
|
- ext/polars/extconf.rb
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
|
+
- ".yardopts"
|
34
35
|
- CHANGELOG.md
|
35
36
|
- Cargo.lock
|
36
37
|
- Cargo.toml
|
@@ -50,24 +51,36 @@ files:
|
|
50
51
|
- ext/polars/src/lazy/mod.rs
|
51
52
|
- ext/polars/src/lazy/utils.rs
|
52
53
|
- ext/polars/src/lib.rs
|
54
|
+
- ext/polars/src/list_construction.rs
|
53
55
|
- ext/polars/src/series.rs
|
56
|
+
- ext/polars/src/set.rs
|
57
|
+
- ext/polars/src/utils.rs
|
54
58
|
- lib/polars-df.rb
|
55
59
|
- lib/polars.rb
|
56
60
|
- lib/polars/batched_csv_reader.rb
|
57
61
|
- lib/polars/cat_expr.rb
|
62
|
+
- lib/polars/cat_name_space.rb
|
58
63
|
- lib/polars/data_frame.rb
|
59
64
|
- lib/polars/date_time_expr.rb
|
65
|
+
- lib/polars/date_time_name_space.rb
|
66
|
+
- lib/polars/exceptions.rb
|
60
67
|
- lib/polars/expr.rb
|
68
|
+
- lib/polars/expr_dispatch.rb
|
61
69
|
- lib/polars/functions.rb
|
70
|
+
- lib/polars/group_by.rb
|
62
71
|
- lib/polars/io.rb
|
63
72
|
- lib/polars/lazy_frame.rb
|
64
73
|
- lib/polars/lazy_functions.rb
|
65
74
|
- lib/polars/lazy_group_by.rb
|
66
75
|
- lib/polars/list_expr.rb
|
76
|
+
- lib/polars/list_name_space.rb
|
67
77
|
- lib/polars/meta_expr.rb
|
68
78
|
- lib/polars/series.rb
|
79
|
+
- lib/polars/slice.rb
|
69
80
|
- lib/polars/string_expr.rb
|
81
|
+
- lib/polars/string_name_space.rb
|
70
82
|
- lib/polars/struct_expr.rb
|
83
|
+
- lib/polars/struct_name_space.rb
|
71
84
|
- lib/polars/utils.rb
|
72
85
|
- lib/polars/version.rb
|
73
86
|
- lib/polars/when.rb
|