polars-df 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +74 -3
- data/Cargo.toml +3 -0
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +18 -1
- data/ext/polars/src/conversion.rs +115 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dsl.rs +157 -2
- data/ext/polars/src/lib.rs +185 -10
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +217 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +2384 -140
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +4374 -53
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +518 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1267 -69
- data/lib/polars/lazy_functions.rb +412 -24
- data/lib/polars/lazy_group_by.rb +80 -0
- data/lib/polars/list_expr.rb +507 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2256 -242
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +847 -10
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +71 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +12 -10
- metadata +15 -2
data/lib/polars/struct_expr.rb
CHANGED
@@ -1,11 +1,17 @@
|
|
1
1
|
module Polars
|
2
|
+
# Namespace for struct related expressions.
|
2
3
|
class StructExpr
|
4
|
+
# @private
|
3
5
|
attr_accessor :_rbexpr
|
4
6
|
|
7
|
+
# @private
|
5
8
|
# Build the namespace from an expression-like object by taking over its
# underlying `_rbexpr` handle.
def initialize(expr)
  rbexpr = expr._rbexpr
  self._rbexpr = rbexpr
end
|
8
11
|
|
12
|
+
# Retrieve one of the fields of this `Struct` as a new expression.
|
13
|
+
#
|
14
|
+
# @return [Expr]
|
9
15
|
def [](item)
|
10
16
|
if item.is_a?(String)
|
11
17
|
field(item)
|
@@ -16,10 +22,77 @@ module Polars
|
|
16
22
|
end
|
17
23
|
end
|
18
24
|
|
25
|
+
# Retrieve one of the fields of this `Struct` as a new expression.
|
26
|
+
#
|
27
|
+
# @param name [String]
|
28
|
+
# Name of the field
|
29
|
+
#
|
30
|
+
# @return [Expr]
|
31
|
+
#
|
32
|
+
# @example
|
33
|
+
# df = (
|
34
|
+
# Polars::DataFrame.new(
|
35
|
+
# {
|
36
|
+
# "int" => [1, 2],
|
37
|
+
# "str" => ["a", "b"],
|
38
|
+
# "bool" => [true, nil],
|
39
|
+
# "list" => [[1, 2], [3]]
|
40
|
+
# }
|
41
|
+
# )
|
42
|
+
# .to_struct("my_struct")
|
43
|
+
# .to_frame
|
44
|
+
# )
|
45
|
+
# df.select(Polars.col("my_struct").struct.field("str"))
|
46
|
+
# # =>
|
47
|
+
# # shape: (2, 1)
|
48
|
+
# # ┌─────┐
|
49
|
+
# # │ str │
|
50
|
+
# # │ --- │
|
51
|
+
# # │ str │
|
52
|
+
# # ╞═════╡
|
53
|
+
# # │ a │
|
54
|
+
# # ├╌╌╌╌╌┤
|
55
|
+
# # │ b │
|
56
|
+
# # └─────┘
|
19
57
|
def field(name)
  # Look the field up by name on the native expression, then wrap the
  # result via Utils.wrap_expr.
  rbexpr = _rbexpr.struct_field_by_name(name)
  Utils.wrap_expr(rbexpr)
end
|
22
60
|
|
61
|
+
# Rename the fields of the struct.
|
62
|
+
#
|
63
|
+
# @param names [Array]
|
64
|
+
# New names in the order of the struct's fields
|
65
|
+
#
|
66
|
+
# @return [Expr]
|
67
|
+
#
|
68
|
+
# @example
|
69
|
+
# df = (
|
70
|
+
# Polars::DataFrame.new(
|
71
|
+
# {
|
72
|
+
# "int" => [1, 2],
|
73
|
+
# "str" => ["a", "b"],
|
74
|
+
# "bool" => [true, nil],
|
75
|
+
# "list" => [[1, 2], [3]]
|
76
|
+
# }
|
77
|
+
# )
|
78
|
+
# .to_struct("my_struct")
|
79
|
+
# .to_frame
|
80
|
+
# )
|
81
|
+
# df = df.with_column(
|
82
|
+
# Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
|
83
|
+
# )
|
84
|
+
# df.select(Polars.col("my_struct").struct.field("INT"))
|
85
|
+
# # =>
|
86
|
+
# # shape: (2, 1)
|
87
|
+
# # ┌─────┐
|
88
|
+
# # │ INT │
|
89
|
+
# # │ --- │
|
90
|
+
# # │ i64 │
|
91
|
+
# # ╞═════╡
|
92
|
+
# # │ 1 │
|
93
|
+
# # ├╌╌╌╌╌┤
|
94
|
+
# # │ 2 │
|
95
|
+
# # └─────┘
|
23
96
|
def rename_fields(names)
  # Apply the new names on the native expression, then wrap the result
  # via Utils.wrap_expr.
  rbexpr = _rbexpr.struct_rename_fields(names)
  Utils.wrap_expr(rbexpr)
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module Polars
  # Series.struct namespace.
  #
  # Holds the native series handle in `_s` and exposes struct-specific
  # operations on it.
  class StructNameSpace
    include ExprDispatch

    self._accessor = "struct"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Retrieve one of the fields of this `Struct` as a new Series.
    #
    # @param item [Integer, String]
    #   Position of the field (resolved to a name through {#fields}) or the
    #   field name itself.
    #
    # @return [Series]
    #
    # @raise [ArgumentError] if `item` is neither an Integer nor a String
    def [](item)
      case item
      when Integer
        # Positional lookup: translate the index into a field name first.
        field(fields[item])
      when String
        field(item)
      else
        raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
      end
    end

    # Convert this Struct Series to a DataFrame.
    #
    # @return [DataFrame]
    def to_frame
      frame = _s.struct_to_frame
      Utils.wrap_df(frame)
    end

    # Get the names of the fields.
    #
    # @return [Array] empty when no native series is attached
    def fields
      return [] if _s.nil?

      _s.struct_fields
    end

    # Retrieve one of the fields of this `Struct` as a new Series.
    #
    # @param name [String]
    #   Name of the field
    #
    # @return [Series]
    def field(name)
      # Zero-argument `super` forwards `name` up the ancestor chain
      # (presumably handled by ExprDispatch - confirm).
      super
    end

    # Rename the fields of the struct.
    #
    # @param names [Array]
    #   New names in the order of the struct's fields
    #
    # @return [Series]
    def rename_fields(names)
      # Zero-argument `super` forwards `names` up the ancestor chain
      # (presumably handled by ExprDispatch - confirm).
      super
    end
  end
end
|
data/lib/polars/utils.rb
CHANGED
@@ -19,6 +19,52 @@ module Polars
|
|
19
19
|
Polars.col(name)
|
20
20
|
end
|
21
21
|
|
22
|
+
# Hook for turning a duration into a Polars duration (going by the method
# name). At present it is a pass-through.
#
# @param td [Object] duration value
# @return [Object] the very object that was passed in
def self._timedelta_to_pl_duration(td)
  td
end
|
25
|
+
|
26
|
+
# Convert a date/time value into an integer count of `tu` units since the
# Unix epoch.
#
# The result is assembled from the integer seconds and the nanosecond field
# of a `Time`, so it is exact (a Float multiply loses precision at
# nanosecond scale) and it does not rely on `DateTime#utc` / `DateTime#to_f`,
# which the standard library does not define.
#
# @param dt [#to_datetime] value to convert, e.g. `Time`, `DateTime` or `Date`
# @param tu [String, nil] "ns", "us" or "ms"; `nil` is treated like "us"
# @return [Integer]
# @raise [ArgumentError] if `tu` is not one of the supported units
def self._datetime_to_pl_timestamp(dt, tu)
  units_per_second =
    case tu
    when "ns" then 1_000_000_000
    when "us", nil then 1_000_000
    when "ms" then 1_000
    else
      raise ArgumentError, "tu must be one of {'ns', 'us', 'ms'}, got #{tu}"
    end

  # Going through DateTime keeps a plain Date anchored at midnight UTC
  # instead of midnight in the local zone.
  time = dt.to_datetime.to_time
  nanos_per_unit = 1_000_000_000 / units_per_second
  time.to_i * units_per_second + time.nsec / nanos_per_unit
end
|
39
|
+
|
40
|
+
# Convert a raw temporal value into a Ruby `Date` or `Time`.
#
# @param value [Integer] for `:date`, a day count that is multiplied by
#   86400 and fed to `Time.at`; otherwise a count of `tu` units fed to
#   `Time.at` as whole seconds plus a sub-second remainder
# @param dtype [Symbol, String] `:date`, or a dtype whose string form starts
#   with "datetime["
# @param tu [String] unit of `value`: "ns", "us" or "ms"
# @param tz [String, nil] time zone; only `nil` or "" is handled so far
# @return [Date, Time] a `Date` for `:date`, otherwise a `Time` in UTC
# @raise [ArgumentError] if `tu` is not one of the supported units
# @raise [Todo] if a non-empty time zone is given
# @raise [NotImplementedError] for any other dtype
def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
  if dtype == :date
    # days to seconds
    # important to create from utc. Not doing this leads
    # to inconsistencies dependent on the timezone you are in.
    return Time.at(value * 86400).utc.to_date
  end

  # TODO fix dtype
  raise NotImplementedError unless dtype.to_s.start_with?("datetime[")
  raise Todo unless tz.nil? || tz == ""

  # Integer division floors and modulo is non-negative, so values before the
  # epoch split into (seconds, sub-second remainder) correctly as well.
  case tu
  when "ns"
    Time.at(value / 1_000_000_000, value % 1_000_000_000, :nsec).utc
  when "us"
    Time.at(value / 1_000_000, value % 1_000_000, :usec).utc
  when "ms"
    Time.at(value / 1_000, value % 1_000, :millisecond).utc
  else
    raise ArgumentError, "tu must be one of {'ns', 'us', 'ms'}, got #{tu}"
  end
end
|
67
|
+
|
22
68
|
def self.selection_to_rbexpr_list(exprs)
|
23
69
|
if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
24
70
|
exprs = [exprs]
|
@@ -49,12 +95,30 @@ module Polars
|
|
49
95
|
|
50
96
|
# TODO fix
|
51
97
|
# Report whether `data_type` is a Symbol or a String, which is what this
# check currently treats as a Polars dtype.
#
# @param data_type [Object]
# @return [Boolean]
def self.is_polars_dtype(data_type)
  case data_type
  when Symbol, String then true
  else false
  end
end
|
54
100
|
|
101
|
+
# Lookup table from a Ruby class to a dtype symbol.
# Frozen so that the shared table cannot be mutated at runtime.
RB_TYPE_TO_DTYPE = {
  Float => :f64,
  Integer => :i64,
  String => :str,
  TrueClass => :bool,
  FalseClass => :bool,
  Date => :date,
  DateTime => :datetime
}.freeze
|
110
|
+
|
55
111
|
# TODO fix
|
56
|
-
def self.rb_type_to_dtype(
|
57
|
-
|
112
|
+
# Resolve a dtype given either as a Polars dtype or as a Ruby class.
#
# Values accepted by `is_polars_dtype` are returned as their string form.
# Anything else is looked up in `RB_TYPE_TO_DTYPE`.
#
# @param data_type [Symbol, String, Class]
# @return [String]
# @raise [ArgumentError] if the Ruby class has no entry in `RB_TYPE_TO_DTYPE`
def self.rb_type_to_dtype(data_type)
  return data_type.to_s if is_polars_dtype(data_type)

  # The fetch block handles the missing-key case directly, so no KeyError
  # has to be raised and rescued just to be translated.
  dtype = RB_TYPE_TO_DTYPE.fetch(data_type) do
    raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
  end
  dtype.to_s
end
|
59
123
|
|
60
124
|
def self._process_null_values(null_values)
|
@@ -103,5 +167,9 @@ module Polars
|
|
103
167
|
sz
|
104
168
|
end
|
105
169
|
end
|
170
|
+
|
171
|
+
# Report whether `value` is exactly `true` or `false`.
#
# The comparison is driven from the `true`/`false` side, so an argument that
# overloads `==` (for instance to build a comparison expression) cannot make
# this predicate return a truthy non-boolean.
#
# @param value [Object]
# @return [Boolean]
def self.bool?(value)
  [true, false].include?(value)
end
|
106
174
|
end
|
107
175
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars/when.rb
CHANGED
data/lib/polars/when_then.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -1,38 +1,40 @@
|
|
1
1
|
# ext
|
2
2
|
require "polars/polars"
|
3
3
|
|
4
|
+
# stdlib
|
5
|
+
require "date"
|
6
|
+
|
4
7
|
# modules
|
8
|
+
require "polars/expr_dispatch"
|
5
9
|
require "polars/batched_csv_reader"
|
6
10
|
require "polars/cat_expr"
|
11
|
+
require "polars/cat_name_space"
|
7
12
|
require "polars/data_frame"
|
8
13
|
require "polars/date_time_expr"
|
14
|
+
require "polars/date_time_name_space"
|
15
|
+
require "polars/exceptions"
|
9
16
|
require "polars/expr"
|
10
17
|
require "polars/functions"
|
18
|
+
require "polars/group_by"
|
11
19
|
require "polars/io"
|
12
20
|
require "polars/lazy_frame"
|
13
21
|
require "polars/lazy_functions"
|
14
22
|
require "polars/lazy_group_by"
|
15
23
|
require "polars/list_expr"
|
24
|
+
require "polars/list_name_space"
|
16
25
|
require "polars/meta_expr"
|
17
26
|
require "polars/series"
|
27
|
+
require "polars/slice"
|
18
28
|
require "polars/string_expr"
|
29
|
+
require "polars/string_name_space"
|
19
30
|
require "polars/struct_expr"
|
31
|
+
require "polars/struct_name_space"
|
20
32
|
require "polars/utils"
|
21
33
|
require "polars/version"
|
22
34
|
require "polars/when"
|
23
35
|
require "polars/when_then"
|
24
36
|
|
25
37
|
module Polars
|
26
|
-
# @private
|
27
|
-
class Error < StandardError; end
|
28
|
-
|
29
|
-
# @private
|
30
|
-
class Todo < Error
|
31
|
-
def message
|
32
|
-
"not implemented yet"
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
38
|
extend Functions
|
37
39
|
extend IO
|
38
40
|
extend LazyFunctions
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -31,6 +31,7 @@ extensions:
|
|
31
31
|
- ext/polars/extconf.rb
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
|
+
- ".yardopts"
|
34
35
|
- CHANGELOG.md
|
35
36
|
- Cargo.lock
|
36
37
|
- Cargo.toml
|
@@ -50,24 +51,36 @@ files:
|
|
50
51
|
- ext/polars/src/lazy/mod.rs
|
51
52
|
- ext/polars/src/lazy/utils.rs
|
52
53
|
- ext/polars/src/lib.rs
|
54
|
+
- ext/polars/src/list_construction.rs
|
53
55
|
- ext/polars/src/series.rs
|
56
|
+
- ext/polars/src/set.rs
|
57
|
+
- ext/polars/src/utils.rs
|
54
58
|
- lib/polars-df.rb
|
55
59
|
- lib/polars.rb
|
56
60
|
- lib/polars/batched_csv_reader.rb
|
57
61
|
- lib/polars/cat_expr.rb
|
62
|
+
- lib/polars/cat_name_space.rb
|
58
63
|
- lib/polars/data_frame.rb
|
59
64
|
- lib/polars/date_time_expr.rb
|
65
|
+
- lib/polars/date_time_name_space.rb
|
66
|
+
- lib/polars/exceptions.rb
|
60
67
|
- lib/polars/expr.rb
|
68
|
+
- lib/polars/expr_dispatch.rb
|
61
69
|
- lib/polars/functions.rb
|
70
|
+
- lib/polars/group_by.rb
|
62
71
|
- lib/polars/io.rb
|
63
72
|
- lib/polars/lazy_frame.rb
|
64
73
|
- lib/polars/lazy_functions.rb
|
65
74
|
- lib/polars/lazy_group_by.rb
|
66
75
|
- lib/polars/list_expr.rb
|
76
|
+
- lib/polars/list_name_space.rb
|
67
77
|
- lib/polars/meta_expr.rb
|
68
78
|
- lib/polars/series.rb
|
79
|
+
- lib/polars/slice.rb
|
69
80
|
- lib/polars/string_expr.rb
|
81
|
+
- lib/polars/string_name_space.rb
|
70
82
|
- lib/polars/struct_expr.rb
|
83
|
+
- lib/polars/struct_name_space.rb
|
71
84
|
- lib/polars/utils.rb
|
72
85
|
- lib/polars/version.rb
|
73
86
|
- lib/polars/when.rb
|