polars-df 0.1.1 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +2 -1
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +139 -6
- data/ext/polars/src/dataframe.rs +360 -15
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +135 -3
- data/ext/polars/src/lazy/dsl.rs +97 -2
- data/ext/polars/src/lazy/meta.rs +1 -1
- data/ext/polars/src/lazy/mod.rs +1 -0
- data/ext/polars/src/lib.rs +227 -12
- data/ext/polars/src/series.rs +190 -38
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +96 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/data_frame.rb +2813 -100
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +631 -11
- data/lib/polars/expr_dispatch.rb +14 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +517 -0
- data/lib/polars/io.rb +763 -4
- data/lib/polars/lazy_frame.rb +1415 -67
- data/lib/polars/lazy_functions.rb +430 -9
- data/lib/polars/lazy_group_by.rb +79 -0
- data/lib/polars/list_expr.rb +5 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2244 -192
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +663 -2
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/utils.rb +76 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +8 -2
- metadata +12 -2
data/lib/polars/struct_expr.rb
CHANGED
@@ -1,11 +1,17 @@
|
|
1
1
|
module Polars
|
2
|
+
# Namespace for struct related expressions.
|
2
3
|
class StructExpr
|
4
|
+
# @private
|
3
5
|
attr_accessor :_rbexpr
|
4
6
|
|
7
|
+
# @private
|
5
8
|
def initialize(expr)
|
6
9
|
self._rbexpr = expr._rbexpr
|
7
10
|
end
|
8
11
|
|
12
|
+
# Retrieve one of the fields of this `Struct` as a new Series.
|
13
|
+
#
|
14
|
+
# @return [Expr]
|
9
15
|
def [](item)
|
10
16
|
if item.is_a?(String)
|
11
17
|
field(item)
|
@@ -16,10 +22,77 @@ module Polars
|
|
16
22
|
end
|
17
23
|
end
|
18
24
|
|
25
|
+
# Retrieve one of the fields of this `Struct` as a new Series.
|
26
|
+
#
|
27
|
+
# @param name [String]
|
28
|
+
# Name of the field
|
29
|
+
#
|
30
|
+
# @return [Expr]
|
31
|
+
#
|
32
|
+
# @example
|
33
|
+
# df = (
|
34
|
+
# Polars::DataFrame.new(
|
35
|
+
# {
|
36
|
+
# "int" => [1, 2],
|
37
|
+
# "str" => ["a", "b"],
|
38
|
+
# "bool" => [true, nil],
|
39
|
+
# "list" => [[1, 2], [3]]
|
40
|
+
# }
|
41
|
+
# )
|
42
|
+
# .to_struct("my_struct")
|
43
|
+
# .to_frame
|
44
|
+
# )
|
45
|
+
# df.select(Polars.col("my_struct").struct.field("str"))
|
46
|
+
# # =>
|
47
|
+
# # shape: (2, 1)
|
48
|
+
# # ┌─────┐
|
49
|
+
# # │ str │
|
50
|
+
# # │ --- │
|
51
|
+
# # │ str │
|
52
|
+
# # ╞═════╡
|
53
|
+
# # │ a │
|
54
|
+
# # ├╌╌╌╌╌┤
|
55
|
+
# # │ b │
|
56
|
+
# # └─────┘
|
19
57
|
def field(name)
|
20
58
|
Utils.wrap_expr(_rbexpr.struct_field_by_name(name))
|
21
59
|
end
|
22
60
|
|
61
|
+
# Rename the fields of the struct.
|
62
|
+
#
|
63
|
+
# @param names [Array]
|
64
|
+
# New names in the order of the struct's fields
|
65
|
+
#
|
66
|
+
# @return [Expr]
|
67
|
+
#
|
68
|
+
# @example
|
69
|
+
# df = (
|
70
|
+
# Polars::DataFrame.new(
|
71
|
+
# {
|
72
|
+
# "int" => [1, 2],
|
73
|
+
# "str" => ["a", "b"],
|
74
|
+
# "bool" => [true, nil],
|
75
|
+
# "list" => [[1, 2], [3]]
|
76
|
+
# }
|
77
|
+
# )
|
78
|
+
# .to_struct("my_struct")
|
79
|
+
# .to_frame
|
80
|
+
# )
|
81
|
+
# df = df.with_column(
|
82
|
+
# Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
|
83
|
+
# )
|
84
|
+
# df.select(Polars.col("my_struct").struct.field("INT"))
|
85
|
+
# # =>
|
86
|
+
# # shape: (2, 1)
|
87
|
+
# # ┌─────┐
|
88
|
+
# # │ INT │
|
89
|
+
# # │ --- │
|
90
|
+
# # │ i64 │
|
91
|
+
# # ╞═════╡
|
92
|
+
# # │ 1 │
|
93
|
+
# # ├╌╌╌╌╌┤
|
94
|
+
# # │ 2 │
|
95
|
+
# # └─────┘
|
23
96
|
def rename_fields(names)
|
24
97
|
Utils.wrap_expr(_rbexpr.struct_rename_fields(names))
|
25
98
|
end
|
data/lib/polars/utils.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
module Polars
|
2
|
+
# @private
|
2
3
|
module Utils
|
3
4
|
DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
|
4
5
|
|
@@ -18,6 +19,24 @@ module Polars
|
|
18
19
|
Polars.col(name)
|
19
20
|
end
|
20
21
|
|
22
|
+
def self._timedelta_to_pl_duration(td)
|
23
|
+
td
|
24
|
+
end
|
25
|
+
|
26
|
+
def self._datetime_to_pl_timestamp(dt, tu)
|
27
|
+
if tu == "ns"
|
28
|
+
(dt.to_datetime.utc.to_f * 1e9).to_i
|
29
|
+
elsif tu == "us"
|
30
|
+
(dt.to_datetime.utc.to_f * 1e6).to_i
|
31
|
+
elsif tu == "ms"
|
32
|
+
(dt.to_datetime.utc.to_f * 1e3).to_i
|
33
|
+
elsif tu.nil?
|
34
|
+
(dt.to_datetime.utc.to_f * 1e6).to_i
|
35
|
+
else
|
36
|
+
raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
21
40
|
def self.selection_to_rbexpr_list(exprs)
|
22
41
|
if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
23
42
|
exprs = [exprs]
|
@@ -48,12 +67,62 @@ module Polars
|
|
48
67
|
|
49
68
|
# TODO fix
|
50
69
|
def self.is_polars_dtype(data_type)
|
51
|
-
|
70
|
+
data_type.is_a?(Symbol) || data_type.is_a?(String)
|
52
71
|
end
|
53
72
|
|
73
|
+
RB_TYPE_TO_DTYPE = {
|
74
|
+
Float => :f64,
|
75
|
+
Integer => :i64,
|
76
|
+
String => :str,
|
77
|
+
TrueClass => :bool,
|
78
|
+
FalseClass => :bool,
|
79
|
+
Date => :date,
|
80
|
+
DateTime => :datetime
|
81
|
+
}
|
82
|
+
|
54
83
|
# TODO fix
|
55
|
-
def self.rb_type_to_dtype(
|
56
|
-
|
84
|
+
def self.rb_type_to_dtype(data_type)
|
85
|
+
if is_polars_dtype(data_type)
|
86
|
+
return data_type.to_s
|
87
|
+
end
|
88
|
+
|
89
|
+
begin
|
90
|
+
RB_TYPE_TO_DTYPE.fetch(data_type).to_s
|
91
|
+
rescue KeyError
|
92
|
+
raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def self._process_null_values(null_values)
|
97
|
+
if null_values.is_a?(Hash)
|
98
|
+
null_values.to_a
|
99
|
+
else
|
100
|
+
null_values
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
|
105
|
+
if !row_count_name.nil?
|
106
|
+
[row_count_name, row_count_offset]
|
107
|
+
else
|
108
|
+
nil
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def self.handle_projection_columns(columns)
|
113
|
+
projection = nil
|
114
|
+
if columns
|
115
|
+
raise Todo
|
116
|
+
# if columns.is_a?(String) || columns.is_a?(Symbol)
|
117
|
+
# columns = [columns]
|
118
|
+
# elsif is_int_sequence(columns)
|
119
|
+
# projection = columns.to_a
|
120
|
+
# columns = nil
|
121
|
+
# elsif !is_str_sequence(columns)
|
122
|
+
# raise ArgumentError, "columns arg should contain a list of all integers or all strings values."
|
123
|
+
# end
|
124
|
+
end
|
125
|
+
[projection, columns]
|
57
126
|
end
|
58
127
|
|
59
128
|
def self.scale_bytes(sz, to:)
|
@@ -70,5 +139,9 @@ module Polars
|
|
70
139
|
sz
|
71
140
|
end
|
72
141
|
end
|
142
|
+
|
143
|
+
def self.bool?(value)
|
144
|
+
value == true || value == false
|
145
|
+
end
|
73
146
|
end
|
74
147
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars/when.rb
CHANGED
data/lib/polars/when_then.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -1,12 +1,19 @@
|
|
1
1
|
# ext
|
2
2
|
require "polars/polars"
|
3
3
|
|
4
|
+
# stdlib
|
5
|
+
require "date"
|
6
|
+
|
4
7
|
# modules
|
8
|
+
require "polars/expr_dispatch"
|
9
|
+
require "polars/batched_csv_reader"
|
5
10
|
require "polars/cat_expr"
|
6
11
|
require "polars/data_frame"
|
7
12
|
require "polars/date_time_expr"
|
13
|
+
require "polars/exceptions"
|
8
14
|
require "polars/expr"
|
9
15
|
require "polars/functions"
|
16
|
+
require "polars/group_by"
|
10
17
|
require "polars/io"
|
11
18
|
require "polars/lazy_frame"
|
12
19
|
require "polars/lazy_functions"
|
@@ -14,6 +21,7 @@ require "polars/lazy_group_by"
|
|
14
21
|
require "polars/list_expr"
|
15
22
|
require "polars/meta_expr"
|
16
23
|
require "polars/series"
|
24
|
+
require "polars/slice"
|
17
25
|
require "polars/string_expr"
|
18
26
|
require "polars/struct_expr"
|
19
27
|
require "polars/utils"
|
@@ -22,8 +30,6 @@ require "polars/when"
|
|
22
30
|
require "polars/when_then"
|
23
31
|
|
24
32
|
module Polars
|
25
|
-
class Error < StandardError; end
|
26
|
-
|
27
33
|
extend Functions
|
28
34
|
extend IO
|
29
35
|
extend LazyFunctions
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-
|
11
|
+
date: 2022-11-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -31,6 +31,7 @@ extensions:
|
|
31
31
|
- ext/polars/extconf.rb
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
|
+
- ".yardopts"
|
34
35
|
- CHANGELOG.md
|
35
36
|
- Cargo.lock
|
36
37
|
- Cargo.toml
|
@@ -38,10 +39,12 @@ files:
|
|
38
39
|
- README.md
|
39
40
|
- ext/polars/Cargo.toml
|
40
41
|
- ext/polars/extconf.rb
|
42
|
+
- ext/polars/src/batched_csv.rs
|
41
43
|
- ext/polars/src/conversion.rs
|
42
44
|
- ext/polars/src/dataframe.rs
|
43
45
|
- ext/polars/src/error.rs
|
44
46
|
- ext/polars/src/file.rs
|
47
|
+
- ext/polars/src/lazy/apply.rs
|
45
48
|
- ext/polars/src/lazy/dataframe.rs
|
46
49
|
- ext/polars/src/lazy/dsl.rs
|
47
50
|
- ext/polars/src/lazy/meta.rs
|
@@ -49,13 +52,19 @@ files:
|
|
49
52
|
- ext/polars/src/lazy/utils.rs
|
50
53
|
- ext/polars/src/lib.rs
|
51
54
|
- ext/polars/src/series.rs
|
55
|
+
- ext/polars/src/set.rs
|
56
|
+
- ext/polars/src/utils.rs
|
52
57
|
- lib/polars-df.rb
|
53
58
|
- lib/polars.rb
|
59
|
+
- lib/polars/batched_csv_reader.rb
|
54
60
|
- lib/polars/cat_expr.rb
|
55
61
|
- lib/polars/data_frame.rb
|
56
62
|
- lib/polars/date_time_expr.rb
|
63
|
+
- lib/polars/exceptions.rb
|
57
64
|
- lib/polars/expr.rb
|
65
|
+
- lib/polars/expr_dispatch.rb
|
58
66
|
- lib/polars/functions.rb
|
67
|
+
- lib/polars/group_by.rb
|
59
68
|
- lib/polars/io.rb
|
60
69
|
- lib/polars/lazy_frame.rb
|
61
70
|
- lib/polars/lazy_functions.rb
|
@@ -63,6 +72,7 @@ files:
|
|
63
72
|
- lib/polars/list_expr.rb
|
64
73
|
- lib/polars/meta_expr.rb
|
65
74
|
- lib/polars/series.rb
|
75
|
+
- lib/polars/slice.rb
|
66
76
|
- lib/polars/string_expr.rb
|
67
77
|
- lib/polars/struct_expr.rb
|
68
78
|
- lib/polars/utils.rb
|