polars-df 0.1.1 → 0.1.3
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +2 -1
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +139 -6
- data/ext/polars/src/dataframe.rs +360 -15
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +135 -3
- data/ext/polars/src/lazy/dsl.rs +97 -2
- data/ext/polars/src/lazy/meta.rs +1 -1
- data/ext/polars/src/lazy/mod.rs +1 -0
- data/ext/polars/src/lib.rs +227 -12
- data/ext/polars/src/series.rs +190 -38
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +96 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/data_frame.rb +2813 -100
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +631 -11
- data/lib/polars/expr_dispatch.rb +14 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +517 -0
- data/lib/polars/io.rb +763 -4
- data/lib/polars/lazy_frame.rb +1415 -67
- data/lib/polars/lazy_functions.rb +430 -9
- data/lib/polars/lazy_group_by.rb +79 -0
- data/lib/polars/list_expr.rb +5 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2244 -192
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +663 -2
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/utils.rb +76 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +8 -2
- metadata +12 -2
data/lib/polars/struct_expr.rb
CHANGED
@@ -1,11 +1,17 @@
|
|
1
1
|
module Polars
|
2
|
+
# Namespace for struct related expressions.
|
2
3
|
class StructExpr
|
4
|
+
# @private
|
3
5
|
attr_accessor :_rbexpr
|
4
6
|
|
7
|
+
# @private
|
5
8
|
def initialize(expr)
|
6
9
|
self._rbexpr = expr._rbexpr
|
7
10
|
end
|
8
11
|
|
12
|
+
# Retrieve one of the fields of this `Struct` as a new Series.
|
13
|
+
#
|
14
|
+
# @return [Expr]
|
9
15
|
def [](item)
|
10
16
|
if item.is_a?(String)
|
11
17
|
field(item)
|
@@ -16,10 +22,77 @@ module Polars
|
|
16
22
|
end
|
17
23
|
end
|
18
24
|
|
25
|
+
# Retrieve one of the fields of this `Struct` as a new Series.
|
26
|
+
#
|
27
|
+
# @param name [String]
|
28
|
+
# Name of the field
|
29
|
+
#
|
30
|
+
# @return [Expr]
|
31
|
+
#
|
32
|
+
# @example
|
33
|
+
# df = (
|
34
|
+
# Polars::DataFrame.new(
|
35
|
+
# {
|
36
|
+
# "int" => [1, 2],
|
37
|
+
# "str" => ["a", "b"],
|
38
|
+
# "bool" => [true, nil],
|
39
|
+
# "list" => [[1, 2], [3]]
|
40
|
+
# }
|
41
|
+
# )
|
42
|
+
# .to_struct("my_struct")
|
43
|
+
# .to_frame
|
44
|
+
# )
|
45
|
+
# df.select(Polars.col("my_struct").struct.field("str"))
|
46
|
+
# # =>
|
47
|
+
# # shape: (2, 1)
|
48
|
+
# # ┌─────┐
|
49
|
+
# # │ str │
|
50
|
+
# # │ --- │
|
51
|
+
# # │ str │
|
52
|
+
# # ╞═════╡
|
53
|
+
# # │ a │
|
54
|
+
# # ├╌╌╌╌╌┤
|
55
|
+
# # │ b │
|
56
|
+
# # └─────┘
|
19
57
|
def field(name)
|
20
58
|
Utils.wrap_expr(_rbexpr.struct_field_by_name(name))
|
21
59
|
end
|
22
60
|
|
61
|
+
# Rename the fields of the struct.
|
62
|
+
#
|
63
|
+
# @param names [Array]
|
64
|
+
# New names in the order of the struct's fields
|
65
|
+
#
|
66
|
+
# @return [Expr]
|
67
|
+
#
|
68
|
+
# @example
|
69
|
+
# df = (
|
70
|
+
# Polars::DataFrame.new(
|
71
|
+
# {
|
72
|
+
# "int" => [1, 2],
|
73
|
+
# "str" => ["a", "b"],
|
74
|
+
# "bool" => [true, nil],
|
75
|
+
# "list" => [[1, 2], [3]]
|
76
|
+
# }
|
77
|
+
# )
|
78
|
+
# .to_struct("my_struct")
|
79
|
+
# .to_frame
|
80
|
+
# )
|
81
|
+
# df = df.with_column(
|
82
|
+
# Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
|
83
|
+
# )
|
84
|
+
# df.select(Polars.col("my_struct").struct.field("INT"))
|
85
|
+
# # =>
|
86
|
+
# # shape: (2, 1)
|
87
|
+
# # ┌─────┐
|
88
|
+
# # │ INT │
|
89
|
+
# # │ --- │
|
90
|
+
# # │ i64 │
|
91
|
+
# # ╞═════╡
|
92
|
+
# # │ 1 │
|
93
|
+
# # ├╌╌╌╌╌┤
|
94
|
+
# # │ 2 │
|
95
|
+
# # └─────┘
|
23
96
|
def rename_fields(names)
|
24
97
|
Utils.wrap_expr(_rbexpr.struct_rename_fields(names))
|
25
98
|
end
|
data/lib/polars/utils.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
module Polars
|
2
|
+
# @private
|
2
3
|
module Utils
|
3
4
|
DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
|
4
5
|
|
@@ -18,6 +19,24 @@ module Polars
|
|
18
19
|
Polars.col(name)
|
19
20
|
end
|
20
21
|
|
22
|
+
def self._timedelta_to_pl_duration(td)
|
23
|
+
td
|
24
|
+
end
|
25
|
+
|
26
|
+
def self._datetime_to_pl_timestamp(dt, tu)
|
27
|
+
if tu == "ns"
|
28
|
+
(dt.to_datetime.utc.to_f * 1e9).to_i
|
29
|
+
elsif tu == "us"
|
30
|
+
(dt.to_datetime.utc.to_f * 1e6).to_i
|
31
|
+
elsif tu == "ms"
|
32
|
+
(dt.to_datetime.utc.to_f * 1e3).to_i
|
33
|
+
elsif tu.nil?
|
34
|
+
(dt.to_datetime.utc.to_f * 1e6).to_i
|
35
|
+
else
|
36
|
+
raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
21
40
|
def self.selection_to_rbexpr_list(exprs)
|
22
41
|
if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
23
42
|
exprs = [exprs]
|
@@ -48,12 +67,62 @@ module Polars
|
|
48
67
|
|
49
68
|
# TODO fix
|
50
69
|
def self.is_polars_dtype(data_type)
|
51
|
-
|
70
|
+
data_type.is_a?(Symbol) || data_type.is_a?(String)
|
52
71
|
end
|
53
72
|
|
73
|
+
RB_TYPE_TO_DTYPE = {
|
74
|
+
Float => :f64,
|
75
|
+
Integer => :i64,
|
76
|
+
String => :str,
|
77
|
+
TrueClass => :bool,
|
78
|
+
FalseClass => :bool,
|
79
|
+
Date => :date,
|
80
|
+
DateTime => :datetime
|
81
|
+
}
|
82
|
+
|
54
83
|
# TODO fix
|
55
|
-
def self.rb_type_to_dtype(
|
56
|
-
|
84
|
+
def self.rb_type_to_dtype(data_type)
|
85
|
+
if is_polars_dtype(data_type)
|
86
|
+
return data_type.to_s
|
87
|
+
end
|
88
|
+
|
89
|
+
begin
|
90
|
+
RB_TYPE_TO_DTYPE.fetch(data_type).to_s
|
91
|
+
rescue KeyError
|
92
|
+
raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def self._process_null_values(null_values)
|
97
|
+
if null_values.is_a?(Hash)
|
98
|
+
null_values.to_a
|
99
|
+
else
|
100
|
+
null_values
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
|
105
|
+
if !row_count_name.nil?
|
106
|
+
[row_count_name, row_count_offset]
|
107
|
+
else
|
108
|
+
nil
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def self.handle_projection_columns(columns)
|
113
|
+
projection = nil
|
114
|
+
if columns
|
115
|
+
raise Todo
|
116
|
+
# if columns.is_a?(String) || columns.is_a?(Symbol)
|
117
|
+
# columns = [columns]
|
118
|
+
# elsif is_int_sequence(columns)
|
119
|
+
# projection = columns.to_a
|
120
|
+
# columns = nil
|
121
|
+
# elsif !is_str_sequence(columns)
|
122
|
+
# raise ArgumentError, "columns arg should contain a list of all integers or all strings values."
|
123
|
+
# end
|
124
|
+
end
|
125
|
+
[projection, columns]
|
57
126
|
end
|
58
127
|
|
59
128
|
def self.scale_bytes(sz, to:)
|
@@ -70,5 +139,9 @@ module Polars
|
|
70
139
|
sz
|
71
140
|
end
|
72
141
|
end
|
142
|
+
|
143
|
+
def self.bool?(value)
|
144
|
+
value == true || value == false
|
145
|
+
end
|
73
146
|
end
|
74
147
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars/when.rb
CHANGED
data/lib/polars/when_then.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -1,12 +1,19 @@
|
|
1
1
|
# ext
|
2
2
|
require "polars/polars"
|
3
3
|
|
4
|
+
# stdlib
|
5
|
+
require "date"
|
6
|
+
|
4
7
|
# modules
|
8
|
+
require "polars/expr_dispatch"
|
9
|
+
require "polars/batched_csv_reader"
|
5
10
|
require "polars/cat_expr"
|
6
11
|
require "polars/data_frame"
|
7
12
|
require "polars/date_time_expr"
|
13
|
+
require "polars/exceptions"
|
8
14
|
require "polars/expr"
|
9
15
|
require "polars/functions"
|
16
|
+
require "polars/group_by"
|
10
17
|
require "polars/io"
|
11
18
|
require "polars/lazy_frame"
|
12
19
|
require "polars/lazy_functions"
|
@@ -14,6 +21,7 @@ require "polars/lazy_group_by"
|
|
14
21
|
require "polars/list_expr"
|
15
22
|
require "polars/meta_expr"
|
16
23
|
require "polars/series"
|
24
|
+
require "polars/slice"
|
17
25
|
require "polars/string_expr"
|
18
26
|
require "polars/struct_expr"
|
19
27
|
require "polars/utils"
|
@@ -22,8 +30,6 @@ require "polars/when"
|
|
22
30
|
require "polars/when_then"
|
23
31
|
|
24
32
|
module Polars
|
25
|
-
class Error < StandardError; end
|
26
|
-
|
27
33
|
extend Functions
|
28
34
|
extend IO
|
29
35
|
extend LazyFunctions
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-
|
11
|
+
date: 2022-11-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -31,6 +31,7 @@ extensions:
|
|
31
31
|
- ext/polars/extconf.rb
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
|
+
- ".yardopts"
|
34
35
|
- CHANGELOG.md
|
35
36
|
- Cargo.lock
|
36
37
|
- Cargo.toml
|
@@ -38,10 +39,12 @@ files:
|
|
38
39
|
- README.md
|
39
40
|
- ext/polars/Cargo.toml
|
40
41
|
- ext/polars/extconf.rb
|
42
|
+
- ext/polars/src/batched_csv.rs
|
41
43
|
- ext/polars/src/conversion.rs
|
42
44
|
- ext/polars/src/dataframe.rs
|
43
45
|
- ext/polars/src/error.rs
|
44
46
|
- ext/polars/src/file.rs
|
47
|
+
- ext/polars/src/lazy/apply.rs
|
45
48
|
- ext/polars/src/lazy/dataframe.rs
|
46
49
|
- ext/polars/src/lazy/dsl.rs
|
47
50
|
- ext/polars/src/lazy/meta.rs
|
@@ -49,13 +52,19 @@ files:
|
|
49
52
|
- ext/polars/src/lazy/utils.rs
|
50
53
|
- ext/polars/src/lib.rs
|
51
54
|
- ext/polars/src/series.rs
|
55
|
+
- ext/polars/src/set.rs
|
56
|
+
- ext/polars/src/utils.rs
|
52
57
|
- lib/polars-df.rb
|
53
58
|
- lib/polars.rb
|
59
|
+
- lib/polars/batched_csv_reader.rb
|
54
60
|
- lib/polars/cat_expr.rb
|
55
61
|
- lib/polars/data_frame.rb
|
56
62
|
- lib/polars/date_time_expr.rb
|
63
|
+
- lib/polars/exceptions.rb
|
57
64
|
- lib/polars/expr.rb
|
65
|
+
- lib/polars/expr_dispatch.rb
|
58
66
|
- lib/polars/functions.rb
|
67
|
+
- lib/polars/group_by.rb
|
59
68
|
- lib/polars/io.rb
|
60
69
|
- lib/polars/lazy_frame.rb
|
61
70
|
- lib/polars/lazy_functions.rb
|
@@ -63,6 +72,7 @@ files:
|
|
63
72
|
- lib/polars/list_expr.rb
|
64
73
|
- lib/polars/meta_expr.rb
|
65
74
|
- lib/polars/series.rb
|
75
|
+
- lib/polars/slice.rb
|
66
76
|
- lib/polars/string_expr.rb
|
67
77
|
- lib/polars/struct_expr.rb
|
68
78
|
- lib/polars/utils.rb
|