polars-df 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +1946 -0
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +31 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +336 -42
- data/ext/polars/src/dataframe.rs +409 -4
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +436 -10
- data/ext/polars/src/lazy/dsl.rs +1134 -5
- data/ext/polars/src/lazy/meta.rs +41 -0
- data/ext/polars/src/lazy/mod.rs +2 -0
- data/ext/polars/src/lib.rs +390 -3
- data/ext/polars/src/series.rs +175 -13
- data/lib/polars/batched_csv_reader.rb +95 -0
- data/lib/polars/cat_expr.rb +13 -0
- data/lib/polars/data_frame.rb +892 -21
- data/lib/polars/date_time_expr.rb +143 -0
- data/lib/polars/expr.rb +503 -0
- data/lib/polars/io.rb +342 -2
- data/lib/polars/lazy_frame.rb +338 -6
- data/lib/polars/lazy_functions.rb +158 -11
- data/lib/polars/list_expr.rb +108 -0
- data/lib/polars/meta_expr.rb +33 -0
- data/lib/polars/series.rb +1304 -14
- data/lib/polars/string_expr.rb +117 -0
- data/lib/polars/struct_expr.rb +27 -0
- data/lib/polars/utils.rb +60 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -1
- metadata +13 -2
data/lib/polars/string_expr.rb
CHANGED
@@ -6,12 +6,129 @@ module Polars
|
|
6
6
|
self._rbexpr = expr._rbexpr
|
7
7
|
end
|
8
8
|
|
9
|
+
# def strptime
|
10
|
+
# end
|
11
|
+
|
9
12
|
def lengths
|
10
13
|
Utils.wrap_expr(_rbexpr.str_lengths)
|
11
14
|
end
|
12
15
|
|
16
|
+
def n_chars
|
17
|
+
Utils.wrap_expr(_rbexpr.str_n_chars)
|
18
|
+
end
|
19
|
+
|
20
|
+
def concat(delimiter = "-")
|
21
|
+
Utils.wrap_expr(_rbexpr.str_concat(delimiter))
|
22
|
+
end
|
23
|
+
|
24
|
+
def to_uppercase
|
25
|
+
Utils.wrap_expr(_rbexpr.str_to_uppercase)
|
26
|
+
end
|
27
|
+
|
28
|
+
def to_lowercase
|
29
|
+
Utils.wrap_expr(_rbexpr.str_to_lowercase)
|
30
|
+
end
|
31
|
+
|
32
|
+
def strip(matches = nil)
|
33
|
+
if !matches.nil? && matches.length > 1
|
34
|
+
raise ArgumentError, "matches should contain a single character"
|
35
|
+
end
|
36
|
+
Utils.wrap_expr(_rbexpr.str_strip(matches))
|
37
|
+
end
|
38
|
+
|
39
|
+
def lstrip(matches = nil)
|
40
|
+
if !matches.nil? && matches.length > 1
|
41
|
+
raise ArgumentError, "matches should contain a single character"
|
42
|
+
end
|
43
|
+
Utils.wrap_expr(_rbexpr.str_lstrip(matches))
|
44
|
+
end
|
45
|
+
|
46
|
+
def rstrip(matches = nil)
|
47
|
+
if !matches.nil? && matches.length > 1
|
48
|
+
raise ArgumentError, "matches should contain a single character"
|
49
|
+
end
|
50
|
+
Utils.wrap_expr(_rbexpr.str_rstrip(matches))
|
51
|
+
end
|
52
|
+
|
53
|
+
def zfill(alignment)
|
54
|
+
Utils.wrap_expr(_rbexpr.str_zfill(alignment))
|
55
|
+
end
|
56
|
+
|
57
|
+
def ljust(width, fillchar = " ")
|
58
|
+
Utils.wrap_expr(_rbexpr.str_ljust(width, fillchar))
|
59
|
+
end
|
60
|
+
|
61
|
+
def rjust(width, fillchar = " ")
|
62
|
+
Utils.wrap_expr(_rbexpr.str_rjust(width, fillchar))
|
63
|
+
end
|
64
|
+
|
13
65
|
def contains(pattern, literal: false)
|
14
66
|
Utils.wrap_expr(_rbexpr.str_contains(pattern, literal))
|
15
67
|
end
|
68
|
+
|
69
|
+
def ends_with(sub)
|
70
|
+
Utils.wrap_expr(_rbexpr.str_ends_with(sub))
|
71
|
+
end
|
72
|
+
|
73
|
+
def starts_with(sub)
|
74
|
+
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
75
|
+
end
|
76
|
+
|
77
|
+
# def json_path_match
|
78
|
+
# end
|
79
|
+
|
80
|
+
# def decode
|
81
|
+
# end
|
82
|
+
|
83
|
+
# def encode
|
84
|
+
# end
|
85
|
+
|
86
|
+
def extract(pattern, group_index: 1)
|
87
|
+
Utils.wrap_expr(_rbexpr.str_extract(pattern, group_index))
|
88
|
+
end
|
89
|
+
|
90
|
+
def extract_all(pattern)
|
91
|
+
Utils.wrap_expr(_rbexpr.str_extract_all(pattern))
|
92
|
+
end
|
93
|
+
|
94
|
+
def count_match(pattern)
|
95
|
+
Utils.wrap_expr(_rbexpr.count_match(pattern))
|
96
|
+
end
|
97
|
+
|
98
|
+
def split(by, inclusive: false)
|
99
|
+
if inclusive
|
100
|
+
Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
|
101
|
+
else
|
102
|
+
Utils.wrap_expr(_rbexpr.str_split(by))
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def split_exact(by, n, inclusive: false)
|
107
|
+
if inclusive
|
108
|
+
Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
|
109
|
+
else
|
110
|
+
Utils.wrap_expr(_rbexpr.str_split_exact(by, n))
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
def splitn(by, n)
|
115
|
+
Utils.wrap_expr(_rbexpr.str_splitn(by, n))
|
116
|
+
end
|
117
|
+
|
118
|
+
# Replace text matching +pattern+ with +value+.
#
# Both +pattern+ and +value+ are passed through
# Utils.expr_to_lit_or_expr with +str_to_lit: true+ before being handed to
# the native +str_replace+ binding.
# NOTE(review): presumably only the first match per string is replaced
# (see replace_all for the all-matches variant) - confirm against the binding.
#
# @param pattern [Object] pattern to search for (string or expression)
# @param value [Object] replacement (string or expression)
# @param literal [Boolean] forwarded unchanged to the native binding
# @return [Object] the result of Utils.wrap_expr
def replace(pattern, value, literal: false)
  pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  # `value` used to be missing from the signature, so this line silently
  # read an uninitialised local (nil) instead of the caller's replacement.
  value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
  Utils.wrap_expr(_rbexpr.str_replace(pattern._rbexpr, value._rbexpr, literal))
end
|
123
|
+
|
124
|
+
# Replace text matching +pattern+ with +value+ via the native
# +str_replace_all+ binding.
#
# Both +pattern+ and +value+ are passed through
# Utils.expr_to_lit_or_expr with +str_to_lit: true+ first.
#
# @param pattern [Object] pattern to search for (string or expression)
# @param value [Object] replacement (string or expression)
# @param literal [Boolean] forwarded unchanged to the native binding
# @return [Object] the result of Utils.wrap_expr
def replace_all(pattern, value, literal: false)
  pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  # `value` used to be missing from the signature, so this line silently
  # read an uninitialised local (nil) instead of the caller's replacement.
  value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
  Utils.wrap_expr(_rbexpr.str_replace_all(pattern._rbexpr, value._rbexpr, literal))
end
|
129
|
+
|
130
|
+
def slice(offset, length = nil)
|
131
|
+
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
132
|
+
end
|
16
133
|
end
|
17
134
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Polars
|
2
|
+
class StructExpr
|
3
|
+
attr_accessor :_rbexpr
|
4
|
+
|
5
|
+
def initialize(expr)
|
6
|
+
self._rbexpr = expr._rbexpr
|
7
|
+
end
|
8
|
+
|
9
|
+
def [](item)
|
10
|
+
if item.is_a?(String)
|
11
|
+
field(item)
|
12
|
+
elsif item.is_a?(Integer)
|
13
|
+
Utils.wrap_expr(_rbexpr.struct_field_by_index(item))
|
14
|
+
else
|
15
|
+
raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def field(name)
|
20
|
+
Utils.wrap_expr(_rbexpr.struct_field_by_name(name))
|
21
|
+
end
|
22
|
+
|
23
|
+
def rename_fields(names)
|
24
|
+
Utils.wrap_expr(_rbexpr.struct_rename_fields(names))
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/polars/utils.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
module Polars
|
2
|
+
# @private
|
2
3
|
module Utils
|
4
|
+
DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
|
5
|
+
|
3
6
|
def self.wrap_s(s)
|
4
7
|
Series._from_rbseries(s)
|
5
8
|
end
|
@@ -43,5 +46,62 @@ module Polars
|
|
43
46
|
def self.format_path(path)
|
44
47
|
File.expand_path(path)
|
45
48
|
end
|
49
|
+
|
50
|
+
# TODO fix
|
51
|
+
def self.is_polars_dtype(data_type)
|
52
|
+
true
|
53
|
+
end
|
54
|
+
|
55
|
+
# TODO fix
|
56
|
+
def self.rb_type_to_dtype(dtype)
|
57
|
+
dtype.to_s
|
58
|
+
end
|
59
|
+
|
60
|
+
def self._process_null_values(null_values)
|
61
|
+
if null_values.is_a?(Hash)
|
62
|
+
null_values.to_a
|
63
|
+
else
|
64
|
+
null_values
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
|
69
|
+
if !row_count_name.nil?
|
70
|
+
[row_count_name, row_count_offset]
|
71
|
+
else
|
72
|
+
nil
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def self.handle_projection_columns(columns)
|
77
|
+
projection = nil
|
78
|
+
if columns
|
79
|
+
raise Todo
|
80
|
+
# if columns.is_a?(String) || columns.is_a?(Symbol)
|
81
|
+
# columns = [columns]
|
82
|
+
# elsif is_int_sequence(columns)
|
83
|
+
# projection = columns.to_a
|
84
|
+
# columns = nil
|
85
|
+
# elsif !is_str_sequence(columns)
|
86
|
+
# raise ArgumentError, "columns arg should contain a list of all integers or all strings values."
|
87
|
+
# end
|
88
|
+
end
|
89
|
+
[projection, columns]
|
90
|
+
end
|
91
|
+
|
92
|
+
# Convert a byte count into the requested unit.
#
# Only the first character of +to+ selects the unit ("b", "k", "m", "g" or
# "t"), so "k" and "kb" are equivalent.
#
# @param sz [Numeric] size in bytes
# @param to [String, Symbol] target unit
# @return [Numeric] +sz+ unchanged for bytes, otherwise a Float
# @raise [ArgumentError] if the unit is not one of b/k/m/g/t
def self.scale_bytes(sz, to:)
  unit = to.to_s[0]
  # fetch (rather than []) so an unknown unit fails with a clear message
  # instead of a NoMethodError from comparing nil with 1.
  scaling_factor = {
    "b" => 1,
    "k" => 1024,
    "m" => 1024 ** 2,
    "g" => 1024 ** 3,
    "t" => 1024 ** 4
  }.fetch(unit) { raise ArgumentError, "unknown size unit: #{to.inspect}" }

  if scaling_factor > 1
    sz / scaling_factor.to_f
  else
    sz
  end
end
|
46
106
|
end
|
47
107
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -2,23 +2,37 @@
|
|
2
2
|
require "polars/polars"
|
3
3
|
|
4
4
|
# modules
|
5
|
+
require "polars/batched_csv_reader"
|
6
|
+
require "polars/cat_expr"
|
5
7
|
require "polars/data_frame"
|
8
|
+
require "polars/date_time_expr"
|
6
9
|
require "polars/expr"
|
7
10
|
require "polars/functions"
|
11
|
+
require "polars/io"
|
8
12
|
require "polars/lazy_frame"
|
9
13
|
require "polars/lazy_functions"
|
10
14
|
require "polars/lazy_group_by"
|
11
|
-
require "polars/io"
|
15
|
+
require "polars/list_expr"
|
16
|
+
require "polars/meta_expr"
|
12
17
|
require "polars/series"
|
13
18
|
require "polars/string_expr"
|
19
|
+
require "polars/struct_expr"
|
14
20
|
require "polars/utils"
|
15
21
|
require "polars/version"
|
16
22
|
require "polars/when"
|
17
23
|
require "polars/when_then"
|
18
24
|
|
19
25
|
module Polars
|
26
|
+
# @private
|
20
27
|
class Error < StandardError; end
|
21
28
|
|
29
|
+
# @private
|
30
|
+
class Todo < Error
|
31
|
+
def message
|
32
|
+
"not implemented yet"
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
22
36
|
extend Functions
|
23
37
|
extend IO
|
24
38
|
extend LazyFunctions
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-
|
11
|
+
date: 2022-11-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -32,31 +32,42 @@ extensions:
|
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
34
|
- CHANGELOG.md
|
35
|
+
- Cargo.lock
|
36
|
+
- Cargo.toml
|
35
37
|
- LICENSE.txt
|
36
38
|
- README.md
|
37
39
|
- ext/polars/Cargo.toml
|
38
40
|
- ext/polars/extconf.rb
|
41
|
+
- ext/polars/src/batched_csv.rs
|
39
42
|
- ext/polars/src/conversion.rs
|
40
43
|
- ext/polars/src/dataframe.rs
|
41
44
|
- ext/polars/src/error.rs
|
42
45
|
- ext/polars/src/file.rs
|
46
|
+
- ext/polars/src/lazy/apply.rs
|
43
47
|
- ext/polars/src/lazy/dataframe.rs
|
44
48
|
- ext/polars/src/lazy/dsl.rs
|
49
|
+
- ext/polars/src/lazy/meta.rs
|
45
50
|
- ext/polars/src/lazy/mod.rs
|
46
51
|
- ext/polars/src/lazy/utils.rs
|
47
52
|
- ext/polars/src/lib.rs
|
48
53
|
- ext/polars/src/series.rs
|
49
54
|
- lib/polars-df.rb
|
50
55
|
- lib/polars.rb
|
56
|
+
- lib/polars/batched_csv_reader.rb
|
57
|
+
- lib/polars/cat_expr.rb
|
51
58
|
- lib/polars/data_frame.rb
|
59
|
+
- lib/polars/date_time_expr.rb
|
52
60
|
- lib/polars/expr.rb
|
53
61
|
- lib/polars/functions.rb
|
54
62
|
- lib/polars/io.rb
|
55
63
|
- lib/polars/lazy_frame.rb
|
56
64
|
- lib/polars/lazy_functions.rb
|
57
65
|
- lib/polars/lazy_group_by.rb
|
66
|
+
- lib/polars/list_expr.rb
|
67
|
+
- lib/polars/meta_expr.rb
|
58
68
|
- lib/polars/series.rb
|
59
69
|
- lib/polars/string_expr.rb
|
70
|
+
- lib/polars/struct_expr.rb
|
60
71
|
- lib/polars/utils.rb
|
61
72
|
- lib/polars/version.rb
|
62
73
|
- lib/polars/when.rb
|