polars-df 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +1946 -0
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +31 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +336 -42
- data/ext/polars/src/dataframe.rs +409 -4
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +436 -10
- data/ext/polars/src/lazy/dsl.rs +1134 -5
- data/ext/polars/src/lazy/meta.rs +41 -0
- data/ext/polars/src/lazy/mod.rs +2 -0
- data/ext/polars/src/lib.rs +390 -3
- data/ext/polars/src/series.rs +175 -13
- data/lib/polars/batched_csv_reader.rb +95 -0
- data/lib/polars/cat_expr.rb +13 -0
- data/lib/polars/data_frame.rb +892 -21
- data/lib/polars/date_time_expr.rb +143 -0
- data/lib/polars/expr.rb +503 -0
- data/lib/polars/io.rb +342 -2
- data/lib/polars/lazy_frame.rb +338 -6
- data/lib/polars/lazy_functions.rb +158 -11
- data/lib/polars/list_expr.rb +108 -0
- data/lib/polars/meta_expr.rb +33 -0
- data/lib/polars/series.rb +1304 -14
- data/lib/polars/string_expr.rb +117 -0
- data/lib/polars/struct_expr.rb +27 -0
- data/lib/polars/utils.rb +60 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -1
- metadata +13 -2
data/lib/polars/string_expr.rb
CHANGED
@@ -6,12 +6,129 @@ module Polars
|
|
6
6
|
self._rbexpr = expr._rbexpr
|
7
7
|
end
|
8
8
|
|
9
|
+
# def strptime
|
10
|
+
# end
|
11
|
+
|
9
12
|
def lengths
|
10
13
|
Utils.wrap_expr(_rbexpr.str_lengths)
|
11
14
|
end
|
12
15
|
|
16
|
+
def n_chars
|
17
|
+
Utils.wrap_expr(_rbexpr.str_n_chars)
|
18
|
+
end
|
19
|
+
|
20
|
+
def concat(delimiter = "-")
|
21
|
+
Utils.wrap_expr(_rbexpr.str_concat(delimiter))
|
22
|
+
end
|
23
|
+
|
24
|
+
def to_uppercase
|
25
|
+
Utils.wrap_expr(_rbexpr.str_to_uppercase)
|
26
|
+
end
|
27
|
+
|
28
|
+
def to_lowercase
|
29
|
+
Utils.wrap_expr(_rbexpr.str_to_lowercase)
|
30
|
+
end
|
31
|
+
|
32
|
+
# Builds an expression from the native `str_strip` call.
#
# @param matches [String, nil]
#   Forwarded unchanged to `str_strip`. When given, it may be at most one
#   character long (presumably the character to strip - confirm against
#   the native binding).
# @raise [ArgumentError] if `matches` is longer than one character.
def strip(matches = nil)
  unless matches.nil? || matches.length <= 1
    raise ArgumentError, "matches should contain a single character"
  end

  native = _rbexpr.str_strip(matches)
  Utils.wrap_expr(native)
end
|
38
|
+
|
39
|
+
# Builds an expression from the native `str_lstrip` call.
#
# @param matches [String, nil]
#   Forwarded unchanged to `str_lstrip`. When given, it may be at most one
#   character long (presumably the leading character to strip - confirm
#   against the native binding).
# @raise [ArgumentError] if `matches` is longer than one character.
def lstrip(matches = nil)
  unless matches.nil? || matches.length <= 1
    raise ArgumentError, "matches should contain a single character"
  end

  native = _rbexpr.str_lstrip(matches)
  Utils.wrap_expr(native)
end
|
45
|
+
|
46
|
+
# Builds an expression from the native `str_rstrip` call.
#
# @param matches [String, nil]
#   Forwarded unchanged to `str_rstrip`. When given, it may be at most one
#   character long (presumably the trailing character to strip - confirm
#   against the native binding).
# @raise [ArgumentError] if `matches` is longer than one character.
def rstrip(matches = nil)
  unless matches.nil? || matches.length <= 1
    raise ArgumentError, "matches should contain a single character"
  end

  native = _rbexpr.str_rstrip(matches)
  Utils.wrap_expr(native)
end
|
52
|
+
|
53
|
+
def zfill(alignment)
|
54
|
+
Utils.wrap_expr(_rbexpr.str_zfill(alignment))
|
55
|
+
end
|
56
|
+
|
57
|
+
def ljust(width, fillchar = " ")
|
58
|
+
Utils.wrap_expr(_rbexpr.str_ljust(width, fillchar))
|
59
|
+
end
|
60
|
+
|
61
|
+
def rjust(width, fillchar = " ")
|
62
|
+
Utils.wrap_expr(_rbexpr.str_rjust(width, fillchar))
|
63
|
+
end
|
64
|
+
|
13
65
|
def contains(pattern, literal: false)
|
14
66
|
Utils.wrap_expr(_rbexpr.str_contains(pattern, literal))
|
15
67
|
end
|
68
|
+
|
69
|
+
def ends_with(sub)
|
70
|
+
Utils.wrap_expr(_rbexpr.str_ends_with(sub))
|
71
|
+
end
|
72
|
+
|
73
|
+
def starts_with(sub)
|
74
|
+
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
75
|
+
end
|
76
|
+
|
77
|
+
# def json_path_match
|
78
|
+
# end
|
79
|
+
|
80
|
+
# def decode
|
81
|
+
# end
|
82
|
+
|
83
|
+
# def encode
|
84
|
+
# end
|
85
|
+
|
86
|
+
def extract(pattern, group_index: 1)
|
87
|
+
Utils.wrap_expr(_rbexpr.str_extract(pattern, group_index))
|
88
|
+
end
|
89
|
+
|
90
|
+
def extract_all(pattern)
|
91
|
+
Utils.wrap_expr(_rbexpr.str_extract_all(pattern))
|
92
|
+
end
|
93
|
+
|
94
|
+
def count_match(pattern)
|
95
|
+
Utils.wrap_expr(_rbexpr.count_match(pattern))
|
96
|
+
end
|
97
|
+
|
98
|
+
# Builds a split expression.
#
# @param by [Object] Forwarded unchanged to the native split call.
# @param inclusive [Boolean]
#   When truthy the native `str_split_inclusive` is used, otherwise
#   `str_split`.
def split(by, inclusive: false)
  native = inclusive ? _rbexpr.str_split_inclusive(by) : _rbexpr.str_split(by)
  Utils.wrap_expr(native)
end
|
105
|
+
|
106
|
+
# Builds a split expression that also receives a count `n`.
#
# @param by [Object] Forwarded unchanged to the native call.
# @param n [Object] Forwarded unchanged to the native call.
# @param inclusive [Boolean]
#   When truthy the native `str_split_exact_inclusive` is used, otherwise
#   `str_split_exact`.
def split_exact(by, n, inclusive: false)
  native =
    if inclusive
      _rbexpr.str_split_exact_inclusive(by, n)
    else
      _rbexpr.str_split_exact(by, n)
    end
  Utils.wrap_expr(native)
end
|
113
|
+
|
114
|
+
def splitn(by, n)
|
115
|
+
Utils.wrap_expr(_rbexpr.str_splitn(by, n))
|
116
|
+
end
|
117
|
+
|
118
|
+
# Builds an expression from the native `str_replace` call.
#
# The previous signature had no `value` parameter although the body read
# `value`; Ruby treated it as a fresh nil local, so the replacement could
# never be supplied by the caller.
#
# @param pattern [Object]
#   Converted with `Utils.expr_to_lit_or_expr(..., str_to_lit: true)`.
# @param value [Object]
#   Replacement, converted the same way as `pattern`. It defaults to nil
#   only so that existing one-argument calls keep their old behaviour.
# @param literal [Boolean] Forwarded unchanged to `str_replace`.
def replace(pattern, value = nil, literal: false)
  pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  value_expr = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
  Utils.wrap_expr(_rbexpr.str_replace(pattern_expr._rbexpr, value_expr._rbexpr, literal))
end
|
123
|
+
|
124
|
+
# Builds an expression from the native `str_replace_all` call.
#
# The previous signature had no `value` parameter although the body read
# `value`; Ruby treated it as a fresh nil local, so the replacement could
# never be supplied by the caller.
#
# @param pattern [Object]
#   Converted with `Utils.expr_to_lit_or_expr(..., str_to_lit: true)`.
# @param value [Object]
#   Replacement, converted the same way as `pattern`. It defaults to nil
#   only so that existing one-argument calls keep their old behaviour.
# @param literal [Boolean] Forwarded unchanged to `str_replace_all`.
def replace_all(pattern, value = nil, literal: false)
  pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  value_expr = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
  Utils.wrap_expr(_rbexpr.str_replace_all(pattern_expr._rbexpr, value_expr._rbexpr, literal))
end
|
129
|
+
|
130
|
+
def slice(offset, length = nil)
|
131
|
+
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
132
|
+
end
|
16
133
|
end
|
17
134
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Polars
  # Expression helper that exposes the struct-related calls of a native
  # expression object.
  class StructExpr
    # The native expression object copied from the wrapped expression.
    attr_accessor :_rbexpr

    # @param expr [Object] Any object responding to `_rbexpr`.
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Looks up a field by name or by position.
    #
    # @param item [String, Integer]
    #   A String is routed through {#field}; an Integer goes to the native
    #   `struct_field_by_index` call.
    # @raise [ArgumentError] for any other argument type.
    def [](item)
      case item
      when String
        field(item)
      when Integer
        Utils.wrap_expr(_rbexpr.struct_field_by_index(item))
      else
        raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
      end
    end

    # Builds an expression from the native `struct_field_by_name` call.
    #
    # @param name [Object] Forwarded unchanged to the native call.
    def field(name)
      native = _rbexpr.struct_field_by_name(name)
      Utils.wrap_expr(native)
    end

    # Builds an expression from the native `struct_rename_fields` call.
    #
    # @param names [Object] Forwarded unchanged to the native call.
    def rename_fields(names)
      native = _rbexpr.struct_rename_fields(names)
      Utils.wrap_expr(native)
    end
  end
end
|
data/lib/polars/utils.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
module Polars
|
2
|
+
# @private
|
2
3
|
module Utils
|
4
|
+
DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
|
5
|
+
|
3
6
|
def self.wrap_s(s)
|
4
7
|
Series._from_rbseries(s)
|
5
8
|
end
|
@@ -43,5 +46,62 @@ module Polars
|
|
43
46
|
def self.format_path(path)
|
44
47
|
File.expand_path(path)
|
45
48
|
end
|
49
|
+
|
50
|
+
# TODO fix
|
51
|
+
def self.is_polars_dtype(data_type)
|
52
|
+
true
|
53
|
+
end
|
54
|
+
|
55
|
+
# TODO fix
|
56
|
+
def self.rb_type_to_dtype(dtype)
|
57
|
+
dtype.to_s
|
58
|
+
end
|
59
|
+
|
60
|
+
# Normalises the `null_values` option.
#
# A Hash is turned into an array of `[key, value]` pairs via `to_a`;
# anything else (including nil) is returned as given.
def self._process_null_values(null_values)
  null_values.is_a?(Hash) ? null_values.to_a : null_values
end
|
67
|
+
|
68
|
+
# Packs the row-count options into a `[name, offset]` pair.
#
# @return [Array, nil] nil when `row_count_name` is nil, otherwise the pair.
def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
  return nil if row_count_name.nil?

  [row_count_name, row_count_offset]
end
|
75
|
+
|
76
|
+
# Splits a column selection into `[projection, columns]`.
#
# Explicit selections are not supported yet: any truthy `columns` raises
# Todo. A falsy `columns` is returned unchanged next to a nil projection.
def self.handle_projection_columns(columns)
  raise Todo if columns

  # Planned handling, kept for reference:
  # if columns.is_a?(String) || columns.is_a?(Symbol)
  #   columns = [columns]
  # elsif is_int_sequence(columns)
  #   projection = columns.to_a
  #   columns = nil
  # elsif !is_str_sequence(columns)
  #   raise ArgumentError, "columns arg should contain a list of all integers or all strings values."
  # end
  [nil, columns]
end
|
91
|
+
|
92
|
+
# Converts a byte count into the requested unit.
#
# Only the first character of `to` selects the unit: "b", "k", "m", "g"
# or "t", using powers of 1024.
#
# @param sz [Numeric] Size in bytes.
# @param to [String, Symbol] Target unit, e.g. "b", "kb", "mb".
# @return [Numeric] `sz` unchanged for bytes, otherwise a Float.
# @raise [ArgumentError]
#   If the unit is not recognised. Previously this surfaced as a
#   NoMethodError from comparing nil with an Integer.
def self.scale_bytes(sz, to:)
  factors = {
    "b" => 1,
    "k" => 1024,
    "m" => 1024 ** 2,
    "g" => 1024 ** 3,
    "t" => 1024 ** 4
  }
  unit_key = to && to[0]
  scaling_factor = factors.fetch(unit_key) do
    known = factors.keys.join(", ")
    raise ArgumentError, "unknown unit #{to.inspect}, expected one starting with: #{known}"
  end

  scaling_factor > 1 ? sz / scaling_factor.to_f : sz
end
|
46
106
|
end
|
47
107
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -2,23 +2,37 @@
|
|
2
2
|
require "polars/polars"
|
3
3
|
|
4
4
|
# modules
|
5
|
+
require "polars/batched_csv_reader"
|
6
|
+
require "polars/cat_expr"
|
5
7
|
require "polars/data_frame"
|
8
|
+
require "polars/date_time_expr"
|
6
9
|
require "polars/expr"
|
7
10
|
require "polars/functions"
|
11
|
+
require "polars/io"
|
8
12
|
require "polars/lazy_frame"
|
9
13
|
require "polars/lazy_functions"
|
10
14
|
require "polars/lazy_group_by"
|
11
|
-
require "polars/
|
15
|
+
require "polars/list_expr"
|
16
|
+
require "polars/meta_expr"
|
12
17
|
require "polars/series"
|
13
18
|
require "polars/string_expr"
|
19
|
+
require "polars/struct_expr"
|
14
20
|
require "polars/utils"
|
15
21
|
require "polars/version"
|
16
22
|
require "polars/when"
|
17
23
|
require "polars/when_then"
|
18
24
|
|
19
25
|
module Polars
|
26
|
+
# @private
|
20
27
|
class Error < StandardError; end
|
21
28
|
|
29
|
+
# @private
|
30
|
+
class Todo < Error
|
31
|
+
def message
|
32
|
+
"not implemented yet"
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
22
36
|
extend Functions
|
23
37
|
extend IO
|
24
38
|
extend LazyFunctions
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-
|
11
|
+
date: 2022-11-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -32,31 +32,42 @@ extensions:
|
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
34
|
- CHANGELOG.md
|
35
|
+
- Cargo.lock
|
36
|
+
- Cargo.toml
|
35
37
|
- LICENSE.txt
|
36
38
|
- README.md
|
37
39
|
- ext/polars/Cargo.toml
|
38
40
|
- ext/polars/extconf.rb
|
41
|
+
- ext/polars/src/batched_csv.rs
|
39
42
|
- ext/polars/src/conversion.rs
|
40
43
|
- ext/polars/src/dataframe.rs
|
41
44
|
- ext/polars/src/error.rs
|
42
45
|
- ext/polars/src/file.rs
|
46
|
+
- ext/polars/src/lazy/apply.rs
|
43
47
|
- ext/polars/src/lazy/dataframe.rs
|
44
48
|
- ext/polars/src/lazy/dsl.rs
|
49
|
+
- ext/polars/src/lazy/meta.rs
|
45
50
|
- ext/polars/src/lazy/mod.rs
|
46
51
|
- ext/polars/src/lazy/utils.rs
|
47
52
|
- ext/polars/src/lib.rs
|
48
53
|
- ext/polars/src/series.rs
|
49
54
|
- lib/polars-df.rb
|
50
55
|
- lib/polars.rb
|
56
|
+
- lib/polars/batched_csv_reader.rb
|
57
|
+
- lib/polars/cat_expr.rb
|
51
58
|
- lib/polars/data_frame.rb
|
59
|
+
- lib/polars/date_time_expr.rb
|
52
60
|
- lib/polars/expr.rb
|
53
61
|
- lib/polars/functions.rb
|
54
62
|
- lib/polars/io.rb
|
55
63
|
- lib/polars/lazy_frame.rb
|
56
64
|
- lib/polars/lazy_functions.rb
|
57
65
|
- lib/polars/lazy_group_by.rb
|
66
|
+
- lib/polars/list_expr.rb
|
67
|
+
- lib/polars/meta_expr.rb
|
58
68
|
- lib/polars/series.rb
|
59
69
|
- lib/polars/string_expr.rb
|
70
|
+
- lib/polars/struct_expr.rb
|
60
71
|
- lib/polars/utils.rb
|
61
72
|
- lib/polars/version.rb
|
62
73
|
- lib/polars/when.rb
|