polars-df 0.20.0-x86_64-darwin → 0.21.1-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +192 -186
- data/LICENSE-THIRD-PARTY.txt +1431 -1810
- data/LICENSE.txt +1 -1
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/3.4/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +130 -32
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +12 -2
- data/lib/polars/data_frame.rb +834 -48
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +61 -5
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +1247 -211
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +127 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +19 -1
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +70 -66
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +1099 -95
- data/lib/polars/list_expr.rb +400 -11
- data/lib/polars/list_name_space.rb +321 -5
- data/lib/polars/meta_expr.rb +71 -22
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +84 -3
- data/lib/polars/selector.rb +210 -0
- data/lib/polars/selectors.rb +932 -203
- data/lib/polars/series.rb +1083 -63
- data/lib/polars/string_expr.rb +435 -9
- data/lib/polars/string_name_space.rb +729 -45
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +9 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +10 -0
- metadata +12 -2
@@ -31,58 +31,156 @@ module Polars
|
|
31
31
|
|
32
32
|
# Return whether or not the column is a local categorical.
|
33
33
|
#
|
34
|
+
# Always returns false.
|
35
|
+
#
|
36
|
+
# @return [Boolean]
|
37
|
+
def is_local
|
38
|
+
_s.cat_is_local
|
39
|
+
end
|
40
|
+
|
41
|
+
# Simply returns the column as-is; local representations are deprecated.
|
42
|
+
#
|
43
|
+
# @return [Series]
|
44
|
+
def to_local
|
45
|
+
Utils.wrap_s(_s.cat_to_local)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Indicate whether the Series uses lexical ordering.
|
49
|
+
#
|
50
|
+
# @note
|
51
|
+
# This functionality is considered **unstable**. It may be changed
|
52
|
+
# at any point without it being considered a breaking change.
|
53
|
+
#
|
34
54
|
# @return [Boolean]
|
35
55
|
#
|
36
|
-
# @example
|
37
|
-
# s = Polars::Series.new(["
|
38
|
-
# s.cat.
|
56
|
+
# @example
|
57
|
+
# s = Polars::Series.new(["b", "a", "b"]).cast(Polars::Categorical)
|
58
|
+
# s.cat.uses_lexical_ordering
|
39
59
|
# # => true
|
60
|
+
def uses_lexical_ordering
|
61
|
+
_s.cat_uses_lexical_ordering
|
62
|
+
end
|
63
|
+
|
64
|
+
# Return the byte-length of the string representation of each value.
|
40
65
|
#
|
41
|
-
# @
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# #
|
48
|
-
|
49
|
-
|
66
|
+
# @return [Series]
|
67
|
+
#
|
68
|
+
# @example
|
69
|
+
# s = Polars::Series.new(["Café", "345", "東京", nil], dtype: Polars::Categorical)
|
70
|
+
# s.cat.len_bytes
|
71
|
+
# # =>
|
72
|
+
# # shape: (4,)
|
73
|
+
# # Series: '' [u32]
|
74
|
+
# # [
|
75
|
+
# # 5
|
76
|
+
# # 3
|
77
|
+
# # 6
|
78
|
+
# # null
|
79
|
+
# # ]
|
80
|
+
def len_bytes
|
81
|
+
super
|
50
82
|
end
|
51
83
|
|
52
|
-
#
|
84
|
+
# Return the number of characters of the string representation of each value.
|
53
85
|
#
|
54
|
-
#
|
86
|
+
# @return [Series]
|
87
|
+
#
|
88
|
+
# @example
|
89
|
+
# s = Polars::Series.new(["Café", "345", "東京", nil], dtype: Polars::Categorical)
|
90
|
+
# s.cat.len_chars
|
91
|
+
# # =>
|
92
|
+
# # shape: (4,)
|
93
|
+
# # Series: '' [u32]
|
94
|
+
# # [
|
95
|
+
# # 4
|
96
|
+
# # 3
|
97
|
+
# # 2
|
98
|
+
# # null
|
99
|
+
# # ]
|
100
|
+
def len_chars
|
101
|
+
super
|
102
|
+
end
|
103
|
+
|
104
|
+
# Check if string representations of values start with a substring.
|
105
|
+
#
|
106
|
+
# @param prefix [String]
|
107
|
+
# Prefix substring.
|
55
108
|
#
|
56
109
|
# @return [Series]
|
57
110
|
#
|
58
|
-
# @example
|
59
|
-
# s = nil
|
60
|
-
#
|
61
|
-
# _ = Polars::Series.new("x", ["a", "b", "a"], dtype: Polars::Categorical)
|
62
|
-
# s = Polars::Series.new("y", ["c", "b", "d"], dtype: Polars::Categorical)
|
63
|
-
# end
|
64
|
-
# s.to_physical
|
111
|
+
# @example
|
112
|
+
# s = Polars::Series.new("fruits", ["apple", "mango", nil], dtype: Polars::Categorical)
|
113
|
+
# s.cat.starts_with("app")
|
65
114
|
# # =>
|
66
115
|
# # shape: (3,)
|
67
|
-
# # Series: '
|
116
|
+
# # Series: 'fruits' [bool]
|
68
117
|
# # [
|
69
|
-
# #
|
70
|
-
# #
|
71
|
-
# #
|
118
|
+
# # true
|
119
|
+
# # false
|
120
|
+
# # null
|
72
121
|
# # ]
|
122
|
+
def starts_with(prefix)
|
123
|
+
super
|
124
|
+
end
|
125
|
+
|
126
|
+
# Check if string representations of values end with a substring.
|
127
|
+
#
|
128
|
+
# @param suffix [String]
|
129
|
+
# Suffix substring.
|
130
|
+
#
|
131
|
+
# @return [Series]
|
73
132
|
#
|
74
133
|
# @example
|
75
|
-
# s.
|
134
|
+
# s = Polars::Series.new("fruits", ["apple", "mango", nil], dtype: Polars::Categorical)
|
135
|
+
# s.cat.ends_with("go")
|
76
136
|
# # =>
|
77
137
|
# # shape: (3,)
|
78
|
-
# # Series: '
|
138
|
+
# # Series: 'fruits' [bool]
|
79
139
|
# # [
|
80
|
-
# #
|
81
|
-
# #
|
82
|
-
# #
|
140
|
+
# # false
|
141
|
+
# # true
|
142
|
+
# # null
|
83
143
|
# # ]
|
84
|
-
def
|
85
|
-
|
144
|
+
def ends_with(suffix)
|
145
|
+
super
|
146
|
+
end
|
147
|
+
|
148
|
+
# Extract a substring from the string representation of each value.
|
149
|
+
#
|
150
|
+
# @param offset [Integer]
|
151
|
+
# Start index. Negative indexing is supported.
|
152
|
+
# @param length [Integer]
|
153
|
+
# Length of the slice. If set to `nil` (default), the slice is taken to the
|
154
|
+
# end of the string.
|
155
|
+
#
|
156
|
+
# @return [Series]
|
157
|
+
#
|
158
|
+
# @example
|
159
|
+
# s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"], dtype: Polars::Categorical)
|
160
|
+
# s.cat.slice(-3)
|
161
|
+
# # =>
|
162
|
+
# # shape: (4,)
|
163
|
+
# # Series: '' [str]
|
164
|
+
# # [
|
165
|
+
# # "ear"
|
166
|
+
# # null
|
167
|
+
# # "aya"
|
168
|
+
# # "uit"
|
169
|
+
# # ]
|
170
|
+
#
|
171
|
+
# @example Using the optional `length` parameter
|
172
|
+
# s.cat.slice(4, 3)
|
173
|
+
# # =>
|
174
|
+
# # shape: (4,)
|
175
|
+
# # Series: '' [str]
|
176
|
+
# # [
|
177
|
+
# # ""
|
178
|
+
# # null
|
179
|
+
# # "ya"
|
180
|
+
# # "onf"
|
181
|
+
# # ]
|
182
|
+
def slice(offset, length = nil)
|
183
|
+
super
|
86
184
|
end
|
87
185
|
end
|
88
186
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Polars
|
2
|
+
class Catalog
|
3
|
+
module Unity
|
4
|
+
# Information for a catalog within a metastore.
|
5
|
+
CatalogInfo =
|
6
|
+
::Struct.new(
|
7
|
+
:name,
|
8
|
+
:comment,
|
9
|
+
:properties,
|
10
|
+
:options,
|
11
|
+
:storage_location,
|
12
|
+
:created_at,
|
13
|
+
:created_by,
|
14
|
+
:updated_at,
|
15
|
+
:updated_by,
|
16
|
+
keyword_init: true
|
17
|
+
)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Polars
|
2
|
+
class Catalog
|
3
|
+
module Unity
|
4
|
+
# Information for a column within a catalog table.
|
5
|
+
ColumnInfo =
|
6
|
+
::Struct.new(
|
7
|
+
:name,
|
8
|
+
:type_name,
|
9
|
+
:type_text,
|
10
|
+
:type_json,
|
11
|
+
:position,
|
12
|
+
:comment,
|
13
|
+
:partition_index,
|
14
|
+
keyword_init: true
|
15
|
+
)
|
16
|
+
|
17
|
+
class ColumnInfo
|
18
|
+
# Get the native polars datatype of this column.
|
19
|
+
#
|
20
|
+
# @note
|
21
|
+
# This functionality is considered **unstable**. It may be changed
|
22
|
+
# at any point without it being considered a breaking change.
|
23
|
+
#
|
24
|
+
# @return [Object]
|
25
|
+
def get_polars_dtype
|
26
|
+
RbCatalogClient.type_json_to_polars_type(type_json)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Polars
|
2
|
+
class Catalog
|
3
|
+
module Unity
|
4
|
+
# Information for a namespace within a catalog.
|
5
|
+
#
|
6
|
+
# This is also known as a "schema" in Unity Catalog terminology.
|
7
|
+
NamespaceInfo =
|
8
|
+
::Struct.new(
|
9
|
+
:name,
|
10
|
+
:comment,
|
11
|
+
:properties,
|
12
|
+
:storage_location,
|
13
|
+
:created_at,
|
14
|
+
:created_by,
|
15
|
+
:updated_at,
|
16
|
+
:updated_by,
|
17
|
+
keyword_init: true
|
18
|
+
)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Polars
|
2
|
+
class Catalog
|
3
|
+
module Unity
|
4
|
+
# Information for a catalog table.
|
5
|
+
TableInfo =
|
6
|
+
::Struct.new(
|
7
|
+
:name,
|
8
|
+
:comment,
|
9
|
+
:table_id,
|
10
|
+
:table_type,
|
11
|
+
:storage_location,
|
12
|
+
:data_source_format,
|
13
|
+
:columns,
|
14
|
+
:properties,
|
15
|
+
:created_at,
|
16
|
+
:created_by,
|
17
|
+
:updated_at,
|
18
|
+
:updated_by,
|
19
|
+
keyword_init: true
|
20
|
+
)
|
21
|
+
|
22
|
+
class TableInfo
|
23
|
+
# Get the native polars schema of this table.
|
24
|
+
#
|
25
|
+
# @note
|
26
|
+
# This functionality is considered **unstable**. It may be changed
|
27
|
+
# at any point without it being considered a breaking change.
|
28
|
+
#
|
29
|
+
# @return [Schema, nil] the schema, or `nil` when `columns` is `nil`
|
30
|
+
def get_polars_schema
|
31
|
+
if columns.nil?
|
32
|
+
return nil
|
33
|
+
end
|
34
|
+
|
35
|
+
schema = Schema.new(check_dtypes: false)
|
36
|
+
|
37
|
+
columns.each do |column_info|
|
38
|
+
if schema[column_info.name]
|
39
|
+
msg = "duplicate column name: #{column_info.name}"
|
40
|
+
raise DuplicateError, msg
|
41
|
+
end
|
42
|
+
schema[column_info.name] = column_info.get_polars_dtype
|
43
|
+
end
|
44
|
+
|
45
|
+
schema
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|