polars-df 0.19.0-x64-mingw-ucrt → 0.21.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Cargo.lock +211 -320
- data/LICENSE-THIRD-PARTY.txt +1376 -2634
- data/LICENSE.txt +1 -1
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/3.4/polars.so +0 -0
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +48 -39
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/eager.rb +1 -1
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +213 -17
- data/lib/polars/list_name_space.rb +121 -8
- data/lib/polars/meta_expr.rb +14 -29
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +46 -19
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +12 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +10 -2
data/LICENSE.txt
CHANGED
data/lib/polars/3.2/polars.so
CHANGED
Binary file
data/lib/polars/3.3/polars.so
CHANGED
Binary file
data/lib/polars/3.4/polars.so
CHANGED
Binary file
data/lib/polars/cat_name_space.rb
CHANGED
@@ -31,56 +31,16 @@ module Polars
 
     # Return whether or not the column is a local categorical.
     #
-    #
-    #
-    # @example Categoricals constructed without a string cache are considered local.
-    #   s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
-    #   s.cat.is_local
-    #   # => true
+    # Always returns false.
     #
-    # @
-    #   s = nil
-    #   Polars::StringCache.new do
-    #     s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
-    #   end
-    #   s.cat.is_local
-    #   # => false
+    # @return [Boolean]
     def is_local
       _s.cat_is_local
     end
 
-    #
-    #
-    # This may change the underlying physical representation of the column.
+    # Simply returns the column as-is, local representations are deprecated.
     #
     # @return [Series]
-    #
-    # @example Compare the global and local representations of a categorical.
-    #   s = nil
-    #   Polars::StringCache.new do
-    #     _ = Polars::Series.new("x", ["a", "b", "a"], dtype: Polars::Categorical)
-    #     s = Polars::Series.new("y", ["c", "b", "d"], dtype: Polars::Categorical)
-    #   end
-    #   s.to_physical
-    #   # =>
-    #   # shape: (3,)
-    #   # Series: 'y' [u32]
-    #   # [
-    #   #    2
-    #   #    1
-    #   #    3
-    #   # ]
-    #
-    # @example
-    #   s.cat.to_local.to_physical
-    #   # =>
-    #   # shape: (3,)
-    #   # Series: 'y' [u32]
-    #   # [
-    #   #    0
-    #   #    1
-    #   #    2
-    #   # ]
     def to_local
       Utils.wrap_s(_s.cat_to_local)
     end
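
Per the new docstrings above, local categorical representations are deprecated in 0.21.0; a minimal sketch of the resulting behavior (assuming polars-df >= 0.21.0):

  s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical)
  s.cat.is_local   # => false (now always false)
  s.cat.to_local   # returns the column as-is
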
data/lib/polars/catalog/unity/catalog_info.rb
ADDED
@@ -0,0 +1,20 @@
+module Polars
+  class Catalog
+    module Unity
+      # Information for a catalog within a metastore.
+      CatalogInfo =
+        ::Struct.new(
+          :name,
+          :comment,
+          :properties,
+          :options,
+          :storage_location,
+          :created_at,
+          :created_by,
+          :updated_at,
+          :updated_by,
+          keyword_init: true
+        )
+    end
+  end
+end
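
These Unity info structs are keyword-initialized; building one by hand is mainly useful for tests (illustrative values only, normally these objects are returned by Polars::Catalog):

  info = Polars::Catalog::Unity::CatalogInfo.new(name: "main", comment: "demo catalog")
  info.name    # => "main"
  info.comment # => "demo catalog"
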
data/lib/polars/catalog/unity/column_info.rb
ADDED
@@ -0,0 +1,31 @@
+module Polars
+  class Catalog
+    module Unity
+      # Information for a column within a catalog table.
+      ColumnInfo =
+        ::Struct.new(
+          :name,
+          :type_name,
+          :type_text,
+          :type_json,
+          :position,
+          :comment,
+          :partition_index,
+          keyword_init: true
+        )
+
+      class ColumnInfo
+        # Get the native polars datatype of this column.
+        #
+        # @note
+        #   This functionality is considered **unstable**. It may be changed
+        #   at any point without it being considered a breaking change.
+        #
+        # @return [Object]
+        def get_polars_dtype
+          RbCatalogClient.type_json_to_polars_type(type_json)
+        end
+      end
+    end
+  end
+end
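
ColumnInfo#get_polars_dtype defers to the native RbCatalogClient and needs a Unity `type_json` payload, so only plain struct access is sketched here (field values are made up):

  col = Polars::Catalog::Unity::ColumnInfo.new(name: "id", type_name: "LONG", position: 0)
  col.name      # => "id"
  col.position  # => 0
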
data/lib/polars/catalog/unity/namespace_info.rb
ADDED
@@ -0,0 +1,21 @@
+module Polars
+  class Catalog
+    module Unity
+      # Information for a namespace within a catalog.
+      #
+      # This is also known by the name "schema" in unity catalog terminology.
+      NamespaceInfo =
+        ::Struct.new(
+          :name,
+          :comment,
+          :properties,
+          :storage_location,
+          :created_at,
+          :created_by,
+          :updated_at,
+          :updated_by,
+          keyword_init: true
+        )
+    end
+  end
+end
data/lib/polars/catalog/unity/table_info.rb
ADDED
@@ -0,0 +1,50 @@
+module Polars
+  class Catalog
+    module Unity
+      # Information for a catalog table.
+      TableInfo =
+        ::Struct.new(
+          :name,
+          :comment,
+          :table_id,
+          :table_type,
+          :storage_location,
+          :data_source_format,
+          :columns,
+          :properties,
+          :created_at,
+          :created_by,
+          :updated_at,
+          :updated_by,
+          keyword_init: true
+        )
+
+      class TableInfo
+        # Get the native polars schema of this table.
+        #
+        # @note
+        #   This functionality is considered **unstable**. It may be changed
+        #   at any point without it being considered a breaking change.
+        #
+        # @return [Schema]
+        def get_polars_schema
+          if columns.nil?
+            return nil
+          end
+
+          schema = Schema.new(check_dtypes: false)
+
+          columns.each do |column_info|
+            if schema[column_info.name]
+              msg = "duplicate column name: #{column_info.name}"
+              raise DuplicateError, msg
+            end
+            schema[column_info.name] = column_info.get_polars_dtype
+          end
+
+          schema
+        end
+      end
+    end
+  end
+end
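
TableInfo#get_polars_schema folds the columns array into a Schema, raising DuplicateError on repeated names. A hypothetical call, assuming a table_info obtained from the Catalog client added below:

  table_info = catalog.get_table_info("main", "default", "trips")
  table_info.get_polars_schema  # => Schema of name => dtype, or nil when columns is missing
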
data/lib/polars/catalog.rb
ADDED
@@ -0,0 +1,448 @@
+module Polars
+  # Unity catalog client.
+  class Catalog
+    # Initialize a catalog client.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param workspace_url [String]
+    #   URL of the workspace, or alternatively the URL of the Unity catalog
+    #   API endpoint.
+    # @param bearer_token [String]
+    #   Bearer token to authenticate with. This can also be set to:
+    #   * "auto": Automatically retrieve bearer tokens from the environment.
+    #   * "databricks-sdk": Use the Databricks SDK to retrieve and use the
+    #     bearer token from the environment.
+    # @param require_https [Boolean]
+    #   Require the `workspace_url` to use HTTPS.
+    def initialize(workspace_url, bearer_token: "auto", require_https: true)
+      if require_https && !workspace_url.start_with?("https://")
+        msg = (
+          "a non-HTTPS workspace_url was given. To " +
+          "allow non-HTTPS URLs, pass require_https: false."
+        )
+        raise ArgumentError, msg
+      end
+
+      if bearer_token == "auto"
+        bearer_token = nil
+      end
+
+      @client = RbCatalogClient.new(workspace_url, bearer_token)
+    end
+
+    # List the available catalogs.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @return [Array]
+    def list_catalogs
+      @client.list_catalogs
+    end
+
+    # List the available namespaces (unity schema) under the specified catalog.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param catalog_name [String]
+    #   Name of the catalog.
+    #
+    # @return [Array]
+    def list_namespaces(catalog_name)
+      @client.list_namespaces(catalog_name)
+    end
+
+    # List the available tables under the specified schema.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param catalog_name [String]
+    #   Name of the catalog.
+    # @param namespace [String]
+    #   Name of the namespace (unity schema).
+    #
+    # @return [Array]
+    def list_tables(catalog_name, namespace)
+      @client.list_tables(catalog_name, namespace)
+    end
+
+    # Retrieve the metadata of the specified table.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param catalog_name [String]
+    #   Name of the catalog.
+    # @param namespace [String]
+    #   Name of the namespace (unity schema).
+    # @param table_name [String]
+    #   Name of the table.
+    #
+    # @return [TableInfo]
+    def get_table_info(catalog_name, namespace, table_name)
+      @client.get_table_info(catalog_name, namespace, table_name)
+    end
+
+    # Retrieve the metadata of the specified table.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param catalog_name [String]
+    #   Name of the catalog.
+    # @param namespace [String]
+    #   Name of the namespace (unity schema).
+    # @param table_name [String]
+    #   Name of the table.
+    # @param delta_table_version [Object]
+    #   Version of the table to scan (Deltalake only).
+    # @param delta_table_options [Hash]
+    #   Additional keyword arguments while reading a Deltalake table.
+    # @param storage_options [Hash]
+    #   Options that indicate how to connect to a cloud provider.
+    #
+    #   The cloud providers currently supported are AWS, GCP, and Azure.
+    #   See supported keys here:
+    #
+    #   * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
+    #   * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
+    #   * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
+    #   * Hugging Face (`hf://`): Accepts an API key under the `token` parameter:
+    #     `{"token" => "..."}`, or by setting the `HF_TOKEN` environment variable.
+    #
+    #   If `storage_options` is not provided, Polars will try to infer the
+    #   information from environment variables.
+    #
+    # @return [LazyFrame]
+    def scan_table(
+      catalog_name,
+      namespace,
+      table_name,
+      delta_table_version: nil,
+      delta_table_options: nil,
+      storage_options: nil
+    )
+      table_info = get_table_info(catalog_name, namespace, table_name)
+      storage_location, data_source_format = _extract_location_and_data_format(
+        table_info, "scan table"
+      )
+
+      if ["DELTA", "DELTASHARING"].include?(data_source_format)
+        return Polars.scan_delta(
+          storage_location,
+          version: delta_table_version,
+          delta_table_options: delta_table_options,
+          storage_options: storage_options
+        )
+      end
+
+      if !delta_table_version.nil?
+        msg = (
+          "cannot apply delta_table_version for table of type " +
+          "#{data_source_format}"
+        )
+        raise ArgumentError, msg
+      end
+
+      if !delta_table_options.nil?
+        msg = (
+          "cannot apply delta_table_options for table of type " +
+          "#{data_source_format}"
+        )
+        raise ArgumentError, msg
+      end
+
+      if storage_options&.any?
+        storage_options = storage_options.to_a
+      else
+        # Handle empty dict input
+        storage_options = nil
+      end
+
+      raise Todo
+    end
+
+    # Write a DataFrame to a catalog table.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param df [DataFrame]
+    #   DataFrame to write.
+    # @param catalog_name [String]
+    #   Name of the catalog.
+    # @param namespace [String]
+    #   Name of the namespace (unity schema).
+    # @param table_name [String]
+    #   Name of the table.
+    # @param delta_mode ['error', 'append', 'overwrite', 'ignore', 'merge']
+    #   (For delta tables) How to handle existing data.
+    #
+    #   - If 'error', throw an error if the table already exists (default).
+    #   - If 'append', will add new data.
+    #   - If 'overwrite', will replace table with new data.
+    #   - If 'ignore', will not write anything if table already exists.
+    #   - If 'merge', return a `TableMerger` object to merge data from the DataFrame
+    #     with the existing data.
+    # @param delta_write_options [Hash]
+    #   (For delta tables) Additional keyword arguments while writing a
+    #   Delta lake Table.
+    #   See a list of supported write options [here](https://delta-io.github.io/delta-rs/api/delta_writer/#deltalake.write_deltalake).
+    # @param delta_merge_options [Hash]
+    #   (For delta tables) Keyword arguments which are required to `MERGE` a
+    #   Delta lake Table.
+    #   See a list of supported merge options [here](https://delta-io.github.io/delta-rs/api/delta_table/#deltalake.DeltaTable.merge).
+    # @param storage_options [Hash]
+    #   Options that indicate how to connect to a cloud provider.
+    #
+    #   The cloud providers currently supported are AWS, GCP, and Azure.
+    #   See supported keys here:
+    #
+    #   * [aws](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html)
+    #   * [gcp](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html)
+    #   * [azure](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html)
+    #   * Hugging Face (`hf://`): Accepts an API key under the `token` parameter:
+    #     `{"token" => "..."}`, or by setting the `HF_TOKEN` environment variable.
+    #
+    #   If `storage_options` is not provided, Polars will try to infer the
+    #   information from environment variables.
+    #
+    # @return [Object]
+    def write_table(
+      df,
+      catalog_name,
+      namespace,
+      table_name,
+      delta_mode: "error",
+      delta_write_options: nil,
+      delta_merge_options: nil,
+      storage_options: nil
+    )
+      table_info = get_table_info(catalog_name, namespace, table_name)
+      storage_location, data_source_format = _extract_location_and_data_format(
+        table_info, "scan table"
+      )
+
+      if ["DELTA", "DELTASHARING"].include?(data_source_format)
+        df.write_delta(
+          storage_location,
+          storage_options: storage_options,
+          mode: delta_mode,
+          delta_write_options: delta_write_options,
+          delta_merge_options: delta_merge_options
+        )
+      else
+        msg = (
+          "write_table: table format of " +
+          "#{catalog_name}.#{namespace}.#{table_name} " +
+          "(#{data_source_format}) is unsupported."
+        )
+        raise NotImplementedError, msg
+      end
+    end
+
+    # Create a catalog.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param catalog_name [String]
+    #   Name of the catalog.
+    # @param comment [String]
+    #   Leaves a comment about the catalog.
+    # @param storage_root [String]
+    #   Base location at which to store the catalog.
+    #
+    # @return [CatalogInfo]
+    def create_catalog(catalog_name, comment: nil, storage_root: nil)
+      @client.create_catalog(catalog_name, comment, storage_root)
+    end
+
+    # Delete a catalog.
+    #
+    # Note that depending on the table type and catalog server, this may not
+    # delete the actual data files from storage. For more details, please
+    # consult the documentation of the catalog provider you are using.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param catalog_name [String]
+    #   Name of the catalog.
+    # @param force [Boolean]
+    #   Forcibly delete the catalog even if it is not empty.
+    #
+    # @return [Object]
+    def delete_catalog(catalog_name, force: false)
+      @client.delete_catalog(catalog_name, force)
+    end
+
+    # Create a namespace (unity schema) in the catalog.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param catalog_name [String]
+    #   Name of the catalog.
+    # @param namespace [String]
+    #   Name of the namespace (unity schema).
+    # @param comment [String]
+    #   Leaves a comment about the table.
+    # @param storage_root [String]
+    #   Base location at which to store the namespace.
+    #
+    # @return [NamespaceInfo]
+    def create_namespace(
+      catalog_name,
+      namespace,
+      comment: nil,
+      storage_root: nil
+    )
+      @client.create_namespace(
+        catalog_name,
+        namespace,
+        comment,
+        storage_root
+      )
+    end
+
+    # Delete a namespace (unity schema) in the catalog.
+    #
+    # Note that depending on the table type and catalog server, this may not
+    # delete the actual data files from storage. For more details, please
+    # consult the documentation of the catalog provider you are using.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param catalog_name [String]
+    #   Name of the catalog.
+    # @param namespace [String]
+    #   Name of the namespace (unity schema).
+    # @param force [Boolean]
+    #   Forcibly delete the namespace even if it is not empty.
+    #
+    # @return [Object]
+    def delete_namespace(
+      catalog_name,
+      namespace,
+      force: false
+    )
+      @client.delete_namespace(catalog_name, namespace, force)
+    end
+
+    # Create a table in the catalog.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param catalog_name [String]
+    #   Name of the catalog.
+    # @param namespace [String]
+    #   Name of the namespace (unity schema).
+    # @param table_name [String]
+    #   Name of the table.
+    # @param schema [Object]
+    #   Schema of the table.
+    # @param table_type [Object]
+    #   Type of the table
+    # @param data_source_format [Object]
+    #   Storage format of the table.
+    # @param comment [String]
+    #   Leaves a comment about the table.
+    # @param storage_root [String]
+    #   Base location at which to store the table.
+    # @param properties [Hash]
+    #   Extra key-value metadata to store.
+    #
+    # @return [TableInfo]
+    def create_table(
+      catalog_name,
+      namespace,
+      table_name,
+      schema:,
+      table_type:,
+      data_source_format: nil,
+      comment: nil,
+      storage_root: nil,
+      properties: nil
+    )
+      @client.create_table(
+        catalog_name,
+        namespace,
+        table_name,
+        schema,
+        table_type,
+        data_source_format,
+        comment,
+        storage_root,
+        (properties || {}).to_a
+      )
+    end
+
+    # Delete the table stored at this location.
+    #
+    # Note that depending on the table type and catalog server, this may not
+    # delete the actual data files from storage. For more details, please
+    # consult the documentation of the catalog provider you are using.
+    #
+    # If you would like to perform manual deletions, the storage location of
+    # the files can be found using `get_table_info`.
+    #
+    # @note
+    #   This functionality is considered **unstable**. It may be changed
+    #   at any point without it being considered a breaking change.
+    #
+    # @param catalog_name [String]
+    #   Name of the catalog.
+    # @param namespace [String]
+    #   Name of the namespace (unity schema).
+    # @param table_name [String]
+    #   Name of the table.
+    #
+    # @return [Object]
+    def delete_table(
+      catalog_name,
+      namespace,
+      table_name
+    )
+      @client.delete_table(
+        catalog_name,
+        namespace,
+        table_name
+      )
+    end
+
+    private
+
+    def _extract_location_and_data_format(table_info, operation)
+      if table_info.storage_location.nil?
+        msg = "cannot #{operation}: no storage_location found"
+        raise ArgumentError, msg
+      end
+
+      if table_info.data_source_format.nil?
+        msg = "cannot #{operation}: no data_source_format found"
+        raise ArgumentError, msg
+      end
+
+      [table_info.storage_location, table_info.data_source_format]
+    end
+  end
+end
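
A rough usage sketch of the new client, assuming a reachable Unity catalog endpoint (the URL and object names are placeholders; note that scan_table currently raises Todo for non-Delta formats, as shown above):

  catalog = Polars::Catalog.new("https://example.cloud.databricks.com")
  catalog.list_catalogs                   # => [CatalogInfo, ...]
  catalog.list_namespaces("main")         # => [NamespaceInfo, ...]
  catalog.list_tables("main", "default")  # => [TableInfo, ...]
  lf = catalog.scan_table("main", "default", "trips")  # LazyFrame for DELTA / DELTASHARING tables
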
data/lib/polars/convert.rb
CHANGED
@@ -7,6 +7,16 @@ module Polars
     # @param data [Hash]
     #   Two-dimensional data represented as a hash. Hash must contain
     #   arrays.
+    # @param schema [Object]
+    #   The DataFrame schema may be declared in several ways:
+    #
+    #   * As a dict of {name:type} pairs; if type is None, it will be auto-inferred.
+    #   * As a list of column names; in this case types are automatically inferred.
+    #   * As a list of (name,type) pairs; this is equivalent to the dictionary form.
+    #
+    #   If you supply a list of column names that does not match the names in the
+    #   underlying data, the names given here will overwrite them. The number
+    #   of names given in the schema should match the underlying data dimensions.
     # @param columns [Array]
     #   Column labels to use for resulting DataFrame. If specified, overrides any
     #   labels already present in the data. Must match data dimensions.
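
Assuming this hunk documents the hash-based constructor (Polars.from_hash), the new schema parameter would presumably be used along these lines (a hedged sketch, not taken from the package itself):

  Polars.from_hash(
    {"a" => [1, 2], "b" => ["x", "y"]},
    schema: {"a" => Polars::Int64, "b" => Polars::Utf8}
  )
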