iflow-mcp_alaturqua-mcp-trino-python 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iflow_mcp_alaturqua_mcp_trino_python-0.7.1.dist-info/METADATA +446 -0
- iflow_mcp_alaturqua_mcp_trino_python-0.7.1.dist-info/RECORD +9 -0
- iflow_mcp_alaturqua_mcp_trino_python-0.7.1.dist-info/WHEEL +4 -0
- iflow_mcp_alaturqua_mcp_trino_python-0.7.1.dist-info/entry_points.txt +2 -0
- iflow_mcp_alaturqua_mcp_trino_python-0.7.1.dist-info/licenses/LICENSE +201 -0
- src/__init__.py +0 -0
- src/config.py +39 -0
- src/server.py +617 -0
- src/trino_client.py +596 -0
src/trino_client.py
ADDED
|
@@ -0,0 +1,596 @@
|
|
|
1
|
+
"""Client for interacting with Trino server.
|
|
2
|
+
|
|
3
|
+
This module provides a client for executing queries and managing operations on Trino,
|
|
4
|
+
including specific support for Iceberg table operations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
|
|
9
|
+
import trino
|
|
10
|
+
|
|
11
|
+
from config import TrinoConfig
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TrinoError(Exception):
    """Base class for Trino-related errors.

    Attributes:
        message (str): Human-readable description of the failure.
    """

    def __init__(self, message: str):
        """Initialize with error message.

        Args:
            message (str): Human-readable description of the failure.
        """
        self.message = message
        super().__init__(self.message)


class CatalogSchemaError(TrinoError):
    """Error raised when catalog or schema information is missing."""

    def __init__(self, message: str = "Both catalog and schema must be specified"):
        """Initialize with an optional, more specific message.

        Call sites in this module raise this error both without arguments and
        with a custom message, so the message has to be optional; otherwise
        the intended error is replaced by a ``TypeError``.

        Args:
            message (str): Description of what is missing. Defaults to the
                generic "both catalog and schema" text.
        """
        super().__init__(message)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class TrinoClient:
|
|
31
|
+
"""A client for interacting with Trino server.
|
|
32
|
+
|
|
33
|
+
This class provides methods to execute queries and perform administrative operations
|
|
34
|
+
on a Trino server, with special support for Iceberg table operations.
|
|
35
|
+
|
|
36
|
+
Attributes:
|
|
37
|
+
config (TrinoConfig): Configuration object containing Trino connection settings.
|
|
38
|
+
client (trino.dbapi.Connection): Active connection to the Trino server.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
def __init__(self, config: TrinoConfig):
|
|
42
|
+
"""Initialize the Trino client.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
config (TrinoConfig): Configuration object containing Trino connection settings.
|
|
46
|
+
"""
|
|
47
|
+
self.config = config
|
|
48
|
+
self.client = self._create_client()
|
|
49
|
+
|
|
50
|
+
def _create_client(self) -> trino.dbapi.Connection:
|
|
51
|
+
"""Create a new Trino DB API connection.
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
trino.dbapi.Connection: A new connection to the Trino server.
|
|
55
|
+
"""
|
|
56
|
+
return trino.dbapi.connect(
|
|
57
|
+
host=self.config.host,
|
|
58
|
+
port=self.config.port,
|
|
59
|
+
user=self.config.user,
|
|
60
|
+
catalog=self.config.catalog,
|
|
61
|
+
schema=self.config.schema,
|
|
62
|
+
http_scheme=self.config.http_scheme,
|
|
63
|
+
auth=self.config.auth,
|
|
64
|
+
source=self.config.source,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
def execute_query(self, query: str) -> str:
|
|
68
|
+
"""Execute a SQL query against Trino and return results as a formatted string.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
query (str): The SQL query to execute.
|
|
72
|
+
params (Optional[dict]): Dictionary of query parameters with primitive types.
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
str: JSON-formatted string containing query results or success message.
|
|
76
|
+
"""
|
|
77
|
+
cur: trino.dbapi.Cursor = self.client.cursor()
|
|
78
|
+
cur.execute(query)
|
|
79
|
+
if cur.description:
|
|
80
|
+
return json.dumps(
|
|
81
|
+
[dict(zip([col[0] for col in cur.description], row, strict=True)) for row in cur.fetchall()],
|
|
82
|
+
default=str,
|
|
83
|
+
)
|
|
84
|
+
return "Query executed successfully (no results to display)"
|
|
85
|
+
|
|
86
|
+
def get_query_history(self, limit: int) -> str:
|
|
87
|
+
"""Retrieve the history of executed queries.
|
|
88
|
+
|
|
89
|
+
Args:
|
|
90
|
+
limit (Optional[int]): Maximum number of queries to return. If None, returns all queries.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
str: JSON-formatted string containing query history.
|
|
94
|
+
"""
|
|
95
|
+
query = "SELECT * FROM system.runtime.queries"
|
|
96
|
+
if limit is not None:
|
|
97
|
+
query += f" LIMIT {limit}"
|
|
98
|
+
return self.execute_query(query)
|
|
99
|
+
|
|
100
|
+
def list_catalogs(self) -> str:
|
|
101
|
+
"""List all available catalogs.
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
str: Newline-separated list of catalog names.
|
|
105
|
+
"""
|
|
106
|
+
catalogs = [row["Catalog"] for row in json.loads(self.execute_query("SHOW CATALOGS"))]
|
|
107
|
+
return "\n".join(catalogs)
|
|
108
|
+
|
|
109
|
+
def list_schemas(self, catalog: str) -> str:
|
|
110
|
+
"""List all schemas in a catalog.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
catalog: The catalog name. If None, uses configured default.
|
|
114
|
+
|
|
115
|
+
Returns:
|
|
116
|
+
Newline-separated list of schema names.
|
|
117
|
+
|
|
118
|
+
Raises:
|
|
119
|
+
CatalogSchemaError: If no catalog is specified and none is configured.
|
|
120
|
+
"""
|
|
121
|
+
catalog = catalog or self.config.catalog
|
|
122
|
+
if not catalog:
|
|
123
|
+
msg = "Catalog must be specified"
|
|
124
|
+
raise CatalogSchemaError(msg)
|
|
125
|
+
query = f"SHOW SCHEMAS FROM {catalog}"
|
|
126
|
+
schemas = [row["Schema"] for row in json.loads(self.execute_query(query))]
|
|
127
|
+
return "\n".join(schemas)
|
|
128
|
+
|
|
129
|
+
def list_tables(self, catalog: str, schema: str) -> str:
|
|
130
|
+
"""List all tables in a schema.
|
|
131
|
+
|
|
132
|
+
Args:
|
|
133
|
+
catalog: The catalog name. If None, uses configured default.
|
|
134
|
+
schema: The schema name. If None, uses configured default.
|
|
135
|
+
|
|
136
|
+
Returns:
|
|
137
|
+
Newline-separated list of table names.
|
|
138
|
+
|
|
139
|
+
Raises:
|
|
140
|
+
CatalogSchemaError: If either catalog or schema is not specified and not configured.
|
|
141
|
+
"""
|
|
142
|
+
catalog = catalog or self.config.catalog
|
|
143
|
+
schema = schema or self.config.schema
|
|
144
|
+
if not catalog or not schema:
|
|
145
|
+
msg = "Both catalog and schema must be specified"
|
|
146
|
+
raise CatalogSchemaError(msg)
|
|
147
|
+
query = f"SHOW TABLES FROM {catalog}.{schema}"
|
|
148
|
+
tables = [row["Table"] for row in json.loads(self.execute_query(query))]
|
|
149
|
+
return "\n".join(tables)
|
|
150
|
+
|
|
151
|
+
def describe_table(self, catalog: str, schema: str, table: str) -> str:
|
|
152
|
+
"""Describe the structure of a table.
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
catalog (str): The catalog name. If None, uses configured default.
|
|
156
|
+
schema (str): The schema name. If None, uses configured default.
|
|
157
|
+
table (str): The name of the table.
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
str: JSON-formatted string containing table description.
|
|
161
|
+
|
|
162
|
+
Raises:
|
|
163
|
+
CatalogSchemaError: If either catalog or schema is not specified and not configured.
|
|
164
|
+
"""
|
|
165
|
+
catalog = catalog or self.config.catalog
|
|
166
|
+
schema = schema or self.config.schema
|
|
167
|
+
if not catalog or not schema:
|
|
168
|
+
raise CatalogSchemaError
|
|
169
|
+
query = f"DESCRIBE {catalog}.{schema}.{table}"
|
|
170
|
+
return self.execute_query(query)
|
|
171
|
+
|
|
172
|
+
def show_create_table(self, catalog: str, schema: str, table: str) -> str:
|
|
173
|
+
"""Show the CREATE TABLE statement for a table.
|
|
174
|
+
|
|
175
|
+
Args:
|
|
176
|
+
schema (str): The schema name. If None, uses configured default.
|
|
177
|
+
catalog (str): The catalog name. If None, uses configured default.
|
|
178
|
+
table (str): The name of the table.
|
|
179
|
+
|
|
180
|
+
Returns:
|
|
181
|
+
str: The CREATE TABLE statement for the specified table.
|
|
182
|
+
|
|
183
|
+
Raises:
|
|
184
|
+
CatalogSchemaError: If either catalog or schema is not specified and not configured.
|
|
185
|
+
"""
|
|
186
|
+
catalog = catalog or self.config.catalog
|
|
187
|
+
schema = schema or self.config.schema
|
|
188
|
+
if not catalog or not schema:
|
|
189
|
+
raise CatalogSchemaError
|
|
190
|
+
query = f"SHOW CREATE TABLE {catalog}.{schema}.{table}"
|
|
191
|
+
result = json.loads(self.execute_query(query))
|
|
192
|
+
return result[0]["Create Table"] if result else ""
|
|
193
|
+
|
|
194
|
+
def show_create_view(
|
|
195
|
+
self,
|
|
196
|
+
catalog: str,
|
|
197
|
+
schema: str,
|
|
198
|
+
view: str,
|
|
199
|
+
) -> str:
|
|
200
|
+
"""Show the CREATE VIEW statement for a view.
|
|
201
|
+
|
|
202
|
+
Args:
|
|
203
|
+
catalog (str): The catalog name. If None, uses configured default.
|
|
204
|
+
schema (str): The schema name. If None, uses configured default.
|
|
205
|
+
view (str): The name of the view.
|
|
206
|
+
|
|
207
|
+
Returns:
|
|
208
|
+
str: The CREATE VIEW statement for the specified view.
|
|
209
|
+
|
|
210
|
+
Raises:
|
|
211
|
+
CatalogSchemaError: If either catalog or schema is not specified and not configured.
|
|
212
|
+
"""
|
|
213
|
+
catalog = catalog or self.config.catalog
|
|
214
|
+
schema = schema or self.config.schema
|
|
215
|
+
if not catalog or not schema:
|
|
216
|
+
raise CatalogSchemaError
|
|
217
|
+
query = f"SHOW CREATE VIEW {catalog}.{schema}.{view}"
|
|
218
|
+
result = json.loads(self.execute_query(query))
|
|
219
|
+
return result[0]["Create View"] if result else ""
|
|
220
|
+
|
|
221
|
+
def show_stats(self, catalog: str, schema: str, table: str) -> str:
|
|
222
|
+
"""Show statistics for a table.
|
|
223
|
+
|
|
224
|
+
Args:
|
|
225
|
+
catalog (str): The catalog name. If None, uses configured default.
|
|
226
|
+
schema (str): The schema name. If None, uses configured default.
|
|
227
|
+
table (str): The name of the table.
|
|
228
|
+
|
|
229
|
+
Returns:
|
|
230
|
+
str: JSON-formatted string containing table statistics.
|
|
231
|
+
|
|
232
|
+
Raises:
|
|
233
|
+
CatalogSchemaError: If either catalog or schema is not specified and not configured.
|
|
234
|
+
"""
|
|
235
|
+
catalog = catalog or self.config.catalog
|
|
236
|
+
schema = schema or self.config.schema
|
|
237
|
+
if not catalog or not schema:
|
|
238
|
+
raise CatalogSchemaError
|
|
239
|
+
query = f"SHOW STATS FOR {catalog}.{schema}.{table}"
|
|
240
|
+
return self.execute_query(query)
|
|
241
|
+
|
|
242
|
+
def optimize(self, catalog: str, schema: str, table: str) -> str:
|
|
243
|
+
"""Optimize an Iceberg table by compacting small files.
|
|
244
|
+
|
|
245
|
+
Args:
|
|
246
|
+
catalog (str): The catalog name. If None, uses configured default.
|
|
247
|
+
schema (str): The schema name. If None, uses configured default.
|
|
248
|
+
table (str): The name of the table to optimize.
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
str: Success message indicating the table was optimized.
|
|
252
|
+
|
|
253
|
+
Raises:
|
|
254
|
+
CatalogSchemaError: If either catalog or schema is not specified and not configured.
|
|
255
|
+
"""
|
|
256
|
+
catalog = catalog or self.config.catalog
|
|
257
|
+
schema = schema or self.config.schema
|
|
258
|
+
if not catalog or not schema:
|
|
259
|
+
raise CatalogSchemaError
|
|
260
|
+
query = f"ALTER TABLE {catalog}.{schema}.{table} EXECUTE optimize"
|
|
261
|
+
self.execute_query(query)
|
|
262
|
+
return f"Table {catalog}.{schema}.{table} optimized successfully"
|
|
263
|
+
|
|
264
|
+
def optimize_manifests(self, table: str, catalog: str, schema: str) -> str:
|
|
265
|
+
"""Optimize manifest files for an Iceberg table.
|
|
266
|
+
|
|
267
|
+
This operation reorganizes and compacts the table's manifest files for improved
|
|
268
|
+
performance.
|
|
269
|
+
|
|
270
|
+
Args:
|
|
271
|
+
table (str): The name of the table.
|
|
272
|
+
catalog (Optional[str]): The catalog name. If None, uses configured default.
|
|
273
|
+
schema (Optional[str]): The schema name. If None, uses configured default.
|
|
274
|
+
|
|
275
|
+
Returns:
|
|
276
|
+
str: Success message indicating the manifests were optimized.
|
|
277
|
+
|
|
278
|
+
Raises:
|
|
279
|
+
CatalogSchemaError: If either catalog or schema is not specified and not configured.
|
|
280
|
+
"""
|
|
281
|
+
catalog = catalog or self.config.catalog
|
|
282
|
+
schema = schema or self.config.schema
|
|
283
|
+
if not catalog or not schema:
|
|
284
|
+
raise CatalogSchemaError
|
|
285
|
+
query = f"ALTER TABLE {catalog}.{schema}.{table} EXECUTE optimize_manifests"
|
|
286
|
+
self.execute_query(query)
|
|
287
|
+
return f"Manifests for table {catalog}.{schema}.{table} optimized successfully"
|
|
288
|
+
|
|
289
|
+
def expire_snapshots(
|
|
290
|
+
self,
|
|
291
|
+
catalog: str,
|
|
292
|
+
table: str,
|
|
293
|
+
schema: str,
|
|
294
|
+
retention_threshold: str = "7d",
|
|
295
|
+
) -> str:
|
|
296
|
+
"""Remove old snapshots from an Iceberg table.
|
|
297
|
+
|
|
298
|
+
This operation removes snapshots older than the specified retention threshold,
|
|
299
|
+
helping to manage storage and improve performance.
|
|
300
|
+
|
|
301
|
+
Args:
|
|
302
|
+
table: The name of the table.
|
|
303
|
+
retention_threshold: Age threshold for snapshot removal (e.g., "7d").
|
|
304
|
+
catalog: The catalog name. If None, uses configured default.
|
|
305
|
+
schema: The schema name. If None, uses configured default.
|
|
306
|
+
|
|
307
|
+
Returns:
|
|
308
|
+
Success message indicating snapshots were expired.
|
|
309
|
+
|
|
310
|
+
Raises:
|
|
311
|
+
CatalogSchemaError: If either catalog or schema is not specified and not configured.
|
|
312
|
+
"""
|
|
313
|
+
catalog = catalog or self.config.catalog
|
|
314
|
+
schema = schema or self.config.schema
|
|
315
|
+
if not catalog or not schema:
|
|
316
|
+
msg = "Both catalog and schema must be specified"
|
|
317
|
+
raise CatalogSchemaError(msg)
|
|
318
|
+
query = (
|
|
319
|
+
f"ALTER TABLE {catalog}.{schema}.{table} "
|
|
320
|
+
f"EXECUTE expire_snapshots(retention_threshold => '{retention_threshold}')"
|
|
321
|
+
)
|
|
322
|
+
self.execute_query(query)
|
|
323
|
+
return f"Snapshots older than {retention_threshold} expired for table {catalog}.{schema}.{table}"
|
|
324
|
+
|
|
325
|
+
def show_catalog_tree(self) -> str:
|
|
326
|
+
"""Show a hierarchical tree view of all catalogs, schemas, and tables.
|
|
327
|
+
|
|
328
|
+
Returns:
|
|
329
|
+
A formatted string showing the catalog > schema > table hierarchy.
|
|
330
|
+
"""
|
|
331
|
+
tree = []
|
|
332
|
+
catalogs = [row["Catalog"] for row in json.loads(self.execute_query("SHOW CATALOGS"))]
|
|
333
|
+
for catalog in sorted(catalogs):
|
|
334
|
+
tree.append(f"{catalog}")
|
|
335
|
+
try:
|
|
336
|
+
schemas = [row["Schema"] for row in json.loads(self.execute_query(f"SHOW SCHEMAS FROM {catalog}"))]
|
|
337
|
+
for schema in sorted(schemas):
|
|
338
|
+
tree.append(f"{schema}")
|
|
339
|
+
try:
|
|
340
|
+
tables = [
|
|
341
|
+
row["Table"]
|
|
342
|
+
for row in json.loads(self.execute_query(f"SHOW TABLES FROM {catalog}.{schema}"))
|
|
343
|
+
]
|
|
344
|
+
tree.extend(f" {table}" for table in sorted(tables))
|
|
345
|
+
except (trino.dbapi.TrinoQueryError, KeyError):
|
|
346
|
+
tree.append(" Unable to list tables")
|
|
347
|
+
except (trino.dbapi.TrinoQueryError, KeyError):
|
|
348
|
+
tree.append("Unable to list schemas")
|
|
349
|
+
return "\n".join(tree) if tree else "No catalogs found"
|
|
350
|
+
|
|
351
|
+
def show_table_properties(self, table: str, catalog: str, schema: str) -> str:
|
|
352
|
+
"""Show Iceberg table properties.
|
|
353
|
+
|
|
354
|
+
Args:
|
|
355
|
+
table: The name of the table
|
|
356
|
+
catalog: Optional catalog name (defaults to configured catalog)
|
|
357
|
+
schema: Optional schema name (defaults to configured schema)
|
|
358
|
+
|
|
359
|
+
Returns:
|
|
360
|
+
str: JSON-formatted string containing table properties
|
|
361
|
+
"""
|
|
362
|
+
catalog = catalog or self.config.catalog
|
|
363
|
+
schema = schema or self.config.schema
|
|
364
|
+
if not catalog or not schema:
|
|
365
|
+
raise CatalogSchemaError
|
|
366
|
+
query = 'SELECT * FROM "{}$properties"'
|
|
367
|
+
table_identifier = f"{catalog}.{schema}.{table}"
|
|
368
|
+
return self.execute_query(query.format(table_identifier))
|
|
369
|
+
|
|
370
|
+
def show_table_history(self, table: str, catalog: str, schema: str) -> str:
|
|
371
|
+
"""Show Iceberg table history/changelog.
|
|
372
|
+
|
|
373
|
+
The history contains:
|
|
374
|
+
- made_current_at: TIMESTAMP(3) WITH TIME ZONE - Time when snapshot became active
|
|
375
|
+
- snapshot_id: BIGINT - Identifier of the snapshot
|
|
376
|
+
- parent_id: BIGINT - Identifier of the parent snapshot
|
|
377
|
+
- is_current_ancestor: BOOLEAN - Whether this snapshot is an ancestor of current
|
|
378
|
+
|
|
379
|
+
Args:
|
|
380
|
+
table: The name of the table
|
|
381
|
+
catalog: Optional catalog name (defaults to configured catalog)
|
|
382
|
+
schema: Optional schema name (defaults to configured schema)
|
|
383
|
+
|
|
384
|
+
Returns:
|
|
385
|
+
str: JSON-formatted string containing table history
|
|
386
|
+
"""
|
|
387
|
+
catalog = catalog or self.config.catalog
|
|
388
|
+
schema = schema or self.config.schema
|
|
389
|
+
if not catalog or not schema:
|
|
390
|
+
raise CatalogSchemaError
|
|
391
|
+
table_identifier = f"{catalog}.{schema}.{table}"
|
|
392
|
+
query = 'SELECT * FROM "{}$history"'
|
|
393
|
+
return self.execute_query(query.format(table_identifier))
|
|
394
|
+
|
|
395
|
+
def show_metadata_log_entries(self, table: str, catalog: str, schema: str) -> str:
|
|
396
|
+
"""Show Iceberg table metadata log entries.
|
|
397
|
+
|
|
398
|
+
The metadata log contains:
|
|
399
|
+
- timestamp: TIMESTAMP(3) WITH TIME ZONE - Time when metadata was created
|
|
400
|
+
- file: VARCHAR - Location of the metadata file
|
|
401
|
+
- latest_snapshot_id: BIGINT - ID of latest snapshot when metadata was updated
|
|
402
|
+
- latest_schema_id: INTEGER - ID of latest schema when metadata was updated
|
|
403
|
+
- latest_sequence_number: BIGINT - Data sequence number of metadata file
|
|
404
|
+
|
|
405
|
+
Args:
|
|
406
|
+
table: The name of the table
|
|
407
|
+
catalog: Optional catalog name (defaults to configured catalog)
|
|
408
|
+
schema: Optional schema name (defaults to configured schema)
|
|
409
|
+
|
|
410
|
+
Returns:
|
|
411
|
+
str: JSON-formatted string containing metadata log entries
|
|
412
|
+
"""
|
|
413
|
+
catalog = catalog or self.config.catalog
|
|
414
|
+
schema = schema or self.config.schema
|
|
415
|
+
if not catalog or not schema:
|
|
416
|
+
raise CatalogSchemaError
|
|
417
|
+
query = 'SELECT * FROM "{}$metadata_log_entries"'
|
|
418
|
+
table_identifier = f"{catalog}.{schema}.{table}"
|
|
419
|
+
return self.execute_query(query.format(table_identifier))
|
|
420
|
+
|
|
421
|
+
def show_snapshots(self, table: str, catalog: str, schema: str) -> str:
|
|
422
|
+
"""Show Iceberg table snapshots.
|
|
423
|
+
|
|
424
|
+
The snapshots table contains:
|
|
425
|
+
- committed_at: TIMESTAMP(3) WITH TIME ZONE - Time when snapshot became active
|
|
426
|
+
- snapshot_id: BIGINT - Identifier for the snapshot
|
|
427
|
+
- parent_id: BIGINT - Identifier for the parent snapshot
|
|
428
|
+
- operation: VARCHAR - Type of operation (append/replace/overwrite/delete)
|
|
429
|
+
- manifest_list: VARCHAR - List of Avro manifest files
|
|
430
|
+
- summary: map(VARCHAR, VARCHAR) - Summary of changes from previous snapshot
|
|
431
|
+
|
|
432
|
+
Args:
|
|
433
|
+
table: The name of the table
|
|
434
|
+
catalog: Optional catalog name (defaults to configured catalog)
|
|
435
|
+
schema: Optional schema name (defaults to configured schema)
|
|
436
|
+
|
|
437
|
+
Returns:
|
|
438
|
+
str: JSON-formatted string containing table snapshots
|
|
439
|
+
"""
|
|
440
|
+
catalog = catalog or self.config.catalog
|
|
441
|
+
schema = schema or self.config.schema
|
|
442
|
+
if not catalog or not schema:
|
|
443
|
+
raise CatalogSchemaError
|
|
444
|
+
table_identifier = f"{catalog}.{schema}.{table}$snapshots"
|
|
445
|
+
query = 'SELECT * FROM "{}"'
|
|
446
|
+
return self.execute_query(query.format(table_identifier))
|
|
447
|
+
|
|
448
|
+
def show_manifests(self, table: str, catalog: str, schema: str, all_snapshots: bool = False) -> str:
|
|
449
|
+
"""Show Iceberg table manifests for current or all snapshots.
|
|
450
|
+
|
|
451
|
+
The manifests table contains:
|
|
452
|
+
- path: VARCHAR - Manifest file location
|
|
453
|
+
- length: BIGINT - Manifest file length
|
|
454
|
+
- partition_spec_id: INTEGER - ID of partition spec used
|
|
455
|
+
- added_snapshot_id: BIGINT - ID of snapshot when manifest was added
|
|
456
|
+
- added_data_files_count: INTEGER - Number of data files with status ADDED
|
|
457
|
+
- added_rows_count: BIGINT - Total rows in ADDED files
|
|
458
|
+
- existing_data_files_count: INTEGER - Number of EXISTING files
|
|
459
|
+
- existing_rows_count: BIGINT - Total rows in EXISTING files
|
|
460
|
+
- deleted_data_files_count: INTEGER - Number of DELETED files
|
|
461
|
+
- deleted_rows_count: BIGINT - Total rows in DELETED files
|
|
462
|
+
- partition_summaries: ARRAY(ROW(...)) - Partition range metadata
|
|
463
|
+
|
|
464
|
+
Args:
|
|
465
|
+
table: The name of the table
|
|
466
|
+
catalog: Optional catalog name (defaults to configured catalog)
|
|
467
|
+
schema: Optional schema name (defaults to configured schema)
|
|
468
|
+
all_snapshots: If True, show manifests from all snapshots
|
|
469
|
+
|
|
470
|
+
Returns:
|
|
471
|
+
str: JSON-formatted string containing table manifests
|
|
472
|
+
"""
|
|
473
|
+
catalog = catalog or self.config.catalog
|
|
474
|
+
schema = schema or self.config.schema
|
|
475
|
+
if not catalog or not schema:
|
|
476
|
+
raise CatalogSchemaError
|
|
477
|
+
table_type = "all_manifests" if all_snapshots else "manifests"
|
|
478
|
+
query = 'SELECT * FROM "{}${}"'
|
|
479
|
+
table_identifier = f"{catalog}.{schema}.{table}"
|
|
480
|
+
return self.execute_query(query.format(table_identifier, table_type))
|
|
481
|
+
|
|
482
|
+
def show_partitions(self, table: str, catalog: str, schema: str) -> str:
|
|
483
|
+
"""Show Iceberg table partitions.
|
|
484
|
+
|
|
485
|
+
The partitions table contains:
|
|
486
|
+
- partition: ROW(...) - Mapping of partition column names to values
|
|
487
|
+
- record_count: BIGINT - Number of records in partition
|
|
488
|
+
- file_count: BIGINT - Number of files in partition
|
|
489
|
+
- total_size: BIGINT - Total size of files in partition
|
|
490
|
+
- data: ROW(...) - Partition range metadata with min/max values and null/nan counts
|
|
491
|
+
|
|
492
|
+
Args:
|
|
493
|
+
table: The name of the table
|
|
494
|
+
catalog: Optional catalog name (defaults to configured catalog)
|
|
495
|
+
schema: Optional schema name (defaults to configured schema)
|
|
496
|
+
|
|
497
|
+
Returns:
|
|
498
|
+
str: JSON-formatted string containing table partitions
|
|
499
|
+
"""
|
|
500
|
+
catalog = catalog or self.config.catalog
|
|
501
|
+
schema = schema or self.config.schema
|
|
502
|
+
if not catalog or not schema:
|
|
503
|
+
raise CatalogSchemaError
|
|
504
|
+
table_identifier = f"{catalog}.{schema}.{table}$partitions"
|
|
505
|
+
query = 'SELECT * FROM "{}"'
|
|
506
|
+
return self.execute_query(query.format(table_identifier))
|
|
507
|
+
|
|
508
|
+
def show_files(self, table: str, catalog: str, schema: str) -> str:
|
|
509
|
+
"""Show Iceberg table data files in current snapshot.
|
|
510
|
+
|
|
511
|
+
The files table contains:
|
|
512
|
+
- content: INTEGER - Type of content (0=DATA, 1=POSITION_DELETES, 2=EQUALITY_DELETES)
|
|
513
|
+
- file_path: VARCHAR - Data file location
|
|
514
|
+
- file_format: VARCHAR - Format of the data file
|
|
515
|
+
- record_count: BIGINT - Number of records in file
|
|
516
|
+
- file_size_in_bytes: BIGINT - File size
|
|
517
|
+
- column_sizes: map(INTEGER, BIGINT) - Column ID to size mapping
|
|
518
|
+
- value_counts: map(INTEGER, BIGINT) - Column ID to value count mapping
|
|
519
|
+
- null_value_counts: map(INTEGER, BIGINT) - Column ID to null count mapping
|
|
520
|
+
- nan_value_counts: map(INTEGER, BIGINT) - Column ID to NaN count mapping
|
|
521
|
+
- lower_bounds: map(INTEGER, VARCHAR) - Column ID to lower bound mapping
|
|
522
|
+
- upper_bounds: map(INTEGER, VARCHAR) - Column ID to upper bound mapping
|
|
523
|
+
- key_metadata: VARBINARY - Encryption key metadata
|
|
524
|
+
- split_offsets: array(BIGINT) - Recommended split locations
|
|
525
|
+
- equality_ids: array(INTEGER) - Field IDs for equality deletes
|
|
526
|
+
|
|
527
|
+
Args:
|
|
528
|
+
table: The name of the table
|
|
529
|
+
catalog: Optional catalog name (defaults to configured catalog)
|
|
530
|
+
schema: Optional schema name (defaults to configured schema)
|
|
531
|
+
|
|
532
|
+
Returns:
|
|
533
|
+
str: JSON-formatted string containing table files info
|
|
534
|
+
"""
|
|
535
|
+
catalog = catalog or self.config.catalog
|
|
536
|
+
schema = schema or self.config.schema
|
|
537
|
+
if not catalog or not schema:
|
|
538
|
+
raise CatalogSchemaError
|
|
539
|
+
table_identifier = f"{catalog}.{schema}.{table}$files"
|
|
540
|
+
query = 'SELECT * FROM "{}"'
|
|
541
|
+
return self.execute_query(query.format(table_identifier))
|
|
542
|
+
|
|
543
|
+
def show_entries(self, table: str, catalog: str, schema: str, all_snapshots: bool = False) -> str:
|
|
544
|
+
"""Show Iceberg table manifest entries for current or all snapshots.
|
|
545
|
+
|
|
546
|
+
The entries table contains:
|
|
547
|
+
- status: INTEGER - Status of entry (0=EXISTING, 1=ADDED, 2=DELETED)
|
|
548
|
+
- snapshot_id: BIGINT - ID of the snapshot
|
|
549
|
+
- sequence_number: BIGINT - Data sequence number
|
|
550
|
+
- file_sequence_number: BIGINT - File sequence number
|
|
551
|
+
- data_file: ROW(...) - File metadata including path, format, size etc
|
|
552
|
+
- readable_metrics: JSON - Human-readable file metrics
|
|
553
|
+
|
|
554
|
+
Args:
|
|
555
|
+
table: The name of the table
|
|
556
|
+
catalog: Optional catalog name (defaults to configured catalog)
|
|
557
|
+
schema: Optional schema name (defaults to configured schema)
|
|
558
|
+
all_snapshots: If True, show entries from all snapshots
|
|
559
|
+
|
|
560
|
+
Returns:
|
|
561
|
+
str: JSON-formatted string containing manifest entries
|
|
562
|
+
"""
|
|
563
|
+
catalog = catalog or self.config.catalog
|
|
564
|
+
schema = schema or self.config.schema
|
|
565
|
+
if not catalog or not schema:
|
|
566
|
+
raise CatalogSchemaError
|
|
567
|
+
table_name = f"{catalog}.{schema}.{table}${'all_' if all_snapshots else ''}entries"
|
|
568
|
+
query = 'SELECT * FROM "{}"'
|
|
569
|
+
return self.execute_query(query.format(table_name))
|
|
570
|
+
|
|
571
|
+
def show_refs(self, table: str, catalog: str, schema: str) -> str:
|
|
572
|
+
"""Show Iceberg table references (branches and tags).
|
|
573
|
+
|
|
574
|
+
The refs table contains:
|
|
575
|
+
- name: VARCHAR - Name of the reference
|
|
576
|
+
- type: VARCHAR - Type of reference (BRANCH or TAG)
|
|
577
|
+
- snapshot_id: BIGINT - ID of referenced snapshot
|
|
578
|
+
- max_reference_age_in_ms: BIGINT - Max age before reference expiry
|
|
579
|
+
- min_snapshots_to_keep: INTEGER - Min snapshots to keep (branches only)
|
|
580
|
+
- max_snapshot_age_in_ms: BIGINT - Max snapshot age in branch
|
|
581
|
+
|
|
582
|
+
Args:
|
|
583
|
+
table: The name of the table
|
|
584
|
+
catalog: Optional catalog name (defaults to configured catalog)
|
|
585
|
+
schema: Optional schema name (defaults to configured schema)
|
|
586
|
+
|
|
587
|
+
Returns:
|
|
588
|
+
str: JSON-formatted string containing table references
|
|
589
|
+
"""
|
|
590
|
+
catalog = catalog or self.config.catalog
|
|
591
|
+
schema = schema or self.config.schema
|
|
592
|
+
if not catalog or not schema:
|
|
593
|
+
raise CatalogSchemaError
|
|
594
|
+
table_identifier = f"{catalog}.{schema}.{table}$refs"
|
|
595
|
+
query = 'SELECT * FROM "{}"'
|
|
596
|
+
return self.execute_query(query.format(table_identifier))
|