maxc-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxc_cli/__init__.py +5 -0
- maxc_cli/__main__.py +6 -0
- maxc_cli/app.py +3406 -0
- maxc_cli/audit.py +18 -0
- maxc_cli/auth_providers.py +471 -0
- maxc_cli/backend/__init__.py +8 -0
- maxc_cli/backend/auth.py +144 -0
- maxc_cli/backend/data.py +87 -0
- maxc_cli/backend/job.py +304 -0
- maxc_cli/backend/meta.py +312 -0
- maxc_cli/backend/odps.py +130 -0
- maxc_cli/backend/query.py +148 -0
- maxc_cli/cache.py +662 -0
- maxc_cli/cli.py +1274 -0
- maxc_cli/config.py +406 -0
- maxc_cli/exceptions.py +99 -0
- maxc_cli/helpers.py +964 -0
- maxc_cli/models.py +533 -0
- maxc_cli/output.py +75 -0
- maxc_cli/store.py +123 -0
- maxc_cli/utils.py +136 -0
- maxc_cli-0.1.0.dist-info/METADATA +220 -0
- maxc_cli-0.1.0.dist-info/RECORD +26 -0
- maxc_cli-0.1.0.dist-info/WHEEL +5 -0
- maxc_cli-0.1.0.dist-info/entry_points.txt +2 -0
- maxc_cli-0.1.0.dist-info/top_level.txt +1 -0
maxc_cli/backend/meta.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
"""Meta-related mixin for OdpsBackend."""
|
|
2
|
+
|
|
3
|
+
from itertools import islice
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from ..config import TableColumn, TableDefinition
|
|
7
|
+
from ..helpers import (
|
|
8
|
+
_dt_to_iso,
|
|
9
|
+
build_freshness_info,
|
|
10
|
+
build_latest_partition_info,
|
|
11
|
+
partition_spec_text,
|
|
12
|
+
record_to_dict,
|
|
13
|
+
translate_odps_error,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MetaMixin:
    """Mixin providing metadata methods for the ODPS backend.

    Assumes the host class exposes ``self.client`` (an ODPS client),
    ``self.project`` (current project name), and
    ``self._get_owner_display_name()`` (provided by a sibling mixin).
    """

    def list_tables(self) -> 'list[TableDefinition]':
        """List tables in the current project, sorted by name.

        Only lightweight stubs are returned (see ``_table_stub``); no
        per-table schema access is performed, which keeps listing cheap.
        """
        tables: 'list[TableDefinition]' = []
        try:
            for table in self.client.list_tables(project=self.project):
                tables.append(self._table_stub(table))
        except Exception as exc:
            raise translate_odps_error(exc) from exc
        return sorted(tables, key=lambda item: item.name)

    def describe_table(self, table_name: 'str') -> 'TableDefinition':
        """Describe a table, including up to 20 partitions and 2 sample rows."""
        table = self._get_table(table_name)
        partitions = self._list_partitions(table, limit=20)
        sample_rows = self._table_head(table, limit=2)
        definition = self._table_definition_from_table(table)
        definition.partitions = partitions
        definition.sample_rows = sample_rows
        return definition

    def search_tables(self, keyword: 'str') -> 'list[dict[str, Any]]':
        """Search tables by keyword.

        Each whitespace-separated token scores +5 when found in the table
        name/description; column matches (+2 each) are only consulted as a
        fallback when the name/description did not match at all.
        Results are sorted by descending score, then table name.
        """
        tokens = [item.lower() for item in keyword.split() if item.strip()] or [keyword.lower()]
        matches: 'list[dict[str, Any]]' = []
        for table in self.list_tables():
            score = 0
            searchable = f"{table.name} {table.description}".lower()
            matched_columns: 'list[str]' = []
            for token in tokens:
                if token in searchable:
                    score += 5
            if score == 0:
                # Fallback: match against column names/comments.
                # NOTE(review): list_tables() returns stubs with empty
                # columns, so this branch only fires for callers that
                # provide fully-populated definitions — confirm intent.
                for column in table.columns:
                    text = f"{column.name} {column.comment}".lower()
                    if any(token in text for token in tokens):
                        score += 2
                        matched_columns.append(column.name)
            if score:
                matches.append(
                    {
                        "table_name": table.name,
                        "description": table.description,
                        "score": score,
                        "matched_columns": matched_columns,
                    }
                )
        return sorted(matches, key=lambda item: (-item["score"], item["table_name"]))

    def search_columns(self, keyword: 'str') -> 'list[dict[str, Any]]':
        """Search columns by keyword.

        Per token: +8 for a column-name hit, +4 for a name/comment hit,
        +2 for a table-qualified hit (the tiers nest, so a column-name hit
        scores 14 total). Sorted by descending score, table, then column.
        """
        tokens = [item.lower() for item in keyword.split() if item.strip()] or [keyword.lower()]
        matches: 'list[dict[str, Any]]' = []
        for table in self.list_tables():
            for column in table.columns:
                score = 0
                text = f"{column.name} {column.comment}".lower()
                searchable = f"{table.name} {text}".lower()
                for token in tokens:
                    if token in column.name.lower():
                        score += 8
                    if token in text:
                        score += 4
                    if token in searchable:
                        score += 2
                if score:
                    matches.append(
                        {
                            "table_name": table.name,
                            "column_name": column.name,
                            "type": column.type,
                            "comment": column.comment,
                            "score": score,
                        }
                    )
        return sorted(matches, key=lambda item: (-item["score"], item["table_name"], item["column_name"]))

    def latest_partition_info(self, table_name: 'str') -> 'tuple[dict[str, Any], list[str]]':
        """Get latest partition info for a table as ``(payload, warnings)``."""
        table = self._get_table(table_name)
        definition = self._table_definition_from_table(table)
        return self._latest_partition_info_from_table(table, definition)

    def freshness_info(self, table_name: 'str') -> 'tuple[dict[str, Any], list[str]]':
        """Get data freshness info for a table as ``(payload, warnings)``."""
        table = self._get_table(table_name)
        definition = self._table_definition_from_table(table)
        latest_payload, warnings = self._latest_partition_info_from_table(table, definition)
        return build_freshness_info(definition, latest_payload, warnings=warnings)

    def lineage_info(self, table_name: 'str') -> 'tuple[dict[str, Any], list[str]]':
        """Get table lineage info (placeholder - API not yet integrated).

        The table is still resolved first so a missing table surfaces as a
        translated error instead of a misleading "unsupported" payload.
        """
        table = self._get_table(table_name)
        definition = self._table_definition_from_table(table)
        return (
            {
                "table_name": definition.name,
                "supported": False,
                "lineage_source": "unavailable",
                "coverage": "unsupported",
                "upstream_tables": [],
                "downstream_tables": [],
                "limitation": "The current version does not integrate with the MaxCompute lineage API.",
            },
            ["The current version does not integrate with the MaxCompute lineage API, so lineage returns an explicit unsupported placeholder result."],
        )

    def list_projects(self) -> 'list[dict[str, Any]]':
        """List all projects owned by the current user.

        Note: This only returns basic info (name) to avoid triggering project.reload()
        which requires Read permission on each project. Use get_project_info() for details.
        """
        projects: 'list[dict[str, Any]]' = []
        try:
            # Use the current user's display name as the owner filter.
            owner = self._get_owner_display_name()
            for project in self.client.list_projects(owner=owner):
                # Only expose fields that list_projects provides directly.
                # Do NOT touch comment/owner/properties here: accessing them
                # triggers a reload that requires Read permission.
                projects.append({
                    "name": project.name,
                })
        except Exception as exc:
            raise translate_odps_error(exc, "list_projects") from exc
        return sorted(projects, key=lambda item: item["name"])

    def list_schemas(self, *, project: 'str | None' = None) -> 'list[dict[str, Any]]':
        """List all schemas in a project (defaults to the current project)."""
        target_project = project or self.project
        schemas: 'list[dict[str, Any]]' = []
        try:
            for schema in self.client.list_schemas(project=target_project):
                schemas.append({
                    "name": schema.name,
                })
        except Exception as exc:
            raise translate_odps_error(exc, "list_schemas") from exc
        return sorted(schemas, key=lambda item: item["name"])

    def get_project_info(self, project_name: 'str | None' = None) -> 'dict[str, Any]':
        """Get detailed information about a project (defaults to current)."""
        target = project_name or self.project
        try:
            project = self.client.get_project(target)
            # The object returned by get_project must be reloaded before its
            # full attributes are available; attribute access triggers that
            # lazy loading automatically.
            props = getattr(project, "properties", {}) or {}
            extended_props = getattr(project, "extended_properties", {}) or {}

            return {
                "name": project.name,
                "project_type": getattr(project, "type", None),
                "comment": getattr(project, "comment", None),
                "owner": getattr(project, "owner", None),
                "state": getattr(project, "state", None) or getattr(project, "status", None),
                "creation_time": _dt_to_iso(getattr(project, "creation_time", None)),
                "last_modified_time": _dt_to_iso(getattr(project, "last_modified_time", None)),
                "region": getattr(project, "region_id", None),
                "allow_3_tier": props.get("allow3tier") or extended_props.get("allow3tier"),
                "is_external_catalog_bound": props.get("isExternalCatalogBound") or extended_props.get("isExternalCatalogBound"),
            }
        except Exception as exc:
            raise translate_odps_error(exc, "get_project_info") from exc

    # Private methods for metadata handling

    def _get_table(self, table_name: 'str', *, project: 'str | None' = None):
        """Get ODPS table by name, translating SDK errors."""
        try:
            return self.client.get_table(table_name, project=project or self.project)
        except Exception as exc:
            raise translate_odps_error(exc) from exc

    def _table_stub(self, table) -> 'TableDefinition':
        """Create a minimal TableDefinition from table object (name only, no schema access)."""
        return TableDefinition(
            name=table.name,
            description="",
            columns=[],
            sample_rows=[],
            partitions=[],
            upstream_tables=[],
            downstream_tables=[],
            partition_columns=[],
            owner=None,
            created_at=None,
            updated_at=None,
            table_type="TABLE",
            size_bytes=None,
        )

    def _table_definition_from_table(self, table) -> 'TableDefinition':
        """Create a full TableDefinition from ODPS table object.

        Reads the table schema (regular + partition columns) and basic
        metadata; translates any SDK failure into a domain error.
        """
        try:
            columns = [
                TableColumn(
                    name=column.name,
                    type=str(column.type),
                    comment=getattr(column, "comment", "") or "",
                )
                for column in getattr(table.table_schema, "columns", [])
            ]
            partition_columns = [
                TableColumn(
                    name=column.name,
                    type=str(column.type),
                    comment=getattr(column, "comment", "") or "",
                )
                for column in getattr(table.table_schema, "partitions", [])
            ]
            return TableDefinition(
                name=table.name,
                description=getattr(table, "comment", "") or "",
                columns=columns,
                sample_rows=[],
                partitions=[],
                upstream_tables=[],
                downstream_tables=[],
                partition_columns=partition_columns,
                owner=getattr(table, "owner", None),
                created_at=_dt_to_iso(getattr(table, "creation_time", None)),
                updated_at=_dt_to_iso(getattr(table, "last_data_modified_time", None)),
                table_type="VIRTUAL_VIEW" if getattr(table, "is_virtual_view", False) else "TABLE",
                size_bytes=(
                    int(getattr(table, "size", 0))
                    if getattr(table, "size", None) is not None
                    else None
                ),
                extra_metadata={"lifecycle": getattr(table, "lifecycle", None)},
            )
        except Exception as exc:
            raise translate_odps_error(exc) from exc

    def _table_head(self, table, *, limit: 'int') -> 'list[dict[str, Any]]':
        """Get first N rows from a table; best-effort, [] on any error."""
        try:
            reader = table.head(limit)
            rows = list(islice(reader, limit))
        except Exception:
            # Sampling is non-essential (e.g. permission or view issues);
            # degrade silently rather than failing the whole describe.
            return []
        columns = [column.name for column in table.table_schema.columns]
        return [record_to_dict(columns, record.values) for record in rows]

    def _list_partitions(self, table, *, limit: 'int') -> 'list[str]':
        """List up to *limit* partition spec strings for a table.

        Best-effort: any failure (e.g. non-partitioned table) yields [].
        """
        try:
            partitions = list(islice(table.iterate_partitions(), limit))
        except Exception:
            return []
        return [str(partition.partition_spec) for partition in partitions]

    def _latest_partition_info_from_table(
        self,
        table,
        definition: 'TableDefinition',
    ) -> 'tuple[dict[str, Any], list[str]]':
        """Get latest partition info from ODPS table object.

        Prefers the server-side ``get_max_partition`` fast path; falls back
        to scanning a bounded number of visible partitions.
        """
        latest_partition = self._max_partition_spec(table)
        if latest_partition:
            return build_latest_partition_info(
                definition,
                source="odps_get_max_partition",
                latest_partition_override=latest_partition,
                visible_partition_count=None,
            )

        scan_limit = 200  # bound the fallback scan for very large tables
        partitions = self._list_partitions(table, limit=scan_limit)
        payload, warnings = build_latest_partition_info(
            definition,
            source="odps_iterate_partitions",
            partitions=partitions,
            visible_partition_count=len(partitions),
        )
        if definition.partition_columns and len(partitions) == scan_limit:
            warnings.append("Only the first 200 visible partitions were inspected. For very large tables, verify the result in the MaxCompute console as well.")
        return payload, warnings

    def _max_partition_spec(self, table) -> 'str | None':
        """Get max partition spec from table using get_max_partition if available.

        Tries ``skip_empty=True`` first; a TypeError (older SDK signature)
        retries without kwargs, while any other failure is treated as
        "no partition found".
        """
        getter = getattr(table, "get_max_partition", None)
        if callable(getter):
            for kwargs in ({"skip_empty": True}, {}):
                try:
                    partition = getter(**kwargs)
                except TypeError:
                    continue
                except Exception:
                    partition = None
                text = partition_spec_text(partition)
                if text:
                    return text
        return None
maxc_cli/backend/odps.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Main OdpsBackend class combining all mixins."""
|
|
2
|
+
|
|
3
|
+
from itertools import islice
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from ..auth_providers import resolve_auth_connection
|
|
7
|
+
from ..config import MaxCConfig
|
|
8
|
+
from ..exceptions import PermissionDeniedError
|
|
9
|
+
from ..helpers import (
|
|
10
|
+
_dt_to_iso,
|
|
11
|
+
record_to_dict,
|
|
12
|
+
translate_odps_error,
|
|
13
|
+
)
|
|
14
|
+
from ..models import QueryResult
|
|
15
|
+
from ..utils import detect_operation, extract_table_names
|
|
16
|
+
from .auth import AuthMixin
|
|
17
|
+
from .data import DataMixin
|
|
18
|
+
from .job import JobMixin
|
|
19
|
+
from .meta import MetaMixin
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class OdpsBackend(
    JobMixin,  # JobMixin extends QueryMixin
    MetaMixin,
    DataMixin,
    AuthMixin,
):
    """MaxCompute backend for production use."""

    # Capability flags consumed by the application layer.
    supports_remote_jobs = True
    supports_cost_check = False

    def __init__(self, config: 'MaxCConfig') -> 'None':
        """Initialize OdpsBackend with configuration.

        Resolves authentication, picks the effective project, and creates
        the underlying ODPS client.
        """
        self.config = config
        resolved = resolve_auth_connection(config)
        self.resolved_auth = resolved
        self.setting_sources = resolved.setting_sources
        # Priority: config.default_project (includes session_override) > resolved.project
        self.project = config.default_project or resolved.project
        # Copy the resolved settings (avoid mutating the resolver's own
        # dict) and record the project that is actually in use.
        self.settings = dict(resolved.settings)
        self.settings["project"] = self.project
        self.client = resolved.create_client()
        # Fetched lazily to avoid an unnecessary API call at startup.
        self._owner_display_name: 'str | None' = None

    def _validate_select(self, sql: 'str') -> 'None':
        """Validate that SQL is a SELECT statement and allowed by config.

        Raises:
            PermissionDeniedError: if the detected operation is not in
                ``allowed_operations`` or is anything other than SELECT.
        """
        operation = detect_operation(sql)
        if operation not in self.config.allowed_operations:
            raise PermissionDeniedError(
                f"Configured allowed operations are limited to {', '.join(self.config.allowed_operations)}; received {operation}.",
                suggestion="Update `allowed_operations` if you intentionally want to permit this operation.",
            )
        if operation != "SELECT":
            raise PermissionDeniedError(f"This CLI currently supports only SELECT statements; received {operation}.")

    def _instance_to_query_result(
        self,
        instance,
        *,
        project: 'str',
        max_rows: 'int',
        sql: 'str',
        elapsed_ms: 'int',
        offset: 'int' = 0,
    ) -> 'QueryResult':
        """Convert a finished ODPS instance into a QueryResult.

        Reads at most ``max_rows`` records starting at ``offset`` from the
        instance's result reader and attaches cost metadata when available.
        """
        try:
            with instance.open_reader() as reader:
                schema = [
                    {
                        "name": column.name,
                        "type": str(column.type),
                        "comment": "",
                    }
                    for column in reader.schema.columns
                ]
                # Paginate directly over the reader without materializing
                # the skipped prefix.
                rows = [
                    record_to_dict(
                        [column["name"] for column in schema],
                        record.values,
                    )
                    for record in islice(reader, offset, offset + max_rows)
                ]
                # reader.count may be absent or falsy; fall back to what we read.
                total_rows = int(getattr(reader, "count", len(rows)) or len(rows))
        except Exception as exc:
            raise translate_odps_error(exc) from exc

        bytes_scanned, extra_metadata = self._task_cost(instance)
        returned_rows = len(rows)
        has_more = total_rows > (offset + returned_rows)
        extra_metadata["current_offset"] = offset

        return QueryResult(
            rows=rows,
            schema=schema,
            total_rows=total_rows,
            returned_rows=returned_rows,
            has_more=has_more,
            next_cursor=None,  # the cursor is generated by the app layer
            elapsed_ms=elapsed_ms,
            bytes_scanned=bytes_scanned,
            project=project,
            sql_executed=sql.rstrip(";"),
            tables_used=extract_table_names(sql),
            job_id=instance.id,
            submitted_at=_dt_to_iso(getattr(instance, "start_time", None)),
            completed_at=_dt_to_iso(getattr(instance, "end_time", None)),
            extra_metadata=extra_metadata,
        )

    def _task_cost(self, instance) -> 'tuple[int | None, dict[str, Any]]':
        """Get task cost from an ODPS instance.

        Best-effort: returns ``(None, {})`` when cost information is
        unavailable, otherwise ``(input_size_bytes, metadata_dict)``.
        """
        try:
            task_cost = instance.get_task_cost()
        except Exception:
            return None, {}
        if task_cost is None:
            return None, {}
        return (
            int(getattr(task_cost, "input_size", 0) or 0),
            {
                "task_cost_cpu": getattr(task_cost, "cpu_cost", None),
                "task_cost_memory": getattr(task_cost, "memory_cost", None),
                "estimated_input_size_bytes": getattr(task_cost, "input_size", None),
            },
        )
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Query-related mixin for OdpsBackend."""
|
|
2
|
+
|
|
3
|
+
from time import monotonic
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from ..helpers import (
|
|
7
|
+
build_query_outline,
|
|
8
|
+
translate_odps_error,
|
|
9
|
+
)
|
|
10
|
+
from ..models import QueryResult
|
|
11
|
+
from ..utils import extract_table_names, now_utc_iso
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class QueryMixin:
    """Mixin providing query execution methods.

    Assumes the host class provides ``self.client`` (ODPS client),
    ``self._validate_select``, ``self._instance_to_query_result``, and
    ``self._safe_logview`` (defined in sibling mixins / the backend class).
    """

    def execute_query(
        self,
        sql: 'str',
        *,
        project: 'str',
        max_rows: 'int',
        dry_run: 'bool',
        offset: 'int' = 0,
        timeout: 'int | None' = None,
    ) -> 'QueryResult':
        """Execute a SQL query and return results.

        Args:
            sql: SQL query to execute
            project: Project name
            max_rows: Maximum rows to return
            dry_run: If True, only estimate cost without executing
            offset: Row offset for pagination
            timeout: Timeout in seconds (default: 300s / 5 minutes)

        Raises:
            The translated domain error produced by ``translate_odps_error``
            for any SDK failure; ``_validate_select`` may raise before
            anything is submitted.
        """
        # Reject anything that is not an allowed SELECT up front.
        self._validate_select(sql)

        # Wall-clock timestamp for reporting; monotonic clock for the
        # elapsed-time measurement (immune to system clock changes).
        started_at = now_utc_iso()
        started_monotonic = monotonic()

        if dry_run:
            # Dry run: ask MaxCompute for SQLCost metadata instead of
            # executing; the result carries no rows, only estimates.
            try:
                sql_cost = self.client.execute_sql_cost(sql, project=project)
            except Exception as exc:
                raise translate_odps_error(exc) from exc
            elapsed_ms = int((monotonic() - started_monotonic) * 1000)
            return QueryResult(
                rows=[],
                schema=[],
                total_rows=0,
                returned_rows=0,
                has_more=False,
                next_cursor=None,
                elapsed_ms=elapsed_ms,
                bytes_scanned=int(sql_cost.input_size or 0),
                project=project,
                sql_executed=sql,
                tables_used=extract_table_names(sql),
                warnings=["MaxCompute dry-run returned SQLCost metadata and did not execute the query."],
                submitted_at=started_at,
                completed_at=now_utc_iso(),
                extra_metadata={
                    "sql_complexity": sql_cost.complexity,
                    "sql_udf_num": sql_cost.udf_num,
                    "estimated_input_size_bytes": sql_cost.input_size,
                },
            )

        try:
            instance = self.client.execute_sql(sql, project=project)
            # Default timeout: 300 seconds (5 minutes) to prevent indefinite blocking
            instance.wait_for_success(timeout=timeout or 300)
        except Exception as exc:
            raise translate_odps_error(exc) from exc

        elapsed_ms = int((monotonic() - started_monotonic) * 1000)
        result = self._instance_to_query_result(
            instance,
            project=project,
            max_rows=max_rows,
            sql=sql,
            elapsed_ms=elapsed_ms,
            offset=offset,
        )
        # Overwrite the instance-derived timestamps with the locally
        # observed submit/complete times for consistent reporting.
        result.submitted_at = started_at
        result.completed_at = now_utc_iso()
        return result

    def estimate_query_cost(self, sql: 'str', *, project: 'str') -> 'dict[str, Any]':
        """Estimate the cost of a query via MaxCompute's native SQLCost.

        Returns a dict combining the query outline with size/complexity
        estimates; no query is executed.
        """
        self._validate_select(sql)
        started_monotonic = monotonic()
        try:
            sql_cost = self.client.execute_sql_cost(sql, project=project)
        except Exception as exc:
            raise translate_odps_error(exc) from exc
        return {
            **build_query_outline(sql),
            "project": project,
            "cost_model": "maxcompute_native_sql_cost",
            "estimated_input_size_bytes": int(sql_cost.input_size or 0),
            # CPU/memory costs are only known after execution; see _task_cost.
            "task_cost_cpu": None,
            "task_cost_memory": None,
            "sql_complexity": sql_cost.complexity,
            "sql_udf_num": sql_cost.udf_num,
            "total_row_estimate": None,
            "elapsed_ms": int((monotonic() - started_monotonic) * 1000),
        }

    def explain_query(self, sql: 'str', *, project: 'str') -> 'dict[str, Any]':
        """Explain a query execution plan.

        Reuses the cost estimate and re-labels it as an explain result;
        the pop/re-set dance guarantees a "warnings" list key exists.
        """
        estimate = self.estimate_query_cost(sql, project=project)
        warnings = list(estimate.pop("warnings", []))
        estimate["warnings"] = warnings
        estimate["analysis_mode"] = "explain"
        estimate["read_path"] = True
        return estimate

    def submit_query(
        self,
        sql: 'str',
        *,
        project: 'str',
        idempotency_key: 'str | None' = None,
    ):
        """Submit a query for async execution.

        Returns a pending JobInfo immediately; callers track the instance
        via job.status / job.wait.  ``idempotency_key`` is forwarded to the
        SDK as ``unique_identifier_id`` to deduplicate resubmissions.
        """
        from ..models import JobInfo

        try:
            instance = self.client.execute_sql(
                sql,
                project=project,
                unique_identifier_id=idempotency_key,
            )
        except Exception as exc:
            raise translate_odps_error(exc) from exc
        return JobInfo(
            job_id=instance.id,
            status="pending",
            project=project,
            progress=0,
            sql=sql,
            submitted_at=now_utc_iso(),
            updated_at=now_utc_iso(),
            logview=self._safe_logview(instance),
            warnings=["The MaxCompute instance has been submitted; use job.status or job.wait to track it."],
        )
|