maxc-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,312 @@
1
+ """Meta-related mixin for OdpsBackend."""
2
+
3
+ from itertools import islice
4
+ from typing import Any
5
+
6
+ from ..config import TableColumn, TableDefinition
7
+ from ..helpers import (
8
+ _dt_to_iso,
9
+ build_freshness_info,
10
+ build_latest_partition_info,
11
+ partition_spec_text,
12
+ record_to_dict,
13
+ translate_odps_error,
14
+ )
15
+
16
+
17
class MetaMixin:
    """Mixin providing metadata methods.

    Expects the host class to supply ``self.client`` (an ODPS client),
    ``self.project`` (default project name) and ``self._get_owner_display_name``;
    see the backend class that mixes this in.
    """
19
+
20
def list_tables(self) -> 'list[TableDefinition]':
    """Return stub definitions for every table in the current project, sorted by name."""
    try:
        stubs = [
            self._table_stub(odps_table)
            for odps_table in self.client.list_tables(project=self.project)
        ]
    except Exception as exc:
        raise translate_odps_error(exc) from exc
    stubs.sort(key=lambda stub: stub.name)
    return stubs
29
+
30
def describe_table(self, table_name: 'str') -> 'TableDefinition':
    """Describe a table, enriched with up to 20 partitions and 2 sample rows."""
    odps_table = self._get_table(table_name)
    definition = self._table_definition_from_table(odps_table)
    definition.partitions = self._list_partitions(odps_table, limit=20)
    definition.sample_rows = self._table_head(odps_table, limit=2)
    return definition
39
+
40
def search_tables(self, keyword: 'str') -> 'list[dict[str, Any]]':
    """Search tables by keyword.

    Each whitespace-separated token scores 5 when found in the table name or
    description; only when nothing matched there are the columns consulted,
    scoring 2 per matching column. Results are sorted by descending score,
    then table name.
    """
    tokens = [part.lower() for part in keyword.split() if part.strip()]
    if not tokens:
        tokens = [keyword.lower()]
    results: 'list[dict[str, Any]]' = []
    for table in self.list_tables():
        haystack = f"{table.name} {table.description}".lower()
        score = 5 * sum(1 for token in tokens if token in haystack)
        matched_columns: 'list[str]' = []
        if not score:
            # Fall back to column-level matching only when the table-level
            # search found nothing.
            for column in table.columns:
                column_text = f"{column.name} {column.comment}".lower()
                if any(token in column_text for token in tokens):
                    score += 2
                    matched_columns.append(column.name)
        if score:
            results.append(
                {
                    "table_name": table.name,
                    "description": table.description,
                    "score": score,
                    "matched_columns": matched_columns,
                }
            )
    results.sort(key=lambda entry: (-entry["score"], entry["table_name"]))
    return results
67
+
68
def search_columns(self, keyword: 'str') -> 'list[dict[str, Any]]':
    """Search columns by keyword.

    Per token the scoring tiers are cumulative: +8 for a column-name hit,
    +4 for a name/comment hit, +2 for a table-qualified hit. Results are
    sorted by descending score, then table name, then column name.
    """
    tokens = [part.lower() for part in keyword.split() if part.strip()]
    if not tokens:
        tokens = [keyword.lower()]
    results: 'list[dict[str, Any]]' = []
    for table in self.list_tables():
        for column in table.columns:
            name_text = column.name.lower()
            comment_text = f"{column.name} {column.comment}".lower()
            qualified_text = f"{table.name} {comment_text}".lower()
            score = 0
            for token in tokens:
                score += 8 if token in name_text else 0
                score += 4 if token in comment_text else 0
                score += 2 if token in qualified_text else 0
            if score:
                results.append(
                    {
                        "table_name": table.name,
                        "column_name": column.name,
                        "type": column.type,
                        "comment": column.comment,
                        "score": score,
                    }
                )
    results.sort(key=lambda entry: (-entry["score"], entry["table_name"], entry["column_name"]))
    return results
95
+
96
def latest_partition_info(self, table_name: 'str') -> 'tuple[dict[str, Any], list[str]]':
    """Get latest partition info for a table."""
    odps_table = self._get_table(table_name)
    return self._latest_partition_info_from_table(
        odps_table,
        self._table_definition_from_table(odps_table),
    )
101
+
102
def freshness_info(self, table_name: 'str') -> 'tuple[dict[str, Any], list[str]]':
    """Get data freshness info for a table, derived from its latest partition."""
    odps_table = self._get_table(table_name)
    definition = self._table_definition_from_table(odps_table)
    payload, partition_warnings = self._latest_partition_info_from_table(odps_table, definition)
    return build_freshness_info(definition, payload, warnings=partition_warnings)
108
+
109
def lineage_info(self, table_name: 'str') -> 'tuple[dict[str, Any], list[str]]':
    """Get table lineage info (placeholder - API not yet integrated)."""
    definition = self._table_definition_from_table(self._get_table(table_name))
    # Explicit "unsupported" payload: callers get a well-formed result instead
    # of an error while the lineage API integration is pending.
    payload: 'dict[str, Any]' = {
        "table_name": definition.name,
        "supported": False,
        "lineage_source": "unavailable",
        "coverage": "unsupported",
        "upstream_tables": [],
        "downstream_tables": [],
        "limitation": "The current version does not integrate with the MaxCompute lineage API.",
    }
    warnings = ["The current version does not integrate with the MaxCompute lineage API, so lineage returns an explicit unsupported placeholder result."]
    return payload, warnings
125
+
126
def list_projects(self) -> 'list[dict[str, Any]]':
    """List all projects owned by the current user.

    Note: This only returns basic info (name) to avoid triggering project.reload()
    which requires Read permission on each project. Use get_project_info() for details.
    """
    try:
        # Filter by the current user's display name so only owned projects return.
        owner = self._get_owner_display_name()
        # Only surface fields list_projects provides directly; touching
        # comment/owner/properties would trigger a reload requiring Read permission.
        entries = [
            {"name": project.name}
            for project in self.client.list_projects(owner=owner)
        ]
    except Exception as exc:
        raise translate_odps_error(exc, "list_projects") from exc
    entries.sort(key=lambda entry: entry["name"])
    return entries
145
+
146
def list_schemas(self, *, project: 'str | None' = None) -> 'list[dict[str, Any]]':
    """List all schemas in a project (defaults to the backend's current project)."""
    target = project or self.project
    try:
        found = [
            {"name": schema.name}
            for schema in self.client.list_schemas(project=target)
        ]
    except Exception as exc:
        raise translate_odps_error(exc, "list_schemas") from exc
    return sorted(found, key=lambda entry: entry["name"])
158
+
159
def get_project_info(self, project_name: 'str | None' = None) -> 'dict[str, Any]':
    """Get detailed information about a project (defaults to the current one)."""
    target = project_name or self.project
    try:
        project = self.client.get_project(target)
        # The object returned by get_project lazy-loads: reading the
        # attributes below triggers the reload that fetches full details.
        props = getattr(project, "properties", {}) or {}
        extended_props = getattr(project, "extended_properties", {}) or {}
        state = getattr(project, "state", None) or getattr(project, "status", None)
        return {
            "name": project.name,
            "project_type": getattr(project, "type", None),
            "comment": getattr(project, "comment", None),
            "owner": getattr(project, "owner", None),
            "state": state,
            "creation_time": _dt_to_iso(getattr(project, "creation_time", None)),
            "last_modified_time": _dt_to_iso(getattr(project, "last_modified_time", None)),
            "region": getattr(project, "region_id", None),
            "allow_3_tier": props.get("allow3tier") or extended_props.get("allow3tier"),
            "is_external_catalog_bound": props.get("isExternalCatalogBound") or extended_props.get("isExternalCatalogBound"),
        }
    except Exception as exc:
        raise translate_odps_error(exc, "get_project_info") from exc
183
+
184
+ # Private methods for metadata handling
185
+
186
def _get_table(self, table_name: 'str', *, project: 'str | None' = None):
    """Fetch the ODPS table object, translating ODPS errors on failure."""
    target = project or self.project
    try:
        return self.client.get_table(table_name, project=target)
    except Exception as exc:
        raise translate_odps_error(exc) from exc
192
+
193
def _table_stub(self, table) -> 'TableDefinition':
    """Create a minimal TableDefinition from table object (name only, no schema access)."""
    # Everything except the name is deliberately left empty/None so that no
    # schema attribute on the ODPS table object is touched.
    blank_fields: 'dict[str, Any]' = {
        "description": "",
        "columns": [],
        "sample_rows": [],
        "partitions": [],
        "upstream_tables": [],
        "downstream_tables": [],
        "partition_columns": [],
        "owner": None,
        "created_at": None,
        "updated_at": None,
        "size_bytes": None,
    }
    return TableDefinition(name=table.name, table_type="TABLE", **blank_fields)
210
+
211
def _table_definition_from_table(self, table) -> 'TableDefinition':
    """Create a full TableDefinition from ODPS table object."""

    def _columns_of(raw_columns) -> 'list[TableColumn]':
        # Normalise ODPS schema column objects into TableColumn records.
        return [
            TableColumn(
                name=raw.name,
                type=str(raw.type),
                comment=getattr(raw, "comment", "") or "",
            )
            for raw in raw_columns
        ]

    try:
        schema = table.table_schema
        raw_size = getattr(table, "size", None)
        return TableDefinition(
            name=table.name,
            description=getattr(table, "comment", "") or "",
            columns=_columns_of(getattr(schema, "columns", [])),
            sample_rows=[],
            partitions=[],
            upstream_tables=[],
            downstream_tables=[],
            partition_columns=_columns_of(getattr(schema, "partitions", [])),
            owner=getattr(table, "owner", None),
            created_at=_dt_to_iso(getattr(table, "creation_time", None)),
            updated_at=_dt_to_iso(getattr(table, "last_data_modified_time", None)),
            table_type="VIRTUAL_VIEW" if getattr(table, "is_virtual_view", False) else "TABLE",
            size_bytes=None if raw_size is None else int(raw_size),
            extra_metadata={"lifecycle": getattr(table, "lifecycle", None)},
        )
    except Exception as exc:
        raise translate_odps_error(exc) from exc
252
+
253
def _table_head(self, table, *, limit: 'int') -> 'list[dict[str, Any]]':
    """Get first N rows from a table; best-effort, returns [] on any read error."""
    try:
        records = list(islice(table.head(limit), limit))
    except Exception:
        # Sample rows are advisory only; never let them fail a describe call.
        return []
    column_names = [column.name for column in table.table_schema.columns]
    return [record_to_dict(column_names, record.values) for record in records]
262
+
263
def _list_partitions(self, table, *, limit: 'int') -> 'list[str]':
    """List up to `limit` partition spec strings for a table.

    Best-effort: any error while iterating partitions (for example a
    non-partitioned table) yields an empty list rather than raising.

    Fix: removed an unused `from odps.errors import InvalidParameter` that
    sat inside the try block — it was never referenced, and had the import
    ever failed it would have silently hidden all partitions behind the
    blanket `except Exception` below.
    """
    try:
        partitions = list(islice(table.iterate_partitions(), limit))
    except Exception:
        return []
    return [str(partition.partition_spec) for partition in partitions]
271
+
272
def _latest_partition_info_from_table(
    self,
    table,
    definition: 'TableDefinition',
) -> 'tuple[dict[str, Any], list[str]]':
    """Get latest partition info from ODPS table object.

    Prefers the server-side max-partition lookup; falls back to scanning the
    first 200 visible partitions when that lookup yields nothing.
    """
    max_spec = self._max_partition_spec(table)
    if max_spec:
        return build_latest_partition_info(
            definition,
            source="odps_get_max_partition",
            latest_partition_override=max_spec,
            visible_partition_count=None,
        )
    scanned = self._list_partitions(table, limit=200)
    payload, warnings = build_latest_partition_info(
        definition,
        source="odps_iterate_partitions",
        partitions=scanned,
        visible_partition_count=len(scanned),
    )
    # The scan is capped at 200, so a full page may mean partitions were missed.
    if definition.partition_columns and len(scanned) == 200:
        warnings.append("Only the first 200 visible partitions were inspected. For very large tables, verify the result in the MaxCompute console as well.")
    return payload, warnings
297
+
298
def _max_partition_spec(self, table) -> 'str | None':
    """Get max partition spec from table using get_max_partition if available."""
    getter = getattr(table, "get_max_partition", None)
    if not callable(getter):
        return None
    # Try skip_empty=True first; SDKs without that keyword raise TypeError,
    # in which case the bare call is attempted instead.
    for call_kwargs in ({"skip_empty": True}, {}):
        try:
            partition = getter(**call_kwargs)
        except TypeError:
            continue
        except Exception:
            partition = None
        text = partition_spec_text(partition)
        if text:
            return text
    return None
@@ -0,0 +1,130 @@
1
+ """Main OdpsBackend class combining all mixins."""
2
+
3
+ from itertools import islice
4
+ from typing import Any
5
+
6
+ from ..auth_providers import resolve_auth_connection
7
+ from ..config import MaxCConfig
8
+ from ..exceptions import PermissionDeniedError
9
+ from ..helpers import (
10
+ _dt_to_iso,
11
+ record_to_dict,
12
+ translate_odps_error,
13
+ )
14
+ from ..models import QueryResult
15
+ from ..utils import detect_operation, extract_table_names
16
+ from .auth import AuthMixin
17
+ from .data import DataMixin
18
+ from .job import JobMixin
19
+ from .meta import MetaMixin
20
+
21
+
22
class OdpsBackend(
    JobMixin,  # JobMixin extends QueryMixin
    MetaMixin,
    DataMixin,
    AuthMixin,
):
    """MaxCompute backend for production use.

    Combines the job/query, metadata, data, and auth mixins over a single
    ODPS client created from the resolved auth connection.
    """

    # Async submission is available (instances can be submitted and polled).
    supports_remote_jobs = True
    # NOTE(review): presumably no pre-execution cost gate — confirm against callers.
    supports_cost_check = False
32
+
33
def __init__(self, config: 'MaxCConfig') -> 'None':
    """Initialize OdpsBackend with configuration.

    Resolves the auth connection, picks the effective project, and creates
    the ODPS client.

    Fix: the original assigned `self.settings = resolved.settings` and then
    immediately overwrote it with `dict(resolved.settings)`; the dead first
    assignment is removed.
    """
    self.config = config
    resolved = resolve_auth_connection(config)
    self.resolved_auth = resolved
    self.setting_sources = resolved.setting_sources
    # Priority: config.default_project (includes session_override) > resolved.project
    self.project = config.default_project or resolved.project
    # Work on a copy of the resolved settings and record the project in use,
    # so the resolver's own mapping is never mutated.
    self.settings = dict(resolved.settings)
    self.settings["project"] = self.project
    self.client = resolved.create_client()
    # Owner display name is fetched lazily to avoid an unnecessary API call.
    self._owner_display_name: 'str | None' = None
48
+
49
def _validate_select(self, sql: 'str') -> 'None':
    """Validate that SQL is a SELECT statement and allowed by config.

    Raises PermissionDeniedError when the detected operation is not in
    `allowed_operations` or is anything other than SELECT.
    """
    operation = detect_operation(sql)
    allowed = self.config.allowed_operations
    if operation not in allowed:
        raise PermissionDeniedError(
            f"Configured allowed operations are limited to {', '.join(allowed)}; received {operation}.",
            suggestion="Update `allowed_operations` if you intentionally want to permit this operation.",
        )
    if operation != "SELECT":
        raise PermissionDeniedError(f"This CLI currently supports only SELECT statements; received {operation}.")
59
+
60
def _instance_to_query_result(
    self,
    instance,
    *,
    project: 'str',
    max_rows: 'int',
    sql: 'str',
    elapsed_ms: 'int',
    offset: 'int' = 0,
) -> 'QueryResult':
    """Convert a finished ODPS instance into a QueryResult page.

    Reads at most `max_rows` records starting at `offset` from the
    instance's result reader and attaches cost metadata when available.
    """
    try:
        with instance.open_reader() as reader:
            schema = [
                {"name": column.name, "type": str(column.type), "comment": ""}
                for column in reader.schema.columns
            ]
            names = [entry["name"] for entry in schema]
            rows = [
                record_to_dict(names, record.values)
                for record in islice(reader, offset, offset + max_rows)
            ]
            # reader.count may be absent or falsy; fall back to what was read.
            total_rows = int(getattr(reader, "count", len(rows)) or len(rows))
    except Exception as exc:
        raise translate_odps_error(exc) from exc

    bytes_scanned, extra_metadata = self._task_cost(instance)
    extra_metadata["current_offset"] = offset
    returned_rows = len(rows)
    return QueryResult(
        rows=rows,
        schema=schema,
        total_rows=total_rows,
        returned_rows=returned_rows,
        has_more=total_rows > offset + returned_rows,
        next_cursor=None,  # the cursor is generated by the app layer
        elapsed_ms=elapsed_ms,
        bytes_scanned=bytes_scanned,
        project=project,
        sql_executed=sql.rstrip(";"),
        tables_used=extract_table_names(sql),
        job_id=instance.id,
        submitted_at=_dt_to_iso(getattr(instance, "start_time", None)),
        completed_at=_dt_to_iso(getattr(instance, "end_time", None)),
        extra_metadata=extra_metadata,
    )
114
+
115
def _task_cost(self, instance) -> 'tuple[int | None, dict[str, Any]]':
    """Get (bytes scanned, extra metadata) from the instance's task cost.

    Best-effort: returns (None, {}) when the cost API fails or yields nothing.
    """
    try:
        cost = instance.get_task_cost()
    except Exception:
        return None, {}
    if cost is None:
        return None, {}
    input_size = getattr(cost, "input_size", None)
    metadata = {
        "task_cost_cpu": getattr(cost, "cpu_cost", None),
        "task_cost_memory": getattr(cost, "memory_cost", None),
        "estimated_input_size_bytes": input_size,
    }
    return int(input_size or 0), metadata
@@ -0,0 +1,148 @@
1
+ """Query-related mixin for OdpsBackend."""
2
+
3
+ from time import monotonic
4
+ from typing import Any
5
+
6
+ from ..helpers import (
7
+ build_query_outline,
8
+ translate_odps_error,
9
+ )
10
+ from ..models import QueryResult
11
+ from ..utils import extract_table_names, now_utc_iso
12
+
13
+
14
class QueryMixin:
    """Mixin providing query execution methods.

    Expects the host class to supply ``self.client``, ``self._validate_select``,
    ``self._instance_to_query_result`` and ``self._safe_logview``.
    """
16
+
17
def execute_query(
    self,
    sql: 'str',
    *,
    project: 'str',
    max_rows: 'int',
    dry_run: 'bool',
    offset: 'int' = 0,
    timeout: 'int | None' = None,
) -> 'QueryResult':
    """Execute a SQL query and return results.

    Args:
        sql: SQL query to execute
        project: Project name
        max_rows: Maximum rows to return
        dry_run: If True, only estimate cost without executing
        offset: Row offset for pagination
        timeout: Timeout in seconds (default: 300s / 5 minutes)
    """
    self._validate_select(sql)
    started_at = now_utc_iso()
    clock_start = monotonic()

    def elapsed() -> 'int':
        # Wall-clock milliseconds since this call began.
        return int((monotonic() - clock_start) * 1000)

    if dry_run:
        try:
            sql_cost = self.client.execute_sql_cost(sql, project=project)
        except Exception as exc:
            raise translate_odps_error(exc) from exc
        return QueryResult(
            rows=[],
            schema=[],
            total_rows=0,
            returned_rows=0,
            has_more=False,
            next_cursor=None,
            elapsed_ms=elapsed(),
            bytes_scanned=int(sql_cost.input_size or 0),
            project=project,
            sql_executed=sql,
            tables_used=extract_table_names(sql),
            warnings=["MaxCompute dry-run returned SQLCost metadata and did not execute the query."],
            submitted_at=started_at,
            completed_at=now_utc_iso(),
            extra_metadata={
                "sql_complexity": sql_cost.complexity,
                "sql_udf_num": sql_cost.udf_num,
                "estimated_input_size_bytes": sql_cost.input_size,
            },
        )

    try:
        instance = self.client.execute_sql(sql, project=project)
        # Cap the wait (default 5 minutes) so a stuck job cannot block forever.
        instance.wait_for_success(timeout=timeout or 300)
    except Exception as exc:
        raise translate_odps_error(exc) from exc

    result = self._instance_to_query_result(
        instance,
        project=project,
        max_rows=max_rows,
        sql=sql,
        elapsed_ms=elapsed(),
        offset=offset,
    )
    result.submitted_at = started_at
    result.completed_at = now_utc_iso()
    return result
89
+
90
def estimate_query_cost(self, sql: 'str', *, project: 'str') -> 'dict[str, Any]':
    """Estimate the cost of a query via MaxCompute's native SQLCost API."""
    self._validate_select(sql)
    clock_start = monotonic()
    try:
        sql_cost = self.client.execute_sql_cost(sql, project=project)
    except Exception as exc:
        raise translate_odps_error(exc) from exc
    estimate = dict(build_query_outline(sql))
    # Cost fields overwrite any same-named outline keys, as before.
    estimate.update(
        project=project,
        cost_model="maxcompute_native_sql_cost",
        estimated_input_size_bytes=int(sql_cost.input_size or 0),
        task_cost_cpu=None,
        task_cost_memory=None,
        sql_complexity=sql_cost.complexity,
        sql_udf_num=sql_cost.udf_num,
        total_row_estimate=None,
        elapsed_ms=int((monotonic() - clock_start) * 1000),
    )
    return estimate
110
+
111
def explain_query(self, sql: 'str', *, project: 'str') -> 'dict[str, Any]':
    """Explain a query execution plan (the cost estimate flagged as explain mode)."""
    report = self.estimate_query_cost(sql, project=project)
    # Normalise warnings to a fresh list so the key is always present.
    report["warnings"] = list(report.pop("warnings", []))
    report["analysis_mode"] = "explain"
    report["read_path"] = True
    return report
119
+
120
def submit_query(
    self,
    sql: 'str',
    *,
    project: 'str',
    idempotency_key: 'str | None' = None,
):
    """Submit a query for async execution and return a pending JobInfo."""
    from ..models import JobInfo

    try:
        # idempotency_key is forwarded as unique_identifier_id; presumably the
        # service uses it to dedupe resubmissions — confirm against pyodps docs.
        instance = self.client.execute_sql(
            sql,
            project=project,
            unique_identifier_id=idempotency_key,
        )
    except Exception as exc:
        raise translate_odps_error(exc) from exc
    return JobInfo(
        job_id=instance.id,
        status="pending",
        project=project,
        progress=0,
        sql=sql,
        submitted_at=now_utc_iso(),
        updated_at=now_utc_iso(),
        logview=self._safe_logview(instance),
        warnings=["The MaxCompute instance has been submitted; use job.status or job.wait to track it."],
    )