maxc-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxc_cli/__init__.py +5 -0
- maxc_cli/__main__.py +6 -0
- maxc_cli/app.py +3406 -0
- maxc_cli/audit.py +18 -0
- maxc_cli/auth_providers.py +471 -0
- maxc_cli/backend/__init__.py +8 -0
- maxc_cli/backend/auth.py +144 -0
- maxc_cli/backend/data.py +87 -0
- maxc_cli/backend/job.py +304 -0
- maxc_cli/backend/meta.py +312 -0
- maxc_cli/backend/odps.py +130 -0
- maxc_cli/backend/query.py +148 -0
- maxc_cli/cache.py +662 -0
- maxc_cli/cli.py +1274 -0
- maxc_cli/config.py +406 -0
- maxc_cli/exceptions.py +99 -0
- maxc_cli/helpers.py +964 -0
- maxc_cli/models.py +533 -0
- maxc_cli/output.py +75 -0
- maxc_cli/store.py +123 -0
- maxc_cli/utils.py +136 -0
- maxc_cli-0.1.0.dist-info/METADATA +220 -0
- maxc_cli-0.1.0.dist-info/RECORD +26 -0
- maxc_cli-0.1.0.dist-info/WHEEL +5 -0
- maxc_cli-0.1.0.dist-info/entry_points.txt +2 -0
- maxc_cli-0.1.0.dist-info/top_level.txt +1 -0
maxc_cli/backend/data.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Data-related mixin for OdpsBackend."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from ..config import TableDefinition
|
|
6
|
+
from ..helpers import (
|
|
7
|
+
build_profile,
|
|
8
|
+
quote_table_name,
|
|
9
|
+
resolve_sample_request,
|
|
10
|
+
sql_string_literal,
|
|
11
|
+
translate_odps_error,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DataMixin:
    """Mixin providing data sampling and profiling methods.

    Expects the host class (see OdpsBackend) to supply ``describe_table``,
    ``client`` and ``project``.
    """

    def sample_table(
        self,
        table_name: 'str',
        rows: 'int',
        *,
        partition: 'str | None' = None,
        columns: 'list[str] | None' = None,
    ) -> 'tuple[TableDefinition, list[dict[str, Any]], dict[str, Any]]':
        """Sample data from a table using ODPS read_table for better performance.

        Returns the table definition, the sampled rows as JSON-serializable
        dicts keyed by column name, and a metadata dict with the schema
        subset, the applied partition, and the selected columns.
        """
        definition = self.describe_table(table_name)
        selected_columns, applied_partition, partition_values = resolve_sample_request(
            definition,
            partition=partition,
            columns=columns,
            strict_partition_check=False,
        )

        # No explicit selection means every column of the table.
        if selected_columns:
            column_names = selected_columns
        else:
            column_names = [col.name for col in definition.columns]

        # Turn resolved partition values into an ODPS partition spec string.
        partition_spec = None
        if applied_partition and partition_values:
            pairs = [f"{key}={val}" for key, val in partition_values.items()]
            partition_spec = ",".join(pairs)

        def _serialize_value(raw):
            """Convert value to JSON-serializable format."""
            from datetime import datetime, date
            # datetime is a date subclass, so one combined check suffices.
            return raw.isoformat() if isinstance(raw, (datetime, date)) else raw

        try:
            records = self.client.read_table(
                table_name,
                limit=rows,
                partition=partition_spec,
                project=self.project,
            )
            sample_rows = []
            for record in records:
                sample_rows.append(
                    {name: _serialize_value(record[name]) for name in column_names}
                )
        except Exception as exc:
            raise translate_odps_error(exc) from exc

        schema = [
            {"name": col.name, "type": col.type, "comment": col.comment}
            for col in definition.columns
            if col.name in column_names
        ]
        return definition, sample_rows, {
            "schema": schema,
            "applied_partition": applied_partition,
            "selected_columns": selected_columns,
        }

    def profile_table(self, table_name: 'str', *, partition: 'str | None' = None) -> 'dict[str, Any]':
        """Profile data from a table, based on a fixed 20-row sample."""
        definition, sample_rows, sample_info = self.sample_table(
            table_name,
            rows=20,
            partition=partition,
            columns=None,
        )
        return build_profile(
            definition,
            sample_rows,
            applied_partition=sample_info["applied_partition"],
        )
|
maxc_cli/backend/job.py
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""Job-related mixin for OdpsBackend."""
|
|
2
|
+
|
|
3
|
+
from itertools import islice
|
|
4
|
+
from time import monotonic, sleep
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from ..exceptions import BackendConnectionError, JobTimeoutError
|
|
8
|
+
from ..helpers import (
|
|
9
|
+
_dt_to_iso,
|
|
10
|
+
_duration_ms,
|
|
11
|
+
build_task_summary,
|
|
12
|
+
classify_failure_reason,
|
|
13
|
+
translate_odps_error,
|
|
14
|
+
)
|
|
15
|
+
from ..models import JobInfo, QueryResult
|
|
16
|
+
from ..utils import now_utc_iso
|
|
17
|
+
from .query import QueryMixin
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class JobMixin(QueryMixin):
    """Mixin providing job management methods.

    Builds on QueryMixin (which supplies ``_instance_to_query_result``);
    expects the host class to expose ``client`` (a pyodps ODPS handle)
    and ``project``.
    """

    def get_job(self, job_id: 'str', *, project: 'str | None' = None) -> 'JobInfo':
        """Get job status.

        Fetches the ODPS instance and converts it to a JobInfo snapshot.
        """
        instance = self._get_instance(job_id, project=project)
        return self._instance_to_job_info(instance, project=project or self.project)

    def wait_job(
        self,
        job_id: 'str',
        *,
        project: 'str | None' = None,
        timeout: 'int | None' = None,
        poll_interval: 'int' = 3,
    ) -> 'JobInfo':
        """Wait for job completion with polling and timeout.

        Args:
            job_id: Job identifier
            project: Project name (optional)
            timeout: Timeout in seconds (default: 300s / 5 minutes)
            poll_interval: Seconds between status checks (default: 3s)

        Raises:
            JobTimeoutError: if the job is still running after the timeout.
            BackendConnectionError: after 5 consecutive failed polls.
        """
        instance = self._get_instance(job_id, project=project)
        start_time = monotonic()
        default_timeout = timeout if timeout is not None else 300
        consecutive_errors = 0

        while True:
            elapsed = monotonic() - start_time
            if elapsed > default_timeout:
                raise JobTimeoutError(
                    f"Job {job_id} did not complete within {default_timeout} seconds"
                )

            try:
                # Non-blocking refresh of the cached instance state.
                instance.reload(blocking=False)
                consecutive_errors = 0
            except Exception as exc:
                # Tolerate transient poll failures; give up after 5 in a row.
                consecutive_errors += 1
                if consecutive_errors >= 5:
                    raise BackendConnectionError(
                        f"Lost contact with backend after 5 consecutive errors: {exc}",
                        suggestion="Check network connectivity and retry.",
                    ) from exc

            # The status enum renders like "Status.RUNNING"; keep the last segment.
            status_name = str(getattr(instance, "status", "")).split(".")[-1]
            # NOTE(review): after a failed (swallowed) reload this reads a stale
            # status, and any status other than RUNNING — including a queued /
            # SUSPENDED job — exits the loop immediately. Confirm that is intended.
            if status_name != "RUNNING":
                break

            sleep(poll_interval)

        return self._instance_to_job_info(instance, project=project or self.project)

    def fetch_job_result(
        self,
        job_id: 'str',
        *,
        project: 'str | None' = None,
        max_rows: 'int',
        offset: 'int' = 0,
    ) -> 'QueryResult':
        """Fetch job results.

        Raises:
            FeatureUnavailableError: if the job has not finished successfully.
        """
        # Function-scope import — presumably to avoid an import cycle; the
        # module already imports other names from ..exceptions at the top.
        from ..exceptions import FeatureUnavailableError

        instance = self._get_instance(job_id, project=project)
        info = self._instance_to_job_info(instance, project=project or self.project)
        if info.status != "success":
            raise FeatureUnavailableError(
                f"Job {job_id} is currently {info.status}; results are not readable yet.",
                suggestion="Run `maxc job wait` or `maxc job status` first.",
            )
        sql = self._safe_sql(instance) or ""
        # Result materialization is delegated to QueryMixin.
        return self._instance_to_query_result(
            instance,
            project=project or self.project,
            max_rows=max_rows,
            sql=sql,
            elapsed_ms=_duration_ms(instance.start_time, instance.end_time),
            offset=offset,
        )

    def cancel_job(self, job_id: 'str', *, project: 'str | None' = None) -> 'JobInfo':
        """Cancel a job.

        Only *requests* cancellation; the returned JobInfo is a synthetic
        "failure" snapshot, and callers are told to re-check the final state.
        """
        instance = self._get_instance(job_id, project=project)
        try:
            instance.stop()
        except Exception as exc:
            raise translate_odps_error(exc) from exc
        sql = self._safe_sql(instance)
        return JobInfo(
            job_id=job_id,
            status="failure",
            project=project or self.project,
            progress=0,
            stage="cancel_requested",
            retryable=False,
            failure_reason="Cancellation has been requested.",
            task_summary=build_task_summary(sql),
            sql=sql,
            submitted_at=_dt_to_iso(getattr(instance, "start_time", None)),
            updated_at=now_utc_iso(),
            logview=self._safe_logview(instance),
            warnings=["Cancellation has been requested. Run `job status` again to confirm the final state."],
        )

    def diagnose_job(self, job_id: 'str', *, project: 'str | None' = None) -> 'dict[str, Any]':
        """Diagnose a job failure.

        Combines the JobInfo snapshot, a classified failure reason, and the
        per-task statuses/results into a single plain dict.
        """
        instance = self._get_instance(job_id, project=project)
        info = self._instance_to_job_info(instance, project=project or self.project)
        diagnosis = classify_failure_reason(info.failure_reason)
        task_statuses = self._safe_task_statuses(instance)
        task_results = self._safe_task_results(instance)
        return {
            "job_id": info.job_id,
            "status": info.status,
            "stage": info.stage,
            "retryable": info.retryable,
            "failure_reason": info.failure_reason,
            "diagnosis_category": diagnosis["category"],
            "diagnosis_summary": diagnosis["summary"],
            "logview": info.logview,
            "task_summary": info.task_summary,
            "task_statuses": [
                {
                    "task_name": name,
                    # Normalize "Status.XXX" enum rendering to a lowercase word.
                    "status": str(getattr(task, "status", "")).split(".")[-1].lower(),
                    "type": str(getattr(task, "type", "") or ""),
                }
                for name, task in task_statuses.items()
            ],
            "task_results": task_results,
        }

    def list_jobs(self, *, project: 'str | None' = None, limit: 'int' = 20) -> 'list[JobInfo]':
        """List jobs.

        Takes at most *limit* instances from the backend iterator.
        """
        jobs: 'list[JobInfo]' = []
        try:
            iterator = self.client.list_instances(project=project or self.project)
            # islice avoids materializing the full (potentially huge) listing.
            for instance in islice(iterator, limit):
                jobs.append(self._instance_to_job_info(instance, project=project or self.project))
        except Exception as exc:
            raise translate_odps_error(exc) from exc
        return jobs

    # Private methods for job handling

    def _get_instance(self, job_id: 'str', *, project: 'str | None' = None):
        """Get ODPS instance by job ID, translating backend errors."""
        try:
            return self.client.get_instance(job_id, project=project or self.project)
        except Exception as exc:
            raise translate_odps_error(exc) from exc

    def _safe_task_statuses(self, instance) -> 'dict[str, Any]':
        """Safely get task statuses from instance (empty dict on any error)."""
        try:
            return dict(instance.get_task_statuses())
        except Exception:
            return {}

    def _safe_task_results(self, instance) -> 'dict[str, str]':
        """Safely get task results from instance, coerced to str->str."""
        try:
            results = instance.get_task_results()
        except Exception:
            return {}
        return {
            str(name): str(value)
            for name, value in dict(results).items()
        }

    def _first_failure_reason(self, instance) -> 'str | None':
        """Get first non-empty failure reason from task results, if any."""
        task_results = self._safe_task_results(instance)
        for value in task_results.values():
            text = str(value).strip()
            if text:
                return text
        return None

    def _instance_to_job_info(self, instance, *, project: 'str') -> 'JobInfo':
        """Convert ODPS instance to JobInfo.

        Maps the instance status onto three shapes: RUNNING -> "running",
        TERMINATED -> "success"/"failure", anything else -> "pending".
        """
        try:
            # Best-effort refresh; a stale snapshot is still usable.
            instance.reload(blocking=False)
        except Exception:
            pass

        # "Status.RUNNING" -> "RUNNING"
        status_name = str(getattr(instance, "status", "")).split(".")[-1]
        sql = self._safe_sql(instance)
        logview = self._safe_logview(instance)
        submitted_at = _dt_to_iso(getattr(instance, "start_time", None))
        completed_at = _dt_to_iso(getattr(instance, "end_time", None))
        task_statuses = self._safe_task_statuses(instance)
        task_names = sorted(task_statuses)
        task_types = {
            name: str(getattr(task, "type", "") or "")
            for name, task in task_statuses.items()
        }
        task_summary = build_task_summary(sql, task_names=task_names, task_types=task_types)

        if status_name == "RUNNING":
            return JobInfo(
                job_id=instance.id,
                status="running",
                project=project,
                # No real progress signal is read from the instance; 50 is a
                # fixed placeholder for "in flight".
                progress=50,
                stage="running",
                retryable=None,
                task_summary=task_summary,
                sql=sql,
                submitted_at=submitted_at,
                updated_at=now_utc_iso(),
                completed_at=completed_at,
                logview=logview,
            )

        if status_name == "TERMINATED":
            try:
                succeeded = instance.is_successful()
            except Exception as exc:
                # Could not even determine success: report as a failure whose
                # reason is the probe error itself.
                return JobInfo(
                    job_id=instance.id,
                    status="failure",
                    project=project,
                    progress=100,
                    stage="failed",
                    retryable=False,
                    failure_reason=str(exc),
                    task_summary=task_summary,
                    sql=sql,
                    submitted_at=submitted_at,
                    updated_at=now_utc_iso(),
                    completed_at=completed_at,
                    logview=logview,
                    error_message=str(exc),
                )
            failure_reason = None if succeeded else self._first_failure_reason(instance)
            diagnosis = classify_failure_reason(failure_reason)
            return JobInfo(
                job_id=instance.id,
                status="success" if succeeded else "failure",
                project=project,
                progress=100,
                stage="completed" if succeeded else "failed",
                retryable=False if succeeded else diagnosis["retryable"],
                failure_reason=failure_reason,
                task_summary=task_summary,
                sql=sql,
                submitted_at=submitted_at,
                updated_at=now_utc_iso(),
                completed_at=completed_at,
                logview=logview,
            )

        # Any other status (e.g. SUSPENDED or unknown) is treated as queued.
        return JobInfo(
            job_id=instance.id,
            status="pending",
            project=project,
            progress=0,
            stage="queue",
            retryable=None,
            task_summary=task_summary,
            sql=sql,
            submitted_at=submitted_at,
            updated_at=now_utc_iso(),
            completed_at=completed_at,
            logview=logview,
        )

    def _safe_sql(self, instance) -> 'str | None':
        """Safely get SQL from instance (None on error), trailing ';' stripped."""
        try:
            sql = instance.get_sql_query()
        except Exception:
            return None
        return sql.rstrip(";") if sql else None

    def _safe_logview(self, instance) -> 'str | None':
        """Safely get logview URL from instance (None on error)."""
        try:
            return instance.get_logview_address()
        except Exception:
            return None
|