maxc-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,87 @@
1
+ """Data-related mixin for OdpsBackend."""
2
+
3
+ from typing import Any
4
+
5
+ from ..config import TableDefinition
6
+ from ..helpers import (
7
+ build_profile,
8
+ quote_table_name,
9
+ resolve_sample_request,
10
+ sql_string_literal,
11
+ translate_odps_error,
12
+ )
13
+
14
+
15
class DataMixin:
    """Mixin providing data sampling and profiling methods."""

    def sample_table(
        self,
        table_name: 'str',
        rows: 'int',
        *,
        partition: 'str | None' = None,
        columns: 'list[str] | None' = None,
    ) -> 'tuple[TableDefinition, list[dict[str, Any]], dict[str, Any]]':
        """Sample data from a table using ODPS read_table for better performance.

        Args:
            table_name: Name of the table to sample.
            rows: Maximum number of rows to read.
            partition: Optional partition spec to restrict the read.
            columns: Optional subset of column names to include in each row.

        Returns:
            Tuple of (table definition, sampled rows, metadata dict with
            ``schema``, ``applied_partition`` and ``selected_columns`` keys).

        Raises:
            Whatever ``translate_odps_error`` maps backend read failures to.
        """
        definition = self.describe_table(table_name)
        selected_columns, applied_partition, partition_values = resolve_sample_request(
            definition,
            partition=partition,
            columns=columns,
            strict_partition_check=False,
        )

        # Fall back to every column when no explicit selection was resolved.
        column_names = selected_columns if selected_columns else [c.name for c in definition.columns]

        # Build a "k1=v1,k2=v2" spec only when a partition actually applies.
        partition_spec = None
        if applied_partition and partition_values:
            partition_spec = ",".join(f"{k}={v}" for k, v in partition_values.items())

        # Imported once here instead of on every serialized value, which is
        # what the per-call import inside the helper previously amounted to.
        from datetime import date

        def _serialize_value(value):
            """Convert a value to a JSON-serializable format."""
            # datetime is a subclass of date, so a single check covers both;
            # either way the ISO-8601 string is returned.
            if isinstance(value, date):
                return value.isoformat()
            return value

        try:
            records = self.client.read_table(
                table_name,
                limit=rows,
                partition=partition_spec,
                project=self.project,
            )
            sample_rows = [
                {column: _serialize_value(record[column]) for column in column_names}
                for record in records
            ]
        except Exception as exc:
            raise translate_odps_error(exc) from exc

        # Set membership keeps the schema filter O(1) per column.
        wanted = set(column_names)
        return definition, sample_rows, {
            "schema": [
                {"name": c.name, "type": c.type, "comment": c.comment}
                for c in definition.columns
                if c.name in wanted
            ],
            "applied_partition": applied_partition,
            "selected_columns": selected_columns,
        }

    def profile_table(self, table_name: 'str', *, partition: 'str | None' = None) -> 'dict[str, Any]':
        """Profile data from a table.

        Samples up to 20 rows via :meth:`sample_table` and delegates the
        statistics to ``build_profile``.

        Args:
            table_name: Name of the table to profile.
            partition: Optional partition spec to restrict sampling.

        Returns:
            Profile dictionary produced by ``build_profile``.
        """
        definition, sample_rows, sample_info = self.sample_table(
            table_name,
            rows=20,
            partition=partition,
            columns=None,
        )
        return build_profile(
            definition,
            sample_rows,
            applied_partition=sample_info["applied_partition"],
        )
@@ -0,0 +1,304 @@
1
+ """Job-related mixin for OdpsBackend."""
2
+
3
+ from itertools import islice
4
+ from time import monotonic, sleep
5
+ from typing import Any
6
+
7
+ from ..exceptions import BackendConnectionError, JobTimeoutError
8
+ from ..helpers import (
9
+ _dt_to_iso,
10
+ _duration_ms,
11
+ build_task_summary,
12
+ classify_failure_reason,
13
+ translate_odps_error,
14
+ )
15
+ from ..models import JobInfo, QueryResult
16
+ from ..utils import now_utc_iso
17
+ from .query import QueryMixin
18
+
19
+
20
class JobMixin(QueryMixin):
    """Mixin providing job management methods."""

    def get_job(self, job_id: 'str', *, project: 'str | None' = None) -> 'JobInfo':
        """Get job status.

        Args:
            job_id: Job identifier.
            project: Project name (optional; defaults to ``self.project``).

        Returns:
            A ``JobInfo`` snapshot of the instance's current state.
        """
        instance = self._get_instance(job_id, project=project)
        return self._instance_to_job_info(instance, project=project or self.project)

    def wait_job(
        self,
        job_id: 'str',
        *,
        project: 'str | None' = None,
        timeout: 'int | None' = None,
        poll_interval: 'int' = 3,
    ) -> 'JobInfo':
        """Wait for job completion with polling and timeout.

        Args:
            job_id: Job identifier
            project: Project name (optional)
            timeout: Timeout in seconds (default: 300s / 5 minutes)
            poll_interval: Seconds between status checks (default: 3s)

        Raises:
            JobTimeoutError: If the job is still running after ``timeout``.
            BackendConnectionError: After 5 consecutive failed status polls.
        """
        instance = self._get_instance(job_id, project=project)
        start_time = monotonic()
        default_timeout = timeout if timeout is not None else 300
        consecutive_errors = 0

        while True:
            elapsed = monotonic() - start_time
            if elapsed > default_timeout:
                raise JobTimeoutError(
                    f"Job {job_id} did not complete within {default_timeout} seconds"
                )

            try:
                # Non-blocking refresh of the cached instance status.
                instance.reload(blocking=False)
                # Any successful poll resets the error streak.
                consecutive_errors = 0
            except Exception as exc:
                consecutive_errors += 1
                # Only give up after 5 failed polls in a row; transient
                # errors fall through and the loop retries with the last
                # known (possibly stale) status.
                if consecutive_errors >= 5:
                    raise BackendConnectionError(
                        f"Lost contact with backend after 5 consecutive errors: {exc}",
                        suggestion="Check network connectivity and retry.",
                    ) from exc

            # Status repr looks like "Status.RUNNING"; keep the last segment.
            status_name = str(getattr(instance, "status", "")).split(".")[-1]
            if status_name != "RUNNING":
                break

            sleep(poll_interval)

        return self._instance_to_job_info(instance, project=project or self.project)

    def fetch_job_result(
        self,
        job_id: 'str',
        *,
        project: 'str | None' = None,
        max_rows: 'int',
        offset: 'int' = 0,
    ) -> 'QueryResult':
        """Fetch job results.

        Args:
            job_id: Job identifier.
            project: Project name (optional; defaults to ``self.project``).
            max_rows: Maximum number of result rows to fetch.
            offset: Row offset into the result set (default 0).

        Raises:
            FeatureUnavailableError: If the job has not finished successfully.
        """
        # Local import keeps the module-level import list minimal.
        from ..exceptions import FeatureUnavailableError

        instance = self._get_instance(job_id, project=project)
        info = self._instance_to_job_info(instance, project=project or self.project)
        if info.status != "success":
            # Results are only readable once the job succeeded.
            raise FeatureUnavailableError(
                f"Job {job_id} is currently {info.status}; results are not readable yet.",
                suggestion="Run `maxc job wait` or `maxc job status` first.",
            )
        sql = self._safe_sql(instance) or ""
        return self._instance_to_query_result(
            instance,
            project=project or self.project,
            max_rows=max_rows,
            sql=sql,
            elapsed_ms=_duration_ms(instance.start_time, instance.end_time),
            offset=offset,
        )

    def cancel_job(self, job_id: 'str', *, project: 'str | None' = None) -> 'JobInfo':
        """Cancel a job.

        Cancellation is asynchronous: ``stop()`` only requests it, so the
        returned ``JobInfo`` is a synthetic "cancel_requested" snapshot, not
        the confirmed final state.
        """
        instance = self._get_instance(job_id, project=project)
        try:
            instance.stop()
        except Exception as exc:
            raise translate_odps_error(exc) from exc
        sql = self._safe_sql(instance)
        return JobInfo(
            job_id=job_id,
            status="failure",
            project=project or self.project,
            progress=0,
            stage="cancel_requested",
            retryable=False,
            failure_reason="Cancellation has been requested.",
            task_summary=build_task_summary(sql),
            sql=sql,
            submitted_at=_dt_to_iso(getattr(instance, "start_time", None)),
            updated_at=now_utc_iso(),
            logview=self._safe_logview(instance),
            warnings=["Cancellation has been requested. Run `job status` again to confirm the final state."],
        )

    def diagnose_job(self, job_id: 'str', *, project: 'str | None' = None) -> 'dict[str, Any]':
        """Diagnose a job failure.

        Combines the job snapshot, a classified failure reason, and the
        per-task statuses/results into a single diagnostic dictionary.
        """
        instance = self._get_instance(job_id, project=project)
        info = self._instance_to_job_info(instance, project=project or self.project)
        diagnosis = classify_failure_reason(info.failure_reason)
        task_statuses = self._safe_task_statuses(instance)
        task_results = self._safe_task_results(instance)
        return {
            "job_id": info.job_id,
            "status": info.status,
            "stage": info.stage,
            "retryable": info.retryable,
            "failure_reason": info.failure_reason,
            "diagnosis_category": diagnosis["category"],
            "diagnosis_summary": diagnosis["summary"],
            "logview": info.logview,
            "task_summary": info.task_summary,
            "task_statuses": [
                {
                    "task_name": name,
                    # Enum-like repr ("Status.X") reduced to lowercase "x".
                    "status": str(getattr(task, "status", "")).split(".")[-1].lower(),
                    "type": str(getattr(task, "type", "") or ""),
                }
                for name, task in task_statuses.items()
            ],
            "task_results": task_results,
        }

    def list_jobs(self, *, project: 'str | None' = None, limit: 'int' = 20) -> 'list[JobInfo]':
        """List jobs.

        Args:
            project: Project name (optional; defaults to ``self.project``).
            limit: Maximum number of jobs to return (default 20).
        """
        jobs: 'list[JobInfo]' = []
        try:
            iterator = self.client.list_instances(project=project or self.project)
            # islice stops the (potentially unbounded) listing at `limit`.
            for instance in islice(iterator, limit):
                jobs.append(self._instance_to_job_info(instance, project=project or self.project))
        except Exception as exc:
            raise translate_odps_error(exc) from exc
        return jobs

    # Private methods for job handling

    def _get_instance(self, job_id: 'str', *, project: 'str | None' = None):
        """Get ODPS instance by job ID, translating backend errors."""
        try:
            return self.client.get_instance(job_id, project=project or self.project)
        except Exception as exc:
            raise translate_odps_error(exc) from exc

    def _safe_task_statuses(self, instance) -> 'dict[str, Any]':
        """Safely get task statuses from instance.

        Best-effort: any backend error yields an empty dict.
        """
        try:
            return dict(instance.get_task_statuses())
        except Exception:
            return {}

    def _safe_task_results(self, instance) -> 'dict[str, str]':
        """Safely get task results from instance.

        Best-effort: any backend error yields an empty dict; keys and
        values are coerced to ``str``.
        """
        try:
            results = instance.get_task_results()
        except Exception:
            return {}
        return {
            str(name): str(value)
            for name, value in dict(results).items()
        }

    def _first_failure_reason(self, instance) -> 'str | None':
        """Get first non-empty failure reason from task results."""
        task_results = self._safe_task_results(instance)
        for value in task_results.values():
            text = str(value).strip()
            if text:
                return text
        return None

    def _instance_to_job_info(self, instance, *, project: 'str') -> 'JobInfo':
        """Convert ODPS instance to JobInfo.

        Maps the instance status to one of three shapes: "running",
        terminal ("success"/"failure"), or "pending" for anything else.
        """
        try:
            # Best-effort refresh; a stale snapshot is still usable.
            instance.reload(blocking=False)
        except Exception:
            pass

        # Status repr looks like "Status.RUNNING"; keep the last segment.
        status_name = str(getattr(instance, "status", "")).split(".")[-1]
        sql = self._safe_sql(instance)
        logview = self._safe_logview(instance)
        submitted_at = _dt_to_iso(getattr(instance, "start_time", None))
        completed_at = _dt_to_iso(getattr(instance, "end_time", None))
        task_statuses = self._safe_task_statuses(instance)
        task_names = sorted(task_statuses)
        task_types = {
            name: str(getattr(task, "type", "") or "")
            for name, task in task_statuses.items()
        }
        task_summary = build_task_summary(sql, task_names=task_names, task_types=task_types)

        if status_name == "RUNNING":
            # Progress is a fixed placeholder; the backend exposes no
            # finer-grained percentage here.
            return JobInfo(
                job_id=instance.id,
                status="running",
                project=project,
                progress=50,
                stage="running",
                retryable=None,
                task_summary=task_summary,
                sql=sql,
                submitted_at=submitted_at,
                updated_at=now_utc_iso(),
                completed_at=completed_at,
                logview=logview,
            )

        if status_name == "TERMINATED":
            try:
                succeeded = instance.is_successful()
            except Exception as exc:
                # Even the success check can fail; report that as a failure
                # with the exception text as the reason.
                return JobInfo(
                    job_id=instance.id,
                    status="failure",
                    project=project,
                    progress=100,
                    stage="failed",
                    retryable=False,
                    failure_reason=str(exc),
                    task_summary=task_summary,
                    sql=sql,
                    submitted_at=submitted_at,
                    updated_at=now_utc_iso(),
                    completed_at=completed_at,
                    logview=logview,
                    error_message=str(exc),
                )
            # Only look up a failure reason when the job actually failed.
            failure_reason = None if succeeded else self._first_failure_reason(instance)
            diagnosis = classify_failure_reason(failure_reason)
            return JobInfo(
                job_id=instance.id,
                status="success" if succeeded else "failure",
                project=project,
                progress=100,
                stage="completed" if succeeded else "failed",
                retryable=False if succeeded else diagnosis["retryable"],
                failure_reason=failure_reason,
                task_summary=task_summary,
                sql=sql,
                submitted_at=submitted_at,
                updated_at=now_utc_iso(),
                completed_at=completed_at,
                logview=logview,
            )

        # Any other status (e.g. not yet started) is reported as pending.
        return JobInfo(
            job_id=instance.id,
            status="pending",
            project=project,
            progress=0,
            stage="queue",
            retryable=None,
            task_summary=task_summary,
            sql=sql,
            submitted_at=submitted_at,
            updated_at=now_utc_iso(),
            completed_at=completed_at,
            logview=logview,
        )

    def _safe_sql(self, instance) -> 'str | None':
        """Safely get SQL from instance.

        Best-effort: returns ``None`` on any backend error or empty SQL;
        a trailing semicolon is stripped.
        """
        try:
            sql = instance.get_sql_query()
        except Exception:
            return None
        return sql.rstrip(";") if sql else None

    def _safe_logview(self, instance) -> 'str | None':
        """Safely get logview URL from instance (``None`` on any error)."""
        try:
            return instance.get_logview_address()
        except Exception:
            return None