maxc-cli 0.2.3__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/PKG-INFO +1 -1
  2. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/setup.py +1 -1
  3. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/__init__.py +1 -1
  4. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/app.py +99 -0
  5. maxc_cli-0.2.4/src/maxc_cli/backend/data.py +507 -0
  6. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/cli.py +63 -0
  7. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/exceptions.py +31 -0
  8. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/helpers.py +83 -0
  9. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/skills/SKILL.md +11 -6
  10. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/skills/references/command-patterns.md +35 -0
  11. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/skills/references/red-lines.md +3 -2
  12. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli.egg-info/PKG-INFO +1 -1
  13. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli.egg-info/SOURCES.txt +1 -0
  14. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_cli_mock.py +445 -0
  15. maxc_cli-0.2.4/tests/test_helpers_csv.py +88 -0
  16. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_integration_real.py +38 -0
  17. maxc_cli-0.2.3/src/maxc_cli/backend/data.py +0 -176
  18. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/MANIFEST.in +0 -0
  19. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/README.md +0 -0
  20. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/pyproject.toml +0 -0
  21. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/scripts/regression_test.py +0 -0
  22. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/setup.cfg +0 -0
  23. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/__main__.py +0 -0
  24. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/audit.py +0 -0
  25. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/auth_providers.py +0 -0
  26. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/backend/__init__.py +0 -0
  27. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/backend/auth.py +0 -0
  28. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/backend/catalog.py +0 -0
  29. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/backend/job.py +0 -0
  30. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/backend/meta.py +0 -0
  31. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/backend/odps.py +0 -0
  32. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/backend/query.py +0 -0
  33. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/cache.py +0 -0
  34. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/config.py +0 -0
  35. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/masking.py +0 -0
  36. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/models.py +0 -0
  37. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/output.py +0 -0
  38. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/setting_parser.py +0 -0
  39. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/skills/agents/openai.yaml +0 -0
  40. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/skills/references/bootstrap-auth.md +0 -0
  41. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/skills/references/bootstrap-flow.md +0 -0
  42. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/skills/references/json-output-format.md +0 -0
  43. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/skills/references/maxcompute-sql-notes.md +0 -0
  44. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/skills/references/migrate-from-odpscmd.md +0 -0
  45. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/skills/references/partition-guide.md +0 -0
  46. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/skills/references/setup-install.md +0 -0
  47. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/store.py +0 -0
  48. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/utils.py +0 -0
  49. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli.egg-info/dependency_links.txt +0 -0
  50. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli.egg-info/entry_points.txt +0 -0
  51. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli.egg-info/requires.txt +0 -0
  52. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli.egg-info/top_level.txt +0 -0
  53. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_agent_hints_and_cli.py +0 -0
  54. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_agent_skill_commands_context.py +0 -0
  55. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_cache.py +0 -0
  56. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_catalog.py +0 -0
  57. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_compat.py +0 -0
  58. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_e2e_smoke.py +0 -0
  59. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_error_self_correction.py +0 -0
  60. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_external_auth.py +0 -0
  61. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_integration.py +0 -0
  62. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_job_improvements.py +0 -0
  63. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_masking.py +0 -0
  64. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_phase1_improvements.py +0 -0
  65. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_query_auto_promote.py +0 -0
  66. {maxc_cli-0.2.3 → maxc_cli-0.2.4}/tests/test_setting_parser.py +0 -0
{maxc_cli-0.2.3 → maxc_cli-0.2.4}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: maxc-cli
- Version: 0.2.3
+ Version: 0.2.4
  Summary: Agent-native MaxCompute CLI for external coding agents
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.9
@@ -9,7 +9,7 @@ README = ROOT / "README.md"
9
9
 
10
10
  setup(
11
11
  name="maxc-cli",
12
- version="0.2.3",
12
+ version="0.2.4",
13
13
  description="Agent-native MaxCompute CLI for external coding agents",
14
14
  long_description=README.read_text(encoding="utf-8"),
15
15
  long_description_content_type="text/markdown",
{maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/__init__.py
@@ -2,4 +2,4 @@
  
  __all__ = ["__version__"]
  
- __version__ = "0.2.3"
+ __version__ = "0.2.4"
{maxc_cli-0.2.3 → maxc_cli-0.2.4}/src/maxc_cli/app.py
@@ -2211,6 +2211,105 @@ class MaxCApp:
          self.log("data.profile", envelope.status, envelope.metadata)
          return envelope
  
+     def data_upload(
+         self,
+         table_name: 'str',
+         file_path: 'str',
+         *,
+         partition: 'str | None' = None,
+         overwrite: 'bool' = False,
+         delimiter: 'str' = ",",
+         has_header: 'bool' = True,
+         null_marker: 'str' = r"\N",
+         block_size: 'int' = 10000,
+         project: 'str | None' = None,
+     ) -> 'Envelope':
+         target_project = project or self.config.default_project
+         result = self.backend.upload_table(
+             table_name, file_path,
+             partition=partition, overwrite=overwrite,
+             delimiter=delimiter, has_header=has_header,
+             null_marker=null_marker, block_size=block_size,
+             project=project,
+         )
+         metadata = {
+             "project": target_project,
+             "requested_partition": partition,
+             "delimiter": delimiter,
+             "block_size": block_size,
+         }
+         envelope = Envelope(
+             command="data.upload",
+             status="success",
+             data=result,
+             metadata=metadata,
+             agent_hints=AgentHints(
+                 actions=[
+                     action("data.sample", data=result, metadata=metadata),
+                 ],
+                 warnings=result.get("warnings", []),
+             ),
+         )
+         self.log("data.upload", envelope.status, envelope.metadata)
+         return envelope
+ 
+     def data_download(
+         self,
+         table_name: 'str',
+         output_path: 'str',
+         *,
+         partition: 'str | None' = None,
+         columns: 'list[str] | None' = None,
+         limit: 'int | None' = None,
+         delimiter: 'str' = ",",
+         write_header: 'bool' = True,
+         null_marker: 'str' = "",
+         project: 'str | None' = None,
+     ) -> 'Envelope':
+         """Download a table or partition to a local CSV/TSV file via Tunnel.
+ 
+         Args:
+             table_name: Table name (schema.table or table).
+             output_path: Local file path to write.
+             partition: Required when table is partitioned.
+             columns: Optional column subset; default = all columns in schema order.
+             limit: Optional max rows; default = full partition / table.
+             delimiter: Field delimiter (default ",").
+             write_header: When False, suppress header row.
+             null_marker: Token written for SQL NULL (default empty string).
+             project: Target project; default = config's default_project.
+ 
+         Returns:
+             Envelope with table, applied_partition, output_path, rows_written,
+             bytes_written, columns, truncated, warnings.
+         """
+         target_project = project or self.config.default_project
+         result = self.backend.download_table(
+             table_name, output_path,
+             partition=partition, columns=columns, limit=limit,
+             delimiter=delimiter, write_header=write_header,
+             null_marker=null_marker, project=project,
+         )
+         metadata = {
+             "project": target_project,
+             "requested_partition": partition,
+             "requested_columns": columns or [],
+             "requested_limit": limit,
+             "delimiter": delimiter,
+         }
+         envelope = Envelope(
+             command="data.download",
+             status="success",
+             data=result,
+             metadata=metadata,
+             agent_hints=AgentHints(
+                 actions=[],
+                 warnings=result.get("warnings", []),
+             ),
+         )
+         self.log("data.download", envelope.status, envelope.metadata)
+         return envelope
+ 
      def auth_login(
          self,
          *,
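
The two new `MaxCApp` methods wrap the backend Tunnel calls in the standard envelope. A minimal usage sketch, assuming an already-configured `app` instance and made-up table, file, and column names (none of these specifics appear in the diff):

```python
# Hypothetical usage; `app` is an already-configured MaxCApp instance and
# Envelope is assumed to expose its constructor fields as attributes.
envelope = app.data_upload(
    "sales_daily",                  # assumed table name
    "/tmp/sales_20260508.csv",      # assumed local CSV path
    partition="ds=20260508",        # required when the table is partitioned
    overwrite=False,
)
print(envelope.status, envelope.data["rows_written"])

envelope = app.data_download(
    "sales_daily",
    "/tmp/sales_export.csv",
    partition="ds=20260508",
    columns=["region", "amount"],   # assumed column names
    limit=1000,                     # data["truncated"] is True if more rows exist
)
print(envelope.data["output_path"], envelope.data["truncated"])
```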
maxc_cli-0.2.4/src/maxc_cli/backend/data.py
@@ -0,0 +1,507 @@
+ """Data-related mixin for OdpsBackend."""
+ 
+ from typing import Any
+ 
+ from ..config import TableDefinition
+ from ..exceptions import CsvParseError, ValidationError
+ from ..helpers import (
+     build_profile,
+     csv_format_value,
+     csv_parse_value,
+     csv_supported_type,
+     quote_table_name,
+     resolve_sample_request,
+     sql_string_literal,
+     translate_odps_error,
+ )
+ 
+ 
+ class DataMixin:
+     """Mixin providing data sampling and profiling methods."""
+ 
+     def _table_tunnel(self):
+         """Return a TableTunnel for the current ODPS client.
+ 
+         Real PyODPS `ODPS` instances do not expose a `.tunnel` attribute,
+         so we construct `odps.tunnel.TableTunnel(odps=self.client)` lazily.
+         Test doubles (FakeODPS) DO expose `.tunnel` directly — honor that
+         so existing FakeTunnel infrastructure keeps working.
+         """
+         existing = getattr(self.client, "tunnel", None)
+         if existing is not None:
+             return existing
+         from odps.tunnel import TableTunnel
+         return TableTunnel(odps=self.client)
+ 
+     def _resolve_partition_for_sample(
+         self,
+         definition: 'TableDefinition',
+         partition: 'str | None',
+         *,
+         project: 'str | None',
+     ) -> 'tuple[str | None, list[str]]':
+         """Resolve the partition spec to use, auto-detecting latest if needed.
+ 
+         Returns (partition_spec, warnings).
+ 
+         Raises ValidationError if the table is partitioned and no partition
+         can be determined.
+         """
+         warnings: 'list[str]' = []
+         if partition or not definition.partition_columns:
+             return partition, warnings
+ 
+         # Partitioned table without partition spec — try latest-partition.
+         try:
+             latest_payload, _latest_warnings = self.latest_partition_info(
+                 definition.name, project=project,
+             )
+             latest_spec = latest_payload.get("latest_partition")
+         except Exception:
+             latest_spec = None
+ 
+         if latest_spec:
+             warnings.append(
+                 f"No --partition specified; auto-selected latest partition "
+                 f"`{latest_spec}`. Pass --partition explicitly to pin a value."
+             )
+             return latest_spec, warnings
+ 
+         partition_keys = ", ".join(c.name for c in definition.partition_columns)
+         raise ValidationError(
+             (
+                 f"Table `{definition.name}` is partitioned ({partition_keys}) "
+                 f"but no --partition was specified, and no latest partition "
+                 f"could be determined."
+             ),
+             suggestion=(
+                 f"Run `maxc meta latest-partition {definition.name}` to find a "
+                 f"valid partition, then re-run with --partition <spec>."
+             ),
+         )
+ 
+     def sample_table(
+         self,
+         table_name: 'str',
+         rows: 'int',
+         *,
+         partition: 'str | None' = None,
+         columns: 'list[str] | None' = None,
+         project: 'str | None' = None,
+     ) -> 'tuple[TableDefinition, list[dict[str, Any]], dict[str, Any]]':
+         """Sample data from a table.
+ 
+         Uses ``client.read_table()`` for efficient row-level access with
+         optional partition pruning and column selection. When the table is
+         partitioned and *partition* is not provided, automatically selects
+         the latest partition (and adds a warning to ``sample_info``).
+ 
+         Args:
+             table_name: Table name.
+             rows: Maximum number of rows to return.
+             partition: Optional partition spec (e.g. ``"ds=20260101"``).
+             columns: Optional list of column names to select.
+ 
+         Returns:
+             Tuple of (table definition, sample rows as list of dicts,
+             sample metadata with applied_partition, selected_columns,
+             and warnings).
+         """
+         definition = self.describe_table(table_name, project=project)
+         partition, auto_partition_warnings = self._resolve_partition_for_sample(
+             definition, partition, project=project,
+         )
+ 
+         selected_columns, applied_partition, partition_values = resolve_sample_request(
+             definition,
+             partition=partition,
+             columns=columns,
+             strict_partition_check=False,
+         )
+ 
+         # Build column selection
+         column_names = selected_columns if selected_columns else [c.name for c in definition.columns]
+ 
+         # Build partition spec if needed
+         partition_spec = None
+         if applied_partition and partition_values:
+             partition_spec = ",".join(
+                 f"{k}={v}" for k, v in partition_values.items()
+             )
+ 
+         # Read data using ODPS read_table method
+         def _serialize_value(value):
+             """Convert value to JSON-serializable format."""
+             from datetime import datetime, date
+             if isinstance(value, datetime):
+                 return value.isoformat()
+             if isinstance(value, date):
+                 return value.isoformat()
+             return value
+ 
+         try:
+             records = self.client.read_table(
+                 table_name,
+                 limit=rows,
+                 partition=partition_spec,
+                 project=project or self.project,
+             )
+             sample_rows = [
+                 {column: _serialize_value(record[column]) for column in column_names}
+                 for record in records
+             ]
+         except Exception as exc:
+             raise translate_odps_error(exc) from exc
+ 
+         return definition, sample_rows, {
+             "schema": [{"name": c.name, "type": c.type, "comment": c.comment} for c in definition.columns if c.name in column_names],
+             "applied_partition": applied_partition,
+             "selected_columns": selected_columns,
+             "warnings": auto_partition_warnings,
+         }
+ 
+     def profile_table(self, table_name: 'str', *, partition: 'str | None' = None, project: 'str | None' = None) -> 'dict[str, Any]':
+         """Profile data from a table by sampling and computing statistics.
+ 
+         Samples up to 20 rows and computes per-column statistics (null count,
+         distinct count, min/max, etc.) using heuristic analysis. Not a native
+         ODPS profile feature — results are approximate.
+ 
+         Limitations:
+         - Based on a 20-row sample; not statistically representative.
+         - No native ODPS ``PROFILE`` command is used.
+         - For accurate statistics, run explicit aggregation SQL.
+ 
+         Args:
+             table_name: Table name.
+             partition: Optional partition spec for partition pruning.
+ 
+         Returns:
+             Dict with table name, column profiles, and sample info.
+         """
+         definition, sample_rows, sample_info = self.sample_table(
+             table_name,
+             rows=20,
+             partition=partition,
+             columns=None,
+             project=project,
+         )
+         return build_profile(
+             definition,
+             sample_rows,
+             applied_partition=sample_info["applied_partition"],
+         )
+ 
+     def upload_table(
+         self,
+         table_name: 'str',
+         file_path: 'str',
+         *,
+         partition: 'str | None' = None,
+         overwrite: 'bool' = False,
+         delimiter: 'str' = ",",
+         has_header: 'bool' = True,
+         null_marker: 'str' = r"\N",
+         block_size: 'int' = 10000,
+         project: 'str | None' = None,
+     ) -> 'dict[str, Any]':
+         """Upload a CSV/TSV file into an existing table or partition via Tunnel.
+ 
+         Args:
+             table_name: Target table name (schema.table or table).
+             file_path: Path to the local CSV/TSV file to upload.
+             partition: Optional partition spec (e.g. ``"ds=20260508"``); required
+                 for partitioned tables, forbidden for non-partitioned tables.
+             overwrite: If True, use INSERT OVERWRITE semantics for the target.
+             delimiter: Field delimiter (default ``","``).
+             has_header: If True, the first row is treated as a header and
+                 columns are mapped by name; otherwise mapped by ordinal.
+             null_marker: Token interpreted as SQL NULL (default ``"\\N"``).
+             block_size: Rows per Tunnel block (default 10000).
+             project: Optional MaxCompute project override.
+ 
+         Returns:
+             Dict with ``table``, ``applied_partition``, ``rows_written``,
+             ``bytes_read``, ``blocks``, ``overwrite``, and ``warnings``.
+ 
+         Raises:
+             ValidationError: For invalid partitioning, unsupported column
+                 types, or invalid block sizes.
+             CsvParseError: When a CSV row cannot be parsed; carries
+                 ``line`` / ``column`` context. The Tunnel session is aborted
+                 before the exception propagates.
+         """
+         import csv
+         import os
+ 
+         if block_size < 1:
+             raise ValidationError("`block_size` must be >= 1.")
+ 
+         definition = self.describe_table(table_name, project=project)
+         partition_columns = {c.name for c in definition.partition_columns}
+         data_columns = [c for c in definition.columns if c.name not in partition_columns]
+         name_to_type = {c.name: c.type for c in data_columns}
+ 
+         if definition.partition_columns and not partition:
+             keys = ", ".join(c.name for c in definition.partition_columns)
+             raise ValidationError(
+                 f"Table `{definition.name}` is partitioned ({keys}); --partition is required.",
+                 suggestion=f"Pass --partition <{keys}=...>.",
+             )
+         if partition and not definition.partition_columns:
+             raise ValidationError(
+                 f"Table `{definition.name}` is not partitioned; --partition is not allowed.",
+             )
+         if partition:
+             _validate_partition_keys(partition, definition.partition_columns)
+ 
+         unsupported = [c.name for c in data_columns if not csv_supported_type(c.type)]
+         if unsupported:
+             raise ValidationError(
+                 f"Columns {unsupported} have complex types not supported by CSV upload.",
+                 suggestion="Use INSERT ... SELECT via `maxc query` instead.",
+             )
+ 
+         bytes_read = os.path.getsize(file_path)
+         block_ids: 'list[int]' = []
+         rows_written = 0
+         warnings: 'list[str]' = []
+ 
+         upload_session = self._table_tunnel().create_upload_session(
+             definition.name, partition_spec=partition, overwrite=overwrite,
+         )
+ 
+         try:
+             with open(file_path, "r", encoding="utf-8", newline="") as fh:
+                 reader = csv.reader(fh, delimiter=delimiter)
+ 
+                 if has_header:
+                     try:
+                         header = next(reader)
+                     except StopIteration:
+                         header = []
+                     column_order = _resolve_header_mapping(header, data_columns, warnings)
+                 else:
+                     column_order = [c.name for c in data_columns]
+ 
+                 current_block = 0
+                 writer = upload_session.open_record_writer(current_block)
+                 block_ids.append(current_block)
+                 in_block = 0
+                 line_no = 1 if not has_header else 2
+ 
+                 for row in reader:
+                     if not has_header and len(row) != len(column_order):
+                         raise CsvParseError(
+                             f"expected {len(column_order)} columns, got {len(row)}",
+                             line=line_no,
+                         )
+                     if has_header and len(row) < len(column_order):
+                         raise CsvParseError(
+                             f"row has {len(row)} columns, header has {len(column_order)}",
+                             line=line_no,
+                         )
+                     record = upload_session.new_record()
+                     for col_name, cell in zip(column_order, row):
+                         try:
+                             record[col_name] = csv_parse_value(
+                                 cell, name_to_type[col_name], null_marker=null_marker,
+                             )
+                         except CsvParseError as exc:
+                             exc.line = line_no
+                             exc.column = col_name
+                             raise
+                     writer.write(record)
+                     rows_written += 1
+                     in_block += 1
+                     line_no += 1
+                     if in_block >= block_size:
+                         writer.close()
+                         current_block += 1
+                         writer = upload_session.open_record_writer(current_block)
+                         block_ids.append(current_block)
+                         in_block = 0
+ 
+                 writer.close()
+         except CsvParseError:
+             upload_session.abort()
+             raise
+         except Exception as exc:
+             upload_session.abort()
+             raise translate_odps_error(exc) from exc
+ 
+         upload_session.commit(block_ids)
+ 
+         return {
+             "table": definition.name,
+             "applied_partition": partition,
+             "rows_written": rows_written,
+             "bytes_read": bytes_read,
+             "blocks": len(block_ids),
+             "overwrite": overwrite,
+             "warnings": warnings,
+         }
+ 
+     def download_table(
+         self,
+         table_name: 'str',
+         output_path: 'str',
+         *,
+         partition: 'str | None' = None,
+         columns: 'list[str] | None' = None,
+         limit: 'int | None' = None,
+         delimiter: 'str' = ",",
+         write_header: 'bool' = True,
+         null_marker: 'str' = "",
+         project: 'str | None' = None,
+     ) -> 'dict[str, Any]':
+         """Download a table or partition to a local CSV/TSV file via Tunnel.
+ 
+         Args:
+             table_name: Table name (schema.table or table).
+             output_path: Local file path to write.
+             partition: Required when table is partitioned.
+             columns: Optional column subset; default = all columns in schema order.
+             limit: Optional max rows; default = full partition / table.
+             delimiter: Field delimiter (default ",").
+             write_header: When False, suppress header row.
+             null_marker: Token written for SQL NULL (default empty string).
+             project: Target project; default = backend's default project.
+ 
+         Returns:
+             Dict with table, applied_partition, output_path, rows_written,
+             bytes_written, columns, truncated, warnings.
+         """
+         import csv
+         import os
+ 
+         if limit is not None and limit < 1:
+             raise ValidationError("`limit` must be >= 1.")
+ 
+         definition = self.describe_table(table_name, project=project)
+         partition_columns = {c.name for c in definition.partition_columns}
+         data_columns = [c for c in definition.columns if c.name not in partition_columns]
+         name_to_type = {c.name: c.type for c in data_columns}
+ 
+         if definition.partition_columns and not partition:
+             keys = ", ".join(c.name for c in definition.partition_columns)
+             raise ValidationError(
+                 f"Table `{definition.name}` is partitioned ({keys}); --partition is required.",
+                 suggestion=f"Pass --partition <{keys}=...>.",
+             )
+         if partition and not definition.partition_columns:
+             raise ValidationError(
+                 f"Table `{definition.name}` is not partitioned; --partition is not allowed.",
+             )
+         if partition:
+             _validate_partition_keys(partition, definition.partition_columns)
+ 
+         if columns:
+             unknown = [c for c in columns if c not in name_to_type]
+             if unknown:
+                 raise ValidationError(f"Unknown columns: {unknown}")
+             selected = list(columns)
+         else:
+             selected = [c.name for c in data_columns]
+ 
+         try:
+             session = self._table_tunnel().create_download_session(
+                 definition.name, partition_spec=partition,
+             )
+             total = session.count
+             count = min(total, limit) if limit is not None else total
+ 
+             rows_written = 0
+             try:
+                 with open(output_path, "w", encoding="utf-8", newline="") as fh:
+                     writer = csv.writer(fh, delimiter=delimiter)
+                     if write_header:
+                         writer.writerow(selected)
+                     for record in session.open_record_reader(0, count):
+                         writer.writerow([
+                             csv_format_value(
+                                 record[col], name_to_type[col],
+                                 null_marker=null_marker,
+                             )
+                             for col in selected
+                         ])
+                         rows_written += 1
+             except Exception:
+                 try:
+                     os.remove(output_path)
+                 except OSError:
+                     pass
+                 raise
+         except ValidationError:
+             raise
+         except Exception as exc:
+             raise translate_odps_error(exc) from exc
+ 
+         bytes_written = os.path.getsize(output_path)
+         truncated = limit is not None and limit < total
+         warnings: 'list[str]' = []
+         if truncated:
+             warnings.append(
+                 f"--limit reached; output may be partial (session has {total} rows)."
+             )
+ 
+         return {
+             "table": definition.name,
+             "applied_partition": partition,
+             "output_path": os.path.abspath(output_path),
+             "rows_written": rows_written,
+             "bytes_written": bytes_written,
+             "columns": selected,
+             "truncated": truncated,
+             "warnings": warnings,
+         }
+ 
+ 
+ def _resolve_header_mapping(
+     header: 'list[str]',
+     data_columns: 'list',
+     warnings: 'list[str]',
+ ) -> 'list[str]':
+     expected = {c.name for c in data_columns}
+     seen = set(header)
+     missing = expected - seen
+     if missing:
+         raise ValidationError(
+             f"CSV header missing required columns: {sorted(missing)}",
+         )
+     extras = [name for name in header if name not in expected]
+     if extras:
+         warnings.append(
+             f"CSV header has extra columns ignored: {extras}"
+         )
+     return [name for name in header if name in expected]
+ 
+ 
+ def _validate_partition_keys(
+     partition: 'str',
+     partition_columns: 'list',
+ ) -> 'None':
+     """Raise ValidationError if `partition` doesn't match the table's keys."""
+     from ..helpers import parse_partition_spec
+ 
+     expected_keys = [c.name for c in partition_columns]
+     parsed = parse_partition_spec(partition)
+     if not parsed:
+         raise ValidationError(
+             f"Could not parse --partition {partition!r}.",
+             suggestion=f"Use the form {','.join(f'{k}=...' for k in expected_keys)}.",
+         )
+     given = set(parsed.keys())
+     expected = set(expected_keys)
+     missing = expected - given
+     extra = given - expected
+     if missing or extra:
+         parts = []
+         if missing:
+             parts.append(f"missing keys {sorted(missing)}")
+         if extra:
+             parts.append(f"unknown keys {sorted(extra)}")
+         raise ValidationError(
+             f"--partition {partition!r} {' and '.join(parts)}; "
+             f"table keys are {expected_keys}.",
+         )