maxc-cli 0.1.9__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/PKG-INFO +1 -1
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/setup.py +1 -1
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/__init__.py +1 -1
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/app.py +245 -58
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/backend/data.py +63 -5
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/backend/job.py +19 -2
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/backend/meta.py +147 -36
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/backend/odps.py +1 -1
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/backend/query.py +95 -11
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/cli.py +108 -17
- maxc_cli-0.2.1/src/maxc_cli/exceptions.py +136 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/helpers.py +6 -2
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/models.py +18 -8
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/output.py +11 -5
- maxc_cli-0.2.1/src/maxc_cli/skills/SKILL.md +241 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/skills/references/bootstrap-auth.md +4 -73
- maxc_cli-0.2.1/src/maxc_cli/skills/references/bootstrap-flow.md +100 -0
- maxc_cli-0.2.1/src/maxc_cli/skills/references/command-patterns.md +383 -0
- maxc_cli-0.2.1/src/maxc_cli/skills/references/json-output-format.md +100 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/skills/references/migrate-from-odpscmd.md +10 -28
- maxc_cli-0.2.1/src/maxc_cli/skills/references/red-lines.md +94 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli.egg-info/PKG-INFO +1 -1
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli.egg-info/SOURCES.txt +3 -1
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_agent_hints_and_cli.py +16 -4
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_agent_skill_commands_context.py +0 -65
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_phase1_improvements.py +249 -16
- maxc_cli-0.1.9/src/maxc_cli/exceptions.py +0 -193
- maxc_cli-0.1.9/src/maxc_cli/skills/SKILL.md +0 -516
- maxc_cli-0.1.9/src/maxc_cli/skills/nohup.out +0 -0
- maxc_cli-0.1.9/src/maxc_cli/skills/references/command-patterns.md +0 -274
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/MANIFEST.in +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/README.md +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/pyproject.toml +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/scripts/regression_test.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/setup.cfg +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/__main__.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/audit.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/auth_providers.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/backend/__init__.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/backend/auth.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/backend/catalog.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/cache.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/config.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/masking.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/setting_parser.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/skills/agents/openai.yaml +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/skills/references/maxcompute-sql-notes.md +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/skills/references/partition-guide.md +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/skills/references/setup-install.md +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/store.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli/utils.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli.egg-info/dependency_links.txt +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli.egg-info/entry_points.txt +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli.egg-info/requires.txt +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/src/maxc_cli.egg-info/top_level.txt +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_cache.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_catalog.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_cli_mock.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_compat.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_e2e_smoke.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_error_self_correction.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_external_auth.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_integration.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_integration_real.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_job_improvements.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_masking.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_query_auto_promote.py +0 -0
- {maxc_cli-0.1.9 → maxc_cli-0.2.1}/tests/test_setting_parser.py +0 -0
|
@@ -9,7 +9,7 @@ README = ROOT / "README.md"
|
|
|
9
9
|
|
|
10
10
|
setup(
|
|
11
11
|
name="maxc-cli",
|
|
12
|
-
version="0.1.9",
|
|
12
|
+
version="0.2.1",
|
|
13
13
|
description="Agent-native MaxCompute CLI for external coding agents",
|
|
14
14
|
long_description=README.read_text(encoding="utf-8"),
|
|
15
15
|
long_description_content_type="text/markdown",
|
|
@@ -351,6 +351,7 @@ class MaxCApp:
|
|
|
351
351
|
return envelope
|
|
352
352
|
# Job ended — check outcome
|
|
353
353
|
if job_info.status == "failure":
|
|
354
|
+
error_msg = job_info.failure_reason or job_info.error_message or "Job failed"
|
|
354
355
|
envelope = Envelope(
|
|
355
356
|
command=command,
|
|
356
357
|
status="failure",
|
|
@@ -362,6 +363,12 @@ class MaxCApp:
|
|
|
362
363
|
"logview": job_info.logview,
|
|
363
364
|
"sql_executed": sql,
|
|
364
365
|
},
|
|
366
|
+
error=ErrorPayload(
|
|
367
|
+
code="EXECUTION_FAILED",
|
|
368
|
+
message=error_msg,
|
|
369
|
+
suggestion=None,
|
|
370
|
+
recoverable=False,
|
|
371
|
+
),
|
|
365
372
|
agent_hints=AgentHints(
|
|
366
373
|
actions=[
|
|
367
374
|
action("job.diagnose", data={"job_id": job_info.job_id}, metadata={"job_id": job_info.job_id, "project": job_info.project, "sql_executed": sql}),
|
|
@@ -897,27 +904,51 @@ class MaxCApp:
|
|
|
897
904
|
self.log("job.list", envelope.status, envelope.metadata)
|
|
898
905
|
return envelope
|
|
899
906
|
|
|
900
|
-
def meta_list_tables(
|
|
907
|
+
def meta_list_tables(
|
|
908
|
+
self,
|
|
909
|
+
*,
|
|
910
|
+
schema: 'str | None' = None,
|
|
911
|
+
project: 'str | None' = None,
|
|
912
|
+
limit: 'int | None' = None,
|
|
913
|
+
cursor: 'str | None' = None,
|
|
914
|
+
) -> 'Envelope':
|
|
901
915
|
started = monotonic()
|
|
916
|
+
target_project = project or self.config.default_project
|
|
902
917
|
effective_schema = schema or self.config.default_schema
|
|
903
918
|
|
|
904
|
-
#
|
|
919
|
+
# Decode cursor (offset token, mirrors cli.py pagination scheme)
|
|
920
|
+
offset = 0
|
|
921
|
+
if cursor:
|
|
922
|
+
try:
|
|
923
|
+
offset = max(0, int(cursor))
|
|
924
|
+
except (TypeError, ValueError):
|
|
925
|
+
raise ValidationError(
|
|
926
|
+
f"Invalid --cursor value: {cursor!r}",
|
|
927
|
+
suggestion="Pass the `next_cursor` value returned by the previous call.",
|
|
928
|
+
)
|
|
929
|
+
|
|
930
|
+
# Try to get from cache first (cache pagination is in-memory slicing)
|
|
905
931
|
cached_tables = self.cache.get_all_cached_tables(
|
|
906
|
-
|
|
932
|
+
target_project,
|
|
907
933
|
schema_name=effective_schema,
|
|
908
934
|
)
|
|
909
935
|
|
|
936
|
+
has_more = False
|
|
937
|
+
next_cursor: 'str | None' = None
|
|
938
|
+
|
|
910
939
|
if cached_tables:
|
|
911
940
|
# Use cached data (returns list of dicts)
|
|
912
|
-
|
|
941
|
+
window = cached_tables[offset:]
|
|
942
|
+
if limit is not None:
|
|
943
|
+
has_more = len(window) > limit
|
|
944
|
+
window = window[:limit]
|
|
945
|
+
tables = window
|
|
913
946
|
source = "cache"
|
|
914
947
|
rows = [
|
|
915
948
|
{
|
|
916
949
|
"table_name": table.get("table_name"),
|
|
917
950
|
"schema_name": effective_schema or table.get("schema_name", "default"),
|
|
918
951
|
"table_type": table.get("table_type", "TABLE"),
|
|
919
|
-
"size_bytes": table.get("size_bytes"),
|
|
920
|
-
"owner": table.get("owner"),
|
|
921
952
|
"description": table.get("description"),
|
|
922
953
|
"partition_columns": [
|
|
923
954
|
c.get("name") if isinstance(c, dict) else str(c)
|
|
@@ -927,34 +958,48 @@ class MaxCApp:
|
|
|
927
958
|
for table in tables
|
|
928
959
|
]
|
|
929
960
|
else:
|
|
930
|
-
# Cache miss — fall back to live backend query
|
|
931
|
-
live_tables = self.backend.list_tables(
|
|
961
|
+
# Cache miss — fall back to live backend query (now paginated)
|
|
962
|
+
live_tables, has_more = self.backend.list_tables(
|
|
963
|
+
schema=effective_schema,
|
|
964
|
+
project=project,
|
|
965
|
+
limit=limit,
|
|
966
|
+
offset=offset,
|
|
967
|
+
)
|
|
932
968
|
source = "backend"
|
|
933
969
|
rows = [
|
|
934
970
|
{
|
|
935
971
|
"table_name": t.name,
|
|
936
972
|
"schema_name": effective_schema or "default",
|
|
937
973
|
"table_type": t.table_type or "TABLE",
|
|
938
|
-
"size_bytes": t.size_bytes,
|
|
939
|
-
"owner": t.owner,
|
|
940
974
|
"description": t.description,
|
|
941
975
|
"partition_columns": [c.name for c in (t.partition_columns or [])],
|
|
942
976
|
}
|
|
943
977
|
for t in live_tables
|
|
944
978
|
]
|
|
945
|
-
|
|
979
|
+
|
|
980
|
+
if has_more and limit is not None:
|
|
981
|
+
next_cursor = str(offset + limit)
|
|
982
|
+
|
|
946
983
|
metadata = self._cache_metadata(
|
|
947
|
-
project=
|
|
984
|
+
project=target_project,
|
|
948
985
|
source=source,
|
|
949
986
|
query_time_ms=int((monotonic() - started) * 1000),
|
|
950
987
|
)
|
|
951
|
-
|
|
988
|
+
|
|
952
989
|
schema_label = effective_schema or "default"
|
|
953
990
|
insights = [f"Table list served from {source}."]
|
|
954
991
|
if effective_schema and effective_schema != "default":
|
|
955
992
|
insights.append(f"Use schema-qualified names in SQL: `{schema_label}.<table_name>`")
|
|
956
993
|
|
|
957
|
-
data = {
|
|
994
|
+
data = {
|
|
995
|
+
"tables": rows,
|
|
996
|
+
"total": len(rows),
|
|
997
|
+
"schema": schema_label,
|
|
998
|
+
"has_more": has_more,
|
|
999
|
+
"next_cursor": next_cursor,
|
|
1000
|
+
"limit": limit,
|
|
1001
|
+
"offset": offset,
|
|
1002
|
+
}
|
|
958
1003
|
envelope = Envelope(
|
|
959
1004
|
command="meta.list-tables",
|
|
960
1005
|
status="success",
|
|
@@ -971,12 +1016,13 @@ class MaxCApp:
|
|
|
971
1016
|
self.log("meta.list-tables", envelope.status, envelope.metadata)
|
|
972
1017
|
return envelope
|
|
973
1018
|
|
|
974
|
-
def meta_describe(self, table_name: 'str', full: 'bool' = False) -> 'Envelope':
|
|
1019
|
+
def meta_describe(self, table_name: 'str', full: 'bool' = False, project: 'str | None' = None) -> 'Envelope':
|
|
975
1020
|
started = monotonic()
|
|
1021
|
+
target_project = project or self.config.default_project
|
|
976
1022
|
|
|
977
1023
|
# Try to get from cache first
|
|
978
1024
|
cached_table = self.cache.get_cached_table(
|
|
979
|
-
|
|
1025
|
+
target_project,
|
|
980
1026
|
table_name,
|
|
981
1027
|
schema_name=self.config.default_schema or "default"
|
|
982
1028
|
)
|
|
@@ -1011,7 +1057,7 @@ class MaxCApp:
|
|
|
1011
1057
|
warnings = []
|
|
1012
1058
|
# Optionally fetch additional metadata from API (description, owner, size, sample rows, partitions)
|
|
1013
1059
|
try:
|
|
1014
|
-
api_table = self.backend.describe_table(table_name)
|
|
1060
|
+
api_table = self.backend.describe_table(table_name, project=project)
|
|
1015
1061
|
# Update with API data (API has priority over cache for these fields)
|
|
1016
1062
|
table.description = api_table.description or table.description
|
|
1017
1063
|
table.owner = api_table.owner or table.owner
|
|
@@ -1026,13 +1072,13 @@ class MaxCApp:
|
|
|
1026
1072
|
warnings.append("Backend API unavailable, showing cached schema only")
|
|
1027
1073
|
else:
|
|
1028
1074
|
# Fall back to live API
|
|
1029
|
-
table = self.backend.describe_table(table_name)
|
|
1075
|
+
table = self.backend.describe_table(table_name, project=project)
|
|
1030
1076
|
source = "live"
|
|
1031
1077
|
warnings = []
|
|
1032
1078
|
|
|
1033
1079
|
# Get semantic metadata from cache
|
|
1034
1080
|
semantic = self.cache.get_semantic(
|
|
1035
|
-
project=
|
|
1081
|
+
project=target_project,
|
|
1036
1082
|
table_name=table_name,
|
|
1037
1083
|
schema_name=self.config.default_schema or "default",
|
|
1038
1084
|
)
|
|
@@ -1055,7 +1101,7 @@ class MaxCApp:
|
|
|
1055
1101
|
payload["semantic"] = semantic
|
|
1056
1102
|
|
|
1057
1103
|
meta_metadata = {
|
|
1058
|
-
"project":
|
|
1104
|
+
"project": target_project,
|
|
1059
1105
|
"source": source,
|
|
1060
1106
|
"query_time_ms": int((monotonic() - started) * 1000) if source == "live" else None,
|
|
1061
1107
|
}
|
|
@@ -1076,8 +1122,16 @@ class MaxCApp:
|
|
|
1076
1122
|
self.log("meta.describe", envelope.status, envelope.metadata)
|
|
1077
1123
|
return envelope
|
|
1078
1124
|
|
|
1079
|
-
def meta_search(
|
|
1125
|
+
def meta_search(
|
|
1126
|
+
self,
|
|
1127
|
+
keyword: 'str',
|
|
1128
|
+
*,
|
|
1129
|
+
schema: 'str | None' = None,
|
|
1130
|
+
project: 'str | None' = None,
|
|
1131
|
+
limit: 'int | None' = None,
|
|
1132
|
+
) -> 'Envelope':
|
|
1080
1133
|
started = monotonic()
|
|
1134
|
+
target_project = project or self.config.default_project
|
|
1081
1135
|
effective_schema = schema or self.config.default_schema
|
|
1082
1136
|
|
|
1083
1137
|
# Priority: Catalog API → cache → live scan
|
|
@@ -1100,18 +1154,31 @@ class MaxCApp:
|
|
|
1100
1154
|
|
|
1101
1155
|
if not catalog_available:
|
|
1102
1156
|
cached_tables = self.cache.get_all_cached_tables(
|
|
1103
|
-
|
|
1157
|
+
target_project, schema_name=effective_schema,
|
|
1104
1158
|
)
|
|
1105
1159
|
if cached_tables:
|
|
1106
1160
|
matches = self._search_in_cache(keyword, cached_tables)
|
|
1107
1161
|
source = "cache"
|
|
1108
1162
|
else:
|
|
1109
|
-
matches = self.backend.search_tables(keyword, schema=effective_schema)
|
|
1163
|
+
matches = self.backend.search_tables(keyword, schema=effective_schema, project=project)
|
|
1110
1164
|
source = "live"
|
|
1111
1165
|
|
|
1112
|
-
|
|
1166
|
+
original_total = len(matches)
|
|
1167
|
+
truncated = False
|
|
1168
|
+
if limit is not None and len(matches) > limit:
|
|
1169
|
+
matches = matches[:limit]
|
|
1170
|
+
truncated = True
|
|
1171
|
+
|
|
1172
|
+
search_data = {
|
|
1173
|
+
"keyword": keyword,
|
|
1174
|
+
"matches": matches,
|
|
1175
|
+
"total": original_total,
|
|
1176
|
+
"has_more": truncated,
|
|
1177
|
+
"limit": limit,
|
|
1178
|
+
"truncated": truncated,
|
|
1179
|
+
}
|
|
1113
1180
|
search_metadata = self._cache_metadata(
|
|
1114
|
-
project=
|
|
1181
|
+
project=target_project,
|
|
1115
1182
|
source=source,
|
|
1116
1183
|
query_time_ms=int((monotonic() - started) * 1000) if source in ("live", "catalog") else None,
|
|
1117
1184
|
)
|
|
@@ -1131,11 +1198,19 @@ class MaxCApp:
|
|
|
1131
1198
|
self.log("meta.search", envelope.status, envelope.metadata)
|
|
1132
1199
|
return envelope
|
|
1133
1200
|
|
|
1134
|
-
def meta_search_columns(
|
|
1201
|
+
def meta_search_columns(
|
|
1202
|
+
self,
|
|
1203
|
+
keyword: 'str',
|
|
1204
|
+
*,
|
|
1205
|
+
schema: 'str | None' = None,
|
|
1206
|
+
project: 'str | None' = None,
|
|
1207
|
+
limit: 'int | None' = None,
|
|
1208
|
+
) -> 'Envelope':
|
|
1135
1209
|
started = monotonic()
|
|
1210
|
+
target_project = project or self.config.default_project
|
|
1136
1211
|
effective_schema = schema or self.config.default_schema
|
|
1137
1212
|
cached_tables = self.cache.get_all_cached_tables(
|
|
1138
|
-
|
|
1213
|
+
target_project, schema_name=effective_schema,
|
|
1139
1214
|
)
|
|
1140
1215
|
if cached_tables:
|
|
1141
1216
|
matches = self._search_columns_in_cache(keyword, cached_tables)
|
|
@@ -1152,9 +1227,23 @@ class MaxCApp:
|
|
|
1152
1227
|
"Column search requires a metadata cache. "
|
|
1153
1228
|
"Run `maxc cache build` first, then retry `maxc meta search-columns`.",
|
|
1154
1229
|
]
|
|
1155
|
-
|
|
1230
|
+
|
|
1231
|
+
original_total = len(matches)
|
|
1232
|
+
truncated = False
|
|
1233
|
+
if limit is not None and len(matches) > limit:
|
|
1234
|
+
matches = matches[:limit]
|
|
1235
|
+
truncated = True
|
|
1236
|
+
|
|
1237
|
+
sc_data = {
|
|
1238
|
+
"keyword": keyword,
|
|
1239
|
+
"matches": matches,
|
|
1240
|
+
"total": original_total,
|
|
1241
|
+
"has_more": truncated,
|
|
1242
|
+
"limit": limit,
|
|
1243
|
+
"truncated": truncated,
|
|
1244
|
+
}
|
|
1156
1245
|
sc_metadata = self._cache_metadata(
|
|
1157
|
-
project=
|
|
1246
|
+
project=target_project,
|
|
1158
1247
|
source=source,
|
|
1159
1248
|
query_time_ms=int((monotonic() - started) * 1000) if source not in ("cache", "cache_required") else None,
|
|
1160
1249
|
)
|
|
@@ -1381,6 +1470,13 @@ class MaxCApp:
|
|
|
1381
1470
|
if t["table_name"] not in semantic_table_names
|
|
1382
1471
|
]
|
|
1383
1472
|
|
|
1473
|
+
warnings: 'list[str]' = []
|
|
1474
|
+
if len(all_tables) == 0:
|
|
1475
|
+
warnings.append(
|
|
1476
|
+
"Cache is empty — no tables to analyze. Run "
|
|
1477
|
+
"`maxc cache build` first to populate metadata."
|
|
1478
|
+
)
|
|
1479
|
+
|
|
1384
1480
|
envelope = Envelope(
|
|
1385
1481
|
command="meta.semantic.list-missing",
|
|
1386
1482
|
status="success",
|
|
@@ -1403,6 +1499,7 @@ class MaxCApp:
|
|
|
1403
1499
|
},
|
|
1404
1500
|
agent_hints=AgentHints(
|
|
1405
1501
|
insights=[f"{len(missing)} tables lack semantic metadata."],
|
|
1502
|
+
warnings=warnings,
|
|
1406
1503
|
actions=[
|
|
1407
1504
|
action("meta.semantic.set", data={"table_name": missing[0]["table_name"]}, metadata={"project": self.config.default_project})
|
|
1408
1505
|
] if missing else [],
|
|
@@ -1428,9 +1525,10 @@ class MaxCApp:
|
|
|
1428
1525
|
self.log("meta.semantic.list-missing", envelope.status, envelope.metadata)
|
|
1429
1526
|
return envelope
|
|
1430
1527
|
|
|
1431
|
-
def meta_latest_partition(self, table_name: 'str') -> 'Envelope':
|
|
1432
|
-
|
|
1433
|
-
|
|
1528
|
+
def meta_latest_partition(self, table_name: 'str', project: 'str | None' = None) -> 'Envelope':
|
|
1529
|
+
target_project = project or self.config.default_project
|
|
1530
|
+
payload, warnings = self.backend.latest_partition_info(table_name, project=project)
|
|
1531
|
+
lp_metadata = {"project": target_project}
|
|
1434
1532
|
if payload.get("has_partitions"):
|
|
1435
1533
|
lp_actions = [
|
|
1436
1534
|
action("meta.freshness", data=payload, metadata=lp_metadata),
|
|
@@ -1452,9 +1550,10 @@ class MaxCApp:
|
|
|
1452
1550
|
self.log("meta.latest-partition", envelope.status, envelope.metadata)
|
|
1453
1551
|
return envelope
|
|
1454
1552
|
|
|
1455
|
-
def meta_freshness(self, table_name: 'str') -> 'Envelope':
|
|
1456
|
-
|
|
1457
|
-
|
|
1553
|
+
def meta_freshness(self, table_name: 'str', project: 'str | None' = None) -> 'Envelope':
|
|
1554
|
+
target_project = project or self.config.default_project
|
|
1555
|
+
payload, warnings = self.backend.freshness_info(table_name, project=project)
|
|
1556
|
+
fresh_metadata = {"project": target_project}
|
|
1458
1557
|
fresh_actions = []
|
|
1459
1558
|
if payload.get("freshness_status") == "stale":
|
|
1460
1559
|
fresh_actions.append(action("job.submit", data=payload, metadata=fresh_metadata))
|
|
@@ -1503,7 +1602,7 @@ class MaxCApp:
|
|
|
1503
1602
|
}
|
|
1504
1603
|
)
|
|
1505
1604
|
|
|
1506
|
-
all_tables = self.backend.list_tables(schema=schema_name)
|
|
1605
|
+
all_tables, _ = self.backend.list_tables(schema=schema_name)
|
|
1507
1606
|
tables = all_tables
|
|
1508
1607
|
|
|
1509
1608
|
if progress_callback is not None:
|
|
@@ -1799,20 +1898,29 @@ class MaxCApp:
|
|
|
1799
1898
|
)
|
|
1800
1899
|
return envelope
|
|
1801
1900
|
|
|
1802
|
-
def meta_partitions(
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
|
|
1901
|
+
def meta_partitions(
|
|
1902
|
+
self,
|
|
1903
|
+
table_name: 'str',
|
|
1904
|
+
project: 'str | None' = None,
|
|
1905
|
+
*,
|
|
1906
|
+
limit: 'int' = 100,
|
|
1907
|
+
) -> 'Envelope':
|
|
1908
|
+
target_project = project or self.config.default_project
|
|
1909
|
+
payload, warnings = self.backend.list_partitions(
|
|
1910
|
+
table_name, limit=limit, project=project,
|
|
1911
|
+
)
|
|
1912
|
+
mp_metadata = {"project": target_project}
|
|
1806
1913
|
envelope = Envelope(
|
|
1807
1914
|
command="meta.partitions",
|
|
1808
1915
|
status="success",
|
|
1809
|
-
data=
|
|
1916
|
+
data=payload,
|
|
1810
1917
|
metadata=mp_metadata,
|
|
1811
1918
|
agent_hints=AgentHints(
|
|
1812
1919
|
actions=[
|
|
1813
|
-
action("query", data=
|
|
1814
|
-
action("meta.latest-partition", data=
|
|
1920
|
+
action("query", data=payload, metadata=mp_metadata),
|
|
1921
|
+
action("meta.latest-partition", data=payload, metadata=mp_metadata),
|
|
1815
1922
|
],
|
|
1923
|
+
warnings=warnings,
|
|
1816
1924
|
),
|
|
1817
1925
|
)
|
|
1818
1926
|
self.log("meta.partitions", envelope.status, envelope.metadata)
|
|
@@ -2043,7 +2151,9 @@ class MaxCApp:
|
|
|
2043
2151
|
*,
|
|
2044
2152
|
partition: 'str | None' = None,
|
|
2045
2153
|
columns: 'list[str] | None' = None,
|
|
2154
|
+
project: 'str | None' = None,
|
|
2046
2155
|
) -> 'Envelope':
|
|
2156
|
+
target_project = project or self.config.default_project
|
|
2047
2157
|
if rows <= 0:
|
|
2048
2158
|
raise ValidationError("`--rows` must be greater than 0.")
|
|
2049
2159
|
table, sample_rows, sample_info = self.backend.sample_table(
|
|
@@ -2051,6 +2161,7 @@ class MaxCApp:
|
|
|
2051
2161
|
rows,
|
|
2052
2162
|
partition=partition,
|
|
2053
2163
|
columns=columns,
|
|
2164
|
+
project=project,
|
|
2054
2165
|
)
|
|
2055
2166
|
ds_data = {
|
|
2056
2167
|
"table_name": table.name,
|
|
@@ -2061,7 +2172,7 @@ class MaxCApp:
|
|
|
2061
2172
|
"selected_columns": sample_info["selected_columns"],
|
|
2062
2173
|
}
|
|
2063
2174
|
ds_metadata = {
|
|
2064
|
-
"project":
|
|
2175
|
+
"project": target_project,
|
|
2065
2176
|
"requested_rows": rows,
|
|
2066
2177
|
"requested_partition": partition,
|
|
2067
2178
|
"requested_columns": columns or [],
|
|
@@ -2081,9 +2192,10 @@ class MaxCApp:
|
|
|
2081
2192
|
self.log("data.sample", envelope.status, envelope.metadata)
|
|
2082
2193
|
return envelope
|
|
2083
2194
|
|
|
2084
|
-
def data_profile(self, table_name: 'str', *, partition: 'str | None' = None) -> 'Envelope':
|
|
2085
|
-
|
|
2086
|
-
|
|
2195
|
+
def data_profile(self, table_name: 'str', *, partition: 'str | None' = None, project: 'str | None' = None) -> 'Envelope':
|
|
2196
|
+
target_project = project or self.config.default_project
|
|
2197
|
+
profile = self.backend.profile_table(table_name, partition=partition, project=project)
|
|
2198
|
+
dp_metadata = {"project": target_project, "requested_partition": partition}
|
|
2087
2199
|
envelope = Envelope(
|
|
2088
2200
|
command="data.profile",
|
|
2089
2201
|
status="success",
|
|
@@ -2747,7 +2859,9 @@ class MaxCApp:
|
|
|
2747
2859
|
elif auth_cfg.provider == "sts_token":
|
|
2748
2860
|
has_creds = bool(auth_cfg.access_id and auth_cfg.secret_access_key and auth_cfg.security_token)
|
|
2749
2861
|
elif auth_cfg.provider == "ncs":
|
|
2750
|
-
has_creds = bool(auth_cfg.ncs
|
|
2862
|
+
has_creds = bool(getattr(auth_cfg.ncs, "process_command", None))
|
|
2863
|
+
elif auth_cfg.provider == "external":
|
|
2864
|
+
has_creds = bool(getattr(auth_cfg.external, "process_command", None))
|
|
2751
2865
|
else:
|
|
2752
2866
|
has_creds = False
|
|
2753
2867
|
|
|
@@ -2992,9 +3106,23 @@ class MaxCApp:
|
|
|
2992
3106
|
else:
|
|
2993
3107
|
install_dir.mkdir(parents=True, exist_ok=True)
|
|
2994
3108
|
|
|
2995
|
-
# Copy SKILL.md and references/
|
|
3109
|
+
# Copy SKILL.md and references/, skipping dev/runtime junk that the
|
|
3110
|
+
# agent platform doesn't need (and may even refuse to load).
|
|
3111
|
+
EXCLUDED_NAMES = {
|
|
3112
|
+
".git", "__pycache__", ".DS_Store", "nohup.out",
|
|
3113
|
+
".gitignore", ".pytest_cache", ".mypy_cache", ".ruff_cache",
|
|
3114
|
+
}
|
|
3115
|
+
EXCLUDED_SUFFIXES = (".pyc", ".pyo", ".log")
|
|
3116
|
+
|
|
3117
|
+
def _is_excluded(name: 'str') -> 'bool':
|
|
3118
|
+
if name in EXCLUDED_NAMES:
|
|
3119
|
+
return True
|
|
3120
|
+
return any(name.endswith(suf) for suf in EXCLUDED_SUFFIXES)
|
|
3121
|
+
|
|
2996
3122
|
files_copied = []
|
|
2997
3123
|
for item in skills_dir.iterdir():
|
|
3124
|
+
if _is_excluded(item.name):
|
|
3125
|
+
continue
|
|
2998
3126
|
if item.is_file():
|
|
2999
3127
|
shutil.copy2(str(item), install_dir / item.name)
|
|
3000
3128
|
files_copied.append(item.name)
|
|
@@ -3002,7 +3130,13 @@ class MaxCApp:
|
|
|
3002
3130
|
dest = install_dir / item.name
|
|
3003
3131
|
if dest.exists():
|
|
3004
3132
|
shutil.rmtree(str(dest))
|
|
3005
|
-
shutil.copytree(
|
|
3133
|
+
shutil.copytree(
|
|
3134
|
+
str(item),
|
|
3135
|
+
str(dest),
|
|
3136
|
+
ignore=shutil.ignore_patterns(
|
|
3137
|
+
*EXCLUDED_NAMES, "*.pyc", "*.pyo", "*.log",
|
|
3138
|
+
),
|
|
3139
|
+
)
|
|
3006
3140
|
files_copied.append(item.name + "/")
|
|
3007
3141
|
|
|
3008
3142
|
# Write version marker
|
|
@@ -3054,10 +3188,7 @@ class MaxCApp:
|
|
|
3054
3188
|
force: 'bool' = False,
|
|
3055
3189
|
) -> 'JobInfo':
|
|
3056
3190
|
if cost_check is not None:
|
|
3057
|
-
|
|
3058
|
-
"The real MaxCompute backend does not yet support CU-based `--cost-check` validation.",
|
|
3059
|
-
suggestion="Run `--dry-run` first to inspect SQLCost metadata, or remove `--cost-check`.",
|
|
3060
|
-
)
|
|
3191
|
+
self._enforce_cost_check(sql=sql, project=project, cost_check=cost_check, force=force)
|
|
3061
3192
|
return self.backend.submit_query(
|
|
3062
3193
|
sql,
|
|
3063
3194
|
project=project,
|
|
@@ -3065,6 +3196,58 @@ class MaxCApp:
|
|
|
3065
3196
|
force=force,
|
|
3066
3197
|
)
|
|
3067
3198
|
|
|
3199
|
+
# ------------------------------------------------------------------
|
|
3200
|
+
# CU-based cost check helpers
|
|
3201
|
+
# ------------------------------------------------------------------
|
|
3202
|
+
# Conversion rule used for `--cost-check`:
|
|
3203
|
+
# MaxCompute SQLCost reports `input_size` in bytes scanned. The
|
|
3204
|
+
# rule-of-thumb conversion is 1 CU ≈ 1 GB of scanned input.
|
|
3205
|
+
_BYTES_PER_CU = 1024 ** 3
|
|
3206
|
+
|
|
3207
|
+
def _enforce_cost_check(
|
|
3208
|
+
self,
|
|
3209
|
+
*,
|
|
3210
|
+
sql: 'str',
|
|
3211
|
+
project: 'str',
|
|
3212
|
+
cost_check: 'float',
|
|
3213
|
+
force: 'bool',
|
|
3214
|
+
) -> 'None':
|
|
3215
|
+
"""Estimate query cost and abort if it exceeds *cost_check* CU.
|
|
3216
|
+
|
|
3217
|
+
Raises:
|
|
3218
|
+
CostLimitExceededError: If estimated CU exceeds the threshold.
|
|
3219
|
+
FeatureUnavailableError: If the backend doesn't expose
|
|
3220
|
+
``estimate_query_cost``.
|
|
3221
|
+
"""
|
|
3222
|
+
if not hasattr(self.backend, "estimate_query_cost"):
|
|
3223
|
+
raise FeatureUnavailableError(
|
|
3224
|
+
"The current backend does not provide CU-based cost validation.",
|
|
3225
|
+
suggestion="Remove `--cost-check`, or use `--dry-run` to inspect SQLCost metadata.",
|
|
3226
|
+
)
|
|
3227
|
+
try:
|
|
3228
|
+
estimate = self.backend.estimate_query_cost(sql, project=project, force=force)
|
|
3229
|
+
except MaxCError:
|
|
3230
|
+
raise
|
|
3231
|
+
except Exception as exc:
|
|
3232
|
+
raise FeatureUnavailableError(
|
|
3233
|
+
f"Could not estimate cost for `--cost-check`: {exc}",
|
|
3234
|
+
suggestion="Remove `--cost-check` or run `--dry-run` to inspect cost manually.",
|
|
3235
|
+
) from exc
|
|
3236
|
+
bytes_scanned = int(estimate.get("estimated_input_size_bytes") or 0)
|
|
3237
|
+
estimated_cu = bytes_scanned / self._BYTES_PER_CU
|
|
3238
|
+
if estimated_cu > cost_check:
|
|
3239
|
+
raise CostLimitExceededError(
|
|
3240
|
+
(
|
|
3241
|
+
f"Estimated query cost {estimated_cu:.2f} CU exceeds "
|
|
3242
|
+
f"--cost-check threshold of {cost_check:.2f} CU "
|
|
3243
|
+
f"({bytes_scanned:,} bytes scanned, 1 CU ≈ 1 GB)."
|
|
3244
|
+
),
|
|
3245
|
+
suggestion=(
|
|
3246
|
+
"Tighten the WHERE clause (e.g., add partition filter) or "
|
|
3247
|
+
"raise the --cost-check threshold."
|
|
3248
|
+
),
|
|
3249
|
+
)
|
|
3250
|
+
|
|
3068
3251
|
def _execute_query(
|
|
3069
3252
|
self,
|
|
3070
3253
|
*,
|
|
@@ -3089,10 +3272,14 @@ class MaxCApp:
|
|
|
3089
3272
|
attempts = 0
|
|
3090
3273
|
while True:
|
|
3091
3274
|
try:
|
|
3092
|
-
if cost_check is not None and strict_cost_check
|
|
3093
|
-
|
|
3094
|
-
|
|
3095
|
-
|
|
3275
|
+
if cost_check is not None and strict_cost_check:
|
|
3276
|
+
if not getattr(self.backend, "supports_cost_check", False):
|
|
3277
|
+
raise FeatureUnavailableError(
|
|
3278
|
+
"The current backend does not provide CU-based cost validation.",
|
|
3279
|
+
suggestion="Remove `--cost-check`, or use `--dry-run` to inspect SQLCost metadata.",
|
|
3280
|
+
)
|
|
3281
|
+
self._enforce_cost_check(
|
|
3282
|
+
sql=sql, project=project, cost_check=cost_check, force=force,
|
|
3096
3283
|
)
|
|
3097
3284
|
|
|
3098
3285
|
result = self.backend.execute_query(
|