maxc-cli 0.1.8__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/PKG-INFO +1 -1
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/setup.py +1 -1
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/__init__.py +1 -1
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/app.py +245 -62
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/backend/data.py +63 -5
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/backend/job.py +19 -2
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/backend/meta.py +147 -36
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/backend/odps.py +1 -1
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/backend/query.py +95 -11
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/cli.py +108 -17
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/exceptions.py +1 -57
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/helpers.py +45 -23
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/models.py +18 -11
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/output.py +11 -5
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/skills/SKILL.md +103 -159
- maxc_cli-0.2.0/src/maxc_cli/skills/references/cache-mechanism.md +15 -0
- maxc_cli-0.2.0/src/maxc_cli/skills/references/diff-workflow.md +21 -0
- maxc_cli-0.2.0/src/maxc_cli/skills/references/error-recovery.md +23 -0
- maxc_cli-0.2.0/src/maxc_cli/skills/references/json-output-format.md +100 -0
- maxc_cli-0.2.0/src/maxc_cli/skills/references/semantic-metadata.md +24 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli.egg-info/PKG-INFO +1 -1
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli.egg-info/SOURCES.txt +5 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_agent_hints_and_cli.py +21 -12
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_agent_skill_commands_context.py +4 -69
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_cli_mock.py +1 -5
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_phase1_improvements.py +256 -56
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_setting_parser.py +8 -23
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/MANIFEST.in +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/README.md +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/pyproject.toml +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/scripts/regression_test.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/setup.cfg +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/__main__.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/audit.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/auth_providers.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/backend/__init__.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/backend/auth.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/backend/catalog.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/cache.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/config.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/masking.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/setting_parser.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/skills/agents/openai.yaml +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/skills/nohup.out +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/skills/references/bootstrap-auth.md +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/skills/references/command-patterns.md +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/skills/references/maxcompute-sql-notes.md +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/skills/references/migrate-from-odpscmd.md +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/skills/references/partition-guide.md +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/skills/references/setup-install.md +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/store.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/utils.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli.egg-info/dependency_links.txt +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli.egg-info/entry_points.txt +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli.egg-info/requires.txt +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli.egg-info/top_level.txt +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_cache.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_catalog.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_compat.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_e2e_smoke.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_error_self_correction.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_external_auth.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_integration.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_integration_real.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_job_improvements.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_masking.py +0 -0
- {maxc_cli-0.1.8 → maxc_cli-0.2.0}/tests/test_query_auto_promote.py +0 -0
{maxc_cli-0.1.8 → maxc_cli-0.2.0}/setup.py
@@ -9,7 +9,7 @@ README = ROOT / "README.md"
 
 setup(
     name="maxc-cli",
-    version="0.1.8",
+    version="0.2.0",
     description="Agent-native MaxCompute CLI for external coding agents",
     long_description=README.read_text(encoding="utf-8"),
     long_description_content_type="text/markdown",
{maxc_cli-0.1.8 → maxc_cli-0.2.0}/src/maxc_cli/app.py
@@ -351,6 +351,7 @@ class MaxCApp:
             return envelope
         # Job ended — check outcome
         if job_info.status == "failure":
+            error_msg = job_info.failure_reason or job_info.error_message or "Job failed"
             envelope = Envelope(
                 command=command,
                 status="failure",
@@ -362,6 +363,12 @@ class MaxCApp:
                     "logview": job_info.logview,
                     "sql_executed": sql,
                 },
+                error=ErrorPayload(
+                    code="EXECUTION_FAILED",
+                    message=error_msg,
+                    suggestion=None,
+                    recoverable=False,
+                ),
                 agent_hints=AgentHints(
                     actions=[
                         action("job.diagnose", data={"job_id": job_info.job_id}, metadata={"job_id": job_info.job_id, "project": job_info.project, "sql_executed": sql}),
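With the new `ErrorPayload`, a failed job now carries a structured `error` object in the JSON envelope rather than signalling failure through `agent_hints` alone. A minimal sketch of the resulting envelope fragment, assuming these fields serialize under an `error` key as named in the diff (the surrounding values are illustrative, not actual CLI output):

```python
# Illustrative only: field names come from the diff above; the concrete
# envelope layout and the metadata values are assumptions.
failure_envelope_fragment = {
    "command": "query",
    "status": "failure",
    "metadata": {
        "logview": "<logview-url>",      # hypothetical placeholder
        "sql_executed": "SELECT ...",    # hypothetical placeholder
    },
    "error": {
        "code": "EXECUTION_FAILED",
        "message": "Job failed",         # fallback when no failure_reason/error_message
        "suggestion": None,
        "recoverable": False,
    },
}
```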
@@ -897,27 +904,51 @@ class MaxCApp:
         self.log("job.list", envelope.status, envelope.metadata)
         return envelope
 
-    def meta_list_tables(
+    def meta_list_tables(
+        self,
+        *,
+        schema: 'str | None' = None,
+        project: 'str | None' = None,
+        limit: 'int | None' = None,
+        cursor: 'str | None' = None,
+    ) -> 'Envelope':
         started = monotonic()
+        target_project = project or self.config.default_project
         effective_schema = schema or self.config.default_schema
 
-        #
+        # Decode cursor (offset token, mirrors cli.py pagination scheme)
+        offset = 0
+        if cursor:
+            try:
+                offset = max(0, int(cursor))
+            except (TypeError, ValueError):
+                raise ValidationError(
+                    f"Invalid --cursor value: {cursor!r}",
+                    suggestion="Pass the `next_cursor` value returned by the previous call.",
+                )
+
+        # Try to get from cache first (cache pagination is in-memory slicing)
         cached_tables = self.cache.get_all_cached_tables(
-
+            target_project,
             schema_name=effective_schema,
         )
 
+        has_more = False
+        next_cursor: 'str | None' = None
+
         if cached_tables:
             # Use cached data (returns list of dicts)
-
+            window = cached_tables[offset:]
+            if limit is not None:
+                has_more = len(window) > limit
+                window = window[:limit]
+            tables = window
             source = "cache"
             rows = [
                 {
                     "table_name": table.get("table_name"),
                     "schema_name": effective_schema or table.get("schema_name", "default"),
                     "table_type": table.get("table_type", "TABLE"),
-                    "size_bytes": table.get("size_bytes"),
-                    "owner": table.get("owner"),
                     "description": table.get("description"),
                     "partition_columns": [
                         c.get("name") if isinstance(c, dict) else str(c)
@@ -927,34 +958,48 @@ class MaxCApp:
                 for table in tables
             ]
         else:
-            # Cache miss — fall back to live backend query
-            live_tables = self.backend.list_tables(
+            # Cache miss — fall back to live backend query (now paginated)
+            live_tables, has_more = self.backend.list_tables(
+                schema=effective_schema,
+                project=project,
+                limit=limit,
+                offset=offset,
+            )
             source = "backend"
             rows = [
                 {
                     "table_name": t.name,
                     "schema_name": effective_schema or "default",
                     "table_type": t.table_type or "TABLE",
-                    "size_bytes": t.size_bytes,
-                    "owner": t.owner,
                     "description": t.description,
                     "partition_columns": [c.name for c in (t.partition_columns or [])],
                 }
                 for t in live_tables
             ]
-
+
+        if has_more and limit is not None:
+            next_cursor = str(offset + limit)
+
         metadata = self._cache_metadata(
-            project=
+            project=target_project,
             source=source,
             query_time_ms=int((monotonic() - started) * 1000),
         )
-
+
         schema_label = effective_schema or "default"
         insights = [f"Table list served from {source}."]
         if effective_schema and effective_schema != "default":
             insights.append(f"Use schema-qualified names in SQL: `{schema_label}.<table_name>`")
 
-        data = {
+        data = {
+            "tables": rows,
+            "total": len(rows),
+            "schema": schema_label,
+            "has_more": has_more,
+            "next_cursor": next_cursor,
+            "limit": limit,
+            "offset": offset,
+        }
         envelope = Envelope(
             command="meta.list-tables",
             status="success",
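As the two hunks above show, `meta_list_tables` now pages its results with an offset encoded as a string cursor: the caller passes `next_cursor` back in, it is parsed with `int()`, and the offset either slices the cached table list or is forwarded to `backend.list_tables`. A minimal paging sketch under that scheme; `fetch_page` is a hypothetical stand-in for whatever invokes `meta_list_tables` and returns the envelope's `data` dict:

```python
# Hypothetical consumer of the offset-encoded cursor scheme shown above.
# `fetch_page(limit=..., cursor=...)` is assumed to return the `data` dict:
# {"tables": [...], "has_more": bool, "next_cursor": str | None, ...}
def iterate_all_tables(fetch_page, page_size=100):
    cursor = None
    while True:
        data = fetch_page(limit=page_size, cursor=cursor)
        yield from data["tables"]
        if not data["has_more"] or data["next_cursor"] is None:
            break
        cursor = data["next_cursor"]  # stringified offset: "100", "200", ...
```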
@@ -971,12 +1016,13 @@ class MaxCApp:
         self.log("meta.list-tables", envelope.status, envelope.metadata)
         return envelope
 
-    def meta_describe(self, table_name: 'str', full: 'bool' = False) -> 'Envelope':
+    def meta_describe(self, table_name: 'str', full: 'bool' = False, project: 'str | None' = None) -> 'Envelope':
         started = monotonic()
+        target_project = project or self.config.default_project
 
         # Try to get from cache first
         cached_table = self.cache.get_cached_table(
-
+            target_project,
             table_name,
             schema_name=self.config.default_schema or "default"
         )
@@ -1011,7 +1057,7 @@ class MaxCApp:
             warnings = []
             # Optionally fetch additional metadata from API (description, owner, size, sample rows, partitions)
             try:
-                api_table = self.backend.describe_table(table_name)
+                api_table = self.backend.describe_table(table_name, project=project)
                 # Update with API data (API has priority over cache for these fields)
                 table.description = api_table.description or table.description
                 table.owner = api_table.owner or table.owner
@@ -1026,13 +1072,13 @@ class MaxCApp:
                 warnings.append("Backend API unavailable, showing cached schema only")
         else:
             # Fall back to live API
-            table = self.backend.describe_table(table_name)
+            table = self.backend.describe_table(table_name, project=project)
             source = "live"
             warnings = []
 
         # Get semantic metadata from cache
         semantic = self.cache.get_semantic(
-            project=
+            project=target_project,
             table_name=table_name,
             schema_name=self.config.default_schema or "default",
         )
@@ -1055,7 +1101,7 @@ class MaxCApp:
             payload["semantic"] = semantic
 
         meta_metadata = {
-            "project":
+            "project": target_project,
             "source": source,
             "query_time_ms": int((monotonic() - started) * 1000) if source == "live" else None,
         }
@@ -1076,8 +1122,16 @@ class MaxCApp:
         self.log("meta.describe", envelope.status, envelope.metadata)
         return envelope
 
-    def meta_search(
+    def meta_search(
+        self,
+        keyword: 'str',
+        *,
+        schema: 'str | None' = None,
+        project: 'str | None' = None,
+        limit: 'int | None' = None,
+    ) -> 'Envelope':
         started = monotonic()
+        target_project = project or self.config.default_project
         effective_schema = schema or self.config.default_schema
 
         # Priority: Catalog API → cache → live scan
@@ -1100,18 +1154,31 @@ class MaxCApp:
 
         if not catalog_available:
             cached_tables = self.cache.get_all_cached_tables(
-
+                target_project, schema_name=effective_schema,
             )
             if cached_tables:
                 matches = self._search_in_cache(keyword, cached_tables)
                 source = "cache"
             else:
-                matches = self.backend.search_tables(keyword, schema=effective_schema)
+                matches = self.backend.search_tables(keyword, schema=effective_schema, project=project)
                 source = "live"
 
-
+        original_total = len(matches)
+        truncated = False
+        if limit is not None and len(matches) > limit:
+            matches = matches[:limit]
+            truncated = True
+
+        search_data = {
+            "keyword": keyword,
+            "matches": matches,
+            "total": original_total,
+            "has_more": truncated,
+            "limit": limit,
+            "truncated": truncated,
+        }
         search_metadata = self._cache_metadata(
-            project=
+            project=target_project,
             source=source,
             query_time_ms=int((monotonic() - started) * 1000) if source in ("live", "catalog") else None,
         )
@@ -1131,11 +1198,19 @@ class MaxCApp:
         self.log("meta.search", envelope.status, envelope.metadata)
         return envelope
 
-    def meta_search_columns(
+    def meta_search_columns(
+        self,
+        keyword: 'str',
+        *,
+        schema: 'str | None' = None,
+        project: 'str | None' = None,
+        limit: 'int | None' = None,
+    ) -> 'Envelope':
         started = monotonic()
+        target_project = project or self.config.default_project
         effective_schema = schema or self.config.default_schema
         cached_tables = self.cache.get_all_cached_tables(
-
+            target_project, schema_name=effective_schema,
         )
         if cached_tables:
             matches = self._search_columns_in_cache(keyword, cached_tables)
@@ -1152,9 +1227,23 @@ class MaxCApp:
                 "Column search requires a metadata cache. "
                 "Run `maxc cache build` first, then retry `maxc meta search-columns`.",
             ]
-
+
+        original_total = len(matches)
+        truncated = False
+        if limit is not None and len(matches) > limit:
+            matches = matches[:limit]
+            truncated = True
+
+        sc_data = {
+            "keyword": keyword,
+            "matches": matches,
+            "total": original_total,
+            "has_more": truncated,
+            "limit": limit,
+            "truncated": truncated,
+        }
         sc_metadata = self._cache_metadata(
-            project=
+            project=target_project,
             source=source,
             query_time_ms=int((monotonic() - started) * 1000) if source not in ("cache", "cache_required") else None,
         )
@@ -1381,6 +1470,13 @@ class MaxCApp:
             if t["table_name"] not in semantic_table_names
         ]
 
+        warnings: 'list[str]' = []
+        if len(all_tables) == 0:
+            warnings.append(
+                "Cache is empty — no tables to analyze. Run "
+                "`maxc cache build` first to populate metadata."
+            )
+
         envelope = Envelope(
             command="meta.semantic.list-missing",
             status="success",
@@ -1403,6 +1499,7 @@ class MaxCApp:
             },
             agent_hints=AgentHints(
                 insights=[f"{len(missing)} tables lack semantic metadata."],
+                warnings=warnings,
                 actions=[
                     action("meta.semantic.set", data={"table_name": missing[0]["table_name"]}, metadata={"project": self.config.default_project})
                 ] if missing else [],
@@ -1428,9 +1525,10 @@ class MaxCApp:
         self.log("meta.semantic.list-missing", envelope.status, envelope.metadata)
         return envelope
 
-    def meta_latest_partition(self, table_name: 'str') -> 'Envelope':
-
-
+    def meta_latest_partition(self, table_name: 'str', project: 'str | None' = None) -> 'Envelope':
+        target_project = project or self.config.default_project
+        payload, warnings = self.backend.latest_partition_info(table_name, project=project)
+        lp_metadata = {"project": target_project}
         if payload.get("has_partitions"):
             lp_actions = [
                 action("meta.freshness", data=payload, metadata=lp_metadata),
@@ -1452,9 +1550,10 @@ class MaxCApp:
         self.log("meta.latest-partition", envelope.status, envelope.metadata)
         return envelope
 
-    def meta_freshness(self, table_name: 'str') -> 'Envelope':
-
-
+    def meta_freshness(self, table_name: 'str', project: 'str | None' = None) -> 'Envelope':
+        target_project = project or self.config.default_project
+        payload, warnings = self.backend.freshness_info(table_name, project=project)
+        fresh_metadata = {"project": target_project}
         fresh_actions = []
         if payload.get("freshness_status") == "stale":
             fresh_actions.append(action("job.submit", data=payload, metadata=fresh_metadata))
@@ -1503,7 +1602,7 @@ class MaxCApp:
             }
         )
 
-        all_tables = self.backend.list_tables(schema=schema_name)
+        all_tables, _ = self.backend.list_tables(schema=schema_name)
         tables = all_tables
 
         if progress_callback is not None:
@@ -1799,20 +1898,29 @@ class MaxCApp:
         )
         return envelope
 
-    def meta_partitions(
-
-
-
+    def meta_partitions(
+        self,
+        table_name: 'str',
+        project: 'str | None' = None,
+        *,
+        limit: 'int' = 100,
+    ) -> 'Envelope':
+        target_project = project or self.config.default_project
+        payload, warnings = self.backend.list_partitions(
+            table_name, limit=limit, project=project,
+        )
+        mp_metadata = {"project": target_project}
         envelope = Envelope(
             command="meta.partitions",
             status="success",
-            data=
+            data=payload,
             metadata=mp_metadata,
             agent_hints=AgentHints(
                 actions=[
-                    action("query", data=
-                    action("meta.latest-partition", data=
+                    action("query", data=payload, metadata=mp_metadata),
+                    action("meta.latest-partition", data=payload, metadata=mp_metadata),
                 ],
+                warnings=warnings,
             ),
         )
         self.log("meta.partitions", envelope.status, envelope.metadata)
@@ -1884,10 +1992,6 @@ class MaxCApp:
                     f"Unable to access project `{project}`: {exc}",
                     suggestion="Verify the project name and that the current identity has access.",
                 ) from exc
-            else:
-                warnings.append(
-                    "Project override was saved without remote validation because no authenticated backend session is active."
-                )
             override["project"] = project
             changes.append(f"project set to `{project}`")
             # Warn if session override project differs from the project saved in auth config
@@ -2047,7 +2151,9 @@ class MaxCApp:
         *,
         partition: 'str | None' = None,
         columns: 'list[str] | None' = None,
+        project: 'str | None' = None,
     ) -> 'Envelope':
+        target_project = project or self.config.default_project
         if rows <= 0:
             raise ValidationError("`--rows` must be greater than 0.")
         table, sample_rows, sample_info = self.backend.sample_table(
@@ -2055,6 +2161,7 @@ class MaxCApp:
             rows,
             partition=partition,
             columns=columns,
+            project=project,
         )
         ds_data = {
             "table_name": table.name,
@@ -2065,7 +2172,7 @@ class MaxCApp:
             "selected_columns": sample_info["selected_columns"],
         }
         ds_metadata = {
-            "project":
+            "project": target_project,
             "requested_rows": rows,
             "requested_partition": partition,
             "requested_columns": columns or [],
@@ -2085,9 +2192,10 @@ class MaxCApp:
         self.log("data.sample", envelope.status, envelope.metadata)
         return envelope
 
-    def data_profile(self, table_name: 'str', *, partition: 'str | None' = None) -> 'Envelope':
-
-
+    def data_profile(self, table_name: 'str', *, partition: 'str | None' = None, project: 'str | None' = None) -> 'Envelope':
+        target_project = project or self.config.default_project
+        profile = self.backend.profile_table(table_name, partition=partition, project=project)
+        dp_metadata = {"project": target_project, "requested_partition": partition}
         envelope = Envelope(
             command="data.profile",
             status="success",
@@ -2751,7 +2859,9 @@ class MaxCApp:
         elif auth_cfg.provider == "sts_token":
             has_creds = bool(auth_cfg.access_id and auth_cfg.secret_access_key and auth_cfg.security_token)
         elif auth_cfg.provider == "ncs":
-            has_creds = bool(auth_cfg.ncs
+            has_creds = bool(getattr(auth_cfg.ncs, "process_command", None))
+        elif auth_cfg.provider == "external":
+            has_creds = bool(getattr(auth_cfg.external, "process_command", None))
         else:
             has_creds = False
 
@@ -2996,9 +3106,23 @@ class MaxCApp:
         else:
             install_dir.mkdir(parents=True, exist_ok=True)
 
-        # Copy SKILL.md and references/
+        # Copy SKILL.md and references/, skipping dev/runtime junk that the
+        # agent platform doesn't need (and may even refuse to load).
+        EXCLUDED_NAMES = {
+            ".git", "__pycache__", ".DS_Store", "nohup.out",
+            ".gitignore", ".pytest_cache", ".mypy_cache", ".ruff_cache",
+        }
+        EXCLUDED_SUFFIXES = (".pyc", ".pyo", ".log")
+
+        def _is_excluded(name: 'str') -> 'bool':
+            if name in EXCLUDED_NAMES:
+                return True
+            return any(name.endswith(suf) for suf in EXCLUDED_SUFFIXES)
+
         files_copied = []
         for item in skills_dir.iterdir():
+            if _is_excluded(item.name):
+                continue
             if item.is_file():
                 shutil.copy2(str(item), install_dir / item.name)
                 files_copied.append(item.name)
@@ -3006,7 +3130,13 @@ class MaxCApp:
                 dest = install_dir / item.name
                 if dest.exists():
                     shutil.rmtree(str(dest))
-                shutil.copytree(
+                shutil.copytree(
+                    str(item),
+                    str(dest),
+                    ignore=shutil.ignore_patterns(
+                        *EXCLUDED_NAMES, "*.pyc", "*.pyo", "*.log",
+                    ),
+                )
                 files_copied.append(item.name + "/")
 
         # Write version marker
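Both copy paths above apply the same exclusions: `_is_excluded` filters top-level entries, and `shutil.ignore_patterns` repeats the set (plus suffix globs) for recursive `copytree` calls. A small, self-contained sketch of how `ignore_patterns` behaves; the directory and file names are made up for illustration:

```python
import shutil

# shutil.ignore_patterns(*patterns) returns a callable (dirpath, names) that
# yields the subset of names matching any fnmatch-style pattern, which is why
# literal names ("__pycache__") and suffix globs ("*.pyc") combine in one call.
ignore = shutil.ignore_patterns("__pycache__", ".git", "*.pyc", "*.log")
skipped = ignore("/tmp/skills", ["SKILL.md", "helper.pyc", "__pycache__", "nohup.log"])
print(sorted(skipped))  # ['__pycache__', 'helper.pyc', 'nohup.log']
```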
@@ -3058,10 +3188,7 @@ class MaxCApp:
         force: 'bool' = False,
     ) -> 'JobInfo':
         if cost_check is not None:
-
-                "The real MaxCompute backend does not yet support CU-based `--cost-check` validation.",
-                suggestion="Run `--dry-run` first to inspect SQLCost metadata, or remove `--cost-check`.",
-            )
+            self._enforce_cost_check(sql=sql, project=project, cost_check=cost_check, force=force)
         return self.backend.submit_query(
             sql,
             project=project,
@@ -3069,6 +3196,58 @@ class MaxCApp:
             force=force,
         )
 
+    # ------------------------------------------------------------------
+    # CU-based cost check helpers
+    # ------------------------------------------------------------------
+    # Conversion rule used for `--cost-check`:
+    # MaxCompute SQLCost reports `input_size` in bytes scanned. The
+    # rule-of-thumb conversion is 1 CU ≈ 1 GB of scanned input.
+    _BYTES_PER_CU = 1024 ** 3
+
+    def _enforce_cost_check(
+        self,
+        *,
+        sql: 'str',
+        project: 'str',
+        cost_check: 'float',
+        force: 'bool',
+    ) -> 'None':
+        """Estimate query cost and abort if it exceeds *cost_check* CU.
+
+        Raises:
+            CostLimitExceededError: If estimated CU exceeds the threshold.
+            FeatureUnavailableError: If the backend doesn't expose
+                ``estimate_query_cost``.
+        """
+        if not hasattr(self.backend, "estimate_query_cost"):
+            raise FeatureUnavailableError(
+                "The current backend does not provide CU-based cost validation.",
+                suggestion="Remove `--cost-check`, or use `--dry-run` to inspect SQLCost metadata.",
+            )
+        try:
+            estimate = self.backend.estimate_query_cost(sql, project=project, force=force)
+        except MaxCError:
+            raise
+        except Exception as exc:
+            raise FeatureUnavailableError(
+                f"Could not estimate cost for `--cost-check`: {exc}",
+                suggestion="Remove `--cost-check` or run `--dry-run` to inspect cost manually.",
+            ) from exc
+        bytes_scanned = int(estimate.get("estimated_input_size_bytes") or 0)
+        estimated_cu = bytes_scanned / self._BYTES_PER_CU
+        if estimated_cu > cost_check:
+            raise CostLimitExceededError(
+                (
+                    f"Estimated query cost {estimated_cu:.2f} CU exceeds "
+                    f"--cost-check threshold of {cost_check:.2f} CU "
+                    f"({bytes_scanned:,} bytes scanned, 1 CU ≈ 1 GB)."
+                ),
+                suggestion=(
+                    "Tighten the WHERE clause (e.g., add partition filter) or "
+                    "raise the --cost-check threshold."
+                ),
+            )
+
     def _execute_query(
         self,
         *,
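The `_enforce_cost_check` helper converts the SQLCost byte estimate into CU using the 1 CU ≈ 1 GB rule of thumb captured in `_BYTES_PER_CU`. A quick worked example of that arithmetic; the byte count is invented for illustration:

```python
# 1 CU ≈ 1 GB (1024**3 bytes) of scanned input, per the conversion comment above.
BYTES_PER_CU = 1024 ** 3

bytes_scanned = 5 * 1024 ** 3                    # hypothetical SQLCost estimate: 5 GiB
estimated_cu = bytes_scanned / BYTES_PER_CU
print(f"{estimated_cu:.2f} CU")                  # -> 5.00 CU

# With `--cost-check 2`, 5.00 CU > 2.00 CU, so CostLimitExceededError is raised
# before the query is submitted.
```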
@@ -3093,10 +3272,14 @@ class MaxCApp:
         attempts = 0
         while True:
             try:
-                if cost_check is not None and strict_cost_check
-
-
-
+                if cost_check is not None and strict_cost_check:
+                    if not getattr(self.backend, "supports_cost_check", False):
+                        raise FeatureUnavailableError(
+                            "The current backend does not provide CU-based cost validation.",
+                            suggestion="Remove `--cost-check`, or use `--dry-run` to inspect SQLCost metadata.",
+                        )
+                    self._enforce_cost_check(
+                        sql=sql, project=project, cost_check=cost_check, force=force,
                     )
 
                 result = self.backend.execute_query(