maxcompute-semantic 0.16.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxcompute_semantic/__init__.py +17 -0
- maxcompute_semantic/_internal/__init__.py +4 -0
- maxcompute_semantic/_internal/logging_setup.py +87 -0
- maxcompute_semantic/_internal/output.py +135 -0
- maxcompute_semantic/_internal/paths.py +244 -0
- maxcompute_semantic/_internal/update_check.py +1022 -0
- maxcompute_semantic/_internal/yaml_io.py +75 -0
- maxcompute_semantic/_lib/__init__.py +4 -0
- maxcompute_semantic/_lib/acl_filter.py +56 -0
- maxcompute_semantic/_lib/schema_hash.py +26 -0
- maxcompute_semantic/_lib/status.py +47 -0
- maxcompute_semantic/_lib/yaml.py +237 -0
- maxcompute_semantic/_skill/SKILL.md +41 -0
- maxcompute_semantic/_skill_data/enrich/SKILL.md +63 -0
- maxcompute_semantic/_skill_data/enrich/references/enrich.md +106 -0
- maxcompute_semantic/_skill_data/memory/SKILL.md +27 -0
- maxcompute_semantic/_skill_data/memory/references/memory.md +111 -0
- maxcompute_semantic/_skill_data/onboarding/SKILL.md +34 -0
- maxcompute_semantic/_skill_data/onboarding/references/profile-editor.md +208 -0
- maxcompute_semantic/_skill_data/onboarding/references/profile-history.md +86 -0
- maxcompute_semantic/_skill_data/query/SKILL.md +161 -0
- maxcompute_semantic/_skill_data/query/references/cold-start.md +96 -0
- maxcompute_semantic/_skill_data/query/references/from-table.md +64 -0
- maxcompute_semantic/_skill_data/query/references/metrics.md +107 -0
- maxcompute_semantic/_skill_data/query/references/projection.md +114 -0
- maxcompute_semantic/_skill_data/query/references/query.md +307 -0
- maxcompute_semantic/_skill_data/query/references/rules.md +89 -0
- maxcompute_semantic/_skill_data/query/references/sql.md +160 -0
- maxcompute_semantic/_skill_data/query/references/value-discovery.md +89 -0
- maxcompute_semantic/_skill_data/udf/SKILL.md +31 -0
- maxcompute_semantic/_skill_data/udf/references/udf.md +60 -0
- maxcompute_semantic/auth/__init__.py +4 -0
- maxcompute_semantic/auth/context.py +269 -0
- maxcompute_semantic/auth/credential.py +194 -0
- maxcompute_semantic/auth/env_expand.py +31 -0
- maxcompute_semantic/auth/errors.py +29 -0
- maxcompute_semantic/auth/link_store.py +106 -0
- maxcompute_semantic/auth/profile_store.py +393 -0
- maxcompute_semantic/auth/resolver.py +178 -0
- maxcompute_semantic/auth/schema.py +758 -0
- maxcompute_semantic/cli.py +357 -0
- maxcompute_semantic/commands/__init__.py +4 -0
- maxcompute_semantic/commands/_auth_probe.py +171 -0
- maxcompute_semantic/commands/_identity.py +215 -0
- maxcompute_semantic/commands/_import_creds.py +423 -0
- maxcompute_semantic/commands/_profile_command.py +161 -0
- maxcompute_semantic/commands/_profile_editor.py +997 -0
- maxcompute_semantic/commands/_schema_resolve.py +125 -0
- maxcompute_semantic/commands/_source_picker.py +1013 -0
- maxcompute_semantic/commands/_sql_name.py +30 -0
- maxcompute_semantic/commands/_table_resolve.py +139 -0
- maxcompute_semantic/commands/build.py +372 -0
- maxcompute_semantic/commands/doctor.py +1176 -0
- maxcompute_semantic/commands/link.py +148 -0
- maxcompute_semantic/commands/memory.py +557 -0
- maxcompute_semantic/commands/meta.py +477 -0
- maxcompute_semantic/commands/metric.py +257 -0
- maxcompute_semantic/commands/package.py +301 -0
- maxcompute_semantic/commands/profile.py +2825 -0
- maxcompute_semantic/commands/profile_export.py +589 -0
- maxcompute_semantic/commands/profile_fork.py +751 -0
- maxcompute_semantic/commands/profile_history.py +759 -0
- maxcompute_semantic/commands/show.py +555 -0
- maxcompute_semantic/commands/skill.py +897 -0
- maxcompute_semantic/commands/skill_catalog.py +103 -0
- maxcompute_semantic/commands/sql.py +796 -0
- maxcompute_semantic/commands/sql_review/__init__.py +141 -0
- maxcompute_semantic/commands/sql_review/coverage.py +204 -0
- maxcompute_semantic/commands/sql_review/hints/__init__.py +38 -0
- maxcompute_semantic/commands/sql_review/hints/aggregation.py +134 -0
- maxcompute_semantic/commands/sql_review/hints/join_hints.py +346 -0
- maxcompute_semantic/commands/sql_review/hints/pattern.py +99 -0
- maxcompute_semantic/commands/sql_review/next_step.py +54 -0
- maxcompute_semantic/commands/sql_review/rules/__init__.py +54 -0
- maxcompute_semantic/commands/sql_review/rules/_common.py +207 -0
- maxcompute_semantic/commands/sql_review/rules/dialect.py +146 -0
- maxcompute_semantic/commands/sql_review/rules/projection.py +135 -0
- maxcompute_semantic/commands/sql_review/rules/schema.py +262 -0
- maxcompute_semantic/commands/sql_review/rules/tier.py +60 -0
- maxcompute_semantic/commands/sql_review/rules/type_check.py +122 -0
- maxcompute_semantic/commands/sql_review/types.py +108 -0
- maxcompute_semantic/commands/status.py +272 -0
- maxcompute_semantic/commands/udf.py +488 -0
- maxcompute_semantic/commands/update.py +619 -0
- maxcompute_semantic/errors/__init__.py +135 -0
- maxcompute_semantic/errors/annotate.py +58 -0
- maxcompute_semantic/errors/auth.py +144 -0
- maxcompute_semantic/errors/base.py +250 -0
- maxcompute_semantic/errors/build.py +95 -0
- maxcompute_semantic/errors/mc.py +333 -0
- maxcompute_semantic/errors/memory.py +22 -0
- maxcompute_semantic/errors/versioning.py +84 -0
- maxcompute_semantic/mc_client/__init__.py +4 -0
- maxcompute_semantic/mc_client/catalog.py +157 -0
- maxcompute_semantic/mc_client/client.py +1616 -0
- maxcompute_semantic/mc_client/cost_gate.py +218 -0
- maxcompute_semantic/mc_client/envelope.py +58 -0
- maxcompute_semantic/mc_client/errors.py +43 -0
- maxcompute_semantic/mc_client/hints.py +45 -0
- maxcompute_semantic/mc_client/sql_guard.py +120 -0
- maxcompute_semantic/mc_client/tier.py +260 -0
- maxcompute_semantic/memory/__init__.py +12 -0
- maxcompute_semantic/memory/embedding.py +125 -0
- maxcompute_semantic/memory/errors.py +14 -0
- maxcompute_semantic/memory/hybrid.py +112 -0
- maxcompute_semantic/memory/package_doc.py +101 -0
- maxcompute_semantic/memory/sample_sql.py +108 -0
- maxcompute_semantic/memory/search.py +72 -0
- maxcompute_semantic/memory/sql_pattern.py +299 -0
- maxcompute_semantic/memory/tokenizer.py +77 -0
- maxcompute_semantic/memory/vec_ext.py +149 -0
- maxcompute_semantic/metric_validator.py +170 -0
- maxcompute_semantic/osi/__init__.py +19 -0
- maxcompute_semantic/osi/export.py +226 -0
- maxcompute_semantic/osi/import_.py +19 -0
- maxcompute_semantic/osi/vocabulary.py +57 -0
- maxcompute_semantic/versioning/__init__.py +96 -0
- maxcompute_semantic/versioning/env.py +85 -0
- maxcompute_semantic/versioning/errors.py +20 -0
- maxcompute_semantic/versioning/forks.py +183 -0
- maxcompute_semantic/versioning/git_repo.py +494 -0
- maxcompute_semantic/versioning/gitignore_default.py +38 -0
- maxcompute_semantic/versioning/hook.py +450 -0
- maxcompute_semantic/versioning/lock.py +318 -0
- maxcompute_semantic/versioning/sql_dump.py +241 -0
- maxcompute_semantic-0.16.1.dist-info/METADATA +215 -0
- maxcompute_semantic-0.16.1.dist-info/RECORD +131 -0
- maxcompute_semantic-0.16.1.dist-info/WHEEL +4 -0
- maxcompute_semantic-0.16.1.dist-info/entry_points.txt +3 -0
- maxcompute_semantic-0.16.1.dist-info/licenses/LICENSE +190 -0
- maxcompute_semantic-0.16.1.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Copyright (c) 2024-2026, Alibaba Cloud and its affiliates.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""maxcompute-semantic — semantic-layer-aware MaxCompute CLI."""
|
|
5
|
+
|
|
6
|
+
from importlib.metadata import PackageNotFoundError
|
|
7
|
+
from importlib.metadata import version as _pkg_version
|
|
8
|
+
|
|
9
|
+
# Pulled from the installed distribution so it always matches
|
|
10
|
+
# pyproject.toml's [project].version field. Fallback is for the
|
|
11
|
+
# rare case of running the source tree without an install.
|
|
12
|
+
# Start from the source-tree fallback and overwrite it when the installed
# distribution's metadata can be read.
__version__ = "0+unknown"
try:
    __version__ = _pkg_version("maxcompute-semantic")
except PackageNotFoundError:
    # No installed distribution: keep the fallback assigned above.
    pass

__all__ = ["__version__"]
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Copyright (c) 2024-2026, Alibaba Cloud and its affiliates.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Logging setup for mcs CLI: stderr handler, no duplicates, per-format levels."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import sys
|
|
10
|
+
import warnings
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def setup_logging(
    *,
    debug: bool = False,
    verbose: bool = False,
    level: int | None = None,
    format: str = "plain",
) -> None:
    """Set up the ``maxcompute_semantic`` and ``odps`` loggers.

    The effective level is chosen by the first rule that applies:

    1. ``level`` when it is not ``None``.
    2. ``logging.DEBUG`` when ``debug`` is true.
    3. ``logging.INFO`` when ``verbose`` is true.
    4. ``logging.ERROR`` when ``format == "json"``.
    5. ``logging.WARNING`` otherwise.

    That level is applied to both logger namespaces via
    ``_configure_namespace``; afterwards ``format`` / ``debug`` /
    ``verbose`` are forwarded to ``_configure_warnings``, which decides
    whether recovery-class warnings are filtered.
    """
    resolved = level
    if resolved is None:
        if debug:
            resolved = logging.DEBUG
        elif verbose:
            resolved = logging.INFO
        else:
            resolved = logging.ERROR if format == "json" else logging.WARNING

    # Same order as before: package logger first, then the odps SDK logger.
    for namespace in ("maxcompute_semantic", "odps"):
        _configure_namespace(namespace, resolved)
    _configure_warnings(format=format, debug=debug, verbose=verbose)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _configure_warnings(*, format: str, debug: bool, verbose: bool) -> None:
    """Filter ``StaleLockClearedWarning`` when running quiet ``json`` output.

    The filter is installed only when ``format == "json"`` and neither
    ``debug`` nor ``verbose`` is set; in every other combination the
    function returns without touching the warnings machinery, so the
    warning stays visible for diagnosis.

    The warning class is imported inside the function so that importing
    this module does not import ``maxcompute_semantic.versioning``.
    """
    suppress = format == "json" and not (debug or verbose)
    if suppress:
        from maxcompute_semantic.versioning.errors import StaleLockClearedWarning

        warnings.filterwarnings("ignore", category=StaleLockClearedWarning)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _configure_namespace(name: str, level: int) -> None:
    """Set ``level`` on logger ``name`` and ensure it has one stderr handler.

    A handler is reused (and its level updated) when it is a
    ``logging.StreamHandler`` that either already writes to the current
    ``sys.stderr`` or was installed by an earlier call to this function.
    In the latter case the handler is re-pointed at the current
    ``sys.stderr`` first, so rebinding ``sys.stderr`` between calls (test
    runners, capture wrappers) neither stacks up duplicate handlers nor
    leaves one writing to a stale stream.

    If no reusable handler exists, a new ``StreamHandler`` on
    ``sys.stderr`` is added with the ``"%(name)s: %(message)s"`` format.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    current_stderr = sys.stderr
    for existing in logger.handlers:
        if not isinstance(existing, logging.StreamHandler):
            continue
        installed_here = getattr(existing, "_mcs_stderr_handler", False)
        on_current_stderr = existing.stream is current_stderr
        if not (on_current_stderr or installed_here):
            continue
        if not on_current_stderr:
            try:
                existing.setStream(current_stderr)
            except ValueError:
                # setStream flushes the old stream first; if that stream
                # is already closed the flush fails, so swap directly.
                existing.stream = current_stderr
        existing.setLevel(level)
        return

    handler = logging.StreamHandler(current_stderr)
    handler.setLevel(level)
    handler.setFormatter(logging.Formatter("%(name)s: %(message)s"))
    # Marker that lets later calls recognise this handler even after
    # sys.stderr has been replaced by a different object.
    handler._mcs_stderr_handler = True
    logger.addHandler(handler)
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# Copyright (c) 2024-2026, Alibaba Cloud and its affiliates.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Renderer: CLI output adapter for plain (human) and envelope (json/yaml) modes."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import sys
|
|
10
|
+
from io import StringIO
|
|
11
|
+
from typing import Any, TextIO
|
|
12
|
+
|
|
13
|
+
from maxcompute_semantic.mc_client.envelope import Envelope
|
|
14
|
+
from maxcompute_semantic.mc_client.errors import McsError
|
|
15
|
+
|
|
16
|
+
_ENVELOPE_FORMATS: frozenset[str] = frozenset({"json", "yaml"})
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _serialize_envelope(envelope_dict: dict[str, Any], format: str) -> str:
    """Render ``envelope_dict`` as text in the requested envelope format.

    ``format == "yaml"`` dumps through ruamel's ``safe`` YAML instance
    (block style, unicode allowed) rather than a round-trip instance.
    Any other ``format`` value produces a single line of JSON with
    non-ASCII characters kept as-is, terminated by a newline.
    """
    if format != "yaml":
        return json.dumps(envelope_dict, ensure_ascii=False) + "\n"

    # Imported only on the YAML path.
    from ruamel.yaml import YAML

    dumper = YAML(typ="safe")
    dumper.default_flow_style = False
    dumper.allow_unicode = True
    sink = StringIO()
    dumper.dump(envelope_dict, sink)
    return sink.getvalue()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Renderer:
    """Format-aware output: success/error/table for plain or envelope mode.

    ``format`` is one of ``plain`` / ``json`` / ``yaml``. ``json`` and
    ``yaml`` both emit the same :class:`Envelope` shape (``status`` /
    ``data`` / ``error``), differing only in serializer. Callers that
    branch on "envelope vs prose" should use :attr:`is_envelope` rather
    than comparing against ``"json"`` directly.
    """

    def __init__(
        self,
        format: str = "plain",
        quiet: bool = False,
        stdout: TextIO | None = None,
        stderr: TextIO | None = None,
    ) -> None:
        """Store the output mode and the streams to write to.

        ``stdout`` / ``stderr`` default to ``sys.stdout`` / ``sys.stderr``
        when left as ``None``.
        """
        self.format = format
        self.quiet = quiet
        # Compare against None explicitly so a caller-supplied stream is
        # never swapped out merely because it evaluates as falsy.
        self._stdout = sys.stdout if stdout is None else stdout
        self._stderr = sys.stderr if stderr is None else stderr

    @property
    def is_envelope(self) -> bool:
        """True for envelope-emitting formats (``json`` / ``yaml``)."""
        return self.format in _ENVELOPE_FORMATS

    def quiet_essential(self, data: dict[str, Any], key: str) -> None:
        """In quiet plain mode, print ``data[key]`` on a single line.

        Does nothing in envelope modes (``success`` emits the envelope
        there), when ``quiet`` is false (regular success output applies),
        or when ``data`` has no non-``None`` value under ``key``.
        """
        if self.is_envelope or not self.quiet:
            return
        value = data.get(key)
        if value is None:
            return
        self._stdout.write(str(value) + "\n")

    def success(self, data: dict[str, Any]) -> None:
        """Emit a success payload on stdout.

        Envelope modes serialize ``Envelope.success(data)``; quiet plain
        mode prints nothing; plain mode prints one ``key: value`` line per
        item of ``data``.
        """
        if self.is_envelope:
            env = Envelope.success(data)
            self._stdout.write(_serialize_envelope(env.to_dict(), self.format))
        elif self.quiet:
            # Quiet plain mode: no output on success.
            pass
        else:
            # Plain: one "key: value" line per entry.
            for key, value in data.items():
                self._stdout.write(f"{key}: {value}\n")

    def error(self, err: McsError) -> None:
        """Emit an error payload. Plain goes to stderr; envelope modes go to stdout."""
        if self.is_envelope:
            env = Envelope.from_error(err)
            self._stdout.write(_serialize_envelope(env.to_dict(), self.format))
        else:
            self._stderr.write(f"Error: {err.message}\n")
            if err.remediation:
                self._stderr.write(f" Suggestion: {err.remediation}\n")

    def table(self, headers: list[str], rows: list[list[str]]) -> None:
        """Emit a table on stdout.

        Envelope modes wrap ``{"headers": headers, "rows": rows}`` in a
        success envelope. Quiet plain mode prints nothing. Plain mode
        prints a header line, a dashed separator and one line per row,
        with each column left-justified to its widest cell.

        Plain mode tolerates rows that are wider than ``headers`` (the
        extra cells get their own columns without a header) and cells
        that are not strings (they are rendered with ``str``).
        """
        if self.is_envelope:
            env = Envelope.success({"headers": headers, "rows": rows})
            self._stdout.write(_serialize_envelope(env.to_dict(), self.format))
            return
        if self.quiet:
            return

        header_cells = [str(h) for h in headers]
        body = [[str(cell) for cell in row] for row in rows]

        # Column widths: start from the headers, then widen per cell,
        # growing the list when a row has more cells than there are headers.
        widths = [len(h) for h in header_cells]
        for row in body:
            for i, cell in enumerate(row):
                if i < len(widths):
                    widths[i] = max(widths[i], len(cell))
                else:
                    widths.append(len(cell))

        header_line = " ".join(h.ljust(widths[i]) for i, h in enumerate(header_cells))
        self._stdout.write(header_line + "\n")

        sep_line = " ".join("-" * widths[i] for i in range(len(header_cells)))
        self._stdout.write(sep_line + "\n")

        for row in body:
            data_line = " ".join(cell.ljust(widths[i]) for i, cell in enumerate(row))
            self._stdout.write(data_line + "\n")
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
# Copyright (c) 2024-2026, Alibaba Cloud and its affiliates.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""XDG-compliant path helpers for mcs config + data + cache directories.
|
|
5
|
+
|
|
6
|
+
Config root is ``~/.config/maxcompute-semantic/`` (override via
|
|
7
|
+
``MCS_CONFIG_DIR``). Data root is ``~/.local/share/maxcompute-semantic/``
|
|
8
|
+
on Unix / ``~/Library/Application Support/maxcompute-semantic/`` on macOS
|
|
9
|
+
(override via ``MCS_DATA_DIR`` or ``XDG_DATA_HOME``). Config and data
|
|
10
|
+
live in separate XDG-standard directories.
|
|
11
|
+
|
|
12
|
+
Per-profile data lives at ``data_root()/<profile_name>/`` by default.
|
|
13
|
+
A profile may override its data dir explicitly via ``Profile.package_path``
|
|
14
|
+
(useful for imported packages, NFS-mounted shared dirs, or eval fixtures
|
|
15
|
+
checked into git). When ``package_path`` is set, it takes precedence over
|
|
16
|
+
the default; otherwise ``profile_data_dir(profile)`` falls back to the
|
|
17
|
+
per-name slot under ``data_root()``.
|
|
18
|
+
|
|
19
|
+
Historical note: the data root used to be called ``profiles_root()`` and
|
|
20
|
+
sat at ``<data>/profiles/``. The 2026-05-14 vocab cleanup renamed it to
|
|
21
|
+
``data_root()`` at ``<data>/data/`` because "profiles" was a misleading
|
|
22
|
+
label — that directory only ever held *data*, while profile *config*
|
|
23
|
+
lives at ``config_dir()/profiles.yaml``.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import os
|
|
29
|
+
import sys
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import TYPE_CHECKING
|
|
32
|
+
|
|
33
|
+
if TYPE_CHECKING:
|
|
34
|
+
from maxcompute_semantic.auth.schema import Profile
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _xdg_cache_home() -> Path:
    """Resolve the per-user cache base directory for this platform.

    Lookup order:

    - a non-empty ``XDG_CACHE_HOME`` is returned as-is;
    - Windows (``sys.platform == "win32"``): ``%LOCALAPPDATA%/Cache``,
      or ``~/AppData/Local/Cache`` when ``LOCALAPPDATA`` is unset/empty;
    - macOS (``"darwin"``): ``~/Library/Caches``;
    - anything else: ``~/.cache``.
    """
    explicit = os.environ.get("XDG_CACHE_HOME")
    if explicit:
        return Path(explicit)

    platform = sys.platform
    if platform == "win32":
        local_app_data = os.environ.get("LOCALAPPDATA")
        if local_app_data:
            return Path(local_app_data) / "Cache"
        return Path.home() / "AppData" / "Local" / "Cache"
    if platform == "darwin":
        return Path.home().joinpath("Library", "Caches")
    return Path.home().joinpath(".cache")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _xdg_data_home() -> Path:
    """Resolve the per-user data base directory for this platform.

    A non-empty ``XDG_DATA_HOME`` wins. Otherwise macOS
    (``sys.platform == "darwin"``) uses ``~/Library/Application Support``
    and every other platform uses ``~/.local/share``.
    """
    explicit = os.environ.get("XDG_DATA_HOME")
    if explicit:
        return Path(explicit)
    if sys.platform == "darwin":
        segments = ("Library", "Application Support")
    else:
        segments = (".local", "share")
    return Path.home().joinpath(*segments)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def cache_dir() -> Path:
    """Return the mcs cache directory.

    A non-empty ``MCS_CACHE_DIR`` is used verbatim (no suffix appended).
    Otherwise the result is ``<_xdg_cache_home()>/maxcompute-semantic``,
    so an ``XDG_CACHE_HOME`` override still gets the package suffix.

    This is pure path computation; nothing is created on disk here.
    According to the module's notes the directory holds the
    ``update_check.json`` cache used by ``_internal/update_check.py``.
    """
    explicit = os.environ.get("MCS_CACHE_DIR")
    return Path(explicit) if explicit else _xdg_cache_home() / "maxcompute-semantic"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def config_dir() -> Path:
    """Return the mcs configuration directory.

    A non-empty ``MCS_CONFIG_DIR`` is used verbatim. Otherwise the base
    is ``XDG_CONFIG_HOME`` when set and non-empty, else ``~/.config``,
    with ``maxcompute-semantic`` appended.
    """
    explicit = os.environ.get("MCS_CONFIG_DIR")
    if explicit:
        return Path(explicit)
    xdg_config = os.environ.get("XDG_CONFIG_HOME")
    base = Path(xdg_config) if xdg_config else Path.home() / ".config"
    return base / "maxcompute-semantic"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def data_dir() -> Path:
    """Return the mcs data directory.

    Resolution order:

    1. ``MCS_DATA_DIR``, when non-empty, is returned verbatim.
    2. Otherwise ``<_xdg_data_home()>/maxcompute-semantic``, where
       ``_xdg_data_home()`` honors ``XDG_DATA_HOME`` before falling back
       to the platform default.
    """
    explicit = os.environ.get("MCS_DATA_DIR")
    if not explicit:
        return _xdg_data_home() / "maxcompute-semantic"
    return Path(explicit)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def data_root() -> Path:
    """Return the root under which per-profile data directories live.

    A non-empty ``MCS_PROFILES_DIR`` is returned verbatim; otherwise the
    root is ``data_dir()/data``. The env var keeps its older
    ``PROFILES`` name for backwards compatibility with existing user
    configs and CI files, even though it points at per-profile *data*.
    """
    explicit = os.environ.get("MCS_PROFILES_DIR")
    return Path(explicit) if explicit else data_dir() / "data"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def profile_data_dir(profile: Profile | str) -> Path:
    """Return the data directory that belongs to ``profile``.

    ``profile`` may be a ``Profile`` object or a bare profile name:

    - ``Profile`` with ``package_path`` set: that path wins.
    - ``Profile`` with ``package_path`` of ``None``:
      ``data_root()/<profile.name>``.
    - anything else (a name): ``data_root()/<profile>``.
    """
    # Function-level import: per the module's notes, auth.schema imports
    # from this module, so a top-level import would be circular.
    from maxcompute_semantic.auth.schema import Profile as _Profile

    if not isinstance(profile, _Profile):
        # Bare name: there is no package_path to honor.
        return data_root() / profile

    explicit = profile.package_path
    if explicit is None:
        return data_root() / profile.name
    return Path(explicit)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def profiles_yaml_path() -> Path:
    """Return ``config_dir()/profiles.yaml``."""
    return config_dir().joinpath("profiles.yaml")
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def link_json_path() -> Path:
    """Return ``config_dir()/link.json``."""
    return config_dir().joinpath("link.json")
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def tier_cache_path(profile: Profile | str, project: str) -> Path:
    """Return the tier-cache sentinel path for one project of a profile.

    The path is ``<profile_data_dir(profile)>/tier_cache/<project>``.
    Because it is built on ``profile_data_dir``, a ``Profile`` object's
    ``package_path`` override is honored, while a bare profile name
    resolves to the default per-name slot.

    Per the module's notes, the file holds a single character: ``"2"``
    for a 2-level (no-schema) MaxCompute project and ``"3"`` for a
    3-level (schema-enabled) one. It is keyed per project because one
    profile can span several projects (``Profile.compute_project`` plus
    each entry in ``Profile.sources``) whose tiers are independent.

    This function only computes the path. It performs no I/O and does
    not create the ``tier_cache`` directory; that is left to the writer.

    Raises:
        ValueError: if ``project`` is not a string or is empty or
            whitespace-only.
    """
    if isinstance(project, str) and project.strip():
        return profile_data_dir(profile) / "tier_cache" / project

    problem = (
        "tier_cache_path requires a non-empty MaxCompute project name "
        f"as the second argument (got {project!r}); the cache is keyed "
        "per-(profile, project) — caller must pass an explicit project "
        "name from either profile.compute_project (the AK's home project) "
        "or one of profile.sources[i].project (a declared data source)."
    )
    raise ValueError(problem)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def profile_git_dir(profile: Profile | str) -> Path:
    """Return ``<profile_data_dir(profile)>/.git``.

    This is the admin directory of the per-profile git repository; the
    repository's working directory is its parent. Only the path is
    computed here, with no filesystem access. Per the module's notes,
    callers use the existence of this directory as the "is this profile
    versioned?" check.
    """
    repo_root = profile_data_dir(profile)
    return repo_root / ".git"
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def profile_gitignore_path(profile: Profile | str) -> Path:
    """Return ``<profile_data_dir(profile)>/.gitignore``.

    Per the module's notes this file is committed, and its contents are
    the ``PROFILE_GITIGNORE`` constant from
    ``versioning/gitignore_default.py``.
    """
    repo_root = profile_data_dir(profile)
    return repo_root / ".gitignore"
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def profile_package_sql_path(profile: Profile | str) -> Path:
    """Return ``<profile_data_dir(profile)>/package.sql``.

    Per the module's notes this is the textual dump of the committed
    tables of ``package.db``, produced and consumed by
    ``versioning/sql_dump.py``. The dump is committed; the binary
    ``package.db`` next to it is git-ignored.
    """
    repo_root = profile_data_dir(profile)
    return repo_root / "package.sql"
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def profile_lock_path(profile: Profile | str) -> Path:
    """Return ``<profile_data_dir(profile)>/.mcs-lock``.

    Per the module's notes this is the fcntl lock anchor file; its body
    is the PID of the current holder, and it is git-ignored so it never
    gets committed.
    """
    repo_root = profile_data_dir(profile)
    return repo_root / ".mcs-lock"
|