maxcompute-semantic 0.16.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. maxcompute_semantic/__init__.py +17 -0
  2. maxcompute_semantic/_internal/__init__.py +4 -0
  3. maxcompute_semantic/_internal/logging_setup.py +87 -0
  4. maxcompute_semantic/_internal/output.py +135 -0
  5. maxcompute_semantic/_internal/paths.py +244 -0
  6. maxcompute_semantic/_internal/update_check.py +1022 -0
  7. maxcompute_semantic/_internal/yaml_io.py +75 -0
  8. maxcompute_semantic/_lib/__init__.py +4 -0
  9. maxcompute_semantic/_lib/acl_filter.py +56 -0
  10. maxcompute_semantic/_lib/schema_hash.py +26 -0
  11. maxcompute_semantic/_lib/status.py +47 -0
  12. maxcompute_semantic/_lib/yaml.py +237 -0
  13. maxcompute_semantic/_skill/SKILL.md +41 -0
  14. maxcompute_semantic/_skill_data/enrich/SKILL.md +63 -0
  15. maxcompute_semantic/_skill_data/enrich/references/enrich.md +106 -0
  16. maxcompute_semantic/_skill_data/memory/SKILL.md +27 -0
  17. maxcompute_semantic/_skill_data/memory/references/memory.md +111 -0
  18. maxcompute_semantic/_skill_data/onboarding/SKILL.md +34 -0
  19. maxcompute_semantic/_skill_data/onboarding/references/profile-editor.md +208 -0
  20. maxcompute_semantic/_skill_data/onboarding/references/profile-history.md +86 -0
  21. maxcompute_semantic/_skill_data/query/SKILL.md +161 -0
  22. maxcompute_semantic/_skill_data/query/references/cold-start.md +96 -0
  23. maxcompute_semantic/_skill_data/query/references/from-table.md +64 -0
  24. maxcompute_semantic/_skill_data/query/references/metrics.md +107 -0
  25. maxcompute_semantic/_skill_data/query/references/projection.md +114 -0
  26. maxcompute_semantic/_skill_data/query/references/query.md +307 -0
  27. maxcompute_semantic/_skill_data/query/references/rules.md +89 -0
  28. maxcompute_semantic/_skill_data/query/references/sql.md +160 -0
  29. maxcompute_semantic/_skill_data/query/references/value-discovery.md +89 -0
  30. maxcompute_semantic/_skill_data/udf/SKILL.md +31 -0
  31. maxcompute_semantic/_skill_data/udf/references/udf.md +60 -0
  32. maxcompute_semantic/auth/__init__.py +4 -0
  33. maxcompute_semantic/auth/context.py +269 -0
  34. maxcompute_semantic/auth/credential.py +194 -0
  35. maxcompute_semantic/auth/env_expand.py +31 -0
  36. maxcompute_semantic/auth/errors.py +29 -0
  37. maxcompute_semantic/auth/link_store.py +106 -0
  38. maxcompute_semantic/auth/profile_store.py +393 -0
  39. maxcompute_semantic/auth/resolver.py +178 -0
  40. maxcompute_semantic/auth/schema.py +758 -0
  41. maxcompute_semantic/cli.py +357 -0
  42. maxcompute_semantic/commands/__init__.py +4 -0
  43. maxcompute_semantic/commands/_auth_probe.py +171 -0
  44. maxcompute_semantic/commands/_identity.py +215 -0
  45. maxcompute_semantic/commands/_import_creds.py +423 -0
  46. maxcompute_semantic/commands/_profile_command.py +161 -0
  47. maxcompute_semantic/commands/_profile_editor.py +997 -0
  48. maxcompute_semantic/commands/_schema_resolve.py +125 -0
  49. maxcompute_semantic/commands/_source_picker.py +1013 -0
  50. maxcompute_semantic/commands/_sql_name.py +30 -0
  51. maxcompute_semantic/commands/_table_resolve.py +139 -0
  52. maxcompute_semantic/commands/build.py +372 -0
  53. maxcompute_semantic/commands/doctor.py +1176 -0
  54. maxcompute_semantic/commands/link.py +148 -0
  55. maxcompute_semantic/commands/memory.py +557 -0
  56. maxcompute_semantic/commands/meta.py +477 -0
  57. maxcompute_semantic/commands/metric.py +257 -0
  58. maxcompute_semantic/commands/package.py +301 -0
  59. maxcompute_semantic/commands/profile.py +2825 -0
  60. maxcompute_semantic/commands/profile_export.py +589 -0
  61. maxcompute_semantic/commands/profile_fork.py +751 -0
  62. maxcompute_semantic/commands/profile_history.py +759 -0
  63. maxcompute_semantic/commands/show.py +555 -0
  64. maxcompute_semantic/commands/skill.py +897 -0
  65. maxcompute_semantic/commands/skill_catalog.py +103 -0
  66. maxcompute_semantic/commands/sql.py +796 -0
  67. maxcompute_semantic/commands/sql_review/__init__.py +141 -0
  68. maxcompute_semantic/commands/sql_review/coverage.py +204 -0
  69. maxcompute_semantic/commands/sql_review/hints/__init__.py +38 -0
  70. maxcompute_semantic/commands/sql_review/hints/aggregation.py +134 -0
  71. maxcompute_semantic/commands/sql_review/hints/join_hints.py +346 -0
  72. maxcompute_semantic/commands/sql_review/hints/pattern.py +99 -0
  73. maxcompute_semantic/commands/sql_review/next_step.py +54 -0
  74. maxcompute_semantic/commands/sql_review/rules/__init__.py +54 -0
  75. maxcompute_semantic/commands/sql_review/rules/_common.py +207 -0
  76. maxcompute_semantic/commands/sql_review/rules/dialect.py +146 -0
  77. maxcompute_semantic/commands/sql_review/rules/projection.py +135 -0
  78. maxcompute_semantic/commands/sql_review/rules/schema.py +262 -0
  79. maxcompute_semantic/commands/sql_review/rules/tier.py +60 -0
  80. maxcompute_semantic/commands/sql_review/rules/type_check.py +122 -0
  81. maxcompute_semantic/commands/sql_review/types.py +108 -0
  82. maxcompute_semantic/commands/status.py +272 -0
  83. maxcompute_semantic/commands/udf.py +488 -0
  84. maxcompute_semantic/commands/update.py +619 -0
  85. maxcompute_semantic/errors/__init__.py +135 -0
  86. maxcompute_semantic/errors/annotate.py +58 -0
  87. maxcompute_semantic/errors/auth.py +144 -0
  88. maxcompute_semantic/errors/base.py +250 -0
  89. maxcompute_semantic/errors/build.py +95 -0
  90. maxcompute_semantic/errors/mc.py +333 -0
  91. maxcompute_semantic/errors/memory.py +22 -0
  92. maxcompute_semantic/errors/versioning.py +84 -0
  93. maxcompute_semantic/mc_client/__init__.py +4 -0
  94. maxcompute_semantic/mc_client/catalog.py +157 -0
  95. maxcompute_semantic/mc_client/client.py +1616 -0
  96. maxcompute_semantic/mc_client/cost_gate.py +218 -0
  97. maxcompute_semantic/mc_client/envelope.py +58 -0
  98. maxcompute_semantic/mc_client/errors.py +43 -0
  99. maxcompute_semantic/mc_client/hints.py +45 -0
  100. maxcompute_semantic/mc_client/sql_guard.py +120 -0
  101. maxcompute_semantic/mc_client/tier.py +260 -0
  102. maxcompute_semantic/memory/__init__.py +12 -0
  103. maxcompute_semantic/memory/embedding.py +125 -0
  104. maxcompute_semantic/memory/errors.py +14 -0
  105. maxcompute_semantic/memory/hybrid.py +112 -0
  106. maxcompute_semantic/memory/package_doc.py +101 -0
  107. maxcompute_semantic/memory/sample_sql.py +108 -0
  108. maxcompute_semantic/memory/search.py +72 -0
  109. maxcompute_semantic/memory/sql_pattern.py +299 -0
  110. maxcompute_semantic/memory/tokenizer.py +77 -0
  111. maxcompute_semantic/memory/vec_ext.py +149 -0
  112. maxcompute_semantic/metric_validator.py +170 -0
  113. maxcompute_semantic/osi/__init__.py +19 -0
  114. maxcompute_semantic/osi/export.py +226 -0
  115. maxcompute_semantic/osi/import_.py +19 -0
  116. maxcompute_semantic/osi/vocabulary.py +57 -0
  117. maxcompute_semantic/versioning/__init__.py +96 -0
  118. maxcompute_semantic/versioning/env.py +85 -0
  119. maxcompute_semantic/versioning/errors.py +20 -0
  120. maxcompute_semantic/versioning/forks.py +183 -0
  121. maxcompute_semantic/versioning/git_repo.py +494 -0
  122. maxcompute_semantic/versioning/gitignore_default.py +38 -0
  123. maxcompute_semantic/versioning/hook.py +450 -0
  124. maxcompute_semantic/versioning/lock.py +318 -0
  125. maxcompute_semantic/versioning/sql_dump.py +241 -0
  126. maxcompute_semantic-0.16.1.dist-info/METADATA +215 -0
  127. maxcompute_semantic-0.16.1.dist-info/RECORD +131 -0
  128. maxcompute_semantic-0.16.1.dist-info/WHEEL +4 -0
  129. maxcompute_semantic-0.16.1.dist-info/entry_points.txt +3 -0
  130. maxcompute_semantic-0.16.1.dist-info/licenses/LICENSE +190 -0
  131. maxcompute_semantic-0.16.1.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,17 @@
1
+ # Copyright (c) 2024-2026, Alibaba Cloud and its affiliates.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """maxcompute-semantic — semantic-layer-aware MaxCompute CLI."""
5
+
6
+ from importlib.metadata import PackageNotFoundError
7
+ from importlib.metadata import version as _pkg_version
8
+
9
+ # Pulled from the installed distribution so it always matches
10
+ # pyproject.toml's [project].version field. Fallback is for the
11
+ # rare case of running the source tree without an install.
12
def _installed_version() -> str:
    """Return the version string of the installed distribution.

    The value is read from the ``maxcompute-semantic`` distribution
    metadata. When that metadata cannot be found (running straight from
    a source tree without an install), the placeholder ``"0+unknown"``
    is returned instead.
    """
    try:
        return _pkg_version("maxcompute-semantic")
    except PackageNotFoundError:
        return "0+unknown"


__version__ = _installed_version()

__all__ = ["__version__"]
@@ -0,0 +1,4 @@
1
+ # Copyright (c) 2024-2026, Alibaba Cloud and its affiliates.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Internal helpers (paths, YAML IO, rendering, logging)."""
@@ -0,0 +1,87 @@
1
+ # Copyright (c) 2024-2026, Alibaba Cloud and its affiliates.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Logging setup for mcs CLI: stderr handler, no duplicates, per-format levels."""
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import sys
10
+ import warnings
11
+
12
+
13
def setup_logging(
    *,
    debug: bool = False,
    verbose: bool = False,
    level: int | None = None,
    format: str = "plain",
) -> None:
    """Configure the ``maxcompute_semantic`` and ``odps`` logger namespaces.

    Level selection, highest priority first:

    1. An explicit ``level`` always wins.
    2. ``debug=True`` selects ``DEBUG``.
    3. ``verbose=True`` selects ``INFO``.
    4. Otherwise ``ERROR`` when ``format == "json"`` and ``WARNING`` for
       every other format.

    The chosen level is applied to both namespaces through
    ``_configure_namespace`` (stderr handler, no duplicates on repeated
    calls). Finally ``_configure_warnings`` is invoked, which in
    ``format="json"`` mode silences mcs recovery-class ``UserWarning``s
    (e.g. ``StaleLockClearedWarning``) unless ``debug`` / ``verbose``
    asked for diagnostics.
    """
    if level is None:
        if debug:
            level = logging.DEBUG
        elif verbose:
            level = logging.INFO
        else:
            level = logging.ERROR if format == "json" else logging.WARNING

    for namespace in ("maxcompute_semantic", "odps"):
        _configure_namespace(namespace, level)
    _configure_warnings(format=format, debug=debug, verbose=verbose)
48
+
49
+
50
def _configure_warnings(*, format: str, debug: bool, verbose: bool) -> None:
    """Suppress mcs recovery-class warnings when running with ``-f json``.

    In json mode the stderr stream is reserved for the error envelope
    (``{"status":"error", ...}`` emitted by the CLI shield in
    ``cli.py``); warnings such as ``StaleLockClearedWarning`` would be
    non-actionable noise there and would break tools that scrape stderr
    for the JSON payload. Passing ``debug`` or ``verbose`` keeps the
    warnings visible for diagnosis, and any non-json format is left
    untouched.
    """
    diagnostics_requested = debug or verbose
    if format == "json" and not diagnostics_requested:
        # Deferred import: keeps this module free of an import-time
        # dependency on the ``versioning`` package.
        from maxcompute_semantic.versioning.errors import StaleLockClearedWarning

        warnings.filterwarnings("ignore", category=StaleLockClearedWarning)
69
+
70
+
71
def _configure_namespace(name: str, level: int) -> None:
    """Set ``level`` on logger ``name`` and ensure one stderr handler exists.

    Args:
        name: Logger namespace to configure (e.g. ``"maxcompute_semantic"``).
        level: Numeric logging level applied to both the logger and its
            stderr handler.

    The handler installed here is tagged with a private marker attribute
    so later calls can recognise it even if ``sys.stderr`` has been
    rebound in the meantime (test capture, CLI runners). Matching only on
    ``handler.stream is sys.stderr`` would miss it in that case, stacking
    a second handler and leaving the first one writing to a stale stream.
    A recognised handler is retargeted at the current ``sys.stderr`` and
    has its level refreshed; no additional handler is added.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    for existing in logger.handlers:
        if not isinstance(existing, logging.StreamHandler):
            continue
        owned = getattr(existing, "_mcs_stderr_handler", False)
        if owned or existing.stream is sys.stderr:
            if existing.stream is not sys.stderr:
                # sys.stderr was swapped since installation; follow it
                # instead of writing to the old (possibly closed) stream.
                existing.setStream(sys.stderr)
            existing.setLevel(level)
            return

    handler = logging.StreamHandler(sys.stderr)
    handler.setLevel(level)
    handler.setFormatter(logging.Formatter("%(name)s: %(message)s"))
    # Marker used by the loop above to find this handler on later calls.
    setattr(handler, "_mcs_stderr_handler", True)
    logger.addHandler(handler)
@@ -0,0 +1,135 @@
1
+ # Copyright (c) 2024-2026, Alibaba Cloud and its affiliates.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Renderer: CLI output adapter for plain (human) and envelope (json/yaml) modes."""
5
+
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import sys
10
+ from io import StringIO
11
+ from typing import Any, TextIO
12
+
13
+ from maxcompute_semantic.mc_client.envelope import Envelope
14
+ from maxcompute_semantic.mc_client.errors import McsError
15
+
16
+ _ENVELOPE_FORMATS: frozenset[str] = frozenset({"json", "yaml"})
17
+
18
+
19
def _serialize_envelope(envelope_dict: dict[str, Any], format: str) -> str:
    """Render the envelope dict as text in the requested format.

    ``format == "yaml"`` produces block-style YAML through ruamel's
    *safe* dumper, so the output is not affected by the round-trip
    comment-preservation machinery used on the config-write path. Any
    other value produces a single line of JSON (non-ASCII characters
    kept verbatim) terminated by a newline.
    """
    if format != "yaml":
        return json.dumps(envelope_dict, ensure_ascii=False) + "\n"

    # Imported on demand so JSON output never pays for loading ruamel.
    from ruamel.yaml import YAML

    emitter = YAML(typ="safe")
    emitter.default_flow_style = False
    emitter.allow_unicode = True
    with StringIO() as sink:
        emitter.dump(envelope_dict, sink)
        return sink.getvalue()
36
+
37
+
38
class Renderer:
    """Format-aware output: success/error/table for plain or envelope mode.

    ``format`` is one of ``plain`` / ``json`` / ``yaml``. ``json`` and
    ``yaml`` both emit the same :class:`Envelope` shape (``status`` /
    ``data`` / ``error``), differing only in serializer — callers that
    branch on "envelope vs prose" should use :attr:`is_envelope`
    rather than comparing against ``"json"`` directly.
    """

    def __init__(
        self,
        format: str = "plain",
        quiet: bool = False,
        stdout: TextIO | None = None,
        stderr: TextIO | None = None,
    ) -> None:
        """Store the output mode and the streams to write to.

        Args:
            format: ``"plain"``, ``"json"`` or ``"yaml"``.
            quiet: Suppress plain-mode success/table output.
            stdout: Stream for regular output; defaults to ``sys.stdout``.
            stderr: Stream for plain-mode errors; defaults to ``sys.stderr``.
        """
        self.format = format
        self.quiet = quiet
        # Explicit None checks: a caller-supplied stream must be honoured
        # even if the object happens to evaluate as falsy.
        self._stdout = sys.stdout if stdout is None else stdout
        self._stderr = sys.stderr if stderr is None else stderr

    @property
    def is_envelope(self) -> bool:
        """True for envelope-emitting formats (``json`` / ``yaml``).

        Use this everywhere a call site branches "structured envelope
        on stdout" vs "human prose" — keeps yaml symmetric with json
        without each branch having to enumerate both names.
        """
        return self.format in _ENVELOPE_FORMATS

    def quiet_essential(self, data: dict[str, Any], key: str) -> None:
        """In quiet+plain mode, print just the essential identifier (one line).

        Does nothing in envelope modes (the envelope is always emitted by
        :meth:`success`), when ``quiet`` is off (regular success output
        handles it), or when ``data[key]`` is missing / ``None``.
        """
        if self.is_envelope or not self.quiet:
            return
        value = data.get(key)
        if value is None:
            return
        self._stdout.write(str(value) + "\n")

    def success(self, data: dict[str, Any]) -> None:
        """Emit a success payload.

        Envelope modes write the serialized envelope to stdout; plain
        mode writes one ``key: value`` line per item unless ``quiet``.
        """
        if self.is_envelope:
            env = Envelope.success(data)
            self._stdout.write(_serialize_envelope(env.to_dict(), self.format))
            return
        if self.quiet:
            # Quiet plain mode: no output on success.
            return
        for key, value in data.items():
            self._stdout.write(f"{key}: {value}\n")

    def error(self, err: McsError) -> None:
        """Emit an error payload. Plain goes to stderr; envelope modes go to stdout."""
        if self.is_envelope:
            env = Envelope.from_error(err)
            self._stdout.write(_serialize_envelope(env.to_dict(), self.format))
        else:
            self._stderr.write(f"Error: {err.message}\n")
            if err.remediation:
                self._stderr.write(f" Suggestion: {err.remediation}\n")

    def table(self, headers: list[str], rows: list[list[str]]) -> None:
        """Emit a table. Plain uses columnar formatting; envelope modes use the envelope.

        In plain mode cells are converted with ``str()`` and rows may be
        ragged: a row shorter than ``headers`` simply prints fewer cells,
        and a row longer than ``headers`` gets extra unnamed columns
        instead of raising ``IndexError``. Output for rectangular,
        all-string input is unchanged.
        """
        if self.is_envelope:
            env = Envelope.success({"headers": headers, "rows": rows})
            self._stdout.write(_serialize_envelope(env.to_dict(), self.format))
            return
        if self.quiet:
            return

        # Stringify once so width computation and rendering agree even
        # when a caller passes ints / None instead of strings.
        text_rows = [[str(cell) for cell in row] for row in rows]

        # Column widths: start from the headers, widen for each cell, and
        # grow the list when a row carries more cells than there are headers.
        widths = [len(h) for h in headers]
        for row in text_rows:
            for i, cell in enumerate(row):
                if i < len(widths):
                    widths[i] = max(widths[i], len(cell))
                else:
                    widths.append(len(cell))

        def _line(cells: list[str]) -> str:
            return " ".join(cell.ljust(widths[i]) for i, cell in enumerate(cells))

        self._stdout.write(_line(list(headers)) + "\n")
        self._stdout.write(" ".join("-" * w for w in widths) + "\n")
        for row in text_rows:
            self._stdout.write(_line(row) + "\n")
@@ -0,0 +1,244 @@
1
+ # Copyright (c) 2024-2026, Alibaba Cloud and its affiliates.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """XDG-compliant path helpers for mcs config + data + cache directories.
5
+
6
+ Config root is ``~/.config/maxcompute-semantic/`` (override via
7
+ ``MCS_CONFIG_DIR``). Data root is ``~/.local/share/maxcompute-semantic/``
8
+ on Unix / ``~/Library/Application Support/maxcompute-semantic/`` on macOS
9
+ (override via ``MCS_DATA_DIR`` or ``XDG_DATA_HOME``). Config and data
10
+ live in separate XDG-standard directories.
11
+
12
+ Per-profile data lives at ``data_root()/<profile_name>/`` by default.
13
+ A profile may override its data dir explicitly via ``Profile.package_path``
14
+ (useful for imported packages, NFS-mounted shared dirs, or eval fixtures
15
+ checked into git). When ``package_path`` is set, it takes precedence over
16
+ the default; otherwise ``profile_data_dir(profile)`` falls back to the
17
+ per-name slot under ``data_root()``.
18
+
19
+ Historical note: the data root used to be called ``profiles_root()`` and
20
+ sat at ``<data>/profiles/``. The 2026-05-14 vocab cleanup renamed it to
21
+ ``data_root()`` at ``<data>/data/`` because "profiles" was a misleading
22
+ label — that directory only ever held *data*, while profile *config*
23
+ lives at ``config_dir()/profiles.yaml``.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import os
29
+ import sys
30
+ from pathlib import Path
31
+ from typing import TYPE_CHECKING
32
+
33
+ if TYPE_CHECKING:
34
+ from maxcompute_semantic.auth.schema import Profile
35
+
36
+
37
def _xdg_cache_home() -> Path:
    """Return the base cache directory for the current platform.

    Resolution order:

    1. ``XDG_CACHE_HOME`` when set to a non-empty value.
    2. macOS: ``~/Library/Caches``.
    3. Windows: ``%LOCALAPPDATA%/Cache``, falling back to
       ``~/AppData/Local/Cache`` when ``LOCALAPPDATA`` is unset or empty.
    4. Everything else: ``~/.cache``.
    """
    explicit = os.environ.get("XDG_CACHE_HOME")
    if explicit:
        return Path(explicit)

    platform = sys.platform
    if platform == "darwin":
        return Path.home().joinpath("Library", "Caches")
    if platform != "win32":
        return Path.home() / ".cache"

    # Windows: prefer the per-user non-roaming directory.
    local_app_data = os.environ.get("LOCALAPPDATA")
    if local_app_data:
        return Path(local_app_data) / "Cache"
    return Path.home() / "AppData" / "Local" / "Cache"
58
+
59
+
60
def _xdg_data_home() -> Path:
    """Return the base data directory for the current platform.

    A non-empty ``XDG_DATA_HOME`` wins. Otherwise macOS resolves to
    ``~/Library/Application Support`` and every other platform to
    ``~/.local/share`` (there is no Windows-specific branch here).
    """
    configured = os.environ.get("XDG_DATA_HOME")
    if configured:
        return Path(configured)

    if sys.platform == "darwin":
        tail = ("Library", "Application Support")
    else:
        tail = (".local", "share")
    return Path.home().joinpath(*tail)
73
+
74
+
75
def cache_dir() -> Path:
    """Return the mcs cache directory.

    ``MCS_CACHE_DIR``, when set to a non-empty value, is used verbatim
    (no suffix appended). Otherwise the result is
    ``<xdg_cache_home>/maxcompute-semantic`` — the same override shape
    ``data_dir()`` uses for ``MCS_DATA_DIR``.

    Used by ``_internal/update_check.py`` for the per-user
    ``update_check.json`` cache. This function only computes the path;
    the directory is created lazily by the first writer, and readers
    tolerate its absence.
    """
    forced = os.environ.get("MCS_CACHE_DIR")
    return Path(forced) if forced else _xdg_cache_home() / "maxcompute-semantic"
92
+
93
+
94
def config_dir() -> Path:
    """Return the mcs config directory.

    Resolution order:

    1. ``MCS_CONFIG_DIR`` — used verbatim when non-empty.
    2. ``XDG_CONFIG_HOME`` — suffixed with ``maxcompute-semantic``.
    3. ``~/.config/maxcompute-semantic``.
    """
    forced = os.environ.get("MCS_CONFIG_DIR")
    if forced:
        return Path(forced)

    xdg_config = os.environ.get("XDG_CONFIG_HOME")
    base = Path(xdg_config) if xdg_config else Path.home() / ".config"
    return base / "maxcompute-semantic"
101
+
102
+
103
def data_dir() -> Path:
    """Return the mcs data directory.

    Resolution order:

    1. ``MCS_DATA_DIR`` — used verbatim when non-empty, no suffix added.
    2. Otherwise ``_xdg_data_home()`` (which honors ``XDG_DATA_HOME``
       and then the platform default) suffixed with
       ``maxcompute-semantic``.
    """
    forced = os.environ.get("MCS_DATA_DIR")
    return Path(forced) if forced else _xdg_data_home() / "maxcompute-semantic"
116
+
117
+
118
def data_root() -> Path:
    """Return the root under which per-profile data directories live.

    ``MCS_PROFILES_DIR``, when non-empty, is used verbatim; otherwise
    the root is ``data_dir()/data``. The env var keeps its historical
    name for backwards compatibility with existing user configs and CI
    yamls — semantically it points at the per-profile *data* root.
    """
    forced = os.environ.get("MCS_PROFILES_DIR")
    return Path(forced) if forced else data_dir() / "data"
129
+
130
+
131
def profile_data_dir(profile: Profile | str) -> Path:
    """Return the data directory belonging to ``profile``.

    ``profile`` may be a ``Profile`` object or a bare profile name:

    - ``Profile`` with ``package_path`` set: that path is returned.
    - ``Profile`` without ``package_path``: ``data_root()/<profile.name>``.
    - ``str``: ``data_root()/<name>`` — equivalent to a ``Profile`` whose
      ``package_path`` is ``None``.
    """
    # Function-scope import: ``auth.schema`` imports from this module via
    # its validation path, so a top-level import would be circular.
    from maxcompute_semantic.auth.schema import Profile as _Profile

    if not isinstance(profile, _Profile):
        # Bare name — no package_path override is possible.
        return data_root() / profile

    explicit = profile.package_path
    if explicit is None:
        return data_root() / profile.name
    return Path(explicit)
150
+
151
+
152
def profiles_yaml_path() -> Path:
    """Return ``<config_dir>/profiles.yaml``. Pure path math, no I/O."""
    return config_dir().joinpath("profiles.yaml")
154
+
155
+
156
def link_json_path() -> Path:
    """Return ``<config_dir>/link.json``. Pure path math, no I/O."""
    return config_dir().joinpath("link.json")
158
+
159
+
160
def tier_cache_path(profile: Profile | str, project: str) -> Path:
    """Return the path of the cached-tier sentinel for one MaxCompute project.

    The file lives at ``<profile_data_dir(profile)>/tier_cache/<project>``
    and holds a single character: ``"2"`` for a 2-level (no-schema)
    project, ``"3"`` for a 3-level (schema-enabled) one. The cache is
    keyed per (profile, project) because a multi-source profile can span
    several projects — ``Profile.compute_project`` plus each
    ``DataSource`` in ``Profile.sources`` — and each project's tier is
    independent state.

    Because the directory comes from ``profile_data_dir``, a ``Profile``
    with ``package_path`` set keeps its ``tier_cache`` under that
    override root. The bare-name form exists for the
    post-``mcs profile remove`` cleanup path, where the profile config
    is already gone and only the path is needed.

    This helper performs no I/O: creating the ``tier_cache`` directory
    is the writer's job (``get_tier``), and readers tolerate a missing
    file since the cache is only a hint — the live probe is
    authoritative.

    Raises:
        ValueError: ``project`` is not a string, or is empty / whitespace.
    """
    project_ok = isinstance(project, str) and bool(project.strip())
    if project_ok:
        return profile_data_dir(profile).joinpath("tier_cache", project)

    raise ValueError(
        f"tier_cache_path requires a non-empty MaxCompute project name "
        f"as the second argument (got {project!r}); the cache is keyed "
        f"per-(profile, project) — caller must pass an explicit project "
        f"name from either profile.compute_project (the AK's home project) "
        f"or one of profile.sources[i].project (a declared data source)."
    )
206
+
207
+
208
def profile_git_dir(profile: Profile | str) -> Path:
    """Return ``<profile_data_dir>/.git``, the per-profile repo's admin dir.

    Pure path math, no I/O. ``versioning/git_repo.py`` scopes its
    ``git -C <repo_root>`` calls to the *parent* of this directory
    (``-C`` takes the working tree, not the ``.git`` dir). The helper is
    named after the admin dir because ``profile_git_dir(p).exists()`` is
    the canonical "is this profile versioned?" probe, used by the hook's
    legacy-profile branch and by ``mcs doctor``'s
    ``_check_profile_versioned``.
    """
    return profile_data_dir(profile).joinpath(".git")
223
+
224
+
225
def profile_gitignore_path(profile: Profile | str) -> Path:
    """Return ``<profile_data_dir>/.gitignore``.

    The file is committed; its contents are the constant
    ``PROFILE_GITIGNORE`` defined in ``versioning/gitignore_default.py``.
    """
    return profile_data_dir(profile).joinpath(".gitignore")
230
+
231
+
232
def profile_package_sql_path(profile: Profile | str) -> Path:
    """Return ``<profile_data_dir>/package.sql``.

    This is the textual dump of the committed tables of ``package.db``,
    produced and consumed by ``versioning/sql_dump.py``. The dump is
    committed; the binary ``package.db`` sibling is *not* (it is listed
    in the ``.gitignore``).
    """
    return profile_data_dir(profile).joinpath("package.sql")
238
+
239
+
240
def profile_lock_path(profile: Profile | str) -> Path:
    """Return ``<profile_data_dir>/.mcs-lock``, the fcntl lock anchor file.

    The file body is the PID of the current holder. The file is listed
    in ``.gitignore`` so it never gets committed.
    """
    return profile_data_dir(profile).joinpath(".mcs-lock")