marin-rigging 0.2.1.dev202605292307__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- marin_rigging-0.2.1.dev202605292307/.gitignore +244 -0
- marin_rigging-0.2.1.dev202605292307/PKG-INFO +8 -0
- marin_rigging-0.2.1.dev202605292307/pyproject.toml +27 -0
- marin_rigging-0.2.1.dev202605292307/src/rigging/__init__.py +2 -0
- marin_rigging-0.2.1.dev202605292307/src/rigging/config_discovery.py +154 -0
- marin_rigging-0.2.1.dev202605292307/src/rigging/distributed_lock.py +440 -0
- marin_rigging-0.2.1.dev202605292307/src/rigging/filesystem.py +1046 -0
- marin_rigging-0.2.1.dev202605292307/src/rigging/log_setup.py +253 -0
- marin_rigging-0.2.1.dev202605292307/src/rigging/redaction.py +166 -0
- marin_rigging-0.2.1.dev202605292307/src/rigging/rpc.py +128 -0
- marin_rigging-0.2.1.dev202605292307/src/rigging/timing.py +641 -0
- marin_rigging-0.2.1.dev202605292307/src/rigging/tunnel.py +333 -0
- marin_rigging-0.2.1.dev202605292307/tests/test_config_discovery.py +201 -0
- marin_rigging-0.2.1.dev202605292307/tests/test_mirror_fs.py +234 -0
- marin_rigging-0.2.1.dev202605292307/tests/test_record_transfer.py +39 -0
- marin_rigging-0.2.1.dev202605292307/tests/test_redaction.py +204 -0
- marin_rigging-0.2.1.dev202605292307/tests/test_rpc.py +228 -0
- marin_rigging-0.2.1.dev202605292307/tests/test_timing.py +446 -0
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
# redundant, but Ray looks for this otherwise.
|
|
2
|
+
.git
|
|
3
|
+
|
|
4
|
+
logs/
|
|
5
|
+
|
|
6
|
+
# CPU profiles
|
|
7
|
+
prof/
|
|
8
|
+
|
|
9
|
+
# Downloaded build tools (zig, etc.)
|
|
10
|
+
.tools/
|
|
11
|
+
|
|
12
|
+
tests/snapshots/outputs
|
|
13
|
+
tests/snapshots/diffs
|
|
14
|
+
|
|
15
|
+
# don't log data/MD outputs to git
|
|
16
|
+
data/*
|
|
17
|
+
output/*
|
|
18
|
+
outputs/*
|
|
19
|
+
|
|
20
|
+
# Snapshot diffs and outputs
|
|
21
|
+
tests/snapshots/*/outputs/*
|
|
22
|
+
tests/snapshots/*/diffs/*
|
|
23
|
+
|
|
24
|
+
# This is mainly for Ray and using submodule
|
|
25
|
+
*/**/.git
|
|
26
|
+
|
|
27
|
+
### Python template
|
|
28
|
+
# Byte-compiled / optimized / DLL files
|
|
29
|
+
__pycache__/
|
|
30
|
+
*.py[cod]
|
|
31
|
+
*$py.class
|
|
32
|
+
|
|
33
|
+
# C extensions
|
|
34
|
+
*.so
|
|
35
|
+
|
|
36
|
+
# pypa/gh-action-pypi-publish caches its Docker action manifest here.
|
|
37
|
+
.github/.tmp/
|
|
38
|
+
|
|
39
|
+
# Distribution / packaging
|
|
40
|
+
.Python
|
|
41
|
+
build/
|
|
42
|
+
develop-eggs/
|
|
43
|
+
dist/
|
|
44
|
+
downloads/
|
|
45
|
+
eggs/
|
|
46
|
+
.eggs/
|
|
47
|
+
lib64/
|
|
48
|
+
parts/
|
|
49
|
+
sdist/
|
|
50
|
+
local_store/
|
|
51
|
+
wheels/
|
|
52
|
+
share/python-wheels/
|
|
53
|
+
*.egg-info/
|
|
54
|
+
.installed.cfg
|
|
55
|
+
*.egg
|
|
56
|
+
MANIFEST
|
|
57
|
+
|
|
58
|
+
# PyInstaller
|
|
59
|
+
# Usually these files are written by a python script from a template
|
|
60
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
61
|
+
*.manifest
|
|
62
|
+
*.spec
|
|
63
|
+
|
|
64
|
+
# Installer logs
|
|
65
|
+
pip-log.txt
|
|
66
|
+
pip-delete-this-directory.txt
|
|
67
|
+
|
|
68
|
+
# Unit test / coverage reports
|
|
69
|
+
htmlcov/
|
|
70
|
+
.tox/
|
|
71
|
+
.nox/
|
|
72
|
+
.coverage
|
|
73
|
+
.coverage.*
|
|
74
|
+
.cache
|
|
75
|
+
nosetests.xml
|
|
76
|
+
coverage.xml
|
|
77
|
+
*.cover
|
|
78
|
+
*.py,cover
|
|
79
|
+
.hypothesis/
|
|
80
|
+
.pytest_cache/
|
|
81
|
+
cover/
|
|
82
|
+
|
|
83
|
+
# Translations
|
|
84
|
+
*.mo
|
|
85
|
+
*.pot
|
|
86
|
+
|
|
87
|
+
# Django stuff:
|
|
88
|
+
*.log
|
|
89
|
+
local_settings.py
|
|
90
|
+
db.sqlite3
|
|
91
|
+
db.sqlite3-journal
|
|
92
|
+
|
|
93
|
+
# Flask stuff:
|
|
94
|
+
instance/
|
|
95
|
+
.webassets-cache
|
|
96
|
+
|
|
97
|
+
# Scrapy stuff:
|
|
98
|
+
.scrapy
|
|
99
|
+
|
|
100
|
+
# Sphinx documentation
|
|
101
|
+
docs/_build/
|
|
102
|
+
|
|
103
|
+
# PyBuilder
|
|
104
|
+
.pybuilder/
|
|
105
|
+
target/
|
|
106
|
+
|
|
107
|
+
# Jupyter Notebook
|
|
108
|
+
.ipynb_checkpoints
|
|
109
|
+
|
|
110
|
+
# IPython
|
|
111
|
+
profile_default/
|
|
112
|
+
ipython_config.py
|
|
113
|
+
|
|
114
|
+
# pyenv
|
|
115
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
116
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
117
|
+
# .python-version
|
|
118
|
+
|
|
119
|
+
# pipenv
|
|
120
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
121
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
122
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
123
|
+
# install all needed dependencies.
|
|
124
|
+
#Pipfile.lock
|
|
125
|
+
|
|
126
|
+
# poetry
|
|
127
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
128
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
129
|
+
# commonly ignored for libraries.
|
|
130
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
131
|
+
#poetry.lock
|
|
132
|
+
|
|
133
|
+
# pdm
|
|
134
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
135
|
+
#pdm.lock
|
|
136
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
137
|
+
# in version control.
|
|
138
|
+
# https://pdm.fming.dev/#use-with-ide
|
|
139
|
+
.pdm.toml
|
|
140
|
+
|
|
141
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
142
|
+
__pypackages__/
|
|
143
|
+
|
|
144
|
+
# Celery stuff
|
|
145
|
+
celerybeat-schedule
|
|
146
|
+
celerybeat.pid
|
|
147
|
+
|
|
148
|
+
# SageMath parsed files
|
|
149
|
+
*.sage.py
|
|
150
|
+
|
|
151
|
+
# Environments
|
|
152
|
+
.env
|
|
153
|
+
.venv
|
|
154
|
+
env/
|
|
155
|
+
venv/
|
|
156
|
+
ENV/
|
|
157
|
+
env.bak/
|
|
158
|
+
venv.bak/
|
|
159
|
+
|
|
160
|
+
# Spyder project settings
|
|
161
|
+
.spyderproject
|
|
162
|
+
.spyproject
|
|
163
|
+
|
|
164
|
+
# Rope project settings
|
|
165
|
+
.ropeproject
|
|
166
|
+
|
|
167
|
+
# mkdocs documentation
|
|
168
|
+
/site
|
|
169
|
+
|
|
170
|
+
# mypy
|
|
171
|
+
.mypy_cache/
|
|
172
|
+
.dmypy.json
|
|
173
|
+
dmypy.json
|
|
174
|
+
|
|
175
|
+
# Pyre type checker
|
|
176
|
+
.pyre/
|
|
177
|
+
|
|
178
|
+
# Ruff
|
|
179
|
+
.ruff_cache/
|
|
180
|
+
|
|
181
|
+
# pytype static type analyzer
|
|
182
|
+
.pytype/
|
|
183
|
+
|
|
184
|
+
# Cython debug symbols
|
|
185
|
+
cython_debug/
|
|
186
|
+
|
|
187
|
+
# PyCharm
|
|
188
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
189
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
190
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
191
|
+
# option (not recommended), the entries below ignore the entire .idea folder.
|
|
192
|
+
.idea/
|
|
193
|
+
*.iml
|
|
194
|
+
|
|
195
|
+
# IDE Config
|
|
196
|
+
.vscode/
|
|
197
|
+
|
|
198
|
+
# Mac OS
|
|
199
|
+
.DS_Store
|
|
200
|
+
|
|
201
|
+
# Secrets
|
|
202
|
+
credentials.json
|
|
203
|
+
marin/crawl/bigquery-gcs-key.json
|
|
204
|
+
|
|
205
|
+
# Archive
|
|
206
|
+
archive/
|
|
207
|
+
|
|
208
|
+
# Caches and Outputs
|
|
209
|
+
!/scripts/web/output/
|
|
210
|
+
!/output/
|
|
211
|
+
|
|
212
|
+
# csv
|
|
213
|
+
*.csv
|
|
214
|
+
|
|
215
|
+
# wandb logs
|
|
216
|
+
wandb
|
|
217
|
+
artifacts
|
|
218
|
+
|
|
219
|
+
# Ignore generated credentials from google-github-actions/auth
|
|
220
|
+
gha-creds-*.json
|
|
221
|
+
|
|
222
|
+
.aider*
|
|
223
|
+
.git/*
|
|
224
|
+
|
|
225
|
+
*.jsonl
|
|
226
|
+
**/*.jsonl
|
|
227
|
+
scr/*
|
|
228
|
+
.weaver/
|
|
229
|
+
|
|
230
|
+
# Local host Marin config
|
|
231
|
+
.marin.yaml
|
|
232
|
+
|
|
233
|
+
/scratch
|
|
234
|
+
|
|
235
|
+
.forge
|
|
236
|
+
.claude
|
|
237
|
+
!.claude/skills
|
|
238
|
+
.agents/tmp/
|
|
239
|
+
.codex
|
|
240
|
+
.entire
|
|
241
|
+
|
|
242
|
+
.worktrees
|
|
243
|
+
.obsidian
|
|
244
|
+
.cw_env
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "marin-rigging"
|
|
7
|
+
version = "0.2.1.dev202605292307"
|
|
8
|
+
requires-python = ">=3.11,<3.13"
|
|
9
|
+
dependencies = [
|
|
10
|
+
"connect-python>=0.9.0",
|
|
11
|
+
"fsspec>=2024.0.0",
|
|
12
|
+
"gcsfs>=2024.0.0",
|
|
13
|
+
"s3fs>=2024.0.0",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[dependency-groups]
|
|
17
|
+
dev = [
|
|
18
|
+
"pytest>=8.3.2",
|
|
19
|
+
"pytest-asyncio",
|
|
20
|
+
"pytest-timeout",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[tool.hatch.build.targets.wheel]
|
|
24
|
+
packages = ["src/rigging"]
|
|
25
|
+
|
|
26
|
+
[tool.pytest.ini_options]
|
|
27
|
+
timeout = 10
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# Copyright The Marin Authors
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Path-agnostic config discovery for cluster YAML files.
|
|
5
|
+
|
|
6
|
+
Generic YAML config discovery helpers. Callers (e.g. iris) pass the
|
|
7
|
+
directories to search; this module knows nothing about any particular
|
|
8
|
+
marin sub-package.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import functools
|
|
12
|
+
import logging
|
|
13
|
+
from collections.abc import Sequence
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
import tomllib
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
_YAML_SUFFIXES = (".yaml", ".yml")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@functools.lru_cache(maxsize=128)
def _find_project_root_from(current: Path) -> Path | None:
    """Walk up from the already-resolved ``current`` to the workspace root.

    The cache is keyed on the absolute directory, so an entry can never be
    tied to whatever the working directory was when it was created.
    """
    for directory in (current, *current.parents):
        pp = directory / "pyproject.toml"
        if pp.is_file() and _declares_uv_workspace(pp):
            logger.debug("Found marin workspace root: %s", directory)
            return directory

    logger.debug("No marin workspace root found starting from %s", current)
    return None


def find_project_root(start: Path | str | None = None) -> Path | None:
    """Find the marin workspace root.

    Walks up from ``start`` (or the current working directory) looking for a
    ``pyproject.toml`` that declares ``[tool.uv.workspace]``. This uniquely
    identifies the top-level marin root and avoids matching a workspace
    member's pyproject (e.g. ``lib/iris/pyproject.toml``).

    ``start`` is resolved to an absolute path on every call *before* the
    cached lookup. Previously the cache was keyed on the raw argument, so
    ``start=None`` or a relative ``start`` kept returning the answer for the
    working directory of the first call, even after ``os.chdir``.

    Args:
        start: Directory to start from; ``None`` means the current working
            directory.

    Returns:
        The marin root ``Path``, or ``None`` when running outside a marin
        checkout (e.g. from an installed pip package).
    """
    origin = Path.cwd() if start is None else Path(start)
    return _find_project_root_from(origin.resolve())


# Preserve the cache-management surface that callers of the previously
# ``lru_cache``-decorated function may rely on.
find_project_root.cache_clear = _find_project_root_from.cache_clear  # type: ignore[attr-defined]
find_project_root.cache_info = _find_project_root_from.cache_info  # type: ignore[attr-defined]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _declares_uv_workspace(pyproject_path: Path) -> bool:
|
|
49
|
+
"""Return True if ``pyproject_path`` declares ``[tool.uv.workspace]``."""
|
|
50
|
+
try:
|
|
51
|
+
with pyproject_path.open("rb") as f:
|
|
52
|
+
data = tomllib.load(f)
|
|
53
|
+
except (OSError, tomllib.TOMLDecodeError):
|
|
54
|
+
return False
|
|
55
|
+
return "workspace" in data.get("tool", {}).get("uv", {})
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _resolve_dirs(dirs: Sequence[Path | str]) -> list[Path]:
    """Turn each entry of ``dirs`` into a concrete search directory.

    ``~`` is expanded first. Absolute entries pass through untouched.
    Relative entries (including the empty string, which maps to the anchor
    directory itself) are joined onto the marin project root, or onto the
    current working directory when no project root is found.
    """
    project_root = find_project_root()

    def _anchor(entry: Path | str) -> Path:
        expanded = Path(entry).expanduser()
        if expanded.is_absolute():
            return expanded
        # Outside a marin checkout there is no root, so fall back to cwd.
        base = project_root if project_root is not None else Path.cwd()
        return base / expanded

    return [_anchor(entry) for entry in dirs]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def find_configs(
    dirs: Sequence[Path | str],
    name: str | None = None,
) -> dict[str, Path]:
    """Discover YAML config files across ``dirs``.

    Relative ``dirs`` are resolved against the marin project root (see
    :func:`find_project_root`); absolute paths are used as-is; ``~`` is
    expanded. An empty string resolves to the project root itself.
    Entries of ``dirs`` that are not existing directories are skipped.

    Only regular files (or symlinks to them) are reported: a sub-directory
    or dangling symlink that merely carries a ``.yaml``/``.yml`` suffix is
    ignored, so every returned path can actually be opened as a config.

    Args:
        dirs: Directories to search, in priority order.
        name: When given, only return entries whose stem equals ``name``.

    Returns:
        A dict mapping config stem (filename without ``.yaml``/``.yml``) to
        its resolved ``Path``. When the same stem appears in multiple dirs,
        the first (highest-priority) match wins. Within one directory,
        entries are visited in sorted order, so the first sorted match wins.
    """
    configs: dict[str, Path] = {}
    for directory in _resolve_dirs(dirs):
        if not directory.is_dir():
            continue
        for path in sorted(directory.iterdir()):
            if path.suffix not in _YAML_SUFFIXES:
                continue
            stem = path.stem
            if name is not None and stem != name:
                continue
            # Checked last so the stat call only happens for real candidates.
            if not path.is_file():
                continue
            # setdefault keeps the first (highest-priority) hit for a stem.
            configs.setdefault(stem, path)
    return configs
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def resolve_cluster_config(name: str, dirs: Sequence[Path | str]) -> Path:
    """Map a cluster name (or a direct path) to an existing YAML config file.

    A ``name`` that already points at an existing file (after ``~``
    expansion) is returned as-is. Otherwise a trailing ``.yaml``/``.yml``
    is dropped from ``name`` and ``dirs`` are searched for a config whose
    stem equals the result.

    Args:
        name: Cluster name (e.g. ``"marin-dev"``) or path to an existing file.
        dirs: Directories to search.

    Returns:
        The ``Path`` of the matching config file.

    Raises:
        FileNotFoundError: When nothing matches; the message lists every
            directory that was searched.
    """
    direct = Path(name).expanduser()
    if direct.is_file():
        return direct

    # Accept both "marin-dev" and "marin-dev.yaml" as the lookup key.
    raw = Path(name)
    if raw.suffix in _YAML_SUFFIXES:
        wanted = raw.stem
    else:
        wanted = name

    found = find_configs(dirs, name=wanted)
    if wanted not in found:
        listing = "\n ".join(map(str, _resolve_dirs(dirs)))
        raise FileNotFoundError(f"No config file found for cluster {name!r}.\nSearched directories:\n {listing}")

    hit = found[wanted]
    logger.debug("Resolved cluster config %r -> %s", name, hit)
    return hit
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def list_cluster_configs(dirs: Sequence[Path | str]) -> dict[str, Path]:
    """Return every YAML cluster config found in ``dirs``, keyed by stem.

    Convenience wrapper around :func:`find_configs` with no name filter,
    for callers that want the complete name-to-path mapping.
    """
    return find_configs(dirs, name=None)
|