marin-rigging 0.2.1.dev202605292307__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,244 @@
1
+ # redundant, but Ray looks for this otherwise.
2
+ .git
3
+
4
+ logs/
5
+
6
+ # CPU profiles
7
+ prof/
8
+
9
+ # Downloaded build tools (zig, etc.)
10
+ .tools/
11
+
12
+ tests/snapshots/outputs
13
+ tests/snapshots/diffs
14
+
15
+ # don't log data/MD outputs to git
16
+ data/*
17
+ output/*
18
+ outputs/*
19
+
20
+ # Snapshot diffs and outputs
21
+ tests/snapshots/*/outputs/*
22
+ tests/snapshots/*/diffs/*
23
+
24
+ # This is mainly for Ray and using submodule
25
+ */**/.git
26
+
27
+ ### Python template
28
+ # Byte-compiled / optimized / DLL files
29
+ __pycache__/
30
+ *.py[cod]
31
+ *$py.class
32
+
33
+ # C extensions
34
+ *.so
35
+
36
+ # pypa/gh-action-pypi-publish caches its Docker action manifest here.
37
+ .github/.tmp/
38
+
39
+ # Distribution / packaging
40
+ .Python
41
+ build/
42
+ develop-eggs/
43
+ dist/
44
+ downloads/
45
+ eggs/
46
+ .eggs/
47
+ lib64/
48
+ parts/
49
+ sdist/
50
+ local_store/
51
+ wheels/
52
+ share/python-wheels/
53
+ *.egg-info/
54
+ .installed.cfg
55
+ *.egg
56
+ MANIFEST
57
+
58
+ # PyInstaller
59
+ # Usually these files are written by a python script from a template
60
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
61
+ *.manifest
62
+ *.spec
63
+
64
+ # Installer logs
65
+ pip-log.txt
66
+ pip-delete-this-directory.txt
67
+
68
+ # Unit test / coverage reports
69
+ htmlcov/
70
+ .tox/
71
+ .nox/
72
+ .coverage
73
+ .coverage.*
74
+ .cache
75
+ nosetests.xml
76
+ coverage.xml
77
+ *.cover
78
+ *.py,cover
79
+ .hypothesis/
80
+ .pytest_cache/
81
+ cover/
82
+
83
+ # Translations
84
+ *.mo
85
+ *.pot
86
+
87
+ # Django stuff:
88
+ *.log
89
+ local_settings.py
90
+ db.sqlite3
91
+ db.sqlite3-journal
92
+
93
+ # Flask stuff:
94
+ instance/
95
+ .webassets-cache
96
+
97
+ # Scrapy stuff:
98
+ .scrapy
99
+
100
+ # Sphinx documentation
101
+ docs/_build/
102
+
103
+ # PyBuilder
104
+ .pybuilder/
105
+ target/
106
+
107
+ # Jupyter Notebook
108
+ .ipynb_checkpoints
109
+
110
+ # IPython
111
+ profile_default/
112
+ ipython_config.py
113
+
114
+ # pyenv
115
+ # For a library or package, you might want to ignore these files since the code is
116
+ # intended to run in multiple environments; otherwise, check them in:
117
+ # .python-version
118
+
119
+ # pipenv
120
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
121
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
122
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
123
+ # install all needed dependencies.
124
+ #Pipfile.lock
125
+
126
+ # poetry
127
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
128
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
129
+ # commonly ignored for libraries.
130
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
131
+ #poetry.lock
132
+
133
+ # pdm
134
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
135
+ #pdm.lock
136
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
137
+ # in version control.
138
+ # https://pdm.fming.dev/#use-with-ide
139
+ .pdm.toml
140
+
141
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
142
+ __pypackages__/
143
+
144
+ # Celery stuff
145
+ celerybeat-schedule
146
+ celerybeat.pid
147
+
148
+ # SageMath parsed files
149
+ *.sage.py
150
+
151
+ # Environments
152
+ .env
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # Ruff
179
+ .ruff_cache/
180
+
181
+ # pytype static type analyzer
182
+ .pytype/
183
+
184
+ # Cython debug symbols
185
+ cython_debug/
186
+
187
+ # PyCharm
188
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
189
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
190
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
191
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
192
+ .idea/
193
+ *.iml
194
+
195
+ # IDE Config
196
+ .vscode/
197
+
198
+ # Mac OS
199
+ .DS_Store
200
+
201
+ # Secrets
202
+ credentials.json
203
+ marin/crawl/bigquery-gcs-key.json
204
+
205
+ # Archive
206
+ archive/
207
+
208
+ # Caches and Outputs
209
+ !/scripts/web/output/
210
+ !/output/
211
+
212
+ # csv
213
+ *.csv
214
+
215
+ # wandb logs
216
+ wandb
217
+ artifacts
218
+
219
+ # Ignore generated credentials from google-github-actions/auth
220
+ gha-creds-*.json
221
+
222
+ .aider*
223
+ .git/*
224
+
225
+ *.jsonl
226
+ **/*.jsonl
227
+ scr/*
228
+ .weaver/
229
+
230
+ # Local host Marin config
231
+ .marin.yaml
232
+
233
+ /scratch
234
+
235
+ .forge
236
+ .claude
237
+ !.claude/skills
238
+ .agents/tmp/
239
+ .codex
240
+ .entire
241
+
242
+ .worktrees
243
+ .obsidian
244
+ .cw_env
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: marin-rigging
3
+ Version: 0.2.1.dev202605292307
4
+ Requires-Python: <3.13,>=3.11
5
+ Requires-Dist: connect-python>=0.9.0
6
+ Requires-Dist: fsspec>=2024.0.0
7
+ Requires-Dist: gcsfs>=2024.0.0
8
+ Requires-Dist: s3fs>=2024.0.0
@@ -0,0 +1,27 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "marin-rigging"
7
+ version = "0.2.1.dev202605292307"
8
+ requires-python = ">=3.11,<3.13"
9
+ dependencies = [
10
+ "connect-python>=0.9.0",
11
+ "fsspec>=2024.0.0",
12
+ "gcsfs>=2024.0.0",
13
+ "s3fs>=2024.0.0",
14
+ ]
15
+
16
+ [dependency-groups]
17
+ dev = [
18
+ "pytest>=8.3.2",
19
+ "pytest-asyncio",
20
+ "pytest-timeout",
21
+ ]
22
+
23
+ [tool.hatch.build.targets.wheel]
24
+ packages = ["src/rigging"]
25
+
26
+ [tool.pytest.ini_options]
27
+ timeout = 10
@@ -0,0 +1,2 @@
1
+ # Copyright The Marin Authors
2
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,154 @@
1
+ # Copyright The Marin Authors
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Path-agnostic config discovery for cluster YAML files.
5
+
6
+ Generic YAML config discovery helpers. Callers (e.g. iris) pass the
7
+ directories to search; this module knows nothing about any particular
8
+ marin sub-package.
9
+ """
10
+
11
+ import functools
12
+ import logging
13
+ from collections.abc import Sequence
14
+ from pathlib import Path
15
+
16
+ import tomllib
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ _YAML_SUFFIXES = (".yaml", ".yml")
21
+
22
+
23
+ @functools.lru_cache(maxsize=128)
24
+ def find_project_root(start: Path | str | None = None) -> Path | None:
25
+ """Find the marin workspace root.
26
+
27
+ Walks up from ``start`` (or the current working directory) looking for a
28
+ ``pyproject.toml`` that declares ``[tool.uv.workspace]``. This uniquely
29
+ identifies the top-level marin root and avoids matching a workspace
30
+ member's pyproject (e.g. ``lib/iris/pyproject.toml``).
31
+
32
+ Returns:
33
+ The marin root ``Path``, or ``None`` when running outside a marin
34
+ checkout (e.g. from an installed pip package).
35
+ """
36
+ current = Path(start).resolve() if start is not None else Path.cwd().resolve()
37
+
38
+ for directory in (current, *current.parents):
39
+ pp = directory / "pyproject.toml"
40
+ if pp.is_file() and _declares_uv_workspace(pp):
41
+ logger.debug("Found marin workspace root: %s", directory)
42
+ return directory
43
+
44
+ logger.debug("No marin workspace root found starting from %s", current)
45
+ return None
46
+
47
+
48
+ def _declares_uv_workspace(pyproject_path: Path) -> bool:
49
+ """Return True if ``pyproject_path`` declares ``[tool.uv.workspace]``."""
50
+ try:
51
+ with pyproject_path.open("rb") as f:
52
+ data = tomllib.load(f)
53
+ except (OSError, tomllib.TOMLDecodeError):
54
+ return False
55
+ return "workspace" in data.get("tool", {}).get("uv", {})
56
+
57
+
58
def _resolve_dirs(dirs: Sequence[Path | str]) -> list[Path]:
    """Turn the caller-supplied search dirs into concrete paths.

    Each entry has ``~`` expanded. Absolute entries are kept as they are.
    Relative entries (including the empty string, which denotes the base
    itself) are anchored at the marin project root when one is found, and at
    the current working directory otherwise.
    """
    project_root = find_project_root()

    def _anchor(entry: Path | str) -> Path:
        expanded = Path(entry).expanduser()
        if expanded.is_absolute():
            return expanded
        # The working directory is only consulted when it is actually needed.
        base = project_root if project_root is not None else Path.cwd()
        return base / expanded

    return [_anchor(entry) for entry in dirs]
76
+
77
+
78
def find_configs(
    dirs: Sequence[Path | str],
    name: str | None = None,
) -> dict[str, Path]:
    """Discover YAML config files across ``dirs``.

    Relative ``dirs`` are resolved against the marin project root (see
    :func:`find_project_root`); absolute paths are used as-is; ``~`` is
    expanded. An empty string resolves to the project root itself.

    Only regular files (or symlinks to them) count as configs: a directory
    whose name happens to end in ``.yaml``/``.yml`` is skipped.

    Args:
        dirs: Directories to search, in priority order. Entries that are not
            existing directories are silently skipped.
        name: When given, only return entries whose stem equals ``name``.

    Returns:
        A dict mapping config stem (filename without ``.yaml``/``.yml``) to
        its resolved ``Path``. When the same stem appears in multiple dirs,
        the first (highest-priority) match wins; within one directory,
        entries are visited in sorted order.
    """
    configs: dict[str, Path] = {}
    for directory in _resolve_dirs(dirs):
        if not directory.is_dir():
            continue
        for path in sorted(directory.iterdir()):
            if path.suffix not in _YAML_SUFFIXES:
                continue
            stem = path.stem
            if name is not None and stem != name:
                continue
            # Checked last: it is the only test that touches the filesystem.
            if not path.is_file():
                continue
            # Earlier directories take priority, so never overwrite a stem.
            configs.setdefault(stem, path)
    return configs
110
+
111
+
112
def resolve_cluster_config(name: str, dirs: Sequence[Path | str]) -> Path:
    """Map a cluster name, or a direct file path, to a YAML config file.

    A ``name`` that (after ``~`` expansion) points at an existing file is
    returned immediately. Otherwise a trailing ``.yaml``/``.yml`` extension
    is dropped from ``name`` and ``dirs`` are searched for a config whose
    stem equals the result.

    Args:
        name: Cluster name (e.g. ``"marin-dev"``) or path to an existing file.
        dirs: Directories to search.

    Returns:
        The ``Path`` of the config file that was found.

    Raises:
        FileNotFoundError: If nothing matches; the message lists every
            directory that was searched.
    """
    direct = Path(name).expanduser()
    if direct.is_file():
        return direct

    # Accept both "marin-dev" and "marin-dev.yaml" as the lookup key.
    raw = Path(name)
    search_stem = name
    if raw.suffix in _YAML_SUFFIXES:
        search_stem = raw.stem

    found = find_configs(dirs, name=search_stem).get(search_stem)
    if found is not None:
        logger.debug("Resolved cluster config %r -> %s", name, found)
        return found

    searched_str = "\n ".join(map(str, _resolve_dirs(dirs)))
    raise FileNotFoundError(f"No config file found for cluster {name!r}.\nSearched directories:\n {searched_str}")
146
+
147
+
148
def list_cluster_configs(dirs: Sequence[Path | str]) -> dict[str, Path]:
    """Return every YAML cluster config found in ``dirs``, keyed by stem.

    Convenience wrapper that delegates to :func:`find_configs` with no name
    filter applied.
    """
    all_configs = find_configs(dirs, name=None)
    return all_configs