dr-wandb 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dr-wandb might be problematic. Click here for more details.
- dr_wandb-0.1.0/.claude/settings.local.json +10 -0
- dr_wandb-0.1.0/.example.env +12 -0
- dr_wandb-0.1.0/.gitignore +209 -0
- dr_wandb-0.1.0/.python-version +1 -0
- dr_wandb-0.1.0/CLAUDE.md +116 -0
- dr_wandb-0.1.0/LICENSE +21 -0
- dr_wandb-0.1.0/PKG-INFO +123 -0
- dr_wandb-0.1.0/README.md +108 -0
- dr_wandb-0.1.0/docs/processes/CODING_PRINCIPLES.md +1 -0
- dr_wandb-0.1.0/docs/processes/README.md +1 -0
- dr_wandb-0.1.0/docs/processes/audit_synthesis_pipeline.md +1 -0
- dr_wandb-0.1.0/docs/processes/design_philosophy.md +1 -0
- dr_wandb-0.1.0/docs/processes/documentation_organizer_guide.md +1 -0
- dr_wandb-0.1.0/docs/processes/fresh_eyes_review_guide.md +1 -0
- dr_wandb-0.1.0/docs/processes/general_project_extraction_prompt.md +1 -0
- dr_wandb-0.1.0/docs/processes/project_consolidation_methodology.md +1 -0
- dr_wandb-0.1.0/docs/processes/reporting_guide.md +1 -0
- dr_wandb-0.1.0/docs/processes/strategic_collaboration_guide.md +1 -0
- dr_wandb-0.1.0/docs/processes/tactical_execution_guide.md +1 -0
- dr_wandb-0.1.0/pyproject.toml +156 -0
- dr_wandb-0.1.0/src/dr_wandb/__init__.py +2 -0
- dr_wandb-0.1.0/src/dr_wandb/cli/__init__.py +0 -0
- dr_wandb-0.1.0/src/dr_wandb/cli/download.py +128 -0
- dr_wandb-0.1.0/src/dr_wandb/constants.py +20 -0
- dr_wandb-0.1.0/src/dr_wandb/downloader.py +118 -0
- dr_wandb-0.1.0/src/dr_wandb/history_entry_record.py +62 -0
- dr_wandb-0.1.0/src/dr_wandb/py.typed +0 -0
- dr_wandb-0.1.0/src/dr_wandb/run_record.py +115 -0
- dr_wandb-0.1.0/src/dr_wandb/store.py +193 -0
- dr_wandb-0.1.0/src/dr_wandb/utils.py +57 -0
- dr_wandb-0.1.0/tests/conftest.py +122 -0
- dr_wandb-0.1.0/tests/test_cli_contract.py +234 -0
- dr_wandb-0.1.0/tests/test_cli_download.py +97 -0
- dr_wandb-0.1.0/tests/test_history_entry_record.py +140 -0
- dr_wandb-0.1.0/tests/test_query_builders.py +90 -0
- dr_wandb-0.1.0/tests/test_run_record.py +151 -0
- dr_wandb-0.1.0/tests/test_utils.py +85 -0
- dr_wandb-0.1.0/uv.lock +667 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# .example.env
|
|
2
|
+
# Copy to .env and update with your values
|
|
3
|
+
|
|
4
|
+
# Database connection (required for production)
|
|
5
|
+
DR_WANDB_DATABASE_URL=postgresql+psycopg2://localhost/wandb
|
|
6
|
+
|
|
7
|
+
# Output dir (optional)
|
|
8
|
+
DR_WANDB_OUTPUT_DIR=data
|
|
9
|
+
|
|
10
|
+
# Default W&B project (optional - can use cli instead)
|
|
11
|
+
DR_WANDB_PROJECT=project
|
|
12
|
+
DR_WANDB_ENTITY=entity
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
data/
|
|
2
|
+
|
|
3
|
+
# Byte-compiled / optimized / DLL files
|
|
4
|
+
__pycache__/
|
|
5
|
+
*.py[codz]
|
|
6
|
+
*$py.class
|
|
7
|
+
|
|
8
|
+
# C extensions
|
|
9
|
+
*.so
|
|
10
|
+
|
|
11
|
+
# Distribution / packaging
|
|
12
|
+
.Python
|
|
13
|
+
build/
|
|
14
|
+
develop-eggs/
|
|
15
|
+
dist/
|
|
16
|
+
downloads/
|
|
17
|
+
eggs/
|
|
18
|
+
.eggs/
|
|
19
|
+
lib/
|
|
20
|
+
lib64/
|
|
21
|
+
parts/
|
|
22
|
+
sdist/
|
|
23
|
+
var/
|
|
24
|
+
wheels/
|
|
25
|
+
share/python-wheels/
|
|
26
|
+
*.egg-info/
|
|
27
|
+
.installed.cfg
|
|
28
|
+
*.egg
|
|
29
|
+
MANIFEST
|
|
30
|
+
|
|
31
|
+
# PyInstaller
|
|
32
|
+
# Usually these files are written by a python script from a template
|
|
33
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
34
|
+
*.manifest
|
|
35
|
+
*.spec
|
|
36
|
+
|
|
37
|
+
# Installer logs
|
|
38
|
+
pip-log.txt
|
|
39
|
+
pip-delete-this-directory.txt
|
|
40
|
+
|
|
41
|
+
# Unit test / coverage reports
|
|
42
|
+
htmlcov/
|
|
43
|
+
.tox/
|
|
44
|
+
.nox/
|
|
45
|
+
.coverage
|
|
46
|
+
.coverage.*
|
|
47
|
+
.cache
|
|
48
|
+
nosetests.xml
|
|
49
|
+
coverage.xml
|
|
50
|
+
*.cover
|
|
51
|
+
*.py.cover
|
|
52
|
+
.hypothesis/
|
|
53
|
+
.pytest_cache/
|
|
54
|
+
cover/
|
|
55
|
+
|
|
56
|
+
# Translations
|
|
57
|
+
*.mo
|
|
58
|
+
*.pot
|
|
59
|
+
|
|
60
|
+
# Django stuff:
|
|
61
|
+
*.log
|
|
62
|
+
local_settings.py
|
|
63
|
+
db.sqlite3
|
|
64
|
+
db.sqlite3-journal
|
|
65
|
+
|
|
66
|
+
# Flask stuff:
|
|
67
|
+
instance/
|
|
68
|
+
.webassets-cache
|
|
69
|
+
|
|
70
|
+
# Scrapy stuff:
|
|
71
|
+
.scrapy
|
|
72
|
+
|
|
73
|
+
# Sphinx documentation
|
|
74
|
+
docs/_build/
|
|
75
|
+
|
|
76
|
+
# PyBuilder
|
|
77
|
+
.pybuilder/
|
|
78
|
+
target/
|
|
79
|
+
|
|
80
|
+
# Jupyter Notebook
|
|
81
|
+
.ipynb_checkpoints
|
|
82
|
+
|
|
83
|
+
# IPython
|
|
84
|
+
profile_default/
|
|
85
|
+
ipython_config.py
|
|
86
|
+
|
|
87
|
+
# pyenv
|
|
88
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
89
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
90
|
+
# .python-version
|
|
91
|
+
|
|
92
|
+
# pipenv
|
|
93
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
94
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
95
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
96
|
+
# install all needed dependencies.
|
|
97
|
+
#Pipfile.lock
|
|
98
|
+
|
|
99
|
+
# UV
|
|
100
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
101
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
102
|
+
# commonly ignored for libraries.
|
|
103
|
+
#uv.lock
|
|
104
|
+
|
|
105
|
+
# poetry
|
|
106
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
107
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
108
|
+
# commonly ignored for libraries.
|
|
109
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
110
|
+
#poetry.lock
|
|
111
|
+
#poetry.toml
|
|
112
|
+
|
|
113
|
+
# pdm
|
|
114
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
115
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
116
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
117
|
+
#pdm.lock
|
|
118
|
+
#pdm.toml
|
|
119
|
+
.pdm-python
|
|
120
|
+
.pdm-build/
|
|
121
|
+
|
|
122
|
+
# pixi
|
|
123
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
124
|
+
#pixi.lock
|
|
125
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
126
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
127
|
+
.pixi
|
|
128
|
+
|
|
129
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
130
|
+
__pypackages__/
|
|
131
|
+
|
|
132
|
+
# Celery stuff
|
|
133
|
+
celerybeat-schedule
|
|
134
|
+
celerybeat.pid
|
|
135
|
+
|
|
136
|
+
# SageMath parsed files
|
|
137
|
+
*.sage.py
|
|
138
|
+
|
|
139
|
+
# Environments
|
|
140
|
+
.env
|
|
141
|
+
.envrc
|
|
142
|
+
.venv
|
|
143
|
+
env/
|
|
144
|
+
venv/
|
|
145
|
+
ENV/
|
|
146
|
+
env.bak/
|
|
147
|
+
venv.bak/
|
|
148
|
+
|
|
149
|
+
# Spyder project settings
|
|
150
|
+
.spyderproject
|
|
151
|
+
.spyproject
|
|
152
|
+
|
|
153
|
+
# Rope project settings
|
|
154
|
+
.ropeproject
|
|
155
|
+
|
|
156
|
+
# mkdocs documentation
|
|
157
|
+
/site
|
|
158
|
+
|
|
159
|
+
# mypy
|
|
160
|
+
.mypy_cache/
|
|
161
|
+
.dmypy.json
|
|
162
|
+
dmypy.json
|
|
163
|
+
|
|
164
|
+
# Pyre type checker
|
|
165
|
+
.pyre/
|
|
166
|
+
|
|
167
|
+
# pytype static type analyzer
|
|
168
|
+
.pytype/
|
|
169
|
+
|
|
170
|
+
# Cython debug symbols
|
|
171
|
+
cython_debug/
|
|
172
|
+
|
|
173
|
+
# PyCharm
|
|
174
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
175
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
176
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
177
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
178
|
+
#.idea/
|
|
179
|
+
|
|
180
|
+
# Abstra
|
|
181
|
+
# Abstra is an AI-powered process automation framework.
|
|
182
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
183
|
+
# Learn more at https://abstra.io/docs
|
|
184
|
+
.abstra/
|
|
185
|
+
|
|
186
|
+
# Visual Studio Code
|
|
187
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
188
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
189
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
190
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
191
|
+
# .vscode/
|
|
192
|
+
|
|
193
|
+
# Ruff stuff:
|
|
194
|
+
.ruff_cache/
|
|
195
|
+
|
|
196
|
+
# PyPI configuration file
|
|
197
|
+
.pypirc
|
|
198
|
+
|
|
199
|
+
# Cursor
|
|
200
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
201
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
202
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
203
|
+
.cursorignore
|
|
204
|
+
.cursorindexingignore
|
|
205
|
+
|
|
206
|
+
# Marimo
|
|
207
|
+
marimo/_static/
|
|
208
|
+
marimo/_lsp/
|
|
209
|
+
__marimo__/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
dr_wandb-0.1.0/CLAUDE.md
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides project level guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## 🚨 READ FIRST - DESIGN PHILOSOPHY
|
|
6
|
+
**MANDATORY:** Before starting ANY work, read `docs/processes/design_philosophy.md` to understand the core principles and methodology that guide this project. All code changes must align with these principles.
|
|
7
|
+
|
|
8
|
+
- **No Backward Compatibility**: This is a research library - breaking changes are acceptable for better design
|
|
9
|
+
- **Fail Fast, Fail Loudly**: Use assertions, avoid defensive programming that hides bugs
|
|
10
|
+
- **No Exception Handling**: Never use try-except blocks - let errors surface immediately
|
|
11
|
+
- **Assertions Over Exceptions**: Use `assert condition, "message"` instead of `raise ValueError()`
|
|
12
|
+
- **Minimize Friction**: Every design choice should reduce friction between idea and visualization
|
|
13
|
+
- **Embrace Change, Demand Consistency**: When making changes, update ALL affected parts
|
|
14
|
+
|
|
15
|
+
Remember: The goal is code that *disappears* into the background, allowing researchers to focus on their work.
|
|
16
|
+
|
|
17
|
+
## Essential Commands
|
|
18
|
+
- `us` runs `uv sync` - Install all dependencies including dev, test, and test-ml groups
|
|
19
|
+
- `lint` runs `uv run ruff check --fix .` - Lint code with ruff and apply autofixes where possible
|
|
20
|
+
- `ft` runs `uv run ruff format .` - Format code with ruff
|
|
21
|
+
- `uv run pytest` - Run tests with pytest (supports parallel execution with xdist)
|
|
22
|
+
- `lint_fix` - Run ruff format and then check with --fix
|
|
23
|
+
|
|
24
|
+
**IMPORTANT**: Do NOT run tests, linting, type checking, or formatting unless explicitly requested by the user. Focus on the requested changes only.
|
|
25
|
+
|
|
26
|
+
## 🎯 CODE STYLE REQUIREMENTS
|
|
27
|
+
|
|
28
|
+
### Zero Comments Policy
|
|
29
|
+
- **NEVER add ANY comments** - no docstrings, no inline comments, no block comments
|
|
30
|
+
- Code must be self-documenting through clear naming and structure
|
|
31
|
+
- Remove ALL existing comments when editing files (docstrings, # comments, etc.)
|
|
32
|
+
|
|
33
|
+
### Comprehensive Typing
|
|
34
|
+
- **ALL function signatures** must have complete type hints for parameters and return values
|
|
35
|
+
- Use `from typing import Any, Optional` etc. as needed
|
|
36
|
+
- Prefer `list`, `dict` etc over `List` and `Dict`
|
|
37
|
+
- Add `from __future__ import annotations` and use modern type hints
|
|
38
|
+
- Import types like `import pandas as pd` when using `pd.DataFrame` in hints
|
|
39
|
+
- If a circular import exists, use `TYPE_CHECKING` to gate
|
|
40
|
+
- All `__init__` methods must have `-> None` return type
|
|
41
|
+
- All class methods need proper `self` typing context
|
|
42
|
+
- Use specific types over `Any` when possible (e.g., `pd.DataFrame` not `Any`)
|
|
43
|
+
- Create custom types for clarity: `type GroupKey = tuple[tuple[str, Any], ...]`
|
|
44
|
+
- Example pattern:
|
|
45
|
+
```python
|
|
46
|
+
def method_name(self, param: str, optional_param: Optional[int] = None) -> dict[str, Any]:
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### File Structure
|
|
50
|
+
- **ALL imports at the very top** - no imports anywhere else in the file
|
|
51
|
+
- Type aliases near top after imports
|
|
52
|
+
- Magic values should NEVER be hardcoded throughout; all constants must be semantically named at the top of the module
|
|
53
|
+
- No module-level docstrings - remove entirely
|
|
54
|
+
- Class definitions without docstrings
|
|
55
|
+
- Methods without docstrings but with full type hints
|
|
56
|
+
|
|
57
|
+
### Replace Comments with Structure
|
|
58
|
+
- **Instead of comments** → Extract succinctly named helper functions
|
|
59
|
+
- **Instead of complex types** → Create descriptive type aliases
|
|
60
|
+
- Examples:
|
|
61
|
+
```python
|
|
62
|
+
# BAD: Complex code with comments
|
|
63
|
+
def process_data(self, data):
|
|
64
|
+
# Convert categorical columns to numeric for ML processing
|
|
65
|
+
processed = data.copy()
|
|
66
|
+
# ... complex logic ...
|
|
67
|
+
|
|
68
|
+
# GOOD: Self-documenting through function names and types
|
|
69
|
+
type CategoricalColumns = list[str]
|
|
70
|
+
type NumericData = pd.DataFrame
|
|
71
|
+
|
|
72
|
+
def process_data(self, data: pd.DataFrame) -> NumericData:
|
|
73
|
+
return self._convert_categorical_to_numeric(data)
|
|
74
|
+
|
|
75
|
+
def _convert_categorical_to_numeric(self, data: pd.DataFrame) -> NumericData:
|
|
76
|
+
# Clear, focused function that explains itself
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Fail Fast and Loud: Asserts Not Try-Except
|
|
80
|
+
- **Always aim to check assumptions with asserts**
|
|
81
|
+
- Avoid nested try-except blocks
|
|
82
|
+
- Instead, identify assumptions and assert them at the top of the function
|
|
83
|
+
|
|
84
|
+
## 🛠️ DEVELOPMENT WORKFLOW
|
|
85
|
+
|
|
86
|
+
### When Editing Files
|
|
87
|
+
1. **Read design philosophy first** - understand the core method principles
|
|
88
|
+
2. **Strip ALL comments** - docstrings, inline comments, everything
|
|
89
|
+
3. **Add comprehensive type hints** - every parameter, every return value
|
|
90
|
+
4. **Extract helper functions** - instead of complex inline logic with comments
|
|
91
|
+
5. **Import required typing modules** - add to imports as needed
|
|
92
|
+
6. **Test functionality** - ensure no behavioral changes from refactoring
|
|
93
|
+
|
|
94
|
+
### Code Quality Gates
|
|
95
|
+
- **Use type hints** on all functions
|
|
96
|
+
- **ALL imports at file top** - never mid-file, never in functions, never anywhere else
|
|
97
|
+
- **Use assertions, not exceptions** - single line `assert condition, "message"` instead of try-except or raising exceptions
|
|
98
|
+
- **Never use try-except blocks** - let errors bubble up; use assertions for validation
|
|
99
|
+
- **Show full modified functions**, not just diffs
|
|
100
|
+
- **Prefer explicit code** over clever code
|
|
101
|
+
- **Follow "Leave No Trace"** - remove all legacy patterns when making changes
|
|
102
|
+
|
|
103
|
+
### Git Shortcuts
|
|
104
|
+
| Shortcut | Command | Use |
|
|
105
|
+
|----------|---------|-----|
|
|
106
|
+
| `gst` | `git status` | Check state |
|
|
107
|
+
| `gd_agent` | `git --no-pager diff` | See changes |
|
|
108
|
+
| `glo` | `git log --oneline -10` | Recent commits |
|
|
109
|
+
| `ga .` | `git add .` | Stage files |
|
|
110
|
+
| `gc -m "msg"` | `git commit -m "msg"` | Commit |
|
|
111
|
+
|
|
112
|
+
### 📋 COMMIT STRATEGY
|
|
113
|
+
- **Small, semantic commits**: 20-30 lines per commit with clear purpose
|
|
114
|
+
- **Single line messages**: Succinct and clear, imperative mood
|
|
115
|
+
- **Quality gates**: Run linting/formatting before commits only when explicitly requested
|
|
116
|
+
- **Incremental building**: Each commit should be reviewable and complete
|
dr_wandb-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Danielle Rothermel
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
dr_wandb-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dr-wandb
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Interact with wandb from python
|
|
5
|
+
Author-email: Danielle Rothermel <danielle.rothermel@gmail.com>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Requires-Dist: pandas>=2.3.2
|
|
9
|
+
Requires-Dist: psycopg2>=2.9.10
|
|
10
|
+
Requires-Dist: pyarrow>=21.0.0
|
|
11
|
+
Requires-Dist: pydantic-settings>=2.10.1
|
|
12
|
+
Requires-Dist: sqlalchemy>=2.0.43
|
|
13
|
+
Requires-Dist: wandb>=0.21.4
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# dr_wandb
|
|
17
|
+
|
|
18
|
+
A command-line utility for downloading and archiving Weights & Biases experiment data to local storage formats optimized for offline analysis. Stores to PostgreSQL db + Parquet files, supports incremental updates and selective data retrieval.
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
uv add dr_wandb
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Prerequisites
|
|
27
|
+
|
|
28
|
+
- Python 3.12 or higher
|
|
29
|
+
- PostgreSQL database server
|
|
30
|
+
- Weights & Biases account with API access
|
|
31
|
+
- PyArrow for Parquet file operations
|
|
32
|
+
|
|
33
|
+
### Authentication
|
|
34
|
+
|
|
35
|
+
Configure Weights & Biases authentication using one of these methods:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
wandb login
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Or set the API key as an environment variable:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
export WANDB_API_KEY=your_api_key_here
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Basic Usage
|
|
48
|
+
|
|
49
|
+
Download all runs from a Weights & Biases project:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
wandb-download --entity your_entity --project your_project
|
|
53
|
+
|
|
54
|
+
Options:
|
|
55
|
+
--entity TEXT WandB entity (username or team name)
|
|
56
|
+
--project TEXT WandB project name
|
|
57
|
+
--runs-only Download only run metadata, skip training history
|
|
58
|
+
--force-refresh Download all data, ignoring existing records
|
|
59
|
+
--db-url TEXT PostgreSQL connection string
|
|
60
|
+
--output-dir TEXT Directory for exported Parquet files
|
|
61
|
+
--help Show help message and exit
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
The tool creates a PostgreSQL database, downloads experiment data, and exports Parquet files to the configured output directory. It tracks existing data and downloads only new or updated runs by default. A run is considered for update if:
|
|
65
|
+
|
|
66
|
+
- It does not exist in the local database
|
|
67
|
+
- Its state is "running" (indicating potential new data)
|
|
68
|
+
|
|
69
|
+
Use `--force-refresh` to download all runs regardless of existing data.
|
|
70
|
+
|
|
71
|
+
### Environment Variables
|
|
72
|
+
|
|
73
|
+
The tool reads configuration from environment variables with the `DR_WANDB_` prefix and supports `.env` files:
|
|
74
|
+
|
|
75
|
+
| Variable | Description | Default |
|
|
76
|
+
|----------|-------------|---------|
|
|
77
|
+
| `DR_WANDB_ENTITY` | Weights & Biases entity name | None |
|
|
78
|
+
| `DR_WANDB_PROJECT` | Weights & Biases project name | None |
|
|
79
|
+
| `DR_WANDB_DATABASE_URL` | PostgreSQL connection string | `postgresql+psycopg2://localhost/wandb` |
|
|
80
|
+
| `DR_WANDB_OUTPUT_DIR` | Directory for exported files | `./data` |
|
|
81
|
+
|
|
82
|
+
### Database Configuration
|
|
83
|
+
|
|
84
|
+
The PostgreSQL connection string follows the standard format:
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
postgresql+psycopg2://username:password@host:port/database_name
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
If the specified database does not exist, the tool will attempt to create it automatically.
|
|
91
|
+
|
|
92
|
+
## Data Schema
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
The tool generates the following files in the output directory:
|
|
96
|
+
|
|
97
|
+
- `runs_metadata.parquet` - Complete run metadata including configurations, summaries, and system information
|
|
98
|
+
- `runs_history.parquet` - Training metrics and logged values over time
|
|
99
|
+
- `runs_metadata_{component}.parquet` - Component-specific files for config, summary, wandb_metadata, system_metrics, system_attrs, and sweep_info
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
**Run Records**
|
|
103
|
+
- **run_id**: Unique identifier for the experiment run
|
|
104
|
+
- **run_name**: Human-readable name assigned to the run
|
|
105
|
+
- **state**: Current state (finished, running, crashed, failed, killed)
|
|
106
|
+
- **project**: Project name
|
|
107
|
+
- **entity**: Entity name
|
|
108
|
+
- **created_at**: Timestamp of run creation
|
|
109
|
+
- **config**: Experiment configuration parameters (JSONB)
|
|
110
|
+
- **summary**: Final metrics and outputs (JSONB)
|
|
111
|
+
- **wandb_metadata**: Platform-specific metadata (JSONB)
|
|
112
|
+
- **system_metrics**: Hardware and system information (JSONB)
|
|
113
|
+
- **system_attrs**: Additional system attributes (JSONB)
|
|
114
|
+
- **sweep_info**: Hyperparameter sweep information (JSONB)
|
|
115
|
+
|
|
116
|
+
**Training History Records**
|
|
117
|
+
- **run_id**: Reference to the parent run
|
|
118
|
+
- **step**: Training step number
|
|
119
|
+
- **timestamp**: Time of metric logging
|
|
120
|
+
- **runtime**: Elapsed time since run start
|
|
121
|
+
- **wandb_metadata**: Platform logging metadata (JSONB)
|
|
122
|
+
- **metrics**: All logged metrics and values (JSONB, flattened in Parquet export)
|
|
123
|
+
|
dr_wandb-0.1.0/README.md
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# dr_wandb
|
|
2
|
+
|
|
3
|
+
A command-line utility for downloading and archiving Weights & Biases experiment data to local storage formats optimized for offline analysis. Stores to PostgreSQL db + Parquet files, supports incremental updates and selective data retrieval.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
uv add dr_wandb
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
### Prerequisites
|
|
12
|
+
|
|
13
|
+
- Python 3.12 or higher
|
|
14
|
+
- PostgreSQL database server
|
|
15
|
+
- Weights & Biases account with API access
|
|
16
|
+
- PyArrow for Parquet file operations
|
|
17
|
+
|
|
18
|
+
### Authentication
|
|
19
|
+
|
|
20
|
+
Configure Weights & Biases authentication using one of these methods:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
wandb login
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Or set the API key as an environment variable:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
export WANDB_API_KEY=your_api_key_here
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Basic Usage
|
|
33
|
+
|
|
34
|
+
Download all runs from a Weights & Biases project:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
wandb-download --entity your_entity --project your_project
|
|
38
|
+
|
|
39
|
+
Options:
|
|
40
|
+
--entity TEXT WandB entity (username or team name)
|
|
41
|
+
--project TEXT WandB project name
|
|
42
|
+
--runs-only Download only run metadata, skip training history
|
|
43
|
+
--force-refresh Download all data, ignoring existing records
|
|
44
|
+
--db-url TEXT PostgreSQL connection string
|
|
45
|
+
--output-dir TEXT Directory for exported Parquet files
|
|
46
|
+
--help Show help message and exit
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
The tool creates a PostgreSQL database, downloads experiment data, and exports Parquet files to the configured output directory. It tracks existing data and downloads only new or updated runs by default. A run is considered for update if:
|
|
50
|
+
|
|
51
|
+
- It does not exist in the local database
|
|
52
|
+
- Its state is "running" (indicating potential new data)
|
|
53
|
+
|
|
54
|
+
Use `--force-refresh` to download all runs regardless of existing data.
|
|
55
|
+
|
|
56
|
+
### Environment Variables
|
|
57
|
+
|
|
58
|
+
The tool reads configuration from environment variables with the `DR_WANDB_` prefix and supports `.env` files:
|
|
59
|
+
|
|
60
|
+
| Variable | Description | Default |
|
|
61
|
+
|----------|-------------|---------|
|
|
62
|
+
| `DR_WANDB_ENTITY` | Weights & Biases entity name | None |
|
|
63
|
+
| `DR_WANDB_PROJECT` | Weights & Biases project name | None |
|
|
64
|
+
| `DR_WANDB_DATABASE_URL` | PostgreSQL connection string | `postgresql+psycopg2://localhost/wandb` |
|
|
65
|
+
| `DR_WANDB_OUTPUT_DIR` | Directory for exported files | `./data` |
|
|
66
|
+
|
|
67
|
+
### Database Configuration
|
|
68
|
+
|
|
69
|
+
The PostgreSQL connection string follows the standard format:
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
postgresql+psycopg2://username:password@host:port/database_name
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
If the specified database does not exist, the tool will attempt to create it automatically.
|
|
76
|
+
|
|
77
|
+
## Data Schema
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
The tool generates the following files in the output directory:
|
|
81
|
+
|
|
82
|
+
- `runs_metadata.parquet` - Complete run metadata including configurations, summaries, and system information
|
|
83
|
+
- `runs_history.parquet` - Training metrics and logged values over time
|
|
84
|
+
- `runs_metadata_{component}.parquet` - Component-specific files for config, summary, wandb_metadata, system_metrics, system_attrs, and sweep_info
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
**Run Records**
|
|
88
|
+
- **run_id**: Unique identifier for the experiment run
|
|
89
|
+
- **run_name**: Human-readable name assigned to the run
|
|
90
|
+
- **state**: Current state (finished, running, crashed, failed, killed)
|
|
91
|
+
- **project**: Project name
|
|
92
|
+
- **entity**: Entity name
|
|
93
|
+
- **created_at**: Timestamp of run creation
|
|
94
|
+
- **config**: Experiment configuration parameters (JSONB)
|
|
95
|
+
- **summary**: Final metrics and outputs (JSONB)
|
|
96
|
+
- **wandb_metadata**: Platform-specific metadata (JSONB)
|
|
97
|
+
- **system_metrics**: Hardware and system information (JSONB)
|
|
98
|
+
- **system_attrs**: Additional system attributes (JSONB)
|
|
99
|
+
- **sweep_info**: Hyperparameter sweep information (JSONB)
|
|
100
|
+
|
|
101
|
+
**Training History Records**
|
|
102
|
+
- **run_id**: Reference to the parent run
|
|
103
|
+
- **step**: Training step number
|
|
104
|
+
- **timestamp**: Time of metric logging
|
|
105
|
+
- **runtime**: Elapsed time since run start
|
|
106
|
+
- **wandb_metadata**: Platform logging metadata (JSONB)
|
|
107
|
+
- **metrics**: All logged metrics and values (JSONB, flattened in Parquet export)
|
|
108
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/CODING_PRINCIPLES.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/README.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/audit_synthesis_pipeline.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/design_philosophy.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/documentation_organizer_guide.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/fresh_eyes_review_guide.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/general_project_extraction_prompt.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/project_consolidation_methodology.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/reporting_guide.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/strategic_collaboration_guide.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/tactical_execution_guide.md
|