dr-wandb 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dr-wandb might be problematic. Click here for more details.

Files changed (38) hide show
  1. dr_wandb-0.1.0/.claude/settings.local.json +10 -0
  2. dr_wandb-0.1.0/.example.env +12 -0
  3. dr_wandb-0.1.0/.gitignore +209 -0
  4. dr_wandb-0.1.0/.python-version +1 -0
  5. dr_wandb-0.1.0/CLAUDE.md +116 -0
  6. dr_wandb-0.1.0/LICENSE +21 -0
  7. dr_wandb-0.1.0/PKG-INFO +123 -0
  8. dr_wandb-0.1.0/README.md +108 -0
  9. dr_wandb-0.1.0/docs/processes/CODING_PRINCIPLES.md +1 -0
  10. dr_wandb-0.1.0/docs/processes/README.md +1 -0
  11. dr_wandb-0.1.0/docs/processes/audit_synthesis_pipeline.md +1 -0
  12. dr_wandb-0.1.0/docs/processes/design_philosophy.md +1 -0
  13. dr_wandb-0.1.0/docs/processes/documentation_organizer_guide.md +1 -0
  14. dr_wandb-0.1.0/docs/processes/fresh_eyes_review_guide.md +1 -0
  15. dr_wandb-0.1.0/docs/processes/general_project_extraction_prompt.md +1 -0
  16. dr_wandb-0.1.0/docs/processes/project_consolidation_methodology.md +1 -0
  17. dr_wandb-0.1.0/docs/processes/reporting_guide.md +1 -0
  18. dr_wandb-0.1.0/docs/processes/strategic_collaboration_guide.md +1 -0
  19. dr_wandb-0.1.0/docs/processes/tactical_execution_guide.md +1 -0
  20. dr_wandb-0.1.0/pyproject.toml +156 -0
  21. dr_wandb-0.1.0/src/dr_wandb/__init__.py +2 -0
  22. dr_wandb-0.1.0/src/dr_wandb/cli/__init__.py +0 -0
  23. dr_wandb-0.1.0/src/dr_wandb/cli/download.py +128 -0
  24. dr_wandb-0.1.0/src/dr_wandb/constants.py +20 -0
  25. dr_wandb-0.1.0/src/dr_wandb/downloader.py +118 -0
  26. dr_wandb-0.1.0/src/dr_wandb/history_entry_record.py +62 -0
  27. dr_wandb-0.1.0/src/dr_wandb/py.typed +0 -0
  28. dr_wandb-0.1.0/src/dr_wandb/run_record.py +115 -0
  29. dr_wandb-0.1.0/src/dr_wandb/store.py +193 -0
  30. dr_wandb-0.1.0/src/dr_wandb/utils.py +57 -0
  31. dr_wandb-0.1.0/tests/conftest.py +122 -0
  32. dr_wandb-0.1.0/tests/test_cli_contract.py +234 -0
  33. dr_wandb-0.1.0/tests/test_cli_download.py +97 -0
  34. dr_wandb-0.1.0/tests/test_history_entry_record.py +140 -0
  35. dr_wandb-0.1.0/tests/test_query_builders.py +90 -0
  36. dr_wandb-0.1.0/tests/test_run_record.py +151 -0
  37. dr_wandb-0.1.0/tests/test_utils.py +85 -0
  38. dr_wandb-0.1.0/uv.lock +667 -0
@@ -0,0 +1,10 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "WebSearch",
5
+ "WebFetch(domain:docs.wandb.ai)"
6
+ ],
7
+ "deny": [],
8
+ "ask": []
9
+ }
10
+ }
@@ -0,0 +1,12 @@
1
+ # .example.env
2
+ # Copy to .env and update with your values
3
+
4
+ # Database connection (required for production)
5
+ DR_WANDB_DATABASE_URL=postgresql+psycopg2://localhost/wandb
6
+
7
+ # Output dir (optional)
8
+ DR_WANDB_OUTPUT_DIR=data
9
+
10
+ # Default W&B project (optional - can use cli instead)
11
+ DR_WANDB_PROJECT=project
12
+ DR_WANDB_ENTITY=entity
@@ -0,0 +1,209 @@
1
+ data/
2
+
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__/
5
+ *.py[codz]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py.cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # UV
100
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ #uv.lock
104
+
105
+ # poetry
106
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
107
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
108
+ # commonly ignored for libraries.
109
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
110
+ #poetry.lock
111
+ #poetry.toml
112
+
113
+ # pdm
114
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
115
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
116
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
117
+ #pdm.lock
118
+ #pdm.toml
119
+ .pdm-python
120
+ .pdm-build/
121
+
122
+ # pixi
123
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
124
+ #pixi.lock
125
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
126
+ # in the .venv directory. It is recommended not to include this directory in version control.
127
+ .pixi
128
+
129
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
130
+ __pypackages__/
131
+
132
+ # Celery stuff
133
+ celerybeat-schedule
134
+ celerybeat.pid
135
+
136
+ # SageMath parsed files
137
+ *.sage.py
138
+
139
+ # Environments
140
+ .env
141
+ .envrc
142
+ .venv
143
+ env/
144
+ venv/
145
+ ENV/
146
+ env.bak/
147
+ venv.bak/
148
+
149
+ # Spyder project settings
150
+ .spyderproject
151
+ .spyproject
152
+
153
+ # Rope project settings
154
+ .ropeproject
155
+
156
+ # mkdocs documentation
157
+ /site
158
+
159
+ # mypy
160
+ .mypy_cache/
161
+ .dmypy.json
162
+ dmypy.json
163
+
164
+ # Pyre type checker
165
+ .pyre/
166
+
167
+ # pytype static type analyzer
168
+ .pytype/
169
+
170
+ # Cython debug symbols
171
+ cython_debug/
172
+
173
+ # PyCharm
174
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
175
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
176
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
177
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
178
+ #.idea/
179
+
180
+ # Abstra
181
+ # Abstra is an AI-powered process automation framework.
182
+ # Ignore directories containing user credentials, local state, and settings.
183
+ # Learn more at https://abstra.io/docs
184
+ .abstra/
185
+
186
+ # Visual Studio Code
187
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
188
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
189
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
190
+ # you could uncomment the following to ignore the entire vscode folder
191
+ # .vscode/
192
+
193
+ # Ruff stuff:
194
+ .ruff_cache/
195
+
196
+ # PyPI configuration file
197
+ .pypirc
198
+
199
+ # Cursor
200
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
201
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
202
+ # refer to https://docs.cursor.com/context/ignore-files
203
+ .cursorignore
204
+ .cursorindexingignore
205
+
206
+ # Marimo
207
+ marimo/_static/
208
+ marimo/_lsp/
209
+ __marimo__/
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,116 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides project level guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## 🚨 READ FIRST - DESIGN PHILOSOPHY
6
+ **MANDATORY:** Before starting ANY work, read `docs/processes/design_philosophy.md` to understand the core principles and methodology that guide this project. All code changes must align with these principles.
7
+
8
+ - **No Backward Compatibility**: This is a research library - breaking changes are acceptable for better design
9
+ - **Fail Fast, Fail Loudly**: Use assertions, avoid defensive programming that hides bugs
10
+ - **No Exception Handling**: Never use try-catch blocks - let errors surface immediately
11
+ - **Assertions Over Exceptions**: Use `assert condition, "message"` instead of `raise ValueError()`
12
+ - **Minimize Friction**: Every design choice should reduce friction between idea and visualization
13
+ - **Embrace Change, Demand Consistency**: When making changes, update ALL affected parts
14
+
15
+ Remember: The goal is code that *disappears* into the background, allowing researchers to focus on their work.
16
+
17
+ ## Essential Commands
18
+ - `us` runs `uv sync` - Install all dependencies including dev, test, and test-ml groups
19
+ - `lint` runs `uv run ruff check --fix .` - Lint code with ruff and apply autofixes where possible
20
+ - `ft` runs `uv run ruff format .` - Format code with ruff
21
+ - `uv run pytest` - Run tests with pytest (supports parallel execution with xdist)
22
+ - `lint_fix` - Run ruff format and then check with --fix
23
+
24
+ **IMPORTANT**: Do NOT run tests, linting, type checking, or formatting unless explicitly requested by the user. Focus on the requested changes only.
25
+
26
+ ## 🎯 CODE STYLE REQUIREMENTS
27
+
28
+ ### Zero Comments Policy
29
+ - **NEVER add ANY comments** - no docstrings, no inline comments, no block comments
30
+ - Code must be self-documenting through clear naming and structure
31
+ - Remove ALL existing comments when editing files (docstrings, # comments, etc.)
32
+
33
+ ### Comprehensive Typing
34
+ - **ALL function signatures** must have complete type hints for parameters and return values
35
+ - Use `from typing import Any, Optional` etc. as needed
36
+ - Prefer `list`, `dict` etc over `List` and `Dict`
37
+ - Add `from __future__ import annotations` and use modern type hints
38
+ - Import types like `import pandas as pd` when using `pd.DataFrame` in hints
39
+ - If a circular import exists, use `TYPE_CHECKING` to gate
40
+ - All `__init__` methods must have `-> None` return type
41
+ - All class methods need proper `self` typing context
42
+ - Use specific types over `Any` when possible (e.g., `pd.DataFrame` not `Any`)
43
+ - Create custom types for clarity: `type GroupKey = tuple[tuple[str, Any], ...]`
44
+ - Example pattern:
45
+ ```python
46
+ def method_name(self, param: str, optional_param: Optional[int] = None) -> dict[str, Any]:
47
+ ```
48
+
49
+ ### File Structure
50
+ - **ALL imports at the very top** - no imports anywhere else in the file
51
+ - Type aliases near top after imports
52
+ - Magic values should NEVER be hardcoded throughout the code; all constants must be semantically named at the top of the module
53
+ - No module-level docstrings - remove entirely
54
+ - Class definitions without docstrings
55
+ - Methods without docstrings but with full type hints
56
+
57
+ ### Replace Comments with Structure
58
+ - **Instead of comments** → Extract succinctly named helper functions
59
+ - **Instead of complex types** → Create descriptive type aliases
60
+ - Examples:
61
+ ```python
62
+ # BAD: Complex code with comments
63
+ def process_data(self, data):
64
+ # Convert categorical columns to numeric for ML processing
65
+ processed = data.copy()
66
+ # ... complex logic ...
67
+
68
+ # GOOD: Self-documenting through function names and types
69
+ type CategoricalColumns = list[str]
70
+ type NumericData = pd.DataFrame
71
+
72
+ def process_data(self, data: pd.DataFrame) -> NumericData:
73
+ return self._convert_categorical_to_numeric(data)
74
+
75
+ def _convert_categorical_to_numeric(self, data: pd.DataFrame) -> NumericData:
76
+ # Clear, focused function that explains itself
77
+ ```
78
+
79
+ ### Fail Fast and Loud: Asserts Not Try-Except
80
+ - **Always aim to check assumptions with asserts**
81
+ - Avoid nested try-except blocks
82
+ - Instead, identify assumptions and assert them at the top of the function
83
+
84
+ ## 🛠️ DEVELOPMENT WORKFLOW
85
+
86
+ ### When Editing Files
87
+ 1. **Read design philosophy first** - understand the core method principles
88
+ 2. **Strip ALL comments** - docstrings, inline comments, everything
89
+ 3. **Add comprehensive type hints** - every parameter, every return value
90
+ 4. **Extract helper functions** - instead of complex inline logic with comments
91
+ 5. **Import required typing modules** - add to imports as needed
92
+ 6. **Test functionality** - ensure no behavioral changes from refactoring
93
+
94
+ ### Code Quality Gates
95
+ - **Use type hints** on all functions
96
+ - **ALL imports at file top** - never mid-file, never in functions, never anywhere else
97
+ - **Use assertions, not exceptions** - single line `assert condition, "message"` instead of try-catch or raising exceptions
98
+ - **Never use try-catch blocks** - let errors bubble up; use assertions for validation
99
+ - **Show full modified functions**, not just diffs
100
+ - **Prefer explicit code** over clever code
101
+ - **Follow "Leave No Trace"** - remove all legacy patterns when making changes
102
+
103
+ ### Git Shortcuts
104
+ | Shortcut | Command | Use |
105
+ |----------|---------|-----|
106
+ | `gst` | `git status` | Check state |
107
+ | `gd_agent` | `git --no-pager diff` | See changes |
108
+ | `glo` | `git log --oneline -10` | Recent commits |
109
+ | `ga .` | `git add .` | Stage files |
110
+ | `gc -m "msg"` | `git commit -m "msg"` | Commit |
111
+
112
+ ### 📋 COMMIT STRATEGY
113
+ - **Small, semantic commits**: 20-30 lines per commit with clear purpose
114
+ - **Single line messages**: Succinct and clear, imperative mood
115
+ - **Quality gates**: Run linting/formatting before commits only when explicitly requested
116
+ - **Incremental building**: Each commit should be reviewable and complete
dr_wandb-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Danielle Rothermel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,123 @@
1
+ Metadata-Version: 2.4
2
+ Name: dr-wandb
3
+ Version: 0.1.0
4
+ Summary: Interact with wandb from python
5
+ Author-email: Danielle Rothermel <danielle.rothermel@gmail.com>
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.12
8
+ Requires-Dist: pandas>=2.3.2
9
+ Requires-Dist: psycopg2>=2.9.10
10
+ Requires-Dist: pyarrow>=21.0.0
11
+ Requires-Dist: pydantic-settings>=2.10.1
12
+ Requires-Dist: sqlalchemy>=2.0.43
13
+ Requires-Dist: wandb>=0.21.4
14
+ Description-Content-Type: text/markdown
15
+
16
+ # dr_wandb
17
+
18
+ A command-line utility for downloading and archiving Weights & Biases experiment data to local storage formats optimized for offline analysis. Stores to PostgreSQL db + Parquet files, supports incremental updates and selective data retrieval.
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ uv add dr_wandb
24
+ ```
25
+
26
+ ### Prerequisites
27
+
28
+ - Python 3.12 or higher
29
+ - PostgreSQL database server
30
+ - Weights & Biases account with API access
31
+ - PyArrow for Parquet file operations
32
+
33
+ ### Authentication
34
+
35
+ Configure Weights & Biases authentication using one of these methods:
36
+
37
+ ```bash
38
+ wandb login
39
+ ```
40
+
41
+ Or set the API key as an environment variable:
42
+
43
+ ```bash
44
+ export WANDB_API_KEY=your_api_key_here
45
+ ```
46
+
47
+ ## Basic Usage
48
+
49
+ Download all runs from a Weights & Biases project:
50
+
51
+ ```bash
52
+ wandb-download --entity your_entity --project your_project
53
+
54
+ Options:
55
+ --entity TEXT WandB entity (username or team name)
56
+ --project TEXT WandB project name
57
+ --runs-only Download only run metadata, skip training history
58
+ --force-refresh Download all data, ignoring existing records
59
+ --db-url TEXT PostgreSQL connection string
60
+ --output-dir TEXT Directory for exported Parquet files
61
+ --help Show help message and exit
62
+ ```
63
+
64
+ The tool creates a PostgreSQL database, downloads experiment data, and exports Parquet files to the configured output directory. The tool tracks existing data and downloads only new or updated runs by default. A run is considered for update if:
65
+
66
+ - It does not exist in the local database
67
+ - Its state is "running" (indicating potential new data)
68
+
69
+ Use `--force-refresh` to download all runs regardless of existing data.
70
+
71
+ ### Environment Variables
72
+
73
+ The tool reads configuration from environment variables with the `DR_WANDB_` prefix and supports `.env` files:
74
+
75
+ | Variable | Description | Default |
76
+ |----------|-------------|---------|
77
+ | `DR_WANDB_ENTITY` | Weights & Biases entity name | None |
78
+ | `DR_WANDB_PROJECT` | Weights & Biases project name | None |
79
+ | `DR_WANDB_DATABASE_URL` | PostgreSQL connection string | `postgresql+psycopg2://localhost/wandb` |
80
+ | `DR_WANDB_OUTPUT_DIR` | Directory for exported files | `./data` |
81
+
82
+ ### Database Configuration
83
+
84
+ The PostgreSQL connection string follows the standard format:
85
+
86
+ ```
87
+ postgresql+psycopg2://username:password@host:port/database_name
88
+ ```
89
+
90
+ If the specified database does not exist, the tool will attempt to create it automatically.
91
+
92
+ ## Data Schema
93
+
94
+
95
+ The tool generates the following files in the output directory:
96
+
97
+ - `runs_metadata.parquet` - Complete run metadata including configurations, summaries, and system information
98
+ - `runs_history.parquet` - Training metrics and logged values over time
99
+ - `runs_metadata_{component}.parquet` - Component-specific files for config, summary, wandb_metadata, system_metrics, system_attrs, and sweep_info
100
+
101
+
102
+ **Run Records**
103
+ - **run_id**: Unique identifier for the experiment run
104
+ - **run_name**: Human-readable name assigned to the run
105
+ - **state**: Current state (finished, running, crashed, failed, killed)
106
+ - **project**: Project name
107
+ - **entity**: Entity name
108
+ - **created_at**: Timestamp of run creation
109
+ - **config**: Experiment configuration parameters (JSONB)
110
+ - **summary**: Final metrics and outputs (JSONB)
111
+ - **wandb_metadata**: Platform-specific metadata (JSONB)
112
+ - **system_metrics**: Hardware and system information (JSONB)
113
+ - **system_attrs**: Additional system attributes (JSONB)
114
+ - **sweep_info**: Hyperparameter sweep information (JSONB)
115
+
116
+ **Training History Records**
117
+ - **run_id**: Reference to the parent run
118
+ - **step**: Training step number
119
+ - **timestamp**: Time of metric logging
120
+ - **runtime**: Elapsed time since run start
121
+ - **wandb_metadata**: Platform logging metadata (JSONB)
122
+ - **metrics**: All logged metrics and values (JSONB, flattened in Parquet export)
123
+
@@ -0,0 +1,108 @@
1
+ # dr_wandb
2
+
3
+ A command-line utility for downloading and archiving Weights & Biases experiment data to local storage formats optimized for offline analysis. Stores to PostgreSQL db + Parquet files, supports incremental updates and selective data retrieval.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ uv add dr_wandb
9
+ ```
10
+
11
+ ### Prerequisites
12
+
13
+ - Python 3.12 or higher
14
+ - PostgreSQL database server
15
+ - Weights & Biases account with API access
16
+ - PyArrow for Parquet file operations
17
+
18
+ ### Authentication
19
+
20
+ Configure Weights & Biases authentication using one of these methods:
21
+
22
+ ```bash
23
+ wandb login
24
+ ```
25
+
26
+ Or set the API key as an environment variable:
27
+
28
+ ```bash
29
+ export WANDB_API_KEY=your_api_key_here
30
+ ```
31
+
32
+ ## Basic Usage
33
+
34
+ Download all runs from a Weights & Biases project:
35
+
36
+ ```bash
37
+ wandb-download --entity your_entity --project your_project
38
+
39
+ Options:
40
+ --entity TEXT WandB entity (username or team name)
41
+ --project TEXT WandB project name
42
+ --runs-only Download only run metadata, skip training history
43
+ --force-refresh Download all data, ignoring existing records
44
+ --db-url TEXT PostgreSQL connection string
45
+ --output-dir TEXT Directory for exported Parquet files
46
+ --help Show help message and exit
47
+ ```
48
+
49
+ The tool creates a PostgreSQL database, downloads experiment data, and exports Parquet files to the configured output directory. The tool tracks existing data and downloads only new or updated runs by default. A run is considered for update if:
50
+
51
+ - It does not exist in the local database
52
+ - Its state is "running" (indicating potential new data)
53
+
54
+ Use `--force-refresh` to download all runs regardless of existing data.
55
+
56
+ ### Environment Variables
57
+
58
+ The tool reads configuration from environment variables with the `DR_WANDB_` prefix and supports `.env` files:
59
+
60
+ | Variable | Description | Default |
61
+ |----------|-------------|---------|
62
+ | `DR_WANDB_ENTITY` | Weights & Biases entity name | None |
63
+ | `DR_WANDB_PROJECT` | Weights & Biases project name | None |
64
+ | `DR_WANDB_DATABASE_URL` | PostgreSQL connection string | `postgresql+psycopg2://localhost/wandb` |
65
+ | `DR_WANDB_OUTPUT_DIR` | Directory for exported files | `./data` |
66
+
67
+ ### Database Configuration
68
+
69
+ The PostgreSQL connection string follows the standard format:
70
+
71
+ ```
72
+ postgresql+psycopg2://username:password@host:port/database_name
73
+ ```
74
+
75
+ If the specified database does not exist, the tool will attempt to create it automatically.
76
+
77
+ ## Data Schema
78
+
79
+
80
+ The tool generates the following files in the output directory:
81
+
82
+ - `runs_metadata.parquet` - Complete run metadata including configurations, summaries, and system information
83
+ - `runs_history.parquet` - Training metrics and logged values over time
84
+ - `runs_metadata_{component}.parquet` - Component-specific files for config, summary, wandb_metadata, system_metrics, system_attrs, and sweep_info
85
+
86
+
87
+ **Run Records**
88
+ - **run_id**: Unique identifier for the experiment run
89
+ - **run_name**: Human-readable name assigned to the run
90
+ - **state**: Current state (finished, running, crashed, failed, killed)
91
+ - **project**: Project name
92
+ - **entity**: Entity name
93
+ - **created_at**: Timestamp of run creation
94
+ - **config**: Experiment configuration parameters (JSONB)
95
+ - **summary**: Final metrics and outputs (JSONB)
96
+ - **wandb_metadata**: Platform-specific metadata (JSONB)
97
+ - **system_metrics**: Hardware and system information (JSONB)
98
+ - **system_attrs**: Additional system attributes (JSONB)
99
+ - **sweep_info**: Hyperparameter sweep information (JSONB)
100
+
101
+ **Training History Records**
102
+ - **run_id**: Reference to the parent run
103
+ - **step**: Training step number
104
+ - **timestamp**: Time of metric logging
105
+ - **runtime**: Elapsed time since run start
106
+ - **wandb_metadata**: Platform logging metadata (JSONB)
107
+ - **metrics**: All logged metrics and values (JSONB, flattened in Parquet export)
108
+
@@ -0,0 +1 @@
1
+ /Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/CODING_PRINCIPLES.md
@@ -0,0 +1 @@
1
+ /Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/README.md
@@ -0,0 +1 @@
1
+ /Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/audit_synthesis_pipeline.md
@@ -0,0 +1 @@
1
+ /Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/design_philosophy.md
@@ -0,0 +1 @@
1
+ /Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/documentation_organizer_guide.md
@@ -0,0 +1 @@
1
+ /Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/fresh_eyes_review_guide.md
@@ -0,0 +1 @@
1
+ /Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/general_project_extraction_prompt.md
@@ -0,0 +1 @@
1
+ /Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/project_consolidation_methodology.md
@@ -0,0 +1 @@
1
+ /Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/reporting_guide.md
@@ -0,0 +1 @@
1
+ /Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/strategic_collaboration_guide.md
@@ -0,0 +1 @@
1
+ /Users/daniellerothermel/drotherm/repos/dr_ref/docs/processes/tactical_execution_guide.md