slurmkit 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. slurmkit-0.0.1/LICENSE +21 -0
  2. slurmkit-0.0.1/PKG-INFO +527 -0
  3. slurmkit-0.0.1/README.md +478 -0
  4. slurmkit-0.0.1/pyproject.toml +77 -0
  5. slurmkit-0.0.1/setup.cfg +4 -0
  6. slurmkit-0.0.1/src/slurmkit/__init__.py +43 -0
  7. slurmkit-0.0.1/src/slurmkit/_version.py +1 -0
  8. slurmkit-0.0.1/src/slurmkit/cli/__init__.py +9 -0
  9. slurmkit-0.0.1/src/slurmkit/cli/commands.py +1839 -0
  10. slurmkit-0.0.1/src/slurmkit/cli/main.py +864 -0
  11. slurmkit-0.0.1/src/slurmkit/cli/ui/__init__.py +32 -0
  12. slurmkit-0.0.1/src/slurmkit/cli/ui/backend.py +59 -0
  13. slurmkit-0.0.1/src/slurmkit/cli/ui/context.py +90 -0
  14. slurmkit-0.0.1/src/slurmkit/cli/ui/models.py +57 -0
  15. slurmkit-0.0.1/src/slurmkit/cli/ui/plain.py +97 -0
  16. slurmkit-0.0.1/src/slurmkit/cli/ui/reports.py +365 -0
  17. slurmkit-0.0.1/src/slurmkit/cli/ui/rich_backend.py +105 -0
  18. slurmkit-0.0.1/src/slurmkit/collections.py +1251 -0
  19. slurmkit-0.0.1/src/slurmkit/config.py +487 -0
  20. slurmkit-0.0.1/src/slurmkit/generate.py +773 -0
  21. slurmkit-0.0.1/src/slurmkit/notifications.py +1516 -0
  22. slurmkit-0.0.1/src/slurmkit/slurm.py +840 -0
  23. slurmkit-0.0.1/src/slurmkit/sync.py +425 -0
  24. slurmkit-0.0.1/src/slurmkit/utils/__init__.py +5 -0
  25. slurmkit-0.0.1/src/slurmkit/wandb_utils.py +425 -0
  26. slurmkit-0.0.1/src/slurmkit.egg-info/PKG-INFO +527 -0
  27. slurmkit-0.0.1/src/slurmkit.egg-info/SOURCES.txt +43 -0
  28. slurmkit-0.0.1/src/slurmkit.egg-info/dependency_links.txt +1 -0
  29. slurmkit-0.0.1/src/slurmkit.egg-info/entry_points.txt +2 -0
  30. slurmkit-0.0.1/src/slurmkit.egg-info/requires.txt +25 -0
  31. slurmkit-0.0.1/src/slurmkit.egg-info/top_level.txt +1 -0
  32. slurmkit-0.0.1/tests/test_cli_collection_analyze.py +477 -0
  33. slurmkit-0.0.1/tests/test_cli_collection_groups.py +59 -0
  34. slurmkit-0.0.1/tests/test_cli_collection_show.py +128 -0
  35. slurmkit-0.0.1/tests/test_cli_init.py +99 -0
  36. slurmkit-0.0.1/tests/test_cli_notify.py +259 -0
  37. slurmkit-0.0.1/tests/test_cli_notify_collection_final.py +493 -0
  38. slurmkit-0.0.1/tests/test_cli_resubmit.py +170 -0
  39. slurmkit-0.0.1/tests/test_cli_ui.py +154 -0
  40. slurmkit-0.0.1/tests/test_collections.py +440 -0
  41. slurmkit-0.0.1/tests/test_config.py +173 -0
  42. slurmkit-0.0.1/tests/test_generate.py +308 -0
  43. slurmkit-0.0.1/tests/test_notifications.py +830 -0
  44. slurmkit-0.0.1/tests/test_notifications_phase2.py +235 -0
  45. slurmkit-0.0.1/tests/test_slurm.py +199 -0
slurmkit-0.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Awni
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,527 @@
1
+ Metadata-Version: 2.4
2
+ Name: slurmkit
3
+ Version: 0.0.1
4
+ Summary: CLI tools for managing and generating SLURM jobs
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/Awni00/slurmkit
7
+ Project-URL: Repository, https://github.com/Awni00/slurmkit
8
+ Project-URL: Issues, https://github.com/Awni00/slurmkit/issues
9
+ Keywords: slurm,hpc,job-management,cluster,batch
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: POSIX :: Linux
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering
22
+ Classifier: Topic :: System :: Clustering
23
+ Classifier: Topic :: Utilities
24
+ Requires-Python: >=3.8
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: pyyaml>=6.0
28
+ Requires-Dist: jinja2>=3.0
29
+ Requires-Dist: pandas>=1.3
30
+ Requires-Dist: tabulate>=0.9
31
+ Requires-Dist: requests>=2.31
32
+ Provides-Extra: ui
33
+ Requires-Dist: rich>=13.7; extra == "ui"
34
+ Provides-Extra: dev
35
+ Requires-Dist: pytest>=7.0; extra == "dev"
36
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
37
+ Provides-Extra: docs
38
+ Requires-Dist: mkdocs>=1.5; extra == "docs"
39
+ Requires-Dist: mkdocs-material>=9.5; extra == "docs"
40
+ Requires-Dist: mkdocstrings[python]>=0.24; extra == "docs"
41
+ Provides-Extra: all
42
+ Requires-Dist: rich>=13.7; extra == "all"
43
+ Requires-Dist: pytest>=7.0; extra == "all"
44
+ Requires-Dist: pytest-cov>=4.0; extra == "all"
45
+ Requires-Dist: mkdocs>=1.5; extra == "all"
46
+ Requires-Dist: mkdocs-material>=9.5; extra == "all"
47
+ Requires-Dist: mkdocstrings[python]>=0.24; extra == "all"
48
+ Dynamic: license-file
49
+
50
+ ![slurmkit header](docs/assets/slurmkit-header-landscape.png)
51
+
52
+ <p align="center">
53
+ <a href="https://github.com/Awni00/slurmkit/actions/workflows/tests.yml"><img src="https://github.com/Awni00/slurmkit/actions/workflows/tests.yml/badge.svg" alt="Unit Tests"></a>
54
+ <a href="https://github.com/Awni00/slurmkit/actions/workflows/docs.yml"><img src="https://github.com/Awni00/slurmkit/actions/workflows/docs.yml/badge.svg" alt="Docs"></a>
55
+ <img src="https://img.shields.io/badge/python-3.8%2B-blue" alt="Python 3.8+">
56
+ <img src="https://img.shields.io/badge/license-MIT-green" alt="MIT License">
57
+ </p>
58
+
59
+ <p align="center">
60
+ <a href="#installation">Install</a> •
61
+ <a href="#quick-start">Quick Start</a> •
62
+ <a href="#features">Features</a> •
63
+ <a href="https://awni00.github.io/slurmkit">Docs</a> •
64
+ <a href="https://deepwiki.com/Awni00/slurmkit">DeepWiki</a>
65
+ </p>
66
+
67
+ ---
68
+
69
+ <!-- # slurmkit -->
70
+
71
+ A CLI toolkit for managing and generating SLURM jobs.
72
+
73
+
74
+
75
+ `slurmkit` provides tools for:
76
+ - Auto-discovering and tracking SLURM job status
77
+ - Generating job scripts from templates with parameter sweeps
78
+ - Organizing jobs into trackable collections
79
+ - Cross-cluster job synchronization
80
+ - Cleaning up failed jobs and W&B runs
81
+
82
+ ## Installation
83
+
84
+ ### Install Latest From GitHub
85
+
86
+ ```bash
87
+ pip install git+https://github.com/Awni00/slurmkit.git
88
+ # include all optional extras (ui + dev + docs)
89
+ pip install "slurmkit[all] @ git+https://github.com/Awni00/slurmkit.git"
90
+ ```
91
+
92
+ ### Clone and Install (Recommended for Development)
93
+
94
+ ```bash
95
+ git clone https://github.com/Awni00/slurmkit.git
96
+ cd slurmkit
97
+ pip install -e ".[all]"
98
+ ```
99
+ <!--
100
+ ### From PyPI
101
+
102
+ ```bash
103
+ pip install slurmkit
104
+ ```
105
+ -->
106
+
107
+ ### Dependencies
108
+
109
+ **Required:**
110
+ - Python 3.8+
111
+ - PyYAML
112
+ - Jinja2
113
+ - pandas
114
+ - tabulate
115
+ - requests
116
+
117
+ **Optional:**
118
+ - wandb (for W&B cleanup features)
119
+ - rich (enhanced CLI UI; install with `pip install -e ".[ui]"` from a clone)
120
+ - `all` extra for optional groups (`ui`, `dev`, `docs`)
121
+
122
+ ## Quick Start
123
+
124
+ ### 1. Initialize Project
125
+
126
+ ```bash
127
+ cd your-project
128
+ slurmkit init
129
+ ```
130
+
131
+ This creates `.slurm-kit/config.yaml` with your settings.
132
+
133
+ ### 2. Check Job Status
134
+
135
+ ```bash
136
+ slurmkit status my_experiment
137
+ ```
138
+
139
+ ### 3. Generate Jobs from Template
140
+
141
+ Create a template `templates/train.job.j2`:
142
+
143
+ ```jinja2
144
+ #!/bin/bash
145
+ #SBATCH --job-name={{ job_name }}
146
+ #SBATCH --partition={{ slurm.partition }}
147
+ #SBATCH --time={{ slurm.time }}
148
+ #SBATCH --output={{ logs_dir }}/{{ job_name }}.%j.out
149
+
150
+ python train.py --lr {{ learning_rate }} --bs {{ batch_size }}
151
+ ```
152
+
153
+ Create a job spec `experiments/exp1/job_spec.yaml`:
154
+
155
+ ```yaml
156
+ name: exp1
157
+ template: ../../templates/train.job.j2
158
+ output_dir: job_scripts
159
+ logs_dir: logs
160
+
161
+ parameters:
162
+   mode: grid
163
+   values:
164
+     learning_rate: [0.001, 0.01, 0.1]
165
+     batch_size: [32, 64]
166
+   # Optional: exclude incompatible combinations
167
+   filter:
168
+     file: params_filter.py
169
+     function: include_params
170
+
171
+ slurm_args:
172
+   defaults:
173
+     partition: gpu
174
+     time: "24:00:00"
175
+
176
+ job_name_pattern: "lr{{ learning_rate }}_bs{{ batch_size }}"
177
+ ```
178
+
179
+ Generate jobs:
180
+
181
+ ```bash
182
+ slurmkit generate experiments/exp1/job_spec.yaml --collection exp1
183
+ ```
184
+
185
+ ### 4. Submit Jobs
186
+
187
+ ```bash
188
+ # Preview before actual submission
189
+ slurmkit submit --collection exp1 --dry-run
190
+
191
+ # Submit to SLURM
192
+ slurmkit submit --collection exp1
193
+ ```
194
+
195
+ ### 5. Monitor and Resubmit
196
+
197
+ ```bash
198
+ # Update job states
199
+ slurmkit collection update exp1
200
+
201
+ # View collection status
202
+ slurmkit collection show exp1
203
+
204
+ # View latest effective attempts with primary/history context
205
+ slurmkit collection show exp1 --show-primary --show-history
206
+
207
+ # Rich UI (if installed)
208
+ slurmkit --ui rich collection analyze exp1
209
+
210
+ # Resubmit failed jobs
211
+ slurmkit resubmit --collection exp1 --filter failed
212
+
213
+ # Group-aware retry
214
+ slurmkit resubmit --collection exp1 --filter failed --submission-group retry_after_fix
215
+ ```
216
+
217
+ ## Testing and Showcase Workflows
218
+
219
+ ### A) Local Demo (No SLURM Required)
220
+
221
+ Use the bundled demo project for a deterministic feature showcase:
222
+
223
+ ```bash
224
+ cd examples/demo_project
225
+ python -m venv .venv
226
+ source .venv/bin/activate
227
+ pip install -e ../..
228
+ ./setup_dummy_jobs.py --include-non-terminal
229
+ ```
230
+
231
+ Then run:
232
+
233
+ ```bash
234
+ slurmkit collection list
235
+ slurmkit collection show demo_terminal_failed
236
+ slurmkit collection analyze demo_terminal_failed
237
+ # Optional richer formatting (requires rich extra):
238
+ slurmkit --ui rich collection analyze demo_terminal_failed
239
+ slurmkit notify test --dry-run
240
+ slurmkit notify collection-final --collection demo_terminal_failed --job-id 990002 --no-refresh --dry-run
241
+ ```
242
+
243
+ ### B) Real Cluster Workflow
244
+
245
+ ```bash
246
+ slurmkit generate experiments/exp1/job_spec.yaml --collection exp1
247
+ slurmkit submit --collection exp1 --dry-run
248
+ slurmkit submit --collection exp1
249
+ slurmkit status exp1
250
+ slurmkit collection update exp1
251
+ slurmkit collection show exp1
252
+ slurmkit collection analyze exp1 --attempt-mode latest
253
+ slurmkit collection groups exp1
254
+ slurmkit resubmit --collection exp1 --filter failed --dry-run
255
+ ```
256
+
257
+ ### C) Feature Checklist
258
+
259
+ | Goal | Command | Success signal |
260
+ |------|---------|----------------|
261
+ | Initialize config | `slurmkit init` | `.slurm-kit/config.yaml` created |
262
+ | Generate scripts | `slurmkit generate ... --collection exp1` | Job scripts written and collection updated |
263
+ | Preview submission | `slurmkit submit --collection exp1 --dry-run` | Candidate jobs listed with no submit |
264
+ | Inspect collection | `slurmkit collection show exp1` | Summary + jobs table rendered |
265
+ | Analyze outcomes | `slurmkit collection analyze exp1` | Parameter tables and risky/stable sections shown |
266
+ | Validate notifications | `slurmkit notify test --dry-run` | Route resolution and payload preview |
267
+
268
+ ## Commands
269
+
270
+ | Command | Description |
271
+ |---------|-------------|
272
+ | `slurmkit init` | Initialize project configuration |
273
+ | `slurmkit status <exp>` | Show job status for experiment |
274
+ | `slurmkit find <job_id>` | Find output file for job ID |
275
+ | `slurmkit generate <spec>` | Generate job scripts from template |
276
+ | `slurmkit submit` | Submit job scripts |
277
+ | `slurmkit resubmit` | Resubmit failed jobs |
278
+ | `slurmkit notify` | Send job lifecycle notifications |
279
+ | `slurmkit collection` | Manage job collections |
280
+ | `slurmkit clean outputs` | Clean failed job outputs |
281
+ | `slurmkit clean wandb` | Clean failed W&B runs |
282
+ | `slurmkit sync` | Sync job states across clusters |
283
+
284
+ Run `slurmkit <command> --help` for detailed usage.
285
+
286
+ ## Configuration
287
+
288
+ Configuration is stored in `.slurm-kit/config.yaml`:
289
+
290
+ ```yaml
291
+ jobs_dir: jobs/
292
+ collections_dir: .job-collections/
293
+ sync_dir: .slurm-kit/sync/
294
+
295
+ output_patterns:
296
+ - "{job_name}.{job_id}.out"
297
+ - "{job_name}.{job_id}.*.out"
298
+ - "slurm-{job_id}.out"
299
+
300
+ slurm_defaults:
301
+ partition: gpu
302
+ time: "24:00:00"
303
+ mem: "32G"
304
+
305
+ job_structure:
306
+ scripts_subdir: job_scripts/
307
+ logs_subdir: logs/
308
+
309
+ ui:
310
+ mode: plain # plain | rich | auto
311
+
312
+ notifications:
313
+ defaults:
314
+ events: [job_failed]
315
+ timeout_seconds: 5
316
+ max_attempts: 3
317
+ backoff_seconds: 0.5
318
+ output_tail_lines: 40
319
+ collection_final:
320
+ attempt_mode: latest
321
+ min_support: 3
322
+ top_k: 10
323
+ include_failed_output_tail_lines: 20
324
+ ai:
325
+ enabled: false
326
+ callback: null
327
+ routes:
328
+ - name: team_slack
329
+ type: slack
330
+ url: "${SLACK_WEBHOOK_URL}"
331
+ events: [job_failed, collection_failed]
332
+ - name: team_email
333
+ type: email
334
+ to: ["ops@example.com", "ml@example.com"]
335
+ from: "${SLURMKIT_EMAIL_FROM}"
336
+ smtp_host: "${SMTP_HOST}"
337
+ smtp_port: 587
338
+ smtp_username: "${SMTP_USER}"
339
+ smtp_password: "${SMTP_PASSWORD}"
340
+ smtp_starttls: true
341
+ smtp_ssl: false
342
+ events: [job_failed, collection_failed]
343
+ ```
344
+
345
+ ### Environment Variables
346
+
347
+ | Variable | Description |
348
+ |----------|-------------|
349
+ | `SLURMKIT_CONFIG` | Path to config file |
350
+ | `SLURMKIT_JOBS_DIR` | Jobs directory |
351
+ | `SLURMKIT_COLLECTIONS_DIR` | Collections directory |
352
+ | `SLURMKIT_WANDB_ENTITY` | W&B entity |
353
+ | `SLURMKIT_DRY_RUN` | Enable dry-run mode |
354
+
355
+ ## Documentation
356
+
357
+ Full documentation is available at [https://awni00.github.io/slurmkit/](https://awni00.github.io/slurmkit/)
358
+
359
+ - [Getting Started](docs/getting-started.md)
360
+ - [Configuration](docs/configuration.md)
361
+ - [Job Generation](docs/job-generation.md)
362
+ - [Collections](docs/collections.md)
363
+ - [Notifications](docs/notifications.md)
364
+ - [Cross-Cluster Sync](docs/sync.md)
365
+ - [CLI Reference](docs/cli-reference.md)
366
+
367
+ ## Project Structure
368
+
369
+ ```
370
+ your-project/
371
+ ├── .slurm-kit/
372
+ │ ├── config.yaml # Project configuration
373
+ │ └── sync/ # Cross-cluster sync files
374
+ ├── .job-collections/ # Collection YAML files
375
+ ├── jobs/
376
+ │ └── experiment1/
377
+ │ ├── job_scripts/ # Generated job scripts
378
+ │ └── logs/ # Job output files
379
+ └── templates/ # Jinja2 job templates
380
+ ```
381
+
382
+ ## Features
383
+
384
+ Key features at a glance:
385
+
386
+ **1) Job Creation**
387
+
388
+ - Generate parameterized job scripts and attach them to a collection: `slurmkit generate job_spec.yaml --collection exp1`
389
+ - Preview generation and submission safely: `slurmkit generate ... --dry-run`, `slurmkit submit ... --dry-run`
390
+ - Submit only unsubmitted collection jobs (default): `slurmkit submit --collection exp1 --filter unsubmitted`
391
+
392
+ **2) Collection Tracking and Analysis**
393
+
394
+ - Create, inspect, and refresh collections: `slurmkit collection create exp1`, `slurmkit collection show exp1`, `slurmkit collection update exp1`
395
+ - Analyze outcomes by parameter values and latest attempts: `slurmkit collection analyze exp1 --attempt-mode latest --top-k 10`
396
+ - Inspect resubmission waves and attempt history: `slurmkit collection groups exp1`, `slurmkit collection show exp1 --show-history`
397
+ - Resubmit failed jobs with optional selection and parameter callbacks to programmatically specify which jobs are submitted and whether to include additional parameters in resubmission (e.g., checkpoint dir): `slurmkit resubmit --collection exp1 --filter failed --select-file callbacks.py --extra-params-file extra.py`
398
+
399
+ **3) Notifications and Cross-Cluster Sync**
400
+
401
+ - Validate routes and send job notifications: `slurmkit notify test`, `slurmkit notify job ...`
402
+ - Send one final collection-level summary when a collection reaches terminal state: `slurmkit notify collection-final ...`
403
+ - Sync collection/job state across clusters via git-backed files: `slurmkit sync --push`
404
+
405
+ ### Job Collections
406
+
407
+ Track related jobs together:
408
+
409
+ ```bash
410
+ # Create collection
411
+ slurmkit collection create my_exp --description "Training sweep"
412
+
413
+ # List collections
414
+ slurmkit collection list
415
+
416
+ # Show details
417
+ slurmkit collection show my_exp --state failed
418
+ slurmkit collection show my_exp --attempt-mode latest --show-primary
419
+
420
+ # Update states from SLURM
421
+ slurmkit collection update my_exp
422
+
423
+ # Submission-group summary
424
+ slurmkit collection groups my_exp
425
+ ```
426
+
427
+ ### Notifications
428
+
429
+ Send job lifecycle notifications to Slack, Discord, email, or generic webhooks:
430
+
431
+ ```bash
432
+ # Validate route setup
433
+ slurmkit notify test
434
+ slurmkit notify test --route team_email --dry-run
435
+
436
+ # Typical end-of-job call from script (default: notify only on failure)
437
+ slurmkit notify job --job-id "$SLURM_JOB_ID" --exit-code "$rc"
438
+
439
+ # Collection-final summary notification (emits only when collection is terminal)
440
+ slurmkit notify collection-final --job-id "$SLURM_JOB_ID"
441
+ ```
442
+
443
+ Recommended trap snippet inside a job script:
444
+
445
+ ```bash
446
+ rc=$?
447
+ slurmkit notify job --job-id "${SLURM_JOB_ID}" --exit-code "${rc}"
448
+ slurmkit notify collection-final --job-id "${SLURM_JOB_ID}"
449
+ exit "${rc}"
450
+ ```
451
+
452
+ ### Parameter Sweeps
453
+
454
+ Generate jobs from parameter grids:
455
+
456
+ ```yaml
457
+ parameters:
458
+   mode: grid
459
+   values:
460
+     learning_rate: [0.001, 0.01, 0.1]
461
+     batch_size: [32, 64, 128]
462
+     model: [resnet18, resnet50]
463
+ ```
464
+
465
+ Or explicit lists:
466
+
467
+ ```yaml
468
+ parameters:
469
+   mode: list
470
+   values:
471
+     - {lr: 0.001, bs: 32}
472
+     - {lr: 0.01, bs: 64}
473
+ ```
474
+
475
+ ### Dynamic SLURM Arguments
476
+
477
+ Use Python functions for complex resource logic:
478
+
479
+ ```python
480
+ # slurm_logic.py
481
+ def get_slurm_args(params, defaults):
482
+     args = defaults.copy()
483
+     if params.get('model') == 'resnet50':
484
+         args['mem'] = '64G'
485
+         args['gpus'] = 2
486
+     return args
487
+ ```
488
+
489
+ ### Cross-Cluster Sync
490
+
491
+ Share job status across clusters via git:
492
+
493
+ ```bash
494
+ # On cluster A
495
+ slurmkit sync --push
496
+
497
+ # On cluster B
498
+ git pull
499
+ slurmkit collection show my_exp
500
+ ```
501
+
502
+ ## Development
503
+
504
+ ### Setup
505
+
506
+ We recommend using [uv](https://github.com/astral-sh/uv) to manage the development environment.
507
+
508
+ ```bash
509
+ # Clone the repository
510
+ git clone https://github.com/Awni00/slurmkit.git
511
+ cd slurmkit
512
+
513
+ # Create a virtual environment and install dependencies in editable mode
514
+ uv venv
515
+ source .venv/bin/activate
516
+ uv pip install -e ".[dev]"
517
+ ```
518
+
519
+ ### Running Tests
520
+
521
+ ```bash
522
+ pytest
523
+ ```
524
+
525
+ ## License
526
+
527
+ MIT License - see [LICENSE](LICENSE) for details.