freesolo-flash-dev 0.2.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flash/__init__.py +29 -0
- flash/_channel.py +23 -0
- flash/_fileio.py +35 -0
- flash/_logging.py +49 -0
- flash/_update_check.py +266 -0
- flash/catalog.py +253 -0
- flash/cli/__init__.py +1 -0
- flash/cli/main/__init__.py +227 -0
- flash/cli/main/__main__.py +6 -0
- flash/cli/main/commands.py +636 -0
- flash/cli/main/envpush.py +317 -0
- flash/cli/main/render.py +599 -0
- flash/cli/main/training_doc.py +455 -0
- flash/client/__init__.py +14 -0
- flash/client/config.py +70 -0
- flash/client/http.py +372 -0
- flash/client/runtime_secrets.py +69 -0
- flash/client/specs.py +20 -0
- flash/cost/__init__.py +16 -0
- flash/cost/analytical.py +175 -0
- flash/cost/facts.py +114 -0
- flash/cost/spec.py +113 -0
- flash/cost/types.py +158 -0
- flash/engine/__init__.py +6 -0
- flash/engine/accounting.py +36 -0
- flash/engine/chalk_kernels.py +116 -0
- flash/engine/multiturn_rollout.py +780 -0
- flash/engine/recipe.py +86 -0
- flash/engine/vram.py +603 -0
- flash/engine/worker/__init__.py +2916 -0
- flash/engine/worker/__main__.py +4 -0
- flash/engine/worker/kernel_warmup.py +400 -0
- flash/engine/worker/lora.py +796 -0
- flash/engine/worker/packing.py +366 -0
- flash/engine/worker/perf.py +1048 -0
- flash/envs/__init__.py +10 -0
- flash/envs/adapter/__init__.py +883 -0
- flash/envs/adapter/rubric.py +222 -0
- flash/envs/base.py +52 -0
- flash/envs/registry.py +62 -0
- flash/mcp/__init__.py +1 -0
- flash/mcp/server.py +85 -0
- flash/providers/__init__.py +59 -0
- flash/providers/_auth.py +24 -0
- flash/providers/_http.py +230 -0
- flash/providers/_instance.py +416 -0
- flash/providers/_instance_bootstrap.py +517 -0
- flash/providers/_poll.py +311 -0
- flash/providers/allocator.py +193 -0
- flash/providers/base.py +431 -0
- flash/providers/hyperstack/__init__.py +127 -0
- flash/providers/hyperstack/api.py +522 -0
- flash/providers/hyperstack/auth.py +17 -0
- flash/providers/hyperstack/gpus.py +29 -0
- flash/providers/hyperstack/jobs/__init__.py +632 -0
- flash/providers/hyperstack/jobs/builders.py +122 -0
- flash/providers/hyperstack/preflight.py +23 -0
- flash/providers/hyperstack/pricing.py +26 -0
- flash/providers/hyperstack/train.py +25 -0
- flash/providers/lambdalabs/__init__.py +139 -0
- flash/providers/lambdalabs/api.py +261 -0
- flash/providers/lambdalabs/auth.py +18 -0
- flash/providers/lambdalabs/gpus.py +29 -0
- flash/providers/lambdalabs/jobs/__init__.py +724 -0
- flash/providers/lambdalabs/jobs/builders.py +118 -0
- flash/providers/lambdalabs/preflight.py +27 -0
- flash/providers/lambdalabs/pricing.py +51 -0
- flash/providers/lambdalabs/train.py +27 -0
- flash/providers/preflight.py +55 -0
- flash/providers/realized.py +80 -0
- flash/providers/runpod/__init__.py +130 -0
- flash/providers/runpod/api.py +186 -0
- flash/providers/runpod/auth.py +37 -0
- flash/providers/runpod/cost.py +57 -0
- flash/providers/runpod/gpus.py +46 -0
- flash/providers/runpod/jobs.py +956 -0
- flash/providers/runpod/keys.py +139 -0
- flash/providers/runpod/preflight.py +30 -0
- flash/providers/runpod/preload.py +915 -0
- flash/providers/runpod/pricing.py +18 -0
- flash/providers/runpod/slots.py +79 -0
- flash/providers/runpod/train/__init__.py +150 -0
- flash/providers/runpod/train/deps.py +395 -0
- flash/providers/runpod/train/endpoints.py +820 -0
- flash/py.typed +0 -0
- flash/runner/__init__.py +686 -0
- flash/runner/checkpoints.py +82 -0
- flash/runner/deploy.py +422 -0
- flash/runner/lifecycle.py +672 -0
- flash/schema/__init__.py +375 -0
- flash/schema/fields.py +331 -0
- flash/serve/__init__.py +1 -0
- flash/serve/deploy.py +326 -0
- flash/serve/pricing.py +60 -0
- flash/server/__init__.py +1 -0
- flash/server/__main__.py +20 -0
- flash/server/app.py +961 -0
- flash/server/auth.py +263 -0
- flash/server/billing.py +124 -0
- flash/server/checkpoints.py +110 -0
- flash/server/db.py +160 -0
- flash/server/environment_registry.py +102 -0
- flash/server/envs.py +360 -0
- flash/server/reconcile.py +163 -0
- flash/server/run_registry.py +150 -0
- flash/spec.py +333 -0
- freesolo_flash_dev-0.2.25.dist-info/METADATA +192 -0
- freesolo_flash_dev-0.2.25.dist-info/RECORD +111 -0
- freesolo_flash_dev-0.2.25.dist-info/WHEEL +4 -0
- freesolo_flash_dev-0.2.25.dist-info/entry_points.txt +3 -0
- freesolo_flash_dev-0.2.25.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""RunPod credential handling for the managed Flash backend (operator-side).
|
|
2
|
+
|
|
3
|
+
The Flash SDK authenticates via the ``RUNPOD_API_KEY`` environment variable, set by
|
|
4
|
+
the **operator** on the control-plane host. End users never
|
|
5
|
+
provide provider credentials — they authenticate to the control plane with a Flash
|
|
6
|
+
key. Deliberately env-only: ``~/.flash/config.json`` holds the *Flash* key, which
|
|
7
|
+
must never be mistaken for a RunPod key.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from .._auth import load_provider_key
|
|
13
|
+
from . import keys as _keys
|
|
14
|
+
|
|
15
|
+
# Name of the environment variable that ``load_api_key`` hands to ``load_provider_key``.
_ENV_VAR = "RUNPOD_API_KEY"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def load_api_key() -> str | None:
    """Read the RunPod API key from the environment (operator configuration).

    Returns:
        Whatever ``load_provider_key`` yields for ``RUNPOD_API_KEY``
        (annotated as ``str | None``).
    """
    api_key = load_provider_key(_ENV_VAR)
    return api_key
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def ensure_auth() -> str:
    """Pick the active account and reduce ``RUNPOD_API_KEY`` to that one key.

    ``RUNPOD_API_KEY`` is allowed to hold a comma-separated pool of per-account
    keys (see ``runpod.keys``). Because the runpod_flash SDK reads the raw env var,
    a multi-key value would be sent as one bearer token (a 401). ``select_active``
    collapses it to the single active key while the cached pool keeps the rest
    for failover.

    Returns:
        The active key reported by ``select_active``.

    Raises:
        RuntimeError: when ``select_active`` returns a falsy value, i.e. no key
            is configured.
    """
    active_key = _keys.select_active()
    if active_key:
        return active_key
    raise RuntimeError(
        "no RunPod API key found; set RUNPOD_API_KEY on the control-plane host"
    )
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Shape RunPod realized billing rows into a RealizedCost. Pure shaping (offline-testable);
|
|
2
|
+
the HTTP call is isolated in ``api.billing_endpoints``."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from flash.providers.realized import RealizedCost
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _get(row: Any, key: str) -> Any:
|
|
13
|
+
if isinstance(row, dict):
|
|
14
|
+
return row.get(key)
|
|
15
|
+
return getattr(row, key, None)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _iso(ts: float) -> str:
|
|
19
|
+
return datetime.fromtimestamp(float(ts), UTC).isoformat()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def shape_endpoint_cost(rows: list[Any], *, endpoint_id: str | None) -> RealizedCost:
    """Total the realized USD over the billing rows belonging to ``endpoint_id``.

    Each row is {endpointId, amount (USD), timeBilledMs, ...}. With an
    ``endpoint_id`` only that endpoint's rows are counted, since the response may
    be grouped/unfiltered; with ``None`` every returned row is summed. RunPod's
    ``amount`` already includes disk, so the cost is reported as a single resource.

    Args:
        rows: Billing rows, each a dict or an attribute-bearing object.
        endpoint_id: Endpoint to keep, or ``None`` to keep every row.

    Returns:
        A ``RealizedCost`` for provider ``"runpod"`` whose total is rounded to six
        decimals; ``wall_seconds`` is ``None`` when no billed time was accumulated.
    """
    usd_total = 0.0
    millis_total = 0
    for entry in rows:
        if endpoint_id is not None:
            owner = _get(entry, "endpointId") or _get(entry, "endpoint_id")
            # Rows with no endpoint id are still counted; only rows tagged with a
            # different endpoint are dropped.
            if owner is not None and str(owner) != str(endpoint_id):
                continue
        usd_total += float(_get(entry, "amount") or 0)
        millis_total += int(_get(entry, "timeBilledMs") or 0)

    usd_total = round(usd_total, 6)

    if millis_total:
        wall = millis_total / 1000.0
    else:
        wall = None

    return RealizedCost(
        provider="runpod",
        realized_usd=usd_total,
        by_resource={"gpu": usd_total},
        wall_seconds=wall,
        source={"endpoint_id": endpoint_id},
    )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def realized_cost(endpoint_id: str | None, *, start: float, end: float) -> RealizedCost | None:
    """Fetch and shape this run's realized RunPod cost.

    Args:
        endpoint_id: Endpoint to query; a falsy value means there is nothing to query.
        start: Window start, converted with ``_iso`` before the API call.
        end: Window end, converted with ``_iso`` before the API call.

    Returns:
        The shaped ``RealizedCost``, or ``None`` when ``endpoint_id`` is falsy.
    """
    if endpoint_id:
        # Imported here, not at module level, so the no-endpoint path never loads it.
        from flash.providers.runpod import api

        window_start = _iso(start)
        window_end = _iso(end)
        billing_rows = api.billing_endpoints(
            start_time=window_start, end_time=window_end, endpoint_id=endpoint_id
        )
        return shape_endpoint_cost(billing_rows, endpoint_id=endpoint_id)
    return None
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""RunPod's GPU classes + the Flash-specific bits of the shared GPU table.
|
|
2
|
+
|
|
3
|
+
The class table itself is provider-agnostic and lives in ``providers/base.py`` (one
|
|
4
|
+
canonical row per friendly name). This module carves out RunPod's rows
|
|
5
|
+
(``gpu_classes()`` == every class with a Flash ``enum_member``) and owns the
|
|
6
|
+
RunPod-only translation: friendly name -> Flash ``GpuType``.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from flash.providers.base import (
|
|
12
|
+
GpuClass,
|
|
13
|
+
UnsupportedGpuError,
|
|
14
|
+
get_gpu_info,
|
|
15
|
+
providers_for,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Imported lazily so unit tests that only exercise the mapping don't pull the whole
# SDK graph unless needed. ``runpod_flash`` is a hard dependency, so the import is safe.
def _gpu_enum():
    """Return the ``GpuType`` enum from ``runpod_flash``, importing it on first use."""
    from runpod_flash import GpuType as gpu_type_enum

    return gpu_type_enum
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def gpu_classes() -> list[GpuClass]:
    """List the GPU classes RunPod Flash can provision.

    A class qualifies when its ``enum_member`` is truthy, i.e. it has a
    ``GpuType`` member.
    """
    from flash.providers.base import GPU_INFO

    all_classes = GPU_INFO.values()
    return [entry for entry in all_classes if entry.enum_member]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def flash_gpu(name: str):
    """Translate a friendly GPU name into the RunPod Flash ``GpuType`` member.

    Raises:
        UnsupportedGpuError: when the GPU's ``enum_member`` is falsy; the message
            lists the providers returned by ``providers_for(name)``.
    """
    info = get_gpu_info(name)
    if info.enum_member:
        gpu_type_enum = _gpu_enum()
        return getattr(gpu_type_enum, info.enum_member)
    raise UnsupportedGpuError(
        f"{info.name} is not available on RunPod (providers: {', '.join(providers_for(name))})"
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def gpu_api_id(name: str) -> str:
    """Return the RunPod API GPU id for a friendly name.

    The id is the ``GpuType`` enum value, e.g. 'NVIDIA GeForce RTX 4090'.
    """
    gpu_type = flash_gpu(name)
    return gpu_type.value
|