ob-metaflow-extensions 1.1.151__py2.py3-none-any.whl → 1.4.33__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow_extensions/outerbounds/__init__.py +1 -1
- metaflow_extensions/outerbounds/plugins/__init__.py +17 -3
- metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/app_deploy_decorator.py +146 -0
- metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +10 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py +1200 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +146 -0
- metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +958 -0
- metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +12 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +161 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +868 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +288 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +139 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +398 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1088 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
- metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
- metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +303 -0
- metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
- metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +87 -0
- metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
- metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
- metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +78 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +9 -77
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +7 -78
- metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +110 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +17 -3
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +1 -0
- metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +18 -44
- metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
- metaflow_extensions/outerbounds/plugins/nim/card.py +1 -6
- metaflow_extensions/outerbounds/plugins/nim/{__init__.py → nim_decorator.py} +13 -49
- metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +294 -233
- metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
- metaflow_extensions/outerbounds/plugins/nvcf/constants.py +2 -2
- metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +32 -8
- metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +1 -1
- metaflow_extensions/outerbounds/plugins/ollama/__init__.py +171 -16
- metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
- metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1710 -114
- metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
- metaflow_extensions/outerbounds/plugins/optuna/__init__.py +48 -0
- metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +6 -3
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +13 -7
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +8 -2
- metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
- metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
- metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
- metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
- metaflow_extensions/outerbounds/remote_config.py +27 -3
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +86 -2
- metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
- metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
- {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/METADATA +2 -2
- ob_metaflow_extensions-1.4.33.dist-info/RECORD +134 -0
- metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -5
- ob_metaflow_extensions-1.1.151.dist-info/RECORD +0 -74
- {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/WHEEL +0 -0
- {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/top_level.txt +0 -0
|
@@ -1,64 +1,31 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
import os, time
|
|
4
|
-
from metaflow.decorators import StepDecorator
|
|
1
|
+
import os
|
|
2
|
+
import time
|
|
5
3
|
from metaflow import current
|
|
6
|
-
|
|
4
|
+
from .utils import get_storage_path, NIM_MONITOR_LOCAL_STORAGE_ROOT
|
|
7
5
|
from .nim_manager import NimManager
|
|
6
|
+
from metaflow.decorators import StepDecorator
|
|
8
7
|
from .card import NimMetricsRefresher
|
|
9
|
-
from .utilities import get_storage_path, NIM_MONITOR_LOCAL_STORAGE_ROOT
|
|
10
|
-
from ..card_utilities.async_cards import AsyncPeriodicRefresher
|
|
11
8
|
from ..card_utilities.injector import CardDecoratorInjector
|
|
9
|
+
from ..card_utilities.async_cards import AsyncPeriodicRefresher
|
|
12
10
|
|
|
13
11
|
|
|
14
12
|
class NimDecorator(StepDecorator, CardDecoratorInjector):
|
|
15
|
-
"""
|
|
16
|
-
This decorator is used to run NIM containers in Metaflow tasks as sidecars.
|
|
17
|
-
|
|
18
|
-
User code call
|
|
19
|
-
-----------
|
|
20
|
-
@nim(
|
|
21
|
-
models=['meta/llama3-8b-instruct', 'meta/llama3-70b-instruct'],
|
|
22
|
-
backend='managed'
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
Valid backend options
|
|
26
|
-
---------------------
|
|
27
|
-
- 'managed': Outerbounds selects a compute provider based on the model.
|
|
28
|
-
|
|
29
|
-
Valid model options
|
|
30
|
-
----------------
|
|
31
|
-
- 'meta/llama3-8b-instruct': 8B parameter model
|
|
32
|
-
- 'meta/llama3-70b-instruct': 70B parameter model
|
|
33
|
-
- any model here: https://nvcf.ngc.nvidia.com/functions?filter=nvidia-functions
|
|
34
|
-
|
|
35
|
-
Parameters
|
|
36
|
-
----------
|
|
37
|
-
models: list[NIM]
|
|
38
|
-
List of NIM containers running models in sidecars.
|
|
39
|
-
backend: str
|
|
40
|
-
Compute provider to run the NIM container.
|
|
41
|
-
queue_timeout : int
|
|
42
|
-
Time to keep the job in NVCF's queue.
|
|
43
|
-
"""
|
|
44
|
-
|
|
45
13
|
name = "nim"
|
|
14
|
+
|
|
46
15
|
defaults = {
|
|
47
16
|
"models": [],
|
|
48
|
-
"backend": "managed",
|
|
49
17
|
"monitor": True,
|
|
50
18
|
"persist_db": False,
|
|
51
|
-
"queue_timeout": 5 * 24 * 3600, # Default 5 days in seconds
|
|
52
19
|
}
|
|
53
20
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
21
|
+
# Refer https://github.com/Netflix/metaflow/blob/master/docs/lifecycle.png
|
|
22
|
+
# to understand where these functions are invoked in the lifecycle of a
|
|
23
|
+
# Metaflow flow.
|
|
24
|
+
def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
|
|
58
25
|
if self.attributes["monitor"]:
|
|
59
26
|
self.attach_card_decorator(
|
|
60
27
|
flow,
|
|
61
|
-
|
|
28
|
+
step,
|
|
62
29
|
NimMetricsRefresher.CARD_ID,
|
|
63
30
|
"blank",
|
|
64
31
|
refresh_interval=4.0,
|
|
@@ -68,11 +35,9 @@ class NimDecorator(StepDecorator, CardDecoratorInjector):
|
|
|
68
35
|
{
|
|
69
36
|
"nim": NimManager(
|
|
70
37
|
models=self.attributes["models"],
|
|
71
|
-
backend=self.attributes["backend"],
|
|
72
38
|
flow=flow,
|
|
73
|
-
step_name=
|
|
39
|
+
step_name=step,
|
|
74
40
|
monitor=self.attributes["monitor"],
|
|
75
|
-
queue_timeout=self.attributes["queue_timeout"],
|
|
76
41
|
)
|
|
77
42
|
}
|
|
78
43
|
)
|
|
@@ -81,15 +46,14 @@ class NimDecorator(StepDecorator, CardDecoratorInjector):
|
|
|
81
46
|
self, step_func, flow, graph, retry_count, max_user_code_retries, ubf_context
|
|
82
47
|
):
|
|
83
48
|
if self.attributes["monitor"]:
|
|
84
|
-
|
|
85
49
|
import sqlite3
|
|
86
|
-
from metaflow import current
|
|
87
50
|
|
|
88
51
|
file_path = get_storage_path(current.task_id)
|
|
89
52
|
if os.path.exists(file_path):
|
|
90
53
|
os.remove(file_path)
|
|
91
54
|
os.makedirs(NIM_MONITOR_LOCAL_STORAGE_ROOT, exist_ok=True)
|
|
92
55
|
conn = sqlite3.connect(file_path)
|
|
56
|
+
|
|
93
57
|
cursor = conn.cursor()
|
|
94
58
|
cursor.execute(
|
|
95
59
|
"""
|