arbor-ai 0.2.2 (py3-none-any.whl) → 0.2.4 (py3-none-any.whl)
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- arbor/cli.py +82 -1
- arbor/server/services/inference/vllm_serve.py +2 -2
- {arbor_ai-0.2.2.dist-info → arbor_ai-0.2.4.dist-info}/METADATA +10 -2
- {arbor_ai-0.2.2.dist-info → arbor_ai-0.2.4.dist-info}/RECORD +8 -8
- {arbor_ai-0.2.2.dist-info → arbor_ai-0.2.4.dist-info}/WHEEL +0 -0
- {arbor_ai-0.2.2.dist-info → arbor_ai-0.2.4.dist-info}/entry_points.txt +0 -0
- {arbor_ai-0.2.2.dist-info → arbor_ai-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {arbor_ai-0.2.2.dist-info → arbor_ai-0.2.4.dist-info}/top_level.txt +0 -0
arbor/cli.py
CHANGED
@@ -4,6 +4,7 @@ from datetime import datetime
|
|
4
4
|
import click
|
5
5
|
import uvicorn
|
6
6
|
|
7
|
+
from arbor.client.arbor_client import create_app
|
7
8
|
from arbor.server.core.config import Config
|
8
9
|
from arbor.server.core.config_manager import ConfigManager
|
9
10
|
from arbor.server.main import app
|
@@ -13,13 +14,92 @@ from arbor.server.services.health_manager import HealthManager
|
|
13
14
|
from arbor.server.services.inference_manager import InferenceManager
|
14
15
|
from arbor.server.services.job_manager import JobManager
|
15
16
|
from arbor.server.services.training_manager import TrainingManager
|
16
|
-
from arbor.
|
17
|
+
from arbor.server.utils.logging import (
|
18
|
+
get_logger,
|
19
|
+
log_configuration,
|
20
|
+
log_system_info,
|
21
|
+
setup_logging,
|
22
|
+
)
|
23
|
+
|
24
|
+
|
25
|
+
def make_log_dir(storage_path: str):
|
26
|
+
# Create a timestamped log directory under the storage path
|
27
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
28
|
+
log_dir = os.path.join(storage_path, "logs", timestamp)
|
29
|
+
os.makedirs(log_dir, exist_ok=True)
|
30
|
+
return log_dir
|
17
31
|
|
18
32
|
|
19
33
|
@click.group()
|
20
34
|
def cli():
|
21
35
|
pass
|
22
36
|
|
37
|
+
|
38
|
+
def create_app(arbor_config_path: str):
|
39
|
+
"""Create and configure the Arbor API application
|
40
|
+
|
41
|
+
Args:
|
42
|
+
arbor_config_path (str): Path to config file
|
43
|
+
|
44
|
+
Returns:
|
45
|
+
FastAPI: Configured FastAPI application
|
46
|
+
"""
|
47
|
+
# Create new settings instance with overrides
|
48
|
+
config = Config.load_config_from_yaml(arbor_config_path)
|
49
|
+
log_dir = make_log_dir(config.STORAGE_PATH)
|
50
|
+
app.state.log_dir = log_dir
|
51
|
+
|
52
|
+
# Setup logging
|
53
|
+
logging_config = setup_logging(
|
54
|
+
log_level="INFO",
|
55
|
+
log_dir=log_dir,
|
56
|
+
enable_file_logging=True,
|
57
|
+
enable_console_logging=True,
|
58
|
+
)
|
59
|
+
|
60
|
+
# Log configuration and system info
|
61
|
+
log_configuration(logging_config)
|
62
|
+
|
63
|
+
# Get logger for this module
|
64
|
+
logger = get_logger(__name__)
|
65
|
+
logger.info("Initializing Arbor application...")
|
66
|
+
|
67
|
+
# Log system information via health manager
|
68
|
+
health_manager = HealthManager(config=config)
|
69
|
+
try:
|
70
|
+
versions = config.get_system_versions()
|
71
|
+
logger.info("System versions:")
|
72
|
+
for category, version_info in versions.items():
|
73
|
+
if isinstance(version_info, dict):
|
74
|
+
logger.info(f" {category}:")
|
75
|
+
for lib, version in version_info.items():
|
76
|
+
logger.info(f" {lib}: {version}")
|
77
|
+
else:
|
78
|
+
logger.info(f" {category}: {version_info}")
|
79
|
+
except Exception as e:
|
80
|
+
logger.warning(f"Could not log system versions: {e}")
|
81
|
+
|
82
|
+
# Initialize services with settings
|
83
|
+
logger.info("Initializing services...")
|
84
|
+
file_manager = FileManager(config=config)
|
85
|
+
job_manager = JobManager(config=config)
|
86
|
+
training_manager = TrainingManager(config=config)
|
87
|
+
inference_manager = InferenceManager(config=config)
|
88
|
+
grpo_manager = GRPOManager(config=config)
|
89
|
+
|
90
|
+
# Inject settings into app state
|
91
|
+
app.state.config = config
|
92
|
+
app.state.file_manager = file_manager
|
93
|
+
app.state.job_manager = job_manager
|
94
|
+
app.state.training_manager = training_manager
|
95
|
+
app.state.inference_manager = inference_manager
|
96
|
+
app.state.grpo_manager = grpo_manager
|
97
|
+
app.state.health_manager = health_manager
|
98
|
+
|
99
|
+
logger.info("Arbor application initialized successfully")
|
100
|
+
return app
|
101
|
+
|
102
|
+
|
23
103
|
def start_server(host="0.0.0.0", port=7453, storage_path="./storage", timeout=10):
|
24
104
|
"""Start the Arbor API server with a single function call"""
|
25
105
|
import socket
|
@@ -93,6 +173,7 @@ def serve(host, port, arbor_config):
|
|
93
173
|
# configure_uvicorn_logging()
|
94
174
|
uvicorn.run(app, host=host, port=port)
|
95
175
|
except Exception as e:
|
176
|
+
|
96
177
|
click.echo(f"Failed to start server: {e}", err=True)
|
97
178
|
raise click.Abort()
|
98
179
|
|
@@ -352,7 +352,7 @@ class ScriptArguments:
|
|
352
352
|
enable_prefix_caching (`bool` or `None`, *optional*, defaults to `None`):
|
353
353
|
Whether to enable prefix caching in vLLM. If set to `True`, ensure that the model and the hardware support
|
354
354
|
this feature.
|
355
|
-
enforce_eager (`bool` or `None`, *optional*, defaults to `
|
355
|
+
enforce_eager (`bool` or `None`, *optional*, defaults to `False`):
|
356
356
|
Whether to enforce eager execution. If set to `True`, we will disable CUDA graph and always execute the
|
357
357
|
model in eager mode. If `False` (default behavior), we will use CUDA graph and eager execution in hybrid.
|
358
358
|
kv_cache_dtype (`str`, *optional*, defaults to `"auto"`):
|
@@ -423,7 +423,7 @@ class ScriptArguments:
|
|
423
423
|
},
|
424
424
|
)
|
425
425
|
enforce_eager: Optional[bool] = field(
|
426
|
-
default=
|
426
|
+
default=False,
|
427
427
|
metadata={
|
428
428
|
"help": "Whether to enforce eager execution. If set to `True`, we will disable CUDA graph and always "
|
429
429
|
"execute the model in eager mode. If `False` (default behavior), we will use CUDA graph and eager "
|
@@ -1,11 +1,11 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: arbor-ai
|
3
|
-
Version: 0.2.2
|
3
|
+
Version: 0.2.4
|
4
4
|
Summary: A framework for fine-tuning and managing language models
|
5
5
|
Author-email: Noah Ziems <nziems2@nd.edu>
|
6
6
|
Project-URL: Homepage, https://github.com/Ziems/arbor
|
7
7
|
Project-URL: Issues, https://github.com/Ziems/arbor/issues
|
8
|
-
Requires-Python: >=3.
|
8
|
+
Requires-Python: >=3.11
|
9
9
|
Description-Content-Type: text/markdown
|
10
10
|
License-File: LICENSE
|
11
11
|
Requires-Dist: torch>=2.6.0
|
@@ -79,6 +79,14 @@ export NCCL_P2P_DISABLE=1
|
|
79
79
|
export NCCL_IB_DISABLE=1
|
80
80
|
```
|
81
81
|
|
82
|
+
**NVCC**
|
83
|
+
If you run into issues, double check that you have [nvcc](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/) installed:
|
84
|
+
```bash
|
85
|
+
nvcc --version
|
86
|
+
```
|
87
|
+
If you don't have admin permissions, you can often install nvcc using conda.
|
88
|
+
|
89
|
+
|
82
90
|
## 🙏 Acknowledgements
|
83
91
|
|
84
92
|
Arbor builds on the shoulders of great work. We extend our thanks to:
|
@@ -1,5 +1,5 @@
|
|
1
1
|
arbor/__init__.py,sha256=DKqMupRY_1oOt7WfwU_6g4FVbTBJjU83p7yEGC-JAhs,458
|
2
|
-
arbor/cli.py,sha256=
|
2
|
+
arbor/cli.py,sha256=wTyAK1uN7X9r4rinQVBhNJ8LaIj_100X9EF_Csw5Yzk,7453
|
3
3
|
arbor/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
arbor/client/api.py,sha256=86bgHuGM_AvI1Uhic_QaCnpF4VFqXie9ZzxmbTXUPpQ,19
|
5
5
|
arbor/client/arbor_client.py,sha256=laGmeTMyGzwKOBGjpOWEZdDXTwsp_MUhp6umiLufncs,8320
|
@@ -28,7 +28,7 @@ arbor/server/services/comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
|
|
28
28
|
arbor/server/services/comms/comms.py,sha256=zTV1Wo4FGnOcl5tIjBOfcuvDPMu-VTSKBlDBXV4h6LY,7833
|
29
29
|
arbor/server/services/inference/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
30
30
|
arbor/server/services/inference/vllm_client.py,sha256=EwVuv4bUSK2rOGEL6YdGSfaYtA4NsZfTwWw_KKfMAiQ,18367
|
31
|
-
arbor/server/services/inference/vllm_serve.py,sha256=
|
31
|
+
arbor/server/services/inference/vllm_serve.py,sha256=etg7EhihRoPGl4fE-g0I5wYIhOEwLy2jC9mpgB3Agbw,109545
|
32
32
|
arbor/server/services/scripts/dpo_training.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
33
|
arbor/server/services/scripts/grpo_training.py,sha256=XJZ3PBJdXRP__XfiQBtqEqjRE4YKjjYrJ4ze5W9SRnY,22160
|
34
34
|
arbor/server/services/scripts/mmgrpo_training.py,sha256=aDaTGkyfceA-jhAdzfpehAzuSSGIT8gdfD_5z9IoBEk,19373
|
@@ -43,9 +43,9 @@ arbor/server/services/scripts/utils/mock_server.py,sha256=NX38oBtxEVS9-kreayTCSz
|
|
43
43
|
arbor/server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
44
|
arbor/server/utils/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
45
|
arbor/server/utils/logging.py,sha256=UsG3cYgNk04pXadf5-NcYxvI8qYjB6o91N_P08FuoIg,10368
|
46
|
-
arbor_ai-0.2.
|
47
|
-
arbor_ai-0.2.
|
48
|
-
arbor_ai-0.2.
|
49
|
-
arbor_ai-0.2.
|
50
|
-
arbor_ai-0.2.
|
51
|
-
arbor_ai-0.2.
|
46
|
+
arbor_ai-0.2.4.dist-info/licenses/LICENSE,sha256=5vFGrbOFeXXM83JV9o16w7ohH4WLeu3-57GocJSz8ow,1067
|
47
|
+
arbor_ai-0.2.4.dist-info/METADATA,sha256=Tb9KSDqEDVsueaPIAJk80g1dGlHjeIiF7bb0mQGEFNM,3017
|
48
|
+
arbor_ai-0.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
49
|
+
arbor_ai-0.2.4.dist-info/entry_points.txt,sha256=PGBX-MfNwfIl8UPFgsX3gjtXLqSogRhOktKMpZUysD0,40
|
50
|
+
arbor_ai-0.2.4.dist-info/top_level.txt,sha256=jzWdp3BRYqvZDMFsPajrcftvvlluzVDErkD8IMRfhYs,6
|
51
|
+
arbor_ai-0.2.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|