PyPI - arbor-ai - Versions diffs - 0.1.15__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

arbor-ai 0.1.15__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

arbor/server/api/models/schemas.py CHANGED Viewed

@@ -178,7 +178,6 @@ class ChatCompletionModel(BaseModel):
 class GRPORequest(BaseModel):
     model: str
-    update_inference_model: bool
     batch: List[dict]

arbor/server/api/routes/grpo.py CHANGED Viewed

@@ -38,14 +38,6 @@ def run_grpo_step(
     return GRPOStepResponse(status="success", **step_data)
-@router.post("/update_model", response_model=GRPOStepResponse)
-def update_model(request: Request):
-    grpo_manager = request.app.state.grpo_manager
-    inference_manager = request.app.state.inference_manager
-    update_model_data = grpo_manager.update_model(request, inference_manager)
-    return GRPOStepResponse(status="success", **update_model_data)
 @router.post("/checkpoint", response_model=GRPOCheckpointResponse)
 def checkpoint(request: Request, grpo_checkpoint_request: GRPOCheckpointRequest):
     grpo_manager = request.app.state.grpo_manager

arbor/server/api/routes/inference.py CHANGED Viewed

@@ -19,10 +19,25 @@ async def run_inference(
     with open(f"{request.app.state.log_dir}/inference_requests.jsonl", "a") as f:
         f.write(json.dumps({"id": request_id, "request": raw_json}) + "\n")
+    request_model = raw_json["model"]
+    prefixes = ["openai/", "huggingface/", "local:", "arbor:"]
+    for prefix in prefixes:
+        if request_model.startswith(prefix):
+            request_model = request_model[len(prefix) :]
     # if a server isnt running, launch one
     if not inference_manager.is_server_running():
         print("No model is running, launching model...")
-        inference_manager.launch(raw_json["model"])
+        inference_manager.launch(request_model)
+    # if the requested model is different from the launched model, swap the server
+    if request_model != inference_manager.launched_model:
+        print(
+            f"Model changed from {inference_manager.launched_model} to {request_model}, swapping server..."
+        )
+        inference_manager.kill()
+        inference_manager.launch(request_model)
+        print(f"Model swapped to {request_model}")
     # forward the request to the inference server
     completion = await inference_manager.run_inference(raw_json)

arbor/server/services/grpo_manager.py CHANGED Viewed

@@ -270,7 +270,6 @@ class GRPOManager:
                     print("Updating inference model...")
                     # There is a case where this status is sent multiple times
                     # We need to make sure we only update the model once
-                    self.current_model = status["output_dir"]
                     self.saving_model = False
                     print("Model update complete")
                 elif status["status"] == "checkpoint_saved":
@@ -308,14 +307,9 @@ class GRPOManager:
             print(f"Failed to send batch to training process: {e}")
             raise
-        return {
-            "current_model": self.current_model,
-            "checkpoints": self.checkpoints,
-            "last_checkpoint": self.last_checkpoint,
-        }
+        self.current_model = self.train_kwargs["output_dir"]
+        inference_manager.launched_model = self.current_model
-    def update_model(self, request, inference_manager: InferenceManager):
-        # No longer used
         return {
             "current_model": self.current_model,
             "checkpoints": self.checkpoints,

arbor/server/services/inference/vllm_client.py CHANGED Viewed

@@ -1,5 +1,6 @@
-# adapted from trl/extras/vllm_client.py (huggingface/trl)
+# adapted from Will Brown's verifiers library (https://github.com/willccbb/verifiers)
+import asyncio
 import atexit
 import logging
 import time
@@ -8,7 +9,6 @@ from typing import Optional
 import httpx
 import requests
 import torch
-from openai import OpenAI
 from requests import ConnectionError
 from requests.adapters import HTTPAdapter
 from torch import nn
@@ -31,7 +31,7 @@ class InferenceBlockedError(Exception):
     pass
-class VLLMClient(OpenAI):
+class VLLMClient:
     """
     A client class to interact with a vLLM server.
@@ -90,7 +90,7 @@ class VLLMClient(OpenAI):
                 "vLLM is not installed. Please install it with `pip install vllm`."
             )
-        super().__init__(base_url=f"http://{host}:{port}/v1", api_key="local")
+        self.base_url = f"http://{host}:{port}/v1"
         self.session = requests.Session()
         # Configure connection pooling to handle rapid requests better
         adapter = HTTPAdapter(
@@ -240,7 +240,7 @@ class VLLMClient(OpenAI):
                     response.raise_for_status()
                     return response.json()
-            except httpx.TimeoutError:
+            except httpx.TimeoutException:
                 logger.error("Request timed out")
                 raise
             except InferenceBlockedError:

arbor/server/services/inference/vllm_serve.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# adapted from Will Brown's verifiers library (https://github.com/willccbb/verifiers)
 """
 OpenAI-compatible vLLM server with weight synchronization.

arbor/server/services/scripts/grpo_training.py CHANGED Viewed

@@ -139,7 +139,11 @@ class ArborGRPOTrainer(GRPOTrainer):
                 maybe_apply_chat_template(
                     {
                         "prompt": example["messages"],
-                        "completion": [example["completion"]],
+                        "completion": (
+                            example["completion"]
+                            if isinstance(example["completion"], list)
+                            else [example["completion"]]
+                        ),
                     },
                     self.processing_class,
                 )
@@ -168,15 +172,15 @@ class ArborGRPOTrainer(GRPOTrainer):
             prompt_completion_text["completion"]
             for prompt_completion_text in prompt_completion_texts
         ]
-        completion_ids = self.processing_class(
+        completion_inputs = self.processing_class(
             completions_text,
             return_tensors="pt",
             padding=True,
             add_special_tokens=False,
         ).to(device)
         completion_ids, completion_mask = (
-            completion_ids["input_ids"],
-            completion_ids["attention_mask"],
+            completion_inputs["input_ids"],
+            completion_inputs["attention_mask"],
         )
         if self.max_prompt_length is not None:

{arbor_ai-0.1.15.dist-info → arbor_ai-0.2.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arbor-ai
-Version: 0.1.15
+Version: 0.2.1
 Summary: A framework for fine-tuning and managing language models
 Author-email: Noah Ziems <nziems2@nd.edu>
 Project-URL: Homepage, https://github.com/Ziems/arbor
@@ -40,7 +40,12 @@ Dynamic: license-file
 Install Arbor via pip:
 ```bash
-pip install arbor-ai
+pip install -U arbor-ai
+```
+Optionally, you can also install:
+```bash
+pip install flash-attn --no-build-isolation
 ```
 ---
@@ -74,6 +79,16 @@ Follow the DSPy tutorials here to see usage examples:
 ---
+### Troubleshooting
+**NCCL Errors**
+Certain GPU setups, particularly those with newer GPUs, seem to have issues with NCCL that cause Arbor to crash. Often these can be fixed by setting the following environment variables:
+```bash
+export NCCL_P2P_DISABLE=1
+export NCCL_IB_DISABLE=1
+```
 ## 🙏 Acknowledgements
 Arbor builds on the shoulders of great work. We extend our thanks to:

{arbor_ai-0.1.15.dist-info → arbor_ai-0.2.1.dist-info}/RECORD RENAMED Viewed

@@ -5,11 +5,11 @@ arbor/client/api.py,sha256=86bgHuGM_AvI1Uhic_QaCnpF4VFqXie9ZzxmbTXUPpQ,19
 arbor/server/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 arbor/server/main.py,sha256=tY4Vlaaj4oq1FTGYOkbFMGF0quLEeR-VBaKaXhQ5mEE,382
 arbor/server/api/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-arbor/server/api/models/schemas.py,sha256=KCHav1nPFbQEynrcO-MObhRmoOrdFvfGuVogApynOCA,6210
+arbor/server/api/models/schemas.py,sha256=394FHmIxAWVwED3z5tjnJCsyrgSWXg2SFWvMM1oKqOI,6177
 arbor/server/api/routes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arbor/server/api/routes/files.py,sha256=DQC_ogH5zlzhHZSAA4Cj5wzK07XBIBVs2Po91W9rcDY,1835
-arbor/server/api/routes/grpo.py,sha256=QrWwj44-EenOyDwtiAO7OJPPGe8CyNaxCUTDlqfJs4g,2338
-arbor/server/api/routes/inference.py,sha256=JI4lm7zWrUqgMadWA0JuTD13hq6kGQpTLcuklhOH7f8,1547
+arbor/server/api/routes/grpo.py,sha256=Yc4FxieuUbJ7Dbd-93uN4syQu9h2eQU4R9ZvnE_axRU,1982
+arbor/server/api/routes/inference.py,sha256=txLF4ANa0ZSaROrbvSaPZVFOSzn4so9e7mjNKnt2bcM,2182
 arbor/server/api/routes/jobs.py,sha256=BNdaSYUBJX6xSd6Pj6qx1DQJiZ5EKVxxbXDbEkfkCpw,3634
 arbor/server/core/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 arbor/server/core/config.py,sha256=Mx77S3ByIMvHmPDikQLcczhzA5so3Vrw_U4QefOiHOU,1257
@@ -17,26 +17,26 @@ arbor/server/core/logging.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 arbor/server/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arbor/server/services/dependencies.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arbor/server/services/file_manager.py,sha256=Z9z4A4EzvPauid_DBfpim401DDtuJy_TbX4twTWDJWI,12119
-arbor/server/services/grpo_manager.py,sha256=y5gOko_RmyjQqvzlR79_PPZgMwMwCMJiaeygCG5qS-A,18761
+arbor/server/services/grpo_manager.py,sha256=jY4kc7wlKKoi7RigjJiH1VaxX6qJCOxyEc0oYCkqPlQ,18549
 arbor/server/services/inference_manager.py,sha256=a1c5zYbjk6fPM3egX2McKv7ZWPN7c-QH_Qogu9iay90,9597
 arbor/server/services/job_manager.py,sha256=m_d4UPwN_82f7t7K443DaFpFoyv7JZSZKml8tawt1Bk,2186
 arbor/server/services/training_manager.py,sha256=oQdhpfxdgp_lCTb_lxhvjupdLrcg6HL3TEbct_q9F6I,21065
 arbor/server/services/comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arbor/server/services/comms/comms.py,sha256=3KN3mzwPvfW2_L5hq02JdAk6yOMyhY0_pBz-DDr5A3o,7694
 arbor/server/services/inference/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arbor/server/services/inference/vllm_client.py,sha256=X0v6zGHuaROGniWw_VCkzeWWuAHq0PlwtrFjTngCT4k,18285
-arbor/server/services/inference/vllm_serve.py,sha256=GdcaQStGKLj4J1kAnAnnI07R0X3A-bPoj7Tvagxsias,109457
+arbor/server/services/inference/vllm_client.py,sha256=06-VfdcwKqq8_ZRWaER3OnSVLtvL87bLdljSrkXfm-A,18269
+arbor/server/services/inference/vllm_serve.py,sha256=UZAGo7CyshR3-9fhXCTKhXeidqNqbY6LyU9DDNiX_Sw,109543
 arbor/server/services/scripts/dpo_training.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arbor/server/services/scripts/grpo_training.py,sha256=qjYSinOhi9-vvKY-gqGARwUgDQXYGDHlp9ZLwqKE1rw,31123
+arbor/server/services/scripts/grpo_training.py,sha256=6kXzMwn3rZXHdEn0xe_Kd9d7tbdYb76zE0zbi02xCm4,31314
 arbor/server/services/scripts/sft_training.py,sha256=jgDMxZn9RFH9ys_7OF9Is8pQ9V97O2KzWg22Gveh3yE,3410
 arbor/server/services/scripts/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arbor/server/services/scripts/utils/arg_parser.py,sha256=ur_iyhc_Ie00tjq63vK4Sdeu2PGKwe6Dh6Iax2vw9jc,1022
 arbor/server/services/scripts/utils/dataset.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arbor/server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arbor/server/utils/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arbor_ai-0.1.15.dist-info/licenses/LICENSE,sha256=5vFGrbOFeXXM83JV9o16w7ohH4WLeu3-57GocJSz8ow,1067
-arbor_ai-0.1.15.dist-info/METADATA,sha256=GMGq6nbWEbRZxsJG2u7DhnMj6qCSTvssMVUN4ASs2BA,2413
-arbor_ai-0.1.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-arbor_ai-0.1.15.dist-info/entry_points.txt,sha256=PGBX-MfNwfIl8UPFgsX3gjtXLqSogRhOktKMpZUysD0,40
-arbor_ai-0.1.15.dist-info/top_level.txt,sha256=jzWdp3BRYqvZDMFsPajrcftvvlluzVDErkD8IMRfhYs,6
-arbor_ai-0.1.15.dist-info/RECORD,,
+arbor_ai-0.2.1.dist-info/licenses/LICENSE,sha256=5vFGrbOFeXXM83JV9o16w7ohH4WLeu3-57GocJSz8ow,1067
+arbor_ai-0.2.1.dist-info/METADATA,sha256=34XAZBm8OLlsSBicLmRn_hhbltn0pDNlAj5WOjn9LtE,2791
+arbor_ai-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+arbor_ai-0.2.1.dist-info/entry_points.txt,sha256=PGBX-MfNwfIl8UPFgsX3gjtXLqSogRhOktKMpZUysD0,40
+arbor_ai-0.2.1.dist-info/top_level.txt,sha256=jzWdp3BRYqvZDMFsPajrcftvvlluzVDErkD8IMRfhYs,6
+arbor_ai-0.2.1.dist-info/RECORD,,

{arbor_ai-0.1.15.dist-info → arbor_ai-0.2.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{arbor_ai-0.1.15.dist-info → arbor_ai-0.2.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{arbor_ai-0.1.15.dist-info → arbor_ai-0.2.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{arbor_ai-0.1.15.dist-info → arbor_ai-0.2.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

arbor-ai 0.1.15__py3-none-any.whl → 0.2.1__py3-none-any.whl

arbor-ai 0.1.15__py3-none-any.whl → 0.2.1__py3-none-any.whl