PyPI - arbor-ai - Versions diffs - 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl - Mend

arbor-ai 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

arbor/server/api/routes/inference.py CHANGED Viewed

@@ -33,7 +33,7 @@ async def run_inference(
         raw_json["model"] = inference_manager.current_model
     # forward the request to the inference server
-    completion = inference_manager.run_inference(raw_json)
+    completion = await inference_manager.run_inference(raw_json)
     return completion

arbor/server/services/grpo_manager.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import asyncio
 import json
 import os
 import random
@@ -263,8 +264,21 @@ class GRPOManager:
         return self.current_model
     def update_model(self, request, inference_manager: InferenceManager):
-        # THIS IS HACKY AND NEEDS TO BE FIXED BEFORE RELEASE
+        if inference_manager._session:
+            # Create a new event loop if one doesn't exist
+            try:
+                loop = asyncio.get_event_loop()
+            except RuntimeError:
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+            # Run the session closure in the event loop
+            loop.run_until_complete(inference_manager._session.close())
+            inference_manager._session = None
+        inference_manager.inference_count = 0
         inference_manager.restarting = True
         self.model_saved_and_reload_requested = True
         self.server_comms_handler.send_command({"command": "save_model"})
         while self.model_saved_and_reload_requested:

arbor/server/services/inference_manager.py CHANGED Viewed

@@ -1,3 +1,5 @@
+import asyncio
+import json
 import os
 import signal
 import socket
@@ -8,6 +10,7 @@ import time
 from datetime import datetime
 from typing import Any, Dict, Optional
+import aiohttp
 import requests
 from arbor.server.core.config import Settings
@@ -23,6 +26,7 @@ class InferenceManager:
         self._shutting_down = False
         self.current_model = None
         self.inference_count = 0
+        self._session = None
         # Set up signal handler for graceful shutdown
         signal.signal(signal.SIGINT, self._signal_handler)
         signal.signal(signal.SIGTERM, self._signal_handler)
@@ -62,7 +66,7 @@ class InferenceManager:
         my_env["CUDA_VISIBLE_DEVICES"] = self.settings.arbor_config.inference.gpu_ids
         n_gpus = self.settings.arbor_config.inference.gpu_ids.count(",") + 1
         # command = f"vllm serve {model} --port {port} --gpu-memory-utilization 0.9 --tensor-parallel-size {n_gpus} --max_model_len 8192 --enable_prefix_caching"
-        command = f"python -m sglang_router.launch_server --model-path {model} --dp-size {n_gpus} --router-policy round_robin --port {port} --host 0.0.0.0"
+        command = f"python -m sglang_router.launch_server --model-path {model} --dp-size {n_gpus} --port {port} --host 0.0.0.0 --disable-radix-cache"
         print(f"Running command: {command}")
         # We will manually stream & capture logs.
@@ -171,7 +175,7 @@ class InferenceManager:
         print("Server killed.")
-    def run_inference(self, request_json: dict):
+    async def run_inference(self, request_json: dict):
         model = request_json["model"]
         prefixes = ["openai/", "huggingface/", "local:", "arbor:"]
         for prefix in prefixes:
@@ -193,16 +197,22 @@ class InferenceManager:
         if self.restarting:
             while self.restarting:
                 print("Inference is paused while server is restarting...")
-                time.sleep(5)
+                await asyncio.sleep(5)
             request_json["model"] = self.current_model
         url = f"{self.launch_kwargs['api_base']}/chat/completions"
         try:
             self.inference_count += 1
-            response = requests.post(url, json=request_json)
-            return response.json()
-        except requests.exceptions.ConnectionError:
-            print("Server disconnected...ignoring")
+            session = await self._ensure_session()
+            async with session.post(url, json=request_json) as response:
+                content = await response.content.read()
+                return json.loads(content)
+        except aiohttp.ClientError as e:
+            print(f"Connection error: {type(e).__name__}: {str(e)}")
+            # Try to close and recreate the session on error
+            if self._session:
+                await self._session.close()
+                self._session = None
             return None
         except Exception as e:
             print(f"Error during inference: {e}")
@@ -214,11 +224,19 @@ class InferenceManager:
         print("Restarting server with new model...")
         self.restarting = True
-        while self.inference_count > 0:
-            print(
-                f"Waiting for inference requests to finish... {self.inference_count} remaining"
-            )
-            time.sleep(5)
+        # Close existing session and reset inference count
+        if self._session:
+            # Create a new event loop if one doesn't exist
+            try:
+                loop = asyncio.get_event_loop()
+            except RuntimeError:
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+            # Run the session closure in the event loop
+            loop.run_until_complete(self._session.close())
+            self._session = None
+        self.inference_count = 0
         tik = time.time()
         self.kill()
@@ -236,6 +254,14 @@ class InferenceManager:
         self.restarting = False
         print(f"Time taken to update model: {tok - tik} seconds")
+    async def _ensure_session(self):
+        if self._session is None or self._session.closed:
+            timeout = aiohttp.ClientTimeout(
+                total=None
+            )  # No timeout...If it hangs, this might be the issue.
+            self._session = aiohttp.ClientSession(timeout=timeout)
+        return self._session
 def get_free_port() -> int:
     """

arbor/server/services/scripts/grpo_training.py CHANGED Viewed

@@ -351,10 +351,6 @@ class CommandMonitor:
                             output_dir=self.trainer.args.output_dir + "/adapter/"
                         )
-                        # base_model = AutoModelForCausalLM.from_pretrained(
-                        #     self.base_model_name
-                        # ).to(self.trainer.accelerator.device)
                         _model_to_merge = AutoPeftModelForCausalLM.from_pretrained(
                             self.trainer.args.output_dir + "/adapter/",
                             config=self.trainer.peft_config,

{arbor_ai-0.1.10.dist-info → arbor_ai-0.1.11.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arbor-ai
-Version: 0.1.10
+Version: 0.1.11
 Summary: A framework for fine-tuning and managing language models
 Author-email: Noah Ziems <nziems2@nd.edu>
 Project-URL: Homepage, https://github.com/Ziems/arbor

{arbor_ai-0.1.10.dist-info → arbor_ai-0.1.11.dist-info}/RECORD RENAMED Viewed

@@ -9,7 +9,7 @@ arbor/server/api/models/schemas.py,sha256=s_G8sSb05FjkKEqpKpLlqaEd8NysJddHibRHhc
 arbor/server/api/routes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arbor/server/api/routes/files.py,sha256=DQC_ogH5zlzhHZSAA4Cj5wzK07XBIBVs2Po91W9rcDY,1835
 arbor/server/api/routes/grpo.py,sha256=VuEvSOwwrHegn9qM-1nbHFmmUnnC_BMwnIHsfIdiJyI,1877
-arbor/server/api/routes/inference.py,sha256=xlP-FMpOJAiiPZkE470l9mCR0ujLki8RrcO9hmTQD-k,1662
+arbor/server/api/routes/inference.py,sha256=Zy4ciN6vdRgu0-sFFnEeTZB-4XnLjEDH-atU7roIKSs,1668
 arbor/server/api/routes/jobs.py,sha256=BNdaSYUBJX6xSd6Pj6qx1DQJiZ5EKVxxbXDbEkfkCpw,3634
 arbor/server/core/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 arbor/server/core/config.py,sha256=Mx77S3ByIMvHmPDikQLcczhzA5so3Vrw_U4QefOiHOU,1257
@@ -17,18 +17,18 @@ arbor/server/core/logging.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 arbor/server/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arbor/server/services/dependencies.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arbor/server/services/file_manager.py,sha256=Z9z4A4EzvPauid_DBfpim401DDtuJy_TbX4twTWDJWI,12119
-arbor/server/services/grpo_manager.py,sha256=50g90lV8qpol7fQp2SBTXUCrF5eOP8YdxDnMLM0XY0E,13311
-arbor/server/services/inference_manager.py,sha256=gHI-Biy3TtGkyWxIDKY-uqZZm_fiQJLktkPY8ezRvo8,9660
+arbor/server/services/grpo_manager.py,sha256=TAU2BMHgbCgiAvKNVd2Y8N20SR4qEms3lChA4Z0ZzyY,13777
+arbor/server/services/inference_manager.py,sha256=YVHXqwBm9vEmgKzKdMKQdLdw6qkUTl5BjHTnW-3yfo0,10699
 arbor/server/services/job_manager.py,sha256=m_d4UPwN_82f7t7K443DaFpFoyv7JZSZKml8tawt1Bk,2186
 arbor/server/services/training_manager.py,sha256=oQdhpfxdgp_lCTb_lxhvjupdLrcg6HL3TEbct_q9F6I,21065
 arbor/server/services/comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arbor/server/services/comms/comms.py,sha256=3KN3mzwPvfW2_L5hq02JdAk6yOMyhY0_pBz-DDr5A3o,7694
-arbor/server/services/scripts/grpo_training.py,sha256=V36pCMZDJj2DdzquxScOddi9zP8EVPGWN3HGiftFfrY,21082
+arbor/server/services/scripts/grpo_training.py,sha256=Q9jwnbRdXAv_jVgrChLX6IiB3BLZU1F3BP6mBV0DVik,20889
 arbor/server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arbor/server/utils/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arbor_ai-0.1.10.dist-info/licenses/LICENSE,sha256=5vFGrbOFeXXM83JV9o16w7ohH4WLeu3-57GocJSz8ow,1067
-arbor_ai-0.1.10.dist-info/METADATA,sha256=qnUBfdKczxenG5kPTcZgQVMnWimEUPExz7nONxBYpDQ,2413
-arbor_ai-0.1.10.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-arbor_ai-0.1.10.dist-info/entry_points.txt,sha256=PGBX-MfNwfIl8UPFgsX3gjtXLqSogRhOktKMpZUysD0,40
-arbor_ai-0.1.10.dist-info/top_level.txt,sha256=jzWdp3BRYqvZDMFsPajrcftvvlluzVDErkD8IMRfhYs,6
-arbor_ai-0.1.10.dist-info/RECORD,,
+arbor_ai-0.1.11.dist-info/licenses/LICENSE,sha256=5vFGrbOFeXXM83JV9o16w7ohH4WLeu3-57GocJSz8ow,1067
+arbor_ai-0.1.11.dist-info/METADATA,sha256=04deKUBx8A_5j4_OU39_09873sHhs-jKZwMOeRSU3GA,2413
+arbor_ai-0.1.11.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
+arbor_ai-0.1.11.dist-info/entry_points.txt,sha256=PGBX-MfNwfIl8UPFgsX3gjtXLqSogRhOktKMpZUysD0,40
+arbor_ai-0.1.11.dist-info/top_level.txt,sha256=jzWdp3BRYqvZDMFsPajrcftvvlluzVDErkD8IMRfhYs,6
+arbor_ai-0.1.11.dist-info/RECORD,,

{arbor_ai-0.1.10.dist-info → arbor_ai-0.1.11.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.3.1)
+Generator: setuptools (80.4.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{arbor_ai-0.1.10.dist-info → arbor_ai-0.1.11.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{arbor_ai-0.1.10.dist-info → arbor_ai-0.1.11.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{arbor_ai-0.1.10.dist-info → arbor_ai-0.1.11.dist-info}/top_level.txt RENAMED Viewed

File without changes

arbor-ai 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

arbor-ai 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl