PyPI - arbor-ai - Versions diffs - 0.1.14__py3-none-any.whl → 0.2__py3-none-any.whl - Mend

arbor-ai 0.1.14__py3-none-any.whl → 0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

arbor/cli.py +12 -0
arbor/server/api/models/schemas.py +0 -1
arbor/server/api/routes/grpo.py +4 -9
arbor/server/api/routes/inference.py +24 -14
arbor/server/services/grpo_manager.py +176 -103
arbor/server/services/inference/vllm_client.py +444 -0
arbor/server/services/inference/vllm_serve.py +2336 -0
arbor/server/services/inference_manager.py +145 -272
arbor/server/services/scripts/dpo_training.py +0 -0
arbor/server/services/scripts/grpo_training.py +165 -57
arbor/server/services/scripts/sft_training.py +109 -0
arbor/server/services/scripts/utils/__init__.py +0 -0
arbor/server/services/scripts/utils/arg_parser.py +31 -0
arbor/server/services/scripts/utils/dataset.py +0 -0
{arbor_ai-0.1.14.dist-info → arbor_ai-0.2.dist-info}/METADATA +10 -6
{arbor_ai-0.1.14.dist-info → arbor_ai-0.2.dist-info}/RECORD +20 -14
{arbor_ai-0.1.14.dist-info → arbor_ai-0.2.dist-info}/WHEEL +1 -1
arbor/server/services/inference/sgl_router_launch_server.py +0 -226
{arbor_ai-0.1.14.dist-info → arbor_ai-0.2.dist-info}/entry_points.txt +0 -0
{arbor_ai-0.1.14.dist-info → arbor_ai-0.2.dist-info}/licenses/LICENSE +0 -0
{arbor_ai-0.1.14.dist-info → arbor_ai-0.2.dist-info}/top_level.txt +0 -0

arbor/server/services/scripts/grpo_training.py CHANGED Viewed

@@ -5,13 +5,18 @@
 import argparse
 import json
+import os
 import random
+import shutil
+import signal
+import sys
 import threading
 import time
 from functools import lru_cache
 from typing import Any, List, Optional, Union
 import torch
+import trl.extras.vllm_client
 import zmq
 from accelerate import Accelerator
 from accelerate.utils import broadcast_object_list, gather, gather_object
@@ -32,6 +37,9 @@ from arbor.server.services.comms.comms import (
     ArborScriptCommsHandler,
     ArborServerCommsHandler,
 )
+from arbor.server.services.inference.vllm_client import VLLMClient
+trl.extras.vllm_client.VLLMClient = VLLMClient
 if is_wandb_available():
     import wandb
@@ -71,10 +79,10 @@ class ArborGRPOTrainer(GRPOTrainer):
         comms_handler: Optional[ArborScriptCommsHandler] = None,
         lora: Optional[bool] = False,
         # We do nothing with max_context_length right now
+        vllm_group_port: Optional[int] = None,
         max_context_length: Optional[int] = None,
         **kwargs,
     ):
         super().__init__(
             model=model,
             reward_funcs=[],
@@ -91,6 +99,33 @@ class ArborGRPOTrainer(GRPOTrainer):
         self.scale_rewards = scale_rewards
         self.comms_handler = comms_handler
+        self.vllm_client = None
+        args.use_vllm = True
+        self.use_vllm = True
+        if self.accelerator.is_main_process:
+            print(
+                f"Initializing vLLM client with server port {args.vllm_server_port} and group port {vllm_group_port}"
+            )
+            self.vllm_client = VLLMClient(
+                args.vllm_server_host,
+                args.vllm_server_port,
+                group_port=vllm_group_port,
+                connection_timeout=args.vllm_server_timeout,
+            )
+            self.vllm_client.init_communicator()
+        # vLLM specific sampling arguments
+        self.guided_decoding_regex = args.vllm_guided_decoding_regex
+        self._last_loaded_step = (
+            -1
+        )  # tag to avoid useless loading during grad accumulation
+        # When using vLLM, the main process is responsible for loading the model weights. This can cause process
+        # desynchronization and seems to lead to DeepSpeed hanging during initialization. To prevent this, we
+        # synchronize all processes after vLLM has been fully initialized.
+        self.accelerator.wait_for_everyone()
     def _generate_and_score_completions(
         self, batch: List[dict[str, Any]]
     ) -> dict[str, Union[torch.Tensor, Any]]:
@@ -104,7 +139,11 @@ class ArborGRPOTrainer(GRPOTrainer):
                 maybe_apply_chat_template(
                     {
                         "prompt": example["messages"],
-                        "completion": [example["completion"]],
+                        "completion": (
+                            example["completion"]
+                            if isinstance(example["completion"], list)
+                            else [example["completion"]]
+                        ),
                     },
                     self.processing_class,
                 )
@@ -133,15 +172,15 @@ class ArborGRPOTrainer(GRPOTrainer):
             prompt_completion_text["completion"]
             for prompt_completion_text in prompt_completion_texts
         ]
-        completion_ids = self.processing_class(
+        completion_inputs = self.processing_class(
             completions_text,
             return_tensors="pt",
             padding=True,
             add_special_tokens=False,
         ).to(device)
         completion_ids, completion_mask = (
-            completion_ids["input_ids"],
-            completion_ids["attention_mask"],
+            completion_inputs["input_ids"],
+            completion_inputs["attention_mask"],
         )
         if self.max_prompt_length is not None:
@@ -156,11 +195,6 @@ class ArborGRPOTrainer(GRPOTrainer):
             completion_ids = completion_ids[:, : self.max_completion_length]
             completion_mask = completion_mask[:, : self.max_completion_length]
-        # Keeping this for when we switch to vllm
-        # if self.state.global_step != self._last_loaded_step:
-        #     self._move_model_to_vllm()
-        #     self._last_loaded_step = self.state.global_step
         prompt_ids = broadcast_object_list(prompt_ids)
         prompt_mask = broadcast_object_list(prompt_mask)
         completion_ids = broadcast_object_list(completion_ids)
@@ -178,6 +212,9 @@ class ArborGRPOTrainer(GRPOTrainer):
         is_eos = completion_ids == self.processing_class.eos_token_id
+        # Sum along sequence dimension (dim=1) to get completion length per sequence, used for logging
+        completion_lengths = completion_mask.sum(1)
         # If mask_truncated_completions is enabled, zero out truncated completions in completion_mask
         if self.mask_truncated_completions:
             truncated_completions = ~is_eos.any(dim=1)
@@ -230,6 +267,10 @@ class ArborGRPOTrainer(GRPOTrainer):
         std_grouped_rewards = std_grouped_rewards.repeat_interleave(
             self.num_generations, dim=0
         )
+        is_std_zero = torch.isclose(
+            std_grouped_rewards, torch.zeros_like(std_grouped_rewards)
+        )
         advantages = rewards - mean_grouped_rewards
         if self.scale_rewards:
@@ -241,66 +282,72 @@ class ArborGRPOTrainer(GRPOTrainer):
             self.accelerator.process_index * len(batch),
             (self.accelerator.process_index + 1) * len(batch),
         )
+        all_process_advantages = (
+            advantages.clone()
+        )  # keep the aggregated advantages for logging
         advantages = advantages[process_slice]
         # Log the metrics
         if mode == "train":
             self.state.num_input_tokens_seen += (
-                self.accelerator.gather_for_metrics(attention_mask.sum()).sum().item()
+                self.accelerator.gather(attention_mask.sum()).sum().item()
             )
         self._metrics[mode]["num_tokens"] = [self.state.num_input_tokens_seen]
-        # log completion lengths, mean, min, max
-        agg_completion_mask = self.accelerator.gather_for_metrics(
-            completion_mask.sum(1)
-        )
+        # Log completion lengths, mean, min, max
+        agg_completion_lengths = self.accelerator.gather(completion_lengths)
         self._metrics[mode]["completions/mean_length"].append(
-            agg_completion_mask.float().mean().item()
+            agg_completion_lengths.float().mean().item()
         )
         self._metrics[mode]["completions/min_length"].append(
-            agg_completion_mask.float().min().item()
+            agg_completion_lengths.float().min().item()
         )
         self._metrics[mode]["completions/max_length"].append(
-            agg_completion_mask.float().max().item()
+            agg_completion_lengths.float().max().item()
         )
-        # identify sequences that terminated with EOS and log their lengths
-        agg_terminated_with_eos = self.accelerator.gather_for_metrics(is_eos.any(dim=1))
-        term_completion_mask = agg_completion_mask[agg_terminated_with_eos]
-        clipped_completions_ratio = 1 - len(term_completion_mask) / len(
-            agg_completion_mask
+        # Identify sequences that terminated with EOS and log their lengths
+        agg_terminated_with_eos = self.accelerator.gather(is_eos.any(dim=1))
+        term_completion_lengths = agg_completion_lengths[agg_terminated_with_eos]
+        clipped_completions_ratio = 1 - len(term_completion_lengths) / len(
+            agg_completion_lengths
         )
         self._metrics[mode]["completions/clipped_ratio"].append(
             clipped_completions_ratio
         )
-        if len(term_completion_mask) == 0:
-            # edge case where no completed sequences are found
-            term_completion_mask = torch.zeros(1, device=device)
+        if (
+            len(term_completion_lengths) == 0
+        ):  # edge case where no terminated sequences are found
+            term_completion_lengths = torch.zeros(1, device=device)
         self._metrics[mode]["completions/mean_terminated_length"].append(
-            term_completion_mask.float().mean().item()
+            term_completion_lengths.float().mean().item()
         )
         self._metrics[mode]["completions/min_terminated_length"].append(
-            term_completion_mask.float().min().item()
+            term_completion_lengths.float().min().item()
         )
         self._metrics[mode]["completions/max_terminated_length"].append(
-            term_completion_mask.float().max().item()
+            term_completion_lengths.float().max().item()
         )
-        # Calculate mean reward
+        # Calculate mean reward per function, but only for samples where the function was applied (non-NaN values)
         self._metrics[mode]["reward"].append(mean_grouped_rewards.mean().item())
         self._metrics[mode]["reward_std"].append(std_grouped_rewards.mean().item())
+        self._metrics[mode]["frac_reward_zero_std"].append(
+            is_std_zero.float().mean().item()
+        )
         # Log prompt and completion texts
         self._textual_logs["prompt"].extend(gather_object(prompts_text))
         self._textual_logs["completion"].extend(gather_object(completions_text))
+        self._textual_logs["advantages"].extend(all_process_advantages.tolist())
         return {
             "prompt_ids": prompt_ids,
             "prompt_mask": prompt_mask,
             "completion_ids": completion_ids,
             "completion_mask": completion_mask,
-            "old_per_token_logps": old_per_token_logps,
             "advantages": advantages,
+            "old_per_token_logps": old_per_token_logps,
         }
@@ -313,6 +360,30 @@ class LastStepTimeCallback(TrainerCallback):
         last_step_time = time.time()
+class WeightUpdateCallback(TrainerCallback):
+    """A callback that sends weight update completion status after each step"""
+    def __init__(self):
+        self.comms_handler = None
+        self.trainer = None
+    def set_comms_handler(self, comms_handler: ArborScriptCommsHandler):
+        self.comms_handler = comms_handler
+    def set_trainer(self, trainer):
+        self.trainer = trainer
+    def on_step_end(self, args, state, control, **kwargs):
+        if self.comms_handler and self.comms_handler.is_main_process and self.trainer:
+            if state.global_step != self.trainer._last_loaded_step:
+                print("Updating inference model...")
+                self.comms_handler.send_status({"status": "weight_update_start"})
+                self.trainer._move_model_to_vllm()
+                self.trainer._last_loaded_step = state.global_step
+                print("[DEBUG] Sending weight update completion status")
+                self.comms_handler.send_status({"status": "weight_update_complete"})
 class BlockingQueueDataset(Dataset):
     def __init__(
         self,
@@ -379,11 +450,6 @@ class CommandMonitor:
         )
         self.command_thread.start()
-        self.broadcast_thread = threading.Thread(
-            target=self._monitor_broadcasts, daemon=True
-        )
-        self.broadcast_thread.start()
     def _monitor_commands(self):
         """Background thread that monitors for commands from the server."""
         if not self.comms_handler:
@@ -478,6 +544,26 @@ class CommandMonitor:
                             output_dir=self.trainer.args.output_dir
                             + f"/checkpoints/{command.get('checkpoint_name')}/"
                         )
+                    # Copy checkpoint files to root output directory
+                    checkpoint_dir = (
+                        self.trainer.args.output_dir
+                        + f"/checkpoints/{command.get('checkpoint_name')}/"
+                    )
+                    root_dir = self.trainer.args.output_dir
+                    # Copy all files from checkpoint dir to root dir, overwriting if they exist
+                    # (effectively saves the checkpoint to the output directory)
+                    for item in os.listdir(checkpoint_dir):
+                        src = os.path.join(checkpoint_dir, item)
+                        dst = os.path.join(root_dir, item)
+                        if os.path.isdir(src):
+                            if os.path.exists(dst):
+                                shutil.rmtree(dst)
+                            shutil.copytree(src, dst)
+                        else:
+                            shutil.copy2(src, dst)
                     self.comms_handler.send_status(
                         {
                             "status": "checkpoint_saved",
@@ -486,31 +572,21 @@ class CommandMonitor:
                             + f"/checkpoints/{command.get('checkpoint_name')}/",
                         }
                     )
+                    self.comms_handler.send_status(
+                        {
+                            "status": "model_saved",
+                            "output_dir": self.trainer.args.output_dir,
+                        }
+                    )
+                elif command.get("command") == "terminate":
+                    print("TERMINATED")
+                    self.trainer.accelerator.end_training()
+                    self.comms_handler.send_status({"status": "terminated"})
         except Exception as e:
             print(e)
             self.comms_handler.send_status({"status": "error", "error": str(e)})
-    def _monitor_broadcasts(self):
-        """Background thread that monitors for broadcasts from the server."""
-        if not self.comms_handler:
-            return
-        try:
-            for broadcast in self.comms_handler.receive_broadcast():
-                print(f"!!!Received broadcast: {broadcast}")
-                if broadcast.get("message") == "terminate":
-                    # self.trainer.control.should_training_stop = True
-                    # self.comms_handler.send_status(
-                    #     {
-                    #         "status": "Received termination command",
-                    #         "process_id": self.trainer.accelerator.process_index,
-                    #     }
-                    # )
-                    if self.trainer.accelerator.is_main_process:
-                        self.trainer.accelerator.end_training()
-        except Exception as e:
-            self.comms_handler.send_status({"status": "error", "error": str(e)})
 def main():
     parser = argparse.ArgumentParser()
@@ -523,6 +599,8 @@ def main():
     pipe_args.add_argument("--data_port", type=int, required=True)
     pipe_args.add_argument("--broadcast_port", type=int, required=True)
     pipe_args.add_argument("--handshake_port", type=int, required=True)
+    pipe_args.add_argument("--vllm_group_port", type=int, required=True)
+    pipe_args.add_argument("--vllm_port", type=int, required=True)
     training_args = parser.add_argument_group("Training arguments")
     training_args.add_argument(
@@ -544,6 +622,11 @@ def main():
     args = parser.parse_args()
     if args.debug:
+        # python grpo_training.py --debug
+        #  --command_port 0 --status_port 0
+        #  --data_port 0 --broadcast_port 0
+        #  --handshake_port 0 --model Qwen/Qwen3-0.6B
+        #  --trl_train_kwargs '{"output_dir": ".", "report_to": "none"}'
         server_comms_handler = ArborServerCommsHandler(
             host=args.host,
         )
@@ -554,6 +637,11 @@ def main():
         args.broadcast_port = server_comms_handler.broadcast_port
         args.handshake_port = server_comms_handler.handshake_port
+        handshake_thread = threading.Thread(
+            target=server_comms_handler.wait_for_clients, args=(1,), daemon=True
+        )
+        handshake_thread.start()
         def debug_data_generator():
             tldr_dataset = load_dataset("trl-lib/tldr", split="train")
             idx = 0
@@ -636,15 +724,18 @@ def main():
         training_args = GRPOConfig(
             dataloader_num_workers=0,
             shuffle_dataset=False,
+            vllm_server_port=args.vllm_port,
             **trl_train_args,
         )
+        weight_update_callback = WeightUpdateCallback()
         trainer = ArborGRPOTrainer(
             model=args.model,
             args=training_args,
             train_dataset=BlockingQueueDataset(None, None),
-            callbacks=[LastStepTimeCallback()],
+            callbacks=[LastStepTimeCallback(), weight_update_callback],
             peft_config=lora_config,
+            vllm_group_port=args.vllm_group_port,
             **arbor_train_args,
         )
         # Create client handler
@@ -657,6 +748,8 @@ def main():
             handshake_port=args.handshake_port,
             is_main_process=trainer.accelerator.is_main_process,
         )
+        weight_update_callback.set_comms_handler(comms_handler)
+        weight_update_callback.set_trainer(trainer)
         trainer.comms_handler = comms_handler
         # Initialize the dataset with the actual accelerator
@@ -671,6 +764,18 @@ def main():
             base_model_name=args.model,
         )
+        # Add signal handlers for graceful shutdown
+        def signal_handler(signum, frame):
+            print(f"\nReceived signal {signum}. Initiating graceful shutdown...")
+            print("Ending training...")
+            trainer.accelerator.end_training()
+            print("Closing communications...")
+            comms_handler.close()
+            sys.exit(0)
+        signal.signal(signal.SIGINT, signal_handler)
+        signal.signal(signal.SIGTERM, signal_handler)
         print("Training...")
         trainer.train()
@@ -681,7 +786,10 @@ def main():
         comms_handler.send_status({"status": "error", "error": str(e)})
         raise e
     finally:
+        print("Cleaning up resources...")
+        trainer.accelerator.end_training()
         comms_handler.close()
+        print("Cleanup complete")
 if __name__ == "__main__":

arbor/server/services/scripts/sft_training.py ADDED Viewed

@@ -0,0 +1,109 @@
+import argparse
+import json
+import random
+import threading
+import time
+import torch
+import zmq
+from peft import LoraConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from trl import SFTConfig, SFTTrainer, setup_chat_format
+from arbor.server.services.scripts.utils.arg_parser import get_training_arg_parser
+def main():
+    parser = get_training_arg_parser()
+    parser.add_argument("--model", type=str, required=True)
+    parser.add_argument("--lora", type=bool, default=False)
+    args = parser.parse_args()
+    try:
+        trl_train_kwargs = {**(args.trl_config_kwargs or {})}
+        # TODO: These assertions should be done in some better way
+        assert "output_dir" in trl_train_kwargs, "output_dir is required"
+        if "gradient_checkpointing_kwargs" in trl_train_kwargs and args.lora:
+            print(
+                "Setting gradient_checkpointing_kwargs to use_reentrant=False for LORA training"
+            )
+            trl_train_kwargs["gradient_checkpointing_kwargs"] = {
+                **(trl_train_kwargs.get("gradient_checkpointing_kwargs") or {}),
+                "use_reentrant": False,
+            }
+        lora_config = None
+        if args.lora:
+            print("Using LORA for PEFT")
+            lora_config = LoraConfig(
+                r=16,
+                lora_alpha=64,
+                target_modules=[
+                    "q_proj",
+                    "k_proj",
+                    "v_proj",
+                    "o_proj",
+                    "up_proj",
+                    "down_proj",
+                    "gate_proj",
+                ],
+                task_type="CAUSAL_LM",
+                lora_dropout=0.05,
+                inference_mode=False,
+            )
+        training_args = GRPOConfig(
+            dataloader_num_workers=0,
+            shuffle_dataset=False,
+            **trl_train_args,
+        )
+        trainer = ArborGRPOTrainer(
+            model=args.model,
+            args=training_args,
+            train_dataset=BlockingQueueDataset(None, None),
+            callbacks=[LastStepTimeCallback()],
+            peft_config=lora_config,
+            **arbor_train_args,
+        )
+        # Create client handler
+        comms_handler = ArborScriptCommsHandler(
+            host=args.host,
+            command_port=args.command_port,
+            status_port=args.status_port,
+            data_port=args.data_port,
+            broadcast_port=args.broadcast_port,
+            handshake_port=args.handshake_port,
+            is_main_process=trainer.accelerator.is_main_process,
+        )
+        trainer.comms_handler = comms_handler
+        # Initialize the dataset with the actual accelerator
+        trainer.train_dataset = BlockingQueueDataset(
+            accelerator=trainer.accelerator,
+            comms_handler=trainer.comms_handler,
+        )
+        command_monitor = CommandMonitor(
+            comms_handler=comms_handler,
+            trainer=trainer,
+            base_model_name=args.model,
+        )
+        print("Training...")
+        trainer.train()
+    except KeyboardInterrupt:
+        print("\nReceived interrupt, shutting down...")
+    except Exception as e:
+        print(f"Error: {e}")
+        comms_handler.send_status({"status": "error", "error": str(e)})
+        raise e
+    finally:
+        trainer.accelerator.end_training()
+        comms_handler.close()
+if __name__ == "__main__":
+    main()

arbor/server/services/scripts/utils/__init__.py ADDED Viewed

File without changes

arbor/server/services/scripts/utils/arg_parser.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""The arg parser for the training scripts"""
+import argparse
+import json
+def get_training_arg_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--debug", action="store_true")
+    pipe_args = parser.add_argument_group("Comms arguments")
+    pipe_args.add_argument("--host", default="localhost")
+    pipe_args.add_argument("--command_port", type=int, required=True)
+    pipe_args.add_argument("--status_port", type=int, required=True)
+    pipe_args.add_argument("--data_port", type=int, required=True)
+    pipe_args.add_argument("--broadcast_port", type=int, required=True)
+    pipe_args.add_argument("--handshake_port", type=int, required=True)
+    training_args = parser.add_argument_group("Training arguments")
+    training_args.add_argument(
+        "--model",
+        type=str,
+        help="Model to use for training",
+    )
+    training_args.add_argument(
+        "--trl_config_kwargs",
+        type=json.loads,
+        help="Training configs as a JSON string",
+    )
+    return parser

arbor/server/services/scripts/utils/dataset.py ADDED Viewed

File without changes

{arbor_ai-0.1.14.dist-info → arbor_ai-0.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arbor-ai
-Version: 0.1.14
+Version: 0.2
 Summary: A framework for fine-tuning and managing language models
 Author-email: Noah Ziems <nziems2@nd.edu>
 Project-URL: Homepage, https://github.com/Ziems/arbor
@@ -8,21 +8,20 @@ Project-URL: Issues, https://github.com/Ziems/arbor/issues
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: torch>=2.6.0
 Requires-Dist: fastapi
 Requires-Dist: uvicorn
 Requires-Dist: click
 Requires-Dist: python-multipart
 Requires-Dist: pydantic-settings
-Requires-Dist: torch
+Requires-Dist: vllm>=0.8.5.post1
 Requires-Dist: transformers
-Requires-Dist: trl
+Requires-Dist: trl>=0.17.0
 Requires-Dist: peft
 Requires-Dist: ray>=2.9
 Requires-Dist: setuptools<77.0.0,>=76.0.0
 Requires-Dist: pyzmq>=26.4.0
 Requires-Dist: pyyaml>=6.0.2
-Requires-Dist: sglang[all]>=0.4.5.post3
-Requires-Dist: sglang-router
 Requires-Dist: wandb
 Dynamic: license-file
@@ -41,7 +40,12 @@ Dynamic: license-file
 Install Arbor via pip:
 ```bash
-pip install arbor-ai
+pip install -U arbor-ai
+```
+Optionally, you can also install:
+```bash
+pip install flash-attn --no-build-isolation
 ```
 ---

arbor-ai 0.1.14__py3-none-any.whl → 0.2__py3-none-any.whl

arbor-ai 0.1.14__py3-none-any.whl → 0.2__py3-none-any.whl