ai-nk-cce 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. ai_nk_cce-0.1.0.dist-info/METADATA +118 -0
  2. ai_nk_cce-0.1.0.dist-info/RECORD +46 -0
  3. ai_nk_cce-0.1.0.dist-info/WHEEL +4 -0
  4. api/__init__.py +0 -0
  5. api/mpcdf_vllm.py +94 -0
  6. evals/nk_model.py +277 -0
  7. model/README.md +64 -0
  8. model/config/dataset_conv_v1.yml +9 -0
  9. model/config/dataset_conv_v2_m2.yml +9 -0
  10. model/config/dataset_conv_v3_m2_assembl_nearest.yml +9 -0
  11. model/config/dataset_debug.yml +9 -0
  12. model/config/dataset_v4_int_format.yml +9 -0
  13. model/config/dataset_v5.yml +9 -0
  14. model/config/inference.yml +7 -0
  15. model/config/train.yml +24 -0
  16. model/config/train_debug.yml +19 -0
  17. model/config/train_from_checkpoint.yml +24 -0
  18. model/config/train_from_checkpoint_debug.yml +19 -0
  19. model/config/train_grpo.yml +30 -0
  20. model/config/train_grpo_debug.yml +30 -0
  21. model/config/train_grpo_debug_vllm.yml +32 -0
  22. model/config.py +54 -0
  23. model/dataset.py +324 -0
  24. model/inference.py +51 -0
  25. model/nk_assistant.py +207 -0
  26. model/parser.py +70 -0
  27. model/run_slurm.py +335 -0
  28. model/score.ipynb +596 -0
  29. model/scripts/template.slurm +54 -0
  30. model/scripts/template_rl.slurm +54 -0
  31. model/train.py +293 -0
  32. nk_model/__init__.py +0 -0
  33. nk_model/assembler.py +112 -0
  34. nk_model/biased_prediction_agent.py +389 -0
  35. nk_model/dataset.py +434 -0
  36. nk_model/enums.py +21 -0
  37. nk_model/landscape_cache.py +149 -0
  38. nk_model/models.py +172 -0
  39. nk_model/nk_landscape.py +498 -0
  40. simulation/hill_climber_simulation.py +211 -0
  41. simulation/hill_climber_vs_ai_simulation.py +132 -0
  42. simulation/landscape_selection.py +179 -0
  43. utils/__init__.py +0 -0
  44. utils/binary_conversion.py +128 -0
  45. utils/logging.py +33 -0
  46. utils/utils.py +51 -0
@@ -0,0 +1,54 @@
+ #!/bin/bash -l
+ #SBATCH --output {output_dir}/slurm-%x-%j.out
+ #SBATCH --error {output_dir}/slurm-%x-%j.out
+ #SBATCH --chdir ./
+ #SBATCH --job-name {job_name}/{job_id}
+ #
+ #SBATCH --nodes={n_nodes}
+ #SBATCH --tasks-per-node=1
+ #SBATCH --cpus-per-task={n_cpu}
+ #SBATCH --mem={memory}
+ #
+ #SBATCH --constraint="gpu"
+ #SBATCH --gres=gpu:a100:{n_gpu}
+ #SBATCH --partition=gpu
+
+ # Wall clock limit (max is 24 hours):
+ #SBATCH --time={time}
+
+ module purge
+ module load apptainer
+
+ source .env
+
+ # Create the Hugging Face cache directory if it doesn't exist
+ mkdir -p ~/.cache/huggingface
+
+ echo "Running training using the image: {image}"
+ echo "Running training using the config: {config_file}"
+
+ srun apptainer exec \
+     --nv \
+     --contain \
+     --cleanenv \
+     --pwd /root/llm-strategic-tuning \
+     --bind .:/root/llm-strategic-tuning \
+     --bind ~/.cache/huggingface:/root/.cache/huggingface \
+     --bind /ptmp:/ptmp \
+     --env HUGGING_FACE_HUB_TOKEN="$HUGGINGFACE_TOKEN" \
+     --env WANDB_API_KEY="$WANDB_API_KEY" \
+     --env WANDB_ENTITY="chm-ml" \
+     --env WANDB_PROJECT="{project_name}" \
+     --env WANDB_RUN_GROUP="{group_name}" \
+     --env WANDB_NAME="{job_name}/{job_id}" \
+     --env NCCL_DEBUG="INFO" \
+     --env NCCL_BLOCKING_WAIT="0" \
+     --env HF_HOME="/root/.cache/huggingface" \
+     {image} \
+     bash -c "python -m torch.distributed.run \
+         --nnodes=\"$SLURM_NNODES\" \
+         --nproc-per-node=gpu \
+         --rdzv-id=\"$SLURM_JOBID\" \
+         --rdzv-endpoint=\$(scontrol show hostnames \"$SLURM_JOB_NODELIST\" | head -n 1) \
+         --rdzv-backend=\"c10d\" \
+         {script} --config {config_file} --rl"
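
The curly-brace fields above are Python str.format placeholders; model/run_slurm.py (listed above) presumably renders them before submission. A minimal sketch of such a rendering step, with illustrative values that are assumptions rather than defaults shipped in the package:

    from pathlib import Path

    # Hypothetical rendering of the template; the real logic lives in
    # model/run_slurm.py, and these field values are illustrative only.
    template = Path("model/scripts/template_rl.slurm").read_text()
    job_script = template.format(
        output_dir="./runs/exp1",
        job_name="grpo",
        job_id="001",
        n_nodes=1,
        n_cpu=18,
        memory="120G",
        n_gpu=4,
        time="24:00:00",
        image="/ptmp/containers/train.sif",
        project_name="nk-grpo",
        group_name="debug",
        script="model/train.py",
        config_file="model/config/train_grpo.yml",
    )
    Path("job.slurm").write_text(job_script)  # then: sbatch job.slurm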
model/train.py ADDED
@@ -0,0 +1,293 @@
+ import argparse
+ import logging
+ from pathlib import Path
+ from typing import List
+
+ import numpy as np
+ from datasets import load_from_disk
+ from torch.nn.utils.rnn import pad_sequence
+ from transformers import Trainer, TrainingArguments
+ from trl import GRPOConfig, GRPOTrainer
+
+ from src.model.config import TrainConfig
+ from src.model.nk_assistant import NKAssistant
+ from src.utils.utils import load_config_from_yaml
+
+ logger = logging.getLogger(__name__)
+
+
+ def tokenize_function(row, tokenizer):
+     context = row["context"]
+     target = row["target"]
+     full_input = context + target
+
+     # Tokenize the context separately so its length is known for masking
+     context_tokens = tokenizer(context, add_special_tokens=False)["input_ids"]
+
+     # Tokenize the combined text; padding is deferred to the data collator
+     tokenized = tokenizer(full_input, padding=False, return_tensors="np")
+
+     # Create labels and mask the context part
+     labels = tokenized["input_ids"].copy()
+     context_length = len(context_tokens)
+
+     # Mask context tokens by setting them to -100, so the loss is computed
+     # only on the target tokens
+     labels[:, :context_length] = -100
+
+     tokenized["labels"] = labels
+
+     return {
+         "input_ids": tokenized["input_ids"].squeeze(0),
+         "attention_mask": tokenized["attention_mask"].squeeze(0),
+         "labels": labels.squeeze(0),
+     }
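
As a quick illustration of the masking, a minimal sketch assuming any HuggingFace tokenizer ("gpt2" here is an arbitrary stand-in, not the model used by this package). Note the approach assumes the context's tokenization is a prefix of the combined tokenization, which holds when the context ends on a clean token boundary:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    row = {"context": "state: ", "target": "1,0,1"}
    out = tokenize_function(row, tokenizer)
    # Context positions are -100 and ignored by the cross-entropy loss;
    # only the target positions contribute to training.
    print(out["labels"])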
+
+
+ def custom_data_collator(features, tokenizer):
+     input_ids = [f["input_ids"].clone().detach() for f in features]
+     attention_mask = [f["attention_mask"].clone().detach() for f in features]
+     labels = [f["labels"].clone().detach() for f in features]
+
+     batch = {
+         "input_ids": pad_sequence(
+             input_ids, batch_first=True, padding_value=tokenizer.pad_token_id
+         ),
+         "attention_mask": pad_sequence(
+             attention_mask, batch_first=True, padding_value=0
+         ),
+         "labels": pad_sequence(labels, batch_first=True, padding_value=-100),
+     }
+     return batch
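
For reference, a toy batch run through the collator above, showing how ragged sequences are padded (the tokenizer is stubbed here purely for illustration, exposing only the one attribute the collator reads):

    import torch

    class _Tok:
        # Stand-in exposing only the attribute the collator needs
        pad_token_id = 0

    features = [
        {
            "input_ids": torch.tensor([5, 6]),
            "attention_mask": torch.tensor([1, 1]),
            "labels": torch.tensor([-100, 6]),
        },
        {
            "input_ids": torch.tensor([7]),
            "attention_mask": torch.tensor([1]),
            "labels": torch.tensor([7]),
        },
    ]
    batch = custom_data_collator(features, _Tok())
    print(batch["input_ids"])  # tensor([[5, 6], [7, 0]])
    print(batch["labels"])     # tensor([[-100, 6], [7, -100]])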
+
+
+ def target_rank_reward_function(
+     completions: List[str], ranks: List[List[float]], **kwargs
+ ) -> List[float]:
+     """
+     Reward function that evaluates the rank of a target completion based on
+     the provided ranks.
+
+     Args:
+         completions (List[str]): List of generated target completions in
+             binary string format
+         ranks (List[List[float]]): One list of rank scores per completion,
+             holding one value for each possible target
+         **kwargs: Additional arguments passed from the trainer
+
+     Returns:
+         List[float]: List of reward values for each completion, where each
+             reward is:
+             - The rank value from ranks if the completion is valid
+             - A negative value whose magnitude depends on the type of
+               format violation
+     """
+
+     rewards = []
+     for completion, rank_scores in zip(completions, ranks):
+         try:
+             int_target = int(completion.replace(",", ""), 2)
+             reward = rank_scores[int_target]
+             rewards.append(reward)
+         except Exception as e:
+             # First, check whether the completion contains only valid
+             # characters (0, 1, comma)
+             valid_chars = set(["0", "1", ","])
+             if not all(c in valid_chars for c in completion):
+                 logger.error(
+                     f"Error evaluating completion: contains invalid "
+                     f"characters: {completion[:50]}..."
+                 )
+                 rewards.append(-2.00)  # Severe penalty for invalid chars
+                 continue
+
+             # Check whether it is a valid comma-separated binary string.
+             # The number of parts should match the number of bits (e.g.,
+             # 8 bits for n=8), which can be inferred from the size of the
+             # rank_scores array
+             parts = completion.split(",")
+             expected_bits = (
+                 int(np.log2(len(rank_scores))) if len(rank_scores) > 0 else 8
+             )
+             if len(parts) != expected_bits:
+                 logger.error(
+                     f"Error evaluating completion: wrong length: "
+                     f"{len(parts)} vs expected {expected_bits}"
+                 )
+                 rewards.append(-1.50)  # Penalty for wrong length
+                 continue
+
+             logger.error(f"Error evaluating completion: {e}")
+             rewards.append(-1.00)  # Generic penalty for other errors
+     return rewards
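
A worked example of the reward scheme on a toy 2-bit landscape (four possible targets, so each per-completion rank list has four entries):

    completions = ["1,0", "2,0", "1,0,1"]
    ranks = [[0.0, 0.25, 0.5, 1.0]] * 3

    # "1,0"   -> int("10", 2) = 2          -> reward 0.5
    # "2,0"   -> invalid character "2"     -> reward -2.0
    # "1,0,1" -> 3 parts vs 2 expected     -> reward -1.5
    print(target_rank_reward_function(completions, ranks))
    # [0.5, -2.0, -1.5]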
+
+
+ def train(config: TrainConfig):
+     print("Starting supervised learning training...")
+     print(f"Training with config: {config}")
+     print(f"Dataset file: {config.dataset_file}")
+     print(f"Trainer args: {config.trainer_args}")
+     print(f"Assistant config: {config.assistant_config}")
+     print(f"Final model path: {config.final_model_path}")
+
+     ds = load_from_disk(config.dataset_file)
+
+     # A num_train_epochs value below 1 doubles as a dataset fraction
+     if config.trainer_args["num_train_epochs"] < 1:
+         for split in ds.keys():
+             print(
+                 f"Taking fraction "
+                 f"{config.trainer_args['num_train_epochs']} of dataset "
+                 f"partition {split}"
+             )
+             fraction = int(
+                 len(ds[split]) * config.trainer_args["num_train_epochs"]
+             )
+             ds[split] = ds[split].take(fraction)
+             print(
+                 f"Subsampled dataset partition {split} has "
+                 f"{len(ds[split])} samples"
+             )
+
+     ass = NKAssistant(config.assistant_config, metadata=config)
+
+     # Create text documents to train on
+     print("Creating and adding prompts to dataset")
+     ds = ds.map(ass.create_text_from_row)
+
+     # Tokenize text
+     print("Tokenizing prompts")
+
+     def tokenize_function_with_tokenizer(row):
+         return tokenize_function(row, ass.tokenizer)
+
+     ds = ds.map(tokenize_function_with_tokenizer, batched=False)
+
+     # Turn into torch tensors
+     ds.set_format(
+         type="torch",
+         columns=["input_ids", "attention_mask", "labels"],
+     )
+
+     # Set up the collator for padding of batches
+     def data_collator(features):
+         return custom_data_collator(features, ass.tokenizer)
+
+     # Initialize trainer with default values
+     trainer = Trainer(
+         model=ass.model,
+         args=TrainingArguments(**config.trainer_args),
+         train_dataset=ds["train"],
+         eval_dataset=ds["test"],
+         data_collator=data_collator,
+         tokenizer=ass.tokenizer,
+     )
+
+     # Train model
+     trainer.train()
+
+     ass._save_pretrained(Path(config.final_model_path))
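
Note the dual role of num_train_epochs here: a value below 1 both subsamples each split and, because trainer_args is forwarded unchanged to TrainingArguments, also shortens the training run itself. A hedged sketch of what a config's trainer_args might contain (field values are illustrative assumptions, not taken from the shipped YAML files):

    trainer_args = {
        "output_dir": "./runs/sft",
        # < 1: take 25% of each split AND train a quarter epoch over it
        "num_train_epochs": 0.25,
        "per_device_train_batch_size": 8,
    }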
+
+
+ def train_reinforcement(config: TrainConfig):
+     print("Starting reinforcement learning training...")
+     print(f"Training with config: {config}")
+     print(f"Dataset file: {config.dataset_file}")
+     print(f"Trainer args: {config.trainer_args}")
+     print(f"Assistant config: {config.assistant_config}")
+     print(f"Final model path: {config.final_model_path}")
+
+     ds = load_from_disk(config.dataset_file)
+
+     for split in ds.keys():
+         logger.debug(
+             f"Configured fraction for {split} split: "
+             f"{config.trainer_args['num_train_epochs']}"
+         )
+         fraction = int(
+             len(ds[split]) * config.trainer_args["num_train_epochs"]
+         )
+         if split != "train":
+             # steps = samples * grpo_creation_steps / (batch_size * devices)
+             # samples = wished_steps * batch_size * devices /
+             #     grpo_creation_steps
+             fraction = int(min(600 * 24 * 4 / 8, fraction))
+
+         logger.debug(f"Fraction of {fraction} samples for {split} split")
+         ds[split] = ds[split].take(fraction)
+         logger.debug(
+             f"Subsampled dataset partition {split} has "
+             f"{len(ds[split])} samples"
+         )
+
+     ass = NKAssistant(config.assistant_config, metadata=config)
+
+     # Create prompts for reinforcement learning
+     print("Creating and adding prompts to dataset for RL")
+     ds = ds.map(ass.create_prompt_for_rl_from_row)
+
+     print("Successfully completed prompt creation mapping.")
+
+     # Turn into torch tensors while keeping the prompt field
+     ds.set_format(
+         type="torch",
+         columns=["prompt", "ranks"],
+     )
+
+     # Initialize GRPOConfig from the configured trainer args
+     grpo_config = GRPOConfig(**config.trainer_args)
+
+     print("Initializing GRPOTrainer...")
+     # Initialize GRPOTrainer
+     trainer = GRPOTrainer(
+         model=ass.model,
+         args=grpo_config,
+         train_dataset=ds["train"],
+         eval_dataset=ds["test"],
+         reward_funcs=target_rank_reward_function,
+         processing_class=ass.tokenizer,
+     )
+
+     print("Starting training...")
+     # Train model
+     trainer.train()
+
+     print("Saving model...")
+     ass._save_pretrained(Path(config.final_model_path))
+
+
+ if __name__ == "__main__":
+     # Parse command-line arguments
+     parser = argparse.ArgumentParser(
+         description="Train a model with the given config."
+     )
+     parser.add_argument(
+         "--config", type=str, required=True, help="Path to YAML config file"
+     )
+     parser.add_argument(
+         "--rl",
+         action="store_true",
+         help=(
+             "Use reinforcement learning training instead of supervised "
+             "learning"
+         ),
+     )
+     parser.add_argument(
+         "--rl_method",
+         type=str,
+         default="grpo",
+         choices=["grpo"],
+         help="Reinforcement learning method to use (default: grpo)",
+     )
+     args = parser.parse_args()
+
+     # Load config from YAML file
+     config = load_config_from_yaml(args.config, TrainConfig)
+
+     # Train the model using the appropriate method
+     if args.rl:
+         print("Training with reinforcement learning")
+         train_reinforcement(config)
+     else:
+         print("Training with supervised learning")
+         train(config)
+
+     print("Training completed successfully.")
nk_model/__init__.py ADDED
File without changes
nk_model/assembler.py ADDED
@@ -0,0 +1,112 @@
+ """Utilities for assembling sequences (vectors) and simple bit operations.
+
+ This module provides:
+ - Bit helper (`reverse01`) for 0/1 bit values
+ - Random selection helper (`random_elem`)
+ - Simple assemblers (`assembler_v1`, `assembler_v1_v2`, `assembler_v1_sym`)
+ - `ensemble_builder` to grow a set of vectors up to a target length
+   while avoiding duplicates and invalid outputs.
+ """
+
+ from random import choice
+ from typing import Callable, Optional, TypeVar
+
+ T = TypeVar("T")
+
+
+ def reverse01(bits: list[int]) -> list[int]:
+     """Return a new list where each bit is flipped."""
+     return [1 - bit for bit in bits]
+
+
+ def random_elem(items: list[T]) -> Optional[T]:
+     """Return a random element from the list, or None if the list is empty."""
+     return choice(items) if items else None
+
+
+ def assembler_v1(vectors: list[list[T]]) -> Optional[list[T]]:
+     """
+     Concatenate a randomly chosen vector with itself.
+
+     Returns None if the input list is empty.
+     """
+     if not vectors:
+         return None
+     v1 = random_elem(vectors)
+     return None if v1 is None else v1 + v1
+
+
+ def assembler_v1_v2(vectors: list[list[T]]) -> Optional[list[T]]:
+     """
+     Concatenate two randomly chosen vectors from the input.
+
+     Returns None if the input list is empty.
+     """
+     if not vectors:
+         return None
+     v1 = random_elem(vectors)
+     v2 = random_elem(vectors)
+     if v1 is None or v2 is None:
+         return None
+     return v1 + v2
+
+
+ def assembler_v1_sym(
+     vectors: list[list[int]],
+ ) -> Optional[list[int]]:
+     """
+     For integer vectors: pick v1 at random, then concatenate it with either
+     itself or its bit-flipped copy.
+
+     Returns None if the input list is empty.
+     """
+     if not vectors:
+         return None
+     v1 = random_elem(vectors)
+     if v1 is None:
+         return None
+     v2 = random_elem([v1, reverse01(v1)])
+     return None if v2 is None else v1 + v2
+
+
+ def ensemble_builder(
+     assembler: Callable[[list[list[T]]], Optional[list[T]]],
+     n: int,
+     max_len: int,
+     vectors: list[list[T]],
+ ) -> list[list[T]]:
+     """
+     Grow the starting ensemble of vectors by repeatedly assembling new
+     vectors.
+
+     Args:
+         assembler:
+             Function that assembles a new vector from the current ensemble.
+             Takes a list of vectors and returns a new vector (or None).
+         n:
+             Number of new vectors of length max_len the ensemble should
+             gain before returning.
+         max_len:
+             Maximum length of vectors in the ensemble; assembled vectors
+             longer than this are rejected and retried.
+         vectors:
+             Starting ensemble of vectors.
+
+     Returns:
+         The grown ensemble: the starting vectors plus all newly assembled
+         vectors, including n new vectors of length max_len.
+     """
+     if n == 0 or not vectors:
+         return vectors
+
+     new_vector = assembler(vectors)
+     # Retry on invalid outputs: None, duplicates, or over-long vectors
+     if (
+         new_vector is None
+         or new_vector in vectors
+         or len(new_vector) > max_len
+     ):
+         return ensemble_builder(assembler, n, max_len, vectors)
+
+     new_vectors = vectors + [new_vector]
+     # Vectors shorter than max_len are kept as building material but do
+     # not count towards n
+     if len(new_vector) < max_len:
+         return ensemble_builder(assembler, n, max_len, new_vectors)
+
+     return ensemble_builder(assembler, n - 1, max_len, new_vectors)
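
A minimal usage sketch: growing an ensemble from a single 2-bit seed with the symmetric assembler until two new max_len vectors exist. Duplicate and over-long assemblies are simply retried, so the call may recurse several times before returning:

    result = ensemble_builder(assembler_v1_sym, n=2, max_len=4, vectors=[[0, 1]])
    print(result)
    # One possible output: [[0, 1], [0, 1, 0, 1], [0, 1, 1, 0]]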