ai_nk_cce-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_nk_cce-0.1.0.dist-info/METADATA +118 -0
- ai_nk_cce-0.1.0.dist-info/RECORD +46 -0
- ai_nk_cce-0.1.0.dist-info/WHEEL +4 -0
- api/__init__.py +0 -0
- api/mpcdf_vllm.py +94 -0
- evals/nk_model.py +277 -0
- model/README.md +64 -0
- model/config/dataset_conv_v1.yml +9 -0
- model/config/dataset_conv_v2_m2.yml +9 -0
- model/config/dataset_conv_v3_m2_assembl_nearest.yml +9 -0
- model/config/dataset_debug.yml +9 -0
- model/config/dataset_v4_int_format.yml +9 -0
- model/config/dataset_v5.yml +9 -0
- model/config/inference.yml +7 -0
- model/config/train.yml +24 -0
- model/config/train_debug.yml +19 -0
- model/config/train_from_checkpoint.yml +24 -0
- model/config/train_from_checkpoint_debug.yml +19 -0
- model/config/train_grpo.yml +30 -0
- model/config/train_grpo_debug.yml +30 -0
- model/config/train_grpo_debug_vllm.yml +32 -0
- model/config.py +54 -0
- model/dataset.py +324 -0
- model/inference.py +51 -0
- model/nk_assistant.py +207 -0
- model/parser.py +70 -0
- model/run_slurm.py +335 -0
- model/score.ipynb +596 -0
- model/scripts/template.slurm +54 -0
- model/scripts/template_rl.slurm +54 -0
- model/train.py +293 -0
- nk_model/__init__.py +0 -0
- nk_model/assembler.py +112 -0
- nk_model/biased_prediction_agent.py +389 -0
- nk_model/dataset.py +434 -0
- nk_model/enums.py +21 -0
- nk_model/landscape_cache.py +149 -0
- nk_model/models.py +172 -0
- nk_model/nk_landscape.py +498 -0
- simulation/hill_climber_simulation.py +211 -0
- simulation/hill_climber_vs_ai_simulation.py +132 -0
- simulation/landscape_selection.py +179 -0
- utils/__init__.py +0 -0
- utils/binary_conversion.py +128 -0
- utils/logging.py +33 -0
- utils/utils.py +51 -0
model/config/train.yml
ADDED
@@ -0,0 +1,24 @@
+dataset_file: data/samples/<<ds>>
+final_model_path: models/<<group_name>>/<<job_name>>/<<job_id>>
+trainer_args:
+  output_dir: checkpoints/<<group_name>>/<<job_name>>/<<job_id>>
+  eval_strategy: "steps"
+  per_device_train_batch_size: 20
+  per_device_eval_batch_size: 20
+  save_steps: 0.1
+  save_total_limit: 3
+  logging_dir: "./logs"
+  logging_steps: 10
+  eval_steps: 100
+  do_eval: true
+  num_train_epochs: 1
+  learning_rate: "<<float: lr>>"
+  report_to: "wandb"
+  run_name: <<job_name>>/<<job_id>>
+  lr_scheduler_type: constant
+assistant_config:
+  parser_config:
+    include_payoff: true
+  model_path: gpt2
+  generation_params:
+    temperature: 0.7
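The `<<name>>` and `<<float: name>>` tokens in these templates are placeholders, presumably resolved by the job launcher (`model/run_slurm.py`, not shown in this excerpt) before the YAML is parsed. A minimal sketch of how such substitution could work; `fill_placeholders` is a hypothetical helper, not part of the package:

import re

# Hypothetical substitution for the <<...>> template syntax above; not the
# package's actual implementation (run_slurm.py is not shown in this diff).
def fill_placeholders(template: str, values: dict) -> str:
    def repl(match: re.Match) -> str:
        key = match.group(1).strip()
        if key.startswith("float:"):  # <<float: lr>> -> numeric string
            return str(float(values[key.split(":", 1)[1].strip()]))
        return str(values[key])       # <<job_name>> -> plain string
    return re.sub(r"<<([^>]+)>>", repl, template)

print(fill_placeholders("run_name: <<job_name>>/<<job_id>>",
                        {"job_name": "gpt2_v5", "job_id": "42"}))
# run_name: gpt2_v5/42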
model/config/train_debug.yml
ADDED
@@ -0,0 +1,19 @@
+dataset_file: data/samples/<<ds>>
+final_model_path: models/<<group_name>>/<<job_name>>/<<job_id>>
+trainer_args:
+  output_dir: checkpoints/<<group_name>>/<<job_name>>/<<job_id>>
+  evaluation_strategy: epoch
+  per_device_train_batch_size: 1
+  per_device_eval_batch_size: 1
+  save_steps: 10000
+  save_total_limit: 2
+  logging_dir: ./logs
+  logging_steps: 500
+  num_train_epochs: 1
+  report_to: none
+assistant_config:
+  parser_config:
+    include_payoff: true
+  model_path: gpt2
+  generation_params:
+    temperature: 0.7
model/config/train_from_checkpoint.yml
ADDED
@@ -0,0 +1,24 @@
+dataset_file: data/samples/<<ds>>
+final_model_path: models/<<group_name>>/<<job_name>>/<<job_id>>
+trainer_args:
+  output_dir: checkpoints/<<group_name>>/<<job_name>>/<<job_id>>
+  eval_strategy: "steps"
+  per_device_train_batch_size: 20
+  per_device_eval_batch_size: 20
+  save_steps: 0.1
+  save_total_limit: 3
+  logging_dir: "./logs"
+  logging_steps: 10
+  eval_steps: 100
+  do_eval: true
+  num_train_epochs: 0.2
+  learning_rate: "<<float: lr>>"
+  report_to: "wandb"
+  run_name: <<job_name>>/<<job_id>>
+  lr_scheduler_type: cosine
+assistant_config:
+  parser_config:
+    include_payoff: true
+  model_path: models/gpt2_v5/1e-5/2025_05_26__18_02_35
+  generation_params:
+    temperature: 0.7
model/config/train_from_checkpoint_debug.yml
ADDED
@@ -0,0 +1,19 @@
+dataset_file: data/samples/<<ds>>
+final_model_path: models/<<group_name>>/<<job_name>>/<<job_id>>
+trainer_args:
+  output_dir: checkpoints/<<group_name>>/<<job_name>>/<<job_id>>
+  evaluation_strategy: epoch
+  per_device_train_batch_size: 1
+  per_device_eval_batch_size: 1
+  save_steps: 10000
+  save_total_limit: 2
+  logging_dir: ./logs
+  logging_steps: 500
+  num_train_epochs: 1
+  report_to: none
+assistant_config:
+  parser_config:
+    include_payoff: true
+  model_path: models/gpt2_v5/1e-5/2025_05_20__10_27_38
+  generation_params:
+    temperature: 0.7
model/config/train_grpo.yml
ADDED
@@ -0,0 +1,30 @@
+dataset_file: data/samples/<<ds>>
+final_model_path: models/<<group_name>>/<<job_name>>/<<job_id>>
+trainer_args:
+  output_dir: checkpoints/<<group_name>>/<<job_name>>/<<job_id>>
+  eval_strategy: "steps"
+  per_device_train_batch_size: 24 # must be divisible by generations per prompt
+  per_device_eval_batch_size: 24
+  save_steps: 0.1
+  save_total_limit: 3
+  logging_dir: "./logs"
+  logging_steps: 0.01
+  eval_steps: 0.1
+  do_eval: true
+  num_train_epochs: 1.0
+  learning_rate: "<<float: lr>>"
+  report_to: "wandb"
+  run_name: <<job_name>>/<<job_id>>
+  lr_scheduler_type: constant
+  # GRPO specific args
+  num_generations: 8
+  beta: 0.04
+  epsilon: 0.2
+  max_prompt_length: 512
+  max_completion_length: 15
+assistant_config:
+  parser_config:
+    include_payoff: true
+  model_path: models/gpt2_v5/1e-5/2025_05_26__18_02_35
+  generation_params:
+    temperature: 0.7
model/config/train_grpo_debug.yml
ADDED
@@ -0,0 +1,30 @@
+dataset_file: data/samples/<<ds>>
+final_model_path: models/<<group_name>>/<<job_name>>/<<job_id>>
+trainer_args:
+  output_dir: checkpoints/<<group_name>>/<<job_name>>/<<job_id>>
+  eval_strategy: "steps"
+  per_device_train_batch_size: 8 # must be divisible by generations per prompt
+  per_device_eval_batch_size: 8
+  save_steps: 0.5 # Very high number to avoid frequent saves during debug
+  save_total_limit: 1 # Keep only one checkpoint
+  logging_dir: "./logs"
+  logging_steps: 0.01
+  eval_steps: 0.1
+  do_eval: true
+  num_train_epochs: 0.005
+  learning_rate: "<<float: lr>>"
+  report_to: "wandb"
+  run_name: <<job_name>>/<<job_id>>
+  lr_scheduler_type: constant
+  # GRPO specific args
+  num_generations: 8
+  beta: 0.04
+  epsilon: 0.2
+  max_prompt_length: 512
+  max_completion_length: 15
+assistant_config:
+  parser_config:
+    include_payoff: true
+  model_path: models/gpt2_v5/1e-5/2025_05_26__18_02_35
+  generation_params:
+    temperature: 0.7
model/config/train_grpo_debug_vllm.yml
ADDED
@@ -0,0 +1,32 @@
+dataset_file: data/samples/<<ds>>
+final_model_path: models/<<group_name>>/<<job_name>>/<<job_id>>
+trainer_args:
+  output_dir: checkpoints/<<group_name>>/<<job_name>>/<<job_id>>
+  eval_strategy: "steps"
+  per_device_train_batch_size: 8 # must be divisible by generations per prompt
+  per_device_eval_batch_size: 8
+  save_steps: 1 # Very high number to avoid frequent saves during debug
+  save_total_limit: 1 # Keep only one checkpoint
+  logging_dir: "./logs"
+  logging_steps: 0.1
+  eval_steps: 0.1
+  do_eval: true
+  num_train_epochs: 0.001
+  learning_rate: "<<float: lr>>"
+  report_to: "wandb"
+  run_name: <<job_name>>/<<job_id>>
+  lr_scheduler_type: constant
+  # GRPO specific args
+  beta: 0.04
+  epsilon: 0.2
+  max_prompt_length: 512
+  max_completion_length: 15
+  use_vllm: true
+  vllm_mode: "colocate"
+  vllm_gpu_memory_utilization: 0.10
+assistant_config:
+  parser_config:
+    include_payoff: true
+  model_path: models/gpt2_v5/1e-5/2025_05_26__18_02_35
+  generation_params:
+    temperature: 0.7
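The GRPO-specific keys in the last three configs (`num_generations`, `beta`, `epsilon`, `max_prompt_length`, `max_completion_length`, `use_vllm`, `vllm_mode`, `vllm_gpu_memory_utilization`) match the fields of TRL's `GRPOConfig`, which suggests `model/train.py` (not shown in this excerpt) feeds `trainer_args` into a TRL `GRPOTrainer`. A sketch under that assumption; the filename is a placeholder for a template whose `<<...>>` tokens have already been substituted, and field names should be checked against your TRL version:

import yaml
from trl import GRPOConfig

# "train_grpo_filled.yml" is a hypothetical, fully substituted copy of the
# template above; loading the raw template would fail validation
# (e.g. learning_rate: "<<float: lr>>" is not a number).
with open("train_grpo_filled.yml") as f:
    cfg = yaml.safe_load(f)

grpo_args = GRPOConfig(**cfg["trainer_args"])
print(grpo_args.num_generations, grpo_args.beta, grpo_args.max_completion_length)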
model/config.py
ADDED
@@ -0,0 +1,54 @@
+from typing import Any, Dict, List, Optional, Tuple
+
+from pydantic import BaseModel, Field
+
+
+class ParserConfig(BaseModel):
+    include_payoff: bool = True
+
+
+class NKAssistantConfig(BaseModel):
+    parser_config: ParserConfig
+    use_mpcdf_vllm: bool = False
+    model_path: Optional[str] = None
+    generation_params: Optional[Dict[str, Any]] = None
+
+
+class TrainConfig(BaseModel):
+    trainer_args: Any
+    dataset_file: str = Field(..., description="The path to the dataset file")
+    final_model_path: str = Field(
+        ..., description="The path to save the final model"
+    )
+    assistant_config: NKAssistantConfig
+
+
+class InferenceConfig(BaseModel):
+    dataset_file: str = Field(..., description="The path to the dataset file")
+    model_path: str = Field(
+        ..., description="The path to the fine-tuned model"
+    )
+    output_dataset_file: str = Field(
+        ..., description="The path to save the dataset with suggestions"
+    )
+    generation_params: Optional[Dict[str, Any]] = Field(
+        default=None, description="Generation parameters"
+    )
+    splits: List[str] = Field(
+        default=["train", "test"], description="Dataset splits to process"
+    )
+    max_test_samples: Optional[int] = Field(
+        default=None, description="Maximum number of test samples to process"
+    )
+
+
+class DataSetConfig(BaseModel):
+    input_file: str
+    output_file: str
+    samples_per_landscapes: int = 1
+    constraints_range: Tuple[int, int] = (0, 8)
+    n_samples: List[int] = [8]
+    include_payoff: bool = True
+    test_ratio: float = 0.1
+    exp_ratio: float = 0.1
+    debug_size: Optional[int] = None
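These pydantic models are what the YAML configs above deserialize into. `load_config_from_yaml` lives in `src/utils/utils.py`, which is not shown in this excerpt; a plausible stand-in is just YAML parsing plus pydantic validation:

import yaml
from pydantic import BaseModel

# Plausible equivalent of src.utils.utils.load_config_from_yaml; the real
# helper may differ in details.
def load_config_from_yaml(path: str, config_cls: type[BaseModel]) -> BaseModel:
    with open(path) as f:
        data = yaml.safe_load(f)
    return config_cls(**data)  # validates types and required fields

# config = load_config_from_yaml("model/config/train.yml", TrainConfig)
# config.assistant_config.parser_config.include_payoff  -> True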
model/dataset.py
ADDED
@@ -0,0 +1,324 @@
+import argparse
+import random
+from typing import List, Tuple
+
+import numpy as np
+import pandas as pd
+from datasets import Dataset, DatasetDict
+from tqdm import tqdm
+
+from src.model.config import DataSetConfig
+from src.model.parser import create_context, create_target
+from src.utils.utils import BIN_ARRAY, load_config_from_yaml
+
+
+def binary_vectors_within_radius(
+    origin: int,
+    radius: int,
+    n_dim: int = 8,
+) -> np.ndarray:
+    """
+    Create the list of indices of all binary vectors in a ball of radius r
+    around origin.
+
+    Args:
+        origin (int): Integer representation of the binary vector to center
+            around.
+        radius (int): Maximum Hamming distance from origin to include in the
+            ball.
+        n_dim (int, optional): Number of dimensions of binary vectors. Must be
+            <= 8. Defaults to 8.
+
+    Returns:
+        np.ndarray: Array of indices corresponding to binary vectors within
+            radius of origin.
+    """
+    assert n_dim <= 8, "n_dim must be less than or equal to 8"
+    # create a list of all binary vectors of length n_dim
+    all_vectors = BIN_ARRAY[: 2**n_dim]
+    origin_vector = all_vectors[origin]
+
+    # calculate hamming distances to origin using element-wise comparison
+    distances = np.sum(all_vectors != origin_vector, axis=1)
+    # return the indices of vectors within the radius
+    ball_indices = np.where(distances <= radius)[0]
+
+    return ball_indices
+
+
+def get_rank_score(
+    payoff: np.ndarray,
+    constraint_idx: np.ndarray,
+) -> np.ndarray:
+    """
+    Calculates a ranking score for the indices within the constraint indices.
+    It ranks the indices within the constraint area and gives them a score
+    according to their rank. The indices outside the constraint area are given
+    a score of -1.
+
+    Args:
+        payoff (np.ndarray): Array of payoff values.
+        constraint_idx (np.ndarray): Integer index array selecting the
+            constrained entries (as returned by binary_vectors_within_radius).
+
+    Returns:
+        np.ndarray: Array where constrained indices have values from 0 to 1
+            based on their rank (0 for lowest payoff, 1 for highest), and
+            unconstrained indices have -1.
+    """
+    # Initialize all scores to -1
+    rank_score = np.full_like(payoff, fill_value=-1, dtype=float)
+
+    # Get payoffs only for constrained indices
+    constrained_payoffs = payoff[constraint_idx]
+
+    # Calculate rank scores from 0 to 1
+    n_constrained = len(constrained_payoffs)
+    if n_constrained > 1:
+        ranks = np.linspace(0, 1, n_constrained)
+    else:
+        ranks = np.array([1.0])  # If only one point, give it rank 1
+
+    # Assign ranks to constrained indices; the double argsort inverts the
+    # sort permutation so each element receives its own rank
+    order = np.argsort(np.argsort(constrained_payoffs))
+    rank_score[constraint_idx] = ranks[order]
+
+    return rank_score
+
+
+def create_sample(
+    payoff: np.ndarray, hamming_distance: Tuple[int, int], n_samples: List[int]
+) -> Tuple[np.ndarray, int, int, int, np.ndarray]:
+
+    assert all(map(lambda x: x <= len(payoff), n_samples)), (
+        "n_samples must be less than or equal to the length of the "
+        "payoff array"
+    )
+    assert hamming_distance[0] <= hamming_distance[1], (
+        "constraints_range must be a tuple of two integers where the first "
+        "is less than or equal to the second"
+    )
+    assert hamming_distance[0] > 0, (
+        "constraints_range must be a tuple of two integers where the first "
+        "is greater than 0"
+    )
+
+    # create a list of all idx [0, 1, 2, ..., len(payoff) - 1]
+    # as integer representation of a binary vector
+    all_idx = np.arange(len(payoff))
+
+    # random choice of sample size and hamming distance
+    # (note: np.random.randint excludes the upper bound)
+    sample_size = np.random.choice(n_samples)
+    hamming_distance = np.random.randint(
+        hamming_distance[0], hamming_distance[1]
+    )
+
+    # creating a list of random sample indices
+    sample_idx = np.random.choice(a=all_idx, size=sample_size, replace=False)
+
+    sample_payoffs = payoff[sample_idx]
+    sample_idx = sample_idx[np.argsort(sample_payoffs)]
+
+    # random choice of origin
+    origin_idx = np.random.choice(all_idx)
+
+    # create a list of indices fulfilling the constraints
+    # to get the target index
+    constraint_idx = binary_vectors_within_radius(
+        origin=origin_idx,
+        radius=hamming_distance,
+        n_dim=int(np.log2(len(payoff))),
+    )
+    constraint_idx = constraint_idx[constraint_idx != origin_idx]
+
+    # get the target index as the index with the highest payoff
+    # that is not the origin
+    target_idx = constraint_idx[np.argmax(payoff[constraint_idx])]
+
+    # get the rank score for all indices
+    rank_score = get_rank_score(payoff, constraint_idx)
+
+    return sample_idx, target_idx, origin_idx, hamming_distance, rank_score
+
+
+def create_raw_dataset(df: pd.DataFrame, config: DataSetConfig) -> Dataset:
+    examples = {
+        "landscape_id": [],
+        "sample": [],
+        "n": [],
+        "k": [],
+        "power_scale": [],
+        "payoffs": [],
+        "sample_idx": [],
+        "target_idx": [],
+        "origin_idx": [],
+        "hamming_distance": [],
+        "ranks": [],
+    }
+
+    total_iterations = (
+        len(df.groupby("landscape_uuid")) * config.samples_per_landscapes
+    )
+    pbar = tqdm(total=total_iterations, desc="Creating dataset")
+    for s in range(config.samples_per_landscapes):
+        for id, group in df.groupby("landscape_uuid"):
+            n = group["n"].values[0].item()
+            k = group["k"].values[0].item()
+            power_scale = group["power_scale"].values[0].item()
+
+            # Sort the group by binary coordinates
+            group = sort_by_binary_coords(group, n)
+
+            # create a sample
+            payoffs = group["payoff"].values.astype(np.int32)
+
+            (
+                sample_idxs,
+                target_idx,
+                origin_idx,
+                hamming_distance,
+                ranks,
+            ) = create_sample(
+                payoff=payoffs,
+                hamming_distance=config.constraints_range,
+                n_samples=config.n_samples,
+            )
+
+            examples["landscape_id"].append(id)
+            examples["sample"].append(s)
+            examples["n"].append(n)
+            examples["k"].append(k)
+            examples["power_scale"].append(power_scale)
+            examples["payoffs"].append(payoffs)
+            examples["sample_idx"].append(sample_idxs)
+            examples["target_idx"].append(target_idx)
+            examples["origin_idx"].append(origin_idx)
+            examples["hamming_distance"].append(hamming_distance)
+            examples["ranks"].append(ranks)
+
+            pbar.update(1)
+
+    pbar.close()
+
+    return Dataset.from_dict(examples)
+
+
+def create_context_from_row(row: dict, include_payoff: bool = True) -> dict:
+    sample_payoff = np.array(row["payoffs"])[row["sample_idx"]]
+
+    context = create_context(
+        n=row["n"],
+        k=row["k"],
+        power_scale=row["power_scale"],
+        sample_idxs=row["sample_idx"],
+        origin_idx=row["origin_idx"],
+        hamming_distance=row["hamming_distance"],
+        payoff=sample_payoff,
+        include_payoff=include_payoff,
+    )
+    return {"context": context}
+
+
+def create_target_from_row(row: dict) -> dict:
+    target = create_target(target_idx=row["target_idx"])
+    return {"target": target}
+
+
+def create_dataset_from_file(config: DataSetConfig) -> DatasetDict:
+    df = pd.read_parquet(config.input_file)
+    landscape_uuid = df["landscape_uuid"].unique()
+    random.shuffle(landscape_uuid)
+
+    # Split landscape IDs into train, test, exp
+    split_points = [
+        int((1 - config.test_ratio - config.exp_ratio) * len(landscape_uuid)),
+        int((1 - config.exp_ratio) * len(landscape_uuid)),
+    ]
+    train_ids, test_ids, exp_ids = np.split(landscape_uuid, split_points)
+
+    # Assertions to ensure splits are correct
+    assert len(train_ids) + len(test_ids) + len(exp_ids) == len(landscape_uuid)
+    assert len(set(train_ids) & set(test_ids)) == 0
+    assert len(set(train_ids) & set(exp_ids)) == 0
+    assert len(set(test_ids) & set(exp_ids)) == 0
+
+    # Apply debug size limitation if specified
+    if config.debug_size is not None:
+        dbs = config.debug_size
+        train_ids, test_ids, exp_ids = (
+            train_ids[:dbs],
+            test_ids[:dbs],
+            exp_ids[:dbs],
+        )
+
+    # Create datasets for each split
+    datasets = {}
+    for split_name, split_ids in [
+        ("train", train_ids),
+        ("test", test_ids),
+        ("exp", exp_ids),
+    ]:
+        # Create raw dataset and apply context/target transformation
+        split_df = df[df["landscape_uuid"].isin(split_ids)]
+        datasets[split_name] = create_raw_dataset(split_df, config)
+
+    ds = DatasetDict(datasets)
+    return ds
+
+
+def apply_context_target(
+    ds: DatasetDict, config: DataSetConfig
+) -> DatasetDict:
+    def create_context_from_row_(row):
+        return create_context_from_row(row, config.include_payoff)
+
+    ds = ds.map(create_context_from_row_, batched=False)
+    ds = ds.map(create_target_from_row, batched=False)
+    return ds
+
+
+def sort_by_binary_coords(df: pd.DataFrame, n: int) -> pd.DataFrame:
+    """
+    Sort dataframe by converting binary coordinates to integer representation.
+
+    Args:
+        df (pd.DataFrame): Input dataframe with binary coordinates
+        n (int): Number of binary coordinate columns
+
+    Returns:
+        pd.DataFrame: Sorted dataframe with sort_key column
+    """
+    # Get the last n columns as binary coordinates
+    coord_cols = df.columns[-n:]
+    # Convert binary coordinates to integer and sort
+    df["sort_key"] = df[coord_cols].apply(
+        lambda x: int("".join(map(str, x)), 2), axis=1
+    )
+    df = df.sort_values("sort_key")
+    return df
+
+
+if __name__ == "__main__":
+    print("Starting dataset creation...")
+    # Parse command-line arguments
+    parser = argparse.ArgumentParser(
+        description="Generate dataset from config."
+    )
+    parser.add_argument(
+        "--config", type=str, required=True, help="Path to YAML config file"
+    )
+    args = parser.parse_args()
+
+    # Load config from YAML file
+    config = load_config_from_yaml(args.config, DataSetConfig)
+
+    print(config)
+
+    # Create and save dataset
+    dataset = create_dataset_from_file(config)
+
+    print(dataset)
+
+    dataset = apply_context_target(dataset, config)
+
+    dataset.save_to_disk(config.output_file)
+
+    print(f"Dataset saved to {config.output_file}")
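A quick standalone check of the two helpers at the top of this file (`BIN_ARRAY` is assumed to enumerate all 8-bit binary vectors in ascending integer order, which is what its usage implies):

import numpy as np

# Hamming ball: with n_dim=3, the vectors within radius 1 of origin 0 (= 000)
# are 000, 001, 010, 100, i.e. indices [0, 1, 2, 4].
vectors = (np.arange(8)[:, None] >> np.arange(2, -1, -1)) & 1  # stand-in for BIN_ARRAY[:8]
origin = vectors[0]
print(np.where(np.sum(vectors != origin, axis=1) <= 1)[0])  # [0 1 2 4]

# Rank scores: payoffs [3, 9, 1] should map to [0.5, 1.0, 0.0]
# (0 = lowest payoff, 1 = highest); the double argsort inverts the sort
# permutation into per-element ranks, matching get_rank_score above.
payoffs = np.array([3.0, 9.0, 1.0])
print(np.linspace(0, 1, 3)[np.argsort(np.argsort(payoffs))])  # [0.5 1.  0. ]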
model/inference.py
ADDED
@@ -0,0 +1,51 @@
+import argparse
+
+from datasets import load_from_disk
+
+from src.model.config import InferenceConfig
+from src.model.nk_assistant import NKAssistant
+from src.utils.utils import load_config_from_yaml
+
+
+def run_inference(config: InferenceConfig):
+    # Load dataset and model
+    ds = load_from_disk(config.dataset_file)
+
+    # Load model and tokenizer
+    ass = NKAssistant.from_pretrained(config.model_path)
+
+    if config.generation_params:
+        ass.generation_params = config.generation_params
+
+    # Process each split
+    result_ds = ds
+    for split in config.splits:
+        if split in ds:
+            print(f"Processing {split} split...")
+            if config.max_test_samples:
+                print(f"Processing {config.max_test_samples} test samples...")
+                ds[split] = ds[split].select(range(config.max_test_samples))
+            result_ds[split] = ds[split].map(ass.suggest_from_row)
+
+    # Save the dataset with suggestions
+    result_ds.save_to_disk(config.output_dataset_file)
+    print(f"Dataset with suggestions saved to {config.output_dataset_file}")
+
+
+if __name__ == "__main__":
+    # Parse command-line arguments
+    parser = argparse.ArgumentParser(
+        description="Run inference with a fine-tuned model."
+    )
+    parser.add_argument(
+        "--config", type=str, required=True, help="Path to YAML config file"
+    )
+    args = parser.parse_args()
+
+    # Load config from YAML file
+    config = load_config_from_yaml(args.config, InferenceConfig)
+
+    # Run inference
+    run_inference(config)
+
+    print("Inference completed successfully.")
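For completeness, a hypothetical end-to-end invocation of `run_inference`; the dataset and output paths are placeholders, not files shipped with the package:

from src.model.config import InferenceConfig
from src.model.inference import run_inference

config = InferenceConfig(
    dataset_file="data/samples/ds_v5",  # placeholder: a DatasetDict saved via save_to_disk
    model_path="models/gpt2_v5/1e-5/2025_05_26__18_02_35",
    output_dataset_file="data/ds_v5_with_suggestions",  # placeholder
    generation_params={"temperature": 0.7},
    splits=["test"],
    max_test_samples=100,
)
run_inference(config)

Equivalently, the `__main__` block supports `python -m src.model.inference --config <path-to-yaml>` with an `InferenceConfig`-shaped YAML such as `model/config/inference.yml` (module path inferred from the imports above).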