promptolution 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ Metadata-Version: 2.1
2
+ Name: promptolution
3
+ Version: 0.1.0
4
+ Summary:
5
+ Author: Tom Zehle, Moritz Schlager, Timo Heiß
6
+ Requires-Python: >=3.11,<3.12
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Programming Language :: Python :: 3.11
9
+ Requires-Dist: langchain-anthropic (>=0.1.22,<0.2.0)
10
+ Requires-Dist: langchain-community (>=0.2.12,<0.3.0)
11
+ Requires-Dist: langchain-core (>=0.2.29,<0.3.0)
12
+ Requires-Dist: langchain-openai (>=0.1.21,<0.2.0)
13
+ Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
14
+ Requires-Dist: numpy (>=1.26.0,<2.0.0)
15
+ Requires-Dist: pandas (>=2.2.2,<3.0.0)
16
+ Requires-Dist: seaborn (>=0.13.2,<0.14.0)
17
+ Requires-Dist: tqdm (>=4.66.5,<5.0.0)
18
+ Description-Content-Type: text/markdown
19
+
20
+ # Promptolution
21
+
22
+ Project for seminar "AutoML in the age of large pre-trained language models" at LMU Munich, developed by [Timo Heiß](https://www.linkedin.com/in/timo-heiss/), [Moritz Schlager](https://www.linkedin.com/in/moritz-schlager/) and [Tom Zehle](https://www.linkedin.com/in/tom-zehle/).
23
+
24
+ ## Set Up
25
+
26
+ After having cloned the repository, run
27
+
28
+ ```
29
+ poetry install
30
+ ```
31
+
32
+ to install the necessary dependencies.
33
+
34
+ You might need to install [pipx](https://pipx.pypa.io/stable/installation/) and [poetry](https://python-poetry.org/docs/) first.
35
+
36
+ ## Usage
37
+
38
+ Create API Keys for the models you want to use:
39
+ - OpenAI: store token in openaitoken.txt
40
+ - Anthropic: store token in anthropictoken.txt
41
+ - DeepInfra (for Llama): store token in deepinfratoken.txt
42
+
43
+ Run experiments based on config via:
44
+
45
+ ```
46
+ poetry run python scripts/experiment_runs.py --experiment "configs/<my_experiment>.ini"
47
+ ```
48
+ where `<my_experiment>.ini` is a config based on our templates.
@@ -0,0 +1,29 @@
1
+ # Promptolution
2
+
3
+ Project for seminar "AutoML in the age of large pre-trained language models" at LMU Munich, developed by [Timo Heiß](https://www.linkedin.com/in/timo-heiss/), [Moritz Schlager](https://www.linkedin.com/in/moritz-schlager/) and [Tom Zehle](https://www.linkedin.com/in/tom-zehle/).
4
+
5
+ ## Set Up
6
+
7
+ After having cloned the repository, run
8
+
9
+ ```
10
+ poetry install
11
+ ```
12
+
13
+ to install the necessary dependencies.
14
+
15
+ You might need to install [pipx](https://pipx.pypa.io/stable/installation/) and [poetry](https://python-poetry.org/docs/) first.
16
+
17
+ ## Usage
18
+
19
+ Create API Keys for the models you want to use:
20
+ - OpenAI: store token in openaitoken.txt
21
+ - Anthropic: store token in anthropictoken.txt
22
+ - DeepInfra (for Llama): store token in deepinfratoken.txt
23
+
24
+ Run experiments based on config via:
25
+
26
+ ```
27
+ poetry run python scripts/experiment_runs.py --experiment "configs/<my_experiment>.ini"
28
+ ```
29
+ where `<my_experiment>.ini` is a config based on our templates.
@@ -0,0 +1,6 @@
1
+ from . import llms
2
+ from . import optimizers
3
+ from . import predictors
4
+ from . import tasks
5
+ from . import callbacks
6
+ from . import config
@@ -0,0 +1,120 @@
1
+ import os
2
+
3
+ import pandas as pd
4
+ from tqdm import tqdm
5
+
6
+
7
class Callback:
    """Base interface for optimization callbacks.

    Subclasses override any subset of the hooks below; every default
    implementation is a no-op, so optimizers can call them unconditionally.
    """

    def on_step_end(self, optimizer):
        """Hook invoked after each optimization step with the optimizer."""
        return None

    def on_epoch_end(self, epoch, logs=None):
        """Hook invoked after each epoch with the epoch index and logs."""
        return None

    def on_train_end(self, logs=None):
        """Hook invoked once when the whole optimization run finishes."""
        return None
16
+
17
+
18
class LoggerCallback(Callback):
    """Log optimization progress through a standard logger.

    Emits one message per step (plus one per prompt/score pair), one per
    epoch, and a final message when training ends.

    Attributes:
        logger: Logger object used for all output.
        step (int): Number of steps observed so far.
    """

    def __init__(self, logger):
        self.logger = logger
        self.step = 0

    def on_step_end(self, optimizer):
        """Log the step marker and every current prompt with its score."""
        self.step += 1
        # NOTE(review): everything is logged at CRITICAL level, presumably so
        # the messages survive any logger filtering — confirm this is intended.
        self.logger.critical(f"✨Step {self.step} ended✨")
        for idx, (prompt, score) in enumerate(zip(optimizer.prompts, optimizer.scores)):
            self.logger.critical(f"*** Prompt {idx}: Score: {score}")
            self.logger.critical(f"{prompt}")

    def on_epoch_end(self, epoch, logs=None):
        """Log the epoch number together with the provided logs."""
        self.logger.critical(f"Epoch {epoch} - {logs}")

    def on_train_end(self, logs=None):
        """Log that training has finished."""
        self.logger.critical(f"Training ended - {logs}")
44
+
45
+
46
class CSVCallback(Callback):
    """Persist optimization progress to a CSV file.

    On every step the current prompts and their scores are appended, one row
    per prompt, with columns ``step,prompt,score``.

    Attributes:
        path (str): The path to the CSV file.
        step (int): The current step number.
    """

    def __init__(self, path):
        """Prepare the output file: create its directory and write the header.

        Args:
            path (str): Destination CSV file path.
        """
        directory = os.path.dirname(path)
        # Guard against a bare filename: os.path.dirname("run.csv") is "" and
        # os.makedirs("") raises FileNotFoundError. exist_ok also removes the
        # check-then-create race of the original exists()/makedirs() pair.
        if directory:
            os.makedirs(directory, exist_ok=True)

        # Truncate any previous run and write the header row.
        with open(path, "w") as f:
            f.write("step,prompt,score\n")
        self.path = path
        self.step = 0

    def on_step_end(self, optimizer):
        """Append the optimizer's current prompts and scores to the CSV file."""
        self.step += 1
        df = pd.DataFrame(
            {"step": [self.step] * len(optimizer.prompts), "prompt": optimizer.prompts, "score": optimizer.scores}
        )
        # mode="a": rows accumulate across steps; header was written once in __init__.
        df.to_csv(self.path, mode="a", header=False, index=False)

    def on_train_end(self, logs=None):
        """No cleanup needed; each append closes the file itself."""
        pass
79
+
80
+
81
class BestPromptCallback(Callback):
    """Track the best prompt seen during optimization.

    Only ``optimizer.prompts[0]`` / ``optimizer.scores[0]`` are inspected —
    this assumes the optimizer keeps its population sorted best-first;
    TODO confirm against the optimizer implementations.

    Attributes:
        best_prompt (str): The prompt with the highest score so far.
        best_score (float): The highest score achieved so far.
    """

    def __init__(self):
        self.best_prompt = ""
        # float("-inf") instead of the previous arbitrary sentinel -99999:
        # prompts whose scores are all below -99999 are now still tracked.
        self.best_score = float("-inf")

    def on_step_end(self, optimizer):
        """Update the running best with the optimizer's top prompt, if better."""
        if optimizer.scores[0] > self.best_score:
            self.best_score = optimizer.scores[0]
            self.best_prompt = optimizer.prompts[0]

    def get_best_prompt(self):
        """Return the best ``(prompt, score)`` pair observed so far."""
        return self.best_prompt, self.best_score
102
+
103
+
104
class ProgressBarCallback(Callback):
    """Show a tqdm progress bar that advances once per optimization step.

    Attributes:
        pbar (tqdm): Underlying tqdm progress bar instance.
    """

    def __init__(self, total_steps):
        """Create the bar.

        Args:
            total_steps (int): Total number of steps the bar should span.
        """
        self.pbar = tqdm(total=total_steps)

    def on_step_end(self, optimizer):
        """Advance the bar by one step."""
        self.pbar.update(1)

    def on_train_end(self, logs=None):
        """Close the bar so the terminal line is released."""
        self.pbar.close()
@@ -0,0 +1,78 @@
1
+ from configparser import ConfigParser
2
+ from dataclasses import dataclass
3
+
4
+
5
@dataclass
class Config:
    """
    Configuration class for the promptolution library.

    This class handles loading and parsing of configuration settings,
    either from an INI config file (via ConfigParser) or from keyword
    arguments.

    Attributes:
        task_name (str): Name of the task.
        ds_path (str): Path to the dataset.
        n_steps (int): Number of optimization steps.
        optimizer (str): Name of the optimizer to use.
        meta_prompt_path (str): Path to the meta prompt file.
        meta_llms (str): Name of the meta language model.
        downstream_llm (str): Name of the downstream language model.
        evaluation_llm (str): Name of the evaluation language model.
        init_pop_size (int): Initial population size. Defaults to 10.
        logging_dir (str): Directory for logging. Defaults to "logs/run.csv".
        experiment_name (str): Name of the experiment. Defaults to "experiment".
        include_task_desc (bool): Whether to include task description. Defaults to False.
        random_seed (int): Random seed for reproducibility. Defaults to 42.
    """
    task_name: str
    ds_path: str
    n_steps: int
    optimizer: str
    meta_prompt_path: str
    # NOTE(review): field is declared "meta_llms" but _parse_config assigns
    # "self.meta_llm" (singular) — the two never refer to the same attribute.
    # Confirm which name callers use and unify.
    meta_llms: str
    downstream_llm: str
    evaluation_llm: str
    init_pop_size: int = 10
    logging_dir: str = "logs/run.csv"
    experiment_name: str = "experiment"
    include_task_desc: bool = False
    random_seed: int = 42

    def __init__(self, config_path: str = None, **kwargs):
        """Load settings from an INI file if *config_path* is given, otherwise
        set each keyword argument directly as an attribute.

        Because this user-defined __init__ exists, @dataclass does not
        generate one; the dataclass field defaults above are class attributes
        only and apply when neither the file nor kwargs set them.
        """
        if config_path:
            self.config_path = config_path
            self.config = ConfigParser()
            self.config.read(config_path)
            self._parse_config()
        else:
            # kwargs path: no validation or type coercion is performed here.
            for key, value in kwargs.items():
                setattr(self, key, value)

    def _parse_config(self):
        """Copy values out of the parsed INI sections onto the instance.

        Raises KeyError if a required section/option is missing (ConfigParser
        mapping access), and ValueError if an int-typed option is malformed.
        """
        self.task_name = self.config["task"]["task_name"]
        self.ds_path = self.config["task"]["ds_path"]
        # INI stores strings; numeric options are coerced explicitly.
        self.n_steps = int(self.config["task"]["steps"])
        self.random_seed = int(self.config["task"]["random_seed"])
        self.optimizer = self.config["optimizer"]["name"]
        self.meta_prompt_path = self.config["optimizer"]["meta_prompt_path"]
        # NOTE(review): assigns "meta_llm" although the dataclass field is
        # "meta_llms" — see the field comment above.
        self.meta_llm = self.config["meta_llm"]["name"]
        self.downstream_llm = self.config["downstream_llm"]["name"]
        self.evaluation_llm = self.config["evaluator_llm"]["name"]
        self.init_pop_size = int(self.config["optimizer"]["init_pop_size"])
        self.logging_dir = self.config["logging"]["dir"]
        self.experiment_name = self.config["experiment"]["name"]

        # Optional flag; only the exact string "True" enables it.
        if "include_task_desc" in self.config["task"]:
            self.include_task_desc = self.config["task"]["include_task_desc"] == "True"

        if self.optimizer == "evopromptga":
            self.selection_mode = self.config["optimizer"]["selection_mode"]
        elif self.optimizer == "evopromptde":
            # NOTE(review): reads the "donor_random" option but stores it into
            # selection_mode — looks like it should be self.donor_random; verify.
            self.selection_mode = self.config["optimizer"]["donor_random"]

        # Local models additionally need a batch size.
        if "local" in self.meta_llm:
            self.meta_bs = int(self.config["meta_llm"]["batch_size"])

        if "local" in self.downstream_llm:
            self.downstream_bs = int(self.config["downstream_llm"]["batch_size"])
@@ -0,0 +1,31 @@
1
+ from .api_llm import APILLM
2
+ from .base_llm import DummyLLM
3
+ from .local_llm import LocalLLM
4
+
5
+
6
def get_llm(model_id: str, *args, **kwargs):
    """
    Factory returning a language model instance for the given identifier.

    Three kinds of model are supported:
    1. DummyLLM — a mock LLM for testing, selected by ``model_id == "dummy"``.
    2. LocalLLM — locally run models, selected when "local" occurs in the id;
       the text after the first "-" is passed on as the model name
       (``"local-{model_name}"``).
    3. APILLM — the default for any other identifier.

    Args:
        model_id (str): Identifier for the model to use.
        *args: Positional arguments forwarded to the LLM constructor.
        **kwargs: Keyword arguments forwarded to the LLM constructor.

    Returns:
        An instance of DummyLLM, LocalLLM, or APILLM based on the model_id.
    """
    if model_id == "dummy":
        return DummyLLM(*args, **kwargs)
    if "local" in model_id:
        # Drop the leading "local" segment: "local-my-model" -> "my-model".
        _, _, local_name = model_id.partition("-")
        return LocalLLM(local_name, *args, **kwargs)
    return APILLM(model_id, *args, **kwargs)
@@ -0,0 +1,149 @@
1
+ import asyncio
2
+ import requests
3
+ import time
4
+ import openai
5
+ from logging import INFO, Logger
6
+
7
+ from typing import List
8
+
9
+ from langchain_anthropic import ChatAnthropic
10
+ from langchain_community.chat_models.deepinfra import ChatDeepInfraException
11
+ from langchain_core.messages import HumanMessage
12
+ from langchain_openai import ChatOpenAI
13
+
14
+ from promptolution.llms.deepinfra import ChatDeepInfra
15
+
16
+
17
+ logger = Logger(__name__)
18
+ logger.setLevel(INFO)
19
+
20
+
21
async def invoke_model(prompt, model, semaphore):
    """
    Asynchronously invoke a language model with retry logic.

    Args:
        prompt (str): The input prompt for the model.
        model: The language model to invoke (must expose a blocking ``invoke``).
        semaphore (asyncio.Semaphore): Semaphore to limit concurrent calls.

    Returns:
        str: The model's response content.

    Raises:
        ChatDeepInfraException: If all retry attempts fail.
    """
    async with semaphore:
        max_retries = 100
        delay = 3

        for attempt in range(1, max_retries + 1):
            try:
                # model.invoke is blocking; run it in a worker thread so the
                # event loop keeps serving the other concurrent calls.
                response = await asyncio.to_thread(model.invoke, [HumanMessage(content=prompt)])
                return response.content
            except ChatDeepInfraException as e:
                print(f"DeepInfra error: {e}. Attempt {attempt}/{max_retries}. Retrying in {delay} seconds...")
                if attempt == max_retries:
                    # Previously the loop fell through and implicitly returned
                    # None; re-raise so callers see the failure, as documented.
                    raise
                # await asyncio.sleep, not time.sleep: a blocking sleep here
                # would stall the entire event loop between retries.
                await asyncio.sleep(delay)
49
+
50
+
51
class APILLM:
    """
    Interface to language models served through external APIs.

    Supports Claude (Anthropic), GPT (OpenAI), and LLaMA (DeepInfra) models.
    API keys are read from well-known token files in the working directory.

    Attributes:
        model: The initialized chat model instance.

    Methods:
        get_response: Synchronously get responses for a list of prompts.
        _get_response: Asynchronously get responses for a list of prompts.
    """

    def __init__(self, model_id: str):
        """
        Initialize the APILLM with a specific model.

        The provider is chosen by substring of *model_id*:
        "claude" -> Anthropic, "gpt" -> OpenAI, "llama" -> DeepInfra.

        Args:
            model_id (str): Identifier for the model to use.

        Raises:
            ValueError: If an unknown model identifier is provided.
        """
        if "claude" in model_id:
            self.model = ChatAnthropic(model=model_id, api_key=self._read_token("anthropictoken.txt"))
        elif "gpt" in model_id:
            self.model = ChatOpenAI(model=model_id, api_key=self._read_token("openaitoken.txt"))
        elif "llama" in model_id:
            self.model = ChatDeepInfra(model_name=model_id, deepinfra_api_token=self._read_token("deepinfratoken.txt"))
        else:
            raise ValueError(f"Unknown model: {model_id}")

    @staticmethod
    def _read_token(path: str) -> str:
        """Read an API token from *path*.

        Uses a context manager (the original ``open(...).read()`` leaked the
        file handle) and strips surrounding whitespace so a trailing newline
        in the token file cannot corrupt the credential.
        """
        with open(path, "r") as f:
            return f.read().strip()

    def get_response(self, prompts: List[str]) -> List[str]:
        """
        Synchronously get responses for a list of prompts.

        Retries on connection errors and OpenAI rate limits, waiting a fixed
        delay between attempts.

        Args:
            prompts (list[str]): List of input prompts.

        Returns:
            list[str]: List of model responses, aligned with the inputs.

        Raises:
            requests.exceptions.ConnectionError: If max retries are exceeded.
        """
        max_retries = 100
        delay = 3
        attempts = 0

        while attempts < max_retries:
            try:
                return asyncio.run(self._get_response(prompts))
            except requests.exceptions.ConnectionError as e:
                attempts += 1
                logger.critical(
                    f"Connection error: {e}. Attempt {attempts}/{max_retries}. Retrying in {delay} seconds..."
                )
                time.sleep(delay)
            except openai.RateLimitError as e:
                attempts += 1
                logger.critical(
                    f"Rate limit error: {e}. Attempt {attempts}/{max_retries}. Retrying in {delay} seconds..."
                )
                time.sleep(delay)

        # The loop only exits when every attempt failed.
        raise requests.exceptions.ConnectionError("Max retries exceeded. Connection could not be established.")

    async def _get_response(self, prompts: List[str], max_concurrent_calls=200) -> List[str]:
        """
        Asynchronously get responses for a list of prompts.

        A semaphore caps the number of API calls in flight at once.

        Args:
            prompts (list[str]): List of input prompts.
            max_concurrent_calls (int): Maximum number of concurrent API calls allowed.

        Returns:
            list[str]: List of model responses.
        """
        # Limit the number of concurrent calls; each task awaits the semaphore.
        semaphore = asyncio.Semaphore(max_concurrent_calls)
        tasks = [invoke_model(prompt, self.model, semaphore) for prompt in prompts]
        return await asyncio.gather(*tasks)
@@ -0,0 +1,74 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List
3
+
4
+ import numpy as np
5
+
6
+
7
class BaseLLM(ABC):
    """
    Abstract base class for Language Models in the promptolution library.

    Concrete LLM implementations must provide :meth:`get_response`; the base
    constructor accepts and ignores arbitrary arguments so subclasses share a
    uniform construction signature.
    """

    def __init__(self, *args, **kwargs):
        pass

    @abstractmethod
    def get_response(self, prompts: List[str]) -> List[str]:
        """
        Generate one response per input prompt.

        Subclasses define how the responses are produced.

        Args:
            prompts (List[str]): A list of input prompts.

        Returns:
            List[str]: Generated responses aligned with the input prompts.
        """
        ...
35
+
36
+
37
class DummyLLM(BaseLLM):
    """
    A dummy implementation of the BaseLLM for testing purposes.

    Generates silly random responses enclosed in <prompt> tags, simulating a
    language model without any real natural language processing.
    """

    def __init__(self, *args, **kwargs):
        pass

    def get_response(self, prompts: str) -> str:
        """
        Generate exactly one random response per input prompt.

        Args:
            prompts (str or List[str]): Input prompt(s). A single string is
                converted to a one-element list.

        Returns:
            List[str]: Randomly generated responses, one per input prompt.
        """
        if isinstance(prompts, str):
            prompts = [prompts]
        results = []
        for _ in prompts:
            r = np.random.rand()
            if r < 0.3:
                results += [f"Joooo wazzuppp <prompt>hier gehts los {r} </prompt>"]
            # BUG FIX: this branch was a separate `if`, so for r < 0.3 both the
            # first branch AND the trailing `else` fired, appending TWO results
            # for a single prompt. `elif` keeps exactly one response per prompt.
            elif 0.3 <= r < 0.6:
                results += [f"was das hier? <prompt>peter lustig{r}</prompt>"]
            else:
                results += [f"hier ist ein <prompt>test{r}</prompt>"]

        return results