lionagi 0.0.106__py3-none-any.whl → 0.0.107__py3-none-any.whl

lionagi/utils/api_util.py CHANGED
@@ -1,27 +1,37 @@
  import asyncio
- import json
  import logging
  import re
  from abc import ABC, abstractmethod
  from dataclasses import dataclass
  from typing import Any, Callable, Dict, Generator, NoReturn, Optional

+ from .sys_util import append_to_jsonl
+
+
  @dataclass
  class StatusTracker:
- """Class for keeping track of various task statuses.
+ """
+ Class for keeping track of various task statuses.

  This class serves as a simple way to monitor different types of task
  outcomes and errors within a system. It uses dataclasses for easy
  creation and management of state.

  Attributes:
- num_tasks_started: The number of tasks that have been initiated.
- num_tasks_in_progress: The number of tasks currently being processed.
- num_tasks_succeeded: The number of tasks that have completed successfully.
- num_tasks_failed: The number of tasks that have failed.
- num_rate_limit_errors: The number of tasks that failed due to rate limiting.
- num_api_errors: The number of tasks that failed due to API errors.
- num_other_errors: The number of tasks that failed due to other errors.
+ num_tasks_started:
+ The number of tasks that have been initiated.
+ num_tasks_in_progress:
+ The number of tasks currently being processed.
+ num_tasks_succeeded:
+ The number of tasks that have completed successfully.
+ num_tasks_failed:
+ The number of tasks that have failed.
+ num_rate_limit_errors:
+ The number of tasks that failed due to rate limiting.
+ num_api_errors:
+ The number of tasks that failed due to API errors.
+ num_other_errors:
+ The number of tasks that failed due to other errors.
  """
  num_tasks_started: int = 0
  num_tasks_in_progress: int = 0
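
For orientation, a minimal sketch of updating these counters around a single task; only the field names come from the hunk above, the task itself is a hypothetical stand-in:

    # Hypothetical usage of StatusTracker; field names match the diff above.
    tracker = StatusTracker()

    def do_work():
        return "ok"  # stand-in for a real task

    tracker.num_tasks_started += 1
    tracker.num_tasks_in_progress += 1
    try:
        do_work()
        tracker.num_tasks_succeeded += 1
    except Exception:
        tracker.num_tasks_failed += 1
    finally:
        tracker.num_tasks_in_progress -= 1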
@@ -41,16 +51,24 @@ class AsyncQueue:
  concurrent task processing in an orderly and controlled manner.

  Attributes:
- queue (asyncio.Queue): A queue to hold items for asynchronous processing.
- _stop_event (asyncio.Event): An event to signal when the queue should stop processing.
+ queue (asyncio.Queue):
+ A queue to hold items for asynchronous processing.
+ _stop_event (asyncio.Event):
+ An event to signal when the queue should stop processing.

  Methods:
- enqueue(item): Add an item to the queue for processing.
- dequeue(): Remove and return an item from the queue.
- join(): Wait until all items in the queue have been processed.
- stop(): Signal to stop processing new items in the queue.
- stopped(): Check if the queue has been signaled to stop.
- process_requests(func): Process items using a provided function.
+ enqueue(item):
+ Add an item to the queue for processing.
+ dequeue():
+ Remove and return an item from the queue.
+ join():
+ Wait until all items in the queue have been processed.
+ stop():
+ Signal to stop processing new items in the queue.
+ stopped():
+ Check if the queue has been signaled to stop.
+ process_requests(func):
+ Process items using a provided function.
  """

  def __init__(self) -> None:
@@ -64,7 +82,7 @@ class AsyncQueue:
  """
  Asynchronously add an item to the queue for processing.

- Args:
+ Parameters:
  item (Any): The item to be added to the queue.

  Example:
@@ -139,7 +157,7 @@ class AsyncQueue:
  Continuously dequeues items and applies the given function to each.
  The processing stops when the queue is signaled to stop or a sentinel value (`None`) is dequeued.

- Args:
+ Parameters:
  func (Callable[[Any], Any]): A coroutine function to process items from the queue.

  Example:
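
A minimal driver for this consumer loop, assuming only the methods named in the docstring (enqueue, process_requests) and the documented `None` sentinel:

    import asyncio

    async def handle(item):
        print("processed", item)  # stand-in for real per-item work

    async def main(queue):  # queue: an AsyncQueue instance
        for item in ("a", "b", "c"):
            await queue.enqueue(item)
        await queue.enqueue(None)  # sentinel: docstring says None stops processing
        await queue.process_requests(handle)

    # asyncio.run(main(AsyncQueue()))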
@@ -166,23 +184,29 @@ class RateLimiter(ABC):
  of requests sent to or received from a network interface controller or an API.

  Attributes:
- max_requests_per_minute (int): Maximum number of requests permitted per minute.
- max_tokens_per_minute (int): Maximum number of tokens that can accumulate per minute.
- available_request_capacity (int): Current number of available request slots.
- available_token_capacity (int): Current number of available tokens.
- rate_limit_replenisher_task (asyncio.Task): Background task for replenishing rate limits.
+ max_requests_per_minute (int):
+ Maximum number of requests permitted per minute.
+ max_tokens_per_minute (int):
+ Maximum number of tokens that can accumulate per minute.
+ available_request_capacity (int):
+ Current number of available request slots.
+ available_token_capacity (int):
+ Current number of available tokens.

  Methods:
- rate_limit_replenisher: Coroutine to replenish rate limits over time.
- calculate_num_token: Method to calculate required tokens for a request.
+ rate_limit_replenisher:
+ Coroutine to replenish rate limits over time.
+ calculate_num_token:
+ Method to calculate required tokens for a request.
  """

  def __init__(self, max_requests_per_minute: int, max_tokens_per_minute: int) -> None:
  """
  Initializes the RateLimiter with specified maximum request and token limits.

- Args:
+ Parameters:
  max_requests_per_minute (int): Maximum requests allowed per minute.
+
  max_tokens_per_minute (int): Maximum tokens allowed to accumulate per minute.

  Example:
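
A concrete subclass might look like the sketch below; only the attribute and method names come from the docstring, while the once-per-minute replenish policy and the word-count token estimate are assumptions:

    import asyncio
    from typing import Any, Dict

    class SimpleRateLimiter(RateLimiter):
        async def rate_limit_replenisher(self) -> None:
            while True:
                await asyncio.sleep(60)  # assumed interval: once per minute
                self.available_request_capacity = self.max_requests_per_minute
                self.available_token_capacity = self.max_tokens_per_minute

        def calculate_num_token(self, payload: Dict[str, Any], api_endpoint: str) -> int:
            # Crude stand-in: whitespace word count, not a real tokenizer.
            return len(str(payload.get("prompt", "")).split())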
@@ -226,8 +250,9 @@ class RateLimiter(ABC):
  Subclasses should implement this method to determine the number of tokens needed based
  on the request payload and target endpoint.

- Args:
+ Parameters:
  payload (Dict[str, Any]): Payload of the request.
+
  api_endpoint (str): Target API endpoint for the request.

  Returns:
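
If the intended accounting is OpenAI-style token counting, a calculate_num_token implementation could lean on the tiktoken package; this is an assumption, since the diff does not show the method body:

    import tiktoken

    def count_tokens(text: str, encoding_name: str = "cl100k_base") -> int:
        # Encode with the named tiktoken encoding and count the token ids.
        enc = tiktoken.get_encoding(encoding_name)
        return len(enc.encode(text))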
@@ -245,6 +270,7 @@ class RateLimiter(ABC):

  ...

+
  class BaseAPIService(ABC):
  """
  Abstract base class for API services requiring asynchronous operations.
@@ -254,19 +280,30 @@ class BaseAPIService(ABC):
  subclassed for concrete implementations of specific API service interactions.

  Attributes:
- api_key (str): The API key used for authenticating with the API service.
- token_encoding_name (str): The encoding for the API token.
- max_attempts (int): The maximum number of retry attempts for API calls.
- status_tracker (StatusTracker): Tracker for API call statuses.
- rate_limiter (RateLimiter): Limiter to control the rate of API calls.
- queue (AsyncQueue): Queue for managing API call tasks.
+ api_key (str):
+ The API key used for authenticating with the API service.
+ token_encoding_name (str):
+ The encoding for the API token.
+ max_attempts (int):
+ The maximum number of retry attempts for API calls.
+ status_tracker (StatusTracker):
+ Tracker for API call statuses.
+ rate_limiter (RateLimiter):
+ Limiter to control the rate of API calls.
+ queue (AsyncQueue):
+ Queue for managing API call tasks.

  Methods:
- call_api: Abstract method to define API call mechanism in subclasses.
- handle_error: Handle errors by logging and saving details to a JSONL file.
- append_to_jsonl: Append data to a file in JSONL format.
- api_endpoint_from_url: Extract the API endpoint from a URL.
- task_id_generator_function: Generate a sequence of unique task IDs.
+ call_api:
+ Abstract method to define API call mechanism in subclasses.
+ handle_error:
+ Handle errors by logging and saving details to a JSONL file.
+ append_to_jsonl:
+ Append data to a file in JSONL format.
+ api_endpoint_from_url:
+ Extract the API endpoint from a URL.
+ task_id_generator_function:
+ Generate a sequence of unique task IDs.
  """

  def __init__(
@@ -278,17 +315,22 @@ class BaseAPIService(ABC):
  max_tokens_per_minute: int,
  ratelimiter,
  status_tracker: Optional[StatusTracker] = None,
- queue: Optional[AsyncQueue] = None,
+ queue: Optional[AsyncQueue] = None,
  ) -> None:
  """
  Initializes the BaseAPIService with necessary configuration.

- Args:
+ Parameters:
  api_key (str): The API key for authentication.
+
  token_encoding_name (str): Encoding name for the API token.
+
  max_attempts (int): Maximum number of attempts for an API call.
+
  status_tracker (Optional[StatusTracker]): Tracker for API call statuses.
-
- rate_limiter (RateLimiter): Limiter for API call rates.
+
+ ratelimiter: Limiter for API call rates.
+
  queue (Optional[AsyncQueue]): Queue for managing API tasks.

  Example:
@@ -297,7 +339,7 @@ class BaseAPIService(ABC):
  ...
  >>> service = MyAPIService(api_key="12345", token_encoding_name="utf-8",
  ... max_attempts=3, status_tracker=None,
- ... rate_limiter=my_rate_limiter, queue=None)
+ ... rate_limiter=ratelimiter, queue=None)
  """
  self.api_key = api_key
  self.token_encoding_name = token_encoding_name
@@ -305,6 +347,7 @@ class BaseAPIService(ABC):
  self.status_tracker = status_tracker or StatusTracker()
  self.queue = queue or AsyncQueue()
  self.rate_limiter = ratelimiter(max_requests_per_minute, max_tokens_per_minute)
+ self.append_to_jsonl = append_to_jsonl

  @abstractmethod
  async def call_api(self) -> Any:
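
Since call_api is the only abstract method shown, a concrete service could be sketched as follows; the loop body is hypothetical and relies only on the AsyncQueue methods documented earlier:

    class MyAPIService(BaseAPIService):
        async def call_api(self):
            # Drain the queue until stopped or the None sentinel arrives.
            while not self.queue.stopped():
                item = await self.queue.dequeue()
                if item is None:
                    break
                # ... send the request, update self.status_tracker ...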
@@ -335,10 +378,13 @@ class BaseAPIService(ABC):

  Updates the status tracker to indicate the error and saves details to a JSONL file.

- Args:
+ Parameters:
  error (Exception): The exception that was raised during the API call.
+
  payload (Any): The data payload that was used for the API call.
+
  metadata (Any): Additional metadata related to the API call.
+
  save_filepath (str): The file path where error details should be saved.
  """
  self.status_tracker.num_tasks_in_progress -= 1
@@ -356,7 +402,7 @@ class BaseAPIService(ABC):
  """
  Extracts the endpoint from an API request URL.

- Args:
+ Parameters:
  request_url (str): The URL from which to extract the API endpoint.

  Returns:
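
The diff shows only the docstring of this helper; one plausible shape, using the `re` module already imported at the top of the file (the exact pattern is an assumption):

    import re

    def api_endpoint_from_url(request_url: str) -> str:
        # e.g. "https://api.openai.com/v1/chat/completions" -> "chat/completions"
        match = re.search(r"^https://[^/]+/v\d+/(.+)$", request_url)
        return match.group(1) if match else ""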
lionagi/utils/doc_util.py CHANGED
@@ -12,8 +12,11 @@ def dir_to_path(dir: str, ext, recursive: bool = False, flat: bool = True):

  Parameters:
  dir (str): The directory path where to search for files.
+
  ext (str): The file extension to filter by.
+
  recursive (bool, optional): If True, search for files recursively in subdirectories. Defaults to False.
+
  flat (bool, optional): If True, return a flat list of file paths. Defaults to True.

  Returns:
@@ -27,14 +30,18 @@ def dir_to_path(dir: str, ext, recursive: bool = False, flat: bool = True):
  tem = '**/*' if recursive else '*'
  return list(Path(dir).glob(tem + ext))

- return to_list(l_call(ext, _dir_to_path, flat=True), flat=flat)
-
+ try:
+ return to_list(l_call(ext, _dir_to_path, flat=True), flat=flat)
+ except:
+ raise ValueError("Invalid directory or extension, please check the path")
+
  def read_text(filepath: str, clean: bool = True) -> str:
  """
  Reads the content of a text file and optionally cleans it by removing specified characters.

  Parameters:
  filepath (str): The path to the text file to be read.
+
  clean (bool, optional): If True, clean the content by removing specific unwanted characters. Defaults to True.

  Returns:
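
An illustrative call (the directory is made up). Note that the glob pattern is built as `tem + ext`, so the extension should include its dot, and the new bare `except:` will surface any failure, not just bad paths, as ValueError:

    paths = dir_to_path("data/docs/", ".txt", recursive=True)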
@@ -63,16 +70,27 @@ def dir_to_files(dir: str, ext: str, recursive: bool = False,

  Parameters:
  dir (str): The directory path where files are located.
+
  ext (str): The file extension to filter by.
+
  recursive (bool, optional): If True, search files recursively in subdirectories. Defaults to False.
+
  reader (Callable, optional): Function used to read and process the content of each file. Defaults to read_text.
+
  clean (bool, optional): If True, cleans the content by removing specified characters. Defaults to True.
+
  to_csv (bool, optional): If True, export the processed data to a CSV file. Defaults to False.
+
  project (str, optional): The name of the project. Defaults to 'project'.
+
  output_dir (str, optional): Directory path for exporting the CSV file. Defaults to 'data/logs/sources/'.
+
  filename (Optional[str], optional): Name of the CSV file, if not provided, a default will be used. Defaults to None.
+
  verbose (bool, optional): If True, print a message upon CSV export. Defaults to True.
+
  timestamp (bool, optional): If True, include a timestamp in the file name. Defaults to True.
+
  logger (Optional[DataLogger], optional): An instance of DataLogger for logging, if not provided, a new one will be created. Defaults to None.

  Returns:
@@ -84,13 +102,13 @@ def dir_to_files(dir: str, ext: str, recursive: bool = False,

  sources = dir_to_path(dir, ext, recursive)

- def split_path(path: Path) -> tuple:
+ def _split_path(path: Path) -> tuple:
  folder_name = path.parent.name
  file_name = path.name
  return (folder_name, file_name)

- def to_dict(path_: Path) -> Dict[str, Union[str, Path]]:
- folder, file = split_path(path_)
+ def _to_dict(path_: Path) -> Dict[str, Union[str, Path]]:
+ folder, file = _split_path(path_)
  content = reader(str(path_), clean=clean)
  return {
  'project': project,
@@ -100,7 +118,7 @@ def dir_to_files(dir: str, ext: str, recursive: bool = False,
  'content': content
  } if content else None

- logs = to_list(l_call(sources, to_dict, flat=True), dropna=True)
+ logs = to_list(l_call(sources, _to_dict, flat=True), dropna=True)

  if to_csv:
  filename = filename or f"{project}_sources.csv"
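
A hypothetical end-to-end call with the documented defaults (paths and project name are made up):

    files = dir_to_files("data/docs/", ".txt", recursive=True,
                         to_csv=True, project="demo")
    # each entry is a dict with at least 'project' and 'content' keys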
@@ -114,14 +132,18 @@ def chunk_text(input: str, chunk_size: int, overlap: float,
  """
  Splits a string into chunks of a specified size, allowing for optional overlap between chunks.

- Args:
+ Parameters:
  input (str): The text to be split into chunks.
+
  chunk_size (int): The size of each chunk in characters.
+
  overlap (float): A value between [0, 1] specifying the percentage of overlap between adjacent chunks.
+
  threshold (int): The minimum size for the last chunk. If the last chunk is smaller than this, it will be merged with the previous chunk.

  Raises:
  TypeError: If input text cannot be converted to a string.
+
  ValueError: If any error occurs during the chunking process.

  Returns:
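
Back-of-the-envelope reading of these parameters, assuming overlap is measured as a fraction of chunk_size (the diff does not show the formula):

    chunk_size, overlap = 1500, 0.2
    shared = int(chunk_size * overlap)  # ~300 characters shared between neighbors
    step = chunk_size - shared          # ~1200 new characters per chunk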
@@ -173,11 +195,15 @@ def _file_to_chunks(input: Dict[str, Any],
  """
  Splits text from a specified dictionary field into chunks and returns a list of dictionaries.

- Args:
+ Parameters:
  input (Dict[str, Any]): The input dictionary containing the text field to be chunked.
+
  field (str, optional): The dictionary key corresponding to the text field. Defaults to 'content'.
+
  chunk_size (int, optional): Size of each text chunk in characters. Defaults to 1500.
+
  overlap (float, optional): Percentage of overlap between adjacent chunks, in the range [0, 1]. Defaults to 0.2.
+
  threshold (int, optional): Minimum size for the last chunk. If smaller, it will be merged with the previous chunk. Defaults to 200.

  Raises:
@@ -229,23 +255,39 @@ def file_to_chunks(input,
  """
  Splits text from a specified dictionary field into chunks and returns a list of dictionaries.

- Args:
+ Parameters:
  input (List[Dict[str, Any]]): The input dictionaries containing the text field to be chunked.
+
  field (str, optional): The dictionary key corresponding to the text field. Defaults to 'content'.
+
  chunk_size (int, optional): Size of each text chunk in characters. Defaults to 1500.
+
  overlap (float, optional): Percentage of overlap between adjacent chunks, in the range [0, 1]. Defaults to 0.2.
+
  threshold (int, optional): Minimum size for the last chunk. If smaller, it will be merged with the previous chunk. Defaults to 200.
- to_csv: If True, export the processed data to a CSV file.
- project: The name of the project.
- output_dir: The directory path for exporting the CSV file.
- filename: The name of the CSV file.
- verbose: If True, print a verbose message after export.
- timestamp: If True, include a timestamp in the exported file name.
- logger: An optional DataLogger instance for logging.
+
+ to_csv (bool, optional): If True, export the processed data to a CSV file.
+
+ project (str, optional): The name of the project.
+
+ output_dir (str, optional): The directory path for exporting the CSV file.
+
+ chunk_func (function, optional): The function to be used for chunking. Defaults to _file_to_chunks.
+
+ filename (str, optional): The name of the CSV file.
+
+ verbose (bool, optional): If True, print a verbose message after export.
+
+ timestamp (bool, optional): If True, include a timestamp in the exported file name.
+
+ logger (DataLogger, optional): An optional DataLogger instance for logging.
+
+ Returns:
+ List[Dict[str, Any]]: A list of dictionaries representing the processed text chunks.
  """

- f = lambda x: chunk_func(x, field=field, chunk_size=chunk_size, overlap=overlap, threshold=threshold)
- logs = to_list(l_call(input, f), flat=True)
+ _f = lambda x: chunk_func(x, field=field, chunk_size=chunk_size, overlap=overlap, threshold=threshold)
+ logs = to_list(l_call(input, _f), flat=True)

  if to_csv:
  filename = filename if filename else f"{project}_sources.csv"
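
Combining the helpers above into a hypothetical pipeline:

    docs = dir_to_files("data/docs/", ".txt")
    chunks = file_to_chunks(docs, field="content", chunk_size=1500,
                            overlap=0.2, threshold=200)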
@@ -259,18 +301,19 @@ def get_bins(input: List[str], upper: int = 7500) -> List[List[int]]:
  Get index of elements in a list based on their consecutive cumulative sum of length,
  according to some upper threshold. Return lists of indices as bins.

- Args:
- input (List[str]): List of items to be binned.
- upper (int, optional): Upper threshold for the cumulative sum of the length of items in a bin. Default is 7500.
+ Parameters:
+ input (List[str]): List of items to be binned.
+
+ upper (int, optional): Upper threshold for the cumulative sum of the length of items in a bin. Default is 7500.

  Returns:
- List[List[int]]: List of lists, where each inner list contains the indices of the items that form a bin.
+ List[List[int]]: List of lists, where each inner list contains the indices of the items that form a bin.

  Example:
- >>> items = ['apple', 'a', 'b', 'banana', 'cheery', 'c', 'd', 'e']
- >>> upper = 10
- >>> get_bins(items, upper)
- [[0, 1, 2], [3], [4, 5, 6, 7]]
+ >>> items = ['apple', 'a', 'b', 'banana', 'cheery', 'c', 'd', 'e']
+ >>> upper = 10
+ >>> get_bins(items, upper)
+ [[0, 1, 2], [3], [4, 5, 6, 7]]
  """
  current = 0
  bins = []
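
The doctest pins down the behavior; a greedy sketch that reproduces it (not necessarily the shipped implementation, which the hunk truncates after `bins = []`):

    from typing import List

    def get_bins_sketch(items: List[str], upper: int = 7500) -> List[List[int]]:
        bins, current_bin, current = [], [], 0
        for i, item in enumerate(items):
            # Start a new bin when adding this item would exceed the threshold.
            if current_bin and current + len(item) > upper:
                bins.append(current_bin)
                current_bin, current = [], 0
            current_bin.append(i)
            current += len(item)
        if current_bin:
            bins.append(current_bin)
        return bins

    # get_bins_sketch(['apple', 'a', 'b', 'banana', 'cheery', 'c', 'd', 'e'], 10)
    # -> [[0, 1, 2], [3], [4, 5, 6, 7]]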
lionagi/utils/log_util.py CHANGED
@@ -11,14 +11,18 @@ class DataLogger:
  and setting the directory where the logs should be saved.

  Attributes:
- dir (str): The directory where the log files are to be saved.
- log (deque): A deque that stores log entries.
+ dir (str):
+ The directory where the log files are to be saved.
+ log (deque):
+ A deque that stores log entries.

  Methods:
- __call__(entry): Appends a new entry to the log.
+ __call__(entry):
+ Appends a new entry to the log.
  to_csv(dir: str, filename: str, verbose: bool, timestamp: bool, dir_exist_ok: bool, file_exist_ok: bool):
  Converts the log to a CSV format and saves it to a file.
- set_dir(dir: str): Sets the directory for saving log files.
+ set_dir(dir: str):
+ Sets the directory for saving log files.
  """

  def __init__(self, dir= None, log: list = None) -> None:
@@ -27,6 +31,7 @@ class DataLogger:


  Parameters:
  dir (str, optional): The directory where the log files will be saved. Defaults to None.
+
  log (list, optional): An initial list of log entries. Defaults to an empty deque.
  """
  self.dir = dir
 
48
53
  Parameters:
49
54
  dir (str): The directory where the CSV file will be saved.
55
+
50
56
  filename (str): The name of the CSV file.
57
+
51
58
  verbose (bool, optional): If True, prints a message after saving the log. Defaults to True.
59
+
52
60
  timestamp (bool, optional): If True, appends a timestamp to the filename. Defaults to True.
61
+
53
62
  dir_exist_ok (bool, optional): If True, overrides the existing directory if needed. Defaults to True.
63
+
54
64
  file_exist_ok (bool, optional): If True, overrides the existing file if needed. Defaults to False.
55
65
 
56
66
  Postconditions:
57
67
  Saves the log entries to a CSV file and clears the `log` attribute.
68
+
58
69
  Optionally prints a message with the number of log entries saved and the file path.
59
70
  """
60
71
  filepath = create_path(dir=dir, filename=filename, timestamp=timestamp, dir_exist_ok=dir_exist_ok)
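
A hypothetical round-trip through the documented DataLogger methods:

    logger = DataLogger(dir="data/logs/")
    logger({"input": "hi", "output": "hello"})  # __call__ appends an entry
    logger.to_csv(dir="data/logs/", filename="run.csv", verbose=True,
                  timestamp=True, dir_exist_ok=True, file_exist_ok=False)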