lionagi 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

Files changed (257)
  1. lionagi/__init__.py +60 -5
  2. lionagi/core/__init__.py +0 -25
  3. lionagi/core/_setting/_setting.py +59 -0
  4. lionagi/core/action/__init__.py +14 -0
  5. lionagi/core/action/function_calling.py +136 -0
  6. lionagi/core/action/manual.py +1 -0
  7. lionagi/core/action/node.py +109 -0
  8. lionagi/core/action/tool.py +114 -0
  9. lionagi/core/action/tool_manager.py +356 -0
  10. lionagi/core/agent/base_agent.py +27 -13
  11. lionagi/core/agent/eval/evaluator.py +1 -0
  12. lionagi/core/agent/eval/vote.py +40 -0
  13. lionagi/core/agent/learn/learner.py +59 -0
  14. lionagi/core/agent/plan/unit_template.py +1 -0
  15. lionagi/core/collections/__init__.py +17 -0
  16. lionagi/core/{generic/data_logger.py → collections/_logger.py} +69 -55
  17. lionagi/core/collections/abc/__init__.py +53 -0
  18. lionagi/core/collections/abc/component.py +615 -0
  19. lionagi/core/collections/abc/concepts.py +297 -0
  20. lionagi/core/collections/abc/exceptions.py +150 -0
  21. lionagi/core/collections/abc/util.py +45 -0
  22. lionagi/core/collections/exchange.py +161 -0
  23. lionagi/core/collections/flow.py +426 -0
  24. lionagi/core/collections/model.py +419 -0
  25. lionagi/core/collections/pile.py +913 -0
  26. lionagi/core/collections/progression.py +236 -0
  27. lionagi/core/collections/util.py +64 -0
  28. lionagi/core/director/direct.py +314 -0
  29. lionagi/core/director/director.py +2 -0
  30. lionagi/core/{execute/branch_executor.py → engine/branch_engine.py} +134 -97
  31. lionagi/core/{execute/instruction_map_executor.py → engine/instruction_map_engine.py} +80 -55
  32. lionagi/{experimental/directive/evaluator → core/engine}/script_engine.py +17 -1
  33. lionagi/core/executor/base_executor.py +90 -0
  34. lionagi/core/{execute/structure_executor.py → executor/graph_executor.py} +83 -67
  35. lionagi/core/{execute → executor}/neo4j_executor.py +70 -67
  36. lionagi/core/generic/__init__.py +3 -33
  37. lionagi/core/generic/edge.py +42 -92
  38. lionagi/core/generic/edge_condition.py +16 -0
  39. lionagi/core/generic/graph.py +236 -0
  40. lionagi/core/generic/hyperedge.py +1 -0
  41. lionagi/core/generic/node.py +156 -221
  42. lionagi/core/generic/tree.py +48 -0
  43. lionagi/core/generic/tree_node.py +79 -0
  44. lionagi/core/mail/__init__.py +12 -0
  45. lionagi/core/mail/mail.py +25 -0
  46. lionagi/core/mail/mail_manager.py +139 -58
  47. lionagi/core/mail/package.py +45 -0
  48. lionagi/core/mail/start_mail.py +36 -0
  49. lionagi/core/message/__init__.py +19 -0
  50. lionagi/core/message/action_request.py +133 -0
  51. lionagi/core/message/action_response.py +135 -0
  52. lionagi/core/message/assistant_response.py +95 -0
  53. lionagi/core/message/instruction.py +234 -0
  54. lionagi/core/message/message.py +101 -0
  55. lionagi/core/message/system.py +86 -0
  56. lionagi/core/message/util.py +283 -0
  57. lionagi/core/report/__init__.py +4 -0
  58. lionagi/core/report/base.py +217 -0
  59. lionagi/core/report/form.py +231 -0
  60. lionagi/core/report/report.py +166 -0
  61. lionagi/core/report/util.py +28 -0
  62. lionagi/core/rule/_default.py +16 -0
  63. lionagi/core/rule/action.py +99 -0
  64. lionagi/core/rule/base.py +238 -0
  65. lionagi/core/rule/boolean.py +56 -0
  66. lionagi/core/rule/choice.py +47 -0
  67. lionagi/core/rule/mapping.py +96 -0
  68. lionagi/core/rule/number.py +71 -0
  69. lionagi/core/rule/rulebook.py +109 -0
  70. lionagi/core/rule/string.py +52 -0
  71. lionagi/core/rule/util.py +35 -0
  72. lionagi/core/session/branch.py +431 -0
  73. lionagi/core/session/directive_mixin.py +287 -0
  74. lionagi/core/session/session.py +229 -903
  75. lionagi/core/structure/__init__.py +1 -0
  76. lionagi/core/structure/chain.py +1 -0
  77. lionagi/core/structure/forest.py +1 -0
  78. lionagi/core/structure/graph.py +1 -0
  79. lionagi/core/structure/tree.py +1 -0
  80. lionagi/core/unit/__init__.py +5 -0
  81. lionagi/core/unit/parallel_unit.py +245 -0
  82. lionagi/core/unit/template/action.py +81 -0
  83. lionagi/core/unit/template/base.py +51 -0
  84. lionagi/core/unit/template/plan.py +84 -0
  85. lionagi/core/unit/template/predict.py +109 -0
  86. lionagi/core/unit/template/score.py +124 -0
  87. lionagi/core/unit/template/select.py +104 -0
  88. lionagi/core/unit/unit.py +362 -0
  89. lionagi/core/unit/unit_form.py +305 -0
  90. lionagi/core/unit/unit_mixin.py +1168 -0
  91. lionagi/core/unit/util.py +71 -0
  92. lionagi/core/validator/validator.py +364 -0
  93. lionagi/core/work/work.py +74 -0
  94. lionagi/core/work/work_function.py +92 -0
  95. lionagi/core/work/work_queue.py +81 -0
  96. lionagi/core/work/worker.py +195 -0
  97. lionagi/core/work/worklog.py +124 -0
  98. lionagi/experimental/compressor/base.py +46 -0
  99. lionagi/experimental/compressor/llm_compressor.py +247 -0
  100. lionagi/experimental/compressor/llm_summarizer.py +61 -0
  101. lionagi/experimental/compressor/util.py +70 -0
  102. lionagi/experimental/directive/__init__.py +19 -0
  103. lionagi/experimental/directive/parser/base_parser.py +69 -2
  104. lionagi/experimental/directive/{template_ → template}/base_template.py +17 -1
  105. lionagi/{libs/ln_tokenizer.py → experimental/directive/tokenizer.py} +16 -0
  106. lionagi/experimental/{directive/evaluator → evaluator}/ast_evaluator.py +16 -0
  107. lionagi/experimental/{directive/evaluator → evaluator}/base_evaluator.py +16 -0
  108. lionagi/experimental/knowledge/__init__.py +0 -0
  109. lionagi/experimental/knowledge/base.py +10 -0
  110. lionagi/experimental/knowledge/graph.py +0 -0
  111. lionagi/experimental/memory/__init__.py +0 -0
  112. lionagi/experimental/strategies/__init__.py +0 -0
  113. lionagi/experimental/strategies/base.py +1 -0
  114. lionagi/integrations/bridge/langchain_/documents.py +4 -0
  115. lionagi/integrations/bridge/llamaindex_/index.py +30 -0
  116. lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
  117. lionagi/integrations/chunker/chunk.py +161 -24
  118. lionagi/integrations/config/oai_configs.py +34 -3
  119. lionagi/integrations/config/openrouter_configs.py +14 -2
  120. lionagi/integrations/loader/load.py +122 -21
  121. lionagi/integrations/loader/load_util.py +6 -77
  122. lionagi/integrations/provider/_mapping.py +46 -0
  123. lionagi/integrations/provider/litellm.py +2 -1
  124. lionagi/integrations/provider/mlx_service.py +16 -9
  125. lionagi/integrations/provider/oai.py +91 -4
  126. lionagi/integrations/provider/ollama.py +6 -5
  127. lionagi/integrations/provider/openrouter.py +115 -8
  128. lionagi/integrations/provider/services.py +2 -2
  129. lionagi/integrations/provider/transformers.py +18 -22
  130. lionagi/integrations/storage/__init__.py +3 -3
  131. lionagi/integrations/storage/neo4j.py +52 -60
  132. lionagi/integrations/storage/storage_util.py +45 -47
  133. lionagi/integrations/storage/structure_excel.py +285 -0
  134. lionagi/integrations/storage/to_excel.py +23 -7
  135. lionagi/libs/__init__.py +26 -1
  136. lionagi/libs/ln_api.py +75 -20
  137. lionagi/libs/ln_context.py +37 -0
  138. lionagi/libs/ln_convert.py +21 -9
  139. lionagi/libs/ln_func_call.py +69 -28
  140. lionagi/libs/ln_image.py +107 -0
  141. lionagi/libs/ln_nested.py +26 -11
  142. lionagi/libs/ln_parse.py +82 -23
  143. lionagi/libs/ln_queue.py +16 -0
  144. lionagi/libs/ln_tokenize.py +164 -0
  145. lionagi/libs/ln_validate.py +16 -0
  146. lionagi/libs/special_tokens.py +172 -0
  147. lionagi/libs/sys_util.py +95 -24
  148. lionagi/lions/coder/code_form.py +13 -0
  149. lionagi/lions/coder/coder.py +50 -3
  150. lionagi/lions/coder/util.py +30 -25
  151. lionagi/tests/libs/test_func_call.py +23 -21
  152. lionagi/tests/libs/test_nested.py +36 -21
  153. lionagi/tests/libs/test_parse.py +1 -1
  154. lionagi/tests/test_core/collections/__init__.py +0 -0
  155. lionagi/tests/test_core/collections/test_component.py +206 -0
  156. lionagi/tests/test_core/collections/test_exchange.py +138 -0
  157. lionagi/tests/test_core/collections/test_flow.py +145 -0
  158. lionagi/tests/test_core/collections/test_pile.py +171 -0
  159. lionagi/tests/test_core/collections/test_progression.py +129 -0
  160. lionagi/tests/test_core/generic/__init__.py +0 -0
  161. lionagi/tests/test_core/generic/test_edge.py +67 -0
  162. lionagi/tests/test_core/generic/test_graph.py +96 -0
  163. lionagi/tests/test_core/generic/test_node.py +106 -0
  164. lionagi/tests/test_core/generic/test_tree_node.py +73 -0
  165. lionagi/tests/test_core/test_branch.py +115 -294
  166. lionagi/tests/test_core/test_form.py +46 -0
  167. lionagi/tests/test_core/test_report.py +105 -0
  168. lionagi/tests/test_core/test_validator.py +111 -0
  169. lionagi/version.py +1 -1
  170. lionagi-0.2.0.dist-info/LICENSE +202 -0
  171. lionagi-0.2.0.dist-info/METADATA +272 -0
  172. lionagi-0.2.0.dist-info/RECORD +240 -0
  173. lionagi/core/branch/base.py +0 -653
  174. lionagi/core/branch/branch.py +0 -474
  175. lionagi/core/branch/flow_mixin.py +0 -96
  176. lionagi/core/branch/util.py +0 -323
  177. lionagi/core/direct/__init__.py +0 -19
  178. lionagi/core/direct/cot.py +0 -123
  179. lionagi/core/direct/plan.py +0 -164
  180. lionagi/core/direct/predict.py +0 -166
  181. lionagi/core/direct/react.py +0 -171
  182. lionagi/core/direct/score.py +0 -279
  183. lionagi/core/direct/select.py +0 -170
  184. lionagi/core/direct/sentiment.py +0 -1
  185. lionagi/core/direct/utils.py +0 -110
  186. lionagi/core/direct/vote.py +0 -64
  187. lionagi/core/execute/base_executor.py +0 -47
  188. lionagi/core/flow/baseflow.py +0 -23
  189. lionagi/core/flow/monoflow/ReAct.py +0 -238
  190. lionagi/core/flow/monoflow/__init__.py +0 -9
  191. lionagi/core/flow/monoflow/chat.py +0 -95
  192. lionagi/core/flow/monoflow/chat_mixin.py +0 -253
  193. lionagi/core/flow/monoflow/followup.py +0 -213
  194. lionagi/core/flow/polyflow/__init__.py +0 -1
  195. lionagi/core/flow/polyflow/chat.py +0 -251
  196. lionagi/core/form/action_form.py +0 -26
  197. lionagi/core/form/field_validator.py +0 -287
  198. lionagi/core/form/form.py +0 -302
  199. lionagi/core/form/mixin.py +0 -214
  200. lionagi/core/form/scored_form.py +0 -13
  201. lionagi/core/generic/action.py +0 -26
  202. lionagi/core/generic/component.py +0 -455
  203. lionagi/core/generic/condition.py +0 -44
  204. lionagi/core/generic/mail.py +0 -90
  205. lionagi/core/generic/mailbox.py +0 -36
  206. lionagi/core/generic/relation.py +0 -70
  207. lionagi/core/generic/signal.py +0 -22
  208. lionagi/core/generic/structure.py +0 -362
  209. lionagi/core/generic/transfer.py +0 -20
  210. lionagi/core/generic/work.py +0 -40
  211. lionagi/core/graph/graph.py +0 -126
  212. lionagi/core/graph/tree.py +0 -190
  213. lionagi/core/mail/schema.py +0 -63
  214. lionagi/core/messages/schema.py +0 -325
  215. lionagi/core/tool/__init__.py +0 -5
  216. lionagi/core/tool/tool.py +0 -28
  217. lionagi/core/tool/tool_manager.py +0 -282
  218. lionagi/experimental/tool/function_calling.py +0 -43
  219. lionagi/experimental/tool/manual.py +0 -66
  220. lionagi/experimental/tool/schema.py +0 -59
  221. lionagi/experimental/tool/tool_manager.py +0 -138
  222. lionagi/experimental/tool/util.py +0 -16
  223. lionagi/experimental/work/_logger.py +0 -25
  224. lionagi/experimental/work/schema.py +0 -30
  225. lionagi/experimental/work/tests.py +0 -72
  226. lionagi/experimental/work/work_function.py +0 -89
  227. lionagi/experimental/work/worker.py +0 -12
  228. lionagi/integrations/bridge/llamaindex_/get_index.py +0 -294
  229. lionagi/tests/test_core/test_base_branch.py +0 -426
  230. lionagi/tests/test_core/test_chat_flow.py +0 -63
  231. lionagi/tests/test_core/test_mail_manager.py +0 -75
  232. lionagi/tests/test_core/test_prompts.py +0 -51
  233. lionagi/tests/test_core/test_session.py +0 -254
  234. lionagi/tests/test_core/test_session_base_util.py +0 -313
  235. lionagi/tests/test_core/test_tool_manager.py +0 -95
  236. lionagi-0.1.1.dist-info/LICENSE +0 -9
  237. lionagi-0.1.1.dist-info/METADATA +0 -174
  238. lionagi-0.1.1.dist-info/RECORD +0 -190
  239. /lionagi/core/{branch → _setting}/__init__.py +0 -0
  240. /lionagi/core/{execute → agent/eval}/__init__.py +0 -0
  241. /lionagi/core/{flow → agent/learn}/__init__.py +0 -0
  242. /lionagi/core/{form → agent/plan}/__init__.py +0 -0
  243. /lionagi/core/{branch/executable_branch.py → agent/plan/plan.py} +0 -0
  244. /lionagi/core/{graph → director}/__init__.py +0 -0
  245. /lionagi/core/{messages → engine}/__init__.py +0 -0
  246. /lionagi/{experimental/directive/evaluator → core/engine}/sandbox_.py +0 -0
  247. /lionagi/{experimental/directive/evaluator → core/executor}/__init__.py +0 -0
  248. /lionagi/{experimental/directive/template_ → core/rule}/__init__.py +0 -0
  249. /lionagi/{experimental/tool → core/unit/template}/__init__.py +0 -0
  250. /lionagi/{experimental/work → core/validator}/__init__.py +0 -0
  251. /lionagi/core/{flow/mono_chat_mixin.py → work/__init__.py} +0 -0
  252. /lionagi/experimental/{work/exchange.py → compressor/__init__.py} +0 -0
  253. /lionagi/experimental/{work/util.py → directive/template/__init__.py} +0 -0
  254. /lionagi/experimental/directive/{schema.py → template/schema.py} +0 -0
  255. /lionagi/{tests/libs/test_async.py → experimental/evaluator/__init__.py} +0 -0
  256. {lionagi-0.1.1.dist-info → lionagi-0.2.0.dist-info}/WHEEL +0 -0
  257. {lionagi-0.1.1.dist-info → lionagi-0.2.0.dist-info}/top_level.txt +0 -0
lionagi/core/work/worker.py
@@ -0,0 +1,195 @@
+"""
+Copyright 2024 HaiyangLi
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from abc import ABC
+from functools import wraps
+import asyncio
+from lionagi import logging as _logging
+from lionagi.libs.ln_func_call import pcall
+from lionagi.core.work.work_function import WorkFunction
+from lionagi.core.work.work import Work
+
+
+class Worker(ABC):
+    """
+    This class represents a worker that handles multiple work functions.
+
+    Attributes:
+        name (str): The name of the worker.
+        work_functions (dict[str, WorkFunction]): Dictionary mapping assignments to WorkFunction objects.
+    """
+
+    name: str = "Worker"
+    work_functions: dict[str, WorkFunction] = {}
+
+    def __init__(self) -> None:
+        self.stopped = False
+
+    async def stop(self):
+        """
+        Stops the worker and all associated work functions.
+        """
+        self.stopped = True
+        _logging.info(f"Stopping worker {self.name}")
+        non_stopped_ = []
+
+        for func in self.work_functions.values():
+            worklog = func.worklog
+            await worklog.stop()
+            if not worklog.stopped:
+                non_stopped_.append(func.name)
+
+        if len(non_stopped_) > 0:
+            _logging.error(f"Could not stop worklogs: {non_stopped_}")
+        _logging.info(f"Stopped worker {self.name}")
+
+    async def is_progressable(self):
+        """
+        Checks if any work function is progressable and the worker is not stopped.
+
+        Returns:
+            bool: True if any work function is progressable and the worker is not stopped, else False.
+        """
+
+        return (
+            any([await i.is_progressable() for i in self.work_functions.values()])
+            and not self.stopped
+        )
+
+    async def process(self, refresh_time=1):
+        """
+        Processes all work functions periodically.
+
+        Args:
+            refresh_time (int): Time interval between each process cycle.
+        """
+        while await self.is_progressable():
+            await pcall([i.process(refresh_time) for i in self.work_functions.values()])
+            await asyncio.sleep(refresh_time)
+
+    # TODO: Implement process method
+
+    # async def process(self, refresh_time=1):
+    #     while not self.stopped:
+    #         tasks = [
+    #             asyncio.create_task(func.process(refresh_time=refresh_time))
+    #             for func in self.work_functions.values()
+    #         ]
+    #         await asyncio.wait(tasks)
+    #         await asyncio.sleep(refresh_time)
+
+    async def _wrapper(
+        self,
+        *args,
+        func=None,
+        assignment=None,
+        capacity=None,
+        retry_kwargs=None,
+        guidance=None,
+        **kwargs,
+    ):
+        """
+        Internal wrapper to handle work function execution.
+
+        Args:
+            func (Callable): The function to be executed.
+            assignment (str): The assignment description.
+            capacity (int): Capacity for the work log.
+            retry_kwargs (dict): Retry arguments for the function.
+            guidance (str): Guidance or documentation for the function.
+        """
+        if getattr(self, "work_functions", None) is None:
+            self.work_functions = {}
+
+        if func.__name__ not in self.work_functions:
+            self.work_functions[func.__name__] = WorkFunction(
+                assignment=assignment,
+                function=func,
+                retry_kwargs=retry_kwargs or {},
+                guidance=guidance or func.__doc__,
+                capacity=capacity,
+            )
+
+        work_func: WorkFunction = self.work_functions[func.__name__]
+        task = asyncio.create_task(work_func.perform(self, *args, **kwargs))
+        work = Work(async_task=task)
+        await work_func.worklog.append(work)
+        return True
+
+
+def work(
+    assignment=None,
+    capacity=10,
+    guidance=None,
+    retry_kwargs=None,
+    refresh_time=1,
+    timeout=10,
+):
+    """
+    Decorator to mark a method as a work function.
+
+    Args:
+        assignment (str): The assignment description of the work function.
+        capacity (int): Capacity for the work log.
+        guidance (str): Guidance or documentation for the work function.
+        retry_kwargs (dict): Retry arguments for the work function.
+        refresh_time (int): Time interval between each process cycle.
+        timeout (int): Timeout for the work function.
+    """
+
+    def decorator(func):
+        @wraps(func)
+        async def wrapper(
+            self: Worker,
+            *args,
+            func=func,
+            assignment=assignment,
+            capacity=capacity,
+            retry_kwargs=retry_kwargs,
+            guidance=guidance,
+            **kwargs,
+        ):
+            retry_kwargs = retry_kwargs or {}
+            retry_kwargs["timeout"] = retry_kwargs.get("timeout", timeout)
+            return await self._wrapper(
+                *args,
+                func=func,
+                assignment=assignment,
+                capacity=capacity,
+                retry_kwargs=retry_kwargs,
+                guidance=guidance,
+                **kwargs,
+            )
+
+        return wrapper
+
+    return decorator
+
+
+# # Example
+# from lionagi import Session
+# from lionagi.experimental.work.work_function import work
+
+
+# class MyWorker(Worker):
+
+#     @work(assignment="instruction, context -> response")
+#     async def chat(instruction=None, context=None):
+#         session = Session()
+#         return await session.chat(instruction=instruction, context=context)
+
+
+# await a.chat(instruction="Hello", context={})
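The commented-out example above leaves the instance `a` undefined and cannot run as-is. Below is a minimal, runnable sketch of the `@work` decorator based on that example; it assumes the 0.2.0 import path `lionagi.core.work.worker`, stubs out the model call, and whether `process()` actually drains the queue depends on `WorkFunction`/`WorkQueue` internals not shown in this diff.

import asyncio
from lionagi.core.work.worker import Worker, work  # assumed 0.2.0 location of this module

class EchoWorker(Worker):
    # Mirrors the commented example above: the assignment string documents the
    # "inputs -> outputs" contract of the work function.
    @work(assignment="instruction, context -> response")
    async def chat(instruction=None, context=None):
        # Stand-in for a real model call (e.g. a Branch/Session chat).
        return f"echo: {instruction}"

async def main():
    worker = EchoWorker()
    # Calling the decorated method registers a WorkFunction and enqueues a Work item.
    await worker.chat(instruction="Hello", context={})
    # process() keeps cycling while any work function reports it is progressable.
    await worker.process(refresh_time=1)

asyncio.run(main())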
lionagi/core/work/worklog.py
@@ -0,0 +1,124 @@
+"""
+Copyright 2024 HaiyangLi
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from lionagi.core.collections.abc import Progressable
+from lionagi.core.collections import pile, progression, Pile
+from lionagi.core.work.work import Work, WorkStatus
+from lionagi.core.work.work_queue import WorkQueue
+
+
+class WorkLog(Progressable):
+    """
+    A class representing a log of work items.
+
+    Attributes:
+        pile (Pile): A pile containing work items.
+        pending (Progression): A progression of pending work items.
+        queue (WorkQueue): A queue to manage the execution of work items.
+    """
+
+    def __init__(self, capacity=10, workpile=None):
+        """
+        Initializes a new instance of WorkLog.
+
+        Args:
+            capacity (int): The capacity of the work queue.
+            workpile (Pile, optional): An optional pile of initial work items.
+        """
+        self.pile = (
+            workpile if workpile and isinstance(workpile, Pile) else pile({}, Work)
+        )
+        self.pending = progression(workpile) if workpile else progression()
+        self.queue = WorkQueue(capacity=capacity)
+
+    async def append(self, work: Work):
+        """
+        Appends a new work item to the log.
+
+        Args:
+            work (Work): The work item to append.
+        """
+        self.pile.append(work)
+        self.pending.append(work)
+
+    async def forward(self):
+        """
+        Forwards pending work items to the queue if capacity allows.
+        """
+        if not self.queue.available_capacity:
+            return
+        else:
+            while len(self.pending) > 0 and self.queue.available_capacity:
+                work: Work = self.pile[self.pending.popleft()]
+                work.status = WorkStatus.IN_PROGRESS
+                await self.queue.enqueue(work)
+
+    async def stop(self):
+        """
+        Stops the work queue.
+        """
+        await self.queue.stop()
+
+    @property
+    def pending_work(self):
+        """
+        Retrieves the pile of pending work items.
+
+        Returns:
+            Pile: A pile of pending work items.
+        """
+        return pile([i for i in self.pile if i.status == WorkStatus.PENDING])
+
+    @property
+    def stopped(self):
+        """
+        Checks if the work queue is stopped.
+
+        Returns:
+            bool: True if the work queue is stopped, else False.
+        """
+        return self.queue.stopped
+
+    @property
+    def completed_work(self):
+        """
+        Retrieves the pile of completed work items.
+
+        Returns:
+            Pile: A pile of completed work items.
+        """
+        return pile([i for i in self.pile if i.status == WorkStatus.COMPLETED])
+
+    def __contains__(self, work):
+        """
+        Checks if a work item is in the pile.
+
+        Args:
+            work (Work): The work item to check.
+
+        Returns:
+            bool: True if the work item is in the pile, else False.
+        """
+        return work in self.pile
+
+    def __iter__(self):
+        """
+        Returns an iterator over the work pile.
+
+        Returns:
+            Iterator: An iterator over the work pile.
+        """
+        return iter(self.pile)
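For orientation, a rough sketch of driving a WorkLog directly (normally `Worker._wrapper` does this). It assumes only the `Work(async_task=...)` constructor used in worker.py above plus the methods defined here; how the queue actually executes the work is left to `WorkQueue`, which is not shown in this diff.

import asyncio
from lionagi.core.work.work import Work        # assumed 0.2.0 locations
from lionagi.core.work.worklog import WorkLog

async def main():
    log = WorkLog(capacity=2)
    # Wrap an arbitrary coroutine as a Work item, the same way Worker._wrapper does.
    task = asyncio.create_task(asyncio.sleep(0.1))
    w = Work(async_task=task)
    await log.append(w)   # added to both the pile and the pending progression
    print(w in log)       # __contains__ checks the pile
    await log.forward()   # marks pending items IN_PROGRESS and enqueues them
    await log.stop()      # stops the underlying WorkQueue

asyncio.run(main())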
lionagi/experimental/compressor/base.py
@@ -0,0 +1,46 @@
+from abc import ABC
+from lionagi.core.collections import iModel
+
+
+class TokenCompressor(ABC):
+    """
+    NOTICE:
+        The token compressor system is inspired by LLMLingua.
+        https://github.com/microsoft/LLMLingua
+
+        MIT License
+        Copyright (c) Microsoft Corporation.
+
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+
+    Authors:
+        Huiqiang Jiang, Qianhui Wu, Chin-Yew Lin, Yuqing Yang, Lili Qiu
+        @inproceedings{jiang-etal-2023-llmlingua,
+            title = "{LLML}ingua: Compressing Prompts for Accelerated Inference of Large Language Models",
+            author = "Huiqiang Jiang and Qianhui Wu and Chin-Yew Lin and Yuqing Yang and Lili Qiu",
+            booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
+            month = dec,
+            year = "2023",
+            publisher = "Association for Computational Linguistics",
+            url = "https://aclanthology.org/2023.emnlp-main.825",
+            doi = "10.18653/v1/2023.emnlp-main.825",
+            pages = "13358--13376",
+        }
+
+    LionAGI Modifications:
+        - Only borrowed the concept of token compression via perplexity
+        - Removed the dependency on the LLMLingua library
+        - use logprobs from GPT model to calculate perplexity
+        - added async ability to the functions
+        - used lionagi existing iModel class for API calls
+    """
+
+    def __init__(self, imodel: iModel, tokenizer=None, splitter=None):
+        self.imodel = imodel
+        self.tokenizer = tokenizer
+        self.splitter = splitter
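The "token compression via perplexity" idea mentioned above reduces to one formula: perplexity is the exponential of the negative mean token log-probability, so text the model predicts easily scores low and surprising text scores high. A small illustration with made-up numbers (in LionAGI the log-probabilities would come from the chat-completion `logprobs` obtained through `iModel`):

import math

def perplexity(logprobs):
    # exp(-(1/N) * sum(log p_i)): lower values mean the span is more predictable.
    return math.exp(-sum(logprobs) / len(logprobs))

# Hypothetical per-token log-probabilities for two chunks of a prompt.
predictable = [-0.2, -0.1, -0.3]
surprising = [-2.5, -3.1, -1.9]
print(perplexity(predictable))  # ~1.22
print(perplexity(surprising))   # ~12.18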
lionagi/experimental/compressor/llm_compressor.py
@@ -0,0 +1,247 @@
+import asyncio
+from lionagi import alcall
+from lionagi.libs.ln_convert import to_list
+import numpy as np
+from lionagi.core.collections import iModel
+from .base import TokenCompressor
+from lionagi.libs.ln_tokenize import TokenizeUtil
+from time import time
+
+# inspired by LLMLingua, MIT License, Copyright (c) Microsoft Corporation.
+# https://github.com/microsoft/LLMLingua
+
+
+class LLMCompressor(TokenCompressor):
+
+    def __init__(
+        self,
+        imodel: iModel = None,
+        system_msg=None,
+        tokenizer=None,  # must be a callable or object with a tokenize method
+        splitter=None,  # must be a callable or object with a split/chunk/segment method
+        target_ratio=0.2,
+        n_samples=5,  # the cumulative samples to take in each perplexity calculation
+        chunk_size=64,
+        max_tokens_per_sample=80,
+        min_compression_score=0,  # (0-1) the minimum score to consider for compression, 0 means all
+        split_overlap=0,
+        split_threshold=0,
+        verbose=True,
+    ):
+        imodel = imodel or iModel(model="gpt-3.5-turbo", temperature=0.3)
+        super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
+        self.system_msg = (
+            system_msg
+            or "Concisely summarize and compress the information for storage:"
+        )
+        self.target_ratio = target_ratio
+        self.n_samples = n_samples
+        self.chunk_size = chunk_size
+        self.max_tokens_per_sample = max_tokens_per_sample
+        self.min_compression_score = min_compression_score
+        self.verbose = verbose
+        self.split_overlap = split_overlap
+        self.split_threshold = split_threshold
+
+    def tokenize(self, text, encoding_name=None, return_byte=False, **kwargs):
+        """
+        by default you can use `encoding_name` to be one of,
+        ['gpt2', 'r50k_base', 'p50k_base', 'p50k_edit', 'cl100k_base', 'o200k_base']
+
+        or you can use `encoding_model` that tiktoken supports in their mapping such as "gpt-4o"
+        """
+        if not self.tokenizer:
+            return TokenizeUtil.tokenize(
+                text,
+                encoding_model=self.imodel.iModel_name,
+                encoding_name=encoding_name,
+                return_byte=return_byte,
+            )
+
+        if hasattr(self.tokenizer, "tokenize"):
+            return self.tokenizer.tokenize(text, **kwargs)
+
+        return self.tokenizer(text, **kwargs)
+
+    def split(
+        self,
+        text,
+        chunk_size=None,
+        overlap=None,
+        threshold=None,
+        by_chars=False,
+        return_tokens=False,
+        return_byte=False,
+        **kwargs,
+    ):
+        if not self.splitter:
+            splitter = (
+                TokenizeUtil.chunk_by_chars
+                if by_chars
+                else TokenizeUtil.chunk_by_tokens
+            )
+            return splitter(
+                text,
+                chunk_size or self.chunk_size,
+                overlap or self.split_overlap,
+                threshold or self.split_threshold,
+                return_tokens=return_tokens,
+                return_byte=return_byte,
+            )
+
+        a = [
+            getattr(self.splitter, i, None)
+            for i in ["split", "chunk", "segment"]
+            if i is not None
+        ][0]
+        a = getattr(self.splitter, a)
+        return a(text, **kwargs)
+
+    async def rank_by_pplex(
+        self, items: list, initial_text=None, cumulative=False, n_samples=None, **kwargs
+    ):
+        """
+        rank a list of items according to their perplexity
+        an item can be a single token or a list of tokens
+
+        kwargs: additional arguments to pass to the model
+        """
+
+        async def _get_item_perplexity(item):
+            item = item if isinstance(item, list) else [item]
+            item = (
+                item[: self.max_tokens_per_sample]
+                if len(item) > self.max_tokens_per_sample
+                else item
+            )
+            return await self.imodel.compute_perplexity(
+                initial_context=initial_text,
+                tokens=item,
+                n_samples=n_samples or self.n_samples,
+                system_msg=self.system_msg,
+                **kwargs,
+            )
+
+        if not isinstance(items, list):
+            items = self.tokenize(items)
+
+        if len(items) == 1:
+            return [items]  # no need to rank a single item
+
+        _segments = []
+        _context = initial_text or ""
+        _task = []
+
+        if cumulative:
+            for i in items:
+                if isinstance(i, list):
+                    _context += " " + " ".join(i).strip()
+                else:
+                    _context += " " + i.strip()
+
+                _segments.append(_context)
+        else:
+            _segments = items
+
+        for i in _segments:
+            _task.append(asyncio.create_task(_get_item_perplexity(i)))
+
+        results = await asyncio.gather(*_task)
+        results = [(item, pplex) for item, pplex in zip(items, results)]
+        return sorted(results, key=lambda x: x[1]["logprobs"], reverse=True)
+
+    async def compress(
+        self,
+        text,
+        target_ratio=None,
+        initial_text=None,
+        cumulative=False,
+        split_kwargs=None,
+        split_overlap=None,
+        split_threshold=None,
+        rank_by="perplexity",
+        min_compression_score=None,
+        verbose=True,
+        **kwargs,
+    ):
+        start = time()
+        if split_kwargs is None:
+            split_kwargs = {}
+            split_kwargs["chunk_size"] = self.max_tokens_per_sample
+            split_kwargs["overlap"] = split_overlap or 0
+            split_kwargs["threshold"] = split_threshold or 0
+
+        len_tokens = len(self.tokenize(text))
+
+        items = self.split(text, return_tokens=True, **split_kwargs)
+
+        if rank_by == "perplexity":
+            ranked_items = await self.rank_by_pplex(
+                items=items, initial_text=initial_text, cumulative=cumulative, **kwargs
+            )
+
+            prompt_tokens = sum([i[1]["num_prompt_tokens"] for i in ranked_items])
+
+            num_completion_tokens = sum(
+                [i[1]["num_completion_tokens"] for i in ranked_items]
+            )
+
+            price = (
+                prompt_tokens * 0.5 / 1000000 + num_completion_tokens * 1.5 / 1000000
+            )
+
+            selected_items = self.select_by_pplex(
+                ranked_items=ranked_items,
+                target_compression_ratio=target_ratio or self.target_ratio,
+                original_length=len_tokens,
+                min_pplex=min_compression_score or self.min_compression_score,
+            )
+
+            if verbose:
+                msg = ""
+                msg += f"Original Token number: {len_tokens}\n"
+
+                def _f(i):
+                    if isinstance(i, str):
+                        i = self.tokenize(i)
+
+                    if isinstance(i, list):
+                        return len(to_list(i, dropna=True, flatten=True))
+
+                len_ = sum([_f(i) for i in selected_items])
+                msg += f"Selected Token number: {len_}\n"
+                msg += f"Token Compression Ratio: {len_ / len_tokens:.03f}\n"
+                msg += f"Compression Time: {time() - start:.04f} seconds\n"
+                msg += f"Compression Model: {self.imodel.iModel_name}\n"
+                msg += f"Compression Method: {rank_by}\n"
+                msg += f"Compression Usage: ${price:.05f}\n"
+                print(msg)
+
+            a = "".join([i.strip() for i in selected_items]).strip()
+            a = a.replace("\n\n", "")
+            return a
+
+        raise ValueError(f"Ranking method {rank_by} is not supported")
+
+    def select_by_pplex(
+        self, ranked_items, target_compression_ratio, original_length, min_pplex=None
+    ):
+        min_pplex = min_pplex or 0
+
+        desired_length = int(original_length * target_compression_ratio)
+
+        items = []
+        current_length = 0
+
+        for item, info in ranked_items:
+            if info["perplexity"] > min_pplex:
+                item = self.tokenize(item) if isinstance(item, str) else item
+                item = item if isinstance(item, list) else [item]
+                item = to_list(item, dropna=True, flatten=True)
+                if current_length + len(item) > desired_length:
+                    break
+                else:
+                    current_length += len(item)
+                    items.append("".join(item))
+
+        return items
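A usage sketch of the compressor, assuming this hunk corresponds to lionagi/experimental/compressor/llm_compressor.py from the file list above and that an OpenAI key is configured for the default gpt-3.5-turbo iModel (the hard-coded $0.5/$1.5 per million token prices in compress() also assume that model):

import asyncio
from lionagi.experimental.compressor.llm_compressor import LLMCompressor

async def main():
    compressor = LLMCompressor(target_ratio=0.2, chunk_size=64, verbose=True)
    long_text = open("prompt.txt").read()  # any long prompt or document
    # split() chunks the text into token windows, rank_by_pplex() scores the chunks,
    # and select_by_pplex() keeps roughly target_ratio of the original tokens.
    compressed = await compressor.compress(long_text)
    print(compressed)

asyncio.run(main())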
lionagi/experimental/compressor/llm_summarizer.py
@@ -0,0 +1,61 @@
+# from lionagi.core.collections import iModel
+# from .base import TokenCompressor
+
+
+# class LLMSummarizer(TokenCompressor):
+
+#     def __init__(
+#         self, imodel: iModel = None, system_msg=None, tokenizer=None, splitter=None,
+#         max_tokens=25, target_ratio=0.3
+#     ):
+#         imodel = imodel or iModel(model="gpt-3.5-turbo", max_tokens=max_tokens)
+#         super().__init__(imodel=imodel, tokenizer=tokenizer, splitter=splitter)
+#         self.system_msg = (
+#             system_msg
+#             or "Summarize the following sentence to be concise and informative:"
+#         )
+#         self.target_ratio = target_ratio
+
+#     async def summarize_sentence(self, sentence, **kwargs):
+#         messages = [
+#             {"role": "system", "content": self.system_msg},
+#             {"role": "user", "content": sentence},
+#         ]
+#         response = await self.imodel.call_chat_completion(messages, **kwargs)
+#         return response["choices"][0]["message"]["content"]
+
+#     def tokenize(self, text):
+#         tokenize_func = self.tokenizer or tokenize
+#         return tokenize_func(text)
+
+#     def split(self, text):
+#         split_func = self.splitter or split_into_segments
+#         return split_func(text)
+
+#     # Function to enforce maximum sentence length
+#     def enforce_max_sentence_length(self, sentence, max_words=25):
+#         words = self.tokenize(sentence)
+#         if len(words) > max_words:
+#             sentence = ' '.join(words[:max_words])
+#         return sentence
+
+#     async def summarize_text(self, text, max_length_per_sentence=25, target_ratio=None, **kwargs):
+#         sentences = self.split(text)
+#         summarized = await alcall(
+#             sentences, self.summarize_sentence, **kwargs
+#         )
+#         summarized = [
+#             self.enforce_max_sentence_length(sentence, max_length_per_sentence)
+#             for sentence in summarized
+#         ]
+
+#         original_length = len(self.tokenize(text))
+#         summarized_length = len(self.tokenize(' '.join(summarized)))
+#         current_ratio = summarized_length / original_length
+
+#         target_ratio = target_ratio or self.target_ratio
+#         if current_ratio > target_ratio:
+#             words_to_remove = int((current_ratio - target_ratio) * original_length)
+#             return ' '.join(summarized[:-words_to_remove])
+
+#         return ' '.join(summarized)