PyPI - lionagi - Versions diffs - 0.0.312__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

lionagi 0.0.312__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (268) hide show

lionagi/__init__.py +61 -3
lionagi/core/__init__.py +0 -14
lionagi/core/_setting/_setting.py +59 -0
lionagi/core/action/__init__.py +14 -0
lionagi/core/action/function_calling.py +136 -0
lionagi/core/action/manual.py +1 -0
lionagi/core/action/node.py +109 -0
lionagi/core/action/tool.py +114 -0
lionagi/core/action/tool_manager.py +356 -0
lionagi/core/agent/__init__.py +0 -3
lionagi/core/agent/base_agent.py +45 -36
lionagi/core/agent/eval/evaluator.py +1 -0
lionagi/core/agent/eval/vote.py +40 -0
lionagi/core/agent/learn/learner.py +59 -0
lionagi/core/agent/plan/unit_template.py +1 -0
lionagi/core/collections/__init__.py +17 -0
lionagi/core/collections/_logger.py +319 -0
lionagi/core/collections/abc/__init__.py +53 -0
lionagi/core/collections/abc/component.py +615 -0
lionagi/core/collections/abc/concepts.py +297 -0
lionagi/core/collections/abc/exceptions.py +150 -0
lionagi/core/collections/abc/util.py +45 -0
lionagi/core/collections/exchange.py +161 -0
lionagi/core/collections/flow.py +426 -0
lionagi/core/collections/model.py +419 -0
lionagi/core/collections/pile.py +913 -0
lionagi/core/collections/progression.py +236 -0
lionagi/core/collections/util.py +64 -0
lionagi/core/director/direct.py +314 -0
lionagi/core/director/director.py +2 -0
lionagi/core/engine/branch_engine.py +333 -0
lionagi/core/engine/instruction_map_engine.py +204 -0
lionagi/core/engine/sandbox_.py +14 -0
lionagi/core/engine/script_engine.py +99 -0
lionagi/core/executor/base_executor.py +90 -0
lionagi/core/executor/graph_executor.py +330 -0
lionagi/core/executor/neo4j_executor.py +384 -0
lionagi/core/generic/__init__.py +7 -0
lionagi/core/generic/edge.py +112 -0
lionagi/core/generic/edge_condition.py +16 -0
lionagi/core/generic/graph.py +236 -0
lionagi/core/generic/hyperedge.py +1 -0
lionagi/core/generic/node.py +220 -0
lionagi/core/generic/tree.py +48 -0
lionagi/core/generic/tree_node.py +79 -0
lionagi/core/mail/__init__.py +7 -3
lionagi/core/mail/mail.py +25 -0
lionagi/core/mail/mail_manager.py +142 -58
lionagi/core/mail/package.py +45 -0
lionagi/core/mail/start_mail.py +36 -0
lionagi/core/message/__init__.py +19 -0
lionagi/core/message/action_request.py +133 -0
lionagi/core/message/action_response.py +135 -0
lionagi/core/message/assistant_response.py +95 -0
lionagi/core/message/instruction.py +234 -0
lionagi/core/message/message.py +101 -0
lionagi/core/message/system.py +86 -0
lionagi/core/message/util.py +283 -0
lionagi/core/report/__init__.py +4 -0
lionagi/core/report/base.py +217 -0
lionagi/core/report/form.py +231 -0
lionagi/core/report/report.py +166 -0
lionagi/core/report/util.py +28 -0
lionagi/core/rule/__init__.py +0 -0
lionagi/core/rule/_default.py +16 -0
lionagi/core/rule/action.py +99 -0
lionagi/core/rule/base.py +238 -0
lionagi/core/rule/boolean.py +56 -0
lionagi/core/rule/choice.py +47 -0
lionagi/core/rule/mapping.py +96 -0
lionagi/core/rule/number.py +71 -0
lionagi/core/rule/rulebook.py +109 -0
lionagi/core/rule/string.py +52 -0
lionagi/core/rule/util.py +35 -0
lionagi/core/session/__init__.py +0 -3
lionagi/core/session/branch.py +431 -0
lionagi/core/session/directive_mixin.py +287 -0
lionagi/core/session/session.py +230 -902
lionagi/core/structure/__init__.py +1 -0
lionagi/core/structure/chain.py +1 -0
lionagi/core/structure/forest.py +1 -0
lionagi/core/structure/graph.py +1 -0
lionagi/core/structure/tree.py +1 -0
lionagi/core/unit/__init__.py +5 -0
lionagi/core/unit/parallel_unit.py +245 -0
lionagi/core/unit/template/__init__.py +0 -0
lionagi/core/unit/template/action.py +81 -0
lionagi/core/unit/template/base.py +51 -0
lionagi/core/unit/template/plan.py +84 -0
lionagi/core/unit/template/predict.py +109 -0
lionagi/core/unit/template/score.py +124 -0
lionagi/core/unit/template/select.py +104 -0
lionagi/core/unit/unit.py +362 -0
lionagi/core/unit/unit_form.py +305 -0
lionagi/core/unit/unit_mixin.py +1168 -0
lionagi/core/unit/util.py +71 -0
lionagi/core/validator/__init__.py +0 -0
lionagi/core/validator/validator.py +364 -0
lionagi/core/work/__init__.py +0 -0
lionagi/core/work/work.py +76 -0
lionagi/core/work/work_function.py +101 -0
lionagi/core/work/work_queue.py +103 -0
lionagi/core/work/worker.py +258 -0
lionagi/core/work/worklog.py +120 -0
lionagi/experimental/__init__.py +0 -0
lionagi/experimental/compressor/__init__.py +0 -0
lionagi/experimental/compressor/base.py +46 -0
lionagi/experimental/compressor/llm_compressor.py +247 -0
lionagi/experimental/compressor/llm_summarizer.py +61 -0
lionagi/experimental/compressor/util.py +70 -0
lionagi/experimental/directive/__init__.py +19 -0
lionagi/experimental/directive/parser/__init__.py +0 -0
lionagi/experimental/directive/parser/base_parser.py +282 -0
lionagi/experimental/directive/template/__init__.py +0 -0
lionagi/experimental/directive/template/base_template.py +79 -0
lionagi/experimental/directive/template/schema.py +36 -0
lionagi/experimental/directive/tokenizer.py +73 -0
lionagi/experimental/evaluator/__init__.py +0 -0
lionagi/experimental/evaluator/ast_evaluator.py +131 -0
lionagi/experimental/evaluator/base_evaluator.py +218 -0
lionagi/experimental/knowledge/__init__.py +0 -0
lionagi/experimental/knowledge/base.py +10 -0
lionagi/experimental/knowledge/graph.py +0 -0
lionagi/experimental/memory/__init__.py +0 -0
lionagi/experimental/strategies/__init__.py +0 -0
lionagi/experimental/strategies/base.py +1 -0
lionagi/integrations/bridge/autogen_/__init__.py +0 -0
lionagi/integrations/bridge/autogen_/autogen_.py +124 -0
lionagi/integrations/bridge/langchain_/documents.py +4 -0
lionagi/integrations/bridge/llamaindex_/index.py +30 -0
lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
lionagi/integrations/bridge/llamaindex_/llama_pack.py +227 -0
lionagi/integrations/bridge/llamaindex_/node_parser.py +6 -9
lionagi/integrations/bridge/pydantic_/pydantic_bridge.py +1 -0
lionagi/integrations/bridge/transformers_/__init__.py +0 -0
lionagi/integrations/bridge/transformers_/install_.py +36 -0
lionagi/integrations/chunker/__init__.py +0 -0
lionagi/integrations/chunker/chunk.py +312 -0
lionagi/integrations/config/oai_configs.py +38 -7
lionagi/integrations/config/ollama_configs.py +1 -1
lionagi/integrations/config/openrouter_configs.py +14 -2
lionagi/integrations/loader/__init__.py +0 -0
lionagi/integrations/loader/load.py +253 -0
lionagi/integrations/loader/load_util.py +195 -0
lionagi/integrations/provider/_mapping.py +46 -0
lionagi/integrations/provider/litellm.py +2 -1
lionagi/integrations/provider/mlx_service.py +16 -9
lionagi/integrations/provider/oai.py +91 -4
lionagi/integrations/provider/ollama.py +7 -6
lionagi/integrations/provider/openrouter.py +115 -8
lionagi/integrations/provider/services.py +2 -2
lionagi/integrations/provider/transformers.py +18 -22
lionagi/integrations/storage/__init__.py +3 -0
lionagi/integrations/storage/neo4j.py +665 -0
lionagi/integrations/storage/storage_util.py +287 -0
lionagi/integrations/storage/structure_excel.py +285 -0
lionagi/integrations/storage/to_csv.py +63 -0
lionagi/integrations/storage/to_excel.py +83 -0
lionagi/libs/__init__.py +26 -1
lionagi/libs/ln_api.py +78 -23
lionagi/libs/ln_context.py +37 -0
lionagi/libs/ln_convert.py +21 -9
lionagi/libs/ln_func_call.py +69 -28
lionagi/libs/ln_image.py +107 -0
lionagi/libs/ln_knowledge_graph.py +405 -0
lionagi/libs/ln_nested.py +26 -11
lionagi/libs/ln_parse.py +110 -14
lionagi/libs/ln_queue.py +117 -0
lionagi/libs/ln_tokenize.py +164 -0
lionagi/{core/prompt/field_validator.py → libs/ln_validate.py} +79 -14
lionagi/libs/special_tokens.py +172 -0
lionagi/libs/sys_util.py +107 -2
lionagi/lions/__init__.py +0 -0
lionagi/lions/coder/__init__.py +0 -0
lionagi/lions/coder/add_feature.py +20 -0
lionagi/lions/coder/base_prompts.py +22 -0
lionagi/lions/coder/code_form.py +13 -0
lionagi/lions/coder/coder.py +168 -0
lionagi/lions/coder/util.py +96 -0
lionagi/lions/researcher/__init__.py +0 -0
lionagi/lions/researcher/data_source/__init__.py +0 -0
lionagi/lions/researcher/data_source/finhub_.py +191 -0
lionagi/lions/researcher/data_source/google_.py +199 -0
lionagi/lions/researcher/data_source/wiki_.py +96 -0
lionagi/lions/researcher/data_source/yfinance_.py +21 -0
lionagi/tests/integrations/__init__.py +0 -0
lionagi/tests/libs/__init__.py +0 -0
lionagi/tests/libs/test_field_validators.py +353 -0
lionagi/tests/{test_libs → libs}/test_func_call.py +23 -21
lionagi/tests/{test_libs → libs}/test_nested.py +36 -21
lionagi/tests/{test_libs → libs}/test_parse.py +1 -1
lionagi/tests/libs/test_queue.py +67 -0
lionagi/tests/test_core/collections/__init__.py +0 -0
lionagi/tests/test_core/collections/test_component.py +206 -0
lionagi/tests/test_core/collections/test_exchange.py +138 -0
lionagi/tests/test_core/collections/test_flow.py +145 -0
lionagi/tests/test_core/collections/test_pile.py +171 -0
lionagi/tests/test_core/collections/test_progression.py +129 -0
lionagi/tests/test_core/generic/__init__.py +0 -0
lionagi/tests/test_core/generic/test_edge.py +67 -0
lionagi/tests/test_core/generic/test_graph.py +96 -0
lionagi/tests/test_core/generic/test_node.py +106 -0
lionagi/tests/test_core/generic/test_tree_node.py +73 -0
lionagi/tests/test_core/test_branch.py +115 -292
lionagi/tests/test_core/test_form.py +46 -0
lionagi/tests/test_core/test_report.py +105 -0
lionagi/tests/test_core/test_validator.py +111 -0
lionagi/version.py +1 -1
{lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/LICENSE +12 -11
{lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/METADATA +19 -118
lionagi-0.2.1.dist-info/RECORD +240 -0
lionagi/core/branch/__init__.py +0 -4
lionagi/core/branch/base_branch.py +0 -654
lionagi/core/branch/branch.py +0 -471
lionagi/core/branch/branch_flow_mixin.py +0 -96
lionagi/core/branch/executable_branch.py +0 -347
lionagi/core/branch/util.py +0 -323
lionagi/core/direct/__init__.py +0 -6
lionagi/core/direct/predict.py +0 -161
lionagi/core/direct/score.py +0 -278
lionagi/core/direct/select.py +0 -169
lionagi/core/direct/utils.py +0 -87
lionagi/core/direct/vote.py +0 -64
lionagi/core/flow/base/baseflow.py +0 -23
lionagi/core/flow/monoflow/ReAct.py +0 -238
lionagi/core/flow/monoflow/__init__.py +0 -9
lionagi/core/flow/monoflow/chat.py +0 -95
lionagi/core/flow/monoflow/chat_mixin.py +0 -263
lionagi/core/flow/monoflow/followup.py +0 -214
lionagi/core/flow/polyflow/__init__.py +0 -1
lionagi/core/flow/polyflow/chat.py +0 -248
lionagi/core/mail/schema.py +0 -56
lionagi/core/messages/__init__.py +0 -3
lionagi/core/messages/schema.py +0 -533
lionagi/core/prompt/prompt_template.py +0 -316
lionagi/core/schema/__init__.py +0 -22
lionagi/core/schema/action_node.py +0 -29
lionagi/core/schema/base_mixin.py +0 -296
lionagi/core/schema/base_node.py +0 -199
lionagi/core/schema/condition.py +0 -24
lionagi/core/schema/data_logger.py +0 -354
lionagi/core/schema/data_node.py +0 -93
lionagi/core/schema/prompt_template.py +0 -67
lionagi/core/schema/structure.py +0 -910
lionagi/core/tool/__init__.py +0 -3
lionagi/core/tool/tool_manager.py +0 -280
lionagi/integrations/bridge/pydantic_/base_model.py +0 -7
lionagi/tests/test_core/test_base_branch.py +0 -427
lionagi/tests/test_core/test_chat_flow.py +0 -63
lionagi/tests/test_core/test_mail_manager.py +0 -75
lionagi/tests/test_core/test_prompts.py +0 -51
lionagi/tests/test_core/test_session.py +0 -254
lionagi/tests/test_core/test_session_base_util.py +0 -312
lionagi/tests/test_core/test_tool_manager.py +0 -95
lionagi-0.0.312.dist-info/RECORD +0 -111
/lionagi/core/{branch/base → _setting}/__init__.py +0 -0
/lionagi/core/{flow → agent/eval}/__init__.py +0 -0
/lionagi/core/{flow/base → agent/learn}/__init__.py +0 -0
/lionagi/core/{prompt → agent/plan}/__init__.py +0 -0
/lionagi/core/{tool/manual.py → agent/plan/plan.py} +0 -0
/lionagi/{tests/test_integrations → core/director}/__init__.py +0 -0
/lionagi/{tests/test_libs → core/engine}/__init__.py +0 -0
/lionagi/{tests/test_libs/test_async.py → core/executor/__init__.py} +0 -0
/lionagi/tests/{test_libs → libs}/test_api.py +0 -0
/lionagi/tests/{test_libs → libs}/test_convert.py +0 -0
/lionagi/tests/{test_libs → libs}/test_sys_util.py +0 -0
{lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
{lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0

lionagi/libs/ln_image.py ADDED Viewed

@@ -0,0 +1,107 @@
+import base64
+import numpy as np
+from typing import Optional
+from .sys_util import SysUtil
+class ImageUtil:
+    """
+    Static helpers for reading, colour-converting and base64-encoding images,
+    plus an estimator for OpenAI image token usage.
+    OpenCV (cv2) and Pillow are imported lazily inside the methods that need them.
+    """
+    @staticmethod
+    def preprocess_image(
+        image: np.ndarray, color_conversion_code: Optional[int] = None
+    ) -> np.ndarray:
+        """
+        Convert the colour space of an image with cv2.cvtColor.
+        Args:
+            image (np.ndarray): The image array to convert.
+            color_conversion_code (int, optional): OpenCV conversion code.
+                Defaults to cv2.COLOR_BGR2RGB when None.
+        Returns:
+            np.ndarray: The converted image.
+        """
+        SysUtil.check_import("cv2", pip_name="opencv-python")
+        import cv2
+        # Compare against None explicitly: a conversion code of 0 is falsy and
+        # would otherwise be silently replaced by the default.
+        if color_conversion_code is None:
+            color_conversion_code = cv2.COLOR_BGR2RGB
+        return cv2.cvtColor(image, color_conversion_code)
+    @staticmethod
+    def encode_image_to_base64(image: np.ndarray, file_extension: str = ".jpg") -> str:
+        """
+        Encode an image array into the given file format and return it as base64 text.
+        Args:
+            image (np.ndarray): The image array to encode.
+            file_extension (str): Target format extension passed to cv2.imencode.
+        Returns:
+            str: The base64-encoded image as a UTF-8 string.
+        Raises:
+            ValueError: If cv2.imencode reports failure.
+        """
+        SysUtil.check_import("cv2", pip_name="opencv-python")
+        import cv2
+        success, buffer = cv2.imencode(file_extension, image)
+        if not success:
+            raise ValueError(f"Could not encode image to {file_extension} format.")
+        return base64.b64encode(buffer).decode("utf-8")
+    @staticmethod
+    def read_image_to_array(
+        image_path: str, color_flag: Optional[int] = None
+    ) -> np.ndarray:
+        """
+        Read an image file into an array with cv2.imread.
+        Args:
+            image_path (str): Path of the image file.
+            color_flag (int, optional): OpenCV imread flag.
+                Defaults to cv2.IMREAD_COLOR when None.
+        Returns:
+            np.ndarray: The loaded image.
+        Raises:
+            ValueError: If cv2.imread returns None for the path.
+        """
+        SysUtil.check_import("cv2", pip_name="opencv-python")
+        import cv2
+        # Resolve the default BEFORE reading, and test for None rather than
+        # truthiness so that a flag of 0 (cv2.IMREAD_GRAYSCALE) is honoured.
+        if color_flag is None:
+            color_flag = cv2.IMREAD_COLOR
+        image = cv2.imread(image_path, color_flag)
+        if image is None:
+            raise ValueError(f"Could not read image from path: {image_path}")
+        return image
+    @staticmethod
+    def read_image_to_base64(
+        image_path: str,
+        color_flag: Optional[int] = None,
+    ) -> str:
+        """
+        Read an image file and return it base64-encoded in its own file format.
+        Args:
+            image_path (str): Path of the image file; converted with str() first.
+            color_flag (int, optional): Flag forwarded to read_image_to_array.
+        Returns:
+            str: The base64-encoded image.
+        """
+        image_path = str(image_path)
+        image = ImageUtil.read_image_to_array(image_path, color_flag)
+        # The encoding format is taken from the text after the last "." in the path.
+        file_extension = "." + image_path.split(".")[-1]
+        return ImageUtil.encode_image_to_base64(image, file_extension)
+    # @staticmethod
+    # def encode_image(image_path):
+    #     with open(image_path, "rb") as image_file:
+    #         return base64.b64encode(image_file.read()).decode("utf-8")
+    @staticmethod
+    def calculate_image_token_usage_from_base64(image_base64: str, detail):
+        """
+        Calculate the token usage for processing OpenAI images from a base64-encoded string.
+        Parameters:
+        image_base64 (str): The base64-encoded string of the image, optionally
+            prefixed with "data:image/jpeg;base64,".
+        detail (str): The detail level of the image, either 'low' or 'high'.
+        Returns:
+        int: The total token cost for processing the image (85 for 'low').
+        """
+        import base64
+        import math
+        from io import BytesIO
+        from PIL import Image
+        # Decode the base64 string to get image data
+        if "data:image/jpeg;base64," in image_base64:
+            image_base64 = image_base64.split("data:image/jpeg;base64,")[1]
+            # str.strip returns a new string; keep the result.
+            image_base64 = image_base64.strip("{}")
+        image_data = base64.b64decode(image_base64)
+        # Get image dimensions; the context manager releases the image afterwards.
+        with Image.open(BytesIO(image_data)) as image:
+            width, height = image.size
+        if detail == "low":
+            return 85
+        # Scale to fit within a 2048 x 2048 square
+        max_dimension = 2048
+        if width > max_dimension or height > max_dimension:
+            scale_factor = max_dimension / max(width, height)
+            width = int(width * scale_factor)
+            height = int(height * scale_factor)
+        # Scale such that the shortest side is 768px
+        min_side = 768
+        if min(width, height) > min_side:
+            scale_factor = min_side / min(width, height)
+            width = int(width * scale_factor)
+            height = int(height * scale_factor)
+        # Count the 512px tiles needed to cover the image. Partial tiles count
+        # as whole tiles, so round up on each axis (768x768 -> 2 x 2 = 4 tiles).
+        num_squares = math.ceil(width / 512) * math.ceil(height / 512)
+        token_cost = 170 * num_squares + 85
+        return token_cost

lionagi/libs/ln_knowledge_graph.py ADDED Viewed

@@ -0,0 +1,405 @@
+import math
+from lionagi.libs import CallDecorator as cd
+class KnowledgeBase:
+    """
+    A class to represent a Knowledge Base (KB) containing entities, relations, and sources.
+    Attributes:
+        entities (dict): A dictionary of entities in the KB, where the keys are entity titles, and the values are
+                         entity information (excluding the title).
+        relations (list): A list of relations in the KB, where each relation is a dictionary containing information
+                          about the relation (head, type, tail) and metadata (article_url and spans).
+        sources (dict): A dictionary of information about the sources of relations, where the keys are article URLs,
+                       and the values are source data (article_title and article_publish_date).
+    Methods:
+        merge_with_kb(kb2): Merge another Knowledge Base (kb2) into this KB.
+        are_relations_equal(r1, r2): Check if two relations (r1 and r2) are equal.
+        exists_relation(r1): Check if a relation (r1) already exists in the KB.
+        merge_relations(r2): Merge the information from relation r2 into an existing relation in the KB.
+        get_wikipedia_data(candidate_entity): Get data for a candidate entity from Wikipedia.
+        add_entity(e): Add an entity to the KB.
+        add_relation(r, article_title, article_publish_date): Add a relation to the KB.
+        print(): Print the entities, relations, and sources in the KB.
+        extract_relations_from_model_output(text): Extract relations from the model output text.
+    """
+    def __init__(self):
+        """
+        Initialize an empty Knowledge Base (KB) with empty containers for entities, relations, and sources.
+        """
+        self.entities = {}  # { entity_title: {...} }
+        self.relations = []  # [ head: entity_title, type: ..., tail: entity_title,
+        # meta: { article_url: { spans: [...] } } ]
+        self.sources = {}  # { article_url: {...} }
+    def merge_with_kb(self, kb2):
+        """
+        Merge another Knowledge Base (KB) into this KB.
+        Each relation of kb2 is re-added through add_relation together with the
+        source data kb2 stores for the first article URL in the relation's meta.
+        Args:
+            kb2 (KnowledgeBase): The Knowledge Base (KB) to merge into this KB.
+        """
+        for r in kb2.relations:
+            article_url = list(r["meta"].keys())[0]
+            source_data = kb2.sources[article_url]
+            self.add_relation(
+                r, source_data["article_title"], source_data["article_publish_date"]
+            )
+    def are_relations_equal(self, r1, r2):
+        """
+        Check if two relations (r1 and r2) are equal.
+        Only the "head", "type" and "tail" keys are compared; "meta" is ignored.
+        Args:
+            r1 (dict): The first relation to compare.
+            r2 (dict): The second relation to compare.
+        Returns:
+            bool: True if the relations are equal, False otherwise.
+        """
+        return all(r1[attr] == r2[attr] for attr in ["head", "type", "tail"])
+    def exists_relation(self, r1):
+        """
+        Check if a relation (r1) already exists in the KB.
+        Args:
+            r1 (dict): The relation to check for existence in the KB.
+        Returns:
+            bool: True if the relation exists in the KB, False otherwise.
+        """
+        return any(self.are_relations_equal(r1, r2) for r2 in self.relations)
+    def merge_relations(self, r2):
+        """
+        Merge the information from relation r2 into an existing relation in the KB.
+        The first stored relation equal to r2 is updated in place; an IndexError
+        is raised if no equal relation is stored.
+        Args:
+            r2 (dict): The relation to merge into an existing relation in the KB.
+        """
+        r1 = [r for r in self.relations if self.are_relations_equal(r2, r)][0]
+        # if different article
+        article_url = list(r2["meta"].keys())[0]
+        if article_url not in r1["meta"]:
+            r1["meta"][article_url] = r2["meta"][article_url]
+        # if existing article
+        else:
+            spans_to_add = [
+                span
+                for span in r2["meta"][article_url]["spans"]
+                if span not in r1["meta"][article_url]["spans"]
+            ]
+            r1["meta"][article_url]["spans"] += spans_to_add
+    # NOTE(review): the cache decorates an instance method, so `self` is
+    # presumably part of the cache key - confirm against CallDecorator.cache.
+    @cd.cache(maxsize=10000)
+    def get_wikipedia_data(self, candidate_entity):
+        """
+        Get data for a candidate entity from Wikipedia.
+        Args:
+            candidate_entity (str): The candidate entity title.
+        Returns:
+            dict: A dictionary containing information about the candidate entity (title, url, summary).
+                  None if the page lookup fails for any reason.
+        Raises:
+            Exception: If the wikipedia package cannot be imported.
+        """
+        try:
+            from lionagi.libs import SysUtil
+            SysUtil.check_import("wikipedia")
+            import wikipedia  # type: ignore
+        except Exception as e:
+            # f-string so the underlying error is actually shown; chain the cause.
+            raise Exception(f"wikipedia package is not installed {e}") from e
+        try:
+            page = wikipedia.page(candidate_entity, auto_suggest=False)
+            entity_data = {
+                "title": page.title,
+                "url": page.url,
+                "summary": page.summary,
+            }
+            return entity_data
+        except Exception:
+            # Best-effort lookup: any failure is reported as "no such entity".
+            # Unlike a bare except, KeyboardInterrupt/SystemExit still propagate.
+            return None
+    def add_entity(self, e):
+        """
+        Add an entity to the KB, keyed by its title; an existing entry is overwritten.
+        Args:
+            e (dict): A dictionary containing information about the entity (title and additional attributes).
+        """
+        self.entities[e["title"]] = {k: v for k, v in e.items() if k != "title"}
+    def add_relation(self, r, article_title, article_publish_date):
+        """
+        Add a relation to the KB.
+        The relation is dropped silently when either its head or its tail cannot
+        be resolved on Wikipedia. Otherwise r is modified in place: its head and
+        tail are replaced by the resolved Wikipedia titles.
+        Args:
+            r (dict): A dictionary containing information about the relation (head, type, tail, and metadata).
+            article_title (str): The title of the article containing the relation.
+            article_publish_date (str): The publish date of the article.
+        """
+        # check on wikipedia
+        candidate_entities = [r["head"], r["tail"]]
+        entities = [self.get_wikipedia_data(ent) for ent in candidate_entities]
+        # if one entity does not exist, stop
+        if any(ent is None for ent in entities):
+            return
+        # manage new entities
+        for e in entities:
+            self.add_entity(e)
+        # rename relation entities with their wikipedia titles
+        r["head"] = entities[0]["title"]
+        r["tail"] = entities[1]["title"]
+        # add source if not in kb
+        article_url = list(r["meta"].keys())[0]
+        if article_url not in self.sources:
+            self.sources[article_url] = {
+                "article_title": article_title,
+                "article_publish_date": article_publish_date,
+            }
+        # manage new relation
+        if not self.exists_relation(r):
+            self.relations.append(r)
+        else:
+            self.merge_relations(r)
+    def print(self):
+        """
+        Print the entities, relations, and sources in the KB.
+        Returns:
+            None
+        """
+        print("Entities:")
+        for e in self.entities.items():
+            print(f"  {e}")
+        print("Relations:")
+        for r in self.relations:
+            print(f"  {r}")
+        print("Sources:")
+        for s in self.sources.items():
+            print(f"  {s}")
+    @staticmethod
+    def extract_relations_from_model_output(text):
+        """
+        Extract relations from the model output text.
+        The text is split on whitespace after removing "<s>", "<pad>" and "</s>".
+        Tokens following "<triplet>" build the head, tokens following "<subj>"
+        build the tail, and tokens following "<obj>" build the relation type.
+        Args:
+            text (str): The model output text containing relations.
+        Returns:
+            list: A list of dictionaries, where each dictionary represents a relation (head, type, tail).
+        """
+        relations = []
+        subject, relation, object_ = "", "", ""
+        text = text.strip()
+        # "x" means no marker has been seen yet; plain tokens are ignored until one is.
+        current = "x"
+        text_replaced = text.replace("<s>", "").replace("<pad>", "").replace("</s>", "")
+        for token in text_replaced.split():
+            if token == "<triplet>":
+                current = "t"
+                # A new triplet starts: emit the one collected so far, if any.
+                if relation != "":
+                    relations.append(
+                        {
+                            "head": subject.strip(),
+                            "type": relation.strip(),
+                            "tail": object_.strip(),
+                        }
+                    )
+                    relation = ""
+                subject = ""
+            elif token == "<subj>":
+                current = "s"
+                # Same head, new tail: emit the pending triplet before resetting the tail.
+                if relation != "":
+                    relations.append(
+                        {
+                            "head": subject.strip(),
+                            "type": relation.strip(),
+                            "tail": object_.strip(),
+                        }
+                    )
+                object_ = ""
+            elif token == "<obj>":
+                current = "o"
+                relation = ""
+            else:
+                if current == "t":
+                    subject += " " + token
+                elif current == "s":
+                    object_ += " " + token
+                elif current == "o":
+                    relation += " " + token
+        # Emit the final triplet only when all three parts were collected.
+        if subject != "" and relation != "" and object_ != "":
+            relations.append(
+                {
+                    "head": subject.strip(),
+                    "type": relation.strip(),
+                    "tail": object_.strip(),
+                }
+            )
+        return relations
+class KGTripletExtractor:
+    """
+    A class to perform knowledge graph triplet extraction from text using a pre-trained model.
+    Methods:
+        text_to_wiki_kb(text, model=None, tokenizer=None, device='cpu', span_length=512,
+                        article_title=None, article_publish_date=None, verbose=False):
+            Extract knowledge graph triplets from text and create a KnowledgeBase (KB) containing entities and relations.
+    """
+    @staticmethod
+    def text_to_wiki_kb(
+        text,
+        model=None,
+        tokenizer=None,
+        device="cpu",
+        span_length=512,
+        article_title=None,
+        article_publish_date=None,
+        verbose=False,
+    ):
+        """
+        Extract knowledge graph triplets from text and create a KnowledgeBase (KB) containing entities and relations.
+        The tokenized input is cut into overlapping spans of span_length tokens,
+        the model generates three candidate sequences per span, and every
+        relation parsed from them is added to the KB.
+        Args:
+            text (str): The input text from which triplets will be extracted.
+            model (AutoModelForSeq2SeqLM, optional): The pre-trained model for triplet extraction. Defaults to None.
+            tokenizer (AutoTokenizer, optional): The tokenizer for the model. Defaults to None.
+                When both model and tokenizer are None, "Babelscape/rebel-large" is loaded for both.
+            device (str, optional): The device to run the model on (e.g., 'cpu', 'cuda'). Defaults to 'cpu'.
+            span_length (int, optional): The maximum span length for input text segmentation. Defaults to 512.
+            article_title (str, optional): The title of the article containing the input text. Defaults to None.
+            article_publish_date (str, optional): The publish date of the article. Defaults to None.
+            verbose (bool, optional): Whether to enable verbose mode for debugging. Defaults to False.
+        Returns:
+            KnowledgeBase: A KnowledgeBase (KB) containing extracted entities, relations, and sources.
+        Raises:
+            ValueError: If only one of model and tokenizer is supplied.
+        """
+        from lionagi.integrations.bridge.transformers_.install_ import (
+            install_transformers,
+        )
+        try:
+            from transformers import AutoModelForSeq2SeqLM, AutoTokenizer  # type: ignore
+        except ImportError:
+            # Install on demand, then retry the import once.
+            install_transformers()
+            from transformers import AutoModelForSeq2SeqLM, AutoTokenizer  # type: ignore
+        import torch  # type: ignore
+        if model is None and tokenizer is None:
+            tokenizer = AutoTokenizer.from_pretrained("Babelscape/rebel-large")
+            model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/rebel-large")
+            model.to(device)
+        elif model is None or tokenizer is None:
+            # Previously this fell through and failed later on a None call.
+            raise ValueError(
+                "model and tokenizer must be provided together, or both omitted."
+            )
+        inputs = tokenizer([text], return_tensors="pt")
+        num_tokens = len(inputs["input_ids"][0])
+        if verbose:
+            print(f"Input has {num_tokens} tokens")
+        num_spans = math.ceil(num_tokens / span_length)
+        if verbose:
+            print(f"Input has {num_spans} spans")
+        # Spread the surplus of num_spans * span_length over the gaps between
+        # consecutive spans so that the last span ends within the input.
+        overlap = math.ceil(
+            (num_spans * span_length - num_tokens) / max(num_spans - 1, 1)
+        )
+        # Each span starts (span_length - overlap) tokens after the previous one.
+        stride = span_length - overlap
+        spans_boundaries = [
+            [i * stride, i * stride + span_length] for i in range(num_spans)
+        ]
+        if verbose:
+            print(f"Span boundaries are {spans_boundaries}")
+        # transform input with spans
+        tensor_ids = [
+            inputs["input_ids"][0][boundary[0] : boundary[1]]
+            for boundary in spans_boundaries
+        ]
+        tensor_masks = [
+            inputs["attention_mask"][0][boundary[0] : boundary[1]]
+            for boundary in spans_boundaries
+        ]
+        inputs = {
+            "input_ids": torch.stack(tensor_ids).to(device),
+            "attention_mask": torch.stack(tensor_masks).to(device),
+        }
+        # generate relations
+        num_return_sequences = 3
+        gen_kwargs = {
+            "max_length": 512,
+            "length_penalty": 0,
+            "num_beams": 3,
+            "num_return_sequences": num_return_sequences,
+        }
+        generated_tokens = model.generate(
+            **inputs,
+            **gen_kwargs,
+        )
+        # decode relations
+        decoded_preds = tokenizer.batch_decode(
+            generated_tokens, skip_special_tokens=False
+        )
+        # create kb
+        kb = KnowledgeBase()
+        for i, sentence_pred in enumerate(decoded_preds):
+            # Predictions come in groups of num_return_sequences per span.
+            current_span_index = i // num_return_sequences
+            relations = KnowledgeBase.extract_relations_from_model_output(sentence_pred)
+            for relation in relations:
+                # NOTE(review): the source key is the literal string "article_url",
+                # not an actual URL - confirm this placeholder is intended.
+                relation["meta"] = {
+                    "article_url": {"spans": [spans_boundaries[current_span_index]]}
+                }
+                kb.add_relation(relation, article_title, article_publish_date)
+        return kb
+class KGraph:
+    """
+    Entry point for turning text into a Knowledge Base (KB) of entities and relations.
+    Methods:
+        text_to_wiki_kb(text, **kwargs):
+            Forward the call to KGTripletExtractor.text_to_wiki_kb and return its result.
+    """
+    @staticmethod
+    def text_to_wiki_kb(text, **kwargs):
+        """
+        Build a Knowledge Base (KB) from the relations found in the input text.
+        Args:
+            text (str): The text to extract relations from.
+            **kwargs: Keyword arguments forwarded unchanged to
+                KGTripletExtractor.text_to_wiki_kb.
+        Returns:
+            KnowledgeBase: The KB produced by the underlying extractor.
+        """
+        extract = KGTripletExtractor.text_to_wiki_kb
+        knowledge_base = extract(text, **kwargs)
+        return knowledge_base

lionagi/libs/ln_nested.py CHANGED Viewed

@@ -1,3 +1,19 @@
+"""
+Copyright 2024 HaiyangLi
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
 from collections import defaultdict
 from itertools import chain
 from typing import Any, Generator, Callable
@@ -52,7 +68,7 @@ def nset(nested_structure: dict | list, indices: list[int | str], value: Any) ->
 def nget(
     nested_structure: dict | list,
     indices: list[int | str],
-    default: Any | None = None,
+    default=...,
 ) -> Any:
     """
     retrieves a value from a nested list or dictionary structure, with an option to
@@ -98,12 +114,12 @@ def nget(
             return target_container[last_index]
         elif isinstance(target_container, dict) and last_index in target_container:
             return target_container[last_index]
-        elif default is not None:
+        elif default is not ...:
             return default
         else:
             raise LookupError("Target not found and no default value provided.")
     except (IndexError, KeyError, TypeError):
-        if default is not None:
+        if default is not ...:
             return default
         else:
             raise LookupError("Target not found and no default value provided.")
@@ -116,7 +132,7 @@ def nmerge(
     *,
     overwrite: bool = False,
     dict_sequence: bool = False,
-    sequence_separator: str = "_",
+    sequence_separator: str = "[^_^]",
     sort_list: bool = False,
     custom_sort: Callable[[Any], Any] | None = None,
 ) -> dict | list:
@@ -176,7 +192,7 @@ def flatten(
     /,
     *,
     parent_key: str = "",
-    sep: str = "_",
+    sep: str = "[^_^]",
     max_depth: int | None = None,
     inplace: bool = False,
     dict_only: bool = False,
@@ -238,7 +254,7 @@ def unflatten(
     flat_dict: dict[str, Any],
     /,
     *,
-    sep: str = "_",
+    sep: str = "[^_^]",
     custom_logic: Callable[[str], Any] | None = None,
     max_depth: int | None = None,
 ) -> dict | list:
@@ -330,7 +346,7 @@ def ninsert(
     indices: list[str | int],
     value: Any,
     *,
-    sep: str = "_",
+    sep: str = "[^_^]",
     max_depth: int | None = None,
     current_depth: int = 0,
 ) -> None:
@@ -393,12 +409,11 @@ def ninsert(
         nested_structure[last_part] = value
-# noinspection PyDecorator
 def get_flattened_keys(
     nested_structure: Any,
     /,
     *,
-    sep: str = "_",
+    sep: str = "[^_^]",
     max_depth: int | None = None,
     dict_only: bool = False,
     inplace: bool = False,
@@ -448,7 +463,7 @@ def _dynamic_flatten_in_place(
     /,
     *,
     parent_key: str = "",
-    sep: str = "_",
+    sep: str = "[^_^]",
     max_depth: int | None = None,
     current_depth: int = 0,
     dict_only: bool = False,
@@ -581,7 +596,7 @@ def _deep_update(original: dict, update: dict) -> dict:
 def _dynamic_flatten_generator(
     nested_structure: Any,
     parent_key: tuple[str, ...],
-    sep: str = "_",
+    sep: str = "[^_^]",
     max_depth: int | None = None,
     current_depth: int = 0,
     dict_only: bool = False,

lionagi 0.0.312__py3-none-any.whl → 0.2.1__py3-none-any.whl

lionagi 0.0.312__py3-none-any.whl → 0.2.1__py3-none-any.whl