lionagi 0.9.12__py3-none-any.whl → 0.9.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/libs/file/chunk.py +3 -3
- lionagi/libs/token_transform/base.py +52 -0
- lionagi/libs/token_transform/perplexity.py +41 -29
- lionagi/libs/token_transform/symbolic_compress_context.py +138 -0
- lionagi/libs/token_transform/synthlang.py +9 -415
- lionagi/libs/token_transform/synthlang_/base.py +130 -0
- lionagi/libs/token_transform/synthlang_/resources/frameworks/abstract_algebra.toml +11 -0
- lionagi/libs/token_transform/synthlang_/resources/frameworks/category_theory.toml +11 -0
- lionagi/libs/token_transform/synthlang_/resources/frameworks/complex_analysis.toml +11 -0
- lionagi/libs/token_transform/synthlang_/resources/frameworks/framework_options.json +52 -0
- lionagi/libs/token_transform/synthlang_/resources/frameworks/group_theory.toml +11 -0
- lionagi/libs/token_transform/synthlang_/resources/frameworks/math_logic.toml +11 -0
- lionagi/libs/token_transform/synthlang_/resources/frameworks/reflective_patterns.toml +11 -0
- lionagi/libs/token_transform/synthlang_/resources/frameworks/set_theory.toml +11 -0
- lionagi/libs/token_transform/synthlang_/resources/frameworks/topology_fundamentals.toml +11 -0
- lionagi/libs/token_transform/synthlang_/resources/mapping/rust_chinese_mapping.toml +37 -0
- lionagi/libs/token_transform/synthlang_/resources/utility/base_synthlang_system_prompt.toml +11 -0
- lionagi/libs/token_transform/synthlang_/translate_to_synthlang.py +136 -0
- lionagi/libs/token_transform/types.py +15 -0
- lionagi/protocols/adapters/toml_adapter.py +204 -0
- lionagi/protocols/adapters/types.py +3 -0
- lionagi/protocols/generic/element.py +9 -0
- lionagi/protocols/graph/node.py +3 -0
- lionagi/service/endpoints/token_calculator.py +8 -0
- lionagi/service/imodel.py +14 -13
- lionagi/session/branch.py +6 -6
- lionagi/tools/base.py +62 -0
- lionagi/version.py +1 -1
- {lionagi-0.9.12.dist-info → lionagi-0.9.14.dist-info}/METADATA +2 -1
- {lionagi-0.9.12.dist-info → lionagi-0.9.14.dist-info}/RECORD +32 -15
- {lionagi-0.9.12.dist-info → lionagi-0.9.14.dist-info}/WHEEL +0 -0
- {lionagi-0.9.12.dist-info → lionagi-0.9.14.dist-info}/licenses/LICENSE +0 -0
lionagi/libs/file/chunk.py
CHANGED
@@ -216,7 +216,7 @@ def chunk_content(
     chunk_size: int = 1024,
     overlap: float = 0,
     threshold: int = 256,
-    metadata: dict[str, Any] =
+    metadata: dict[str, Any] = None,
     return_tokens: bool = False,
     as_node: bool = False,
     **kwargs: Any,
@@ -268,7 +268,7 @@ def chunk_content(
                     "chunk_id": i + 1,
                     "total_chunks": len(chunks),
                     "chunk_size": len(chunk),
-                    **metadata,
+                    **(metadata or {}),
                 },
             )
             for i, chunk in enumerate(chunks)
@@ -280,7 +280,7 @@ def chunk_content(
             "chunk_id": i + 1,
             "total_chunks": len(chunks),
             "chunk_size": len(chunk),
-            **metadata,
+            **(metadata or {}),
         }
         for i, chunk in enumerate(chunks)
     ]
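With this change, callers can omit metadata entirely: **(metadata or {}) unpacks an empty dict instead of raising a TypeError when metadata is None. A minimal sketch of the now-safe call, assuming the content argument is named `content` (the hunks above only show the parameters that follow it):

from lionagi.libs.file.chunk import chunk_content

# Previously, omitting `metadata` meant `**metadata` tried to unpack None
# inside the per-chunk dicts; `**(metadata or {})` now falls back to an
# empty mapping.
chunks = chunk_content(
    content="some long document text ...",  # parameter name assumed
    chunk_size=1024,
    overlap=0,
    threshold=256,
    # metadata deliberately omitted -- safe after this change
)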
lionagi/libs/token_transform/base.py
ADDED
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+from enum import Enum
+from pathlib import Path
+
+from pydantic import Field
+
+from lionagi.tools.base import Resource, ResourceCategory
+
+here = Path(__file__).parent.resolve()
+MAPPING_PATH = "synthlang_/resources/mapping"
+
+
+class TokenMappingTemplate(str, Enum):
+    RUST_CHINESE = "rust_chinese"
+
+    @property
+    def fp(self) -> Path:
+        return here / MAPPING_PATH / f"{self.value}_mapping.toml"
+
+
+class TokenMapping(Resource):
+    category: ResourceCategory = Field(
+        default=ResourceCategory.UTILITY, frozen=True
+    )
+    content: dict
+
+    @classmethod
+    def load_from_template(
+        cls, template: TokenMappingTemplate | str
+    ) -> TokenMapping:
+        if isinstance(template, str):
+            template = template.lower().strip()
+            template = (
+                template.replace(".toml", "")
+                .replace(" ", "_")
+                .replace("-", "_")
+                .strip()
+            )
+            if template.endswith("_mapping"):
+                template = template[:-8]
+            if "/" in template:
+                template = template.split("/")[-1]
+            template = TokenMappingTemplate(template)
+
+        if isinstance(template, TokenMappingTemplate):
+            template = template.fp
+            return cls.adapt_from(template, ".toml", many=False)
+
+        raise ValueError(
+            f"Invalid template: {template}. Must be a TokenMappingTemplate or a valid path."
+        )
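A short usage sketch for the new TokenMapping loader. Given the normalization in load_from_template, several spellings should resolve to the same bundled resource; the equality check is an inference from the code above, not output from the diff:

from lionagi.libs.token_transform.base import TokenMapping, TokenMappingTemplate

# Each call normalizes to TokenMappingTemplate.RUST_CHINESE and loads
# synthlang_/resources/mapping/rust_chinese_mapping.toml via the TOML adapter.
m1 = TokenMapping.load_from_template(TokenMappingTemplate.RUST_CHINESE)
m2 = TokenMapping.load_from_template("rust_chinese")
m3 = TokenMapping.load_from_template("Rust-Chinese_mapping.toml")

assert m1.content == m2.content == m3.content  # `content` holds the parsed TOML dict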
lionagi/libs/token_transform/perplexity.py
CHANGED
@@ -101,7 +101,7 @@ async def compute_perplexity(
     chat_model: iModel,
     initial_context: str = None,
     tokens: list[str] = None,
-
+    system: str = None,
     n_samples: int = 1,
     use_residue: bool = True,
     **kwargs,
@@ -142,9 +142,9 @@ async def compute_perplexity(
     api_calls = []
     for sample_txt in sampless:
         messages = []
-        if
+        if system:
             if not chat_model.sequential_exchange:
-                messages.append({"role": "system", "content":
+                messages.append({"role": "system", "content": system})
                 messages.append({"role": "user", "content": sample_txt})
             else:
                 messages.append({"role": "user", "content": sample_txt})
@@ -171,10 +171,10 @@ class LLMCompressor:
     def __init__(
         self,
         chat_model: iModel,
-
+        system=None,
         tokenizer=None,
         splitter=None,
-
+        compression_ratio=0.2,
         n_samples=5,
         chunk_size=64,
         max_tokens_per_sample=80,
@@ -193,10 +193,8 @@ class LLMCompressor:
         self.chat_model = chat_model
         self.tokenizer = tokenizer
         self.splitter = splitter
-        self.
-
-        )
-        self.target_ratio = target_ratio
+        self.system = system or "Concisely summarize content for storage:"
+        self.compression_ratio = compression_ratio
         self.n_samples = n_samples
         self.chunk_size = chunk_size
         self.max_tokens_per_sample = max_tokens_per_sample
@@ -281,7 +279,7 @@ class LLMCompressor:
             initial_context=initial_text,
             tokens=item_toks,
             n_samples=n_samples or self.n_samples,
-
+            system=self.system,
             use_residue=use_residue,
             **kwargs,
         )
@@ -347,6 +345,7 @@ class LLMCompressor:
         # Tokenize once to get total length
         all_tokens = self.tokenize(text)
         original_len = len(all_tokens)
+        ttl_chars = len(text)
 
         # Split text
         items = self.split(text, **split_kwargs)
@@ -363,26 +362,26 @@ class LLMCompressor:
         # Select
         selected = self.select_by_pplex(
             ranked_items=ranked,
-            target_compression_ratio=compression_ratio
+            target_compression_ratio=compression_ratio
+            or self.compression_ratio,
             original_length=original_len,
             min_pplx=min_pplx or self.min_pplx,
         )
 
-        if self.verbose:
-            compressed_len = sum(
-                len(to_list(self.tokenize(x), dropna=True, flatten=True))
-                for x in selected
-            )
-            ratio = compressed_len / original_len if original_len else 1
-            print(
-                f"Original tokens: {original_len}\n"
-                f"Selected tokens: {compressed_len}\n"
-                f"Compression ratio: {ratio:.3f}\n"
-                f"Time: {timer() - start:.3f}s\n"
-            )
-
         # Join final
         out_str = " ".join(selected)
+
+        if self.verbose:
+            compressed_chars = len(out_str)
+            ratio = compressed_chars / ttl_chars if original_len else 1
+            msg = "------------------------------------------\n"
+            msg += f"Compression Method: Perplexity\n"
+            msg += f"Compressed Characters number: {compressed_chars}\n"
+            msg += f"Character Compression Ratio: {ratio:.1%}\n"
+            msg += f"Compression Time: {timer() - start:.3f}s\n"
+            msg += f"Compression Model: {self.chat_model.model_name}\n"
+            print(msg)
 
         return out_str.strip()
 
     def select_by_pplex(
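Taken together, these hunks give LLMCompressor a `system` parameter with a storage-oriented default prompt, replace the old `target_ratio` attribute with `compression_ratio`, and switch the verbose report from token counts to character counts. A hedged sketch of driving the class directly; the iModel configuration is an illustrative assumption, not part of the diff:

from lionagi.service.imodel import iModel
from lionagi.libs.token_transform.perplexity import LLMCompressor

compressor = LLMCompressor(
    chat_model=iModel(provider="openai", model="gpt-4o-mini"),  # assumed config
    system="Concisely summarize content for storage:",  # matches the new default
    compression_ratio=0.2,
    n_samples=5,
    chunk_size=64,
)
# In an async context; prints the character-based report shown above when
# verbose is enabled:
#     compressed = await compressor.compress(long_text)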
@@ -419,21 +418,34 @@
 async def compress_text(
     text: str,
     chat_model: iModel,
-
-
+    system: str = None,
+    compression_ratio: float = 0.2,
     n_samples: int = 5,
     max_tokens_per_sample=80,
     verbose=True,
+    initial_text=None,
+    cumulative=False,
+    split_kwargs=None,
+    min_pplx=None,
+    **kwargs,
 ) -> str:
     """
     Convenience function that instantiates LLMCompressor and compresses text.
     """
     compressor = LLMCompressor(
         chat_model=chat_model,
-
-
+        system=system,
+        compression_ratio=compression_ratio,
         n_samples=n_samples,
         max_tokens_per_sample=max_tokens_per_sample,
         verbose=verbose,
     )
-    return await compressor.compress(
+    return await compressor.compress(
+        text,
+        compression_ratio=compression_ratio,
+        initial_text=initial_text,
+        cumulative=cumulative,
+        split_kwargs=split_kwargs,
+        min_pplx=min_pplx,
+        **kwargs,
+    )
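compress_text now accepts and forwards the full set of compression knobs (initial_text, cumulative, split_kwargs, min_pplx, **kwargs) to LLMCompressor.compress instead of dropping them. A minimal usage sketch; the model settings and sample text are placeholders:

import asyncio

from lionagi.service.imodel import iModel
from lionagi.libs.token_transform.perplexity import compress_text

async def main() -> None:
    compressed = await compress_text(
        "a long passage to shrink ...",
        chat_model=iModel(provider="openai", model="gpt-4o-mini"),  # assumed config
        compression_ratio=0.2,  # keep roughly 20% of the original text
        n_samples=5,
    )
    print(compressed)

asyncio.run(main())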
lionagi/libs/token_transform/symbolic_compress_context.py
ADDED
@@ -0,0 +1,138 @@
+from collections.abc import Callable
+from pathlib import Path
+from typing import Literal
+
+from lionagi.service.imodel import iModel
+from lionagi.session.branch import Branch
+from lionagi.utils import alcall
+
+from .base import TokenMapping, TokenMappingTemplate
+from .synthlang_.base import SynthlangFramework, SynthlangTemplate
+
+FRAMEWORK_OPTIONS = SynthlangFramework.load_framework_options()
+FRAMEWORK_CHOICES = Literal["math", "optim", "custom_algebra"]
+
+
+async def symbolic_compress_context(
+    *,
+    text: str = None,
+    url_or_path: str | Path = None,
+    chunk_by="tokens",
+    chunk_size: int = 1000,
+    chunk_tokenizer: Callable = str.split,
+    threshold=50,
+    output_path: Path | str = None,
+    overlap=0.05,
+    system: str = None,
+    chat_model: iModel = None,
+    use_lion_system_message: bool = True,
+    max_concurrent=10,
+    throttle_period=1,
+    framework: Literal["synthlang"] = "synthlang",
+    framework_template: (
+        SynthlangTemplate | SynthlangFramework
+    ) = SynthlangTemplate.REFLECTIVE_PATTERNS,
+    framework_options: list[FRAMEWORK_CHOICES] = None,
+    compress: bool = False,
+    compress_model: iModel = None,
+    compression_ratio: float = 0.2,
+    compress_initial_text=None,
+    compress_cumulative=False,
+    compress_split_kwargs=None,
+    compress_min_pplx=None,
+    encode_token_map: TokenMappingTemplate | dict | TokenMapping = None,
+    num_encodings: int = 3,
+    encode_output: bool = False,
+    num_output_encodings: int = None,
+    verbose: bool = True,
+    branch: Branch = None,
+    additional_text: str = "",
+    **kwargs,
+):
+    if framework != "synthlang":
+        raise ValueError(f"Unsupported framework: {framework}")
+
+    if not text and not url_or_path:
+        raise ValueError("Either text or url_or_path must be provided.")
+
+    if text and url_or_path:
+        raise ValueError("Only one of text or url_or_path should be provided.")
+
+    from .synthlang_.translate_to_synthlang import translate_to_synthlang
+
+    async def _inner(text: str):
+        b_ = None
+        if branch:
+            b_ = await branch.aclone()
+        else:
+            b_ = Branch(
+                system=system,
+                use_lion_system_message=use_lion_system_message,
+                chat_model=chat_model,
+            )
+
+        return await translate_to_synthlang(
+            text,
+            branch=b_,
+            framework_template=framework_template,
+            framework_options=framework_options,
+            compress=compress,
+            compress_model=compress_model,
+            compression_ratio=compression_ratio,
+            compress_kwargs={
+                "initial_text": compress_initial_text,
+                "cumulative": compress_cumulative,
+                "split_kwargs": compress_split_kwargs,
+                "min_pplx": compress_min_pplx,
+            },
+            encode_token_map=encode_token_map,
+            num_encodings=num_encodings,
+            encode_output=encode_output,
+            num_output_encodings=num_output_encodings,
+            verbose=verbose,
+            additional_text=additional_text,
+            **kwargs,
+        )
+
+    from lionagi.libs.file.process import chunk, chunk_content
+
+    texts = []
+    if url_or_path:
+        chunks = chunk(
+            url_or_path=url_or_path,
+            chunk_by=chunk_by,
+            chunk_size=chunk_size,
+            overlap=overlap,
+            threshold=threshold,
+        )
+        texts = [i.content for i in chunks if i.content]
+
+    elif text:
+        texts = chunk_content(
+            text=text,
+            chunk_by=chunk_by,
+            chunk_size=chunk_size,
+            overlap=overlap,
+            threshold=threshold,
+            tokenizer=chunk_tokenizer,
+        )
+
+    results = await alcall(
+        texts,
+        _inner,
+        max_concurrent=max_concurrent,
+        retry_default=None,
+        throttle_period=throttle_period,
+        flatten=True,
+        dropna=True,
+    )
+    text = "\n".join(results)
+
+    if output_path:
+        fp = Path(output_path)
+        fp.write_text(text)
+        if verbose:
+            print(f"Results of {len(text)} characters saved to: {fp}")
+
+        return fp
+    return text
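An end-to-end sketch of the new entry point: it chunks the input, translates each chunk to SynthLang concurrently via alcall, joins the results, and returns either the joined text or, when output_path is given, the Path it wrote. The file paths and model below are illustrative assumptions:

import asyncio

from lionagi.service.imodel import iModel
from lionagi.libs.token_transform.symbolic_compress_context import (
    symbolic_compress_context,
)

async def main() -> None:
    out_fp = await symbolic_compress_context(
        url_or_path="docs/design_notes.md",  # assumed input file
        chunk_size=1000,
        overlap=0.05,
        chat_model=iModel(provider="openai", model="gpt-4o-mini"),  # assumed config
        output_path="design_notes.synthlang.md",  # a Path is returned when set
    )
    print(out_fp)

asyncio.run(main())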