PyPI - gllm-core-binary - Versions diffs - 0.4.4__py3-none-manylinux_2_31_x86_64.whl - Mend

gllm-core-binary 0.4.4__py3-none-manylinux_2_31_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

gllm_core/__init__.py +1 -0
gllm_core/__init__.pyi +0 -0
gllm_core/adapters/__init__.py +5 -0
gllm_core/adapters/__init__.pyi +3 -0
gllm_core/adapters/tool/__init__.py +6 -0
gllm_core/adapters/tool/__init__.pyi +4 -0
gllm_core/adapters/tool/google_adk.py +91 -0
gllm_core/adapters/tool/google_adk.pyi +23 -0
gllm_core/adapters/tool/langchain.py +130 -0
gllm_core/adapters/tool/langchain.pyi +31 -0
gllm_core/constants.py +55 -0
gllm_core/constants.pyi +36 -0
gllm_core/event/__init__.py +6 -0
gllm_core/event/__init__.pyi +4 -0
gllm_core/event/event_emitter.py +211 -0
gllm_core/event/event_emitter.pyi +155 -0
gllm_core/event/handler/__init__.py +7 -0
gllm_core/event/handler/__init__.pyi +5 -0
gllm_core/event/handler/console_event_handler.py +48 -0
gllm_core/event/handler/console_event_handler.pyi +32 -0
gllm_core/event/handler/event_handler.py +89 -0
gllm_core/event/handler/event_handler.pyi +51 -0
gllm_core/event/handler/print_event_handler.py +130 -0
gllm_core/event/handler/print_event_handler.pyi +33 -0
gllm_core/event/handler/stream_event_handler.py +85 -0
gllm_core/event/handler/stream_event_handler.pyi +62 -0
gllm_core/event/hook/__init__.py +5 -0
gllm_core/event/hook/__init__.pyi +3 -0
gllm_core/event/hook/event_hook.py +30 -0
gllm_core/event/hook/event_hook.pyi +18 -0
gllm_core/event/hook/json_stringify_event_hook.py +32 -0
gllm_core/event/hook/json_stringify_event_hook.pyi +16 -0
gllm_core/event/messenger.py +133 -0
gllm_core/event/messenger.pyi +66 -0
gllm_core/schema/__init__.py +8 -0
gllm_core/schema/__init__.pyi +6 -0
gllm_core/schema/chunk.py +148 -0
gllm_core/schema/chunk.pyi +66 -0
gllm_core/schema/component.py +546 -0
gllm_core/schema/component.pyi +205 -0
gllm_core/schema/event.py +50 -0
gllm_core/schema/event.pyi +33 -0
gllm_core/schema/schema_generator.py +150 -0
gllm_core/schema/schema_generator.pyi +35 -0
gllm_core/schema/tool.py +418 -0
gllm_core/schema/tool.pyi +198 -0
gllm_core/utils/__init__.py +32 -0
gllm_core/utils/__init__.pyi +13 -0
gllm_core/utils/analyzer.py +256 -0
gllm_core/utils/analyzer.pyi +123 -0
gllm_core/utils/binary_handler_factory.py +99 -0
gllm_core/utils/binary_handler_factory.pyi +62 -0
gllm_core/utils/chunk_metadata_merger.py +102 -0
gllm_core/utils/chunk_metadata_merger.pyi +41 -0
gllm_core/utils/concurrency.py +184 -0
gllm_core/utils/concurrency.pyi +94 -0
gllm_core/utils/event_formatter.py +69 -0
gllm_core/utils/event_formatter.pyi +30 -0
gllm_core/utils/google_sheets.py +115 -0
gllm_core/utils/google_sheets.pyi +18 -0
gllm_core/utils/imports.py +91 -0
gllm_core/utils/imports.pyi +42 -0
gllm_core/utils/logger_manager.py +339 -0
gllm_core/utils/logger_manager.pyi +176 -0
gllm_core/utils/main_method_resolver.py +185 -0
gllm_core/utils/main_method_resolver.pyi +54 -0
gllm_core/utils/merger_method.py +130 -0
gllm_core/utils/merger_method.pyi +49 -0
gllm_core/utils/retry.py +258 -0
gllm_core/utils/retry.pyi +41 -0
gllm_core/utils/similarity.py +29 -0
gllm_core/utils/similarity.pyi +10 -0
gllm_core/utils/validation.py +26 -0
gllm_core/utils/validation.pyi +12 -0
gllm_core_binary-0.4.4.dist-info/METADATA +177 -0
gllm_core_binary-0.4.4.dist-info/RECORD +78 -0
gllm_core_binary-0.4.4.dist-info/WHEEL +5 -0
gllm_core_binary-0.4.4.dist-info/top_level.txt +1 -0

gllm_core/utils/analyzer.py ADDED Viewed

@@ -0,0 +1,256 @@
+"""An analyzer for the _run method of a component.
+Includes the analyzer itself and the data models used to represent the analysis results.
+Authors:
+    Dimitrij Ray (dimitrij.ray@gdplabs.id)
+References:
+    NONE
+"""
+import ast
+import inspect
+import textwrap
+from enum import StrEnum
+from typing import Any, Callable
+from pydantic import BaseModel, Field
+class ParameterKind(StrEnum):
+    """Enum representing the different kinds of parameters a method can have.
+    The member names match the five parameter kinds defined on ``inspect.Parameter``.
+    NOTE(review): no conversion from ``inspect`` kinds is performed in this module; confirm where it happens.
+    """
+    # A parameter declared before the "/" marker, e.g. ``a`` in ``def f(a, /)``.
+    POSITIONAL_ONLY = "positional_only"
+    # An ordinary parameter, e.g. ``a`` in ``def f(a)``.
+    POSITIONAL_OR_KEYWORD = "positional_or_keyword"
+    # The ``*args`` collector.
+    VAR_POSITIONAL = "var_positional"
+    # A parameter declared after ``*`` or ``*args``, e.g. ``b`` in ``def f(*, b)``.
+    KEYWORD_ONLY = "keyword_only"
+    # The ``**kwargs`` collector.
+    VAR_KEYWORD = "var_keyword"
+class ParameterInfo(BaseModel):
+    """Model representing information about a method parameter.
+    Attributes:
+        kind (ParameterKind): The kind of the parameter.
+        default (str): The default value of the parameter, if any.
+        annotation (str): The type annotation of the parameter, if any.
+    """
+    # Required: there is no default kind.
+    kind: ParameterKind
+    # Both fields below are typed ``str | None`` and are None unless a value is supplied,
+    # i.e. defaults and annotations are carried as strings, not as the original objects.
+    default: str | None = None
+    annotation: str | None = None
+class MethodSignature(BaseModel):
+    """Model representing the signature of a method.
+    Attributes:
+        parameters (dict[str, ParameterInfo]): A dictionary of parameter names to their information.
+        is_async (bool): Whether the method is asynchronous.
+    """
+    # Both fields are required: neither declares a default value.
+    parameters: dict[str, ParameterInfo]
+    is_async: bool
+class ArgUsages(BaseModel):
+    """Model representing the different types of argument usage in a run.
+    Attributes:
+        required (list[str]): A list of argument names that are required.
+        optional (list[str]): A list of argument names that are optional.
+        unresolvable (list[str]): A list of unresolvable key patterns encountered during analysis.
+    """
+    # default_factory=list gives each instance its own empty list, so RunAnalyzer can
+    # append to these lists without sharing state between profiles.
+    required: list[str] = Field(default_factory=list)
+    optional: list[str] = Field(default_factory=list)
+    unresolvable: list[str] = Field(default_factory=list)
+class RunArgumentUsageType(StrEnum):
+    """Enum representing the different types of argument usage in a run.
+    NOTE(review): this enum is not referenced by any other definition in this module;
+    the members presumably correspond to RunProfile.full_pass_methods and to the
+    ``required`` / ``optional`` lists of ArgUsages -- confirm against callers.
+    """
+    FULL_PASS = "full_pass"
+    REQUIRED = "required"
+    OPTIONAL = "optional"
+class RunProfile(BaseModel):
+    """Model representing the profile of a run.
+    Attributes:
+        arg_usages (ArgUsages): A dictionary mapping argument usage types to lists of
+            argument names.
+        full_pass_methods (list[str]): A list of method names that fully pass the kwargs.
+        method_signatures (dict[str, MethodSignature]): A dictionary mapping method names to their signatures.
+    """
+    # NOTE: despite the wording above, arg_usages is an ArgUsages model (not a dict).
+    arg_usages: ArgUsages
+    full_pass_methods: list[str]
+    method_signatures: dict[str, MethodSignature]
+    def __init__(self, **data: Any):
+        """Build a RunProfile, filling in an empty value for every field that is not supplied.
+        The three fields are resolved explicitly here (instead of via Field(default_factory=...))
+        to circumvent Pylint false positives. Keys in ``data`` other than the three field names
+        are not forwarded to the model.
+        """
+        # Field name -> zero-argument factory producing that field's empty value.
+        empty_value_factories = {
+            "arg_usages": ArgUsages,
+            "full_pass_methods": list,
+            "method_signatures": dict,
+        }
+        resolved_fields = {
+            field_name: data[field_name] if field_name in data else make_empty()
+            for field_name, make_empty in empty_value_factories.items()
+        }
+        super().__init__(**resolved_fields)
+class RunAnalyzer(ast.NodeVisitor):
+    """AST NodeVisitor that analyzes a class to build a RunProfile.
+    The analyzer walks the AST of a method (typically ``_run``) and records how the name
+    ``kwargs`` is used:
+    - ``callee(**kwargs)`` adds the callee to ``profile.full_pass_methods``.
+    - ``kwargs["key"]`` (read access) adds ``key`` to ``profile.arg_usages.required``.
+    - ``kwargs.get("key")`` adds ``key`` to ``profile.arg_usages.optional``.
+    - Keys that are not string literals are recorded, as source text, in
+      ``profile.arg_usages.unresolvable``.
+    Only the literal name ``kwargs`` is recognized; aliases of it are not tracked.
+    Attributes:
+        cls (type): The class to analyze.
+        profile (RunProfile): The profile of the run being analyzed.
+    """
+    def __init__(self, cls: type):
+        """Initialize the RunAnalyzer with a class.
+        Args:
+            cls (type): The class to analyze. Its ``__name__`` prefixes methods called on ``self``.
+        """
+        self.cls = cls
+        self.profile = RunProfile()
+    def visit_Call(self, node: ast.Call) -> None:
+        """Visit a Call node in the AST.
+        This node represents a function call in the source code.
+        Two patterns are recorded: calls that forward ``**kwargs`` in full, and
+        ``kwargs.get(<key>)`` lookups (optional arguments).
+        Args:
+            node (ast.Call): The Call node to visit.
+        """
+        for keyword in node.keywords:
+            # ``keyword.arg is None`` marks a ``**mapping`` argument rather than ``name=value``.
+            if keyword.arg is None and isinstance(keyword.value, ast.Name) and keyword.value.id == "kwargs":
+                self.profile.full_pass_methods.append(self._callee_name(node.func))
+        if (
+            node.args
+            and isinstance(node.func, ast.Attribute)
+            and isinstance(node.func.value, ast.Name)
+            and node.func.value.id == "kwargs"
+            and node.func.attr == "get"
+        ):
+            self._record_key(node.args[0], self.profile.arg_usages.optional)
+        # Keep descending so nested calls and subscripts are analyzed as well.
+        self.generic_visit(node)
+    def visit_Subscript(self, node: ast.Subscript) -> None:
+        """Visit a Subscript node in the AST.
+        The Subscript node represents a subscripted value in the source code.
+        Example: kwargs["key"]
+        Only read accesses count as required arguments; stores and deletes
+        (``kwargs["key"] = ...``, ``del kwargs["key"]``) are ignored.
+        Args:
+            node (ast.Subscript): The Subscript node to visit.
+        """
+        if (
+            isinstance(node.value, ast.Name)
+            and node.value.id == "kwargs"
+            and isinstance(node.ctx, ast.Load)
+        ):
+            self._record_key(node.slice, self.profile.arg_usages.required)
+        self.generic_visit(node)
+    def _callee_name(self, func: ast.expr) -> str:
+        """Return the name recorded for a callee that receives ``**kwargs``.
+        Args:
+            func (ast.expr): The ``func`` expression of the Call node.
+        Returns:
+            str: ``"<ClassName>.<method>"`` for ``self.<method>(...)``, the attribute name for any other
+                attribute call, the identifier for a plain name, and the expression's source text otherwise.
+        """
+        if isinstance(func, ast.Attribute):
+            if isinstance(func.value, ast.Name) and func.value.id == "self":
+                return f"{self.cls.__name__}.{func.attr}"
+            return func.attr
+        if isinstance(func, ast.Name):
+            return func.id
+        # Callees such as ``factory()(**kwargs)`` or ``table[key](**kwargs)`` are neither a Name nor an
+        # Attribute; reading ``.attr`` on them would raise AttributeError, so use their source text.
+        return self._expr_to_text(func)
+    def _record_key(self, key_node: ast.expr, bucket: list[str]) -> None:
+        """Append the key denoted by ``key_node`` to ``bucket``, or log it as unresolvable.
+        Args:
+            key_node (ast.expr): The AST node used as the key.
+            bucket (list[str]): The list (required or optional) that receives a resolvable key.
+        """
+        try:
+            key = self._get_key(key_node)
+        except ValueError:
+            self.profile.arg_usages.unresolvable.append(self._expr_to_text(key_node))
+            return
+        # Empty keys and the literal "kwargs" are deliberately not recorded.
+        if key and key != "kwargs":
+            bucket.append(key)
+    def _get_key(self, node_arg: ast.expr | str) -> str:
+        """Get the key from an AST node.
+        Args:
+            node_arg (ast.expr | str): The AST node representing the key.
+        Returns:
+            str: The key as a string.
+        Raises:
+            ValueError: If the key cannot be extracted from the node.
+        """
+        if isinstance(node_arg, str):
+            return node_arg
+        if isinstance(node_arg, ast.Constant):
+            if isinstance(node_arg.value, str):
+                return node_arg.value
+            raise ValueError(f"Non-string constant: {node_arg.value}")
+        if isinstance(node_arg, ast.Attribute):
+            raise ValueError(f"Attribute key not resolvable: {node_arg.attr}")
+        if isinstance(node_arg, ast.Name):
+            raise ValueError(f"Variable key not supported: {node_arg.id}")
+        raise ValueError(f"Unsupported AST node type for key extraction: {type(node_arg)}")
+    def _expr_to_text(self, expr: ast.AST) -> str:
+        """Return a readable textual form of an AST expression.
+        On Python 3.11+, ast.unparse is available and sufficient.
+        Args:
+            expr (ast.AST): The AST expression node.
+        Returns:
+            str: Text representation of the expression.
+        """
+        try:
+            return ast.unparse(expr)
+        except Exception:
+            # Deliberately broad: this text is only a diagnostic label, so any unparse failure
+            # degrades to a coarser description instead of aborting the analysis.
+            if isinstance(expr, ast.Name):
+                return expr.id
+            if isinstance(expr, ast.Attribute):
+                return expr.attr
+            if isinstance(expr, ast.Constant):
+                return str(expr.value)
+            # Fallback for unknown node types
+            return expr.__class__.__name__
+def analyze_method(cls: type, method: Callable) -> RunProfile:
+    """Run a RunAnalyzer over the source code of ``method`` and return the resulting profile.
+    This function encapsulates the common analysis logic used by both
+    Component._analyze_run_method() and schema_generator._generate_from_analyzer().
+    Args:
+        cls (type): The class containing the method (for analyzer context).
+        method (Callable): The method to analyze; its source must be retrievable via ``inspect.getsource``.
+    Returns:
+        RunProfile: The analysis results.
+    """
+    run_analyzer = RunAnalyzer(cls)
+    # Method source is indented inside its class body; dedent it so it parses as a standalone module.
+    dedented_source = textwrap.dedent(inspect.getsource(method))
+    run_analyzer.visit(ast.parse(dedented_source))
+    return run_analyzer.profile

gllm_core/utils/analyzer.pyi ADDED Viewed

@@ -0,0 +1,123 @@
+import ast
+from _typeshed import Incomplete
+from enum import StrEnum
+from pydantic import BaseModel
+from typing import Any, Callable
+class ParameterKind(StrEnum):
+    """Enum representing the different kinds of parameters a method can have.
+    The member names match the five parameter kinds defined on ``inspect.Parameter``.
+    """
+    POSITIONAL_ONLY = 'positional_only'
+    POSITIONAL_OR_KEYWORD = 'positional_or_keyword'
+    # The ``*args`` collector.
+    VAR_POSITIONAL = 'var_positional'
+    KEYWORD_ONLY = 'keyword_only'
+    # The ``**kwargs`` collector.
+    VAR_KEYWORD = 'var_keyword'
+class ParameterInfo(BaseModel):
+    """Model representing information about a method parameter.
+    Attributes:
+        kind (ParameterKind): The kind of the parameter.
+        default (str | None): The default value of the parameter as a string, or None if there is none.
+        annotation (str | None): The type annotation of the parameter as a string, or None if there is none.
+    """
+    kind: ParameterKind
+    default: str | None
+    annotation: str | None
+class MethodSignature(BaseModel):
+    """Model representing the signature of a method.
+    Attributes:
+        parameters (dict[str, ParameterInfo]): A dictionary of parameter names to their information.
+        is_async (bool): Whether the method is asynchronous.
+    """
+    # Both fields are required by the implementation (no defaults are declared there).
+    parameters: dict[str, ParameterInfo]
+    is_async: bool
+class ArgUsages(BaseModel):
+    """Model representing the different types of argument usage in a run.
+    Attributes:
+        required (list[str]): A list of argument names that are required.
+        optional (list[str]): A list of argument names that are optional.
+        unresolvable (list[str]): A list of unresolvable key patterns encountered during analysis.
+    """
+    # In the implementation each list defaults to a fresh empty list (Field(default_factory=list)).
+    required: list[str]
+    optional: list[str]
+    unresolvable: list[str]
+class RunArgumentUsageType(StrEnum):
+    """Enum representing the different types of argument usage in a run.
+    NOTE(review): not referenced by any other declaration in this stub; confirm its consumers.
+    """
+    FULL_PASS = 'full_pass'
+    REQUIRED = 'required'
+    OPTIONAL = 'optional'
+class RunProfile(BaseModel):
+    """Model representing the profile of a run.
+    Attributes:
+        arg_usages (ArgUsages): The required, optional, and unresolvable argument names found
+            during analysis.
+        full_pass_methods (list[str]): A list of method names that fully pass the kwargs.
+        method_signatures (dict[str, MethodSignature]): A dictionary mapping method names to their signatures.
+    """
+    arg_usages: ArgUsages
+    full_pass_methods: list[str]
+    method_signatures: dict[str, MethodSignature]
+    def __init__(self, **data: Any) -> None:
+        """Initialize the RunProfile with the given data.
+        Every field missing from ``data`` is filled with an empty value, so ``RunProfile()`` is valid.
+        This is to circumvent Pylint false positives due to the usage of Field(default_factory=...).
+        """
+class RunAnalyzer(ast.NodeVisitor):
+    """AST NodeVisitor that analyzes a class to build a RunProfile.
+    The run analyzer visits the AST nodes of a class to analyze the _run method and build a RunProfile.
+    It will look for the usage of the **kwargs parameter in method calls and subscript expressions.
+    The traversal result is stored as a RunProfile object.
+    Attributes:
+        cls (type): The class to analyze.
+        profile (RunProfile): The profile of the run being analyzed.
+    """
+    # Typed as Incomplete by the stub generator; the implementation stores the class passed to
+    # __init__ in ``cls`` and a RunProfile instance in ``profile``.
+    cls: Incomplete
+    profile: Incomplete
+    def __init__(self, cls) -> None:
+        """Initialize the RunAnalyzer with a class.
+        Args:
+            cls (type): The class to analyze.
+        """
+    def visit_Call(self, node: ast.Call) -> None:
+        """Visit a Call node in the AST.
+        This node represents a function call in the source code.
+        Here, we are looking for calls to methods that fully pass the kwargs
+        (``callee(**kwargs)``) and for ``kwargs.get(<key>)`` lookups, which are recorded as optional.
+        Args:
+            node (ast.Call): The Call node to visit.
+        """
+    def visit_Subscript(self, node: ast.Subscript) -> None:
+        '''Visit a Subscript node in the AST.
+        The Subscript node represents a subscripted value in the source code.
+        Example: kwargs["key"]
+        Read accesses of this form are recorded as required arguments.
+        Args:
+            node (ast.Subscript): The Subscript node to visit.
+        '''
+def analyze_method(cls, method: Callable) -> RunProfile:
+    """Analyze a method using RunAnalyzer.
+    This function encapsulates the common analysis logic used by both
+    Component._analyze_run_method() and schema_generator._generate_from_analyzer().
+    The implementation reads the method's source with ``inspect.getsource``, so the source
+    must be available at runtime.
+    Args:
+        cls (type): The class containing the method (for analyzer context).
+        method (Callable): The method to analyze.
+    Returns:
+        RunProfile: The analysis results.
+    """

gllm_core/utils/binary_handler_factory.py ADDED Viewed

@@ -0,0 +1,99 @@
+"""Binary handler factory for handling binary data in various formats.
+This module provides factory function to create handlers for different types of binary data.
+Authors:
+    Resti Febriana (resti.febriana@gdplabs.id)
+References:
+    NONE
+"""
+import base64
+from enum import StrEnum
+from typing import Callable
+class BinaryHandlingStrategy(StrEnum):
+    """Enum for binary data handling options.
+    Each member selects one handler function in ``binary_handler_factory``.
+    Attributes:
+        SKIP (str): Skip binary data.
+        BASE64 (str): Encode binary data to base64.
+        HEX (str): Encode binary data to hex.
+        SHOW_SIZE (str): Show the size of binary data.
+    """
+    SKIP = "skip"  # -> skip_handler
+    BASE64 = "base64"  # -> base64_handler
+    HEX = "hex"  # -> hex_handler
+    SHOW_SIZE = "show_size"  # -> show_size_handler
+def binary_handler_factory(handling_type: BinaryHandlingStrategy | str) -> Callable[[bytes], str | None]:
+    """Return the handler function that implements the requested binary handling strategy.
+    Args:
+        handling_type (BinaryHandlingStrategy | str): The strategy, given either as an enum member
+            or as its string value (e.g. "base64").
+    Returns:
+        Callable[[bytes], str | None]: A function that handles binary data according to specified type.
+    Raises:
+        ValueError: If ``handling_type`` is not a valid BinaryHandlingStrategy value.
+    """
+    # Normalizing through the enum constructor validates plain strings up front.
+    match BinaryHandlingStrategy(handling_type):
+        case BinaryHandlingStrategy.SKIP:
+            return skip_handler
+        case BinaryHandlingStrategy.BASE64:
+            return base64_handler
+        case BinaryHandlingStrategy.HEX:
+            return hex_handler
+        case BinaryHandlingStrategy.SHOW_SIZE:
+            return show_size_handler
+        case unmapped_strategy:
+            # Unreachable while the enum has exactly the four members above.
+            raise KeyError(unmapped_strategy)
+def skip_handler(v: bytes) -> None:
+    """Discard binary data.
+    Args:
+        v (bytes): The binary data to skip. It is not inspected.
+    Returns:
+        None: Always, regardless of the input.
+    """
+    # Nothing to compute: skipping means producing no replacement value.
+    return
+def base64_handler(v: bytes) -> str:
+    """Encode binary data as Base64 text.
+    Args:
+        v (bytes): The binary data to encode.
+    Returns:
+        str: The Base64 encoding of ``v``, decoded to a string.
+    """
+    encoded_bytes = base64.b64encode(v)
+    return encoded_bytes.decode()
+def hex_handler(v: bytes) -> str:
+    """Encode binary data as hexadecimal text.
+    Args:
+        v (bytes): The binary data to encode.
+    Returns:
+        str: The hexadecimal representation of ``v``, two digits per byte.
+    """
+    hex_text: str = v.hex()
+    return hex_text
+def show_size_handler(v: bytes) -> str:
+    """Describe binary data by its size instead of its content.
+    Args:
+        v (bytes): The binary data to show the size.
+    Returns:
+        str: A placeholder such as "<Binary Data> (Size: 1,234 bytes)", with the byte count
+            formatted using thousands separators.
+    """
+    size_in_bytes = len(v)
+    return f"<Binary Data> (Size: {size_in_bytes:,} bytes)"

gllm_core/utils/binary_handler_factory.pyi ADDED Viewed

@@ -0,0 +1,62 @@
+from enum import StrEnum
+from typing import Callable
+class BinaryHandlingStrategy(StrEnum):
+    """Enum for binary data handling options.
+    Each member selects one handler function in ``binary_handler_factory``.
+    Attributes:
+        SKIP (str): Skip binary data.
+        BASE64 (str): Encode binary data to base64.
+        HEX (str): Encode binary data to hex.
+        SHOW_SIZE (str): Show the size of binary data.
+    """
+    SKIP = 'skip'
+    BASE64 = 'base64'
+    HEX = 'hex'
+    SHOW_SIZE = 'show_size'
+def binary_handler_factory(handling_type: BinaryHandlingStrategy | str) -> Callable[[bytes], str | None]:
+    """Factory function to create appropriate binary data handler.
+    Args:
+        handling_type (BinaryHandlingStrategy | str): The type of binary handling to use, as an enum
+            member or as its string value.
+    Returns:
+        Callable[[bytes], str | None]: A function that handles binary data according to specified type.
+    Raises:
+        ValueError: If ``handling_type`` is not a valid BinaryHandlingStrategy value.
+    """
+def skip_handler(v: bytes) -> None:
+    """Handler for skipping binary data.
+    Args:
+        v (bytes): The binary data to skip.
+    Returns:
+        None: Always; the input is ignored.
+    """
+def base64_handler(v: bytes) -> str:
+    """Handler for encoding binary data to base64.
+    Args:
+        v (bytes): The binary data to encode.
+    Returns:
+        str: The raw base64 encoded binary data, as a string.
+    """
+def hex_handler(v: bytes) -> str:
+    """Handler for encoding binary data to hex.
+    Args:
+        v (bytes): The binary data to encode.
+    Returns:
+        str: The raw hex encoded binary data, two hexadecimal digits per byte.
+    """
+def show_size_handler(v: bytes) -> str:
+    """Handler for showing the size of binary data.
+    Args:
+        v (bytes): The binary data to show the size.
+    Returns:
+        str: A placeholder describing the size of the binary data,
+            e.g. "<Binary Data> (Size: 1,234 bytes)".
+    """

gllm_core/utils/chunk_metadata_merger.py ADDED Viewed

@@ -0,0 +1,102 @@
+"""Defines a helper class to merge metadata from multiple chunks.
+Authors:
+    Henry Wicaksono (henry.wicaksono@gdplabs.id)
+References:
+    NONE
+"""
+from typing import Any, Callable
+from gllm_core.constants import DefaultChunkMetadata
+from gllm_core.utils.merger_method import MergerMethod
+class ChunkMetadataMerger:
+    """A helper class to merge metadata from multiple chunks.
+    Attributes:
+        merger_map (dict[str, Callable[[list[Any]], Any]]): A mapping of metadata keys to merger functions.
+        default_merger (Callable[[list[Any]], Any]): The merger function for metadata keys that are not
+            present in merger_map.
+        retained_keys (set[str] | None): The keys that should be retained in the merged metadata.
+            If None, all intersection keys are retained.
+    """
+    def __init__(
+        self,
+        merger_func_map: dict[str, Callable[[list[Any]], Any]] | None = None,
+        default_merger_func: Callable[[list[Any]], Any] | None = None,
+        retained_keys: set[str] | None = None,
+    ):
+        """Initializes a new instance of the ChunkMetadataMerger class.
+        Args:
+            merger_func_map (dict[str, Callable[[list[Any]], Any]] | None, optional): A mapping of metadata keys to
+                merger functions. Defaults to None, in which case a default merger map is used. The default merger map:
+                1. Picks the first value of the PREV_CHUNK_ID key.
+                2. Picks the last value of the NEXT_CHUNK_ID key.
+                An explicitly passed empty mapping is honored: every key then uses the default merger.
+            default_merger_func (Callable[[list[Any]], Any] | None, optional): The default merger for metadata keys that
+                are not present in the merger_func_map. Defaults to None, in which case a default merger that picks the
+                first value is used.
+            retained_keys (set[str] | None, optional): The keys that should be retained in the merged metadata. Defaults
+                to None, in which case all intersection keys are retained. An explicitly passed empty set is honored:
+                no keys are retained.
+        """
+        # Compare against None rather than relying on truthiness, so that an explicit empty
+        # mapping is not silently replaced by the defaults.
+        if merger_func_map is None:
+            merger_func_map = {
+                DefaultChunkMetadata.PREV_CHUNK_ID: MergerMethod.pick_first,
+                DefaultChunkMetadata.NEXT_CHUNK_ID: MergerMethod.pick_last,
+            }
+        self.merger_map = merger_func_map
+        self.default_merger = MergerMethod.pick_first if default_merger_func is None else default_merger_func
+        self.retained_keys = retained_keys
+    def merge(self, metadatas: list[dict[str, Any]]) -> dict[str, Any]:
+        """Merges metadata from multiple chunks.
+        Args:
+            metadatas (list[dict[str, Any]]): The metadata to merge.
+        Returns:
+            dict[str, Any]: The merged metadata. Empty if ``metadatas`` is empty. Keys appear in the
+                order in which they occur in the first metadata dictionary.
+        Raises:
+            ValueError: If any of the retained keys are missing from any of the metadata dictionaries.
+        """
+        if not metadatas:
+            return {}
+        retained = self._get_retained_keys(metadatas)
+        # Set iteration order is not stable across runs; every retained key is present in the
+        # first dictionary, so follow its key order to make the result deterministic.
+        ordered_keys = [key for key in metadatas[0] if key in retained]
+        merged_metadata = {}
+        for key in ordered_keys:
+            values = [metadata[key] for metadata in metadatas]
+            merger = self.merger_map.get(key, self.default_merger)
+            merged_metadata[key] = merger(values)
+        return merged_metadata
+    def _get_retained_keys(self, metadatas: list[dict[str, Any]]) -> set[str]:
+        """Gets the set of retained keys.
+        This method ensures that the retained keys are present in all metadata dictionaries.
+        Args:
+            metadatas (list[dict[str, Any]]): The metadata to parse. Must not be empty.
+        Returns:
+            set[str]: The set of retained keys.
+        Raises:
+            ValueError: If any of the retained keys are missing from any of the metadata dictionaries.
+        """
+        common_keys = set.intersection(*(set(metadata) for metadata in metadatas))
+        # None means "retain everything the chunks have in common"; an empty set means "retain nothing".
+        if self.retained_keys is None:
+            return common_keys
+        missing_keys = self.retained_keys - common_keys
+        if missing_keys:
+            raise ValueError(f"The following keys are missing in the metadata: {missing_keys}")
+        return self.retained_keys

gllm_core/utils/chunk_metadata_merger.pyi ADDED Viewed

@@ -0,0 +1,41 @@
+from _typeshed import Incomplete
+from gllm_core.constants import DefaultChunkMetadata as DefaultChunkMetadata
+from gllm_core.utils.merger_method import MergerMethod as MergerMethod
+from typing import Any, Callable
+class ChunkMetadataMerger:
+    """A helper class to merge metadata from multiple chunks.
+    Attributes:
+        merger_map (dict[str, Callable[[list[Any]], Any]]): A mapping of metadata keys to merger functions.
+        default_merger (Callable[[list[Any]], Any]): The default merger function for metadata keys that are not
+            present in merger_map.
+        retained_keys (set[str] | None): The keys that should be retained in the merged metadata.
+            If None, all intersection keys are retained.
+    """
+    # Typed as Incomplete by the stub generator; see the Attributes section above for the intended types.
+    merger_map: Incomplete
+    default_merger: Incomplete
+    retained_keys: Incomplete
+    def __init__(self, merger_func_map: dict[str, Callable[[list[Any]], Any]] | None = None, default_merger_func: Callable[[list[Any]], Any] | None = None, retained_keys: set[str] | None = None) -> None:
+        """Initializes a new instance of the ChunkMetadataMerger class.
+        Args:
+            merger_func_map (dict[str, Callable[[list[Any]], Any]] | None, optional): A mapping of metadata keys to
+                merger functions. Defaults to None, in which case a default merger map is used. The default merger map:
+                1. Picks the first value of the PREV_CHUNK_ID key.
+                2. Picks the last value of the NEXT_CHUNK_ID key.
+            default_merger_func (Callable[[list[Any]], Any] | None, optional): The default merger for metadata keys that
+                are not present in the merger_func_map. Defaults to None, in which case a default merger that picks the
+                first value is used.
+            retained_keys (set[str] | None, optional): The keys that should be retained in the merged metadata. Defaults
+                to None, in which case all intersection keys are retained.
+        """
+    def merge(self, metadatas: list[dict[str, Any]]) -> dict[str, Any]:
+        """Merges metadata from multiple chunks.
+        Args:
+            metadatas (list[dict[str, Any]]): The metadata to merge.
+        Returns:
+            dict[str, Any]: The merged metadata. Empty if ``metadatas`` is empty.
+        Raises:
+            ValueError: If any of the retained keys are missing from any of the metadata dictionaries.
+        """