gllm-core-binary 0.4.4__py3-none-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. gllm_core/__init__.py +1 -0
  2. gllm_core/__init__.pyi +0 -0
  3. gllm_core/adapters/__init__.py +5 -0
  4. gllm_core/adapters/__init__.pyi +3 -0
  5. gllm_core/adapters/tool/__init__.py +6 -0
  6. gllm_core/adapters/tool/__init__.pyi +4 -0
  7. gllm_core/adapters/tool/google_adk.py +91 -0
  8. gllm_core/adapters/tool/google_adk.pyi +23 -0
  9. gllm_core/adapters/tool/langchain.py +130 -0
  10. gllm_core/adapters/tool/langchain.pyi +31 -0
  11. gllm_core/constants.py +55 -0
  12. gllm_core/constants.pyi +36 -0
  13. gllm_core/event/__init__.py +6 -0
  14. gllm_core/event/__init__.pyi +4 -0
  15. gllm_core/event/event_emitter.py +211 -0
  16. gllm_core/event/event_emitter.pyi +155 -0
  17. gllm_core/event/handler/__init__.py +7 -0
  18. gllm_core/event/handler/__init__.pyi +5 -0
  19. gllm_core/event/handler/console_event_handler.py +48 -0
  20. gllm_core/event/handler/console_event_handler.pyi +32 -0
  21. gllm_core/event/handler/event_handler.py +89 -0
  22. gllm_core/event/handler/event_handler.pyi +51 -0
  23. gllm_core/event/handler/print_event_handler.py +130 -0
  24. gllm_core/event/handler/print_event_handler.pyi +33 -0
  25. gllm_core/event/handler/stream_event_handler.py +85 -0
  26. gllm_core/event/handler/stream_event_handler.pyi +62 -0
  27. gllm_core/event/hook/__init__.py +5 -0
  28. gllm_core/event/hook/__init__.pyi +3 -0
  29. gllm_core/event/hook/event_hook.py +30 -0
  30. gllm_core/event/hook/event_hook.pyi +18 -0
  31. gllm_core/event/hook/json_stringify_event_hook.py +32 -0
  32. gllm_core/event/hook/json_stringify_event_hook.pyi +16 -0
  33. gllm_core/event/messenger.py +133 -0
  34. gllm_core/event/messenger.pyi +66 -0
  35. gllm_core/schema/__init__.py +8 -0
  36. gllm_core/schema/__init__.pyi +6 -0
  37. gllm_core/schema/chunk.py +148 -0
  38. gllm_core/schema/chunk.pyi +66 -0
  39. gllm_core/schema/component.py +546 -0
  40. gllm_core/schema/component.pyi +205 -0
  41. gllm_core/schema/event.py +50 -0
  42. gllm_core/schema/event.pyi +33 -0
  43. gllm_core/schema/schema_generator.py +150 -0
  44. gllm_core/schema/schema_generator.pyi +35 -0
  45. gllm_core/schema/tool.py +418 -0
  46. gllm_core/schema/tool.pyi +198 -0
  47. gllm_core/utils/__init__.py +32 -0
  48. gllm_core/utils/__init__.pyi +13 -0
  49. gllm_core/utils/analyzer.py +256 -0
  50. gllm_core/utils/analyzer.pyi +123 -0
  51. gllm_core/utils/binary_handler_factory.py +99 -0
  52. gllm_core/utils/binary_handler_factory.pyi +62 -0
  53. gllm_core/utils/chunk_metadata_merger.py +102 -0
  54. gllm_core/utils/chunk_metadata_merger.pyi +41 -0
  55. gllm_core/utils/concurrency.py +184 -0
  56. gllm_core/utils/concurrency.pyi +94 -0
  57. gllm_core/utils/event_formatter.py +69 -0
  58. gllm_core/utils/event_formatter.pyi +30 -0
  59. gllm_core/utils/google_sheets.py +115 -0
  60. gllm_core/utils/google_sheets.pyi +18 -0
  61. gllm_core/utils/imports.py +91 -0
  62. gllm_core/utils/imports.pyi +42 -0
  63. gllm_core/utils/logger_manager.py +339 -0
  64. gllm_core/utils/logger_manager.pyi +176 -0
  65. gllm_core/utils/main_method_resolver.py +185 -0
  66. gllm_core/utils/main_method_resolver.pyi +54 -0
  67. gllm_core/utils/merger_method.py +130 -0
  68. gllm_core/utils/merger_method.pyi +49 -0
  69. gllm_core/utils/retry.py +258 -0
  70. gllm_core/utils/retry.pyi +41 -0
  71. gllm_core/utils/similarity.py +29 -0
  72. gllm_core/utils/similarity.pyi +10 -0
  73. gllm_core/utils/validation.py +26 -0
  74. gllm_core/utils/validation.pyi +12 -0
  75. gllm_core_binary-0.4.4.dist-info/METADATA +177 -0
  76. gllm_core_binary-0.4.4.dist-info/RECORD +78 -0
  77. gllm_core_binary-0.4.4.dist-info/WHEEL +5 -0
  78. gllm_core_binary-0.4.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,62 @@
1
+ import asyncio
2
+ from _typeshed import Incomplete
3
+ from gllm_core.event.handler.event_handler import BaseEventHandler as BaseEventHandler
4
+ from gllm_core.schema import Event as Event
5
+ from typing import AsyncGenerator
6
+
7
class StreamEventHandler(BaseEventHandler):
    """An event handler that exposes emitted events as an asynchronous stream backed by a queue.

    Items are added to the queue through `emit` and consumed through `stream`, which lets data be sent and
    retrieved in a non-blocking manner. Calling `close` ends the stream so that no further items are processed.

    Attributes:
        name (str): The name assigned to the event handler.
        color_map (dict[str, str]): The dictionary that maps certain event types to their
            corresponding colors in Rich format.
        queue (asyncio.Queue): The queue used to manage an asynchronous stream.
        stream_delay (float): The delay duration after each data stream.
    """
    queue: asyncio.Queue
    # NOTE(review): typed from the `stream_delay: float` constructor parameter; confirm against the implementation.
    stream_delay: float
    def __init__(self, name: str | None = None, stream_delay: float = 0.001) -> None:
        """Initializes a new instance of the StreamEventHandler class.

        Args:
            name (str | None, optional): The name assigned to the event handler. Defaults to None,
                in which case the class name will be used.
            stream_delay (float, optional): The delay duration after each data stream. Needed in order for the
                stream manager to process the data stream properly. Defaults to 0.001.
        """
    async def emit(self, event: Event) -> None:
        """Emits the given event by sending it to the client via an asynchronous queue.

        The event is serialized to JSON and added to the asynchronous queue. A delay of `stream_delay` is then
        introduced so that the stream data can be processed properly.

        Args:
            event (Event): The event to be emitted.

        Returns:
            None
        """
    async def stream(self) -> AsyncGenerator:
        """Asynchronously yields items from the queue until a StopIteration item is encountered.

        Items are retrieved from the queue and yielded one by one. When a StopIteration item is retrieved,
        the iteration stops and the method returns.

        Returns:
            AsyncGenerator: An asynchronous generator yielding items from the queue.
        """
    async def close(self) -> None:
        """Immediately stops the stream by placing a StopIteration item in the queue.

        The StopIteration item is inserted into the queue without waiting, which signals the stream to stop
        processing further items.

        Returns:
            None
        """
@@ -0,0 +1,5 @@
1
+ """Defines the event hooks module used throughout the Gen AI applications."""
2
+
3
+ from gllm_core.event.hook.json_stringify_event_hook import JSONStringifyEventHook
4
+
5
+ __all__ = ["JSONStringifyEventHook"]
@@ -0,0 +1,3 @@
1
+ from gllm_core.event.hook.json_stringify_event_hook import JSONStringifyEventHook as JSONStringifyEventHook
2
+
3
+ __all__ = ['JSONStringifyEventHook']
@@ -0,0 +1,30 @@
1
+ """Defines an interface for event hooks.
2
+
3
+ Authors:
4
+ Henry Wicaksono (henry.wicaksono@gdplabs.id)
5
+
6
+ References:
7
+ NONE
8
+ """
9
+
10
+ from abc import ABC, abstractmethod
11
+
12
+ from gllm_core.schema import Event
13
+
14
+
15
class BaseEventHook(ABC):
    """Abstract interface that every event hook implements.

    A hook is an awaitable callable: it receives an event and returns an event.
    """

    @abstractmethod
    async def __call__(self, event: Event) -> Event:
        """Apply this hook to an event.

        Concrete subclasses override this method to define what the hook does to the event.

        Args:
            event (Event): The event the hook is applied to.

        Returns:
            Event: The event produced by applying the hook.
        """
        # Reached only if a subclass delegates to this base implementation.
        raise NotImplementedError
@@ -0,0 +1,18 @@
1
+ import abc
2
+ from abc import ABC, abstractmethod
3
+ from gllm_core.schema import Event as Event
4
+
5
class BaseEventHook(ABC, metaclass=abc.ABCMeta):
    """Abstract interface that every event hook implements."""
    @abstractmethod
    async def __call__(self, event: Event) -> Event:
        """Apply this hook to an event.

        Concrete subclasses override this method to define what the hook does to the event.

        Args:
            event (Event): The event the hook is applied to.

        Returns:
            Event: The event produced by applying the hook.
        """
@@ -0,0 +1,32 @@
1
+ """Defines an event hook to JSON stringify the event value.
2
+
3
+ Authors:
4
+ Henry Wicaksono (henry.wicaksono@gdplabs.id)
5
+
6
+ References:
7
+ NONE
8
+ """
9
+
10
+ import json
11
+
12
+ from gllm_core.event.hook.event_hook import BaseEventHook
13
+ from gllm_core.schema import Event
14
+
15
+
16
class JSONStringifyEventHook(BaseEventHook):
    """Event hook that turns a dictionary event value into its JSON string form."""

    async def __call__(self, event: Event) -> Event:
        """Apply the hook to the event.

        When `event.value` is a dictionary it is replaced, on the same event object, by the result of
        `json.dumps`. Any other value is left untouched.

        Args:
            event (Event): The event to apply the hook to.

        Returns:
            Event: The same event instance that was passed in.
        """
        payload = event.value
        if not isinstance(payload, dict):
            # Only dictionaries are stringified; everything else passes through unchanged.
            return event

        event.value = json.dumps(payload)
        return event
@@ -0,0 +1,16 @@
1
+ from gllm_core.event.hook.event_hook import BaseEventHook as BaseEventHook
2
+ from gllm_core.schema import Event as Event
3
+
4
class JSONStringifyEventHook(BaseEventHook):
    """Event hook that turns a dictionary event value into its JSON string form."""
    async def __call__(self, event: Event) -> Event:
        """Apply the hook to the event.

        The event value is converted to a JSON string if it is a dictionary.

        Args:
            event (Event): The event to apply the hook to.

        Returns:
            Event: The event after the hook is applied.
        """
@@ -0,0 +1,133 @@
1
+ """Defines a component used for custom event messaging in Gen AI applications pipelines.
2
+
3
+ Authors:
4
+ Henry Wicaksono (henry.wicaksono@gdplabs.id)
5
+
6
+ References:
7
+ NONE
8
+ """
9
+
10
+ from typing import Any
11
+
12
+ from gllm_core.event import EventEmitter
13
+ from gllm_core.schema import Component
14
+ from gllm_core.utils import get_placeholder_keys
15
+
16
+
17
class Messenger(Component):
    """Pipeline component that emits a custom event message, optionally filled in from state variables.

    The component is meant to sit between other pipeline steps, so that event messaging can happen outside
    individual components while still being part of the pipeline execution.

    Attributes:
        message (str): The message to be sent, may contain placeholders enclosed in curly braces `{}`.
        is_template (bool): Whether the message is a template that can be injected with state variables.
            Defaults to True.
        variable_keys (list[str]): The keys of the message that can be injected with state variables.
            Only used if `is_template` is set to True.

    Plain string message example:
    ```python
    event_emitter = EventEmitter(handlers=[ConsoleEventHandler()])
    kwargs = {"event_emitter": event_emitter}

    messenger = Messenger("Executing component.", is_template=False)
    await messenger.run(**kwargs)
    ```

    Template message example:
    ```python
    event_emitter = EventEmitter(handlers=[ConsoleEventHandler()])
    state_variables = {"query": "Hi!", "top_k": 10}
    kwargs = {"event_emitter": event_emitter, "state_variables": state_variables}

    messenger = Messenger("Executing component for query {query} and top k {top_k}.")
    await messenger.run(**kwargs)
    ```
    """

    def __init__(self, message: str, is_template: bool = True):
        """Initializes a new instance of the Messenger class.

        Args:
            message (str): The message to be sent, may contain placeholders enclosed in curly braces `{}`.
            is_template (bool, optional): Whether the message is a template that can be injected with state
                variables. Defaults to True.

        NOTE(review): nothing in this constructor raises directly; whether `get_placeholder_keys` can raise
        for a malformed template is not visible here and should be confirmed.
        """
        self.is_template = is_template
        self.message = message
        # Placeholder keys are only extracted for templates; a plain message has none.
        if is_template:
            self.variable_keys = get_placeholder_keys(message)
        else:
            self.variable_keys = []

    async def _run(self, **kwargs: Any) -> None:
        """Executes the messaging operation.

        Checks that the kwargs carry an `EventEmitter` under `event_emitter`, then delegates to
        `send_message` with the optional `state_variables` and `emit_kwargs` entries.

        Args:
            **kwargs (Any): A dictionary of arguments for the event process, must include `event_emitter` and may
                optionally include `state_variables`, and `emit_kwargs`.

        Raises:
            KeyError: If `event_emitter` is missing from the kwargs or is not an `EventEmitter` instance.
        """
        emitter = kwargs.get("event_emitter")
        # A missing key yields None, which fails the isinstance check just like a wrongly typed value.
        if not isinstance(emitter, EventEmitter):
            raise KeyError("The input kwargs must include an `event_emitter` key with an EventEmitter instance.")

        return await self.send_message(
            emitter,
            state_variables=kwargs.get("state_variables"),
            emit_kwargs=kwargs.get("emit_kwargs"),
        )

    async def send_message(
        self,
        event_emitter: EventEmitter,
        state_variables: dict[str, Any] | None = None,
        emit_kwargs: dict[str, Any] | None = None,
    ) -> None:
        """Emits the message to the event emitter.

        For a template message, the state variables are validated and the values for `variable_keys` are
        injected into the message. For a plain message, the message is emitted as is.

        Args:
            event_emitter (EventEmitter): The event emitter instance to emit the message.
            state_variables (dict[str, Any] | None, optional): The state variables to be injected into the message
                placeholders. Can only be provided if `is_template` is set to True. Defaults to None.
            emit_kwargs (dict[str, Any] | None, optional): The keyword arguments to be passed to the event emitter's
                emit method. Defaults to None.

        Raises:
            ValueError: If state variables are provided for a non-template message, or if a template message is
                missing any of its expected keys.
        """
        variables = state_variables or {}
        extra_emit_kwargs = emit_kwargs or {}

        if not self.is_template:
            if variables:
                raise ValueError("State variables can only be provided if `is_template` is set to True.")
            text = self.message
        else:
            self._validate_variables(variables)
            # Only the keys found in the template are forwarded; extra state variables are ignored.
            injected = {key: variables[key] for key in self.variable_keys}
            text = self.message.format(**injected)

        await event_emitter.emit(text, **extra_emit_kwargs)

    def _validate_variables(self, variables: dict[str, Any]) -> None:
        """Validates the variables to ensure there are no missing keys.

        Args:
            variables (dict[str, Any]): The variables to be validated.

        Raises:
            ValueError: If any key in `variable_keys` is absent from the variables.
        """
        missing_keys = set(self.variable_keys).difference(variables)
        if not missing_keys:
            return
        raise ValueError(f"The following keys are missing in the variables: {missing_keys}")
@@ -0,0 +1,66 @@
1
+ from _typeshed import Incomplete
2
+ from gllm_core.event import EventEmitter as EventEmitter
3
+ from gllm_core.schema import Component as Component
4
+ from gllm_core.utils import get_placeholder_keys as get_placeholder_keys
5
+ from typing import Any
6
+
7
class Messenger(Component):
    """Emits a custom event message with possible access to the state variables.

    This component acts as an intermediary step, designed to be placed between other pipeline steps.
    It allows for event messaging operations to be performed outside individual components but still within
    the context of the pipeline execution.

    Attributes:
        message (str): The message to be sent, may contain placeholders enclosed in curly braces `{}`.
        is_template (bool): Whether the message is a template that can be injected with state variables.
            Defaults to True.
        variable_keys (list[str]): The keys of the message that can be injected with state variables.
            Only used if `is_template` is set to True.

    Plain string message example:
    ```python
    event_emitter = EventEmitter(handlers=[ConsoleEventHandler()])
    kwargs = {"event_emitter": event_emitter}

    messenger = Messenger("Executing component.", is_template=False)
    await messenger.run(**kwargs)
    ```

    Template message example:
    ```python
    event_emitter = EventEmitter(handlers=[ConsoleEventHandler()])
    state_variables = {"query": "Hi!", "top_k": 10}
    kwargs = {"event_emitter": event_emitter, "state_variables": state_variables}

    messenger = Messenger("Executing component for query {query} and top k {top_k}.")
    await messenger.run(**kwargs)
    ```
    """
    # Attribute types follow the `__init__` signature and the class docstring instead of `Incomplete`.
    message: str
    is_template: bool
    # NOTE(review): `list[str]` is taken from the class docstring; confirm the return type of
    # `get_placeholder_keys`.
    variable_keys: list[str]
    def __init__(self, message: str, is_template: bool = True) -> None:
        """Initializes a new instance of the Messenger class.

        Args:
            message (str): The message to be sent, may contain placeholders enclosed in curly braces `{}`.
            is_template (bool, optional): Whether the message is a template that can be injected with state
                variables. Defaults to True.
        """
    async def send_message(self, event_emitter: EventEmitter, state_variables: dict[str, Any] | None = None, emit_kwargs: dict[str, Any] | None = None) -> None:
        """Emits the message to the event emitter.

        This method validates the variables, formats the message if required, and then emits the message using the
        event emitter.

        Args:
            event_emitter (EventEmitter): The event emitter instance to emit the message.
            state_variables (dict[str, Any] | None, optional): The state variables to be injected into the message
                placeholders. Can only be provided if `is_template` is set to True. Defaults to None.
            emit_kwargs (dict[str, Any] | None, optional): The keyword arguments to be passed to the event emitter's
                emit method. Defaults to None.
        """
@@ -0,0 +1,8 @@
1
+ """Modules concerning the schemas used throughout the Gen AI applications."""
2
+
3
+ from gllm_core.schema.chunk import Chunk
4
+ from gllm_core.schema.component import Component, main
5
+ from gllm_core.schema.event import Event
6
+ from gllm_core.schema.tool import Tool, tool
7
+
8
+ __all__ = ["Chunk", "Component", "Event", "Tool", "main", "tool"]
@@ -0,0 +1,6 @@
1
+ from gllm_core.schema.chunk import Chunk as Chunk
2
+ from gllm_core.schema.component import Component as Component, main as main
3
+ from gllm_core.schema.event import Event as Event
4
+ from gllm_core.schema.tool import Tool as Tool, tool as tool
5
+
6
+ __all__ = ['Chunk', 'Component', 'Event', 'Tool', 'main', 'tool']
@@ -0,0 +1,148 @@
1
+ """Defines the Chunk schema, which represents a chunk of content retrieved from a vector store.
2
+
3
+ Authors:
4
+ Dimitrij Ray (dimitrij.ray@gdplabs.id)
5
+
6
+ References:
7
+ NONE
8
+ """
9
+
10
+ from typing import Any, Generic, Iterable, TypeVar
11
+ from uuid import uuid4
12
+
13
+ from pydantic import BaseModel, Field, field_validator
14
+
15
# Maximum number of characters of a string value shown in a preview before it is cut off with "...".
MAX_PREVIEW_LENGTH = 50
# Default number of items a sequence may hold before its preview is truncated.
MAX_ITEMS_PREVIEW = 3

T = TypeVar("T")


class _TruncatedIterable(Generic[T]):
    """Wraps an iterable so that its repr shows only the first and last elements when it is long.

    Attributes:
        items (list[T]): The elements of the wrapped iterable, materialized into a list.
        max_items_preview (int): Maximum number of items to show before truncation.
    """

    def __init__(self, items: Iterable[T], max_items_preview: int = MAX_ITEMS_PREVIEW) -> None:
        """Initialize a _TruncatedIterable.

        Args:
            items (Iterable[T]): The iterable to be truncated. It is consumed once and stored as a list.
            max_items_preview (int, optional): Maximum number of items to show before truncation.
                Defaults to MAX_ITEMS_PREVIEW.
        """
        self.items = list(items)
        self.max_items_preview = max_items_preview

    def __repr__(self) -> str:
        """Return a string representation of the truncated iterable.

        Returns:
            str: The string representation in the format [first, ..., last] if truncated,
                or [item1, item2, ...] if not truncated.
        """
        # The truncated form always shows two elements, so a list of at most two items is never truncated.
        # This guard also prevents indexing into an empty list or repeating a single element
        # when `max_items_preview` is smaller than 2.
        if len(self.items) <= max(self.max_items_preview, 2):
            return str(self.items)

        return f"[{self.items[0]!r}, ..., {self.items[-1]!r}]"
56
+
57
+
58
class Chunk(BaseModel, arbitrary_types_allowed=True):
    """A chunk of content retrieved from a vector store.

    Attributes:
        id (str): A unique identifier for the chunk. Defaults to a random UUID.
        content (str | bytes): The content of the chunk, either text or binary.
        metadata (dict[str, Any]): Additional metadata associated with the chunk. Defaults to an empty dictionary.
        score (float | None): Similarity score of the chunk (if available). Defaults to None.
    """

    id: str = Field(default_factory=lambda: str(uuid4()))
    content: str | bytes
    metadata: dict[str, Any] = Field(default_factory=dict)
    score: float | None = None

    @field_validator("content")
    @classmethod
    def validate_content(cls, value: str | bytes) -> str | bytes:
        """Validate the content of the Chunk.

        This is a class method because Pydantic validators require it; it follows Pydantic's signature.

        Args:
            value (str | bytes): The content to validate.

        Returns:
            str | bytes: The validated content, unchanged.

        Raises:
            ValueError: If the content is empty or not a string or bytes.
        """
        # Emptiness is checked first, so any falsy value is reported as empty.
        if not value:
            raise ValueError("Content must not be empty")
        if isinstance(value, (str, bytes)):
            return value
        raise ValueError("Content must be either str or bytes")

    def is_text(self) -> bool:
        """Tell whether the content is text.

        Returns:
            bool: True if the content is a `str`, False otherwise.
        """
        return isinstance(self.content, str)

    def is_binary(self) -> bool:
        """Tell whether the content is binary.

        Returns:
            bool: True if the content is `bytes`, False otherwise.
        """
        return isinstance(self.content, bytes)

    def _format_value(self, value: Any) -> Any:
        """Produce a compact preview of a value for use in the string representation.

        Strings are cut to MAX_PREVIEW_LENGTH characters, bytes become a fixed placeholder, lists and tuples
        are wrapped in `_TruncatedIterable`, and dictionaries are formatted value by value. Any other value
        is returned unchanged.

        Args:
            value (Any): The value to format.

        Returns:
            Any: The formatted value.
        """
        if isinstance(value, str):
            ellipsis = "..." if len(value) > MAX_PREVIEW_LENGTH else ""
            return value[:MAX_PREVIEW_LENGTH] + ellipsis

        if isinstance(value, bytes):
            return "(Binary content)"

        if isinstance(value, (list, tuple)):
            return _TruncatedIterable(value)

        if isinstance(value, dict):
            # Keys are kept as they are; only the values are previewed, recursively.
            return {key: self._format_value(item) for key, item in value.items()}

        return value

    def __repr__(self) -> str:
        """Return a string representation of the Chunk.

        Returns:
            str: The id, a content preview, a metadata preview and the score, in that order.
        """
        fields = (
            f"id={self.id}",
            f"content={self._format_value(self.content)}",
            f"metadata={self._format_value(self.metadata)}",
            f"score={self.score}",
        )
        return f"Chunk({', '.join(fields)})"
@@ -0,0 +1,66 @@
1
+ from _typeshed import Incomplete
2
+ from pydantic import BaseModel
3
+ from typing import Any, Generic, Iterable, TypeVar
4
+
5
MAX_PREVIEW_LENGTH: int
MAX_ITEMS_PREVIEW: int
T = TypeVar('T')

class _TruncatedIterable(Generic[T]):
    """Represents a truncated iterable with first and last elements visible.

    Attributes:
        items (list[T]): The elements of the wrapped iterable, stored as a list.
        max_items_preview (int): Maximum number of items to show before truncation.
    """
    # Attribute types follow the implementation, which stores `list(items)` and the given integer limit.
    items: list[T]
    max_items_preview: int
    def __init__(self, items: Iterable[T], max_items_preview: int = ...) -> None:
        """Initialize a _TruncatedIterable.

        Args:
            items (Iterable[T]): The iterable to be truncated.
            max_items_preview (int, optional): Maximum number of items to show before truncation.
                Defaults to MAX_ITEMS_PREVIEW.
        """
26
+
27
class Chunk(BaseModel, arbitrary_types_allowed=True):
    """A chunk of content retrieved from a vector store.

    Attributes:
        id (str): A unique identifier for the chunk. Defaults to a random UUID.
        content (str | bytes): The content of the chunk, either text or binary.
        metadata (dict[str, Any]): Additional metadata associated with the chunk. Defaults to an empty dictionary.
        score (float | None): Similarity score of the chunk (if available). Defaults to None.
    """
    id: str
    content: str | bytes
    metadata: dict[str, Any]
    score: float | None
    @classmethod
    def validate_content(cls, value: str | bytes) -> str | bytes:
        """Validate the content of the Chunk.

        This is a class method because Pydantic validators require it; it follows Pydantic's signature.

        Args:
            value (str | bytes): The content to validate.

        Returns:
            str | bytes: The validated content.

        Raises:
            ValueError: If the content is empty or not a string or bytes.
        """
    def is_text(self) -> bool:
        """Tell whether the content is text.

        Returns:
            bool: True if the content is text, False otherwise.
        """
    def is_binary(self) -> bool:
        """Tell whether the content is binary.

        Returns:
            bool: True if the content is binary, False otherwise.
        """