gllm-core-binary 0.4.4__py3-none-macosx_13_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gllm_core/__init__.py +1 -0
- gllm_core/__init__.pyi +0 -0
- gllm_core/adapters/__init__.py +5 -0
- gllm_core/adapters/__init__.pyi +3 -0
- gllm_core/adapters/tool/__init__.py +6 -0
- gllm_core/adapters/tool/__init__.pyi +4 -0
- gllm_core/adapters/tool/google_adk.py +91 -0
- gllm_core/adapters/tool/google_adk.pyi +23 -0
- gllm_core/adapters/tool/langchain.py +130 -0
- gllm_core/adapters/tool/langchain.pyi +31 -0
- gllm_core/constants.py +55 -0
- gllm_core/constants.pyi +36 -0
- gllm_core/event/__init__.py +6 -0
- gllm_core/event/__init__.pyi +4 -0
- gllm_core/event/event_emitter.py +211 -0
- gllm_core/event/event_emitter.pyi +155 -0
- gllm_core/event/handler/__init__.py +7 -0
- gllm_core/event/handler/__init__.pyi +5 -0
- gllm_core/event/handler/console_event_handler.py +48 -0
- gllm_core/event/handler/console_event_handler.pyi +32 -0
- gllm_core/event/handler/event_handler.py +89 -0
- gllm_core/event/handler/event_handler.pyi +51 -0
- gllm_core/event/handler/print_event_handler.py +130 -0
- gllm_core/event/handler/print_event_handler.pyi +33 -0
- gllm_core/event/handler/stream_event_handler.py +85 -0
- gllm_core/event/handler/stream_event_handler.pyi +62 -0
- gllm_core/event/hook/__init__.py +5 -0
- gllm_core/event/hook/__init__.pyi +3 -0
- gllm_core/event/hook/event_hook.py +30 -0
- gllm_core/event/hook/event_hook.pyi +18 -0
- gllm_core/event/hook/json_stringify_event_hook.py +32 -0
- gllm_core/event/hook/json_stringify_event_hook.pyi +16 -0
- gllm_core/event/messenger.py +133 -0
- gllm_core/event/messenger.pyi +66 -0
- gllm_core/schema/__init__.py +8 -0
- gllm_core/schema/__init__.pyi +6 -0
- gllm_core/schema/chunk.py +148 -0
- gllm_core/schema/chunk.pyi +66 -0
- gllm_core/schema/component.py +546 -0
- gllm_core/schema/component.pyi +205 -0
- gllm_core/schema/event.py +50 -0
- gllm_core/schema/event.pyi +33 -0
- gllm_core/schema/schema_generator.py +150 -0
- gllm_core/schema/schema_generator.pyi +35 -0
- gllm_core/schema/tool.py +418 -0
- gllm_core/schema/tool.pyi +198 -0
- gllm_core/utils/__init__.py +32 -0
- gllm_core/utils/__init__.pyi +13 -0
- gllm_core/utils/analyzer.py +256 -0
- gllm_core/utils/analyzer.pyi +123 -0
- gllm_core/utils/binary_handler_factory.py +99 -0
- gllm_core/utils/binary_handler_factory.pyi +62 -0
- gllm_core/utils/chunk_metadata_merger.py +102 -0
- gllm_core/utils/chunk_metadata_merger.pyi +41 -0
- gllm_core/utils/concurrency.py +184 -0
- gllm_core/utils/concurrency.pyi +94 -0
- gllm_core/utils/event_formatter.py +69 -0
- gllm_core/utils/event_formatter.pyi +30 -0
- gllm_core/utils/google_sheets.py +115 -0
- gllm_core/utils/google_sheets.pyi +18 -0
- gllm_core/utils/imports.py +91 -0
- gllm_core/utils/imports.pyi +42 -0
- gllm_core/utils/logger_manager.py +339 -0
- gllm_core/utils/logger_manager.pyi +176 -0
- gllm_core/utils/main_method_resolver.py +185 -0
- gllm_core/utils/main_method_resolver.pyi +54 -0
- gllm_core/utils/merger_method.py +130 -0
- gllm_core/utils/merger_method.pyi +49 -0
- gllm_core/utils/retry.py +258 -0
- gllm_core/utils/retry.pyi +41 -0
- gllm_core/utils/similarity.py +29 -0
- gllm_core/utils/similarity.pyi +10 -0
- gllm_core/utils/validation.py +26 -0
- gllm_core/utils/validation.pyi +12 -0
- gllm_core_binary-0.4.4.dist-info/METADATA +177 -0
- gllm_core_binary-0.4.4.dist-info/RECORD +78 -0
- gllm_core_binary-0.4.4.dist-info/WHEEL +5 -0
- gllm_core_binary-0.4.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from _typeshed import Incomplete
|
|
3
|
+
from gllm_core.event.handler.event_handler import BaseEventHandler as BaseEventHandler
|
|
4
|
+
from gllm_core.schema import Event as Event
|
|
5
|
+
from typing import AsyncGenerator
|
|
6
|
+
|
|
7
|
+
class StreamEventHandler(BaseEventHandler):
    """A class that manages an asynchronous stream of data using a queue.

    The StreamEventHandler class provides methods to manage an asynchronous stream, allowing data to be sent and
    retrieved in a non-blocking manner. The stream method yields items from the queue, and the emit method adds
    items to the queue. The stream can be closed by calling the close method, which ensures no further items
    are processed.

    Attributes:
        name (str): The name assigned to the event handler.
        color_map (dict[str, str]): The dictionary that maps certain event types to their
            corresponding colors in Rich format.
        queue (asyncio.Queue): The queue used to manage an asynchronous stream.
    """
    queue: asyncio.Queue
    # Typed as `Incomplete` by the stub generator; presumably holds the `stream_delay`
    # float passed to `__init__` -- TODO confirm against the implementation module.
    stream_delay: Incomplete
    def __init__(self, name: str | None = None, stream_delay: float = 0.001) -> None:
        """Initializes a new instance of the StreamEventHandler class.

        Args:
            name (str | None, optional): The name assigned to the event handler. Defaults to None,
                in which case the class name will be used.
            stream_delay (float, optional): The delay duration after each data stream. Needed in order for the stream
                manager to process the data stream properly. Defaults to 0.001.
        """
    async def emit(self, event: Event) -> None:
        """Emits the given event by sending it to the client via an asynchronous queue.

        This method serializes the event to JSON and sends it to the client by adding it to an asynchronous
        queue. It also introduces a delay specified by `stream_delay` to make sure that the stream data can
        be processed properly.

        Args:
            event (Event): The event to be emitted.

        Returns:
            None
        """
    async def stream(self) -> AsyncGenerator:
        """Asynchronously yields items from the queue until a StopIteration item is encountered.

        This method continuously retrieves items from the queue and yields them. The iteration stops when a
        StopIteration item is encountered, at which point the method returns.

        Returns:
            AsyncGenerator: An asynchronous generator yielding items from the queue.
        """
    async def close(self) -> None:
        """Immediately stops the stream by placing a StopIteration item in the queue.

        This method inserts a StopIteration item into the queue without waiting, which signals the stream to stop
        processing further items.

        Returns:
            None
        """
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Defines an interface for event hooks.
|
|
2
|
+
|
|
3
|
+
Authors:
|
|
4
|
+
Henry Wicaksono (henry.wicaksono@gdplabs.id)
|
|
5
|
+
|
|
6
|
+
References:
|
|
7
|
+
NONE
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
|
|
12
|
+
from gllm_core.schema import Event
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BaseEventHook(ABC):
    """Interface that every event hook has to implement.

    A hook is an awaitable callable: it is awaited with an `Event` and hands an `Event` back.
    """

    @abstractmethod
    async def __call__(self, event: Event) -> Event:
        """Run the hook against a single event.

        Subclasses provide the actual behaviour; this base implementation only raises.

        Args:
            event (Event): The event the hook is applied to.

        Returns:
            Event: The event produced by the hook.

        Raises:
            NotImplementedError: Always, when the base implementation itself is awaited.
        """
        raise NotImplementedError
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from gllm_core.schema import Event as Event
|
|
4
|
+
|
|
5
|
+
class BaseEventHook(ABC, metaclass=abc.ABCMeta):
    """An abstract base class for all event hooks.

    A hook is an awaitable callable that takes an `Event` and returns an `Event`.
    """
    @abstractmethod
    async def __call__(self, event: Event) -> Event:
        """Applies the hook to the event.

        This abstract method must be implemented by subclasses to define how the hook is applied to the event.

        Args:
            event (Event): The event to apply the hook to.

        Returns:
            Event: The event after the hook is applied.
        """
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Defines an event hook to JSON stringify the event value.
|
|
2
|
+
|
|
3
|
+
Authors:
|
|
4
|
+
Henry Wicaksono (henry.wicaksono@gdplabs.id)
|
|
5
|
+
|
|
6
|
+
References:
|
|
7
|
+
NONE
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
|
|
12
|
+
from gllm_core.event.hook.event_hook import BaseEventHook
|
|
13
|
+
from gllm_core.schema import Event
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class JSONStringifyEventHook(BaseEventHook):
    """Event hook that turns a dictionary event value into its JSON text."""

    async def __call__(self, event: Event) -> Event:
        """Run the hook against a single event.

        When `event.value` is a dictionary it is replaced, on the same event object, by the string
        produced by `json.dumps`. Any other value is left untouched.

        Args:
            event (Event): The event the hook is applied to.

        Returns:
            Event: The very same event instance that was passed in.
        """
        payload = event.value
        if not isinstance(payload, dict):
            # Nothing to stringify: hand the event back unchanged.
            return event

        event.value = json.dumps(payload)
        return event
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from gllm_core.event.hook.event_hook import BaseEventHook as BaseEventHook
|
|
2
|
+
from gllm_core.schema import Event as Event
|
|
3
|
+
|
|
4
|
+
class JSONStringifyEventHook(BaseEventHook):
    """An event hook to JSON stringify the event value."""
    async def __call__(self, event: Event) -> Event:
        """Applies the hook to the event.

        This method will convert the event value to a JSON string if it is a dictionary.
        Values of any other type are described as unaffected by this hook.

        Args:
            event (Event): The event to apply the hook to.

        Returns:
            Event: The event after the hook is applied.
        """
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Defines a component used for custom event messaging in Gen AI applications pipelines.
|
|
2
|
+
|
|
3
|
+
Authors:
|
|
4
|
+
Henry Wicaksono (henry.wicaksono@gdplabs.id)
|
|
5
|
+
|
|
6
|
+
References:
|
|
7
|
+
NONE
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from gllm_core.event import EventEmitter
|
|
13
|
+
from gllm_core.schema import Component
|
|
14
|
+
from gllm_core.utils import get_placeholder_keys
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Messenger(Component):
    """Pipeline component that emits a custom message through an event emitter.

    The component is meant to sit between other pipeline steps, so that messages can be emitted from the
    pipeline itself rather than from inside an individual component. The message may be a template whose
    `{placeholders}` are filled from the state variables supplied at run time.

    Attributes:
        message (str): The message to be sent, may contain placeholders enclosed in curly braces `{}`.
        is_template (bool): Whether the message is a template that can be injected with state variables.
        variable_keys (list[str]): The placeholder keys found in the message. Empty when `is_template`
            is False.

    Plain string message example:
    ```python
    event_emitter = EventEmitter(handlers=[ConsoleEventHandler()])
    kwargs = {"event_emitter": event_emitter}

    messenger = Messenger("Executing component.", is_template=False)
    await messenger.run(**kwargs)
    ```

    Template message example:
    ```python
    event_emitter = EventEmitter(handlers=[ConsoleEventHandler()])
    state_variables = {"query": "Hi!", "top_k": 10}
    kwargs = {"event_emitter": event_emitter, "state_variables": state_variables}

    messenger = Messenger("Executing component for query {query} and top k {top_k}.")
    await messenger.run(**kwargs)
    ```
    """

    def __init__(self, message: str, is_template: bool = True):
        """Create a messenger for the given message.

        Args:
            message (str): The message to be sent, may contain placeholders enclosed in curly braces `{}`.
            is_template (bool, optional): Whether the message is a template that can be injected with state
                variables. Defaults to True.

        NOTE(review): earlier documentation listed a `ValueError` here; nothing in this constructor raises
        one directly, so any such error would have to originate in `get_placeholder_keys`.
        """
        self.message = message
        self.is_template = is_template
        if is_template:
            self.variable_keys = get_placeholder_keys(message)
        else:
            # A plain message has no placeholders to fill.
            self.variable_keys = []

    async def _run(self, **kwargs: Any) -> None:
        """Check the run arguments and forward them to `send_message`.

        Args:
            **kwargs (Any): Run arguments. `event_emitter` is mandatory and must be an `EventEmitter`;
                `state_variables` and `emit_kwargs` are optional.

        Raises:
            KeyError: If `event_emitter` is absent or is not an `EventEmitter` instance.
        """
        emitter = kwargs.get("event_emitter")
        if not isinstance(emitter, EventEmitter):
            raise KeyError("The input kwargs must include an `event_emitter` key with an EventEmitter instance.")

        return await self.send_message(
            emitter,
            state_variables=kwargs.get("state_variables"),
            emit_kwargs=kwargs.get("emit_kwargs"),
        )

    async def send_message(
        self,
        event_emitter: EventEmitter,
        state_variables: dict[str, Any] | None = None,
        emit_kwargs: dict[str, Any] | None = None,
    ) -> None:
        """Build the final message text and emit it.

        For a template message the state variables are validated and substituted into the placeholders;
        a plain message is emitted verbatim.

        Args:
            event_emitter (EventEmitter): The event emitter instance to emit the message.
            state_variables (dict[str, Any] | None, optional): Values for the message placeholders. May only
                be non-empty when `is_template` is True. Defaults to None.
            emit_kwargs (dict[str, Any] | None, optional): Extra keyword arguments forwarded to the emitter's
                `emit` method. Defaults to None.

        Raises:
            ValueError: If a placeholder key is missing from `state_variables`, or if state variables are
                supplied for a non-template message.
        """
        variables = state_variables if state_variables else {}
        extra_kwargs = emit_kwargs if emit_kwargs else {}

        if not self.is_template:
            if variables:
                raise ValueError("State variables can only be provided if `is_template` is set to True.")
            text = self.message
        else:
            self._validate_variables(variables)
            # Only the keys that actually appear in the template are passed to `format`.
            substitutions = {name: variables[name] for name in self.variable_keys}
            text = self.message.format(**substitutions)

        await event_emitter.emit(text, **extra_kwargs)

    def _validate_variables(self, variables: dict[str, Any]) -> None:
        """Ensure every placeholder key has a value.

        Args:
            variables (dict[str, Any]): The variables to be validated.

        Raises:
            ValueError: If one or more of `variable_keys` is not present in `variables`.
        """
        absent = set(self.variable_keys) - set(variables.keys())
        if not absent:
            return
        raise ValueError(f"The following keys are missing in the variables: {absent}")
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_core.event import EventEmitter as EventEmitter
|
|
3
|
+
from gllm_core.schema import Component as Component
|
|
4
|
+
from gllm_core.utils import get_placeholder_keys as get_placeholder_keys
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
class Messenger(Component):
    '''Emits a custom event message with possible access to the state variables.

    This component acts as an intermediary step, designed to be placed between other pipeline steps.
    It allows for event messaging operations to be performed outside individual components but still within
    the context of the pipeline execution.

    Attributes:
        message (str): The message to be sent, may contain placeholders enclosed in curly braces `{}`.
        is_template (bool): Whether the message is a template that can be injected with state variables.
            Defaults to True.
        variable_keys (list[str]): The keys of the message that can be injected with state variables.
            Only used if `is_template` is set to True.

    Plain string message example:
    ```python
    event_emitter = EventEmitter(handlers=[ConsoleEventHandler()])
    kwargs = {"event_emitter": event_emitter}

    messenger = Messenger("Executing component.", is_template=False)
    await messenger.run(**kwargs)
    ```

    Template message example:
    ```python
    event_emitter = EventEmitter(handlers=[ConsoleEventHandler()])
    state_variables = {"query": "Hi!", "top_k": 10}
    kwargs = {"event_emitter": event_emitter, "state_variables": state_variables}

    messenger = Messenger("Executing component for query {query} and top k {top_k}.")
    await messenger.run(**kwargs)
    ```
    '''
    # Attribute types follow the class docstring and the `__init__` parameter annotations.
    message: str
    is_template: bool
    variable_keys: list[str]
    def __init__(self, message: str, is_template: bool = True) -> None:
        """Initializes a new instance of the Messenger class.

        Args:
            message (str): The message to be sent, may contain placeholders enclosed in curly braces `{}`.
            is_template (bool, optional): Whether the message is a template that can be injected with state variables.
                Defaults to True.

        Raises:
            ValueError: If the keys of the message do not match the provided keys.
        """
    async def send_message(self, event_emitter: EventEmitter, state_variables: dict[str, Any] | None = None, emit_kwargs: dict[str, Any] | None = None) -> None:
        """Emits the message to the event emitter.

        This method validates the variables, formats the message if required, and then emits the message using the
        event emitter.

        Args:
            event_emitter (EventEmitter): The event emitter instance to emit the message.
            state_variables (dict[str, Any] | None, optional): The state variables to be injected into the message
                placeholders. Can only be provided if `is_template` is set to True. Defaults to None.
            emit_kwargs (dict[str, Any] | None, optional): The keyword arguments to be passed to the event emitter's
                emit method. Defaults to None.
        """
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Modules concerning the schemas used throughout the Gen AI applications."""
|
|
2
|
+
|
|
3
|
+
from gllm_core.schema.chunk import Chunk
|
|
4
|
+
from gllm_core.schema.component import Component, main
|
|
5
|
+
from gllm_core.schema.event import Event
|
|
6
|
+
from gllm_core.schema.tool import Tool, tool
|
|
7
|
+
|
|
8
|
+
__all__ = ["Chunk", "Component", "Event", "Tool", "main", "tool"]
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
from gllm_core.schema.chunk import Chunk as Chunk
|
|
2
|
+
from gllm_core.schema.component import Component as Component, main as main
|
|
3
|
+
from gllm_core.schema.event import Event as Event
|
|
4
|
+
from gllm_core.schema.tool import Tool as Tool, tool as tool
|
|
5
|
+
|
|
6
|
+
__all__ = ['Chunk', 'Component', 'Event', 'Tool', 'main', 'tool']
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Defines the Chunk schema, which represents a chunk of content retrieved from a vector store.
|
|
2
|
+
|
|
3
|
+
Authors:
|
|
4
|
+
Dimitrij Ray (dimitrij.ray@gdplabs.id)
|
|
5
|
+
|
|
6
|
+
References:
|
|
7
|
+
NONE
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import Any, Generic, Iterable, TypeVar
|
|
11
|
+
from uuid import uuid4
|
|
12
|
+
|
|
13
|
+
from pydantic import BaseModel, Field, field_validator
|
|
14
|
+
|
|
15
|
+
# Number of leading characters of a string value kept in a Chunk repr; longer strings get "..." appended.
MAX_PREVIEW_LENGTH = 50
# Default number of items a sequence may hold before its repr is shortened to "[first, ..., last]".
MAX_ITEMS_PREVIEW = 3

# Element type of the iterable wrapped by _TruncatedIterable.
T = TypeVar("T")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class _TruncatedIterable(Generic[T]):
    """Wrapper whose repr shows only the first and last element of a long iterable.

    Attributes:
        items (list[T]): The wrapped elements, materialised into a list.
        max_items_preview (int): Largest length that is still printed in full.
    """

    def __init__(self, items: Iterable[T], max_items_preview: int = MAX_ITEMS_PREVIEW) -> None:
        """Store the elements and the truncation threshold.

        Args:
            items (Iterable[T]): The iterable to be truncated; it is copied into a list.
            max_items_preview (int, optional): Maximum number of items to show before truncation.
                Defaults to MAX_ITEMS_PREVIEW.
        """
        self.items = list(items)
        self.max_items_preview = max_items_preview

    def __repr__(self) -> str:
        """Render the elements, shortening the output when there are too many.

        Returns:
            str: `[first, ..., last]` when the number of items exceeds `max_items_preview`, otherwise the
                ordinary list representation.
        """
        elements = self.items
        if len(elements) > self.max_items_preview:
            head = f"{elements[0]!r}"
            tail = f"{elements[-1]!r}"
            return f"[{head}, ..., {tail}]"
        return str(elements)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class Chunk(BaseModel, arbitrary_types_allowed=True):
    """A piece of content retrieved from a vector store.

    Attributes:
        id (str): Identifier of the chunk. A random UUID string is generated when none is given.
        content (str | bytes): The chunk payload, either text or binary.
        metadata (dict[str, Any]): Extra information attached to the chunk. Defaults to an empty dictionary.
        score (float | None): Similarity score of the chunk, when available. Defaults to None.
    """

    id: str = Field(default_factory=lambda: str(uuid4()))
    content: str | bytes
    metadata: dict[str, Any] = Field(default_factory=dict)
    score: float | None = None

    @field_validator("content")
    @classmethod
    def validate_content(cls, value: str | bytes) -> str | bytes:
        """Reject empty or wrongly typed content.

        Written as a class method because that is the signature Pydantic expects from a field validator.

        Args:
            value (str | bytes): The content to validate.

        Returns:
            str | bytes: The content, unchanged.

        Raises:
            ValueError: If the content is empty or is neither `str` nor `bytes`.
        """
        if not value:
            raise ValueError("Content must not be empty")
        if isinstance(value, (str, bytes)):
            return value
        raise ValueError("Content must be either str or bytes")

    def is_text(self) -> bool:
        """Tell whether the content is text.

        Returns:
            bool: True when `content` is a `str`, False otherwise.
        """
        return isinstance(self.content, str)

    def is_binary(self) -> bool:
        """Tell whether the content is binary.

        Returns:
            bool: True when `content` is `bytes`, False otherwise.
        """
        return isinstance(self.content, bytes)

    def _format_value(self, value: Any) -> Any:
        """Produce a compact stand-in for a value, for use in `__repr__`.

        Strings are cut to MAX_PREVIEW_LENGTH characters, bytes become a fixed placeholder, lists and tuples
        are wrapped in `_TruncatedIterable`, and dictionaries are processed value by value. Anything else is
        returned as is.

        Args:
            value (Any): The value to format.

        Returns:
            Any: The formatted value.
        """
        if isinstance(value, str):
            suffix = "..." if len(value) > MAX_PREVIEW_LENGTH else ""
            head = value[:MAX_PREVIEW_LENGTH]
            return f"{head}{suffix}"

        if isinstance(value, bytes):
            return "(Binary content)"

        if isinstance(value, (list, tuple)):
            return _TruncatedIterable(value)

        if isinstance(value, dict):
            return {key: self._format_value(item) for key, item in value.items()}

        return value

    def __repr__(self) -> str:
        """Build a shortened textual representation of the chunk.

        Returns:
            str: The representation, with content and metadata passed through `_format_value`.
        """
        preview = self._format_value(self.content)
        meta = self._format_value(self.metadata)
        return f"Chunk(id={self.id}, content={preview}, metadata={meta}, score={self.score})"
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from pydantic import BaseModel
|
|
3
|
+
from typing import Any, Generic, Iterable, TypeVar
|
|
4
|
+
|
|
5
|
+
MAX_PREVIEW_LENGTH: int
|
|
6
|
+
MAX_ITEMS_PREVIEW: int
|
|
7
|
+
T = TypeVar('T')
|
|
8
|
+
|
|
9
|
+
class _TruncatedIterable(Generic[T]):
    """Represents a truncated iterable with first and last elements visible.

    Attributes:
        items (list[T]): The elements to be truncated, stored as a list.
        max_items_preview (int): Maximum number of items to show before truncation.
    """
    # The implementation stores `list(items)` and the integer threshold passed to `__init__`.
    items: list[T]
    max_items_preview: int
    def __init__(self, items: Iterable[T], max_items_preview: int = ...) -> None:
        """Initialize a TruncatedIterable.

        Args:
            items (Iterable[T]): The iterable to be truncated.
            max_items_preview (int, optional): Maximum number of items to show before truncation.
                Defaults to MAX_ITEMS_PREVIEW.
        """
|
|
26
|
+
|
|
27
|
+
class Chunk(BaseModel, arbitrary_types_allowed=True):
    """Represents a chunk of content retrieved from a vector store.

    Attributes:
        id (str): A unique identifier for the chunk. Defaults to a random UUID.
        content (str | bytes): The content of the chunk, either text or binary.
        metadata (dict[str, Any]): Additional metadata associated with the chunk. Defaults to an empty dictionary.
        score (float | None): Similarity score of the chunk (if available). Defaults to None.
    """
    id: str
    content: str | bytes
    metadata: dict[str, Any]
    score: float | None
    # In the implementation module this method is additionally decorated with
    # `@field_validator("content")`; the generated stub only keeps `@classmethod`.
    @classmethod
    def validate_content(cls, value: str | bytes) -> str | bytes:
        """Validate the content of the Chunk.

        This is a class method required by Pydantic validators. As such, it follows its signature and conventions.

        Args:
            value (str | bytes): The content to validate.

        Returns:
            str | bytes: The validated content.

        Raises:
            ValueError: If the content is empty or not a string or bytes.
        """
    def is_text(self) -> bool:
        """Check if the content is text.

        Returns:
            bool: True if the content is text, False otherwise.
        """
    def is_binary(self) -> bool:
        """Check if the content is binary.

        Returns:
            bool: True if the content is binary, False otherwise.
        """
|