gllm-core-binary 0.4.4__py3-none-manylinux_2_31_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gllm_core/__init__.py +1 -0
- gllm_core/__init__.pyi +0 -0
- gllm_core/adapters/__init__.py +5 -0
- gllm_core/adapters/__init__.pyi +3 -0
- gllm_core/adapters/tool/__init__.py +6 -0
- gllm_core/adapters/tool/__init__.pyi +4 -0
- gllm_core/adapters/tool/google_adk.py +91 -0
- gllm_core/adapters/tool/google_adk.pyi +23 -0
- gllm_core/adapters/tool/langchain.py +130 -0
- gllm_core/adapters/tool/langchain.pyi +31 -0
- gllm_core/constants.py +55 -0
- gllm_core/constants.pyi +36 -0
- gllm_core/event/__init__.py +6 -0
- gllm_core/event/__init__.pyi +4 -0
- gllm_core/event/event_emitter.py +211 -0
- gllm_core/event/event_emitter.pyi +155 -0
- gllm_core/event/handler/__init__.py +7 -0
- gllm_core/event/handler/__init__.pyi +5 -0
- gllm_core/event/handler/console_event_handler.py +48 -0
- gllm_core/event/handler/console_event_handler.pyi +32 -0
- gllm_core/event/handler/event_handler.py +89 -0
- gllm_core/event/handler/event_handler.pyi +51 -0
- gllm_core/event/handler/print_event_handler.py +130 -0
- gllm_core/event/handler/print_event_handler.pyi +33 -0
- gllm_core/event/handler/stream_event_handler.py +85 -0
- gllm_core/event/handler/stream_event_handler.pyi +62 -0
- gllm_core/event/hook/__init__.py +5 -0
- gllm_core/event/hook/__init__.pyi +3 -0
- gllm_core/event/hook/event_hook.py +30 -0
- gllm_core/event/hook/event_hook.pyi +18 -0
- gllm_core/event/hook/json_stringify_event_hook.py +32 -0
- gllm_core/event/hook/json_stringify_event_hook.pyi +16 -0
- gllm_core/event/messenger.py +133 -0
- gllm_core/event/messenger.pyi +66 -0
- gllm_core/schema/__init__.py +8 -0
- gllm_core/schema/__init__.pyi +6 -0
- gllm_core/schema/chunk.py +148 -0
- gllm_core/schema/chunk.pyi +66 -0
- gllm_core/schema/component.py +546 -0
- gllm_core/schema/component.pyi +205 -0
- gllm_core/schema/event.py +50 -0
- gllm_core/schema/event.pyi +33 -0
- gllm_core/schema/schema_generator.py +150 -0
- gllm_core/schema/schema_generator.pyi +35 -0
- gllm_core/schema/tool.py +418 -0
- gllm_core/schema/tool.pyi +198 -0
- gllm_core/utils/__init__.py +32 -0
- gllm_core/utils/__init__.pyi +13 -0
- gllm_core/utils/analyzer.py +256 -0
- gllm_core/utils/analyzer.pyi +123 -0
- gllm_core/utils/binary_handler_factory.py +99 -0
- gllm_core/utils/binary_handler_factory.pyi +62 -0
- gllm_core/utils/chunk_metadata_merger.py +102 -0
- gllm_core/utils/chunk_metadata_merger.pyi +41 -0
- gllm_core/utils/concurrency.py +184 -0
- gllm_core/utils/concurrency.pyi +94 -0
- gllm_core/utils/event_formatter.py +69 -0
- gllm_core/utils/event_formatter.pyi +30 -0
- gllm_core/utils/google_sheets.py +115 -0
- gllm_core/utils/google_sheets.pyi +18 -0
- gllm_core/utils/imports.py +91 -0
- gllm_core/utils/imports.pyi +42 -0
- gllm_core/utils/logger_manager.py +339 -0
- gllm_core/utils/logger_manager.pyi +176 -0
- gllm_core/utils/main_method_resolver.py +185 -0
- gllm_core/utils/main_method_resolver.pyi +54 -0
- gllm_core/utils/merger_method.py +130 -0
- gllm_core/utils/merger_method.pyi +49 -0
- gllm_core/utils/retry.py +258 -0
- gllm_core/utils/retry.pyi +41 -0
- gllm_core/utils/similarity.py +29 -0
- gllm_core/utils/similarity.pyi +10 -0
- gllm_core/utils/validation.py +26 -0
- gllm_core/utils/validation.pyi +12 -0
- gllm_core_binary-0.4.4.dist-info/METADATA +177 -0
- gllm_core_binary-0.4.4.dist-info/RECORD +78 -0
- gllm_core_binary-0.4.4.dist-info/WHEEL +5 -0
- gllm_core_binary-0.4.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""Concurrency utilities for bridging sync and async code.
|
|
2
|
+
|
|
3
|
+
This module provides two primary helpers:
|
|
4
|
+
|
|
5
|
+
1. asyncify: Wrap a synchronous function so it can be awaited in async code
|
|
6
|
+
by offloading it to a worker thread using AnyIO.
|
|
7
|
+
2. syncify: Wrap an asynchronous function so it can be called from synchronous
|
|
8
|
+
code. By default, this uses a shared AnyIO BlockingPortal running in a
|
|
9
|
+
background thread.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
```python
|
|
13
|
+
from gllm_core.utils.concurrency import asyncify, syncify
|
|
14
|
+
|
|
15
|
+
# Asyncify a sync function
|
|
16
|
+
async_op = asyncify(blocking_fn)
|
|
17
|
+
result = await async_op(arg1, arg2)
|
|
18
|
+
|
|
19
|
+
# Syncify an async function
|
|
20
|
+
sync_op = syncify(async_fn)
|
|
21
|
+
result = sync_op(arg1, arg2)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Notes:
|
|
25
|
+
1. For asyncify: Cancelling an await of an asyncified sync function cancels the awaiter, but
|
|
26
|
+
the underlying thread cannot be forcibly interrupted. The function continues to run until it returns.
|
|
27
|
+
2. For syncify: A shared default BlockingPortal is lazily created on first use and shut down
|
|
28
|
+
at process exit.
|
|
29
|
+
|
|
30
|
+
Authors:
|
|
31
|
+
Dimitrij Ray (dimitrij.ray@gdplabs.id)
|
|
32
|
+
|
|
33
|
+
References:
|
|
34
|
+
NONE
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import atexit
|
|
40
|
+
import threading
|
|
41
|
+
from typing import Awaitable, Callable, ContextManager, ParamSpec, TypeVar
|
|
42
|
+
|
|
43
|
+
import anyio
|
|
44
|
+
from anyio.abc import BlockingPortal
|
|
45
|
+
from anyio.from_thread import start_blocking_portal
|
|
46
|
+
|
|
47
|
+
# Parameter specification of the wrapped callable; lets asyncify/syncify expose the same call signature.
P = ParamSpec("P")
# Return type of the wrapped callable.
R = TypeVar("R")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class _DefaultPortalManager:
|
|
52
|
+
"""Lazily manages a process-wide AnyIO BlockingPortal.
|
|
53
|
+
|
|
54
|
+
The portal runs an event loop in a background thread started via
|
|
55
|
+
anyio.from_thread.start_blocking_portal(). It is created on first access and stopped
|
|
56
|
+
automatically at interpreter shutdown.
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
_lock: threading.Lock
|
|
60
|
+
_portal: BlockingPortal | None
|
|
61
|
+
_cm: ContextManager[BlockingPortal] | None
|
|
62
|
+
|
|
63
|
+
def __init__(self) -> None:
|
|
64
|
+
"""Initialize the default portal manager."""
|
|
65
|
+
self._lock = threading.Lock()
|
|
66
|
+
self._portal = None
|
|
67
|
+
self._cm = None
|
|
68
|
+
|
|
69
|
+
def get(self) -> BlockingPortal:
|
|
70
|
+
"""Return the shared BlockingPortal, creating it if necessary.
|
|
71
|
+
|
|
72
|
+
This method is thread-safe: concurrent callers will see the same portal.
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
BlockingPortal: The shared default portal.
|
|
76
|
+
"""
|
|
77
|
+
with self._lock:
|
|
78
|
+
portal = self._portal
|
|
79
|
+
if portal is None:
|
|
80
|
+
cm = start_blocking_portal()
|
|
81
|
+
portal = cm.__enter__()
|
|
82
|
+
|
|
83
|
+
atexit.register(cm.__exit__, None, None, None)
|
|
84
|
+
|
|
85
|
+
self._cm = cm
|
|
86
|
+
self._portal = portal
|
|
87
|
+
|
|
88
|
+
return portal
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Module-level manager instance used by get_default_portal(); constructing it does not start a portal.
_default_portal_mgr = _DefaultPortalManager()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def get_default_portal() -> BlockingPortal:
    """Fetch the shared default BlockingPortal from the module-level manager.

    Returns:
        BlockingPortal: A process-wide portal running on a background thread.
    """
    shared_portal = _default_portal_mgr.get()
    return shared_portal
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def asyncify(
|
|
104
|
+
func: Callable[P, R], *, cancellable: bool = False, limiter: anyio.CapacityLimiter | None = None
|
|
105
|
+
) -> Callable[P, Awaitable[R]]:
|
|
106
|
+
"""Wrap a sync function into an awaitable callable using a worker thread.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
func (Callable[P, R]): Synchronous function to wrap.
|
|
110
|
+
cancellable (bool, optional): If True, allow cancellation of the awaiter while running in a
|
|
111
|
+
worker thread. Defaults to False.
|
|
112
|
+
limiter (anyio.CapacityLimiter | None, optional): Capacity limiter to throttle concurrent
|
|
113
|
+
thread usage. Defaults to None.
|
|
114
|
+
|
|
115
|
+
Returns:
|
|
116
|
+
Callable[P, Awaitable[R]]: An async function that when awaited will execute `func` in a
|
|
117
|
+
worker thread and return its result.
|
|
118
|
+
|
|
119
|
+
Usage:
|
|
120
|
+
```python
|
|
121
|
+
async def handler() -> int:
|
|
122
|
+
wrapped = asyncify(blocking_func)
|
|
123
|
+
return await wrapped(1, 2)
|
|
124
|
+
```
|
|
125
|
+
"""
|
|
126
|
+
|
|
127
|
+
async def _wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
|
|
128
|
+
return await anyio.to_thread.run_sync(lambda: func(*args, **kwargs), abandon_on_cancel=cancellable, limiter=limiter)
|
|
129
|
+
|
|
130
|
+
return _wrapper
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def syncify(
|
|
134
|
+
async_func: Callable[P, Awaitable[R]],
|
|
135
|
+
*,
|
|
136
|
+
portal: BlockingPortal | None = None,
|
|
137
|
+
) -> Callable[P, R]:
|
|
138
|
+
"""Wrap an async function to be callable from synchronous code.
|
|
139
|
+
|
|
140
|
+
Lifecycle and portals:
|
|
141
|
+
1. This helper uses an already running AnyIO `BlockingPortal` to execute the coroutine.
|
|
142
|
+
2. If `portal` is not provided, a process-wide shared portal is used. Its lifecycle is
|
|
143
|
+
managed internally: it is created lazily on first use and shut down automatically at process exit.
|
|
144
|
+
3. If you provide a `portal`, you are expected to manage its lifecycle, typically with a
|
|
145
|
+
context manager. This is recommended when making many calls in a bounded scope since it
|
|
146
|
+
avoids per-call startup costs while allowing deterministic teardown.
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
async_func (Callable[P, Awaitable[R]]): Asynchronous function to wrap.
|
|
150
|
+
portal (BlockingPortal | None, optional): Portal to use for calling the async function
|
|
151
|
+
from sync code. Defaults to None, in which case a shared default portal is used.
|
|
152
|
+
|
|
153
|
+
Returns:
|
|
154
|
+
Callable[P, R]: A synchronous function that runs the coroutine and returns its result.
|
|
155
|
+
|
|
156
|
+
Usage:
|
|
157
|
+
```python
|
|
158
|
+
# Use the default shared portal (most convenient)
|
|
159
|
+
def do_work(x: int) -> int:
|
|
160
|
+
sync_call = syncify(async_func)
|
|
161
|
+
return sync_call(x)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
# Reuse a scoped portal for multiple calls (deterministic lifecycle)
|
|
166
|
+
from anyio.from_thread import start_blocking_portal
|
|
167
|
+
|
|
168
|
+
with start_blocking_portal() as portal:
|
|
169
|
+
sync_call = syncify(async_func, portal=portal)
|
|
170
|
+
a = sync_call(1)
|
|
171
|
+
b = sync_call(2)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Notes:
|
|
175
|
+
Creating a brand-new portal per call is discouraged due to the overhead of spinning up
|
|
176
|
+
and tearing down a background event loop/thread. Prefer the shared portal or a scoped
|
|
177
|
+
portal reused for a batch of calls.
|
|
178
|
+
"""
|
|
179
|
+
|
|
180
|
+
def _wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
|
|
181
|
+
p: BlockingPortal = portal if portal is not None else get_default_portal()
|
|
182
|
+
return p.call(lambda: async_func(*args, **kwargs))
|
|
183
|
+
|
|
184
|
+
return _wrapper
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import anyio
|
|
2
|
+
from anyio.abc import BlockingPortal as BlockingPortal
|
|
3
|
+
from typing import Awaitable, Callable, ParamSpec, TypeVar
|
|
4
|
+
|
|
5
|
+
# Parameter specification of the wrapped callable, shared by the asyncify/syncify signatures below.
P = ParamSpec('P')
# Return type of the wrapped callable.
R = TypeVar('R')
|
|
7
|
+
|
|
8
|
+
class _DefaultPortalManager:
    """Lazily manages a process-wide AnyIO BlockingPortal.

    The portal runs an event loop in a background thread started via
    anyio.from_thread.start_blocking_portal(). It is created on first access and stopped
    automatically at interpreter shutdown.
    """
    def __init__(self) -> None:
        """Initialize the default portal manager.

        No portal is started here; creation is deferred to the first `get()` call.
        """
    def get(self) -> BlockingPortal:
        """Return the shared BlockingPortal, creating it if necessary.

        This method is thread-safe: concurrent callers will see the same portal.

        Returns:
            BlockingPortal: The shared default portal.
        """
|
|
25
|
+
|
|
26
|
+
def get_default_portal() -> BlockingPortal:
    """Return the shared default BlockingPortal.

    The portal is created lazily on the first call and reused by subsequent calls.

    Returns:
        BlockingPortal: A process-wide portal running on a background thread.
    """
|
|
32
|
+
def asyncify(func: Callable[P, R], *, cancellable: bool = False, limiter: anyio.CapacityLimiter | None = None) -> Callable[P, Awaitable[R]]:
    """Wrap a sync function into an awaitable callable using a worker thread.

    Args:
        func (Callable[P, R]): Synchronous function to wrap.
        cancellable (bool, optional): If True, allow cancellation of the awaiter while running in a
            worker thread. The worker thread itself is not interrupted. Defaults to False.
        limiter (anyio.CapacityLimiter | None, optional): Capacity limiter to throttle concurrent
            thread usage. Defaults to None.

    Returns:
        Callable[P, Awaitable[R]]: An async function that when awaited will execute `func` in a
            worker thread and return its result.

    Usage:
        ```python
        async def handler() -> int:
            wrapped = asyncify(blocking_func)
            return await wrapped(1, 2)
        ```
    """
|
|
53
|
+
def syncify(async_func: Callable[P, Awaitable[R]], *, portal: BlockingPortal | None = None) -> Callable[P, R]:
    """Wrap an async function to be callable from synchronous code.

    Lifecycle and portals:
        1. This helper uses an already running AnyIO `BlockingPortal` to execute the coroutine.
        2. If `portal` is not provided, a process-wide shared portal is used. Its lifecycle is
           managed internally: it is created lazily on first use and shut down automatically at process exit.
        3. If you provide a `portal`, you are expected to manage its lifecycle, typically with a
           context manager. This is recommended when making many calls in a bounded scope since it
           avoids per-call startup costs while allowing deterministic teardown.

    Args:
        async_func (Callable[P, Awaitable[R]]): Asynchronous function to wrap.
        portal (BlockingPortal | None, optional): Portal to use for calling the async function
            from sync code. Defaults to None, in which case a shared default portal is used.

    Returns:
        Callable[P, R]: A synchronous function that runs the coroutine and returns its result.

    Usage:
        ```python
        # Use the default shared portal (most convenient)
        def do_work(x: int) -> int:
            sync_call = syncify(async_func)
            return sync_call(x)
        ```

        ```python
        # Reuse a scoped portal for multiple calls (deterministic lifecycle)
        from anyio.from_thread import start_blocking_portal

        with start_blocking_portal() as portal:
            sync_call = syncify(async_func, portal=portal)
            a = sync_call(1)
            b = sync_call(2)
        ```

    Notes:
        Creating a brand-new portal per call is discouraged due to the overhead of spinning up
        and tearing down a background event loop/thread. Prefer the shared portal or a scoped
        portal reused for a batch of calls.
    """
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Defines helper functions to format logged events.
|
|
2
|
+
|
|
3
|
+
Authors:
|
|
4
|
+
Henry Wicaksono (henry.wicaksono@gdplabs.id)
|
|
5
|
+
|
|
6
|
+
References:
|
|
7
|
+
NONE
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
|
|
12
|
+
from gllm_core.schema import Chunk
|
|
13
|
+
from gllm_core.schema.chunk import MAX_PREVIEW_LENGTH
|
|
14
|
+
|
|
15
|
+
# Matches a `{...}` span whose opening brace is not adjacent to another `{` and whose closing brace
# is not adjacent to another `}`. The single capture group holds the text between the braces and is
# matched lazily, so it stops at the first qualifying closing brace.
TEMPLATE_VALIDATOR_REGEX = re.compile(r"(?<!\{)\{(?!\{)(.*?)(?<!\})\}(?!\})")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def format_chunk_message(
    chunk: Chunk,
    rank: int | None = None,
    include_score: bool = True,
    include_metadata: bool = True,
) -> str:
    """Formats a log to display a single chunk.

    The content is truncated to `MAX_PREVIEW_LENGTH` characters, with `...` appended when it was
    longer. The score line is emitted only when the chunk has a score, and the metadata section
    only when the chunk's metadata is non-empty.

    Args:
        chunk (Chunk): The chunk to be formatted.
        rank (int | None, optional): The optional rank of the formatted chunk. Any integer,
            including 0, is rendered; only None omits the rank line. Defaults to None.
        include_score (bool, optional): Whether to include the score in the formatted message. Defaults to True.
        include_metadata (bool, optional): Whether to include the metadata in the formatted message. Defaults to True.

    Returns:
        str: A formatted log message that displays information about the logged chunk.
    """
    content_preview = chunk.content[:MAX_PREVIEW_LENGTH]
    if len(chunk.content) > MAX_PREVIEW_LENGTH:
        content_preview = f"{content_preview}..."

    # Collect the pieces and join once instead of growing a string with repeated `+=`.
    parts = [f"ID: {chunk.id}\n Content: {content_preview}"]

    if chunk.score is not None and include_score:
        parts.append(f"\n Score: {chunk.score}")

    if chunk.metadata and include_metadata:
        parts.append("\n Metadata:")
        parts.extend(f"\n - {key}: {value}" for key, value in chunk.metadata.items())

    message = "".join(parts)

    # Compare against None explicitly: a plain truthiness test would silently drop rank 0.
    if rank is not None:
        message = f"Rank: {rank}\n {message}"

    return f"\n - {message}"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_placeholder_keys(template: str) -> list[str]:
    """Collects the placeholder keys that appear in a template string.

    Placeholders are spans wrapped in single curly braces `{}`; spans wrapped in double curly
    braces `{{}}` are ignored. Keys are returned in the order they occur in the template.

    Args:
        template (str): The template string containing placeholders.

    Returns:
        list[str]: A list of keys extracted from the template.
    """
    # The pattern has exactly one capture group: the text between the braces.
    return [found.group(1) for found in TEMPLATE_VALIDATOR_REGEX.finditer(template)]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_core.schema import Chunk as Chunk
|
|
3
|
+
from gllm_core.schema.chunk import MAX_PREVIEW_LENGTH as MAX_PREVIEW_LENGTH
|
|
4
|
+
|
|
5
|
+
# Compiled regular expression used by get_placeholder_keys to find single-brace `{key}` placeholders.
TEMPLATE_VALIDATOR_REGEX: Incomplete
|
|
6
|
+
|
|
7
|
+
def format_chunk_message(chunk: Chunk, rank: int | None = None, include_score: bool = True, include_metadata: bool = True) -> str:
    """Formats a log to display a single chunk.

    Args:
        chunk (Chunk): The chunk to be formatted.
        rank (int | None, optional): The optional rank of the formatted chunk. Defaults to None.
        include_score (bool, optional): Whether to include the score in the formatted message. Defaults to True.
        include_metadata (bool, optional): Whether to include the metadata in the formatted message. Defaults to True.

    Returns:
        str: A formatted log message that displays information about the logged chunk.
    """
|
|
19
|
+
def get_placeholder_keys(template: str) -> list[str]:
    """Extracts keys from a template string based on a regex pattern.

    This function searches the template for placeholders enclosed in single curly braces `{}` and ignores
    any placeholders within double curly braces `{{}}`. It returns a list of the keys found.

    Args:
        template (str): The template string containing placeholders.

    Returns:
        list[str]: A list of keys extracted from the template, in order of appearance.
    """
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Defines functions to interact with Google Sheets API.
|
|
2
|
+
|
|
3
|
+
Authors:
|
|
4
|
+
Henry Wicaksono (henry.wicaksono@gdplabs.id)
|
|
5
|
+
|
|
6
|
+
References:
|
|
7
|
+
[1] https://github.com/GDP-ADMIN/gen-ai-veriwise/blob/main/main/backend/module/google_sheets/auth.py
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import gspread
|
|
11
|
+
from google.oauth2 import service_account
|
|
12
|
+
from gspread import Client, Spreadsheet, Worksheet
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def load_gsheets(
    client_email: str,
    private_key: str,
    sheet_id: str,
    worksheet_id: str,
) -> list[dict[str, str]]:
    """Reads every record of a Google Sheets worksheet.

    Service account details are assembled from the given email and key, a client is authorized
    with them, the requested worksheet is located, and all of its records are returned. The
    first row of the worksheet will be treated as the column names.

    Args:
        client_email (str): The client email associated with the service account.
        private_key (str): The private key used for authentication.
        sheet_id (str): The ID of the Google Sheet.
        worksheet_id (str): The ID of the worksheet within the Google Sheet.

    Returns:
        list[dict[str, str]]: A list of dictionaries containing the Google Sheets content.
    """
    authorized_client = _get_client(_get_account_info(client_email, private_key))
    target_worksheet = _get_worksheet(authorized_client, sheet_id, worksheet_id)
    return target_worksheet.get_all_records()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _get_account_info(client_email: str, private_key: str) -> dict[str, str]:
|
|
43
|
+
"""Generates a dictionary with Google Sheets API authentication information.
|
|
44
|
+
|
|
45
|
+
This function returns a dictionary containing authentication details required for a Google Sheets API service
|
|
46
|
+
account.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
client_email (str): The client email associated with the service account.
|
|
50
|
+
private_key (str): The private key used for authentication.
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
dict[str, str]: A dictionary containing the authentication details for the Google Sheets API.
|
|
54
|
+
"""
|
|
55
|
+
return {
|
|
56
|
+
"type": "service_account",
|
|
57
|
+
"private_key": private_key,
|
|
58
|
+
"client_email": client_email,
|
|
59
|
+
"client_id": "https://www.googleapis.com/auth/spreadsheets",
|
|
60
|
+
"token_uri": "https://oauth2.googleapis.com/token",
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _get_client(info: dict[str, str]) -> Client:
    """Creates an authorized Google Sheets client from service account information.

    Credentials are built from `info` with the spreadsheets OAuth2 scope and then passed to
    gspread to obtain the client.

    Args:
        info (dict[str, str]): Service account information for authentication.

    Returns:
        Client: An authorized Google Sheets client.
    """
    credentials = service_account.Credentials.from_service_account_info(
        info=info,
        scopes=["https://www.googleapis.com/auth/spreadsheets"],
    )
    return gspread.authorize(credentials)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _get_sheet(client: Client, sheet_id: str) -> Spreadsheet:
|
|
84
|
+
"""Gets a Google Sheets spreadsheet by its ID using a Google Sheets client.
|
|
85
|
+
|
|
86
|
+
This function opens the spreadsheet using Sheet ID and returns the corresponding Spreadsheet object.
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
client (Client): An authorized Google Sheets client.
|
|
90
|
+
sheet_id (str): The ID of the Google Sheets spreadsheet.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
Spreadsheet: The Google Sheets spreadsheet as a Spreadsheet object.
|
|
94
|
+
"""
|
|
95
|
+
sheet = client.open_by_key(sheet_id)
|
|
96
|
+
return sheet
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _get_worksheet(client: Client, sheet_id: str, worksheet_id: str) -> Worksheet:
    """Fetches one worksheet of a Google Sheets spreadsheet by its worksheet ID.

    The spreadsheet is opened first using the client and sheet ID; the worksheet is then looked
    up inside it. The worksheet ID is converted to an integer for the lookup.

    Args:
        client (Client): An authorized Google Sheets client.
        sheet_id (str): The ID of the Google Sheets spreadsheet.
        worksheet_id (str): The ID of the worksheet to retrieve.

    Returns:
        Worksheet: The specified worksheet as a Worksheet object.
    """
    spreadsheet = _get_sheet(client, sheet_id)
    return spreadsheet.get_worksheet_by_id(int(worksheet_id))
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from gspread import Client as Client, Spreadsheet as Spreadsheet, Worksheet as Worksheet
|
|
2
|
+
|
|
3
|
+
def load_gsheets(client_email: str, private_key: str, sheet_id: str, worksheet_id: str) -> list[dict[str, str]]:
    """Loads data from a Google Sheets worksheet.

    This function retrieves data from a Google Sheets worksheet using service account credentials.
    It authorizes the client, selects the specified worksheet, and reads the worksheet data.
    The first row of the worksheet will be treated as the column names.

    Args:
        client_email (str): The client email associated with the service account.
        private_key (str): The private key used for authentication.
        sheet_id (str): The ID of the Google Sheet.
        worksheet_id (str): The ID of the worksheet within the Google Sheet. It is converted to an
            integer before the lookup.

    Returns:
        list[dict[str, str]]: A list of dictionaries containing the Google Sheets content.
    """
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Utilities for handling package imports.
|
|
2
|
+
|
|
3
|
+
Authors:
|
|
4
|
+
Dimitrij Ray (dimitrij.ray@gdplabs.id)
|
|
5
|
+
|
|
6
|
+
References:
|
|
7
|
+
NONE
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import importlib
|
|
11
|
+
from typing import Callable
|
|
12
|
+
|
|
13
|
+
from deprecation import deprecated as _deprecated
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def check_optional_packages(
|
|
17
|
+
packages: str | list[str],
|
|
18
|
+
error_message: str | None = None,
|
|
19
|
+
install_instructions: str | None = None,
|
|
20
|
+
extras: str | list[str] | None = None,
|
|
21
|
+
) -> None:
|
|
22
|
+
"""Check if optional packages are available and raise ImportError if not.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
packages (str | list[str]): Package name or list of package names to check.
|
|
26
|
+
error_message (str | None, optional): Custom error message. If None, a default message is used.
|
|
27
|
+
Defaults to None.
|
|
28
|
+
install_instructions (str | None, optional): Installation instructions. If None, generates poetry install
|
|
29
|
+
command. Defaults to None.
|
|
30
|
+
extras (str | list[str] | None, optional): Poetry extras that contain the required packages. If provided,
|
|
31
|
+
generates specific installation instructions. If install_instructions is None, it will create
|
|
32
|
+
default instructions based on the extras. If install_instructions is not None, it will use the
|
|
33
|
+
provided instructions directly and ignore this argument. Defaults to None.
|
|
34
|
+
|
|
35
|
+
Raises:
|
|
36
|
+
ImportError: If any of the required packages are not installed.
|
|
37
|
+
"""
|
|
38
|
+
if isinstance(packages, str):
|
|
39
|
+
packages = [packages]
|
|
40
|
+
|
|
41
|
+
missing = [package for package in packages if importlib.util.find_spec(package) is None]
|
|
42
|
+
|
|
43
|
+
if missing:
|
|
44
|
+
packages_str = ", ".join(f"'{pkg}'" for pkg in missing)
|
|
45
|
+
default_message = f"The following packages are missing: {packages_str}"
|
|
46
|
+
message = error_message or default_message
|
|
47
|
+
|
|
48
|
+
if install_instructions is None:
|
|
49
|
+
if extras:
|
|
50
|
+
if isinstance(extras, str):
|
|
51
|
+
extras = [extras]
|
|
52
|
+
extras_str = " ".join(extras)
|
|
53
|
+
install_instructions = f"Please install the required extras with 'poetry install --extras {extras_str}'"
|
|
54
|
+
else:
|
|
55
|
+
install_instructions = (
|
|
56
|
+
"Please update your poetry environment with 'poetry install' or "
|
|
57
|
+
"install the required extras with 'poetry install --extras <extras>'"
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
raise ImportError(f"{message}.\n{install_instructions}")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def deprecated(deprecated_in: str, removed_in: str, current_version: str | None = None, details: str = "") -> Callable:
    """Builds a decorator that marks a function as deprecated.

    All arguments are forwarded unchanged to `deprecation.deprecated`. The thin wrapper gives the
    codebase a single entry point, since the `deprecation` package may itself be replaced once
    Python 3.13's `@warnings.deprecated` becomes available.

    Usage example:

    ```python
    @deprecated(deprecated_in="0.1.0", removed_in="0.2.0", current_version="0.1.1")
    def old_function():
        pass
    ```

    Args:
        deprecated_in (str): The version when the function was deprecated.
        removed_in (str): The version when the function will be removed.
        current_version (str | None, optional): The current version of the package. Defaults to None.
        details (str, optional): Additional details about the deprecation. Defaults to an empty string.

    Returns:
        Callable: The decorated function.
    """
    options = {
        "deprecated_in": deprecated_in,
        "removed_in": removed_in,
        "current_version": current_version,
        "details": details,
    }
    return _deprecated(**options)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from typing import Callable
|
|
2
|
+
|
|
3
|
+
def check_optional_packages(packages: str | list[str], error_message: str | None = None, install_instructions: str | None = None, extras: str | list[str] | None = None) -> None:
    """Check if optional packages are available and raise ImportError if not.

    Args:
        packages (str | list[str]): Package name or list of package names to check.
        error_message (str | None, optional): Custom error message. If None, a default message is used.
            Defaults to None.
        install_instructions (str | None, optional): Installation instructions. If None, generates poetry install
            command. Defaults to None.
        extras (str | list[str] | None, optional): Poetry extras that contain the required packages. If provided,
            generates specific installation instructions. If install_instructions is None, it will create
            default instructions based on the extras. If install_instructions is not None, it will use the
            provided instructions directly and ignore this argument. Defaults to None.

    Raises:
        ImportError: If any of the required packages are not installed. The message is the error
            message followed by the installation instructions on a new line.
    """
|
|
20
|
+
def deprecated(deprecated_in: str, removed_in: str, current_version: str | None = None, details: str = '') -> Callable:
    '''Decorator to mark functions as deprecated.

    This is currently implemented as a thin wrapper around deprecation.deprecated for consistency, since deprecation
    may be deprecated when we move into Python 3.13, where @warnings.deprecated will be available.
    All arguments are forwarded unchanged to deprecation.deprecated.

    Usage example:

    ```python
    @deprecated(deprecated_in="0.1.0", removed_in="0.2.0", current_version="0.1.1")
    def old_function():
        pass
    ```

    Args:
        deprecated_in (str): The version when the function was deprecated.
        removed_in (str): The version when the function will be removed.
        current_version (str | None, optional): The current version of the package. Defaults to None.
        details (str, optional): Additional details about the deprecation. Defaults to an empty string.

    Returns:
        Callable: The decorated function.
    '''
|