banks 2.1.3__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {banks-2.1.3 → banks-2.3.0}/.github/workflows/test.yml +3 -3
- {banks-2.1.3 → banks-2.3.0}/CLAUDE.md +6 -4
- {banks-2.1.3 → banks-2.3.0}/PKG-INFO +4 -3
- {banks-2.1.3 → banks-2.3.0}/README.md +2 -2
- {banks-2.1.3 → banks-2.3.0}/docs/examples.md +52 -0
- {banks-2.1.3 → banks-2.3.0}/pyproject.toml +3 -2
- {banks-2.1.3 → banks-2.3.0}/src/banks/__about__.py +1 -1
- {banks-2.1.3 → banks-2.3.0}/src/banks/config.py +4 -1
- {banks-2.1.3 → banks-2.3.0}/src/banks/env.py +4 -1
- {banks-2.1.3 → banks-2.3.0}/src/banks/errors.py +1 -1
- {banks-2.1.3 → banks-2.3.0}/src/banks/extensions/completion.py +16 -2
- {banks-2.1.3 → banks-2.3.0}/src/banks/filters/__init__.py +4 -1
- banks-2.3.0/src/banks/filters/audio.py +60 -0
- banks-2.3.0/src/banks/filters/document.py +96 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/filters/image.py +11 -1
- banks-2.3.0/src/banks/filters/video.py +62 -0
- banks-2.3.0/src/banks/filters/xml.py +62 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/prompt.py +11 -2
- {banks-2.1.3 → banks-2.3.0}/src/banks/types.py +69 -0
- banks-2.3.0/tests/data/1x1.pdf +0 -0
- banks-2.3.0/tests/data/empty.mov +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/test_audio.py +29 -1
- {banks-2.1.3 → banks-2.3.0}/tests/test_cache_control.py +3 -2
- {banks-2.1.3 → banks-2.3.0}/tests/test_directory_registry.py +1 -1
- banks-2.3.0/tests/test_document.py +74 -0
- {banks-2.1.3 → banks-2.3.0}/tests/test_image.py +17 -0
- {banks-2.1.3 → banks-2.3.0}/tests/test_redis_registry.py +1 -1
- banks-2.3.0/tests/test_video.py +73 -0
- banks-2.3.0/tests/test_xml.py +70 -0
- banks-2.1.3/src/banks/filters/audio.py +0 -23
- {banks-2.1.3 → banks-2.3.0}/.github/workflows/docs.yml +0 -0
- {banks-2.1.3 → banks-2.3.0}/.github/workflows/release.yml +0 -0
- {banks-2.1.3 → banks-2.3.0}/.gitignore +0 -0
- {banks-2.1.3 → banks-2.3.0}/CITATION.cff +0 -0
- {banks-2.1.3 → banks-2.3.0}/CODE_OF_CONDUCT.md +0 -0
- {banks-2.1.3 → banks-2.3.0}/CONTRIBUTING.md +0 -0
- {banks-2.1.3 → banks-2.3.0}/LICENSE.txt +0 -0
- {banks-2.1.3 → banks-2.3.0}/MANIFEST.in +0 -0
- {banks-2.1.3 → banks-2.3.0}/assets/banks.png +0 -0
- {banks-2.1.3 → banks-2.3.0}/cookbook/Prompt_Caching_with_Anthropic.ipynb +0 -0
- {banks-2.1.3 → banks-2.3.0}/cookbook/Prompt_Versioning.ipynb +0 -0
- {banks-2.1.3 → banks-2.3.0}/cookbook/in_prompt_completion.ipynb +0 -0
- {banks-2.1.3 → banks-2.3.0}/docs/config.md +0 -0
- {banks-2.1.3 → banks-2.3.0}/docs/index.md +0 -0
- {banks-2.1.3 → banks-2.3.0}/docs/prompt.md +0 -0
- {banks-2.1.3 → banks-2.3.0}/docs/python.md +0 -0
- {banks-2.1.3 → banks-2.3.0}/docs/registry.md +0 -0
- {banks-2.1.3 → banks-2.3.0}/mkdocs.yml +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/__init__.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/cache.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/extensions/__init__.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/extensions/chat.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/extensions/docs.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/filters/cache_control.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/filters/lemmatize.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/filters/tool.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/registries/__init__.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/registries/directory.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/registries/file.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/registries/redis.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/src/banks/utils.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/__init__.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/conftest.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/data/1x1.png +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/data/empty.wav +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/e2e/__init__.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/e2e/conftest.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/e2e/test_completion.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/e2e/test_function_calling.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/templates/blog.jinja +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/templates/cache.jinja +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/templates/chat.jinja +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/templates/summarize.jinja +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/templates/summarize_lemma.jinja +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/test_cache.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/test_chat.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/test_completion.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/test_config.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/test_file_registry.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/test_prompt.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/test_tool.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/test_types.py +0 -0
- {banks-2.1.3 → banks-2.3.0}/tests/test_utils.py +0 -0
|
@@ -34,7 +34,7 @@ jobs:
|
|
|
34
34
|
strategy:
|
|
35
35
|
fail-fast: false
|
|
36
36
|
matrix:
|
|
37
|
-
python-version: ["3.
|
|
37
|
+
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
|
38
38
|
|
|
39
39
|
steps:
|
|
40
40
|
- uses: actions/checkout@v4
|
|
@@ -72,7 +72,7 @@ jobs:
|
|
|
72
72
|
strategy:
|
|
73
73
|
fail-fast: false
|
|
74
74
|
matrix:
|
|
75
|
-
python-version: ["3.
|
|
75
|
+
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
|
76
76
|
|
|
77
77
|
steps:
|
|
78
78
|
- uses: actions/checkout@v4
|
|
@@ -95,7 +95,7 @@ jobs:
|
|
|
95
95
|
strategy:
|
|
96
96
|
fail-fast: false
|
|
97
97
|
matrix:
|
|
98
|
-
python-version: ["3.
|
|
98
|
+
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
|
99
99
|
|
|
100
100
|
steps:
|
|
101
101
|
- uses: actions/checkout@v4
|
|
@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
|
|
4
4
|
|
|
5
5
|
## Project Overview
|
|
6
6
|
|
|
7
|
-
Banks is a Python prompt programming language and templating system for LLM applications. It provides a Jinja2-based template engine with specialized extensions and filters for creating dynamic prompts, managing chat messages, handling multimodal content (images/audio), and integrating with various LLM providers through LiteLLM.
|
|
7
|
+
Banks is a Python prompt programming language and templating system for LLM applications. It provides a Jinja2-based template engine with specialized extensions and filters for creating dynamic prompts, managing chat messages, handling multimodal content (images/audio/video/documents), and integrating with various LLM providers through LiteLLM.
|
|
8
8
|
|
|
9
9
|
## Development Commands
|
|
10
10
|
|
|
@@ -28,7 +28,7 @@ Banks is a Python prompt programming language and templating system for LLM appl
|
|
|
28
28
|
### Environment Management
|
|
29
29
|
- All commands use Hatch environments with automatic dependency management
|
|
30
30
|
- Use `uv` as the installer for faster dependency resolution
|
|
31
|
-
- Python 3.
|
|
31
|
+
- Python 3.10+ supported across multiple versions (3.10-3.14)
|
|
32
32
|
|
|
33
33
|
## Architecture Overview
|
|
34
34
|
|
|
@@ -42,7 +42,7 @@ Banks is a Python prompt programming language and templating system for LLM appl
|
|
|
42
42
|
|
|
43
43
|
**Type System** (`src/banks/types.py`):
|
|
44
44
|
- `ChatMessage`: Core chat message structure with role and content
|
|
45
|
-
- `ContentBlock`: Handles different content types (text, image_url, audio) with optional cache control
|
|
45
|
+
- `ContentBlock`: Handles different content types (text, image_url, audio, video, document) with optional cache control
|
|
46
46
|
- `Tool`: Function calling support with automatic schema generation from Python callables
|
|
47
47
|
- `CacheControl`: Anthropic-style prompt caching metadata
|
|
48
48
|
|
|
@@ -67,6 +67,8 @@ Banks is a Python prompt programming language and templating system for LLM appl
|
|
|
67
67
|
**Core Filters** (`src/banks/filters/`):
|
|
68
68
|
- `image`: Convert file paths/URLs to base64-encoded image content blocks
|
|
69
69
|
- `audio`: Convert audio files to base64-encoded audio content blocks
|
|
70
|
+
- `video`: Convert video files to base64-encoded video content blocks
|
|
71
|
+
- `document`: Convert documents (PDF, TXT, HTML, CSS, XML, CSV, RTF, JS, JSON) to base64-encoded content blocks
|
|
70
72
|
- `cache_control`: Add Anthropic cache control metadata to content blocks
|
|
71
73
|
- `tool`: Convert Python callables to LLM function call schemas
|
|
72
74
|
- `lemmatize`: Text lemmatization using simplemma
|
|
@@ -95,7 +97,7 @@ Banks is a Python prompt programming language and templating system for LLM appl
|
|
|
95
97
|
4. Caching layer prevents re-rendering identical contexts
|
|
96
98
|
|
|
97
99
|
### Multimodal Content Handling
|
|
98
|
-
- Images/audio converted to base64 during filter application
|
|
100
|
+
- Images/audio/video/documents converted to base64 during filter application
|
|
99
101
|
- Content blocks maintain type safety and metadata
|
|
100
102
|
- Cache control integrated at content block level
|
|
101
103
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: banks
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Summary: A prompt programming language
|
|
5
5
|
Project-URL: Documentation, https://github.com/masci/banks#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/masci/banks/issues
|
|
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
19
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
19
20
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
20
21
|
Requires-Python: >=3.9
|
|
@@ -125,11 +126,11 @@ print(p.chat_messages({"persona": "helpful assistant"}))
|
|
|
125
126
|
# [
|
|
126
127
|
# ChatMessage(role='system', content=[
|
|
127
128
|
# ContentBlock(type=<ContentBlockType.text: 'text'>, cache_control=None, text='You are a helpful assistant.',
|
|
128
|
-
# image_url=None, input_audio=None)
|
|
129
|
+
# image_url=None, input_audio=None, input_video=None, input_document=None)
|
|
129
130
|
# ], tool_call_id=None, name=None),
|
|
130
131
|
# ChatMessage(role='user', content=[
|
|
131
132
|
# ContentBlock(type=<ContentBlockType.text: 'text'>, cache_control=None, text='Hello, how are you?',
|
|
132
|
-
# image_url=None, input_audio=None)
|
|
133
|
+
# image_url=None, input_audio=None, input_video=None, input_document=None)
|
|
133
134
|
# ], tool_call_id=None, name=None)
|
|
134
135
|
# ]
|
|
135
136
|
```
|
|
@@ -94,11 +94,11 @@ print(p.chat_messages({"persona": "helpful assistant"}))
|
|
|
94
94
|
# [
|
|
95
95
|
# ChatMessage(role='system', content=[
|
|
96
96
|
# ContentBlock(type=<ContentBlockType.text: 'text'>, cache_control=None, text='You are a helpful assistant.',
|
|
97
|
-
# image_url=None, input_audio=None)
|
|
97
|
+
# image_url=None, input_audio=None, input_video=None, input_document=None)
|
|
98
98
|
# ], tool_call_id=None, name=None),
|
|
99
99
|
# ChatMessage(role='user', content=[
|
|
100
100
|
# ContentBlock(type=<ContentBlockType.text: 'text'>, cache_control=None, text='Hello, how are you?',
|
|
101
|
-
# image_url=None, input_audio=None)
|
|
101
|
+
# image_url=None, input_audio=None, input_video=None, input_document=None)
|
|
102
102
|
# ], tool_call_id=None, name=None)
|
|
103
103
|
# ]
|
|
104
104
|
```
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
- [Create a blog writing prompt](#create-a-blog-writing-prompt)
|
|
4
4
|
- [Create a summarizer prompt](#create-a-summarizer-prompt)
|
|
5
5
|
- [Lemmatize text while processing a template](#lemmatize-text-while-processing-a-template)
|
|
6
|
+
- [Convert a JSON-like object into XML while processing the template](#convert-a-json-like-object-into-xml-while-processing-the-template)
|
|
6
7
|
- [Use a LLM to generate a text while rendering a prompt](#use-a-llm-to-generate-a-text-while-rendering-a-prompt)
|
|
7
8
|
- [Render a prompt template as chat messages](#render-a-prompt-template-as-chat-messages)
|
|
8
9
|
- [Use prompt caching from Anthropic](#use-prompt-caching-from-anthropic)
|
|
@@ -135,6 +136,57 @@ the cat be run
|
|
|
135
136
|
Summary:
|
|
136
137
|
```
|
|
137
138
|
|
|
139
|
+
## Convert a JSON-like object into XML while processing the template
|
|
140
|
+
|
|
141
|
+
Banks has a built-in `to_xml` filter that converts JSON-like objects (Pydantic `BaseModel` subclasses, dictionaries, JSON-deserializable strings) into an XML string.
|
|
142
|
+
|
|
143
|
+
Here is an example of how you can use it:
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
from banks import Prompt
|
|
147
|
+
from pydantic import BaseModel
|
|
148
|
+
from typing import Dict
|
|
149
|
+
|
|
150
|
+
prompt_template = """
|
|
151
|
+
Please extract the contact details from this user:
|
|
152
|
+
|
|
153
|
+
{{ data | to_xml }}
|
|
154
|
+
|
|
155
|
+
Contact details:
|
|
156
|
+
"""
|
|
157
|
+
|
|
158
|
+
class User(BaseModel):
|
|
159
|
+
username: str
|
|
160
|
+
account_id: str
|
|
161
|
+
registered_at: str
|
|
162
|
+
email: str
|
|
163
|
+
phone_number: str
|
|
164
|
+
social_media_accounts: Dict[str, str]
|
|
165
|
+
|
|
166
|
+
user = User(username="example", account_id="0000", registered_at="10-25-2024", email="example@email.com", phone_number="0123456789", social_media_accounts={"BlueSky": "@example.com"})
|
|
167
|
+
|
|
168
|
+
p = Prompt(prompt_template)
|
|
169
|
+
print(p.text({"data": user}))
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
This will output:
|
|
173
|
+
|
|
174
|
+
```text
|
|
175
|
+
Please extract the contact details from this user:
|
|
176
|
+
|
|
177
|
+
<user>
|
|
178
|
+
<username>example</username>
|
|
179
|
+
<account_id>0000</account_id>
|
|
180
|
+
<registered_at>10-25-2024</registered_at>
|
|
181
|
+
<email>example@email.com</email>
|
|
182
|
+
<phone_number>0123456789</phone_number>
|
|
183
|
+
<social_media_accounts>{'BlueSky': '@example.com'}</social_media_accounts>
|
|
184
|
+
</user>
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
Contact details:
|
|
188
|
+
```
|
|
189
|
+
|
|
138
190
|
## Use a LLM to generate a text while rendering a prompt
|
|
139
191
|
|
|
140
192
|
Sometimes it might be useful to ask another LLM to generate examples for you in a
|
|
@@ -19,6 +19,7 @@ classifiers = [
|
|
|
19
19
|
"Programming Language :: Python :: 3.11",
|
|
20
20
|
"Programming Language :: Python :: 3.12",
|
|
21
21
|
"Programming Language :: Python :: 3.13",
|
|
22
|
+
"Programming Language :: Python :: 3.14",
|
|
22
23
|
"Programming Language :: Python :: Implementation :: CPython",
|
|
23
24
|
"Programming Language :: Python :: Implementation :: PyPy",
|
|
24
25
|
]
|
|
@@ -65,7 +66,7 @@ cov = ["test-cov", "cov-report"]
|
|
|
65
66
|
docs = "mkdocs {args:build}"
|
|
66
67
|
|
|
67
68
|
[[tool.hatch.envs.all.matrix]]
|
|
68
|
-
python = ["3.
|
|
69
|
+
python = ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
|
69
70
|
|
|
70
71
|
[tool.hatch.envs.lint]
|
|
71
72
|
detached = false # Normally the linting env can be detached, but mypy doesn't install all the stubs we need
|
|
@@ -199,7 +200,7 @@ max-args = 10
|
|
|
199
200
|
asyncio_default_fixture_loop_scope = "function"
|
|
200
201
|
markers = ["e2e"]
|
|
201
202
|
filterwarnings = [
|
|
202
|
-
#
|
|
203
|
+
# Silence litellm warning coming from their Pydantic config.
|
|
203
204
|
# This assumes our use of Pydantic is correct :)
|
|
204
205
|
"ignore:Support for class-based `config` is deprecated",
|
|
205
206
|
]
|
|
@@ -28,9 +28,12 @@ class _BanksConfig:
|
|
|
28
28
|
return original_value
|
|
29
29
|
|
|
30
30
|
# Convert string from env var to the actual type
|
|
31
|
-
|
|
31
|
+
annotations = getattr(type(self), "__annotations__", {})
|
|
32
|
+
t = annotations.get(name, type(original_value))
|
|
32
33
|
if t is bool:
|
|
33
34
|
return strtobool(read_value)
|
|
35
|
+
if t is Any:
|
|
36
|
+
return read_value
|
|
34
37
|
|
|
35
38
|
return t(read_value)
|
|
36
39
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
from jinja2 import Environment, select_autoescape
|
|
5
5
|
|
|
6
6
|
from .config import config
|
|
7
|
-
from .filters import audio, cache_control, image, lemmatize, tool
|
|
7
|
+
from .filters import audio, cache_control, document, image, lemmatize, tool, video, xml
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def _add_extensions(_env):
|
|
@@ -38,5 +38,8 @@ env.filters["image"] = image
|
|
|
38
38
|
env.filters["lemmatize"] = lemmatize
|
|
39
39
|
env.filters["tool"] = tool
|
|
40
40
|
env.filters["audio"] = audio
|
|
41
|
+
env.filters["video"] = video
|
|
42
|
+
env.filters["document"] = document
|
|
43
|
+
env.filters["to_xml"] = xml
|
|
41
44
|
|
|
42
45
|
_add_extensions(env)
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
4
|
import importlib
|
|
5
5
|
import json
|
|
6
|
-
from typing import cast
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Callable, cast
|
|
7
7
|
|
|
8
8
|
from jinja2 import TemplateSyntaxError, nodes
|
|
9
9
|
from jinja2.ext import Extension
|
|
@@ -12,6 +12,8 @@ from pydantic import ValidationError
|
|
|
12
12
|
from banks.errors import InvalidPromptError, LLMError
|
|
13
13
|
from banks.types import ChatMessage, Tool
|
|
14
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from litellm.types.utils import ChatCompletionMessageToolCall
|
|
15
17
|
SUPPORTED_KWARGS = ("model",)
|
|
16
18
|
LITELLM_INSTALL_MSG = "litellm is not installed. Please install it with `pip install litellm`."
|
|
17
19
|
|
|
@@ -74,7 +76,19 @@ class CompletionExtension(Extension):
|
|
|
74
76
|
return nodes.CallBlock(self.call_method("_do_completion_async", args), [], [], body).set_lineno(lineno)
|
|
75
77
|
return nodes.CallBlock(self.call_method("_do_completion", args), [], [], body).set_lineno(lineno)
|
|
76
78
|
|
|
77
|
-
def _get_tool_callable(self, tools, tool_call):
|
|
79
|
+
def _get_tool_callable(self, tools: list[Tool], tool_call: "ChatCompletionMessageToolCall") -> Callable[..., Any]:
|
|
80
|
+
"""Get the callable function for a tool call.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
tools: List of available tools
|
|
84
|
+
tool_call: The tool call from the LLM response
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
The callable function
|
|
88
|
+
|
|
89
|
+
Raises:
|
|
90
|
+
ValueError: If the function is not found in available tools
|
|
91
|
+
"""
|
|
78
92
|
for tool in tools:
|
|
79
93
|
if tool.function.name == tool_call.function.name:
|
|
80
94
|
module_name, func_name = tool.import_path.rsplit(".", maxsplit=1)
|
|
@@ -3,8 +3,11 @@
|
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
4
|
from .audio import audio
|
|
5
5
|
from .cache_control import cache_control
|
|
6
|
+
from .document import document
|
|
6
7
|
from .image import image
|
|
7
8
|
from .lemmatize import lemmatize
|
|
8
9
|
from .tool import tool
|
|
10
|
+
from .video import video
|
|
11
|
+
from .xml import xml
|
|
9
12
|
|
|
10
|
-
__all__ = ("cache_control", "image", "lemmatize", "tool", "audio")
|
|
13
|
+
__all__ = ("cache_control", "image", "lemmatize", "tool", "audio", "video", "document", "xml")
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2023-present Massimiliano Pippi <mpippi@gmail.com>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import cast
|
|
7
|
+
from urllib.parse import urlparse
|
|
8
|
+
|
|
9
|
+
from banks.types import AudioFormat, ContentBlock, InputAudio
|
|
10
|
+
|
|
11
|
+
BASE64_AUDIO_REGEX = re.compile(r"audio\/.*;base64,.*")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _is_url(string: str) -> bool:
|
|
15
|
+
"""Check if a string is a URL."""
|
|
16
|
+
result = urlparse(string)
|
|
17
|
+
if not result.scheme:
|
|
18
|
+
return False
|
|
19
|
+
|
|
20
|
+
if not result.netloc:
|
|
21
|
+
# The only valid format when netloc is empty is base64 data urls
|
|
22
|
+
return all([result.scheme == "data", BASE64_AUDIO_REGEX.match(result.path)])
|
|
23
|
+
|
|
24
|
+
return True
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _get_audio_format_from_url(url: str) -> AudioFormat:
|
|
28
|
+
"""Extract audio format from URL.
|
|
29
|
+
|
|
30
|
+
Tries to determine format from URL path or defaults to mp3.
|
|
31
|
+
"""
|
|
32
|
+
parsed = urlparse(url)
|
|
33
|
+
path = parsed.path.lower()
|
|
34
|
+
for fmt in ("mp3", "wav", "m4a", "webm", "ogg", "flac"):
|
|
35
|
+
if path.endswith(f".{fmt}"):
|
|
36
|
+
return cast(AudioFormat, fmt)
|
|
37
|
+
# Default to mp3 if format cannot be determined
|
|
38
|
+
return "mp3"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def audio(value: str) -> str:
|
|
42
|
+
"""Wrap the filtered value into a ContentBlock of type audio.
|
|
43
|
+
|
|
44
|
+
The resulting ChatMessage will have the field `content` populated with a list of ContentBlock objects.
|
|
45
|
+
|
|
46
|
+
Supports both file paths and URLs (including data URLs).
|
|
47
|
+
|
|
48
|
+
Example:
|
|
49
|
+
```jinja
|
|
50
|
+
{{ "path/to/audio/file.mp3" | audio }}
|
|
51
|
+
{{ "https://example.com/audio.mp3" | audio }}
|
|
52
|
+
```
|
|
53
|
+
"""
|
|
54
|
+
if _is_url(value):
|
|
55
|
+
audio_format = _get_audio_format_from_url(value)
|
|
56
|
+
input_audio = InputAudio.from_url(value, audio_format)
|
|
57
|
+
else:
|
|
58
|
+
input_audio = InputAudio.from_path(Path(value))
|
|
59
|
+
block = ContentBlock.model_validate({"type": "audio", "input_audio": input_audio})
|
|
60
|
+
return f"<content_block>{block.model_dump_json()}</content_block>"
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2023-present Massimiliano Pippi <mpippi@gmail.com>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import cast
|
|
7
|
+
from urllib.parse import urlparse
|
|
8
|
+
|
|
9
|
+
from banks.types import ContentBlock, DocumentFormat, InputDocument
|
|
10
|
+
|
|
11
|
+
BASE64_DOCUMENT_REGEX = re.compile(r"(text|application)\/.*;base64,.*")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _is_url(string: str) -> bool:
|
|
15
|
+
"""Check if a string is a URL."""
|
|
16
|
+
result = urlparse(string)
|
|
17
|
+
if not result.scheme:
|
|
18
|
+
return False
|
|
19
|
+
|
|
20
|
+
if not result.netloc:
|
|
21
|
+
# The only valid format when netloc is empty is base64 data urls
|
|
22
|
+
return all([result.scheme == "data", BASE64_DOCUMENT_REGEX.match(result.path)])
|
|
23
|
+
|
|
24
|
+
return True
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _get_document_format_from_url(url: str) -> DocumentFormat:
|
|
28
|
+
"""Extract document format from URL.
|
|
29
|
+
|
|
30
|
+
Tries to determine format from URL path or defaults to pdf.
|
|
31
|
+
"""
|
|
32
|
+
parsed = urlparse(url)
|
|
33
|
+
path = parsed.path.lower()
|
|
34
|
+
# Gemini supported file types https://ai.google.dev/gemini-api/docs/file-input-methods
|
|
35
|
+
# text/html
|
|
36
|
+
# text/css
|
|
37
|
+
# text/plain
|
|
38
|
+
# text/xml
|
|
39
|
+
# text/csv
|
|
40
|
+
# text/rtf
|
|
41
|
+
# text/javascript
|
|
42
|
+
# application/json
|
|
43
|
+
# application/pdf
|
|
44
|
+
|
|
45
|
+
# Claude supported file types
|
|
46
|
+
# application/pdf
|
|
47
|
+
# text/plain
|
|
48
|
+
|
|
49
|
+
# OpenAI supported file types
|
|
50
|
+
# application/pdf
|
|
51
|
+
|
|
52
|
+
for fmt in (
|
|
53
|
+
"pdf",
|
|
54
|
+
"html",
|
|
55
|
+
"htm",
|
|
56
|
+
"xhtml",
|
|
57
|
+
"css",
|
|
58
|
+
"txt",
|
|
59
|
+
"md",
|
|
60
|
+
"markdown",
|
|
61
|
+
"rst",
|
|
62
|
+
"xml",
|
|
63
|
+
"csv",
|
|
64
|
+
"rtf",
|
|
65
|
+
"js",
|
|
66
|
+
"mjs",
|
|
67
|
+
"cjs",
|
|
68
|
+
"javascript",
|
|
69
|
+
"json",
|
|
70
|
+
):
|
|
71
|
+
if path.endswith(f".{fmt}"):
|
|
72
|
+
return cast(DocumentFormat, fmt)
|
|
73
|
+
# Default to pdf if format cannot be determined
|
|
74
|
+
return "pdf"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def document(value: str) -> str:
|
|
78
|
+
"""Wrap the filtered value into a ContentBlock of type document.
|
|
79
|
+
|
|
80
|
+
The resulting ChatMessage will have the field `content` populated with a list of ContentBlock objects.
|
|
81
|
+
|
|
82
|
+
Supports both file paths and URLs (including data URLs).
|
|
83
|
+
|
|
84
|
+
Example:
|
|
85
|
+
```jinja
|
|
86
|
+
{{ "path/to/document/file.pdf" | document }}
|
|
87
|
+
{{ "https://example.com/document.pdf" | document }}
|
|
88
|
+
```
|
|
89
|
+
"""
|
|
90
|
+
if _is_url(value):
|
|
91
|
+
document_format = _get_document_format_from_url(value)
|
|
92
|
+
input_document = InputDocument.from_url(value, document_format)
|
|
93
|
+
else:
|
|
94
|
+
input_document = InputDocument.from_path(Path(value))
|
|
95
|
+
block = ContentBlock.model_validate({"type": "document", "input_document": input_document})
|
|
96
|
+
return f"<content_block>{block.model_dump_json()}</content_block>"
|
|
@@ -1,15 +1,25 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2023-present Massimiliano Pippi <mpippi@gmail.com>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
|
+
import re
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from urllib.parse import urlparse
|
|
6
7
|
|
|
7
8
|
from banks.types import ContentBlock, ImageUrl
|
|
8
9
|
|
|
10
|
+
BASE64_PATH_REGEX = re.compile(r"image\/.*;base64,.*")
|
|
11
|
+
|
|
9
12
|
|
|
10
13
|
def _is_url(string: str) -> bool:
|
|
11
14
|
result = urlparse(string)
|
|
12
|
-
|
|
15
|
+
if not result.scheme:
|
|
16
|
+
return False
|
|
17
|
+
|
|
18
|
+
if not result.netloc:
|
|
19
|
+
# The only valid format when netloc is empty is base64 data urls
|
|
20
|
+
return all([result.scheme == "data", BASE64_PATH_REGEX.match(result.path)])
|
|
21
|
+
|
|
22
|
+
return True
|
|
13
23
|
|
|
14
24
|
|
|
15
25
|
def image(value: str) -> str:
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2023-present Massimiliano Pippi <mpippi@gmail.com>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import cast
|
|
7
|
+
from urllib.parse import urlparse
|
|
8
|
+
|
|
9
|
+
from banks.types import ContentBlock, InputVideo, VideoFormat
|
|
10
|
+
|
|
11
|
+
BASE64_VIDEO_REGEX = re.compile(r"video\/.*;base64,.*")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _is_url(string: str) -> bool:
|
|
15
|
+
"""Check if a string is a URL."""
|
|
16
|
+
result = urlparse(string)
|
|
17
|
+
if not result.scheme:
|
|
18
|
+
return False
|
|
19
|
+
|
|
20
|
+
if not result.netloc:
|
|
21
|
+
# The only valid format when netloc is empty is base64 data urls
|
|
22
|
+
return all([result.scheme == "data", BASE64_VIDEO_REGEX.match(result.path)])
|
|
23
|
+
|
|
24
|
+
return True
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _get_video_format_from_url(url: str) -> VideoFormat:
|
|
28
|
+
"""Extract video format from URL.
|
|
29
|
+
|
|
30
|
+
Tries to determine format from URL path or defaults to mp4.
|
|
31
|
+
"""
|
|
32
|
+
parsed = urlparse(url)
|
|
33
|
+
path = parsed.path.lower()
|
|
34
|
+
|
|
35
|
+
# Based on formats supported by Gemini https://ai.google.dev/gemini-api/docs/video-understanding
|
|
36
|
+
for fmt in ("mp4", "mpeg", "mov", "avi", "flv", "mpg", "webm", "wmv", "3gpp"):
|
|
37
|
+
if path.endswith(f".{fmt}"):
|
|
38
|
+
return cast(VideoFormat, fmt)
|
|
39
|
+
# Default to mp4 if format cannot be determined
|
|
40
|
+
return "mp4"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def video(value: str) -> str:
|
|
44
|
+
"""Wrap the filtered value into a ContentBlock of type video.
|
|
45
|
+
|
|
46
|
+
The resulting ChatMessage will have the field `content` populated with a list of ContentBlock objects.
|
|
47
|
+
|
|
48
|
+
Supports both file paths and URLs (including data URLs).
|
|
49
|
+
|
|
50
|
+
Example:
|
|
51
|
+
```jinja
|
|
52
|
+
{{ "path/to/video/file.mp4" | video }}
|
|
53
|
+
{{ "https://example.com/video.mp4" | video }}
|
|
54
|
+
```
|
|
55
|
+
"""
|
|
56
|
+
if _is_url(value):
|
|
57
|
+
video_format = _get_video_format_from_url(value)
|
|
58
|
+
input_video = InputVideo.from_url(value, video_format)
|
|
59
|
+
else:
|
|
60
|
+
input_video = InputVideo.from_path(Path(value))
|
|
61
|
+
block = ContentBlock.model_validate({"type": "video", "input_video": input_video})
|
|
62
|
+
return f"<content_block>{block.model_dump_json()}</content_block>"
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import xml.etree.ElementTree as ET
|
|
3
|
+
from typing import Any, Optional, Union
|
|
4
|
+
from xml.dom.minidom import parseString
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _deserialize(string: str) -> Optional[dict]:
|
|
10
|
+
try:
|
|
11
|
+
return json.loads(string)
|
|
12
|
+
except json.JSONDecodeError:
|
|
13
|
+
return None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _prepare_dictionary(value: Union[str, BaseModel, dict[str, Any]]):
|
|
17
|
+
root_tag = "input"
|
|
18
|
+
if isinstance(value, str):
|
|
19
|
+
model: Optional[dict[str, Any]] = _deserialize(value)
|
|
20
|
+
if model is None:
|
|
21
|
+
msg = f"{value} is not deserializable"
|
|
22
|
+
raise ValueError(msg)
|
|
23
|
+
elif isinstance(value, BaseModel):
|
|
24
|
+
model = value.model_dump()
|
|
25
|
+
root_tag = value.__class__.__name__.lower()
|
|
26
|
+
elif isinstance(value, dict):
|
|
27
|
+
model = value.copy()
|
|
28
|
+
for k in value.keys():
|
|
29
|
+
if not isinstance(k, str):
|
|
30
|
+
key = str(k)
|
|
31
|
+
if isinstance(k, (int, float)):
|
|
32
|
+
key = "_" + key
|
|
33
|
+
v = model.pop(k)
|
|
34
|
+
model[key.lower()] = v
|
|
35
|
+
else:
|
|
36
|
+
msg = f"Input can only be of type BaseModel, dictionary or deserializable string. Got {type(value)}"
|
|
37
|
+
raise ValueError(msg)
|
|
38
|
+
return model, root_tag
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def xml(value: Union[str, BaseModel, dict[str, Any]]) -> str:
|
|
42
|
+
"""
|
|
43
|
+
Convert a Pydantic model, a deserializable string or a dictionary into an XML string.
|
|
44
|
+
|
|
45
|
+
Example:
|
|
46
|
+
```jinja
|
|
47
|
+
{{'{"username": "user", "email": "example@email.com"}' | to_xml}}
|
|
48
|
+
"
|
|
49
|
+
<input>
|
|
50
|
+
<username>user</username>
|
|
51
|
+
<email>example@email.com</email>
|
|
52
|
+
</input>
|
|
53
|
+
"
|
|
54
|
+
```
|
|
55
|
+
"""
|
|
56
|
+
model, root_tag = _prepare_dictionary(value)
|
|
57
|
+
xml_model = ET.Element(root_tag)
|
|
58
|
+
for k, v in model.items():
|
|
59
|
+
sub = ET.SubElement(xml_model, k)
|
|
60
|
+
sub.text = str(v)
|
|
61
|
+
xml_str = ET.tostring(xml_model, encoding="unicode")
|
|
62
|
+
return parseString(xml_str).toprettyxml().replace('<?xml version="1.0" ?>\n', "") # noqa: S318
|
|
@@ -81,8 +81,17 @@ class BasePrompt:
|
|
|
81
81
|
|
|
82
82
|
@property
|
|
83
83
|
def variables(self) -> set[str]:
|
|
84
|
-
|
|
85
|
-
|
|
84
|
+
try:
|
|
85
|
+
ast = env.parse(self.raw)
|
|
86
|
+
return meta.find_undeclared_variables(ast)
|
|
87
|
+
except Exception as e:
|
|
88
|
+
from jinja2 import TemplateSyntaxError
|
|
89
|
+
|
|
90
|
+
if isinstance(e, TemplateSyntaxError):
|
|
91
|
+
raise
|
|
92
|
+
# Wrap any other parsing failure in a TemplateSyntaxError for consistency
|
|
93
|
+
msg = f"Failed to parse template: {e}"
|
|
94
|
+
raise TemplateSyntaxError(msg, 0) from e
|
|
86
95
|
|
|
87
96
|
def canary_leaked(self, text: str) -> bool:
|
|
88
97
|
"""Returns whether the canary word is present in `text`, signalling the prompt might have leaked."""
|