banks 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
banks/__about__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2023-present Massimiliano Pippi <mpippi@gmail.com>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "2.2.0"
4
+ __version__ = "2.4.0"
banks/config.py CHANGED
@@ -28,9 +28,12 @@ class _BanksConfig:
28
28
  return original_value
29
29
 
30
30
  # Convert string from env var to the actual type
31
- t = super().__getattribute__("__annotations__")[name]
31
+ annotations = getattr(type(self), "__annotations__", {})
32
+ t = annotations.get(name, type(original_value))
32
33
  if t is bool:
33
34
  return strtobool(read_value)
35
+ if t is Any:
36
+ return read_value
34
37
 
35
38
  return t(read_value)
36
39
 
banks/env.py CHANGED
@@ -4,7 +4,7 @@
4
4
  from jinja2 import Environment, select_autoescape
5
5
 
6
6
  from .config import config
7
- from .filters import audio, cache_control, image, lemmatize, tool, xml
7
+ from .filters import audio, cache_control, document, image, lemmatize, tool, video, xml
8
8
 
9
9
 
10
10
  def _add_extensions(_env):
@@ -38,6 +38,8 @@ env.filters["image"] = image
38
38
  env.filters["lemmatize"] = lemmatize
39
39
  env.filters["tool"] = tool
40
40
  env.filters["audio"] = audio
41
+ env.filters["video"] = video
42
+ env.filters["document"] = document
41
43
  env.filters["to_xml"] = xml
42
44
 
43
45
  _add_extensions(env)
banks/errors.py CHANGED
@@ -14,7 +14,7 @@ class CanaryWordError(Exception):
14
14
 
15
15
 
16
16
  class PromptNotFoundError(Exception):
17
- """The prompt was now found in the registry."""
17
+ """The prompt was not found in the registry."""
18
18
 
19
19
 
20
20
  class InvalidPromptError(Exception):
@@ -3,7 +3,7 @@
3
3
  # SPDX-License-Identifier: MIT
4
4
  import importlib
5
5
  import json
6
- from typing import cast
6
+ from typing import TYPE_CHECKING, Any, Callable, cast
7
7
 
8
8
  from jinja2 import TemplateSyntaxError, nodes
9
9
  from jinja2.ext import Extension
@@ -12,6 +12,8 @@ from pydantic import ValidationError
12
12
  from banks.errors import InvalidPromptError, LLMError
13
13
  from banks.types import ChatMessage, Tool
14
14
 
15
+ if TYPE_CHECKING:
16
+ from litellm.types.utils import ChatCompletionMessageToolCall
15
17
  SUPPORTED_KWARGS = ("model",)
16
18
  LITELLM_INSTALL_MSG = "litellm is not installed. Please install it with `pip install litellm`."
17
19
 
@@ -74,7 +76,19 @@ class CompletionExtension(Extension):
74
76
  return nodes.CallBlock(self.call_method("_do_completion_async", args), [], [], body).set_lineno(lineno)
75
77
  return nodes.CallBlock(self.call_method("_do_completion", args), [], [], body).set_lineno(lineno)
76
78
 
77
- def _get_tool_callable(self, tools, tool_call):
79
+ def _get_tool_callable(self, tools: list[Tool], tool_call: "ChatCompletionMessageToolCall") -> Callable[..., Any]:
80
+ """Get the callable function for a tool call.
81
+
82
+ Args:
83
+ tools: List of available tools
84
+ tool_call: The tool call from the LLM response
85
+
86
+ Returns:
87
+ The callable function
88
+
89
+ Raises:
90
+ ValueError: If the function is not found in available tools
91
+ """
78
92
  for tool in tools:
79
93
  if tool.function.name == tool_call.function.name:
80
94
  module_name, func_name = tool.import_path.rsplit(".", maxsplit=1)
banks/filters/__init__.py CHANGED
@@ -3,9 +3,11 @@
3
3
  # SPDX-License-Identifier: MIT
4
4
  from .audio import audio
5
5
  from .cache_control import cache_control
6
+ from .document import document
6
7
  from .image import image
7
8
  from .lemmatize import lemmatize
8
9
  from .tool import tool
10
+ from .video import video
9
11
  from .xml import xml
10
12
 
11
- __all__ = ("cache_control", "image", "lemmatize", "tool", "audio", "xml")
13
+ __all__ = ("cache_control", "image", "lemmatize", "tool", "audio", "video", "document", "xml")
banks/filters/audio.py CHANGED
@@ -1,23 +1,76 @@
1
1
  # SPDX-FileCopyrightText: 2023-present Massimiliano Pippi <mpippi@gmail.com>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
+ import re
4
5
  from pathlib import Path
6
+ from typing import cast
7
+ from urllib.parse import urlparse
5
8
 
6
- from banks.types import ContentBlock, InputAudio
9
+ import filetype # type: ignore[import-untyped]
7
10
 
11
+ from banks.types import AudioFormat, ContentBlock, InputAudio, resolve_binary
8
12
 
9
- def audio(value: str) -> str:
13
+ BASE64_AUDIO_REGEX = re.compile(r"audio\/.*;base64,.*")
14
+
15
+
16
+ def _is_url(string: str) -> bool:
17
+ """Check if a string is a URL."""
18
+ result = urlparse(string)
19
+ if not result.scheme:
20
+ return False
21
+
22
+ if not result.netloc:
23
+ # The only valid format when netloc is empty is base64 data urls
24
+ return all([result.scheme == "data", BASE64_AUDIO_REGEX.match(result.path)])
25
+
26
+ return True
27
+
28
+
29
+ def _get_audio_format_from_url(url: str) -> AudioFormat:
30
+ """Extract audio format from URL.
31
+
32
+ Tries to determine format from URL path or defaults to mp3.
33
+ """
34
+ parsed = urlparse(url)
35
+ path = parsed.path.lower()
36
+ for fmt in ("mp3", "wav", "m4a", "webm", "ogg", "flac"):
37
+ if path.endswith(f".{fmt}"):
38
+ return cast(AudioFormat, fmt)
39
+ # Default to mp3 if format cannot be determined
40
+ return "mp3"
41
+
42
+
43
+ def _get_audio_format_from_bytes(data: bytes) -> AudioFormat:
44
+ """Extract audio format from bytes data using filetype library."""
45
+ kind = filetype.guess(data)
46
+ if kind is not None:
47
+ fmt = kind.extension
48
+ if fmt in ("mp3", "wav", "m4a", "webm", "ogg", "flac"):
49
+ return cast(AudioFormat, fmt)
50
+ # Default to mp3 if format cannot be determined
51
+ return "mp3"
52
+
53
+
54
+ def audio(value: str | bytes) -> str:
10
55
  """Wrap the filtered value into a ContentBlock of type audio.
11
56
 
12
57
  The resulting ChatMessage will have the field `content` populated with a list of ContentBlock objects.
13
58
 
59
+ Supports file paths, URLs (including data URLs), and raw or base64-encoded bytes.
60
+
14
61
  Example:
15
62
  ```jinja
16
- Describe what you see
17
-
18
- {{ "path/to/audio/file" | audio }}
63
+ {{ "path/to/audio/file.mp3" | audio }}
64
+ {{ "https://example.com/audio.mp3" | audio }}
19
65
  ```
20
66
  """
21
- input_audio = InputAudio.from_path(Path(value))
67
+ if isinstance(value, bytes):
68
+ audio_format = _get_audio_format_from_bytes(resolve_binary(value, as_base64=False))
69
+ input_audio = InputAudio.from_bytes(value, audio_format=audio_format)
70
+ elif _is_url(value):
71
+ audio_format = _get_audio_format_from_url(value)
72
+ input_audio = InputAudio.from_url(value, audio_format)
73
+ else:
74
+ input_audio = InputAudio.from_path(Path(value))
22
75
  block = ContentBlock.model_validate({"type": "audio", "input_audio": input_audio})
23
76
  return f"<content_block>{block.model_dump_json()}</content_block>"
@@ -0,0 +1,135 @@
1
+ # SPDX-FileCopyrightText: 2023-present Massimiliano Pippi <mpippi@gmail.com>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+ import mimetypes
5
+ import re
6
+ from pathlib import Path
7
+ from typing import cast
8
+ from urllib.parse import urlparse
9
+
10
+ import filetype # type: ignore[import-untyped]
11
+
12
+ from banks.types import ContentBlock, DocumentFormat, InputDocument, resolve_binary
13
+
14
+ BASE64_DOCUMENT_REGEX = re.compile(r"(text|application)\/.*;base64,.*")
15
+
16
+
17
+ def _is_url(string: str) -> bool:
18
+ """Check if a string is a URL."""
19
+ result = urlparse(string)
20
+ if not result.scheme:
21
+ return False
22
+
23
+ if not result.netloc:
24
+ # The only valid format when netloc is empty is base64 data urls
25
+ return all([result.scheme == "data", BASE64_DOCUMENT_REGEX.match(result.path)])
26
+
27
+ return True
28
+
29
+
30
+ def _get_document_format_from_url(url: str) -> DocumentFormat:
31
+ """Extract document format from URL.
32
+
33
+ Tries to determine format from URL path or defaults to pdf.
34
+ """
35
+ parsed = urlparse(url)
36
+ path = parsed.path.lower()
37
+ # Gemini supported file types https://ai.google.dev/gemini-api/docs/file-input-methods
38
+ # text/html
39
+ # text/css
40
+ # text/plain
41
+ # text/xml
42
+ # text/csv
43
+ # text/rtf
44
+ # text/javascript
45
+ # application/json
46
+ # application/pdf
47
+
48
+ # Claude supported file types
49
+ # application/pdf
50
+ # text/plain
51
+
52
+ # OpenAI supported file types
53
+ # application/pdf
54
+
55
+ for fmt in (
56
+ "pdf",
57
+ "html",
58
+ "htm",
59
+ "xhtml",
60
+ "css",
61
+ "txt",
62
+ "md",
63
+ "markdown",
64
+ "rst",
65
+ "xml",
66
+ "csv",
67
+ "rtf",
68
+ "js",
69
+ "mjs",
70
+ "cjs",
71
+ "javascript",
72
+ "json",
73
+ ):
74
+ # Because Claude only supports pdf and text, and Gemini only supports a small subset of text formats,
75
+ # we can default to 'txt' for any text-based format that is not pdf. This allows the data to be sent to the llm
76
+ # in an acceptable format, but the LLM should still be able to understand the content: e.g., html, markdown,
77
+ # xml, etc.
78
+ if path.endswith(f".{fmt}"):
79
+ if fmt == "pdf":
80
+ return cast(DocumentFormat, "pdf")
81
+ return "txt"
82
+ mime = mimetypes.guess_type(path)[0]
83
+ if mime is not None and mime.startswith("text/"):
84
+ return "txt"
85
+ # With urls, the likelihood seems sufficiently high that it's probably a pdf if not otherwise indicated
86
+ if mime is None:
87
+ return "pdf"
88
+ # Document type indicated to be other than pdf or text type
89
+ raise ValueError("Unsupported document format: " + path)
90
+
91
+
92
+ def _get_document_format_from_bytes(data: bytes) -> DocumentFormat:
93
+ """Extract document format from bytes data using filetype library."""
94
+ # First check for pdf (only non text based format) and RTF formats (can be detected by file header)
95
+ kind = filetype.guess(data)
96
+ if kind is not None:
97
+ fmt = kind.extension
98
+ if fmt == "pdf":
99
+ return cast(DocumentFormat, fmt)
100
+
101
+ # filetype is good at detecting binary formats, but not text-based ones.
102
+ # So, this is a good indicator that it's text-based.
103
+ # Because Claude only supports pdf and text, and Gemini only supports a small subset of text formats,
104
+ # we can default to 'txt' for any text-based format that is not pdf. This allows the data to be sent to the llm in
105
+ # an acceptable format, but the LLM should still be able to understand the content: e.g., html, markdown, xml, etc.
106
+ # If detecting specific text types becomes desirable, consider a content-based detector such as Google's Magika.
107
+ if kind is None or kind.extension == "rtf":
108
+ return "txt"
109
+ # There are many common document types (like word, excel, powerpoint, etc.) that are not supported.
110
+ raise ValueError("Unsupported document format: " + kind.extension)
111
+
112
+
113
+ def document(value: str | bytes) -> str:
114
+ """Wrap the filtered value into a ContentBlock of type document.
115
+
116
+ The resulting ChatMessage will have the field `content` populated with a list of ContentBlock objects.
117
+
118
+ Supports file paths, URLs (including data URLs), and raw or base64-encoded bytes.
119
+
120
+ Example:
121
+ ```jinja
122
+ {{ "path/to/document/file.pdf" | document }}
123
+ {{ "https://example.com/document.pdf" | document }}
124
+ ```
125
+ """
126
+ if isinstance(value, bytes):
127
+ document_format = _get_document_format_from_bytes(resolve_binary(value, as_base64=False))
128
+ input_document = InputDocument.from_bytes(value, document_format=document_format)
129
+ elif _is_url(value):
130
+ document_format = _get_document_format_from_url(value)
131
+ input_document = InputDocument.from_url(value, document_format)
132
+ else:
133
+ input_document = InputDocument.from_path(Path(value))
134
+ block = ContentBlock.model_validate({"type": "document", "input_document": input_document})
135
+ return f"<content_block>{block.model_dump_json()}</content_block>"
banks/filters/image.py CHANGED
@@ -22,7 +22,7 @@ def _is_url(string: str) -> bool:
22
22
  return True
23
23
 
24
24
 
25
- def image(value: str) -> str:
25
+ def image(value: str | bytes) -> str:
26
26
  """Wrap the filtered value into a ContentBlock of type image.
27
27
 
28
28
  The resulting ChatMessage will have the field `content` populated with a list of ContentBlock objects.
@@ -38,7 +38,9 @@ def image(value: str) -> str:
38
38
  this filter marks the content to cache by surrounding it with `<content_block>` and
39
39
  `</content_block>`, so it's only useful when used within a `{% chat %}` block.
40
40
  """
41
- if _is_url(value):
41
+ if isinstance(value, bytes):
42
+ image_url = ImageUrl.from_bytes(bytes_str=value)
43
+ elif _is_url(value):
42
44
  image_url = ImageUrl(url=value)
43
45
  else:
44
46
  image_url = ImageUrl.from_path(Path(value))
banks/filters/video.py ADDED
@@ -0,0 +1,108 @@
1
+ # SPDX-FileCopyrightText: 2023-present Massimiliano Pippi <mpippi@gmail.com>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+ import re
5
+ from pathlib import Path
6
+ from typing import cast
7
+ from urllib.parse import urlparse
8
+
9
+ import filetype # type: ignore[import-untyped]
10
+ from filetype.types.video import IsoBmff # type: ignore[import-untyped]
11
+
12
+ from banks.types import ContentBlock, InputVideo, VideoFormat, resolve_binary
13
+
14
+ BASE64_VIDEO_REGEX = re.compile(r"video\/.*;base64,.*")
15
+
16
+
17
+ class M3gp(IsoBmff):
18
+ """
19
+ Implements the 3gp video type matcher.
20
+
21
+ The type matcher in the filetype lib does not work correctly for 3gp files,
22
+ so implement our own here.
23
+ """
24
+
25
+ MIME = "video/3gpp"
26
+ EXTENSION = "3gp"
27
+
28
+ def __init__(self):
29
+ super().__init__(mime=M3gp.MIME, extension=M3gp.EXTENSION)
30
+
31
+ def match(self, buf):
32
+ if not self._is_isobmff(buf):
33
+ return False
34
+
35
+ major_brand, _, compatible_brands = self._get_ftyp(buf)
36
+ for brand in compatible_brands:
37
+ if brand in ["3gp4", "3gp5", "3gpp"]:
38
+ return True
39
+ return major_brand in ["3gp4", "3gp5", "3gpp"]
40
+
41
+
42
+ def _is_url(string: str) -> bool:
43
+ """Check if a string is a URL."""
44
+ result = urlparse(string)
45
+ if not result.scheme:
46
+ return False
47
+
48
+ if not result.netloc:
49
+ # The only valid format when netloc is empty is base64 data urls
50
+ return all([result.scheme == "data", BASE64_VIDEO_REGEX.match(result.path)])
51
+
52
+ return True
53
+
54
+
55
+ def _get_video_format_from_url(url: str) -> VideoFormat:
56
+ """Extract video format from URL.
57
+
58
+ Tries to determine format from URL path or defaults to mp4.
59
+ """
60
+ parsed = urlparse(url)
61
+ path = parsed.path.lower()
62
+
63
+ # Based on formats supported by Gemini https://ai.google.dev/gemini-api/docs/video-understanding
64
+ for fmt in ("mp4", "mpeg", "mov", "avi", "flv", "mpg", "webm", "wmv", "3gpp"):
65
+ if path.endswith(f".{fmt}"):
66
+ return cast(VideoFormat, fmt)
67
+ # Default to mp4 if format cannot be determined
68
+ return "mp4"
69
+
70
+
71
+ def _get_video_format_from_bytes(data: bytes) -> VideoFormat:
72
+ """Extract video format from bytes data using filetype library."""
73
+ m3gp = M3gp()
74
+ if m3gp not in filetype.types:
75
+ filetype.add_type(m3gp)
76
+
77
+ kind = filetype.guess(data)
78
+ if kind is not None:
79
+ fmt = kind.extension
80
+ if fmt in ("mp4", "mpg", "mov", "avi", "flv", "webm", "wmv", "3gp"):
81
+ return cast(VideoFormat, fmt)
82
+ # Default to mp4 if format cannot be determined
83
+ return "mp4"
84
+
85
+
86
+ def video(value: str | bytes) -> str:
87
+ """Wrap the filtered value into a ContentBlock of type video.
88
+
89
+ The resulting ChatMessage will have the field `content` populated with a list of ContentBlock objects.
90
+
91
+ Supports file paths, URLs (including data URLs), and raw or base64-encoded bytes.
92
+
93
+ Example:
94
+ ```jinja
95
+ {{ "path/to/video/file.mp4" | video }}
96
+ {{ "https://example.com/video.mp4" | video }}
97
+ ```
98
+ """
99
+ if isinstance(value, bytes):
100
+ video_format = _get_video_format_from_bytes(resolve_binary(value, as_base64=False))
101
+ input_video = InputVideo.from_bytes(value, video_format=video_format)
102
+ elif _is_url(value):
103
+ video_format = _get_video_format_from_url(value)
104
+ input_video = InputVideo.from_url(value, video_format)
105
+ else:
106
+ input_video = InputVideo.from_path(Path(value))
107
+ block = ContentBlock.model_validate({"type": "video", "input_video": input_video})
108
+ return f"<content_block>{block.model_dump_json()}</content_block>"
banks/prompt.py CHANGED
@@ -81,8 +81,17 @@ class BasePrompt:
81
81
 
82
82
  @property
83
83
  def variables(self) -> set[str]:
84
- ast = env.parse(self.raw)
85
- return meta.find_undeclared_variables(ast)
84
+ try:
85
+ ast = env.parse(self.raw)
86
+ return meta.find_undeclared_variables(ast)
87
+ except Exception as e:
88
+ from jinja2 import TemplateSyntaxError
89
+
90
+ if isinstance(e, TemplateSyntaxError):
91
+ raise
92
+ # Re-raise as TemplateSyntaxError for consistency
93
+ msg = f"Failed to parse template: {e}"
94
+ raise TemplateSyntaxError(msg, 0) from e
86
95
 
87
96
  def canary_leaked(self, text: str) -> bool:
88
97
  """Returns whether the canary word is present in `text`, signalling the prompt might have leaked."""
banks/types.py CHANGED
@@ -5,11 +5,14 @@ from __future__ import annotations
5
5
 
6
6
  import base64
7
7
  import re
8
+ from base64 import b64decode, b64encode
9
+ from binascii import Error as BinasciiError
8
10
  from enum import Enum
9
11
  from inspect import Parameter, getdoc, signature
10
12
  from pathlib import Path
11
13
  from typing import Callable, Literal, Union, cast
12
14
 
15
+ import filetype # type: ignore[import-untyped]
13
16
  from pydantic import BaseModel
14
17
  from typing_extensions import Self
15
18
 
@@ -19,10 +22,37 @@ from .utils import parse_params_from_docstring, python_type_to_jsonschema
19
22
  CONTENT_BLOCK_REGEX = re.compile(r"(<content_block>\{.*?\}<\/content_block>)|([^<](?:(?!<content_block>)[\s\S])*)")
20
23
 
21
24
 
25
+ def resolve_binary(bytes_str: bytes, *, as_base64: bool = True) -> bytes:
26
+ """
27
+ Resolve binary data between base64 and raw bytes.
28
+
29
+ Args:
30
+ bytes_str: Bytes data
31
+ as_base64: Whether to return base64 encoded bytes or raw bytes
32
+
33
+ Returns:
34
+ Base64-encoded bytes when as_base64 is True (the input is returned unchanged if it is already valid base64); otherwise the raw, decoded bytes.
35
+ """
36
+ # check if bytes_str is base64 encoded
37
+ try:
38
+ # Check if bytes_str is already base64 encoded.
39
+ # b64decode() can succeed on random binary data, so we
40
+ # pass validate=True to make sure it's not a false positive
41
+ raw_bytes = base64.b64decode(bytes_str, validate=True)
42
+ b64_bytes = bytes_str
43
+ except BinasciiError:
44
+ # b64decode failed, leave as is
45
+ raw_bytes = bytes_str
46
+ b64_bytes = b64encode(bytes_str)
47
+ return b64_bytes if as_base64 else raw_bytes
48
+
49
+
22
50
  class ContentBlockType(str, Enum):
23
51
  text = "text"
24
52
  image_url = "image_url"
25
53
  audio = "audio"
54
+ video = "video"
55
+ document = "document"
26
56
 
27
57
 
28
58
  class CacheControl(BaseModel):
@@ -32,6 +62,14 @@ class CacheControl(BaseModel):
32
62
  class ImageUrl(BaseModel):
33
63
  url: str
34
64
 
65
+ @staticmethod
66
+ def _mimetype_from_bytes(raw_bytes: bytes) -> str:
67
+ kind = filetype.guess(raw_bytes)
68
+ if kind is not None:
69
+ return kind.mime
70
+ # Default to jpeg if format cannot be determined
71
+ return "image/jpeg"
72
+
35
73
  @classmethod
36
74
  def from_base64(cls, media_type: str, base64_str: str) -> Self:
37
75
  return cls(url=f"data:{media_type};base64,{base64_str}")
@@ -39,10 +77,31 @@ class ImageUrl(BaseModel):
39
77
  @classmethod
40
78
  def from_path(cls, file_path: Path) -> Self:
41
79
  with open(file_path, "rb") as image_file:
42
- return cls.from_base64("image/jpeg", base64.b64encode(image_file.read()).decode("utf-8"))
80
+ raw_bytes = image_file.read()
81
+ mimetype = cls._mimetype_from_bytes(raw_bytes)
82
+ return cls.from_base64(mimetype, base64.b64encode(raw_bytes).decode("utf-8"))
83
+
84
+ @classmethod
85
+ def from_bytes(cls, bytes_str: bytes) -> Self:
86
+ """Create ImageUrl from bytes
87
+ Args:
88
+ bytes_str: Bytes data
89
+ Returns:
90
+ ImageUrl instance with base64 encoded bytes as URL
91
+ """
92
+ b64_bytes = resolve_binary(bytes_str)
93
+ mimetype = cls._mimetype_from_bytes(b64decode(b64_bytes))
94
+ return cls.from_base64(mimetype, b64_bytes.decode("utf-8"))
43
95
 
44
96
 
45
97
  AudioFormat = Literal["mp3", "wav", "m4a", "webm", "ogg", "flac"]
98
+ VideoFormat = Literal["mp4", "mpg", "mov", "avi", "flv", "webm", "wmv", "3gp", "3gpp"]
99
+ # Because Claude only supports pdf and text, and Gemini only supports a small subset of text formats,
100
+ # we can default to 'txt' for any text-based format that is not pdf. This allows the data to be sent to the llm
101
+ # in an acceptable format, but the LLM should still be able to understand the content: e.g., html, markdown,
102
+ # xml, etc.
103
+ # If detecting specific text types becomes desirable, consider a content-based detector such as Google's Magika.
104
+ DocumentFormat = Literal["pdf", "txt"]
46
105
 
47
106
 
48
107
  class InputAudio(BaseModel):
@@ -56,6 +115,114 @@ class InputAudio(BaseModel):
56
115
  file_format = cast(AudioFormat, file_path.suffix[1:])
57
116
  return cls(data=encoded_str, format=file_format)
58
117
 
118
+ @classmethod
119
+ def from_url(cls, url: str, audio_format: AudioFormat) -> Self:
120
+ """Create InputAudio from a URL.
121
+
122
+ Args:
123
+ url: The URL to the audio file
124
+ audio_format: The audio format
125
+
126
+ Returns:
127
+ InputAudio instance with the URL as data
128
+ """
129
+ return cls(data=url, format=audio_format)
130
+
131
+ @classmethod
132
+ def from_bytes(cls, bytes_str: bytes, audio_format: AudioFormat) -> Self:
133
+ """Create InputAudio from bytes
134
+
135
+ Args:
136
+ bytes_str: Bytes data
137
+ audio_format: The audio format
138
+
139
+ Returns:
140
+ InputAudio instance with base64 encoded bytes as data
141
+ """
142
+ b64_bytes = resolve_binary(bytes_str)
143
+ encoded_str = b64_bytes.decode("utf-8")
144
+ return cls(data=encoded_str, format=audio_format)
145
+
146
+
147
+ class InputVideo(BaseModel):
148
+ data: str
149
+ format: VideoFormat
150
+
151
+ @classmethod
152
+ def from_path(cls, file_path: Path) -> Self:
153
+ with open(file_path, "rb") as video_file:
154
+ encoded_str = base64.b64encode(video_file.read()).decode("utf-8")
155
+ file_format = cast(VideoFormat, file_path.suffix[1:])
156
+ return cls(data=encoded_str, format=file_format)
157
+
158
+ @classmethod
159
+ def from_url(cls, url: str, video_format: VideoFormat) -> Self:
160
+ """Create InputVideo from a URL.
161
+
162
+ Args:
163
+ url: The URL to the audio file
164
+ video_format: The audio format
165
+
166
+ Returns:
167
+ InputVideo instance with the URL as data
168
+ """
169
+ return cls(data=url, format=video_format)
170
+
171
+ @classmethod
172
+ def from_bytes(cls, bytes_str: bytes, video_format: VideoFormat) -> Self:
173
+ """Create InputVideo from bytes
174
+
175
+ Args:
176
+ bytes_str: Bytes data
177
+ video_format: The video format
178
+
179
+ Returns:
180
+ InputVideo instance with base64 encoded bytes as data
181
+ """
182
+ b64_bytes = resolve_binary(bytes_str)
183
+ encoded_str = b64_bytes.decode("utf-8")
184
+ return cls(data=encoded_str, format=video_format)
185
+
186
+
187
+ class InputDocument(BaseModel):
188
+ data: str
189
+ format: DocumentFormat
190
+
191
+ @classmethod
192
+ def from_path(cls, file_path: Path) -> Self:
193
+ with open(file_path, "rb") as document_file:
194
+ encoded_str = base64.b64encode(document_file.read()).decode("utf-8")
195
+ file_format = cast(DocumentFormat, file_path.suffix[1:])
196
+ return cls(data=encoded_str, format=file_format)
197
+
198
+ @classmethod
199
+ def from_url(cls, url: str, document_format: DocumentFormat) -> Self:
200
+ """Create InputDocument from a URL.
201
+
202
+ Args:
203
+ url: The URL to the document file
204
+ document_format: The document format
205
+
206
+ Returns:
207
+ InputDocument instance with the URL as data
208
+ """
209
+ return cls(data=url, format=document_format)
210
+
211
+ @classmethod
212
+ def from_bytes(cls, bytes_str: bytes, document_format: DocumentFormat) -> Self:
213
+ """Create InputDocument from bytes
214
+
215
+ Args:
216
+ bytes_str: Bytes data
217
+ document_format: The document format
218
+
219
+ Returns:
220
+ InputDocument instance with base64 encoded bytes as data
221
+ """
222
+ b64_bytes = resolve_binary(bytes_str)
223
+ encoded_str = b64_bytes.decode("utf-8")
224
+ return cls(data=encoded_str, format=document_format)
225
+
59
226
 
60
227
  class ContentBlock(BaseModel):
61
228
  type: ContentBlockType
@@ -63,6 +230,8 @@ class ContentBlock(BaseModel):
63
230
  text: str | None = None
64
231
  image_url: ImageUrl | None = None
65
232
  input_audio: InputAudio | None = None
233
+ input_video: InputVideo | None = None
234
+ input_document: InputDocument | None = None
66
235
 
67
236
 
68
237
  ChatMessageContent = Union[list[ContentBlock], str]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: banks
3
- Version: 2.2.0
3
+ Version: 2.4.0
4
4
  Summary: A prompt programming language
5
5
  Project-URL: Documentation, https://github.com/masci/banks#readme
6
6
  Project-URL: Issues, https://github.com/masci/banks/issues
@@ -15,11 +15,13 @@ Classifier: Programming Language :: Python :: 3.10
15
15
  Classifier: Programming Language :: Python :: 3.11
16
16
  Classifier: Programming Language :: Python :: 3.12
17
17
  Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
18
19
  Classifier: Programming Language :: Python :: Implementation :: CPython
19
20
  Classifier: Programming Language :: Python :: Implementation :: PyPy
20
21
  Requires-Python: >=3.9
21
22
  Requires-Dist: deprecated
22
23
  Requires-Dist: eval-type-backport; python_version < '3.10'
24
+ Requires-Dist: filetype>=1.2.0
23
25
  Requires-Dist: griffe
24
26
  Requires-Dist: jinja2
25
27
  Requires-Dist: platformdirs
@@ -125,11 +127,11 @@ print(p.chat_messages({"persona": "helpful assistant"}))
125
127
  # [
126
128
  # ChatMessage(role='system', content=[
127
129
  # ContentBlock(type=<ContentBlockType.text: 'text'>, cache_control=None, text='You are a helpful assistant.',
128
- # image_url=None, input_audio=None)
130
+ # image_url=None, input_audio=None, input_video=None, input_document=None)
129
131
  # ], tool_call_id=None, name=None),
130
132
  # ChatMessage(role='user', content=[
131
133
  # ContentBlock(type=<ContentBlockType.text: 'text'>, cache_control=None, text='Hello, how are you?',
132
- # image_url=None, input_audio=None)
134
+ # image_url=None, input_audio=None, input_video=None, input_document=None)
133
135
  # ], tool_call_id=None, name=None)
134
136
  # ]
135
137
  ```
@@ -0,0 +1,30 @@
1
+ banks/__about__.py,sha256=Kbqara7VPF_4Txd0QVMWDNL5EqtjtYKEQFgzvx2Dqgc,132
2
+ banks/__init__.py,sha256=4IBopxXstFZliCvSjOuTurSQb32Vy26EXOPhmNZ4Hus,334
3
+ banks/cache.py,sha256=uUGAu82-mfrscc2q24x19ZMZBkoQzf3hh7_V300J-Ik,1069
4
+ banks/config.py,sha256=Ry2pdba1pgepsfmj41hxMdfQCT9pMjdV6TRWySpMiSY,1192
5
+ banks/env.py,sha256=oGC4wjmF0-NTwoi49NooCLqqT44htn6EWDnHsDl2n0I,1347
6
+ banks/errors.py,sha256=EnKRBhHmo8KEcSg3YoDBtVEaooJj9uSqRV1wnzUtrJU,580
7
+ banks/prompt.py,sha256=LXaDGHBywFkD_JmWPQOXqyuQKZgozzDIb5lSZ12xf5A,8505
8
+ banks/types.py,sha256=p74ZUJfH0vZnneOEsAmiHoDGfsbjNLO-VDHYPuov2Yo,11347
9
+ banks/utils.py,sha256=ZetGG3qhXMYOitDZQCWbE33wHEqR0ih2ZEg_dIW8OeI,1827
10
+ banks/extensions/__init__.py,sha256=Lx4UrOzywYQY7a8qvIqvc3ql54nwK0lNP7x3jYdbREY,110
11
+ banks/extensions/chat.py,sha256=VV6UV1wQZcJ0KbIFHSFmDeptWtww4o2IXF5pXB6TpTM,2478
12
+ banks/extensions/completion.py,sha256=p6NdzA5kOuWZ0BIcGQH86Ji4Z4PFz0-h_G2cHgKdYvw,7861
13
+ banks/extensions/docs.py,sha256=vWOZvu2JoS4LwUG-BR3jPqThirYvu3Fdba331UxooYM,1098
14
+ banks/filters/__init__.py,sha256=fcAlKqgDSX19JDQHfTTOtpotxbdC84QbcFF3dKTtEog,430
15
+ banks/filters/audio.py,sha256=x1mWEpzSN2mc_HAUEaNEOUgS_Vh7Wa1VPwRFkRy4oG0,2574
16
+ banks/filters/cache_control.py,sha256=aOGOIzuqasV_TcuFaaXbaoGhA2W9YTFuz7wkatyjXRU,962
17
+ banks/filters/document.py,sha256=hs2IO6d-xcLyTHH50bpzA3848bJLpQYjUkEgjOBYGqE,4893
18
+ banks/filters/image.py,sha256=0t4u2El2Gi92C1qlY_0ji5OpPnjJfTn67SXb2mCIOl8,1507
19
+ banks/filters/lemmatize.py,sha256=Yvp8M4HCx6C0nrcu3UEMtjJUwsyVYI6GQDYOG4S6EEw,887
20
+ banks/filters/tool.py,sha256=i8ukSDYw54ksShVJ2abfRQAiKzKrqUtmgBB1H04cig0,475
21
+ banks/filters/video.py,sha256=MFni5um9Xnq8Sxf6ZBTN5GsKAC6f73CLrqFAGaE2pkk,3531
22
+ banks/filters/xml.py,sha256=uQ_2zfCf8NhpdbF8F5HS7URXvDzsxfg-TEIVGufZbM0,1991
23
+ banks/registries/__init__.py,sha256=iRK-8420cKBckOTd5KcIFQyV66EsF0Mc7UHCkzf8qZU,255
24
+ banks/registries/directory.py,sha256=gRFO7fl9yXHt2NJ1pDA2wPSQtlORhSw1GKWxSTyFzE8,6055
25
+ banks/registries/file.py,sha256=8ayvFrcM8Tk0DWgGXmKD2DRBfGXr5CmgtdQaQ5cXhow,4054
26
+ banks/registries/redis.py,sha256=eBL92URJa-NegOxRLS4b2xrDRDxz6iiaz_7Ddi32Rtc,2756
27
+ banks-2.4.0.dist-info/METADATA,sha256=D94dNSjkJj9vXMecMp7lTiGS_7sM4ipxyRvtbkn8aYs,12258
28
+ banks-2.4.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
29
+ banks-2.4.0.dist-info/licenses/LICENSE.txt,sha256=NZJne_JTwMFwq_g-kq-sm4PuaeVOgu1l3NUGOgBHX-g,1102
30
+ banks-2.4.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,28 +0,0 @@
1
- banks/__about__.py,sha256=77xoXWQ484-a4X30IBRswGGTfRx8t_BwItzAm7NTXwI,132
2
- banks/__init__.py,sha256=4IBopxXstFZliCvSjOuTurSQb32Vy26EXOPhmNZ4Hus,334
3
- banks/cache.py,sha256=uUGAu82-mfrscc2q24x19ZMZBkoQzf3hh7_V300J-Ik,1069
4
- banks/config.py,sha256=c6B1cXUZ-NN0XmJvfezXeHPXHP7knk8TfbmcZL7gCzk,1082
5
- banks/env.py,sha256=XOSz6QGNSRaqnIdKWhH5U-ci8Tfi1mDyJHit_aE27Ro,1266
6
- banks/errors.py,sha256=I5cgsa7wtolRVKBSq_aH5xs27yVcErBlMyUswCnM-es,580
7
- banks/prompt.py,sha256=RhPq3wpE-AiCfCftZpPFj2HXGdazwYD502Pr1e-j7FY,8162
8
- banks/types.py,sha256=03x7E7FPVfuN39xY--c0fKumnyVUVzNrq9pgG5R-pAU,5520
9
- banks/utils.py,sha256=ZetGG3qhXMYOitDZQCWbE33wHEqR0ih2ZEg_dIW8OeI,1827
10
- banks/extensions/__init__.py,sha256=Lx4UrOzywYQY7a8qvIqvc3ql54nwK0lNP7x3jYdbREY,110
11
- banks/extensions/chat.py,sha256=VV6UV1wQZcJ0KbIFHSFmDeptWtww4o2IXF5pXB6TpTM,2478
12
- banks/extensions/completion.py,sha256=kF55PiNxjqpslUTAd46H4jOy0eFiLLm5hEcwxS4_oxs,7356
13
- banks/extensions/docs.py,sha256=vWOZvu2JoS4LwUG-BR3jPqThirYvu3Fdba331UxooYM,1098
14
- banks/filters/__init__.py,sha256=MMNxopwecFHW4LA76NwL2JQkdddIAGbKOaHUHG1JQs8,353
15
- banks/filters/audio.py,sha256=2vTPdpDo8FVQsl0WiPlXskwMCGnF8zKwWXfq1fYQzws,726
16
- banks/filters/cache_control.py,sha256=aOGOIzuqasV_TcuFaaXbaoGhA2W9YTFuz7wkatyjXRU,962
17
- banks/filters/image.py,sha256=Ls1fWCgRx0YLGIFx7hdKtR1skY575jDWlCESP0zV1Bs,1407
18
- banks/filters/lemmatize.py,sha256=Yvp8M4HCx6C0nrcu3UEMtjJUwsyVYI6GQDYOG4S6EEw,887
19
- banks/filters/tool.py,sha256=i8ukSDYw54ksShVJ2abfRQAiKzKrqUtmgBB1H04cig0,475
20
- banks/filters/xml.py,sha256=uQ_2zfCf8NhpdbF8F5HS7URXvDzsxfg-TEIVGufZbM0,1991
21
- banks/registries/__init__.py,sha256=iRK-8420cKBckOTd5KcIFQyV66EsF0Mc7UHCkzf8qZU,255
22
- banks/registries/directory.py,sha256=gRFO7fl9yXHt2NJ1pDA2wPSQtlORhSw1GKWxSTyFzE8,6055
23
- banks/registries/file.py,sha256=8ayvFrcM8Tk0DWgGXmKD2DRBfGXr5CmgtdQaQ5cXhow,4054
24
- banks/registries/redis.py,sha256=eBL92URJa-NegOxRLS4b2xrDRDxz6iiaz_7Ddi32Rtc,2756
25
- banks-2.2.0.dist-info/METADATA,sha256=m2W5swzWGUFGr0uNCFAgOqFtxv4A7fMyjsC34GDBTsk,12098
26
- banks-2.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
27
- banks-2.2.0.dist-info/licenses/LICENSE.txt,sha256=NZJne_JTwMFwq_g-kq-sm4PuaeVOgu1l3NUGOgBHX-g,1102
28
- banks-2.2.0.dist-info/RECORD,,