banks 2.3.0__tar.gz → 2.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- banks-2.4.0/.github/workflows/release.yml +79 -0
- banks-2.3.0/CLAUDE.md → banks-2.4.0/AGENTS.md +114 -24
- banks-2.4.0/CLAUDE.md +3 -0
- {banks-2.3.0 → banks-2.4.0}/PKG-INFO +2 -1
- {banks-2.3.0 → banks-2.4.0}/pyproject.toml +3 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/__about__.py +1 -1
- {banks-2.3.0 → banks-2.4.0}/src/banks/filters/audio.py +19 -3
- {banks-2.3.0 → banks-2.4.0}/src/banks/filters/document.py +45 -6
- {banks-2.3.0 → banks-2.4.0}/src/banks/filters/image.py +4 -2
- {banks-2.3.0 → banks-2.4.0}/src/banks/filters/video.py +49 -3
- {banks-2.3.0 → banks-2.4.0}/src/banks/types.py +103 -3
- {banks-2.3.0 → banks-2.4.0}/tests/test_audio.py +49 -1
- banks-2.4.0/tests/test_document.py +167 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_image.py +35 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_video.py +54 -1
- banks-2.3.0/.github/workflows/release.yml +0 -33
- banks-2.3.0/tests/test_document.py +0 -74
- {banks-2.3.0 → banks-2.4.0}/.github/workflows/docs.yml +0 -0
- {banks-2.3.0 → banks-2.4.0}/.github/workflows/test.yml +0 -0
- {banks-2.3.0 → banks-2.4.0}/.gitignore +0 -0
- {banks-2.3.0 → banks-2.4.0}/CITATION.cff +0 -0
- {banks-2.3.0 → banks-2.4.0}/CODE_OF_CONDUCT.md +0 -0
- {banks-2.3.0 → banks-2.4.0}/CONTRIBUTING.md +0 -0
- {banks-2.3.0 → banks-2.4.0}/LICENSE.txt +0 -0
- {banks-2.3.0 → banks-2.4.0}/MANIFEST.in +0 -0
- {banks-2.3.0 → banks-2.4.0}/README.md +0 -0
- {banks-2.3.0 → banks-2.4.0}/assets/banks.png +0 -0
- {banks-2.3.0 → banks-2.4.0}/cookbook/Prompt_Caching_with_Anthropic.ipynb +0 -0
- {banks-2.3.0 → banks-2.4.0}/cookbook/Prompt_Versioning.ipynb +0 -0
- {banks-2.3.0 → banks-2.4.0}/cookbook/in_prompt_completion.ipynb +0 -0
- {banks-2.3.0 → banks-2.4.0}/docs/config.md +0 -0
- {banks-2.3.0 → banks-2.4.0}/docs/examples.md +0 -0
- {banks-2.3.0 → banks-2.4.0}/docs/index.md +0 -0
- {banks-2.3.0 → banks-2.4.0}/docs/prompt.md +0 -0
- {banks-2.3.0 → banks-2.4.0}/docs/python.md +0 -0
- {banks-2.3.0 → banks-2.4.0}/docs/registry.md +0 -0
- {banks-2.3.0 → banks-2.4.0}/mkdocs.yml +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/__init__.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/cache.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/config.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/env.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/errors.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/extensions/__init__.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/extensions/chat.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/extensions/completion.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/extensions/docs.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/filters/__init__.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/filters/cache_control.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/filters/lemmatize.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/filters/tool.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/filters/xml.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/prompt.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/registries/__init__.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/registries/directory.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/registries/file.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/registries/redis.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/src/banks/utils.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/__init__.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/conftest.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/data/1x1.pdf +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/data/1x1.png +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/data/empty.mov +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/data/empty.wav +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/e2e/__init__.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/e2e/conftest.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/e2e/test_completion.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/e2e/test_function_calling.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/templates/blog.jinja +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/templates/cache.jinja +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/templates/chat.jinja +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/templates/summarize.jinja +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/templates/summarize_lemma.jinja +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_cache.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_cache_control.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_chat.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_completion.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_config.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_directory_registry.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_file_registry.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_prompt.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_redis_registry.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_tool.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_types.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_utils.py +0 -0
- {banks-2.3.0 → banks-2.4.0}/tests/test_xml.py +0 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Manually triggered release pipeline: bumps the version stored in
# src/banks/__about__.py, commits and tags it on the default branch,
# publishes the package to PyPI and creates the matching GitHub release.
name: PyPI Release

on:
  workflow_dispatch:
    inputs:
      bump:
        description: "Version bump type"
        required: true
        type: choice
        options:
          - MINOR
          - BUGFIX
        default: BUGFIX

jobs:
  release:
    runs-on: ubuntu-latest
    permissions:
      # Required to push the version commit and tag and to create the release.
      contents: write

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.repository.default_branch }}

      - name: Bump version
        env:
          # Pass the input through the environment instead of expanding it
          # inside the script text, so its value is never parsed as shell code.
          BUMP: ${{ github.event.inputs.bump }}
        run: |
          CURRENT=$(sed -n 's/^__version__ = "\([0-9.]*\).*/\1/p' src/banks/__about__.py)
          IFS=. read -r major minor patch <<< "$CURRENT"
          # Stop before anything is committed, tagged or published if the
          # current version could not be read from __about__.py.
          if [ -z "$major" ] || [ -z "$minor" ]; then
            echo "Could not parse the current version from src/banks/__about__.py: '${CURRENT}'"
            exit 1
          fi
          case "$BUMP" in
            BUGFIX)
              patch=$((patch + 1))
              ;;
            MINOR)
              minor=$((minor + 1))
              patch=0
              ;;
            *)
              echo "Unexpected bump type: ${BUMP}"
              exit 1
              ;;
          esac
          VERSION="${major}.${minor}.${patch}"
          # Expose the new version to the following steps.
          echo "VERSION=${VERSION}" >> "$GITHUB_ENV"
          echo "Bumped ${CURRENT} -> ${VERSION} (${BUMP})"

      - name: Update __about__.py on default branch
        env:
          DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          sed -i "s/^__version__ = .*$/__version__ = \"${VERSION}\"/" src/banks/__about__.py
          # Nothing to commit when the file already holds this version.
          git diff --quiet && exit 0
          git add src/banks/__about__.py
          git commit -m "chore: set __version__ to ${VERSION} [skip ci]"
          git push origin "$DEFAULT_BRANCH"

      - name: Create and push tag
        run: |
          git tag "v${VERSION}"
          git push origin "v${VERSION}"

      - name: Install Hatch
        run: pip install hatch

      - name: Publish on PyPi
        env:
          HATCH_INDEX_USER: __token__
          HATCH_INDEX_AUTH: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          hatch build
          hatch publish -y

      - name: Create GitHub Release
        uses: ncipollo/release-action@v1
        with:
          tag: v${{ env.VERSION }}
          artifacts: "dist/*"
          generateReleaseNotes: true
|
|
@@ -1,21 +1,33 @@
|
|
|
1
|
-
#
|
|
1
|
+
# AGENTS.md
|
|
2
2
|
|
|
3
|
-
This file provides guidance to
|
|
3
|
+
This file provides guidance to AI coding assistants when working with code in this repository.
|
|
4
4
|
|
|
5
5
|
## Project Overview
|
|
6
6
|
|
|
7
7
|
Banks is a Python prompt programming language and templating system for LLM applications. It provides a Jinja2-based template engine with specialized extensions and filters for creating dynamic prompts, managing chat messages, handling multimodal content (images/audio/video/documents), and integrating with various LLM providers through LiteLLM.
|
|
8
8
|
|
|
9
|
+
## Quick Reference
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Most common commands
|
|
13
|
+
hatch run test # Run unit tests
|
|
14
|
+
hatch run lint:all # Run all linting checks
|
|
15
|
+
hatch run lint:fmt # Auto-format code
|
|
16
|
+
hatch run test tests/test_foo.py # Run specific test file
|
|
17
|
+
```
|
|
18
|
+
|
|
9
19
|
## Development Commands
|
|
10
20
|
|
|
11
21
|
### Testing
|
|
12
22
|
- Run tests: `hatch run test`
|
|
13
|
-
- Run tests with coverage: `hatch run test-cov`
|
|
23
|
+
- Run tests with coverage: `hatch run test-cov`
|
|
14
24
|
- Generate coverage report: `hatch run cov`
|
|
15
|
-
- Run
|
|
25
|
+
- Run specific test file: `hatch run test tests/test_foo.py`
|
|
26
|
+
- Run e2e tests: `hatch run test tests/e2e/` (requires API keys)
|
|
16
27
|
|
|
17
28
|
### Linting and Type Checking
|
|
18
29
|
- Format code: `hatch run lint:fmt`
|
|
30
|
+
- Auto-fix lint issues: `hatch run lint:fix`
|
|
19
31
|
- Check formatting: `hatch run lint:check`
|
|
20
32
|
- Run type checking: `hatch run lint:typing`
|
|
21
33
|
- Run pylint: `hatch run lint:lint`
|
|
@@ -27,17 +39,17 @@ Banks is a Python prompt programming language and templating system for LLM appl
|
|
|
27
39
|
|
|
28
40
|
### Environment Management
|
|
29
41
|
- All commands use Hatch environments with automatic dependency management
|
|
30
|
-
-
|
|
31
|
-
- Python 3.
|
|
42
|
+
- Uses `uv` as the installer for faster dependency resolution
|
|
43
|
+
- Python 3.9+ supported (tested on 3.10-3.14)
|
|
32
44
|
|
|
33
45
|
## Architecture Overview
|
|
34
46
|
|
|
35
47
|
### Core Components
|
|
36
48
|
|
|
37
49
|
**Prompt Classes** (`src/banks/prompt.py`):
|
|
38
|
-
- `BasePrompt`: Base class with
|
|
50
|
+
- `BasePrompt`: Base class with template rendering, metadata, versioning, and caching
|
|
39
51
|
- `Prompt`: Synchronous prompt rendering with `text()` and `chat_messages()` methods
|
|
40
|
-
- `AsyncPrompt`: Asynchronous version
|
|
52
|
+
- `AsyncPrompt`: Asynchronous version (requires `BANKS_ASYNC_ENABLED=true`)
|
|
41
53
|
- `PromptRegistry`: Protocol interface for prompt storage backends
|
|
42
54
|
|
|
43
55
|
**Type System** (`src/banks/types.py`):
|
|
@@ -51,13 +63,21 @@ Banks is a Python prompt programming language and templating system for LLM appl
|
|
|
51
63
|
- Async support detection and configuration
|
|
52
64
|
- Custom template loader integration
|
|
53
65
|
|
|
66
|
+
**Error Types** (`src/banks/errors.py`):
|
|
67
|
+
- `MissingDependencyError`: Optional dependencies not installed
|
|
68
|
+
- `AsyncError`: Asyncio support misconfiguration
|
|
69
|
+
- `CanaryWordError`: Canary word leaked (prompt injection detection)
|
|
70
|
+
- `PromptNotFoundError`: Prompt not found in registry
|
|
71
|
+
- `InvalidPromptError`: Invalid prompt format
|
|
72
|
+
- `LLMError`: LLM provider errors
|
|
73
|
+
|
|
54
74
|
### Extensions System
|
|
55
75
|
|
|
56
76
|
**Chat Extension** (`src/banks/extensions/chat.py`):
|
|
57
77
|
- `{% chat role="..." %}...{% endchat %}` blocks for structured message creation
|
|
58
78
|
- Automatic conversion to `ChatMessage` objects during rendering
|
|
59
79
|
|
|
60
|
-
**Completion Extension** (`src/banks/extensions/completion.py`):
|
|
80
|
+
**Completion Extension** (`src/banks/extensions/completion.py`):
|
|
61
81
|
- `{% completion model="..." %}...{% endcompletion %}` for in-prompt LLM calls
|
|
62
82
|
- Integrated with LiteLLM for multi-provider support
|
|
63
83
|
- Function calling support within completion blocks
|
|
@@ -65,39 +85,49 @@ Banks is a Python prompt programming language and templating system for LLM appl
|
|
|
65
85
|
### Filters System
|
|
66
86
|
|
|
67
87
|
**Core Filters** (`src/banks/filters/`):
|
|
68
|
-
- `image`: Convert file paths/URLs to base64-encoded image content blocks
|
|
69
|
-
- `audio`: Convert audio files to base64-encoded audio content blocks
|
|
88
|
+
- `image`: Convert file paths/URLs/bytes to base64-encoded image content blocks
|
|
89
|
+
- `audio`: Convert audio files to base64-encoded audio content blocks
|
|
70
90
|
- `video`: Convert video files to base64-encoded video content blocks
|
|
71
91
|
- `document`: Convert documents (PDF, TXT, HTML, CSS, XML, CSV, RTF, JS, JSON) to base64-encoded content blocks
|
|
72
92
|
- `cache_control`: Add Anthropic cache control metadata to content blocks
|
|
73
93
|
- `tool`: Convert Python callables to LLM function call schemas
|
|
74
94
|
- `lemmatize`: Text lemmatization using simplemma
|
|
75
95
|
|
|
96
|
+
**Filter Pattern**: Filters wrap content in `<content_block>` tags and are only useful within `{% chat %}` blocks.
|
|
97
|
+
|
|
76
98
|
### Registry System
|
|
77
99
|
|
|
78
100
|
**Storage Backends** (`src/banks/registries/`):
|
|
79
101
|
- `DirectoryTemplateRegistry`: File system-based prompt storage
|
|
80
|
-
- `FileTemplateRegistry`: Single file-based storage
|
|
102
|
+
- `FileTemplateRegistry`: Single file-based storage
|
|
81
103
|
- `RedisTemplateRegistry`: Redis-backed storage for distributed scenarios
|
|
82
104
|
- All registries implement the `PromptRegistry` protocol
|
|
83
105
|
|
|
106
|
+
### Caching System
|
|
107
|
+
|
|
108
|
+
**Render Cache** (`src/banks/cache.py`):
|
|
109
|
+
- `RenderCache`: Protocol interface for caching rendered prompts
|
|
110
|
+
- `DefaultCache`: In-memory cache using pickle-serialized context as key
|
|
111
|
+
- Prevents re-rendering identical template + context combinations
|
|
112
|
+
|
|
84
113
|
### Configuration
|
|
85
114
|
|
|
86
115
|
**Config System** (`src/banks/config.py`):
|
|
87
116
|
- Environment variable-based configuration with `BANKS_` prefix
|
|
88
|
-
- `BANKS_ASYNC_ENABLED`: Enable async template rendering
|
|
117
|
+
- `BANKS_ASYNC_ENABLED`: Enable async template rendering (must be set before import)
|
|
89
118
|
- `BANKS_USER_DATA_PATH`: Custom user data directory
|
|
90
119
|
|
|
91
120
|
## Key Development Patterns
|
|
92
121
|
|
|
93
122
|
### Template Rendering Flow
|
|
94
123
|
1. Templates parsed by Jinja2 environment with Banks extensions
|
|
95
|
-
2. Chat blocks converted to JSON during rendering
|
|
124
|
+
2. Chat blocks converted to JSON during rendering
|
|
96
125
|
3. `chat_messages()` parses JSON back to `ChatMessage` objects
|
|
97
126
|
4. Caching layer prevents re-rendering identical contexts
|
|
98
127
|
|
|
99
128
|
### Multimodal Content Handling
|
|
100
129
|
- Images/audio/video/documents converted to base64 during filter application
|
|
130
|
+
- Filters accept file paths, URLs, or raw bytes
|
|
101
131
|
- Content blocks maintain type safety and metadata
|
|
102
132
|
- Cache control integrated at content block level
|
|
103
133
|
|
|
@@ -106,23 +136,83 @@ Banks is a Python prompt programming language and templating system for LLM appl
|
|
|
106
136
|
- Docstring parsing for parameter descriptions
|
|
107
137
|
- Type annotations converted to JSON Schema
|
|
108
138
|
|
|
109
|
-
### Async Support Architecture
|
|
139
|
+
### Async Support Architecture
|
|
110
140
|
- Global environment state requires async decision at import time
|
|
111
141
|
- `BANKS_ASYNC_ENABLED` must be set before importing banks modules
|
|
112
142
|
- `AsyncPrompt` provides `await`-able rendering methods
|
|
113
143
|
|
|
114
|
-
## Testing
|
|
144
|
+
## Testing
|
|
145
|
+
|
|
146
|
+
### Test Markers
|
|
147
|
+
- `@pytest.mark.e2e`: End-to-end tests requiring external services
|
|
148
|
+
- `@pytest.mark.redis`: Tests requiring a running Redis instance
|
|
149
|
+
|
|
150
|
+
### Required Environment Variables for E2E Tests
|
|
151
|
+
- `OPENAI_API_KEY`: For OpenAI-based tests
|
|
152
|
+
- `ANTHROPIC_API_KEY`: For Anthropic-based tests
|
|
153
|
+
|
|
154
|
+
### Test Data
|
|
155
|
+
- Test fixtures in `tests/data/` (images, audio, video, PDFs)
|
|
156
|
+
- Template examples in `tests/templates/`
|
|
157
|
+
|
|
158
|
+
### Running Specific Tests
|
|
159
|
+
```bash
|
|
160
|
+
hatch run test tests/test_image.py # Single file
|
|
161
|
+
hatch run test tests/test_image.py::test_name # Single test
|
|
162
|
+
hatch run test -k "image" # Tests matching pattern
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## Code Style
|
|
166
|
+
|
|
167
|
+
### Formatting
|
|
168
|
+
- Line length: 120 characters
|
|
169
|
+
- Use ruff for formatting and linting
|
|
170
|
+
- Imports sorted with `banks` as first-party
|
|
171
|
+
|
|
172
|
+
### Type Hints
|
|
173
|
+
- All public functions should have type annotations
|
|
174
|
+
- Use `from __future__ import annotations` for forward references
|
|
175
|
+
- MyPy strict mode enforced
|
|
176
|
+
|
|
177
|
+
### Conventions
|
|
178
|
+
- SPDX license headers in all source files
|
|
179
|
+
- Docstrings for public APIs
|
|
180
|
+
- Relative imports banned (use absolute `from banks.x import y`)
|
|
181
|
+
|
|
182
|
+
## Public API
|
|
115
183
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
184
|
+
The main exports from `banks` package:
|
|
185
|
+
```python
|
|
186
|
+
from banks import Prompt, AsyncPrompt, ChatMessage, config, env
|
|
187
|
+
```
|
|
120
188
|
|
|
121
|
-
##
|
|
189
|
+
## Dependencies
|
|
122
190
|
|
|
191
|
+
**Core (required)**:
|
|
123
192
|
- `jinja2`: Core templating engine
|
|
124
193
|
- `pydantic`: Type validation and serialization
|
|
125
|
-
- `litellm`: Multi-provider LLM integration (optional)
|
|
126
|
-
- `redis`: Redis registry backend (optional)
|
|
127
194
|
- `griffe`: Code introspection utilities
|
|
128
|
-
- `platformdirs`: Cross-platform data directory handling
|
|
195
|
+
- `platformdirs`: Cross-platform data directory handling
|
|
196
|
+
- `filetype`: File type detection for multimodal content
|
|
197
|
+
- `deprecated`: Deprecation decorators
|
|
198
|
+
|
|
199
|
+
**Optional**:
|
|
200
|
+
- `litellm`: Multi-provider LLM integration (`banks[all]`)
|
|
201
|
+
- `redis`: Redis registry backend (`banks[all]`)
|
|
202
|
+
- `simplemma`: Lemmatization filter (dev dependency)
|
|
203
|
+
|
|
204
|
+
## CI/CD
|
|
205
|
+
|
|
206
|
+
- **test.yml**: Runs tests on Python 3.10-3.14
|
|
207
|
+
- **docs.yml**: Builds and deploys documentation
|
|
208
|
+
- **release.yml**: Handles package releases
|
|
209
|
+
|
|
210
|
+
## PR Guidelines
|
|
211
|
+
|
|
212
|
+
Follow conventional commit prefixes for PR titles:
|
|
213
|
+
- `fix:` - Bug fixes
|
|
214
|
+
- `feat:` - New features
|
|
215
|
+
- `chore:` - Maintenance
|
|
216
|
+
- `docs:` - Documentation
|
|
217
|
+
- `refactor:` - Code refactoring
|
|
218
|
+
- `test:` - Test additions/changes
|
banks-2.4.0/CLAUDE.md
ADDED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: banks
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.4.0
|
|
4
4
|
Summary: A prompt programming language
|
|
5
5
|
Project-URL: Documentation, https://github.com/masci/banks#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/masci/banks/issues
|
|
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
|
21
21
|
Requires-Python: >=3.9
|
|
22
22
|
Requires-Dist: deprecated
|
|
23
23
|
Requires-Dist: eval-type-backport; python_version < '3.10'
|
|
24
|
+
Requires-Dist: filetype>=1.2.0
|
|
24
25
|
Requires-Dist: griffe
|
|
25
26
|
Requires-Dist: jinja2
|
|
26
27
|
Requires-Dist: platformdirs
|
|
@@ -30,6 +30,7 @@ dependencies = [
|
|
|
30
30
|
"deprecated",
|
|
31
31
|
"eval-type-backport;python_version<'3.10'",
|
|
32
32
|
"platformdirs",
|
|
33
|
+
"filetype>=1.2.0",
|
|
33
34
|
]
|
|
34
35
|
|
|
35
36
|
[project.optional-dependencies]
|
|
@@ -78,6 +79,7 @@ lint = "pylint {args:src/banks}"
|
|
|
78
79
|
typing = "mypy --install-types --non-interactive {args:src/banks}"
|
|
79
80
|
all = ["check", "typing", "lint"]
|
|
80
81
|
fmt = "ruff format {args}"
|
|
82
|
+
fix = "ruff check --fix {args}"
|
|
81
83
|
|
|
82
84
|
[tool.hatch.build.targets.wheel]
|
|
83
85
|
only-include = ["src/banks", "src/templates"]
|
|
@@ -183,6 +185,7 @@ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
|
|
|
183
185
|
module = ["litellm.*", "simplemma.*", "deprecated.*"]
|
|
184
186
|
ignore_missing_imports = true
|
|
185
187
|
|
|
188
|
+
|
|
186
189
|
[tool.pylint]
|
|
187
190
|
disable = [
|
|
188
191
|
"line-too-long",
|
|
@@ -6,7 +6,9 @@ from pathlib import Path
|
|
|
6
6
|
from typing import cast
|
|
7
7
|
from urllib.parse import urlparse
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
import filetype # type: ignore[import-untyped]
|
|
10
|
+
|
|
11
|
+
from banks.types import AudioFormat, ContentBlock, InputAudio, resolve_binary
|
|
10
12
|
|
|
11
13
|
BASE64_AUDIO_REGEX = re.compile(r"audio\/.*;base64,.*")
|
|
12
14
|
|
|
@@ -38,7 +40,18 @@ def _get_audio_format_from_url(url: str) -> AudioFormat:
|
|
|
38
40
|
return "mp3"
|
|
39
41
|
|
|
40
42
|
|
|
41
|
-
def
|
|
43
|
+
def _get_audio_format_from_bytes(data: bytes) -> AudioFormat:
    """Guess the audio format of raw bytes with the filetype library.

    Returns the detected extension when it is one of mp3, wav, m4a, webm, ogg or flac,
    and falls back to "mp3" in every other case.
    """
    detected = filetype.guess(data)
    # Nothing recognised: use the default format
    if detected is None:
        return "mp3"

    extension = detected.extension
    # Recognised, but not one of the accepted audio formats: use the default as well
    if extension not in ("mp3", "wav", "m4a", "webm", "ogg", "flac"):
        return "mp3"
    return cast(AudioFormat, extension)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def audio(value: str | bytes) -> str:
|
|
42
55
|
"""Wrap the filtered value into a ContentBlock of type audio.
|
|
43
56
|
|
|
44
57
|
The resulting ChatMessage will have the field `content` populated with a list of ContentBlock objects.
|
|
@@ -51,7 +64,10 @@ def audio(value: str) -> str:
|
|
|
51
64
|
{{ "https://example.com/audio.mp3" | audio }}
|
|
52
65
|
```
|
|
53
66
|
"""
|
|
54
|
-
if
|
|
67
|
+
if isinstance(value, bytes):
|
|
68
|
+
audio_format = _get_audio_format_from_bytes(resolve_binary(value, as_base64=False))
|
|
69
|
+
input_audio = InputAudio.from_bytes(value, audio_format=audio_format)
|
|
70
|
+
elif _is_url(value):
|
|
55
71
|
audio_format = _get_audio_format_from_url(value)
|
|
56
72
|
input_audio = InputAudio.from_url(value, audio_format)
|
|
57
73
|
else:
|
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2023-present Massimiliano Pippi <mpippi@gmail.com>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
|
+
import mimetypes
|
|
4
5
|
import re
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
from typing import cast
|
|
7
8
|
from urllib.parse import urlparse
|
|
8
9
|
|
|
9
|
-
|
|
10
|
+
import filetype # type: ignore[import-untyped]
|
|
11
|
+
|
|
12
|
+
from banks.types import ContentBlock, DocumentFormat, InputDocument, resolve_binary
|
|
10
13
|
|
|
11
14
|
BASE64_DOCUMENT_REGEX = re.compile(r"(text|application)\/.*;base64,.*")
|
|
12
15
|
|
|
@@ -36,7 +39,7 @@ def _get_document_format_from_url(url: str) -> DocumentFormat:
|
|
|
36
39
|
# text/css
|
|
37
40
|
# text/plain
|
|
38
41
|
# text/xml
|
|
39
|
-
# text/
|
|
42
|
+
# text/csv
|
|
40
43
|
# text/rtf
|
|
41
44
|
# text/javascript
|
|
42
45
|
# application/json
|
|
@@ -68,13 +71,46 @@ def _get_document_format_from_url(url: str) -> DocumentFormat:
|
|
|
68
71
|
"javascript",
|
|
69
72
|
"json",
|
|
70
73
|
):
|
|
74
|
+
# Because Claude only supports pdf and text, and Gemini only supports a small subset of text formats,
|
|
75
|
+
# we can default to 'txt' for any text-based format that is not pdf. This allows the data to be sent to the LLM
|
|
76
|
+
# in an acceptable format, but the LLM should still be able to understand the content: e.g., html, markdown,
|
|
77
|
+
# xml, etc.
|
|
71
78
|
if path.endswith(f".{fmt}"):
|
|
79
|
+
if fmt == "pdf":
|
|
80
|
+
return cast(DocumentFormat, "pdf")
|
|
81
|
+
return "txt"
|
|
82
|
+
mime = mimetypes.guess_type(path)[0]
|
|
83
|
+
if mime is not None and mime.startswith("text/"):
|
|
84
|
+
return "txt"
|
|
85
|
+
# For URLs whose type cannot be guessed from the path, assume pdf: it is the most likely format when nothing indicates otherwise
|
|
86
|
+
if mime is None:
|
|
87
|
+
return "pdf"
|
|
88
|
+
# Document type indicated to be other than pdf or text type
|
|
89
|
+
raise ValueError("Unsupported document format: " + path)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _get_document_format_from_bytes(data: bytes) -> DocumentFormat:
    """Classify raw document bytes as "pdf" or "txt" using the filetype library.

    Raises:
        ValueError: if filetype recognises the data as a type other than pdf or rtf.
    """
    detected = filetype.guess(data)

    # filetype is good at detecting binary formats, but not text-based ones, so no match is a
    # good indicator that the data is text-based.
    # Because Claude only supports pdf and text, and Gemini only supports a small subset of text
    # formats, every text-based format is reported as 'txt'. This lets the data be sent to the LLM
    # in an acceptable format while the LLM can still understand the content: e.g., html, markdown,
    # xml, etc.
    # If detecting individual text types should become desirable, something like Google's Magika
    # is the recommended tool.
    if detected is None:
        return "txt"

    extension = detected.extension
    # pdf is the only non-text format that is accepted
    if extension == "pdf":
        return cast(DocumentFormat, extension)
    # rtf is the one text format filetype can identify; report it as plain text like the others
    if extension == "rtf":
        return "txt"

    # There are many common document types (like word, excel, powerpoint, etc.) that are not supported.
    raise ValueError("Unsupported document format: " + extension)
|
|
75
111
|
|
|
76
112
|
|
|
77
|
-
def document(value: str) -> str:
|
|
113
|
+
def document(value: str | bytes) -> str:
|
|
78
114
|
"""Wrap the filtered value into a ContentBlock of type document.
|
|
79
115
|
|
|
80
116
|
The resulting ChatMessage will have the field `content` populated with a list of ContentBlock objects.
|
|
@@ -87,7 +123,10 @@ def document(value: str) -> str:
|
|
|
87
123
|
{{ "https://example.com/document.pdf" | document }}
|
|
88
124
|
```
|
|
89
125
|
"""
|
|
90
|
-
if
|
|
126
|
+
if isinstance(value, bytes):
|
|
127
|
+
document_format = _get_document_format_from_bytes(resolve_binary(value, as_base64=False))
|
|
128
|
+
input_document = InputDocument.from_bytes(value, document_format=document_format)
|
|
129
|
+
elif _is_url(value):
|
|
91
130
|
document_format = _get_document_format_from_url(value)
|
|
92
131
|
input_document = InputDocument.from_url(value, document_format)
|
|
93
132
|
else:
|
|
@@ -22,7 +22,7 @@ def _is_url(string: str) -> bool:
|
|
|
22
22
|
return True
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
def image(value: str) -> str:
|
|
25
|
+
def image(value: str | bytes) -> str:
|
|
26
26
|
"""Wrap the filtered value into a ContentBlock of type image.
|
|
27
27
|
|
|
28
28
|
The resulting ChatMessage will have the field `content` populated with a list of ContentBlock objects.
|
|
@@ -38,7 +38,9 @@ def image(value: str) -> str:
|
|
|
38
38
|
this filter marks the content to cache by surrounding it with `<content_block>` and
|
|
39
39
|
`</content_block>`, so it's only useful when used within a `{% chat %}` block.
|
|
40
40
|
"""
|
|
41
|
-
if
|
|
41
|
+
if isinstance(value, bytes):
|
|
42
|
+
image_url = ImageUrl.from_bytes(bytes_str=value)
|
|
43
|
+
elif _is_url(value):
|
|
42
44
|
image_url = ImageUrl(url=value)
|
|
43
45
|
else:
|
|
44
46
|
image_url = ImageUrl.from_path(Path(value))
|
|
@@ -6,11 +6,39 @@ from pathlib import Path
|
|
|
6
6
|
from typing import cast
|
|
7
7
|
from urllib.parse import urlparse
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
import filetype # type: ignore[import-untyped]
|
|
10
|
+
from filetype.types.video import IsoBmff # type: ignore[import-untyped]
|
|
11
|
+
|
|
12
|
+
from banks.types import ContentBlock, InputVideo, VideoFormat, resolve_binary
|
|
10
13
|
|
|
11
14
|
BASE64_VIDEO_REGEX = re.compile(r"video\/.*;base64,.*")
|
|
12
15
|
|
|
13
16
|
|
|
17
|
+
class M3gp(IsoBmff):
    """
    Matcher for 3gp video files.

    Used in place of the 3gp matcher shipped with the filetype lib,
    which does not work correctly for these files.
    """

    MIME = "video/3gpp"
    EXTENSION = "3gp"

    def __init__(self):
        super().__init__(mime=M3gp.MIME, extension=M3gp.EXTENSION)

    def match(self, buf):
        """Return True when `buf` is ISO BMFF data whose major or compatible brand is a 3gp brand."""
        if not self._is_isobmff(buf):
            return False

        brands_3gp = ["3gp4", "3gp5", "3gpp"]
        major_brand, _, compatible_brands = self._get_ftyp(buf)
        # Compatible brands are consulted first, then the major brand
        if any(brand in brands_3gp for brand in compatible_brands):
            return True
        return major_brand in brands_3gp
|
|
40
|
+
|
|
41
|
+
|
|
14
42
|
def _is_url(string: str) -> bool:
|
|
15
43
|
"""Check if a string is a URL."""
|
|
16
44
|
result = urlparse(string)
|
|
@@ -40,7 +68,22 @@ def _get_video_format_from_url(url: str) -> VideoFormat:
|
|
|
40
68
|
return "mp4"
|
|
41
69
|
|
|
42
70
|
|
|
43
|
-
def
|
|
71
|
+
def _get_video_format_from_bytes(data: bytes) -> VideoFormat:
    """Extract video format from bytes data using filetype library.

    Args:
        data: Video content to inspect.

    Returns:
        The detected extension when it is one of mp4, mpg, mov, avi, flv, webm, wmv or 3gp,
        otherwise "mp4".
    """
    # Register the custom 3gp matcher only once. M3gp defines no __eq__, so (unless a base class
    # provides one) a new instance never compares equal to one registered earlier and a plain
    # membership test would add another matcher on every call. Look for an existing matcher by
    # type instead.
    if not any(isinstance(matcher, M3gp) for matcher in filetype.types):
        filetype.add_type(M3gp())

    kind = filetype.guess(data)
    if kind is not None:
        fmt = kind.extension
        if fmt in ("mp4", "mpg", "mov", "avi", "flv", "webm", "wmv", "3gp"):
            return cast(VideoFormat, fmt)
    # Default to mp4 if format cannot be determined
    return "mp4"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def video(value: str | bytes) -> str:
|
|
44
87
|
"""Wrap the filtered value into a ContentBlock of type video.
|
|
45
88
|
|
|
46
89
|
The resulting ChatMessage will have the field `content` populated with a list of ContentBlock objects.
|
|
@@ -53,7 +96,10 @@ def video(value: str) -> str:
|
|
|
53
96
|
{{ "https://example.com/video.mp4" | video }}
|
|
54
97
|
```
|
|
55
98
|
"""
|
|
56
|
-
if
|
|
99
|
+
if isinstance(value, bytes):
|
|
100
|
+
video_format = _get_video_format_from_bytes(resolve_binary(value, as_base64=False))
|
|
101
|
+
input_video = InputVideo.from_bytes(value, video_format=video_format)
|
|
102
|
+
elif _is_url(value):
|
|
57
103
|
video_format = _get_video_format_from_url(value)
|
|
58
104
|
input_video = InputVideo.from_url(value, video_format)
|
|
59
105
|
else:
|