recallforge 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recallforge-0.1.0/LICENSE +21 -0
- recallforge-0.1.0/PKG-INFO +305 -0
- recallforge-0.1.0/README.md +256 -0
- recallforge-0.1.0/pyproject.toml +79 -0
- recallforge-0.1.0/setup.cfg +4 -0
- recallforge-0.1.0/src/recallforge/__init__.py +154 -0
- recallforge-0.1.0/src/recallforge/__main__.py +13 -0
- recallforge-0.1.0/src/recallforge/backends/__init__.py +110 -0
- recallforge-0.1.0/src/recallforge/backends/base.py +193 -0
- recallforge-0.1.0/src/recallforge/backends/mlx_backend.py +1307 -0
- recallforge-0.1.0/src/recallforge/backends/torch_backend.py +610 -0
- recallforge-0.1.0/src/recallforge/cache.py +37 -0
- recallforge-0.1.0/src/recallforge/cli.py +534 -0
- recallforge-0.1.0/src/recallforge/documents.py +317 -0
- recallforge-0.1.0/src/recallforge/py.typed +0 -0
- recallforge-0.1.0/src/recallforge/search.py +667 -0
- recallforge-0.1.0/src/recallforge/server.py +1270 -0
- recallforge-0.1.0/src/recallforge/storage/__init__.py +15 -0
- recallforge-0.1.0/src/recallforge/storage/base.py +339 -0
- recallforge-0.1.0/src/recallforge/storage/lancedb_backend.py +2595 -0
- recallforge-0.1.0/src/recallforge/video.py +294 -0
- recallforge-0.1.0/src/recallforge/watch_folder.py +422 -0
- recallforge-0.1.0/src/recallforge.egg-info/PKG-INFO +305 -0
- recallforge-0.1.0/src/recallforge.egg-info/SOURCES.txt +40 -0
- recallforge-0.1.0/src/recallforge.egg-info/dependency_links.txt +1 -0
- recallforge-0.1.0/src/recallforge.egg-info/entry_points.txt +2 -0
- recallforge-0.1.0/src/recallforge.egg-info/requires.txt +35 -0
- recallforge-0.1.0/src/recallforge.egg-info/top_level.txt +1 -0
- recallforge-0.1.0/tests/test_backend_selection.py +52 -0
- recallforge-0.1.0/tests/test_backends.py +226 -0
- recallforge-0.1.0/tests/test_batch_tool.py +292 -0
- recallforge-0.1.0/tests/test_config_tools.py +432 -0
- recallforge-0.1.0/tests/test_documents.py +134 -0
- recallforge-0.1.0/tests/test_embedding_cache.py +183 -0
- recallforge-0.1.0/tests/test_error_responses.py +312 -0
- recallforge-0.1.0/tests/test_json_compliance.py +287 -0
- recallforge-0.1.0/tests/test_live.py +367 -0
- recallforge-0.1.0/tests/test_schema_migration.py +306 -0
- recallforge-0.1.0/tests/test_search_pipeline.py +462 -0
- recallforge-0.1.0/tests/test_sql_validation.py +91 -0
- recallforge-0.1.0/tests/test_storage.py +929 -0
- recallforge-0.1.0/tests/test_watch_folder.py +186 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright 2025-2026 Brian Meyer
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: recallforge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: RecallForge - Cross-Modal Vision-Language Search Engine
|
|
5
|
+
Author: Brian Meyer
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/brianmeyer/recallforge
|
|
8
|
+
Project-URL: Repository, https://github.com/brianmeyer/recallforge
|
|
9
|
+
Project-URL: Issues, https://github.com/brianmeyer/recallforge/issues
|
|
10
|
+
Keywords: search,semantic,embedding,vision-language,cross-modal
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Python: <3.14,>=3.12
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: lancedb<1.0,>=0.20
|
|
20
|
+
Requires-Dist: pyarrow<20.0,>=18.0
|
|
21
|
+
Requires-Dist: pillow<12.0,>=10.0
|
|
22
|
+
Requires-Dist: numpy<3.0,>=2.0
|
|
23
|
+
Requires-Dist: mcp<2.0,>=1.0
|
|
24
|
+
Provides-Extra: torch
|
|
25
|
+
Requires-Dist: torch<3.0,>=2.0; extra == "torch"
|
|
26
|
+
Requires-Dist: torchvision<1.0,>=0.15; extra == "torch"
|
|
27
|
+
Requires-Dist: transformers<5.0,>=4.40; extra == "torch"
|
|
28
|
+
Requires-Dist: scipy<2.0,>=1.10; extra == "torch"
|
|
29
|
+
Requires-Dist: qwen-vl-utils<1.0,>=0.0.14; extra == "torch"
|
|
30
|
+
Provides-Extra: mlx
|
|
31
|
+
Requires-Dist: mlx<1.0,>=0.20; extra == "mlx"
|
|
32
|
+
Requires-Dist: mlx-vlm<1.0,>=0.1; extra == "mlx"
|
|
33
|
+
Requires-Dist: qwen-vl-utils<1.0,>=0.0.14; extra == "mlx"
|
|
34
|
+
Requires-Dist: torchvision<1.0,>=0.15; extra == "mlx"
|
|
35
|
+
Provides-Extra: docs
|
|
36
|
+
Requires-Dist: pypdf<6.0,>=5.0; extra == "docs"
|
|
37
|
+
Provides-Extra: cuda
|
|
38
|
+
Requires-Dist: torch<3.0,>=2.0; extra == "cuda"
|
|
39
|
+
Requires-Dist: torchvision<1.0,>=0.15; extra == "cuda"
|
|
40
|
+
Requires-Dist: transformers<5.0,>=4.40; extra == "cuda"
|
|
41
|
+
Requires-Dist: scipy<2.0,>=1.10; extra == "cuda"
|
|
42
|
+
Requires-Dist: qwen-vl-utils<1.0,>=0.0.14; extra == "cuda"
|
|
43
|
+
Provides-Extra: dev
|
|
44
|
+
Requires-Dist: pytest<9.0,>=8.0; extra == "dev"
|
|
45
|
+
Requires-Dist: pytest-asyncio<1.0,>=0.24; extra == "dev"
|
|
46
|
+
Provides-Extra: all
|
|
47
|
+
Requires-Dist: recallforge[docs,mlx,torch]; extra == "all"
|
|
48
|
+
Dynamic: license-file
|
|
49
|
+
|
|
50
|
+
# RecallForge
|
|
51
|
+
|
|
52
|
+
   
|
|
53
|
+
|
|
54
|
+
**Every modality, one search. Local first.**
|
|
55
|
+
|
|
56
|
+

|
|
57
|
+
|
|
58
|
+
Standard RAG only works on text. Drop a PDF with charts, a photo of a whiteboard, or a video recording — and your AI agent goes blind. RecallForge gives agents **eyes and ears over your local filesystem**. Text, images, documents, and video all live in one unified search space, and nothing ever leaves your machine.
|
|
59
|
+
|
|
60
|
+
## What this enables
|
|
61
|
+
|
|
62
|
+
> **You:** "What did the whiteboard look like in our last meeting?"
|
|
63
|
+
>
|
|
64
|
+
> **Claude:** *(Searches your local `~/Documents`, finds a photo of a whiteboard from an iPhone, reads the handwriting via Qwen3-VL, and surfaces the image with context.)*
|
|
65
|
+
|
|
66
|
+
> **You:** "Find the architecture diagram from that PDF I downloaded last week."
|
|
67
|
+
>
|
|
68
|
+
> **Claude:** *(Indexes the PDF, matches your query against extracted text and embedded figures, returns the relevant page.)*
|
|
69
|
+
|
|
70
|
+
> **You:** *(Drops an image of a circuit board)* "Find my notes related to this."
|
|
71
|
+
>
|
|
72
|
+
> **Claude:** *(Reverse image-to-text search across your indexed notes. Returns matching documents.)*
|
|
73
|
+
|
|
74
|
+
One query. Any modality. All local.
|
|
75
|
+
|
|
76
|
+
## What makes RecallForge different
|
|
77
|
+
|
|
78
|
+
| Capability | RecallForge | Chroma | Mem0 | Qdrant | Weaviate |
|
|
79
|
+
|------------|-------------|--------|------|--------|----------|
|
|
80
|
+
| Cross-modal search | ✅ Native | ✅ OpenCLIP | ❌ Text only | ❌ | ✅ CLIP modules |
|
|
81
|
+
| Video support [Beta] | ✅ | ❌ | ❌ | ❌ | ❌ |
|
|
82
|
+
| Document ingest (PDF/DOCX/PPTX) | ✅ | ❌ | ❌ | ❌ | ❌ |
|
|
83
|
+
| Built-in reranking | ✅ Multimodal | ❌ | ❌ | ✅ ColBERT | ✅ Modules |
|
|
84
|
+
| Query expansion | ✅ Multimodal | ❌ | ❌ | ❌ | ✅ Generative |
|
|
85
|
+
| MCP-native | ✅ 17 tools | ❌ | ❌ | ❌ | ❌ |
|
|
86
|
+
| 100% local | ✅ | ✅ | ⚠️ Cloud default | ✅ | ✅ Docker |
|
|
87
|
+
| Apple Silicon optimized | ✅ MLX 4-bit | ❌ | ❌ | ❌ | ❌ |
|
|
88
|
+
| Cloud option | ❌ | ✅ | ✅ | ✅ | ✅ |
|
|
89
|
+
| JS/TS SDK | ❌ | ✅ | ✅ | ✅ | ✅ |
|
|
90
|
+
|
|
91
|
+
**Use RecallForge when:** You need multimodal memory for AI agents that runs entirely on your machine, especially on Apple Silicon. One search across text, images, documents, and video.
|
|
92
|
+
|
|
93
|
+
**Use something else when:** You need cloud hosting, massive scale (millions+ vectors), or a JS/TS-first ecosystem.
|
|
94
|
+
|
|
95
|
+
## Performance
|
|
96
|
+
|
|
97
|
+
4 modalities (text, images, documents, video) unified in a single MLX-optimized local vector space. Sub-60ms search latency. Under 400MB resident memory.
|
|
98
|
+
|
|
99
|
+
Measured on Mac mini M4 16GB, MLX 4-bit, embed mode:
|
|
100
|
+
|
|
101
|
+
| Metric | MLX 4-bit | PyTorch fp16 |
|
|
102
|
+
|--------|-----------|--------------|
|
|
103
|
+
| Warm search p50 | 53ms | 599ms |
|
|
104
|
+
| Warm search p95 | 55ms | — |
|
|
105
|
+
| Cold start | 7.6s | ~20s |
|
|
106
|
+
| Peak RSS (embed) | 329MB* | ~4GB |
|
|
107
|
+
| Text indexing | 5.0 docs/sec | — |
|
|
108
|
+
|
|
109
|
+
*\*MLX maps model weights lazily via memory-mapped files. RSS reflects resident pages, not full model size (~1.7GB on disk for embed mode). Actual memory pressure is low.*
|
|
110
|
+
|
|
111
|
+
Search quality comes from the multi-stage pipeline (BM25 + vector + RRF fusion + cross-encoder reranking), not raw embedding accuracy alone.
|
|
112
|
+
|
|
113
|
+
## Installation
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
pip install recallforge[mlx] # Apple Silicon (recommended, 4-bit quantization)
|
|
117
|
+
pip install recallforge[cuda] # NVIDIA GPU
|
|
118
|
+
pip install recallforge[torch] # CPU / other PyTorch targets
|
|
119
|
+
pip install recallforge[docs] # add richer PDF extraction (optional)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
> **Note:** `pip install recallforge` installs the core without a backend.
|
|
123
|
+
> You need at least one of `[mlx]`, `[cuda]`, or `[torch]` to run inference.
|
|
124
|
+
|
|
125
|
+
From source:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
git clone https://github.com/brianmeyer/recallforge.git
|
|
129
|
+
cd recallforge
|
|
130
|
+
pip install -e ".[mlx]"
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Requirements
|
|
134
|
+
|
|
135
|
+
- Python 3.12 or 3.13 required (3.14 not yet supported, pending pyarrow wheel)
|
|
136
|
+
- Disk: ~2-5GB free for model downloads on first run
|
|
137
|
+
- RAM (MLX 4-bit): ~1.7GB (`embed`) to ~4.4GB (`full`)
|
|
138
|
+
- `ffmpeg` recommended for video indexing/search
|
|
139
|
+
- First run downloads models automatically and may take a few minutes
|
|
140
|
+
|
|
141
|
+
## MCP Server (primary use)
|
|
142
|
+
|
|
143
|
+
RecallForge is designed as a **Model Context Protocol server for AI agents**. Configure in Claude Desktop (or any MCP-compatible agent host):
|
|
144
|
+
|
|
145
|
+
```json
|
|
146
|
+
{
|
|
147
|
+
"mcpServers": {
|
|
148
|
+
"recallforge": {
|
|
149
|
+
"command": "recallforge",
|
|
150
|
+
"args": ["serve", "--mode", "full"]
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Run manually:
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
recallforge serve --mode embed --backend mlx --quantize 4bit
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Exposes **17 tools** for agents: `ingest`, `search`, `search_fts`, `search_vec`, `index_document`, `index_image`, `memory_add`, `memory_update`, `memory_delete`, `index_folder`, `status`, `rebuild_fts`, `list_collections`, `list_namespaces`, `batch`, `get_config`, `set_config`.
|
|
163
|
+
|
|
164
|
+
See [docs/mcp-tools.md](docs/mcp-tools.md) for the full tool reference.
|
|
165
|
+
|
|
166
|
+
## Search modes
|
|
167
|
+
|
|
168
|
+
| Mode | Models loaded | Memory (MLX 4-bit) | Quality | Best for |
|
|
169
|
+
|------|--------------|-------------------|---------|----------|
|
|
170
|
+
| `embed` | Embedder | ~1.7GB | Good | Memory-constrained, fast searches |
|
|
171
|
+
| `hybrid` | + Reranker | ~3.4GB | Better | Balanced quality and memory |
|
|
172
|
+
| `full` | + Query Expander | ~4.4GB | Best | Maximum retrieval quality |
|
|
173
|
+
|
|
174
|
+
> **Video [Beta] note:** Video support requires `ffmpeg`. The torch backend video path has a known upstream issue (see [QwenLM/Qwen3.5#58](https://github.com/QwenLM/Qwen3.5/issues/58)).
|
|
175
|
+
|
|
176
|
+
## How it works
|
|
177
|
+
|
|
178
|
+
RecallForge encodes text, images, and video frames into the same 2048-dimensional vector space using Qwen3-VL. This means "find notes about this diagram" works whether the diagram is text, an image, or a frame from a video. A multi-stage pipeline handles the rest:
|
|
179
|
+
|
|
180
|
+
```mermaid
|
|
181
|
+
graph TD
|
|
182
|
+
subgraph Local Filesystem
|
|
183
|
+
Docs[📄 Documents]
|
|
184
|
+
Imgs[🖼️ Images]
|
|
185
|
+
Vids[🎬 Video]
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
subgraph RecallForge Ingest
|
|
189
|
+
Docs --> TxtExt[Text Extractor]
|
|
190
|
+
Imgs --> VLM[Qwen3-VL Encoder]
|
|
191
|
+
Vids --> Frame[Frame & Audio Extractor]
|
|
192
|
+
Frame --> VLM
|
|
193
|
+
TxtExt --> VLM
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
subgraph LanceDB Storage
|
|
197
|
+
VLM -->|2048-dim Vectors| VecDB[(Vector Space)]
|
|
198
|
+
TxtExt -->|Text/Transcripts| FTS[(Tantivy FTS)]
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
subgraph MCP Search Pipeline
|
|
202
|
+
Query[Agent Query] --> BM25[BM25 Text Search]
|
|
203
|
+
Query --> Dense[Vector Similarity Search]
|
|
204
|
+
BM25 --> RRF[RRF Fusion]
|
|
205
|
+
Dense --> RRF
|
|
206
|
+
RRF --> Rerank[Cross-Encoder Reranker]
|
|
207
|
+
Rerank --> Output[Final Context to Agent]
|
|
208
|
+
end
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
**Pipeline:** BM25 probe → Query expansion (full mode) → Parallel BM25 + Vector → RRF fusion → Reranking (hybrid/full) → Score blending
|
|
212
|
+
|
|
213
|
+
## CLI (development & debugging)
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
# Index anything
|
|
217
|
+
recallforge index ./photos ./docs
|
|
218
|
+
recallforge index ~/Movies/demo.mp4
|
|
219
|
+
recallforge index ~/Documents/roadmap.pptx
|
|
220
|
+
|
|
221
|
+
# Search any modality
|
|
222
|
+
recallforge search "whiteboard diagram from last meeting"
|
|
223
|
+
recallforge search --image ./photos/whiteboard.png
|
|
224
|
+
recallforge search --video ~/Movies/demo.mp4
|
|
225
|
+
|
|
226
|
+
# Watch a folder for changes (auto-index)
|
|
227
|
+
recallforge watch start ~/Documents --collection docs
|
|
228
|
+
recallforge watch list
|
|
229
|
+
recallforge watch stop ~/Documents
|
|
230
|
+
|
|
231
|
+
# Status
|
|
232
|
+
recallforge status
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
RecallForge auto-detects MLX on Apple Silicon, PyTorch elsewhere.
|
|
236
|
+
|
|
237
|
+
## Python API
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
from recallforge import get_backend, get_storage
|
|
241
|
+
from recallforge.search import HybridSearcher
|
|
242
|
+
|
|
243
|
+
backend = get_backend()
|
|
244
|
+
storage = get_storage()
|
|
245
|
+
backend.warm_up()
|
|
246
|
+
|
|
247
|
+
# Index
|
|
248
|
+
storage.index_document(
|
|
249
|
+
path="notes.md",
|
|
250
|
+
text="My notes about AI...",
|
|
251
|
+
collection="my_docs",
|
|
252
|
+
model="Qwen3-VL-Embedding-2B",
|
|
253
|
+
embed_func=backend.embed_text,
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
# Search
|
|
257
|
+
searcher = HybridSearcher(backend=backend, storage=storage, limit=10)
|
|
258
|
+
results = searcher.search("artificial intelligence")
|
|
259
|
+
for r in results:
|
|
260
|
+
print(f"[{r.score:.3f}] {r.title}")
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## Configuration
|
|
264
|
+
|
|
265
|
+
| Variable | Default | Description |
|
|
266
|
+
|----------|---------|-------------|
|
|
267
|
+
| `RECALLFORGE_BACKEND` | `auto` | `auto`, `mlx`, `torch` |
|
|
268
|
+
| `RECALLFORGE_MODE` | `full` | `embed`, `hybrid`, `full` |
|
|
269
|
+
| `RECALLFORGE_MLX_QUANTIZE` | `4bit` | `4bit`, `bf16` |
|
|
270
|
+
| `RECALLFORGE_STORE_PATH` | `~/.recallforge` | Storage directory |
|
|
271
|
+
|
|
272
|
+
## Project structure
|
|
273
|
+
|
|
274
|
+
```
|
|
275
|
+
src/recallforge/
|
|
276
|
+
├── backends/
|
|
277
|
+
│ ├── mlx_backend.py # MLX 4-bit/bf16 (Apple Silicon)
|
|
278
|
+
│ └── torch_backend.py # PyTorch (CUDA/MPS/CPU)
|
|
279
|
+
├── storage/
|
|
280
|
+
│ └── lancedb_backend.py # LanceDB + Tantivy FTS
|
|
281
|
+
├── cache.py # LRU embedding cache
|
|
282
|
+
├── search.py # Hybrid search pipeline (BM25 + vector + RRF)
|
|
283
|
+
├── server.py # MCP server (17 tools)
|
|
284
|
+
├── documents.py # PDF/DOCX/PPTX extraction
|
|
285
|
+
├── video.py # Frame/transcript extraction
|
|
286
|
+
├── watch_folder.py # Folder monitoring with dedup
|
|
287
|
+
└── cli.py # CLI interface
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## Development
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
pytest tests/ -m "not live" # Unit tests (no model download needed)
|
|
294
|
+
pytest tests/ -m live -v # Integration tests (requires models)
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for full development guidelines.
|
|
298
|
+
|
|
299
|
+
## Attribution
|
|
300
|
+
|
|
301
|
+
RecallForge is inspired by [QMD](https://github.com/tobil/qmd) by Tobi. QMD pioneered the multi-stage retrieval pipeline (embedding, reranking, query expansion). RecallForge extends this pattern to vision-language with cross-modal retrieval and multi-backend support.
|
|
302
|
+
|
|
303
|
+
## License
|
|
304
|
+
|
|
305
|
+
MIT License
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
# RecallForge
|
|
2
|
+
|
|
3
|
+
   
|
|
4
|
+
|
|
5
|
+
**Every modality, one search. Local first.**
|
|
6
|
+
|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
Standard RAG only works on text. Drop a PDF with charts, a photo of a whiteboard, or a video recording — and your AI agent goes blind. RecallForge gives agents **eyes and ears over your local filesystem**. Text, images, documents, and video all live in one unified search space, and nothing ever leaves your machine.
|
|
10
|
+
|
|
11
|
+
## What this enables
|
|
12
|
+
|
|
13
|
+
> **You:** "What did the whiteboard look like in our last meeting?"
|
|
14
|
+
>
|
|
15
|
+
> **Claude:** *(Searches your local `~/Documents`, finds a photo of a whiteboard from an iPhone, reads the handwriting via Qwen3-VL, and surfaces the image with context.)*
|
|
16
|
+
|
|
17
|
+
> **You:** "Find the architecture diagram from that PDF I downloaded last week."
|
|
18
|
+
>
|
|
19
|
+
> **Claude:** *(Indexes the PDF, matches your query against extracted text and embedded figures, returns the relevant page.)*
|
|
20
|
+
|
|
21
|
+
> **You:** *(Drops an image of a circuit board)* "Find my notes related to this."
|
|
22
|
+
>
|
|
23
|
+
> **Claude:** *(Reverse image-to-text search across your indexed notes. Returns matching documents.)*
|
|
24
|
+
|
|
25
|
+
One query. Any modality. All local.
|
|
26
|
+
|
|
27
|
+
## What makes RecallForge different
|
|
28
|
+
|
|
29
|
+
| Capability | RecallForge | Chroma | Mem0 | Qdrant | Weaviate |
|
|
30
|
+
|------------|-------------|--------|------|--------|----------|
|
|
31
|
+
| Cross-modal search | ✅ Native | ✅ OpenCLIP | ❌ Text only | ❌ | ✅ CLIP modules |
|
|
32
|
+
| Video support [Beta] | ✅ | ❌ | ❌ | ❌ | ❌ |
|
|
33
|
+
| Document ingest (PDF/DOCX/PPTX) | ✅ | ❌ | ❌ | ❌ | ❌ |
|
|
34
|
+
| Built-in reranking | ✅ Multimodal | ❌ | ❌ | ✅ ColBERT | ✅ Modules |
|
|
35
|
+
| Query expansion | ✅ Multimodal | ❌ | ❌ | ❌ | ✅ Generative |
|
|
36
|
+
| MCP-native | ✅ 17 tools | ❌ | ❌ | ❌ | ❌ |
|
|
37
|
+
| 100% local | ✅ | ✅ | ⚠️ Cloud default | ✅ | ✅ Docker |
|
|
38
|
+
| Apple Silicon optimized | ✅ MLX 4-bit | ❌ | ❌ | ❌ | ❌ |
|
|
39
|
+
| Cloud option | ❌ | ✅ | ✅ | ✅ | ✅ |
|
|
40
|
+
| JS/TS SDK | ❌ | ✅ | ✅ | ✅ | ✅ |
|
|
41
|
+
|
|
42
|
+
**Use RecallForge when:** You need multimodal memory for AI agents that runs entirely on your machine, especially on Apple Silicon. One search across text, images, documents, and video.
|
|
43
|
+
|
|
44
|
+
**Use something else when:** You need cloud hosting, massive scale (millions+ vectors), or a JS/TS-first ecosystem.
|
|
45
|
+
|
|
46
|
+
## Performance
|
|
47
|
+
|
|
48
|
+
4 modalities (text, images, documents, video) unified in a single MLX-optimized local vector space. Sub-60ms search latency. Under 400MB resident memory.
|
|
49
|
+
|
|
50
|
+
Measured on Mac mini M4 16GB, MLX 4-bit, embed mode:
|
|
51
|
+
|
|
52
|
+
| Metric | MLX 4-bit | PyTorch fp16 |
|
|
53
|
+
|--------|-----------|--------------|
|
|
54
|
+
| Warm search p50 | 53ms | 599ms |
|
|
55
|
+
| Warm search p95 | 55ms | — |
|
|
56
|
+
| Cold start | 7.6s | ~20s |
|
|
57
|
+
| Peak RSS (embed) | 329MB* | ~4GB |
|
|
58
|
+
| Text indexing | 5.0 docs/sec | — |
|
|
59
|
+
|
|
60
|
+
*\*MLX maps model weights lazily via memory-mapped files. RSS reflects resident pages, not full model size (~1.7GB on disk for embed mode). Actual memory pressure is low.*
|
|
61
|
+
|
|
62
|
+
Search quality comes from the multi-stage pipeline (BM25 + vector + RRF fusion + cross-encoder reranking), not raw embedding accuracy alone.
|
|
63
|
+
|
|
64
|
+
## Installation
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pip install recallforge[mlx] # Apple Silicon (recommended, 4-bit quantization)
|
|
68
|
+
pip install recallforge[cuda] # NVIDIA GPU
|
|
69
|
+
pip install recallforge[torch] # CPU / other PyTorch targets
|
|
70
|
+
pip install recallforge[docs] # add richer PDF extraction (optional)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
> **Note:** `pip install recallforge` installs the core without a backend.
|
|
74
|
+
> You need at least one of `[mlx]`, `[cuda]`, or `[torch]` to run inference.
|
|
75
|
+
|
|
76
|
+
From source:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
git clone https://github.com/brianmeyer/recallforge.git
|
|
80
|
+
cd recallforge
|
|
81
|
+
pip install -e ".[mlx]"
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Requirements
|
|
85
|
+
|
|
86
|
+
- Python 3.12 or 3.13 required (3.14 not yet supported, pending pyarrow wheel)
|
|
87
|
+
- Disk: ~2-5GB free for model downloads on first run
|
|
88
|
+
- RAM (MLX 4-bit): ~1.7GB (`embed`) to ~4.4GB (`full`)
|
|
89
|
+
- `ffmpeg` recommended for video indexing/search
|
|
90
|
+
- First run downloads models automatically and may take a few minutes
|
|
91
|
+
|
|
92
|
+
## MCP Server (primary use)
|
|
93
|
+
|
|
94
|
+
RecallForge is designed as a **Model Context Protocol server for AI agents**. Configure in Claude Desktop (or any MCP-compatible agent host):
|
|
95
|
+
|
|
96
|
+
```json
|
|
97
|
+
{
|
|
98
|
+
"mcpServers": {
|
|
99
|
+
"recallforge": {
|
|
100
|
+
"command": "recallforge",
|
|
101
|
+
"args": ["serve", "--mode", "full"]
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Run manually:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
recallforge serve --mode embed --backend mlx --quantize 4bit
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Exposes **17 tools** for agents: `ingest`, `search`, `search_fts`, `search_vec`, `index_document`, `index_image`, `memory_add`, `memory_update`, `memory_delete`, `index_folder`, `status`, `rebuild_fts`, `list_collections`, `list_namespaces`, `batch`, `get_config`, `set_config`.
|
|
114
|
+
|
|
115
|
+
See [docs/mcp-tools.md](docs/mcp-tools.md) for the full tool reference.
|
|
116
|
+
|
|
117
|
+
## Search modes
|
|
118
|
+
|
|
119
|
+
| Mode | Models loaded | Memory (MLX 4-bit) | Quality | Best for |
|
|
120
|
+
|------|--------------|-------------------|---------|----------|
|
|
121
|
+
| `embed` | Embedder | ~1.7GB | Good | Memory-constrained, fast searches |
|
|
122
|
+
| `hybrid` | + Reranker | ~3.4GB | Better | Balanced quality and memory |
|
|
123
|
+
| `full` | + Query Expander | ~4.4GB | Best | Maximum retrieval quality |
|
|
124
|
+
|
|
125
|
+
> **Video [Beta] note:** Video support requires `ffmpeg`. The torch backend video path has a known upstream issue (see [QwenLM/Qwen3.5#58](https://github.com/QwenLM/Qwen3.5/issues/58)).
|
|
126
|
+
|
|
127
|
+
## How it works
|
|
128
|
+
|
|
129
|
+
RecallForge encodes text, images, and video frames into the same 2048-dimensional vector space using Qwen3-VL. This means "find notes about this diagram" works whether the diagram is text, an image, or a frame from a video. A multi-stage pipeline handles the rest:
|
|
130
|
+
|
|
131
|
+
```mermaid
|
|
132
|
+
graph TD
|
|
133
|
+
subgraph Local Filesystem
|
|
134
|
+
Docs[📄 Documents]
|
|
135
|
+
Imgs[🖼️ Images]
|
|
136
|
+
Vids[🎬 Video]
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
subgraph RecallForge Ingest
|
|
140
|
+
Docs --> TxtExt[Text Extractor]
|
|
141
|
+
Imgs --> VLM[Qwen3-VL Encoder]
|
|
142
|
+
Vids --> Frame[Frame & Audio Extractor]
|
|
143
|
+
Frame --> VLM
|
|
144
|
+
TxtExt --> VLM
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
subgraph LanceDB Storage
|
|
148
|
+
VLM -->|2048-dim Vectors| VecDB[(Vector Space)]
|
|
149
|
+
TxtExt -->|Text/Transcripts| FTS[(Tantivy FTS)]
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
subgraph MCP Search Pipeline
|
|
153
|
+
Query[Agent Query] --> BM25[BM25 Text Search]
|
|
154
|
+
Query --> Dense[Vector Similarity Search]
|
|
155
|
+
BM25 --> RRF[RRF Fusion]
|
|
156
|
+
Dense --> RRF
|
|
157
|
+
RRF --> Rerank[Cross-Encoder Reranker]
|
|
158
|
+
Rerank --> Output[Final Context to Agent]
|
|
159
|
+
end
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
**Pipeline:** BM25 probe → Query expansion (full mode) → Parallel BM25 + Vector → RRF fusion → Reranking (hybrid/full) → Score blending
|
|
163
|
+
|
|
164
|
+
## CLI (development & debugging)
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
# Index anything
|
|
168
|
+
recallforge index ./photos ./docs
|
|
169
|
+
recallforge index ~/Movies/demo.mp4
|
|
170
|
+
recallforge index ~/Documents/roadmap.pptx
|
|
171
|
+
|
|
172
|
+
# Search any modality
|
|
173
|
+
recallforge search "whiteboard diagram from last meeting"
|
|
174
|
+
recallforge search --image ./photos/whiteboard.png
|
|
175
|
+
recallforge search --video ~/Movies/demo.mp4
|
|
176
|
+
|
|
177
|
+
# Watch a folder for changes (auto-index)
|
|
178
|
+
recallforge watch start ~/Documents --collection docs
|
|
179
|
+
recallforge watch list
|
|
180
|
+
recallforge watch stop ~/Documents
|
|
181
|
+
|
|
182
|
+
# Status
|
|
183
|
+
recallforge status
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
RecallForge auto-detects MLX on Apple Silicon, PyTorch elsewhere.
|
|
187
|
+
|
|
188
|
+
## Python API
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
from recallforge import get_backend, get_storage
|
|
192
|
+
from recallforge.search import HybridSearcher
|
|
193
|
+
|
|
194
|
+
backend = get_backend()
|
|
195
|
+
storage = get_storage()
|
|
196
|
+
backend.warm_up()
|
|
197
|
+
|
|
198
|
+
# Index
|
|
199
|
+
storage.index_document(
|
|
200
|
+
path="notes.md",
|
|
201
|
+
text="My notes about AI...",
|
|
202
|
+
collection="my_docs",
|
|
203
|
+
model="Qwen3-VL-Embedding-2B",
|
|
204
|
+
embed_func=backend.embed_text,
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
# Search
|
|
208
|
+
searcher = HybridSearcher(backend=backend, storage=storage, limit=10)
|
|
209
|
+
results = searcher.search("artificial intelligence")
|
|
210
|
+
for r in results:
|
|
211
|
+
print(f"[{r.score:.3f}] {r.title}")
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Configuration
|
|
215
|
+
|
|
216
|
+
| Variable | Default | Description |
|
|
217
|
+
|----------|---------|-------------|
|
|
218
|
+
| `RECALLFORGE_BACKEND` | `auto` | `auto`, `mlx`, `torch` |
|
|
219
|
+
| `RECALLFORGE_MODE` | `full` | `embed`, `hybrid`, `full` |
|
|
220
|
+
| `RECALLFORGE_MLX_QUANTIZE` | `4bit` | `4bit`, `bf16` |
|
|
221
|
+
| `RECALLFORGE_STORE_PATH` | `~/.recallforge` | Storage directory |
|
|
222
|
+
|
|
223
|
+
## Project structure
|
|
224
|
+
|
|
225
|
+
```
|
|
226
|
+
src/recallforge/
|
|
227
|
+
├── backends/
|
|
228
|
+
│ ├── mlx_backend.py # MLX 4-bit/bf16 (Apple Silicon)
|
|
229
|
+
│ └── torch_backend.py # PyTorch (CUDA/MPS/CPU)
|
|
230
|
+
├── storage/
|
|
231
|
+
│ └── lancedb_backend.py # LanceDB + Tantivy FTS
|
|
232
|
+
├── cache.py # LRU embedding cache
|
|
233
|
+
├── search.py # Hybrid search pipeline (BM25 + vector + RRF)
|
|
234
|
+
├── server.py # MCP server (17 tools)
|
|
235
|
+
├── documents.py # PDF/DOCX/PPTX extraction
|
|
236
|
+
├── video.py # Frame/transcript extraction
|
|
237
|
+
├── watch_folder.py # Folder monitoring with dedup
|
|
238
|
+
└── cli.py # CLI interface
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## Development
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
pytest tests/ -m "not live" # Unit tests (no model download needed)
|
|
245
|
+
pytest tests/ -m live -v # Integration tests (requires models)
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for full development guidelines.
|
|
249
|
+
|
|
250
|
+
## Attribution
|
|
251
|
+
|
|
252
|
+
RecallForge is inspired by [QMD](https://github.com/tobil/qmd) by Tobi. QMD pioneered the multi-stage retrieval pipeline (embedding, reranking, query expansion). RecallForge extends this pattern to vision-language with cross-modal retrieval and multi-backend support.
|
|
253
|
+
|
|
254
|
+
## License
|
|
255
|
+
|
|
256
|
+
MIT License
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "recallforge"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "RecallForge - Cross-Modal Vision-Language Search Engine"
|
|
5
|
+
authors = [{name = "Brian Meyer"}]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
requires-python = ">=3.12,<3.14"
|
|
8
|
+
license = {text = "MIT"}
|
|
9
|
+
keywords = ["search", "semantic", "embedding", "vision-language", "cross-modal"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Programming Language :: Python :: 3.12",
|
|
15
|
+
"Programming Language :: Python :: 3.13",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
dependencies = [
|
|
19
|
+
"lancedb>=0.20,<1.0",
|
|
20
|
+
"pyarrow>=18.0,<20.0",
|
|
21
|
+
"pillow>=10.0,<12.0",
|
|
22
|
+
"numpy>=2.0,<3.0",
|
|
23
|
+
"mcp>=1.0,<2.0",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Homepage = "https://github.com/brianmeyer/recallforge"
|
|
28
|
+
Repository = "https://github.com/brianmeyer/recallforge"
|
|
29
|
+
Issues = "https://github.com/brianmeyer/recallforge/issues"
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
torch = [
|
|
33
|
+
"torch>=2.0,<3.0",
|
|
34
|
+
"torchvision>=0.15,<1.0",
|
|
35
|
+
"transformers>=4.40,<5.0",
|
|
36
|
+
"scipy>=1.10,<2.0",
|
|
37
|
+
"qwen-vl-utils>=0.0.14,<1.0",
|
|
38
|
+
]
|
|
39
|
+
mlx = [
|
|
40
|
+
"mlx>=0.20,<1.0",
|
|
41
|
+
"mlx-vlm>=0.1,<1.0",
|
|
42
|
+
"qwen-vl-utils>=0.0.14,<1.0",
|
|
43
|
+
# transformers 5.x Qwen3VLVideoProcessor requires torchvision for processor loading
|
|
44
|
+
"torchvision>=0.15,<1.0",
|
|
45
|
+
]
|
|
46
|
+
docs = ["pypdf>=5.0,<6.0"]
|
|
47
|
+
cuda = [
|
|
48
|
+
"torch>=2.0,<3.0",
|
|
49
|
+
"torchvision>=0.15,<1.0",
|
|
50
|
+
"transformers>=4.40,<5.0",
|
|
51
|
+
"scipy>=1.10,<2.0",
|
|
52
|
+
"qwen-vl-utils>=0.0.14,<1.0",
|
|
53
|
+
]
|
|
54
|
+
dev = [
|
|
55
|
+
"pytest>=8.0,<9.0",
|
|
56
|
+
"pytest-asyncio>=0.24,<1.0",
|
|
57
|
+
]
|
|
58
|
+
all = ["recallforge[torch,mlx,docs]"]
|
|
59
|
+
|
|
60
|
+
[build-system]
|
|
61
|
+
requires = ["setuptools>=61.0"]
|
|
62
|
+
build-backend = "setuptools.build_meta"
|
|
63
|
+
|
|
64
|
+
[project.scripts]
|
|
65
|
+
recallforge = "recallforge.cli:main"
|
|
66
|
+
|
|
67
|
+
[tool.setuptools.packages.find]
|
|
68
|
+
where = ["src"]
|
|
69
|
+
|
|
70
|
+
[tool.setuptools.package-data]
|
|
71
|
+
recallforge = ["py.typed"]
|
|
72
|
+
|
|
73
|
+
[tool.pytest.ini_options]
|
|
74
|
+
asyncio_mode = "auto"
|
|
75
|
+
testpaths = ["tests"]
|
|
76
|
+
pythonpath = ["src"]
|
|
77
|
+
markers = [
|
|
78
|
+
"live: marks tests as live (deselect with '-m \"not live\"')",
|
|
79
|
+
]
|