recallforge 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. recallforge-0.1.0/LICENSE +21 -0
  2. recallforge-0.1.0/PKG-INFO +305 -0
  3. recallforge-0.1.0/README.md +256 -0
  4. recallforge-0.1.0/pyproject.toml +79 -0
  5. recallforge-0.1.0/setup.cfg +4 -0
  6. recallforge-0.1.0/src/recallforge/__init__.py +154 -0
  7. recallforge-0.1.0/src/recallforge/__main__.py +13 -0
  8. recallforge-0.1.0/src/recallforge/backends/__init__.py +110 -0
  9. recallforge-0.1.0/src/recallforge/backends/base.py +193 -0
  10. recallforge-0.1.0/src/recallforge/backends/mlx_backend.py +1307 -0
  11. recallforge-0.1.0/src/recallforge/backends/torch_backend.py +610 -0
  12. recallforge-0.1.0/src/recallforge/cache.py +37 -0
  13. recallforge-0.1.0/src/recallforge/cli.py +534 -0
  14. recallforge-0.1.0/src/recallforge/documents.py +317 -0
  15. recallforge-0.1.0/src/recallforge/py.typed +0 -0
  16. recallforge-0.1.0/src/recallforge/search.py +667 -0
  17. recallforge-0.1.0/src/recallforge/server.py +1270 -0
  18. recallforge-0.1.0/src/recallforge/storage/__init__.py +15 -0
  19. recallforge-0.1.0/src/recallforge/storage/base.py +339 -0
  20. recallforge-0.1.0/src/recallforge/storage/lancedb_backend.py +2595 -0
  21. recallforge-0.1.0/src/recallforge/video.py +294 -0
  22. recallforge-0.1.0/src/recallforge/watch_folder.py +422 -0
  23. recallforge-0.1.0/src/recallforge.egg-info/PKG-INFO +305 -0
  24. recallforge-0.1.0/src/recallforge.egg-info/SOURCES.txt +40 -0
  25. recallforge-0.1.0/src/recallforge.egg-info/dependency_links.txt +1 -0
  26. recallforge-0.1.0/src/recallforge.egg-info/entry_points.txt +2 -0
  27. recallforge-0.1.0/src/recallforge.egg-info/requires.txt +35 -0
  28. recallforge-0.1.0/src/recallforge.egg-info/top_level.txt +1 -0
  29. recallforge-0.1.0/tests/test_backend_selection.py +52 -0
  30. recallforge-0.1.0/tests/test_backends.py +226 -0
  31. recallforge-0.1.0/tests/test_batch_tool.py +292 -0
  32. recallforge-0.1.0/tests/test_config_tools.py +432 -0
  33. recallforge-0.1.0/tests/test_documents.py +134 -0
  34. recallforge-0.1.0/tests/test_embedding_cache.py +183 -0
  35. recallforge-0.1.0/tests/test_error_responses.py +312 -0
  36. recallforge-0.1.0/tests/test_json_compliance.py +287 -0
  37. recallforge-0.1.0/tests/test_live.py +367 -0
  38. recallforge-0.1.0/tests/test_schema_migration.py +306 -0
  39. recallforge-0.1.0/tests/test_search_pipeline.py +462 -0
  40. recallforge-0.1.0/tests/test_sql_validation.py +91 -0
  41. recallforge-0.1.0/tests/test_storage.py +929 -0
  42. recallforge-0.1.0/tests/test_watch_folder.py +186 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright 2025-2026 Brian Meyer
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,305 @@
1
+ Metadata-Version: 2.4
2
+ Name: recallforge
3
+ Version: 0.1.0
4
+ Summary: RecallForge - Cross-Modal Vision-Language Search Engine
5
+ Author: Brian Meyer
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/brianmeyer/recallforge
8
+ Project-URL: Repository, https://github.com/brianmeyer/recallforge
9
+ Project-URL: Issues, https://github.com/brianmeyer/recallforge/issues
10
+ Keywords: search,semantic,embedding,vision-language,cross-modal
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Python: <3.14,>=3.12
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: lancedb<1.0,>=0.20
20
+ Requires-Dist: pyarrow<20.0,>=18.0
21
+ Requires-Dist: pillow<12.0,>=10.0
22
+ Requires-Dist: numpy<3.0,>=2.0
23
+ Requires-Dist: mcp<2.0,>=1.0
24
+ Provides-Extra: torch
25
+ Requires-Dist: torch<3.0,>=2.0; extra == "torch"
26
+ Requires-Dist: torchvision<1.0,>=0.15; extra == "torch"
27
+ Requires-Dist: transformers<5.0,>=4.40; extra == "torch"
28
+ Requires-Dist: scipy<2.0,>=1.10; extra == "torch"
29
+ Requires-Dist: qwen-vl-utils<1.0,>=0.0.14; extra == "torch"
30
+ Provides-Extra: mlx
31
+ Requires-Dist: mlx<1.0,>=0.20; extra == "mlx"
32
+ Requires-Dist: mlx-vlm<1.0,>=0.1; extra == "mlx"
33
+ Requires-Dist: qwen-vl-utils<1.0,>=0.0.14; extra == "mlx"
34
+ Requires-Dist: torchvision<1.0,>=0.15; extra == "mlx"
35
+ Provides-Extra: docs
36
+ Requires-Dist: pypdf<6.0,>=5.0; extra == "docs"
37
+ Provides-Extra: cuda
38
+ Requires-Dist: torch<3.0,>=2.0; extra == "cuda"
39
+ Requires-Dist: torchvision<1.0,>=0.15; extra == "cuda"
40
+ Requires-Dist: transformers<5.0,>=4.40; extra == "cuda"
41
+ Requires-Dist: scipy<2.0,>=1.10; extra == "cuda"
42
+ Requires-Dist: qwen-vl-utils<1.0,>=0.0.14; extra == "cuda"
43
+ Provides-Extra: dev
44
+ Requires-Dist: pytest<9.0,>=8.0; extra == "dev"
45
+ Requires-Dist: pytest-asyncio<1.0,>=0.24; extra == "dev"
46
+ Provides-Extra: all
47
+ Requires-Dist: recallforge[docs,mlx,torch]; extra == "all"
48
+ Dynamic: license-file
49
+
50
+ # RecallForge
51
+
52
+ ![CI](https://github.com/brianmeyer/recallforge/actions/workflows/ci.yml/badge.svg) ![PyPI](https://img.shields.io/badge/PyPI-coming_soon-blue) ![License](https://img.shields.io/badge/License-MIT-green) ![Python](https://img.shields.io/badge/Python-3.12%20%7C%203.13-blue)
53
+
54
+ **Every modality, one search. Local first.**
55
+
56
+ ![RecallForge — Your Files → One Search](docs/hero-image.png)
57
+
58
+ Standard RAG only works on text. Drop a PDF with charts, a photo of a whiteboard, or a video recording — and your AI agent goes blind. RecallForge gives agents **eyes and ears over your local filesystem**. Text, images, documents, and video all live in one unified search space, and nothing ever leaves your machine.
59
+
60
+ ## What this enables
61
+
62
+ > **You:** "What did the whiteboard look like in our last meeting?"
63
+ >
64
+ > **Claude:** *(Searches your local `~/Documents`, finds a photo of a whiteboard from an iPhone, reads the handwriting via Qwen3-VL, and surfaces the image with context.)*
65
+
66
+ > **You:** "Find the architecture diagram from that PDF I downloaded last week."
67
+ >
68
+ > **Claude:** *(Indexes the PDF, matches your query against extracted text and embedded figures, returns the relevant page.)*
69
+
70
+ > **You:** *(Drops an image of a circuit board)* "Find my notes related to this."
71
+ >
72
+ > **Claude:** *(Reverse image-to-text search across your indexed notes. Returns matching documents.)*
73
+
74
+ One query. Any modality. All local.
75
+
76
+ ## What makes RecallForge different
77
+
78
+ | Capability | RecallForge | Chroma | Mem0 | Qdrant | Weaviate |
79
+ |------------|-------------|--------|------|--------|----------|
80
+ | Cross-modal search | ✅ Native | ✅ OpenCLIP | ❌ Text only | ❌ | ✅ CLIP modules |
81
+ | Video support [Beta] | ✅ | ❌ | ❌ | ❌ | ❌ |
82
+ | Document ingest (PDF/DOCX/PPTX) | ✅ | ❌ | ❌ | ❌ | ❌ |
83
+ | Built-in reranking | ✅ Multimodal | ❌ | ❌ | ✅ ColBERT | ✅ Modules |
84
+ | Query expansion | ✅ Multimodal | ❌ | ❌ | ❌ | ✅ Generative |
85
+ | MCP-native | ✅ 17 tools | ❌ | ❌ | ❌ | ❌ |
86
+ | 100% local | ✅ | ✅ | ⚠️ Cloud default | ✅ | ✅ Docker |
87
+ | Apple Silicon optimized | ✅ MLX 4-bit | ❌ | ❌ | ❌ | ❌ |
88
+ | Cloud option | ❌ | ✅ | ✅ | ✅ | ✅ |
89
+ | JS/TS SDK | ❌ | ✅ | ✅ | ✅ | ✅ |
90
+
91
+ **Use RecallForge when:** You need multimodal memory for AI agents that runs entirely on your machine, especially on Apple Silicon. One search across text, images, documents, and video.
92
+
93
+ **Use something else when:** You need cloud hosting, massive scale (millions+ vectors), or a JS/TS-first ecosystem.
94
+
95
+ ## Performance
96
+
97
+ 4 modalities (text, images, documents, video) unified in a single MLX-optimized local vector space. Sub-60ms search latency. Under 400MB resident memory.
98
+
99
+ Measured on Mac mini M4 16GB, MLX 4-bit, embed mode:
100
+
101
+ | Metric | MLX 4-bit | PyTorch fp16 |
102
+ |--------|-----------|--------------|
103
+ | Warm search p50 | 53ms | 599ms |
104
+ | Warm search p95 | 55ms | — |
105
+ | Cold start | 7.6s | ~20s |
106
+ | Peak RSS (embed) | 329MB* | ~4GB |
107
+ | Text indexing | 5.0 docs/sec | — |
108
+
109
+ *\*MLX maps model weights lazily via memory-mapped files. RSS reflects resident pages, not full model size (~1.7GB on disk for embed mode). Actual memory pressure is low.*
110
+
111
+ Search quality comes from the multi-stage pipeline (BM25 + vector + RRF fusion + cross-encoder reranking), not raw embedding accuracy alone.
112
+
113
+ ## Installation
114
+
115
+ ```bash
116
+ pip install "recallforge[mlx]"   # Apple Silicon (recommended, 4-bit quantization)
117
+ pip install "recallforge[cuda]"  # NVIDIA GPU
118
+ pip install "recallforge[torch]" # CPU / other PyTorch targets
119
+ pip install "recallforge[docs]"  # add richer PDF extraction (optional)
120
+ ```
121
+
122
+ > **Note:** `pip install recallforge` installs the core without a backend.
123
+ > You need at least one of `[mlx]`, `[cuda]`, or `[torch]` to run inference.
124
+
125
+ From source:
126
+
127
+ ```bash
128
+ git clone https://github.com/brianmeyer/recallforge.git
129
+ cd recallforge
130
+ pip install -e ".[mlx]"
131
+ ```
132
+
133
+ ### Requirements
134
+
135
+ - Python 3.12 or 3.13 required (3.14 not yet supported, pending pyarrow wheel)
136
+ - Disk: ~2-5GB free for model downloads on first run
137
+ - RAM (MLX 4-bit): ~1.7GB (`embed`) to ~4.4GB (`full`)
138
+ - `ffmpeg` recommended for video indexing/search
139
+ - First run downloads models automatically and may take a few minutes
140
+
141
+ ## MCP Server (primary use)
142
+
143
+ RecallForge is designed as a **Model Context Protocol server for AI agents**. Configure in Claude Desktop (or any MCP-compatible agent host):
144
+
145
+ ```json
146
+ {
147
+ "mcpServers": {
148
+ "recallforge": {
149
+ "command": "recallforge",
150
+ "args": ["serve", "--mode", "full"]
151
+ }
152
+ }
153
+ }
154
+ ```
155
+
156
+ Run manually:
157
+
158
+ ```bash
159
+ recallforge serve --mode embed --backend mlx --quantize 4bit
160
+ ```
161
+
162
+ Exposes **17 tools** for agents: `ingest`, `search`, `search_fts`, `search_vec`, `index_document`, `index_image`, `memory_add`, `memory_update`, `memory_delete`, `index_folder`, `status`, `rebuild_fts`, `list_collections`, `list_namespaces`, `batch`, `get_config`, `set_config`.
163
+
164
+ See [docs/mcp-tools.md](docs/mcp-tools.md) for the full tool reference.
165
+
166
+ ## Search modes
167
+
168
+ | Mode | Models loaded | Memory (MLX 4-bit) | Quality | Best for |
169
+ |------|--------------|-------------------|---------|----------|
170
+ | `embed` | Embedder | ~1.7GB | Good | Memory-constrained, fast searches |
171
+ | `hybrid` | + Reranker | ~3.4GB | Better | Balanced quality and memory |
172
+ | `full` | + Query Expander | ~4.4GB | Best | Maximum retrieval quality |
173
+
174
+ > **Video [Beta] note:** Video support requires `ffmpeg`. The torch backend video path has a known upstream issue (see [QwenLM/Qwen3.5#58](https://github.com/QwenLM/Qwen3.5/issues/58)).
175
+
176
+ ## How it works
177
+
178
+ RecallForge encodes text, images, and video frames into the same 2048-dimensional vector space using Qwen3-VL. This means "find notes about this diagram" works whether the diagram is text, an image, or a frame from a video. A multi-stage pipeline handles the rest:
179
+
180
+ ```mermaid
181
+ graph TD
182
+ subgraph Local Filesystem
183
+ Docs[📄 Documents]
184
+ Imgs[🖼️ Images]
185
+ Vids[🎬 Video]
186
+ end
187
+
188
+ subgraph RecallForge Ingest
189
+ Docs --> TxtExt[Text Extractor]
190
+ Imgs --> VLM[Qwen3-VL Encoder]
191
+ Vids --> Frame[Frame & Audio Extractor]
192
+ Frame --> VLM
193
+ TxtExt --> VLM
194
+ end
195
+
196
+ subgraph LanceDB Storage
197
+ VLM -->|2048-dim Vectors| VecDB[(Vector Space)]
198
+ TxtExt -->|Text/Transcripts| FTS[(Tantivy FTS)]
199
+ end
200
+
201
+ subgraph MCP Search Pipeline
202
+ Query[Agent Query] --> BM25[BM25 Text Search]
203
+ Query --> Dense[Vector Similarity Search]
204
+ BM25 --> RRF[RRF Fusion]
205
+ Dense --> RRF
206
+ RRF --> Rerank[Cross-Encoder Reranker]
207
+ Rerank --> Output[Final Context to Agent]
208
+ end
209
+ ```
210
+
211
+ **Pipeline:** BM25 probe → Query expansion (full mode) → Parallel BM25 + Vector → RRF fusion → Reranking (hybrid/full) → Score blending
212
+
213
+ ## CLI (development & debugging)
214
+
215
+ ```bash
216
+ # Index anything
217
+ recallforge index ./photos ./docs
218
+ recallforge index ~/Movies/demo.mp4
219
+ recallforge index ~/Documents/roadmap.pptx
220
+
221
+ # Search any modality
222
+ recallforge search "whiteboard diagram from last meeting"
223
+ recallforge search --image ./photos/whiteboard.png
224
+ recallforge search --video ~/Movies/demo.mp4
225
+
226
+ # Watch a folder for changes (auto-index)
227
+ recallforge watch start ~/Documents --collection docs
228
+ recallforge watch list
229
+ recallforge watch stop ~/Documents
230
+
231
+ # Status
232
+ recallforge status
233
+ ```
234
+
235
+ RecallForge auto-detects MLX on Apple Silicon, PyTorch elsewhere.
236
+
237
+ ## Python API
238
+
239
+ ```python
240
+ from recallforge import get_backend, get_storage
241
+ from recallforge.search import HybridSearcher
242
+
243
+ backend = get_backend()
244
+ storage = get_storage()
245
+ backend.warm_up()
246
+
247
+ # Index
248
+ storage.index_document(
249
+ path="notes.md",
250
+ text="My notes about AI...",
251
+ collection="my_docs",
252
+ model="Qwen3-VL-Embedding-2B",
253
+ embed_func=backend.embed_text,
254
+ )
255
+
256
+ # Search
257
+ searcher = HybridSearcher(backend=backend, storage=storage, limit=10)
258
+ results = searcher.search("artificial intelligence")
259
+ for r in results:
260
+ print(f"[{r.score:.3f}] {r.title}")
261
+ ```
262
+
263
+ ## Configuration
264
+
265
+ | Variable | Default | Description |
266
+ |----------|---------|-------------|
267
+ | `RECALLFORGE_BACKEND` | `auto` | `auto`, `mlx`, `torch` |
268
+ | `RECALLFORGE_MODE` | `full` | `embed`, `hybrid`, `full` |
269
+ | `RECALLFORGE_MLX_QUANTIZE` | `4bit` | `4bit`, `bf16` |
270
+ | `RECALLFORGE_STORE_PATH` | `~/.recallforge` | Storage directory |
271
+
272
+ ## Project structure
273
+
274
+ ```
275
+ src/recallforge/
276
+ ├── backends/
277
+ │ ├── mlx_backend.py # MLX 4-bit/bf16 (Apple Silicon)
278
+ │ └── torch_backend.py # PyTorch (CUDA/MPS/CPU)
279
+ ├── storage/
280
+ │ └── lancedb_backend.py # LanceDB + Tantivy FTS
281
+ ├── cache.py # LRU embedding cache
282
+ ├── search.py # Hybrid search pipeline (BM25 + vector + RRF)
283
+ ├── server.py # MCP server (17 tools)
284
+ ├── documents.py # PDF/DOCX/PPTX extraction
285
+ ├── video.py # Frame/transcript extraction
286
+ ├── watch_folder.py # Folder monitoring with dedup
287
+ └── cli.py # CLI interface
288
+ ```
289
+
290
+ ## Development
291
+
292
+ ```bash
293
+ pytest tests/ -m "not live" # Unit tests (no model download needed)
294
+ pytest tests/ -m live -v # Integration tests (requires models)
295
+ ```
296
+
297
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for full development guidelines.
298
+
299
+ ## Attribution
300
+
301
+ RecallForge is inspired by [QMD](https://github.com/tobil/qmd) by Tobi. QMD pioneered the multi-stage retrieval pipeline (embedding, reranking, query expansion). RecallForge extends this pattern to vision-language with cross-modal retrieval and multi-backend support.
302
+
303
+ ## License
304
+
305
+ MIT License
@@ -0,0 +1,256 @@
1
+ # RecallForge
2
+
3
+ ![CI](https://github.com/brianmeyer/recallforge/actions/workflows/ci.yml/badge.svg) ![PyPI](https://img.shields.io/badge/PyPI-coming_soon-blue) ![License](https://img.shields.io/badge/License-MIT-green) ![Python](https://img.shields.io/badge/Python-3.12%20%7C%203.13-blue)
4
+
5
+ **Every modality, one search. Local first.**
6
+
7
+ ![RecallForge — Your Files → One Search](docs/hero-image.png)
8
+
9
+ Standard RAG only works on text. Drop a PDF with charts, a photo of a whiteboard, or a video recording — and your AI agent goes blind. RecallForge gives agents **eyes and ears over your local filesystem**. Text, images, documents, and video all live in one unified search space, and nothing ever leaves your machine.
10
+
11
+ ## What this enables
12
+
13
+ > **You:** "What did the whiteboard look like in our last meeting?"
14
+ >
15
+ > **Claude:** *(Searches your local `~/Documents`, finds a photo of a whiteboard from an iPhone, reads the handwriting via Qwen3-VL, and surfaces the image with context.)*
16
+
17
+ > **You:** "Find the architecture diagram from that PDF I downloaded last week."
18
+ >
19
+ > **Claude:** *(Indexes the PDF, matches your query against extracted text and embedded figures, returns the relevant page.)*
20
+
21
+ > **You:** *(Drops an image of a circuit board)* "Find my notes related to this."
22
+ >
23
+ > **Claude:** *(Reverse image-to-text search across your indexed notes. Returns matching documents.)*
24
+
25
+ One query. Any modality. All local.
26
+
27
+ ## What makes RecallForge different
28
+
29
+ | Capability | RecallForge | Chroma | Mem0 | Qdrant | Weaviate |
30
+ |------------|-------------|--------|------|--------|----------|
31
+ | Cross-modal search | ✅ Native | ✅ OpenCLIP | ❌ Text only | ❌ | ✅ CLIP modules |
32
+ | Video support [Beta] | ✅ | ❌ | ❌ | ❌ | ❌ |
33
+ | Document ingest (PDF/DOCX/PPTX) | ✅ | ❌ | ❌ | ❌ | ❌ |
34
+ | Built-in reranking | ✅ Multimodal | ❌ | ❌ | ✅ ColBERT | ✅ Modules |
35
+ | Query expansion | ✅ Multimodal | ❌ | ❌ | ❌ | ✅ Generative |
36
+ | MCP-native | ✅ 17 tools | ❌ | ❌ | ❌ | ❌ |
37
+ | 100% local | ✅ | ✅ | ⚠️ Cloud default | ✅ | ✅ Docker |
38
+ | Apple Silicon optimized | ✅ MLX 4-bit | ❌ | ❌ | ❌ | ❌ |
39
+ | Cloud option | ❌ | ✅ | ✅ | ✅ | ✅ |
40
+ | JS/TS SDK | ❌ | ✅ | ✅ | ✅ | ✅ |
41
+
42
+ **Use RecallForge when:** You need multimodal memory for AI agents that runs entirely on your machine, especially on Apple Silicon. One search across text, images, documents, and video.
43
+
44
+ **Use something else when:** You need cloud hosting, massive scale (millions+ vectors), or a JS/TS-first ecosystem.
45
+
46
+ ## Performance
47
+
48
+ 4 modalities (text, images, documents, video) unified in a single MLX-optimized local vector space. Sub-60ms search latency. Under 400MB resident memory.
49
+
50
+ Measured on Mac mini M4 16GB, MLX 4-bit, embed mode:
51
+
52
+ | Metric | MLX 4-bit | PyTorch fp16 |
53
+ |--------|-----------|--------------|
54
+ | Warm search p50 | 53ms | 599ms |
55
+ | Warm search p95 | 55ms | — |
56
+ | Cold start | 7.6s | ~20s |
57
+ | Peak RSS (embed) | 329MB* | ~4GB |
58
+ | Text indexing | 5.0 docs/sec | — |
59
+
60
+ *\*MLX maps model weights lazily via memory-mapped files. RSS reflects resident pages, not full model size (~1.7GB on disk for embed mode). Actual memory pressure is low.*
61
+
62
+ Search quality comes from the multi-stage pipeline (BM25 + vector + RRF fusion + cross-encoder reranking), not raw embedding accuracy alone.
63
+
64
+ ## Installation
65
+
66
+ ```bash
67
+ pip install "recallforge[mlx]"   # Apple Silicon (recommended, 4-bit quantization)
68
+ pip install "recallforge[cuda]"  # NVIDIA GPU
69
+ pip install "recallforge[torch]" # CPU / other PyTorch targets
70
+ pip install "recallforge[docs]"  # add richer PDF extraction (optional)
71
+ ```
72
+
73
+ > **Note:** `pip install recallforge` installs the core without a backend.
74
+ > You need at least one of `[mlx]`, `[cuda]`, or `[torch]` to run inference.
75
+
76
+ From source:
77
+
78
+ ```bash
79
+ git clone https://github.com/brianmeyer/recallforge.git
80
+ cd recallforge
81
+ pip install -e ".[mlx]"
82
+ ```
83
+
84
+ ### Requirements
85
+
86
+ - Python 3.12 or 3.13 required (3.14 not yet supported, pending pyarrow wheel)
87
+ - Disk: ~2-5GB free for model downloads on first run
88
+ - RAM (MLX 4-bit): ~1.7GB (`embed`) to ~4.4GB (`full`)
89
+ - `ffmpeg` recommended for video indexing/search
90
+ - First run downloads models automatically and may take a few minutes
91
+
92
+ ## MCP Server (primary use)
93
+
94
+ RecallForge is designed as a **Model Context Protocol server for AI agents**. Configure in Claude Desktop (or any MCP-compatible agent host):
95
+
96
+ ```json
97
+ {
98
+ "mcpServers": {
99
+ "recallforge": {
100
+ "command": "recallforge",
101
+ "args": ["serve", "--mode", "full"]
102
+ }
103
+ }
104
+ }
105
+ ```
106
+
107
+ Run manually:
108
+
109
+ ```bash
110
+ recallforge serve --mode embed --backend mlx --quantize 4bit
111
+ ```
112
+
113
+ Exposes **17 tools** for agents: `ingest`, `search`, `search_fts`, `search_vec`, `index_document`, `index_image`, `memory_add`, `memory_update`, `memory_delete`, `index_folder`, `status`, `rebuild_fts`, `list_collections`, `list_namespaces`, `batch`, `get_config`, `set_config`.
114
+
115
+ See [docs/mcp-tools.md](docs/mcp-tools.md) for the full tool reference.
116
+
117
+ ## Search modes
118
+
119
+ | Mode | Models loaded | Memory (MLX 4-bit) | Quality | Best for |
120
+ |------|--------------|-------------------|---------|----------|
121
+ | `embed` | Embedder | ~1.7GB | Good | Memory-constrained, fast searches |
122
+ | `hybrid` | + Reranker | ~3.4GB | Better | Balanced quality and memory |
123
+ | `full` | + Query Expander | ~4.4GB | Best | Maximum retrieval quality |
124
+
125
+ > **Video [Beta] note:** Video support requires `ffmpeg`. The torch backend video path has a known upstream issue (see [QwenLM/Qwen3.5#58](https://github.com/QwenLM/Qwen3.5/issues/58)).
126
+
127
+ ## How it works
128
+
129
+ RecallForge encodes text, images, and video frames into the same 2048-dimensional vector space using Qwen3-VL. This means "find notes about this diagram" works whether the diagram is text, an image, or a frame from a video. A multi-stage pipeline handles the rest:
130
+
131
+ ```mermaid
132
+ graph TD
133
+ subgraph Local Filesystem
134
+ Docs[📄 Documents]
135
+ Imgs[🖼️ Images]
136
+ Vids[🎬 Video]
137
+ end
138
+
139
+ subgraph RecallForge Ingest
140
+ Docs --> TxtExt[Text Extractor]
141
+ Imgs --> VLM[Qwen3-VL Encoder]
142
+ Vids --> Frame[Frame & Audio Extractor]
143
+ Frame --> VLM
144
+ TxtExt --> VLM
145
+ end
146
+
147
+ subgraph LanceDB Storage
148
+ VLM -->|2048-dim Vectors| VecDB[(Vector Space)]
149
+ TxtExt -->|Text/Transcripts| FTS[(Tantivy FTS)]
150
+ end
151
+
152
+ subgraph MCP Search Pipeline
153
+ Query[Agent Query] --> BM25[BM25 Text Search]
154
+ Query --> Dense[Vector Similarity Search]
155
+ BM25 --> RRF[RRF Fusion]
156
+ Dense --> RRF
157
+ RRF --> Rerank[Cross-Encoder Reranker]
158
+ Rerank --> Output[Final Context to Agent]
159
+ end
160
+ ```
161
+
162
+ **Pipeline:** BM25 probe → Query expansion (full mode) → Parallel BM25 + Vector → RRF fusion → Reranking (hybrid/full) → Score blending
163
+
164
+ ## CLI (development & debugging)
165
+
166
+ ```bash
167
+ # Index anything
168
+ recallforge index ./photos ./docs
169
+ recallforge index ~/Movies/demo.mp4
170
+ recallforge index ~/Documents/roadmap.pptx
171
+
172
+ # Search any modality
173
+ recallforge search "whiteboard diagram from last meeting"
174
+ recallforge search --image ./photos/whiteboard.png
175
+ recallforge search --video ~/Movies/demo.mp4
176
+
177
+ # Watch a folder for changes (auto-index)
178
+ recallforge watch start ~/Documents --collection docs
179
+ recallforge watch list
180
+ recallforge watch stop ~/Documents
181
+
182
+ # Status
183
+ recallforge status
184
+ ```
185
+
186
+ RecallForge auto-detects MLX on Apple Silicon, PyTorch elsewhere.
187
+
188
+ ## Python API
189
+
190
+ ```python
191
+ from recallforge import get_backend, get_storage
192
+ from recallforge.search import HybridSearcher
193
+
194
+ backend = get_backend()
195
+ storage = get_storage()
196
+ backend.warm_up()
197
+
198
+ # Index
199
+ storage.index_document(
200
+ path="notes.md",
201
+ text="My notes about AI...",
202
+ collection="my_docs",
203
+ model="Qwen3-VL-Embedding-2B",
204
+ embed_func=backend.embed_text,
205
+ )
206
+
207
+ # Search
208
+ searcher = HybridSearcher(backend=backend, storage=storage, limit=10)
209
+ results = searcher.search("artificial intelligence")
210
+ for r in results:
211
+ print(f"[{r.score:.3f}] {r.title}")
212
+ ```
213
+
214
+ ## Configuration
215
+
216
+ | Variable | Default | Description |
217
+ |----------|---------|-------------|
218
+ | `RECALLFORGE_BACKEND` | `auto` | `auto`, `mlx`, `torch` |
219
+ | `RECALLFORGE_MODE` | `full` | `embed`, `hybrid`, `full` |
220
+ | `RECALLFORGE_MLX_QUANTIZE` | `4bit` | `4bit`, `bf16` |
221
+ | `RECALLFORGE_STORE_PATH` | `~/.recallforge` | Storage directory |
222
+
223
+ ## Project structure
224
+
225
+ ```
226
+ src/recallforge/
227
+ ├── backends/
228
+ │ ├── mlx_backend.py # MLX 4-bit/bf16 (Apple Silicon)
229
+ │ └── torch_backend.py # PyTorch (CUDA/MPS/CPU)
230
+ ├── storage/
231
+ │ └── lancedb_backend.py # LanceDB + Tantivy FTS
232
+ ├── cache.py # LRU embedding cache
233
+ ├── search.py # Hybrid search pipeline (BM25 + vector + RRF)
234
+ ├── server.py # MCP server (17 tools)
235
+ ├── documents.py # PDF/DOCX/PPTX extraction
236
+ ├── video.py # Frame/transcript extraction
237
+ ├── watch_folder.py # Folder monitoring with dedup
238
+ └── cli.py # CLI interface
239
+ ```
240
+
241
+ ## Development
242
+
243
+ ```bash
244
+ pytest tests/ -m "not live" # Unit tests (no model download needed)
245
+ pytest tests/ -m live -v # Integration tests (requires models)
246
+ ```
247
+
248
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for full development guidelines.
249
+
250
+ ## Attribution
251
+
252
+ RecallForge is inspired by [QMD](https://github.com/tobil/qmd) by Tobi. QMD pioneered the multi-stage retrieval pipeline (embedding, reranking, query expansion). RecallForge extends this pattern to vision-language with cross-modal retrieval and multi-backend support.
253
+
254
+ ## License
255
+
256
+ MIT License
@@ -0,0 +1,79 @@
1
+ [project]
2
+ name = "recallforge"
3
+ version = "0.1.0"
4
+ description = "RecallForge - Cross-Modal Vision-Language Search Engine"
5
+ authors = [{name = "Brian Meyer"}]
6
+ readme = "README.md"
7
+ requires-python = ">=3.12,<3.14"
8
+ license = {text = "MIT"}
9
+ keywords = ["search", "semantic", "embedding", "vision-language", "cross-modal"]
10
+ classifiers = [
11
+ "Development Status :: 4 - Beta",
12
+ "Intended Audience :: Developers",
13
+ "License :: OSI Approved :: MIT License",
14
+ "Programming Language :: Python :: 3.12",
15
+ "Programming Language :: Python :: 3.13",
16
+ ]
17
+
18
+ dependencies = [
19
+ "lancedb>=0.20,<1.0",
20
+ "pyarrow>=18.0,<20.0",
21
+ "pillow>=10.0,<12.0",
22
+ "numpy>=2.0,<3.0",
23
+ "mcp>=1.0,<2.0",
24
+ ]
25
+
26
+ [project.urls]
27
+ Homepage = "https://github.com/brianmeyer/recallforge"
28
+ Repository = "https://github.com/brianmeyer/recallforge"
29
+ Issues = "https://github.com/brianmeyer/recallforge/issues"
30
+
31
+ [project.optional-dependencies]
32
+ torch = [
33
+ "torch>=2.0,<3.0",
34
+ "torchvision>=0.15,<1.0",
35
+ "transformers>=4.40,<5.0",
36
+ "scipy>=1.10,<2.0",
37
+ "qwen-vl-utils>=0.0.14,<1.0",
38
+ ]
39
+ mlx = [
40
+ "mlx>=0.20,<1.0",
41
+ "mlx-vlm>=0.1,<1.0",
42
+ "qwen-vl-utils>=0.0.14,<1.0",
43
+ # transformers 5.x Qwen3VLVideoProcessor requires torchvision for processor loading
44
+ "torchvision>=0.15,<1.0",
45
+ ]
46
+ docs = ["pypdf>=5.0,<6.0"]
47
+ cuda = [
48
+ "torch>=2.0,<3.0",
49
+ "torchvision>=0.15,<1.0",
50
+ "transformers>=4.40,<5.0",
51
+ "scipy>=1.10,<2.0",
52
+ "qwen-vl-utils>=0.0.14,<1.0",
53
+ ]
54
+ dev = [
55
+ "pytest>=8.0,<9.0",
56
+ "pytest-asyncio>=0.24,<1.0",
57
+ ]
58
+ all = ["recallforge[torch,mlx,docs]"]
59
+
60
+ [build-system]
61
+ requires = ["setuptools>=61.0"]
62
+ build-backend = "setuptools.build_meta"
63
+
64
+ [project.scripts]
65
+ recallforge = "recallforge.cli:main"
66
+
67
+ [tool.setuptools.packages.find]
68
+ where = ["src"]
69
+
70
+ [tool.setuptools.package-data]
71
+ recallforge = ["py.typed"]
72
+
73
+ [tool.pytest.ini_options]
74
+ asyncio_mode = "auto"
75
+ testpaths = ["tests"]
76
+ pythonpath = ["src"]
77
+ markers = [
78
+ "live: marks tests as live (deselect with '-m \"not live\"')",
79
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+