latent-gate 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- latent_gate-0.4.0/LICENSE +21 -0
- latent_gate-0.4.0/PKG-INFO +330 -0
- latent_gate-0.4.0/README.md +285 -0
- latent_gate-0.4.0/latent_gate/__init__.py +73 -0
- latent_gate-0.4.0/latent_gate/__main__.py +5 -0
- latent_gate-0.4.0/latent_gate/cache.py +115 -0
- latent_gate-0.4.0/latent_gate/cli.py +159 -0
- latent_gate-0.4.0/latent_gate/config.py +65 -0
- latent_gate-0.4.0/latent_gate/fast_client.py +188 -0
- latent_gate-0.4.0/latent_gate/local_processor.py +234 -0
- latent_gate-0.4.0/latent_gate/mcp_server.py +278 -0
- latent_gate-0.4.0/latent_gate/payload.py +95 -0
- latent_gate-0.4.0/latent_gate/pipeline.py +318 -0
- latent_gate-0.4.0/latent_gate/remote_decoder.py +138 -0
- latent_gate-0.4.0/latent_gate/selective_decoder.py +144 -0
- latent_gate-0.4.0/latent_gate/text_processor.py +381 -0
- latent_gate-0.4.0/latent_gate.egg-info/PKG-INFO +330 -0
- latent_gate-0.4.0/latent_gate.egg-info/SOURCES.txt +29 -0
- latent_gate-0.4.0/latent_gate.egg-info/dependency_links.txt +1 -0
- latent_gate-0.4.0/latent_gate.egg-info/entry_points.txt +2 -0
- latent_gate-0.4.0/latent_gate.egg-info/requires.txt +24 -0
- latent_gate-0.4.0/latent_gate.egg-info/top_level.txt +1 -0
- latent_gate-0.4.0/pyproject.toml +97 -0
- latent_gate-0.4.0/setup.cfg +4 -0
- latent_gate-0.4.0/tests/test_config.py +57 -0
- latent_gate-0.4.0/tests/test_local_processor.py +66 -0
- latent_gate-0.4.0/tests/test_payload.py +90 -0
- latent_gate-0.4.0/tests/test_pipeline.py +74 -0
- latent_gate-0.4.0/tests/test_remote_decoder.py +48 -0
- latent_gate-0.4.0/tests/test_selective_decoder.py +128 -0
- latent_gate-0.4.0/tests/test_text_processor.py +159 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kathan Modh
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: latent-gate
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Local-first vision-language pipeline inspired by VL-JEPA. Compress images, text, conversations, and RAG documents locally via Ollama before sending to any LLM API. Includes MCP server for Claude Code, Cursor, Cline, Continue, and Zed. ~80% token savings.
|
|
5
|
+
Author: Kathan Modh
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/KathanModh259/latent-gate
|
|
8
|
+
Project-URL: Repository, https://github.com/KathanModh259/latent-gate
|
|
9
|
+
Project-URL: Issues, https://github.com/KathanModh259/latent-gate/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/KathanModh259/latent-gate/blob/main/CHANGELOG.md
|
|
11
|
+
Keywords: vlm,vision-language,ollama,token-optimization,vl-jepa,local-llm,cost-reduction,selective-decoding,mcp,model-context-protocol,claude-code,cursor,cline,prompt-compression,rag,ai-tools
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: requests>=2.31.0
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
29
|
+
Requires-Dist: black>=23.0; extra == "dev"
|
|
30
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
31
|
+
Requires-Dist: mypy>=1.5; extra == "dev"
|
|
32
|
+
Provides-Extra: mcp
|
|
33
|
+
Requires-Dist: mcp>=0.9.0; extra == "mcp"
|
|
34
|
+
Provides-Extra: video
|
|
35
|
+
Requires-Dist: opencv-python>=4.9.0; extra == "video"
|
|
36
|
+
Provides-Extra: embeddings
|
|
37
|
+
Requires-Dist: torch>=2.0.0; extra == "embeddings"
|
|
38
|
+
Requires-Dist: sentence-transformers>=2.7.0; extra == "embeddings"
|
|
39
|
+
Provides-Extra: all
|
|
40
|
+
Requires-Dist: mcp>=0.9.0; extra == "all"
|
|
41
|
+
Requires-Dist: opencv-python>=4.9.0; extra == "all"
|
|
42
|
+
Requires-Dist: torch>=2.0.0; extra == "all"
|
|
43
|
+
Requires-Dist: sentence-transformers>=2.7.0; extra == "all"
|
|
44
|
+
Dynamic: license-file
|
|
45
|
+
|
|
46
|
+
<div align="center">
|
|
47
|
+
|
|
48
|
+
# 🔮 LatentGate
|
|
49
|
+
|
|
50
|
+
### *Process Locally. Send Smart. Pay Less.*
|
|
51
|
+
|
|
52
|
+
**A VL-JEPA-inspired pipeline that compresses images, text, conversations, and RAG documents locally via Ollama, then sends only compact semantic payloads to any LLM API — cutting token costs by ~80%.**
|
|
53
|
+
|
|
54
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
55
|
+
[License: MIT](LICENSE)
|
|
56
|
+
[Changelog](CHANGELOG.md)
|
|
57
|
+
[Contributing](CONTRIBUTING.md)
|
|
58
|
+
[Ollama](https://ollama.com)
|
|
59
|
+
[Model Context Protocol](https://modelcontextprotocol.io)
|
|
60
|
+
|
|
61
|
+
[**Quick Start**](#-quick-start) · [**AI Tool Integrations**](#-use-with-ai-coding-tools-mcp-integration) · [**Benchmarks**](#-cost-benchmarks) · [**Contributing**](#-contributing)
|
|
62
|
+
|
|
63
|
+
</div>
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## 🏗️ Architecture
|
|
68
|
+
|
|
69
|
+
<div align="center">
|
|
70
|
+
|
|
71
|
+

|
|
72
|
+
|
|
73
|
+
</div>
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## 💡 The Problem
|
|
78
|
+
|
|
79
|
+
Every time you send an image or long prompt to GPT-4o / Claude / Gemini, you are burning 1,000+ tokens on processing that could happen locally for free.
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
Traditional: Image → Cloud LLM (1,200 tokens) → Answer
|
|
83
|
+
LatentGate: Image → Local Ollama (FREE) → Cloud LLM (200 tokens) → Answer
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## ✨ Features
|
|
89
|
+
|
|
90
|
+
- 🏠 **Local-First** — Vision and text compression runs on Ollama (free)
|
|
91
|
+
- 💰 **~80% Token Savings** — Send ~200 tokens instead of ~1,200
|
|
92
|
+
- 🔌 **MCP Server** — Works with Claude Desktop, Cursor, Cline, Continue, Zed
|
|
93
|
+
- 🎯 **Selective Decoding** — For video, only call API when scene changes (~2.85x fewer calls)
|
|
94
|
+
- 📝 **Text Compression** — Long prompts, conversations, RAG docs compressed locally
|
|
95
|
+
- ⚡ **Speed Optimized** — Connection pooling, model preloading, parallel processing
|
|
96
|
+
- 🌐 **Multi-Provider** — OpenAI, Anthropic, Google, Groq, or any OpenAI-compatible endpoint
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## 🚀 Quick Start
|
|
101
|
+
|
|
102
|
+
### Install
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
# Core install
|
|
106
|
+
pip install latent-gate
|
|
107
|
+
|
|
108
|
+
# With MCP server (for Claude Desktop, Cursor, Cline, etc.)
|
|
109
|
+
pip install latent-gate[mcp]
|
|
110
|
+
|
|
111
|
+
# Pull required Ollama models
|
|
112
|
+
ollama pull llava:7b
|
|
113
|
+
ollama pull llama3:8b
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Run
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# Image query
|
|
120
|
+
python -m latent_gate photo.jpg "What is in this image?" --provider ollama -v
|
|
121
|
+
|
|
122
|
+
# Text compression
|
|
123
|
+
python -m latent_gate --text "Your long prompt here..." --provider ollama -v
|
|
124
|
+
|
|
125
|
+
# Image + Text combined
|
|
126
|
+
python -m latent_gate photo.jpg "Analyze" --text "Extra context..." -v
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Python API
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
from latent_gate import LatentGatePipeline, PipelineConfig
|
|
133
|
+
|
|
134
|
+
config = PipelineConfig(
|
|
135
|
+
vision_model="llava:7b",
|
|
136
|
+
remote_provider="openai",
|
|
137
|
+
remote_model="gpt-4o-mini",
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
with LatentGatePipeline(config) as pipeline:
|
|
141
|
+
result = pipeline.query("photo.jpg", "Describe this")
|
|
142
|
+
result = pipeline.query_text("Your 500-word prompt...")
|
|
143
|
+
result = pipeline.query_conversation(messages, "Follow-up question")
|
|
144
|
+
result = pipeline.query_documents(["doc1...", "doc2..."], "Question?")
|
|
145
|
+
result = pipeline.query_universal(text="...", image="photo.jpg")
|
|
146
|
+
|
|
147
|
+
print(result["timing"])
|
|
148
|
+
print(result["tokens_estimated"])
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## 🔌 Use With AI Coding Tools (MCP Integration)
|
|
154
|
+
|
|
155
|
+
LatentGate works as a Model Context Protocol (MCP) server with every major AI coding tool. Once configured, your AI assistant automatically compresses images, long prompts, and documents — saving you ~80% on tokens without changing your workflow.
|
|
156
|
+
|
|
157
|
+
### Supported Tools
|
|
158
|
+
|
|
159
|
+
| Tool | Status |
|
|
160
|
+
| ----------------- | ----------- |
|
|
161
|
+
| Claude Desktop | Supported |
|
|
162
|
+
| Claude Code (CLI) | Supported |
|
|
163
|
+
| Cursor | Supported |
|
|
164
|
+
| Cline (VS Code) | Supported |
|
|
165
|
+
| Continue.dev | Supported |
|
|
166
|
+
| Zed Editor | Supported |
|
|
167
|
+
|
|
168
|
+
### Quick Setup
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
pip install latent-gate[mcp]
|
|
172
|
+
ollama pull llava:7b
|
|
173
|
+
ollama pull llama3:8b
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Then add to your AI tool MCP config:
|
|
177
|
+
|
|
178
|
+
```json
|
|
179
|
+
{
|
|
180
|
+
"mcpServers": {
|
|
181
|
+
"latent-gate": {
|
|
182
|
+
"command": "python",
|
|
183
|
+
"args": ["-m", "latent_gate.mcp_server"]
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Detailed setup guides for each tool: see the `integrations/` folder.
|
|
190
|
+
|
|
191
|
+
### What Gets Compressed Automatically
|
|
192
|
+
|
|
193
|
+
| Tool Call | When AI Uses It |
|
|
194
|
+
| ----------------------- | ------------------------------ |
|
|
195
|
+
| `compress_image` | Before analyzing any image |
|
|
196
|
+
| `compress_text` | For prompts longer than ~500 tokens |
|
|
197
|
+
| `compress_conversation` | When chat history is large |
|
|
198
|
+
| `compress_documents` | For RAG queries |
|
|
199
|
+
| `get_stats` | To check session savings |
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## ⚡ Speed Optimizations
|
|
204
|
+
|
|
205
|
+
| Optimization | What It Does | Impact |
|
|
206
|
+
| --------------------- | ----------------------------------------------------------- | ------------------------------- |
|
|
207
|
+
| Connection Pooling | Reuses HTTP connections via `requests.Session` | ~30-50% faster per call |
|
|
208
|
+
| Model Preloading | Warms up Ollama models on init (`keep_alive`) | Eliminates 5-15s cold start |
|
|
209
|
+
| Shorter Prompts | Optimized extraction prompts produce fewer output tokens | ~20% faster generation |
|
|
210
|
+
| 3-Tier JSON Parsing | Fast parse, extract from text, LLM fallback | Avoids slow LLM call 90% of time |
|
|
211
|
+
| Parallel Processing | Image and text processed simultaneously via ThreadPool | ~40% faster combined queries |
|
|
212
|
+
| Caching | Content-hash disk cache for repeated images | Instant on cache hit |
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## 📊 Cost Benchmarks
|
|
217
|
+
|
|
218
|
+
### Image Queries (by provider)
|
|
219
|
+
|
|
220
|
+
| Provider | Raw Image Tokens | LatentGate Tokens | Savings |
|
|
221
|
+
| ------------------------------ | ---------------: | ----------------: | ------- |
|
|
222
|
+
| OpenAI GPT-4o (high detail) | ~1,105 | ~150 | ~86% |
|
|
223
|
+
| Claude 3.5 Sonnet (1MP image) | ~1,334 | ~150 | ~89% |
|
|
224
|
+
| Gemini 3 Pro | ~560 | ~150 | ~73% |
|
|
225
|
+
| Gemini 2.0 Flash | ~258 | ~150 | ~42% |
|
|
226
|
+
|
|
227
|
+
### Text and Other Modes (all providers benefit equally)
|
|
228
|
+
|
|
229
|
+
| Scenario | Traditional | LatentGate | Savings |
|
|
230
|
+
| ------------------------- | ----------: | -------------------: | ------- |
|
|
231
|
+
| Long text prompt | ~800 | ~120 | ~85% |
|
|
232
|
+
| Conversation (10 turns) | ~2,500 | ~350 | ~86% |
|
|
233
|
+
| RAG documents (3 docs) | ~3,000 | ~450 | ~85% |
|
|
234
|
+
| Video stream (1 min)* | varies | ~2.85x fewer calls | ~65% |
|
|
235
|
+
|
|
236
|
+
*With selective decoding
|
|
237
|
+
|
|
238
|
+
### At Scale (10,000 image queries with gpt-4o-mini)
|
|
239
|
+
|
|
240
|
+
| | Traditional | LatentGate | Savings |
|
|
241
|
+
| -------------- | ----------- | ---------- | -------------- |
|
|
242
|
+
| Input tokens | 12,000,000 | 2,000,000 | 10M tokens |
|
|
243
|
+
| Cost | $1.80 | $0.30 | $1.50 (83%) |
|
|
244
|
+
|
|
245
|
+
---
|
|
246
|
+
|
|
247
|
+
## 📁 Project Structure
|
|
248
|
+
|
|
249
|
+
```
|
|
250
|
+
latent-gate/
|
|
251
|
+
├── latent_gate/
|
|
252
|
+
│   ├── __init__.py
|
|
253
|
+
│   ├── config.py
|
|
254
|
+
│   ├── payload.py
|
|
255
|
+
│   ├── text_processor.py
|
|
256
|
+
│   ├── local_processor.py
|
|
257
|
+
│   ├── remote_decoder.py
|
|
258
|
+
│   ├── selective_decoder.py
|
|
259
|
+
│   ├── fast_client.py
|
|
260
|
+
│   ├── cache.py
|
|
261
|
+
│   ├── pipeline.py
|
|
262
|
+
│   ├── cli.py
|
|
263
|
+
│   └── mcp_server.py
|
|
264
|
+
├── integrations/
|
|
265
|
+
│   ├── README.md
|
|
266
|
+
│   ├── mcp_server/
|
|
267
|
+
│   ├── claude_code_skill/
|
|
268
|
+
│   ├── cursor/
|
|
269
|
+
│   ├── continue_dev/
|
|
270
|
+
│   └── openai_functions/
|
|
271
|
+
├── examples/
|
|
272
|
+
├── tests/
|
|
273
|
+
├── docs/
|
|
274
|
+
│   ├── architecture.png
|
|
275
|
+
│   └── how_it_works.md
|
|
276
|
+
├── CHANGELOG.md
|
|
277
|
+
├── LICENSE
|
|
278
|
+
├── README.md
|
|
279
|
+
├── pyproject.toml
|
|
280
|
+
└── requirements.txt
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
---
|
|
284
|
+
|
|
285
|
+
## 🤝 Contributing
|
|
286
|
+
|
|
287
|
+
Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
288
|
+
|
|
289
|
+
### Priority Areas
|
|
290
|
+
|
|
291
|
+
- True embedding similarity (replace Jaccard with cosine via sentence-transformers)
|
|
292
|
+
- FastAPI server wrapper
|
|
293
|
+
- Direct video file input (auto frame extraction)
|
|
294
|
+
- Cost tracking dashboard
|
|
295
|
+
- More vision model support (Florence-2, InternVL)
|
|
296
|
+
- PyPI publish
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
## 📝 Citation
|
|
301
|
+
|
|
302
|
+
```bibtex
|
|
303
|
+
@software{latentgate2026,
|
|
304
|
+
author = {Kathan Modh},
|
|
305
|
+
title = {LatentGate: Local-First Vision-Language Pipeline Inspired by VL-JEPA},
|
|
306
|
+
year = {2026},
|
|
307
|
+
version = {0.4.0},
|
|
308
|
+
url = {https://github.com/KathanModh259/latent-gate}
|
|
309
|
+
}
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
Inspired by [VL-JEPA](https://arxiv.org/abs/2512.10942) (Meta FAIR, 2025).
|
|
313
|
+
|
|
314
|
+
---
|
|
315
|
+
|
|
316
|
+
## 📄 License
|
|
317
|
+
|
|
318
|
+
MIT License — see [LICENSE](LICENSE).
|
|
319
|
+
|
|
320
|
+
---
|
|
321
|
+
|
|
322
|
+
<div align="center">
|
|
323
|
+
|
|
324
|
+
**Built with 🧠 by [Kathan Modh](https://github.com/KathanModh259)**
|
|
325
|
+
|
|
326
|
+
*Process locally. Send smart. Pay less.*
|
|
327
|
+
|
|
328
|
+
Star this repo if it saved you tokens (and money)!
|
|
329
|
+
|
|
330
|
+
</div>
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# 🔮 LatentGate
|
|
4
|
+
|
|
5
|
+
### *Process Locally. Send Smart. Pay Less.*
|
|
6
|
+
|
|
7
|
+
**A VL-JEPA-inspired pipeline that compresses images, text, conversations, and RAG documents locally via Ollama, then sends only compact semantic payloads to any LLM API — cutting token costs by ~80%.**
|
|
8
|
+
|
|
9
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
10
|
+
[License: MIT](LICENSE)
|
|
11
|
+
[Changelog](CHANGELOG.md)
|
|
12
|
+
[Contributing](CONTRIBUTING.md)
|
|
13
|
+
[Ollama](https://ollama.com)
|
|
14
|
+
[Model Context Protocol](https://modelcontextprotocol.io)
|
|
15
|
+
|
|
16
|
+
[**Quick Start**](#-quick-start) · [**AI Tool Integrations**](#-use-with-ai-coding-tools-mcp-integration) · [**Benchmarks**](#-cost-benchmarks) · [**Contributing**](#-contributing)
|
|
17
|
+
|
|
18
|
+
</div>
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## 🏗️ Architecture
|
|
23
|
+
|
|
24
|
+
<div align="center">
|
|
25
|
+
|
|
26
|
+

|
|
27
|
+
|
|
28
|
+
</div>
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## 💡 The Problem
|
|
33
|
+
|
|
34
|
+
Every time you send an image or long prompt to GPT-4o / Claude / Gemini, you are burning 1,000+ tokens on processing that could happen locally for free.
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
Traditional: Image → Cloud LLM (1,200 tokens) → Answer
|
|
38
|
+
LatentGate: Image → Local Ollama (FREE) → Cloud LLM (200 tokens) → Answer
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## ✨ Features
|
|
44
|
+
|
|
45
|
+
- 🏠 **Local-First** — Vision and text compression runs on Ollama (free)
|
|
46
|
+
- 💰 **~80% Token Savings** — Send ~200 tokens instead of ~1,200
|
|
47
|
+
- 🔌 **MCP Server** — Works with Claude Desktop, Cursor, Cline, Continue, Zed
|
|
48
|
+
- 🎯 **Selective Decoding** — For video, only call API when scene changes (~2.85x fewer calls)
|
|
49
|
+
- 📝 **Text Compression** — Long prompts, conversations, RAG docs compressed locally
|
|
50
|
+
- ⚡ **Speed Optimized** — Connection pooling, model preloading, parallel processing
|
|
51
|
+
- 🌐 **Multi-Provider** — OpenAI, Anthropic, Google, Groq, or any OpenAI-compatible endpoint
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## 🚀 Quick Start
|
|
56
|
+
|
|
57
|
+
### Install
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# Core install
|
|
61
|
+
pip install latent-gate
|
|
62
|
+
|
|
63
|
+
# With MCP server (for Claude Desktop, Cursor, Cline, etc.)
|
|
64
|
+
pip install latent-gate[mcp]
|
|
65
|
+
|
|
66
|
+
# Pull required Ollama models
|
|
67
|
+
ollama pull llava:7b
|
|
68
|
+
ollama pull llama3:8b
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Run
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# Image query
|
|
75
|
+
python -m latent_gate photo.jpg "What is in this image?" --provider ollama -v
|
|
76
|
+
|
|
77
|
+
# Text compression
|
|
78
|
+
python -m latent_gate --text "Your long prompt here..." --provider ollama -v
|
|
79
|
+
|
|
80
|
+
# Image + Text combined
|
|
81
|
+
python -m latent_gate photo.jpg "Analyze" --text "Extra context..." -v
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Python API
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from latent_gate import LatentGatePipeline, PipelineConfig
|
|
88
|
+
|
|
89
|
+
config = PipelineConfig(
|
|
90
|
+
vision_model="llava:7b",
|
|
91
|
+
remote_provider="openai",
|
|
92
|
+
remote_model="gpt-4o-mini",
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
with LatentGatePipeline(config) as pipeline:
|
|
96
|
+
result = pipeline.query("photo.jpg", "Describe this")
|
|
97
|
+
result = pipeline.query_text("Your 500-word prompt...")
|
|
98
|
+
result = pipeline.query_conversation(messages, "Follow-up question")
|
|
99
|
+
result = pipeline.query_documents(["doc1...", "doc2..."], "Question?")
|
|
100
|
+
result = pipeline.query_universal(text="...", image="photo.jpg")
|
|
101
|
+
|
|
102
|
+
print(result["timing"])
|
|
103
|
+
print(result["tokens_estimated"])
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## 🔌 Use With AI Coding Tools (MCP Integration)
|
|
109
|
+
|
|
110
|
+
LatentGate works as a Model Context Protocol (MCP) server with every major AI coding tool. Once configured, your AI assistant automatically compresses images, long prompts, and documents — saving you ~80% on tokens without changing your workflow.
|
|
111
|
+
|
|
112
|
+
### Supported Tools
|
|
113
|
+
|
|
114
|
+
| Tool | Status |
|
|
115
|
+
| ----------------- | ----------- |
|
|
116
|
+
| Claude Desktop | Supported |
|
|
117
|
+
| Claude Code (CLI) | Supported |
|
|
118
|
+
| Cursor | Supported |
|
|
119
|
+
| Cline (VS Code) | Supported |
|
|
120
|
+
| Continue.dev | Supported |
|
|
121
|
+
| Zed Editor | Supported |
|
|
122
|
+
|
|
123
|
+
### Quick Setup
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
pip install latent-gate[mcp]
|
|
127
|
+
ollama pull llava:7b
|
|
128
|
+
ollama pull llama3:8b
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Then add to your AI tool MCP config:
|
|
132
|
+
|
|
133
|
+
```json
|
|
134
|
+
{
|
|
135
|
+
"mcpServers": {
|
|
136
|
+
"latent-gate": {
|
|
137
|
+
"command": "python",
|
|
138
|
+
"args": ["-m", "latent_gate.mcp_server"]
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Detailed setup guides for each tool: see the `integrations/` folder.
|
|
145
|
+
|
|
146
|
+
### What Gets Compressed Automatically
|
|
147
|
+
|
|
148
|
+
| Tool Call | When AI Uses It |
|
|
149
|
+
| ----------------------- | ------------------------------ |
|
|
150
|
+
| `compress_image` | Before analyzing any image |
|
|
151
|
+
| `compress_text` | For prompts longer than ~500 tokens |
|
|
152
|
+
| `compress_conversation` | When chat history is large |
|
|
153
|
+
| `compress_documents` | For RAG queries |
|
|
154
|
+
| `get_stats` | To check session savings |
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## ⚡ Speed Optimizations
|
|
159
|
+
|
|
160
|
+
| Optimization | What It Does | Impact |
|
|
161
|
+
| --------------------- | ----------------------------------------------------------- | ------------------------------- |
|
|
162
|
+
| Connection Pooling | Reuses HTTP connections via `requests.Session` | ~30-50% faster per call |
|
|
163
|
+
| Model Preloading | Warms up Ollama models on init (`keep_alive`) | Eliminates 5-15s cold start |
|
|
164
|
+
| Shorter Prompts | Optimized extraction prompts produce fewer output tokens | ~20% faster generation |
|
|
165
|
+
| 3-Tier JSON Parsing | Fast parse, extract from text, LLM fallback | Avoids slow LLM call 90% of time |
|
|
166
|
+
| Parallel Processing | Image and text processed simultaneously via ThreadPool | ~40% faster combined queries |
|
|
167
|
+
| Caching | Content-hash disk cache for repeated images | Instant on cache hit |
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## 📊 Cost Benchmarks
|
|
172
|
+
|
|
173
|
+
### Image Queries (by provider)
|
|
174
|
+
|
|
175
|
+
| Provider | Raw Image Tokens | LatentGate Tokens | Savings |
|
|
176
|
+
| ------------------------------ | ---------------: | ----------------: | ------- |
|
|
177
|
+
| OpenAI GPT-4o (high detail) | ~1,105 | ~150 | ~86% |
|
|
178
|
+
| Claude 3.5 Sonnet (1MP image) | ~1,334 | ~150 | ~89% |
|
|
179
|
+
| Gemini 3 Pro | ~560 | ~150 | ~73% |
|
|
180
|
+
| Gemini 2.0 Flash | ~258 | ~150 | ~42% |
|
|
181
|
+
|
|
182
|
+
### Text and Other Modes (all providers benefit equally)
|
|
183
|
+
|
|
184
|
+
| Scenario | Traditional | LatentGate | Savings |
|
|
185
|
+
| ------------------------- | ----------: | -------------------: | ------- |
|
|
186
|
+
| Long text prompt | ~800 | ~120 | ~85% |
|
|
187
|
+
| Conversation (10 turns) | ~2,500 | ~350 | ~86% |
|
|
188
|
+
| RAG documents (3 docs) | ~3,000 | ~450 | ~85% |
|
|
189
|
+
| Video stream (1 min)* | varies | ~2.85x fewer calls | ~65% |
|
|
190
|
+
|
|
191
|
+
*With selective decoding
|
|
192
|
+
|
|
193
|
+
### At Scale (10,000 image queries with gpt-4o-mini)
|
|
194
|
+
|
|
195
|
+
| | Traditional | LatentGate | Savings |
|
|
196
|
+
| -------------- | ----------- | ---------- | -------------- |
|
|
197
|
+
| Input tokens | 12,000,000 | 2,000,000 | 10M tokens |
|
|
198
|
+
| Cost | $1.80 | $0.30 | $1.50 (83%) |
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
## 📁 Project Structure
|
|
203
|
+
|
|
204
|
+
```
|
|
205
|
+
latent-gate/
|
|
206
|
+
├── latent_gate/
|
|
207
|
+
│   ├── __init__.py
|
|
208
|
+
│   ├── config.py
|
|
209
|
+
│   ├── payload.py
|
|
210
|
+
│   ├── text_processor.py
|
|
211
|
+
│   ├── local_processor.py
|
|
212
|
+
│   ├── remote_decoder.py
|
|
213
|
+
│   ├── selective_decoder.py
|
|
214
|
+
│   ├── fast_client.py
|
|
215
|
+
│   ├── cache.py
|
|
216
|
+
│   ├── pipeline.py
|
|
217
|
+
│   ├── cli.py
|
|
218
|
+
│   └── mcp_server.py
|
|
219
|
+
├── integrations/
|
|
220
|
+
│   ├── README.md
|
|
221
|
+
│   ├── mcp_server/
|
|
222
|
+
│   ├── claude_code_skill/
|
|
223
|
+
│   ├── cursor/
|
|
224
|
+
│   ├── continue_dev/
|
|
225
|
+
│   └── openai_functions/
|
|
226
|
+
├── examples/
|
|
227
|
+
├── tests/
|
|
228
|
+
├── docs/
|
|
229
|
+
│   ├── architecture.png
|
|
230
|
+
│   └── how_it_works.md
|
|
231
|
+
├── CHANGELOG.md
|
|
232
|
+
├── LICENSE
|
|
233
|
+
├── README.md
|
|
234
|
+
├── pyproject.toml
|
|
235
|
+
└── requirements.txt
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
## 🤝 Contributing
|
|
241
|
+
|
|
242
|
+
Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
243
|
+
|
|
244
|
+
### Priority Areas
|
|
245
|
+
|
|
246
|
+
- True embedding similarity (replace Jaccard with cosine via sentence-transformers)
|
|
247
|
+
- FastAPI server wrapper
|
|
248
|
+
- Direct video file input (auto frame extraction)
|
|
249
|
+
- Cost tracking dashboard
|
|
250
|
+
- More vision model support (Florence-2, InternVL)
|
|
251
|
+
- PyPI publish
|
|
252
|
+
|
|
253
|
+
---
|
|
254
|
+
|
|
255
|
+
## 📝 Citation
|
|
256
|
+
|
|
257
|
+
```bibtex
|
|
258
|
+
@software{latentgate2026,
|
|
259
|
+
author = {Kathan Modh},
|
|
260
|
+
title = {LatentGate: Local-First Vision-Language Pipeline Inspired by VL-JEPA},
|
|
261
|
+
year = {2026},
|
|
262
|
+
version = {0.4.0},
|
|
263
|
+
url = {https://github.com/KathanModh259/latent-gate}
|
|
264
|
+
}
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
Inspired by [VL-JEPA](https://arxiv.org/abs/2512.10942) (Meta FAIR, 2025).
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
## 📄 License
|
|
272
|
+
|
|
273
|
+
MIT License — see [LICENSE](LICENSE).
|
|
274
|
+
|
|
275
|
+
---
|
|
276
|
+
|
|
277
|
+
<div align="center">
|
|
278
|
+
|
|
279
|
+
**Built with 🧠 by [Kathan Modh](https://github.com/KathanModh259)**
|
|
280
|
+
|
|
281
|
+
*Process locally. Send smart. Pay less.*
|
|
282
|
+
|
|
283
|
+
Star this repo if it saved you tokens (and money)!
|
|
284
|
+
|
|
285
|
+
</div>
|