guaardvark 2.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. guaardvark-2.5.3/PKG-INFO +610 -0
  2. guaardvark-2.5.3/guaardvark.egg-info/PKG-INFO +610 -0
  3. guaardvark-2.5.3/guaardvark.egg-info/SOURCES.txt +51 -0
  4. guaardvark-2.5.3/guaardvark.egg-info/dependency_links.txt +1 -0
  5. guaardvark-2.5.3/guaardvark.egg-info/entry_points.txt +2 -0
  6. guaardvark-2.5.3/guaardvark.egg-info/requires.txt +14 -0
  7. guaardvark-2.5.3/guaardvark.egg-info/top_level.txt +2 -0
  8. guaardvark-2.5.3/llx/__init__.py +1 -0
  9. guaardvark-2.5.3/llx/client.py +155 -0
  10. guaardvark-2.5.3/llx/commands/__init__.py +1 -0
  11. guaardvark-2.5.3/llx/commands/agents.py +180 -0
  12. guaardvark-2.5.3/llx/commands/backup.py +195 -0
  13. guaardvark-2.5.3/llx/commands/chat.py +321 -0
  14. guaardvark-2.5.3/llx/commands/clients.py +118 -0
  15. guaardvark-2.5.3/llx/commands/dashboard.py +155 -0
  16. guaardvark-2.5.3/llx/commands/family.py +204 -0
  17. guaardvark-2.5.3/llx/commands/files.py +176 -0
  18. guaardvark-2.5.3/llx/commands/generate.py +84 -0
  19. guaardvark-2.5.3/llx/commands/images.py +158 -0
  20. guaardvark-2.5.3/llx/commands/index.py +119 -0
  21. guaardvark-2.5.3/llx/commands/jobs.py +146 -0
  22. guaardvark-2.5.3/llx/commands/launch.py +145 -0
  23. guaardvark-2.5.3/llx/commands/logs.py +219 -0
  24. guaardvark-2.5.3/llx/commands/projects.py +121 -0
  25. guaardvark-2.5.3/llx/commands/rag.py +202 -0
  26. guaardvark-2.5.3/llx/commands/rules.py +144 -0
  27. guaardvark-2.5.3/llx/commands/search.py +44 -0
  28. guaardvark-2.5.3/llx/commands/settings.py +93 -0
  29. guaardvark-2.5.3/llx/commands/system.py +277 -0
  30. guaardvark-2.5.3/llx/commands/tasks.py +193 -0
  31. guaardvark-2.5.3/llx/commands/videos.py +379 -0
  32. guaardvark-2.5.3/llx/commands/websites.py +150 -0
  33. guaardvark-2.5.3/llx/completer.py +195 -0
  34. guaardvark-2.5.3/llx/config.py +210 -0
  35. guaardvark-2.5.3/llx/context.py +276 -0
  36. guaardvark-2.5.3/llx/global_opts.py +48 -0
  37. guaardvark-2.5.3/llx/launch_config.py +100 -0
  38. guaardvark-2.5.3/llx/lite_server.py +137 -0
  39. guaardvark-2.5.3/llx/main.py +102 -0
  40. guaardvark-2.5.3/llx/onboarding.py +160 -0
  41. guaardvark-2.5.3/llx/output.py +106 -0
  42. guaardvark-2.5.3/llx/repl.py +352 -0
  43. guaardvark-2.5.3/llx/slash.py +737 -0
  44. guaardvark-2.5.3/llx/streaming.py +517 -0
  45. guaardvark-2.5.3/llx/theme.py +517 -0
  46. guaardvark-2.5.3/llx/utils.py +34 -0
  47. guaardvark-2.5.3/setup.cfg +4 -0
  48. guaardvark-2.5.3/setup.py +67 -0
  49. guaardvark-2.5.3/tests/__init__.py +0 -0
  50. guaardvark-2.5.3/tests/test_launch_config.py +86 -0
  51. guaardvark-2.5.3/tests/test_lite_server.py +105 -0
  52. guaardvark-2.5.3/tests/test_onboarding.py +86 -0
  53. guaardvark-2.5.3/tests/test_slash_multimodal.py +85 -0
@@ -0,0 +1,610 @@
1
+ Metadata-Version: 2.4
2
+ Name: guaardvark
3
+ Version: 2.5.3
4
+ Summary: Guaardvark CLI — full-stack AI platform with RAG, image/video generation, and agents
5
+ Home-page: https://guaardvark.com
6
+ Author: Guaardvark
7
+ Project-URL: Source, https://github.com/guaardvark/guaardvark
8
+ Project-URL: Homepage, https://guaardvark.com
9
+ Project-URL: Issues, https://github.com/guaardvark/guaardvark/issues
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Requires-Python: >=3.12
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: typer[all]>=0.9.0
18
+ Requires-Dist: rich>=13.0.0
19
+ Requires-Dist: python-socketio>=5.10.0
20
+ Requires-Dist: httpx>=0.25.0
21
+ Requires-Dist: websocket-client>=1.6.0
22
+ Requires-Dist: requests>=2.31.0
23
+ Requires-Dist: prompt_toolkit>=3.0.0
24
+ Requires-Dist: tenacity>=8.0.0
25
+ Requires-Dist: flask>=3.0.0
26
+ Provides-Extra: rag
27
+ Requires-Dist: llama-index-core<0.15.0,>=0.13.0; extra == "rag"
28
+ Requires-Dist: llama-index-llms-ollama>=0.7.0; extra == "rag"
29
+ Requires-Dist: llama-index-embeddings-ollama>=0.8.0; extra == "rag"
30
+ Dynamic: author
31
+ Dynamic: classifier
32
+ Dynamic: description
33
+ Dynamic: description-content-type
34
+ Dynamic: home-page
35
+ Dynamic: project-url
36
+ Dynamic: provides-extra
37
+ Dynamic: requires-dist
38
+ Dynamic: requires-python
39
+ Dynamic: summary
40
+
41
+ # Guaardvark
42
+
43
+ **Version 2.5.3** · [guaardvark.com](https://guaardvark.com)
44
+
45
+ The self-hosted AI workstation. Autonomous agents that see your screen and control your apps. A three-tier neural routing engine. Parallel agent swarms across isolated git worktrees. Video generation, image upscaling to 4K/8K, RAG over your documents, voice interface, and a 60+ tool execution engine — all running locally on your hardware. Your machine. Your data. Your rules.
46
+
47
+ ## What's included
48
+
49
+ A full creative-professional AI workstation, all running locally:
50
+
51
+ **Generation**
52
+ - **Video (Text-to-Video, Image-to-Video)** — Wan 2.2, CogVideoX 2B/5B, SVD-XT. No workflow graph required: paste a list of prompts, pick a model and resolution, hit go. The queue handles the rest while you start the next batch.
53
+ - **Audio Studio** — music generation (ACE-Step, full songs with vocals or instrumental), sound-effect lab (Stable Audio Open), neural voice (Chatterbox + Kokoro), and 6 Piper voice profiles out of the box.
54
+ - **Voice Cloning** — gated behind an explicit consent prompt before any clone is created or used.
55
+ - **Image generation** — Stable Diffusion via Diffusers with batch queue, face restoration, anatomy and detail controls.
56
+ - **Image + Video Upscaling** — 4K and 8K via HAT-L, RealESRGAN family, NMKD-Superscale, Foolhardy Remacri. Two-pass mode for maximum quality. Frame-by-frame video processing.
57
+ - **Batch CSV Generator** — generate unique web pages, post content, or structured data from a CSV using your indexed knowledge base as ground truth. Marketing copy, product pages, unique-content campaigns at scale.
58
+ - **File Generation** — code, text, docs, images, video, audio in one queue.
59
+
60
+ **Editing**
61
+ - **Video Editor** — Shotcut-lite timeline with three lanes (video / text / audio), drag-and-drop from the media library, real text overlay rendering via ffmpeg, visual trim sliders, keyboard shortcuts, one-step undo.
62
+ - **Video Text Overlay** — standalone tool for the simpler one-off case.
63
+
64
+ **Agents & Automation**
65
+ - **Autonomous screen agents** — agents see a real virtual desktop (Xvfb :99), move the mouse, click, type, navigate browsers, and verify their own actions.
66
+ - **AgentBrain** — three-tier neural routing: Reflex (<100ms), Instinct (1–3s), Deliberation (5–30s).
67
+ - **Agent Training System** — visual hand-eye-coordination teaching: bracket a session with Begin/End Lesson, walk the agent through a flow with thumbs-up pearls, the system distills a structured replayable lesson with parameterized steps.
68
+ - **Agent Memory + Learning** — system-message persistent knowledge that survives reboots, recipe induction from successful tasks (Agent Workflow Memory pattern), vision-actionable knowledge with no cached pixel coordinates.
69
+ - **Agent Swarms** — up to 20 parallel coding agents, each in an isolated git worktree on its own branch. Dependency-ordered merging. Flight Mode (fully offline). Backends: Claude Code, Cline/OpenClaw via local Ollama.
70
+ - **Agents · Agent Tools · Virtual Agent Screen** — explorable surfaces for each capability, with a draggable VNC viewer that works on any page.
71
+ - **Voice Chat** — Whisper.cpp transcribes, the agent thinks, Piper speaks. Toggle with `/voice`.
72
+ - **Outreach System** — supervised AI for social-media engagement (Reddit, Discord, Twitter/X, Facebook) grounded in your indexed knowledge. Full detail below.
73
+ - **Self-Improvement** — detects test failures, dispatches an agent to read the offending code and fix it, verifies, broadcasts to other instances. Optional Anthropic-API guardian review.
74
+ - **Auto Researcher** — autonomous RAG-pipeline optimizer that experiments with parameters, keeps wins, reverts losses.
75
+
76
+ **Workflow Surfaces**
77
+ - **File Manager** — drag from your real desktop into the in-app File Manager. Color-code files, copy & paste, drag-and-drop reorganize. Folder / List / Media views. Right-click menus (copy, paste, delete, recursive index). Files attach to clients, projects, websites, notes, or code repos.
78
+ - **Notes Manager** · **Media Manager** · **Project Management** · **Client Management** · **Websites Management** — consistent grid+detail UI for the working surfaces a small business actually uses. Cross-linked: documents attach to projects attach to clients attach to websites.
79
+ - **Dashboard** — live status grid: model health, GPU usage, RAG state, agent activity, plugin states.
80
+ - **Code Editor** — Monaco-based IDE with right-click "explain", "fix", "generate" via the AI assistant.
81
+ - **Code Analyzer · Code Repos** — repo-level understanding and per-repo indexing.
82
+ - **Task Scheduler** — cron-style scheduling for any agent task or generation job.
83
+ - **Rules & Prompts** — import/export rules and prompts as a portable bundle.
84
+
85
+ **Integration**
86
+ - **ComfyUI Backend** — managed as a plugin, used as the execution layer for advanced video pipelines.
87
+ - **WordPress Connectivity** — push generated content directly into a WordPress site via a companion plugin. Functional today; ships with security disclaimers and a finishing-pass on the roadmap before the plugin moves out of beta.
88
+
89
+ **Platform**
90
+ - **Plugin System** — every heavy capability (ComfyUI, Vision Pipeline, Audio Foundry, Upscaling, Discord, Swarm) is a managed plugin with health monitoring, port-based orphan cleanup, and a **System Resource Orchestrator** that arbitrates VRAM between them so two big models don't fight for the GPU.
91
+ - **CPU Offload** for models that don't fit in VRAM.
92
+ - **GPU + CPU Resource Monitor** — live, always visible.
93
+ - **Interconnector / Cluster** — install Guaardvark on multiple local machines, master/client architecture with approval workflows, automatic load balancing across the fleet, hardware profile auto-detection.
94
+ - **Model Management** — download voice/video/image models from HuggingFace with progress tracking. Quick-switch between local Ollama models. Quick-switch embedding models grouped by parameter count.
95
+ - **Backup & Restore** — granular or full system backup, schema-migration-aware restore, cross-version compatible.
96
+ - **Advanced Settings** — debugging toggles, RAG knobs, cache controls, diagnostic tools, test runners, self-improvement controls — exposed in the UI, not hidden behind a "config files only" wall.
97
+
98
+ <p align="center">
99
+ <img src="https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/guaardvark-demo.gif" alt="Guaardvark Demo" width="100%">
100
+ </p>
101
+
102
+ <p align="center">
103
+ <img src="https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/swarm-demo.gif" alt="Agent Swarm — parallel Claude Code agents across isolated git worktrees" width="100%">
104
+ <br>
105
+ <em>Agent Swarm — parse a plan, spawn parallel agents in isolated git worktrees, resolve the dependency DAG, merge back to main.</em>
106
+ </p>
107
+
108
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
109
+ [![CI](https://github.com/guaardvark/guaardvark/actions/workflows/ci.yml/badge.svg)](https://github.com/guaardvark/guaardvark/actions/workflows/ci.yml)
110
+ [![PyPI](https://img.shields.io/pypi/v/guaardvark?label=PyPI&color=blue)](https://pypi.org/project/guaardvark/)
111
+ [![GitHub stars](https://img.shields.io/github/stars/guaardvark/guaardvark?style=social)](https://github.com/guaardvark/guaardvark/stargazers)
112
+ [![GitHub issues](https://img.shields.io/github/issues/guaardvark/guaardvark)](https://github.com/guaardvark/guaardvark/issues)
113
+ [![Sponsor](https://img.shields.io/badge/Sponsor-Guaardvark-ff69b4?logo=github-sponsors)](https://github.com/sponsors/guaardvark)
114
+
115
+ ```bash
116
+ git clone https://github.com/guaardvark/guaardvark.git && cd guaardvark && ./start.sh
117
+ ```
118
+
119
+ One command. Installs everything. Starts all services. Done.
120
+
121
+ ### AI-Generated Film — Made Entirely with Guaardvark
122
+
123
+ Every frame generated on a single desktop GPU. No cloud. No stock footage. No API keys.
124
+
125
+ [![Gotham Rising — AI-Generated Short Film](https://img.youtube.com/vi/8MdtM3HurJo/maxresdefault.jpg)](https://www.youtube.com/watch?v=8MdtM3HurJo)
126
+
127
+ ---
128
+
129
+ ## What Makes This Different
130
+
131
+ ### AgentBrain — Three-Tier Neural Routing
132
+
133
+ Every message is routed through a three-tier decision engine that picks the fastest path to the right answer. Reflexes fire in under 100 milliseconds. Instinct handles single-shot requests in one LLM call. Deliberation spins up a full ReACT reasoning loop when the problem demands it.
134
+
135
+ | Agent Control | Agent Tools |
136
+ |:-:|:-:|
137
+ | ![Agents](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/agents-page.png) | ![Tools](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/agent-tools-page.png) |
138
+
139
+ | Tier | Name | Latency | LLM Calls | When It Fires |
140
+ |------|------|---------|-----------|---------------|
141
+ | 1 | **Reflex** | <100ms | 0 | Greetings, farewells, media controls — pattern-matched, no inference |
142
+ | 2 | **Instinct** | 1–3s | 1 | Single-shot questions, web searches, image generation, vision tasks |
143
+ | 3 | **Deliberation** | 5–30s | 3–10 | Multi-step research, analysis chains, complex agent tasks |
144
+
145
+ - **Automatic escalation** — Tier 2 can signal complexity and hand off to Tier 3 mid-response
146
+ - **Agent-screen gating** — when the virtual screen isn't being viewed, vision models fall through to the normal ReACT loop with the full tool registry instead of always trying to drive the screen. Click and type tools only appear when a user actually has the agent screen open.
147
+ - **BrainState singleton** — pre-computes tool schemas, model capabilities, system prompts, and reflex tables at startup so routing adds zero overhead
148
+ - **Warm-up** — background thread loads the active model into VRAM before the first request arrives
149
+
150
+ ### Autonomous Screen Agents
151
+
152
+ Guaardvark agents control a real virtual desktop (Xvfb + openbox at 1280x720). They see the screen through vision models, move the mouse, click buttons, type text, navigate browsers, and verify their own actions.
153
+
154
+ - **Unified vision brain** — Gemma4 sees the screen and decides the next action in a single inference call. Qwen3-VL handles coordinate estimation. Both calibrated per-model with tracked scale factors.
155
+ - **Closed-loop servo targeting** — three-attempt adaptive strategy: ballistic move → single correction with crosshair overlay → full corrections with zoom-cropped analysis around the cursor
156
+ - **45+ deterministic recipes** — browser navigation, tabs, scroll, search, find, zoom, copy/paste — all execute instantly from a JSON recipe library, bypassing the vision loop entirely
157
+ - **Obstacle detection** — handles popups, permission dialogs, and notification bars with automatic thinking model escalation
158
+ - **Self-QA sweep** — agent navigates every page of its own UI and reports what's working and what's broken
159
+ - **Live agent monitor** — real-time SEE/THINK/ACT transcript of every decision the agent makes
160
+ - **Integrated screen viewer** — draggable, resizable VNC viewer on any page with popup window mode
161
+
162
+ #### Supported Vision Models
163
+
164
+ | Model | Role | Coordinate System | Notes |
165
+ |-------|------|-------------------|-------|
166
+ | Gemma4 (e4b) | Sees + decides | 1024x1024 normalized, box_2d `[y1,x1,y2,x2]` | Unified brain — vision and reasoning in one call |
167
+ | Qwen3-VL (2b) | Coordinate estimation | 1024px internal width | Default servo eyes, fast and accurate on dark UIs |
168
+ | Qwen3-VL (4b/8b) | Escalation eyes | 1024px internal width | Automatic escalation after 3 consecutive failures |
169
+ | Moondream | Fallback eyes | 1024px internal width | For text-only models that need external vision |
170
+
171
+ ### Swarm Orchestrator — Parallel Agent Execution
172
+
173
+ Launch multiple AI coding agents in parallel, each working in an isolated git worktree on its own branch. Results merge back with dependency-ordered conflict detection, optional test validation, and full cost tracking.
174
+
175
+ - **Two backends** — Claude Code (cloud, cost-tracked at $0.015/$0.075 per 1K tokens) and Cline/OpenClaw (fully local via Ollama, zero cost)
176
+ - **Flight Mode** — fully offline operation. Auto-detects network state, falls back to local models, serializes file conflicts automatically. No prompts, no internet required.
177
+ - **Git worktree isolation** — each task gets its own branch and working directory. All worktrees share the `.git` directory (lightweight). Automatically excluded from `git status`.
178
+ - **Dependency-aware merging** — topological sort ensures foundational changes land first. Dry-run conflict detection before real merge. Test suite validation before integration.
179
+ - **Built-in templates** — REST API scaffold, refactor-and-extract, test coverage expansion, Flight Mode demo
180
+ - **Up to 20 concurrent agents** — configurable limit with automatic slot management
181
+ - **Live dashboard** — real-time status, per-task logs, cost breakdown, elapsed time, disk usage
182
+
183
+ ### Video Generation Pipeline
184
+
185
+ State-of-the-art video generation running entirely on your GPU. No cloud APIs, no per-minute billing, no content restrictions.
186
+
187
+ | Video Generation | Plugin System |
188
+ |:-:|:-:|
189
+ | ![Video Gen](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/video-generation-page.png) | ![Plugins](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/plugins-page.png) |
190
+
191
+ | Model | Type | Max Duration | Native Resolution | VRAM |
192
+ |-------|------|-------------|-------------------|------|
193
+ | **Wan 2.2 (14B MoE)** | Text-to-Video | 5s (81 frames @ 16fps) | 832x480 | 11GB |
194
+ | **CogVideoX-5B** | Text-to-Video | 6s (49 frames @ 8fps) | 720x480 | 16GB |
195
+ | **CogVideoX-2B** | Text-to-Video | 6s (49 frames @ 8fps) | 720x480 | 12GB |
196
+ | **CogVideoX-5B I2V** | Image-to-Video | 6s (49 frames @ 8fps) | 720x480 | 16GB |
197
+ | **SVD XT** | Image-to-Video | 3.5s (25 frames @ 7fps) | 512x512 | <8GB |
198
+
199
+ - **Resolution options** — 512px, 576px, 720px, 1280px, 1920px (1080p), and custom dimensions (multiples of 8)
200
+ - **Quality tiers** — Fast (10 steps), Standard (30), High (40), Maximum (50)
201
+ - **Frame interpolation** — 1x raw, 2x doubled FPS, 2x + upscale for cinema-quality output
202
+ - **Prompt enhancement** — Cinematic, Realistic, Artistic, Anime, or raw
203
+ - **Low VRAM mode** — automatically reduces resolution, frames, and inference steps for 8–12GB GPUs
204
+ - **Batch processing** — queue multiple videos from a prompt list, processed by Celery workers
205
+ - **ComfyUI integration** — one-click launch to the node editor for custom workflows
206
+
207
+ ### Audio Studio — Music, FX, and Neural Voice
208
+
209
+ Three audio backends in one plugin with shared GPU-arbitration so they don't trample each other or fight Ollama for VRAM.
210
+
211
+ - **Music generation** — ACE-Step v1 (3.5B) for full songs with vocals or instrumental-only mode. Suno-style chip-prompt UX (Genre / Mood / Instrument) with optional LLM "Polish" pass that translates plain English into ACE-Step's tag vocabulary plus a paired negative prompt. ~10 GB VRAM at fp16.
212
+ - **FX Lab** — Stable Audio Open for sound effects and short ambient pieces. Light, fast, runs alongside other models.
213
+ - **Neural Voice** — Chatterbox as the primary TTS backend, Kokoro as a fast fallback, Piper for narration with 6 voice profiles included. Used for chat narration, voiceover for videos, and the voice-chat conversational mode.
214
+ - **Voice Cloning** — opt-in, gated behind an explicit consent prompt before any clone is created or used. Reference clips are kept under your control; the system never auto-clones from incidental audio.
215
+ - **Built-in audio player** — generated WAVs and MP3s open in an in-app player modal instead of triggering a browser download. Documents page surfaces audio rows with prompt, model, duration, and a waveform.
216
+ - **Suno export** — bulk-export a Suno library into the local DocumentsPage for use with the other generators.
217
+
218
+ ### Video Editor — Shotcut-lite Timeline
219
+
220
+ A built-in non-linear editor for stitching generated clips, layering text, and rendering finished videos — without leaving the app.
221
+
222
+ | Lane | Holds | Source |
223
+ |------|-------|--------|
224
+ | **Video** | one clip per timeline (multi-clip tracking on the roadmap) | Media Library — drag-and-drop |
225
+ | **Text** | unlimited overlays, draggable on the preview, properties-panel for size/color/rotation | Add-Text button + properties editor |
226
+ | **Audio** | one music or voice clip | Media Library — Audio tab |
227
+
228
+ - **Visual trim slider** — Material UI range slider bound to source duration, two thumbs for start/end, live monospace readout. No more typing seconds into number inputs.
229
+ - **Tabbed icon-grid library** — three tabs (Video / Audio / Images) with counts in the tab labels. 36px tiles, drag from tile to matching timeline track.
230
+ - **Real text overlay rendering** — backend uses `ffmpeg drawtext` (9 named positions, optional outline + translucent box, proper escaping for colons/quotes/commas). Original is preserved.
231
+ - **Keyboard shortcuts** — space to play/pause, arrow keys to scrub, `t` to add text, `del` to remove selected, `cmd+z` for one-step undo.
232
+ - **JobOperationGate** — render path checks the gate before grabbing the GPU, so a render won't trample an active video generation or upscaling job.
233
+ - **Standalone Video Text Overlay tool** — for the simple one-off case where you don't need a timeline.
234
+
235
+ ### GPU Image Upscaling — 4K and 8K Output
236
+
237
+ Upscale images and video frames to 4K (3840px) or 8K (7680px) resolution using GPU-accelerated super-resolution models.
238
+
239
+ | Model | Scale | Size | Best For |
240
+ |-------|-------|------|----------|
241
+ | HAT-L SRx4 | 4x | 159 MB | Maximum quality restoration |
242
+ | RealESRGAN x4plus | 4x | 64 MB | General-purpose, photorealistic |
243
+ | RealESRGAN x2plus | 2x | 64 MB | Mild upscaling |
244
+ | RealESRGAN x4plus (Anime) | 4x | 17 MB | Anime and stylized content |
245
+ | realesr-animevideov3 | 4x | 6 MB | Video-optimized anime |
246
+ | 4x-UltraSharp | 4x | 67 MB | Enhanced sharpness |
247
+ | 4x NMKD-Superscale | 4x | 67 MB | Advanced super-scaling |
248
+ | 4x Foolhardy Remacri | 4x | 67 MB | Texture-focused upscaling |
249
+
250
+ - **Two-pass mode** — run the model twice for maximum quality
251
+ - **Precision control** — FP16 (standard GPUs), BF16 (Ampere+), torch.compile for up to 3x speedup
252
+ - **Video upscaling** — frame-by-frame processing with progress tracking for MP4, MKV, AVI, MOV, WebM
253
+ - **Watch folder** — optional auto-processing of new files dropped into a directory
254
+
255
+ ### RAG That Actually Works
256
+
257
+ Chat grounded in your documents. Upload files, build a knowledge base, and ask questions. The AI reads and understands your content — not just keyword matching.
258
+
259
+ | Chat with Agent Screen | Agent YouTube Search |
260
+ |:-:|:-:|
261
+ | ![Chat](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/chat-agent-youtube-search.png) | ![Agent YouTube](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/chat-agent-youtube-search-wide.png) |
262
+
263
+ - **Hybrid retrieval** — BM25 keyword + vector semantic search combined
264
+ - **Smart chunking** — code files get AST-informed chunking, prose gets semantic splitting
265
+ - **Multiple embedding models** — switch between lightweight (300M) and high-quality (4B+) via UI
266
+ - **RAG Autoresearch** — autonomous optimization loop that experiments with parameters, keeps improvements, reverts regressions
267
+ - **Entity extraction** — automatic entity and relationship indexing
268
+ - **Per-project isolation** — each project has its own knowledge base and chat context
269
+
270
+ ### Self-Improving AI
271
+
272
+ The system runs its own test suite, identifies failures, dispatches an AI agent to read the code and fix the bugs, verifies the fix, and broadcasts the learning to other instances. No human in the loop.
273
+
274
+ - **Three modes** — Scheduled (every 6 hours), Reactive (triggered by repeated 500 errors), Directed (manual tasks)
275
+ - **Guardian review** — Uncle Claude (Anthropic API) reviews code changes for safety before applying, with risk levels and halt directives
276
+ - **Verification loop** — re-runs tests after every fix to confirm it worked
277
+ - **Pending fixes queue** — stage, review, approve, or reject proposed changes
278
+ - **Cross-machine learning** — fixes propagate to all connected instances via the Interconnector
279
+
280
+ ### Outreach System — Supervised AI for Social-Media Engagement
281
+
282
+ A supervised, auditable framework for drafting and posting authentic comments on Reddit, Discord, Twitter/X, and Facebook — using your own indexed knowledge as the source of truth for citations and context. The point isn't volume. It's keeping up with engagement on your own products and topics, with the agent handling the legwork.
283
+
284
+ **How it works**:
285
+
286
+ 1. **Discover** — the agent scouts target threads either by URL (you paste one into the New Draft modal) or by walking platform-specific entry points (subscribed subreddits, Discord channels, Twitter feeds, Facebook groups).
287
+ 2. **Context** — for each candidate post, the agent fetches the OP body and top comments. Reddit goes through the JSON API (fast, no scrape). Discord, Twitter, and Facebook go through the agent's logged-in Firefox session over CDP/BiDi, with a vision-model fallback when DOM selectors drift after a platform redesign.
288
+ 3. **Draft** — your local LLM composes a reply grounded in the thread context plus citations from your indexed documents (clients, projects, products, examples — whatever you've fed the knowledge base).
289
+ 4. **Grade** — every draft is scored against a relevance + quality rubric. Anything below threshold is dropped before it reaches the queue. Generic "great post!" replies don't survive grading.
290
+ 5. **Review** — drafts land in a queue. In supervised mode (the default), nothing posts without your approval. Edit, save, approve, reject — your call on each one.
291
+ 6. **Post** — approved drafts are posted via the platform's logged-in browser session, using a persona-shaped voice and a vision-driven send. Reddit posting is fully wired and verified end-to-end. Discord/Twitter/Facebook posting is in flight; drafting, queueing, and the supervised review surface already work for all four.
292
+
293
+ **Three layers of safety**:
294
+
295
+ - **Kill switch** at the system level. Flip it off and every outreach pipeline — drafting, queueing, posting — stops mid-flight. Nothing escapes.
296
+ - **Supervised mode** is the default. Drafts queue, never auto-post. You approve each one explicitly.
297
+ - **Cadence gates** — at most 1 post per 30 minutes per platform, configurable. Prevents bot-shaped behavior and respects platform anti-spam expectations.
298
+
299
+ **Audit log** — every action (scout, draft, grade, approve, reject, post, fail) is recorded in a JSONL audit trail with timestamps, draft IDs, and outcomes. Exportable for compliance or post-hoc review.
300
+
301
+ **Persona system** — a single configurable persona (voice, expertise areas, citation style, what to never say) shapes every draft for consistency. Your replies sound like you, not like an LLM.
302
+
303
+ **Manual draft mode** — paste a thread URL, the agent auto-scouts the context, the LLM seeds a draft, you edit and save. Full human control with the agent doing the legwork (scouting, context-fetching, citation suggestion).
304
+
305
+ **On-demand passes** — instead of waiting for the cron, fire a pass for a specific platform or subreddit on demand from the UI. Useful for active engagement around a launch or a thread you spotted.
306
+
307
+ **Why it's not spam** — outreach is anchored on your own knowledge base. Citations point at YOUR documentation, YOUR examples. The system grades drafts for genuine relevance and refuses to engage when it can't add value. The cadence gate keeps the volume human-paced. Supervised mode keeps the human in the loop. The result is closer to "an assistant that helps you keep up with engagement on your own products and topics" than "an outbound bot."
308
+
309
+ ---
310
+
311
+ ## Full Feature Set
312
+
313
+ ### AI & Chat
314
+ - **60+ registered tools** across 13 categories — web search, direct URL fetch, browser automation, code execution, file management, media control, desktop automation, MCP integration, knowledge base, image generation, agent control, memory management
315
+ - **`fetch_url` primitive** — single-purpose URL fetcher separate from `web_search`, so the model picks the right tool on the first try when you name a specific domain
316
+ - **9 specialized agents** — code assistant, content creator, research agent, browser automation, vision control, and more
317
+ - **ReACT agent loop** — iterative reasoning, action, observation with tool execution guard and circuit breaker
318
+ - **Streaming responses** via Socket.IO with conversational fast-path (~700ms)
319
+ - **Tool call transparency** — collapsible tool call cards showing parameters, results, timing, and success/error status inline in chat
320
+ - Runtime model switching — swap LLMs through the UI, GPU memory managed automatically
321
+ - Voice interface — Whisper.cpp STT + Piper TTS with narration and voiceover
322
+ - Session history with search, grouping, previews, and persistent tool call data
323
+ - **Persistent memory** — save facts, instructions, and context across sessions with automatic LLM injection
324
+ - **Uncle Claude escalation** — optional Anthropic API integration for problems that need a bigger model, with monthly token budgeting
325
+
326
+ ### Image Generation
327
+ - Stable Diffusion via Diffusers library — batch queue with auto-registration to the file system
328
+ - Face restoration, anatomy enhancement, and detail controls
329
+ - Image library with thumbnail grid, lightbox preview, keyboard navigation, batch operations
330
+ - **Bates-numbered output** — generated files auto-registered with timestamped sequential naming
331
+
332
+ ### Audio Studio
333
+ - ACE-Step v1 (3.5B) for full-song music generation with vocals or instrumental-only
334
+ - Stable Audio Open for FX and short ambient pieces
335
+ - Chatterbox + Kokoro neural TTS, plus 6 Piper voice profiles
336
+ - Voice cloning with explicit consent gating
337
+ - Suno-style chip-prompt UX with optional LLM "Polish" pass for ACE-Step's tag vocabulary
338
+ - In-app audio player modal — generated audio doesn't trigger downloads
339
+ - Suno bulk-export landing in the local DocumentsPage
340
+
341
+ ### Video Editor
342
+ - Three-lane timeline (video / text / audio) with drag-and-drop from the Media Library
343
+ - Real text overlay rendering via `ffmpeg drawtext` (9 positions, outline + box options)
344
+ - Visual trim slider, keyboard shortcuts, one-step undo
345
+ - Tabbed icon-grid library with counts in tab labels
346
+ - JobOperationGate hook so renders coordinate VRAM with other GPU-heavy jobs
347
+
348
+ ### Outreach System
349
+ - Reddit / Discord / Twitter/X / Facebook drafting + queueing
350
+ - Reddit posting fully wired; other platforms in flight
351
+ - Three-layer safety (kill switch + supervised mode + cadence gates)
352
+ - Persona system + audit log + on-demand passes
353
+ - Indexed-knowledge citations grounded in your documents
354
+
355
+ ### Voice + Voice Chat
356
+ - Whisper.cpp for speech-to-text, Piper for text-to-speech
357
+ - Hands-free conversation mode toggled by `/voice`
358
+ - Narration buttons to read any assistant response aloud
359
+ - Continuous voice chat with VAD-driven turn-taking
360
+
361
+ ### Agent & Code Tools
362
+ - **Monaco code editor** — built-in IDE with AI-powered explain, fix, and generate via right-click context menu
363
+ - **Code Analyzer** — repo-level static analysis surfaced in the editor
364
+ - **Code Repos** — per-repo indexing and cross-repo search
365
+ - **Self-demo system** — automated feature tour with screen recording and TTS narration
366
+ - **Media viewer** — inline document and media previews with thumbnail strip navigation
367
+
368
+ ### File & Document Management
369
+ - Desktop-style UI — draggable folder icons, resizable windows, right-click context menus
370
+ - Drag from your real desktop into the in-app File Manager (preserves folder structure)
371
+ - Color-code files, copy/paste, drag-and-drop reorganize
372
+ - Folder / List / Media views; switch on the fly
373
+ - Right-click menus: copy, paste, delete, recursive-index
374
+ - Files attach to clients, projects, websites, notes, or code repos for organized retrieval
375
+ - **Notes Manager** · **Media Manager** — first-class surfaces alongside Documents
376
+
377
+ ### Project · Client · Website Management
378
+ - Grid+detail UI for each — consistent shape, easy to learn one and know all three
379
+ - Cross-linked: documents attach to projects, projects attach to clients, clients attach to websites
380
+ - Per-project knowledge base isolation for RAG
381
+ - Per-website settings carry through to outreach personas and WordPress integration
382
+
383
+ ### WordPress Connectivity
384
+ - Companion plugin pushes generated content (text, images, video, audio) directly into a WordPress site
385
+ - Functional today; ships with explicit security disclaimers
386
+ - Roadmap: finishing pass + security hardening before the plugin moves out of beta
387
+ - Treat as opt-in for now — read the disclaimer before deploying to a production site
388
+
389
+ ### Task Scheduler
390
+ - Cron-style scheduling for any agent task or generation job
391
+ - Manage from the Tasks page; live status mirrored to the Activity feed
392
+ - Backed by Celery beat with persistent job history that survives restarts
393
+
394
+ ### Rules & Prompts
395
+ - System prompts and behavior rules stored as portable bundles
396
+ - Import/export to share between machines or back up before risky tweaks
397
+ - COMMAND_RULE entries surface as custom slash commands in the chat input
398
+
399
+ ### Multi-Machine Sync (Interconnector)
400
+ - Connect multiple Guaardvark instances into a family that shares code, learnings, and model configs
401
+ - Master/client architecture with approval workflows and pre-sync backups
402
+ - Hardware profile auto-detection on each node
403
+ - Routing-table builder distributes workloads across the fleet by capability
404
+
405
+ ### Plugin System
406
+ - **Managed plugins** with health monitoring, port-based orphan cleanup, and auto-restore on restart
407
+ - Ollama, ComfyUI, Vision Pipeline, Audio Foundry, Upscaling, Swarm Orchestrator, Discord
408
+ - **System Resource Orchestrator** arbitrates VRAM between plugins so they don't trample each other
409
+ - **CPU Offload** for models that don't fit in VRAM
410
+ - Live GPU + CPU resource monitor, persistent across the UI
411
+ - Model download management from HuggingFace with progress tracking — voice, video, image models
412
+
413
+ ### Vision Pipeline
414
+ - Real-time frame analysis via Ollama vision models with adaptive FPS throttling
415
+ - Two-layer change detection — perceptual hash + semantic analysis
416
+ - Local camera capture with device enumeration and stream management
417
+ - Context buffer with sliding window and compression
418
+
419
+ ### Self-Improvement & Research
420
+ - **Self-Improvement Engine** — detect → fix → verify → broadcast loop with three modes (Scheduled, Reactive, Directed)
421
+ - **Auto Researcher** — autonomous RAG-pipeline optimizer that experiments with parameters, keeps wins, reverts losses
422
+ - **Pending Fixes queue** — stage, review, approve, or reject proposed code changes
423
+ - **Cross-machine learning** — fixes propagate to all connected Interconnector nodes
424
+
425
+ ### Backup & Restore
426
+ - Granular per-area backups (data only, full, code) or single-shot full system
427
+ - Schema-migration-aware restore so an older backup can come back to a newer schema cleanly
428
+ - Cross-version compatible
429
+
430
+ ### Advanced Settings
431
+ - Debugging toggles, RAG knobs, cache controls, diagnostic tools, test runners, self-improvement controls
432
+ - Surfaced in the UI, not hidden behind a "config files only" wall
433
+ - Sectioned by area (Chat, RAG, Memory, Voice, Agents, Plugins, etc.) for quick navigation
434
+
435
+ ### System
436
+ - Dashboard with live status cards for model health, GPU, self-improvement, RAG, plugins, agent activity
437
+ - Celery background task system with live progress
438
+ - Six built-in themes
439
+ - Container support with Containerfile for isolated testing
440
+
441
+ ---
442
+
443
+ ## Screenshots
444
+
445
+ | Dashboard | Code Editor |
446
+ |:-:|:-:|
447
+ | ![Dashboard](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/dashboard-page.png) | ![Code Editor](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/code-editor-page.png) |
448
+
449
+ | Media Library | Video Generation |
450
+ |:-:|:-:|
451
+ | ![Media](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/media-library-page.png) | ![Video Gen](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/video-generation-page.png) |
452
+
453
+ | Plugins | Swarm Plan Editor |
454
+ |:-:|:-:|
455
+ | ![Plugins](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/plugins-page.png) | ![Swarm](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/swarm-plan-editor.png) |
456
+
457
+ | Settings — RAG | Settings — Memory |
458
+ |:-:|:-:|
459
+ | ![Settings RAG](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/settings-page-rag.png) | ![Settings Memory](https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/settings-page-memory.png) |
460
+
461
+ ---
462
+
463
+ ## Quick Start
464
+
465
+ ```bash
466
+ git clone https://github.com/guaardvark/guaardvark.git
467
+ cd guaardvark
468
+ ./start.sh
469
+ ```
470
+
471
+ First run handles everything: Python venv, Node dependencies, PostgreSQL, Redis, Ollama, Whisper.cpp, database migrations, frontend build, and all services. Requires your system password once for PostgreSQL setup.
472
+
473
+ | Service | URL |
474
+ |---------|-----|
475
+ | Web UI | http://localhost:5173 |
476
+ | API | http://localhost:5000 |
477
+ | Health Check | http://localhost:5000/api/health |
478
+
479
+ ```bash
480
+ ./start.sh # Full startup with health checks
481
+ ./start.sh --fast # Skip dependency checks
482
+ ./start.sh --test # Health diagnostics
483
+ ./start.sh --plugins # Start all enabled plugins
484
+ ./stop.sh # Stop all services
485
+ ```
486
+
487
+ ### Install via PyPI
488
+
489
+ ```bash
490
+ pip install guaardvark
491
+ ```
492
+
493
+ The CLI connects to a running Guaardvark instance or launches a lightweight embedded server automatically.
494
+
495
+ ---
496
+
497
+ ## CLI
498
+
499
+ 41 commands with tab completion and fuzzy matching. Install from PyPI or use the built-in REPL.
500
+
501
+ ```bash
502
+ guaardvark # Interactive REPL
503
+ guaardvark status # System dashboard
504
+ guaardvark chat "explain this codebase" # Chat with RAG context
505
+ guaardvark search "query" # Semantic search
506
+ guaardvark files upload report.pdf # Upload and index
507
+ ```
508
+
509
+ ### REPL Slash Commands
510
+
511
+ ```
512
+ /imagine <prompt> Generate an image from text
513
+ /video <prompt> Generate a video from text
514
+ /voice <text> Text-to-speech output
515
+ /agent Toggle autonomous agent mode
516
+ /web Open the web UI
517
+ /ingest <path> Index files or directories for RAG
518
+ /search <query> Semantic search over indexed documents
519
+ /models list List available Ollama models
520
+ /remember <text> Save to persistent memory
521
+ /memory list|search Browse saved memories
522
+ /backup create Create a system backup
523
+ /jobs list|watch Monitor background tasks
524
+ /config View or change settings
525
+ /help Full command reference
526
+ ```
527
+
528
+ ---
529
+
530
+ ## Requirements
531
+
532
+ | Dependency | Version | Notes |
533
+ |-----------|---------|-------|
534
+ | Python | 3.12+ | Backend |
535
+ | Node.js | 20+ | Frontend build |
536
+ | PostgreSQL | 14+ | Auto-installed |
537
+ | Redis | 5.0+ | Auto-installed |
538
+ | Ollama | latest | Local LLM inference |
539
+ | CUDA GPU | 8GB+ VRAM | 16GB recommended for video generation |
540
+
541
+ ### GPU Memory Guide
542
+
543
+ | Feature | Minimum | Recommended |
544
+ |---------|---------|-------------|
545
+ | Chat + RAG | 4GB | 8GB |
546
+ | Image generation | 6GB | 12GB |
547
+ | Wan 2.2 video | 11GB | 16GB |
548
+ | CogVideoX-5B video | 16GB | 20GB |
549
+ | Upscaling | 0.5GB | 2–4GB |
550
+
551
+ ---
552
+
553
+ ## Architecture
554
+
555
+ ```
556
+ Browser / CLI (PyPI: guaardvark)
557
+ | HTTP + WebSocket
558
+ v
559
+ Flask (68 REST blueprints + GraphQL + Socket.IO)
560
+ |
561
+ +-- AgentBrain (3-tier routing: Reflex → Instinct → Deliberation)
562
+ |
563
+ Service Layer (48 modules)
564
+ |-- Agent Executor (ReACT loop + 57 tools + BrainState)
565
+ |-- RAG Pipeline (LlamaIndex + hybrid retrieval)
566
+ |-- Self-Improvement Engine (detect → fix → verify → broadcast)
567
+ |-- Generation Services (image, video, voice, content)
568
+ |-- Swarm Orchestrator (parallel agents + git worktree isolation)
569
+ |-- Servo Controller (closed-loop vision targeting + calibration)
570
+ |-- Vision Pipeline (frame analysis + camera capture)
571
+ \-- Interconnector (multi-machine sync)
572
+ |
573
+ +---+---+---+---+
574
+ v v v v v
575
+ PostgreSQL Redis Ollama Virtual Display ComfyUI
576
+ Celery (Xvfb :99)
577
+ ```
578
+
579
+ **Frontend:** React 18 · Vite · Material-UI v5 · Zustand · Apollo Client · Monaco Editor · Socket.IO
580
+ **Models:** Gemma4 · Qwen3-VL · Qwen3 · Llama 3 · Moondream · Stable Diffusion · Wan 2.2 · CogVideoX · Real-ESRGAN · HAT
581
+
582
+ ---
583
+
584
+ ## Support the Project
585
+
586
+ Guaardvark is built with love by a solo developer. If it's useful to you:
587
+
588
+ - [Ko-fi](https://ko-fi.com/albenze) (zero fees!)
589
+ - [GitHub Sponsors](https://github.com/sponsors/guaardvark)
590
+ - [PayPal](https://paypal.me/albenze)
591
+
592
+ Star the repo if you find it interesting — it helps with visibility.
593
+
594
+ ---
595
+
596
+ ## Contributing
597
+
598
+ We welcome contributions! See the [Contributing Guide](CONTRIBUTING.md) to get started.
599
+
600
+ Looking for something to work on? Check out issues labeled [`good first issue`](https://github.com/guaardvark/guaardvark/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22).
601
+
602
+ ---
603
+
604
+ ## License
605
+
606
+ [MIT License](LICENSE) — Copyright (c) 2025-2026 Albenze, Inc.
607
+
608
+ <p align="center">
609
+ <img src="https://raw.githubusercontent.com/guaardvark/guaardvark/main/docs/screenshots/guaardvark-mascot.png" alt="Guaardvark mascot" width="110" height="107">
610
+ </p>