@techwavedev/agi-agent-kit 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. package/CHANGELOG.md +59 -0
  2. package/README.md +147 -0
  3. package/bin/init.js +471 -0
  4. package/package.json +36 -0
  5. package/templates/.agent/agents/backend-specialist.md +263 -0
  6. package/templates/.agent/agents/code-archaeologist.md +106 -0
  7. package/templates/.agent/agents/database-architect.md +226 -0
  8. package/templates/.agent/agents/debugger.md +225 -0
  9. package/templates/.agent/agents/devops-engineer.md +242 -0
  10. package/templates/.agent/agents/documentation-writer.md +104 -0
  11. package/templates/.agent/agents/explorer-agent.md +73 -0
  12. package/templates/.agent/agents/frontend-specialist.md +556 -0
  13. package/templates/.agent/agents/game-developer.md +162 -0
  14. package/templates/.agent/agents/mobile-developer.md +377 -0
  15. package/templates/.agent/agents/orchestrator.md +416 -0
  16. package/templates/.agent/agents/penetration-tester.md +188 -0
  17. package/templates/.agent/agents/performance-optimizer.md +187 -0
  18. package/templates/.agent/agents/product-manager.md +112 -0
  19. package/templates/.agent/agents/project-planner.md +403 -0
  20. package/templates/.agent/agents/qa-automation-engineer.md +109 -0
  21. package/templates/.agent/agents/security-auditor.md +170 -0
  22. package/templates/.agent/agents/seo-specialist.md +111 -0
  23. package/templates/.agent/agents/test-engineer.md +158 -0
  24. package/templates/.agent/rules/GEMINI.md +253 -0
  25. package/templates/.agent/workflows/brainstorm.md +113 -0
  26. package/templates/.agent/workflows/create.md +59 -0
  27. package/templates/.agent/workflows/debug.md +103 -0
  28. package/templates/.agent/workflows/deploy.md +176 -0
  29. package/templates/.agent/workflows/enhance.md +63 -0
  30. package/templates/.agent/workflows/orchestrate.md +237 -0
  31. package/templates/.agent/workflows/plan.md +89 -0
  32. package/templates/.agent/workflows/preview.md +81 -0
  33. package/templates/.agent/workflows/status.md +86 -0
  34. package/templates/.agent/workflows/test.md +144 -0
  35. package/templates/.agent/workflows/ui-ux-pro-max.md +296 -0
  36. package/templates/base/.env.example +54 -0
  37. package/templates/base/AGENTS.md +463 -0
  38. package/templates/base/requirements.txt +6 -0
  39. package/templates/base/skill-creator/LICENSE.txt +202 -0
  40. package/templates/base/skill-creator/SKILL_skillcreator.md +389 -0
  41. package/templates/base/skill-creator/references/output-patterns.md +82 -0
  42. package/templates/base/skill-creator/references/workflows.md +28 -0
  43. package/templates/base/skill-creator/scripts/init_skill.py +304 -0
  44. package/templates/base/skill-creator/scripts/package_skill.py +110 -0
  45. package/templates/base/skill-creator/scripts/quick_validate.py +95 -0
  46. package/templates/base/skill-creator/scripts/update_catalog.py +371 -0
  47. package/templates/skills/core/README.md +21 -0
  48. package/templates/skills/core/documentation/SKILL.md +351 -0
  49. package/templates/skills/core/documentation/references/best_practices.md +201 -0
  50. package/templates/skills/core/documentation/scripts/analyze_code.py +307 -0
  51. package/templates/skills/core/documentation/scripts/detect_changes.py +460 -0
  52. package/templates/skills/core/documentation/scripts/generate_changelog.py +312 -0
  53. package/templates/skills/core/documentation/scripts/sync_docs.py +272 -0
  54. package/templates/skills/core/documentation/scripts/update_skill_docs.py +366 -0
  55. package/templates/skills/core/pdf-reader/SKILL.md +104 -0
  56. package/templates/skills/core/pdf-reader/references/pdf_libraries.md +83 -0
  57. package/templates/skills/core/pdf-reader/scripts/extract_text.py +295 -0
  58. package/templates/skills/core/qdrant-memory/SKILL.md +435 -0
  59. package/templates/skills/core/qdrant-memory/references/advanced_patterns.md +375 -0
  60. package/templates/skills/core/qdrant-memory/references/collection_schemas.md +229 -0
  61. package/templates/skills/core/qdrant-memory/references/complete_guide.md +724 -0
  62. package/templates/skills/core/qdrant-memory/references/embedding_models.md +325 -0
  63. package/templates/skills/core/qdrant-memory/scripts/benchmark_token_savings.py +640 -0
  64. package/templates/skills/core/qdrant-memory/scripts/embedding_utils.py +323 -0
  65. package/templates/skills/core/qdrant-memory/scripts/hybrid_search.py +214 -0
  66. package/templates/skills/core/qdrant-memory/scripts/init_collection.py +193 -0
  67. package/templates/skills/core/qdrant-memory/scripts/memory_retrieval.py +345 -0
  68. package/templates/skills/core/qdrant-memory/scripts/semantic_cache.py +282 -0
  69. package/templates/skills/core/qdrant-memory/scripts/test_skill.py +655 -0
  70. package/templates/skills/core/webcrawler/SKILL.md +292 -0
  71. package/templates/skills/core/webcrawler/references/advanced_crawling.md +181 -0
  72. package/templates/skills/core/webcrawler/scripts/crawl_docs.py +532 -0
  73. package/templates/skills/core/webcrawler/scripts/extract_page.py +189 -0
  74. package/templates/skills/core/webcrawler/scripts/filter_docs.py +200 -0
  75. package/templates/skills/knowledge/api-patterns/SKILL.md +81 -0
  76. package/templates/skills/knowledge/api-patterns/api-style.md +42 -0
  77. package/templates/skills/knowledge/api-patterns/auth.md +24 -0
  78. package/templates/skills/knowledge/api-patterns/documentation.md +26 -0
  79. package/templates/skills/knowledge/api-patterns/graphql.md +41 -0
  80. package/templates/skills/knowledge/api-patterns/rate-limiting.md +31 -0
  81. package/templates/skills/knowledge/api-patterns/response.md +37 -0
  82. package/templates/skills/knowledge/api-patterns/rest.md +40 -0
  83. package/templates/skills/knowledge/api-patterns/scripts/api_validator.py +211 -0
  84. package/templates/skills/knowledge/api-patterns/security-testing.md +122 -0
  85. package/templates/skills/knowledge/api-patterns/trpc.md +41 -0
  86. package/templates/skills/knowledge/api-patterns/versioning.md +22 -0
  87. package/templates/skills/knowledge/app-builder/SKILL.md +75 -0
  88. package/templates/skills/knowledge/app-builder/agent-coordination.md +71 -0
  89. package/templates/skills/knowledge/app-builder/feature-building.md +53 -0
  90. package/templates/skills/knowledge/app-builder/project-detection.md +34 -0
  91. package/templates/skills/knowledge/app-builder/scaffolding.md +118 -0
  92. package/templates/skills/knowledge/app-builder/tech-stack.md +40 -0
  93. package/templates/skills/knowledge/app-builder/templates/SKILL.md +39 -0
  94. package/templates/skills/knowledge/app-builder/templates/astro-static/TEMPLATE.md +76 -0
  95. package/templates/skills/knowledge/app-builder/templates/chrome-extension/TEMPLATE.md +92 -0
  96. package/templates/skills/knowledge/app-builder/templates/cli-tool/TEMPLATE.md +88 -0
  97. package/templates/skills/knowledge/app-builder/templates/electron-desktop/TEMPLATE.md +88 -0
  98. package/templates/skills/knowledge/app-builder/templates/express-api/TEMPLATE.md +83 -0
  99. package/templates/skills/knowledge/app-builder/templates/flutter-app/TEMPLATE.md +90 -0
  100. package/templates/skills/knowledge/app-builder/templates/monorepo-turborepo/TEMPLATE.md +90 -0
  101. package/templates/skills/knowledge/app-builder/templates/nextjs-fullstack/TEMPLATE.md +82 -0
  102. package/templates/skills/knowledge/app-builder/templates/nextjs-saas/TEMPLATE.md +100 -0
  103. package/templates/skills/knowledge/app-builder/templates/nextjs-static/TEMPLATE.md +106 -0
  104. package/templates/skills/knowledge/app-builder/templates/nuxt-app/TEMPLATE.md +101 -0
  105. package/templates/skills/knowledge/app-builder/templates/python-fastapi/TEMPLATE.md +83 -0
  106. package/templates/skills/knowledge/app-builder/templates/react-native-app/TEMPLATE.md +93 -0
  107. package/templates/skills/knowledge/architecture/SKILL.md +55 -0
  108. package/templates/skills/knowledge/architecture/context-discovery.md +43 -0
  109. package/templates/skills/knowledge/architecture/examples.md +94 -0
  110. package/templates/skills/knowledge/architecture/pattern-selection.md +68 -0
  111. package/templates/skills/knowledge/architecture/patterns-reference.md +50 -0
  112. package/templates/skills/knowledge/architecture/trade-off-analysis.md +77 -0
  113. package/templates/skills/knowledge/bash-linux/SKILL.md +199 -0
  114. package/templates/skills/knowledge/behavioral-modes/SKILL.md +242 -0
  115. package/templates/skills/knowledge/brainstorming/SKILL.md +163 -0
  116. package/templates/skills/knowledge/brainstorming/dynamic-questioning.md +350 -0
  117. package/templates/skills/knowledge/clean-code/SKILL.md +201 -0
  118. package/templates/skills/knowledge/code-review-checklist/SKILL.md +109 -0
  119. package/templates/skills/knowledge/database-design/SKILL.md +52 -0
  120. package/templates/skills/knowledge/database-design/database-selection.md +43 -0
  121. package/templates/skills/knowledge/database-design/indexing.md +39 -0
  122. package/templates/skills/knowledge/database-design/migrations.md +48 -0
  123. package/templates/skills/knowledge/database-design/optimization.md +36 -0
  124. package/templates/skills/knowledge/database-design/orm-selection.md +30 -0
  125. package/templates/skills/knowledge/database-design/schema-design.md +56 -0
  126. package/templates/skills/knowledge/database-design/scripts/schema_validator.py +172 -0
  127. package/templates/skills/knowledge/deployment-procedures/SKILL.md +241 -0
  128. package/templates/skills/knowledge/doc.md +177 -0
  129. package/templates/skills/knowledge/documentation-templates/SKILL.md +194 -0
  130. package/templates/skills/knowledge/frontend-design/SKILL.md +396 -0
  131. package/templates/skills/knowledge/frontend-design/animation-guide.md +331 -0
  132. package/templates/skills/knowledge/frontend-design/color-system.md +311 -0
  133. package/templates/skills/knowledge/frontend-design/decision-trees.md +418 -0
  134. package/templates/skills/knowledge/frontend-design/motion-graphics.md +306 -0
  135. package/templates/skills/knowledge/frontend-design/scripts/accessibility_checker.py +183 -0
  136. package/templates/skills/knowledge/frontend-design/scripts/ux_audit.py +722 -0
  137. package/templates/skills/knowledge/frontend-design/typography-system.md +345 -0
  138. package/templates/skills/knowledge/frontend-design/ux-psychology.md +541 -0
  139. package/templates/skills/knowledge/frontend-design/visual-effects.md +383 -0
  140. package/templates/skills/knowledge/game-development/2d-games/SKILL.md +119 -0
  141. package/templates/skills/knowledge/game-development/3d-games/SKILL.md +135 -0
  142. package/templates/skills/knowledge/game-development/SKILL.md +167 -0
  143. package/templates/skills/knowledge/game-development/game-art/SKILL.md +185 -0
  144. package/templates/skills/knowledge/game-development/game-audio/SKILL.md +190 -0
  145. package/templates/skills/knowledge/game-development/game-design/SKILL.md +129 -0
  146. package/templates/skills/knowledge/game-development/mobile-games/SKILL.md +108 -0
  147. package/templates/skills/knowledge/game-development/multiplayer/SKILL.md +132 -0
  148. package/templates/skills/knowledge/game-development/pc-games/SKILL.md +144 -0
  149. package/templates/skills/knowledge/game-development/vr-ar/SKILL.md +123 -0
  150. package/templates/skills/knowledge/game-development/web-games/SKILL.md +150 -0
  151. package/templates/skills/knowledge/geo-fundamentals/SKILL.md +156 -0
  152. package/templates/skills/knowledge/geo-fundamentals/scripts/geo_checker.py +289 -0
  153. package/templates/skills/knowledge/i18n-localization/SKILL.md +154 -0
  154. package/templates/skills/knowledge/i18n-localization/scripts/i18n_checker.py +241 -0
  155. package/templates/skills/knowledge/intelligent-routing/SKILL.md +334 -0
  156. package/templates/skills/knowledge/lint-and-validate/SKILL.md +45 -0
  157. package/templates/skills/knowledge/lint-and-validate/scripts/lint_runner.py +172 -0
  158. package/templates/skills/knowledge/lint-and-validate/scripts/type_coverage.py +173 -0
  159. package/templates/skills/knowledge/mcp-builder/SKILL.md +176 -0
  160. package/templates/skills/knowledge/mobile-design/SKILL.md +394 -0
  161. package/templates/skills/knowledge/mobile-design/decision-trees.md +516 -0
  162. package/templates/skills/knowledge/mobile-design/mobile-backend.md +491 -0
  163. package/templates/skills/knowledge/mobile-design/mobile-color-system.md +420 -0
  164. package/templates/skills/knowledge/mobile-design/mobile-debugging.md +122 -0
  165. package/templates/skills/knowledge/mobile-design/mobile-design-thinking.md +357 -0
  166. package/templates/skills/knowledge/mobile-design/mobile-navigation.md +458 -0
  167. package/templates/skills/knowledge/mobile-design/mobile-performance.md +767 -0
  168. package/templates/skills/knowledge/mobile-design/mobile-testing.md +356 -0
  169. package/templates/skills/knowledge/mobile-design/mobile-typography.md +433 -0
  170. package/templates/skills/knowledge/mobile-design/platform-android.md +666 -0
  171. package/templates/skills/knowledge/mobile-design/platform-ios.md +561 -0
  172. package/templates/skills/knowledge/mobile-design/scripts/mobile_audit.py +670 -0
  173. package/templates/skills/knowledge/mobile-design/touch-psychology.md +537 -0
  174. package/templates/skills/knowledge/nextjs-best-practices/SKILL.md +203 -0
  175. package/templates/skills/knowledge/nodejs-best-practices/SKILL.md +333 -0
  176. package/templates/skills/knowledge/parallel-agents/SKILL.md +175 -0
  177. package/templates/skills/knowledge/performance-profiling/SKILL.md +143 -0
  178. package/templates/skills/knowledge/performance-profiling/scripts/lighthouse_audit.py +76 -0
  179. package/templates/skills/knowledge/plan-writing/SKILL.md +152 -0
  180. package/templates/skills/knowledge/powershell-windows/SKILL.md +167 -0
  181. package/templates/skills/knowledge/python-patterns/SKILL.md +441 -0
  182. package/templates/skills/knowledge/react-patterns/SKILL.md +198 -0
  183. package/templates/skills/knowledge/red-team-tactics/SKILL.md +199 -0
  184. package/templates/skills/knowledge/seo-fundamentals/SKILL.md +129 -0
  185. package/templates/skills/knowledge/seo-fundamentals/scripts/seo_checker.py +219 -0
  186. package/templates/skills/knowledge/server-management/SKILL.md +161 -0
  187. package/templates/skills/knowledge/systematic-debugging/SKILL.md +109 -0
  188. package/templates/skills/knowledge/tailwind-patterns/SKILL.md +269 -0
  189. package/templates/skills/knowledge/tdd-workflow/SKILL.md +149 -0
  190. package/templates/skills/knowledge/testing-patterns/SKILL.md +178 -0
  191. package/templates/skills/knowledge/testing-patterns/scripts/test_runner.py +219 -0
  192. package/templates/skills/knowledge/vulnerability-scanner/SKILL.md +276 -0
  193. package/templates/skills/knowledge/vulnerability-scanner/checklists.md +121 -0
  194. package/templates/skills/knowledge/vulnerability-scanner/scripts/security_scan.py +458 -0
  195. package/templates/skills/knowledge/webapp-testing/SKILL.md +187 -0
  196. package/templates/skills/knowledge/webapp-testing/scripts/playwright_runner.py +173 -0
@@ -0,0 +1,640 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script: benchmark_token_savings.py
4
+ Purpose: Demonstrate real-world token savings with qdrant-memory skill.
5
+
6
+ Compares three scenarios:
7
+ 1. NO CACHE: Every query goes to LLM with full context
8
+ 2. WITH SKILL: Using skill knowledge but no caching
9
+ 3. WITH QDRANT: Semantic cache + targeted context retrieval
10
+
11
+ Usage:
12
+ # Start services first
13
+ ollama serve &
14
+ docker run -d -p 6333:6333 qdrant/qdrant
15
+
16
+ # Run benchmark
17
+ python3 benchmark_token_savings.py
18
+
19
+ # Run with visualization
20
+ python3 benchmark_token_savings.py --visualize
21
+
22
+ Output:
23
+ Creates benchmark results in .tmp/qdrant_benchmark/
24
+ """
25
+
26
+ import argparse
27
+ import json
28
+ import os
29
+ import sys
30
+ import time
31
+ import hashlib
32
+ from datetime import datetime
33
+ from typing import Dict, List, Any, Optional
34
+ from urllib.request import Request, urlopen
35
+ from urllib.error import URLError
36
+
37
+ # Add scripts directory to path
38
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
39
+
40
+ try:
41
+ from embedding_utils import get_embedding, check_embedding_service, EMBEDDING_PROVIDER
42
+ except ImportError:
43
+ print("Error: Run from skills/qdrant-memory/scripts/ directory")
44
+ sys.exit(1)
45
+
46
+ # Configuration
47
+ QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
48
+ BENCHMARK_COLLECTION = "benchmark_cache"
49
+ OUTPUT_DIR = os.path.join(os.path.dirname(__file__), "../../../.tmp/qdrant_benchmark")
50
+
51
+ # Simulated skill context (from gitlab skill as example)
52
+ SKILL_CONTEXT = """
53
+ # GitLab Skill Context
54
+
55
+ ## Agent Installation
56
+ To install a GitLab agent on EKS:
57
+ 1. Register the agent in GitLab UI (Infrastructure > Kubernetes clusters)
58
+ 2. Create agent token and store as Kubernetes secret
59
+ 3. Install via Helm: helm upgrade --install gitlab-agent gitlab/gitlab-agent
60
+ 4. Verify: kubectl get pods -n gitlab-agent
61
+
62
+ ## Common Troubleshooting
63
+ - Agent not connecting: Check firewall rules for KAS endpoint
64
+ - Certificate errors: Verify TLS configuration
65
+ - Token expired: Rotate agent token in GitLab UI
66
+
67
+ ## GitOps Workflow
68
+ 1. Create .gitlab/agents/<agent-name>/config.yaml
69
+ 2. Define gitops project paths
70
+ 3. Push manifests to monitored repository
71
+ 4. Agent syncs automatically
72
+ """
73
+
74
+ # Simulated conversation history (a ~1.5K-character transcript, repeated 10x; roughly 4K tokens at ~4 chars per token)
75
+ CONVERSATION_HISTORY = """
76
+ User: How do I set up GitLab CI/CD?
77
+ Assistant: To set up GitLab CI/CD, create a .gitlab-ci.yml file in your repository root...
78
+
79
+ User: What about Docker integration?
80
+ Assistant: For Docker integration with GitLab CI/CD, you'll need to configure Docker-in-Docker (dind) or use Kaniko for building images...
81
+
82
+ User: Can you explain the stages concept?
83
+ Assistant: Stages in GitLab CI/CD define the order of job execution. Common stages include build, test, and deploy...
84
+
85
+ User: How do I cache dependencies?
86
+ Assistant: To cache dependencies, use the cache keyword in your .gitlab-ci.yml. For example, cache node_modules for npm projects...
87
+
88
+ User: What about artifacts?
89
+ Assistant: Artifacts are files created by jobs that can be passed to subsequent jobs. Use the artifacts keyword to define paths...
90
+
91
+ User: How do I set up environments?
92
+ Assistant: Environments in GitLab represent deployment targets. Define them using the environment keyword in your job...
93
+
94
+ User: Can I use variables?
95
+ Assistant: Yes, GitLab CI/CD supports various types of variables including predefined, custom, and protected variables...
96
+
97
+ User: How do I trigger pipelines?
98
+ Assistant: Pipelines can be triggered by pushes, merge requests, schedules, API calls, or manually from the UI...
99
+
100
+ User: What about parallel jobs?
101
+ Assistant: Use the parallel keyword to run multiple instances of a job. This is useful for splitting test suites...
102
+
103
+ User: How do I deploy to Kubernetes?
104
+ Assistant: For Kubernetes deployment, you can use kubectl commands directly or integrate with GitLab's Kubernetes agent...
105
+ """ * 10 # Repeat 10x to simulate a long history (~15K characters, roughly 4K tokens at ~4 chars per token)
106
+
107
+ # Test queries - includes similar queries to test semantic cache
108
+ TEST_QUERIES = [
109
+ # Unique queries
110
+ "How do I install the GitLab agent on EKS?",
111
+ "What are the troubleshooting steps for agent connection issues?",
112
+ "How do I configure GitOps with GitLab?",
113
+ "What is the process to register a new agent?",
114
+ "How do I rotate the agent token?",
115
+
116
+ # Repeated similar queries (should hit cache)
117
+ "How can I set up a GitLab agent in EKS?", # Similar to query 1
118
+ "What should I check if my agent won't connect?", # Similar to query 2
119
+ "How do I use GitOps with GitLab agent?", # Similar to query 3
120
+ "Steps to install GitLab Kubernetes agent?", # Similar to query 1
121
+ "Agent not connecting, how to troubleshoot?", # Similar to query 2
122
+ ]
123
+
124
+
125
def count_tokens(text: str) -> int:
    """Approximate the number of tokens in ``text``.

    Uses the rough heuristic of about four characters per token and
    rounds down, so strings shorter than four characters count as zero.
    """
    char_count = len(text)
    estimated_tokens, _leftover_chars = divmod(char_count, 4)
    return estimated_tokens
128
+
129
+
130
def qdrant_request(method: str, endpoint: str, data: Optional[Dict] = None) -> Dict:
    """Send a JSON request to the Qdrant REST API and return the decoded reply.

    Args:
        method: HTTP verb to use, e.g. "GET", "PUT", "POST" or "DELETE".
        endpoint: Path appended verbatim to ``QDRANT_URL``.
        data: Optional JSON-serialisable request body. ``None`` sends no body.

    Returns:
        The response body parsed from JSON.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for
            non-success status codes is a subclass).
        ValueError: If the response body is not valid JSON.
    """
    # Test against None rather than truthiness so that an explicitly empty
    # payload ({}) is still transmitted as a JSON body.
    body = json.dumps(data).encode("utf-8") if data is not None else None
    req = Request(
        f"{QDRANT_URL}{endpoint}",
        data=body,
        headers={"Content-Type": "application/json"},
        method=method,
    )
    with urlopen(req, timeout=30) as response:
        return json.loads(response.read().decode("utf-8"))
141
+
142
+
143
def setup_benchmark_collection() -> bool:
    """(Re)create the benchmark collection in Qdrant.

    Any existing collection named ``BENCHMARK_COLLECTION`` is deleted first so
    each run starts from an empty cache. The vector size is taken from the
    length of an embedding of the string "test", and cosine distance is used.

    Returns:
        True if the collection was created, False otherwise (the error is
        printed).
    """
    collection_path = f"/collections/{BENCHMARK_COLLECTION}"
    try:
        # Best-effort cleanup: a failed delete must not abort setup. Catch
        # Exception (not a bare except) so Ctrl-C and SystemExit still
        # propagate.
        try:
            qdrant_request("DELETE", collection_path)
        except Exception:
            pass

        # Probe the embedding service once to learn the vector dimension.
        embedding_dim = len(get_embedding("test"))
        payload = {
            "vectors": {
                "size": embedding_dim,
                "distance": "Cosine",
            }
        }
        qdrant_request("PUT", collection_path, payload)
        return True
    except Exception as e:
        print(f"Error setting up collection: {e}")
        return False
165
+
166
+
167
def check_semantic_cache(query: str, threshold: float = 0.88) -> Optional[Dict]:
    """Look up the closest previously cached query in Qdrant.

    Args:
        query: The user query to embed and search for.
        threshold: Minimum similarity score passed to Qdrant as
            ``score_threshold``; results below it are not returned.

    Returns:
        A dict with ``cache_hit``, ``score``, ``response`` and
        ``original_query`` when a match is found, otherwise ``None``.
        ``None`` is also returned when the lookup itself fails; in that case
        a warning is written to stderr so that an outage is not mistaken for
        a genuine cache miss.
    """
    try:
        search_payload = {
            "vector": get_embedding(query),
            "limit": 1,
            "score_threshold": threshold,
            "with_payload": True,
        }
        result = qdrant_request(
            "POST",
            f"/collections/{BENCHMARK_COLLECTION}/points/search",
            search_payload,
        )

        hits = result.get("result")
        if not hits:
            return None

        best = hits[0]
        stored = best.get("payload") or {}
        return {
            "cache_hit": True,
            "score": best["score"],
            "response": stored.get("response"),
            "original_query": stored.get("query"),
        }
    except Exception as e:
        # Stay best-effort (callers treat None as "miss"), but make the
        # failure visible instead of swallowing it.
        print(f"Warning: semantic cache lookup failed: {e}", file=sys.stderr)
        return None
196
+
197
+
198
def store_in_cache(query: str, response: str) -> bool:
    """Store a query/response pair in the semantic cache collection.

    The point id is derived from the MD5 of the query text, so storing the
    same query again overwrites the earlier entry rather than duplicating it.

    Args:
        query: The user query; it is embedded and used as the point vector.
        response: The answer to cache alongside the query.

    Returns:
        True if the upsert succeeded, False otherwise (a warning is written
        to stderr).
    """
    # Local import: the file-level import only brings in ``datetime``.
    from datetime import timezone

    try:
        embedding = get_embedding(query)
        # First 64 bits of the digest, folded into the non-negative signed
        # 64-bit range.
        point_id = int(hashlib.md5(query.encode()).hexdigest()[:16], 16) % (2**63)

        # datetime.utcnow() is deprecated; build the same naive-UTC ISO string
        # from an aware "now" so the stored format is unchanged.
        timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

        payload = {
            "points": [{
                "id": point_id,
                "vector": embedding,
                "payload": {
                    "query": query,
                    "response": response,
                    "timestamp": timestamp,
                },
            }]
        }

        qdrant_request("PUT", f"/collections/{BENCHMARK_COLLECTION}/points?wait=true", payload)
        return True
    except Exception as e:
        # Best-effort: report the failure but let the caller carry on.
        print(f"Warning: failed to store cache entry: {e}", file=sys.stderr)
        return False
220
+
221
+
222
def simulate_llm_response(query: str) -> str:
    """Return a canned stand-in for an LLM answer to ``query``.

    No model is called; the benchmark only needs text of a plausible length
    to estimate output tokens. The reply echoes the first 50 characters of
    the query and names a "topic" made of its 4th to 6th words.
    """
    # Join the words explicitly: interpolating the list slice directly would
    # embed a Python list repr (e.g. "['install', 'the', 'GitLab']").
    topic = " ".join(query.split()[3:6])
    return (
        f"[Simulated response for: {query[:50]}...] "
        f"This would be the LLM's detailed answer about {topic}..."
    )
227
+
228
+
229
def _new_scenario_totals(track_cache: bool = False) -> Dict[str, Any]:
    """Return a zeroed accumulator for one benchmark scenario."""
    totals: Dict[str, Any] = {
        "input_tokens": 0,
        "output_tokens": 0,
        "total_tokens": 0,
    }
    if track_cache:
        totals.update({"cache_hits": 0, "cache_misses": 0, "embedding_calls": 0})
    totals["queries"] = []
    return totals


def _record_query(totals: Dict[str, Any], query: str, input_tokens: int,
                  output_tokens: int, cache_hit: bool) -> None:
    """Add one query's token usage to a scenario accumulator."""
    totals["input_tokens"] += input_tokens
    totals["output_tokens"] += output_tokens
    totals["total_tokens"] = totals["input_tokens"] + totals["output_tokens"]
    totals["queries"].append({
        "query": query,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cache_hit": cache_hit,
    })


def _run_uncached_scenario(context: str) -> Dict[str, Any]:
    """Answer every test query with ``context`` prepended and no caching."""
    totals = _new_scenario_totals()
    for query in TEST_QUERIES:
        prompt = f"{context}\n\nUser: {query}"
        response = simulate_llm_response(query)
        _record_query(totals, query, count_tokens(prompt), count_tokens(response), False)
    return totals


def _run_cached_scenario() -> Dict[str, Any]:
    """Answer every test query through the semantic cache, printing hit/miss lines."""
    totals = _new_scenario_totals(track_cache=True)
    for number, query in enumerate(TEST_QUERIES, start=1):
        # NOTE: this counts one embedding per query (the cache lookup);
        # store_in_cache embeds the query again on a miss and that second
        # call is not included here.
        totals["embedding_calls"] += 1

        cache_result = check_semantic_cache(query)
        if cache_result:
            # Cache hit: no LLM call, so no input or output tokens are spent.
            totals["cache_hits"] += 1
            _record_query(totals, query, 0, 0, True)
            print(f" ✓ Query {number}: CACHE HIT (score: {cache_result['score']:.3f})")
        else:
            # Cache miss: call the (simulated) LLM with the skill context only,
            # then store the answer for later similar queries.
            totals["cache_misses"] += 1
            prompt = f"{SKILL_CONTEXT}\n\nUser: {query}"
            response = simulate_llm_response(query)
            store_in_cache(query, response)
            print(f" ○ Query {number}: Cache miss, stored for future")
            _record_query(totals, query, count_tokens(prompt), count_tokens(response), False)
    return totals


def _savings_percent(tokens: int, baseline: int) -> float:
    """Return the percentage of tokens saved versus ``baseline`` (0.0 if baseline is 0)."""
    if baseline <= 0:
        return 0.0
    return (1 - tokens / baseline) * 100


def _print_token_totals(totals: Dict[str, Any]) -> None:
    """Print the input/output/total token lines shared by every scenario."""
    print(f" Input tokens: {totals['input_tokens']:,}")
    print(f" Output tokens: {totals['output_tokens']:,}")
    print(f" TOTAL TOKENS: {totals['total_tokens']:,}")


def _format_summary_table(rows: List[tuple]) -> str:
    """Render ``(scenario, tokens, savings)`` string triples as a boxed table."""
    name_w, tokens_w, savings_w = 26, 10, 19
    # Each of the three cells has one space of padding per side (6) and there
    # are two inner column separators (2).
    inner_width = name_w + tokens_w + savings_w + 8

    def fmt(name: str, tokens: str, savings: str) -> str:
        return f"│ {name:<{name_w}} │ {tokens:>{tokens_w}} │ {savings:^{savings_w}} │"

    lines = [
        "┌" + "─" * inner_width + "┐",
        fmt("Scenario", "Tokens", "Savings vs Baseline"),
        "├" + "─" * inner_width + "┤",
    ]
    lines.extend(fmt(*row) for row in rows)
    lines.append("└" + "─" * inner_width + "┘")
    return "\n".join(lines)


def run_benchmark() -> Dict[str, Any]:
    """Run the full benchmark comparing three scenarios.

    Scenarios, all over ``TEST_QUERIES`` with a simulated LLM:
        1. No cache: conversation history + skill context sent on every query.
        2. With skill: only the skill context is sent; no caching.
        3. With Qdrant: semantic cache lookup first; skill context on a miss.

    Progress and a summary table are printed to stdout.

    Returns:
        A results dict with ``timestamp``, ``embedding_provider``,
        ``queries_tested`` and per-scenario token counts under ``scenarios``
        (keys ``no_cache``, ``with_skill``, ``with_qdrant``). An empty dict is
        returned when Qdrant, the embedding service, or collection setup is
        unavailable.
    """
    # Local import: the file-level import only brings in ``datetime``.
    from datetime import timezone

    print("\n" + "=" * 60)
    print("🧪 QDRANT MEMORY BENCHMARK")
    print("=" * 60)

    # ------------------------------------------------------------------
    # Prerequisites
    # ------------------------------------------------------------------
    print("\n📋 Checking prerequisites...")

    try:
        qdrant_request("GET", "/collections")
    except Exception:
        # Exception rather than a bare except so Ctrl-C still interrupts.
        print(" ✗ Qdrant not available. Start with: docker run -p 6333:6333 qdrant/qdrant")
        return {}
    print(" ✓ Qdrant is running")

    status = check_embedding_service()
    if status["status"] != "ok":
        print(f" ✗ Embedding service: {status['message']}")
        return {}
    print(f" ✓ Embeddings: {EMBEDDING_PROVIDER} ({status.get('model', 'unknown')})")

    if not setup_benchmark_collection():
        return {}
    print(" ✓ Benchmark collection created")

    query_count = len(TEST_QUERIES)
    results: Dict[str, Any] = {
        # Naive-UTC ISO string, same format the deprecated utcnow() produced.
        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        "embedding_provider": EMBEDDING_PROVIDER,
        "queries_tested": query_count,
        "scenarios": {},
    }

    # ------------------------------------------------------------------
    # SCENARIO 1: No cache (every query uses full context)
    # ------------------------------------------------------------------
    print("\n" + "-" * 60)
    print("📊 SCENARIO 1: NO CACHE (Full context every time)")
    print("-" * 60)

    scenario1_tokens = _run_uncached_scenario(f"{CONVERSATION_HISTORY}\n\n{SKILL_CONTEXT}")
    results["scenarios"]["no_cache"] = scenario1_tokens
    baseline_total = scenario1_tokens["total_tokens"]

    print(f" Total queries: {query_count}")
    _print_token_totals(scenario1_tokens)

    # ------------------------------------------------------------------
    # SCENARIO 2: With skill (targeted context, no cache)
    # ------------------------------------------------------------------
    print("\n" + "-" * 60)
    print("📊 SCENARIO 2: WITH SKILL (Targeted context, no cache)")
    print("-" * 60)

    scenario2_tokens = _run_uncached_scenario(SKILL_CONTEXT)
    results["scenarios"]["with_skill"] = scenario2_tokens

    skill_savings = _savings_percent(scenario2_tokens["total_tokens"], baseline_total)
    print(f" Total queries: {query_count}")
    _print_token_totals(scenario2_tokens)
    print(f" 💰 Savings vs No Cache: {skill_savings:.1f}%")

    # ------------------------------------------------------------------
    # SCENARIO 3: With Qdrant (semantic cache + targeted retrieval)
    # ------------------------------------------------------------------
    print("\n" + "-" * 60)
    print("📊 SCENARIO 3: WITH QDRANT (Semantic cache + retrieval)")
    print("-" * 60)

    scenario3_tokens = _run_cached_scenario()
    results["scenarios"]["with_qdrant"] = scenario3_tokens

    qdrant_savings = _savings_percent(scenario3_tokens["total_tokens"], baseline_total)
    # Guard the hit-rate division so an empty query list cannot crash the run.
    hit_rate = scenario3_tokens["cache_hits"] / query_count * 100 if query_count else 0.0
    print(f"\n Total queries: {query_count}")
    print(f" Cache hits: {scenario3_tokens['cache_hits']} ({hit_rate:.0f}%)")
    print(f" Cache misses: {scenario3_tokens['cache_misses']}")
    _print_token_totals(scenario3_tokens)
    print(f" 💰 Savings vs No Cache: {qdrant_savings:.1f}%")

    # ------------------------------------------------------------------
    # SUMMARY
    # ------------------------------------------------------------------
    print("\n" + "=" * 60)
    print("📈 BENCHMARK SUMMARY")
    print("=" * 60)

    table = _format_summary_table([
        ("1. No Cache (baseline)", f"{baseline_total:,}", "-"),
        ("2. With Skill", f"{scenario2_tokens['total_tokens']:,}", f"{skill_savings:.1f}%"),
        ("3. With Qdrant", f"{scenario3_tokens['total_tokens']:,}", f"{qdrant_savings:.1f}%"),
    ])
    print("\n" + table + "\n")

    print(f"🎯 Qdrant Memory saved {qdrant_savings:.0f}% tokens compared to no caching!")
    print(f" That's {baseline_total - scenario3_tokens['total_tokens']:,} tokens saved in just {query_count} queries.\n")

    return results
432
+
433
+
434
def generate_visualization(results: Dict[str, Any], output_dir: str) -> str:
    """Write an HTML dashboard and a JSON dump of the benchmark results.

    The HTML page shows one summary card per scenario, a stacked bar chart of
    input/output tokens and a doughnut chart of cache hits vs. misses. Charts
    are drawn with Chart.js, which the page loads from the jsDelivr CDN.

    Args:
        results: Benchmark results. Keys read here are ``"scenarios"`` (a
            mapping with optional ``"no_cache"``, ``"with_skill"`` and
            ``"with_qdrant"`` entries), ``"timestamp"``,
            ``"embedding_provider"`` and ``"queries_tested"``. Each scenario
            may provide ``"total_tokens"``, ``"input_tokens"`` and
            ``"output_tokens"``; ``"with_qdrant"`` may also provide
            ``"cache_hits"`` and ``"cache_misses"``. Missing values fall back
            to defaults, so an empty dict still renders a page.
        output_dir: Directory that receives ``benchmark_results.html`` and
            ``benchmark_results.json``; created if it does not exist.

    Returns:
        Path of the generated HTML file.
    """
    os.makedirs(output_dir, exist_ok=True)

    scenarios = results.get("scenarios", {})
    no_cache = scenarios.get("no_cache", {})
    with_skill = scenarios.get("with_skill", {})
    with_qdrant = scenarios.get("with_qdrant", {})
    ordered_scenarios = (no_cache, with_skill, with_qdrant)

    # Headline numbers for the three cards.
    no_cache_total = no_cache.get("total_tokens", 0)
    skill_total = with_skill.get("total_tokens", 0)
    qdrant_total = with_qdrant.get("total_tokens", 0)

    # Savings relative to the baseline. The denominator is clamped to at
    # least 1 so a missing or zero baseline cannot divide by zero.
    baseline_total = max(no_cache.get("total_tokens", 1), 1)
    skill_saved = (1 - with_skill.get("total_tokens", 1) / baseline_total) * 100
    qdrant_saved = (1 - with_qdrant.get("total_tokens", 1) / baseline_total) * 100

    cache_hits = with_qdrant.get("cache_hits", 0)
    cache_misses = with_qdrant.get("cache_misses", 0)
    queries_tested = results.get("queries_tested", 0)

    # Chart.js data series, in the same order as the bar-chart labels.
    input_series = ", ".join(str(s.get("input_tokens", 0)) for s in ordered_scenarios)
    output_series = ", ".join(str(s.get("output_tokens", 0)) for s in ordered_scenarios)

    # NOTE: literal braces in the CSS/JS below are doubled because this is an
    # f-string.
    html_content = f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Qdrant Memory Benchmark Results</title>
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <style>
        * {{ margin: 0; padding: 0; box-sizing: border-box; }}
        body {{
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
            color: #eee;
            min-height: 100vh;
            padding: 2rem;
        }}
        .container {{ max-width: 1200px; margin: 0 auto; }}
        h1 {{
            text-align: center;
            margin-bottom: 2rem;
            background: linear-gradient(90deg, #00d4ff, #7c3aed);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            font-size: 2.5rem;
        }}
        .cards {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 1.5rem; margin-bottom: 2rem; }}
        .card {{
            background: rgba(255,255,255,0.05);
            backdrop-filter: blur(10px);
            border-radius: 16px;
            padding: 1.5rem;
            border: 1px solid rgba(255,255,255,0.1);
        }}
        .card h3 {{ color: #00d4ff; margin-bottom: 1rem; }}
        .stat {{ font-size: 2.5rem; font-weight: bold; margin: 0.5rem 0; }}
        .stat.green {{ color: #10b981; }}
        .stat.blue {{ color: #3b82f6; }}
        .stat.purple {{ color: #8b5cf6; }}
        .label {{ color: #888; font-size: 0.9rem; }}
        .savings {{
            display: inline-block;
            background: linear-gradient(90deg, #10b981, #059669);
            padding: 0.25rem 0.75rem;
            border-radius: 20px;
            font-weight: bold;
            margin-top: 0.5rem;
        }}
        .chart-container {{
            background: rgba(255,255,255,0.05);
            backdrop-filter: blur(10px);
            border-radius: 16px;
            padding: 2rem;
            margin-bottom: 2rem;
            border: 1px solid rgba(255,255,255,0.1);
        }}
        .chart-title {{ color: #00d4ff; margin-bottom: 1rem; font-size: 1.25rem; }}
        .meta {{ text-align: center; color: #666; margin-top: 2rem; font-size: 0.85rem; }}
    </style>
</head>
<body>
    <div class="container">
        <h1>🧠 Qdrant Memory Benchmark</h1>

        <div class="cards">
            <div class="card">
                <h3>📦 No Cache (Baseline)</h3>
                <div class="stat blue">{no_cache_total:,}</div>
                <div class="label">Total Tokens</div>
                <p style="margin-top: 1rem; color: #888;">Full context sent every query</p>
            </div>

            <div class="card">
                <h3>🎯 With Skill</h3>
                <div class="stat purple">{skill_total:,}</div>
                <div class="label">Total Tokens</div>
                <div class="savings">{skill_saved:.0f}% saved</div>
            </div>

            <div class="card">
                <h3>🚀 With Qdrant</h3>
                <div class="stat green">{qdrant_total:,}</div>
                <div class="label">Total Tokens</div>
                <div class="savings">{qdrant_saved:.0f}% saved</div>
                <p style="margin-top: 0.5rem; color: #10b981;">
                    {cache_hits} cache hits / {queries_tested} queries
                </p>
            </div>
        </div>

        <div class="chart-container">
            <div class="chart-title">Token Usage Comparison</div>
            <canvas id="barChart" height="100"></canvas>
        </div>

        <div class="chart-container">
            <div class="chart-title">Cache Performance</div>
            <canvas id="pieChart" height="100"></canvas>
        </div>

        <div class="meta">
            <p>Benchmark run: {results.get('timestamp', 'N/A')}</p>
            <p>Embedding Provider: {results.get('embedding_provider', 'N/A')}</p>
            <p>Queries Tested: {queries_tested}</p>
        </div>
    </div>

    <script>
        // Bar Chart - Token Comparison
        new Chart(document.getElementById('barChart'), {{
            type: 'bar',
            data: {{
                labels: ['No Cache', 'With Skill', 'With Qdrant'],
                datasets: [{{
                    label: 'Input Tokens',
                    data: [{input_series}],
                    backgroundColor: 'rgba(59, 130, 246, 0.8)',
                    borderRadius: 8
                }}, {{
                    label: 'Output Tokens',
                    data: [{output_series}],
                    backgroundColor: 'rgba(139, 92, 246, 0.8)',
                    borderRadius: 8
                }}]
            }},
            options: {{
                responsive: true,
                scales: {{
                    x: {{ stacked: true, grid: {{ color: 'rgba(255,255,255,0.1)' }} }},
                    y: {{ stacked: true, grid: {{ color: 'rgba(255,255,255,0.1)' }} }}
                }},
                plugins: {{
                    legend: {{ labels: {{ color: '#eee' }} }}
                }}
            }}
        }});

        // Pie Chart - Cache Performance
        new Chart(document.getElementById('pieChart'), {{
            type: 'doughnut',
            data: {{
                labels: ['Cache Hits', 'Cache Misses'],
                datasets: [{{
                    data: [{cache_hits}, {cache_misses}],
                    backgroundColor: ['rgba(16, 185, 129, 0.8)', 'rgba(239, 68, 68, 0.5)'],
                    borderWidth: 0
                }}]
            }},
            options: {{
                responsive: true,
                plugins: {{
                    legend: {{ labels: {{ color: '#eee' }}, position: 'bottom' }}
                }}
            }}
        }});
    </script>
</body>
</html>
"""

    # Save HTML. The page declares charset UTF-8 and contains emoji, so the
    # file must be written as UTF-8 regardless of the platform default.
    html_path = os.path.join(output_dir, "benchmark_results.html")
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(html_content)

    # Save JSON
    json_path = os.path.join(output_dir, "benchmark_results.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    print("\n📊 Visualization saved to:")
    print(f" HTML: {html_path}")
    print(f" JSON: {json_path}")

    return html_path
618
+
619
+
620
def main():
    """Command-line entry point: run the benchmark and optionally visualize it.

    Flags:
        --visualize  Generate the HTML/JSON report after the benchmark.
        --output     Directory for the report (defaults to ``OUTPUT_DIR``).

    The report is produced only when ``run_benchmark()`` returns a truthy
    result and ``--visualize`` was given. Opening the report in a browser is
    best-effort: a failure there is reported but never aborts the run.
    """
    parser = argparse.ArgumentParser(description="Benchmark qdrant-memory token savings")
    parser.add_argument("--visualize", action="store_true", help="Generate HTML visualization")
    parser.add_argument("--output", default=OUTPUT_DIR, help="Output directory for results")
    args = parser.parse_args()

    results = run_benchmark()

    if results and args.visualize:
        html_path = generate_visualization(results, args.output)

        # Try to open in browser. Path.as_uri() yields a well-formed file URL
        # (percent-encoded, correct on Windows drive paths), unlike manual
        # "file://" + path concatenation.
        import webbrowser
        from pathlib import Path

        try:
            webbrowser.open(Path(os.path.abspath(html_path)).as_uri())
        except Exception as exc:
            # Convenience step only: report the problem and carry on, but do
            # not swallow KeyboardInterrupt/SystemExit as a bare except would.
            print(f"Could not open the report in a browser: {exc}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()