@musashishao/agent-kit 1.2.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/.agent/mcp-gateway/README.md +121 -0
  2. package/.agent/mcp-gateway/dist/index.d.ts +11 -0
  3. package/.agent/mcp-gateway/dist/index.js +504 -0
  4. package/.agent/mcp-gateway/dist/sync/debouncer.d.ts +56 -0
  5. package/.agent/mcp-gateway/dist/sync/debouncer.js +112 -0
  6. package/.agent/mcp-gateway/dist/sync/incremental_syncer.d.ts +58 -0
  7. package/.agent/mcp-gateway/dist/sync/incremental_syncer.js +172 -0
  8. package/.agent/mcp-gateway/dist/sync/index.d.ts +6 -0
  9. package/.agent/mcp-gateway/dist/sync/index.js +6 -0
  10. package/.agent/mcp-gateway/dist/sync/timestamp_checker.d.ts +69 -0
  11. package/.agent/mcp-gateway/dist/sync/timestamp_checker.js +169 -0
  12. package/.agent/mcp-gateway/package.json +28 -0
  13. package/.agent/mcp-gateway/src/index.ts +608 -0
  14. package/.agent/mcp-gateway/src/sync/debouncer.ts +129 -0
  15. package/.agent/mcp-gateway/src/sync/incremental_syncer.ts +237 -0
  16. package/.agent/mcp-gateway/src/sync/index.ts +7 -0
  17. package/.agent/mcp-gateway/src/sync/timestamp_checker.ts +194 -0
  18. package/.agent/scripts/ak_cli.py +549 -0
  19. package/.agent/scripts/setup_host.py +557 -0
  20. package/.agent/scripts/verify_install.py +174 -0
  21. package/.agent/skills/app-builder/SKILL.md +51 -1
  22. package/.agent/skills/app-builder/scripts/generate_ai_infra.py +510 -0
  23. package/.agent/skills/documentation-templates/SKILL.md +9 -1
  24. package/.agent/skills/documentation-templates/agents-template.md +202 -0
  25. package/.agent/skills/graph-mapper/SKILL.md +211 -0
  26. package/.agent/skills/graph-mapper/scripts/generate_graph.py +705 -0
  27. package/.agent/skills/rag-engineering/SKILL.md +342 -0
  28. package/.agent/skills/rag-engineering/chunking-strategies.md +229 -0
  29. package/.agent/skills/rag-engineering/contextual-retrieval.md +261 -0
  30. package/.agent/skills/rag-engineering/hybrid-search.md +356 -0
  31. package/.agent/skills/rag-engineering/scripts/chunk_code.py +916 -0
  32. package/.agent/templates/mcp_configs/claude_desktop.json +14 -0
  33. package/.agent/templates/mcp_configs/cursor.json +13 -0
  34. package/.agent/templates/mcp_configs/vscode.json +13 -0
  35. package/.agent/workflows/create.md +70 -2
  36. package/bin/cli.js +91 -0
  37. package/docs/AI_DATA_INFRASTRUCTURE.md +288 -0
  38. package/docs/CHANGELOG_AI_INFRA.md +111 -0
  39. package/docs/PLAN-universal-intelligence.md +48 -0
  40. package/package.json +7 -2
@@ -0,0 +1,261 @@
1
+ # Contextual Retrieval
2
+
3
+ > Anthropic's contextual retrieval technique reduces retrieval failures by up to 49% — 35% with contextual embeddings alone, 49% when combined with contextual BM25.
4
+
5
+ ---
6
+
7
+ ## The Problem
8
+
9
+ Standard chunks lack context:
10
+
11
+ ```typescript
12
+ // This chunk is meaningless in isolation
13
+ return amount * rate;
14
+ ```
15
+
16
+ Questions the AI can't answer:
17
+ - What is `amount`?
18
+ - Where does `rate` come from?
19
+ - What calls this code?
20
+
21
+ ---
22
+
23
+ ## The Solution
24
+
25
+ Add a **context summary** before embedding:
26
+
27
+ ```
28
+ [CONTEXT]
29
+ File: src/utils/tax.ts
30
+ Function: calculateTax
31
+ Purpose: Calculates tax amount for customer billing
32
+ Called by: ShippingService.calculateShipping(), InvoiceGenerator.generate()
33
+ Dependencies: TAX_RATE constant from ./constants.ts
34
+ [/CONTEXT]
35
+
36
+ function calculateTax(amount: number): number {
37
+ const rate = TAX_RATE;
38
+ return amount * rate;
39
+ }
40
+ ```
41
+
42
+ Now the embedding captures:
43
+ - File location
44
+ - Function purpose
45
+ - Callers and dependencies
46
+ - Business context
47
+
48
+ ---
49
+
50
+ ## Implementation
51
+
52
+ ### Step 1: Gather Context Sources
53
+
54
+ ```python
55
+ def gather_context(chunk: dict, project_info: dict, graph: dict) -> dict:
56
+ """Collect all context sources for a chunk."""
57
+
58
+ file_path = chunk['metadata']['file_path']
59
+ chunk_name = chunk['metadata'].get('name', 'unknown')
60
+
61
+ return {
62
+ 'file_path': file_path,
63
+ 'chunk_name': chunk_name,
64
+ 'file_purpose': get_file_purpose(file_path, project_info),
65
+ 'dependencies': graph.get_dependencies(file_path),
66
+ 'dependents': graph.get_dependents(file_path),
67
+ 'project_context': project_info.get('summary', ''),
68
+ }
69
+ ```
70
+
71
+ ### Step 2: Generate Context Summary
72
+
73
+ Option A: **Template-based** (Fast, deterministic)
74
+
75
+ ```python
76
+ def generate_context_template(ctx: dict) -> str:
77
+ """Generate context using templates."""
78
+
79
+ return f"""[CONTEXT]
80
+ File: {ctx['file_path']}
81
+ Name: {ctx['chunk_name']}
82
+ Purpose: {ctx['file_purpose']}
83
+ Imported by: {', '.join(ctx['dependents'][:5]) or 'None'}
84
+ Imports: {', '.join(ctx['dependencies'][:5]) or 'None'}
85
+ [/CONTEXT]"""
86
+ ```
87
+
88
+ Option B: **LLM-generated** (Rich, semantic)
89
+
90
+ ```python
91
+ def generate_context_llm(chunk: str, ctx: dict) -> str:
92
+ """Generate context using LLM."""
93
+
94
+ prompt = f"""Analyze this code chunk and write a 2-3 sentence summary.
95
+
96
+ File: {ctx['file_path']}
97
+ Project: {ctx['project_context']}
98
+ Dependencies: {ctx['dependencies']}
99
+ Used by: {ctx['dependents']}
100
+
101
+ Code:
102
+ ```
103
+ {chunk}
104
+ ```
105
+
106
+ Summary (2-3 sentences explaining what this code does and its role):"""
107
+
108
+ return llm.generate(prompt, max_tokens=100)
109
+ ```
110
+
111
+ ### Step 3: Combine Context + Content
112
+
113
+ ```python
114
+ def create_contextual_chunk(chunk: dict, context: str) -> dict:
115
+ """Create final chunk with context prepended."""
116
+
117
+ contextual_content = f"{context}\n\n{chunk['content']}"
118
+
119
+ return {
120
+ 'id': chunk['id'],
121
+ 'content': contextual_content,
122
+ 'original_content': chunk['content'],
123
+ 'context': context,
124
+ 'metadata': chunk['metadata']
125
+ }
126
+ ```
127
+
128
+ ---
129
+
130
+ ## Context Sources (Priority Order)
131
+
132
+ | Source | Information | Priority |
133
+ |--------|-------------|----------|
134
+ | **File Path** | Location in project | P0 (Always) |
135
+ | **AGENTS.md** | Project overview | P0 (Always) |
136
+ | **Knowledge Graph** | Dependencies | P1 (If available) |
137
+ | **Docstrings/Comments** | Developer intent | P1 (If available) |
138
+ | **Git History** | Change context | P2 (Optional) |
139
+
140
+ ---
141
+
142
+ ## When to Use LLM vs Template
143
+
144
+ | Scenario | Approach | Reason |
145
+ |----------|----------|--------|
146
+ | Initial indexing | Template | Speed |
147
+ | High-value files | LLM | Quality |
148
+ | Simple utilities | Template | Overkill for LLM |
149
+ | Complex business logic | LLM | Need semantic understanding |
150
+
151
+ ---
152
+
153
+ ## Cost Optimization
154
+
155
+ LLM context generation is expensive. Optimize:
156
+
157
+ 1. **Cache aggressively** - Context rarely changes
158
+ 2. **Batch processing** - Send multiple chunks per request
159
+ 3. **Use smaller models** - Claude Haiku, GPT-3.5-turbo
160
+ 4. **Template for simple cases** - 80% template, 20% LLM
161
+
162
+ ### Cost Estimation
163
+
164
+ | Codebase Size | Chunks | LLM Calls | Est. Cost |
165
+ |---------------|--------|-----------|-----------|
166
+ | 10k lines | ~200 | 200 | ~$0.50 |
167
+ | 100k lines | ~2,000 | 2,000 | ~$5.00 |
168
+ | 1M lines | ~20,000 | 20,000 | ~$50.00 |
169
+
170
+ ---
171
+
172
+ ## Quality Measurement
173
+
174
+ ### Good Context Indicators
175
+
176
+ - [ ] Mentions file purpose
177
+ - [ ] Lists key dependencies
178
+ - [ ] Describes what code does (not just syntax)
179
+ - [ ] Under 100 tokens (concise)
180
+
181
+ ### Bad Context Examples
182
+
183
+ ❌ Too vague:
184
+ ```
185
+ [CONTEXT]
186
+ This is code from the project.
187
+ [/CONTEXT]
188
+ ```
189
+
190
+ ❌ Too long:
191
+ ```
192
+ [CONTEXT]
193
+ This function is located in src/utils/tax.ts which is part of the utilities
194
+ folder that contains various helper functions used throughout the application
195
+ for different purposes including but not limited to calculations, formatting,
196
+ validation, and data transformation. The specific function calculateTax...
197
+ (200 more tokens)
198
+ [/CONTEXT]
199
+ ```
200
+
201
+ ✅ Just right:
202
+ ```
203
+ [CONTEXT]
204
+ File: src/utils/tax.ts
205
+ Function calculateTax computes tax for billing. Used by ShippingService
206
+ and InvoiceGenerator. Depends on TAX_RATE from constants.
207
+ [/CONTEXT]
208
+ ```
209
+
210
+ ---
211
+
212
+ ## Integration with graph-mapper
213
+
214
+ Use Knowledge Graph data for richer context:
215
+
216
+ ```python
217
+ from graph_mapper import load_graph
218
+
219
+ def enrich_context_with_graph(chunk: dict, graph_path: str) -> dict:
220
+ """Add dependency info from graph."""
221
+
222
+ graph = load_graph(graph_path)
223
+ file_id = chunk['metadata']['file_path']
224
+
225
+ # Get direct dependencies
226
+ imports = graph.get_imports(file_id)
227
+
228
+ # Get files that import this
229
+ imported_by = graph.get_importers(file_id)
230
+
231
+ # Calculate impact score
232
+ impact = graph.calculate_impact(file_id)
233
+
234
+ return {
235
+ 'imports': imports,
236
+ 'imported_by': imported_by,
237
+ 'impact_score': impact['score'],
238
+ 'impact_files': impact['files'][:5]
239
+ }
240
+ ```
241
+
242
+ ---
243
+
244
+ ## Retrieval Impact
245
+
246
+ ### Without Contextual Retrieval
247
+
248
+ Query: "How is tax calculated for shipping?"
249
+
250
+ Results: Random code mentioning "tax" or "shipping"
251
+ - ❌ `const TAX = 0.1;` (irrelevant constant)
252
+ - ❌ `// TODO: add tax` (comment)
253
+ - ⚠️ `calculateTax(...)` (correct but no context)
254
+
255
+ ### With Contextual Retrieval
256
+
257
+ Results: Properly contextualized chunks
258
+ - ✅ `[calculateTax used by ShippingService] function calculateTax...`
259
+ - ✅ `[ShippingService imports calculateTax] class ShippingService...`
260
+
261
+ **Improvement: 35-49% reduction in failed retrievals** (Anthropic research)
@@ -0,0 +1,356 @@
1
+ # Hybrid Search
2
+
3
+ > Combine semantic (vector) and keyword (BM25) search for best results.
4
+
5
+ ---
6
+
7
+ ## Why Hybrid?
8
+
9
+ | Query Type | Vector Search | BM25 Search | Winner |
10
+ |------------|---------------|-------------|--------|
11
+ | "How to handle user authentication" | ✅ Great | ⚠️ Okay | Vector |
12
+ | "Error 503" | ❌ Poor | ✅ Great | BM25 |
13
+ | "calculateTax function" | ⚠️ Okay | ✅ Great | BM25 |
14
+ | "similar to login flow" | ✅ Great | ❌ Poor | Vector |
15
+
16
+ **Hybrid = Best of both worlds**
17
+
18
+ ---
19
+
20
+ ## Architecture
21
+
22
+ ```
23
+ ┌─────────────────┐
24
+ │ Query │
25
+ └────────┬────────┘
26
+
27
+ ┌──────────────┴──────────────┐
28
+ │ │
29
+ ▼ ▼
30
+ ┌─────────────────┐ ┌─────────────────┐
31
+ │ Vector Search │ │ BM25 Search │
32
+ │ (Semantic) │ │ (Keyword) │
33
+ └────────┬────────┘ └────────┬────────┘
34
+ │ │
35
+ │ Top 50 results each │
36
+ │ │
37
+ └──────────────┬──────────────┘
38
+
39
+
40
+ ┌─────────────────┐
41
+ │ Fusion (RRF) │
42
+ │ Combine ranks │
43
+ └────────┬────────┘
44
+
45
+
46
+ ┌─────────────────┐
47
+ │ Reranker │
48
+ │ Top 10 │
49
+ └────────┬────────┘
50
+
51
+
52
+ ┌─────────────────┐
53
+ │ Final Results │
54
+ └─────────────────┘
55
+ ```
56
+
57
+ ---
58
+
59
+ ## Implementation
60
+
61
+ ### Step 1: Set Up Both Indexes
62
+
63
+ ```python
64
+ import chromadb
65
+ from rank_bm25 import BM25Okapi
66
+
67
+ class HybridSearchEngine:
68
+ def __init__(self, chunks: list[dict]):
69
+ # Vector index (ChromaDB)
70
+ self.chroma_client = chromadb.Client()
71
+ self.collection = self.chroma_client.create_collection("code_chunks")
72
+
73
+ # Add chunks to vector DB
74
+ self.collection.add(
75
+ documents=[c['content'] for c in chunks],
76
+ ids=[c['id'] for c in chunks],
77
+ metadatas=[c['metadata'] for c in chunks]
78
+ )
79
+
80
+ # BM25 index
81
+ tokenized_chunks = [c['content'].split() for c in chunks]
82
+ self.bm25 = BM25Okapi(tokenized_chunks)
83
+ self.chunk_ids = [c['id'] for c in chunks]
84
+ self.chunks = {c['id']: c for c in chunks}
85
+ ```
86
+
87
+ ### Step 2: Implement Search Methods
88
+
89
+ ```python
90
+ def vector_search(self, query: str, top_k: int = 50) -> list[tuple]:
91
+ """Semantic search using embeddings."""
92
+ results = self.collection.query(
93
+ query_texts=[query],
94
+ n_results=top_k
95
+ )
96
+
97
+ # Return [(id, score), ...]
98
+ return list(zip(
99
+ results['ids'][0],
100
+ results['distances'][0]
101
+ ))
102
+
103
+ def bm25_search(self, query: str, top_k: int = 50) -> list[tuple]:
104
+ """Keyword search using BM25."""
105
+ tokenized_query = query.split()
106
+ scores = self.bm25.get_scores(tokenized_query)
107
+
108
+ # Get top k
109
+ top_indices = sorted(
110
+ range(len(scores)),
111
+ key=lambda i: scores[i],
112
+ reverse=True
113
+ )[:top_k]
114
+
115
+ return [(self.chunk_ids[i], scores[i]) for i in top_indices]
116
+ ```
117
+
118
+ ### Step 3: Reciprocal Rank Fusion
119
+
120
+ ```python
121
+ def reciprocal_rank_fusion(
122
+ self,
123
+ results_list: list[list[tuple]],
124
+ k: int = 60
125
+ ) -> list[tuple]:
126
+ """
127
+ Combine multiple ranked lists using RRF.
128
+
129
+ Formula: score(d) = Σ 1/(k + rank(d))
130
+
131
+ Args:
132
+ results_list: List of [(id, score), ...] for each search method
133
+ k: Smoothing constant that dampens the dominance of top-ranked documents (default 60, the value recommended in the original RRF paper)
134
+ """
135
+ scores = {}
136
+
137
+ for results in results_list:
138
+ for rank, (doc_id, _) in enumerate(results):
139
+ if doc_id not in scores:
140
+ scores[doc_id] = 0
141
+ scores[doc_id] += 1 / (k + rank + 1)
142
+
143
+ # Sort by combined score
144
+ sorted_results = sorted(
145
+ scores.items(),
146
+ key=lambda x: x[1],
147
+ reverse=True
148
+ )
149
+
150
+ return sorted_results
151
+ ```
152
+
153
+ ### Step 4: Hybrid Search Method
154
+
155
+ ```python
156
+ def hybrid_search(
157
+ self,
158
+ query: str,
159
+ top_k: int = 20,
160
+ vector_weight: float = 0.5
161
+ ) -> list[dict]:
162
+ """
163
+ Perform hybrid search combining vector and BM25.
164
+
165
+ Args:
166
+ query: Search query
167
+ top_k: Number of results to return
168
+ vector_weight: Weight for vector search (0-1). Note: unused by RRF fusion, which is rank-based; use weighted_score_fusion (below) for weighted combination
169
+ """
170
+ # Get results from both methods
171
+ vector_results = self.vector_search(query, top_k=50)
172
+ bm25_results = self.bm25_search(query, top_k=50)
173
+
174
+ # Fuse results
175
+ fused = self.reciprocal_rank_fusion([vector_results, bm25_results])
176
+
177
+ # Get top k chunks
178
+ top_ids = [doc_id for doc_id, score in fused[:top_k]]
179
+
180
+ return [self.chunks[doc_id] for doc_id in top_ids]
181
+ ```
182
+
183
+ ---
184
+
185
+ ## Weighting Strategies
186
+
187
+ ### Fixed Weights
188
+
189
+ ```python
190
+ # Equal weight
191
+ vector_weight = 0.5
192
+ bm25_weight = 0.5
193
+
194
+ # Semantic-heavy (for conceptual queries)
195
+ vector_weight = 0.7
196
+ bm25_weight = 0.3
197
+
198
+ # Keyword-heavy (for exact matches)
199
+ vector_weight = 0.3
200
+ bm25_weight = 0.7
201
+ ```
202
+
203
+ ### Dynamic Weights
204
+
205
+ ```python
206
+ def determine_weights(query: str) -> tuple[float, float]:
207
+ """Dynamically adjust weights based on query type."""
208
+
209
+ # Check for exact identifiers (function names, error codes)
210
+ has_identifier = bool(re.search(r'[A-Z][a-z]+[A-Z]|Error\s*\d+|_\w+_', query))
211
+
212
+ # Check for conceptual language
213
+ conceptual_words = {'how', 'what', 'why', 'similar', 'like', 'related'}
214
+ is_conceptual = any(word in query.lower() for word in conceptual_words)
215
+
216
+ if has_identifier:
217
+ return (0.3, 0.7) # Favor BM25
218
+ elif is_conceptual:
219
+ return (0.7, 0.3) # Favor Vector
220
+ else:
221
+ return (0.5, 0.5) # Equal
222
+ ```
223
+
224
+ ---
225
+
226
+ ## Alternative: Weighted Score Fusion
227
+
228
+ Instead of RRF, use weighted scores directly:
229
+
230
+ ```python
231
+ def weighted_score_fusion(
232
+ vector_results: list[tuple],
233
+ bm25_results: list[tuple],
234
+ vector_weight: float = 0.5
235
+ ) -> list[tuple]:
236
+ """Combine using normalized weighted scores."""
237
+
238
+ # Normalize scores to 0-1 range
239
+ def normalize(results):
240
+ if not results:
241
+ return {}
242
+ scores = [s for _, s in results]
243
+ min_s, max_s = min(scores), max(scores)
244
+ range_s = max_s - min_s or 1
245
+ return {
246
+ doc_id: (score - min_s) / range_s
247
+ for doc_id, score in results
248
+ }
249
+
250
+ vector_norm = normalize(vector_results)
251
+ bm25_norm = normalize(bm25_results)
252
+
253
+ # Combine weighted scores
254
+ all_ids = set(vector_norm.keys()) | set(bm25_norm.keys())
255
+ combined = {}
256
+
257
+ for doc_id in all_ids:
258
+ v_score = vector_norm.get(doc_id, 0) * vector_weight
259
+ b_score = bm25_norm.get(doc_id, 0) * (1 - vector_weight)
260
+ combined[doc_id] = v_score + b_score
261
+
262
+ return sorted(combined.items(), key=lambda x: x[1], reverse=True)
263
+ ```
264
+
265
+ ---
266
+
267
+ ## Performance Optimization
268
+
269
+ ### Caching
270
+
271
+ ```python
272
+ from functools import lru_cache
273
+
274
+ @lru_cache(maxsize=1000)
275
+ def cached_search(query: str, top_k: int = 20) -> tuple:
276
+ """Cache frequent queries."""
277
+ results = hybrid_search(query, top_k)
278
+ return tuple(r['id'] for r in results)
279
+ ```
280
+
281
+ ### Batch Queries
282
+
283
+ ```python
284
+ def batch_hybrid_search(queries: list[str], top_k: int = 20) -> list[list[dict]]:
285
+ """Process multiple queries efficiently."""
286
+
287
+ # Batch vector search
288
+ vector_results = collection.query(
289
+ query_texts=queries,
290
+ n_results=50
291
+ )
292
+
293
+ # BM25 for each (can't batch easily)
294
+ bm25_results = [bm25_search(q, 50) for q in queries]
295
+
296
+ # Fuse each — NOTE(review): vector_results is Chroma's batched response dict, so indexing it by i is wrong; build per-query pairs via list(zip(vector_results['ids'][i], vector_results['distances'][i])) before fusing
297
+ return [
298
+ reciprocal_rank_fusion([vector_results[i], bm25_results[i]])[:top_k]
299
+ for i in range(len(queries))
300
+ ]
301
+ ```
302
+
303
+ ---
304
+
305
+ ## Evaluation Metrics
306
+
307
+ ### Hit Rate @ K
308
+
309
+ ```python
310
+ def hit_rate(queries: list, ground_truth: list, k: int = 10) -> float:
311
+ """Percentage of queries where correct answer is in top K."""
312
+ hits = 0
313
+ for query, expected in zip(queries, ground_truth):
314
+ results = hybrid_search(query, top_k=k)
315
+ result_ids = [r['id'] for r in results]
316
+ if expected in result_ids:
317
+ hits += 1
318
+ return hits / len(queries)
319
+ ```
320
+
321
+ ### Mean Reciprocal Rank (MRR)
322
+
323
+ ```python
324
+ def mrr(queries: list, ground_truth: list) -> float:
325
+ """Average of 1/rank of first correct result."""
326
+ reciprocal_ranks = []
327
+ for query, expected in zip(queries, ground_truth):
328
+ results = hybrid_search(query, top_k=100)
329
+ for rank, result in enumerate(results, 1):
330
+ if result['id'] == expected:
331
+ reciprocal_ranks.append(1 / rank)
332
+ break
333
+ else:
334
+ reciprocal_ranks.append(0)
335
+ return sum(reciprocal_ranks) / len(reciprocal_ranks)
336
+ ```
337
+
338
+ ---
339
+
340
+ ## Quick Setup (Copy-Paste Ready)
341
+
342
+ ```python
343
+ # Install dependencies
344
+ # pip install chromadb rank-bm25
345
+
346
+ import chromadb
347
+ from rank_bm25 import BM25Okapi
348
+
349
+ def create_hybrid_engine(chunks):
350
+ """One-liner to create hybrid search engine."""
351
+ return HybridSearchEngine(chunks)
352
+
353
+ # Usage
354
+ engine = create_hybrid_engine(my_chunks)
355
+ results = engine.hybrid_search("how to calculate tax", top_k=10)
356
+ ```