tribunal-kit 4.4.3 → 4.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/scripts/marathon_harness.js +115 -18
- package/.agent/scripts/prompt_compiler.js +54 -23
- package/.agent/skills/advanced-rag-pipelines/SKILL.md +47 -0
- package/.agent/skills/browser-native-ai/SKILL.md +52 -0
- package/.agent/skills/generative-ui-expert/SKILL.md +77 -0
- package/.agent/skills/webgpu-performance/SKILL.md +67 -0
- package/README.md +113 -242
- package/bin/tribunal-kit.js +2 -2
- package/bin/wrapper.js +98 -0
- package/package.json +4 -3
- package/scripts/postinstall.js +127 -0
- package/.agent/scripts/append_flow.js +0 -72
- package/.agent/scripts/colors.js +0 -11
- package/.agent/scripts/compress_skills.js +0 -141
- package/.agent/scripts/consolidate_skills.js +0 -149
- package/.agent/scripts/deep_compress.js +0 -150
- package/.agent/scripts/patch_skills_meta.js +0 -156
- package/.agent/scripts/patch_skills_output.js +0 -244
- package/.agent/scripts/strip_tribunal.js +0 -47
- package/.agent/scripts/utils.js +0 -17
|
@@ -39,7 +39,7 @@ const ARCHIVE_DIR = path.join(MARATHON_DIR, 'archive');
|
|
|
39
39
|
|
|
40
40
|
const VALID_COMMANDS = new Set([
|
|
41
41
|
'init', 'status', 'next', 'mark', 'log',
|
|
42
|
-
'session-start', 'session-end', 'reset', 'add-feature'
|
|
42
|
+
'session-start', 'session-end', 'reset', 'add-feature', 'distill'
|
|
43
43
|
]);
|
|
44
44
|
|
|
45
45
|
// ── Schema Defaults ──────────────────────────────────────────────────────────
|
|
@@ -153,25 +153,50 @@ function getGitBranch() {
|
|
|
153
153
|
// ── Progress Helpers ─────────────────────────────────────────────────────────
|
|
154
154
|
|
|
155
155
|
/**
|
|
156
|
-
* Count passing features.
|
|
156
|
+
* Count passing features and blocked features.
|
|
157
157
|
* @param {object} featureList
|
|
158
|
-
* @returns {{ total: number, passing: number, failing: number }}
|
|
158
|
+
* @returns {{ total: number, passing: number, failing: number, blocked: number }}
|
|
159
159
|
*/
|
|
160
160
|
function countFeatures(featureList) {
|
|
161
161
|
const features = featureList.features || [];
|
|
162
162
|
const total = features.length;
|
|
163
163
|
const passing = features.filter(f => f.passes === true).length;
|
|
164
|
-
|
|
164
|
+
let blocked = 0;
|
|
165
|
+
|
|
166
|
+
features.forEach(f => {
|
|
167
|
+
if (!f.passes && f.dependencies && f.dependencies.length > 0) {
|
|
168
|
+
const allPassed = f.dependencies.every(depId => {
|
|
169
|
+
const dep = features.find(d => d.id === depId);
|
|
170
|
+
return dep && dep.passes === true;
|
|
171
|
+
});
|
|
172
|
+
if (!allPassed) blocked++;
|
|
173
|
+
}
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
return { total, passing, failing: total - passing, blocked };
|
|
165
177
|
}
|
|
166
178
|
|
|
167
179
|
/**
|
|
168
|
-
* Get the next unfinished feature.
|
|
180
|
+
* Get the next unfinished, unblocked feature.
|
|
169
181
|
* @param {object} featureList
|
|
170
182
|
* @returns {object|null}
|
|
171
183
|
*/
|
|
172
184
|
function getNextFeature(featureList) {
|
|
173
185
|
const features = featureList.features || [];
|
|
174
|
-
return features.find(f =>
|
|
186
|
+
return features.find(f => {
|
|
187
|
+
if (f.passes === true) return false;
|
|
188
|
+
|
|
189
|
+
// Check dependencies (DAG)
|
|
190
|
+
if (f.dependencies && f.dependencies.length > 0) {
|
|
191
|
+
const allPassed = f.dependencies.every(depId => {
|
|
192
|
+
const dep = features.find(d => d.id === depId);
|
|
193
|
+
return dep && dep.passes === true;
|
|
194
|
+
});
|
|
195
|
+
if (!allPassed) return false; // Feature is blocked
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
return true;
|
|
199
|
+
}) || null;
|
|
175
200
|
}
|
|
176
201
|
|
|
177
202
|
/**
|
|
@@ -240,8 +265,9 @@ function cmdInit(spec) {
|
|
|
240
265
|
* @param {string} category
|
|
241
266
|
* @param {string} description
|
|
242
267
|
* @param {string[]} steps
|
|
268
|
+
* @param {number[]} deps
|
|
243
269
|
*/
|
|
244
|
-
function cmdAddFeature(category, description, steps) {
|
|
270
|
+
function cmdAddFeature(category, description, steps, deps = []) {
|
|
245
271
|
if (!isActive()) {
|
|
246
272
|
console.error(`${RED}❌ No active marathon. Run ${CYAN}init${RED} first.${RESET}`);
|
|
247
273
|
process.exit(1);
|
|
@@ -264,6 +290,9 @@ function cmdAddFeature(category, description, steps) {
|
|
|
264
290
|
category: category.toLowerCase(),
|
|
265
291
|
description,
|
|
266
292
|
steps: steps.length > 0 ? steps : ['Implement and verify'],
|
|
293
|
+
dependencies: deps,
|
|
294
|
+
attempts: 0,
|
|
295
|
+
failureReasons: [],
|
|
267
296
|
passes: false,
|
|
268
297
|
sessionCompleted: null
|
|
269
298
|
};
|
|
@@ -289,7 +318,7 @@ function cmdStatus() {
|
|
|
289
318
|
const progress = readJSON(PROGRESS_FILE);
|
|
290
319
|
if (!featureList || !progress) return;
|
|
291
320
|
|
|
292
|
-
const { total, passing, failing } = countFeatures(featureList);
|
|
321
|
+
const { total, passing, failing, blocked } = countFeatures(featureList);
|
|
293
322
|
const nextFeature = getNextFeature(featureList);
|
|
294
323
|
const sessions = progress.sessions || [];
|
|
295
324
|
const lastSession = sessions[sessions.length - 1] || null;
|
|
@@ -303,7 +332,8 @@ function cmdStatus() {
|
|
|
303
332
|
console.log();
|
|
304
333
|
|
|
305
334
|
// ── Progress Bar ──
|
|
306
|
-
|
|
335
|
+
const blockedInfo = blocked > 0 ? ` (${YELLOW}${blocked} blocked${RESET})` : '';
|
|
336
|
+
console.log(` ${BOLD}Progress:${RESET} ${progressBar(passing, total)} ${GREEN}${passing}${RESET}/${total} features${blockedInfo}`);
|
|
307
337
|
console.log();
|
|
308
338
|
|
|
309
339
|
// ── Category Breakdown ──
|
|
@@ -382,7 +412,12 @@ function cmdNext() {
|
|
|
382
412
|
const nextFeature = getNextFeature(featureList);
|
|
383
413
|
|
|
384
414
|
if (!nextFeature) {
|
|
385
|
-
|
|
415
|
+
if (passing === total) {
|
|
416
|
+
console.log(`${GREEN}${BOLD}🎉 All ${total} features are passing! Marathon complete.${RESET}`);
|
|
417
|
+
} else {
|
|
418
|
+
console.log(`${RED}${BOLD}⚠️ Deadlock detected: ${total - passing} features remain, but all are blocked by failing dependencies.${RESET}`);
|
|
419
|
+
console.log(` ${DIM}Check 'status' and use 'mark <id> pass' to resolve dependencies.${RESET}`);
|
|
420
|
+
}
|
|
386
421
|
return;
|
|
387
422
|
}
|
|
388
423
|
|
|
@@ -400,6 +435,14 @@ function cmdNext() {
|
|
|
400
435
|
console.log();
|
|
401
436
|
}
|
|
402
437
|
|
|
438
|
+
if (nextFeature.failureReasons && nextFeature.failureReasons.length > 0) {
|
|
439
|
+
console.log(` ${RED}${BOLD}Previous Failures (${nextFeature.attempts} attempts):${RESET}`);
|
|
440
|
+
for (const reason of nextFeature.failureReasons) {
|
|
441
|
+
console.log(` ${DIM}* ${reason}${RESET}`);
|
|
442
|
+
}
|
|
443
|
+
console.log();
|
|
444
|
+
}
|
|
445
|
+
|
|
403
446
|
console.log(` ${DIM}When done: marathon_harness.js mark ${nextFeature.id} pass${RESET}`);
|
|
404
447
|
console.log();
|
|
405
448
|
}
|
|
@@ -408,8 +451,9 @@ function cmdNext() {
|
|
|
408
451
|
* Mark a feature as passing or failing.
|
|
409
452
|
* @param {number} id
|
|
410
453
|
* @param {string} verdict - 'pass' or 'fail'
|
|
454
|
+
* @param {string} [reason] - Reason for failure
|
|
411
455
|
*/
|
|
412
|
-
function cmdMark(id, verdict) {
|
|
456
|
+
function cmdMark(id, verdict, reason) {
|
|
413
457
|
if (!isActive()) {
|
|
414
458
|
console.error(`${RED}❌ No active marathon.${RESET}`);
|
|
415
459
|
process.exit(1);
|
|
@@ -437,6 +481,14 @@ function cmdMark(id, verdict) {
|
|
|
437
481
|
feature.passes = newPasses;
|
|
438
482
|
feature.sessionCompleted = newPasses ? new Date().toISOString() : null;
|
|
439
483
|
|
|
484
|
+
if (!newPasses) {
|
|
485
|
+
feature.attempts = (feature.attempts || 0) + 1;
|
|
486
|
+
if (reason) {
|
|
487
|
+
if (!feature.failureReasons) feature.failureReasons = [];
|
|
488
|
+
feature.failureReasons.push(`Attempt ${feature.attempts}: ${reason}`);
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
|
|
440
492
|
writeJSON(FEATURE_LIST_FILE, featureList);
|
|
441
493
|
|
|
442
494
|
const { total, passing } = countFeatures(featureList);
|
|
@@ -482,6 +534,30 @@ function cmdLog(message) {
|
|
|
482
534
|
ok(`Logged: ${message}`);
|
|
483
535
|
}
|
|
484
536
|
|
|
537
|
+
/**
|
|
538
|
+
* Distill a lesson learned into memory context.
|
|
539
|
+
* @param {string} lesson
|
|
540
|
+
*/
|
|
541
|
+
function cmdDistill(lesson) {
|
|
542
|
+
if (!isActive()) {
|
|
543
|
+
console.error(`${RED}❌ No active marathon.${RESET}`);
|
|
544
|
+
process.exit(1);
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
if (!lesson) {
|
|
548
|
+
console.error(`${RED}❌ Lesson required. Usage: distill "Your architectural lesson"${RESET}`);
|
|
549
|
+
process.exit(1);
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
ensureDir();
|
|
553
|
+
const DISTILL_FILE = path.join(MARATHON_DIR, 'distilled_context.md');
|
|
554
|
+
const timestamp = new Date().toISOString().slice(0, 16);
|
|
555
|
+
const entry = `- [${timestamp}] ${lesson}\n`;
|
|
556
|
+
|
|
557
|
+
fs.appendFileSync(DISTILL_FILE, entry, 'utf8');
|
|
558
|
+
ok(`Distilled memory saved: ${lesson}`);
|
|
559
|
+
}
|
|
560
|
+
|
|
485
561
|
/**
|
|
486
562
|
* Start a new session — reads state, shows bearings.
|
|
487
563
|
*/
|
|
@@ -567,7 +643,11 @@ function cmdSessionStart() {
|
|
|
567
643
|
}
|
|
568
644
|
}
|
|
569
645
|
} else {
|
|
570
|
-
|
|
646
|
+
if (passing === total) {
|
|
647
|
+
console.log(` ${GREEN}${BOLD}🎉 All features passing! Nothing to implement.${RESET}`);
|
|
648
|
+
} else {
|
|
649
|
+
console.log(` ${RED}${BOLD}⚠️ Deadlock: ${total - passing} features are blocked by failing dependencies.${RESET}`);
|
|
650
|
+
}
|
|
571
651
|
}
|
|
572
652
|
console.log();
|
|
573
653
|
|
|
@@ -717,11 +797,12 @@ function showHelp() {
|
|
|
717
797
|
cmd('status', 'Show progress dashboard');
|
|
718
798
|
cmd('next', 'Show the next unfinished feature');
|
|
719
799
|
cmd('mark <id> pass', 'Mark a feature as passing');
|
|
720
|
-
cmd('mark <id> fail', 'Mark a feature as failing');
|
|
800
|
+
cmd('mark <id> fail', 'Mark a feature as failing (optional: "reason")');
|
|
721
801
|
cmd('log "note"', 'Add a timestamped progress note');
|
|
802
|
+
cmd('distill "rule"', 'Save an architectural rule or lesson to memory');
|
|
722
803
|
cmd('session-start', 'Begin a new work session (reads state, shows bearings)');
|
|
723
804
|
cmd('session-end', 'End session with optional summary');
|
|
724
|
-
cmd('add-feature', 'Add a feature
|
|
805
|
+
cmd('add-feature', 'Add a feature (supports --deps=1,2,3 for DAG dependencies)');
|
|
725
806
|
cmd('reset', 'Archive current marathon and start fresh');
|
|
726
807
|
console.log();
|
|
727
808
|
}
|
|
@@ -759,11 +840,12 @@ function main() {
|
|
|
759
840
|
case 'mark': {
|
|
760
841
|
const id = parseInt(args[1], 10);
|
|
761
842
|
const verdict = (args[2] || '').toLowerCase();
|
|
843
|
+
const reason = args.slice(3).join(' ').trim();
|
|
762
844
|
if (isNaN(id)) {
|
|
763
|
-
console.error(`${RED}❌ Feature ID required. Usage: mark <id> pass|fail${RESET}`);
|
|
845
|
+
console.error(`${RED}❌ Feature ID required. Usage: mark <id> pass|fail "reason"${RESET}`);
|
|
764
846
|
process.exit(1);
|
|
765
847
|
}
|
|
766
|
-
cmdMark(id, verdict);
|
|
848
|
+
cmdMark(id, verdict, reason);
|
|
767
849
|
break;
|
|
768
850
|
}
|
|
769
851
|
case 'log': {
|
|
@@ -782,8 +864,23 @@ function main() {
|
|
|
782
864
|
case 'add-feature': {
|
|
783
865
|
const category = args[1] || '';
|
|
784
866
|
const description = args[2] || '';
|
|
785
|
-
|
|
786
|
-
|
|
867
|
+
let steps = args.slice(3);
|
|
868
|
+
let deps = [];
|
|
869
|
+
|
|
870
|
+
steps = steps.filter(step => {
|
|
871
|
+
if (step.startsWith('--deps=')) {
|
|
872
|
+
deps = step.replace('--deps=', '').split(',').map(Number).filter(n => !isNaN(n));
|
|
873
|
+
return false;
|
|
874
|
+
}
|
|
875
|
+
return true;
|
|
876
|
+
});
|
|
877
|
+
|
|
878
|
+
cmdAddFeature(category, description, steps, deps);
|
|
879
|
+
break;
|
|
880
|
+
}
|
|
881
|
+
case 'distill': {
|
|
882
|
+
const lesson = args.slice(1).join(' ').trim();
|
|
883
|
+
cmdDistill(lesson);
|
|
787
884
|
break;
|
|
788
885
|
}
|
|
789
886
|
case 'reset':
|
|
@@ -8,30 +8,13 @@ if (!rawInput) {
|
|
|
8
8
|
process.exit(1);
|
|
9
9
|
}
|
|
10
10
|
|
|
11
|
-
// 1.
|
|
12
|
-
|
|
13
|
-
.replace(/hey,? /gi, '')
|
|
14
|
-
.replace(/can you /gi, '')
|
|
15
|
-
.replace(/could you /gi, '')
|
|
16
|
-
.replace(/please /gi, '')
|
|
17
|
-
.replace(/i want to /gi, '')
|
|
18
|
-
.replace(/i need you to /gi, '')
|
|
19
|
-
.replace(/for me/gi, '')
|
|
20
|
-
.replace(/would it be possible to /gi, '')
|
|
21
|
-
.trim();
|
|
11
|
+
// 1. Keep original input, only optionally strip leading "please" for action matching
|
|
12
|
+
const cleanInput = rawInput.trim();
|
|
22
13
|
|
|
23
14
|
// 2. Extract Action (Intent mapping)
|
|
24
|
-
const actionMatch = cleanInput.match(/^(build|create|fix|debug|refactor|update|write|design|audit)\b/i);
|
|
15
|
+
const actionMatch = cleanInput.match(/^(?:(?:hey,?\s*|please\s+|can you\s+|could you\s+|would you\s+|i need you to\s+|i want to\s+)*)(build|create|fix|debug|refactor|update|write|design|audit)\b/i);
|
|
25
16
|
const action = actionMatch ? actionMatch[1].toLowerCase() : 'execute';
|
|
26
17
|
|
|
27
|
-
// Remove the action from the target string
|
|
28
|
-
if (actionMatch) {
|
|
29
|
-
cleanInput = cleanInput.substring(actionMatch[0].length).trim();
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
// Strip leading articles
|
|
33
|
-
cleanInput = cleanInput.replace(/^(a|an|some)\s+/i, '');
|
|
34
|
-
|
|
35
18
|
// 3. Extract Technology Stack
|
|
36
19
|
const techKeywords = [
|
|
37
20
|
'react', 'tailwind', 'next.js', 'sql', 'postgres', 'express',
|
|
@@ -41,16 +24,64 @@ const techKeywords = [
|
|
|
41
24
|
|
|
42
25
|
const stack = [];
|
|
43
26
|
techKeywords.forEach(tech => {
|
|
44
|
-
// Use word boundaries to prevent partial matches
|
|
45
27
|
const regex = new RegExp(`\\b${tech.replace('.', '\\.')}\\b`, 'i');
|
|
46
28
|
if (regex.test(cleanInput)) {
|
|
47
29
|
stack.push(tech.toLowerCase());
|
|
48
30
|
}
|
|
49
31
|
});
|
|
50
32
|
|
|
51
|
-
// 4.
|
|
33
|
+
// 4. Intelligent Pre-Routing
|
|
34
|
+
const routerMap = {
|
|
35
|
+
'react': ['react-specialist', 'frontend-design'],
|
|
36
|
+
'tailwind': ['tailwind-patterns'],
|
|
37
|
+
'next.js': ['nextjs-react-expert', 'react-specialist'],
|
|
38
|
+
'sql': ['sql-pro', 'database-design'],
|
|
39
|
+
'postgres': ['database-design'],
|
|
40
|
+
'express': ['nodejs-best-practices'],
|
|
41
|
+
'python': ['python-pro'],
|
|
42
|
+
'node': ['nodejs-best-practices'],
|
|
43
|
+
'vue': ['vue-expert'],
|
|
44
|
+
'svelte': ['frontend-design'],
|
|
45
|
+
'typescript': ['typescript-advanced'],
|
|
46
|
+
'js': ['clean-code'],
|
|
47
|
+
'css': ['tailwind-patterns'],
|
|
48
|
+
'html': ['frontend-design'],
|
|
49
|
+
'prisma': ['database-design'],
|
|
50
|
+
'drizzle': ['database-design']
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
const actionRouter = {
|
|
54
|
+
'build': ['architecture'],
|
|
55
|
+
'create': ['architecture'],
|
|
56
|
+
'fix': ['systematic-debugging'],
|
|
57
|
+
'debug': ['systematic-debugging'],
|
|
58
|
+
'refactor': ['clean-code'],
|
|
59
|
+
'update': ['clean-code'],
|
|
60
|
+
'write': ['clean-code'],
|
|
61
|
+
'design': ['frontend-design'],
|
|
62
|
+
'audit': ['vulnerability-scanner', 'lint-and-validate']
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
const recommendedSkills = new Set();
|
|
66
|
+
|
|
67
|
+
if (actionRouter[action]) {
|
|
68
|
+
actionRouter[action].forEach(s => recommendedSkills.add(s));
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
stack.forEach(tech => {
|
|
72
|
+
if (routerMap[tech]) {
|
|
73
|
+
routerMap[tech].forEach(s => recommendedSkills.add(s));
|
|
74
|
+
}
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
const finalSkills = Array.from(recommendedSkills).slice(0, 3);
|
|
78
|
+
|
|
79
|
+
// 5. Output highly compressed YAML
|
|
52
80
|
console.log('---');
|
|
53
81
|
console.log(`action: ${action}`);
|
|
54
|
-
console.log(`target:
|
|
82
|
+
console.log(`target: |`);
|
|
83
|
+
const indentedTarget = cleanInput.split('\n').map(line => ' ' + line).join('\n');
|
|
84
|
+
console.log(indentedTarget);
|
|
55
85
|
console.log(`stack: [${stack.join(', ')}]`);
|
|
86
|
+
console.log(`recommended_skills: [${finalSkills.join(', ')}]`);
|
|
56
87
|
console.log('---');
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: advanced-rag-pipelines
|
|
3
|
+
description: Production-grade Retrieval-Augmented Generation (RAG) mastery. Semantic chunking, Hybrid Search (Dense + Sparse/BM25), Cross-Encoder Reranking, and architecture-agnostic vector database management.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Advanced RAG Pipelines (Production AI Data)
|
|
7
|
+
|
|
8
|
+
You are an expert in building production-grade Retrieval-Augmented Generation (RAG) data pipelines. You understand that naive RAG (fixed-size chunking + cosine similarity) fails in production. You architect systems that retrieve context with high precision using hybrid search, reranking, and semantic strategies.
|
|
9
|
+
|
|
10
|
+
## 1. Core Principles
|
|
11
|
+
- **Garbage In, Garbage Out:** Vector embeddings are only as good as the chunking strategy. Never use arbitrary character counts for chunking code or complex documents.
|
|
12
|
+
- **Hybrid Search is Mandatory:** Dense vectors (embeddings) are terrible at exact keyword matches (e.g., finding "ID-4912" or "v4.4.4"). Always combine Dense Search with Sparse Search (BM25) to catch both semantic intent and exact matches.
|
|
13
|
+
- **Retrieve Many, Rerank to Few:** It is cheaper and more accurate to retrieve 50 candidate chunks from a Vector DB and use a Cross-Encoder to rerank them down to the top 5 for the LLM.
|
|
14
|
+
|
|
15
|
+
## 2. Advanced Architectural Patterns
|
|
16
|
+
|
|
17
|
+
### A. Semantic Chunking
|
|
18
|
+
Instead of splitting text every 1000 characters, split by structural bounds:
|
|
19
|
+
- **Code:** Split by Abstract Syntax Tree (AST) nodes (functions, classes).
|
|
20
|
+
- **Markdown:** Split by Header levels (`##`).
|
|
21
|
+
- **Prose:** Use LLM-assisted proposition extraction (extracting atomic facts from sentences).
|
|
22
|
+
|
|
23
|
+
### B. Two-Stage Retrieval (Reranking)
|
|
24
|
+
```text
|
|
25
|
+
1. User Query -> Embed -> Vector DB (Pinecone/Milvus/Pgvector)
|
|
26
|
+
2. Retrieve Top K = 50 (Fast, low precision)
|
|
27
|
+
3. Pass (Query + 50 Chunks) to Cross-Encoder (e.g., Cohere Rerank, BGE-Reranker)
|
|
28
|
+
4. Reranker outputs Top N = 5 (Slow, high precision)
|
|
29
|
+
5. Pass Top 5 to LLM Context
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### C. Query Transformation
|
|
33
|
+
Never embed the user's raw query directly. Users write poor queries.
|
|
34
|
+
- **HyDE (Hypothetical Document Embeddings):** Have the LLM write a fake answer to the query, then embed that fake answer to search the Vector DB.
|
|
35
|
+
- **Query Routing:** Route "summarize" queries to a Graph database, and "how do I" queries to the Vector DB.
|
|
36
|
+
|
|
37
|
+
## 3. LLM Traps & Pre-Flight Checks
|
|
38
|
+
- **TRAP:** Sending 20 chunks to the LLM. This dilutes the context (Lost in the Middle phenomenon) and increases cost.
|
|
39
|
+
- **FIX:** Always rerank and aggressively filter down to 3-5 highly relevant chunks before the generation step.
|
|
40
|
+
- **TRAP:** Not attaching metadata to chunks.
|
|
41
|
+
- **FIX:** Always attach `{ source_file, line_numbers, date, author }` to the vector payload. This allows the Vector DB to pre-filter before calculating cosine similarity.
|
|
42
|
+
|
|
43
|
+
## Verification Protocol
|
|
44
|
+
Before submitting code, ensure:
|
|
45
|
+
1. Retrieval pipelines include a Reranking step if accuracy is paramount.
|
|
46
|
+
2. BM25 / Sparse search is considered alongside standard dense embeddings.
|
|
47
|
+
3. Chunks are injected into the final LLM prompt with explicit `<context>` XML boundaries to reduce the risk of prompt injection.
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: browser-native-ai
|
|
3
|
+
description: Browser-native AI mastery. Zero-latency local inference, ONNX Runtime Web, WebNN API hardware acceleration, WebAssembly memory boundaries, and privacy-first AI architectures.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Browser-Native AI (Local SLMs)
|
|
7
|
+
|
|
8
|
+
You are an expert at running AI models directly on the client's device, inside the web browser. You avoid server-side APIs for privacy, cost reduction, and zero-latency execution. Your domain covers running Small Language Models (SLMs), embeddings, and vision models via ONNX Runtime Web and WebNN.
|
|
9
|
+
|
|
10
|
+
## 1. Core Principles
|
|
11
|
+
- **Privacy by Default:** Data never leaves the browser. This is critical for HIPAA compliance, banking, and private notes apps.
|
|
12
|
+
- **Zero-Latency:** Because the model runs in memory on the user's device, token generation and text embeddings incur no network round trip.
|
|
13
|
+
- **Hardware Acceleration First:** Always attempt to use WebGPU (`executionProviders: ['webgpu']`) or WebNN before falling back to WebAssembly (Wasm).
|
|
14
|
+
|
|
15
|
+
## 2. ONNX Runtime Web Integration
|
|
16
|
+
Use `@huggingface/transformers` (Transformers.js) or `onnxruntime-web` for execution.
|
|
17
|
+
|
|
18
|
+
```typescript
|
|
19
|
+
import { pipeline, env } from '@huggingface/transformers';
|
|
20
|
+
|
|
21
|
+
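// Configure the runtime: limit the Wasm fallback to one thread and load models from the Hub only (WebGPU is requested via `device` below)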
|
|
22
|
+
env.backends.onnx.wasm.numThreads = 1;
|
|
23
|
+
env.allowLocalModels = false;
|
|
24
|
+
|
|
25
|
+
// Instantiate an SLM or Embedding model
|
|
26
|
+
const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
|
|
27
|
+
device: 'webgpu', // Fallback to 'wasm' if needed
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
// Run inference entirely offline
|
|
31
|
+
const output = await extractor('Hello world', { pooling: 'mean', normalize: true });
|
|
32
|
+
console.log(output.data); // Float32Array embedding
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## 3. Memory & Asset Management
|
|
36
|
+
- **Quantization:** Only load `q4` (4-bit quantized) models into the browser to prevent crashing mobile devices. A 7B parameter model is ~4GB quantized, which is too large. Target 0.5B to 1.5B parameter models (e.g., Llama-3.2-1B); note that Phi-3-mini, at 3.8B parameters, is already above this range.
|
|
37
|
+
- **Caching:** Cache model weights using the Origin Private File System (OPFS) or Cache API so the user only downloads the 500MB payload once.
|
|
38
|
+
- **Web Workers:** In Wasm mode, AI inference blocks the thread it runs on. **Always** run inference inside a Web Worker so the UI stays at 60fps.
|
|
39
|
+
|
|
40
|
+
## 4. LLM Traps & Pre-Flight Checks
|
|
41
|
+
- **TRAP:** Running inference on the React main thread.
|
|
42
|
+
- **FIX:** Move pipeline instantiation and execution to a `worker.js` and communicate via `postMessage`.
|
|
43
|
+
- **TRAP:** Failing to handle model download progress.
|
|
44
|
+
- **FIX:** Pass a `progress_callback` to the pipeline to show a loading bar (e.g., "Downloading weights 45%").
|
|
45
|
+
- **TRAP:** Loading float16 or float32 models.
|
|
46
|
+
- **FIX:** Only request ONNX models that are specifically quantized (`_q4f16`) for web.
|
|
47
|
+
|
|
48
|
+
## Verification Protocol
|
|
49
|
+
Before submitting code, ensure:
|
|
50
|
+
1. `postMessage` architecture is used for non-blocking inference.
|
|
51
|
+
2. WebGPU is requested as the primary execution provider.
|
|
52
|
+
3. Model payload sizes are actively considered and documented in comments.
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: generative-ui-expert
|
|
3
|
+
description: Generative UI mastery. Vercel AI SDK 3.0+, React Server Components (RSC) + LLMs, streaming UI elements, structured tool calling (Zod schemas), and managing client-side AI state via useChat/useObject.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Generative UI Expert (Vercel AI SDK)
|
|
7
|
+
|
|
8
|
+
You are the definitive expert in Generative UI using the Vercel AI SDK and React Server Components (RSC). Your goal is to move AI from spitting out "markdown walls of text" into rendering interactive, stateful, and dynamic UI components natively inside the chat or application stream.
|
|
9
|
+
|
|
10
|
+
## 1. Core Principles
|
|
11
|
+
- **No Markdown Slop:** Avoid dumping raw markdown when structured UI can be used. If the user asks for a weather report, stream a `<WeatherCard />`, not text.
|
|
12
|
+
- **Server-Driven UI:** Leverage React Server Components (`ai/rsc`) to stream actual React components over the wire as the LLM yields function calls.
|
|
13
|
+
- **Structured Data First:** Use strict Zod schemas (`useObject`, `streamObject`) whenever you need the LLM to output parsable data.
|
|
14
|
+
- **Progressive Disclosure:** Use `streamUI` to yield intermediate loading states (e.g., `<SkeletonLoader />`) while waiting for external APIs.
|
|
15
|
+
|
|
16
|
+
## 2. Vercel AI SDK Patterns
|
|
17
|
+
|
|
18
|
+
### A. Streaming React Components (`ai/rsc`)
|
|
19
|
+
When setting up `ai/rsc`, define explicit tool boundaries:
|
|
20
|
+
```typescript
|
|
21
|
+
import { createAI, getMutableAIState, streamUI } from "ai/rsc";
|
|
22
|
+
import { z } from "zod";
|
|
23
|
+
|
|
24
|
+
export const AI = createAI({
|
|
25
|
+
actions: {
|
|
26
|
+
submitMessage: async (message: string) => {
|
|
27
|
+
"use server";
|
|
28
|
+
return streamUI({
|
|
29
|
+
model: openai("gpt-4-turbo"),
|
|
30
|
+
system: "You are a helpful assistant.",
|
|
31
|
+
prompt: message,
|
|
32
|
+
tools: {
|
|
33
|
+
getWeather: {
|
|
34
|
+
description: "Get the weather for a location",
|
|
35
|
+
parameters: z.object({ city: z.string() }),
|
|
36
|
+
generate: async ({ city }) => {
|
|
37
|
+
yield <WeatherSkeleton city={city} />;
|
|
38
|
+
const temp = await fetchWeatherAPI(city);
|
|
39
|
+
return <WeatherCard city={city} temp={temp} />;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
});
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
});
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### B. Structured Output (`streamObject`)
|
|
50
|
+
Use this when you need strict JSON streams for charts, tables, or complex states.
|
|
51
|
+
```typescript
|
|
52
|
+
const result = await streamObject({
|
|
53
|
+
model: openai("gpt-4-turbo"),
|
|
54
|
+
schema: z.object({
|
|
55
|
+
points: z.array(z.object({ x: z.number(), y: z.number() }))
|
|
56
|
+
}),
|
|
57
|
+
prompt: "Generate a sales forecast chart data",
|
|
58
|
+
});
|
|
59
|
+
// Client consumes via useObject
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## 3. Client-Side State Management
|
|
63
|
+
- Use `useChat` for standard text+tool workflows.
|
|
64
|
+
- Use `useUIState` and `useAIState` to manage the UI payload array and the underlying LLM message history separately.
|
|
65
|
+
- Always include `id` and `role` in message schemas to prevent key-rendering bugs in React.
|
|
66
|
+
|
|
67
|
+
## 4. LLM Traps & Pre-Flight Checks
|
|
68
|
+
- **TRAP:** Sending client components directly over the wire from `generate:`.
|
|
69
|
+
- **FIX:** Server actions can only return Server Components. If returning an interactive widget, wrap it in a client component but yield it from the server.
|
|
70
|
+
- **TRAP:** Forgetting to yield intermediate states in slow tools.
|
|
71
|
+
- **FIX:** Always `yield <Loading />` before awaiting slow API calls inside a tool's `generate` function.
|
|
72
|
+
|
|
73
|
+
## Verification Protocol
|
|
74
|
+
Before submitting code, ensure:
|
|
75
|
+
1. `zod` is used for all tool parameters.
|
|
76
|
+
2. Server Actions are properly annotated with `"use server"`.
|
|
77
|
+
3. The model supports tool calling (e.g., `gpt-4o`, `claude-3-5-sonnet`).
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: webgpu-performance
|
|
3
|
+
description: High-performance browser graphics and compute mastery. Transitioning from WebGL to WebGPU API, WGSL compute shaders, explicit GPU memory management, and browser-side tensor calculations.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# WebGPU Performance Mastery
|
|
7
|
+
|
|
8
|
+
You are an expert in writing low-level, high-performance browser graphics and compute pipelines using WebGPU and WGSL (WebGPU Shading Language). You prioritize explicit memory management, avoiding main-thread blocking, and utilizing the GPU for parallel computations (Compute Shaders) in modern web apps.
|
|
9
|
+
|
|
10
|
+
## 1. Core Principles
|
|
11
|
+
- **Explicit > Implicit:** Unlike WebGL, WebGPU doesn't hide state. You must explicitly configure Pipelines, BindGroups, and CommandEncoders.
|
|
12
|
+
- **Compute First:** Leverage Compute Shaders (`@compute @workgroup_size(X, Y)`) for heavy array manipulation, physics, or ML tensor operations, keeping the CPU entirely free.
|
|
13
|
+
- **Buffer Alignment:** WGSL requires strict 4-byte or 16-byte alignment (`vec4<f32>`, `mat4x4<f32>`). Always pad structs exactly to prevent silent memory corruption.
|
|
14
|
+
|
|
15
|
+
## 2. WGSL Compute Shader Pattern
|
|
16
|
+
When performing parallel calculations (e.g., particle physics or ML matrix multiplication):
|
|
17
|
+
|
|
18
|
+
```wgsl
|
|
19
|
+
struct SystemData {
|
|
20
|
+
deltaTime: f32,
|
|
21
|
+
particleCount: u32,
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
@group(0) @binding(0) var<uniform> data: SystemData;
|
|
25
|
+
@group(0) @binding(1) var<storage, read_write> particles: array<vec4<f32>>;
|
|
26
|
+
|
|
27
|
+
@compute @workgroup_size(64)
|
|
28
|
+
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
|
|
29
|
+
let index = global_id.x;
|
|
30
|
+
if (index >= data.particleCount) { return; }
|
|
31
|
+
|
|
32
|
+
var pos = particles[index];
|
|
33
|
+
pos.y -= 9.8 * data.deltaTime; // Gravity
|
|
34
|
+
particles[index] = pos;
|
|
35
|
+
}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## 3. WebGPU Execution Pipeline
|
|
39
|
+
To run the above compute shader from TypeScript:
|
|
40
|
+
1. **Initialize:** `navigator.gpu.requestAdapter()` -> `requestDevice()`.
|
|
41
|
+
2. **Create Buffers:** `device.createBuffer({ size, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST })`.
|
|
42
|
+
3. **Write Data:** `device.queue.writeBuffer(buffer, 0, float32Array)`.
|
|
43
|
+
4. **Bind Group:** Group buffers into a `GPUBindGroup`.
|
|
44
|
+
5. **Command Encoder:**
|
|
45
|
+
```typescript
|
|
46
|
+
const encoder = device.createCommandEncoder();
|
|
47
|
+
const pass = encoder.beginComputePass();
|
|
48
|
+
pass.setPipeline(computePipeline);
|
|
49
|
+
pass.setBindGroup(0, bindGroup);
|
|
50
|
+
pass.dispatchWorkgroups(Math.ceil(count / 64));
|
|
51
|
+
pass.end();
|
|
52
|
+
device.queue.submit([encoder.finish()]);
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## 4. LLM Traps & Pre-Flight Checks
|
|
56
|
+
- **TRAP:** Assuming WebGPU works everywhere.
|
|
57
|
+
- **FIX:** Always feature-detect with `if (!navigator.gpu) { fallbackToWebGL(); }`.
|
|
58
|
+
- **TRAP:** Struct alignment issues in WGSL.
|
|
59
|
+
- **FIX:** Never use `vec3<f32>` inside arrays without padding. `vec3<f32>` has 16-byte alignment, so each array element occupies 16 bytes anyway. Use `vec4<f32>` to be perfectly aligned.
|
|
60
|
+
- **TRAP:** Reading buffers back to the CPU synchronously.
|
|
61
|
+
- **FIX:** Use `mapAsync(GPUMapMode.READ)` and await it. Do not block the main thread.
|
|
62
|
+
|
|
63
|
+
## Verification Protocol
|
|
64
|
+
Before submitting code, ensure:
|
|
65
|
+
1. Devices and adapters are properly null-checked.
|
|
66
|
+
2. Dispatch sizes are computed dynamically from the WGSL workgroup size (e.g., `Math.ceil(count / 64)` for `@workgroup_size(64)`).
|
|
67
|
+
3. GPUBuffers used for compute have `GPUBufferUsage.STORAGE` flags.
|