@triedotdev/mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,950 @@
1
+ // src/ingest/agent-builder.ts
2
+ import Anthropic2 from "@anthropic-ai/sdk";
3
+
4
+ // src/ingest/document-parser.ts
5
+ import { readFile } from "fs/promises";
6
+ import { extname, basename } from "path";
7
/**
 * Parse a document file into raw text, metadata, and detected sections.
 *
 * @param {string} filePath - Path to a .pdf, .txt, .md/.markdown, or .rtf file.
 * @returns {Promise<{rawText: string, metadata: object, sections: object[]}>}
 * @throws {Error} When the file extension is not supported.
 */
async function parseDocument(filePath) {
  const extension = extname(filePath).toLowerCase();
  const kind = getFileType(extension);
  if (!kind) {
    throw new Error(`Unsupported file type: ${extension}. Supported: .pdf, .txt, .md, .rtf`);
  }
  const metadata = {
    fileType: kind,
    originalPath: filePath,
    parsedAt: new Date().toISOString()
  };
  let rawText;
  if (kind === "pdf") {
    // PDF parsing also yields page count and (optionally) an embedded title.
    const parsed = await parsePDF(filePath);
    rawText = parsed.text;
    metadata.pageCount = parsed.pageCount;
    if (parsed.title !== void 0) {
      metadata.title = parsed.title;
    }
  } else if (kind === "txt") {
    rawText = await parseTXT(filePath);
  } else if (kind === "md") {
    rawText = await parseMarkdown(filePath);
  } else if (kind === "rtf") {
    rawText = await parseRTF(filePath);
  } else {
    // Unreachable given getFileType's return values; kept as a safety net.
    throw new Error(`Unsupported file type: ${kind}`);
  }
  metadata.wordCount = countWords(rawText);
  const sections = extractSections(rawText, kind);
  if (!metadata.title) {
    // Prefer a title inferred from content; fall back to the file name.
    metadata.title = extractTitle(rawText, sections) || basename(filePath, extension);
  }
  return { rawText, metadata, sections };
}
51
/**
 * Map a lowercase file extension (including the dot) to an internal file-type tag.
 * @param {string} ext - e.g. ".md"
 * @returns {"pdf"|"txt"|"md"|"rtf"|null} null when unsupported.
 */
function getFileType(ext) {
  switch (ext) {
    case ".pdf":
      return "pdf";
    case ".txt":
      return "txt";
    case ".md":
    case ".markdown":
      return "md";
    case ".rtf":
      return "rtf";
    default:
      return null;
  }
}
61
/**
 * Parse a PDF file via the optional pdf-parse dependency (loaded lazily).
 *
 * @param {string} filePath
 * @returns {Promise<{text: string, pageCount: number, title?: string}>}
 * @throws {Error} A friendly install hint when pdf-parse is not installed;
 *                 otherwise rethrows the underlying error.
 */
async function parsePDF(filePath) {
  try {
    // Lazy import so the package is only required when PDFs are actually parsed.
    const { default: pdfParse } = await import("pdf-parse");
    const buffer = await readFile(filePath);
    const parsed = await pdfParse(buffer);
    const output = {
      text: parsed.text,
      pageCount: parsed.numpages
    };
    const embeddedTitle = parsed.info?.Title;
    if (embeddedTitle) {
      output.title = embeddedTitle;
    }
    return output;
  } catch (error) {
    if (error.code === "MODULE_NOT_FOUND") {
      throw new Error(
        "PDF parsing requires the pdf-parse package. Install it with: npm install pdf-parse"
      );
    }
    throw error;
  }
}
83
/**
 * Read a plain-text file as UTF-8.
 * @param {string} filePath
 * @returns {Promise<string>} File contents, unmodified.
 */
async function parseTXT(filePath) {
  const contents = await readFile(filePath, "utf-8");
  return contents;
}
86
/**
 * Read a Markdown file and lightly normalize it: horizontal-rule lines
 * (---, ***, ___ and mixes thereof) are removed and runs of 3+ newlines
 * collapse to one blank line.
 * @param {string} filePath
 * @returns {Promise<string>} Trimmed, normalized markdown text.
 */
async function parseMarkdown(filePath) {
  const raw = await readFile(filePath, "utf-8");
  const withoutRules = raw.replace(/^[-*_]{3,}$/gm, "");
  return withoutRules.replace(/\n{3,}/g, "\n\n").trim();
}
90
/**
 * Read an RTF file and strip its control codes down to plain text.
 * @param {string} filePath
 * @returns {Promise<string>} Plain text (see stripRTF for the stripping rules).
 */
async function parseRTF(filePath) {
  const raw = await readFile(filePath, "utf-8");
  return stripRTF(raw);
}
94
/**
 * Best-effort RTF-to-plain-text conversion using regex passes:
 * header group, control words, simple {...} groups, hex escapes, then
 * leftover braces/backslashes; finally newline normalization.
 * Nested groups are not fully handled — this is intentionally lightweight.
 * @param {string} rtf - Raw RTF source.
 * @returns {string} Stripped, trimmed plain text.
 */
function stripRTF(rtf) {
  // 1. Drop the leading {\rtf1...} header group when present.
  const withoutHeader = rtf.replace(/^{\\rtf1[^}]*}/i, "");
  // 2. Remove control words like \b, \fs24 (optional numeric arg + one trailing space).
  //    Note: this also consumes \par/\tab/\line, so the later replacements are a fallback.
  const withoutControls = withoutHeader.replace(/\\[a-z]+(-?\d+)?[ ]?/gi, "");
  // 3. Remove remaining non-nested {...} groups.
  const withoutGroups = withoutControls.replace(/{[^{}]*}/g, "");
  // 4. Decode \'xx hex escapes and translate any surviving layout words.
  const decoded = withoutGroups
    .replace(/\\'([0-9a-f]{2})/gi, (_, hexCode) => String.fromCharCode(Number.parseInt(hexCode, 16)))
    .replace(/\\par\b/g, "\n")
    .replace(/\\tab\b/g, " ")
    .replace(/\\line\b/g, "\n")
    .replace(/[{}\\]/g, "");
  // 5. Normalize line endings and collapse excessive blank lines.
  return decoded.replace(/\r\n/g, "\n").replace(/\n{3,}/g, "\n\n").trim();
}
102
/**
 * Count whitespace-delimited tokens in a string; empty input yields 0.
 * @param {string} text
 * @returns {number}
 */
function countWords(text) {
  const tokens = text.match(/\S+/g);
  return tokens === null ? 0 : tokens.length;
}
105
/**
 * Split document text into titled sections.
 *
 * Markdown uses ATX headings (# .. ######); other formats try a series of
 * heading conventions (Chapter/Section/Part N, "1. Title", ALL-CAPS lines) and
 * keep the first convention that matches anything. When nothing matches, the
 * whole text becomes one "Document Content" section.
 *
 * @param {string} text
 * @param {string} fileType - "md" triggers markdown heading detection.
 * @returns {Array<{title: string, level: number, startIndex: number, endIndex: number, content: string}>}
 */
function extractSections(text, fileType) {
  const sections = [];
  if (fileType === "md") {
    const heading = /^(#{1,6})\s+(.+)$/gm;
    for (let m = heading.exec(text); m !== null; m = heading.exec(text)) {
      // Close out the previous section at this heading's position.
      const previous = sections[sections.length - 1];
      if (previous) {
        previous.endIndex = m.index;
        previous.content = text.slice(previous.startIndex, m.index).trim();
      }
      sections.push({
        title: m[2].trim(),
        level: m[1].length,
        startIndex: m.index,
        endIndex: text.length,
        content: ""
      });
    }
    // The final section runs to the end of the text.
    const last = sections[sections.length - 1];
    if (last) {
      last.content = text.slice(last.startIndex).trim();
    }
  } else {
    const conventions = [
      /^chapter\s+(\d+|[ivxlc]+)[:\.\s]+(.*)$/gim,
      /^section\s+(\d+|[ivxlc]+)[:\.\s]+(.*)$/gim,
      /^part\s+(\d+|[ivxlc]+)[:\.\s]+(.*)$/gim,
      /^(\d+)\.\s+([A-Z][^.]+)$/gm,
      /^([A-Z][A-Z\s]+)$/gm
      // ALL CAPS headings
    ];
    for (const convention of conventions) {
      convention.lastIndex = 0;
      for (let m = convention.exec(text); m !== null; m = convention.exec(text)) {
        sections.push({
          title: (m[2] || m[1] || m[0]).trim(),
          level: 1,
          startIndex: m.index,
          endIndex: text.length,
          content: ""
        });
      }
      // First convention that found anything wins.
      if (sections.length > 0) break;
    }
    sections.sort((a, b) => a.startIndex - b.startIndex);
    // Each section's content runs up to the next section (or end of text).
    sections.forEach((section, i) => {
      const boundary = i + 1 < sections.length ? sections[i + 1].startIndex : text.length;
      section.endIndex = boundary;
      section.content = text.slice(section.startIndex, boundary).trim();
    });
  }
  if (sections.length === 0) {
    sections.push({
      title: "Document Content",
      level: 1,
      startIndex: 0,
      endIndex: text.length,
      content: text.trim()
    });
  }
  return sections;
}
180
/**
 * Guess a document title: prefer the first detected section's title (unless
 * it is the generic "Document Content" placeholder), otherwise use the first
 * line when it looks title-like (short, no period).
 * @param {string} text
 * @param {Array<{title: string}>} sections
 * @returns {string|null} null when no plausible title was found.
 */
function extractTitle(text, sections) {
  const [lead] = sections;
  if (lead !== void 0 && lead.title !== "Document Content") {
    return lead.title;
  }
  const opener = text.split("\n")[0]?.trim();
  const looksLikeTitle = Boolean(opener) && opener.length < 100 && !opener.includes(".");
  return looksLikeTitle ? opener : null;
}
191
/**
 * Split a parsed document into chunks of at most maxChunkSize characters.
 *
 * Small documents pass through as one chunk. Multi-section documents are
 * packed section-by-section (each rendered as "## Title\n\ncontent\n\n");
 * oversized sections, or documents without sections, fall back to
 * paragraph-based splitting.
 *
 * @param {{rawText: string, sections: Array<{title: string, content: string}>}} result
 * @param {number} [maxChunkSize=4000]
 * @returns {string[]}
 */
function chunkDocument(result, maxChunkSize = 4000) {
  if (result.rawText.length <= maxChunkSize) {
    return [result.rawText];
  }
  if (result.sections.length <= 1) {
    return splitByParagraphs(result.rawText, maxChunkSize);
  }
  const chunks = [];
  let buffer = "";
  for (const section of result.sections) {
    const block = `## ${section.title}\n\n${section.content}\n\n`;
    if (buffer.length + block.length > maxChunkSize) {
      // Flush what we have, then either split the oversized section or start fresh.
      if (buffer) chunks.push(buffer.trim());
      if (block.length > maxChunkSize) {
        chunks.push(...splitByParagraphs(block, maxChunkSize));
        buffer = "";
      } else {
        buffer = block;
      }
    } else {
      buffer += block;
    }
  }
  if (buffer) chunks.push(buffer.trim());
  return chunks;
}
222
/**
 * Greedily pack blank-line-separated paragraphs into chunks of at most
 * maxSize characters; paragraphs that are themselves too large are split
 * by sentences.
 * @param {string} text
 * @param {number} maxSize
 * @returns {string[]}
 */
function splitByParagraphs(text, maxSize) {
  const pieces = [];
  let pending = "";
  for (const paragraph of text.split(/\n\s*\n/)) {
    // +2 accounts for the "\n\n" separator that would join the paragraphs.
    const wouldOverflow = pending.length + paragraph.length + 2 > maxSize;
    if (!wouldOverflow) {
      pending += (pending ? "\n\n" : "") + paragraph;
      continue;
    }
    if (pending) pieces.push(pending.trim());
    if (paragraph.length > maxSize) {
      pieces.push(...splitBySentences(paragraph, maxSize));
      pending = "";
    } else {
      pending = paragraph;
    }
  }
  if (pending) pieces.push(pending.trim());
  return pieces;
}
242
/**
 * Greedily pack sentences into chunks of at most maxSize characters.
 * Sentences are naive runs of text ending in . ! or ?; text with no such
 * terminator is treated as a single sentence.
 * @param {string} text
 * @param {number} maxSize
 * @returns {string[]}
 */
function splitBySentences(text, maxSize) {
  const sentences = text.match(/[^.!?]+[.!?]+/g) ?? [text];
  const pieces = [];
  let pending = "";
  for (const sentence of sentences) {
    if (pending.length + sentence.length > maxSize) {
      if (pending) pieces.push(pending.trim());
      pending = sentence;
    } else {
      pending += sentence;
    }
  }
  if (pending) pieces.push(pending.trim());
  return pieces;
}
257
+
258
+ // src/ingest/knowledge-compressor.ts
259
+ import Anthropic from "@anthropic-ai/sdk";
260
+
261
+ // src/ingest/compression-prompts.ts
262
/**
 * Prompt templates for the document-to-agent ingestion pipeline.
 *
 * Placeholders use {{name}} syntax and are substituted via String.replace in
 * knowledge-compressor.ts. Each template spells out the exact JSON shape the
 * model must return; callers then pull the JSON out of the reply with a
 * greedy regex and JSON.parse it, falling back to defaults on failure.
 */
var COMPRESSION_PROMPTS = {
  /**
   * System prompt for the knowledge extractor
   */
  system: `You are an expert knowledge extraction system. Your job is to analyze documents and extract structured, actionable knowledge that can be used by a code review agent.

You must output valid JSON that matches the required schema exactly. Be thorough but concise - extract the essence of the knowledge without unnecessary verbosity.

Focus on:
1. Core concepts that are fundamental to understand the material
2. Best practices that should be followed
3. Anti-patterns and mistakes to avoid
4. Detection patterns that could identify issues in code
5. Key terminology and definitions`,
  /**
   * Prompt for extracting knowledge from a chunk
   */
  extractChunk: `Analyze this document chunk and extract structured knowledge.

## Document Chunk:
{{chunk}}

## Instructions:
Extract the following from this chunk:

1. **Core Concepts**: Key ideas, principles, or rules that are taught
2. **Best Practices**: Recommended approaches or patterns
3. **Anti-Patterns**: Things to avoid, common mistakes
4. **Code Patterns**: Any code patterns or detection rules that could identify issues
5. **Terminology**: Important terms and their definitions

Output as JSON:
{
  "coreConcepts": [
    {
      "name": "string",
      "description": "string",
      "importance": "critical" | "important" | "supplementary",
      "keywords": ["string"]
    }
  ],
  "bestPractices": [
    {
      "name": "string",
      "description": "string",
      "rationale": "string",
      "codeExample": "string or null"
    }
  ],
  "antiPatterns": [
    {
      "name": "string",
      "description": "string",
      "whyBad": "string",
      "betterAlternative": "string"
    }
  ],
  "codePatterns": [
    {
      "name": "string",
      "description": "string",
      "type": "best-practice" | "anti-pattern" | "security" | "compliance",
      "regexHint": "string (a regex pattern that might detect this, or null)",
      "keywords": ["string"]
    }
  ],
  "terminology": {
    "term": "definition"
  }
}

Only include items that are clearly present in the chunk. Quality over quantity.`,
  /**
   * Prompt for merging extracted knowledge
   */
  mergeKnowledge: `Merge and deduplicate these knowledge extractions into a cohesive summary.

## Extractions to Merge:
{{extractions}}

## Instructions:
1. Combine similar concepts
2. Remove duplicates
3. Prioritize the most important items
4. Ensure consistency in terminology
5. Rank items by importance

Output a single merged JSON with the same structure, keeping only the most valuable and distinct items.
Limit to top 20 core concepts, 15 best practices, 15 anti-patterns, and 25 code patterns.`,
  /**
   * Prompt for generating detection rules
   */
  generateDetectionRules: `Based on this knowledge base, generate detection rules for a code review agent.

## Knowledge Base:
{{knowledge}}

## Document Context:
- Title: {{title}}
- Domain: {{domain}}
- Word Count: {{wordCount}}

## Instructions:
For each anti-pattern and best practice, generate detection rules that could find violations in code.

For each rule, provide:
1. A unique ID (format: {{prefix}}-XXX)
2. Clear name and description
3. Severity (critical, serious, moderate, low, info)
4. Detection patterns:
   - regex: Array of regex patterns (JavaScript-compatible)
   - keywords: Words that might indicate this issue
   - semantic: Natural language description for AI-based detection
5. Fix information:
   - description: How to fix
   - example: Code example if applicable
   - autoFixable: boolean

Output as JSON array of detection rules:
[
  {
    "id": "string",
    "name": "string",
    "description": "string",
    "severity": "critical" | "serious" | "moderate" | "low" | "info",
    "patterns": {
      "regex": ["string"],
      "keywords": ["string"],
      "semantic": "string"
    },
    "fix": {
      "description": "string",
      "example": "string or null",
      "autoFixable": boolean
    },
    "regulation": "string or null (for legal/compliance rules)",
    "category": "string"
  }
]

Generate 15-30 detection rules prioritizing the most impactful issues.`,
  /**
   * Prompt for generating agent prompts
   */
  generateAgentPrompts: `Generate system and analysis prompts for a code review agent based on this knowledge.

## Knowledge Base Summary:
{{summary}}

## Core Concepts:
{{concepts}}

## Detection Focus:
{{patterns}}

## Agent Info:
- Name: {{agentName}}
- Category: {{category}}
- Domain: {{domain}}

## Instructions:
Generate:
1. A system prompt that gives the agent its persona and expertise
2. An analysis prompt template for reviewing code
3. A fix prompt template for suggesting fixes

The prompts should:
- Reference the specific knowledge from the document
- Be authoritative but helpful
- Include the key concepts and terminology
- Guide the agent to look for the specific patterns

Output as JSON:
{
  "systemPrompt": "string",
  "analysisPrompt": "string (use {{code}}, {{filePath}}, {{language}} as placeholders)",
  "fixPrompt": "string (use {{issue}}, {{code}}, {{filePath}} as placeholders)"
}`,
  /**
   * Prompt for detecting document domain
   */
  detectDomain: `Analyze this document and determine its primary domain/category.

## Document Title:
{{title}}

## Sample Content:
{{sample}}

## Instructions:
Determine the primary domain of this document. Choose ONE:
- "technical": Programming, frameworks, libraries, code patterns
- "legal": Laws, regulations, compliance (GDPR, HIPAA, etc.)
- "policy": Company policies, internal rules, guidelines
- "security": Security practices, vulnerability prevention
- "architecture": System design, patterns, architecture principles
- "general": General knowledge, doesn't fit other categories

Also determine:
1. What types of code/files this knowledge applies to
2. What context signals should trigger this agent
3. Key content patterns to look for

Output as JSON:
{
  "domain": "technical" | "legal" | "policy" | "security" | "architecture" | "general",
  "filePatterns": ["*.ext", ...],
  "contentPatterns": ["regex pattern", ...],
  "contextSignals": ["touchesAuth", "touchesUI", ...],
  "reasoning": "Brief explanation of why this domain was chosen"
}`,
  /**
   * Prompt for generating a summary
   */
  generateSummary: `Create a concise executive summary of this document for use in an AI agent's context.

## Document:
{{content}}

## Instructions:
Write a 2-3 paragraph summary that:
1. Explains what the document covers
2. Highlights the most important takeaways
3. Describes how this knowledge applies to code review

The summary will be used as context for an AI code review agent, so focus on actionable insights.

Keep it under 500 words.`
};
491
+
492
+ // src/ingest/knowledge-compressor.ts
493
/**
 * Compress a parsed document into a structured knowledge base for a review agent.
 *
 * Pipeline: detect domain -> chunk -> extract knowledge per chunk -> merge ->
 * generate detection rules -> summarize. Uses the Anthropic client, which reads
 * its API key from the environment.
 *
 * @param {object} document Output of parseDocument ({ rawText, metadata, sections }).
 * @param {object} options  { agentName, maxChunkSize = 4000, verbose = false }.
 *   NOTE(review): callers also pass options.category, but it is never read here — confirm intended.
 * @returns {Promise<object>} Compressed knowledge (domain, summary, concepts,
 *   practices, anti-patterns, detection rules, glossary, sourceDocument stats).
 */
async function compressKnowledge(document, options) {
  const { agentName, maxChunkSize = 4e3, verbose = false } = options;
  const client = new Anthropic();
  // Progress goes to stderr (keeps stdout clean for protocol traffic); silent unless verbose.
  const log = verbose ? console.error.bind(console) : () => {
  };
  log("\u{1F4DA} Starting knowledge compression...");
  log(" \u251C\u2500 Detecting document domain...");
  const domainInfo = await detectDomain(client, document);
  log(` \u2502 \u2514\u2500 Domain: ${domainInfo.domain}`);
  const chunks = chunkDocument(document, maxChunkSize);
  log(` \u251C\u2500 Document chunked into ${chunks.length} pieces`);
  log(" \u251C\u2500 Extracting knowledge from chunks...");
  const extractions = [];
  for (let i = 0; i < chunks.length; i++) {
    log(` \u2502 \u251C\u2500 Processing chunk ${i + 1}/${chunks.length}...`);
    try {
      const chunk = chunks[i];
      if (chunk !== void 0) {
        const extraction = await extractFromChunk(client, chunk);
        extractions.push(extraction);
      }
    } catch (error) {
      // Best-effort: a failed chunk is skipped rather than aborting the whole run.
      log(` \u2502 \u2502 \u2514\u2500 Warning: Failed to extract from chunk ${i + 1}`);
    }
  }
  log(" \u251C\u2500 Merging and deduplicating knowledge...");
  const mergedKnowledge = await mergeExtractions(client, extractions);
  log(" \u251C\u2500 Generating detection rules...");
  const detectionRules = await generateDetectionRules(
    client,
    mergedKnowledge,
    document.metadata.title || agentName,
    domainInfo.domain,
    agentName
  );
  log(` \u2502 \u2514\u2500 Generated ${detectionRules.length} detection rules`);
  log(" \u2514\u2500 Generating knowledge summary...");
  // Only the first 8000 characters are summarized, to bound prompt size.
  const summary = await generateSummary(client, document.rawText.slice(0, 8e3));
  const compressed = {
    domain: domainInfo.domain,
    summary,
    coreConcepts: mergedKnowledge.coreConcepts,
    bestPractices: mergedKnowledge.bestPractices,
    antiPatterns: mergedKnowledge.antiPatterns,
    detectionRules,
    glossary: mergedKnowledge.terminology,
    sourceDocument: {
      title: document.metadata.title || agentName,
      wordCount: document.metadata.wordCount,
      // Rough heuristic comparing source word count to compressed payload size; not an exact ratio.
      compressionRatio: Math.round(document.metadata.wordCount / (summary.length + JSON.stringify(detectionRules).length / 5))
    }
  };
  return compressed;
}
547
/**
 * Classify the document into one of the known domains by sampling its first
 * ~3000 characters. Any failure (no JSON in the model reply, parse error)
 * falls back to a permissive "general" classification rather than throwing.
 *
 * @param {Anthropic} client
 * @param {object} document Output of parseDocument.
 * @returns {Promise<{domain: string, filePatterns: string[], contentPatterns: string[], contextSignals: string[], reasoning: string}>}
 */
async function detectDomain(client, document) {
  const sampleSize = Math.min(document.rawText.length, 3e3);
  const sample = document.rawText.slice(0, sampleSize);
  const prompt = COMPRESSION_PROMPTS.detectDomain.replace("{{title}}", document.metadata.title || "Unknown").replace("{{sample}}", sample);
  const response = await client.messages.create({
    model: "claude-sonnet-4-20250514",
    max_tokens: 1e3,
    system: COMPRESSION_PROMPTS.system,
    messages: [{ role: "user", content: prompt }]
  });
  const firstContent = response.content[0];
  const text = firstContent !== void 0 && firstContent.type === "text" ? firstContent.text : "";
  try {
    // Greedy match pulls the outermost {...} out of any surrounding prose.
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (jsonMatch) {
      return JSON.parse(jsonMatch[0]);
    }
  } catch (e) {
    // Deliberate best-effort: fall through to the default below.
  }
  return {
    domain: "general",
    filePatterns: ["*"],
    contentPatterns: [],
    contextSignals: [],
    reasoning: "Could not determine domain, using general"
  };
}
574
/**
 * Extract structured knowledge (concepts, practices, anti-patterns, code
 * patterns, terminology) from a single document chunk via the model.
 * Returns empty collections when the reply contains no parseable JSON.
 *
 * @param {Anthropic} client
 * @param {string} chunk Document text, already sized by chunkDocument.
 * @returns {Promise<{coreConcepts: [], bestPractices: [], antiPatterns: [], codePatterns: [], terminology: {}}>}
 */
async function extractFromChunk(client, chunk) {
  const prompt = COMPRESSION_PROMPTS.extractChunk.replace("{{chunk}}", chunk);
  const response = await client.messages.create({
    model: "claude-sonnet-4-20250514",
    max_tokens: 4e3,
    system: COMPRESSION_PROMPTS.system,
    messages: [{ role: "user", content: prompt }]
  });
  const firstContent = response.content[0];
  const text = firstContent !== void 0 && firstContent.type === "text" ? firstContent.text : "";
  try {
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (jsonMatch) {
      const parsed = JSON.parse(jsonMatch[0]);
      // Normalize so every field exists even if the model omitted it.
      return {
        coreConcepts: parsed.coreConcepts || [],
        bestPractices: parsed.bestPractices || [],
        antiPatterns: parsed.antiPatterns || [],
        codePatterns: parsed.codePatterns || [],
        terminology: parsed.terminology || {}
      };
    }
  } catch (e) {
    // Deliberate best-effort: fall through to the empty result below.
  }
  return {
    coreConcepts: [],
    bestPractices: [],
    antiPatterns: [],
    codePatterns: [],
    terminology: {}
  };
}
606
/**
 * Merge per-chunk extractions into a single knowledge set.
 * Two or fewer extractions are merged locally (no API call); larger sets are
 * deduplicated by the model, with combineExtractions as the fallback when the
 * reply has no parseable JSON.
 *
 * @param {Anthropic} client
 * @param {object[]} extractions Results of extractFromChunk.
 * @returns {Promise<object>} Merged extraction with the same shape.
 */
async function mergeExtractions(client, extractions) {
  if (extractions.length <= 2) {
    return combineExtractions(extractions);
  }
  const prompt = COMPRESSION_PROMPTS.mergeKnowledge.replace("{{extractions}}", JSON.stringify(extractions, null, 2));
  const response = await client.messages.create({
    model: "claude-sonnet-4-20250514",
    max_tokens: 8e3,
    system: COMPRESSION_PROMPTS.system,
    messages: [{ role: "user", content: prompt }]
  });
  const firstContent = response.content[0];
  const text = firstContent !== void 0 && firstContent.type === "text" ? firstContent.text : "";
  try {
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (jsonMatch) {
      return JSON.parse(jsonMatch[0]);
    }
  } catch (e) {
    // Deliberate best-effort: fall back to the local merge below.
  }
  return combineExtractions(extractions);
}
628
/**
 * Locally merge extraction results without an API call.
 *
 * List fields (coreConcepts, bestPractices, antiPatterns, codePatterns) are
 * deduplicated by case-insensitive item name, keeping the first occurrence in
 * input order. Terminology maps are shallow-merged; later extractions win on
 * duplicate terms.
 *
 * @param {object[]} extractions Results of extractFromChunk.
 * @returns {object} Combined extraction with the same shape.
 */
function combineExtractions(extractions) {
  const combined = {
    coreConcepts: [],
    bestPractices: [],
    antiPatterns: [],
    codePatterns: [],
    terminology: {}
  };
  // One Set of seen (lowercased) names per deduplicated list field.
  const seen = {
    coreConcepts: /* @__PURE__ */ new Set(),
    bestPractices: /* @__PURE__ */ new Set(),
    antiPatterns: /* @__PURE__ */ new Set(),
    codePatterns: /* @__PURE__ */ new Set()
  };
  // Push item onto combined[field] unless an item with the same name (case-insensitive) was already kept.
  const addUnique = (field, item) => {
    const key = item.name.toLowerCase();
    if (!seen[field].has(key)) {
      seen[field].add(key);
      combined[field].push(item);
    }
  };
  for (const extraction of extractions) {
    for (const concept of extraction.coreConcepts) addUnique("coreConcepts", concept);
    for (const practice of extraction.bestPractices) addUnique("bestPractices", practice);
    for (const anti of extraction.antiPatterns) addUnique("antiPatterns", anti);
    for (const pattern of extraction.codePatterns) addUnique("codePatterns", pattern);
    Object.assign(combined.terminology, extraction.terminology);
  }
  return combined;
}
673
/**
 * Turn the merged knowledge into concrete detection rules via the model.
 * Each returned rule is normalized so downstream consumers can rely on every
 * field existing. Returns [] when the reply has no parseable JSON array.
 *
 * @param {Anthropic} client
 * @param {object} knowledge Merged extraction (combineExtractions shape).
 * @param {string} title Document title used for prompt context.
 * @param {string} domain Detected domain (also the default rule category).
 * @param {string} agentName Used to derive the rule-ID prefix (e.g. "MYAG-001").
 * @returns {Promise<object[]>} Normalized detection rules.
 */
async function generateDetectionRules(client, knowledge, title, domain, agentName) {
  // Up-to-4-letter ID prefix from the agent name; "CUST" when no letters remain.
  const prefix = agentName.toUpperCase().replace(/[^A-Z]/g, "").slice(0, 4) || "CUST";
  // NOTE(review): {{wordCount}} is filled with a heuristic (concepts * 100),
  // not the document's real word count — confirm this is intentional.
  const prompt = COMPRESSION_PROMPTS.generateDetectionRules.replace("{{knowledge}}", JSON.stringify(knowledge, null, 2)).replace("{{title}}", title).replace("{{domain}}", domain).replace("{{wordCount}}", String(knowledge.coreConcepts.length * 100)).replace(/\{\{prefix\}\}/g, prefix);
  const response = await client.messages.create({
    model: "claude-sonnet-4-20250514",
    max_tokens: 8e3,
    system: COMPRESSION_PROMPTS.system,
    messages: [{ role: "user", content: prompt }]
  });
  const firstContent = response.content[0];
  const text = firstContent !== void 0 && firstContent.type === "text" ? firstContent.text : "";
  try {
    const jsonMatch = text.match(/\[[\s\S]*\]/);
    if (jsonMatch) {
      const rules = JSON.parse(jsonMatch[0]);
      // Fill in defaults for any field the model omitted.
      return rules.map((rule, i) => ({
        id: rule.id || `${prefix}-${String(i + 1).padStart(3, "0")}`,
        name: rule.name || "Unknown Rule",
        description: rule.description || "",
        severity: rule.severity || "moderate",
        patterns: {
          regex: rule.patterns?.regex || [],
          keywords: rule.patterns?.keywords || [],
          semantic: rule.patterns?.semantic || ""
        },
        fix: {
          description: rule.fix?.description || "Review and fix manually",
          example: rule.fix?.example || void 0,
          autoFixable: rule.fix?.autoFixable || false
        },
        regulation: rule.regulation || void 0,
        category: rule.category || domain
      }));
    }
  } catch (e) {
    console.error("Failed to parse detection rules:", e);
  }
  return [];
}
712
/**
 * Ask the model for an executive summary of the (pre-truncated) document text.
 * @param {Anthropic} client
 * @param {string} content Document text, already sliced by the caller.
 * @returns {Promise<string>} The summary, or "" when the reply has no text block.
 */
async function generateSummary(client, content) {
  const prompt = COMPRESSION_PROMPTS.generateSummary.replace("{{content}}", content);
  const response = await client.messages.create({
    model: "claude-sonnet-4-20250514",
    max_tokens: 1000,
    system: COMPRESSION_PROMPTS.system,
    messages: [{ role: "user", content: prompt }]
  });
  const [first] = response.content;
  return first?.type === "text" ? first.text : "";
}
723
/**
 * Ask the model to author the new agent's system/analysis/fix prompts.
 * Falls back to generic templates when the reply contains no parseable JSON.
 * The returned prompt strings keep {{code}}, {{filePath}}, {{language}} and
 * {{issue}} placeholders for substitution at review time.
 *
 * @param {Anthropic} client
 * @param {object} knowledge Compressed knowledge (compressKnowledge output).
 * @param {string} agentName
 * @param {string} category
 * @returns {Promise<{systemPrompt: string, analysisPrompt: string, fixPrompt: string}>}
 */
async function generateAgentPrompts(client, knowledge, agentName, category) {
  const prompt = COMPRESSION_PROMPTS.generateAgentPrompts.replace("{{summary}}", knowledge.summary).replace("{{concepts}}", JSON.stringify(knowledge.coreConcepts.slice(0, 10), null, 2)).replace("{{patterns}}", JSON.stringify(knowledge.detectionRules.slice(0, 10), null, 2)).replace("{{agentName}}", agentName).replace("{{category}}", category).replace("{{domain}}", knowledge.domain);
  const response = await client.messages.create({
    model: "claude-sonnet-4-20250514",
    max_tokens: 4e3,
    system: COMPRESSION_PROMPTS.system,
    messages: [{ role: "user", content: prompt }]
  });
  const firstContent = response.content[0];
  const text = firstContent !== void 0 && firstContent.type === "text" ? firstContent.text : "";
  try {
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (jsonMatch) {
      return JSON.parse(jsonMatch[0]);
    }
  } catch (e) {
    // Deliberate best-effort: fall through to the generic templates below.
  }
  return {
    systemPrompt: `You are an expert code reviewer specializing in ${category}. Review code based on best practices and patterns from "${agentName}".`,
    analysisPrompt: `Review this code for issues related to ${category}:

\`\`\`{{language}}
{{code}}
\`\`\`

File: {{filePath}}`,
    fixPrompt: `Fix this issue: {{issue}}

Code:
\`\`\`{{language}}
{{code}}
\`\`\`

File: {{filePath}}`
  };
}
759
+
760
+ // src/ingest/agent-builder.ts
761
+ import { mkdir, writeFile, readFile as readFile2 } from "fs/promises";
762
+ import { join } from "path";
763
/**
 * End-to-end pipeline: parse a document, compress it into knowledge, generate
 * agent prompts, assemble the agent config, and save it under .trie/agents.
 *
 * Never throws: failures are reported through the returned envelope
 * ({ success: false, error, ... }) with zeroed stats.
 *
 * @param {object} options { filePath, agentName, category?, displayName?, description? }.
 * @param {boolean} [verbose=true] When true, progress is logged to stderr.
 * @returns {Promise<{success: boolean, agentName: string, configPath: string, stats: object, error?: string}>}
 */
async function buildAgentFromDocument(options, verbose = true) {
  const { filePath, agentName, category } = options;
  // Logging goes to stderr so stdout stays clean for MCP protocol traffic.
  const log = verbose ? console.error.bind(console) : () => {
  };
  try {
    log("\u{1F4DA} Parsing document...");
    const document = await parseDocument(filePath);
    log(` \u251C\u2500 File type: ${document.metadata.fileType}`);
    log(` \u251C\u2500 Words: ${document.metadata.wordCount.toLocaleString()}`);
    log(` \u2514\u2500 Sections: ${document.sections.length}`);
    log("\n\u{1F9E0} Compressing knowledge...");
    const compressOptions = {
      agentName,
      verbose
    };
    // Only set category when provided, so the key stays absent otherwise.
    if (category !== void 0) {
      compressOptions.category = category;
    }
    const knowledge = await compressKnowledge(document, compressOptions);
    log(` \u251C\u2500 Core concepts: ${knowledge.coreConcepts.length}`);
    log(` \u251C\u2500 Best practices: ${knowledge.bestPractices.length}`);
    log(` \u251C\u2500 Anti-patterns: ${knowledge.antiPatterns.length}`);
    log(` \u2514\u2500 Detection rules: ${knowledge.detectionRules.length}`);
    log("\n\u{1F4DD} Generating agent prompts...");
    const client = new Anthropic2();
    const prompts = await generateAgentPrompts(
      client,
      knowledge,
      agentName,
      category || knowledge.domain
    );
    log("\n\u{1F916} Building agent configuration...");
    const agentConfig = buildAgentConfig(
      document,
      knowledge,
      prompts,
      options
    );
    const configPath = await saveAgentConfig(agentConfig);
    log(` \u2514\u2500 Saved to: ${configPath}`);
    return {
      success: true,
      agentName: agentConfig.name,
      configPath,
      stats: {
        documentWords: document.metadata.wordCount,
        conceptsExtracted: knowledge.coreConcepts.length,
        patternsGenerated: knowledge.detectionRules.length,
        compressionRatio: knowledge.sourceDocument.compressionRatio
      }
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    log(`
\u274C Error: ${errorMessage}`);
    // Failure envelope mirrors the success shape so callers can branch on `success`.
    return {
      success: false,
      agentName,
      configPath: "",
      stats: {
        documentWords: 0,
        conceptsExtracted: 0,
        patternsGenerated: 0,
        compressionRatio: 0
      },
      error: errorMessage
    };
  }
}
832
/**
 * Assemble the persisted agent configuration from the parsed document, the
 * compressed knowledge, and the generated prompts. Caller-provided
 * displayName/description/category win over derived defaults.
 * @param {object} document Output of parseDocument.
 * @param {object} knowledge Output of compressKnowledge.
 * @param {{systemPrompt: string, analysisPrompt: string, fixPrompt: string}} prompts
 * @param {object} options { agentName, displayName?, description?, category? }.
 * @returns {object} Complete agent config, ready for saveAgentConfig.
 */
function buildAgentConfig(document, knowledge, prompts, options) {
  const { agentName, displayName, description, category } = options;
  const docTitle = document.metadata.title;
  const source = {
    type: "document",
    originalFile: document.metadata.originalPath,
    fileType: document.metadata.fileType,
    compressedAt: new Date().toISOString()
  };
  // documentTitle is only present when the document actually had a title.
  if (docTitle !== void 0) {
    source.documentTitle = docTitle;
  }
  return {
    name: sanitizeAgentName(agentName),
    displayName: displayName || formatDisplayName(agentName),
    description: description || `Code review agent based on "${document.metadata.title || agentName}"`,
    version: "1.0.0",
    category: category || knowledge.domain,
    source,
    systemPrompt: prompts.systemPrompt,
    analysisPrompt: prompts.analysisPrompt,
    fixPrompt: prompts.fixPrompt,
    activationRules: buildActivationRules(knowledge),
    patterns: knowledge.detectionRules,
    knowledge
  };
}
857
/**
 * Derive activation rules (when should this agent run?) from the knowledge
 * base: per-domain file patterns / context signals / priority, plus up to 20
 * distinct trigger keywords pulled from detection rules (3 each) and the top
 * five core concepts (2 each).
 * @param {object} knowledge Output of compressKnowledge.
 * @returns {{filePatterns: string[], contentPatterns: string[], contextSignals: string[], minConfidence: number, priority: number}}
 */
function buildActivationRules(knowledge) {
  const DOMAIN_DEFAULTS = {
    technical: {
      filePatterns: ["*.ts", "*.tsx", "*.js", "*.jsx", "*.py", "*.go", "*.rs"],
      contextSignals: ["touchesUI", "touchesAPI"],
      priority: 2
    },
    legal: {
      filePatterns: ["*"],
      contextSignals: ["touchesUserData", "touchesAuth", "touchesPayments"],
      priority: 2
    },
    policy: {
      filePatterns: ["*"],
      contextSignals: ["touchesAuth", "touchesAPI", "touchesDatabase"],
      priority: 3
    },
    security: {
      filePatterns: ["*"],
      contextSignals: ["touchesAuth", "touchesCrypto", "touchesAPI", "touchesDatabase"],
      priority: 1
    },
    architecture: {
      filePatterns: ["*.ts", "*.tsx", "*.js", "*.jsx", "*.py", "*.go"],
      contextSignals: ["touchesAPI", "touchesDatabase"],
      priority: 2
    },
    general: {
      filePatterns: ["*"],
      contextSignals: [],
      priority: 3
    }
  };
  // Unknown domains fall back to the permissive "general" defaults.
  const defaults = DOMAIN_DEFAULTS[knowledge.domain] ?? DOMAIN_DEFAULTS.general;
  const keywordPool = [
    ...knowledge.detectionRules.flatMap(
      (rule) => rule.patterns.keywords ? rule.patterns.keywords.slice(0, 3) : []
    ),
    ...knowledge.coreConcepts.slice(0, 5).flatMap(
      (concept) => concept.keywords ? concept.keywords.slice(0, 2) : []
    )
  ];
  return {
    filePatterns: defaults.filePatterns ?? ["*"],
    contentPatterns: [...new Set(keywordPool)].slice(0, 20),
    contextSignals: defaults.contextSignals ?? [],
    minConfidence: 0.3,
    priority: defaults.priority ?? 2
  };
}
911
/**
 * Persist an agent config as pretty-printed JSON under
 * <cwd>/.trie/agents/<name>.json, creating the directory if needed.
 * @param {object} config Agent config (must have a `name`).
 * @returns {Promise<string>} Absolute path of the written file.
 */
async function saveAgentConfig(config) {
  const agentsDir = join(process.cwd(), ".trie", "agents");
  await mkdir(agentsDir, { recursive: true });
  const destination = join(agentsDir, `${config.name}.json`);
  await writeFile(destination, JSON.stringify(config, null, 2));
  return destination;
}
918
/**
 * Load a previously saved agent config from <cwd>/.trie/agents/<name>.json.
 * @param {string} name Sanitized agent name (file stem).
 * @returns {Promise<object|null>} Parsed config, or null when missing/unreadable/invalid JSON.
 */
async function loadAgentConfig(name) {
  const configPath = join(process.cwd(), ".trie", "agents", `${name}.json`);
  try {
    const raw = await readFile2(configPath, "utf-8");
    return JSON.parse(raw);
  } catch {
    // Missing file and malformed JSON are both treated as "no such agent".
    return null;
  }
}
927
/**
 * List the names of saved custom agents (the *.json files under
 * <cwd>/.trie/agents, without their extension).
 *
 * Fix: the previous `f.replace(".json", "")` removed the FIRST occurrence of
 * ".json" anywhere in the file name (e.g. "a.jsonb.json" -> "ab.json");
 * we now strip only the trailing extension.
 *
 * @returns {Promise<string[]>} Agent names; [] when the directory is missing or unreadable.
 */
async function listCustomAgents() {
  try {
    const { readdir } = await import("fs/promises");
    const agentsDir = join(process.cwd(), ".trie", "agents");
    const entries = await readdir(agentsDir);
    return entries
      .filter((entry) => entry.endsWith(".json"))
      .map((entry) => entry.slice(0, -".json".length));
  } catch {
    // Directory not created yet (no agents saved) — not an error.
    return [];
  }
}
937
/**
 * Normalize an agent name into a kebab-case slug safe for use as a file stem:
 * lowercase, non [a-z0-9-] characters become dashes, runs of dashes collapse,
 * and leading/trailing dashes are removed.
 * @param {string} name
 * @returns {string}
 */
function sanitizeAgentName(name) {
  const lowered = name.toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9-]/g, "-");
  const collapsed = dashed.replace(/-+/g, "-");
  return collapsed.replace(/^-|-$/g, "");
}
940
/**
 * Turn a kebab/snake-case agent name into a space-separated, capitalized
 * display name (e.g. "my-cool_agent" -> "My Cool Agent").
 * @param {string} name
 * @returns {string}
 */
function formatDisplayName(name) {
  const words = name.split(/[-_]/);
  const capitalized = words.map((word) => word.charAt(0).toUpperCase() + word.slice(1));
  return capitalized.join(" ");
}
943
+
944
+ export {
945
+ parseDocument,
946
+ buildAgentFromDocument,
947
+ loadAgentConfig,
948
+ listCustomAgents
949
+ };
950
+ //# sourceMappingURL=chunk-EYNAGEQK.js.map