@afterxleep/doc-bot 1.0.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -15
- package/package.json +1 -1
- package/src/index.js +36 -0
- package/src/services/DocumentIndex.js +388 -0
- package/src/services/InferenceEngine.js +43 -2
- package/src/services/__tests__/DocumentIndex.test.js +807 -0
- package/src/services/__tests__/InferenceEngine.integration.test.js +300 -0
package/README.md
CHANGED
@@ -9,9 +9,10 @@ A generic MCP (Model Context Protocol) server that provides intelligent document

 doc-bot is an intelligent documentation server that:
 - 🔍 **Searches** your project documentation instantly
-- 🧠 **
+- 🧠 **Auto-indexes** content for smart inference (no manual keyword mapping!)
 - 📋 **Applies** global rules to every AI interaction
 - 🎯 **Suggests** contextual documentation based on file patterns
+- 🤖 **Detects** code patterns, frameworks, and keywords automatically
 - 🔄 **Updates** automatically when docs change

 ## Installation
@@ -66,6 +67,58 @@ your-project/
 - **Guides** (`guides/`): Step-by-step instructions for specific tasks
 - **Reference** (`reference/`): Quick lookups and troubleshooting

+### Example documentation files:
+
+**Global Rule Example** (`doc-bot/core/coding-standards.md`):
+```markdown
+---
+title: "Coding Standards"
+description: "Core coding standards that apply to all code"
+keywords: ["code-quality", "standards", "best-practices"]
+tags: ["core", "quality"]
+---
+
+# Coding Standards
+
+- Use 2 spaces for indentation
+- Maximum line length: 100 characters
+- Always use const/let, never var
+- Prefer async/await over promises
+- Write descriptive variable names
+```
+
+**Contextual Rule Example** (`doc-bot/guides/testing.md`):
+```markdown
+---
+title: "Testing Guide"
+description: "How to write and run tests"
+keywords: ["testing", "jest", "tdd", "unit-tests"]
+tags: ["testing", "quality"]
+---
+
+# Testing Guide
+
+All test files should:
+- Use describe/it blocks for organization
+- Include both positive and negative test cases
+- Mock external dependencies
+- Aim for 80%+ code coverage
+
+Run tests with: `npm test`
+```
+
+**👀 See `examples/` folder for complete example files with proper frontmatter and content structure.**
+
+## Rule Enforcement
+
+Doc-bot ensures your rules are **always considered** through MCP system prompt injection:
+
+- **Global Rules**: Automatically injected into the agent's system prompt as critical requirements
+- **Contextual Rules**: Applied when working with matching files/patterns
+- **Automatic Compliance**: Agent must check rules before generating any code
+
+The `docs://system-prompt` resource delivers your global rules directly to the agent's context, making rule violations impossible to ignore.
+
 ## The manifest file

 The `doc-bot/manifest.json` file controls how your documentation works:
@@ -84,18 +137,6 @@ The `doc-bot/manifest.json` file controls how your documentation works:
     "*.spec.js": ["guides/testing.md"],
     "src/components/*": ["guides/react-components.md"],
     "src/api/*": ["guides/api-development.md"]
-  },
-  "inference": {
-    "keywords": {
-      "testing": ["guides/testing.md"],
-      "deployment": ["guides/deployment.md"],
-      "api": ["guides/api-development.md"]
-    },
-    "patterns": {
-      "describe(": ["guides/testing.md"],
-      "it(": ["guides/testing.md"],
-      "fetch(": ["guides/api-development.md"]
-    }
   }
 }
 ```
@@ -104,8 +145,38 @@ The `doc-bot/manifest.json` file controls how your documentation works:

 - **`globalRules`**: Documents that apply to every AI interaction
 - **`contextualRules`**: Documents triggered by specific file patterns (e.g., test files → testing guide)
-
-
+
+### 🎯 Automatic Inference (New!)
+
+doc-bot automatically analyzes your documentation content to build smart indexes. No more manual keyword mappings! It automatically:
+
+- **Extracts keywords** from document metadata (frontmatter)
+- **Detects technical terms** in your documentation content
+- **Recognizes code patterns** in code blocks (React hooks, SQL commands, etc.)
+- **Identifies frameworks** mentioned in your docs
+- **Indexes file extensions** referenced in documentation
+
+Just write good documentation with descriptive frontmatter, and doc-bot handles the rest!
+
+### Writing documentation for best results
+
+To maximize the automatic inference capabilities, include frontmatter in your markdown files:
+
+```markdown
+---
+title: "React Component Guidelines"
+description: "Best practices for building React components"
+keywords: ["react", "components", "hooks", "jsx"]
+tags: ["frontend", "development"]
+category: "guides"
+---
+
+# React Component Guidelines
+
+Your documentation content here...
+```
+
+The automatic indexing will use this metadata along with analyzing your content to provide intelligent suggestions.

 ## Development setup

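For orientation, the new automatic inference works over plain document objects built from these markdown files. Below is an illustrative sketch (not part of the package) of the shape such a document takes once the frontmatter above is parsed; the field names follow the README example, the weights in the comments come from the `DocumentIndex` service added later in this diff, and everything else is an assumption.

```js
// Hypothetical in-memory document, roughly as doc-bot's indexer sees it.
const exampleDocument = {
  fileName: 'guides/react-components.md',  // used as the candidate key when ranking
  metadata: {
    title: 'React Component Guidelines',
    description: 'Best practices for building React components',
    keywords: ['react', 'components', 'hooks', 'jsx'], // keyword index, score 10 each
    tags: ['frontend', 'development'],                  // topic index, score 5 each
    category: 'guides'                                  // topic index, score 5
  },
  // Headings, fenced code blocks, framework names, and `*.ext` mentions in the
  // body are also indexed automatically, at lower scores (2-6).
  content: '# React Component Guidelines\n\nUse the useState and useEffect hooks...'
};
```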
package/package.json
CHANGED
package/src/index.js
CHANGED
@@ -68,6 +68,12 @@ class DocsServer {
           name: 'Documentation Manifest',
           description: 'Project documentation configuration',
           mimeType: 'application/json'
+        },
+        {
+          uri: 'docs://system-prompt',
+          name: 'System Prompt Injection',
+          description: 'Critical rules that must be considered before generating any code',
+          mimeType: 'text/plain'
         }
       ]
     };
@@ -107,6 +113,16 @@ class DocsServer {
             text: JSON.stringify(manifest, null, 2)
           }]
         };
+
+      case 'docs://system-prompt':
+        const systemPrompt = await this.generateSystemPrompt();
+        return {
+          contents: [{
+            uri,
+            mimeType: 'text/plain',
+            text: systemPrompt
+          }]
+        };

       default:
         throw new Error(`Unknown resource: ${uri}`);
@@ -351,6 +367,26 @@ class DocsServer {

     return output;
   }
+
+  async generateSystemPrompt() {
+    const globalRules = await this.docService.getGlobalRules();
+
+    if (!globalRules || globalRules.length === 0) {
+      return 'No global documentation rules defined.';
+    }
+
+    let prompt = '# CRITICAL: Project Documentation Rules\n\n';
+    prompt += 'IMPORTANT: You MUST follow these rules before generating ANY code:\n\n';
+
+    globalRules.forEach((rule, index) => {
+      prompt += `## Rule ${index + 1}: ${rule.metadata?.title || rule.fileName}\n`;
+      prompt += `${rule.content}\n\n`;
+    });
+
+    prompt += '⚠️ VIOLATION OF THESE RULES IS NOT ACCEPTABLE. Always check compliance before responding.\n';
+
+    return prompt;
+  }

   async start() {
     // Initialize services
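To make the prompt format above concrete, here is a small standalone sketch (illustrative, not shipped in the package) that mirrors the string-building in `generateSystemPrompt()`; the sample rule object is hypothetical, while the output format follows the code in the hunk above.

```js
// Mirrors the formatting logic of DocsServer.generateSystemPrompt() shown above.
function formatSystemPrompt(globalRules) {
  if (!globalRules || globalRules.length === 0) {
    return 'No global documentation rules defined.';
  }

  let prompt = '# CRITICAL: Project Documentation Rules\n\n';
  prompt += 'IMPORTANT: You MUST follow these rules before generating ANY code:\n\n';

  globalRules.forEach((rule, index) => {
    prompt += `## Rule ${index + 1}: ${rule.metadata?.title || rule.fileName}\n`;
    prompt += `${rule.content}\n\n`;
  });

  prompt += '⚠️ VIOLATION OF THESE RULES IS NOT ACCEPTABLE. Always check compliance before responding.\n';
  return prompt;
}

// Hypothetical rule, shaped like the coding-standards example in the README:
const sample = formatSystemPrompt([{
  fileName: 'core/coding-standards.md',
  metadata: { title: 'Coding Standards' },
  content: '- Use 2 spaces for indentation\n- Always use const/let, never var'
}]);
// sample begins with:
// # CRITICAL: Project Documentation Rules
//
// IMPORTANT: You MUST follow these rules before generating ANY code:
//
// ## Rule 1: Coding Standards
// - Use 2 spaces for indentation
// ...
```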
package/src/services/DocumentIndex.js
ADDED

@@ -0,0 +1,388 @@
+class DocumentIndex {
+  constructor() {
+    this.keywordIndex = new Map();
+    this.topicIndex = new Map();
+    this.patternIndex = new Map();
+    this.extensionIndex = new Map();
+  }
+
+  async buildIndexes(documents) {
+    for (const document of documents) {
+      await this.indexDocument(document);
+    }
+  }
+
+  async indexDocument(document) {
+    if (!document) {
+      return;
+    }
+
+    // Index keywords from metadata (if present)
+    if (document.metadata?.keywords) {
+      const keywords = Array.isArray(document.metadata.keywords)
+        ? document.metadata.keywords
+        : [document.metadata.keywords];
+
+      for (const keyword of keywords) {
+        this.addToIndex(this.keywordIndex, keyword.toLowerCase(), document, 10);
+      }
+    }
+
+    // Index topics from tags and category (if present)
+    if (document.metadata?.tags) {
+      const tags = Array.isArray(document.metadata.tags)
+        ? document.metadata.tags
+        : [document.metadata.tags];
+
+      for (const tag of tags) {
+        this.addToIndex(this.topicIndex, tag.toLowerCase(), document, 5);
+      }
+    }
+
+    if (document.metadata?.category) {
+      this.addToIndex(this.topicIndex, document.metadata.category.toLowerCase(), document, 5);
+    }
+
+    // Index content keywords
+    if (document.content) {
+      await this.indexContentKeywords(document);
+    }
+  }
+
+  async indexContentKeywords(document) {
+    const content = document.content;
+
+    // Extract keywords from code blocks
+    this.extractCodeBlockKeywords(content, document);
+
+    // Extract keywords from headings
+    this.extractHeadingKeywords(content, document);
+
+    // Extract file extensions
+    this.extractFileExtensions(content, document);
+
+    // Extract framework and library names
+    this.extractFrameworkNames(content, document);
+
+    // Extract code patterns
+    this.extractCodePatterns(content, document);
+  }
+
+  extractCodeBlockKeywords(content, document) {
+    // Match code blocks with language specifiers
+    const codeBlockRegex = /```(\w+)?\n([\s\S]*?)```/g;
+    let match;
+
+    while ((match = codeBlockRegex.exec(content)) !== null) {
+      const codeContent = match[2];
+
+      // Extract common library/framework names from code
+      const patterns = [
+        /require\(['"]([^'"]+)['"]\)/g,
+        /import\s+(\w+)/g,
+        /from\s+['"]([^'"]+)['"]/g,
+        /\b(express|mongoose|bodyParser|flask|sqlalchemy|react|vue|angular|django|fastapi|axios|lodash|moment|uuid)\b/gi
+      ];
+
+      for (const pattern of patterns) {
+        let patternMatch;
+        while ((patternMatch = pattern.exec(codeContent)) !== null) {
+          const keyword = patternMatch[1]?.toLowerCase() || patternMatch[0]?.toLowerCase();
+          if (keyword && !this.isCommonWord(keyword)) {
+            this.addToIndex(this.keywordIndex, keyword, document, 3); // Lower score for content keywords
+          }
+        }
+      }
+    }
+  }
+
+  extractHeadingKeywords(content, document) {
+    // Extract from markdown headings
+    const headingRegex = /^#{1,6}\s+(.+)$/gm;
+    let match;
+
+    while ((match = headingRegex.exec(content)) !== null) {
+      const heading = match[1];
+      const words = heading.split(/\s+/);
+
+      for (const word of words) {
+        const cleanWord = word.toLowerCase().replace(/[^\w\-\/]/g, '');
+        if (cleanWord && !this.isCommonWord(cleanWord)) {
+          this.addToIndex(this.keywordIndex, cleanWord, document, 2); // Lower score for content keywords
+        }
+      }
+    }
+  }
+
+  extractFileExtensions(content, document) {
+    // Extract file extensions mentioned in content
+    const extensionRegex = /\*\.(\w+)\b/g;
+    let match;
+
+    while ((match = extensionRegex.exec(content)) !== null) {
+      const extension = match[1].toLowerCase();
+      this.addToIndex(this.extensionIndex, extension, document);
+    }
+  }
+
+  extractFrameworkNames(content, document) {
+    // Common framework and technology names
+    const techPatterns = [
+      /\b(react|vue|angular|svelte|next\.js|nuxt\.js|gatsby)\b/gi,
+      /\b(node\.js|express|fastify|koa|nest\.js)\b/gi,
+      /\b(postgresql|mysql|mongodb|redis|elasticsearch)\b/gi,
+      /\b(docker|kubernetes|terraform|ansible)\b/gi,
+      /\b(aws|azure|gcp|heroku|vercel|netlify)\b/gi,
+      /\b(typescript|javascript|python|java|golang|rust)\b/gi
+    ];
+
+    for (const pattern of techPatterns) {
+      let match;
+      while ((match = pattern.exec(content)) !== null) {
+        const keyword = match[0].toLowerCase();
+        if (!this.isCommonWord(keyword)) {
+          this.addToIndex(this.keywordIndex, keyword, document, 2); // Lower score for content keywords
+        }
+      }
+    }
+  }
+
+  extractCodePatterns(content, document) {
+    // Match code blocks with language specifiers
+    const codeBlockRegex = /```(\w+)?\n([\s\S]*?)```/g;
+    let match;
+
+    while ((match = codeBlockRegex.exec(content)) !== null) {
+      const language = match[1]?.toLowerCase();
+      const codeContent = match[2];
+
+      // Define patterns for different languages
+      const patterns = this.getCodePatterns(language);
+
+      for (const pattern of patterns) {
+        let patternMatch;
+        while ((patternMatch = pattern.regex.exec(codeContent)) !== null) {
+          const patternKey = pattern.key || patternMatch[0];
+          this.addToIndex(this.patternIndex, patternKey, document, 6); // Medium-high score for patterns
+        }
+      }
+    }
+  }
+
+  getCodePatterns(language) {
+    const patterns = [];
+
+    // JavaScript/TypeScript patterns
+    if (!language || language === 'javascript' || language === 'js' || language === 'typescript' || language === 'ts') {
+      patterns.push(
+        { regex: /\buseState\b/g, key: 'useState' },
+        { regex: /\buseEffect\b/g, key: 'useEffect' },
+        { regex: /\buseCallback\b/g, key: 'useCallback' },
+        { regex: /\buseMemo\b/g, key: 'useMemo' },
+        { regex: /\buseContext\b/g, key: 'useContext' },
+        { regex: /\buseReducer\b/g, key: 'useReducer' },
+        { regex: /app\.get\(/g, key: 'app.get' },
+        { regex: /app\.post\(/g, key: 'app.post' },
+        { regex: /app\.put\(/g, key: 'app.put' },
+        { regex: /app\.delete\(/g, key: 'app.delete' },
+        { regex: /describe\(/g, key: 'describe(' },
+        { regex: /it\(/g, key: 'it(' },
+        { regex: /test\(/g, key: 'test(' },
+        { regex: /expect\(/g, key: 'expect(' },
+        { regex: /async\s+function/g, key: 'async function' },
+        { regex: /\.then\(/g, key: '.then(' },
+        { regex: /\.catch\(/g, key: '.catch(' },
+        { regex: /await\s+/g, key: 'await' }
+      );
+    }
+
+    // Python patterns
+    if (language === 'python' || language === 'py') {
+      patterns.push(
+        { regex: /\bdef\s+/g, key: 'def ' },
+        { regex: /\bclass\s+/g, key: 'class ' },
+        { regex: /\b__init__\b/g, key: '__init__' },
+        { regex: /\bif\s+__name__\s*==\s*['"]__main__['"]/g, key: 'if __name__' },
+        { regex: /\bimport\s+/g, key: 'import ' },
+        { regex: /\bfrom\s+\w+\s+import/g, key: 'from import' },
+        { regex: /\btry:/g, key: 'try:' },
+        { regex: /\bexcept\s+/g, key: 'except ' },
+        { regex: /\bwith\s+/g, key: 'with ' },
+        { regex: /@\w+/g, key: 'decorator' }
+      );
+    }
+
+    // SQL patterns
+    if (language === 'sql') {
+      patterns.push(
+        { regex: /\bSELECT\b/gi, key: 'SELECT' },
+        { regex: /\bINSERT\s+INTO\b/gi, key: 'INSERT INTO' },
+        { regex: /\bUPDATE\b/gi, key: 'UPDATE' },
+        { regex: /\bDELETE\s+FROM\b/gi, key: 'DELETE FROM' },
+        { regex: /\bCREATE\s+TABLE\b/gi, key: 'CREATE TABLE' },
+        { regex: /\bALTER\s+TABLE\b/gi, key: 'ALTER TABLE' },
+        { regex: /\bDROP\s+TABLE\b/gi, key: 'DROP TABLE' },
+        { regex: /\bJOIN\b/gi, key: 'JOIN' },
+        { regex: /\bLEFT\s+JOIN\b/gi, key: 'LEFT JOIN' },
+        { regex: /\bINNER\s+JOIN\b/gi, key: 'INNER JOIN' }
+      );
+    }
+
+    // Java patterns
+    if (language === 'java') {
+      patterns.push(
+        { regex: /\bpublic\s+class\b/g, key: 'public class' },
+        { regex: /\bprivate\s+\w+/g, key: 'private' },
+        { regex: /\bpublic\s+static\s+void\s+main/g, key: 'main method' },
+        { regex: /@Override/g, key: '@Override' },
+        { regex: /\bnew\s+\w+\(/g, key: 'new' }
+      );
+    }
+
+    // Docker patterns
+    if (language === 'dockerfile' || language === 'docker') {
+      patterns.push(
+        { regex: /\bFROM\b/gi, key: 'FROM' },
+        { regex: /\bRUN\b/gi, key: 'RUN' },
+        { regex: /\bCOPY\b/gi, key: 'COPY' },
+        { regex: /\bADD\b/gi, key: 'ADD' },
+        { regex: /\bEXPOSE\b/gi, key: 'EXPOSE' },
+        { regex: /\bCMD\b/gi, key: 'CMD' },
+        { regex: /\bENTRYPOINT\b/gi, key: 'ENTRYPOINT' }
+      );
+    }
+
+    return patterns;
+  }
+
+  isCommonWord(word) {
+    const commonWords = new Set([
+      'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
+      'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
+      'will', 'would', 'could', 'should', 'may', 'might', 'can', 'must', 'shall',
+      'this', 'that', 'these', 'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they',
+      'me', 'him', 'her', 'us', 'them', 'my', 'your', 'his', 'her', 'its', 'our', 'their',
+      'about', 'above', 'across', 'after', 'against', 'along', 'among', 'around', 'before',
+      'behind', 'below', 'beneath', 'beside', 'between', 'beyond', 'during', 'except',
+      'from', 'inside', 'into', 'near', 'outside', 'over', 'since', 'through', 'under',
+      'until', 'up', 'upon', 'within', 'without', 'how', 'what', 'when', 'where', 'why',
+      'who', 'which', 'whose', 'whom', 'very', 'so', 'too', 'quite', 'rather', 'such',
+      'guide', 'documentation', 'helps', 'developers', 'system', 'useful', 'explains',
+      'use', 'using', 'used', 'get', 'getting', 'set', 'setting', 'make', 'making',
+      'create', 'creating', 'build', 'building', 'run', 'running', 'start', 'starting'
+    ]);
+
+    return commonWords.has(word.toLowerCase()) || word.length < 2;
+  }
+
+  addToIndex(index, key, document, score = 1) {
+    if (!index.has(key)) {
+      index.set(key, []);
+    }
+    index.get(key).push({ document, score });
+  }
+
+  findRelevantDocs(context) {
+    if (!context || Object.keys(context).length === 0) {
+      return [];
+    }
+
+    const candidates = new Map();
+
+    // Search by query keywords
+    if (context.query) {
+      this.searchKeywords(context.query, candidates);
+    }
+
+    // Search by code snippet patterns
+    if (context.codeSnippet) {
+      this.searchCodePatterns(context.codeSnippet, candidates);
+    }
+
+    // Search by file extension
+    if (context.filePath) {
+      this.searchFileExtension(context.filePath, candidates);
+    }
+
+    return this.scoreAndRank(candidates);
+  }
+
+  searchKeywords(query, candidates) {
+    const queryLower = query.toLowerCase();
+    const words = queryLower.split(/\s+/);
+
+    for (const word of words) {
+      // Search in keyword index
+      if (this.keywordIndex.has(word)) {
+        const entries = this.keywordIndex.get(word);
+        for (const entry of entries) {
+          this.addCandidate(candidates, entry.document, entry.score);
+        }
+      }
+
+      // Search in topic index
+      if (this.topicIndex.has(word)) {
+        const entries = this.topicIndex.get(word);
+        for (const entry of entries) {
+          this.addCandidate(candidates, entry.document, entry.score);
+        }
+      }
+    }
+  }
+
+  searchCodePatterns(codeSnippet, candidates) {
+    if (this.patternIndex.size > 0) {
+      // Search for patterns in the code snippet
+      for (const [pattern, entries] of this.patternIndex) {
+        // Check if the pattern exists in the code snippet
+        let found = false;
+
+        // For SQL patterns, do case-insensitive matching
+        if (pattern.toUpperCase() === pattern) {
+          found = codeSnippet.toUpperCase().includes(pattern);
+        } else {
+          found = codeSnippet.includes(pattern);
+        }
+
+        if (found) {
+          for (const entry of entries) {
+            this.addCandidate(candidates, entry.document, 8); // High score for pattern match
+          }
+        }
+      }
+    }
+  }
+
+  searchFileExtension(filePath, candidates) {
+    // For now, implement basic extension matching
+    // This will be enhanced in later iterations
+    if (this.extensionIndex.size > 0) {
+      const extension = filePath.split('.').pop()?.toLowerCase();
+      if (extension && this.extensionIndex.has(extension)) {
+        const entries = this.extensionIndex.get(extension);
+        for (const entry of entries) {
+          this.addCandidate(candidates, entry.document, 3); // Lower score for extension match
+        }
+      }
+    }
+  }
+
+  addCandidate(candidates, document, score) {
+    const key = document.fileName || document.filePath;
+    if (!candidates.has(key)) {
+      candidates.set(key, { document, score: 0 });
+    }
+    candidates.get(key).score += score;
+  }
+
+  scoreAndRank(candidates) {
+    const results = Array.from(candidates.values());
+
+    // Sort by score (descending)
+    results.sort((a, b) => b.score - a.score);
+
+    return results;
+  }
+}
+
+module.exports = { DocumentIndex };
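A hedged usage sketch of the new class (not shipped in the package; the sample document and the query are made up): index one document and query it, with the scores the methods above would assign noted in comments. The relative require path assumes you are working inside the package's `src/services` tree.

```js
const { DocumentIndex } = require('./DocumentIndex');

async function demo() {
  const index = new DocumentIndex();

  await index.buildIndexes([{
    fileName: 'guides/testing.md',
    metadata: {
      title: 'Testing Guide',
      keywords: ['testing', 'jest'],   // keyword index, score 10 each
      tags: ['testing']                // topic index, score 5
    },
    content: '# Testing Guide\n\n```js\ndescribe(\'sum\', () => {\n  it(\'adds numbers\', () => {});\n});\n```\n'
  }]);

  // Keyword lookups: "jest" (10) + "testing" (10 metadata + 2 heading + 5 tag) = 27
  // Pattern lookups: "describe(" and "it(" both appear in the snippet, 8 each = 16
  const results = index.findRelevantDocs({
    query: 'jest testing',
    codeSnippet: "describe('sum', () => { it('adds', () => {}); });"
  });

  console.log(results[0].score);              // 43 with the weights above
  console.log(results[0].document.fileName);  // 'guides/testing.md'
}

demo();
```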
package/src/services/InferenceEngine.js
CHANGED

@@ -1,13 +1,28 @@
 const path = require('path');
+const { DocumentIndex } = require('./DocumentIndex');

 class InferenceEngine {
   constructor(documentationService, manifestLoader = null) {
     this.docService = documentationService;
     this.manifestLoader = manifestLoader;
+    this.documentIndex = new DocumentIndex();
+    this.isIndexBuilt = false;
   }

   async initialize() {
-    //
+    // Build the document index with all available documents
+    await this.buildDocumentIndex();
+  }
+
+  async buildDocumentIndex() {
+    try {
+      const documents = await this.docService.getAllDocuments();
+      await this.documentIndex.buildIndexes(documents);
+      this.isIndexBuilt = true;
+    } catch (error) {
+      console.error('Error building document index:', error);
+      this.isIndexBuilt = false;
+    }
   }

   async getRelevantDocumentation(context) {
@@ -48,6 +63,25 @@ class InferenceEngine {
   }

   async getInferredDocs(context) {
+    // Ensure the index is built
+    if (!this.isIndexBuilt) {
+      await this.buildDocumentIndex();
+    }
+
+    // Use the smart DocumentIndex for inference
+    if (this.isIndexBuilt) {
+      const results = this.documentIndex.findRelevantDocs(context);
+      return results.map(result => ({
+        ...result.document,
+        inferenceScore: result.score
+      }));
+    }
+
+    // Fallback to legacy method if index building failed
+    return this.getLegacyInferredDocs(context);
+  }
+
+  async getLegacyInferredDocs(context) {
     const docs = [];

     // Keyword-based inference
@@ -181,7 +215,14 @@ class InferenceEngine {
     }

     if (inferredDocs.length > 0) {
-
+      // If using DocumentIndex, factor in inference scores
+      if (this.isIndexBuilt && inferredDocs.some(doc => doc.inferenceScore)) {
+        const avgScore = inferredDocs.reduce((sum, doc) => sum + (doc.inferenceScore || 0), 0) / inferredDocs.length;
+        const normalizedScore = Math.min(avgScore / 20, 1.0); // Normalize to 0-1
+        confidence += normalizedScore * 0.3;
+      } else {
+        confidence += Math.min(inferredDocs.length * 0.05, 0.2);
+      }
     }

     // Reduce confidence if no matches found
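Finally, a worked example (illustrative numbers, not package code) of how the confidence change above behaves: scores coming from the DocumentIndex are averaged, divided by 20, capped at 1.0, and weighted by 0.3, whereas the legacy path still adds 0.05 per inferred document up to 0.2.

```js
// Sketch of the two confidence contributions in the hunk above, with made-up scores.
const inferredDocs = [
  { fileName: 'guides/testing.md', inferenceScore: 43 },
  { fileName: 'guides/api-development.md', inferenceScore: 12 }
];

// New path (index built, docs carry inferenceScore):
const avgScore = inferredDocs.reduce((sum, doc) => sum + (doc.inferenceScore || 0), 0) / inferredDocs.length; // 27.5
const normalizedScore = Math.min(avgScore / 20, 1.0); // capped at 1.0
const indexedBoost = normalizedScore * 0.3;            // 0.3

// Legacy path (no index): 0.05 per doc, capped at 0.2.
const legacyBoost = Math.min(inferredDocs.length * 0.05, 0.2); // 0.1

console.log({ indexedBoost, legacyBoost });
```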