@afterxleep/doc-bot 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +90 -27
- package/bin/doc-bot.js +10 -10
- package/package.json +1 -1
- package/src/services/DocumentIndex.js +388 -0
- package/src/services/InferenceEngine.js +43 -2
- package/src/services/__tests__/DocumentIndex.test.js +807 -0
- package/src/services/__tests__/InferenceEngine.integration.test.js +300 -0
package/README.md
CHANGED
|
@@ -3,36 +3,39 @@
|
|
|
3
3
|
[](https://www.npmjs.com/package/@afterxleep/doc-bot)
|
|
4
4
|
[](https://opensource.org/licenses/MIT)
|
|
5
5
|
|
|
6
|
-
A generic MCP (Model Context Protocol) server that provides intelligent documentation access for any project. Works with
|
|
6
|
+
A generic MCP (Model Context Protocol) server that provides intelligent documentation access for any project. Works with any MCP-compatible AI tools and IDEs.
|
|
7
7
|
|
|
8
8
|
## What is doc-bot?
|
|
9
9
|
|
|
10
10
|
doc-bot is an intelligent documentation server that:
|
|
11
11
|
- 🔍 **Searches** your project documentation instantly
|
|
12
|
-
- 🧠 **
|
|
12
|
+
- 🧠 **Auto-indexes** content for smart inference (no manual keyword mapping!)
|
|
13
13
|
- 📋 **Applies** global rules to every AI interaction
|
|
14
14
|
- 🎯 **Suggests** contextual documentation based on file patterns
|
|
15
|
+
- 🤖 **Detects** code patterns, frameworks, and keywords automatically
|
|
15
16
|
- 🔄 **Updates** automatically when docs change
|
|
16
17
|
|
|
17
18
|
## Installation
|
|
18
19
|
|
|
19
|
-
|
|
20
|
+
1. **Create your documentation folder** in your project root (see organization section below)
|
|
20
21
|
|
|
21
|
-
|
|
22
|
+
2. **Add doc-bot to your MCP-compatible AI tool configuration**:
|
|
23
|
+
|
|
24
|
+
**For Claude Desktop** (`~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
22
25
|
```json
|
|
23
26
|
{
|
|
24
27
|
"mcpServers": {
|
|
25
28
|
"docs": {
|
|
26
29
|
"command": "npx",
|
|
27
|
-
"args": ["@afterxleep/doc-bot"]
|
|
30
|
+
"args": ["@afterxleep/doc-bot", "--docs", "./doc-bot"]
|
|
28
31
|
}
|
|
29
32
|
}
|
|
30
33
|
}
|
|
31
34
|
```
|
|
32
35
|
|
|
33
|
-
|
|
36
|
+
**For Cursor or other MCP tools**: Add similar configuration pointing to your documentation folder
|
|
34
37
|
|
|
35
|
-
3. **
|
|
38
|
+
3. **Restart your AI tool**
|
|
36
39
|
|
|
37
40
|
## How to organize your documentation
|
|
38
41
|
|
|
@@ -53,9 +56,9 @@ your-project/
|
|
|
53
56
|
└── package.json
|
|
54
57
|
```
|
|
55
58
|
|
|
56
|
-
**Note:** You can use
|
|
57
|
-
```
|
|
58
|
-
|
|
59
|
+
**Note:** You can use any folder name - just specify it in your MCP configuration:
|
|
60
|
+
```json
|
|
61
|
+
"args": ["@afterxleep/doc-bot", "--docs", "./my-custom-docs"]
|
|
59
62
|
```
|
|
60
63
|
|
|
61
64
|
### Documentation types:
|
|
@@ -64,6 +67,48 @@ npx @afterxleep/doc-bot --docs ./my-custom-docs
|
|
|
64
67
|
- **Guides** (`guides/`): Step-by-step instructions for specific tasks
|
|
65
68
|
- **Reference** (`reference/`): Quick lookups and troubleshooting
|
|
66
69
|
|
|
70
|
+
### Example documentation files:
|
|
71
|
+
|
|
72
|
+
**Global Rule Example** (`doc-bot/core/coding-standards.md`):
|
|
73
|
+
```markdown
|
|
74
|
+
---
|
|
75
|
+
title: "Coding Standards"
|
|
76
|
+
description: "Core coding standards that apply to all code"
|
|
77
|
+
keywords: ["code-quality", "standards", "best-practices"]
|
|
78
|
+
tags: ["core", "quality"]
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
# Coding Standards
|
|
82
|
+
|
|
83
|
+
- Use 2 spaces for indentation
|
|
84
|
+
- Maximum line length: 100 characters
|
|
85
|
+
- Always use const/let, never var
|
|
86
|
+
- Prefer async/await over promises
|
|
87
|
+
- Write descriptive variable names
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
**Contextual Rule Example** (`doc-bot/guides/testing.md`):
|
|
91
|
+
```markdown
|
|
92
|
+
---
|
|
93
|
+
title: "Testing Guide"
|
|
94
|
+
description: "How to write and run tests"
|
|
95
|
+
keywords: ["testing", "jest", "tdd", "unit-tests"]
|
|
96
|
+
tags: ["testing", "quality"]
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
# Testing Guide
|
|
100
|
+
|
|
101
|
+
All test files should:
|
|
102
|
+
- Use describe/it blocks for organization
|
|
103
|
+
- Include both positive and negative test cases
|
|
104
|
+
- Mock external dependencies
|
|
105
|
+
- Aim for 80%+ code coverage
|
|
106
|
+
|
|
107
|
+
Run tests with: `npm test`
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
**👀 See `examples/` folder for complete example files with proper frontmatter and content structure.**
|
|
111
|
+
|
|
67
112
|
## The manifest file
|
|
68
113
|
|
|
69
114
|
The `doc-bot/manifest.json` file controls how your documentation works:
|
|
@@ -82,18 +127,6 @@ The `doc-bot/manifest.json` file controls how your documentation works:
|
|
|
82
127
|
"*.spec.js": ["guides/testing.md"],
|
|
83
128
|
"src/components/*": ["guides/react-components.md"],
|
|
84
129
|
"src/api/*": ["guides/api-development.md"]
|
|
85
|
-
},
|
|
86
|
-
"inference": {
|
|
87
|
-
"keywords": {
|
|
88
|
-
"testing": ["guides/testing.md"],
|
|
89
|
-
"deployment": ["guides/deployment.md"],
|
|
90
|
-
"api": ["guides/api-development.md"]
|
|
91
|
-
},
|
|
92
|
-
"patterns": {
|
|
93
|
-
"describe(": ["guides/testing.md"],
|
|
94
|
-
"it(": ["guides/testing.md"],
|
|
95
|
-
"fetch(": ["guides/api-development.md"]
|
|
96
|
-
}
|
|
97
130
|
}
|
|
98
131
|
}
|
|
99
132
|
```
|
|
@@ -102,8 +135,38 @@ The `doc-bot/manifest.json` file controls how your documentation works:
|
|
|
102
135
|
|
|
103
136
|
- **`globalRules`**: Documents that apply to every AI interaction
|
|
104
137
|
- **`contextualRules`**: Documents triggered by specific file patterns (e.g., test files → testing guide)
|
|
105
|
-
|
|
106
|
-
|
|
138
|
+
|
|
139
|
+
### 🎯 Automatic Inference (New!)
|
|
140
|
+
|
|
141
|
+
doc-bot automatically analyzes your documentation content to build smart indexes. No more manual keyword mappings! It automatically:
|
|
142
|
+
|
|
143
|
+
- **Extracts keywords** from document metadata (frontmatter)
|
|
144
|
+
- **Detects technical terms** in your documentation content
|
|
145
|
+
- **Recognizes code patterns** in code blocks (React hooks, SQL commands, etc.)
|
|
146
|
+
- **Identifies frameworks** mentioned in your docs
|
|
147
|
+
- **Indexes file extensions** referenced in documentation
|
|
148
|
+
|
|
149
|
+
Just write good documentation with descriptive frontmatter, and doc-bot handles the rest!
|
|
150
|
+
|
|
151
|
+
### Writing documentation for best results
|
|
152
|
+
|
|
153
|
+
To maximize the automatic inference capabilities, include frontmatter in your markdown files:
|
|
154
|
+
|
|
155
|
+
```markdown
|
|
156
|
+
---
|
|
157
|
+
title: "React Component Guidelines"
|
|
158
|
+
description: "Best practices for building React components"
|
|
159
|
+
keywords: ["react", "components", "hooks", "jsx"]
|
|
160
|
+
tags: ["frontend", "development"]
|
|
161
|
+
category: "guides"
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
# React Component Guidelines
|
|
165
|
+
|
|
166
|
+
Your documentation content here...
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
The automatic indexing will use this metadata along with analyzing your content to provide intelligent suggestions.
|
|
107
170
|
|
|
108
171
|
## Development setup
|
|
109
172
|
|
|
@@ -137,7 +200,7 @@ The `doc-bot/manifest.json` file controls how your documentation works:
|
|
|
137
200
|
|
|
138
201
|
### Testing your setup
|
|
139
202
|
|
|
140
|
-
Ask
|
|
203
|
+
Ask your AI assistant something like "What documentation is available?" to test that doc-bot is working.
|
|
141
204
|
|
|
142
205
|
### CLI Options
|
|
143
206
|
|
|
@@ -145,9 +208,9 @@ Ask Claude something like "What documentation is available?" to test that doc-bo
|
|
|
145
208
|
doc-bot [options]
|
|
146
209
|
|
|
147
210
|
Options:
|
|
211
|
+
-d, --docs <path> Path to docs folder (required)
|
|
212
|
+
-c, --config <path> Path to manifest file (default: <docs-path>/manifest.json)
|
|
148
213
|
-p, --port <port> Port to run server on (default: 3000)
|
|
149
|
-
-d, --docs <path> Path to docs folder (default: ./doc-bot)
|
|
150
|
-
-c, --config <path> Path to manifest file (default: ./doc-bot/manifest.json)
|
|
151
214
|
-v, --verbose Enable verbose logging
|
|
152
215
|
-w, --watch Watch for file changes
|
|
153
216
|
-h, --help Show help
|
package/bin/doc-bot.js
CHANGED
|
@@ -8,10 +8,10 @@ const { DocsServer } = require('../src/index.js');
|
|
|
8
8
|
program
|
|
9
9
|
.name('doc-bot')
|
|
10
10
|
.description('Generic MCP server for intelligent documentation access')
|
|
11
|
-
.version('1.0.
|
|
11
|
+
.version('1.0.2')
|
|
12
12
|
.option('-p, --port <port>', 'Port to run server on', '3000')
|
|
13
|
-
.
|
|
14
|
-
.option('-c, --config <path>', 'Path to manifest file'
|
|
13
|
+
.requiredOption('-d, --docs <path>', 'Path to docs folder')
|
|
14
|
+
.option('-c, --config <path>', 'Path to manifest file')
|
|
15
15
|
.option('-v, --verbose', 'Enable verbose logging')
|
|
16
16
|
.option('-w, --watch', 'Watch for file changes')
|
|
17
17
|
.parse();
|
|
@@ -20,19 +20,19 @@ const options = program.opts();
|
|
|
20
20
|
|
|
21
21
|
async function main() {
|
|
22
22
|
const docsPath = path.resolve(options.docs);
|
|
23
|
-
const configPath = path.resolve(options.config);
|
|
23
|
+
const configPath = options.config ? path.resolve(options.config) : path.resolve(options.docs, 'manifest.json');
|
|
24
24
|
|
|
25
|
-
// Check if
|
|
25
|
+
// Check if documentation folder exists
|
|
26
26
|
if (!await fs.pathExists(docsPath)) {
|
|
27
27
|
console.error(`❌ Documentation folder not found: ${docsPath}`);
|
|
28
28
|
console.log('');
|
|
29
|
-
console.log('📖 To get started, create
|
|
29
|
+
console.log('📖 To get started, create your documentation folder:');
|
|
30
30
|
console.log('');
|
|
31
|
-
console.log(
|
|
32
|
-
console.log(
|
|
33
|
-
console.log(
|
|
31
|
+
console.log(` mkdir ${path.basename(docsPath)}`);
|
|
32
|
+
console.log(` echo '{"name": "My Project Documentation", "globalRules": []}' > ${path.basename(docsPath)}/manifest.json`);
|
|
33
|
+
console.log(` echo "# Getting Started" > ${path.basename(docsPath)}/README.md`);
|
|
34
34
|
console.log('');
|
|
35
|
-
console.log('Then
|
|
35
|
+
console.log('Then configure your MCP client to use this folder.');
|
|
36
36
|
process.exit(1);
|
|
37
37
|
}
|
|
38
38
|
|
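package/package.json
CHANGED
(The diff hunk for package/package.json was not captured here; the hunk below, `@@ -0,0 +1,388 @@`, belongs to the newly added file package/src/services/DocumentIndex.js.)
package/src/services/DocumentIndex.js
ADDED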
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
/**
 * Words considered too generic to be useful index keys.
 * Built once at load time instead of on every isCommonWord() call.
 */
const DOCUMENT_INDEX_COMMON_WORDS = new Set([
  'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
  'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
  'will', 'would', 'could', 'should', 'may', 'might', 'can', 'must', 'shall',
  'this', 'that', 'these', 'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they',
  'me', 'him', 'her', 'us', 'them', 'my', 'your', 'his', 'its', 'our', 'their',
  'about', 'above', 'across', 'after', 'against', 'along', 'among', 'around', 'before',
  'behind', 'below', 'beneath', 'beside', 'between', 'beyond', 'during', 'except',
  'from', 'inside', 'into', 'near', 'outside', 'over', 'since', 'through', 'under',
  'until', 'up', 'upon', 'within', 'without', 'how', 'what', 'when', 'where', 'why',
  'who', 'which', 'whose', 'whom', 'very', 'so', 'too', 'quite', 'rather', 'such',
  'guide', 'documentation', 'helps', 'developers', 'system', 'useful', 'explains',
  'use', 'using', 'used', 'get', 'getting', 'set', 'setting', 'make', 'making',
  'create', 'creating', 'build', 'building', 'run', 'running', 'start', 'starting'
]);

/**
 * Iterate over the fenced markdown code blocks in `content`.
 * Each match has the optional language tag in group 1 and the block body in group 2.
 * A fresh regex literal is used per call, so no `lastIndex` state leaks between calls.
 *
 * @param {*} content - Markdown text (coerced to a string).
 * @returns {Iterable<RegExpMatchArray>}
 */
function documentIndexCodeBlocks(content) {
  return String(content).matchAll(/```(\w+)?\n([\s\S]*?)```/g);
}

/**
 * Normalize a frontmatter value (scalar or array) into lower-cased string terms.
 * Non-string scalars (e.g. a YAML number such as `2024`) are stringified instead
 * of crashing on `.toLowerCase()`; null/undefined entries are skipped.
 *
 * @param {*} value - A single value or an array of values.
 * @returns {string[]}
 */
function documentIndexTerms(value) {
  const values = Array.isArray(value) ? value : [value];
  return values
    .filter((item) => item !== null && item !== undefined)
    .map((item) => String(item).toLowerCase());
}

/**
 * In-memory inverted index over documentation files.
 *
 * Four maps are maintained, each from a string key to a list of
 * `{ document, score }` entries:
 *  - keywordIndex:   frontmatter keywords plus terms mined from content
 *  - topicIndex:     frontmatter tags and category
 *  - patternIndex:   code patterns found inside fenced code blocks
 *  - extensionIndex: file extensions mentioned as `*.ext` in content
 *
 * Every occurrence adds its own entry, so repeated mentions of a term in a
 * document increase that document's score for the term.
 */
class DocumentIndex {
  constructor() {
    this.keywordIndex = new Map();
    this.topicIndex = new Map();
    this.patternIndex = new Map();
    this.extensionIndex = new Map();
  }

  /**
   * Index every document in `documents`. Entries are appended to the existing
   * indexes; nothing is cleared first.
   *
   * @param {Iterable<object>|null|undefined} documents - Documents to index; a
   *   nullish value is treated as "nothing to index".
   * @returns {Promise<void>}
   */
  async buildIndexes(documents) {
    if (documents === null || documents === undefined) {
      return;
    }

    for (const document of documents) {
      await this.indexDocument(document);
    }
  }

  /**
   * Index a single document from its metadata and content.
   *
   * @param {object} document - Object with optional `metadata`
   *   (`keywords`, `tags`, `category`) and optional `content` (markdown text).
   *   Falsy documents are ignored.
   * @returns {Promise<void>}
   */
  async indexDocument(document) {
    if (!document) {
      return;
    }

    const metadata = document.metadata;

    // Explicit frontmatter keywords carry the highest weight.
    if (metadata?.keywords) {
      for (const keyword of documentIndexTerms(metadata.keywords)) {
        this.addToIndex(this.keywordIndex, keyword, document, 10);
      }
    }

    // Tags and category both feed the topic index.
    if (metadata?.tags) {
      for (const tag of documentIndexTerms(metadata.tags)) {
        this.addToIndex(this.topicIndex, tag, document, 5);
      }
    }

    if (metadata?.category) {
      for (const category of documentIndexTerms(metadata.category)) {
        this.addToIndex(this.topicIndex, category, document, 5);
      }
    }

    if (document.content) {
      await this.indexContentKeywords(document);
    }
  }

  /**
   * Run every content extractor over `document.content`.
   *
   * @param {object} document - Document with a `content` property.
   * @returns {Promise<void>}
   */
  async indexContentKeywords(document) {
    const content = document.content;

    this.extractCodeBlockKeywords(content, document);
    this.extractHeadingKeywords(content, document);
    this.extractFileExtensions(content, document);
    this.extractFrameworkNames(content, document);
    this.extractCodePatterns(content, document);
  }

  /**
   * Index module/library names referenced inside fenced code blocks
   * (`require('x')`, `import x`, `from 'x'`, and a fixed list of library names).
   *
   * @param {string} content - Markdown text.
   * @param {object} document - Document the keywords belong to.
   */
  extractCodeBlockKeywords(content, document) {
    const patterns = [
      /require\(['"]([^'"]+)['"]\)/g,
      /import\s+(\w+)/g,
      /from\s+['"]([^'"]+)['"]/g,
      /\b(express|mongoose|bodyParser|flask|sqlalchemy|react|vue|angular|django|fastapi|axios|lodash|moment|uuid)\b/gi
    ];

    for (const blockMatch of documentIndexCodeBlocks(content)) {
      const codeContent = blockMatch[2];

      for (const pattern of patterns) {
        // matchAll works on a clone of the regex, so sharing `patterns` across blocks is safe.
        for (const patternMatch of codeContent.matchAll(pattern)) {
          const keyword = (patternMatch[1] || patternMatch[0]).toLowerCase();
          if (keyword && !this.isCommonWord(keyword)) {
            this.addToIndex(this.keywordIndex, keyword, document, 3); // Lower score for content keywords
          }
        }
      }
    }
  }

  /**
   * Index the non-common words of every markdown heading (`#` to `######`).
   *
   * @param {string} content - Markdown text.
   * @param {object} document - Document the keywords belong to.
   */
  extractHeadingKeywords(content, document) {
    for (const headingMatch of String(content).matchAll(/^#{1,6}\s+(.+)$/gm)) {
      for (const word of headingMatch[1].split(/\s+/)) {
        // Keep word characters, '-' and '/'; drop all other punctuation.
        const cleanWord = word.toLowerCase().replace(/[^\w\-\/]/g, '');
        if (cleanWord && !this.isCommonWord(cleanWord)) {
          this.addToIndex(this.keywordIndex, cleanWord, document, 2); // Lower score for content keywords
        }
      }
    }
  }

  /**
   * Index file extensions written as glob patterns (`*.js`, `*.py`, ...).
   *
   * @param {string} content - Markdown text.
   * @param {object} document - Document the extensions belong to.
   */
  extractFileExtensions(content, document) {
    for (const extensionMatch of String(content).matchAll(/\*\.(\w+)\b/g)) {
      this.addToIndex(this.extensionIndex, extensionMatch[1].toLowerCase(), document);
    }
  }

  /**
   * Index well-known framework, database, tooling, cloud and language names
   * mentioned anywhere in the content.
   *
   * @param {string} content - Markdown text.
   * @param {object} document - Document the keywords belong to.
   */
  extractFrameworkNames(content, document) {
    const techPatterns = [
      /\b(react|vue|angular|svelte|next\.js|nuxt\.js|gatsby)\b/gi,
      /\b(node\.js|express|fastify|koa|nest\.js)\b/gi,
      /\b(postgresql|mysql|mongodb|redis|elasticsearch)\b/gi,
      /\b(docker|kubernetes|terraform|ansible)\b/gi,
      /\b(aws|azure|gcp|heroku|vercel|netlify)\b/gi,
      /\b(typescript|javascript|python|java|golang|rust)\b/gi
    ];

    const text = String(content);
    for (const pattern of techPatterns) {
      for (const techMatch of text.matchAll(pattern)) {
        const keyword = techMatch[0].toLowerCase();
        if (!this.isCommonWord(keyword)) {
          this.addToIndex(this.keywordIndex, keyword, document, 2); // Lower score for content keywords
        }
      }
    }
  }

  /**
   * Index language-specific code patterns found inside fenced code blocks.
   * The block's language tag selects which patterns apply (see getCodePatterns).
   *
   * @param {string} content - Markdown text.
   * @param {object} document - Document the patterns belong to.
   */
  extractCodePatterns(content, document) {
    for (const blockMatch of documentIndexCodeBlocks(content)) {
      const language = blockMatch[1]?.toLowerCase();
      const codeContent = blockMatch[2];

      for (const pattern of this.getCodePatterns(language)) {
        for (const patternMatch of codeContent.matchAll(pattern.regex)) {
          const patternKey = pattern.key || patternMatch[0];
          this.addToIndex(this.patternIndex, patternKey, document, 6); // Medium-high score for patterns
        }
      }
    }
  }

  /**
   * Return the `{ regex, key }` pattern descriptors for a code-block language.
   * Blocks without a language tag are treated as JavaScript/TypeScript.
   * New regex objects are created on each call.
   *
   * @param {string|undefined} language - Lower-cased language tag of the block.
   * @returns {Array<{regex: RegExp, key: string}>}
   */
  getCodePatterns(language) {
    const patterns = [];

    // JavaScript/TypeScript patterns
    if (!language || language === 'javascript' || language === 'js' || language === 'typescript' || language === 'ts') {
      patterns.push(
        { regex: /\buseState\b/g, key: 'useState' },
        { regex: /\buseEffect\b/g, key: 'useEffect' },
        { regex: /\buseCallback\b/g, key: 'useCallback' },
        { regex: /\buseMemo\b/g, key: 'useMemo' },
        { regex: /\buseContext\b/g, key: 'useContext' },
        { regex: /\buseReducer\b/g, key: 'useReducer' },
        { regex: /app\.get\(/g, key: 'app.get' },
        { regex: /app\.post\(/g, key: 'app.post' },
        { regex: /app\.put\(/g, key: 'app.put' },
        { regex: /app\.delete\(/g, key: 'app.delete' },
        { regex: /describe\(/g, key: 'describe(' },
        { regex: /it\(/g, key: 'it(' },
        { regex: /test\(/g, key: 'test(' },
        { regex: /expect\(/g, key: 'expect(' },
        { regex: /async\s+function/g, key: 'async function' },
        { regex: /\.then\(/g, key: '.then(' },
        { regex: /\.catch\(/g, key: '.catch(' },
        { regex: /await\s+/g, key: 'await' }
      );
    }

    // Python patterns
    if (language === 'python' || language === 'py') {
      patterns.push(
        { regex: /\bdef\s+/g, key: 'def ' },
        { regex: /\bclass\s+/g, key: 'class ' },
        { regex: /\b__init__\b/g, key: '__init__' },
        { regex: /\bif\s+__name__\s*==\s*['"]__main__['"]/g, key: 'if __name__' },
        { regex: /\bimport\s+/g, key: 'import ' },
        { regex: /\bfrom\s+\w+\s+import/g, key: 'from import' },
        { regex: /\btry:/g, key: 'try:' },
        { regex: /\bexcept\s+/g, key: 'except ' },
        { regex: /\bwith\s+/g, key: 'with ' },
        { regex: /@\w+/g, key: 'decorator' }
      );
    }

    // SQL patterns
    if (language === 'sql') {
      patterns.push(
        { regex: /\bSELECT\b/gi, key: 'SELECT' },
        { regex: /\bINSERT\s+INTO\b/gi, key: 'INSERT INTO' },
        { regex: /\bUPDATE\b/gi, key: 'UPDATE' },
        { regex: /\bDELETE\s+FROM\b/gi, key: 'DELETE FROM' },
        { regex: /\bCREATE\s+TABLE\b/gi, key: 'CREATE TABLE' },
        { regex: /\bALTER\s+TABLE\b/gi, key: 'ALTER TABLE' },
        { regex: /\bDROP\s+TABLE\b/gi, key: 'DROP TABLE' },
        { regex: /\bJOIN\b/gi, key: 'JOIN' },
        { regex: /\bLEFT\s+JOIN\b/gi, key: 'LEFT JOIN' },
        { regex: /\bINNER\s+JOIN\b/gi, key: 'INNER JOIN' }
      );
    }

    // Java patterns
    if (language === 'java') {
      patterns.push(
        { regex: /\bpublic\s+class\b/g, key: 'public class' },
        { regex: /\bprivate\s+\w+/g, key: 'private' },
        { regex: /\bpublic\s+static\s+void\s+main/g, key: 'main method' },
        { regex: /@Override/g, key: '@Override' },
        { regex: /\bnew\s+\w+\(/g, key: 'new' }
      );
    }

    // Docker patterns
    if (language === 'dockerfile' || language === 'docker') {
      patterns.push(
        { regex: /\bFROM\b/gi, key: 'FROM' },
        { regex: /\bRUN\b/gi, key: 'RUN' },
        { regex: /\bCOPY\b/gi, key: 'COPY' },
        { regex: /\bADD\b/gi, key: 'ADD' },
        { regex: /\bEXPOSE\b/gi, key: 'EXPOSE' },
        { regex: /\bCMD\b/gi, key: 'CMD' },
        { regex: /\bENTRYPOINT\b/gi, key: 'ENTRYPOINT' }
      );
    }

    return patterns;
  }

  /**
   * Tell whether `word` is too generic to index: either in the stop-word list
   * or shorter than two characters.
   *
   * @param {string} word
   * @returns {boolean}
   */
  isCommonWord(word) {
    return DOCUMENT_INDEX_COMMON_WORDS.has(word.toLowerCase()) || word.length < 2;
  }

  /**
   * Append a `{ document, score }` entry under `key` in `index`.
   *
   * @param {Map<string, Array<{document: object, score: number}>>} index
   * @param {string} key
   * @param {object} document
   * @param {number} [score=1]
   */
  addToIndex(index, key, document, score = 1) {
    if (!index.has(key)) {
      index.set(key, []);
    }
    index.get(key).push({ document, score });
  }

  /**
   * Find documents relevant to a context and return them ranked by score.
   *
   * @param {{query?: string, codeSnippet?: string, filePath?: string}} context
   * @returns {Array<{document: object, score: number}>} Highest score first;
   *   empty when the context is missing or has no keys.
   */
  findRelevantDocs(context) {
    if (!context || Object.keys(context).length === 0) {
      return [];
    }

    const candidates = new Map();

    if (context.query) {
      this.searchKeywords(context.query, candidates);
    }

    if (context.codeSnippet) {
      this.searchCodePatterns(context.codeSnippet, candidates);
    }

    if (context.filePath) {
      this.searchFileExtension(context.filePath, candidates);
    }

    return this.scoreAndRank(candidates);
  }

  /**
   * Look up each whitespace-separated query word in the keyword and topic
   * indexes. A word wrapped in sentence punctuation (`"React?"`, `"(jest)"`)
   * is also looked up with that punctuation stripped, so natural-language
   * queries still hit indexed terms.
   *
   * @param {string} query
   * @param {Map} candidates - Accumulator filled via addCandidate().
   */
  searchKeywords(query, candidates) {
    const tokens = String(query).toLowerCase().split(/\s+/);

    for (const token of tokens) {
      if (!token) {
        continue; // leading/trailing whitespace produces empty tokens
      }

      const stripped = token.replace(/^["'`(\[{<]+|["'`)\]}>.,;:!?]+$/g, '');
      const terms = stripped && stripped !== token ? [token, stripped] : [token];

      for (const term of terms) {
        for (const entry of this.keywordIndex.get(term) ?? []) {
          this.addCandidate(candidates, entry.document, entry.score);
        }
        for (const entry of this.topicIndex.get(term) ?? []) {
          this.addCandidate(candidates, entry.document, entry.score);
        }
      }
    }
  }

  /**
   * Credit every document whose indexed code pattern occurs in `codeSnippet`.
   * All-uppercase pattern keys (the SQL and Dockerfile keywords) are matched
   * case-insensitively; other keys must match exactly.
   *
   * @param {string} codeSnippet
   * @param {Map} candidates - Accumulator filled via addCandidate().
   */
  searchCodePatterns(codeSnippet, candidates) {
    const snippet = String(codeSnippet);
    const snippetUpper = snippet.toUpperCase(); // computed once, not per pattern

    for (const [pattern, entries] of this.patternIndex) {
      const found = pattern.toUpperCase() === pattern
        ? snippetUpper.includes(pattern)
        : snippet.includes(pattern);

      if (found) {
        for (const entry of entries) {
          this.addCandidate(candidates, entry.document, 8); // High score for pattern match
        }
      }
    }
  }

  /**
   * Credit documents that mention the extension of `filePath`.
   * Only the final path segment is inspected, and a segment without a dot has
   * no extension (so a file literally named `md` does not match `*.md` docs).
   *
   * @param {string} filePath
   * @param {Map} candidates - Accumulator filled via addCandidate().
   */
  searchFileExtension(filePath, candidates) {
    const baseName = String(filePath).split(/[\\/]/).pop();
    const dotIndex = baseName.lastIndexOf('.');
    if (dotIndex === -1) {
      return;
    }

    const extension = baseName.slice(dotIndex + 1).toLowerCase();
    if (!extension) {
      return;
    }

    for (const entry of this.extensionIndex.get(extension) ?? []) {
      this.addCandidate(candidates, entry.document, 3); // Lower score for extension match
    }
  }

  /**
   * Add `score` to the running total for `document`.
   * Documents are identified by `fileName`, then `filePath`; when neither is
   * set the document object itself is the key, so unnamed documents are not
   * merged into one bucket.
   *
   * @param {Map} candidates
   * @param {object} document
   * @param {number} score
   */
  addCandidate(candidates, document, score) {
    const key = document.fileName || document.filePath || document;
    if (!candidates.has(key)) {
      candidates.set(key, { document, score: 0 });
    }
    candidates.get(key).score += score;
  }

  /**
   * Turn the candidate map into a list sorted by descending score.
   *
   * @param {Map} candidates
   * @returns {Array<{document: object, score: number}>}
   */
  scoreAndRank(candidates) {
    return Array.from(candidates.values()).sort((a, b) => b.score - a.score);
  }
}
|
|
387
|
+
|
|
388
|
+
module.exports = { DocumentIndex };
|