@hubblecommerce/overmind-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -0
- package/dist/src/config/app.config.js +52 -0
- package/dist/src/embeddings/embeddings.interface.js +1 -0
- package/dist/src/embeddings/transformers.provider.js +19 -0
- package/dist/src/integrations/confluence/confluence.client.js +230 -0
- package/dist/src/integrations/confluence/confluence.types.js +1 -0
- package/dist/src/integrations/gitlab/gitlab.client.js +15 -0
- package/dist/src/integrations/gitlab/gitlab.types.js +1 -0
- package/dist/src/integrations/jira/jira.client.js +150 -0
- package/dist/src/llm/anthropic-retry-wrapper.js +110 -0
- package/dist/src/llm/anthropic.provider.js +11 -0
- package/dist/src/processors/confluence-document.processor.js +162 -0
- package/dist/src/processors/confluence-html-parser.js +220 -0
- package/dist/src/tools/confluence-search.tool.js +152 -0
- package/dist/src/tools/get-current-date.tool.js +18 -0
- package/dist/src/vectorstore/postgres.provider.js +95 -0
- package/dist/src/vectorstore/vectorstore.interface.js +1 -0
- package/package.json +53 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import { parseConfluenceHtml } from './confluence-html-parser.js';
|
|
2
|
+
/**
 * Split markdown content into one chunk per H2 heading (`## Section`).
 *
 * Text that appears before the first H2 heading (page title, intro
 * paragraph) is kept as part of the first chunk so that no page content is
 * dropped. When the markdown contains no H2 heading at all, the whole
 * trimmed document is returned as a single chunk without a title.
 *
 * @param {string} markdown - Markdown text to split.
 * @returns {Array<{sectionTitle: (string|undefined), content: string, sectionIndex: number}>}
 *   Chunks in document order; `sectionIndex` is the zero-based position of
 *   the H2 heading that opens the chunk.
 */
export function splitMarkdownBySections(markdown) {
    // Only level-2 headings match: "# " and "### " lines are left inside chunks.
    const headings = [...markdown.matchAll(/^## (.+)$/gm)];
    if (headings.length === 0) {
        // No H2 headings - treat entire content as single chunk
        return [{
                sectionTitle: undefined,
                content: markdown.trim(),
                sectionIndex: 0,
            }];
    }
    return headings.map((heading, position) => {
        // The first chunk starts at offset 0 so any preamble stays attached to it.
        const start = position === 0 ? 0 : heading.index;
        // A chunk runs up to the next H2 heading, or to the end of the document.
        const end = headings[position + 1]?.index ?? markdown.length;
        return {
            sectionTitle: heading[1].trim(),
            content: markdown.substring(start, end).trim(),
            sectionIndex: position,
        };
    });
}
|
|
34
|
+
/**
 * Derive simple quality indicators from a chunk of markdown text.
 *
 * @param {string} content - Markdown text of a single chunk.
 * @returns {{word_count: number, has_code_blocks: boolean, has_tables: boolean, has_links: boolean}}
 *   Number of whitespace-separated tokens plus pattern-based flags for a
 *   code fence, a line with two pipe characters, and a `[text](target)` link.
 */
function analyzeContent(content) {
    // Splitting a blank string yields empty tokens, so only non-empty ones count.
    let tokenCount = 0;
    for (const token of content.split(/\s+/)) {
        if (token.length > 0) {
            tokenCount += 1;
        }
    }
    const containsFence = /```/.test(content);
    const containsPipeRow = /\|.*\|/.test(content);
    const containsLink = /\[.+\]\(.+\)/.test(content);
    return {
        word_count: tokenCount,
        has_code_blocks: containsFence,
        has_tables: containsPipeRow,
        has_links: containsLink,
    };
}
|
|
49
|
+
/**
 * Derive structured metadata fields from a page's labels.
 *
 * The first label naming a known content type, the first label starting with
 * `customer-`, and the first label naming a known lifecycle category each
 * fill their own field. Every label not picked for one of those fields is
 * returned as a keyword.
 *
 * @param {string[]} labels - Labels attached to the page.
 * @returns {{content_type: (string|undefined), customer: (string|undefined), category: (string|undefined), keywords: string[]}}
 */
function extractMetadataFromLabels(labels) {
    const knownContentTypes = ['project-info', 'faq', 'guide', 'decision', 'template'];
    const knownCategories = ['active', 'maintenance', 'legacy', 'archived'];
    const contentType = labels.find((candidate) => knownContentTypes.includes(candidate));
    const customer = labels.find((candidate) => candidate.startsWith('customer-'));
    const category = labels.find((candidate) => knownCategories.includes(candidate));
    // Labels already consumed by a dedicated field; unmatched fields are skipped.
    const claimed = new Set();
    for (const value of [contentType, customer, category]) {
        if (value) {
            claimed.add(value);
        }
    }
    const keywords = labels.filter((candidate) => !claimed.has(candidate));
    return {
        content_type: contentType,
        customer,
        category,
        keywords,
    };
}
|
|
73
|
+
/**
 * Turn one Confluence page into vector-store documents, one per H2 section.
 *
 * Pages without storage-format content, or whose content converts to empty
 * markdown, are skipped with a console warning and yield an empty array.
 *
 * @param {object} page - Confluence page; reads `id`, `title`,
 *   `body.storage.value`, `_links.webui`, `version`, `authorId`, `parentId`.
 * @param {string} spaceKey - Key of the space the page belongs to.
 * @param {string} baseUrl - Site base URL; a single trailing slash is removed.
 * @param {string[]} [labels=[]] - Labels attached to the page.
 * @returns {Array<{pageContent: string, metadata: object}>} One entry per section.
 */
export function processConfluencePage(page, spaceKey, baseUrl, labels = []) {
    const storageHtml = page.body?.storage?.value;
    if (!storageHtml) {
        console.warn(`Page ${page.id} (${page.title}) has no content, skipping`);
        return [];
    }
    const markdown = parseConfluenceHtml(storageHtml);
    const isBlank = !markdown || markdown.trim().length === 0;
    if (isBlank) {
        console.warn(`Page ${page.id} (${page.title}) resulted in empty markdown after parsing, skipping`);
        return [];
    }
    const sections = splitMarkdownBySections(markdown);
    // Values shared by every chunk of this page are computed once up front.
    const siteRoot = baseUrl.replace(/\/$/, '');
    const pageUrl = `${siteRoot}/wiki${page._links.webui}`;
    const idPrefix = `confluence:${spaceKey}:${page.id}`;
    const labelFields = extractMetadataFromLabels(labels);
    const indexedAt = new Date().toISOString();
    return sections.map(({ sectionTitle, content, sectionIndex }) => {
        const quality = analyzeContent(content);
        return {
            pageContent: content,
            metadata: {
                // Source info
                source: 'confluence',
                page_id: page.id,
                page_title: page.title,
                page_url: pageUrl,
                space_key: spaceKey,
                // Content structure: an untitled chunk means the page had no H2 headings
                section_title: sectionTitle,
                section_index: sectionIndex,
                chunk_type: sectionTitle ? 'section' : 'full_page',
                // Raw labels followed by the fields derived from them
                labels,
                ...labelFields,
                // Timestamps
                last_updated: page.version.createdAt,
                page_version: page.version.number,
                indexed_at: indexedAt,
                // Author info
                last_updated_by: page.version.authorId,
                page_creator: page.authorId,
                // Quality indicators
                ...quality,
                // Parent context
                parent_page_id: page.parentId,
                // Standard fields
                document_id: `${idPrefix}_section_${sectionIndex}`,
                document_type: 'confluence_page',
            },
        };
    });
}
|
|
145
|
+
/**
 * Convert a batch of Confluence pages into a flat list of chunks.
 *
 * A page that fails to process is logged with `console.error` and skipped,
 * so one bad page does not abort the batch.
 *
 * @param {Iterable<object>} pages - Pages to process.
 * @param {string} spaceKey - Key of the space the pages belong to.
 * @param {string} baseUrl - Site base URL used to build page links.
 * @param {Map<string, string[]>} [labelsMap=new Map()] - Labels keyed by page id;
 *   pages without an entry are processed with no labels.
 * @returns {Array<{pageContent: string, metadata: object}>} Chunks of all pages, in input order.
 */
export function processConfluencePages(pages, spaceKey, baseUrl, labelsMap = new Map()) {
    const collected = [];
    for (const page of pages) {
        try {
            const pageLabels = labelsMap.get(page.id) || [];
            collected.push(...processConfluencePage(page, spaceKey, baseUrl, pageLabels));
        }
        catch (error) {
            // Deliberate best-effort: report the failure and move on to the next page.
            console.error(`Error processing page ${page.id} (${page.title}):`, error);
        }
    }
    return collected;
}
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
import TurndownService from 'turndown';
|
|
2
|
+
/**
 * Convert Confluence storage-format HTML to clean Markdown.
 *
 * Uses Turndown with ATX headings, fenced code blocks and `-` bullets, plus
 * the custom rules registered by `addConfluenceRules` for Confluence-specific
 * elements. The result has trailing spaces/tabs removed from every line, at
 * most one blank line in a row, and no leading or trailing whitespace.
 *
 * @param {string} html - Page body in Confluence storage format.
 * @returns {string} The cleaned Markdown.
 */
export function parseConfluenceHtml(html) {
    const turndownService = new TurndownService({
        headingStyle: 'atx', // Use # for headings
        hr: '---',
        bulletListMarker: '-',
        codeBlockStyle: 'fenced',
        emDelimiter: '*',
        strongDelimiter: '**',
    });
    // Add custom rules for Confluence-specific elements
    addConfluenceRules(turndownService);
    const markdown = turndownService.turndown(html);
    return markdown
        // Strip trailing whitespace first: a line holding only spaces would
        // otherwise split a run of blank lines and survive the collapse below.
        .replace(/[ \t]+$/gm, '')
        .replace(/\n{3,}/g, '\n\n') // Max 2 consecutive newlines
        .trim();
}
|
|
25
|
+
/**
 * Register custom Turndown rules for Confluence storage-format elements.
 *
 * Adds one rule per supported macro (code, info/note/tip/warning/panel, toc,
 * excerpt, expand, quote, status, anchor, jira), pass-through rules for the
 * rich-text and plain-text body containers, a rule for `ac:link`, and a
 * fallback that logs any other structured macro and keeps its text.
 *
 * @param {object} turndownService - Object exposing `addRule(name, rule)`;
 *   each rule has a `filter(node)` and a `replacement(content, node)`.
 * @returns {void}
 */
function addConfluenceRules(turndownService) {
    const MACRO_NODE = 'AC:STRUCTURED-MACRO';
    const panelMacros = ['info', 'note', 'tip', 'warning', 'panel'];
    // Macros with a dedicated rule below; the generic fallback skips these.
    const handledMacros = [
        'code', ...panelMacros,
        'toc', 'excerpt', 'expand', 'quote', 'status', 'anchor', 'jira',
    ];
    // Build a filter matching a structured macro with the given ac:name.
    const isMacro = (name) => (node) => node.nodeName === MACRO_NODE && node.getAttribute('ac:name') === name;
    // Text of the named <ac:parameter> child, or undefined when absent.
    const paramText = (node, name) => node.querySelector(`ac\\:parameter[ac\\:name="${name}"]`)?.textContent;
    // Text of the first rich-text / plain-text body inside the node, if any.
    const richText = (node) => node.querySelector('ac\\:rich-text-body')?.textContent;
    const plainText = (node) => node.querySelector('ac\\:plain-text-body')?.textContent;
    // Render text as a Markdown blockquote, prefixing every line with "> ".
    const toBlockquote = (text) => '\n> ' + text.split('\n').join('\n> ') + '\n\n';

    // Confluence Code Macro
    turndownService.addRule('confluenceCodeMacro', {
        filter: isMacro('code'),
        replacement: (content, node) => {
            const language = paramText(node, 'language') || '';
            const code = plainText(node) || '';
            return '\n```' + language + '\n' + code + '\n```\n';
        },
    });
    // Confluence Info/Warning/Note/Tip Panels
    turndownService.addRule('confluenceInfoPanels', {
        filter: (node) => {
            if (node.nodeName !== MACRO_NODE) {
                return false;
            }
            return panelMacros.includes(node.getAttribute('ac:name') || '');
        },
        replacement: (content, node) => {
            const macroName = node.getAttribute('ac:name') || 'info';
            const bodyContent = richText(node) || plainText(node) || content;
            const label = macroName.toUpperCase();
            // Quote every line, not just the first, so multi-line panel
            // bodies stay inside the blockquote (same as the quote macro).
            return toBlockquote('**' + label + '**: ' + bodyContent.trim());
        },
    });
    // Confluence TOC (Table of Contents) Macro
    turndownService.addRule('confluenceTocMacro', {
        filter: isMacro('toc'),
        replacement: () => '\n**Table of Contents**\n\n',
    });
    // Confluence Excerpt Macro
    turndownService.addRule('confluenceExcerptMacro', {
        filter: isMacro('excerpt'),
        replacement: (content, node) => richText(node) || content,
    });
    // Confluence Expand Macro (collapsible sections)
    turndownService.addRule('confluenceExpandMacro', {
        filter: isMacro('expand'),
        replacement: (content, node) => {
            const title = paramText(node, 'title') || 'Details';
            const bodyContent = richText(node) || content;
            return '\n**' + title + '**\n\n' + bodyContent + '\n\n';
        },
    });
    // Confluence Quote Macro
    turndownService.addRule('confluenceQuoteMacro', {
        filter: isMacro('quote'),
        replacement: (content, node) => toBlockquote((richText(node) || content).trim()),
    });
    // Confluence Rich Text Body (nested content container)
    turndownService.addRule('confluenceRichTextBody', {
        filter: (node) => node.nodeName === 'AC:RICH-TEXT-BODY',
        replacement: (content) => content,
    });
    // Confluence Plain Text Body (nested content container)
    turndownService.addRule('confluencePlainTextBody', {
        filter: (node) => node.nodeName === 'AC:PLAIN-TEXT-BODY',
        replacement: (content) => content,
    });
    // Confluence Links (internal page links)
    turndownService.addRule('confluenceLinks', {
        filter: (node) => node.nodeName === 'AC:LINK',
        replacement: (content, node) => {
            const linkBody = node.querySelector('ac\\:link-body, ac\\:plain-text-link-body');
            const linkText = linkBody?.textContent || content || 'link';
            // Target preference: referenced page title, then URL, then anchor.
            const href = node.querySelector('ri\\:page')?.getAttribute('ri:content-title') ||
                node.querySelector('ri\\:url')?.getAttribute('ri:value') ||
                node.getAttribute('ac:anchor') || '';
            return href ? '[' + linkText + '](' + href + ')' : linkText;
        },
    });
    // Confluence Status Macro (status labels)
    turndownService.addRule('confluenceStatusMacro', {
        filter: isMacro('status'),
        replacement: (content, node) => {
            const title = paramText(node, 'title') || 'Status';
            const color = paramText(node, 'colour') || '';
            return '**[' + title + ']**' + (color ? ' (' + color + ')' : '');
        },
    });
    // Confluence Anchor (page anchors/bookmarks)
    turndownService.addRule('confluenceAnchor', {
        filter: isMacro('anchor'),
        replacement: (content, node) => {
            // The first parameter of the macro, whatever its name, holds the anchor.
            const anchorName = node.querySelector('ac\\:parameter')?.textContent || '';
            return anchorName ? '<a name="' + anchorName + '"></a>' : '';
        },
    });
    // Confluence JIRA Macro (embedded JIRA issues)
    turndownService.addRule('confluenceJiraMacro', {
        filter: isMacro('jira'),
        replacement: (content, node) => {
            const serverName = paramText(node, 'server') || '';
            const jiraKey = paramText(node, 'key') || '';
            if (!jiraKey) {
                return '';
            }
            return `\n**JIRA Issue:** ${jiraKey}${serverName ? ` (${serverName})` : ''}\n\n`;
        },
    });
    // Generic handler for unknown Confluence macros
    // Only catches macros that haven't been handled by specific rules above
    turndownService.addRule('confluenceUnknownMacros', {
        filter: (node) => {
            if (node.nodeName !== MACRO_NODE) {
                return false;
            }
            return !handledMacros.includes(node.getAttribute('ac:name') || '');
        },
        replacement: (content, node) => {
            const macroName = node.getAttribute('ac:name') || 'unknown';
            console.warn(`Unknown Confluence macro: ${macroName}`);
            // Keep whatever text the macro carries rather than dropping it.
            return richText(node) || plainText(node) || content || '';
        },
    });
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import { tool } from '@langchain/core/tools';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
/**
 * Create the `confluence_agent` tool for searching the Confluence knowledge base.
 *
 * The tool runs a semantic search for the top 5 matches, always restricted
 * to `source: 'confluence'` and narrowed by any optional metadata filters.
 * When no `content_type` filter is given, chunks typed `template` or `guide`
 * are excluded. The tool resolves to a JSON string holding the matching
 * documents plus the average and the (population) variance of their scores;
 * on failure it resolves to a JSON error object instead of throwing.
 *
 * @param {object} vectorStore - Store exposing
 *   `similaritySearchWithScore(query, k, filter)` that resolves to an array
 *   of `{ document, score }` items.
 * @returns {object} The tool built by `tool()` from `@langchain/core/tools`.
 */
export function createConfluenceAgentTool(vectorStore) {
    const schema = z.object({
        query: z.string().describe("Semantic search query for Confluence knowledge base"),
        filters: z.object({
            keywords: z.array(z.string()).optional()
                .describe("Filter by Confluence labels (e.g., ['shopware', 'deployment'])"),
            content_type: z.enum(['project-info', 'faq', 'guide', 'decision', 'template']).optional()
                .describe("Type of content to search"),
            customer: z.string().optional()
                .describe("Filter by customer name (e.g., 'customer-xyz')"),
            category: z.enum(['active', 'maintenance', 'legacy', 'archived']).optional()
                .describe("Project/content lifecycle category"),
            space_key: z.string().optional()
                .describe("Confluence space key to search within"),
            chunk_type: z.enum(['section', 'full_page']).optional()
                .describe("Type of content chunk (section or full_page)"),
            has_code_blocks: z.boolean().optional()
                .describe("Filter for content with code examples"),
            has_tables: z.boolean().optional()
                .describe("Filter for content with tables"),
            has_links: z.boolean().optional()
                .describe("Filter for content with links"),
            min_word_count: z.number().optional()
                .describe("Minimum word count for content"),
            last_updated_by: z.string().optional()
                .describe("Filter by author user ID"),
            page_creator: z.string().optional()
                .describe("Filter by page creator user ID"),
            parent_page_id: z.string().optional()
                .describe("Filter by parent page ID (child pages only)")
        }).optional().describe("Optional metadata filters to narrow search results")
    });
    return tool(async ({ query, filters }) => {
        try {
            console.log(`\n🔍 Confluence Agent searching: "${query}"`);
            if (filters) {
                console.log(` Filters:`, filters);
            }
            // Build metadata filter for vector store. Keys are added in a
            // fixed order so the resulting filter object is stable.
            const metadataFilter = {
                source: 'confluence'
            };
            if (filters?.keywords && filters.keywords.length > 0) {
                metadataFilter.labels = { arrayContains: filters.keywords };
            }
            // Templates and guides are excluded unless a content type is requested.
            // NOTE(review): if the store compiles `notIn` to SQL `NOT IN` on the
            // stored value, chunks with no content_type at all may be excluded
            // as well - confirm against the vector store implementation.
            metadataFilter.content_type = filters?.content_type
                ? filters.content_type
                : { notIn: ['template', 'guide'] };
            // String filters are applied only when non-empty.
            for (const key of ['customer', 'category', 'space_key', 'chunk_type']) {
                if (filters?.[key]) {
                    metadataFilter[key] = filters[key];
                }
            }
            // Boolean filters must distinguish "false" from "not provided".
            for (const key of ['has_code_blocks', 'has_tables', 'has_links']) {
                if (filters?.[key] !== undefined) {
                    metadataFilter[key] = filters[key];
                }
            }
            if (filters?.min_word_count !== undefined) {
                // NOTE(review): confirm the vector store backend understands a
                // `gte` operator; an unsupported operator may be ignored silently.
                metadataFilter.word_count = { gte: filters.min_word_count };
            }
            for (const key of ['last_updated_by', 'page_creator', 'parent_page_id']) {
                if (filters?.[key]) {
                    metadataFilter[key] = filters[key];
                }
            }
            // Perform vector search with scores. The filter is never empty
            // because `source` is always set, so it is passed unconditionally.
            const resultsWithScores = await vectorStore.similaritySearchWithScore(query, 5, metadataFilter);
            console.log(` Found ${resultsWithScores.length} results`);
            // Calculate similarity metrics
            const scores = resultsWithScores.map(r => r.score);
            const avgSimilarity = scores.length > 0
                ? scores.reduce((sum, score) => sum + score, 0) / scores.length
                : 0;
            // Calculate variance (measure of score consistency)
            // Example: If scores are [0.8, 0.6, 0.9] and avgSimilarity is 0.767:
            // - (0.8 - 0.767)² + (0.6 - 0.767)² + (0.9 - 0.767)² ≈ 0.0467
            // - 0.0467 / 3 ≈ 0.0156 (variance)
            const scoreVariance = scores.length > 1
                ? scores.reduce((sum, score) => sum + Math.pow(score - avgSimilarity, 2), 0) / scores.length
                : 0;
            console.log(` Avg similarity: ${avgSimilarity.toFixed(3)}, Variance: ${scoreVariance.toFixed(3)}`);
            // Format results for LLM
            const formattedResults = resultsWithScores.map(({ document }) => ({
                content: document.pageContent,
                metadata: document.metadata
            }));
            const result = {
                documents: formattedResults,
                searchQuery: query,
                filters: filters || {},
                resultCount: resultsWithScores.length,
                avgSimilarity,
                scoreVariance
            };
            // Return as string for createReactAgent compatibility
            return JSON.stringify(result, null, 2);
        }
        catch (error) {
            // Report the failure to the caller as data instead of throwing.
            const errorMessage = error instanceof Error ? error.message : String(error);
            console.error(`❌ Confluence Agent error:`, errorMessage);
            const errorResult = {
                error: true,
                message: `Tool execution failed. ${errorMessage}`,
                query,
                filters: filters || {},
                suggestion: 'Please check your input parameters and try again with a simpler query or different filters.'
            };
            return JSON.stringify(errorResult, null, 2);
        }
    }, {
        name: "confluence_agent",
        description: `Search Confluence knowledge base for implicit organizational knowledge including:
- Deployment rules and procedures
- Team contacts and expertise areas
- Project mappings (GitLab → Jira → Customer)
- Historical technical decisions
- Approved FAQs and documentation
- Business rules and constraints

Supports semantic search with lots of metadata filters which need to be used for precise targeting.`,
        schema
    });
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { tool } from '@langchain/core/tools';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
/**
 * Create the `get_current_date` tool.
 *
 * The tool takes no input and returns today's date as `YYYY-MM-DD`, built
 * from the local-time year, month and day of `new Date()`. Each call also
 * logs the returned date to the console.
 *
 * @returns {object} The tool built by `tool()` from `@langchain/core/tools`.
 */
export function createGetCurrentDateTool() {
    const getCurrentDateSchema = z.object({});
    return tool(() => {
        const currentDate = new Date();
        const year = currentDate.getFullYear();
        // getMonth() is zero-based, hence the +1; both parts are zero-padded.
        const month = (currentDate.getMonth() + 1).toString().padStart(2, '0');
        const day = currentDate.getDate().toString().padStart(2, '0');
        const formattedDate = `${year}-${month}-${day}`;
        console.log(`\n📅 Returning current date: ${formattedDate}`);
        return formattedDate;
    }, {
        name: 'get_current_date',
        description: 'Returns the current date in YYYY-MM-DD format. Use this tool when the user asks a question that requires knowledge of the current date, such as "in the last week" or "since yesterday".',
        schema: getCurrentDateSchema,
    });
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { PGVectorStore } from '@langchain/community/vectorstores/pgvector';
|
|
2
|
+
import { Pool } from 'pg';
|
|
3
|
+
/**
 * Check that a PostgreSQL server is reachable with the given settings.
 *
 * Opens a short-lived pool, runs `SELECT 1`, and always closes the pool
 * again, whether or not the query succeeded.
 *
 * @param {{host: string, port: number, user: string, password: string, database: string}} config
 *   Connection settings.
 * @returns {Promise<void>} Resolves when the probe query succeeds.
 * @throws Error if connection fails
 */
export async function testPostgresConnection(config) {
    const { host, port, user, password, database } = config;
    const probePool = new Pool({ host, port, user, password, database });
    try {
        await probePool.query('SELECT 1');
    }
    finally {
        // Close the pool on success and on failure alike.
        await probePool.end();
    }
}
|
|
22
|
+
/**
 * Initialize a pgvector-backed store, load the given documents into it, and
 * return it wrapped in the provider interface used by the rest of the package.
 *
 * Before inserting, existing rows whose `metadata.document_id` matches one of
 * the incoming documents are deleted, so re-indexing replaces chunks instead
 * of duplicating them. If that delete or the insert fails, the store is
 * closed before the error is rethrown so its connections are not leaked.
 *
 * @param {object} embeddings - Embeddings implementation passed to `PGVectorStore.initialize`.
 * @param {Array<{pageContent: string, metadata?: object}>} documents - Documents to load; may be empty.
 * @param {{host: string, port: number, user: string, password: string, database: string, tableName: string}} config
 *   Connection settings and target table. `tableName` is interpolated
 *   directly into SQL and must come from trusted configuration.
 * @returns {Promise<{addDocuments: Function, similaritySearch: Function, similaritySearchWithScore: Function, cleanup: Function}>}
 *   The wrapped store; call `cleanup()` to close it.
 * @throws Whatever the database or vector store raises during setup.
 */
export async function createPostgresVectorStore(embeddings, documents, config) {
    console.log('Creating PostgreSQL vector store...');
    const poolConfig = {
        host: config.host,
        port: config.port,
        user: config.user,
        password: config.password,
        database: config.database,
    };
    const pgVectorStore = await PGVectorStore.initialize(embeddings, {
        postgresConnectionOptions: poolConfig,
        tableName: config.tableName,
        columns: {
            idColumnName: 'id',
            vectorColumnName: 'embedding',
            contentColumnName: 'content',
            metadataColumnName: 'metadata',
        },
    });
    try {
        if (documents.length > 0) {
            // Collect all unique document_ids from the documents array
            const documentIds = new Set();
            for (const doc of documents) {
                if (doc.metadata?.document_id) {
                    documentIds.add(String(doc.metadata.document_id));
                }
            }
            // Delete existing documents with the same document_id before inserting
            if (documentIds.size > 0) {
                const pool = new Pool(poolConfig);
                try {
                    const documentIdsArray = Array.from(documentIds);
                    const result = await pool.query(`DELETE FROM ${config.tableName} WHERE metadata->>'document_id' = ANY($1)`, [documentIdsArray]);
                    console.log(`Deleted ${result.rowCount} existing chunks for ${documentIds.size} document(s): ${documentIdsArray.join(', ')}`);
                }
                finally {
                    await pool.end();
                }
            }
            await pgVectorStore.addDocuments(documents);
        }
    }
    catch (error) {
        // The caller never receives the store on this path, so close it here;
        // a failure while closing must not mask the original error.
        try {
            await pgVectorStore.end();
        }
        catch (closeError) {
            console.error('Failed to close PostgreSQL vector store after setup error:', closeError);
        }
        throw error;
    }
    console.log('PostgreSQL vector store created');
    // A non-function object is treated as a metadata filter for the store itself.
    const isMetadataFilter = (filter) => Boolean(filter) && typeof filter === 'object' && !('call' in filter);
    const toScoredItem = ([document, score]) => ({ document, score });
    // Wrap PGVectorStore to match VectorStoreProvider interface
    return {
        async addDocuments(docs) {
            await pgVectorStore.addDocuments(docs);
        },
        async similaritySearch(query, k, filter) {
            // If filter is a metadata object, pass it to pgvector's native filter
            if (isMetadataFilter(filter)) {
                return await pgVectorStore.similaritySearch(query, k, filter);
            }
            // If filter is a function, apply it to the k results after the
            // search, so fewer than k documents may be returned.
            const results = await pgVectorStore.similaritySearch(query, k);
            return typeof filter === 'function' ? results.filter(filter) : results;
        },
        async similaritySearchWithScore(query, k, filter) {
            // If filter is a metadata object, pass it to pgvector's native filter
            if (isMetadataFilter(filter)) {
                const results = await pgVectorStore.similaritySearchWithScore(query, k, filter);
                return results.map(toScoredItem);
            }
            // If filter is a function, apply it to the documents of the k results
            const results = await pgVectorStore.similaritySearchWithScore(query, k);
            const mapped = results.map(toScoredItem);
            return typeof filter === 'function'
                ? mapped.filter(item => filter(item.document))
                : mapped;
        },
        async cleanup() {
            await pgVectorStore.end();
        },
    };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|