@agenticmail/enterprise 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +183 -0
- package/agenticmail-enterprise.db +0 -0
- package/dashboards/README.md +120 -0
- package/dashboards/dotnet/Program.cs +261 -0
- package/dashboards/express/app.js +146 -0
- package/dashboards/go/main.go +513 -0
- package/dashboards/html/index.html +535 -0
- package/dashboards/java/AgenticMailDashboard.java +376 -0
- package/dashboards/php/index.php +414 -0
- package/dashboards/python/app.py +273 -0
- package/dashboards/ruby/app.rb +195 -0
- package/dist/chunk-77IDQJL3.js +7 -0
- package/dist/chunk-7RGCCHIT.js +115 -0
- package/dist/chunk-DXNKR3TG.js +1355 -0
- package/dist/chunk-IQWA44WT.js +970 -0
- package/dist/chunk-LCUZGIDH.js +965 -0
- package/dist/chunk-N2JVTNNJ.js +2553 -0
- package/dist/chunk-O462UJBH.js +363 -0
- package/dist/chunk-PNKVD2UK.js +26 -0
- package/dist/cli.js +218 -0
- package/dist/dashboard/index.html +558 -0
- package/dist/db-adapter-DEWEFNIV.js +7 -0
- package/dist/dynamodb-CCGL2E77.js +426 -0
- package/dist/engine/index.js +1261 -0
- package/dist/index.js +522 -0
- package/dist/mongodb-ODTXIVPV.js +319 -0
- package/dist/mysql-RM3S2FV5.js +521 -0
- package/dist/postgres-LN7A6MGQ.js +518 -0
- package/dist/routes-2JEPIIKC.js +441 -0
- package/dist/routes-74ZLKJKP.js +399 -0
- package/dist/server.js +7 -0
- package/dist/sqlite-3K5YOZ4K.js +439 -0
- package/dist/turso-LDWODSDI.js +442 -0
- package/package.json +49 -0
- package/src/admin/routes.ts +331 -0
- package/src/auth/routes.ts +130 -0
- package/src/cli.ts +260 -0
- package/src/dashboard/index.html +558 -0
- package/src/db/adapter.ts +230 -0
- package/src/db/dynamodb.ts +456 -0
- package/src/db/factory.ts +51 -0
- package/src/db/mongodb.ts +360 -0
- package/src/db/mysql.ts +472 -0
- package/src/db/postgres.ts +479 -0
- package/src/db/sql-schema.ts +123 -0
- package/src/db/sqlite.ts +391 -0
- package/src/db/turso.ts +411 -0
- package/src/deploy/fly.ts +368 -0
- package/src/deploy/managed.ts +213 -0
- package/src/engine/activity.ts +474 -0
- package/src/engine/agent-config.ts +429 -0
- package/src/engine/agenticmail-bridge.ts +296 -0
- package/src/engine/approvals.ts +278 -0
- package/src/engine/db-adapter.ts +682 -0
- package/src/engine/db-schema.ts +335 -0
- package/src/engine/deployer.ts +595 -0
- package/src/engine/index.ts +134 -0
- package/src/engine/knowledge.ts +486 -0
- package/src/engine/lifecycle.ts +635 -0
- package/src/engine/openclaw-hook.ts +371 -0
- package/src/engine/routes.ts +528 -0
- package/src/engine/skills.ts +473 -0
- package/src/engine/tenant.ts +345 -0
- package/src/engine/tool-catalog.ts +189 -0
- package/src/index.ts +64 -0
- package/src/lib/resilience.ts +326 -0
- package/src/middleware/index.ts +286 -0
- package/src/server.ts +310 -0
- package/tsconfig.json +14 -0
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enterprise Engine — Public API
|
|
3
|
+
*
|
|
4
|
+
* The complete engine powering managed OpenClaw+AgenticMail deployment:
|
|
5
|
+
*
|
|
6
|
+
* 1. Skill Registry + Permission Engine — what tools each agent can use
|
|
7
|
+
* 2. Agent Config Generator — workspace files, gateway config, deploy scripts
|
|
8
|
+
* 3. Deployment Engine — Docker, VPS, Fly.io, Railway provisioning
|
|
9
|
+
* 4. Approval Workflows — human-in-the-loop for sensitive operations
|
|
10
|
+
* 5. Agent Lifecycle Manager — state machine, health checks, auto-recovery
|
|
11
|
+
* 6. Knowledge Base — document ingestion, chunking, RAG retrieval
|
|
12
|
+
* 7. Multi-Tenant Isolation — org limits, quotas, billing, plan enforcement
|
|
13
|
+
* 8. Real-Time Activity Tracking — live tool calls, conversations, cost tracking
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
// 1. Skills & Permissions
|
|
17
|
+
export {
|
|
18
|
+
PermissionEngine,
|
|
19
|
+
BUILTIN_SKILLS,
|
|
20
|
+
PRESET_PROFILES,
|
|
21
|
+
type SkillDefinition,
|
|
22
|
+
type ToolDefinition,
|
|
23
|
+
type ConfigField,
|
|
24
|
+
type SkillCategory,
|
|
25
|
+
type ToolCategory,
|
|
26
|
+
type RiskLevel,
|
|
27
|
+
type SideEffect,
|
|
28
|
+
type AgentPermissionProfile,
|
|
29
|
+
type PermissionResult,
|
|
30
|
+
} from './skills.js';
|
|
31
|
+
|
|
32
|
+
// 2. Agent Configuration
|
|
33
|
+
export {
|
|
34
|
+
AgentConfigGenerator,
|
|
35
|
+
type AgentConfig,
|
|
36
|
+
type ChannelConfig,
|
|
37
|
+
type DeploymentTarget,
|
|
38
|
+
type DeploymentConfig,
|
|
39
|
+
type DeploymentStatus,
|
|
40
|
+
type WorkspaceFiles,
|
|
41
|
+
type GatewayConfig,
|
|
42
|
+
} from './agent-config.js';
|
|
43
|
+
|
|
44
|
+
// 3. Deployment Engine
|
|
45
|
+
export {
|
|
46
|
+
DeploymentEngine,
|
|
47
|
+
type DeploymentEvent,
|
|
48
|
+
type DeploymentPhase,
|
|
49
|
+
type DeploymentResult,
|
|
50
|
+
type LiveAgentStatus,
|
|
51
|
+
} from './deployer.js';
|
|
52
|
+
|
|
53
|
+
// 4. Approval Workflows
|
|
54
|
+
export {
|
|
55
|
+
ApprovalEngine,
|
|
56
|
+
type ApprovalRequest,
|
|
57
|
+
type ApprovalDecision,
|
|
58
|
+
type ApprovalPolicy,
|
|
59
|
+
} from './approvals.js';
|
|
60
|
+
|
|
61
|
+
// 5. Agent Lifecycle Manager
|
|
62
|
+
export {
|
|
63
|
+
AgentLifecycleManager,
|
|
64
|
+
type ManagedAgent,
|
|
65
|
+
type AgentState,
|
|
66
|
+
type StateTransition,
|
|
67
|
+
type AgentHealth,
|
|
68
|
+
type AgentUsage,
|
|
69
|
+
type LifecycleEvent,
|
|
70
|
+
type LifecycleEventType,
|
|
71
|
+
} from './lifecycle.js';
|
|
72
|
+
|
|
73
|
+
// 6. Knowledge Base
|
|
74
|
+
export {
|
|
75
|
+
KnowledgeBaseEngine,
|
|
76
|
+
type KnowledgeBase,
|
|
77
|
+
type KBDocument,
|
|
78
|
+
type KBChunk,
|
|
79
|
+
type KBConfig,
|
|
80
|
+
type SearchResult,
|
|
81
|
+
} from './knowledge.js';
|
|
82
|
+
|
|
83
|
+
// 7. Multi-Tenant Isolation
|
|
84
|
+
export {
|
|
85
|
+
TenantManager,
|
|
86
|
+
PLAN_LIMITS,
|
|
87
|
+
type Organization,
|
|
88
|
+
type OrgPlan,
|
|
89
|
+
type OrgLimits,
|
|
90
|
+
type OrgUsage,
|
|
91
|
+
type OrgFeature,
|
|
92
|
+
type SSOConfig,
|
|
93
|
+
} from './tenant.js';
|
|
94
|
+
|
|
95
|
+
// 8. Real-Time Activity Tracking
|
|
96
|
+
export {
|
|
97
|
+
ActivityTracker,
|
|
98
|
+
type ActivityEvent,
|
|
99
|
+
type ActivityType,
|
|
100
|
+
type ToolCallRecord,
|
|
101
|
+
type ConversationEntry,
|
|
102
|
+
type AgentTimeline,
|
|
103
|
+
type TimelineEntry,
|
|
104
|
+
} from './activity.js';
|
|
105
|
+
|
|
106
|
+
// 9. Tool Catalog (real OpenClaw + AgenticMail tool IDs)
|
|
107
|
+
export {
|
|
108
|
+
OPENCLAW_CORE_TOOLS,
|
|
109
|
+
AGENTICMAIL_TOOLS,
|
|
110
|
+
ALL_TOOLS,
|
|
111
|
+
TOOL_INDEX,
|
|
112
|
+
getToolsBySkill,
|
|
113
|
+
generateOpenClawToolPolicy,
|
|
114
|
+
} from './tool-catalog.js';
|
|
115
|
+
|
|
116
|
+
// 10. Database Persistence + Migration System
|
|
117
|
+
export { EngineDatabase, type EngineDB } from './db-adapter.js';
|
|
118
|
+
export {
|
|
119
|
+
ENGINE_TABLES,
|
|
120
|
+
ENGINE_TABLES_POSTGRES,
|
|
121
|
+
MIGRATIONS,
|
|
122
|
+
MIGRATIONS_TABLE,
|
|
123
|
+
MIGRATIONS_TABLE_POSTGRES,
|
|
124
|
+
sqliteToPostgres,
|
|
125
|
+
sqliteToMySQL,
|
|
126
|
+
type Migration,
|
|
127
|
+
type DynamicTableDef,
|
|
128
|
+
} from './db-schema.js';
|
|
129
|
+
|
|
130
|
+
// 11. OpenClaw Integration Hook
|
|
131
|
+
export { EnterpriseHook, createEnterpriseHook, type EnterpriseHookConfig, type HookResult } from './openclaw-hook.js';
|
|
132
|
+
|
|
|
133
|
+
// 12. AgenticMail Bridge
|
|
134
|
+
export { AgenticMailBridge, createAgenticMailBridge, type BridgeConfig, type ToolInterceptor } from './agenticmail-bridge.js';
|
|
@@ -0,0 +1,486 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge Base Engine
|
|
3
|
+
*
|
|
4
|
+
* Companies need their agent to know their products, docs, FAQs,
|
|
5
|
+
* internal processes. This engine handles:
|
|
6
|
+
* - Document ingestion (PDF, markdown, HTML, text, CSV)
|
|
7
|
+
* - Chunking and embedding
|
|
8
|
+
* - Semantic search / retrieval (RAG)
|
|
9
|
+
* - Knowledge base CRUD per agent
|
|
10
|
+
*
|
|
11
|
+
* The agent queries this before answering customer questions.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// ─── Types ──────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
/**
 * A named collection of ingested documents owned by one organization.
 */
export interface KnowledgeBase {
  /** Unique identifier of the knowledge base. */
  id: string;
  /** Identifier of the owning organization. */
  orgId: string;
  /** Human-readable name. */
  name: string;
  /** Optional free-form description. */
  description?: string;
  /**
   * Agents that can access this knowledge base.
   * An empty list means every agent has access.
   */
  agentIds: string[];
  /** Documents that were ingested into this knowledge base. */
  documents: KBDocument[];
  /** Aggregate counters derived from `documents`. */
  stats: {
    /** Number of documents. */
    totalDocuments: number;
    /** Number of chunks across all documents. */
    totalChunks: number;
    /** Sum of the estimated token counts of all chunks. */
    totalTokens: number;
    /** ISO-8601 timestamp of the last stats update. */
    lastUpdated: string;
  };
  /** Chunking, embedding and retrieval settings. */
  config: KBConfig;
  /** ISO-8601 creation timestamp. */
  createdAt: string;
  /** ISO-8601 timestamp of the last modification. */
  updatedAt: string;
}
|
|
33
|
+
|
|
34
|
+
/**
 * A single source document inside a knowledge base, together with its chunks.
 */
export interface KBDocument {
  /** Unique identifier of the document. */
  id: string;
  /** Identifier of the knowledge base this document belongs to. */
  knowledgeBaseId: string;
  /** Display name of the document. */
  name: string;
  /** Where the content came from. */
  sourceType: 'file' | 'url' | 'text' | 'api';
  /** Origin URL, when the caller supplied one. */
  sourceUrl?: string;
  /** MIME type used to pick the text-extraction strategy. */
  mimeType: string;
  /** Size of the raw content in bytes. */
  size: number;
  /** Chunks produced from the extracted text, in document order. */
  chunks: KBChunk[];
  /** Arbitrary caller-supplied metadata. */
  metadata: Record<string, any>;
  /** Ingestion state of the document. */
  status: 'processing' | 'ready' | 'error';
  /** Failure message; set when `status` is `'error'`. */
  error?: string;
  /** ISO-8601 creation timestamp. */
  createdAt: string;
  /** ISO-8601 timestamp of the last modification. */
  updatedAt: string;
}
|
|
49
|
+
|
|
50
|
+
/**
 * A contiguous slice of a document's text: the unit that is embedded and searched.
 */
export interface KBChunk {
  /** Unique identifier of the chunk. */
  id: string;
  /** Identifier of the document the chunk was cut from. */
  documentId: string;
  /** Text content of the chunk. */
  content: string;
  /** Estimated number of tokens in `content`. */
  tokenCount: number;
  /** Zero-based order of the chunk within its document. */
  position: number;
  /** Vector embedding of `content`; absent when no embedding was generated. */
  embedding?: number[];
  /** Optional location hints inside the source document. */
  metadata: {
    /** Document section / heading the chunk belongs to. */
    section?: string;
    /** Page number in the source document. */
    page?: number;
    /** First source line covered by the chunk. */
    lineStart?: number;
    /** Last source line covered by the chunk. */
    lineEnd?: number;
  };
}
|
|
64
|
+
|
|
65
|
+
/**
 * Chunking, embedding and retrieval settings of a knowledge base.
 */
export interface KBConfig {
  /** Target tokens per chunk (default: 512). */
  chunkSize: number;
  /** Overlap tokens between consecutive chunks (default: 50). */
  chunkOverlap: number;
  /** Embedding model name, e.g. "text-embedding-3-small". */
  embeddingModel: string;
  /** Embedding backend; `'none'` disables embedding generation. */
  embeddingProvider: 'openai' | 'local' | 'none';
  /** Maximum number of results returned per query (default: 5). */
  maxResultsPerQuery: number;
  /** Minimum score a chunk needs to be returned (default: 0.7). */
  minSimilarityScore: number;
  /** Re-fetch URL sources periodically (default: false). */
  autoRefreshUrls: boolean;
  /** Hours between URL refreshes (default: 24). */
  refreshIntervalHours: number;
}
|
|
75
|
+
|
|
76
|
+
/**
 * One hit returned by a knowledge-base search.
 */
export interface SearchResult {
  /** The matching chunk. */
  chunk: KBChunk;
  /** The document the chunk belongs to. */
  document: KBDocument;
  /** Similarity score in the range 0-1. */
  score: number;
  /** Relevant excerpt of the chunk for display. */
  highlight?: string;
}
|
|
82
|
+
|
|
83
|
+
// ─── Knowledge Base Engine ──────────────────────────────
|
|
84
|
+
|
|
85
|
+
/**
 * In-memory knowledge base engine: document ingestion, chunking, optional
 * OpenAI embeddings and retrieval (vector similarity with keyword fallback).
 *
 * All state lives in the two maps below; nothing is persisted by this class.
 */
export class KnowledgeBaseEngine {
  private knowledgeBases = new Map<string, KnowledgeBase>();
  private embeddings = new Map<string, number[]>(); // chunkId → embedding

  /**
   * Create a new knowledge base.
   *
   * @param orgId Owning organization.
   * @param opts  Name, optional description, the agents allowed to query it
   *              (empty / omitted = all agents) and config overrides that are
   *              merged over the built-in defaults.
   * @returns The stored knowledge base.
   */
  createKnowledgeBase(orgId: string, opts: {
    name: string;
    description?: string;
    agentIds?: string[];
    config?: Partial<KBConfig>;
  }): KnowledgeBase {
    const now = new Date().toISOString();

    const kb: KnowledgeBase = {
      id: crypto.randomUUID(),
      orgId,
      name: opts.name,
      description: opts.description,
      // Copy so later mutation of the caller's array cannot change access rules.
      agentIds: [...(opts.agentIds ?? [])],
      documents: [],
      stats: { totalDocuments: 0, totalChunks: 0, totalTokens: 0, lastUpdated: now },
      config: {
        chunkSize: 512,
        chunkOverlap: 50,
        embeddingModel: 'text-embedding-3-small',
        embeddingProvider: 'openai',
        maxResultsPerQuery: 5,
        minSimilarityScore: 0.7,
        autoRefreshUrls: false,
        refreshIntervalHours: 24,
        ...opts.config,
      },
      createdAt: now,
      updatedAt: now,
    };

    this.knowledgeBases.set(kb.id, kb);
    return kb;
  }

  /**
   * Ingest a document into a knowledge base: extract text, chunk it and,
   * unless the provider is `'none'`, generate embeddings.
   *
   * Processing failures do not throw: the returned document then has
   * `status: 'error'` and is NOT added to the knowledge base.
   *
   * @throws Error when `kbId` does not exist.
   */
  async ingestDocument(kbId: string, opts: {
    name: string;
    content: string;
    sourceType: KBDocument['sourceType'];
    sourceUrl?: string;
    mimeType?: string;
    metadata?: Record<string, any>;
  }): Promise<KBDocument> {
    const kb = this.knowledgeBases.get(kbId);
    if (!kb) throw new Error(`Knowledge base ${kbId} not found`);

    const now = new Date().toISOString();
    const doc: KBDocument = {
      id: crypto.randomUUID(),
      knowledgeBaseId: kbId,
      name: opts.name,
      sourceType: opts.sourceType,
      sourceUrl: opts.sourceUrl,
      mimeType: opts.mimeType || 'text/plain',
      size: Buffer.byteLength(opts.content, 'utf-8'),
      chunks: [],
      metadata: opts.metadata || {},
      status: 'processing',
      createdAt: now,
      updatedAt: now,
    };

    try {
      // Extract text based on format
      const text = this.extractText(opts.content, doc.mimeType);

      // Chunk the text
      const chunks = this.chunkText(text, doc.id, kb.config);
      doc.chunks = chunks;

      // Generate embeddings (best effort — chunks without one use keyword search)
      if (kb.config.embeddingProvider !== 'none') {
        await this.generateEmbeddings(chunks, kb.config);
      }

      doc.status = 'ready';

      kb.documents.push(doc);
      this.refreshStats(kb);
    } catch (error: unknown) {
      doc.status = 'error';
      doc.error = error instanceof Error ? error.message : String(error);
    }

    return doc;
  }

  /**
   * Search across the knowledge bases an agent can access.
   *
   * @param agentId Agent performing the search.
   * @param query   Natural-language query.
   * @param opts    `kbIds` restricts the search to specific knowledge bases;
   *                `maxResults` / `minScore` override the per-KB config
   *                (`maxResultsPerQuery` / `minSimilarityScore`). An explicit
   *                `0` is honoured for both.
   * @returns Matching chunks, best score first.
   */
  async search(agentId: string, query: string, opts?: {
    kbIds?: string[];
    maxResults?: number;
    minScore?: number;
  }): Promise<SearchResult[]> {
    // NOTE(review): when kbIds is given the per-agent access list is not
    // consulted, so any agent can search any listed KB. Kept as-is because
    // callers may rely on it — confirm whether this should also check agentIds.
    const kbs = Array.from(this.knowledgeBases.values()).filter(kb => {
      if (opts?.kbIds?.length) return opts.kbIds.includes(kb.id);
      return kb.agentIds.includes(agentId) || kb.agentIds.length === 0; // Empty = all agents
    });

    if (kbs.length === 0) return [];

    const maxResults = opts?.maxResults
      ?? Math.max(...kbs.map(kb => kb.config.maxResultsPerQuery));

    // Query embeddings are only comparable with chunk embeddings produced by
    // the same provider + model, so compute (and cache) one per combination.
    const queryEmbeddings = new Map<string, number[] | null>();
    const results: SearchResult[] = [];

    for (const kb of kbs) {
      const readyDocs = kb.documents.filter(doc => doc.status === 'ready');
      if (readyDocs.length === 0) continue;

      const minScore = opts?.minScore ?? kb.config.minSimilarityScore;

      const cacheKey = `${kb.config.embeddingProvider}:${kb.config.embeddingModel}`;
      let queryEmbedding = queryEmbeddings.get(cacheKey);
      if (queryEmbedding === undefined) {
        queryEmbedding = await this.getEmbedding(query, kb.config);
        queryEmbeddings.set(cacheKey, queryEmbedding);
      }

      for (const doc of readyDocs) {
        for (const chunk of doc.chunks) {
          const score = queryEmbedding && chunk.embedding
            ? this.cosineSimilarity(queryEmbedding, chunk.embedding) // vector similarity
            : this.keywordScore(query, chunk.content);               // fallback: keywords

          if (score >= minScore) {
            results.push({
              chunk,
              document: doc,
              score,
              highlight: this.extractHighlight(query, chunk.content),
            });
          }
        }
      }
    }

    // Sort by score, return top N
    return results
      .sort((a, b) => b.score - a.score)
      .slice(0, maxResults);
  }

  /**
   * Generate a context string for an agent's prompt (RAG injection).
   *
   * Chunks are added best-first; a chunk that would exceed `maxTokens` is
   * skipped so smaller lower-ranked chunks can still be used.
   *
   * @returns Markdown context, or `''` when nothing relevant fits the budget.
   */
  async getContext(agentId: string, query: string, maxTokens: number = 2000): Promise<string> {
    const results = await this.search(agentId, query);

    let body = '';
    let tokenCount = 0;

    for (const result of results) {
      const chunkTokens = result.chunk.tokenCount;
      if (tokenCount + chunkTokens > maxTokens) continue;

      body += `### From: ${result.document.name}`;
      if (result.chunk.metadata.section) body += ` > ${result.chunk.metadata.section}`;
      body += `\n${result.chunk.content}\n\n`;
      tokenCount += chunkTokens;
    }

    // Never return a header without any content below it.
    return body.length > 0 ? `## Relevant Knowledge Base Context\n\n${body}` : '';
  }

  // ─── CRUD ───────────────────────────────────────────

  /** Look up a knowledge base by id. */
  getKnowledgeBase(id: string): KnowledgeBase | undefined {
    return this.knowledgeBases.get(id);
  }

  /** All knowledge bases owned by an organization. */
  getKnowledgeBasesByOrg(orgId: string): KnowledgeBase[] {
    return Array.from(this.knowledgeBases.values()).filter(kb => kb.orgId === orgId);
  }

  /** All knowledge bases an agent can access (explicitly listed or open to all). */
  getKnowledgeBasesForAgent(agentId: string): KnowledgeBase[] {
    return Array.from(this.knowledgeBases.values()).filter(kb =>
      kb.agentIds.includes(agentId) || kb.agentIds.length === 0
    );
  }

  /**
   * Remove a document (and its cached embeddings) from a knowledge base.
   *
   * @returns `false` when the knowledge base or document does not exist.
   */
  deleteDocument(kbId: string, docId: string): boolean {
    const kb = this.knowledgeBases.get(kbId);
    if (!kb) return false;
    const idx = kb.documents.findIndex(d => d.id === docId);
    if (idx < 0) return false;

    const [removed] = kb.documents.splice(idx, 1);
    if (removed) this.forgetEmbeddings(removed);

    this.refreshStats(kb);
    return true;
  }

  /**
   * Delete a knowledge base together with the cached embeddings of its chunks.
   *
   * @returns `true` when a knowledge base was removed.
   */
  deleteKnowledgeBase(id: string): boolean {
    const kb = this.knowledgeBases.get(id);
    if (!kb) return false;

    for (const doc of kb.documents) this.forgetEmbeddings(doc);
    return this.knowledgeBases.delete(id);
  }

  /** Recompute every aggregate counter of `kb` and bump its timestamps. */
  private refreshStats(kb: KnowledgeBase): void {
    let totalChunks = 0;
    let totalTokens = 0;
    for (const doc of kb.documents) {
      totalChunks += doc.chunks.length;
      for (const chunk of doc.chunks) totalTokens += chunk.tokenCount;
    }

    const now = new Date().toISOString();
    kb.stats.totalDocuments = kb.documents.length;
    kb.stats.totalChunks = totalChunks;
    kb.stats.totalTokens = totalTokens;
    kb.stats.lastUpdated = now;
    kb.updatedAt = now;
  }

  /** Drop the cached embeddings of every chunk of `doc`. */
  private forgetEmbeddings(doc: KBDocument): void {
    for (const chunk of doc.chunks) {
      this.embeddings.delete(chunk.id);
    }
  }

  // ─── Text Processing ─────────────────────────────────

  /**
   * Convert raw content to plain text. Only HTML and CSV get special
   * treatment; everything else (plain text, markdown, …) is returned as-is.
   * PDF, DOCX, etc. would need additional parsers.
   */
  private extractText(content: string, mimeType: string): string {
    // Ignore parameters and case: "Text/HTML; charset=utf-8" → "text/html".
    const baseType = (mimeType.split(';')[0] ?? '').trim().toLowerCase();

    switch (baseType) {
      case 'text/html':
        return content
          // Script and style bodies are code, not document text.
          .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, ' ')
          // Strip the remaining tags.
          .replace(/<[^>]*>/g, ' ')
          .replace(/\s+/g, ' ')
          .trim();
      case 'text/csv':
        // Convert CSV rows to readable text
        return content.split('\n').map(row => row.replace(/,/g, ' | ')).join('\n');
      default:
        return content;
    }
  }

  /**
   * Greedily pack sentences into chunks of roughly `config.chunkSize`
   * estimated tokens, carrying `config.chunkOverlap` tokens of trailing text
   * into the next chunk. A single sentence larger than the chunk size is kept
   * whole. Each chunk is labelled with the heading in effect where it starts.
   */
  private chunkText(text: string, documentId: string, config: KBConfig): KBChunk[] {
    const chunks: KBChunk[] = [];
    let currentChunk = '';
    let currentTokens = 0;
    let currentSection: string | undefined; // most recent heading seen
    let chunkSection: string | undefined;   // heading in effect where the open chunk began
    let previousWasHeading = false;

    const flush = (): void => {
      const content = currentChunk.trim();
      if (content.length === 0) return;
      chunks.push({
        id: crypto.randomUUID(),
        documentId,
        content,
        tokenCount: currentTokens,
        position: chunks.length,
        metadata: { section: chunkSection },
      });
    };

    for (const sentence of this.splitIntoSentences(text)) {
      // Detect section headings
      const headingMatch = sentence.match(/^#+\s+(.+)$/);
      if (headingMatch) currentSection = headingMatch[1];

      const sentenceTokens = this.estimateTokens(sentence);

      if (currentTokens + sentenceTokens > config.chunkSize && currentChunk.length > 0) {
        // Save the open chunk under the section it started in — not under a
        // heading that only begins with the sentence that overflowed it.
        flush();

        // Start new chunk with overlap
        const overlapText = this.getOverlapText(currentChunk, config.chunkOverlap);
        currentChunk = overlapText ? `${overlapText} ${sentence}` : sentence;
        currentTokens = this.estimateTokens(currentChunk);
        chunkSection = currentSection;
      } else {
        if (currentChunk.length === 0) {
          currentChunk = sentence;
          chunkSection = currentSection;
        } else {
          // Keep a heading on its own line, join ordinary sentences with a space.
          currentChunk += (previousWasHeading ? '\n' : ' ') + sentence;
        }
        currentTokens += sentenceTokens;
      }

      previousWasHeading = headingMatch !== null;
    }

    // Don't forget the last chunk
    flush();

    return chunks;
  }

  /**
   * Split text into sentences. Markdown heading lines are always returned as
   * their own segment so that section detection can see them; the prose in
   * between is split after `.`, `!` or `?` followed by whitespace.
   */
  private splitIntoSentences(text: string): string[] {
    const segments: string[] = [];
    let prose: string[] = [];

    const flushProse = (): void => {
      if (prose.length === 0) return;
      for (const sentence of prose.join('\n').split(/(?<=[.!?])\s+/)) {
        segments.push(sentence);
      }
      prose = [];
    };

    for (const line of text.split('\n')) {
      if (/^#+\s+\S/.test(line)) {
        flushProse();
        segments.push(line.trim());
      } else {
        prose.push(line);
      }
    }
    flushProse();

    return segments.filter(s => s.trim().length > 0);
  }

  /** Rough token estimate: ~4 characters per token for English. */
  private estimateTokens(text: string): number {
    return Math.ceil(text.length / 4);
  }

  /**
   * Trailing words of `text` worth roughly `overlapTokens` tokens
   * (~0.75 words per token). Returns `''` when no overlap is requested.
   */
  private getOverlapText(text: string, overlapTokens: number): string {
    const overlapWords = Math.ceil(overlapTokens * 0.75);
    // slice(-0) would return the whole text, so handle "no overlap" explicitly.
    if (!(overlapWords > 0)) return '';

    const words = text.trim().split(/\s+/);
    return words.slice(-overlapWords).join(' ');
  }

  // ─── Embeddings ─────────────────────────────────────

  /**
   * Attach embeddings to `chunks` (in place) and cache them by chunk id.
   * Best effort: without an API key, or when a batch request fails, the
   * affected chunks simply stay without an embedding.
   */
  private async generateEmbeddings(chunks: KBChunk[], config: KBConfig): Promise<void> {
    if (config.embeddingProvider !== 'openai') return;
    if (!process.env.OPENAI_API_KEY) return; // Skip if no API key

    // Batch embeddings (OpenAI supports up to 2048 inputs)
    const batchSize = 100;
    for (let i = 0; i < chunks.length; i += batchSize) {
      const batch = chunks.slice(i, i + batchSize);
      const vectors = await this.requestEmbeddings(batch.map(c => c.content), config);
      if (!vectors) continue; // skip embedding on error

      batch.forEach((chunk, j) => {
        const vector = vectors[j];
        if (vector) {
          chunk.embedding = vector;
          this.embeddings.set(chunk.id, vector);
        }
      });
    }
  }

  /** Embedding of a single text, or `null` when none can be obtained. */
  private async getEmbedding(text: string, config: KBConfig): Promise<number[] | null> {
    const vectors = await this.requestEmbeddings([text], config);
    return vectors?.[0] ?? null;
  }

  /**
   * Call the OpenAI embeddings endpoint for `inputs`.
   *
   * @returns One slot per input (undefined where the response had no vector),
   *          or `null` when the provider is not OpenAI, no API key is set, or
   *          the request failed.
   */
  private async requestEmbeddings(
    inputs: string[],
    config: KBConfig,
  ): Promise<Array<number[] | undefined> | null> {
    if (config.embeddingProvider !== 'openai') return null;

    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) return null;

    try {
      const response = await fetch('https://api.openai.com/v1/embeddings', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` },
        body: JSON.stringify({ model: config.embeddingModel, input: inputs }),
      });
      if (!response.ok) return null;

      const payload = await response.json() as {
        data?: Array<{ index?: number; embedding?: number[] }>;
      };
      if (!Array.isArray(payload.data)) return null;

      const vectors: Array<number[] | undefined> = inputs.map(() => undefined);
      payload.data.forEach((item, position) => {
        // Prefer the index reported by the API; never write outside the batch.
        const slot = item.index ?? position;
        if (slot >= 0 && slot < inputs.length && Array.isArray(item.embedding)) {
          vectors[slot] = item.embedding;
        }
      });
      return vectors;
    } catch {
      return null; // network / parse failure — callers fall back to keywords
    }
  }

  /** Cosine similarity of two vectors; 0 for mismatched lengths or zero vectors. */
  private cosineSimilarity(a: number[], b: number[]): number {
    if (a.length !== b.length) return 0;
    let dotProduct = 0, normA = 0, normB = 0;
    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }
    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
    return denominator === 0 ? 0 : dotProduct / denominator;
  }

  /**
   * Fraction (0-1) of query words longer than two characters that occur in
   * `content`, case-insensitively. 0 when the query has no such words.
   */
  private keywordScore(query: string, content: string): number {
    const queryWords = query.toLowerCase().split(/\s+/).filter(w => w.length > 2);
    if (queryWords.length === 0) return 0;

    const contentLower = content.toLowerCase();
    const matches = queryWords.filter(word => contentLower.includes(word)).length;
    return matches / queryWords.length;
  }

  /**
   * Pick the sentence of `content` that contains the most query words and
   * return it, truncated to `maxLength` characters with a trailing `...`.
   */
  private extractHighlight(query: string, content: string, maxLength: number = 200): string {
    const queryWords = query.toLowerCase().split(/\s+/).filter(w => w.length > 2);
    const sentences = content.split(/[.!?]+/).filter(s => s.trim().length > 0);

    // Find sentence with most keyword matches
    let bestSentence = sentences[0] || content.slice(0, maxLength);
    let bestScore = 0;

    for (const sentence of sentences) {
      const lower = sentence.toLowerCase();
      const score = queryWords.filter(w => lower.includes(w)).length;
      if (score > bestScore) {
        bestScore = score;
        bestSentence = sentence;
      }
    }

    // Decide on the ellipsis from the trimmed text, i.e. what is actually shown.
    const shown = bestSentence.trim();
    return shown.length > maxLength ? shown.slice(0, maxLength) + '...' : shown;
  }
}
|