@usewhisper/mcp-server 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +182 -154
  2. package/dist/autosubscribe-6EDKPBE2.js +4068 -4068
  3. package/dist/autosubscribe-GHO6YR5A.js +4068 -4068
  4. package/dist/autosubscribe-ISDETQIB.js +435 -435
  5. package/dist/chunk-3WGYBAYR.js +8387 -8387
  6. package/dist/chunk-52VJYCZ7.js +455 -455
  7. package/dist/chunk-5KBZQHDL.js +189 -189
  8. package/dist/chunk-5KIJNY6Z.js +370 -370
  9. package/dist/chunk-7SN3CKDK.js +1076 -1076
  10. package/dist/chunk-B3VWOHUA.js +271 -271
  11. package/dist/chunk-C57DHKTL.js +459 -459
  12. package/dist/chunk-EI5CE3EY.js +616 -616
  13. package/dist/chunk-FTWUJBAH.js +386 -386
  14. package/dist/chunk-H3HSKH2P.js +4841 -4841
  15. package/dist/chunk-JO3ORBZD.js +616 -616
  16. package/dist/chunk-L6DXSM2U.js +456 -456
  17. package/dist/chunk-LMEYV4JD.js +368 -368
  18. package/dist/chunk-MEFLJ4PV.js +8385 -8385
  19. package/dist/chunk-OBLI4FE4.js +275 -275
  20. package/dist/chunk-PPGYJJED.js +271 -271
  21. package/dist/chunk-QGM4M3NI.js +37 -37
  22. package/dist/chunk-T7KMSTWP.js +399 -399
  23. package/dist/chunk-TWEIYHI6.js +399 -399
  24. package/dist/chunk-UYWE7HSU.js +368 -368
  25. package/dist/chunk-X2DL2GWT.js +32 -32
  26. package/dist/chunk-X7HNNNJJ.js +1079 -1079
  27. package/dist/consolidation-2GCKI4RE.js +220 -220
  28. package/dist/consolidation-4JOPW6BG.js +220 -220
  29. package/dist/consolidation-FOVQTWNQ.js +222 -222
  30. package/dist/consolidation-IFQ52E44.js +209 -209
  31. package/dist/context-sharing-4ITCNKG4.js +307 -307
  32. package/dist/context-sharing-6CCFIAKL.js +275 -275
  33. package/dist/context-sharing-GYKLXHZA.js +307 -307
  34. package/dist/context-sharing-PH64JTXS.js +308 -308
  35. package/dist/context-sharing-Y6LTZZOF.js +307 -307
  36. package/dist/cost-optimization-6OIKRSBV.js +195 -195
  37. package/dist/cost-optimization-7DVSTL6R.js +307 -307
  38. package/dist/cost-optimization-BH5NAX33.js +286 -286
  39. package/dist/cost-optimization-F3L5BS5F.js +303 -303
  40. package/dist/ingest-2LPTWUUM.js +16 -16
  41. package/dist/ingest-7T5FAZNC.js +15 -15
  42. package/dist/ingest-EBNIE7XB.js +15 -15
  43. package/dist/ingest-FSHT5BCS.js +15 -15
  44. package/dist/ingest-QE2BTV72.js +14 -14
  45. package/dist/oracle-3RLQF3DP.js +259 -259
  46. package/dist/oracle-FKRTQUUG.js +282 -282
  47. package/dist/oracle-J47QCSEW.js +263 -263
  48. package/dist/oracle-MDP5MZRC.js +256 -256
  49. package/dist/search-BLVHWLWC.js +14 -14
  50. package/dist/search-CZ5NYL5B.js +12 -12
  51. package/dist/search-EG6TYWWW.js +13 -13
  52. package/dist/search-I22QQA7T.js +13 -13
  53. package/dist/search-T7H5G6DW.js +13 -13
  54. package/dist/server.d.ts +2 -2
  55. package/dist/server.js +1973 -169
  56. package/dist/server.js.map +1 -1
  57. package/package.json +51 -51
@@ -1,387 +1,387 @@
1
- import {
2
- embed,
3
- embedSingle,
4
- prisma
5
- } from "./chunk-X2DL2GWT.js";
6
-
7
- // src/engine/chunker.ts
8
- var CODE_EXTENSIONS = /* @__PURE__ */ new Set([
9
- ".ts",
10
- ".tsx",
11
- ".js",
12
- ".jsx",
13
- ".py",
14
- ".java",
15
- ".go",
16
- ".rb",
17
- ".php",
18
- ".cs",
19
- ".rs",
20
- ".swift",
21
- ".kt",
22
- ".scala",
23
- ".c",
24
- ".cpp",
25
- ".h",
26
- ".hpp",
27
- ".sol",
28
- ".vy"
29
- ]);
30
- var CONFIG_EXTENSIONS = /* @__PURE__ */ new Set([
31
- ".json",
32
- ".yaml",
33
- ".yml",
34
- ".toml",
35
- ".ini",
36
- ".env",
37
- ".xml"
38
- ]);
39
- function detectChunkType(filePath, content) {
40
- if (!filePath) return "text";
41
- const ext = "." + filePath.split(".").pop()?.toLowerCase();
42
- if (CODE_EXTENSIONS.has(ext)) return "code";
43
- if (CONFIG_EXTENSIONS.has(ext)) return "config";
44
- if (filePath.includes("schema") || filePath.includes("migration")) return "schema";
45
- if (filePath.endsWith(".md") || filePath.endsWith(".mdx") || filePath.endsWith(".rst")) return "documentation";
46
- if (filePath.includes("openapi") || filePath.includes("swagger")) return "api_spec";
47
- return "text";
48
- }
49
- function chunkText(content, opts = {}) {
50
- const { chunkSize = 1e3, chunkOverlap = 200, filePath, metadata = {} } = opts;
51
- const chunkType = detectChunkType(filePath, content);
52
- if (chunkType === "code") {
53
- return chunkCode(content, { chunkSize, filePath, metadata });
54
- }
55
- return chunkBySize(content, { chunkSize, chunkOverlap, chunkType, metadata });
56
- }
57
- function chunkCode(content, opts) {
58
- const { chunkSize, filePath, metadata = {} } = opts;
59
- const lines = content.split("\n");
60
- const chunks = [];
61
- const boundaries = [
62
- /^(export\s+)?(async\s+)?function\s+/,
63
- /^(export\s+)?(default\s+)?class\s+/,
64
- /^(export\s+)?const\s+\w+\s*=\s*(async\s+)?\(/,
65
- /^(export\s+)?const\s+\w+\s*=\s*\{/,
66
- /^(export\s+)?interface\s+/,
67
- /^(export\s+)?type\s+/,
68
- /^(export\s+)?enum\s+/,
69
- /^def\s+/,
70
- // Python
71
- /^class\s+/,
72
- // Python/Java
73
- /^func\s+/,
74
- // Go
75
- /^pub\s+(fn|struct|enum|impl)/
76
- // Rust
77
- ];
78
- let currentChunk = [];
79
- let currentStart = 0;
80
- for (let i = 0; i < lines.length; i++) {
81
- const trimmed = lines[i].trimStart();
82
- const isBoundary = boundaries.some((b) => b.test(trimmed));
83
- if (isBoundary && currentChunk.length > 0) {
84
- const chunkContent = currentChunk.join("\n").trim();
85
- if (chunkContent.length > 0) {
86
- chunks.push({
87
- content: chunkContent,
88
- chunkType: "code",
89
- chunkIndex: chunks.length,
90
- metadata: {
91
- ...metadata,
92
- filePath,
93
- startLine: currentStart + 1,
94
- endLine: i
95
- }
96
- });
97
- }
98
- currentChunk = [lines[i]];
99
- currentStart = i;
100
- } else {
101
- currentChunk.push(lines[i]);
102
- }
103
- if (currentChunk.join("\n").length > chunkSize * 1.5) {
104
- const chunkContent = currentChunk.join("\n").trim();
105
- if (chunkContent.length > 0) {
106
- chunks.push({
107
- content: chunkContent,
108
- chunkType: "code",
109
- chunkIndex: chunks.length,
110
- metadata: {
111
- ...metadata,
112
- filePath,
113
- startLine: currentStart + 1,
114
- endLine: i + 1
115
- }
116
- });
117
- }
118
- currentChunk = [];
119
- currentStart = i + 1;
120
- }
121
- }
122
- if (currentChunk.length > 0) {
123
- const chunkContent = currentChunk.join("\n").trim();
124
- if (chunkContent.length > 0) {
125
- chunks.push({
126
- content: chunkContent,
127
- chunkType: "code",
128
- chunkIndex: chunks.length,
129
- metadata: {
130
- ...metadata,
131
- filePath,
132
- startLine: currentStart + 1,
133
- endLine: lines.length
134
- }
135
- });
136
- }
137
- }
138
- return chunks;
139
- }
140
- function chunkBySize(content, opts) {
141
- const { chunkSize, chunkOverlap, chunkType, metadata = {} } = opts;
142
- const chunks = [];
143
- const paragraphs = content.split(/\n\n+/);
144
- let current = "";
145
- for (const para of paragraphs) {
146
- if ((current + "\n\n" + para).length > chunkSize && current.length > 0) {
147
- chunks.push({
148
- content: current.trim(),
149
- chunkType,
150
- chunkIndex: chunks.length,
151
- metadata
152
- });
153
- const words = current.split(/\s+/);
154
- const overlapWords = words.slice(-Math.floor(chunkOverlap / 5));
155
- current = overlapWords.join(" ") + "\n\n" + para;
156
- } else {
157
- current = current ? current + "\n\n" + para : para;
158
- }
159
- }
160
- if (current.trim().length > 0) {
161
- chunks.push({
162
- content: current.trim(),
163
- chunkType,
164
- chunkIndex: chunks.length,
165
- metadata
166
- });
167
- }
168
- return chunks;
169
- }
170
-
171
- // src/engine/extractor.ts
172
- import OpenAI from "openai";
173
- var openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
174
- async function extractEntities(projectId, content, chunkType, metadata = {}, chunkId) {
175
- if (content.length < 100) return { entities: 0, relations: 0 };
176
- const isCode = ["code", "function", "class"].includes(chunkType);
177
- const prompt = isCode ? `Analyze this code and extract entities and relationships.
178
-
179
- Entities: functions, classes, interfaces, types, modules, variables, constants, API endpoints, services.
180
- Relations: imports, exports, calls, implements, extends, depends_on, references, part_of.
181
-
182
- Code:
183
- \`\`\`
184
- ${content.slice(0, 3e3)}
185
- \`\`\`
186
-
187
- Respond with JSON only:
188
- {
189
- "entities": [{"name": "...", "type": "function|class|interface|module|constant|api_endpoint|service", "description": "one line"}],
190
- "relations": [{"from": "name", "fromType": "type", "to": "name", "toType": "type", "relation": "imports|calls|extends|implements|depends_on|references|part_of"}]
191
- }` : `Analyze this text and extract key entities (concepts, people, tools, services, APIs, technologies) and their relationships.
192
-
193
- Text:
194
- ${content.slice(0, 3e3)}
195
-
196
- Respond with JSON only:
197
- {
198
- "entities": [{"name": "...", "type": "concept|tool|service|api|technology|person|organization", "description": "one line"}],
199
- "relations": [{"from": "name", "fromType": "type", "to": "name", "toType": "type", "relation": "references|depends_on|related_to|part_of|supersedes"}]
200
- }`;
201
- try {
202
- const res = await openai.chat.completions.create({
203
- model: "gpt-4.1-nano",
204
- messages: [{ role: "user", content: prompt }],
205
- temperature: 0,
206
- max_tokens: 1e3,
207
- response_format: { type: "json_object" }
208
- });
209
- const text = res.choices[0]?.message?.content?.trim() || "{}";
210
- const parsed = JSON.parse(text);
211
- const extractedEntities = parsed.entities || [];
212
- const extractedRelations = parsed.relations || [];
213
- let entityCount = 0;
214
- let relationCount = 0;
215
- const entityMap = /* @__PURE__ */ new Map();
216
- for (const ent of extractedEntities.slice(0, 20)) {
217
- if (!ent.name || !ent.type) continue;
218
- const embedding = await embedSingle(`${ent.type}: ${ent.name} - ${ent.description || ""}`);
219
- const entity = await prisma.entity.upsert({
220
- where: {
221
- projectId_name_entityType: {
222
- projectId,
223
- name: ent.name,
224
- entityType: ent.type
225
- }
226
- },
227
- update: {
228
- description: ent.description,
229
- sourceChunkId: chunkId,
230
- embedding,
231
- updatedAt: /* @__PURE__ */ new Date()
232
- },
233
- create: {
234
- projectId,
235
- name: ent.name,
236
- entityType: ent.type,
237
- description: ent.description,
238
- metadata: { ...metadata, autoExtracted: true },
239
- sourceChunkId: chunkId,
240
- embedding
241
- }
242
- });
243
- entityMap.set(`${ent.name}:${ent.type}`, entity.id);
244
- entityCount++;
245
- }
246
- for (const rel of extractedRelations.slice(0, 30)) {
247
- if (!rel.from || !rel.to || !rel.relation) continue;
248
- const fromId = entityMap.get(`${rel.from}:${rel.fromType}`);
249
- const toId = entityMap.get(`${rel.to}:${rel.toType}`);
250
- if (!fromId || !toId) continue;
251
- const validRelations = [
252
- "imports",
253
- "exports",
254
- "calls",
255
- "implements",
256
- "extends",
257
- "references",
258
- "depends_on",
259
- "related_to",
260
- "part_of",
261
- "contradicts",
262
- "supersedes"
263
- ];
264
- if (!validRelations.includes(rel.relation)) continue;
265
- await prisma.entityRelation.upsert({
266
- where: {
267
- fromEntityId_toEntityId_relationType: {
268
- fromEntityId: fromId,
269
- toEntityId: toId,
270
- relationType: rel.relation
271
- }
272
- },
273
- update: {
274
- metadata: { autoExtracted: true }
275
- },
276
- create: {
277
- projectId,
278
- fromEntityId: fromId,
279
- toEntityId: toId,
280
- relationType: rel.relation,
281
- metadata: { autoExtracted: true }
282
- }
283
- });
284
- relationCount++;
285
- }
286
- return { entities: entityCount, relations: relationCount };
287
- } catch {
288
- return { entities: 0, relations: 0 };
289
- }
290
- }
291
-
292
- // src/engine/ingest.ts
293
- import { createHash } from "crypto";
294
- import PQueue from "p-queue";
295
- var queue = new PQueue({ concurrency: 3 });
296
- var ENABLE_AUTO_EXTRACTION = process.env.DISABLE_AUTO_EXTRACTION !== "true";
297
- async function ingestDocument(input) {
298
- const { sourceId, projectId, externalId, title, content, metadata = {}, filePath } = input;
299
- const contentHash = createHash("sha256").update(content).digest("hex");
300
- const doc = await prisma.document.upsert({
301
- where: {
302
- sourceId_externalId: {
303
- sourceId,
304
- externalId
305
- }
306
- },
307
- update: {
308
- title,
309
- content,
310
- metadata,
311
- contentHash,
312
- updatedAt: /* @__PURE__ */ new Date()
313
- },
314
- create: {
315
- sourceId,
316
- projectId,
317
- externalId,
318
- title,
319
- content,
320
- metadata,
321
- contentHash
322
- }
323
- });
324
- await prisma.chunk.deleteMany({
325
- where: { documentId: doc.id }
326
- });
327
- const textChunks = chunkText(content, {
328
- filePath: filePath || externalId,
329
- metadata: { ...metadata, title }
330
- });
331
- if (textChunks.length === 0) return doc;
332
- const batchSize = 50;
333
- const insertedChunkIds = [];
334
- for (let i = 0; i < textChunks.length; i += batchSize) {
335
- const batch = textChunks.slice(i, i + batchSize);
336
- const embeddings = await embed(batch.map((c) => c.content));
337
- const inserted = await prisma.$transaction(
338
- batch.map(
339
- (chunk, j) => prisma.chunk.create({
340
- data: {
341
- documentId: doc.id,
342
- projectId,
343
- content: chunk.content,
344
- chunkType: chunk.chunkType,
345
- chunkIndex: chunk.chunkIndex,
346
- metadata: chunk.metadata,
347
- embedding: embeddings[j],
348
- tokenCount: Math.ceil(chunk.content.length / 4)
349
- },
350
- select: { id: true }
351
- })
352
- )
353
- );
354
- insertedChunkIds.push(...inserted.map((c) => c.id));
355
- }
356
- if (ENABLE_AUTO_EXTRACTION && !input.skipEntityExtraction) {
357
- const chunksToExtract = textChunks.filter((c) => c.content.length > 200).slice(0, 5);
358
- for (let i = 0; i < chunksToExtract.length; i++) {
359
- const chunk = chunksToExtract[i];
360
- const chunkId = insertedChunkIds[textChunks.indexOf(chunk)];
361
- extractEntities(projectId, chunk.content, chunk.chunkType, metadata, chunkId).catch(() => {
362
- });
363
- }
364
- }
365
- const docCount = await prisma.document.count({
366
- where: { sourceId }
367
- });
368
- const chunkCount = await prisma.chunk.count({
369
- where: { documentId: doc.id }
370
- });
371
- await prisma.source.update({
372
- where: { id: sourceId },
373
- data: {
374
- documentCount: docCount,
375
- chunkCount,
376
- lastSyncAt: /* @__PURE__ */ new Date(),
377
- status: "READY",
378
- updatedAt: /* @__PURE__ */ new Date()
379
- }
380
- });
381
- return doc;
382
- }
383
-
384
- export {
385
- ingestDocument
386
- };
1
+ import {
2
+ embed,
3
+ embedSingle,
4
+ prisma
5
+ } from "./chunk-X2DL2GWT.js";
6
+
7
+ // src/engine/chunker.ts
8
+ var CODE_EXTENSIONS = /* @__PURE__ */ new Set([
9
+ ".ts",
10
+ ".tsx",
11
+ ".js",
12
+ ".jsx",
13
+ ".py",
14
+ ".java",
15
+ ".go",
16
+ ".rb",
17
+ ".php",
18
+ ".cs",
19
+ ".rs",
20
+ ".swift",
21
+ ".kt",
22
+ ".scala",
23
+ ".c",
24
+ ".cpp",
25
+ ".h",
26
+ ".hpp",
27
+ ".sol",
28
+ ".vy"
29
+ ]);
30
+ var CONFIG_EXTENSIONS = /* @__PURE__ */ new Set([
31
+ ".json",
32
+ ".yaml",
33
+ ".yml",
34
+ ".toml",
35
+ ".ini",
36
+ ".env",
37
+ ".xml"
38
+ ]);
39
+ function detectChunkType(filePath, content) {
40
+ if (!filePath) return "text";
41
+ const ext = "." + filePath.split(".").pop()?.toLowerCase();
42
+ if (CODE_EXTENSIONS.has(ext)) return "code";
43
+ if (CONFIG_EXTENSIONS.has(ext)) return "config";
44
+ if (filePath.includes("schema") || filePath.includes("migration")) return "schema";
45
+ if (filePath.endsWith(".md") || filePath.endsWith(".mdx") || filePath.endsWith(".rst")) return "documentation";
46
+ if (filePath.includes("openapi") || filePath.includes("swagger")) return "api_spec";
47
+ return "text";
48
+ }
49
+ function chunkText(content, opts = {}) {
50
+ const { chunkSize = 1e3, chunkOverlap = 200, filePath, metadata = {} } = opts;
51
+ const chunkType = detectChunkType(filePath, content);
52
+ if (chunkType === "code") {
53
+ return chunkCode(content, { chunkSize, filePath, metadata });
54
+ }
55
+ return chunkBySize(content, { chunkSize, chunkOverlap, chunkType, metadata });
56
+ }
57
+ function chunkCode(content, opts) {
58
+ const { chunkSize, filePath, metadata = {} } = opts;
59
+ const lines = content.split("\n");
60
+ const chunks = [];
61
+ const boundaries = [
62
+ /^(export\s+)?(async\s+)?function\s+/,
63
+ /^(export\s+)?(default\s+)?class\s+/,
64
+ /^(export\s+)?const\s+\w+\s*=\s*(async\s+)?\(/,
65
+ /^(export\s+)?const\s+\w+\s*=\s*\{/,
66
+ /^(export\s+)?interface\s+/,
67
+ /^(export\s+)?type\s+/,
68
+ /^(export\s+)?enum\s+/,
69
+ /^def\s+/,
70
+ // Python
71
+ /^class\s+/,
72
+ // Python/Java
73
+ /^func\s+/,
74
+ // Go
75
+ /^pub\s+(fn|struct|enum|impl)/
76
+ // Rust
77
+ ];
78
+ let currentChunk = [];
79
+ let currentStart = 0;
80
+ for (let i = 0; i < lines.length; i++) {
81
+ const trimmed = lines[i].trimStart();
82
+ const isBoundary = boundaries.some((b) => b.test(trimmed));
83
+ if (isBoundary && currentChunk.length > 0) {
84
+ const chunkContent = currentChunk.join("\n").trim();
85
+ if (chunkContent.length > 0) {
86
+ chunks.push({
87
+ content: chunkContent,
88
+ chunkType: "code",
89
+ chunkIndex: chunks.length,
90
+ metadata: {
91
+ ...metadata,
92
+ filePath,
93
+ startLine: currentStart + 1,
94
+ endLine: i
95
+ }
96
+ });
97
+ }
98
+ currentChunk = [lines[i]];
99
+ currentStart = i;
100
+ } else {
101
+ currentChunk.push(lines[i]);
102
+ }
103
+ if (currentChunk.join("\n").length > chunkSize * 1.5) {
104
+ const chunkContent = currentChunk.join("\n").trim();
105
+ if (chunkContent.length > 0) {
106
+ chunks.push({
107
+ content: chunkContent,
108
+ chunkType: "code",
109
+ chunkIndex: chunks.length,
110
+ metadata: {
111
+ ...metadata,
112
+ filePath,
113
+ startLine: currentStart + 1,
114
+ endLine: i + 1
115
+ }
116
+ });
117
+ }
118
+ currentChunk = [];
119
+ currentStart = i + 1;
120
+ }
121
+ }
122
+ if (currentChunk.length > 0) {
123
+ const chunkContent = currentChunk.join("\n").trim();
124
+ if (chunkContent.length > 0) {
125
+ chunks.push({
126
+ content: chunkContent,
127
+ chunkType: "code",
128
+ chunkIndex: chunks.length,
129
+ metadata: {
130
+ ...metadata,
131
+ filePath,
132
+ startLine: currentStart + 1,
133
+ endLine: lines.length
134
+ }
135
+ });
136
+ }
137
+ }
138
+ return chunks;
139
+ }
140
+ function chunkBySize(content, opts) {
141
+ const { chunkSize, chunkOverlap, chunkType, metadata = {} } = opts;
142
+ const chunks = [];
143
+ const paragraphs = content.split(/\n\n+/);
144
+ let current = "";
145
+ for (const para of paragraphs) {
146
+ if ((current + "\n\n" + para).length > chunkSize && current.length > 0) {
147
+ chunks.push({
148
+ content: current.trim(),
149
+ chunkType,
150
+ chunkIndex: chunks.length,
151
+ metadata
152
+ });
153
+ const words = current.split(/\s+/);
154
+ const overlapWords = words.slice(-Math.floor(chunkOverlap / 5));
155
+ current = overlapWords.join(" ") + "\n\n" + para;
156
+ } else {
157
+ current = current ? current + "\n\n" + para : para;
158
+ }
159
+ }
160
+ if (current.trim().length > 0) {
161
+ chunks.push({
162
+ content: current.trim(),
163
+ chunkType,
164
+ chunkIndex: chunks.length,
165
+ metadata
166
+ });
167
+ }
168
+ return chunks;
169
+ }
170
+
171
+ // src/engine/extractor.ts
172
+ import OpenAI from "openai";
173
+ var openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
174
+ async function extractEntities(projectId, content, chunkType, metadata = {}, chunkId) {
175
+ if (content.length < 100) return { entities: 0, relations: 0 };
176
+ const isCode = ["code", "function", "class"].includes(chunkType);
177
+ const prompt = isCode ? `Analyze this code and extract entities and relationships.
178
+
179
+ Entities: functions, classes, interfaces, types, modules, variables, constants, API endpoints, services.
180
+ Relations: imports, exports, calls, implements, extends, depends_on, references, part_of.
181
+
182
+ Code:
183
+ \`\`\`
184
+ ${content.slice(0, 3e3)}
185
+ \`\`\`
186
+
187
+ Respond with JSON only:
188
+ {
189
+ "entities": [{"name": "...", "type": "function|class|interface|module|constant|api_endpoint|service", "description": "one line"}],
190
+ "relations": [{"from": "name", "fromType": "type", "to": "name", "toType": "type", "relation": "imports|calls|extends|implements|depends_on|references|part_of"}]
191
+ }` : `Analyze this text and extract key entities (concepts, people, tools, services, APIs, technologies) and their relationships.
192
+
193
+ Text:
194
+ ${content.slice(0, 3e3)}
195
+
196
+ Respond with JSON only:
197
+ {
198
+ "entities": [{"name": "...", "type": "concept|tool|service|api|technology|person|organization", "description": "one line"}],
199
+ "relations": [{"from": "name", "fromType": "type", "to": "name", "toType": "type", "relation": "references|depends_on|related_to|part_of|supersedes"}]
200
+ }`;
201
+ try {
202
+ const res = await openai.chat.completions.create({
203
+ model: "gpt-4.1-nano",
204
+ messages: [{ role: "user", content: prompt }],
205
+ temperature: 0,
206
+ max_tokens: 1e3,
207
+ response_format: { type: "json_object" }
208
+ });
209
+ const text = res.choices[0]?.message?.content?.trim() || "{}";
210
+ const parsed = JSON.parse(text);
211
+ const extractedEntities = parsed.entities || [];
212
+ const extractedRelations = parsed.relations || [];
213
+ let entityCount = 0;
214
+ let relationCount = 0;
215
+ const entityMap = /* @__PURE__ */ new Map();
216
+ for (const ent of extractedEntities.slice(0, 20)) {
217
+ if (!ent.name || !ent.type) continue;
218
+ const embedding = await embedSingle(`${ent.type}: ${ent.name} - ${ent.description || ""}`);
219
+ const entity = await prisma.entity.upsert({
220
+ where: {
221
+ projectId_name_entityType: {
222
+ projectId,
223
+ name: ent.name,
224
+ entityType: ent.type
225
+ }
226
+ },
227
+ update: {
228
+ description: ent.description,
229
+ sourceChunkId: chunkId,
230
+ embedding,
231
+ updatedAt: /* @__PURE__ */ new Date()
232
+ },
233
+ create: {
234
+ projectId,
235
+ name: ent.name,
236
+ entityType: ent.type,
237
+ description: ent.description,
238
+ metadata: { ...metadata, autoExtracted: true },
239
+ sourceChunkId: chunkId,
240
+ embedding
241
+ }
242
+ });
243
+ entityMap.set(`${ent.name}:${ent.type}`, entity.id);
244
+ entityCount++;
245
+ }
246
+ for (const rel of extractedRelations.slice(0, 30)) {
247
+ if (!rel.from || !rel.to || !rel.relation) continue;
248
+ const fromId = entityMap.get(`${rel.from}:${rel.fromType}`);
249
+ const toId = entityMap.get(`${rel.to}:${rel.toType}`);
250
+ if (!fromId || !toId) continue;
251
+ const validRelations = [
252
+ "imports",
253
+ "exports",
254
+ "calls",
255
+ "implements",
256
+ "extends",
257
+ "references",
258
+ "depends_on",
259
+ "related_to",
260
+ "part_of",
261
+ "contradicts",
262
+ "supersedes"
263
+ ];
264
+ if (!validRelations.includes(rel.relation)) continue;
265
+ await prisma.entityRelation.upsert({
266
+ where: {
267
+ fromEntityId_toEntityId_relationType: {
268
+ fromEntityId: fromId,
269
+ toEntityId: toId,
270
+ relationType: rel.relation
271
+ }
272
+ },
273
+ update: {
274
+ metadata: { autoExtracted: true }
275
+ },
276
+ create: {
277
+ projectId,
278
+ fromEntityId: fromId,
279
+ toEntityId: toId,
280
+ relationType: rel.relation,
281
+ metadata: { autoExtracted: true }
282
+ }
283
+ });
284
+ relationCount++;
285
+ }
286
+ return { entities: entityCount, relations: relationCount };
287
+ } catch {
288
+ return { entities: 0, relations: 0 };
289
+ }
290
+ }
291
+
292
+ // src/engine/ingest.ts
293
+ import { createHash } from "crypto";
294
+ import PQueue from "p-queue";
295
+ var queue = new PQueue({ concurrency: 3 });
296
+ var ENABLE_AUTO_EXTRACTION = process.env.DISABLE_AUTO_EXTRACTION !== "true";
297
+ async function ingestDocument(input) {
298
+ const { sourceId, projectId, externalId, title, content, metadata = {}, filePath } = input;
299
+ const contentHash = createHash("sha256").update(content).digest("hex");
300
+ const doc = await prisma.document.upsert({
301
+ where: {
302
+ sourceId_externalId: {
303
+ sourceId,
304
+ externalId
305
+ }
306
+ },
307
+ update: {
308
+ title,
309
+ content,
310
+ metadata,
311
+ contentHash,
312
+ updatedAt: /* @__PURE__ */ new Date()
313
+ },
314
+ create: {
315
+ sourceId,
316
+ projectId,
317
+ externalId,
318
+ title,
319
+ content,
320
+ metadata,
321
+ contentHash
322
+ }
323
+ });
324
+ await prisma.chunk.deleteMany({
325
+ where: { documentId: doc.id }
326
+ });
327
+ const textChunks = chunkText(content, {
328
+ filePath: filePath || externalId,
329
+ metadata: { ...metadata, title }
330
+ });
331
+ if (textChunks.length === 0) return doc;
332
+ const batchSize = 50;
333
+ const insertedChunkIds = [];
334
+ for (let i = 0; i < textChunks.length; i += batchSize) {
335
+ const batch = textChunks.slice(i, i + batchSize);
336
+ const embeddings = await embed(batch.map((c) => c.content));
337
+ const inserted = await prisma.$transaction(
338
+ batch.map(
339
+ (chunk, j) => prisma.chunk.create({
340
+ data: {
341
+ documentId: doc.id,
342
+ projectId,
343
+ content: chunk.content,
344
+ chunkType: chunk.chunkType,
345
+ chunkIndex: chunk.chunkIndex,
346
+ metadata: chunk.metadata,
347
+ embedding: embeddings[j],
348
+ tokenCount: Math.ceil(chunk.content.length / 4)
349
+ },
350
+ select: { id: true }
351
+ })
352
+ )
353
+ );
354
+ insertedChunkIds.push(...inserted.map((c) => c.id));
355
+ }
356
+ if (ENABLE_AUTO_EXTRACTION && !input.skipEntityExtraction) {
357
+ const chunksToExtract = textChunks.filter((c) => c.content.length > 200).slice(0, 5);
358
+ for (let i = 0; i < chunksToExtract.length; i++) {
359
+ const chunk = chunksToExtract[i];
360
+ const chunkId = insertedChunkIds[textChunks.indexOf(chunk)];
361
+ extractEntities(projectId, chunk.content, chunk.chunkType, metadata, chunkId).catch(() => {
362
+ });
363
+ }
364
+ }
365
+ const docCount = await prisma.document.count({
366
+ where: { sourceId }
367
+ });
368
+ const chunkCount = await prisma.chunk.count({
369
+ where: { documentId: doc.id }
370
+ });
371
+ await prisma.source.update({
372
+ where: { id: sourceId },
373
+ data: {
374
+ documentCount: docCount,
375
+ chunkCount,
376
+ lastSyncAt: /* @__PURE__ */ new Date(),
377
+ status: "READY",
378
+ updatedAt: /* @__PURE__ */ new Date()
379
+ }
380
+ });
381
+ return doc;
382
+ }
383
+
384
+ export {
385
+ ingestDocument
386
+ };
387
387
  //# sourceMappingURL=chunk-FTWUJBAH.js.map