@usewhisper/mcp-server 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1076 @@
1
+ import {
2
+ embed,
3
+ embedSingle,
4
+ prisma
5
+ } from "./chunk-MEFLJ4PV.js";
6
+ import {
7
+ __commonJS,
8
+ __toESM
9
+ } from "./chunk-QGM4M3NI.js";
10
+
11
+ // ../node_modules/eventemitter3/index.js
12
+ var require_eventemitter3 = __commonJS({
13
+ "../node_modules/eventemitter3/index.js"(exports, module) {
14
+ "use strict";
15
+ var has = Object.prototype.hasOwnProperty;
16
+ var prefix = "~";
17
+ function Events() {
18
+ }
19
+ if (Object.create) {
20
+ Events.prototype = /* @__PURE__ */ Object.create(null);
21
+ if (!new Events().__proto__) prefix = false;
22
+ }
23
+ function EE(fn, context, once) {
24
+ this.fn = fn;
25
+ this.context = context;
26
+ this.once = once || false;
27
+ }
28
+ function addListener(emitter, event, fn, context, once) {
29
+ if (typeof fn !== "function") {
30
+ throw new TypeError("The listener must be a function");
31
+ }
32
+ var listener = new EE(fn, context || emitter, once), evt = prefix ? prefix + event : event;
33
+ if (!emitter._events[evt]) emitter._events[evt] = listener, emitter._eventsCount++;
34
+ else if (!emitter._events[evt].fn) emitter._events[evt].push(listener);
35
+ else emitter._events[evt] = [emitter._events[evt], listener];
36
+ return emitter;
37
+ }
38
+ function clearEvent(emitter, evt) {
39
+ if (--emitter._eventsCount === 0) emitter._events = new Events();
40
+ else delete emitter._events[evt];
41
+ }
42
+ function EventEmitter2() {
43
+ this._events = new Events();
44
+ this._eventsCount = 0;
45
+ }
46
+ EventEmitter2.prototype.eventNames = function eventNames() {
47
+ var names = [], events, name;
48
+ if (this._eventsCount === 0) return names;
49
+ for (name in events = this._events) {
50
+ if (has.call(events, name)) names.push(prefix ? name.slice(1) : name);
51
+ }
52
+ if (Object.getOwnPropertySymbols) {
53
+ return names.concat(Object.getOwnPropertySymbols(events));
54
+ }
55
+ return names;
56
+ };
57
+ EventEmitter2.prototype.listeners = function listeners(event) {
58
+ var evt = prefix ? prefix + event : event, handlers = this._events[evt];
59
+ if (!handlers) return [];
60
+ if (handlers.fn) return [handlers.fn];
61
+ for (var i = 0, l = handlers.length, ee = new Array(l); i < l; i++) {
62
+ ee[i] = handlers[i].fn;
63
+ }
64
+ return ee;
65
+ };
66
+ EventEmitter2.prototype.listenerCount = function listenerCount(event) {
67
+ var evt = prefix ? prefix + event : event, listeners = this._events[evt];
68
+ if (!listeners) return 0;
69
+ if (listeners.fn) return 1;
70
+ return listeners.length;
71
+ };
72
+ EventEmitter2.prototype.emit = function emit(event, a1, a2, a3, a4, a5) {
73
+ var evt = prefix ? prefix + event : event;
74
+ if (!this._events[evt]) return false;
75
+ var listeners = this._events[evt], len = arguments.length, args, i;
76
+ if (listeners.fn) {
77
+ if (listeners.once) this.removeListener(event, listeners.fn, void 0, true);
78
+ switch (len) {
79
+ case 1:
80
+ return listeners.fn.call(listeners.context), true;
81
+ case 2:
82
+ return listeners.fn.call(listeners.context, a1), true;
83
+ case 3:
84
+ return listeners.fn.call(listeners.context, a1, a2), true;
85
+ case 4:
86
+ return listeners.fn.call(listeners.context, a1, a2, a3), true;
87
+ case 5:
88
+ return listeners.fn.call(listeners.context, a1, a2, a3, a4), true;
89
+ case 6:
90
+ return listeners.fn.call(listeners.context, a1, a2, a3, a4, a5), true;
91
+ }
92
+ for (i = 1, args = new Array(len - 1); i < len; i++) {
93
+ args[i - 1] = arguments[i];
94
+ }
95
+ listeners.fn.apply(listeners.context, args);
96
+ } else {
97
+ var length = listeners.length, j;
98
+ for (i = 0; i < length; i++) {
99
+ if (listeners[i].once) this.removeListener(event, listeners[i].fn, void 0, true);
100
+ switch (len) {
101
+ case 1:
102
+ listeners[i].fn.call(listeners[i].context);
103
+ break;
104
+ case 2:
105
+ listeners[i].fn.call(listeners[i].context, a1);
106
+ break;
107
+ case 3:
108
+ listeners[i].fn.call(listeners[i].context, a1, a2);
109
+ break;
110
+ case 4:
111
+ listeners[i].fn.call(listeners[i].context, a1, a2, a3);
112
+ break;
113
+ default:
114
+ if (!args) for (j = 1, args = new Array(len - 1); j < len; j++) {
115
+ args[j - 1] = arguments[j];
116
+ }
117
+ listeners[i].fn.apply(listeners[i].context, args);
118
+ }
119
+ }
120
+ }
121
+ return true;
122
+ };
123
+ EventEmitter2.prototype.on = function on(event, fn, context) {
124
+ return addListener(this, event, fn, context, false);
125
+ };
126
+ EventEmitter2.prototype.once = function once(event, fn, context) {
127
+ return addListener(this, event, fn, context, true);
128
+ };
129
+ EventEmitter2.prototype.removeListener = function removeListener(event, fn, context, once) {
130
+ var evt = prefix ? prefix + event : event;
131
+ if (!this._events[evt]) return this;
132
+ if (!fn) {
133
+ clearEvent(this, evt);
134
+ return this;
135
+ }
136
+ var listeners = this._events[evt];
137
+ if (listeners.fn) {
138
+ if (listeners.fn === fn && (!once || listeners.once) && (!context || listeners.context === context)) {
139
+ clearEvent(this, evt);
140
+ }
141
+ } else {
142
+ for (var i = 0, events = [], length = listeners.length; i < length; i++) {
143
+ if (listeners[i].fn !== fn || once && !listeners[i].once || context && listeners[i].context !== context) {
144
+ events.push(listeners[i]);
145
+ }
146
+ }
147
+ if (events.length) this._events[evt] = events.length === 1 ? events[0] : events;
148
+ else clearEvent(this, evt);
149
+ }
150
+ return this;
151
+ };
152
+ EventEmitter2.prototype.removeAllListeners = function removeAllListeners(event) {
153
+ var evt;
154
+ if (event) {
155
+ evt = prefix ? prefix + event : event;
156
+ if (this._events[evt]) clearEvent(this, evt);
157
+ } else {
158
+ this._events = new Events();
159
+ this._eventsCount = 0;
160
+ }
161
+ return this;
162
+ };
163
+ EventEmitter2.prototype.off = EventEmitter2.prototype.removeListener;
164
+ EventEmitter2.prototype.addListener = EventEmitter2.prototype.on;
165
+ EventEmitter2.prefixed = prefix;
166
+ EventEmitter2.EventEmitter = EventEmitter2;
167
+ if ("undefined" !== typeof module) {
168
+ module.exports = EventEmitter2;
169
+ }
170
+ }
171
+ });
172
+
173
+ // ../src/engine/chunker.ts
174
// Lowercase file extensions (with leading dot) that are chunked as source code.
var CODE_EXTENSIONS = /* @__PURE__ */ new Set([
  ".ts",
  ".tsx",
  ".js",
  ".jsx",
  ".py",
  ".java",
  ".go",
  ".rb",
  ".php",
  ".cs",
  ".rs",
  ".swift",
  ".kt",
  ".scala",
  ".c",
  ".cpp",
  ".h",
  ".hpp",
  ".sol",
  ".vy"
]);

// Lowercase file extensions (with leading dot) that are classified as configuration.
var CONFIG_EXTENSIONS = /* @__PURE__ */ new Set([
  ".json",
  ".yaml",
  ".yml",
  ".toml",
  ".ini",
  ".env",
  ".xml"
]);

/**
 * Classify a file by its path.
 *
 * Matching is case-insensitive throughout (the extension was already lowercased;
 * the substring and suffix checks now are too). Checks run in this order:
 * code extension, config extension, "schema"/"migration" in the path,
 * Markdown/RST suffix, "openapi"/"swagger" in the path.
 *
 * A config-extension file whose path mentions "openapi" or "swagger" is reported
 * as "api_spec": previously the config check returned first, so the usual
 * YAML/JSON spec files could never receive that type.
 *
 * @param {string} [filePath] path or identifier of the file; falsy yields "text"
 * @param {string} [content] unused; kept so existing call sites stay valid
 * @returns {"code"|"config"|"schema"|"documentation"|"api_spec"|"text"}
 */
function detectChunkType(filePath, content) {
  if (!filePath) return "text";
  const lowerPath = filePath.toLowerCase();
  const ext = "." + lowerPath.split(".").pop();
  const mentionsApiSpec = lowerPath.includes("openapi") || lowerPath.includes("swagger");

  if (CODE_EXTENSIONS.has(ext)) return "code";
  if (CONFIG_EXTENSIONS.has(ext)) return mentionsApiSpec ? "api_spec" : "config";
  if (lowerPath.includes("schema") || lowerPath.includes("migration")) return "schema";
  if (lowerPath.endsWith(".md") || lowerPath.endsWith(".mdx") || lowerPath.endsWith(".rst")) {
    return "documentation";
  }
  if (mentionsApiSpec) return "api_spec";
  return "text";
}
215
/**
 * Split `content` into chunks, choosing the strategy from the detected chunk type.
 *
 * Files detected as "code" go through `chunkCode`; everything else is split by
 * `chunkBySize` and tagged with the detected type.
 *
 * @param {string} content text to split
 * @param {object} [opts]
 * @param {number} [opts.chunkSize=1000] size limit forwarded to the chunker
 * @param {number} [opts.chunkOverlap=200] overlap forwarded to `chunkBySize` only
 * @param {string} [opts.filePath] path used for type detection
 * @param {object} [opts.metadata={}] metadata forwarded to the chunker
 * @returns {Array<object>} whatever the selected chunker returns
 */
function chunkText(content, opts = {}) {
  const { filePath, metadata = {}, chunkSize = 1e3, chunkOverlap = 200 } = opts;
  const detectedType = detectChunkType(filePath, content);
  return detectedType === "code"
    ? chunkCode(content, { chunkSize, filePath, metadata })
    : chunkBySize(content, { chunkSize, chunkOverlap, chunkType: detectedType, metadata });
}
223
/**
 * Split source code into chunks that start at definition boundaries.
 *
 * A new chunk begins on any line that (after leading whitespace) matches one of
 * the boundary patterns. A chunk is also cut as soon as its buffered text grows
 * beyond 1.5 x `chunkSize` characters. Buffers that are empty after trimming are
 * dropped.
 *
 * @param {string} content source text
 * @param {object} opts
 * @param {number} opts.chunkSize nominal chunk size in characters
 * @param {string} [opts.filePath] stored in each chunk's metadata
 * @param {object} [opts.metadata={}] copied into each chunk's metadata
 * @returns {Array<{content: string, chunkType: "code", chunkIndex: number, metadata: object}>}
 *   chunks whose metadata carries 1-based, inclusive `startLine` / `endLine`
 */
function chunkCode(content, opts) {
  const { chunkSize, filePath, metadata = {} } = opts;
  const lines = content.split("\n");
  const chunks = [];
  const boundaryPatterns = [
    /^(export\s+)?(async\s+)?function\s+/,
    /^(export\s+)?(default\s+)?class\s+/,
    /^(export\s+)?const\s+\w+\s*=\s*(async\s+)?\(/,
    /^(export\s+)?const\s+\w+\s*=\s*\{/,
    /^(export\s+)?interface\s+/,
    /^(export\s+)?type\s+/,
    /^(export\s+)?enum\s+/,
    // Python
    /^def\s+/,
    // Python/Java
    /^class\s+/,
    // Go
    /^func\s+/,
    // Rust
    /^pub\s+(fn|struct|enum|impl)/
  ];
  const sizeLimit = chunkSize * 1.5;
  const startsDefinition = (line) => {
    const trimmed = line.trimStart();
    return boundaryPatterns.some((pattern) => pattern.test(trimmed));
  };

  let pending = [];
  let pendingStart = 0;

  // Emit the buffered lines as one chunk ending at 1-based line `endLine`.
  const flush = (endLine) => {
    const text = pending.join("\n").trim();
    if (text.length === 0) return;
    chunks.push({
      content: text,
      chunkType: "code",
      chunkIndex: chunks.length,
      metadata: {
        ...metadata,
        filePath,
        startLine: pendingStart + 1,
        endLine
      }
    });
  };

  lines.forEach((line, index) => {
    if (pending.length > 0 && startsDefinition(line)) {
      // The boundary line opens the next chunk; the previous one ends on the line before it.
      flush(index);
      pending = [line];
      pendingStart = index;
    } else {
      pending.push(line);
    }
    if (pending.join("\n").length > sizeLimit) {
      flush(index + 1);
      pending = [];
      pendingStart = index + 1;
    }
  });

  if (pending.length > 0) flush(lines.length);
  return chunks;
}
306
/**
 * Split text into chunks of roughly `chunkSize` characters along blank-line
 * paragraph boundaries, carrying a word-based overlap into the next chunk.
 *
 * Paragraphs are never split: a single paragraph longer than `chunkSize`
 * becomes one oversized chunk. The overlap is `floor(chunkOverlap / 5)`
 * trailing words of the chunk that was just emitted.
 *
 * Fix: an overlap of fewer than one word (`chunkOverlap < 5`, including 0) used
 * to evaluate `slice(-0)`, which copies the ENTIRE previous chunk into the next
 * one. Such values now mean "no overlap".
 *
 * @param {string} content text to split
 * @param {object} opts
 * @param {number} opts.chunkSize target maximum chunk length in characters
 * @param {number} opts.chunkOverlap overlap budget in characters (about 5 per word)
 * @param {string} opts.chunkType type tag stored on every chunk
 * @param {object} [opts.metadata={}] metadata object attached to every chunk
 * @returns {Array<{content: string, chunkType: string, chunkIndex: number, metadata: object}>}
 */
function chunkBySize(content, opts) {
  const { chunkSize, chunkOverlap, chunkType, metadata = {} } = opts;
  const chunks = [];
  const paragraphs = content.split(/\n\n+/);

  // NaN and negative budgets also collapse to zero words.
  const requestedWords = Math.floor(chunkOverlap / 5);
  const overlapWordCount = requestedWords > 0 ? requestedWords : 0;

  let current = "";
  for (const para of paragraphs) {
    const wouldOverflow = (current + "\n\n" + para).length > chunkSize;
    if (wouldOverflow && current.length > 0) {
      chunks.push({
        content: current.trim(),
        chunkType,
        chunkIndex: chunks.length,
        metadata
      });
      const carried = overlapWordCount > 0
        ? current.split(/\s+/).slice(-overlapWordCount).join(" ")
        : "";
      current = carried ? carried + "\n\n" + para : para;
    } else {
      current = current ? current + "\n\n" + para : para;
    }
  }

  if (current.trim().length > 0) {
    chunks.push({
      content: current.trim(),
      chunkType,
      chunkIndex: chunks.length,
      metadata
    });
  }
  return chunks;
}
336
+
337
+ // ../src/engine/extractor.ts
338
+ import OpenAI from "openai";
339
// Module-level OpenAI client used by extractEntities; the API key is read from
// the OPENAI_API_KEY environment variable when this statement runs at module load.
+ var openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
340
/**
 * Ask the chat model for entities and relations found in `content` and persist them.
 *
 * Content shorter than 100 characters is skipped. At most 20 entities and 30
 * relations from one response are stored, and only the first 3000 characters of
 * `content` are sent to the model.
 *
 * Fixes relative to the previous version:
 * - The entity upsert now uses `RETURNING id`, and relations are linked through
 *   those ids. Before, the entity NAME was passed as `fromEntityId` / `toEntityId`.
 * - A failing relation upsert is logged and skipped instead of aborting the rest.
 * - A failure of the model call or of response parsing is logged instead of being
 *   swallowed, and the counts of rows already written are still returned.
 *
 * @param {string} projectId project that owns the entities
 * @param {string} content chunk text to analyze
 * @param {string} chunkType "code", "function" and "class" select the code prompt
 * @param {object} [metadata={}] currently unused
 * @param {string} [chunkId] id of the source chunk, stored on each entity
 * @returns {Promise<{entities: number, relations: number}>} number of rows upserted
 */
async function extractEntities(projectId, content, chunkType, metadata = {}, chunkId) {
  if (content.length < 100) return { entities: 0, relations: 0 };
  const isCode = ["code", "function", "class"].includes(chunkType);
  const prompt = isCode ? `Analyze this code and extract entities and relationships.

Entities: functions, classes, interfaces, types, modules, variables, constants, API endpoints, services.
Relations: imports, exports, calls, implements, extends, depends_on, references, part_of.

Code:
\`\`\`
${content.slice(0, 3e3)}
\`\`\`

Respond with JSON only:
{
"entities": [{"name": "...", "type": "function|class|interface|module|constant|api_endpoint|service", "description": "one line"}],
"relations": [{"from": "name", "fromType": "type", "to": "name", "toType": "type", "relation": "imports|calls|extends|implements|depends_on|references|part_of"}]
}` : `Analyze this text and extract key entities (concepts, people, tools, services, APIs, technologies) and their relationships.

Text:
${content.slice(0, 3e3)}

Respond with JSON only:
{
"entities": [{"name": "...", "type": "concept|tool|service|api|technology|person|organization", "description": "one line"}],
"relations": [{"from": "name", "fromType": "type", "to": "name", "toType": "type", "relation": "references|depends_on|related_to|part_of|supersedes"}]
}`;

  // Relation types accepted for storage; anything else returned by the model is ignored.
  const validRelations = new Set([
    "imports",
    "exports",
    "calls",
    "implements",
    "extends",
    "references",
    "depends_on",
    "related_to",
    "part_of",
    "contradicts",
    "supersedes"
  ]);

  let entityCount = 0;
  let relationCount = 0;
  try {
    const res = await openai.chat.completions.create({
      model: "gpt-4o-mini",
      messages: [{ role: "user", content: prompt }],
      temperature: 0,
      max_tokens: 1e3,
      response_format: { type: "json_object" }
    });
    const text = res.choices[0]?.message?.content?.trim() || "{}";
    const parsed = JSON.parse(text);
    // The model is asked for arrays, but guard against any other JSON shape.
    const extractedEntities = Array.isArray(parsed?.entities) ? parsed.entities : [];
    const extractedRelations = Array.isArray(parsed?.relations) ? parsed.relations : [];

    // Maps "name:type" to the database id of the upserted entity.
    const entityIds = /* @__PURE__ */ new Map();
    for (const ent of extractedEntities.slice(0, 20)) {
      if (!ent?.name || !ent.type) continue;
      const embedding = await embedSingle(`${ent.type}: ${ent.name} - ${ent.description || ""}`);
      const embeddingStr = `[${embedding.join(",")}]`;
      try {
        const rows = await prisma.$queryRaw`
          INSERT INTO "entities" (
            id, "projectId", name, "entityType", description, "sourceChunkId", embedding,
            "createdAt", "updatedAt"
          )
          VALUES (
            gen_random_uuid(), ${projectId}, ${ent.name}, ${ent.type}, ${ent.description || ""},
            ${chunkId || null}, ${embeddingStr}::vector, NOW(), NOW()
          )
          ON CONFLICT ("projectId", name, "entityType")
          DO UPDATE SET
            description = EXCLUDED.description,
            "sourceChunkId" = EXCLUDED."sourceChunkId",
            embedding = EXCLUDED.embedding,
            "updatedAt" = NOW()
          RETURNING id
        `;
        // RETURNING yields the row id for both the insert and the conflict-update path.
        const entityId = rows[0]?.id;
        if (entityId) entityIds.set(`${ent.name}:${ent.type}`, entityId);
        entityCount++;
      } catch (err) {
        console.error("[Extractor] Error upserting entity:", err.message);
      }
    }

    for (const rel of extractedRelations.slice(0, 30)) {
      if (!rel?.from || !rel.to || !rel.relation) continue;
      if (!validRelations.has(rel.relation)) continue;
      const fromId = entityIds.get(`${rel.from}:${rel.fromType}`);
      const toId = entityIds.get(`${rel.to}:${rel.toType}`);
      if (!fromId || !toId) continue;
      try {
        await prisma.entityRelation.upsert({
          where: {
            fromEntityId_toEntityId_relationType: {
              fromEntityId: fromId,
              toEntityId: toId,
              relationType: rel.relation
            }
          },
          update: {
            metadata: { autoExtracted: true }
          },
          create: {
            projectId,
            fromEntityId: fromId,
            toEntityId: toId,
            relationType: rel.relation,
            metadata: { autoExtracted: true }
          }
        });
        relationCount++;
      } catch (err) {
        console.error("[Extractor] Error upserting relation:", err.message);
      }
    }
  } catch (err) {
    // Extraction is best-effort: report the failure but never throw to the caller.
    console.error("[Extractor] Extraction failed:", err?.message ?? err);
  }
  return { entities: entityCount, relations: relationCount };
}
454
+
455
+ // ../src/engine/ingest.ts
456
+ import { createHash } from "crypto";
457
+
458
+ // ../node_modules/eventemitter3/index.mjs
459
// Evaluates the bundled eventemitter3 module defined earlier in this file and passes
// the result through the bundler's __toESM helper (imported from another chunk);
// the PQueue class in this file extends `import_index.default`.
+ var import_index = __toESM(require_eventemitter3(), 1);
460
+
461
+ // ../node_modules/p-timeout/index.js
462
/** Error type used by pTimeout to reject when the time limit elapses. */
var TimeoutError = class extends Error {
  name = "TimeoutError";

  /** @param {string} [message] human-readable description */
  constructor(message) {
    super(message);
  }
};
468
/** Stand-in abort error, used by getDOMException when no global DOMException exists. */
var AbortError = class extends Error {
  name = "AbortError";

  /** @param {*} message stored on the instance as-is, without string conversion */
  constructor(message) {
    super();
    this.message = message;
  }
};
475
/**
 * Build an abort error for `errorMessage`: a DOMException when the runtime has
 * one, otherwise the local AbortError.
 */
var getDOMException = (errorMessage) => {
  if (globalThis.DOMException === void 0) {
    return new AbortError(errorMessage);
  }
  return new DOMException(errorMessage);
};

/**
 * Turn the `reason` of an aborted signal into an Error instance.
 * An undefined reason becomes a generic abort error; a non-Error reason is
 * used as the message of a new abort error.
 */
var getAbortedReason = (signal) => {
  const { reason } = signal;
  if (reason === void 0) {
    return getDOMException("This operation was aborted.");
  }
  return reason instanceof Error ? reason : getDOMException(reason);
};
480
/**
 * Race `promise` against a timer.
 *
 * @param {Promise} promise operation to wait for; if it has a `cancel()` method,
 *   that method is called when the timer fires and no fallback is given
 * @param {object} options
 * @param {number} options.milliseconds positive time limit; `Infinity` disables the timer
 * @param {Function} [options.fallback] called on timeout; its result (or thrown error) settles the promise
 * @param {string|Error|false} [options.message] timeout message, error to reject with,
 *   or `false` to resolve with `undefined` on timeout
 * @param {{setTimeout: Function, clearTimeout: Function}} [options.customTimers] timer implementation
 * @param {AbortSignal} [options.signal] rejects with the abort reason when aborted
 * @returns {Promise} promise with an extra `clear()` method that cancels the timer
 */
function pTimeout(promise, options) {
  const {
    milliseconds,
    fallback,
    message,
    customTimers = { setTimeout, clearTimeout }
  } = options;
  let timerHandle;
  let onAbort;

  const raced = new Promise((resolve, reject) => {
    const isPositiveNumber = typeof milliseconds === "number" && Math.sign(milliseconds) === 1;
    if (!isPositiveNumber) {
      throw new TypeError(`Expected \`milliseconds\` to be a positive number, got \`${milliseconds}\``);
    }

    if (options.signal) {
      const { signal } = options;
      if (signal.aborted) {
        reject(getAbortedReason(signal));
      }
      onAbort = () => {
        reject(getAbortedReason(signal));
      };
      signal.addEventListener("abort", onAbort, { once: true });
    }

    if (milliseconds === Number.POSITIVE_INFINITY) {
      promise.then(resolve, reject);
      return;
    }

    // Created before the timer is armed, so it exists ahead of any timeout.
    const timeoutError = new TimeoutError();

    const handleTimeout = () => {
      if (fallback) {
        try {
          resolve(fallback());
        } catch (error) {
          reject(error);
        }
        return;
      }
      if (typeof promise.cancel === "function") {
        promise.cancel();
      }
      if (message === false) {
        resolve();
        return;
      }
      if (message instanceof Error) {
        reject(message);
        return;
      }
      timeoutError.message = message ?? `Promise timed out after ${milliseconds} milliseconds`;
      reject(timeoutError);
    };
    timerHandle = customTimers.setTimeout.call(void 0, handleTimeout, milliseconds);

    (async () => {
      try {
        resolve(await promise);
      } catch (error) {
        reject(error);
      }
    })();
  });

  // Whatever the outcome, stop the timer and detach the abort listener.
  const cancelable = raced.finally(() => {
    cancelable.clear();
    if (onAbort && options.signal) {
      options.signal.removeEventListener("abort", onAbort);
    }
  });
  cancelable.clear = () => {
    customTimers.clearTimeout.call(void 0, timerHandle);
    timerHandle = void 0;
  };
  return cancelable;
}
549
+
550
+ // ../node_modules/p-queue/dist/lower-bound.js
551
/**
 * Binary search over `array` for `value`.
 *
 * @param {Array} array elements ordered consistently with `comparator`
 * @param {*} value value to locate
 * @param {(element: *, value: *) => number} comparator called as `comparator(array[i], value)`
 * @returns {number} the first index whose element compares greater than zero
 *   against `value`, or `array.length` when there is none
 */
function lowerBound(array, value, comparator) {
  let low = 0;
  let high = array.length;
  while (low < high) {
    const middle = low + Math.trunc((high - low) / 2);
    if (comparator(array[middle], value) <= 0) {
      low = middle + 1;
    } else {
      high = middle;
    }
  }
  return low;
}
566
+
567
+ // ../node_modules/p-queue/dist/priority-queue.js
568
/**
 * Array-backed queue kept in descending priority order; entries with equal
 * priority stay in insertion order.
 */
var PriorityQueue = class {
  #queue = [];

  /**
   * Insert `run` according to `options.priority` (0 when the key is absent).
   * @param {Function} run task to store
   * @param {{priority?: number, id?: string}} [options]
   */
  enqueue(run, options) {
    const settings = { priority: 0, ...options };
    const entry = { priority: settings.priority, id: settings.id, run };
    const tail = this.#queue[this.#queue.length - 1];

    // Fast path: nothing queued, or the new entry does not outrank the last one.
    if (tail === undefined || tail.priority >= settings.priority) {
      this.#queue.push(entry);
      return;
    }
    const position = lowerBound(this.#queue, entry, (a, b) => b.priority - a.priority);
    this.#queue.splice(position, 0, entry);
  }

  /**
   * Re-insert the queued task `id` with a new priority.
   * @throws {ReferenceError} when no queued task has that id
   */
  setPriority(id, priority) {
    const position = this.#queue.findIndex((entry) => entry.id === id);
    if (position === -1) {
      throw new ReferenceError(`No promise function with the id "${id}" exists in the queue.`);
    }
    const [removed] = this.#queue.splice(position, 1);
    this.enqueue(removed.run, { priority, id });
  }

  /** @returns {Function|undefined} the task at the front, or undefined when empty */
  dequeue() {
    return this.#queue.shift()?.run;
  }

  /** @returns {Function[]} the queued tasks whose priority equals `options.priority` */
  filter(options) {
    const matching = [];
    for (const entry of this.#queue) {
      if (entry.priority === options.priority) matching.push(entry.run);
    }
    return matching;
  }

  /** Number of queued tasks. */
  get size() {
    return this.#queue.length;
  }
};
606
+
607
+ // ../node_modules/p-queue/dist/index.js
608
/**
 * Promise queue with a concurrency limit and an optional per-interval rate limit.
 *
 * Emits "add", "active", "next", "completed", "error", "empty" and "idle".
 *
 * Changes relative to the bundled original:
 * - `add()` no longer writes the generated `id` into the caller's options object.
 *   With a shared options object (as `addAll()` passes) every task used to end up
 *   with the same id.
 * - The per-task "abort" listener is removed once the task settles, so listeners
 *   no longer accumulate on a long-lived signal.
 */
var PQueue = class extends import_index.default {
  #carryoverConcurrencyCount;
  #isIntervalIgnored;
  #intervalCount = 0;
  #intervalCap;
  #interval;
  #intervalEnd = 0;
  #intervalId;
  #timeoutId;
  #queue;
  #queueClass;
  #pending = 0;
  #concurrency;
  #isPaused;
  #throwOnTimeout;
  // Source of unique ids for tasks added without an explicit `id`.
  #idAssigner = 1n;

  /**
   * Per-operation timeout in milliseconds, used as the default `timeout` of each
   * future `add()` call.
   */
  timeout;

  /**
   * @param {object} [options]
   * @param {boolean} [options.carryoverConcurrencyCount=false] count still-pending tasks toward the next interval
   * @param {number} [options.intervalCap=Infinity] maximum tasks started per interval (>= 1)
   * @param {number} [options.interval=0] interval length in milliseconds (finite, >= 0)
   * @param {number} [options.concurrency=Infinity] maximum tasks running at once (>= 1)
   * @param {boolean} [options.autoStart=true] `false` creates the queue paused
   * @param {Function} [options.queueClass=PriorityQueue] constructor of the backing queue
   * @param {number} [options.timeout] default per-task timeout
   * @param {boolean} [options.throwOnTimeout] reject timed-out tasks instead of resolving them
   * @throws {TypeError} on an invalid `intervalCap`, `interval` or `concurrency`
   */
  constructor(options) {
    super();
    options = {
      carryoverConcurrencyCount: false,
      intervalCap: Number.POSITIVE_INFINITY,
      interval: 0,
      concurrency: Number.POSITIVE_INFINITY,
      autoStart: true,
      queueClass: PriorityQueue,
      ...options
    };
    if (!(typeof options.intervalCap === "number" && options.intervalCap >= 1)) {
      throw new TypeError(`Expected \`intervalCap\` to be a number from 1 and up, got \`${options.intervalCap?.toString() ?? ""}\` (${typeof options.intervalCap})`);
    }
    if (options.interval === void 0 || !(Number.isFinite(options.interval) && options.interval >= 0)) {
      throw new TypeError(`Expected \`interval\` to be a finite number >= 0, got \`${options.interval?.toString() ?? ""}\` (${typeof options.interval})`);
    }
    this.#carryoverConcurrencyCount = options.carryoverConcurrencyCount;
    this.#isIntervalIgnored = options.intervalCap === Number.POSITIVE_INFINITY || options.interval === 0;
    this.#intervalCap = options.intervalCap;
    this.#interval = options.interval;
    this.#queue = new options.queueClass();
    this.#queueClass = options.queueClass;
    this.concurrency = options.concurrency;
    this.timeout = options.timeout;
    this.#throwOnTimeout = options.throwOnTimeout === true;
    this.#isPaused = options.autoStart === false;
  }

  get #doesIntervalAllowAnother() {
    return this.#isIntervalIgnored || this.#intervalCount < this.#intervalCap;
  }

  get #doesConcurrentAllowAnother() {
    return this.#pending < this.#concurrency;
  }

  // Bookkeeping after a task settles: free its slot and try to start the next one.
  #next() {
    this.#pending--;
    this.#tryToStartAnother();
    this.emit("next");
  }

  #onResumeInterval() {
    this.#onInterval();
    this.#initializeIntervalIfNeeded();
    this.#timeoutId = void 0;
  }

  // True while the previous interval has not ended and no interval timer is
  // running; in that case a one-shot timer is armed to resume later.
  get #isIntervalPaused() {
    const now = Date.now();
    if (this.#intervalId === void 0) {
      const delay = this.#intervalEnd - now;
      if (delay < 0) {
        this.#intervalCount = this.#carryoverConcurrencyCount ? this.#pending : 0;
      } else {
        if (this.#timeoutId === void 0) {
          this.#timeoutId = setTimeout(() => {
            this.#onResumeInterval();
          }, delay);
        }
        return true;
      }
    }
    return false;
  }

  // Starts one queued task if the limits allow it; returns whether one was started.
  #tryToStartAnother() {
    if (this.#queue.size === 0) {
      if (this.#intervalId) {
        clearInterval(this.#intervalId);
      }
      this.#intervalId = void 0;
      this.emit("empty");
      if (this.#pending === 0) {
        this.emit("idle");
      }
      return false;
    }
    if (!this.#isPaused) {
      const canInitializeInterval = !this.#isIntervalPaused;
      if (this.#doesIntervalAllowAnother && this.#doesConcurrentAllowAnother) {
        const job = this.#queue.dequeue();
        if (!job) {
          return false;
        }
        this.emit("active");
        job();
        if (canInitializeInterval) {
          this.#initializeIntervalIfNeeded();
        }
        return true;
      }
    }
    return false;
  }

  #initializeIntervalIfNeeded() {
    if (this.#isIntervalIgnored || this.#intervalId !== void 0) {
      return;
    }
    this.#intervalId = setInterval(() => {
      this.#onInterval();
    }, this.#interval);
    this.#intervalEnd = Date.now() + this.#interval;
  }

  #onInterval() {
    if (this.#intervalCount === 0 && this.#pending === 0 && this.#intervalId) {
      clearInterval(this.#intervalId);
      this.#intervalId = void 0;
    }
    this.#intervalCount = this.#carryoverConcurrencyCount ? this.#pending : 0;
    this.#processQueue();
  }

  // Starts queued tasks until a limit is reached or the queue is empty.
  #processQueue() {
    while (this.#tryToStartAnother()) {
    }
  }

  /** Maximum number of tasks running at once. */
  get concurrency() {
    return this.#concurrency;
  }

  /** @throws {TypeError} unless the new value is a number >= 1 */
  set concurrency(newConcurrency) {
    if (!(typeof newConcurrency === "number" && newConcurrency >= 1)) {
      throw new TypeError(`Expected \`concurrency\` to be a number from 1 and up, got \`${newConcurrency}\` (${typeof newConcurrency})`);
    }
    this.#concurrency = newConcurrency;
    this.#processQueue();
  }

  /**
   * Change the priority of a task that is still waiting in the queue.
   * @param {string} id id given to (or generated by) `add()`
   * @param {number} priority new priority
   */
  setPriority(id, priority) {
    this.#queue.setPriority(id, priority);
  }

  /**
   * Queue `function_` and resolve with its result once it has run.
   *
   * @param {(context: {signal?: AbortSignal}) => *} function_ task to run
   * @param {object} [options] forwarded to the backing queue's `enqueue`
   * @param {string} [options.id] task id; a unique one is generated when absent
   * @param {number} [options.timeout] overrides the queue-wide `timeout`
   * @param {boolean} [options.throwOnTimeout] overrides the queue-wide setting
   * @param {AbortSignal} [options.signal] aborting rejects the task
   * @returns {Promise<*>} the task's result, or `undefined` when it timed out
   *   and `throwOnTimeout` is off
   */
  async add(function_, options = {}) {
    // Build a fresh object so the caller's options (possibly shared across
    // several add() calls) never receive the generated id.
    options = {
      timeout: this.timeout,
      throwOnTimeout: this.#throwOnTimeout,
      ...options,
      id: options.id ?? (this.#idAssigner++).toString()
    };
    return new Promise((resolve, reject) => {
      this.#queue.enqueue(async () => {
        this.#pending++;
        let removeAbortListener;
        try {
          options.signal?.throwIfAborted();
          this.#intervalCount++;
          let operation = function_({ signal: options.signal });
          if (options.timeout) {
            operation = pTimeout(Promise.resolve(operation), { milliseconds: options.timeout });
          }
          if (options.signal) {
            const { signal } = options;
            const aborted = new Promise((_resolve, rejectOnAbort) => {
              const onAbort = () => {
                rejectOnAbort(signal.reason);
              };
              signal.addEventListener("abort", onAbort, { once: true });
              removeAbortListener = () => {
                signal.removeEventListener("abort", onAbort);
              };
            });
            operation = Promise.race([operation, aborted]);
          }
          const result = await operation;
          resolve(result);
          this.emit("completed", result);
        } catch (error) {
          if (error instanceof TimeoutError && !options.throwOnTimeout) {
            resolve();
            return;
          }
          reject(error);
          this.emit("error", error);
        } finally {
          removeAbortListener?.();
          this.#next();
        }
      }, options);
      this.emit("add");
      this.#tryToStartAnother();
    });
  }

  /**
   * Queue several tasks with the same options.
   * @returns {Promise<Array>} results in the order of `functions`
   */
  async addAll(functions, options) {
    return Promise.all(functions.map(async (function_) => this.add(function_, options)));
  }

  /** Start (or resume) running queued tasks; a no-op when the queue is not paused. */
  start() {
    if (!this.#isPaused) {
      return this;
    }
    this.#isPaused = false;
    this.#processQueue();
    return this;
  }

  /** Put queue execution on hold. */
  pause() {
    this.#isPaused = true;
  }

  /** Replace the backing queue with a new, empty instance. */
  clear() {
    this.#queue = new this.#queueClass();
  }

  /** @returns {Promise<void>} settles when no tasks are waiting (some may still be running) */
  async onEmpty() {
    if (this.#queue.size === 0) {
      return;
    }
    await this.#onEvent("empty");
  }

  /**
   * @param {number} limit
   * @returns {Promise<void>} settles when fewer than `limit` tasks are waiting;
   *   running tasks are not counted
   */
  async onSizeLessThan(limit) {
    if (this.#queue.size < limit) {
      return;
    }
    await this.#onEvent("next", () => this.#queue.size < limit);
  }

  /** @returns {Promise<void>} settles when nothing is waiting and nothing is running */
  async onIdle() {
    if (this.#pending === 0 && this.#queue.size === 0) {
      return;
    }
    await this.#onEvent("idle");
  }

  // Resolves on the first `event` emission for which `filter` (if given) returns true.
  async #onEvent(event, filter) {
    return new Promise((resolve) => {
      const listener = () => {
        if (filter && !filter()) {
          return;
        }
        this.off(event, listener);
        resolve();
      };
      this.on(event, listener);
    });
  }

  /** Number of tasks waiting to run. */
  get size() {
    return this.#queue.size;
  }

  /** Number of waiting tasks matching `options`, as filtered by the backing queue. */
  sizeBy(options) {
    return this.#queue.filter(options).length;
  }

  /** Number of tasks currently running. */
  get pending() {
    return this.#pending;
  }

  /** Whether the queue is currently paused. */
  get isPaused() {
    return this.#isPaused;
  }
};
941
+
942
+ // ../src/engine/ingest.ts
943
// Module-level PQueue that runs at most 3 tasks at once. Its consumers are not
// visible in this part of the file.
+ var queue = new PQueue({ concurrency: 3 });
944
// Auto extraction is enabled unless DISABLE_AUTO_EXTRACTION is exactly the string "true".
+ var ENABLE_AUTO_EXTRACTION = process.env.DISABLE_AUTO_EXTRACTION !== "true";
945
/**
 * Upserts a document, rebuilds its chunks (with embeddings where possible),
 * optionally kicks off entity extraction, and refreshes the owning source's
 * counters and status.
 *
 * Flow:
 *  1. Hash the content (SHA-256) and upsert the document keyed by
 *     (sourceId, externalId).
 *  2. Delete every existing chunk of that document.
 *  3. Split the content with `chunkText`; if nothing comes back, mark the
 *     source READY with a chunk count of 0 and return.
 *  4. Embed the chunks in batches of 50 and insert them one by one. If the
 *     insert that includes the embedding fails, retry without the embedding;
 *     if that fails too, the chunk is skipped and the error is logged.
 *  5. Unless disabled, start (without awaiting) entity extraction for the
 *     first five chunks whose content is longer than 200 characters.
 *  6. Update the source's counters, sync time and status.
 *
 * @param {object} input
 * @param {*} input.sourceId - Source the document belongs to.
 * @param {*} input.projectId - Project stored on the document and its chunks.
 * @param {*} input.externalId - Identifier unique within the source.
 * @param {*} input.title - Document title; also merged into chunk metadata.
 * @param {*} input.content - Text to hash, chunk and embed.
 * @param {object} [input.metadata={}] - Metadata stored on the document.
 * @param {*} [input.filePath] - Passed to `chunkText`; falls back to externalId.
 * @param {boolean} [input.skipEntityExtraction] - Skip step 5 when truthy.
 * @returns {Promise<object>} The upserted document record.
 */
async function ingestDocument(input) {
  const { sourceId, projectId, externalId, title, content, metadata = {}, filePath } = input;
  const contentHash = createHash("sha256").update(content).digest("hex");
  const doc = await prisma.document.upsert({
    where: {
      sourceId_externalId: {
        sourceId,
        externalId
      }
    },
    update: {
      title,
      content,
      metadata,
      contentHash,
      updatedAt: /* @__PURE__ */ new Date()
    },
    create: {
      sourceId,
      projectId,
      externalId,
      title,
      content,
      metadata,
      contentHash
    }
  });
  // Chunks are always rebuilt from scratch, so drop whatever was there before.
  await prisma.chunk.deleteMany({
    where: { documentId: doc.id }
  });
  const textChunks = chunkText(content, {
    filePath: filePath || externalId,
    metadata: { ...metadata, title }
  });
  console.log(`[Ingest] Document: ${doc.id}, Chunks to create: ${textChunks.length}`);

  // Single place that writes the source bookkeeping fields.
  const markSourceReady = async (documentCount, chunkCount) => {
    const now = /* @__PURE__ */ new Date();
    await prisma.source.update({
      where: { id: sourceId },
      data: {
        documentCount,
        chunkCount,
        lastSyncAt: now,
        status: "READY",
        updatedAt: now
      }
    });
  };

  if (textChunks.length === 0) {
    console.log(`[Ingest] No chunks to create for document ${doc.id}`);
    await markSourceReady(await prisma.document.count({ where: { sourceId } }), 0);
    return doc;
  }

  const batchSize = 50;
  // chunkIdByIndex[k] holds the row id of textChunks[k]. Slots stay undefined
  // for chunks that could not be inserted, so positions never shift and the
  // entity-extraction step below always links the right id to the right chunk.
  const chunkIdByIndex = new Array(textChunks.length);
  let createdCount = 0;
  for (let start = 0; start < textChunks.length; start += batchSize) {
    const batch = textChunks.slice(start, start + batchSize);
    console.log(`[Ingest] Processing batch ${start / batchSize + 1}: ${batch.length} chunks`);
    const embeddingVectors = await embed(batch.map((c) => c.content));
    for (let j = 0; j < batch.length; j++) {
      const chunk = batch[j];
      const vector = embeddingVectors[j];
      const vectorStr = `[${vector.join(",")}]`;
      const metadataJson = JSON.stringify(chunk.metadata);
      // Token count is estimated as one token per four characters, rounded up.
      const tokenCount = Math.ceil(chunk.content.length / 4);
      try {
        const inserted = await prisma.$queryRaw`
          INSERT INTO "chunks" (
            id, "documentId", "projectId", content, "chunkType",
            "chunkIndex", metadata, "tokenCount", embedding, "createdAt", "updatedAt"
          )
          VALUES (
            gen_random_uuid(), ${doc.id}, ${projectId}, ${chunk.content}, ${chunk.chunkType},
            ${chunk.chunkIndex}, ${metadataJson}::jsonb, ${tokenCount},
            ${vectorStr}::vector, NOW(), NOW()
          )
          RETURNING id
        `;
        const chunkId = inserted[0]?.id;
        console.log(`[Ingest] Created chunk: ${chunkId}`);
        chunkIdByIndex[start + j] = chunkId;
        createdCount++;
      } catch (err) {
        console.error("[Ingest] Error inserting chunk with embedding:", err.message);
        // Fall back to storing the chunk without its embedding.
        try {
          const inserted = await prisma.$queryRaw`
            INSERT INTO "chunks" (
              id, "documentId", "projectId", content, "chunkType",
              "chunkIndex", metadata, "tokenCount", "createdAt", "updatedAt"
            )
            VALUES (
              gen_random_uuid(), ${doc.id}, ${projectId}, ${chunk.content}, ${chunk.chunkType},
              ${chunk.chunkIndex}, ${metadataJson}::jsonb, ${tokenCount},
              NOW(), NOW()
            )
            RETURNING id
          `;
          const chunkId = inserted[0]?.id;
          console.log(`[Ingest] Created chunk (no embedding): ${chunkId}`);
          chunkIdByIndex[start + j] = chunkId;
          createdCount++;
        } catch (err2) {
          console.error("[Ingest] Error inserting chunk:", err2.message);
        }
      }
    }
  }
  console.log(`[Ingest] Total chunks created: ${createdCount} for document ${doc.id}`);

  if (ENABLE_AUTO_EXTRACTION && !input.skipEntityExtraction) {
    // Fire-and-forget extraction for the first five sufficiently long chunks.
    const maxExtractions = 5;
    let scheduled = 0;
    for (let k = 0; k < textChunks.length && scheduled < maxExtractions; k++) {
      const chunk = textChunks[k];
      if (chunk.content.length <= 200) {
        continue;
      }
      scheduled++;
      // chunkIdByIndex[k] is undefined when this chunk's insert failed.
      extractEntities(projectId, chunk.content, chunk.chunkType, metadata, chunkIdByIndex[k]).catch((err) => {
        // Best-effort: a failed extraction must not fail the ingest, but log it.
        console.error("[Ingest] Entity extraction failed:", err?.message);
      });
    }
  }

  const docCount = await prisma.document.count({
    where: { sourceId }
  });
  // NOTE(review): this counts chunks of the current document only, while
  // documentCount covers the whole source — confirm whether the source's
  // chunkCount is meant to be per-source.
  const chunkCount = await prisma.chunk.count({
    where: { documentId: doc.id }
  });
  await markSourceReady(docCount, chunkCount);
  return doc;
}
1073
+
1074
+ export {
1075
+ ingestDocument
1076
+ };