@exulu/backend 1.67.0 → 1.68.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-ZPZKOT6I.js → chunk-VPSLTGZF.js} +1428 -139
- package/dist/{convert-exulu-tools-to-ai-sdk-tools-4B7BQ5G2.js → convert-exulu-tools-to-ai-sdk-tools-CHQF36XW.js} +1 -1
- package/dist/index.cjs +24279 -22720
- package/dist/index.d.cts +256 -100
- package/dist/index.d.ts +256 -100
- package/dist/index.js +2837 -2645
- package/ee/agentic-retrieval/v3/agent-loop.ts +4 -4
- package/ee/agentic-retrieval/v3/index.ts +20 -6
- package/ee/python/documents/processing/doc_processor.ts +79 -34
- package/ee/workers.ts +3 -17
- package/package.json +1 -1
- package/ee/agentic-retrieval/v4/agent-loop.ts +0 -208
- package/ee/agentic-retrieval/v4/context-sampler.ts +0 -79
- package/ee/agentic-retrieval/v4/index.ts +0 -690
- package/ee/agentic-retrieval/v4/types.ts +0 -58
|
@@ -362,9 +362,14 @@ var supervise = async (cfg) => {
|
|
|
362
362
|
}
|
|
363
363
|
};
|
|
364
364
|
var _packageRoot;
|
|
365
|
+
var _clientMode = false;
|
|
365
366
|
/**
 * Stores the given path in the module-level `_packageRoot` variable.
 *
 * @param {string} root - Value to remember as the package root.
 *   NOTE(review): presumably a filesystem path; confirm against callers.
 * @returns {void}
 */
var setLiteLLMPackageRoot = (root) => {
  const nextPackageRoot = root;
  _packageRoot = nextPackageRoot;
};
|
|
369
|
+
/**
 * Turns on client mode by setting the module-level `_clientMode` flag.
 *
 * The call is a no-op when `internal.readyPromise` is already set, so the
 * flag is only flipped while no ready-promise exists.
 *
 * @returns {void}
 */
var enableLiteLLMClientMode = () => {
  const alreadyStarted = Boolean(internal.readyPromise);
  if (!alreadyStarted) {
    _clientMode = true;
  }
};
|
|
368
373
|
var startLiteLLMSupervisor = async (options = {}) => {
|
|
369
374
|
if (!isLiteLLMEnabled()) return;
|
|
370
375
|
if (internal.readyPromise) {
|
|
@@ -413,6 +418,27 @@ var startLiteLLMSupervisor = async (options = {}) => {
|
|
|
413
418
|
};
|
|
414
419
|
var waitForLiteLLMReady = async () => {
|
|
415
420
|
if (!isLiteLLMEnabled()) return;
|
|
421
|
+
if (_clientMode) {
|
|
422
|
+
if (internal.state === "ready") return;
|
|
423
|
+
const host = process.env.LITELLM_HOST ?? "127.0.0.1";
|
|
424
|
+
const port = process.env.LITELLM_PORT ?? "4000";
|
|
425
|
+
const url = `http://${host}:${port}/health/liveliness`;
|
|
426
|
+
let res;
|
|
427
|
+
try {
|
|
428
|
+
res = await fetch(url, { method: "GET" });
|
|
429
|
+
} catch (err) {
|
|
430
|
+
throw new Error(
|
|
431
|
+
`LiteLLM proxy not reachable at ${url} (is the Exulu server process running?): ${err.message}`
|
|
432
|
+
);
|
|
433
|
+
}
|
|
434
|
+
if (!res.ok) {
|
|
435
|
+
throw new Error(
|
|
436
|
+
`LiteLLM proxy health probe at ${url} returned ${res.status}.`
|
|
437
|
+
);
|
|
438
|
+
}
|
|
439
|
+
internal.state = "ready";
|
|
440
|
+
return;
|
|
441
|
+
}
|
|
416
442
|
if (!internal.readyPromise) {
|
|
417
443
|
return startLiteLLMSupervisor();
|
|
418
444
|
}
|
|
@@ -499,6 +525,12 @@ function buildTags(input) {
|
|
|
499
525
|
if (input.routine_name) {
|
|
500
526
|
candidates.push("routine_name_" + input.routine_name);
|
|
501
527
|
}
|
|
528
|
+
if (input.context_id) {
|
|
529
|
+
candidates.push("context_id_" + input.context_id);
|
|
530
|
+
}
|
|
531
|
+
if (input.context_name) {
|
|
532
|
+
candidates.push("context_name_" + input.context_name);
|
|
533
|
+
}
|
|
502
534
|
console.log("[EXULU] Candidates", candidates);
|
|
503
535
|
const out = [];
|
|
504
536
|
for (const candidate of candidates) {
|
|
@@ -1109,7 +1141,10 @@ async function resolveModel(input) {
|
|
|
1109
1141
|
const litellm = getLiteLLMProvider({
|
|
1110
1142
|
user,
|
|
1111
1143
|
role: user?.role,
|
|
1112
|
-
project
|
|
1144
|
+
// Fall back to the caller's own project (set on API keys) when no
|
|
1145
|
+
// explicit request project is supplied, so API-triggered requests are
|
|
1146
|
+
// attributed to the key's project.
|
|
1147
|
+
project: project ?? user?.project,
|
|
1113
1148
|
agent,
|
|
1114
1149
|
team: user?.team,
|
|
1115
1150
|
routine
|
|
@@ -1568,7 +1603,7 @@ var ExuluTool = class _ExuluTool {
|
|
|
1568
1603
|
});
|
|
1569
1604
|
providerapikey = resolved.apiKey;
|
|
1570
1605
|
}
|
|
1571
|
-
const { convertExuluToolsToAiSdkTools: convertExuluToolsToAiSdkTools2 } = await import("./convert-exulu-tools-to-ai-sdk-tools-4B7BQ5G2.js");
|
|
1606
|
+
const { convertExuluToolsToAiSdkTools: convertExuluToolsToAiSdkTools2 } = await import("./convert-exulu-tools-to-ai-sdk-tools-CHQF36XW.js");
|
|
1572
1607
|
const tools = await convertExuluToolsToAiSdkTools2(
|
|
1573
1608
|
[this],
|
|
1574
1609
|
[],
|
|
@@ -1577,7 +1612,6 @@ var ExuluTool = class _ExuluTool {
|
|
|
1577
1612
|
agent.tools,
|
|
1578
1613
|
providerapikey,
|
|
1579
1614
|
void 0,
|
|
1580
|
-
void 0,
|
|
1581
1615
|
user,
|
|
1582
1616
|
config,
|
|
1583
1617
|
void 0,
|
|
@@ -1843,6 +1877,113 @@ var createSessionItemsRetrievalTool = async ({
|
|
|
1843
1877
|
import { z as z9 } from "zod";
|
|
1844
1878
|
import { createBashTool } from "bash-tool";
|
|
1845
1879
|
|
|
1880
|
+
// src/exulu/resolve-reranker.ts
|
|
1881
|
+
import fs from "fs";
|
|
1882
|
+
/**
 * Error raised when a reranker cannot be resolved.
 *
 * Carries a machine-readable `code` next to the human-readable message so
 * callers can branch on the failure reason.
 */
var ResolveRerankerError = class extends Error {
  /** Machine-readable failure code supplied by the thrower. */
  code;

  /**
   * @param {string} code - Failure code stored on the instance.
   * @param {string} message - Human-readable description passed to `Error`.
   */
  constructor(code, message) {
    super(message);
    Object.assign(this, { code, name: "ResolveRerankerError" });
  }
};
|
|
1889
|
+
/**
 * Resolves a reranker that scores chunks through the LiteLLM proxy's
 * `/v1/rerank` endpoint.
 *
 * The proxy address is read from `LITELLM_HOST` / `LITELLM_PORT` (defaulting to
 * `127.0.0.1:4000`) and requests are authorized with `LITELLM_MASTER_KEY`.
 * Spend-tracking tags are built once, when the reranker is resolved, from the
 * supplied user / role / project / agent / team / routine / context details.
 *
 * @param {object} input
 * @param {string} input.model - Model name forwarded to LiteLLM.
 * @param {string} [input.contextId] - Context id, used for tagging only.
 * @param {string} [input.contextName] - Context name, used for tagging only.
 * @param {object} [input.user] - Caller; `id`, `role`, `project`, `team`, `type`,
 *   `firstname` and `email` are read when present.
 * @param {string} [input.userId] - Fallback user id when `user.id` is absent.
 * @param {string} [input.roleId] - Fallback role id when `user.role.id` is absent.
 * @param {object} [input.project] - Explicit project; falls back to `user.project`.
 * @param {object} [input.agent] - Agent whose `id` / `name` are tagged.
 * @param {object} [input.routine] - Routine whose `id` / `name` are tagged.
 * @returns {Promise<{model: string, rerank: Function}>} The model name plus a
 *   `rerank(query, chunks, opts)` function. `rerank` resolves to the chunks
 *   (each extended with `rerank_score`) sorted by descending score, and resolves
 *   to `[]` after logging when the request fails.
 * @throws {ResolveRerankerError} `LITELLM_NOT_CONFIGURED` when LiteLLM is disabled
 *   or `LITELLM_MASTER_KEY` is unset; `LITELLM_NOT_READY` when
 *   `waitForLiteLLMReady()` rejects.
 */
async function resolveReranker(input) {
  const { model, contextId, contextName, user, userId, roleId, project, agent, routine } = input;
  if (!isLiteLLMEnabled()) {
    throw new ResolveRerankerError(
      "LITELLM_NOT_CONFIGURED",
      "resolveReranker requires EXULU_USE_LITELLM=true \u2014 reranking is served exclusively through the LiteLLM proxy."
    );
  }
  try {
    await waitForLiteLLMReady();
  } catch (err) {
    throw new ResolveRerankerError(
      "LITELLM_NOT_READY",
      `LiteLLM is not ready: ${err.message}`
    );
  }
  const host = process.env.LITELLM_HOST ?? "127.0.0.1";
  const port = process.env.LITELLM_PORT ?? "4000";
  const masterKey = process.env.LITELLM_MASTER_KEY;
  if (!masterKey) {
    throw new ResolveRerankerError(
      "LITELLM_NOT_CONFIGURED",
      "LITELLM_MASTER_KEY is required when EXULU_USE_LITELLM=true"
    );
  }
  const resolvedUserId = user?.id ?? userId;
  if (resolvedUserId) await provisionDefaultUserBudget(resolvedUserId);
  const role = user?.role;
  // An explicit request project wins; otherwise use the caller's own project.
  const effectiveProject = project ?? user?.project;
  // API users are labelled by first name when they have one, everyone else by email.
  let userName;
  if (user) {
    userName = user.type === "api" ? user.firstname ?? user.email : user.email;
  }
  const tags = buildTags({
    user_id: resolvedUserId,
    role_id: role?.id ?? roleId,
    project_id: effectiveProject?.id,
    agent_id: agent?.id,
    team_id: user?.team?.id,
    routine_id: routine?.id,
    context_id: contextId,
    user_name: userName,
    role_name: role?.name,
    project_name: effectiveProject?.name,
    agent_name: agent?.name,
    team_name: user?.team?.name,
    routine_name: routine?.name,
    context_name: contextName
  });
  const endpoint = `http://${host}:${port}/v1/rerank`;
  const rerank = async (query, chunks, opts) => {
    // Best-effort by design: any failure is logged and reported as "no results".
    // Nothing is written to disk here, so a filesystem error can neither discard
    // a successful result nor escape the catch below.
    try {
      if (chunks.length === 0) return [];
      const documents = chunks.map(
        (c) => `${c.item_name ?? ""}: ${c.chunk_content ?? ""}`
      );
      const res = await fetch(endpoint, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${masterKey}`,
          "Content-Type": "application/json"
        },
        body: JSON.stringify({
          model,
          query,
          documents,
          // Default to scoring every document so callers can fully reorder; a
          // caller can pass a smaller topN as an optimization hint.
          top_n: opts?.topN ?? documents.length,
          // LiteLLM reads metadata.tags for tag-based spend tracking.
          metadata: { tags }
        })
      });
      if (!res.ok) {
        const text = await res.text().catch(() => "");
        throw new Error(
          `[EXULU] LiteLLM /v1/rerank returned ${res.status} for model "${model}": ${text}`
        );
      }
      const json = await res.json();
      if (!Array.isArray(json.results)) {
        throw new Error(
          `[EXULU] LiteLLM /v1/rerank returned no results for model "${model}".`
        );
      }
      // Drop results whose index does not point at one of the submitted chunks.
      const reranked = json.results.filter(
        (r) => typeof r.index === "number" && !!chunks[r.index]
      ).map((r) => ({
        ...chunks[r.index],
        rerank_score: r.relevance_score ?? 0
      }));
      reranked.sort((a, b) => b.rerank_score - a.rerank_score);
      return reranked;
    } catch (err) {
      console.error("[EXULU] Error reranking:", err);
      return [];
    }
  };
  return { model, rerank };
}
|
|
1986
|
+
|
|
1846
1987
|
// ee/entitlements.ts
|
|
1847
1988
|
var ENTITLEMENTS = {
|
|
1848
1989
|
"rbac": false,
|
|
@@ -1895,7 +2036,7 @@ async function withRetry(generateFn, maxRetries = 3) {
|
|
|
1895
2036
|
if (attempt === maxRetries) {
|
|
1896
2037
|
throw error;
|
|
1897
2038
|
}
|
|
1898
|
-
await new Promise((
|
|
2039
|
+
await new Promise((resolve4) => setTimeout(resolve4, Math.pow(2, attempt) * 1e3));
|
|
1899
2040
|
}
|
|
1900
2041
|
}
|
|
1901
2042
|
throw lastError;
|
|
@@ -2037,6 +2178,12 @@ var authentication = async ({
|
|
|
2037
2178
|
user.team = team;
|
|
2038
2179
|
}
|
|
2039
2180
|
}
|
|
2181
|
+
if (user?.project) {
|
|
2182
|
+
const project = await db2.from("projects").select("*").where("id", user?.project).first();
|
|
2183
|
+
if (project) {
|
|
2184
|
+
user.project = project;
|
|
2185
|
+
}
|
|
2186
|
+
}
|
|
2040
2187
|
if (!user) {
|
|
2041
2188
|
return {
|
|
2042
2189
|
error: true,
|
|
@@ -2102,6 +2249,18 @@ var authentication = async ({
|
|
|
2102
2249
|
user.role = role;
|
|
2103
2250
|
}
|
|
2104
2251
|
}
|
|
2252
|
+
if (user?.team) {
|
|
2253
|
+
const team = await db2.from("teams").select("*").where("id", user?.team).first();
|
|
2254
|
+
if (team) {
|
|
2255
|
+
user.team = team;
|
|
2256
|
+
}
|
|
2257
|
+
}
|
|
2258
|
+
if (user?.project) {
|
|
2259
|
+
const project = await db2.from("projects").select("*").where("id", user?.project).first();
|
|
2260
|
+
if (project) {
|
|
2261
|
+
user.project = project;
|
|
2262
|
+
}
|
|
2263
|
+
}
|
|
2105
2264
|
return {
|
|
2106
2265
|
error: false,
|
|
2107
2266
|
code: 200,
|
|
@@ -2270,7 +2429,7 @@ var uploadFile = async (file, fileName, config, options = {}, user, customBucket
|
|
|
2270
2429
|
if (error.name === "SignatureDoesNotMatch" || error.name === "InvalidAccessKeyId" || error.name === "AccessDenied") {
|
|
2271
2430
|
if (attempt < maxRetries) {
|
|
2272
2431
|
const backoffMs = Math.pow(2, attempt) * 1e3;
|
|
2273
|
-
await new Promise((
|
|
2432
|
+
await new Promise((resolve4) => setTimeout(resolve4, backoffMs));
|
|
2274
2433
|
s3Client = void 0;
|
|
2275
2434
|
getS3Client(config);
|
|
2276
2435
|
continue;
|
|
@@ -2994,6 +3153,864 @@ var ExuluStorage = class {
|
|
|
2994
3153
|
// src/exulu/context.ts
|
|
2995
3154
|
import pgvector2 from "pgvector/knex";
|
|
2996
3155
|
|
|
3156
|
+
// ee/tokenizer.ts
|
|
3157
|
+
import { Tiktoken } from "tiktoken/lite";
|
|
3158
|
+
import { load } from "tiktoken/load";
|
|
3159
|
+
import registry2 from "tiktoken/registry.json" with { type: "json" };
|
|
3160
|
+
import models from "tiktoken/model_to_encoding.json" with { type: "json" };
|
|
3161
|
+
/**
 * Thin wrapper around a tiktoken encoder.
 *
 * Call `create(modelName)` once before using any other method; every other
 * method throws "Tokenizer not initialized" while no encoder is loaded.
 */
var ExuluTokenizer = class {
  /** The loaded encoder, or `null` before `create()` and after `free()`. */
  encoder = null;

  /**
   * Loads the encoder for `modelName`, or returns the already-loaded one.
   *
   * An existing encoder is reused regardless of `modelName`.
   *
   * @param {string} modelName - Key into tiktoken's model-to-encoding table.
   * @returns {Promise<object>} The encoder instance.
   * @throws {Error} When `modelName` has no known encoding.
   */
  async create(modelName) {
    if (this.encoder) {
      return this.encoder;
    }
    const time = performance.now();
    console.log("[EXULU] Loading tokenizer.", modelName);
    // Fail with a clear message instead of passing `undefined` into `load`.
    const encodingSpec = registry2[models[modelName]];
    if (!encodingSpec) {
      throw new Error(`[EXULU] Unknown tokenizer model "${modelName}".`);
    }
    const model = await load(encodingSpec);
    console.log("[EXULU] Loaded tokenizer.", modelName, performance.now() - time);
    const encoder = new Tiktoken(model.bpe_ranks, model.special_tokens, model.pat_str);
    console.log("[EXULU] Set encoder.");
    this.encoder = encoder;
    return encoder;
  }

  /**
   * Decodes a token sequence back into text.
   *
   * @param {ArrayLike<number>} tokens - Tokens previously produced by `encode`.
   * @returns {Promise<string>} The decoded text.
   * @throws {Error} When the tokenizer is not initialized.
   */
  async decode(tokens) {
    if (!this.encoder) {
      throw new Error("Tokenizer not initialized");
    }
    // The encoder yields bytes; TextDecoder turns them into a string.
    const bytes = this.encoder.decode(tokens);
    return new TextDecoder().decode(bytes);
  }

  /**
   * Decodes several token sequences.
   *
   * @param {Array<ArrayLike<number>>} tokenSequences - Sequences to decode.
   * @returns {Promise<string[]>} Decoded texts, in input order.
   * @throws {Error} When the tokenizer is not initialized.
   */
  async decodeBatch(tokenSequences) {
    if (!this.encoder) {
      throw new Error("Tokenizer not initialized");
    }
    return Promise.all(tokenSequences.map((tokens) => this.decode(tokens)));
  }

  /**
   * Encodes text into tokens.
   *
   * @param {string} text - Text to encode.
   * @returns {ArrayLike<number>} The tokens returned by the encoder.
   * @throws {Error} When the tokenizer is not initialized.
   */
  encode(text) {
    if (!this.encoder) {
      throw new Error("Tokenizer not initialized");
    }
    const time = performance.now();
    console.log("[EXULU] Encoding text length: " + (text?.length || 0));
    const tokens = this.encoder.encode(text);
    console.log("[EXULU] Finished encoding text.", performance.now() - time);
    return tokens;
  }

  /**
   * Counts tokens for several texts.
   *
   * @param {string[]} texts - Texts to measure.
   * @returns {Promise<number[]>} Token counts, in input order.
   * @throws {Error} When the tokenizer is not initialized.
   */
  async countTokensBatch(texts) {
    if (!this.encoder) {
      throw new Error("Tokenizer not initialized");
    }
    // countTokens is synchronous, so no per-item promises are needed.
    return texts.map((text) => this.countTokens(text));
  }

  /**
   * Counts the tokens in a single text.
   *
   * @param {string} text - Text to measure.
   * @returns {number} Number of tokens the encoder produces for `text`.
   * @throws {Error} When the tokenizer is not initialized.
   */
  countTokens(text) {
    if (!this.encoder) {
      throw new Error("Tokenizer not initialized");
    }
    const tokens = this.encoder.encode(text);
    const count = tokens.length;
    console.log("[EXULU] Token count.", count);
    return count;
  }

  /**
   * Releases the encoder.
   *
   * The reference is cleared afterwards so later calls report "Tokenizer not
   * initialized" instead of touching a released encoder, a repeated `free()`
   * is a no-op, and `create()` can load a fresh encoder.
   *
   * @returns {Promise<void>}
   */
  async free() {
    console.log("[EXULU] Freeing tokenizer.");
    if (this.encoder) {
      const encoder = this.encoder;
      this.encoder = null;
      encoder.free();
    }
  }
};
|
|
3225
|
+
|
|
3226
|
+
// src/chunking/types/base.ts
|
|
3227
|
+
/**
 * A span of text cut out of a larger document, together with its position,
 * token count and optional embedding.
 */
var Chunk = class _Chunk {
  /** The text of the chunk. */
  text;
  /** The starting index of the chunk in the original text. */
  startIndex;
  /** The ending index of the chunk in the original text. */
  endIndex;
  /** The number of tokens in the chunk. */
  tokenCount;
  /** Optional embedding for the chunk. */
  embedding;

  /**
   * Constructs a new Chunk object.
   *
   * @param {ChunkData} data - The data to construct the Chunk from.
   * @throws {Error} When `startIndex` is greater than `endIndex`, or when
   *   `tokenCount` is negative.
   */
  constructor(data) {
    this.text = data.text;
    this.startIndex = data.startIndex;
    this.endIndex = data.endIndex;
    this.tokenCount = data.tokenCount;
    this.embedding = data.embedding;
    if (this.startIndex > this.endIndex) {
      throw new Error("Start index must be less than or equal to end index.");
    }
    if (this.tokenCount < 0) {
      throw new Error("Token count must be a non-negative integer.");
    }
  }

  /**
   * Return a string representation of the Chunk.
   *
   * @returns {string} The text of the chunk.
   */
  toString() {
    return this.text;
  }

  /**
   * Return a detailed string representation of the Chunk.
   *
   * @returns {string} The text, token count and index range of the chunk.
   */
  toRepresentation() {
    return `Chunk(text='${this.text}', tokenCount=${this.tokenCount}, startIndex=${this.startIndex}, endIndex=${this.endIndex})`;
  }

  /**
   * Return a slice of the chunk's text.
   *
   * @param {number} [start] - The starting index of the slice.
   * @param {number} [end] - The ending index of the slice.
   * @returns {string} The slice of the chunk's text.
   */
  slice(start, end) {
    return this.text.slice(start, end);
  }

  /**
   * Return the Chunk as a dictionary-like object.
   *
   * The `embedding` value is returned by reference, not copied.
   *
   * @returns {ChunkData} The dictionary-like object.
   */
  toDict() {
    return {
      text: this.text,
      startIndex: this.startIndex,
      endIndex: this.endIndex,
      tokenCount: this.tokenCount,
      embedding: this.embedding
    };
  }

  /**
   * Create a Chunk object from a dictionary-like object.
   *
   * @param {ChunkData} data - The dictionary-like object.
   * @returns {Chunk} The Chunk object.
   */
  static fromDict(data) {
    return new _Chunk({
      text: data.text,
      startIndex: data.startIndex,
      endIndex: data.endIndex,
      tokenCount: data.tokenCount,
      embedding: data.embedding
    });
  }

  /**
   * Return a deep copy of the chunk.
   *
   * The embedding is duplicated as well, so mutating the copy's embedding
   * leaves the original untouched.
   *
   * @returns {Chunk} The deep copy of the chunk.
   */
  copy() {
    const data = this.toDict();
    const { embedding } = data;
    // Arrays and typed arrays both expose slice(); anything else (including
    // undefined) is passed through unchanged.
    // NOTE(review): assumes embeddings are flat numeric vectors — confirm.
    const isSliceableView = ArrayBuffer.isView(embedding) && typeof embedding.slice === "function";
    if (Array.isArray(embedding) || isSliceableView) {
      data.embedding = embedding.slice();
    }
    return _Chunk.fromDict(data);
  }
};
|
|
3316
|
+
|
|
3317
|
+
// src/chunking/types/sentence.ts
|
|
3318
|
+
/**
 * A single sentence located inside a larger text, with its token count.
 */
var Sentence = class _Sentence {
  /** Sentence text. */
  text;
  /** Offset in the original text where the sentence begins. */
  startIndex;
  /** Offset in the original text where the sentence ends. */
  endIndex;
  /** Token count of the sentence. */
  tokenCount;

  /**
   * @param {SentenceData} data - Source of `text`, `startIndex`, `endIndex`
   *   and `tokenCount`.
   */
  constructor(data) {
    this.text = data.text;
    this.startIndex = data.startIndex;
    this.endIndex = data.endIndex;
    this.tokenCount = data.tokenCount;
  }

  /**
   * Human-readable description listing every field.
   *
   * @returns {string}
   */
  toString() {
    const { text, startIndex, endIndex, tokenCount } = this;
    return `Sentence(text=${text}, startIndex=${startIndex}, endIndex=${endIndex}, tokenCount=${tokenCount})`;
  }

  /**
   * Plain-object snapshot of the sentence.
   *
   * @returns {SentenceData}
   */
  toDict() {
    const { text, startIndex, endIndex, tokenCount } = this;
    return { text, startIndex, endIndex, tokenCount };
  }

  /**
   * Builds a Sentence from a plain object.
   *
   * @param {SentenceData} data - Object carrying the sentence fields.
   * @returns {Sentence}
   */
  static fromDict(data) {
    const sentence = new _Sentence(data);
    return sentence;
  }
};
|
|
3351
|
+
/**
 * A Chunk that also keeps the list of sentences it was assembled from.
 */
var SentenceChunk = class _SentenceChunk extends Chunk {
  /** List of sentences in the chunk */
  sentences;

  /**
   * @param {SentenceChunkData} data - Chunk fields plus a `sentences` array.
   */
  constructor(data) {
    super(data);
    this.sentences = data.sentences;
    // Normalizes a null embedding to undefined.
    this.embedding = data.embedding ?? void 0;
  }

  /**
   * Returns a detailed string representation of the SentenceChunk, including
   * its text, start and end indices, token count, and every contained sentence
   * rendered through its own toString method.
   *
   * @returns {string} A string describing the SentenceChunk and all its sentences, e.g.,
   * SentenceChunk(text=..., startIndex=..., endIndex=..., tokenCount=..., sentences=[Sentence(...), ...])
   */
  toString() {
    const sentencesStr = this.sentences.map((s) => s.toString()).join(", ");
    return `SentenceChunk(text=${this.text}, startIndex=${this.startIndex}, endIndex=${this.endIndex}, tokenCount=${this.tokenCount}, sentences=[${sentencesStr}])`;
  }

  /**
   * Returns the SentenceChunk as a dictionary-like object.
   *
   * Extends the base toDict result with the chunk's sentences, each converted
   * through its own toDict method.
   *
   * @returns {SentenceChunkData} The chunk's fields plus an array of sentence data.
   */
  toDict() {
    const baseDict = super.toDict();
    return {
      ...baseDict,
      sentences: this.sentences.map((sentence) => sentence.toDict())
    };
  }

  /**
   * Creates a SentenceChunk object from a dictionary-like object.
   *
   * @param {SentenceChunkData} data - The chunk's fields plus an array of sentence data.
   * @returns {SentenceChunk} A new SentenceChunk built from `data`.
   */
  static fromDict(data) {
    const sentences = data.sentences.map((sentence) => Sentence.fromDict(sentence));
    return new _SentenceChunk({
      text: data.text,
      startIndex: data.startIndex,
      endIndex: data.endIndex,
      tokenCount: data.tokenCount,
      sentences,
      embedding: data.embedding ?? void 0
    });
  }

  /**
   * Returns a deep copy of the SentenceChunk.
   *
   * Overrides the inherited copy so the result stays a SentenceChunk and keeps
   * its sentences. The sentences are rebuilt from their dictionary form and the
   * embedding is duplicated, so the copy shares no mutable state with the
   * original.
   *
   * @returns {SentenceChunk} The deep copy of the chunk.
   */
  copy() {
    const data = this.toDict();
    const { embedding } = data;
    // Arrays and typed arrays both expose slice(); other values pass through.
    const isSliceableView = ArrayBuffer.isView(embedding) && typeof embedding.slice === "function";
    if (Array.isArray(embedding) || isSliceableView) {
      data.embedding = embedding.slice();
    }
    return _SentenceChunk.fromDict(data);
  }
};
|
|
3405
|
+
|
|
3406
|
+
// src/chunking/base.ts
|
|
3407
|
+
/**
 * Shared batch-processing plumbing for chunkers.
 *
 * This class never defines `chunk(text)` itself but calls `this.chunk`
 * throughout, so concrete chunkers are expected to supply it and have it
 * return a promise of the chunks for one text.
 */
var BaseChunker = class {
  /** Tokenizer handed to the constructor. */
  tokenizer;
  /** When true, batches are processed with Promise.all; otherwise one text at a time. */
  _useConcurrency = true;

  /**
   * @param {*} tokenizer - Tokenizer stored on the instance.
   */
  constructor(tokenizer) {
    this.tokenizer = tokenizer;
  }

  /**
   * Describes the chunker by its class name.
   *
   * @returns {string} The class name followed by "()".
   */
  toString() {
    const className = this.constructor.name;
    return `${className}()`;
  }

  /**
   * Chunks either a single text or a batch of texts.
   *
   * @param {string|string[]} textOrTexts - One text, or an array of texts.
   * @param {boolean} [showProgress=false] - Forwarded to batch processing.
   * @returns {Promise<Chunk[]|Chunk[][]>} Chunks for one text, or one chunk
   *   list per text for a batch.
   * @throws {Error} When the input is neither a string nor an array.
   */
  async call(textOrTexts, showProgress = false) {
    if (typeof textOrTexts === "string") {
      return this.chunk(textOrTexts);
    }
    if (!Array.isArray(textOrTexts)) {
      throw new Error("Input must be a string or an array of strings.");
    }
    return this.chunkBatch(textOrTexts, showProgress);
  }

  /**
   * Chunks the texts one after another, in order.
   *
   * @protected
   * @param {string[]} texts - The texts to chunk.
   * @param {boolean} [showProgress=false] - Whether to write progress to stdout.
   * @returns {Promise<Chunk[][]>} One chunk list per input text.
   */
  async _sequential_batch_processing(texts, showProgress = false) {
    const total = texts.length;
    const reportProgress = showProgress && total > 1;
    const results = [];
    let index = 0;
    while (index < total) {
      const position = index + 1;
      if (reportProgress) {
        // Progress is written before the text at this position is chunked.
        const progress = Math.round(position / total * 100);
        process.stdout.write(`Sequential processing: Document ${position}/${total} (${progress}%)\r`);
      }
      const chunks = await this.chunk(texts[index]);
      results.push(chunks);
      index = position;
    }
    if (reportProgress) {
      process.stdout.write("\n");
    }
    return results;
  }

  /**
   * Chunks all texts at once and waits for every result with Promise.all.
   *
   * @protected
   * @param {string[]} texts - The texts to chunk.
   * @param {boolean} [showProgress=false] - Whether to write progress to stdout.
   * @returns {Promise<Chunk[][]>} One chunk list per input text, in input order.
   */
  async _concurrent_batch_processing(texts, showProgress = false) {
    const total = texts.length;
    const reportProgress = showProgress && total > 1;
    let completedCount = 0;
    const passThroughWithProgress = (result) => {
      if (reportProgress) {
        completedCount += 1;
        const progress = Math.round(completedCount / total * 100);
        process.stdout.write(
          `Concurrent processing: Document ${completedCount}/${total} (${progress}%)\r`
        );
      }
      return result;
    };
    const pending = [];
    for (const text of texts) {
      pending.push(this.chunk(text).then(passThroughWithProgress));
    }
    const results = await Promise.all(pending);
    // Finish the progress line only when something was actually reported.
    if (reportProgress && completedCount > 0) {
      process.stdout.write("\n");
    }
    return results;
  }

  /**
   * Chunks a batch of texts, concurrently or sequentially depending on
   * `_useConcurrency`.
   *
   * Empty input yields an empty array and a single text is chunked directly,
   * without going through either batch path.
   *
   * @param {string[]} texts - The texts to chunk.
   * @param {boolean} [showProgress=true] - Whether to write progress to stdout.
   * @returns {Promise<Chunk[][]>} One chunk list per input text.
   */
  async chunkBatch(texts, showProgress = true) {
    const count = texts.length;
    if (count === 0) {
      return [];
    }
    if (count === 1) {
      const only = await this.chunk(texts[0]);
      return [only];
    }
    return this._useConcurrency
      ? this._concurrent_batch_processing(texts, showProgress)
      : this._sequential_batch_processing(texts, showProgress);
  }
};
|
|
3509
|
+
|
|
3510
|
+
// src/chunking/sentence.ts
|
|
3511
|
+
var SentenceChunker = class _SentenceChunker extends BaseChunker {
|
|
3512
|
+
chunkSize;
|
|
3513
|
+
chunkOverlap;
|
|
3514
|
+
minSentencesPerChunk;
|
|
3515
|
+
minCharactersPerSentence;
|
|
3516
|
+
approximate;
|
|
3517
|
+
delim;
|
|
3518
|
+
includeDelim;
|
|
3519
|
+
sep;
|
|
3520
|
+
/**
|
|
3521
|
+
* Private constructor. Use `SentenceChunker.create()` to instantiate.
|
|
3522
|
+
*
|
|
3523
|
+
* @param {Tokenizer} tokenizer - The tokenizer to use for token counting.
|
|
3524
|
+
* @param {number} chunkSize - Maximum number of tokens per chunk.
|
|
3525
|
+
* @param {number} chunkOverlap - Number of tokens to overlap between consecutive chunks.
|
|
3526
|
+
* @param {number} minSentencesPerChunk - Minimum number of sentences per chunk.
|
|
3527
|
+
* @param {number} minCharactersPerSentence - Minimum number of characters for a valid sentence.
|
|
3528
|
+
* @param {boolean} approximate - Whether to use approximate token counting.
|
|
3529
|
+
* @param {string[]} delim - List of sentence delimiters to use for splitting.
|
|
3530
|
+
* @param {('prev' | 'next' | null)} includeDelim - Whether to include the delimiter with the previous sentence ('prev'), next sentence ('next'), or exclude it (null).
|
|
3531
|
+
*/
|
|
3532
|
+
constructor(tokenizer, chunkSize, chunkOverlap, minSentencesPerChunk, minCharactersPerSentence, approximate, delim, includeDelim) {
|
|
3533
|
+
super(tokenizer);
|
|
3534
|
+
if (chunkSize <= 0) {
|
|
3535
|
+
throw new Error("chunkSize must be greater than 0");
|
|
3536
|
+
}
|
|
3537
|
+
if (chunkOverlap < 0) {
|
|
3538
|
+
throw new Error("chunkOverlap must be non-negative");
|
|
3539
|
+
}
|
|
3540
|
+
if (chunkOverlap >= chunkSize) {
|
|
3541
|
+
throw new Error("chunkOverlap must be less than chunkSize");
|
|
3542
|
+
}
|
|
3543
|
+
if (minSentencesPerChunk <= 0) {
|
|
3544
|
+
throw new Error("minSentencesPerChunk must be greater than 0");
|
|
3545
|
+
}
|
|
3546
|
+
if (minCharactersPerSentence <= 0) {
|
|
3547
|
+
throw new Error("minCharactersPerSentence must be greater than 0");
|
|
3548
|
+
}
|
|
3549
|
+
if (!delim) {
|
|
3550
|
+
throw new Error("delim must be a list of strings or a string");
|
|
3551
|
+
}
|
|
3552
|
+
if (includeDelim !== "prev" && includeDelim !== "next" && includeDelim !== null) {
|
|
3553
|
+
throw new Error("includeDelim must be 'prev', 'next' or null");
|
|
3554
|
+
}
|
|
3555
|
+
if (approximate) {
|
|
3556
|
+
console.warn(
|
|
3557
|
+
"Approximate has been deprecated and will be removed from next version onwards!"
|
|
3558
|
+
);
|
|
3559
|
+
}
|
|
3560
|
+
this.chunkSize = chunkSize;
|
|
3561
|
+
this.chunkOverlap = chunkOverlap;
|
|
3562
|
+
this.minSentencesPerChunk = minSentencesPerChunk;
|
|
3563
|
+
this.minCharactersPerSentence = minCharactersPerSentence;
|
|
3564
|
+
this.approximate = approximate;
|
|
3565
|
+
this.delim = delim;
|
|
3566
|
+
this.includeDelim = includeDelim;
|
|
3567
|
+
this.sep = "\u2704";
|
|
3568
|
+
}
|
|
3569
|
+
/**
|
|
3570
|
+
* Creates and initializes a SentenceChunker instance that is directly callable.
|
|
3571
|
+
*
|
|
3572
|
+
* This method is a static factory function that returns a Promise resolving to a CallableSentenceChunker instance.
|
|
3573
|
+
* The returned instance is a callable function that can be used to chunk text strings or arrays of text strings.
|
|
3574
|
+
*
|
|
3575
|
+
* @param {SentenceChunkerOptions} [options] - Options for configuring the SentenceChunker.
|
|
3576
|
+
* @returns {Promise<CallableSentenceChunker>} A promise that resolves to a callable SentenceChunker instance.
|
|
3577
|
+
*
|
|
3578
|
+
* @example
|
|
3579
|
+
* const chunker = await SentenceChunker.create();
|
|
3580
|
+
* const chunks = await chunker("This is a sample text.");
|
|
3581
|
+
* const batchChunks = await chunker(["Text 1", "Text 2"]);
|
|
3582
|
+
*
|
|
3583
|
+
* @see SentenceChunkerOptions
|
|
3584
|
+
*/
|
|
3585
|
+
static async create(options = {}) {
|
|
3586
|
+
const {
|
|
3587
|
+
tokenizer = "gpt-3.5-turbo",
|
|
3588
|
+
chunkSize = 512,
|
|
3589
|
+
chunkOverlap = 0,
|
|
3590
|
+
minSentencesPerChunk = 1,
|
|
3591
|
+
minCharactersPerSentence = 12,
|
|
3592
|
+
approximate = false,
|
|
3593
|
+
delim = [". ", "! ", "? ", "\n"],
|
|
3594
|
+
includeDelim = "prev"
|
|
3595
|
+
} = options;
|
|
3596
|
+
const tokenizerInstance = new ExuluTokenizer();
|
|
3597
|
+
await tokenizerInstance.create(tokenizer);
|
|
3598
|
+
const plainInstance = new _SentenceChunker(
|
|
3599
|
+
tokenizerInstance,
|
|
3600
|
+
chunkSize,
|
|
3601
|
+
chunkOverlap,
|
|
3602
|
+
minSentencesPerChunk,
|
|
3603
|
+
minCharactersPerSentence,
|
|
3604
|
+
approximate,
|
|
3605
|
+
delim,
|
|
3606
|
+
includeDelim
|
|
3607
|
+
);
|
|
3608
|
+
const callableFn = function(textOrTexts, showProgress) {
|
|
3609
|
+
if (typeof textOrTexts === "string") {
|
|
3610
|
+
return plainInstance.call(textOrTexts, showProgress);
|
|
3611
|
+
} else {
|
|
3612
|
+
return plainInstance.call(textOrTexts, showProgress);
|
|
3613
|
+
}
|
|
3614
|
+
};
|
|
3615
|
+
Object.setPrototypeOf(callableFn, _SentenceChunker.prototype);
|
|
3616
|
+
Object.assign(callableFn, plainInstance);
|
|
3617
|
+
return callableFn;
|
|
3618
|
+
}
|
|
3619
|
+
// NOTE: The replace + split method is not the best/most efficient way in general to be doing this. It works well in python because python implements .replace and .split in C while the re library is much slower in python.
|
|
3620
|
+
// NOTE: The new split -> join -> split is so weird, but it works. I don't quite like it however.
|
|
3621
|
+
// TODO: Implement a more efficient method for splitting text into sentences.
|
|
3622
|
+
/**
|
|
3623
|
+
* Fast sentence splitting while maintaining accuracy.
|
|
3624
|
+
*
|
|
3625
|
+
* @param {string} text - The text to split into sentences.
|
|
3626
|
+
* @returns {string[]} An array of sentences.
|
|
3627
|
+
*/
|
|
3628
|
+
_splitText(text) {
|
|
3629
|
+
let t = text;
|
|
3630
|
+
for (const c of this.delim) {
|
|
3631
|
+
if (this.includeDelim === "prev") {
|
|
3632
|
+
t = t.split(c).join(c + this.sep);
|
|
3633
|
+
} else if (this.includeDelim === "next") {
|
|
3634
|
+
t = t.split(c).join(this.sep + c);
|
|
3635
|
+
} else {
|
|
3636
|
+
t = t.split(c).join(this.sep);
|
|
3637
|
+
}
|
|
3638
|
+
}
|
|
3639
|
+
const splits = t.split(this.sep);
|
|
3640
|
+
const sentences = [];
|
|
3641
|
+
let current = "";
|
|
3642
|
+
for (const s of splits) {
|
|
3643
|
+
if (!current) {
|
|
3644
|
+
current = s;
|
|
3645
|
+
} else {
|
|
3646
|
+
if (current.length >= this.minCharactersPerSentence) {
|
|
3647
|
+
sentences.push(current);
|
|
3648
|
+
current = s;
|
|
3649
|
+
} else {
|
|
3650
|
+
current += s;
|
|
3651
|
+
}
|
|
3652
|
+
}
|
|
3653
|
+
}
|
|
3654
|
+
if (current) {
|
|
3655
|
+
sentences.push(current);
|
|
3656
|
+
}
|
|
3657
|
+
return sentences;
|
|
3658
|
+
}
|
|
3659
|
+
/**
|
|
3660
|
+
* Split text into sentences and calculate token counts for each sentence.
|
|
3661
|
+
*
|
|
3662
|
+
* @param {string} text - The text to split into sentences.
|
|
3663
|
+
* @returns {Promise<Sentence[]>} An array of Sentence objects.
|
|
3664
|
+
*/
|
|
3665
|
+
async _prepareSentences(text) {
|
|
3666
|
+
const sentenceTexts = this._splitText(text);
|
|
3667
|
+
if (!sentenceTexts.length) {
|
|
3668
|
+
return [];
|
|
3669
|
+
}
|
|
3670
|
+
const positions = [];
|
|
3671
|
+
let currentPos = 0;
|
|
3672
|
+
for (const sent of sentenceTexts) {
|
|
3673
|
+
positions.push(currentPos);
|
|
3674
|
+
currentPos += sent.length;
|
|
3675
|
+
}
|
|
3676
|
+
const tokenCounts = await this.tokenizer.countTokensBatch(sentenceTexts);
|
|
3677
|
+
return sentenceTexts.map(
|
|
3678
|
+
(sent, i) => new Sentence({
|
|
3679
|
+
text: sent,
|
|
3680
|
+
startIndex: positions[i],
|
|
3681
|
+
endIndex: positions[i] + sent.length,
|
|
3682
|
+
tokenCount: tokenCounts[i]
|
|
3683
|
+
})
|
|
3684
|
+
);
|
|
3685
|
+
}
|
|
3686
|
+
/**
|
|
3687
|
+
* Create a chunk from a list of sentences.
|
|
3688
|
+
*
|
|
3689
|
+
* @param {Sentence[]} sentences - The sentences to create a chunk from.
|
|
3690
|
+
* @returns {Promise<SentenceChunk>} A promise that resolves to a SentenceChunk object.
|
|
3691
|
+
*/
|
|
3692
|
+
async _createChunk(sentences) {
|
|
3693
|
+
const chunkText = sentences.map((sentence) => sentence.text).join("");
|
|
3694
|
+
const tokenCount = this.tokenizer.countTokens(chunkText);
|
|
3695
|
+
return new SentenceChunk({
|
|
3696
|
+
text: chunkText,
|
|
3697
|
+
startIndex: sentences[0].startIndex,
|
|
3698
|
+
endIndex: sentences[sentences.length - 1].endIndex,
|
|
3699
|
+
tokenCount,
|
|
3700
|
+
sentences
|
|
3701
|
+
});
|
|
3702
|
+
}
|
|
3703
|
+
/**
|
|
3704
|
+
* Split text into overlapping chunks based on sentences while respecting token limits.
|
|
3705
|
+
*
|
|
3706
|
+
* @param {string} text - The text to split into chunks.
|
|
3707
|
+
* @returns {Promise<SentenceChunk[]>} A promise that resolves to an array of SentenceChunk objects.
|
|
3708
|
+
*/
|
|
3709
|
+
async chunk(text) {
|
|
3710
|
+
if (!text.trim()) {
|
|
3711
|
+
return [];
|
|
3712
|
+
}
|
|
3713
|
+
const sentences = await this._prepareSentences(text);
|
|
3714
|
+
if (!sentences.length) {
|
|
3715
|
+
return [];
|
|
3716
|
+
}
|
|
3717
|
+
const tokenSums = [];
|
|
3718
|
+
let sum = 0;
|
|
3719
|
+
for (const sentence of sentences) {
|
|
3720
|
+
tokenSums.push(sum);
|
|
3721
|
+
sum += sentence.tokenCount;
|
|
3722
|
+
}
|
|
3723
|
+
tokenSums.push(sum);
|
|
3724
|
+
const chunks = [];
|
|
3725
|
+
let pos = 0;
|
|
3726
|
+
while (pos < sentences.length) {
|
|
3727
|
+
const targetTokens = tokenSums[pos] + this.chunkSize;
|
|
3728
|
+
let splitIdx = this._bisectLeft(tokenSums, targetTokens, pos) - 1;
|
|
3729
|
+
splitIdx = Math.min(splitIdx, sentences.length);
|
|
3730
|
+
splitIdx = Math.max(splitIdx, pos + 1);
|
|
3731
|
+
if (splitIdx - pos < this.minSentencesPerChunk) {
|
|
3732
|
+
if (pos + this.minSentencesPerChunk <= sentences.length) {
|
|
3733
|
+
splitIdx = pos + this.minSentencesPerChunk;
|
|
3734
|
+
} else {
|
|
3735
|
+
console.warn(
|
|
3736
|
+
`Minimum sentences per chunk as ${this.minSentencesPerChunk} could not be met for all chunks. Last chunk of the text will have only ${sentences.length - pos} sentences. Consider increasing the chunk_size or decreasing the min_sentences_per_chunk.`
|
|
3737
|
+
);
|
|
3738
|
+
splitIdx = sentences.length;
|
|
3739
|
+
}
|
|
3740
|
+
}
|
|
3741
|
+
const chunkSentences = sentences.slice(pos, splitIdx);
|
|
3742
|
+
chunks.push(await this._createChunk(chunkSentences));
|
|
3743
|
+
if (this.chunkOverlap > 0 && splitIdx < sentences.length) {
|
|
3744
|
+
let overlapTokens = 0;
|
|
3745
|
+
let overlapIdx = splitIdx - 1;
|
|
3746
|
+
while (overlapIdx > pos && overlapTokens < this.chunkOverlap) {
|
|
3747
|
+
const sent = sentences[overlapIdx];
|
|
3748
|
+
const nextTokens = overlapTokens + sent.tokenCount + 1;
|
|
3749
|
+
if (nextTokens > this.chunkOverlap) {
|
|
3750
|
+
break;
|
|
3751
|
+
}
|
|
3752
|
+
overlapTokens = nextTokens;
|
|
3753
|
+
overlapIdx--;
|
|
3754
|
+
}
|
|
3755
|
+
pos = overlapIdx + 1;
|
|
3756
|
+
} else {
|
|
3757
|
+
pos = splitIdx;
|
|
3758
|
+
}
|
|
3759
|
+
}
|
|
3760
|
+
await this.tokenizer.free();
|
|
3761
|
+
return chunks;
|
|
3762
|
+
}
|
|
3763
|
+
/**
|
|
3764
|
+
* Binary search to find the leftmost position where value should be inserted to maintain order.
|
|
3765
|
+
*
|
|
3766
|
+
* @param {number[]} arr - The array to search.
|
|
3767
|
+
* @param {number} value - The value to search for.
|
|
3768
|
+
* @param {number} [lo] - The starting index of the search.
|
|
3769
|
+
* @returns {number} The index of the leftmost position where value should be inserted.
|
|
3770
|
+
*/
|
|
3771
|
+
_bisectLeft(arr, value, lo = 0) {
|
|
3772
|
+
let hi = arr.length;
|
|
3773
|
+
while (lo < hi) {
|
|
3774
|
+
const mid = lo + hi >>> 1;
|
|
3775
|
+
if (arr[mid] < value) {
|
|
3776
|
+
lo = mid + 1;
|
|
3777
|
+
} else {
|
|
3778
|
+
hi = mid;
|
|
3779
|
+
}
|
|
3780
|
+
}
|
|
3781
|
+
return lo;
|
|
3782
|
+
}
|
|
3783
|
+
/**
|
|
3784
|
+
* Return a string representation of the SentenceChunker.
|
|
3785
|
+
*
|
|
3786
|
+
* @returns {string} A string representation of the SentenceChunker.
|
|
3787
|
+
*/
|
|
3788
|
+
toString() {
|
|
3789
|
+
return `SentenceChunker(tokenizer=${JSON.stringify(this.tokenizer)}, chunkSize=${this.chunkSize}, chunkOverlap=${this.chunkOverlap}, minSentencesPerChunk=${this.minSentencesPerChunk}, minCharactersPerSentence=${this.minCharactersPerSentence}, approximate=${this.approximate}, delim=${JSON.stringify(this.delim)}, includeDelim=${this.includeDelim})`;
|
|
3790
|
+
}
|
|
3791
|
+
};
|
|
3792
|
+
|
|
3793
|
+
// src/exulu/chunker.ts
|
|
3794
|
+
/**
 * Default chunking strategy: sentence-chunks an item's name plus body.
 *
 * The body is `item.content` when that is a non-empty string, otherwise
 * `item.description` when that is a non-empty string, otherwise empty.
 *
 * @param {object} item - Item to chunk; `name`, `content` and `description` are read.
 * @param {number} [maxChunkSize] - Passed to the sentence chunker as `chunkSize`.
 * @returns {Promise<{item: object, chunks: {content: string, index: number}[]}>}
 *   The untouched item and its non-empty, trimmed chunks indexed from 0.
 */
var defaultChunker = async (item, maxChunkSize) => {
  const nonEmptyString = (value) => (typeof value === "string" && value ? value : "");

  const body = nonEmptyString(item.content) || nonEmptyString(item.description);
  const name = typeof item.name === "string" ? item.name : "";
  const text = [name, body].filter(Boolean).join("\n\n").trim();
  if (!text) {
    return { item, chunks: [] };
  }

  const chunker = await SentenceChunker.create({ chunkSize: maxChunkSize });
  const sentenceChunks = await chunker(text);

  // Drop chunks that are blank after trimming; the index counts kept chunks only.
  const chunks = [];
  for (const sentenceChunk of sentenceChunks) {
    const content = sentenceChunk.text.trim();
    if (content.length > 0) {
      chunks.push({ content, index: chunks.length });
    }
  }
  return { item, chunks };
};
|
|
3806
|
+
|
|
3807
|
+
// src/exulu/litellm/parse-embedding-models.ts
|
|
3808
|
+
import { existsSync as existsSync2, readFileSync } from "fs";
|
|
3809
|
+
import { resolve as resolve2 } from "path";
|
|
3810
|
+
var DEFAULT_MAX_CHUNK_SIZE = 1024;
|
|
3811
|
+
var DEFAULT_MAX_BATCH_SIZE = 100;
|
|
3812
|
+
/**
 * Cuts a line at its first `#`, returning everything before it.
 * Lines without a `#` are returned unchanged.
 *
 * @param {string} line - One raw line of text.
 * @returns {string} The part of the line before the first `#`.
 */
var stripComment = (line) => {
  const hashAt = line.indexOf("#");
  if (hashAt < 0) {
    return line;
  }
  return line.slice(0, hashAt);
};
|
|
3816
|
+
/**
 * Parses a base-10 integer from a raw config value.
 *
 * Only an optional sign followed by decimal digits is accepted. A bare
 * `Number()` coercion would also turn an empty or blank string into 0 and
 * accept hex/binary/octal literals such as "0x10", neither of which is a
 * valid base-10 integer.
 *
 * @param {string} raw - Raw value text; surrounding whitespace is ignored.
 * @returns {number | undefined} The integer, or `undefined` when `raw` is not
 *   a safe base-10 integer.
 */
var parseInt10 = (raw) => {
  const trimmed = raw.trim();
  if (!/^[+-]?\d+$/.test(trimmed)) {
    return void 0;
  }
  const n = Number(trimmed);
  // Reject values too large to be represented exactly.
  return Number.isSafeInteger(n) ? n : void 0;
};
|
|
3820
|
+
/**
 * Locates the LiteLLM config file.
 *
 * @returns {string} `LITELLM_CONFIG_PATH` when set, otherwise
 *   `config.litellm.yaml` resolved against the current working directory.
 */
var resolveLiteLLMConfigPath = () => {
  const fromEnv = process.env.LITELLM_CONFIG_PATH;
  if (fromEnv !== void 0 && fromEnv !== null) {
    return fromEnv;
  }
  return resolve2(process.cwd(), "./config.litellm.yaml");
};
|
|
3821
|
+
/**
 * Line-based scan of a LiteLLM YAML config for embedding model entries.
 *
 * An entry opens at a `- model_name: <name>` list item and closes at the next
 * such item, or at a non-list line indented no deeper than the item. Inside an
 * open entry the keys `dimensionality`, `max_chunk_size` and `max_batch_size`
 * are picked up at any depth. Entries without a positive numeric
 * `dimensionality` are dropped.
 *
 * @param {string} configPath - Path of the config file.
 * @returns {{model_name: string, dimensionality: number, maxChunkSize: number, maxBatchSize: number}[]}
 *   The embedding models found; empty when the file does not exist.
 */
var parseEmbeddingModels = (configPath) => {
  if (!existsSync2(configPath)) return [];

  const modelNamePattern = /^\s*-\s*model_name\s*:\s*["']?([^"'\s#]+)["']?\s*$/;
  const keyValuePattern = /^\s*(\w+)\s*:\s*(.+?)\s*$/;
  const numericKeys = /* @__PURE__ */ new Set(["dimensionality", "max_chunk_size", "max_batch_size"]);

  const collected = [];
  let open;
  const closeOpen = () => {
    if (open) collected.push(open);
    open = void 0;
  };

  for (const rawLine of readFileSync(configPath, "utf8").split("\n")) {
    const code = stripComment(rawLine);
    if (!code.trim()) continue;

    const indent = (rawLine.match(/^\s*/)?.[0] ?? "").length;

    const nameHit = code.match(modelNamePattern);
    if (nameHit) {
      closeOpen();
      open = { model_name: nameHit[1], indent };
      continue;
    }
    if (!open) continue;

    // A non-list line at or left of the entry's indent ends the entry.
    if (indent <= open.indent && !/^\s*-\s/.test(rawLine)) {
      closeOpen();
      continue;
    }

    const pair = code.match(keyValuePattern);
    if (!pair) continue;
    const key = pair[1] ?? "";
    const rawValue = pair[2] ?? "";
    if (numericKeys.has(key)) {
      open[key] = parseInt10(rawValue);
    }
  }
  closeOpen();

  const isPositive = (value) => typeof value === "number" && value > 0;
  return collected.filter((entry) => isPositive(entry.dimensionality)).map((entry) => ({
    model_name: entry.model_name,
    dimensionality: entry.dimensionality,
    maxChunkSize: isPositive(entry.max_chunk_size) ? entry.max_chunk_size : DEFAULT_MAX_CHUNK_SIZE,
    maxBatchSize: isPositive(entry.max_batch_size) ? entry.max_batch_size : DEFAULT_MAX_BATCH_SIZE
  }));
};
|
|
3872
|
+
/**
 * Looks up one embedding model in the LiteLLM config.
 *
 * @param {string} modelName - The `model_name` to find.
 * @param {string} [configPath] - Config file path; defaults to `resolveLiteLLMConfigPath()`.
 * @returns {object} The first entry returned by `parseEmbeddingModels` whose
 *   `model_name` equals `modelName`.
 * @throws {Error} When no such entry exists; the message includes a sample config entry.
 */
var getEmbeddingModelInfo = (modelName, configPath = resolveLiteLLMConfigPath()) => {
  for (const candidate of parseEmbeddingModels(configPath)) {
    if (candidate.model_name === modelName) {
      return candidate;
    }
  }
  throw new Error(
    `[EXULU] Embedding model "${modelName}" was not found in ${configPath}, or its entry is missing a numeric \`model_info.dimensionality\`. Add it, e.g.:
- model_name: ${modelName}
litellm_params:
model: <provider>/${modelName}
model_info:
dimensionality: 1024 # required (matches the model's output size)
max_chunk_size: 1024 # optional
max_batch_size: 100 # optional`
  );
};
|
|
3889
|
+
|
|
3890
|
+
// src/exulu/resolve-embedder.ts
|
|
3891
|
+
/**
 * Error raised by `resolveEmbedder`, carrying a machine-readable `code`
 * next to the human-readable message.
 */
var ResolveEmbedderError = class extends Error {
  /**
   * @param {string} code - Failure category, e.g. "LITELLM_NOT_CONFIGURED".
   * @param {string} message - Human-readable description.
   */
  constructor(code, message) {
    super(message);
    Object.assign(this, { code, name: "ResolveEmbedderError" });
  }
};
|
|
3898
|
+
/**
 * Builds an embedder for one model served through the LiteLLM proxy.
 *
 * Steps, in order: check that LiteLLM is enabled and ready, read the proxy
 * host/port/master key from the environment, provision the default budget for
 * the resolved user (if any), look up the model's dimensionality and batch
 * limits, and build the spend-tracking tags sent with every request.
 *
 * @param {object} input - `model` plus optional attribution fields
 *   (`contextId`, `contextName`, `user`, `userId`, `roleId`, `project`,
 *   `agent`, `routine`).
 * @returns {Promise<{model: string, dimensions: number, maxChunkSize: number, maxBatchSize: number, embed: (inputs: string[]) => Promise<number[][]>}>}
 * @throws {ResolveEmbedderError} `LITELLM_NOT_CONFIGURED` when LiteLLM is
 *   disabled or the master key is missing; `LITELLM_NOT_READY` when waiting
 *   for the proxy fails.
 */
async function resolveEmbedder(input) {
  const { model, contextId, contextName, user, userId, roleId, project, agent, routine } = input;

  if (!isLiteLLMEnabled()) {
    throw new ResolveEmbedderError(
      "LITELLM_NOT_CONFIGURED",
      "resolveEmbedder requires EXULU_USE_LITELLM=true \u2014 embeddings are served exclusively through the LiteLLM proxy."
    );
  }

  try {
    await waitForLiteLLMReady();
  } catch (err) {
    throw new ResolveEmbedderError(
      "LITELLM_NOT_READY",
      `LiteLLM is not ready: ${err.message}`
    );
  }

  const {
    LITELLM_HOST: host = "127.0.0.1",
    LITELLM_PORT: port = "4000",
    LITELLM_MASTER_KEY: masterKey
  } = process.env;
  if (!masterKey) {
    throw new ResolveEmbedderError(
      "LITELLM_NOT_CONFIGURED",
      "LITELLM_MASTER_KEY is required when EXULU_USE_LITELLM=true"
    );
  }

  const resolvedUserId = user?.id ?? userId;
  if (resolvedUserId) {
    await provisionDefaultUserBudget(resolvedUserId);
  }

  const { dimensionality, maxChunkSize, maxBatchSize } = getEmbeddingModelInfo(model);

  // An explicitly passed project wins over the project attached to the user.
  const role = user?.role;
  const attributedProject = project ?? user?.project;
  let userName;
  if (user) {
    // API users are labelled by first name when they have one, everyone else by email.
    userName = user.type === "api" ? user.firstname ?? user.email : user.email;
  }
  const tags = buildTags({
    user_id: resolvedUserId,
    role_id: role?.id ?? roleId,
    project_id: attributedProject?.id,
    agent_id: agent?.id,
    team_id: user?.team?.id,
    routine_id: routine?.id,
    context_id: contextId,
    user_name: userName,
    role_name: role?.name,
    project_name: attributedProject?.name,
    agent_name: agent?.name,
    team_name: user?.team?.name,
    routine_name: routine?.name,
    context_name: contextName
  });

  const endpoint = `http://${host}:${port}/v1/embeddings`;

  // Sends one batch and returns its vectors in input order.
  const embedBatch = async (batch) => {
    const payload = {
      model,
      input: batch,
      encoding_format: "float",
      // Pin the output size to the configured column dimensions. LiteLLM's
      // `drop_params: true` silently drops this for providers that don't
      // support it; for those, a dimensionality mismatch surfaces as an
      // insert error (the correct fail-fast).
      dimensions: dimensionality,
      // LiteLLM reads metadata.tags for tag-based spend tracking — same
      // mechanism createTaggedFetch uses for chat completions.
      metadata: { tags }
    };
    const res = await fetch(endpoint, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${masterKey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify(payload)
    });
    if (!res.ok) {
      const text = await res.text().catch(() => "");
      throw new Error(
        `[EXULU] LiteLLM /v1/embeddings returned ${res.status} for model "${model}": ${text}`
      );
    }
    const json = await res.json();
    // Sort a copy by `index` so the vectors line up with `batch`.
    const byIndex = [...json.data ?? []].sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
    const vectors = byIndex.map((entry) => entry.embedding ?? []);
    if (vectors.length !== batch.length) {
      throw new Error(
        `[EXULU] LiteLLM /v1/embeddings returned ${vectors.length} vectors for ${batch.length} inputs (model "${model}").`
      );
    }
    return vectors;
  };

  // Embeds any number of inputs, one `maxBatchSize`-sized request at a time.
  const embed = async (inputs) => {
    const collected = [];
    let offset = 0;
    while (offset < inputs.length) {
      const vectors = await embedBatch(inputs.slice(offset, offset + maxBatchSize));
      collected.push(...vectors);
      offset += maxBatchSize;
    }
    return collected;
  };

  return {
    model,
    dimensions: dimensionality,
    maxChunkSize,
    maxBatchSize,
    embed
  };
}
|
|
4000
|
+
|
|
4001
|
+
// types/enums/statistics.ts
|
|
4002
|
+
// Statistic type names; every key maps to the identical string value.
var STATISTICS_TYPE_ENUM = Object.fromEntries(
  [
    "CONTEXT_RETRIEVE",
    "SOURCE_UPDATE",
    "EMBEDDER_UPSERT",
    "EMBEDDER_GENERATE",
    "EMBEDDER_DELETE",
    "WORKFLOW_RUN",
    "CONTEXT_UPSERT",
    "TOOL_CALL",
    "AGENT_RUN"
  ].map((typeName) => [typeName, typeName])
);
|
|
4013
|
+
|
|
2997
4014
|
// src/utils/query-preprocessing.ts
|
|
2998
4015
|
import { franc } from "franc";
|
|
2999
4016
|
import natural from "natural";
|
|
@@ -3266,19 +4283,6 @@ var applyFilters = (query, filters, table, field_prefix) => {
|
|
|
3266
4283
|
return query;
|
|
3267
4284
|
};
|
|
3268
4285
|
|
|
3269
|
-
// types/enums/statistics.ts
|
|
3270
|
-
var STATISTICS_TYPE_ENUM = {
|
|
3271
|
-
CONTEXT_RETRIEVE: "CONTEXT_RETRIEVE",
|
|
3272
|
-
SOURCE_UPDATE: "SOURCE_UPDATE",
|
|
3273
|
-
EMBEDDER_UPSERT: "EMBEDDER_UPSERT",
|
|
3274
|
-
EMBEDDER_GENERATE: "EMBEDDER_GENERATE",
|
|
3275
|
-
EMBEDDER_DELETE: "EMBEDDER_DELETE",
|
|
3276
|
-
WORKFLOW_RUN: "WORKFLOW_RUN",
|
|
3277
|
-
CONTEXT_UPSERT: "CONTEXT_UPSERT",
|
|
3278
|
-
TOOL_CALL: "TOOL_CALL",
|
|
3279
|
-
AGENT_RUN: "AGENT_RUN"
|
|
3280
|
-
};
|
|
3281
|
-
|
|
3282
4286
|
// types/models/vector-methods.ts
|
|
3283
4287
|
var VectorMethodEnum = {
|
|
3284
4288
|
"cosineDistance": "cosineDistance",
|
|
@@ -4027,6 +5031,15 @@ var usersSchema = {
|
|
|
4027
5031
|
name: "favourite_items",
|
|
4028
5032
|
type: "json"
|
|
4029
5033
|
},
|
|
5034
|
+
{
|
|
5035
|
+
// Knowledge: per-user "recently viewed" data items. Ordered JSON array
|
|
5036
|
+
// of global item ids ("<contextId>/<itemId>"), most-recent first, capped
|
|
5037
|
+
// client-side. Auto-added to existing DBs by the init-exulu-db column
|
|
5038
|
+
// sync; read via userById, written via usersUpdateOne (mirrors
|
|
5039
|
+
// favourite_items).
|
|
5040
|
+
name: "recently_viewed_items",
|
|
5041
|
+
type: "json"
|
|
5042
|
+
},
|
|
4030
5043
|
{
|
|
4031
5044
|
name: "firstname",
|
|
4032
5045
|
type: "text"
|
|
@@ -4106,6 +5119,12 @@ var usersSchema = {
|
|
|
4106
5119
|
{
|
|
4107
5120
|
name: "team",
|
|
4108
5121
|
type: "uuid"
|
|
5122
|
+
},
|
|
5123
|
+
{
|
|
5124
|
+
// Optional attribution target for API keys (type "api"): tags requests
|
|
5125
|
+
// triggered by the key with project_id_ for LiteLLM cost attribution.
|
|
5126
|
+
name: "project",
|
|
5127
|
+
type: "uuid"
|
|
4109
5128
|
}
|
|
4110
5129
|
]
|
|
4111
5130
|
};
|
|
@@ -4135,35 +5154,6 @@ var platformConfigurationsSchema = {
|
|
|
4135
5154
|
}
|
|
4136
5155
|
]
|
|
4137
5156
|
};
|
|
4138
|
-
var embedderSettingsSchema = {
|
|
4139
|
-
type: "embedder_settings",
|
|
4140
|
-
name: {
|
|
4141
|
-
plural: "embedder_settings",
|
|
4142
|
-
singular: "embedder_setting"
|
|
4143
|
-
},
|
|
4144
|
-
RBAC: false,
|
|
4145
|
-
fields: [
|
|
4146
|
-
{
|
|
4147
|
-
name: "context",
|
|
4148
|
-
type: "text"
|
|
4149
|
-
// id of the ExuluContext class
|
|
4150
|
-
},
|
|
4151
|
-
{
|
|
4152
|
-
name: "embedder",
|
|
4153
|
-
type: "text"
|
|
4154
|
-
// id of the ExuluEmbedder class
|
|
4155
|
-
},
|
|
4156
|
-
{
|
|
4157
|
-
name: "name",
|
|
4158
|
-
type: "text"
|
|
4159
|
-
},
|
|
4160
|
-
{
|
|
4161
|
-
name: "value",
|
|
4162
|
-
type: "text"
|
|
4163
|
-
// reference to an exulu variable
|
|
4164
|
-
}
|
|
4165
|
-
]
|
|
4166
|
-
};
|
|
4167
5157
|
var entityTypeSettingsSchema = {
|
|
4168
5158
|
type: "entity_type_settings",
|
|
4169
5159
|
name: {
|
|
@@ -4191,6 +5181,13 @@ var entityTypeSettingsSchema = {
|
|
|
4191
5181
|
name: "active",
|
|
4192
5182
|
type: "boolean",
|
|
4193
5183
|
default: true
|
|
5184
|
+
},
|
|
5185
|
+
{
|
|
5186
|
+
// "active" = a configured type (used in extraction); "suggested" = a type
|
|
5187
|
+
// the extractor proposed (active:false) awaiting promotion on the UI.
|
|
5188
|
+
name: "status",
|
|
5189
|
+
type: "text",
|
|
5190
|
+
default: "active"
|
|
4194
5191
|
}
|
|
4195
5192
|
]
|
|
4196
5193
|
};
|
|
@@ -4282,7 +5279,21 @@ var transcriptionJobsSchema = {
|
|
|
4282
5279
|
{ name: "target_rbac_users", type: "json" },
|
|
4283
5280
|
{ name: "target_rbac_roles", type: "json" },
|
|
4284
5281
|
{ name: "saved_item_id", type: "uuid", required: false },
|
|
4285
|
-
{ name: "error", type: "text" }
|
|
5282
|
+
{ name: "error", type: "text" },
|
|
5283
|
+
// Recall.ai meeting-bot fields. source discriminates the pipeline: whisper
|
|
5284
|
+
// rows are driven by the polling loop, recall rows by webhooks.
|
|
5285
|
+
// Design doc: docs/superpowers/specs/2026-06-19-recall-meeting-recording-design.md
|
|
5286
|
+
{ name: "source", type: "text", default: "whisper", index: true },
|
|
5287
|
+
{ name: "meeting_url", type: "text" },
|
|
5288
|
+
{ name: "recall_bot_id", type: "text", index: true },
|
|
5289
|
+
{ name: "recall_recording_id", type: "text", index: true },
|
|
5290
|
+
{ name: "recall_transcript_id", type: "text", index: true },
|
|
5291
|
+
{ name: "bot_status", type: "text" },
|
|
5292
|
+
{ name: "join_at", type: "date" },
|
|
5293
|
+
// Selected per-meeting post-processing: [{ prompt_id, agent_id }].
|
|
5294
|
+
{ name: "post_processing_prompts", type: "json" },
|
|
5295
|
+
// Results: [{ prompt_id, agent_id, prompt_name, status, output, error, ran_at }].
|
|
5296
|
+
{ name: "post_processing_outputs", type: "json" }
|
|
4286
5297
|
]
|
|
4287
5298
|
};
|
|
4288
5299
|
var imageGenerationsSchema = {
|
|
@@ -4425,7 +5436,6 @@ var coreSchemas = {
|
|
|
4425
5436
|
variablesSchema: () => addCoreFields(variablesSchema),
|
|
4426
5437
|
platformConfigurationsSchema: () => addCoreFields(platformConfigurationsSchema),
|
|
4427
5438
|
promptLibrarySchema: () => addCoreFields(promptLibrarySchema),
|
|
4428
|
-
embedderSettingsSchema: () => addCoreFields(embedderSettingsSchema),
|
|
4429
5439
|
entityTypeSettingsSchema: () => addCoreFields(entityTypeSettingsSchema),
|
|
4430
5440
|
promptFavoritesSchema: () => addCoreFields(promptFavoritesSchema),
|
|
4431
5441
|
contextPresetsSchema: () => addCoreFields(contextPresetsSchema),
|
|
@@ -4560,11 +5570,89 @@ var entitiesEnabled = async (context) => {
|
|
|
4560
5570
|
const types = await hydrateEntityTypes(context);
|
|
4561
5571
|
return types.length > 0;
|
|
4562
5572
|
};
|
|
5573
|
+
/**
 * Persists extractor-proposed entity types for a context as inactive
 * "suggested" rows in `entity_type_settings`.
 *
 * A suggestion is skipped when its name is not a string, is blank, or matches
 * (case-insensitively, ignoring surrounding whitespace) either a row already
 * stored for the context or an earlier suggestion in the same call. Without
 * the in-batch check, two spellings of one name would both be inserted.
 *
 * Best-effort: any failure is logged as a warning and never thrown.
 *
 * @param {{id: string}} context - Context the suggestions belong to.
 * @param {{name: string, description: string, example?: string}[]} suggestions
 * @returns {Promise<void>}
 */
var upsertEntitySuggestions = async (context, suggestions) => {
  if (!suggestions.length) return;
  const normalize = (name) => String(name || "").toLowerCase().trim();
  try {
    const { db: db2 } = await postgresClient();
    const existing = await db2.from("entity_type_settings").where({ context: context.id }).select("name");
    // Names already taken: stored rows first, then every suggestion accepted below.
    const takenNames = new Set(existing.map((r) => normalize(r.name)));
    const rows = [];
    for (const s of suggestions) {
      // Skip malformed entries instead of letting one bad name abort the whole batch.
      if (typeof s?.name !== "string") continue;
      const key = normalize(s.name);
      if (!key || takenNames.has(key)) continue;
      takenNames.add(key);
      rows.push({
        name: s.name,
        description: s.example ? `${s.description} (e.g. ${s.example})` : s.description,
        context: context.id,
        active: false,
        status: "suggested"
      });
    }
    if (rows.length) {
      await db2.from("entity_type_settings").insert(rows);
    }
  } catch (err) {
    console.warn(
      "[EXULU] Could not persist entity suggestions:",
      err?.message ?? err
    );
  }
};
|
|
5600
|
+
/**
 * Builds the `platform_configurations` key under which a context's entity
 * extraction model is stored.
 *
 * @param {string} contextId - Id of the context.
 * @returns {string} `entity_extraction_model:<contextId>`.
 */
var entityModelKey = (contextId) => {
  const prefix = "entity_extraction_model";
  return `${prefix}:${contextId}`;
};
|
|
5601
|
+
/**
 * Reads the entity extraction model stored for a context in
 * `platform_configurations`.
 *
 * A string value is JSON-decoded when possible and used as-is otherwise.
 * Only a non-blank string result counts as a model id.
 *
 * @param {string} contextId - Id of the context.
 * @returns {Promise<string | null>} The trimmed model id, or `null` when
 *   nothing usable is stored or the lookup fails (failures are logged).
 */
var getEntityModelSetting = async (contextId) => {
  try {
    const { db: db2 } = await postgresClient();
    const row = await db2.from("platform_configurations").where({ config_key: entityModelKey(contextId) }).first();
    const stored = row?.config_value;
    if (!stored) return null;

    let decoded = stored;
    if (typeof stored === "string") {
      try {
        decoded = JSON.parse(stored);
      } catch {
        // Not valid JSON: keep the raw string.
      }
    }

    if (typeof decoded !== "string") return null;
    const trimmed = decoded.trim();
    return trimmed ? trimmed : null;
  } catch (err) {
    console.warn("[EXULU] Could not read entity model setting:", err.message);
    return null;
  }
};
|
|
5621
|
+
/**
 * Stores, replaces or clears the entity extraction model of a context in
 * `platform_configurations`.
 *
 * @param {string} contextId - Id of the context.
 * @param {string | null | undefined} modelId - Model id to store; an empty or
 *   blank value deletes the stored setting instead.
 * @returns {Promise<void>}
 */
var setEntityModelSetting = async (contextId, modelId) => {
  const { db: db2 } = await postgresClient();
  const key = entityModelKey(contextId);
  const configurations = () => db2.from("platform_configurations");

  const trimmed = modelId ? modelId.trim() : "";
  if (!trimmed) {
    await configurations().where({ config_key: key }).del();
    return;
  }

  // The value is stored JSON-encoded; an existing row for the key is updated in place.
  const value = JSON.stringify(trimmed);
  const row = {
    config_key: key,
    config_value: value,
    description: `Entity extraction model for context ${contextId}`
  };
  await configurations().insert(row).onConflict("config_key").merge({ config_value: value });
};
|
|
5635
|
+
/**
 * Decides which model to use for entity extraction in a context.
 *
 * Precedence: the model stored in the database, then `context.entities.model`
 * from code, then the `EXULU_ENTITY_EXTRACTION_MODEL` environment variable.
 *
 * @param {{id: string, entities?: {model?: string}}} context
 * @returns {Promise<{effectiveModel: string | null, source: "database" | "code" | "env" | null, databaseModel: string | null, codeModel: string | null}>}
 */
var resolveEntityModel = async (context) => {
  const databaseModel = await getEntityModelSetting(context.id);
  const codeModel = context.entities?.model ?? null;
  const envModel = process.env.EXULU_ENTITY_EXTRACTION_MODEL ?? null;

  // Ordered by precedence; the first candidate with a truthy model wins.
  const candidates = [
    ["database", databaseModel],
    ["code", codeModel],
    ["env", envModel]
  ];
  const [source, effectiveModel] = candidates.find(([, candidate]) => candidate) ?? [null, null];

  return { effectiveModel, source, databaseModel, codeModel };
};
|
|
4563
5650
|
|
|
4564
5651
|
// src/exulu/entities/extractor.ts
|
|
4565
5652
|
import { generateText as generateText2, Output as Output2 } from "ai";
|
|
4566
5653
|
import { z as z5 } from "zod";
|
|
4567
5654
|
var CHUNK_BATCH_SIZE = 30;
|
|
5655
|
+
var MAX_SUGGESTIONS = 5;
|
|
4568
5656
|
var mentionSchema = z5.object({
|
|
4569
5657
|
entities: z5.array(
|
|
4570
5658
|
z5.object({
|
|
@@ -4574,23 +5662,51 @@ var mentionSchema = z5.object({
|
|
|
4574
5662
|
canonical: z5.string().describe("The canonical, language-normalized name used to merge variants."),
|
|
4575
5663
|
confidence: z5.number().min(0).max(1).describe("Confidence 0..1 that this is a valid entity.")
|
|
4576
5664
|
})
|
|
4577
|
-
)
|
|
5665
|
+
),
|
|
5666
|
+
suggestedTypes: z5.array(
|
|
5667
|
+
z5.object({
|
|
5668
|
+
name: z5.string().describe("A concise NEW entity type name, e.g. 'Error Code', 'Component'."),
|
|
5669
|
+
description: z5.string().describe("What this type captures, one sentence."),
|
|
5670
|
+
mentions: z5.array(
|
|
5671
|
+
z5.object({
|
|
5672
|
+
chunkIndex: z5.number().int().describe("Index of the chunk this mention was found in."),
|
|
5673
|
+
mention: z5.string().describe("The exact surface form as it appears in the text."),
|
|
5674
|
+
canonical: z5.string().describe("Canonical, language-normalized name."),
|
|
5675
|
+
confidence: z5.number().min(0).max(1).describe("Confidence 0..1.")
|
|
5676
|
+
})
|
|
5677
|
+
).describe("Every mention of this new type found in the provided chunks.")
|
|
5678
|
+
})
|
|
5679
|
+
).describe(
|
|
5680
|
+
"Entity TYPES that recur in the text but are NOT in the configured list and would be worth tracking. Empty array if none."
|
|
5681
|
+
).optional()
|
|
4578
5682
|
});
|
|
4579
5683
|
/**
 * Builds the system prompt for the entity extraction model.
 *
 * @param {{name: string, description: string}[]} types - Configured entity types.
 * @param {string} canonicalLanguage - Language that canonical names are normalized to.
 * @returns {string} The prompt, one instruction per line.
 */
var buildSystemPrompt = (types, canonicalLanguage) => {
  // Bullet list of "name: description" shown to the model.
  const catalogue = types.map((t) => `- ${t.name}: ${t.description}`).join("\n");
  // Quoted, comma-separated names for the strict "type" field rule.
  const quotedNames = types.map((t) => `"${t.name}"`).join(", ");

  const intro = [
    "You are an entity extraction engine. Extract entities of ONLY the types listed below from the provided text chunks.",
    "",
    'Entity types (the "type" field of every entity you output MUST be exactly one of these names):',
    catalogue,
    ""
  ];
  const outputShape = [
    "For each entity, output an object with:",
    `- "type": EXACTLY one of these type names \u2014 ${quotedNames}. NEVER put the description, a category value, or a generic label like "named entity"/"classification" in this field.`,
    '- "mention" and "canonical": the value (see how to choose them below).',
    '- "chunkIndex": the index of the chunk the entity was found in.',
    '- "confidence": your certainty the entity is valid (0..1).',
    ""
  ];
  const valueSelection = [
    "How to choose mention/canonical depends on each type's description:",
    `- If the type names concrete things mentioned in the text (a person, product, place, code, organization): output the exact surface form as it appears as "mention", and a "canonical" normalized to ${canonicalLanguage}. Output a SEPARATE entity object for every distinct value \u2014 a single chunk may contain many of these, including several of the SAME type (e.g. three different cities \u2192 three "City" entities) and several of different types. Never collapse them or limit yourself to one per chunk.`,
    `- If the type is a classification or property (the description defines a fixed set of categories, or asks you to judge a property of the content \u2014 e.g. "is this a fact or an instruction"): pick the single best-fitting category for the chunk and output that category as BOTH "mention" and "canonical", even if that exact word is not in the text. Output at most one entity of such a type per chunk.`,
    ""
  ];
  const rules = [
    "Rules:",
    "- A chunk can yield MULTIPLE entities \u2014 of the same named-entity type and of different types. Extract every distinct entity you find. The one-per-chunk limit applies ONLY to classification/property types.",
    `- The canonical name merges variants and translations: e.g. "M\xFCnchen" and "MUC" both canonicalize to the ${canonicalLanguage} form "Munich". "Acme Inc" and "ACME" both canonicalize to "Acme".`,
    "- DO NOT translate or alter identifiers, case numbers, SKUs, product codes, or proper product names \u2014 keep those verbatim as their own canonical.",
    "- Only put entities of the listed types in the `entities` array. Do NOT put anything else there.",
    "- Use an empty `entities` array when no listed type applies to the text.",
    ""
  ];
  const suggestions = [
    `Separately, in "suggestedTypes", propose up to ${MAX_SUGGESTIONS} entity TYPES that recur in the text but are NOT in the list above and would be worth tracking (e.g. a kind of code, component, or product that keeps appearing). For each, give a concise name, a one-sentence description, and the full list of its "mentions" found in the chunks \u2014 extract those mentions exactly as you would for a configured named-entity type (each with its chunkIndex, the surface-form "mention", a normalized "canonical", and "confidence"). Leave the array empty if nothing stands out. Never put these in the "entities" array.`
  ];

  return [...intro, ...outputShape, ...valueSelection, ...rules, ...suggestions].join("\n");
};
|
|
4596
5712
|
var buildUserPrompt = (chunks) => {
|
|
@@ -4602,13 +5718,17 @@ var extractEntitiesForItem = async ({
|
|
|
4602
5718
|
chunks,
|
|
4603
5719
|
types
|
|
4604
5720
|
}) => {
|
|
4605
|
-
|
|
4606
|
-
|
|
5721
|
+
const empty = {
|
|
5722
|
+
mentions: [],
|
|
5723
|
+
suggestions: []
|
|
5724
|
+
};
|
|
5725
|
+
if (!types.length || !chunks.length) return empty;
|
|
5726
|
+
const { effectiveModel: modelId } = await resolveEntityModel(context);
|
|
4607
5727
|
if (!modelId) {
|
|
4608
5728
|
console.warn(
|
|
4609
|
-
`[EXULU] Entity extraction skipped for context ${context.id}: no entities.model
|
|
5729
|
+
`[EXULU] Entity extraction skipped for context ${context.id}: no model configured. Select one in the Entities tab, set context.entities.model in code, or set EXULU_ENTITY_EXTRACTION_MODEL.`
|
|
4610
5730
|
);
|
|
4611
|
-
return
|
|
5731
|
+
return empty;
|
|
4612
5732
|
}
|
|
4613
5733
|
const canonicalLanguage = context.entities?.canonicalLanguage || "english";
|
|
4614
5734
|
const confidenceThreshold = context.entities?.confidenceThreshold ?? 0.5;
|
|
@@ -4625,15 +5745,15 @@ var extractEntitiesForItem = async ({
|
|
|
4625
5745
|
`[EXULU] Entity extraction skipped for context ${context.id}: could not resolve model ${modelId}:`,
|
|
4626
5746
|
err.message
|
|
4627
5747
|
);
|
|
4628
|
-
return
|
|
5748
|
+
return empty;
|
|
4629
5749
|
}
|
|
4630
5750
|
const system = buildSystemPrompt(types, canonicalLanguage);
|
|
4631
|
-
const validTypeNames = new Set(types.map((t) => t.name.toLowerCase().trim()));
|
|
4632
5751
|
const batches = [];
|
|
4633
5752
|
for (let i = 0; i < chunks.length; i += CHUNK_BATCH_SIZE) {
|
|
4634
5753
|
batches.push(chunks.slice(i, i + CHUNK_BATCH_SIZE));
|
|
4635
5754
|
}
|
|
4636
5755
|
const mentions = [];
|
|
5756
|
+
const suggestionsByName = /* @__PURE__ */ new Map();
|
|
4637
5757
|
for (const batch of batches) {
|
|
4638
5758
|
try {
|
|
4639
5759
|
const { output } = await generateText2({
|
|
@@ -4647,16 +5767,47 @@ var extractEntitiesForItem = async ({
|
|
|
4647
5767
|
for (const e of output.entities) {
|
|
4648
5768
|
if (!e.mention || !e.canonical || !e.type) continue;
|
|
4649
5769
|
if (e.confidence < confidenceThreshold) continue;
|
|
4650
|
-
|
|
4651
|
-
const declared = types.find(
|
|
5770
|
+
const eType = e.type.toLowerCase().trim();
|
|
5771
|
+
const declared = types.find(
|
|
5772
|
+
(t) => t.name.toLowerCase().trim() === eType || (t.description || "").toLowerCase().trim() === eType
|
|
5773
|
+
);
|
|
5774
|
+
if (!declared) continue;
|
|
4652
5775
|
mentions.push({
|
|
4653
5776
|
chunkIndex: e.chunkIndex,
|
|
4654
|
-
type: declared
|
|
5777
|
+
type: declared.name,
|
|
4655
5778
|
mention: e.mention,
|
|
4656
5779
|
canonical: e.canonical,
|
|
4657
5780
|
confidence: e.confidence
|
|
4658
5781
|
});
|
|
4659
5782
|
}
|
|
5783
|
+
for (const s of output.suggestedTypes ?? []) {
|
|
5784
|
+
const name = (s?.name || "").trim();
|
|
5785
|
+
if (!name) continue;
|
|
5786
|
+
const key = name.toLowerCase();
|
|
5787
|
+
const isConfigured = types.some(
|
|
5788
|
+
(t) => t.name.toLowerCase().trim() === key || (t.description || "").toLowerCase().trim() === key
|
|
5789
|
+
);
|
|
5790
|
+
if (isConfigured) continue;
|
|
5791
|
+
const sMentions = (s.mentions ?? []).filter(
|
|
5792
|
+
(m) => m.mention && m.canonical && m.confidence >= confidenceThreshold
|
|
5793
|
+
);
|
|
5794
|
+
for (const m of sMentions) {
|
|
5795
|
+
mentions.push({
|
|
5796
|
+
chunkIndex: m.chunkIndex,
|
|
5797
|
+
type: name,
|
|
5798
|
+
mention: m.mention,
|
|
5799
|
+
canonical: m.canonical,
|
|
5800
|
+
confidence: m.confidence
|
|
5801
|
+
});
|
|
5802
|
+
}
|
|
5803
|
+
if (!suggestionsByName.has(key)) {
|
|
5804
|
+
suggestionsByName.set(key, {
|
|
5805
|
+
name,
|
|
5806
|
+
description: (s.description || "").trim(),
|
|
5807
|
+
example: sMentions[0]?.mention || void 0
|
|
5808
|
+
});
|
|
5809
|
+
}
|
|
5810
|
+
}
|
|
4660
5811
|
} catch (err) {
|
|
4661
5812
|
console.error(
|
|
4662
5813
|
`[EXULU] Entity extraction batch failed for context ${context.id} (continuing):`,
|
|
@@ -4664,7 +5815,11 @@ var extractEntitiesForItem = async ({
|
|
|
4664
5815
|
);
|
|
4665
5816
|
}
|
|
4666
5817
|
}
|
|
4667
|
-
|
|
5818
|
+
const suggestions = [...suggestionsByName.values()];
|
|
5819
|
+
console.log(
|
|
5820
|
+
`[EXULU][entities] context ${context.id}: kept ${mentions.length} mention(s), ${suggestions.length} suggestion(s).`
|
|
5821
|
+
);
|
|
5822
|
+
return { mentions, suggestions };
|
|
4668
5823
|
};
|
|
4669
5824
|
|
|
4670
5825
|
// src/exulu/entities/normalize.ts
|
|
@@ -4746,6 +5901,27 @@ var getEntityIdsForItem = async (context, itemId) => {
|
|
|
4746
5901
|
const rows = await db2(junctionTable).where({ item_id: itemId }).distinct("entity_id");
|
|
4747
5902
|
return rows.map((r) => r.entity_id);
|
|
4748
5903
|
};
|
|
5904
|
+
/**
 * List the entities linked to one item, with how often each is mentioned.
 *
 * Joins the context's chunk-entities junction table to its entities table,
 * keeps only the links that belong to `itemId`, and counts the junction rows
 * per entity.
 *
 * @param {object} context - Context whose `id` selects the entity tables.
 * @param {string} itemId - Id of the item whose entity links are read.
 * @returns {Promise<Array<{id: *, type: *, name: *, mentions: number}>>}
 *   One entry per linked entity, ordered by type and then display name.
 *   Resolves to an empty array when the junction table does not exist.
 */
var getEntitiesForItem = async (context, itemId) => {
  const hasLinksTable = await chunkEntitiesTableExists(context);
  if (!hasLinksTable) {
    return [];
  }

  const { db: db2 } = await postgresClient();
  const linksTable = getChunkEntitiesTableName(context.id);
  const entityTable = getEntitiesTableName(context.id);

  const ordering = [
    { column: "e.type", order: "asc" },
    { column: "e.display_name", order: "asc" }
  ];

  const query = db2(`${linksTable} as j`)
    .join(`${entityTable} as e`, "e.id", "j.entity_id")
    .where("j.item_id", itemId)
    .groupBy("e.id", "e.type", "e.display_name")
    .select(
      "e.id as id",
      "e.type as type",
      "e.display_name as name",
      db2.raw("COUNT(*)::int as mentions")
    )
    .orderBy(ordering);

  const rows = await query;

  // Coerce the count defensively: anything that is not a usable number becomes 0.
  return rows.map((row) => {
    const mentionCount = Number(row.mentions) || 0;
    return {
      id: row.id,
      type: row.type,
      name: row.name,
      mentions: mentionCount
    };
  });
};
|
|
4749
5925
|
var ingestEntitiesForItem = async ({
|
|
4750
5926
|
context,
|
|
4751
5927
|
itemId,
|
|
@@ -4830,6 +6006,37 @@ var ingestEntitiesForItem = async ({
|
|
|
4830
6006
|
});
|
|
4831
6007
|
});
|
|
4832
6008
|
};
|
|
6009
|
+
/**
 * Remove every entity link of a single item and clean up what is left behind.
 *
 * Inside one transaction this:
 *   1. collects the distinct entity ids currently linked to the item,
 *   2. deletes the item's junction rows,
 *   3. zeroes and then recomputes `mention_count` / `doc_count` for those
 *      entities from the remaining junction rows,
 *   4. deletes the affected entities whose `mention_count` is still 0,
 *   5. stamps the item row with a fresh `entities_updated_at` and clears its
 *      `entity_types_signature`.
 *
 * When the item has no links the transaction returns early and the item row
 * is left untouched.
 *
 * @param {object} context - Context whose `id` selects the tables involved.
 * @param {string} itemId - Id of the item to detach entities from.
 * @returns {Promise<number>} Number of distinct entities that were linked to
 *   the item (0 when the junction table does not exist or nothing was linked).
 */
var detachEntitiesForItem = async (context, itemId) => {
  const hasLinksTable = await chunkEntitiesTableExists(context);
  if (!hasLinksTable) {
    return 0;
  }

  const { db: db2 } = await postgresClient();
  const entitiesTable = getEntitiesTableName(context.id);
  const junctionTable = getChunkEntitiesTableName(context.id);
  const itemsTable = getTableName(context.id);

  let detached = 0;

  await db2.transaction(async (trx) => {
    const linkedRows = await trx(junctionTable).where({ item_id: itemId }).distinct("entity_id");
    const affected = linkedRows.map((row) => row.entity_id);
    detached = affected.length;
    if (!affected.length) {
      return;
    }

    await trx(junctionTable).where({ item_id: itemId }).delete();

    // Reset first: entities with no remaining links are not matched by the
    // recount below and must end up at 0 so they can be pruned.
    await trx(entitiesTable).whereIn("id", affected).update({ mention_count: 0, doc_count: 0 });

    const placeholders = affected.map(() => "?").join(",");
    const recountSql = `UPDATE ${entitiesTable} e
SET mention_count = sub.mc, doc_count = sub.dc
FROM (
SELECT entity_id, COUNT(*)::int AS mc, COUNT(DISTINCT item_id)::int AS dc
FROM ${junctionTable}
WHERE entity_id IN (${placeholders})
GROUP BY entity_id
) sub
WHERE e.id = sub.entity_id`;
    await trx.raw(recountSql, affected);

    // Prune entities that no longer have any mention anywhere.
    await trx(entitiesTable).whereIn("id", affected).where({ mention_count: 0 }).delete();

    const stamp = new Date().toISOString();
    await trx(itemsTable)
      .where({ id: itemId })
      .update({ entities_updated_at: stamp, entity_types_signature: null });
  });

  return detached;
};
|
|
4833
6040
|
var resolveQueryEntities = async (context, mentions) => {
|
|
4834
6041
|
if (!mentions.length) return [];
|
|
4835
6042
|
const { db: db2 } = await postgresClient();
|
|
@@ -4961,22 +6168,31 @@ var extractAndIngestEntities = async ({
|
|
|
4961
6168
|
}) => {
|
|
4962
6169
|
try {
|
|
4963
6170
|
const types = await hydrateEntityTypes(context);
|
|
4964
|
-
if (!types.length || !context.embedder) return;
|
|
6171
|
+
if (!types.length || !context.embedder) return 0;
|
|
4965
6172
|
await ensureEntityTables(context);
|
|
4966
6173
|
const { db: db2 } = await postgresClient();
|
|
4967
6174
|
const chunkRows = await db2(getChunksTableName(context.id)).where({ source: itemId }).select("chunk_index", "content").orderBy("chunk_index", "asc");
|
|
4968
6175
|
const chunks = chunkRows.map((c) => ({ index: Number(c.chunk_index), content: c.content })).filter((c) => c.content);
|
|
4969
|
-
const mentions = await extractEntitiesForItem({
|
|
6176
|
+
const { mentions, suggestions } = await extractEntitiesForItem({
|
|
6177
|
+
context,
|
|
6178
|
+
chunks,
|
|
6179
|
+
types
|
|
6180
|
+
});
|
|
4970
6181
|
const signature = computeTypesSignature(types);
|
|
4971
6182
|
await ingestEntitiesForItem({ context, itemId, mentions, signature, previousEntityIds });
|
|
6183
|
+
if (suggestions.length) {
|
|
6184
|
+
await upsertEntitySuggestions(context, suggestions);
|
|
6185
|
+
}
|
|
4972
6186
|
console.log(
|
|
4973
6187
|
`[EXULU] Entity ingestion complete for item ${itemId} in context ${context.id}: ${mentions.length} mentions.`
|
|
4974
6188
|
);
|
|
6189
|
+
return mentions.length;
|
|
4975
6190
|
} catch (err) {
|
|
4976
6191
|
console.error(
|
|
4977
6192
|
`[EXULU] Entity ingestion failed for item ${itemId} in context ${context.id} (non-fatal):`,
|
|
4978
6193
|
err.message
|
|
4979
6194
|
);
|
|
6195
|
+
return 0;
|
|
4980
6196
|
}
|
|
4981
6197
|
};
|
|
4982
6198
|
|
|
@@ -5088,20 +6304,27 @@ var vectorSearch = async ({
|
|
|
5088
6304
|
if (stemmedQuery) {
|
|
5089
6305
|
query = stemmedQuery;
|
|
5090
6306
|
}
|
|
5091
|
-
|
|
5092
|
-
|
|
5093
|
-
|
|
5094
|
-
|
|
5095
|
-
|
|
5096
|
-
|
|
5097
|
-
|
|
5098
|
-
user?.id,
|
|
6307
|
+
await updateStatistic({
|
|
6308
|
+
name: "count",
|
|
6309
|
+
label: table.name.singular,
|
|
6310
|
+
type: STATISTICS_TYPE_ENUM.EMBEDDER_GENERATE,
|
|
6311
|
+
trigger,
|
|
6312
|
+
count: 1,
|
|
6313
|
+
user: user?.id,
|
|
5099
6314
|
role
|
|
5100
|
-
);
|
|
5101
|
-
|
|
6315
|
+
});
|
|
6316
|
+
const resolved = await resolveEmbedder({
|
|
6317
|
+
model: embedder.model,
|
|
6318
|
+
contextId: context.id,
|
|
6319
|
+
contextName: context.name,
|
|
6320
|
+
user,
|
|
6321
|
+
roleId: role
|
|
6322
|
+
});
|
|
6323
|
+
const [queryVector] = await resolved.embed([query], { inputType: "query" });
|
|
6324
|
+
if (!queryVector?.length) {
|
|
5102
6325
|
throw new Error("No vector generated for query.");
|
|
5103
6326
|
}
|
|
5104
|
-
vector =
|
|
6327
|
+
vector = queryVector;
|
|
5105
6328
|
vectorStr = `ARRAY[${vector.join(",")}]`;
|
|
5106
6329
|
vectorExpr = `${vectorStr}::vector`;
|
|
5107
6330
|
}
|
|
@@ -5289,7 +6512,7 @@ var vectorSearch = async ({
|
|
|
5289
6512
|
if (entitiesOn && rawQuery) {
|
|
5290
6513
|
try {
|
|
5291
6514
|
const types = await hydrateEntityTypes(context);
|
|
5292
|
-
const queryMentions = await extractEntitiesForItem({
|
|
6515
|
+
const { mentions: queryMentions } = await extractEntitiesForItem({
|
|
5293
6516
|
context,
|
|
5294
6517
|
chunks: [{ index: 0, content: rawQuery }],
|
|
5295
6518
|
types
|
|
@@ -5328,7 +6551,6 @@ var vectorSearch = async ({
|
|
|
5328
6551
|
{ length: expand.before },
|
|
5329
6552
|
(_, i) => chunk.chunk_index - expand.before + i
|
|
5330
6553
|
).filter((index) => index >= 0);
|
|
5331
|
-
console.log("[EXULU] Indices to fetch:", indicesToFetch);
|
|
5332
6554
|
await Promise.all(
|
|
5333
6555
|
indicesToFetch.map(async (index) => {
|
|
5334
6556
|
if (expandedMap.has(`${chunk.item_id}-${index}`)) {
|
|
@@ -5373,7 +6595,6 @@ var vectorSearch = async ({
|
|
|
5373
6595
|
{ length: expand.after },
|
|
5374
6596
|
(_, i) => chunk.chunk_index + i + 1
|
|
5375
6597
|
);
|
|
5376
|
-
console.log("[EXULU] Indices to fetch:", indicesToFetch);
|
|
5377
6598
|
await Promise.all(
|
|
5378
6599
|
indicesToFetch.map(async (index) => {
|
|
5379
6600
|
if (expandedMap.has(`${chunk.item_id}-${index}`)) {
|
|
@@ -5449,7 +6670,7 @@ var vectorSearch = async ({
|
|
|
5449
6670
|
context: {
|
|
5450
6671
|
name: table.name.singular,
|
|
5451
6672
|
id: table.id || "",
|
|
5452
|
-
embedder: embedder.
|
|
6673
|
+
embedder: embedder.model
|
|
5453
6674
|
},
|
|
5454
6675
|
chunks: results,
|
|
5455
6676
|
entityInsights
|
|
@@ -5698,6 +6919,12 @@ var ExuluContext2 = class {
|
|
|
5698
6919
|
processor;
|
|
5699
6920
|
description;
|
|
5700
6921
|
embedder;
|
|
6922
|
+
/**
|
|
6923
|
+
* Splits an item into embeddable chunks. Moved here from the removed
|
|
6924
|
+
* ExuluEmbedder. When omitted, the built-in `defaultChunker` (SentenceChunker)
|
|
6925
|
+
* is used so a context works from just an embedder model name.
|
|
6926
|
+
*/
|
|
6927
|
+
chunker;
|
|
5701
6928
|
queryRewriter;
|
|
5702
6929
|
resultReranker;
|
|
5703
6930
|
configuration;
|
|
@@ -5713,6 +6940,7 @@ var ExuluContext2 = class {
|
|
|
5713
6940
|
name,
|
|
5714
6941
|
description,
|
|
5715
6942
|
embedder,
|
|
6943
|
+
chunker,
|
|
5716
6944
|
processor,
|
|
5717
6945
|
active,
|
|
5718
6946
|
fields,
|
|
@@ -5744,6 +6972,7 @@ var ExuluContext2 = class {
|
|
|
5744
6972
|
};
|
|
5745
6973
|
this.description = description;
|
|
5746
6974
|
this.embedder = embedder;
|
|
6975
|
+
this.chunker = chunker;
|
|
5747
6976
|
this.active = active;
|
|
5748
6977
|
this.queryRewriter = queryRewriter;
|
|
5749
6978
|
this.resultReranker = resultReranker;
|
|
@@ -5904,20 +7133,40 @@ var ExuluContext2 = class {
|
|
|
5904
7133
|
throw new Error("Item id is required for generating embeddings.");
|
|
5905
7134
|
}
|
|
5906
7135
|
const { db: db2 } = await postgresClient();
|
|
5907
|
-
|
|
5908
|
-
|
|
5909
|
-
|
|
5910
|
-
|
|
5911
|
-
|
|
5912
|
-
|
|
5913
|
-
|
|
5914
|
-
|
|
5915
|
-
|
|
5916
|
-
|
|
5917
|
-
|
|
5918
|
-
|
|
5919
|
-
|
|
7136
|
+
if (statistics) {
|
|
7137
|
+
await updateStatistic({
|
|
7138
|
+
name: "count",
|
|
7139
|
+
label: statistics.label,
|
|
7140
|
+
type: STATISTICS_TYPE_ENUM.EMBEDDER_GENERATE,
|
|
7141
|
+
trigger: statistics.trigger,
|
|
7142
|
+
count: 1,
|
|
7143
|
+
user,
|
|
7144
|
+
role
|
|
7145
|
+
});
|
|
7146
|
+
}
|
|
7147
|
+
const source = item.id;
|
|
7148
|
+
const resolved = await resolveEmbedder({
|
|
7149
|
+
model: this.embedder.model,
|
|
7150
|
+
contextId: this.id,
|
|
7151
|
+
contextName: this.name,
|
|
7152
|
+
userId: user,
|
|
7153
|
+
roleId: role
|
|
7154
|
+
});
|
|
7155
|
+
const chunkerFn = this.chunker ?? defaultChunker;
|
|
7156
|
+
const { chunks: produced } = await chunkerFn(
|
|
7157
|
+
{ ...item, id: item.id },
|
|
7158
|
+
resolved.maxChunkSize,
|
|
7159
|
+
{ storage: new ExuluStorage({ config }) }
|
|
5920
7160
|
);
|
|
7161
|
+
console.log("[EXULU] Generating embeddings.");
|
|
7162
|
+
const contents = produced.map((c) => c.content);
|
|
7163
|
+
const vectors = contents.length ? await resolved.embed(contents, { inputType: "document" }) : [];
|
|
7164
|
+
const chunks = produced.map((c, i) => ({
|
|
7165
|
+
content: c.content,
|
|
7166
|
+
index: c.index,
|
|
7167
|
+
metadata: c.metadata ?? {},
|
|
7168
|
+
vector: vectors[i] ?? []
|
|
7169
|
+
}));
|
|
5921
7170
|
const previousEntityIds = await captureEntitiesBeforeReembed(this, item.id);
|
|
5922
7171
|
await db2.from(getChunksTableName(this.id)).where({ source }).delete();
|
|
5923
7172
|
if (chunks?.length) {
|
|
@@ -6188,8 +7437,8 @@ var ExuluContext2 = class {
|
|
|
6188
7437
|
console.log("[EXULU] embedder is in queue mode, scheduling job.");
|
|
6189
7438
|
const job = await bullmqDecorator({
|
|
6190
7439
|
timeoutInSeconds: queue.timeoutInSeconds || 180,
|
|
6191
|
-
label: `${this.embedder.
|
|
6192
|
-
embedder: this.embedder.
|
|
7440
|
+
label: `${this.embedder.model}`,
|
|
7441
|
+
embedder: this.embedder.model,
|
|
6193
7442
|
context: this.id,
|
|
6194
7443
|
backoff: queue.backoff || {
|
|
6195
7444
|
type: "exponential",
|
|
@@ -6214,7 +7463,7 @@ var ExuluContext2 = class {
|
|
|
6214
7463
|
config,
|
|
6215
7464
|
user,
|
|
6216
7465
|
{
|
|
6217
|
-
label: this.embedder.
|
|
7466
|
+
label: this.embedder.model,
|
|
6218
7467
|
trigger: trigger || "agent"
|
|
6219
7468
|
},
|
|
6220
7469
|
role,
|
|
@@ -6320,6 +7569,25 @@ var ExuluContext2 = class {
|
|
|
6320
7569
|
}
|
|
6321
7570
|
return { processed: batch.length, skipped };
|
|
6322
7571
|
},
|
|
7572
|
+
/**
|
|
7573
|
+
* Extract + ingest entities for a SINGLE item — powers the item detail
|
|
7574
|
+
* page's "Extract entities" test action. Returns the number of mentions
|
|
7575
|
+
* found so the UI can report the result.
|
|
7576
|
+
*/
|
|
7577
|
+
extractItem: async (itemId) => {
|
|
7578
|
+
if (!await entitiesEnabled(this)) {
|
|
7579
|
+
throw new Error(
|
|
7580
|
+
"Entity extraction is not configured for this context (no entity types, or no embedder)."
|
|
7581
|
+
);
|
|
7582
|
+
}
|
|
7583
|
+
const extracted = await extractAndIngestEntities({ context: this, itemId });
|
|
7584
|
+
return { extracted };
|
|
7585
|
+
},
|
|
7586
|
+
/** Detach all entities from a single item (drops links, prunes orphans). */
|
|
7587
|
+
detachItem: async (itemId) => {
|
|
7588
|
+
const detached = await detachEntitiesForItem(this, itemId);
|
|
7589
|
+
return { detached };
|
|
7590
|
+
},
|
|
6323
7591
|
/** Remove all entities (and their mentions via cascade) of a given type. */
|
|
6324
7592
|
purgeType: async (typeName) => {
|
|
6325
7593
|
if (!await entitiesTableExists(this)) return { removed: 0 };
|
|
@@ -6374,18 +7642,19 @@ var ExuluContext2 = class {
|
|
|
6374
7642
|
const { db: db2 } = await postgresClient();
|
|
6375
7643
|
const tableName = getChunksTableName(this.id);
|
|
6376
7644
|
console.log("[EXULU] Creating table: " + tableName);
|
|
7645
|
+
if (!this.embedder) {
|
|
7646
|
+
throw new Error(
|
|
7647
|
+
"Embedder must be set for context " + this.name + " to create chunks table."
|
|
7648
|
+
);
|
|
7649
|
+
}
|
|
7650
|
+
const { dimensionality } = getEmbeddingModelInfo(this.embedder.model);
|
|
6377
7651
|
await db2.schema.createTable(tableName, (table) => {
|
|
6378
|
-
if (!this.embedder) {
|
|
6379
|
-
throw new Error(
|
|
6380
|
-
"Embedder must be set for context " + this.name + " to create chunks table."
|
|
6381
|
-
);
|
|
6382
|
-
}
|
|
6383
7652
|
table.uuid("id").primary().defaultTo(db2.fn.uuid());
|
|
6384
7653
|
table.uuid("source").references("id").inTable(getTableName(this.id));
|
|
6385
7654
|
table.text("content");
|
|
6386
7655
|
table.jsonb("metadata");
|
|
6387
7656
|
table.integer("chunk_index");
|
|
6388
|
-
table.specificType("embedding", `vector(${
|
|
7657
|
+
table.specificType("embedding", `vector(${dimensionality})`);
|
|
6389
7658
|
const languages = this.configuration.languages?.length ? this.configuration.languages : ["english"];
|
|
6390
7659
|
const tsvectorExpression = languages.map((lang) => `to_tsvector('${lang}', coalesce(content, ''))`).join(" || ");
|
|
6391
7660
|
table.specificType(
|
|
@@ -7043,16 +8312,16 @@ async function createDynamicTools(chunks, hadExcludedContent) {
|
|
|
7043
8312
|
}
|
|
7044
8313
|
|
|
7045
8314
|
// ee/agentic-retrieval/v3/session-tools-registry.ts
|
|
7046
|
-
var
|
|
8315
|
+
var registry3 = /* @__PURE__ */ new Map();
|
|
7047
8316
|
/**
 * Merge a set of tools into the registry entry of a session.
 *
 * Tools already registered for the session are kept; a tool registered again
 * under the same name replaces the earlier definition.
 *
 * @param {string} sessionId - Key of the session in the module-level registry.
 * @param {Object<string, *>} tools - Tool definitions keyed by tool name.
 * @returns {void}
 */
function registerSessionTools(sessionId, tools) {
  let sessionMap = registry3.get(sessionId);
  if (sessionMap == null) {
    // First registration for this session: start from an empty map.
    sessionMap = /* @__PURE__ */ new Map();
  }
  Object.entries(tools).forEach(([toolName, definition]) => {
    sessionMap.set(toolName, definition);
  });
  registry3.set(sessionId, sessionMap);
}
|
|
7054
8323
|
/**
 * Return the tools registered for a session as a plain object.
 *
 * @param {string} sessionId - Key of the session in the module-level registry.
 * @returns {Object<string, *>} Tool definitions keyed by name; an empty object
 *   when the session is unknown or has no tools.
 */
function getSessionTools(sessionId) {
  const sessionMap = registry3.get(sessionId);
  const hasTools = Boolean(sessionMap) && sessionMap.size !== 0;
  if (hasTools) {
    return Object.fromEntries(sessionMap.entries());
  }
  return {};
}
|
|
@@ -7162,8 +8431,8 @@ ${customInstructions}` : ""
|
|
|
7162
8431
|
(tc) => tc.toolName === "search_content" && tc.input?.includeContent === false || tc.toolName === "search_items_by_name"
|
|
7163
8432
|
);
|
|
7164
8433
|
if (reranker && stepChunks.length > 0) {
|
|
7165
|
-
console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.
|
|
7166
|
-
stepChunks = await reranker.
|
|
8434
|
+
console.log(`[EXULU] v3 reranking ${stepChunks.length} chunks with ${reranker.model}`);
|
|
8435
|
+
stepChunks = await reranker.rerank(query, stepChunks);
|
|
7167
8436
|
}
|
|
7168
8437
|
const newDynamic = await createDynamicTools(stepChunks, hadExcludedContent);
|
|
7169
8438
|
Object.assign(dynamicTools, newDynamic);
|
|
@@ -7251,7 +8520,7 @@ ${customInstructions}` : ""
|
|
|
7251
8520
|
}
|
|
7252
8521
|
|
|
7253
8522
|
// ee/agentic-retrieval/v3/trajectory.ts
|
|
7254
|
-
import * as
|
|
8523
|
+
import * as fs2 from "fs/promises";
|
|
7255
8524
|
import * as path from "path";
|
|
7256
8525
|
var trajectoryRegistry = {
|
|
7257
8526
|
lastFile: void 0
|
|
@@ -7452,13 +8721,13 @@ var TrajectoryLogger = class {
|
|
|
7452
8721
|
};
|
|
7453
8722
|
if (!writeFiles) return void 0;
|
|
7454
8723
|
try {
|
|
7455
|
-
await
|
|
8724
|
+
await fs2.mkdir(this.logDir, { recursive: true });
|
|
7456
8725
|
const ts = Date.now();
|
|
7457
8726
|
const jsonPath = path.join(this.logDir, `trajectory_${ts}.json`);
|
|
7458
8727
|
const mdPath = path.join(this.logDir, `trajectory_${ts}.md`);
|
|
7459
8728
|
await Promise.all([
|
|
7460
|
-
|
|
7461
|
-
|
|
8729
|
+
fs2.writeFile(jsonPath, JSON.stringify(this.data, null, 2), "utf-8"),
|
|
8730
|
+
fs2.writeFile(mdPath, this.toMarkdown(durationMs, success, error), "utf-8")
|
|
7462
8731
|
]);
|
|
7463
8732
|
console.log(`[EXULU] v3 trajectory saved: trajectory_${ts}.json + trajectory_${ts}.md`);
|
|
7464
8733
|
trajectoryRegistry.lastFile = jsonPath;
|
|
@@ -7626,7 +8895,6 @@ SCOPE CONSTRAINT: Retrieval is scoped to preselected items/contexts. Per context
|
|
|
7626
8895
|
function createAgenticRetrievalToolV3({
|
|
7627
8896
|
contexts,
|
|
7628
8897
|
instructions: adminInstructions,
|
|
7629
|
-
rerankers,
|
|
7630
8898
|
user,
|
|
7631
8899
|
role,
|
|
7632
8900
|
model,
|
|
@@ -7761,7 +9029,18 @@ function createAgenticRetrievalToolV3({
|
|
|
7761
9029
|
requiresPreselectedContexts = toolVariablesConfig["require_preselected_contexts"] === true || toolVariablesConfig["require_preselected_contexts"] === "true";
|
|
7762
9030
|
const rerankerId = toolVariablesConfig["reranker"];
|
|
7763
9031
|
if (rerankerId && rerankerId !== "none") {
|
|
7764
|
-
|
|
9032
|
+
try {
|
|
9033
|
+
configuredReranker = await resolveReranker({
|
|
9034
|
+
model: rerankerId,
|
|
9035
|
+
user,
|
|
9036
|
+
roleId: role
|
|
9037
|
+
});
|
|
9038
|
+
} catch (err) {
|
|
9039
|
+
console.warn(
|
|
9040
|
+
`[EXULU] v3 \u2014 could not resolve reranker "${rerankerId}", continuing without reranking:`,
|
|
9041
|
+
err
|
|
9042
|
+
);
|
|
9043
|
+
}
|
|
7765
9044
|
}
|
|
7766
9045
|
}
|
|
7767
9046
|
console.log("[EXULU] Managed context enabled:", managedContextEnabled);
|
|
@@ -7926,14 +9205,14 @@ import {
|
|
|
7926
9205
|
SandboxManager
|
|
7927
9206
|
} from "@anthropic-ai/sandbox-runtime";
|
|
7928
9207
|
import { mkdir as mkdir2, rm, writeFile as writeFile2, readFile as fsReadFile, readdir, stat } from "fs/promises";
|
|
7929
|
-
import { existsSync as
|
|
7930
|
-
import { join as join3, dirname, resolve as
|
|
9208
|
+
import { existsSync as existsSync4 } from "fs";
|
|
9209
|
+
import { join as join3, dirname, resolve as resolve3, relative, posix } from "path";
|
|
7931
9210
|
import { exec as exec2, spawn as spawn2 } from "child_process";
|
|
7932
9211
|
import { promisify as promisify2 } from "util";
|
|
7933
9212
|
|
|
7934
9213
|
// src/exulu/system-dependencies.ts
|
|
7935
9214
|
import { exec } from "child_process";
|
|
7936
|
-
import { existsSync as
|
|
9215
|
+
import { existsSync as existsSync3 } from "fs";
|
|
7937
9216
|
import { join as join2 } from "path";
|
|
7938
9217
|
import { promisify } from "util";
|
|
7939
9218
|
var execAsync = promisify(exec);
|
|
@@ -7999,7 +9278,7 @@ async function probeDependency(dep) {
|
|
|
7999
9278
|
case "npm-global": {
|
|
8000
9279
|
const root = await getNpmGlobalRoot();
|
|
8001
9280
|
if (!root) return false;
|
|
8002
|
-
return
|
|
9281
|
+
return existsSync3(join2(root, dep.check.packageName));
|
|
8003
9282
|
}
|
|
8004
9283
|
}
|
|
8005
9284
|
}
|
|
@@ -8078,18 +9357,18 @@ function probeSandboxSupport() {
|
|
|
8078
9357
|
if (process.platform !== "linux") {
|
|
8079
9358
|
return { canSandbox: false, reason: `Unsupported platform: ${process.platform}` };
|
|
8080
9359
|
}
|
|
8081
|
-
return await new Promise((
|
|
9360
|
+
return await new Promise((resolve4) => {
|
|
8082
9361
|
const child = spawn2("bwrap", ["--dev-bind", "/", "/", "--", "/bin/true"]);
|
|
8083
9362
|
let stderr = "";
|
|
8084
9363
|
child.stderr.on("data", (chunk) => {
|
|
8085
9364
|
stderr += chunk.toString();
|
|
8086
9365
|
});
|
|
8087
9366
|
child.on("error", (err) => {
|
|
8088
|
-
|
|
9367
|
+
resolve4({ canSandbox: false, reason: `bwrap not executable: ${err.message}` });
|
|
8089
9368
|
});
|
|
8090
9369
|
child.on("exit", (code) => {
|
|
8091
|
-
if (code === 0)
|
|
8092
|
-
else
|
|
9370
|
+
if (code === 0) resolve4({ canSandbox: true });
|
|
9371
|
+
else resolve4({ canSandbox: false, reason: stderr.trim() || `bwrap exited ${code}` });
|
|
8093
9372
|
});
|
|
8094
9373
|
});
|
|
8095
9374
|
})();
|
|
@@ -8135,7 +9414,7 @@ async function downloadSkill(skill, skillsDirectory, config) {
|
|
|
8135
9414
|
}
|
|
8136
9415
|
}
|
|
8137
9416
|
function isArtifactPath(absPath, sessionDir) {
|
|
8138
|
-
const resolved =
|
|
9417
|
+
const resolved = resolve3(absPath);
|
|
8139
9418
|
const rel = relative(sessionDir, resolved);
|
|
8140
9419
|
if (!rel || rel.startsWith("..")) return false;
|
|
8141
9420
|
const first = rel.split("/")[0];
|
|
@@ -8194,7 +9473,7 @@ async function restoreArtifactsFromS3(sessionDir, sessionId, userId, config) {
|
|
|
8194
9473
|
async function downloadKeyIntoSandbox(opts) {
|
|
8195
9474
|
const { sessionId, userId, fullS3Key, config } = opts;
|
|
8196
9475
|
const sessionDir = join3("/tmp", "exulu-sessions", sessionId);
|
|
8197
|
-
if (!
|
|
9476
|
+
if (!existsSync4(sessionDir)) {
|
|
8198
9477
|
return { written: false };
|
|
8199
9478
|
}
|
|
8200
9479
|
const userPrefix = `user_${userId}/sessions/${sessionId}/`;
|
|
@@ -8228,7 +9507,7 @@ async function createSessionSandbox(sessionId, skills, config, userId) {
|
|
|
8228
9507
|
return cached.handle;
|
|
8229
9508
|
}
|
|
8230
9509
|
const sessionDir = join3("/tmp", "exulu-sessions", sessionId);
|
|
8231
|
-
const dirExisted =
|
|
9510
|
+
const dirExisted = existsSync4(sessionDir);
|
|
8232
9511
|
await mkdir2(sessionDir, { recursive: true });
|
|
8233
9512
|
const skillsDirectory = join3(sessionDir, "skills");
|
|
8234
9513
|
const installedSkills = /* @__PURE__ */ new Map();
|
|
@@ -8355,7 +9634,7 @@ Probe error: ${probe.reason ?? "(no detail)"}`
|
|
|
8355
9634
|
if (!persistenceEnabled || !isArtifactPath(absPath, sessionDir)) {
|
|
8356
9635
|
return {};
|
|
8357
9636
|
}
|
|
8358
|
-
const rel = relative(sessionDir,
|
|
9637
|
+
const rel = relative(sessionDir, resolve3(absPath));
|
|
8359
9638
|
const s3Key = artifactS3Key(sessionId, rel);
|
|
8360
9639
|
const out = {};
|
|
8361
9640
|
try {
|
|
@@ -8647,7 +9926,7 @@ var hydrateVariables = async (tool6) => {
|
|
|
8647
9926
|
await Promise.all(promises);
|
|
8648
9927
|
return tool6;
|
|
8649
9928
|
};
|
|
8650
|
-
var convertExuluToolsToAiSdkTools = async (currentTools, currentSkills, approvedTools, allExuluTools, configs, providerapikey, contexts,
|
|
9929
|
+
var convertExuluToolsToAiSdkTools = async (currentTools, currentSkills, approvedTools, allExuluTools, configs, providerapikey, contexts, user, exuluConfig, sessionID, req, project, sessionItems, model, agent, memoryItems) => {
|
|
8651
9930
|
if (!currentTools) return {};
|
|
8652
9931
|
if (!allExuluTools) {
|
|
8653
9932
|
allExuluTools = [];
|
|
@@ -8715,7 +9994,6 @@ var convertExuluToolsToAiSdkTools = async (currentTools, currentSkills, approved
|
|
|
8715
9994
|
const agenticSearchTool = createAgenticRetrievalToolV3({
|
|
8716
9995
|
contexts: contexts.filter((context) => context.id !== agent?.memory),
|
|
8717
9996
|
// dont include the agents memory in the agentic search tool!
|
|
8718
|
-
rerankers: rerankers || [],
|
|
8719
9997
|
user,
|
|
8720
9998
|
role: user?.role?.id,
|
|
8721
9999
|
model,
|
|
@@ -8911,6 +10189,7 @@ export {
|
|
|
8911
10189
|
postgresClient,
|
|
8912
10190
|
authentication,
|
|
8913
10191
|
STATISTICS_TYPE_ENUM,
|
|
10192
|
+
resolveLiteLLMConfigPath,
|
|
8914
10193
|
getPresignedUrl,
|
|
8915
10194
|
uploadFile,
|
|
8916
10195
|
listS3ObjectsByPrefix,
|
|
@@ -8923,17 +10202,15 @@ export {
|
|
|
8923
10202
|
createUppyRoutes,
|
|
8924
10203
|
ExuluStorage,
|
|
8925
10204
|
sanitizeName,
|
|
8926
|
-
|
|
8927
|
-
|
|
8928
|
-
|
|
8929
|
-
|
|
8930
|
-
|
|
8931
|
-
convertContextToTableDefinition,
|
|
8932
|
-
updateStatistic,
|
|
8933
|
-
checkRecordAccess,
|
|
10205
|
+
ExuluTokenizer,
|
|
10206
|
+
Chunk,
|
|
10207
|
+
BaseChunker,
|
|
10208
|
+
SentenceChunker,
|
|
10209
|
+
defaultChunker,
|
|
8934
10210
|
LITELLM_UI_PATH,
|
|
8935
10211
|
isLiteLLMEnabled,
|
|
8936
10212
|
setLiteLLMPackageRoot,
|
|
10213
|
+
enableLiteLLMClientMode,
|
|
8937
10214
|
startLiteLLMSupervisor,
|
|
8938
10215
|
waitForLiteLLMReady,
|
|
8939
10216
|
buildTags,
|
|
@@ -8951,9 +10228,20 @@ export {
|
|
|
8951
10228
|
getTagBudgetMap,
|
|
8952
10229
|
provisionDefaultUserBudget,
|
|
8953
10230
|
getUserBudgetView,
|
|
10231
|
+
updateStatistic,
|
|
10232
|
+
applySorting,
|
|
10233
|
+
applyAccessControl,
|
|
10234
|
+
applyFilters,
|
|
10235
|
+
checkLicense,
|
|
10236
|
+
coreSchemas,
|
|
10237
|
+
convertContextToTableDefinition,
|
|
10238
|
+
setEntityModelSetting,
|
|
10239
|
+
resolveEntityModel,
|
|
10240
|
+
checkRecordAccess,
|
|
8954
10241
|
ResolveModelError,
|
|
8955
10242
|
resolveModel,
|
|
8956
10243
|
exuluApp,
|
|
10244
|
+
getEntitiesForItem,
|
|
8957
10245
|
ensureEntityTables,
|
|
8958
10246
|
vectorSearch,
|
|
8959
10247
|
mapType,
|
|
@@ -8961,6 +10249,7 @@ export {
|
|
|
8961
10249
|
getTableName,
|
|
8962
10250
|
getChunksTableName,
|
|
8963
10251
|
ExuluContext2 as ExuluContext,
|
|
10252
|
+
resolveReranker,
|
|
8964
10253
|
oauthRegistry,
|
|
8965
10254
|
oauthTokenStore,
|
|
8966
10255
|
OAUTH_CALLBACK_PATH,
|