@exulu/backend 1.53.0 → 1.53.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +183 -170
- package/dist/index.js +172 -159
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -805,7 +805,7 @@ var JOB_STATUS_ENUM = {
|
|
|
805
805
|
};
|
|
806
806
|
|
|
807
807
|
// ee/agentic-retrieval/index.ts
|
|
808
|
-
var
|
|
808
|
+
var import_zod6 = require("zod");
|
|
809
809
|
var import_ai2 = require("ai");
|
|
810
810
|
|
|
811
811
|
// src/uppy/index.ts
|
|
@@ -3229,7 +3229,7 @@ var mapType = (t, type, name, defaultValue, unique) => {
|
|
|
3229
3229
|
|
|
3230
3230
|
// src/exulu/tool.ts
|
|
3231
3231
|
var import_ai = require("ai");
|
|
3232
|
-
var
|
|
3232
|
+
var import_zod4 = require("zod");
|
|
3233
3233
|
var import_crypto_js2 = __toESM(require("crypto-js"), 1);
|
|
3234
3234
|
|
|
3235
3235
|
// src/templates/tools/convert-exulu-tools-to-ai-sdk-tools.ts
|
|
@@ -3445,6 +3445,102 @@ function sanitizeToolName(name) {
|
|
|
3445
3445
|
|
|
3446
3446
|
// src/templates/tools/convert-exulu-tools-to-ai-sdk-tools.ts
|
|
3447
3447
|
var import_node_crypto2 = require("crypto");
|
|
3448
|
+
|
|
3449
|
+
// src/templates/tools/memory-tool.ts
|
|
3450
|
+
var import_zod3 = require("zod");
|
|
3451
|
+
var import_fs = __toESM(require("fs"), 1);
|
|
3452
|
+
var createNewMemoryItemTool = (agent, context) => {
|
|
3453
|
+
const fields = {
|
|
3454
|
+
name: import_zod3.z.string().describe("The name of the item to create"),
|
|
3455
|
+
description: import_zod3.z.string().describe("The description of the item to create")
|
|
3456
|
+
};
|
|
3457
|
+
for (const field of context.fields) {
|
|
3458
|
+
switch (field.type) {
|
|
3459
|
+
case "text":
|
|
3460
|
+
case "longText":
|
|
3461
|
+
case "shortText":
|
|
3462
|
+
case "code":
|
|
3463
|
+
case "enum":
|
|
3464
|
+
fields[field.name] = import_zod3.z.string().describe("The " + field.name + " of the item to create");
|
|
3465
|
+
break;
|
|
3466
|
+
case "json":
|
|
3467
|
+
fields[field.name] = import_zod3.z.string({}).describe(
|
|
3468
|
+
"The " + field.name + " of the item to create, it should be a valid JSON string."
|
|
3469
|
+
);
|
|
3470
|
+
break;
|
|
3471
|
+
case "markdown":
|
|
3472
|
+
fields[field.name] = import_zod3.z.string().describe(
|
|
3473
|
+
"The " + field.name + " of the item to create, it should be a valid Markdown string."
|
|
3474
|
+
);
|
|
3475
|
+
break;
|
|
3476
|
+
case "number":
|
|
3477
|
+
fields[field.name] = import_zod3.z.number().describe("The " + field.name + " of the item to create");
|
|
3478
|
+
break;
|
|
3479
|
+
case "boolean":
|
|
3480
|
+
fields[field.name] = import_zod3.z.boolean().describe("The " + field.name + " of the item to create");
|
|
3481
|
+
break;
|
|
3482
|
+
case "file":
|
|
3483
|
+
case "uuid":
|
|
3484
|
+
case "date":
|
|
3485
|
+
break;
|
|
3486
|
+
default:
|
|
3487
|
+
fields[field.name] = import_zod3.z.string().describe("The " + field.name + " of the item to create");
|
|
3488
|
+
break;
|
|
3489
|
+
}
|
|
3490
|
+
}
|
|
3491
|
+
const toolName = "create_" + sanitizeName(context.name) + "_memory_item";
|
|
3492
|
+
return new ExuluTool({
|
|
3493
|
+
id: toolName,
|
|
3494
|
+
name: "Create " + context.name + " Memory Item",
|
|
3495
|
+
category: agent.name + "_memory",
|
|
3496
|
+
description: "Create a new memory item in the " + agent.name + " memory context",
|
|
3497
|
+
type: "function",
|
|
3498
|
+
inputSchema: import_zod3.z.object(fields),
|
|
3499
|
+
config: [],
|
|
3500
|
+
execute: async ({ name, description, mode, information, exuluConfig, user }) => {
|
|
3501
|
+
let result = { result: "" };
|
|
3502
|
+
import_fs.default.writeFileSync("memory-tool.json", JSON.stringify({ name, description, information }, null, 2));
|
|
3503
|
+
try {
|
|
3504
|
+
const newItem = {
|
|
3505
|
+
name,
|
|
3506
|
+
description,
|
|
3507
|
+
information,
|
|
3508
|
+
rights_mode: "public"
|
|
3509
|
+
};
|
|
3510
|
+
const { item: createdItem, job: createdJob } = await context.createItem(
|
|
3511
|
+
newItem,
|
|
3512
|
+
exuluConfig,
|
|
3513
|
+
user?.id,
|
|
3514
|
+
user?.role?.id,
|
|
3515
|
+
false
|
|
3516
|
+
);
|
|
3517
|
+
import_fs.default.writeFileSync("memory-tool-created.json", JSON.stringify({ createdItem, createdJob }, null, 2));
|
|
3518
|
+
if (createdJob) {
|
|
3519
|
+
result = {
|
|
3520
|
+
result: `Created a Job to create the memory item with the following ID: ${createdJob}`
|
|
3521
|
+
};
|
|
3522
|
+
} else if (createdItem) {
|
|
3523
|
+
result = {
|
|
3524
|
+
result: `Created memory item with the following ID: ${createdItem.id}`
|
|
3525
|
+
};
|
|
3526
|
+
} else {
|
|
3527
|
+
result = {
|
|
3528
|
+
result: `Failed to create memory item`
|
|
3529
|
+
};
|
|
3530
|
+
}
|
|
3531
|
+
} catch (error) {
|
|
3532
|
+
import_fs.default.writeFileSync("memory-tool-error.json", JSON.stringify({ name, description, information, error }, null, 2));
|
|
3533
|
+
console.error("[EXULU] Error creating memory item", error);
|
|
3534
|
+
result = {
|
|
3535
|
+
result: `Failed to create memory item: ${error instanceof Error ? error.message : String(error)}`
|
|
3536
|
+
};
|
|
3537
|
+
}
|
|
3538
|
+
return result;
|
|
3539
|
+
}
|
|
3540
|
+
});
|
|
3541
|
+
};
|
|
3542
|
+
|
|
3543
|
+
// src/templates/tools/convert-exulu-tools-to-ai-sdk-tools.ts
|
|
3448
3544
|
var generateS3Key = (filename) => `${(0, import_node_crypto2.randomUUID)()}-${filename}`;
|
|
3449
3545
|
var s3Client2;
|
|
3450
3546
|
var getMimeType = (type) => {
|
|
@@ -3548,6 +3644,21 @@ var convertExuluToolsToAiSdkTools = async (currentTools, approvedTools, allExulu
|
|
|
3548
3644
|
currentTools.push(projectRetrievalTool);
|
|
3549
3645
|
}
|
|
3550
3646
|
}
|
|
3647
|
+
if (agent?.memory && contexts?.length) {
|
|
3648
|
+
const context = contexts.find((context2) => context2.id === agent?.memory);
|
|
3649
|
+
if (!context) {
|
|
3650
|
+
throw new Error(
|
|
3651
|
+
"Context was set for agent memory but not found in the contexts: " + agent?.memory + " please double check with a developer to see if the context was removed from code."
|
|
3652
|
+
);
|
|
3653
|
+
}
|
|
3654
|
+
const createNewMemoryTool = createNewMemoryItemTool(agent, context);
|
|
3655
|
+
if (createNewMemoryTool) {
|
|
3656
|
+
if (!currentTools) {
|
|
3657
|
+
currentTools = [];
|
|
3658
|
+
}
|
|
3659
|
+
currentTools.push(createNewMemoryTool);
|
|
3660
|
+
}
|
|
3661
|
+
}
|
|
3551
3662
|
console.log("[EXULU] Convert tools array to object, session items", items);
|
|
3552
3663
|
if (items) {
|
|
3553
3664
|
const sessionItemsRetrievalTool = await createSessionItemsRetrievalTool({
|
|
@@ -3793,7 +3904,7 @@ var ExuluTool = class {
|
|
|
3793
3904
|
this.type = type;
|
|
3794
3905
|
this.tool = (0, import_ai.tool)({
|
|
3795
3906
|
description,
|
|
3796
|
-
inputSchema: inputSchema ||
|
|
3907
|
+
inputSchema: inputSchema || import_zod4.z.object({}),
|
|
3797
3908
|
execute: execute2
|
|
3798
3909
|
});
|
|
3799
3910
|
}
|
|
@@ -3886,7 +3997,7 @@ var ExuluTool = class {
|
|
|
3886
3997
|
};
|
|
3887
3998
|
|
|
3888
3999
|
// src/exulu/context.ts
|
|
3889
|
-
var
|
|
4000
|
+
var import_zod5 = require("zod");
|
|
3890
4001
|
|
|
3891
4002
|
// ee/queues/decorator.ts
|
|
3892
4003
|
var import_bullmq2 = require("bullmq");
|
|
@@ -4608,12 +4719,12 @@ var ExuluContext2 = class {
|
|
|
4608
4719
|
category: "contexts",
|
|
4609
4720
|
needsApproval: true,
|
|
4610
4721
|
// todo make configurable
|
|
4611
|
-
inputSchema:
|
|
4612
|
-
query:
|
|
4613
|
-
keywords:
|
|
4722
|
+
inputSchema: import_zod5.z.object({
|
|
4723
|
+
query: import_zod5.z.string().describe("The original question that the user asked"),
|
|
4724
|
+
keywords: import_zod5.z.array(import_zod5.z.string()).describe(
|
|
4614
4725
|
"The keywords that are relevant to the user's question, for example names of specific products, systems or parts, IDs, etc."
|
|
4615
4726
|
),
|
|
4616
|
-
method:
|
|
4727
|
+
method: import_zod5.z.enum(["keyword", "semantic", "hybrid"]).default("hybrid").describe(
|
|
4617
4728
|
"Search method: 'hybrid' (best for most queries - combines semantic understanding with exact term matching), 'keyword' (best for exact terms, technical names, IDs, or specific phrases), 'semantic' (best for conceptual queries where synonyms and paraphrasing matter)"
|
|
4618
4729
|
)
|
|
4619
4730
|
}),
|
|
@@ -4824,11 +4935,11 @@ function createCustomAgenticRetrievalToolLoopAgent({
|
|
|
4824
4935
|
return await (0, import_ai2.generateText)({
|
|
4825
4936
|
model,
|
|
4826
4937
|
output: import_ai2.Output.object({
|
|
4827
|
-
schema:
|
|
4828
|
-
reasoning:
|
|
4938
|
+
schema: import_zod6.z.object({
|
|
4939
|
+
reasoning: import_zod6.z.string().describe(
|
|
4829
4940
|
"The reasoning for the next step and why the agent needs to take this step. It MUST start with 'I must call tool XYZ', and MUST include the inputs for that tool."
|
|
4830
4941
|
),
|
|
4831
|
-
finished:
|
|
4942
|
+
finished: import_zod6.z.boolean().describe(
|
|
4832
4943
|
"Whether the agent has finished meaning no further steps are needed, this should only be true if the agent believes no further tool calls are needed to get the relevant information for the query."
|
|
4833
4944
|
)
|
|
4834
4945
|
})
|
|
@@ -4958,9 +5069,9 @@ function createCustomAgenticRetrievalToolLoopAgent({
|
|
|
4958
5069
|
if (chunksCount > 1) {
|
|
4959
5070
|
dynamicTools[getMoreToolName] = (0, import_ai2.tool)({
|
|
4960
5071
|
description: `The item ${chunk.item_name} has a total of${chunksCount} chunks, this tool allows you to get more content from this item across all its pages / chunks.`,
|
|
4961
|
-
inputSchema:
|
|
4962
|
-
from_index:
|
|
4963
|
-
to_index:
|
|
5072
|
+
inputSchema: import_zod6.z.object({
|
|
5073
|
+
from_index: import_zod6.z.number().default(1).describe("The index of the chunk to start from."),
|
|
5074
|
+
to_index: import_zod6.z.number().max(chunksCount).describe("The index of the chunk to end at, max is " + chunksCount)
|
|
4964
5075
|
}),
|
|
4965
5076
|
execute: async ({ from_index, to_index }) => {
|
|
4966
5077
|
const chunks2 = await db2(chunksTable).select("*").where("source", chunk.item_id).whereBetween("chunk_index", [from_index, to_index]).orderBy("chunk_index", "asc");
|
|
@@ -4988,8 +5099,8 @@ function createCustomAgenticRetrievalToolLoopAgent({
|
|
|
4988
5099
|
);
|
|
4989
5100
|
dynamicTools[getContentToolName] = (0, import_ai2.tool)({
|
|
4990
5101
|
description: `Get the content of the page ${chunk.chunk_index} for the item ${chunk.item_name}`,
|
|
4991
|
-
inputSchema:
|
|
4992
|
-
reasoning:
|
|
5102
|
+
inputSchema: import_zod6.z.object({
|
|
5103
|
+
reasoning: import_zod6.z.string().describe("The reasoning for why you need to get the content of the page.")
|
|
4993
5104
|
}),
|
|
4994
5105
|
execute: async ({ reasoning }) => {
|
|
4995
5106
|
const { db: db3 } = await postgresClient();
|
|
@@ -5057,8 +5168,8 @@ var createAgenticRetrievalAgent = ({
|
|
|
5057
5168
|
search_items_by_name: (0, import_ai2.tool)({
|
|
5058
5169
|
description: `
|
|
5059
5170
|
Search for relevant items by name across the available knowledge bases.`,
|
|
5060
|
-
inputSchema:
|
|
5061
|
-
knowledge_base_ids:
|
|
5171
|
+
inputSchema: import_zod6.z.object({
|
|
5172
|
+
knowledge_base_ids: import_zod6.z.array(import_zod6.z.enum(contexts.map((ctx) => ctx.id))).describe(`
|
|
5062
5173
|
The available knowledge bases are:
|
|
5063
5174
|
${contexts.map(
|
|
5064
5175
|
(ctx) => `
|
|
@@ -5070,8 +5181,8 @@ var createAgenticRetrievalAgent = ({
|
|
|
5070
5181
|
`
|
|
5071
5182
|
).join("\n")}
|
|
5072
5183
|
`),
|
|
5073
|
-
item_name:
|
|
5074
|
-
limit:
|
|
5184
|
+
item_name: import_zod6.z.string().describe("The name of the item to search for."),
|
|
5185
|
+
limit: import_zod6.z.number().default(100).describe(
|
|
5075
5186
|
"Maximum number of items to return (max 400), if searching through multiple knowledge bases, the limit is applied for each knowledge base individually."
|
|
5076
5187
|
)
|
|
5077
5188
|
}),
|
|
@@ -5177,11 +5288,11 @@ var createAgenticRetrievalAgent = ({
|
|
|
5177
5288
|
- You can always fetch content later if needed
|
|
5178
5289
|
|
|
5179
5290
|
`,
|
|
5180
|
-
inputSchema:
|
|
5181
|
-
query:
|
|
5291
|
+
inputSchema: import_zod6.z.object({
|
|
5292
|
+
query: import_zod6.z.string().describe(
|
|
5182
5293
|
"The search query to find relevant chunks, this must always be related to the content you are looking for, not something like 'Page 2'."
|
|
5183
5294
|
),
|
|
5184
|
-
knowledge_base_ids:
|
|
5295
|
+
knowledge_base_ids: import_zod6.z.array(import_zod6.z.enum(contexts.map((ctx) => ctx.id))).describe(`
|
|
5185
5296
|
The available knowledge bases are:
|
|
5186
5297
|
${contexts.map(
|
|
5187
5298
|
(ctx) => `
|
|
@@ -5193,25 +5304,25 @@ var createAgenticRetrievalAgent = ({
|
|
|
5193
5304
|
`
|
|
5194
5305
|
).join("\n")}
|
|
5195
5306
|
`),
|
|
5196
|
-
keywords:
|
|
5307
|
+
keywords: import_zod6.z.array(import_zod6.z.string()).optional().describe(
|
|
5197
5308
|
"Keywords to search for. Usually extracted from the query, allowing for more precise search results."
|
|
5198
5309
|
),
|
|
5199
|
-
searchMethod:
|
|
5310
|
+
searchMethod: import_zod6.z.enum(["keyword", "semantic", "hybrid"]).default("hybrid").describe(
|
|
5200
5311
|
"Search method: 'hybrid' (best for most queries - combines semantic understanding with exact term matching), 'keyword' (best for exact terms, technical names, IDs, or specific phrases), 'semantic' (best for conceptual queries where synonyms and paraphrasing matter)"
|
|
5201
5312
|
),
|
|
5202
|
-
includeContent:
|
|
5313
|
+
includeContent: import_zod6.z.boolean().default(true).describe(
|
|
5203
5314
|
"Whether to include the full chunk content in results. Set to FALSE when you only need to know WHICH documents/items are relevant (lists, overviews, counts). Set to TRUE when you need the ACTUAL content to answer the question (information, details, explanations). You can always fetch content later, so prefer FALSE for efficiency when listing documents."
|
|
5204
5315
|
),
|
|
5205
|
-
item_ids:
|
|
5316
|
+
item_ids: import_zod6.z.array(import_zod6.z.string()).optional().describe(
|
|
5206
5317
|
"Use if you wish to retrieve content from specific items (documents) based on the item ID."
|
|
5207
5318
|
),
|
|
5208
|
-
item_names:
|
|
5319
|
+
item_names: import_zod6.z.array(import_zod6.z.string()).optional().describe(
|
|
5209
5320
|
"Use if you wish to retrieve content from specific items (documents) based on the item name. Can be a partial match."
|
|
5210
5321
|
),
|
|
5211
|
-
item_external_ids:
|
|
5322
|
+
item_external_ids: import_zod6.z.array(import_zod6.z.string()).optional().describe(
|
|
5212
5323
|
"Use if you wish to retrieve content from specific items (documents) based on the item external ID. Can be a partial match."
|
|
5213
5324
|
),
|
|
5214
|
-
limit:
|
|
5325
|
+
limit: import_zod6.z.number().default(10).describe("Maximum number of chunks to return (max 10)")
|
|
5215
5326
|
}),
|
|
5216
5327
|
execute: async ({
|
|
5217
5328
|
query,
|
|
@@ -5402,9 +5513,9 @@ var createAgenticRetrievalTool = ({
|
|
|
5402
5513
|
default: true
|
|
5403
5514
|
}))
|
|
5404
5515
|
],
|
|
5405
|
-
inputSchema:
|
|
5406
|
-
query:
|
|
5407
|
-
userInstructions:
|
|
5516
|
+
inputSchema: import_zod6.z.object({
|
|
5517
|
+
query: import_zod6.z.string().describe("The question or query to answer using the knowledge bases"),
|
|
5518
|
+
userInstructions: import_zod6.z.string().optional().describe("Instructions provided by the user to customize the retrieval process.")
|
|
5408
5519
|
}),
|
|
5409
5520
|
execute: async function* ({
|
|
5410
5521
|
query,
|
|
@@ -9897,7 +10008,7 @@ var import_utils5 = require("@apollo/utils.keyvaluecache");
|
|
|
9897
10008
|
var import_body_parser = __toESM(require("body-parser"), 1);
|
|
9898
10009
|
var import_crypto_js6 = __toESM(require("crypto-js"), 1);
|
|
9899
10010
|
var import_openai = __toESM(require("openai"), 1);
|
|
9900
|
-
var
|
|
10011
|
+
var import_fs2 = __toESM(require("fs"), 1);
|
|
9901
10012
|
var import_node_crypto4 = require("crypto");
|
|
9902
10013
|
var import_api2 = require("@opentelemetry/api");
|
|
9903
10014
|
var import_sdk = __toESM(require("@anthropic-ai/sdk"), 1);
|
|
@@ -9946,98 +10057,6 @@ function generateSlug(name) {
|
|
|
9946
10057
|
// src/exulu/provider.ts
|
|
9947
10058
|
var import_crypto_js5 = __toESM(require("crypto-js"), 1);
|
|
9948
10059
|
var import_officeparser = require("officeparser");
|
|
9949
|
-
|
|
9950
|
-
// src/templates/tools/memory-tool.ts
|
|
9951
|
-
var import_zod6 = require("zod");
|
|
9952
|
-
var createNewMemoryItemTool = (agent, context) => {
|
|
9953
|
-
const fields = {
|
|
9954
|
-
name: import_zod6.z.string().describe("The name of the item to create"),
|
|
9955
|
-
description: import_zod6.z.string().describe("The description of the item to create")
|
|
9956
|
-
};
|
|
9957
|
-
for (const field of context.fields) {
|
|
9958
|
-
switch (field.type) {
|
|
9959
|
-
case "text":
|
|
9960
|
-
case "longText":
|
|
9961
|
-
case "shortText":
|
|
9962
|
-
case "code":
|
|
9963
|
-
case "enum":
|
|
9964
|
-
fields[field.name] = import_zod6.z.string().describe("The " + field.name + " of the item to create");
|
|
9965
|
-
break;
|
|
9966
|
-
case "json":
|
|
9967
|
-
fields[field.name] = import_zod6.z.string({}).describe(
|
|
9968
|
-
"The " + field.name + " of the item to create, it should be a valid JSON string."
|
|
9969
|
-
);
|
|
9970
|
-
break;
|
|
9971
|
-
case "markdown":
|
|
9972
|
-
fields[field.name] = import_zod6.z.string().describe(
|
|
9973
|
-
"The " + field.name + " of the item to create, it should be a valid Markdown string."
|
|
9974
|
-
);
|
|
9975
|
-
break;
|
|
9976
|
-
case "number":
|
|
9977
|
-
fields[field.name] = import_zod6.z.number().describe("The " + field.name + " of the item to create");
|
|
9978
|
-
break;
|
|
9979
|
-
case "boolean":
|
|
9980
|
-
fields[field.name] = import_zod6.z.boolean().describe("The " + field.name + " of the item to create");
|
|
9981
|
-
break;
|
|
9982
|
-
case "file":
|
|
9983
|
-
case "uuid":
|
|
9984
|
-
case "date":
|
|
9985
|
-
break;
|
|
9986
|
-
default:
|
|
9987
|
-
fields[field.name] = import_zod6.z.string().describe("The " + field.name + " of the item to create");
|
|
9988
|
-
break;
|
|
9989
|
-
}
|
|
9990
|
-
}
|
|
9991
|
-
return new ExuluTool({
|
|
9992
|
-
id: "create_" + agent.name + "_memory_item",
|
|
9993
|
-
name: "Create " + agent.name + " Memory Item",
|
|
9994
|
-
category: agent.name + "_memory",
|
|
9995
|
-
description: "Create a new memory item in the " + agent.name + " memory context",
|
|
9996
|
-
type: "function",
|
|
9997
|
-
inputSchema: import_zod6.z.object(fields),
|
|
9998
|
-
config: [],
|
|
9999
|
-
execute: async ({ name, description, mode, information, exuluConfig, user }) => {
|
|
10000
|
-
let result = { result: "" };
|
|
10001
|
-
switch (mode) {
|
|
10002
|
-
case "learnings":
|
|
10003
|
-
break;
|
|
10004
|
-
case "knowledge":
|
|
10005
|
-
const newItem = {
|
|
10006
|
-
name,
|
|
10007
|
-
description,
|
|
10008
|
-
information,
|
|
10009
|
-
rights_mode: "public"
|
|
10010
|
-
};
|
|
10011
|
-
const { item: createdItem, job: createdJob } = await context.createItem(
|
|
10012
|
-
newItem,
|
|
10013
|
-
exuluConfig,
|
|
10014
|
-
user?.id,
|
|
10015
|
-
user?.role?.id,
|
|
10016
|
-
false
|
|
10017
|
-
);
|
|
10018
|
-
if (createdJob) {
|
|
10019
|
-
result = {
|
|
10020
|
-
result: `Created a Job to create the memory item with the following ID: ${createdJob}`
|
|
10021
|
-
};
|
|
10022
|
-
} else if (createdItem) {
|
|
10023
|
-
result = {
|
|
10024
|
-
result: `Created memory item with the following ID: ${createdItem.id}`
|
|
10025
|
-
};
|
|
10026
|
-
} else {
|
|
10027
|
-
result = {
|
|
10028
|
-
result: `Failed to create memory item`
|
|
10029
|
-
};
|
|
10030
|
-
}
|
|
10031
|
-
break;
|
|
10032
|
-
default:
|
|
10033
|
-
throw new Error(`Invalid mode: ${mode}`);
|
|
10034
|
-
}
|
|
10035
|
-
return result;
|
|
10036
|
-
}
|
|
10037
|
-
});
|
|
10038
|
-
};
|
|
10039
|
-
|
|
10040
|
-
// src/exulu/provider.ts
|
|
10041
10060
|
var ExuluProvider = class {
|
|
10042
10061
|
// Must begin with a letter (a-z) or underscore (_). Subsequent characters in a name can be letters, digits (0-9), or
|
|
10043
10062
|
// underscores and be a max length of 80 characters and at least 5 characters long.
|
|
@@ -10297,13 +10316,6 @@ var ExuluProvider = class {
|
|
|
10297
10316
|
|
|
10298
10317
|
${result.chunks.map((chunk) => chunk.chunk_content).join("\n\n")}`;
|
|
10299
10318
|
}
|
|
10300
|
-
const createNewMemoryTool = createNewMemoryItemTool(agent, context);
|
|
10301
|
-
if (createNewMemoryTool) {
|
|
10302
|
-
if (!currentTools) {
|
|
10303
|
-
currentTools = [];
|
|
10304
|
-
}
|
|
10305
|
-
currentTools.push(createNewMemoryTool);
|
|
10306
|
-
}
|
|
10307
10319
|
}
|
|
10308
10320
|
const personalizationInformation = exuluConfig?.privacy?.systemPromptPersonalization !== false ? `
|
|
10309
10321
|
${user?.firstname ? `The users first name is "${user.firstname}"` : ""}
|
|
@@ -10367,6 +10379,10 @@ var ExuluProvider = class {
|
|
|
10367
10379
|
Example: {url: https://www.google.com, title: Google, snippet: The result of the web search.}
|
|
10368
10380
|
`;
|
|
10369
10381
|
}
|
|
10382
|
+
system += `
|
|
10383
|
+
|
|
10384
|
+
When a tool execution is not approved by the user, do not retry it unless explicitly asked by the user. ' +
|
|
10385
|
+
'Inform the user that the action was not performed.`;
|
|
10370
10386
|
if (prompt) {
|
|
10371
10387
|
let result = { object: null, text: "" };
|
|
10372
10388
|
let inputTokens = 0;
|
|
@@ -10681,13 +10697,6 @@ ${extractedText}
|
|
|
10681
10697
|
|
|
10682
10698
|
${result2.chunks.map((chunk) => chunk.chunk_content).join("\n\n")}`;
|
|
10683
10699
|
}
|
|
10684
|
-
const createNewMemoryTool = createNewMemoryItemTool(agent, context);
|
|
10685
|
-
if (createNewMemoryTool) {
|
|
10686
|
-
if (!currentTools) {
|
|
10687
|
-
currentTools = [];
|
|
10688
|
-
}
|
|
10689
|
-
currentTools.push(createNewMemoryTool);
|
|
10690
|
-
}
|
|
10691
10700
|
}
|
|
10692
10701
|
messages = messages.filter(
|
|
10693
10702
|
(message2, index, self) => index === self.findLastIndex((t) => t.id === message2.id)
|
|
@@ -10746,6 +10755,10 @@ ${extractedText}
|
|
|
10746
10755
|
Example: {url: https://www.google.com, title: Google, snippet: The result of the web search.}
|
|
10747
10756
|
`;
|
|
10748
10757
|
}
|
|
10758
|
+
system += `
|
|
10759
|
+
|
|
10760
|
+
When a tool execution is not approved by the user, do not retry it unless explicitly asked by the user. ' +
|
|
10761
|
+
'Inform the user that the action was not performed.`;
|
|
10749
10762
|
const result = (0, import_ai4.streamText)({
|
|
10750
10763
|
model,
|
|
10751
10764
|
// Should be a LanguageModelV1
|
|
@@ -10894,7 +10907,7 @@ var REQUEST_SIZE_LIMIT = "50mb";
|
|
|
10894
10907
|
var getExuluVersionNumber = async () => {
|
|
10895
10908
|
try {
|
|
10896
10909
|
const path2 = process.cwd();
|
|
10897
|
-
const packageJson =
|
|
10910
|
+
const packageJson = import_fs2.default.readFileSync(path2 + "/package.json", "utf8");
|
|
10898
10911
|
const packageData = JSON.parse(packageJson);
|
|
10899
10912
|
const exuluVersion = packageData.dependencies["@exulu/backend"];
|
|
10900
10913
|
console.log(`[EXULU] Installed exulu-backend version: ${exuluVersion}`);
|
|
@@ -16261,7 +16274,7 @@ var MarkdownChunker = class {
|
|
|
16261
16274
|
var import_child_process = require("child_process");
|
|
16262
16275
|
var import_util2 = require("util");
|
|
16263
16276
|
var import_path = require("path");
|
|
16264
|
-
var
|
|
16277
|
+
var import_fs3 = require("fs");
|
|
16265
16278
|
var import_url = require("url");
|
|
16266
16279
|
var execAsync = (0, import_util2.promisify)(import_child_process.exec);
|
|
16267
16280
|
function getPackageRoot() {
|
|
@@ -16271,9 +16284,9 @@ function getPackageRoot() {
|
|
|
16271
16284
|
const maxAttempts = 10;
|
|
16272
16285
|
while (attempts < maxAttempts) {
|
|
16273
16286
|
const packageJsonPath = (0, import_path.join)(currentDir, "package.json");
|
|
16274
|
-
if ((0,
|
|
16287
|
+
if ((0, import_fs3.existsSync)(packageJsonPath)) {
|
|
16275
16288
|
try {
|
|
16276
|
-
const packageJson = JSON.parse((0,
|
|
16289
|
+
const packageJson = JSON.parse((0, import_fs3.readFileSync)(packageJsonPath, "utf-8"));
|
|
16277
16290
|
if (packageJson.name === "@exulu/backend") {
|
|
16278
16291
|
return currentDir;
|
|
16279
16292
|
}
|
|
@@ -16300,7 +16313,7 @@ function isPythonEnvironmentSetup(packageRoot) {
|
|
|
16300
16313
|
const root = packageRoot ?? getPackageRoot();
|
|
16301
16314
|
const venvPath = getVenvPath(root);
|
|
16302
16315
|
const pythonPath = (0, import_path.join)(venvPath, "bin", "python");
|
|
16303
|
-
return (0,
|
|
16316
|
+
return (0, import_fs3.existsSync)(venvPath) && (0, import_fs3.existsSync)(pythonPath);
|
|
16304
16317
|
}
|
|
16305
16318
|
async function setupPythonEnvironment(options = {}) {
|
|
16306
16319
|
const {
|
|
@@ -16321,7 +16334,7 @@ async function setupPythonEnvironment(options = {}) {
|
|
|
16321
16334
|
};
|
|
16322
16335
|
}
|
|
16323
16336
|
const setupScriptPath = getSetupScriptPath(packageRoot);
|
|
16324
|
-
if (!(0,
|
|
16337
|
+
if (!(0, import_fs3.existsSync)(setupScriptPath)) {
|
|
16325
16338
|
return {
|
|
16326
16339
|
success: false,
|
|
16327
16340
|
message: `Setup script not found at: ${setupScriptPath}`,
|
|
@@ -16403,13 +16416,13 @@ async function validatePythonEnvironment(packageRoot, checkPackages = true) {
|
|
|
16403
16416
|
const root = packageRoot ?? getPackageRoot();
|
|
16404
16417
|
const venvPath = getVenvPath(root);
|
|
16405
16418
|
const pythonPath = (0, import_path.join)(venvPath, "bin", "python");
|
|
16406
|
-
if (!(0,
|
|
16419
|
+
if (!(0, import_fs3.existsSync)(venvPath)) {
|
|
16407
16420
|
return {
|
|
16408
16421
|
valid: false,
|
|
16409
16422
|
message: getPythonSetupInstructions()
|
|
16410
16423
|
};
|
|
16411
16424
|
}
|
|
16412
|
-
if (!(0,
|
|
16425
|
+
if (!(0, import_fs3.existsSync)(pythonPath)) {
|
|
16413
16426
|
return {
|
|
16414
16427
|
valid: false,
|
|
16415
16428
|
message: "Python virtual environment is corrupted. Please run:\n await setupPythonEnvironment({ force: true })"
|
|
@@ -16461,7 +16474,7 @@ Or manually run the setup script:
|
|
|
16461
16474
|
}
|
|
16462
16475
|
|
|
16463
16476
|
// ee/python/documents/processing/doc_processor.ts
|
|
16464
|
-
var
|
|
16477
|
+
var fs3 = __toESM(require("fs"), 1);
|
|
16465
16478
|
var path = __toESM(require("path"), 1);
|
|
16466
16479
|
var import_ai7 = require("ai");
|
|
16467
16480
|
var import_zod13 = require("zod");
|
|
@@ -16476,7 +16489,7 @@ var import_officeparser2 = require("officeparser");
|
|
|
16476
16489
|
var import_child_process2 = require("child_process");
|
|
16477
16490
|
var import_util3 = require("util");
|
|
16478
16491
|
var import_path2 = require("path");
|
|
16479
|
-
var
|
|
16492
|
+
var import_fs4 = require("fs");
|
|
16480
16493
|
var import_url2 = require("url");
|
|
16481
16494
|
var execAsync2 = (0, import_util3.promisify)(import_child_process2.exec);
|
|
16482
16495
|
function getPackageRoot2() {
|
|
@@ -16486,9 +16499,9 @@ function getPackageRoot2() {
|
|
|
16486
16499
|
const maxAttempts = 10;
|
|
16487
16500
|
while (attempts < maxAttempts) {
|
|
16488
16501
|
const packageJsonPath = (0, import_path2.join)(currentDir, "package.json");
|
|
16489
|
-
if ((0,
|
|
16502
|
+
if ((0, import_fs4.existsSync)(packageJsonPath)) {
|
|
16490
16503
|
try {
|
|
16491
|
-
const packageJson = JSON.parse((0,
|
|
16504
|
+
const packageJson = JSON.parse((0, import_fs4.readFileSync)(packageJsonPath, "utf-8"));
|
|
16492
16505
|
if (packageJson.name === "@exulu/backend") {
|
|
16493
16506
|
return currentDir;
|
|
16494
16507
|
}
|
|
@@ -16550,7 +16563,7 @@ async function executePythonScript(config) {
|
|
|
16550
16563
|
await validatePythonEnvironmentForExecution(packageRoot);
|
|
16551
16564
|
}
|
|
16552
16565
|
const resolvedScriptPath = (0, import_path2.resolve)(packageRoot, scriptPath);
|
|
16553
|
-
if (!(0,
|
|
16566
|
+
if (!(0, import_fs4.existsSync)(resolvedScriptPath)) {
|
|
16554
16567
|
throw new PythonExecutionError(
|
|
16555
16568
|
`Python script not found: ${resolvedScriptPath}`,
|
|
16556
16569
|
"",
|
|
@@ -16686,7 +16699,7 @@ function reconstructHeadings(correctedText, headingsHierarchy) {
|
|
|
16686
16699
|
return result;
|
|
16687
16700
|
}
|
|
16688
16701
|
async function validatePageWithVLM(page, imagePath, model) {
|
|
16689
|
-
const imageBuffer = await
|
|
16702
|
+
const imageBuffer = await fs3.promises.readFile(imagePath);
|
|
16690
16703
|
const imageBase64 = imageBuffer.toString("base64");
|
|
16691
16704
|
const mimeType = "image/png";
|
|
16692
16705
|
const prompt = `You are a document validation assistant. Your task is to analyze a page image and correct the output of an OCR/parsing pipeline. The content may include tables, technical diagrams, schematics, and structured text.
|
|
@@ -17018,7 +17031,7 @@ ${setupResult.output || ""}`);
|
|
|
17018
17031
|
if (!result.success) {
|
|
17019
17032
|
throw new Error(`Document processing failed: ${result.stderr}`);
|
|
17020
17033
|
}
|
|
17021
|
-
const jsonContent = await
|
|
17034
|
+
const jsonContent = await fs3.promises.readFile(paths.json, "utf-8");
|
|
17022
17035
|
json = JSON.parse(jsonContent);
|
|
17023
17036
|
} else if (config?.processor.name === "officeparser") {
|
|
17024
17037
|
const text = await (0, import_officeparser2.parseOfficeAsync)(buffer, {
|
|
@@ -17051,9 +17064,9 @@ ${setupResult.output || ""}`);
|
|
|
17051
17064
|
}, 10);
|
|
17052
17065
|
const parser = new import_liteparse.LiteParse();
|
|
17053
17066
|
const screenshots = await parser.screenshot(paths.source, void 0);
|
|
17054
|
-
await
|
|
17067
|
+
await fs3.promises.mkdir(paths.images, { recursive: true });
|
|
17055
17068
|
for (const screenshot of screenshots) {
|
|
17056
|
-
await
|
|
17069
|
+
await fs3.promises.writeFile(
|
|
17057
17070
|
path.join(
|
|
17058
17071
|
paths.images,
|
|
17059
17072
|
`${screenshot.pageNum}.png`
|
|
@@ -17068,15 +17081,15 @@ ${setupResult.output || ""}`);
|
|
|
17068
17081
|
image: screenshots.find((s) => s.pageNum === page.index + 1)?.imagePath,
|
|
17069
17082
|
headings: []
|
|
17070
17083
|
}));
|
|
17071
|
-
|
|
17084
|
+
fs3.writeFileSync(paths.json, JSON.stringify(json, null, 2));
|
|
17072
17085
|
} else if (config?.processor.name === "liteparse") {
|
|
17073
17086
|
const parser = new import_liteparse.LiteParse();
|
|
17074
17087
|
const result = await parser.parse(paths.source);
|
|
17075
17088
|
const screenshots = await parser.screenshot(paths.source, void 0);
|
|
17076
17089
|
console.log(`[EXULU] Liteparse screenshots: ${JSON.stringify(screenshots)}`);
|
|
17077
|
-
await
|
|
17090
|
+
await fs3.promises.mkdir(paths.images, { recursive: true });
|
|
17078
17091
|
for (const screenshot of screenshots) {
|
|
17079
|
-
await
|
|
17092
|
+
await fs3.promises.writeFile(path.join(paths.images, `${screenshot.pageNum}.png`), screenshot.imageBuffer);
|
|
17080
17093
|
screenshot.imagePath = path.join(paths.images, `${screenshot.pageNum}.png`);
|
|
17081
17094
|
}
|
|
17082
17095
|
json = result.pages.map((page) => ({
|
|
@@ -17084,7 +17097,7 @@ ${setupResult.output || ""}`);
|
|
|
17084
17097
|
content: page.text,
|
|
17085
17098
|
image: screenshots.find((s) => s.pageNum === page.pageNum)?.imagePath
|
|
17086
17099
|
}));
|
|
17087
|
-
|
|
17100
|
+
fs3.writeFileSync(paths.json, JSON.stringify(json, null, 2));
|
|
17088
17101
|
}
|
|
17089
17102
|
console.log(`[EXULU]
|
|
17090
17103
|
\u2713 Document processing completed successfully`);
|
|
@@ -17115,13 +17128,13 @@ ${setupResult.output || ""}`);
|
|
|
17115
17128
|
console.log(`[EXULU] Corrected: ${page.vlm_corrected_text.substring(0, 150)}...`);
|
|
17116
17129
|
});
|
|
17117
17130
|
}
|
|
17118
|
-
await
|
|
17131
|
+
await fs3.promises.writeFile(
|
|
17119
17132
|
paths.json,
|
|
17120
17133
|
JSON.stringify(json, null, 2),
|
|
17121
17134
|
"utf-8"
|
|
17122
17135
|
);
|
|
17123
17136
|
}
|
|
17124
|
-
const markdownStream =
|
|
17137
|
+
const markdownStream = fs3.createWriteStream(paths.markdown, { encoding: "utf-8" });
|
|
17125
17138
|
for (let i = 0; i < json.length; i++) {
|
|
17126
17139
|
const p = json[i];
|
|
17127
17140
|
if (!p) continue;
|
|
@@ -17137,7 +17150,7 @@ ${setupResult.output || ""}`);
|
|
|
17137
17150
|
});
|
|
17138
17151
|
console.log(`[EXULU] Validated output saved to: ${paths.json}`);
|
|
17139
17152
|
console.log(`[EXULU] Validated markdown saved to: ${paths.markdown}`);
|
|
17140
|
-
const markdown = await
|
|
17153
|
+
const markdown = await fs3.promises.readFile(paths.markdown, "utf-8");
|
|
17141
17154
|
const processedJson = json.map((e) => {
|
|
17142
17155
|
const finalContent = e.vlm_corrected_text ?? e.content;
|
|
17143
17156
|
return {
|
|
@@ -17168,7 +17181,7 @@ var loadFile = async (file, name, tempDir) => {
|
|
|
17168
17181
|
let buffer;
|
|
17169
17182
|
if (Buffer.isBuffer(file)) {
|
|
17170
17183
|
filePath = path.join(tempDir, `${UUID}.${fileType}`);
|
|
17171
|
-
await
|
|
17184
|
+
await fs3.promises.writeFile(filePath, file);
|
|
17172
17185
|
buffer = file;
|
|
17173
17186
|
} else {
|
|
17174
17187
|
filePath = filePath.trim();
|
|
@@ -17176,11 +17189,11 @@ var loadFile = async (file, name, tempDir) => {
|
|
|
17176
17189
|
const response = await fetch(filePath);
|
|
17177
17190
|
const array = await response.arrayBuffer();
|
|
17178
17191
|
const tempFilePath = path.join(tempDir, `${UUID}.${fileType}`);
|
|
17179
|
-
await
|
|
17192
|
+
await fs3.promises.writeFile(tempFilePath, Buffer.from(array));
|
|
17180
17193
|
buffer = Buffer.from(array);
|
|
17181
17194
|
filePath = tempFilePath;
|
|
17182
17195
|
} else {
|
|
17183
|
-
buffer = await
|
|
17196
|
+
buffer = await fs3.promises.readFile(file);
|
|
17184
17197
|
}
|
|
17185
17198
|
}
|
|
17186
17199
|
return { filePath, fileType, buffer };
|
|
@@ -17198,9 +17211,9 @@ async function documentProcessor({
|
|
|
17198
17211
|
const tempDir = path.join(process.cwd(), "temp", uuid);
|
|
17199
17212
|
const localFilesAndFoldersToDelete = [tempDir];
|
|
17200
17213
|
console.log(`[EXULU] Temporary directory for processing document ${name}: ${tempDir}`);
|
|
17201
|
-
await
|
|
17214
|
+
await fs3.promises.mkdir(tempDir, { recursive: true });
|
|
17202
17215
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString();
|
|
17203
|
-
await
|
|
17216
|
+
await fs3.promises.writeFile(path.join(tempDir, "created_at.txt"), timestamp);
|
|
17204
17217
|
try {
|
|
17205
17218
|
const {
|
|
17206
17219
|
filePath,
|
|
@@ -17241,7 +17254,7 @@ async function documentProcessor({
|
|
|
17241
17254
|
if (config?.debugging?.deleteTempFiles !== false) {
|
|
17242
17255
|
for (const file2 of localFilesAndFoldersToDelete) {
|
|
17243
17256
|
try {
|
|
17244
|
-
await
|
|
17257
|
+
await fs3.promises.rm(file2, { recursive: true });
|
|
17245
17258
|
console.log(`[EXULU] Deleted file or folder: ${file2}`);
|
|
17246
17259
|
} catch (error) {
|
|
17247
17260
|
console.error(`[EXULU] Error deleting file or folder: ${file2}`, error);
|
package/dist/index.js
CHANGED
|
@@ -747,7 +747,7 @@ var JOB_STATUS_ENUM = {
|
|
|
747
747
|
};
|
|
748
748
|
|
|
749
749
|
// ee/agentic-retrieval/index.ts
|
|
750
|
-
import { z as
|
|
750
|
+
import { z as z6 } from "zod";
|
|
751
751
|
import {
|
|
752
752
|
stepCountIs,
|
|
753
753
|
tool as tool2,
|
|
@@ -3188,7 +3188,7 @@ var mapType = (t, type, name, defaultValue, unique) => {
|
|
|
3188
3188
|
|
|
3189
3189
|
// src/exulu/tool.ts
|
|
3190
3190
|
import { tool } from "ai";
|
|
3191
|
-
import { z as
|
|
3191
|
+
import { z as z4 } from "zod";
|
|
3192
3192
|
import CryptoJS2 from "crypto-js";
|
|
3193
3193
|
|
|
3194
3194
|
// src/templates/tools/convert-exulu-tools-to-ai-sdk-tools.ts
|
|
@@ -3404,6 +3404,102 @@ function sanitizeToolName(name) {
|
|
|
3404
3404
|
|
|
3405
3405
|
// src/templates/tools/convert-exulu-tools-to-ai-sdk-tools.ts
|
|
3406
3406
|
import { randomUUID as randomUUID2 } from "crypto";
|
|
3407
|
+
|
|
3408
|
+
// src/templates/tools/memory-tool.ts
|
|
3409
|
+
import { z as z3 } from "zod";
|
|
3410
|
+
import fs from "fs";
|
|
3411
|
+
var createNewMemoryItemTool = (agent, context) => {
|
|
3412
|
+
const fields = {
|
|
3413
|
+
name: z3.string().describe("The name of the item to create"),
|
|
3414
|
+
description: z3.string().describe("The description of the item to create")
|
|
3415
|
+
};
|
|
3416
|
+
for (const field of context.fields) {
|
|
3417
|
+
switch (field.type) {
|
|
3418
|
+
case "text":
|
|
3419
|
+
case "longText":
|
|
3420
|
+
case "shortText":
|
|
3421
|
+
case "code":
|
|
3422
|
+
case "enum":
|
|
3423
|
+
fields[field.name] = z3.string().describe("The " + field.name + " of the item to create");
|
|
3424
|
+
break;
|
|
3425
|
+
case "json":
|
|
3426
|
+
fields[field.name] = z3.string({}).describe(
|
|
3427
|
+
"The " + field.name + " of the item to create, it should be a valid JSON string."
|
|
3428
|
+
);
|
|
3429
|
+
break;
|
|
3430
|
+
case "markdown":
|
|
3431
|
+
fields[field.name] = z3.string().describe(
|
|
3432
|
+
"The " + field.name + " of the item to create, it should be a valid Markdown string."
|
|
3433
|
+
);
|
|
3434
|
+
break;
|
|
3435
|
+
case "number":
|
|
3436
|
+
fields[field.name] = z3.number().describe("The " + field.name + " of the item to create");
|
|
3437
|
+
break;
|
|
3438
|
+
case "boolean":
|
|
3439
|
+
fields[field.name] = z3.boolean().describe("The " + field.name + " of the item to create");
|
|
3440
|
+
break;
|
|
3441
|
+
case "file":
|
|
3442
|
+
case "uuid":
|
|
3443
|
+
case "date":
|
|
3444
|
+
break;
|
|
3445
|
+
default:
|
|
3446
|
+
fields[field.name] = z3.string().describe("The " + field.name + " of the item to create");
|
|
3447
|
+
break;
|
|
3448
|
+
}
|
|
3449
|
+
}
|
|
3450
|
+
const toolName = "create_" + sanitizeName(context.name) + "_memory_item";
|
|
3451
|
+
return new ExuluTool({
|
|
3452
|
+
id: toolName,
|
|
3453
|
+
name: "Create " + context.name + " Memory Item",
|
|
3454
|
+
category: agent.name + "_memory",
|
|
3455
|
+
description: "Create a new memory item in the " + agent.name + " memory context",
|
|
3456
|
+
type: "function",
|
|
3457
|
+
inputSchema: z3.object(fields),
|
|
3458
|
+
config: [],
|
|
3459
|
+
execute: async ({ name, description, mode, information, exuluConfig, user }) => {
|
|
3460
|
+
let result = { result: "" };
|
|
3461
|
+
fs.writeFileSync("memory-tool.json", JSON.stringify({ name, description, information }, null, 2));
|
|
3462
|
+
try {
|
|
3463
|
+
const newItem = {
|
|
3464
|
+
name,
|
|
3465
|
+
description,
|
|
3466
|
+
information,
|
|
3467
|
+
rights_mode: "public"
|
|
3468
|
+
};
|
|
3469
|
+
const { item: createdItem, job: createdJob } = await context.createItem(
|
|
3470
|
+
newItem,
|
|
3471
|
+
exuluConfig,
|
|
3472
|
+
user?.id,
|
|
3473
|
+
user?.role?.id,
|
|
3474
|
+
false
|
|
3475
|
+
);
|
|
3476
|
+
fs.writeFileSync("memory-tool-created.json", JSON.stringify({ createdItem, createdJob }, null, 2));
|
|
3477
|
+
if (createdJob) {
|
|
3478
|
+
result = {
|
|
3479
|
+
result: `Created a Job to create the memory item with the following ID: ${createdJob}`
|
|
3480
|
+
};
|
|
3481
|
+
} else if (createdItem) {
|
|
3482
|
+
result = {
|
|
3483
|
+
result: `Created memory item with the following ID: ${createdItem.id}`
|
|
3484
|
+
};
|
|
3485
|
+
} else {
|
|
3486
|
+
result = {
|
|
3487
|
+
result: `Failed to create memory item`
|
|
3488
|
+
};
|
|
3489
|
+
}
|
|
3490
|
+
} catch (error) {
|
|
3491
|
+
fs.writeFileSync("memory-tool-error.json", JSON.stringify({ name, description, information, error }, null, 2));
|
|
3492
|
+
console.error("[EXULU] Error creating memory item", error);
|
|
3493
|
+
result = {
|
|
3494
|
+
result: `Failed to create memory item: ${error instanceof Error ? error.message : String(error)}`
|
|
3495
|
+
};
|
|
3496
|
+
}
|
|
3497
|
+
return result;
|
|
3498
|
+
}
|
|
3499
|
+
});
|
|
3500
|
+
};
|
|
3501
|
+
|
|
3502
|
+
// src/templates/tools/convert-exulu-tools-to-ai-sdk-tools.ts
|
|
3407
3503
|
var generateS3Key = (filename) => `${randomUUID2()}-${filename}`;
|
|
3408
3504
|
var s3Client2;
|
|
3409
3505
|
var getMimeType = (type) => {
|
|
@@ -3507,6 +3603,21 @@ var convertExuluToolsToAiSdkTools = async (currentTools, approvedTools, allExulu
|
|
|
3507
3603
|
currentTools.push(projectRetrievalTool);
|
|
3508
3604
|
}
|
|
3509
3605
|
}
|
|
3606
|
+
if (agent?.memory && contexts?.length) {
|
|
3607
|
+
const context = contexts.find((context2) => context2.id === agent?.memory);
|
|
3608
|
+
if (!context) {
|
|
3609
|
+
throw new Error(
|
|
3610
|
+
"Context was set for agent memory but not found in the contexts: " + agent?.memory + " please double check with a developer to see if the context was removed from code."
|
|
3611
|
+
);
|
|
3612
|
+
}
|
|
3613
|
+
const createNewMemoryTool = createNewMemoryItemTool(agent, context);
|
|
3614
|
+
if (createNewMemoryTool) {
|
|
3615
|
+
if (!currentTools) {
|
|
3616
|
+
currentTools = [];
|
|
3617
|
+
}
|
|
3618
|
+
currentTools.push(createNewMemoryTool);
|
|
3619
|
+
}
|
|
3620
|
+
}
|
|
3510
3621
|
console.log("[EXULU] Convert tools array to object, session items", items);
|
|
3511
3622
|
if (items) {
|
|
3512
3623
|
const sessionItemsRetrievalTool = await createSessionItemsRetrievalTool({
|
|
@@ -3752,7 +3863,7 @@ var ExuluTool = class {
|
|
|
3752
3863
|
this.type = type;
|
|
3753
3864
|
this.tool = tool({
|
|
3754
3865
|
description,
|
|
3755
|
-
inputSchema: inputSchema ||
|
|
3866
|
+
inputSchema: inputSchema || z4.object({}),
|
|
3756
3867
|
execute: execute2
|
|
3757
3868
|
});
|
|
3758
3869
|
}
|
|
@@ -3845,7 +3956,7 @@ var ExuluTool = class {
|
|
|
3845
3956
|
};
|
|
3846
3957
|
|
|
3847
3958
|
// src/exulu/context.ts
|
|
3848
|
-
import { z as
|
|
3959
|
+
import { z as z5 } from "zod";
|
|
3849
3960
|
|
|
3850
3961
|
// ee/queues/decorator.ts
|
|
3851
3962
|
import "bullmq";
|
|
@@ -4567,12 +4678,12 @@ var ExuluContext2 = class {
|
|
|
4567
4678
|
category: "contexts",
|
|
4568
4679
|
needsApproval: true,
|
|
4569
4680
|
// todo make configurable
|
|
4570
|
-
inputSchema:
|
|
4571
|
-
query:
|
|
4572
|
-
keywords:
|
|
4681
|
+
inputSchema: z5.object({
|
|
4682
|
+
query: z5.string().describe("The original question that the user asked"),
|
|
4683
|
+
keywords: z5.array(z5.string()).describe(
|
|
4573
4684
|
"The keywords that are relevant to the user's question, for example names of specific products, systems or parts, IDs, etc."
|
|
4574
4685
|
),
|
|
4575
|
-
method:
|
|
4686
|
+
method: z5.enum(["keyword", "semantic", "hybrid"]).default("hybrid").describe(
|
|
4576
4687
|
"Search method: 'hybrid' (best for most queries - combines semantic understanding with exact term matching), 'keyword' (best for exact terms, technical names, IDs, or specific phrases), 'semantic' (best for conceptual queries where synonyms and paraphrasing matter)"
|
|
4577
4688
|
)
|
|
4578
4689
|
}),
|
|
@@ -4783,11 +4894,11 @@ function createCustomAgenticRetrievalToolLoopAgent({
|
|
|
4783
4894
|
return await generateText({
|
|
4784
4895
|
model,
|
|
4785
4896
|
output: Output.object({
|
|
4786
|
-
schema:
|
|
4787
|
-
reasoning:
|
|
4897
|
+
schema: z6.object({
|
|
4898
|
+
reasoning: z6.string().describe(
|
|
4788
4899
|
"The reasoning for the next step and why the agent needs to take this step. It MUST start with 'I must call tool XYZ', and MUST include the inputs for that tool."
|
|
4789
4900
|
),
|
|
4790
|
-
finished:
|
|
4901
|
+
finished: z6.boolean().describe(
|
|
4791
4902
|
"Whether the agent has finished meaning no further steps are needed, this should only be true if the agent believes no further tool calls are needed to get the relevant information for the query."
|
|
4792
4903
|
)
|
|
4793
4904
|
})
|
|
@@ -4917,9 +5028,9 @@ function createCustomAgenticRetrievalToolLoopAgent({
|
|
|
4917
5028
|
if (chunksCount > 1) {
|
|
4918
5029
|
dynamicTools[getMoreToolName] = tool2({
|
|
4919
5030
|
description: `The item ${chunk.item_name} has a total of${chunksCount} chunks, this tool allows you to get more content from this item across all its pages / chunks.`,
|
|
4920
|
-
inputSchema:
|
|
4921
|
-
from_index:
|
|
4922
|
-
to_index:
|
|
5031
|
+
inputSchema: z6.object({
|
|
5032
|
+
from_index: z6.number().default(1).describe("The index of the chunk to start from."),
|
|
5033
|
+
to_index: z6.number().max(chunksCount).describe("The index of the chunk to end at, max is " + chunksCount)
|
|
4923
5034
|
}),
|
|
4924
5035
|
execute: async ({ from_index, to_index }) => {
|
|
4925
5036
|
const chunks2 = await db2(chunksTable).select("*").where("source", chunk.item_id).whereBetween("chunk_index", [from_index, to_index]).orderBy("chunk_index", "asc");
|
|
@@ -4947,8 +5058,8 @@ function createCustomAgenticRetrievalToolLoopAgent({
|
|
|
4947
5058
|
);
|
|
4948
5059
|
dynamicTools[getContentToolName] = tool2({
|
|
4949
5060
|
description: `Get the content of the page ${chunk.chunk_index} for the item ${chunk.item_name}`,
|
|
4950
|
-
inputSchema:
|
|
4951
|
-
reasoning:
|
|
5061
|
+
inputSchema: z6.object({
|
|
5062
|
+
reasoning: z6.string().describe("The reasoning for why you need to get the content of the page.")
|
|
4952
5063
|
}),
|
|
4953
5064
|
execute: async ({ reasoning }) => {
|
|
4954
5065
|
const { db: db3 } = await postgresClient();
|
|
@@ -5016,8 +5127,8 @@ var createAgenticRetrievalAgent = ({
|
|
|
5016
5127
|
search_items_by_name: tool2({
|
|
5017
5128
|
description: `
|
|
5018
5129
|
Search for relevant items by name across the available knowledge bases.`,
|
|
5019
|
-
inputSchema:
|
|
5020
|
-
knowledge_base_ids:
|
|
5130
|
+
inputSchema: z6.object({
|
|
5131
|
+
knowledge_base_ids: z6.array(z6.enum(contexts.map((ctx) => ctx.id))).describe(`
|
|
5021
5132
|
The available knowledge bases are:
|
|
5022
5133
|
${contexts.map(
|
|
5023
5134
|
(ctx) => `
|
|
@@ -5029,8 +5140,8 @@ var createAgenticRetrievalAgent = ({
|
|
|
5029
5140
|
`
|
|
5030
5141
|
).join("\n")}
|
|
5031
5142
|
`),
|
|
5032
|
-
item_name:
|
|
5033
|
-
limit:
|
|
5143
|
+
item_name: z6.string().describe("The name of the item to search for."),
|
|
5144
|
+
limit: z6.number().default(100).describe(
|
|
5034
5145
|
"Maximum number of items to return (max 400), if searching through multiple knowledge bases, the limit is applied for each knowledge base individually."
|
|
5035
5146
|
)
|
|
5036
5147
|
}),
|
|
@@ -5136,11 +5247,11 @@ var createAgenticRetrievalAgent = ({
|
|
|
5136
5247
|
- You can always fetch content later if needed
|
|
5137
5248
|
|
|
5138
5249
|
`,
|
|
5139
|
-
inputSchema:
|
|
5140
|
-
query:
|
|
5250
|
+
inputSchema: z6.object({
|
|
5251
|
+
query: z6.string().describe(
|
|
5141
5252
|
"The search query to find relevant chunks, this must always be related to the content you are looking for, not something like 'Page 2'."
|
|
5142
5253
|
),
|
|
5143
|
-
knowledge_base_ids:
|
|
5254
|
+
knowledge_base_ids: z6.array(z6.enum(contexts.map((ctx) => ctx.id))).describe(`
|
|
5144
5255
|
The available knowledge bases are:
|
|
5145
5256
|
${contexts.map(
|
|
5146
5257
|
(ctx) => `
|
|
@@ -5152,25 +5263,25 @@ var createAgenticRetrievalAgent = ({
|
|
|
5152
5263
|
`
|
|
5153
5264
|
).join("\n")}
|
|
5154
5265
|
`),
|
|
5155
|
-
keywords:
|
|
5266
|
+
keywords: z6.array(z6.string()).optional().describe(
|
|
5156
5267
|
"Keywords to search for. Usually extracted from the query, allowing for more precise search results."
|
|
5157
5268
|
),
|
|
5158
|
-
searchMethod:
|
|
5269
|
+
searchMethod: z6.enum(["keyword", "semantic", "hybrid"]).default("hybrid").describe(
|
|
5159
5270
|
"Search method: 'hybrid' (best for most queries - combines semantic understanding with exact term matching), 'keyword' (best for exact terms, technical names, IDs, or specific phrases), 'semantic' (best for conceptual queries where synonyms and paraphrasing matter)"
|
|
5160
5271
|
),
|
|
5161
|
-
includeContent:
|
|
5272
|
+
includeContent: z6.boolean().default(true).describe(
|
|
5162
5273
|
"Whether to include the full chunk content in results. Set to FALSE when you only need to know WHICH documents/items are relevant (lists, overviews, counts). Set to TRUE when you need the ACTUAL content to answer the question (information, details, explanations). You can always fetch content later, so prefer FALSE for efficiency when listing documents."
|
|
5163
5274
|
),
|
|
5164
|
-
item_ids:
|
|
5275
|
+
item_ids: z6.array(z6.string()).optional().describe(
|
|
5165
5276
|
"Use if you wish to retrieve content from specific items (documents) based on the item ID."
|
|
5166
5277
|
),
|
|
5167
|
-
item_names:
|
|
5278
|
+
item_names: z6.array(z6.string()).optional().describe(
|
|
5168
5279
|
"Use if you wish to retrieve content from specific items (documents) based on the item name. Can be a partial match."
|
|
5169
5280
|
),
|
|
5170
|
-
item_external_ids:
|
|
5281
|
+
item_external_ids: z6.array(z6.string()).optional().describe(
|
|
5171
5282
|
"Use if you wish to retrieve content from specific items (documents) based on the item external ID. Can be a partial match."
|
|
5172
5283
|
),
|
|
5173
|
-
limit:
|
|
5284
|
+
limit: z6.number().default(10).describe("Maximum number of chunks to return (max 10)")
|
|
5174
5285
|
}),
|
|
5175
5286
|
execute: async ({
|
|
5176
5287
|
query,
|
|
@@ -5361,9 +5472,9 @@ var createAgenticRetrievalTool = ({
|
|
|
5361
5472
|
default: true
|
|
5362
5473
|
}))
|
|
5363
5474
|
],
|
|
5364
|
-
inputSchema:
|
|
5365
|
-
query:
|
|
5366
|
-
userInstructions:
|
|
5475
|
+
inputSchema: z6.object({
|
|
5476
|
+
query: z6.string().describe("The question or query to answer using the knowledge bases"),
|
|
5477
|
+
userInstructions: z6.string().optional().describe("Instructions provided by the user to customize the retrieval process.")
|
|
5367
5478
|
}),
|
|
5368
5479
|
execute: async function* ({
|
|
5369
5480
|
query,
|
|
@@ -9856,7 +9967,7 @@ import { InMemoryLRUCache } from "@apollo/utils.keyvaluecache";
|
|
|
9856
9967
|
import bodyParser from "body-parser";
|
|
9857
9968
|
import CryptoJS6 from "crypto-js";
|
|
9858
9969
|
import OpenAI from "openai";
|
|
9859
|
-
import
|
|
9970
|
+
import fs2 from "fs";
|
|
9860
9971
|
import { randomUUID as randomUUID4 } from "crypto";
|
|
9861
9972
|
import "@opentelemetry/api";
|
|
9862
9973
|
import Anthropic from "@anthropic-ai/sdk";
|
|
@@ -9912,98 +10023,6 @@ function generateSlug(name) {
|
|
|
9912
10023
|
// src/exulu/provider.ts
|
|
9913
10024
|
import CryptoJS5 from "crypto-js";
|
|
9914
10025
|
import { parseOfficeAsync } from "officeparser";
|
|
9915
|
-
|
|
9916
|
-
// src/templates/tools/memory-tool.ts
|
|
9917
|
-
import { z as z6 } from "zod";
|
|
9918
|
-
var createNewMemoryItemTool = (agent, context) => {
|
|
9919
|
-
const fields = {
|
|
9920
|
-
name: z6.string().describe("The name of the item to create"),
|
|
9921
|
-
description: z6.string().describe("The description of the item to create")
|
|
9922
|
-
};
|
|
9923
|
-
for (const field of context.fields) {
|
|
9924
|
-
switch (field.type) {
|
|
9925
|
-
case "text":
|
|
9926
|
-
case "longText":
|
|
9927
|
-
case "shortText":
|
|
9928
|
-
case "code":
|
|
9929
|
-
case "enum":
|
|
9930
|
-
fields[field.name] = z6.string().describe("The " + field.name + " of the item to create");
|
|
9931
|
-
break;
|
|
9932
|
-
case "json":
|
|
9933
|
-
fields[field.name] = z6.string({}).describe(
|
|
9934
|
-
"The " + field.name + " of the item to create, it should be a valid JSON string."
|
|
9935
|
-
);
|
|
9936
|
-
break;
|
|
9937
|
-
case "markdown":
|
|
9938
|
-
fields[field.name] = z6.string().describe(
|
|
9939
|
-
"The " + field.name + " of the item to create, it should be a valid Markdown string."
|
|
9940
|
-
);
|
|
9941
|
-
break;
|
|
9942
|
-
case "number":
|
|
9943
|
-
fields[field.name] = z6.number().describe("The " + field.name + " of the item to create");
|
|
9944
|
-
break;
|
|
9945
|
-
case "boolean":
|
|
9946
|
-
fields[field.name] = z6.boolean().describe("The " + field.name + " of the item to create");
|
|
9947
|
-
break;
|
|
9948
|
-
case "file":
|
|
9949
|
-
case "uuid":
|
|
9950
|
-
case "date":
|
|
9951
|
-
break;
|
|
9952
|
-
default:
|
|
9953
|
-
fields[field.name] = z6.string().describe("The " + field.name + " of the item to create");
|
|
9954
|
-
break;
|
|
9955
|
-
}
|
|
9956
|
-
}
|
|
9957
|
-
return new ExuluTool({
|
|
9958
|
-
id: "create_" + agent.name + "_memory_item",
|
|
9959
|
-
name: "Create " + agent.name + " Memory Item",
|
|
9960
|
-
category: agent.name + "_memory",
|
|
9961
|
-
description: "Create a new memory item in the " + agent.name + " memory context",
|
|
9962
|
-
type: "function",
|
|
9963
|
-
inputSchema: z6.object(fields),
|
|
9964
|
-
config: [],
|
|
9965
|
-
execute: async ({ name, description, mode, information, exuluConfig, user }) => {
|
|
9966
|
-
let result = { result: "" };
|
|
9967
|
-
switch (mode) {
|
|
9968
|
-
case "learnings":
|
|
9969
|
-
break;
|
|
9970
|
-
case "knowledge":
|
|
9971
|
-
const newItem = {
|
|
9972
|
-
name,
|
|
9973
|
-
description,
|
|
9974
|
-
information,
|
|
9975
|
-
rights_mode: "public"
|
|
9976
|
-
};
|
|
9977
|
-
const { item: createdItem, job: createdJob } = await context.createItem(
|
|
9978
|
-
newItem,
|
|
9979
|
-
exuluConfig,
|
|
9980
|
-
user?.id,
|
|
9981
|
-
user?.role?.id,
|
|
9982
|
-
false
|
|
9983
|
-
);
|
|
9984
|
-
if (createdJob) {
|
|
9985
|
-
result = {
|
|
9986
|
-
result: `Created a Job to create the memory item with the following ID: ${createdJob}`
|
|
9987
|
-
};
|
|
9988
|
-
} else if (createdItem) {
|
|
9989
|
-
result = {
|
|
9990
|
-
result: `Created memory item with the following ID: ${createdItem.id}`
|
|
9991
|
-
};
|
|
9992
|
-
} else {
|
|
9993
|
-
result = {
|
|
9994
|
-
result: `Failed to create memory item`
|
|
9995
|
-
};
|
|
9996
|
-
}
|
|
9997
|
-
break;
|
|
9998
|
-
default:
|
|
9999
|
-
throw new Error(`Invalid mode: ${mode}`);
|
|
10000
|
-
}
|
|
10001
|
-
return result;
|
|
10002
|
-
}
|
|
10003
|
-
});
|
|
10004
|
-
};
|
|
10005
|
-
|
|
10006
|
-
// src/exulu/provider.ts
|
|
10007
10026
|
var ExuluProvider = class {
|
|
10008
10027
|
// Must begin with a letter (a-z) or underscore (_). Subsequent characters in a name can be letters, digits (0-9), or
|
|
10009
10028
|
// underscores and be a max length of 80 characters and at least 5 characters long.
|
|
@@ -10263,13 +10282,6 @@ var ExuluProvider = class {
|
|
|
10263
10282
|
|
|
10264
10283
|
${result.chunks.map((chunk) => chunk.chunk_content).join("\n\n")}`;
|
|
10265
10284
|
}
|
|
10266
|
-
const createNewMemoryTool = createNewMemoryItemTool(agent, context);
|
|
10267
|
-
if (createNewMemoryTool) {
|
|
10268
|
-
if (!currentTools) {
|
|
10269
|
-
currentTools = [];
|
|
10270
|
-
}
|
|
10271
|
-
currentTools.push(createNewMemoryTool);
|
|
10272
|
-
}
|
|
10273
10285
|
}
|
|
10274
10286
|
const personalizationInformation = exuluConfig?.privacy?.systemPromptPersonalization !== false ? `
|
|
10275
10287
|
${user?.firstname ? `The users first name is "${user.firstname}"` : ""}
|
|
@@ -10333,6 +10345,10 @@ var ExuluProvider = class {
|
|
|
10333
10345
|
Example: {url: https://www.google.com, title: Google, snippet: The result of the web search.}
|
|
10334
10346
|
`;
|
|
10335
10347
|
}
|
|
10348
|
+
system += `
|
|
10349
|
+
|
|
10350
|
+
When a tool execution is not approved by the user, do not retry it unless explicitly asked by the user. ' +
|
|
10351
|
+
'Inform the user that the action was not performed.`;
|
|
10336
10352
|
if (prompt) {
|
|
10337
10353
|
let result = { object: null, text: "" };
|
|
10338
10354
|
let inputTokens = 0;
|
|
@@ -10647,13 +10663,6 @@ ${extractedText}
|
|
|
10647
10663
|
|
|
10648
10664
|
${result2.chunks.map((chunk) => chunk.chunk_content).join("\n\n")}`;
|
|
10649
10665
|
}
|
|
10650
|
-
const createNewMemoryTool = createNewMemoryItemTool(agent, context);
|
|
10651
|
-
if (createNewMemoryTool) {
|
|
10652
|
-
if (!currentTools) {
|
|
10653
|
-
currentTools = [];
|
|
10654
|
-
}
|
|
10655
|
-
currentTools.push(createNewMemoryTool);
|
|
10656
|
-
}
|
|
10657
10666
|
}
|
|
10658
10667
|
messages = messages.filter(
|
|
10659
10668
|
(message2, index, self) => index === self.findLastIndex((t) => t.id === message2.id)
|
|
@@ -10712,6 +10721,10 @@ ${extractedText}
|
|
|
10712
10721
|
Example: {url: https://www.google.com, title: Google, snippet: The result of the web search.}
|
|
10713
10722
|
`;
|
|
10714
10723
|
}
|
|
10724
|
+
system += `
|
|
10725
|
+
|
|
10726
|
+
When a tool execution is not approved by the user, do not retry it unless explicitly asked by the user. ' +
|
|
10727
|
+
'Inform the user that the action was not performed.`;
|
|
10715
10728
|
const result = streamText({
|
|
10716
10729
|
model,
|
|
10717
10730
|
// Should be a LanguageModelV1
|
|
@@ -10860,7 +10873,7 @@ var REQUEST_SIZE_LIMIT = "50mb";
|
|
|
10860
10873
|
var getExuluVersionNumber = async () => {
|
|
10861
10874
|
try {
|
|
10862
10875
|
const path2 = process.cwd();
|
|
10863
|
-
const packageJson =
|
|
10876
|
+
const packageJson = fs2.readFileSync(path2 + "/package.json", "utf8");
|
|
10864
10877
|
const packageData = JSON.parse(packageJson);
|
|
10865
10878
|
const exuluVersion = packageData.dependencies["@exulu/backend"];
|
|
10866
10879
|
console.log(`[EXULU] Installed exulu-backend version: ${exuluVersion}`);
|
|
@@ -16427,7 +16440,7 @@ Or manually run the setup script:
|
|
|
16427
16440
|
}
|
|
16428
16441
|
|
|
16429
16442
|
// ee/python/documents/processing/doc_processor.ts
|
|
16430
|
-
import * as
|
|
16443
|
+
import * as fs3 from "fs";
|
|
16431
16444
|
import * as path from "path";
|
|
16432
16445
|
import { generateText as generateText3, Output as Output3 } from "ai";
|
|
16433
16446
|
import { z as z13 } from "zod";
|
|
@@ -16652,7 +16665,7 @@ function reconstructHeadings(correctedText, headingsHierarchy) {
|
|
|
16652
16665
|
return result;
|
|
16653
16666
|
}
|
|
16654
16667
|
async function validatePageWithVLM(page, imagePath, model) {
|
|
16655
|
-
const imageBuffer = await
|
|
16668
|
+
const imageBuffer = await fs3.promises.readFile(imagePath);
|
|
16656
16669
|
const imageBase64 = imageBuffer.toString("base64");
|
|
16657
16670
|
const mimeType = "image/png";
|
|
16658
16671
|
const prompt = `You are a document validation assistant. Your task is to analyze a page image and correct the output of an OCR/parsing pipeline. The content may include tables, technical diagrams, schematics, and structured text.
|
|
@@ -16984,7 +16997,7 @@ ${setupResult.output || ""}`);
|
|
|
16984
16997
|
if (!result.success) {
|
|
16985
16998
|
throw new Error(`Document processing failed: ${result.stderr}`);
|
|
16986
16999
|
}
|
|
16987
|
-
const jsonContent = await
|
|
17000
|
+
const jsonContent = await fs3.promises.readFile(paths.json, "utf-8");
|
|
16988
17001
|
json = JSON.parse(jsonContent);
|
|
16989
17002
|
} else if (config?.processor.name === "officeparser") {
|
|
16990
17003
|
const text = await parseOfficeAsync2(buffer, {
|
|
@@ -17017,9 +17030,9 @@ ${setupResult.output || ""}`);
|
|
|
17017
17030
|
}, 10);
|
|
17018
17031
|
const parser = new LiteParse();
|
|
17019
17032
|
const screenshots = await parser.screenshot(paths.source, void 0);
|
|
17020
|
-
await
|
|
17033
|
+
await fs3.promises.mkdir(paths.images, { recursive: true });
|
|
17021
17034
|
for (const screenshot of screenshots) {
|
|
17022
|
-
await
|
|
17035
|
+
await fs3.promises.writeFile(
|
|
17023
17036
|
path.join(
|
|
17024
17037
|
paths.images,
|
|
17025
17038
|
`${screenshot.pageNum}.png`
|
|
@@ -17034,15 +17047,15 @@ ${setupResult.output || ""}`);
|
|
|
17034
17047
|
image: screenshots.find((s) => s.pageNum === page.index + 1)?.imagePath,
|
|
17035
17048
|
headings: []
|
|
17036
17049
|
}));
|
|
17037
|
-
|
|
17050
|
+
fs3.writeFileSync(paths.json, JSON.stringify(json, null, 2));
|
|
17038
17051
|
} else if (config?.processor.name === "liteparse") {
|
|
17039
17052
|
const parser = new LiteParse();
|
|
17040
17053
|
const result = await parser.parse(paths.source);
|
|
17041
17054
|
const screenshots = await parser.screenshot(paths.source, void 0);
|
|
17042
17055
|
console.log(`[EXULU] Liteparse screenshots: ${JSON.stringify(screenshots)}`);
|
|
17043
|
-
await
|
|
17056
|
+
await fs3.promises.mkdir(paths.images, { recursive: true });
|
|
17044
17057
|
for (const screenshot of screenshots) {
|
|
17045
|
-
await
|
|
17058
|
+
await fs3.promises.writeFile(path.join(paths.images, `${screenshot.pageNum}.png`), screenshot.imageBuffer);
|
|
17046
17059
|
screenshot.imagePath = path.join(paths.images, `${screenshot.pageNum}.png`);
|
|
17047
17060
|
}
|
|
17048
17061
|
json = result.pages.map((page) => ({
|
|
@@ -17050,7 +17063,7 @@ ${setupResult.output || ""}`);
|
|
|
17050
17063
|
content: page.text,
|
|
17051
17064
|
image: screenshots.find((s) => s.pageNum === page.pageNum)?.imagePath
|
|
17052
17065
|
}));
|
|
17053
|
-
|
|
17066
|
+
fs3.writeFileSync(paths.json, JSON.stringify(json, null, 2));
|
|
17054
17067
|
}
|
|
17055
17068
|
console.log(`[EXULU]
|
|
17056
17069
|
\u2713 Document processing completed successfully`);
|
|
@@ -17081,13 +17094,13 @@ ${setupResult.output || ""}`);
|
|
|
17081
17094
|
console.log(`[EXULU] Corrected: ${page.vlm_corrected_text.substring(0, 150)}...`);
|
|
17082
17095
|
});
|
|
17083
17096
|
}
|
|
17084
|
-
await
|
|
17097
|
+
await fs3.promises.writeFile(
|
|
17085
17098
|
paths.json,
|
|
17086
17099
|
JSON.stringify(json, null, 2),
|
|
17087
17100
|
"utf-8"
|
|
17088
17101
|
);
|
|
17089
17102
|
}
|
|
17090
|
-
const markdownStream =
|
|
17103
|
+
const markdownStream = fs3.createWriteStream(paths.markdown, { encoding: "utf-8" });
|
|
17091
17104
|
for (let i = 0; i < json.length; i++) {
|
|
17092
17105
|
const p = json[i];
|
|
17093
17106
|
if (!p) continue;
|
|
@@ -17103,7 +17116,7 @@ ${setupResult.output || ""}`);
|
|
|
17103
17116
|
});
|
|
17104
17117
|
console.log(`[EXULU] Validated output saved to: ${paths.json}`);
|
|
17105
17118
|
console.log(`[EXULU] Validated markdown saved to: ${paths.markdown}`);
|
|
17106
|
-
const markdown = await
|
|
17119
|
+
const markdown = await fs3.promises.readFile(paths.markdown, "utf-8");
|
|
17107
17120
|
const processedJson = json.map((e) => {
|
|
17108
17121
|
const finalContent = e.vlm_corrected_text ?? e.content;
|
|
17109
17122
|
return {
|
|
@@ -17134,7 +17147,7 @@ var loadFile = async (file, name, tempDir) => {
|
|
|
17134
17147
|
let buffer;
|
|
17135
17148
|
if (Buffer.isBuffer(file)) {
|
|
17136
17149
|
filePath = path.join(tempDir, `${UUID}.${fileType}`);
|
|
17137
|
-
await
|
|
17150
|
+
await fs3.promises.writeFile(filePath, file);
|
|
17138
17151
|
buffer = file;
|
|
17139
17152
|
} else {
|
|
17140
17153
|
filePath = filePath.trim();
|
|
@@ -17142,11 +17155,11 @@ var loadFile = async (file, name, tempDir) => {
|
|
|
17142
17155
|
const response = await fetch(filePath);
|
|
17143
17156
|
const array = await response.arrayBuffer();
|
|
17144
17157
|
const tempFilePath = path.join(tempDir, `${UUID}.${fileType}`);
|
|
17145
|
-
await
|
|
17158
|
+
await fs3.promises.writeFile(tempFilePath, Buffer.from(array));
|
|
17146
17159
|
buffer = Buffer.from(array);
|
|
17147
17160
|
filePath = tempFilePath;
|
|
17148
17161
|
} else {
|
|
17149
|
-
buffer = await
|
|
17162
|
+
buffer = await fs3.promises.readFile(file);
|
|
17150
17163
|
}
|
|
17151
17164
|
}
|
|
17152
17165
|
return { filePath, fileType, buffer };
|
|
@@ -17164,9 +17177,9 @@ async function documentProcessor({
|
|
|
17164
17177
|
const tempDir = path.join(process.cwd(), "temp", uuid);
|
|
17165
17178
|
const localFilesAndFoldersToDelete = [tempDir];
|
|
17166
17179
|
console.log(`[EXULU] Temporary directory for processing document ${name}: ${tempDir}`);
|
|
17167
|
-
await
|
|
17180
|
+
await fs3.promises.mkdir(tempDir, { recursive: true });
|
|
17168
17181
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString();
|
|
17169
|
-
await
|
|
17182
|
+
await fs3.promises.writeFile(path.join(tempDir, "created_at.txt"), timestamp);
|
|
17170
17183
|
try {
|
|
17171
17184
|
const {
|
|
17172
17185
|
filePath,
|
|
@@ -17207,7 +17220,7 @@ async function documentProcessor({
|
|
|
17207
17220
|
if (config?.debugging?.deleteTempFiles !== false) {
|
|
17208
17221
|
for (const file2 of localFilesAndFoldersToDelete) {
|
|
17209
17222
|
try {
|
|
17210
|
-
await
|
|
17223
|
+
await fs3.promises.rm(file2, { recursive: true });
|
|
17211
17224
|
console.log(`[EXULU] Deleted file or folder: ${file2}`);
|
|
17212
17225
|
} catch (error) {
|
|
17213
17226
|
console.error(`[EXULU] Error deleting file or folder: ${file2}`, error);
|